-rw-r--r--  Documentation/block/biodoc.txt | 2
-rw-r--r--  arch/s390/include/asm/eadm.h | 6
-rw-r--r--  arch/um/drivers/ubd_kern.c | 2
-rw-r--r--  block/badblocks.c | 1
-rw-r--r--  block/bfq-iosched.c | 59
-rw-r--r--  block/bio-integrity.c | 8
-rw-r--r--  block/bio.c | 85
-rw-r--r--  block/blk-core.c | 331
-rw-r--r--  block/blk-exec.c | 4
-rw-r--r--  block/blk-flush.c | 16
-rw-r--r--  block/blk-integrity.c | 4
-rw-r--r--  block/blk-map.c | 7
-rw-r--r--  block/blk-merge.c | 48
-rw-r--r--  block/blk-mq-cpumap.c | 68
-rw-r--r--  block/blk-mq-debugfs.c | 101
-rw-r--r--  block/blk-mq-sched.c | 158
-rw-r--r--  block/blk-mq-sched.h | 28
-rw-r--r--  block/blk-mq.c | 399
-rw-r--r--  block/blk-mq.h | 11
-rw-r--r--  block/blk-settings.c | 5
-rw-r--r--  block/blk-tag.c | 15
-rw-r--r--  block/blk-timeout.c | 4
-rw-r--r--  block/blk.h | 15
-rw-r--r--  block/bounce.c | 47
-rw-r--r--  block/bsg-lib.c | 5
-rw-r--r--  block/bsg.c | 13
-rw-r--r--  block/cfq-iosched.c | 9
-rw-r--r--  block/elevator.c | 1
-rw-r--r--  block/genhd.c | 4
-rw-r--r--  block/ioprio.c | 3
-rw-r--r--  block/kyber-iosched.c | 31
-rw-r--r--  block/scsi_ioctl.c | 13
-rw-r--r--  block/t10-pi.c | 32
-rw-r--r--  drivers/block/DAC960.c | 2
-rw-r--r--  drivers/block/amiflop.c | 10
-rw-r--r--  drivers/block/aoe/aoeblk.c | 1
-rw-r--r--  drivers/block/aoe/aoecmd.c | 12
-rw-r--r--  drivers/block/aoe/aoedev.c | 2
-rw-r--r--  drivers/block/ataflop.c | 16
-rw-r--r--  drivers/block/brd.c | 1
-rw-r--r--  drivers/block/cciss.c | 4
-rw-r--r--  drivers/block/drbd/drbd_actlog.c | 2
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c | 6
-rw-r--r--  drivers/block/drbd/drbd_int.h | 5
-rw-r--r--  drivers/block/drbd/drbd_main.c | 14
-rw-r--r--  drivers/block/drbd/drbd_nl.c | 2
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 6
-rw-r--r--  drivers/block/drbd/drbd_req.c | 8
-rw-r--r--  drivers/block/drbd/drbd_req.h | 2
-rw-r--r--  drivers/block/drbd/drbd_worker.c | 16
-rw-r--r--  drivers/block/floppy.c | 9
-rw-r--r--  drivers/block/loop.c | 64
-rw-r--r--  drivers/block/loop.h | 1
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c | 54
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h | 2
-rw-r--r--  drivers/block/nbd.c | 44
-rw-r--r--  drivers/block/null_blk.c | 125
-rw-r--r--  drivers/block/paride/pcd.c | 9
-rw-r--r--  drivers/block/paride/pd.c | 3
-rw-r--r--  drivers/block/paride/pf.c | 19
-rw-r--r--  drivers/block/pktcdvd.c | 40
-rw-r--r--  drivers/block/ps3disk.c | 11
-rw-r--r--  drivers/block/ps3vram.c | 16
-rw-r--r--  drivers/block/rbd.c | 28
-rw-r--r--  drivers/block/rsxx/dev.c | 17
-rw-r--r--  drivers/block/rsxx/dma.c | 13
-rw-r--r--  drivers/block/rsxx/rsxx_priv.h | 2
-rw-r--r--  drivers/block/skd_main.c | 32
-rw-r--r--  drivers/block/sunvdc.c | 4
-rw-r--r--  drivers/block/swim.c | 8
-rw-r--r--  drivers/block/swim3.c | 29
-rw-r--r--  drivers/block/sx8.c | 20
-rw-r--r--  drivers/block/umem.c | 4
-rw-r--r--  drivers/block/virtio_blk.c | 23
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 19
-rw-r--r--  drivers/block/xen-blkfront.c | 81
-rw-r--r--  drivers/block/xsysace.c | 9
-rw-r--r--  drivers/block/z2ram.c | 4
-rw-r--r--  drivers/cdrom/cdrom.c | 7
-rw-r--r--  drivers/cdrom/gdrom.c | 10
-rw-r--r--  drivers/ide/ide-atapi.c | 12
-rw-r--r--  drivers/ide/ide-cd.c | 11
-rw-r--r--  drivers/ide/ide-cd_ioctl.c | 1
-rw-r--r--  drivers/ide/ide-devsets.c | 1
-rw-r--r--  drivers/ide/ide-disk.c | 1
-rw-r--r--  drivers/ide/ide-dma.c | 2
-rw-r--r--  drivers/ide/ide-eh.c | 16
-rw-r--r--  drivers/ide/ide-floppy.c | 6
-rw-r--r--  drivers/ide/ide-io.c | 10
-rw-r--r--  drivers/ide/ide-ioctls.c | 2
-rw-r--r--  drivers/ide/ide-park.c | 2
-rw-r--r--  drivers/ide/ide-pm.c | 8
-rw-r--r--  drivers/ide/ide-probe.c | 7
-rw-r--r--  drivers/ide/ide-tape.c | 3
-rw-r--r--  drivers/ide/ide-taskfile.c | 7
-rw-r--r--  drivers/ide/siimage.c | 6
-rw-r--r--  drivers/lightnvm/core.c | 13
-rw-r--r--  drivers/lightnvm/pblk-cache.c | 8
-rw-r--r--  drivers/lightnvm/pblk-core.c | 617
-rw-r--r--  drivers/lightnvm/pblk-gc.c | 475
-rw-r--r--  drivers/lightnvm/pblk-init.c | 389
-rw-r--r--  drivers/lightnvm/pblk-map.c | 75
-rw-r--r--  drivers/lightnvm/pblk-rb.c | 106
-rw-r--r--  drivers/lightnvm/pblk-read.c | 93
-rw-r--r--  drivers/lightnvm/pblk-recovery.c | 275
-rw-r--r--  drivers/lightnvm/pblk-rl.c | 90
-rw-r--r--  drivers/lightnvm/pblk-sysfs.c | 94
-rw-r--r--  drivers/lightnvm/pblk-write.c | 353
-rw-r--r--  drivers/lightnvm/pblk.h | 296
-rw-r--r--  drivers/lightnvm/rrpc.c | 10
-rw-r--r--  drivers/md/bcache/bcache.h | 7
-rw-r--r--  drivers/md/bcache/btree.c | 6
-rw-r--r--  drivers/md/bcache/debug.c | 2
-rw-r--r--  drivers/md/bcache/io.c | 6
-rw-r--r--  drivers/md/bcache/journal.c | 2
-rw-r--r--  drivers/md/bcache/movinggc.c | 10
-rw-r--r--  drivers/md/bcache/request.c | 28
-rw-r--r--  drivers/md/bcache/request.h | 2
-rw-r--r--  drivers/md/bcache/super.c | 14
-rw-r--r--  drivers/md/bcache/writeback.c | 4
-rw-r--r--  drivers/md/dm-bio-prison-v1.c | 4
-rw-r--r--  drivers/md/dm-bio-prison-v1.h | 2
-rw-r--r--  drivers/md/dm-bufio.c | 28
-rw-r--r--  drivers/md/dm-cache-target.c | 36
-rw-r--r--  drivers/md/dm-crypt.c | 41
-rw-r--r--  drivers/md/dm-flakey.c | 13
-rw-r--r--  drivers/md/dm-integrity.c | 30
-rw-r--r--  drivers/md/dm-io.c | 13
-rw-r--r--  drivers/md/dm-log-writes.c | 13
-rw-r--r--  drivers/md/dm-mpath.c | 85
-rw-r--r--  drivers/md/dm-raid1.c | 29
-rw-r--r--  drivers/md/dm-rq.c | 30
-rw-r--r--  drivers/md/dm-rq.h | 2
-rw-r--r--  drivers/md/dm-snap.c | 15
-rw-r--r--  drivers/md/dm-stripe.c | 17
-rw-r--r--  drivers/md/dm-target.c | 2
-rw-r--r--  drivers/md/dm-thin.c | 67
-rw-r--r--  drivers/md/dm-verity-target.c | 16
-rw-r--r--  drivers/md/dm-zero.c | 4
-rw-r--r--  drivers/md/dm.c | 88
-rw-r--r--  drivers/md/md.c | 14
-rw-r--r--  drivers/md/multipath.c | 10
-rw-r--r--  drivers/md/raid1.c | 38
-rw-r--r--  drivers/md/raid10.c | 38
-rw-r--r--  drivers/md/raid5-cache.c | 6
-rw-r--r--  drivers/md/raid5-ppl.c | 4
-rw-r--r--  drivers/md/raid5.c | 24
-rw-r--r--  drivers/memstick/core/ms_block.c | 7
-rw-r--r--  drivers/memstick/core/mspro_block.c | 8
-rw-r--r--  drivers/mmc/core/block.c | 37
-rw-r--r--  drivers/mmc/core/queue.c | 3
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 31
-rw-r--r--  drivers/mtd/ubi/block.c | 8
-rw-r--r--  drivers/nvdimm/blk.c | 5
-rw-r--r--  drivers/nvdimm/btt.c | 5
-rw-r--r--  drivers/nvdimm/pmem.c | 29
-rw-r--r--  drivers/nvme/host/Kconfig | 12
-rw-r--r--  drivers/nvme/host/Makefile | 1
-rw-r--r--  drivers/nvme/host/core.c | 525
-rw-r--r--  drivers/nvme/host/fabrics.c | 69
-rw-r--r--  drivers/nvme/host/fabrics.h | 4
-rw-r--r--  drivers/nvme/host/fc.c | 137
-rw-r--r--  drivers/nvme/host/lightnvm.c | 18
-rw-r--r--  drivers/nvme/host/nvme.h | 42
-rw-r--r--  drivers/nvme/host/pci.c | 647
-rw-r--r--  drivers/nvme/host/rdma.c | 212
-rw-r--r--  drivers/nvme/host/scsi.c | 2460
-rw-r--r--  drivers/nvme/target/admin-cmd.c | 65
-rw-r--r--  drivers/nvme/target/configfs.c | 68
-rw-r--r--  drivers/nvme/target/core.c | 3
-rw-r--r--  drivers/nvme/target/discovery.c | 4
-rw-r--r--  drivers/nvme/target/fc.c | 10
-rw-r--r--  drivers/nvme/target/fcloop.c | 2
-rw-r--r--  drivers/nvme/target/io-cmd.c | 4
-rw-r--r--  drivers/nvme/target/loop.c | 67
-rw-r--r--  drivers/nvme/target/nvmet.h | 1
-rw-r--r--  drivers/nvme/target/rdma.c | 102
-rw-r--r--  drivers/s390/block/dasd.c | 36
-rw-r--r--  drivers/s390/block/dcssblk.c | 2
-rw-r--r--  drivers/s390/block/scm_blk.c | 8
-rw-r--r--  drivers/s390/block/scm_blk.h | 4
-rw-r--r--  drivers/s390/block/xpram.c | 2
-rw-r--r--  drivers/s390/cio/eadm_sch.c | 6
-rw-r--r--  drivers/s390/cio/scm.c | 2
-rw-r--r--  drivers/sbus/char/jsflash.c | 5
-rw-r--r--  drivers/scsi/osd/osd_initiator.c | 29
-rw-r--r--  drivers/scsi/osst.c | 3
-rw-r--r--  drivers/scsi/scsi_error.c | 3
-rw-r--r--  drivers/scsi/scsi_lib.c | 104
-rw-r--r--  drivers/scsi/scsi_transport_sas.c | 10
-rw-r--r--  drivers/scsi/sg.c | 8
-rw-r--r--  drivers/scsi/st.c | 3
-rw-r--r--  drivers/target/target_core_iblock.c | 12
-rw-r--r--  drivers/target/target_core_pscsi.c | 6
-rw-r--r--  fs/aio.c | 15
-rw-r--r--  fs/block_dev.c | 25
-rw-r--r--  fs/btrfs/btrfs_inode.h | 3
-rw-r--r--  fs/btrfs/check-integrity.c | 4
-rw-r--r--  fs/btrfs/compression.c | 46
-rw-r--r--  fs/btrfs/compression.h | 4
-rw-r--r--  fs/btrfs/ctree.h | 6
-rw-r--r--  fs/btrfs/disk-io.c | 75
-rw-r--r--  fs/btrfs/disk-io.h | 12
-rw-r--r--  fs/btrfs/extent_io.c | 27
-rw-r--r--  fs/btrfs/extent_io.h | 6
-rw-r--r--  fs/btrfs/file-item.c | 14
-rw-r--r--  fs/btrfs/file.c | 33
-rw-r--r--  fs/btrfs/inode.c | 82
-rw-r--r--  fs/btrfs/raid56.c | 16
-rw-r--r--  fs/btrfs/scrub.c | 26
-rw-r--r--  fs/btrfs/volumes.c | 11
-rw-r--r--  fs/buffer.c | 15
-rw-r--r--  fs/crypto/bio.c | 2
-rw-r--r--  fs/direct-io.c | 25
-rw-r--r--  fs/ext4/file.c | 35
-rw-r--r--  fs/ext4/page-io.c | 15
-rw-r--r--  fs/ext4/readpage.c | 4
-rw-r--r--  fs/f2fs/data.c | 10
-rw-r--r--  fs/f2fs/segment.c | 2
-rw-r--r--  fs/fcntl.c | 67
-rw-r--r--  fs/gfs2/incore.h | 1
-rw-r--r--  fs/gfs2/lops.c | 10
-rw-r--r--  fs/gfs2/meta_io.c | 2
-rw-r--r--  fs/gfs2/ops_fstype.c | 4
-rw-r--r--  fs/inode.c | 1
-rw-r--r--  fs/iomap.c | 13
-rw-r--r--  fs/jfs/jfs_logmgr.c | 2
-rw-r--r--  fs/jfs/jfs_metapage.c | 4
-rw-r--r--  fs/mpage.c | 4
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c | 4
-rw-r--r--  fs/nfsd/blocklayout.c | 4
-rw-r--r--  fs/nilfs2/segbuf.c | 2
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 6
-rw-r--r--  fs/open.c | 1
-rw-r--r--  fs/read_write.c | 12
-rw-r--r--  fs/xfs/xfs_aops.c | 9
-rw-r--r--  fs/xfs/xfs_buf.c | 7
-rw-r--r--  fs/xfs/xfs_file.c | 32
-rw-r--r--  fs/xfs/xfs_iomap.c | 22
-rw-r--r--  fs/xfs/xfs_super.c | 3
-rw-r--r--  include/linux/bio.h | 21
-rw-r--r--  include/linux/blk-mq.h | 28
-rw-r--r--  include/linux/blk_types.h | 26
-rw-r--r--  include/linux/blkdev.h | 72
-rw-r--r--  include/linux/device-mapper.h | 4
-rw-r--r--  include/linux/elevator.h | 7
-rw-r--r--  include/linux/fs.h | 74
-rw-r--r--  include/linux/ide.h | 6
-rw-r--r--  include/linux/iomap.h | 1
-rw-r--r--  include/linux/nvme.h | 102
-rw-r--r--  include/linux/scatterlist.h | 2
-rw-r--r--  include/scsi/osd_initiator.h | 2
-rw-r--r--  include/scsi/scsi_cmnd.h | 1
-rw-r--r--  include/scsi/scsi_request.h | 2
-rw-r--r--  include/uapi/linux/aio_abi.h | 2
-rw-r--r--  include/uapi/linux/dm-ioctl.h | 4
-rw-r--r--  include/uapi/linux/fcntl.h | 21
-rw-r--r--  include/uapi/linux/fs.h | 4
-rw-r--r--  include/uapi/linux/loop.h | 3
-rw-r--r--  include/uapi/linux/nbd.h | 4
-rw-r--r--  kernel/power/swap.c | 14
-rw-r--r--  kernel/trace/blktrace.c | 4
-rw-r--r--  lib/scatterlist.c | 35
-rw-r--r--  mm/filemap.c | 64
-rw-r--r--  mm/page_io.c | 4
265 files changed, 5912 insertions, 6237 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 01ddeaf64b0f..9490f2845f06 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the
632i/o is issued (since the bio may otherwise get freed in case i/o completion 632i/o is issued (since the bio may otherwise get freed in case i/o completion
633happens in the meantime). 633happens in the meantime).
634 634
635The bio_clone() routine may be used to duplicate a bio, where the clone 635The bio_clone_fast() routine may be used to duplicate a bio, where the clone
636shares the bio_vec_list with the original bio (i.e. both point to the 636shares the bio_vec_list with the original bio (i.e. both point to the
637same bio_vec_list). This would typically be used for splitting i/o requests 637same bio_vec_list). This would typically be used for splitting i/o requests
638in lvm or md. 638in lvm or md.
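The documentation text above now points readers at bio_clone_fast(), whose clone shares the bio_vec list with the original bio rather than copying it. As a rough illustration only (the helper name my_clone_for_split and its surrounding logic are invented here, not part of the patch), a stacking driver might use it like this:

/*
 * Illustrative sketch, not from the patch: duplicate a bio with
 * bio_clone_fast().  The clone shares the bio_vec list with @bio, so
 * no pages are copied.  @bs is assumed to be a bio_set the driver
 * created earlier with bioset_create().
 */
static struct bio *my_clone_for_split(struct bio *bio, struct bio_set *bs)
{
	struct bio *clone;

	clone = bio_clone_fast(bio, GFP_NOIO, bs);
	if (!clone)
		return NULL;

	/* the clone would typically be chained back to @bio, e.g. via bio_chain() */
	return clone;
}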
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 67026300c88e..144809a3f4f6 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/device.h> 5#include <linux/device.h>
6#include <linux/blkdev.h>
6 7
7struct arqb { 8struct arqb {
8 u64 data; 9 u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
105 int (*probe) (struct scm_device *scmdev); 106 int (*probe) (struct scm_device *scmdev);
106 int (*remove) (struct scm_device *scmdev); 107 int (*remove) (struct scm_device *scmdev);
107 void (*notify) (struct scm_device *scmdev, enum scm_event event); 108 void (*notify) (struct scm_device *scmdev, enum scm_event event);
108 void (*handler) (struct scm_device *scmdev, void *data, int error); 109 void (*handler) (struct scm_device *scmdev, void *data,
110 blk_status_t error);
109}; 111};
110 112
111int scm_driver_register(struct scm_driver *scmdrv); 113int scm_driver_register(struct scm_driver *scmdrv);
112void scm_driver_unregister(struct scm_driver *scmdrv); 114void scm_driver_unregister(struct scm_driver *scmdrv);
113 115
114int eadm_start_aob(struct aob *aob); 116int eadm_start_aob(struct aob *aob);
115void scm_irq_handler(struct aob *aob, int error); 117void scm_irq_handler(struct aob *aob, blk_status_t error);
116 118
117#endif /* _ASM_S390_EADM_H */ 119#endif /* _ASM_S390_EADM_H */
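The header change above moves the scm_driver completion callbacks from a plain int error to blk_status_t. A minimal, hypothetical handler under the new signature could look like the sketch below; the name my_scm_handler, the assumption that @data carries the request pointer, and the use of blk_mq_end_request() are all illustrative, not taken from the real s390 driver.

/*
 * Hypothetical sketch of an scm_driver completion handler after the
 * switch to blk_status_t.
 */
static void my_scm_handler(struct scm_device *scmdev, void *data,
			   blk_status_t error)
{
	struct request *req = data;	/* assumed: driver stored its request here */

	/* the blk_status_t code can be handed to the block layer unchanged */
	blk_mq_end_request(req, error);
}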
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 85410279beab..b55fe9bf5d3e 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -534,7 +534,7 @@ static void ubd_handler(void)
534 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { 534 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
535 blk_end_request( 535 blk_end_request(
536 (*irq_req_buffer)[count]->req, 536 (*irq_req_buffer)[count]->req,
537 0, 537 BLK_STS_OK,
538 (*irq_req_buffer)[count]->length 538 (*irq_req_buffer)[count]->length
539 ); 539 );
540 kfree((*irq_req_buffer)[count]); 540 kfree((*irq_req_buffer)[count]);
diff --git a/block/badblocks.c b/block/badblocks.c
index 6ebcef282314..43c71166e1e2 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -533,6 +533,7 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
533 case 3: 533 case 3:
534 if (newline != '\n') 534 if (newline != '\n')
535 return -EINVAL; 535 return -EINVAL;
536 /* fall through */
536 case 2: 537 case 2:
537 if (length <= 0) 538 if (length <= 0)
538 return -EINVAL; 539 return -EINVAL;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index ed93da2462ab..12bbc6b8657d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -725,8 +725,12 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
725} 725}
726 726
727static void 727static void
728bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) 728bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
729 struct bfq_io_cq *bic, bool bfq_already_existing)
729{ 730{
731 unsigned int old_wr_coeff = bfqq->wr_coeff;
732 bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
733
730 if (bic->saved_idle_window) 734 if (bic->saved_idle_window)
731 bfq_mark_bfqq_idle_window(bfqq); 735 bfq_mark_bfqq_idle_window(bfqq);
732 else 736 else
@@ -754,6 +758,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
754 758
755 /* make sure weight will be updated, however we got here */ 759 /* make sure weight will be updated, however we got here */
756 bfqq->entity.prio_changed = 1; 760 bfqq->entity.prio_changed = 1;
761
762 if (likely(!busy))
763 return;
764
765 if (old_wr_coeff == 1 && bfqq->wr_coeff > 1)
766 bfqd->wr_busy_queues++;
767 else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1)
768 bfqd->wr_busy_queues--;
757} 769}
758 770
759static int bfqq_process_refs(struct bfq_queue *bfqq) 771static int bfqq_process_refs(struct bfq_queue *bfqq)
@@ -4290,10 +4302,16 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
4290 bfq_put_queue(bfqq); 4302 bfq_put_queue(bfqq);
4291} 4303}
4292 4304
4293static void bfq_put_rq_private(struct request_queue *q, struct request *rq) 4305static void bfq_finish_request(struct request *rq)
4294{ 4306{
4295 struct bfq_queue *bfqq = RQ_BFQQ(rq); 4307 struct bfq_queue *bfqq;
4296 struct bfq_data *bfqd = bfqq->bfqd; 4308 struct bfq_data *bfqd;
4309
4310 if (!rq->elv.icq)
4311 return;
4312
4313 bfqq = RQ_BFQQ(rq);
4314 bfqd = bfqq->bfqd;
4297 4315
4298 if (rq->rq_flags & RQF_STARTED) 4316 if (rq->rq_flags & RQF_STARTED)
4299 bfqg_stats_update_completion(bfqq_group(bfqq), 4317 bfqg_stats_update_completion(bfqq_group(bfqq),
@@ -4324,7 +4342,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
4324 */ 4342 */
4325 4343
4326 if (!RB_EMPTY_NODE(&rq->rb_node)) 4344 if (!RB_EMPTY_NODE(&rq->rb_node))
4327 bfq_remove_request(q, rq); 4345 bfq_remove_request(rq->q, rq);
4328 bfq_put_rq_priv_body(bfqq); 4346 bfq_put_rq_priv_body(bfqq);
4329 } 4347 }
4330 4348
@@ -4394,20 +4412,21 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
4394/* 4412/*
4395 * Allocate bfq data structures associated with this request. 4413 * Allocate bfq data structures associated with this request.
4396 */ 4414 */
4397static int bfq_get_rq_private(struct request_queue *q, struct request *rq, 4415static void bfq_prepare_request(struct request *rq, struct bio *bio)
4398 struct bio *bio)
4399{ 4416{
4417 struct request_queue *q = rq->q;
4400 struct bfq_data *bfqd = q->elevator->elevator_data; 4418 struct bfq_data *bfqd = q->elevator->elevator_data;
4401 struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); 4419 struct bfq_io_cq *bic;
4402 const int is_sync = rq_is_sync(rq); 4420 const int is_sync = rq_is_sync(rq);
4403 struct bfq_queue *bfqq; 4421 struct bfq_queue *bfqq;
4404 bool new_queue = false; 4422 bool new_queue = false;
4405 bool split = false; 4423 bool bfqq_already_existing = false, split = false;
4406 4424
4407 spin_lock_irq(&bfqd->lock); 4425 if (!rq->elv.icq)
4426 return;
4427 bic = icq_to_bic(rq->elv.icq);
4408 4428
4409 if (!bic) 4429 spin_lock_irq(&bfqd->lock);
4410 goto queue_fail;
4411 4430
4412 bfq_check_ioprio_change(bic, bio); 4431 bfq_check_ioprio_change(bic, bio);
4413 4432
@@ -4432,6 +4451,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
4432 bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, 4451 bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
4433 true, is_sync, 4452 true, is_sync,
4434 NULL); 4453 NULL);
4454 else
4455 bfqq_already_existing = true;
4435 } 4456 }
4436 } 4457 }
4437 4458
@@ -4457,7 +4478,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
4457 * queue: restore the idle window and the 4478 * queue: restore the idle window and the
4458 * possible weight raising period. 4479 * possible weight raising period.
4459 */ 4480 */
4460 bfq_bfqq_resume_state(bfqq, bic); 4481 bfq_bfqq_resume_state(bfqq, bfqd, bic,
4482 bfqq_already_existing);
4461 } 4483 }
4462 } 4484 }
4463 4485
@@ -4465,13 +4487,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
4465 bfq_handle_burst(bfqd, bfqq); 4487 bfq_handle_burst(bfqd, bfqq);
4466 4488
4467 spin_unlock_irq(&bfqd->lock); 4489 spin_unlock_irq(&bfqd->lock);
4468
4469 return 0;
4470
4471queue_fail:
4472 spin_unlock_irq(&bfqd->lock);
4473
4474 return 1;
4475} 4490}
4476 4491
4477static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) 4492static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -4950,8 +4965,8 @@ static struct elv_fs_entry bfq_attrs[] = {
4950 4965
4951static struct elevator_type iosched_bfq_mq = { 4966static struct elevator_type iosched_bfq_mq = {
4952 .ops.mq = { 4967 .ops.mq = {
4953 .get_rq_priv = bfq_get_rq_private, 4968 .prepare_request = bfq_prepare_request,
4954 .put_rq_priv = bfq_put_rq_private, 4969 .finish_request = bfq_finish_request,
4955 .exit_icq = bfq_exit_icq, 4970 .exit_icq = bfq_exit_icq,
4956 .insert_requests = bfq_insert_requests, 4971 .insert_requests = bfq_insert_requests,
4957 .dispatch_request = bfq_dispatch_request, 4972 .dispatch_request = bfq_dispatch_request,
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index b5009a896a7f..b8a3a65f7364 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -224,7 +224,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
224 * @bio: bio to generate/verify integrity metadata for 224 * @bio: bio to generate/verify integrity metadata for
225 * @proc_fn: Pointer to the relevant processing function 225 * @proc_fn: Pointer to the relevant processing function
226 */ 226 */
227static int bio_integrity_process(struct bio *bio, 227static blk_status_t bio_integrity_process(struct bio *bio,
228 integrity_processing_fn *proc_fn) 228 integrity_processing_fn *proc_fn)
229{ 229{
230 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 230 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -232,7 +232,7 @@ static int bio_integrity_process(struct bio *bio,
232 struct bvec_iter bviter; 232 struct bvec_iter bviter;
233 struct bio_vec bv; 233 struct bio_vec bv;
234 struct bio_integrity_payload *bip = bio_integrity(bio); 234 struct bio_integrity_payload *bip = bio_integrity(bio);
235 unsigned int ret = 0; 235 blk_status_t ret = BLK_STS_OK;
236 void *prot_buf = page_address(bip->bip_vec->bv_page) + 236 void *prot_buf = page_address(bip->bip_vec->bv_page) +
237 bip->bip_vec->bv_offset; 237 bip->bip_vec->bv_offset;
238 238
@@ -369,7 +369,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
369 struct bio *bio = bip->bip_bio; 369 struct bio *bio = bip->bip_bio;
370 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 370 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
371 371
372 bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn); 372 bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
373 373
374 /* Restore original bio completion handler */ 374 /* Restore original bio completion handler */
375 bio->bi_end_io = bip->bip_end_io; 375 bio->bi_end_io = bip->bip_end_io;
@@ -398,7 +398,7 @@ void bio_integrity_endio(struct bio *bio)
398 * integrity metadata. Restore original bio end_io handler 398 * integrity metadata. Restore original bio end_io handler
399 * and run it. 399 * and run it.
400 */ 400 */
401 if (bio->bi_error) { 401 if (bio->bi_status) {
402 bio->bi_end_io = bip->bip_end_io; 402 bio->bi_end_io = bip->bip_end_io;
403 bio_endio(bio); 403 bio_endio(bio);
404 404
diff --git a/block/bio.c b/block/bio.c
index 26b0810fb8ea..1cfcd0df3f30 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -315,8 +315,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
315{ 315{
316 struct bio *parent = bio->bi_private; 316 struct bio *parent = bio->bi_private;
317 317
318 if (!parent->bi_error) 318 if (!parent->bi_status)
319 parent->bi_error = bio->bi_error; 319 parent->bi_status = bio->bi_status;
320 bio_put(bio); 320 bio_put(bio);
321 return parent; 321 return parent;
322} 322}
@@ -369,6 +369,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
369 struct bio_list punt, nopunt; 369 struct bio_list punt, nopunt;
370 struct bio *bio; 370 struct bio *bio;
371 371
372 if (WARN_ON_ONCE(!bs->rescue_workqueue))
373 return;
372 /* 374 /*
373 * In order to guarantee forward progress we must punt only bios that 375 * In order to guarantee forward progress we must punt only bios that
374 * were allocated from this bio_set; otherwise, if there was a bio on 376 * were allocated from this bio_set; otherwise, if there was a bio on
@@ -480,7 +482,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
480 482
481 if (current->bio_list && 483 if (current->bio_list &&
482 (!bio_list_empty(&current->bio_list[0]) || 484 (!bio_list_empty(&current->bio_list[0]) ||
483 !bio_list_empty(&current->bio_list[1]))) 485 !bio_list_empty(&current->bio_list[1])) &&
486 bs->rescue_workqueue)
484 gfp_mask &= ~__GFP_DIRECT_RECLAIM; 487 gfp_mask &= ~__GFP_DIRECT_RECLAIM;
485 488
486 p = mempool_alloc(bs->bio_pool, gfp_mask); 489 p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -550,7 +553,7 @@ EXPORT_SYMBOL(zero_fill_bio);
550 * 553 *
551 * Description: 554 * Description:
552 * Put a reference to a &struct bio, either one you have gotten with 555 * Put a reference to a &struct bio, either one you have gotten with
553 * bio_alloc, bio_get or bio_clone. The last put of a bio will free it. 556 * bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
554 **/ 557 **/
555void bio_put(struct bio *bio) 558void bio_put(struct bio *bio)
556{ 559{
@@ -599,6 +602,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
599 bio->bi_bdev = bio_src->bi_bdev; 602 bio->bi_bdev = bio_src->bi_bdev;
600 bio_set_flag(bio, BIO_CLONED); 603 bio_set_flag(bio, BIO_CLONED);
601 bio->bi_opf = bio_src->bi_opf; 604 bio->bi_opf = bio_src->bi_opf;
605 bio->bi_write_hint = bio_src->bi_write_hint;
602 bio->bi_iter = bio_src->bi_iter; 606 bio->bi_iter = bio_src->bi_iter;
603 bio->bi_io_vec = bio_src->bi_io_vec; 607 bio->bi_io_vec = bio_src->bi_io_vec;
604 608
@@ -682,6 +686,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
682 return NULL; 686 return NULL;
683 bio->bi_bdev = bio_src->bi_bdev; 687 bio->bi_bdev = bio_src->bi_bdev;
684 bio->bi_opf = bio_src->bi_opf; 688 bio->bi_opf = bio_src->bi_opf;
689 bio->bi_write_hint = bio_src->bi_write_hint;
685 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; 690 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
686 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; 691 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
687 692
@@ -924,7 +929,7 @@ static void submit_bio_wait_endio(struct bio *bio)
924{ 929{
925 struct submit_bio_ret *ret = bio->bi_private; 930 struct submit_bio_ret *ret = bio->bi_private;
926 931
927 ret->error = bio->bi_error; 932 ret->error = blk_status_to_errno(bio->bi_status);
928 complete(&ret->event); 933 complete(&ret->event);
929} 934}
930 935
@@ -1823,8 +1828,8 @@ again:
1823 } 1828 }
1824 1829
1825 if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { 1830 if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
1826 trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), 1831 trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
1827 bio, bio->bi_error); 1832 blk_status_to_errno(bio->bi_status));
1828 bio_clear_flag(bio, BIO_TRACE_COMPLETION); 1833 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
1829 } 1834 }
1830 1835
@@ -1927,9 +1932,29 @@ void bioset_free(struct bio_set *bs)
1927} 1932}
1928EXPORT_SYMBOL(bioset_free); 1933EXPORT_SYMBOL(bioset_free);
1929 1934
1930static struct bio_set *__bioset_create(unsigned int pool_size, 1935/**
1931 unsigned int front_pad, 1936 * bioset_create - Create a bio_set
1932 bool create_bvec_pool) 1937 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1938 * @front_pad: Number of bytes to allocate in front of the returned bio
1939 * @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS
1940 * and %BIOSET_NEED_RESCUER
1941 *
1942 * Description:
1943 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1944 * to ask for a number of bytes to be allocated in front of the bio.
1945 * Front pad allocation is useful for embedding the bio inside
1946 * another structure, to avoid allocating extra data to go with the bio.
1947 * Note that the bio must be embedded at the END of that structure always,
1948 * or things will break badly.
1949 * If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
1950 * for allocating iovecs. This pool is not needed e.g. for bio_clone_fast().
1951 * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
1952 * dispatch queued requests when the mempool runs out of space.
1953 *
1954 */
1955struct bio_set *bioset_create(unsigned int pool_size,
1956 unsigned int front_pad,
1957 int flags)
1933{ 1958{
1934 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); 1959 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1935 struct bio_set *bs; 1960 struct bio_set *bs;
@@ -1954,12 +1979,15 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
1954 if (!bs->bio_pool) 1979 if (!bs->bio_pool)
1955 goto bad; 1980 goto bad;
1956 1981
1957 if (create_bvec_pool) { 1982 if (flags & BIOSET_NEED_BVECS) {
1958 bs->bvec_pool = biovec_create_pool(pool_size); 1983 bs->bvec_pool = biovec_create_pool(pool_size);
1959 if (!bs->bvec_pool) 1984 if (!bs->bvec_pool)
1960 goto bad; 1985 goto bad;
1961 } 1986 }
1962 1987
1988 if (!(flags & BIOSET_NEED_RESCUER))
1989 return bs;
1990
1963 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); 1991 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
1964 if (!bs->rescue_workqueue) 1992 if (!bs->rescue_workqueue)
1965 goto bad; 1993 goto bad;
@@ -1969,41 +1997,8 @@ bad:
1969 bioset_free(bs); 1997 bioset_free(bs);
1970 return NULL; 1998 return NULL;
1971} 1999}
1972
1973/**
1974 * bioset_create - Create a bio_set
1975 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1976 * @front_pad: Number of bytes to allocate in front of the returned bio
1977 *
1978 * Description:
1979 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1980 * to ask for a number of bytes to be allocated in front of the bio.
1981 * Front pad allocation is useful for embedding the bio inside
1982 * another structure, to avoid allocating extra data to go with the bio.
1983 * Note that the bio must be embedded at the END of that structure always,
1984 * or things will break badly.
1985 */
1986struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1987{
1988 return __bioset_create(pool_size, front_pad, true);
1989}
1990EXPORT_SYMBOL(bioset_create); 2000EXPORT_SYMBOL(bioset_create);
1991 2001
1992/**
1993 * bioset_create_nobvec - Create a bio_set without bio_vec mempool
1994 * @pool_size: Number of bio to cache in the mempool
1995 * @front_pad: Number of bytes to allocate in front of the returned bio
1996 *
1997 * Description:
1998 * Same functionality as bioset_create() except that mempool is not
1999 * created for bio_vecs. Saving some memory for bio_clone_fast() users.
2000 */
2001struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
2002{
2003 return __bioset_create(pool_size, front_pad, false);
2004}
2005EXPORT_SYMBOL(bioset_create_nobvec);
2006
2007#ifdef CONFIG_BLK_CGROUP 2002#ifdef CONFIG_BLK_CGROUP
2008 2003
2009/** 2004/**
@@ -2118,7 +2113,7 @@ static int __init init_bio(void)
2118 bio_integrity_init(); 2113 bio_integrity_init();
2119 biovec_init_slabs(); 2114 biovec_init_slabs();
2120 2115
2121 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); 2116 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
2122 if (!fs_bio_set) 2117 if (!fs_bio_set)
2123 panic("bio: can't allocate bios\n"); 2118 panic("bio: can't allocate bios\n");
2124 2119
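The new bioset_create() kernel-doc above documents the BIOSET_NEED_BVECS and BIOSET_NEED_RESCUER flags that replace the old bioset_create()/bioset_create_nobvec() pair. A small usage sketch follows; the pool size, front_pad of 0, and the my_driver_* names are arbitrary examples, not from the patch.

/*
 * Example only: create a bio_set that can allocate bvecs and has a
 * rescuer workqueue, then allocate bios from it.
 */
static struct bio_set *my_driver_bio_set;

static int my_driver_init_bioset(void)
{
	my_driver_bio_set = bioset_create(BIO_POOL_SIZE, 0,
					  BIOSET_NEED_BVECS |
					  BIOSET_NEED_RESCUER);
	if (!my_driver_bio_set)
		return -ENOMEM;
	return 0;
}

static struct bio *my_driver_alloc_bio(unsigned int nr_vecs)
{
	/* falls back to the mempool when regular allocation fails */
	return bio_alloc_bioset(GFP_NOIO, nr_vecs, my_driver_bio_set);
}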
diff --git a/block/blk-core.c b/block/blk-core.c
index a7421b772d0e..af393d5a9680 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -129,11 +129,70 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
129} 129}
130EXPORT_SYMBOL(blk_rq_init); 130EXPORT_SYMBOL(blk_rq_init);
131 131
132static const struct {
133 int errno;
134 const char *name;
135} blk_errors[] = {
136 [BLK_STS_OK] = { 0, "" },
137 [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
138 [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" },
139 [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" },
140 [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" },
141 [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" },
142 [BLK_STS_NEXUS] = { -EBADE, "critical nexus" },
143 [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
144 [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
145 [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
146 [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
147
148 /* device mapper special case, should not leak out: */
149 [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
150
151 /* everything else not covered above: */
152 [BLK_STS_IOERR] = { -EIO, "I/O" },
153};
154
155blk_status_t errno_to_blk_status(int errno)
156{
157 int i;
158
159 for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
160 if (blk_errors[i].errno == errno)
161 return (__force blk_status_t)i;
162 }
163
164 return BLK_STS_IOERR;
165}
166EXPORT_SYMBOL_GPL(errno_to_blk_status);
167
168int blk_status_to_errno(blk_status_t status)
169{
170 int idx = (__force int)status;
171
172 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
173 return -EIO;
174 return blk_errors[idx].errno;
175}
176EXPORT_SYMBOL_GPL(blk_status_to_errno);
177
178static void print_req_error(struct request *req, blk_status_t status)
179{
180 int idx = (__force int)status;
181
182 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
183 return;
184
185 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
186 __func__, blk_errors[idx].name, req->rq_disk ?
187 req->rq_disk->disk_name : "?",
188 (unsigned long long)blk_rq_pos(req));
189}
190
132static void req_bio_endio(struct request *rq, struct bio *bio, 191static void req_bio_endio(struct request *rq, struct bio *bio,
133 unsigned int nbytes, int error) 192 unsigned int nbytes, blk_status_t error)
134{ 193{
135 if (error) 194 if (error)
136 bio->bi_error = error; 195 bio->bi_status = error;
137 196
138 if (unlikely(rq->rq_flags & RQF_QUIET)) 197 if (unlikely(rq->rq_flags & RQF_QUIET))
139 bio_set_flag(bio, BIO_QUIET); 198 bio_set_flag(bio, BIO_QUIET);
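The hunk above introduces the blk_errors[] table and the errno_to_blk_status()/blk_status_to_errno() helpers that the rest of the series builds on. A hedged sketch of how a request-based driver might use them at its completion boundary (the my_driver_* names and the my_hw_errno variable are invented for illustration):

/*
 * Illustration only: translate between a driver-internal errno and the
 * new blk_status_t codes.
 */
static void my_driver_complete(struct request *rq, int my_hw_errno)
{
	/* maps e.g. -ENOSPC to BLK_STS_NOSPC, unknown errors to BLK_STS_IOERR */
	blk_status_t status = errno_to_blk_status(my_hw_errno);

	blk_mq_end_request(rq, status);
}

/* going the other way, e.g. when reporting a result to user space */
static int my_driver_report(blk_status_t status)
{
	return blk_status_to_errno(status);	/* BLK_STS_OK -> 0, etc. */
}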
@@ -177,10 +236,13 @@ static void blk_delay_work(struct work_struct *work)
177 * Description: 236 * Description:
178 * Sometimes queueing needs to be postponed for a little while, to allow 237 * Sometimes queueing needs to be postponed for a little while, to allow
179 * resources to come back. This function will make sure that queueing is 238 * resources to come back. This function will make sure that queueing is
180 * restarted around the specified time. Queue lock must be held. 239 * restarted around the specified time.
181 */ 240 */
182void blk_delay_queue(struct request_queue *q, unsigned long msecs) 241void blk_delay_queue(struct request_queue *q, unsigned long msecs)
183{ 242{
243 lockdep_assert_held(q->queue_lock);
244 WARN_ON_ONCE(q->mq_ops);
245
184 if (likely(!blk_queue_dead(q))) 246 if (likely(!blk_queue_dead(q)))
185 queue_delayed_work(kblockd_workqueue, &q->delay_work, 247 queue_delayed_work(kblockd_workqueue, &q->delay_work,
186 msecs_to_jiffies(msecs)); 248 msecs_to_jiffies(msecs));
@@ -198,6 +260,9 @@ EXPORT_SYMBOL(blk_delay_queue);
198 **/ 260 **/
199void blk_start_queue_async(struct request_queue *q) 261void blk_start_queue_async(struct request_queue *q)
200{ 262{
263 lockdep_assert_held(q->queue_lock);
264 WARN_ON_ONCE(q->mq_ops);
265
201 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 266 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
202 blk_run_queue_async(q); 267 blk_run_queue_async(q);
203} 268}
@@ -210,11 +275,13 @@ EXPORT_SYMBOL(blk_start_queue_async);
210 * Description: 275 * Description:
211 * blk_start_queue() will clear the stop flag on the queue, and call 276 * blk_start_queue() will clear the stop flag on the queue, and call
212 * the request_fn for the queue if it was in a stopped state when 277 * the request_fn for the queue if it was in a stopped state when
213 * entered. Also see blk_stop_queue(). Queue lock must be held. 278 * entered. Also see blk_stop_queue().
214 **/ 279 **/
215void blk_start_queue(struct request_queue *q) 280void blk_start_queue(struct request_queue *q)
216{ 281{
282 lockdep_assert_held(q->queue_lock);
217 WARN_ON(!irqs_disabled()); 283 WARN_ON(!irqs_disabled());
284 WARN_ON_ONCE(q->mq_ops);
218 285
219 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 286 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
220 __blk_run_queue(q); 287 __blk_run_queue(q);
@@ -233,10 +300,13 @@ EXPORT_SYMBOL(blk_start_queue);
233 * or if it simply chooses not to queue more I/O at one point, it can 300 * or if it simply chooses not to queue more I/O at one point, it can
234 * call this function to prevent the request_fn from being called until 301 * call this function to prevent the request_fn from being called until
235 * the driver has signalled it's ready to go again. This happens by calling 302 * the driver has signalled it's ready to go again. This happens by calling
236 * blk_start_queue() to restart queue operations. Queue lock must be held. 303 * blk_start_queue() to restart queue operations.
237 **/ 304 **/
238void blk_stop_queue(struct request_queue *q) 305void blk_stop_queue(struct request_queue *q)
239{ 306{
307 lockdep_assert_held(q->queue_lock);
308 WARN_ON_ONCE(q->mq_ops);
309
240 cancel_delayed_work(&q->delay_work); 310 cancel_delayed_work(&q->delay_work);
241 queue_flag_set(QUEUE_FLAG_STOPPED, q); 311 queue_flag_set(QUEUE_FLAG_STOPPED, q);
242} 312}
@@ -289,6 +359,9 @@ EXPORT_SYMBOL(blk_sync_queue);
289 */ 359 */
290inline void __blk_run_queue_uncond(struct request_queue *q) 360inline void __blk_run_queue_uncond(struct request_queue *q)
291{ 361{
362 lockdep_assert_held(q->queue_lock);
363 WARN_ON_ONCE(q->mq_ops);
364
292 if (unlikely(blk_queue_dead(q))) 365 if (unlikely(blk_queue_dead(q)))
293 return; 366 return;
294 367
@@ -310,11 +383,13 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
310 * @q: The queue to run 383 * @q: The queue to run
311 * 384 *
312 * Description: 385 * Description:
313 * See @blk_run_queue. This variant must be called with the queue lock 386 * See @blk_run_queue.
314 * held and interrupts disabled.
315 */ 387 */
316void __blk_run_queue(struct request_queue *q) 388void __blk_run_queue(struct request_queue *q)
317{ 389{
390 lockdep_assert_held(q->queue_lock);
391 WARN_ON_ONCE(q->mq_ops);
392
318 if (unlikely(blk_queue_stopped(q))) 393 if (unlikely(blk_queue_stopped(q)))
319 return; 394 return;
320 395
@@ -328,10 +403,18 @@ EXPORT_SYMBOL(__blk_run_queue);
328 * 403 *
329 * Description: 404 * Description:
330 * Tells kblockd to perform the equivalent of @blk_run_queue on behalf 405 * Tells kblockd to perform the equivalent of @blk_run_queue on behalf
331 * of us. The caller must hold the queue lock. 406 * of us.
407 *
408 * Note:
409 * Since it is not allowed to run q->delay_work after blk_cleanup_queue()
410 * has canceled q->delay_work, callers must hold the queue lock to avoid
411 * race conditions between blk_cleanup_queue() and blk_run_queue_async().
332 */ 412 */
333void blk_run_queue_async(struct request_queue *q) 413void blk_run_queue_async(struct request_queue *q)
334{ 414{
415 lockdep_assert_held(q->queue_lock);
416 WARN_ON_ONCE(q->mq_ops);
417
335 if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) 418 if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
336 mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); 419 mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
337} 420}
@@ -349,6 +432,8 @@ void blk_run_queue(struct request_queue *q)
349{ 432{
350 unsigned long flags; 433 unsigned long flags;
351 434
435 WARN_ON_ONCE(q->mq_ops);
436
352 spin_lock_irqsave(q->queue_lock, flags); 437 spin_lock_irqsave(q->queue_lock, flags);
353 __blk_run_queue(q); 438 __blk_run_queue(q);
354 spin_unlock_irqrestore(q->queue_lock, flags); 439 spin_unlock_irqrestore(q->queue_lock, flags);
@@ -377,6 +462,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
377 int i; 462 int i;
378 463
379 lockdep_assert_held(q->queue_lock); 464 lockdep_assert_held(q->queue_lock);
465 WARN_ON_ONCE(q->mq_ops);
380 466
381 while (true) { 467 while (true) {
382 bool drain = false; 468 bool drain = false;
@@ -455,6 +541,8 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
455 */ 541 */
456void blk_queue_bypass_start(struct request_queue *q) 542void blk_queue_bypass_start(struct request_queue *q)
457{ 543{
544 WARN_ON_ONCE(q->mq_ops);
545
458 spin_lock_irq(q->queue_lock); 546 spin_lock_irq(q->queue_lock);
459 q->bypass_depth++; 547 q->bypass_depth++;
460 queue_flag_set(QUEUE_FLAG_BYPASS, q); 548 queue_flag_set(QUEUE_FLAG_BYPASS, q);
@@ -481,6 +569,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
481 * @q: queue of interest 569 * @q: queue of interest
482 * 570 *
483 * Leave bypass mode and restore the normal queueing behavior. 571 * Leave bypass mode and restore the normal queueing behavior.
572 *
573 * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
574 * this function is called for both blk-sq and blk-mq queues.
484 */ 575 */
485void blk_queue_bypass_end(struct request_queue *q) 576void blk_queue_bypass_end(struct request_queue *q)
486{ 577{
@@ -732,7 +823,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
732 if (q->id < 0) 823 if (q->id < 0)
733 goto fail_q; 824 goto fail_q;
734 825
735 q->bio_split = bioset_create(BIO_POOL_SIZE, 0); 826 q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
736 if (!q->bio_split) 827 if (!q->bio_split)
737 goto fail_id; 828 goto fail_id;
738 829
@@ -878,6 +969,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
878 969
879int blk_init_allocated_queue(struct request_queue *q) 970int blk_init_allocated_queue(struct request_queue *q)
880{ 971{
972 WARN_ON_ONCE(q->mq_ops);
973
881 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); 974 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
882 if (!q->fq) 975 if (!q->fq)
883 return -ENOMEM; 976 return -ENOMEM;
@@ -1015,6 +1108,8 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
1015 struct request_list *rl; 1108 struct request_list *rl;
1016 int on_thresh, off_thresh; 1109 int on_thresh, off_thresh;
1017 1110
1111 WARN_ON_ONCE(q->mq_ops);
1112
1018 spin_lock_irq(q->queue_lock); 1113 spin_lock_irq(q->queue_lock);
1019 q->nr_requests = nr; 1114 q->nr_requests = nr;
1020 blk_queue_congestion_threshold(q); 1115 blk_queue_congestion_threshold(q);
@@ -1077,6 +1172,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
1077 int may_queue; 1172 int may_queue;
1078 req_flags_t rq_flags = RQF_ALLOCED; 1173 req_flags_t rq_flags = RQF_ALLOCED;
1079 1174
1175 lockdep_assert_held(q->queue_lock);
1176
1080 if (unlikely(blk_queue_dying(q))) 1177 if (unlikely(blk_queue_dying(q)))
1081 return ERR_PTR(-ENODEV); 1178 return ERR_PTR(-ENODEV);
1082 1179
@@ -1250,12 +1347,20 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
1250 struct request_list *rl; 1347 struct request_list *rl;
1251 struct request *rq; 1348 struct request *rq;
1252 1349
1350 lockdep_assert_held(q->queue_lock);
1351 WARN_ON_ONCE(q->mq_ops);
1352
1253 rl = blk_get_rl(q, bio); /* transferred to @rq on success */ 1353 rl = blk_get_rl(q, bio); /* transferred to @rq on success */
1254retry: 1354retry:
1255 rq = __get_request(rl, op, bio, gfp_mask); 1355 rq = __get_request(rl, op, bio, gfp_mask);
1256 if (!IS_ERR(rq)) 1356 if (!IS_ERR(rq))
1257 return rq; 1357 return rq;
1258 1358
1359 if (op & REQ_NOWAIT) {
1360 blk_put_rl(rl);
1361 return ERR_PTR(-EAGAIN);
1362 }
1363
1259 if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) { 1364 if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
1260 blk_put_rl(rl); 1365 blk_put_rl(rl);
1261 return rq; 1366 return rq;
@@ -1283,16 +1388,18 @@ retry:
1283 goto retry; 1388 goto retry;
1284} 1389}
1285 1390
1286static struct request *blk_old_get_request(struct request_queue *q, int rw, 1391static struct request *blk_old_get_request(struct request_queue *q,
1287 gfp_t gfp_mask) 1392 unsigned int op, gfp_t gfp_mask)
1288{ 1393{
1289 struct request *rq; 1394 struct request *rq;
1290 1395
1396 WARN_ON_ONCE(q->mq_ops);
1397
1291 /* create ioc upfront */ 1398 /* create ioc upfront */
1292 create_io_context(gfp_mask, q->node); 1399 create_io_context(gfp_mask, q->node);
1293 1400
1294 spin_lock_irq(q->queue_lock); 1401 spin_lock_irq(q->queue_lock);
1295 rq = get_request(q, rw, NULL, gfp_mask); 1402 rq = get_request(q, op, NULL, gfp_mask);
1296 if (IS_ERR(rq)) { 1403 if (IS_ERR(rq)) {
1297 spin_unlock_irq(q->queue_lock); 1404 spin_unlock_irq(q->queue_lock);
1298 return rq; 1405 return rq;
@@ -1305,14 +1412,24 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1305 return rq; 1412 return rq;
1306} 1413}
1307 1414
1308struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 1415struct request *blk_get_request(struct request_queue *q, unsigned int op,
1416 gfp_t gfp_mask)
1309{ 1417{
1310 if (q->mq_ops) 1418 struct request *req;
1311 return blk_mq_alloc_request(q, rw, 1419
1420 if (q->mq_ops) {
1421 req = blk_mq_alloc_request(q, op,
1312 (gfp_mask & __GFP_DIRECT_RECLAIM) ? 1422 (gfp_mask & __GFP_DIRECT_RECLAIM) ?
1313 0 : BLK_MQ_REQ_NOWAIT); 1423 0 : BLK_MQ_REQ_NOWAIT);
1314 else 1424 if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
1315 return blk_old_get_request(q, rw, gfp_mask); 1425 q->mq_ops->initialize_rq_fn(req);
1426 } else {
1427 req = blk_old_get_request(q, op, gfp_mask);
1428 if (!IS_ERR(req) && q->initialize_rq_fn)
1429 q->initialize_rq_fn(req);
1430 }
1431
1432 return req;
1316} 1433}
1317EXPORT_SYMBOL(blk_get_request); 1434EXPORT_SYMBOL(blk_get_request);
1318 1435
@@ -1328,6 +1445,9 @@ EXPORT_SYMBOL(blk_get_request);
1328 */ 1445 */
1329void blk_requeue_request(struct request_queue *q, struct request *rq) 1446void blk_requeue_request(struct request_queue *q, struct request *rq)
1330{ 1447{
1448 lockdep_assert_held(q->queue_lock);
1449 WARN_ON_ONCE(q->mq_ops);
1450
1331 blk_delete_timer(rq); 1451 blk_delete_timer(rq);
1332 blk_clear_rq_complete(rq); 1452 blk_clear_rq_complete(rq);
1333 trace_block_rq_requeue(q, rq); 1453 trace_block_rq_requeue(q, rq);
@@ -1402,9 +1522,6 @@ static void blk_pm_put_request(struct request *rq)
1402static inline void blk_pm_put_request(struct request *rq) {} 1522static inline void blk_pm_put_request(struct request *rq) {}
1403#endif 1523#endif
1404 1524
1405/*
1406 * queue lock must be held
1407 */
1408void __blk_put_request(struct request_queue *q, struct request *req) 1525void __blk_put_request(struct request_queue *q, struct request *req)
1409{ 1526{
1410 req_flags_t rq_flags = req->rq_flags; 1527 req_flags_t rq_flags = req->rq_flags;
@@ -1417,6 +1534,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1417 return; 1534 return;
1418 } 1535 }
1419 1536
1537 lockdep_assert_held(q->queue_lock);
1538
1420 blk_pm_put_request(req); 1539 blk_pm_put_request(req);
1421 1540
1422 elv_completed_request(q, req); 1541 elv_completed_request(q, req);
@@ -1646,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
1646 req->ioprio = ioc->ioprio; 1765 req->ioprio = ioc->ioprio;
1647 else 1766 else
1648 req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); 1767 req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
1768 req->write_hint = bio->bi_write_hint;
1649 blk_rq_bio_prep(req->q, req, bio); 1769 blk_rq_bio_prep(req->q, req, bio);
1650} 1770}
1651EXPORT_SYMBOL_GPL(blk_init_request_from_bio); 1771EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
@@ -1665,10 +1785,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1665 */ 1785 */
1666 blk_queue_bounce(q, &bio); 1786 blk_queue_bounce(q, &bio);
1667 1787
1668 blk_queue_split(q, &bio, q->bio_split); 1788 blk_queue_split(q, &bio);
1669 1789
1670 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 1790 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1671 bio->bi_error = -EIO; 1791 bio->bi_status = BLK_STS_IOERR;
1672 bio_endio(bio); 1792 bio_endio(bio);
1673 return BLK_QC_T_NONE; 1793 return BLK_QC_T_NONE;
1674 } 1794 }
@@ -1726,7 +1846,10 @@ get_rq:
1726 req = get_request(q, bio->bi_opf, bio, GFP_NOIO); 1846 req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
1727 if (IS_ERR(req)) { 1847 if (IS_ERR(req)) {
1728 __wbt_done(q->rq_wb, wb_acct); 1848 __wbt_done(q->rq_wb, wb_acct);
1729 bio->bi_error = PTR_ERR(req); 1849 if (PTR_ERR(req) == -ENOMEM)
1850 bio->bi_status = BLK_STS_RESOURCE;
1851 else
1852 bio->bi_status = BLK_STS_IOERR;
1730 bio_endio(bio); 1853 bio_endio(bio);
1731 goto out_unlock; 1854 goto out_unlock;
1732 } 1855 }
@@ -1881,7 +2004,7 @@ generic_make_request_checks(struct bio *bio)
1881{ 2004{
1882 struct request_queue *q; 2005 struct request_queue *q;
1883 int nr_sectors = bio_sectors(bio); 2006 int nr_sectors = bio_sectors(bio);
1884 int err = -EIO; 2007 blk_status_t status = BLK_STS_IOERR;
1885 char b[BDEVNAME_SIZE]; 2008 char b[BDEVNAME_SIZE];
1886 struct hd_struct *part; 2009 struct hd_struct *part;
1887 2010
@@ -1900,6 +2023,14 @@ generic_make_request_checks(struct bio *bio)
1900 goto end_io; 2023 goto end_io;
1901 } 2024 }
1902 2025
2026 /*
2027 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
2028 * if queue is not a request based queue.
2029 */
2030
2031 if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
2032 goto not_supported;
2033
1903 part = bio->bi_bdev->bd_part; 2034 part = bio->bi_bdev->bd_part;
1904 if (should_fail_request(part, bio->bi_iter.bi_size) || 2035 if (should_fail_request(part, bio->bi_iter.bi_size) ||
1905 should_fail_request(&part_to_disk(part)->part0, 2036 should_fail_request(&part_to_disk(part)->part0,
@@ -1924,7 +2055,7 @@ generic_make_request_checks(struct bio *bio)
1924 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { 2055 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
1925 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); 2056 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
1926 if (!nr_sectors) { 2057 if (!nr_sectors) {
1927 err = 0; 2058 status = BLK_STS_OK;
1928 goto end_io; 2059 goto end_io;
1929 } 2060 }
1930 } 2061 }
@@ -1976,9 +2107,9 @@ generic_make_request_checks(struct bio *bio)
1976 return true; 2107 return true;
1977 2108
1978not_supported: 2109not_supported:
1979 err = -EOPNOTSUPP; 2110 status = BLK_STS_NOTSUPP;
1980end_io: 2111end_io:
1981 bio->bi_error = err; 2112 bio->bi_status = status;
1982 bio_endio(bio); 2113 bio_endio(bio);
1983 return false; 2114 return false;
1984} 2115}
@@ -2057,7 +2188,7 @@ blk_qc_t generic_make_request(struct bio *bio)
2057 do { 2188 do {
2058 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 2189 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
2059 2190
2060 if (likely(blk_queue_enter(q, false) == 0)) { 2191 if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
2061 struct bio_list lower, same; 2192 struct bio_list lower, same;
2062 2193
2063 /* Create a fresh bio_list for all subordinate requests */ 2194 /* Create a fresh bio_list for all subordinate requests */
@@ -2082,7 +2213,11 @@ blk_qc_t generic_make_request(struct bio *bio)
2082 bio_list_merge(&bio_list_on_stack[0], &same); 2213 bio_list_merge(&bio_list_on_stack[0], &same);
2083 bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); 2214 bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
2084 } else { 2215 } else {
2085 bio_io_error(bio); 2216 if (unlikely(!blk_queue_dying(q) &&
2217 (bio->bi_opf & REQ_NOWAIT)))
2218 bio_wouldblock_error(bio);
2219 else
2220 bio_io_error(bio);
2086 } 2221 }
2087 bio = bio_list_pop(&bio_list_on_stack[0]); 2222 bio = bio_list_pop(&bio_list_on_stack[0]);
2088 } while (bio); 2223 } while (bio);
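The generic_make_request() changes above teach the submission path about REQ_NOWAIT: instead of blocking in blk_queue_enter() or in request allocation, the bio is failed with bio_wouldblock_error() (BLK_STS_AGAIN). A hedged sketch of a submitter that opts in follows; the callback names and the resubmission strategy are illustrative, not from the patch.

/*
 * Illustration only: submit a bio without blocking and treat
 * BLK_STS_AGAIN as "try again later" in the completion handler.
 */
static void my_nowait_end_io(struct bio *bio)
{
	if (bio->bi_status == BLK_STS_AGAIN) {
		/* e.g. queue the I/O for resubmission from process context */
	}
	bio_put(bio);
}

static void my_submit_nowait(struct bio *bio)
{
	bio->bi_opf |= REQ_NOWAIT;
	bio->bi_end_io = my_nowait_end_io;
	submit_bio(bio);
}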
@@ -2183,29 +2318,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
2183 * @q: the queue to submit the request 2318 * @q: the queue to submit the request
2184 * @rq: the request being queued 2319 * @rq: the request being queued
2185 */ 2320 */
2186int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 2321blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2187{ 2322{
2188 unsigned long flags; 2323 unsigned long flags;
2189 int where = ELEVATOR_INSERT_BACK; 2324 int where = ELEVATOR_INSERT_BACK;
2190 2325
2191 if (blk_cloned_rq_check_limits(q, rq)) 2326 if (blk_cloned_rq_check_limits(q, rq))
2192 return -EIO; 2327 return BLK_STS_IOERR;
2193 2328
2194 if (rq->rq_disk && 2329 if (rq->rq_disk &&
2195 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) 2330 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
2196 return -EIO; 2331 return BLK_STS_IOERR;
2197 2332
2198 if (q->mq_ops) { 2333 if (q->mq_ops) {
2199 if (blk_queue_io_stat(q)) 2334 if (blk_queue_io_stat(q))
2200 blk_account_io_start(rq, true); 2335 blk_account_io_start(rq, true);
2201 blk_mq_sched_insert_request(rq, false, true, false, false); 2336 blk_mq_sched_insert_request(rq, false, true, false, false);
2202 return 0; 2337 return BLK_STS_OK;
2203 } 2338 }
2204 2339
2205 spin_lock_irqsave(q->queue_lock, flags); 2340 spin_lock_irqsave(q->queue_lock, flags);
2206 if (unlikely(blk_queue_dying(q))) { 2341 if (unlikely(blk_queue_dying(q))) {
2207 spin_unlock_irqrestore(q->queue_lock, flags); 2342 spin_unlock_irqrestore(q->queue_lock, flags);
2208 return -ENODEV; 2343 return BLK_STS_IOERR;
2209 } 2344 }
2210 2345
2211 /* 2346 /*
@@ -2222,7 +2357,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2222 __blk_run_queue(q); 2357 __blk_run_queue(q);
2223 spin_unlock_irqrestore(q->queue_lock, flags); 2358 spin_unlock_irqrestore(q->queue_lock, flags);
2224 2359
2225 return 0; 2360 return BLK_STS_OK;
2226} 2361}
2227EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 2362EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
2228 2363
@@ -2238,9 +2373,6 @@ EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
2238 * 2373 *
2239 * Return: 2374 * Return:
2240 * The number of bytes to fail. 2375 * The number of bytes to fail.
2241 *
2242 * Context:
2243 * queue_lock must be held.
2244 */ 2376 */
2245unsigned int blk_rq_err_bytes(const struct request *rq) 2377unsigned int blk_rq_err_bytes(const struct request *rq)
2246{ 2378{
@@ -2380,15 +2512,15 @@ void blk_account_io_start(struct request *rq, bool new_io)
2380 * Return: 2512 * Return:
2381 * Pointer to the request at the top of @q if available. Null 2513 * Pointer to the request at the top of @q if available. Null
2382 * otherwise. 2514 * otherwise.
2383 *
2384 * Context:
2385 * queue_lock must be held.
2386 */ 2515 */
2387struct request *blk_peek_request(struct request_queue *q) 2516struct request *blk_peek_request(struct request_queue *q)
2388{ 2517{
2389 struct request *rq; 2518 struct request *rq;
2390 int ret; 2519 int ret;
2391 2520
2521 lockdep_assert_held(q->queue_lock);
2522 WARN_ON_ONCE(q->mq_ops);
2523
2392 while ((rq = __elv_next_request(q)) != NULL) { 2524 while ((rq = __elv_next_request(q)) != NULL) {
2393 2525
2394 rq = blk_pm_peek_request(q, rq); 2526 rq = blk_pm_peek_request(q, rq);
@@ -2456,15 +2588,14 @@ struct request *blk_peek_request(struct request_queue *q)
2456 rq = NULL; 2588 rq = NULL;
2457 break; 2589 break;
2458 } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { 2590 } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
2459 int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
2460
2461 rq->rq_flags |= RQF_QUIET; 2591 rq->rq_flags |= RQF_QUIET;
2462 /* 2592 /*
2463 * Mark this request as started so we don't trigger 2593 * Mark this request as started so we don't trigger
2464 * any debug logic in the end I/O path. 2594 * any debug logic in the end I/O path.
2465 */ 2595 */
2466 blk_start_request(rq); 2596 blk_start_request(rq);
2467 __blk_end_request_all(rq, err); 2597 __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
2598 BLK_STS_TARGET : BLK_STS_IOERR);
2468 } else { 2599 } else {
2469 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 2600 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
2470 break; 2601 break;
@@ -2505,12 +2636,12 @@ void blk_dequeue_request(struct request *rq)
2505 * 2636 *
2506 * Block internal functions which don't want to start timer should 2637 * Block internal functions which don't want to start timer should
2507 * call blk_dequeue_request(). 2638 * call blk_dequeue_request().
2508 *
2509 * Context:
2510 * queue_lock must be held.
2511 */ 2639 */
2512void blk_start_request(struct request *req) 2640void blk_start_request(struct request *req)
2513{ 2641{
2642 lockdep_assert_held(req->q->queue_lock);
2643 WARN_ON_ONCE(req->q->mq_ops);
2644
2514 blk_dequeue_request(req); 2645 blk_dequeue_request(req);
2515 2646
2516 if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { 2647 if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
@@ -2535,14 +2666,14 @@ EXPORT_SYMBOL(blk_start_request);
2535 * Return: 2666 * Return:
2536 * Pointer to the request at the top of @q if available. Null 2667 * Pointer to the request at the top of @q if available. Null
2537 * otherwise. 2668 * otherwise.
2538 *
2539 * Context:
2540 * queue_lock must be held.
2541 */ 2669 */
2542struct request *blk_fetch_request(struct request_queue *q) 2670struct request *blk_fetch_request(struct request_queue *q)
2543{ 2671{
2544 struct request *rq; 2672 struct request *rq;
2545 2673
2674 lockdep_assert_held(q->queue_lock);
2675 WARN_ON_ONCE(q->mq_ops);
2676
2546 rq = blk_peek_request(q); 2677 rq = blk_peek_request(q);
2547 if (rq) 2678 if (rq)
2548 blk_start_request(rq); 2679 blk_start_request(rq);
@@ -2553,7 +2684,7 @@ EXPORT_SYMBOL(blk_fetch_request);
2553/** 2684/**
2554 * blk_update_request - Special helper function for request stacking drivers 2685 * blk_update_request - Special helper function for request stacking drivers
2555 * @req: the request being processed 2686 * @req: the request being processed
2556 * @error: %0 for success, < %0 for error 2687 * @error: block status code
2557 * @nr_bytes: number of bytes to complete @req 2688 * @nr_bytes: number of bytes to complete @req
2558 * 2689 *
2559 * Description: 2690 * Description:
@@ -2572,49 +2703,19 @@ EXPORT_SYMBOL(blk_fetch_request);
2572 * %false - this request doesn't have any more data 2703 * %false - this request doesn't have any more data
2573 * %true - this request has more data 2704 * %true - this request has more data
2574 **/ 2705 **/
2575bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) 2706bool blk_update_request(struct request *req, blk_status_t error,
2707 unsigned int nr_bytes)
2576{ 2708{
2577 int total_bytes; 2709 int total_bytes;
2578 2710
2579 trace_block_rq_complete(req, error, nr_bytes); 2711 trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
2580 2712
2581 if (!req->bio) 2713 if (!req->bio)
2582 return false; 2714 return false;
2583 2715
2584 if (error && !blk_rq_is_passthrough(req) && 2716 if (unlikely(error && !blk_rq_is_passthrough(req) &&
2585 !(req->rq_flags & RQF_QUIET)) { 2717 !(req->rq_flags & RQF_QUIET)))
2586 char *error_type; 2718 print_req_error(req, error);
2587
2588 switch (error) {
2589 case -ENOLINK:
2590 error_type = "recoverable transport";
2591 break;
2592 case -EREMOTEIO:
2593 error_type = "critical target";
2594 break;
2595 case -EBADE:
2596 error_type = "critical nexus";
2597 break;
2598 case -ETIMEDOUT:
2599 error_type = "timeout";
2600 break;
2601 case -ENOSPC:
2602 error_type = "critical space allocation";
2603 break;
2604 case -ENODATA:
2605 error_type = "critical medium";
2606 break;
2607 case -EIO:
2608 default:
2609 error_type = "I/O";
2610 break;
2611 }
2612 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
2613 __func__, error_type, req->rq_disk ?
2614 req->rq_disk->disk_name : "?",
2615 (unsigned long long)blk_rq_pos(req));
2616
2617 }
2618 2719
2619 blk_account_io_completion(req, nr_bytes); 2720 blk_account_io_completion(req, nr_bytes);
2620 2721
@@ -2680,7 +2781,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2680} 2781}
2681EXPORT_SYMBOL_GPL(blk_update_request); 2782EXPORT_SYMBOL_GPL(blk_update_request);
2682 2783
2683static bool blk_update_bidi_request(struct request *rq, int error, 2784static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
2684 unsigned int nr_bytes, 2785 unsigned int nr_bytes,
2685 unsigned int bidi_bytes) 2786 unsigned int bidi_bytes)
2686{ 2787{
@@ -2718,13 +2819,13 @@ void blk_unprep_request(struct request *req)
2718} 2819}
2719EXPORT_SYMBOL_GPL(blk_unprep_request); 2820EXPORT_SYMBOL_GPL(blk_unprep_request);
2720 2821
2721/* 2822void blk_finish_request(struct request *req, blk_status_t error)
2722 * queue lock must be held
2723 */
2724void blk_finish_request(struct request *req, int error)
2725{ 2823{
2726 struct request_queue *q = req->q; 2824 struct request_queue *q = req->q;
2727 2825
2826 lockdep_assert_held(req->q->queue_lock);
2827 WARN_ON_ONCE(q->mq_ops);
2828
2728 if (req->rq_flags & RQF_STATS) 2829 if (req->rq_flags & RQF_STATS)
2729 blk_stat_add(req); 2830 blk_stat_add(req);
2730 2831
@@ -2758,7 +2859,7 @@ EXPORT_SYMBOL(blk_finish_request);
2758/** 2859/**
2759 * blk_end_bidi_request - Complete a bidi request 2860 * blk_end_bidi_request - Complete a bidi request
2760 * @rq: the request to complete 2861 * @rq: the request to complete
2761 * @error: %0 for success, < %0 for error 2862 * @error: block status code
2762 * @nr_bytes: number of bytes to complete @rq 2863 * @nr_bytes: number of bytes to complete @rq
2763 * @bidi_bytes: number of bytes to complete @rq->next_rq 2864 * @bidi_bytes: number of bytes to complete @rq->next_rq
2764 * 2865 *
@@ -2772,12 +2873,14 @@ EXPORT_SYMBOL(blk_finish_request);
2772 * %false - we are done with this request 2873 * %false - we are done with this request
2773 * %true - still buffers pending for this request 2874 * %true - still buffers pending for this request
2774 **/ 2875 **/
2775static bool blk_end_bidi_request(struct request *rq, int error, 2876static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
2776 unsigned int nr_bytes, unsigned int bidi_bytes) 2877 unsigned int nr_bytes, unsigned int bidi_bytes)
2777{ 2878{
2778 struct request_queue *q = rq->q; 2879 struct request_queue *q = rq->q;
2779 unsigned long flags; 2880 unsigned long flags;
2780 2881
2882 WARN_ON_ONCE(q->mq_ops);
2883
2781 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2884 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2782 return true; 2885 return true;
2783 2886
@@ -2791,7 +2894,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
2791/** 2894/**
2792 * __blk_end_bidi_request - Complete a bidi request with queue lock held 2895 * __blk_end_bidi_request - Complete a bidi request with queue lock held
2793 * @rq: the request to complete 2896 * @rq: the request to complete
2794 * @error: %0 for success, < %0 for error 2897 * @error: block status code
2795 * @nr_bytes: number of bytes to complete @rq 2898 * @nr_bytes: number of bytes to complete @rq
2796 * @bidi_bytes: number of bytes to complete @rq->next_rq 2899 * @bidi_bytes: number of bytes to complete @rq->next_rq
2797 * 2900 *
@@ -2803,9 +2906,12 @@ static bool blk_end_bidi_request(struct request *rq, int error,
2803 * %false - we are done with this request 2906 * %false - we are done with this request
2804 * %true - still buffers pending for this request 2907 * %true - still buffers pending for this request
2805 **/ 2908 **/
2806static bool __blk_end_bidi_request(struct request *rq, int error, 2909static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
2807 unsigned int nr_bytes, unsigned int bidi_bytes) 2910 unsigned int nr_bytes, unsigned int bidi_bytes)
2808{ 2911{
2912 lockdep_assert_held(rq->q->queue_lock);
2913 WARN_ON_ONCE(rq->q->mq_ops);
2914
2809 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2915 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2810 return true; 2916 return true;
2811 2917
@@ -2817,7 +2923,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
2817/** 2923/**
2818 * blk_end_request - Helper function for drivers to complete the request. 2924 * blk_end_request - Helper function for drivers to complete the request.
2819 * @rq: the request being processed 2925 * @rq: the request being processed
2820 * @error: %0 for success, < %0 for error 2926 * @error: block status code
2821 * @nr_bytes: number of bytes to complete 2927 * @nr_bytes: number of bytes to complete
2822 * 2928 *
2823 * Description: 2929 * Description:
@@ -2828,8 +2934,10 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
2828 * %false - we are done with this request 2934 * %false - we are done with this request
2829 * %true - still buffers pending for this request 2935 * %true - still buffers pending for this request
2830 **/ 2936 **/
2831bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2937bool blk_end_request(struct request *rq, blk_status_t error,
2938 unsigned int nr_bytes)
2832{ 2939{
2940 WARN_ON_ONCE(rq->q->mq_ops);
2833 return blk_end_bidi_request(rq, error, nr_bytes, 0); 2941 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2834} 2942}
2835EXPORT_SYMBOL(blk_end_request); 2943EXPORT_SYMBOL(blk_end_request);
@@ -2837,12 +2945,12 @@ EXPORT_SYMBOL(blk_end_request);
2837/** 2945/**
2838 * blk_end_request_all - Helper function for drivers to finish the request. 2946 * blk_end_request_all - Helper function for drivers to finish the request.
2839 * @rq: the request to finish 2947 * @rq: the request to finish
2840 * @error: %0 for success, < %0 for error 2948 * @error: block status code
2841 * 2949 *
2842 * Description: 2950 * Description:
2843 * Completely finish @rq. 2951 * Completely finish @rq.
2844 */ 2952 */
2845void blk_end_request_all(struct request *rq, int error) 2953void blk_end_request_all(struct request *rq, blk_status_t error)
2846{ 2954{
2847 bool pending; 2955 bool pending;
2848 unsigned int bidi_bytes = 0; 2956 unsigned int bidi_bytes = 0;
@@ -2858,7 +2966,7 @@ EXPORT_SYMBOL(blk_end_request_all);
2858/** 2966/**
2859 * __blk_end_request - Helper function for drivers to complete the request. 2967 * __blk_end_request - Helper function for drivers to complete the request.
2860 * @rq: the request being processed 2968 * @rq: the request being processed
2861 * @error: %0 for success, < %0 for error 2969 * @error: block status code
2862 * @nr_bytes: number of bytes to complete 2970 * @nr_bytes: number of bytes to complete
2863 * 2971 *
2864 * Description: 2972 * Description:
@@ -2868,8 +2976,12 @@ EXPORT_SYMBOL(blk_end_request_all);
2868 * %false - we are done with this request 2976 * %false - we are done with this request
2869 * %true - still buffers pending for this request 2977 * %true - still buffers pending for this request
2870 **/ 2978 **/
2871bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2979bool __blk_end_request(struct request *rq, blk_status_t error,
2980 unsigned int nr_bytes)
2872{ 2981{
2982 lockdep_assert_held(rq->q->queue_lock);
2983 WARN_ON_ONCE(rq->q->mq_ops);
2984
2873 return __blk_end_bidi_request(rq, error, nr_bytes, 0); 2985 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2874} 2986}
2875EXPORT_SYMBOL(__blk_end_request); 2987EXPORT_SYMBOL(__blk_end_request);
@@ -2877,16 +2989,19 @@ EXPORT_SYMBOL(__blk_end_request);
2877/** 2989/**
2878 * __blk_end_request_all - Helper function for drivers to finish the request. 2990 * __blk_end_request_all - Helper function for drivers to finish the request.
2879 * @rq: the request to finish 2991 * @rq: the request to finish
2880 * @error: %0 for success, < %0 for error 2992 * @error: block status code
2881 * 2993 *
2882 * Description: 2994 * Description:
2883 * Completely finish @rq. Must be called with queue lock held. 2995 * Completely finish @rq. Must be called with queue lock held.
2884 */ 2996 */
2885void __blk_end_request_all(struct request *rq, int error) 2997void __blk_end_request_all(struct request *rq, blk_status_t error)
2886{ 2998{
2887 bool pending; 2999 bool pending;
2888 unsigned int bidi_bytes = 0; 3000 unsigned int bidi_bytes = 0;
2889 3001
3002 lockdep_assert_held(rq->q->queue_lock);
3003 WARN_ON_ONCE(rq->q->mq_ops);
3004
2890 if (unlikely(blk_bidi_rq(rq))) 3005 if (unlikely(blk_bidi_rq(rq)))
2891 bidi_bytes = blk_rq_bytes(rq->next_rq); 3006 bidi_bytes = blk_rq_bytes(rq->next_rq);
2892 3007
@@ -2898,7 +3013,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
2898/** 3013/**
2899 * __blk_end_request_cur - Helper function to finish the current request chunk. 3014 * __blk_end_request_cur - Helper function to finish the current request chunk.
2900 * @rq: the request to finish the current chunk for 3015 * @rq: the request to finish the current chunk for
2901 * @error: %0 for success, < %0 for error 3016 * @error: block status code
2902 * 3017 *
2903 * Description: 3018 * Description:
2904 * Complete the current consecutively mapped chunk from @rq. Must 3019 * Complete the current consecutively mapped chunk from @rq. Must
@@ -2908,7 +3023,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
2908 * %false - we are done with this request 3023 * %false - we are done with this request
2909 * %true - still buffers pending for this request 3024 * %true - still buffers pending for this request
2910 */ 3025 */
2911bool __blk_end_request_cur(struct request *rq, int error) 3026bool __blk_end_request_cur(struct request *rq, blk_status_t error)
2912{ 3027{
2913 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 3028 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2914} 3029}
@@ -3151,6 +3266,8 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
3151 bool from_schedule) 3266 bool from_schedule)
3152 __releases(q->queue_lock) 3267 __releases(q->queue_lock)
3153{ 3268{
3269 lockdep_assert_held(q->queue_lock);
3270
3154 trace_block_unplug(q, depth, !from_schedule); 3271 trace_block_unplug(q, depth, !from_schedule);
3155 3272
3156 if (from_schedule) 3273 if (from_schedule)
@@ -3249,7 +3366,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3249 * Short-circuit if @q is dead 3366 * Short-circuit if @q is dead
3250 */ 3367 */
3251 if (unlikely(blk_queue_dying(q))) { 3368 if (unlikely(blk_queue_dying(q))) {
3252 __blk_end_request_all(rq, -ENODEV); 3369 __blk_end_request_all(rq, BLK_STS_IOERR);
3253 continue; 3370 continue;
3254 } 3371 }
3255 3372
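
For reference, a minimal sketch (not part of this patch, driver and function names invented) of how a legacy single-queue driver's completion path looks once the helpers above take a blk_status_t instead of a negative errno:

    /*
     * Illustrative sketch only.  A legacy (single-queue) driver's
     * completion path after the conversion: errno values are replaced
     * by blk_status_t codes, and the __blk_* variants still require
     * the queue lock.
     */
    #include <linux/blkdev.h>
    #include <linux/blk_types.h>

    static void mydev_complete_rq(struct request *rq, bool failed)
    {
    	lockdep_assert_held(rq->q->queue_lock);

    	__blk_end_request_all(rq, failed ? BLK_STS_IOERR : BLK_STS_OK);
    }
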
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a9451e3b8587..5c0f3dc446dc 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
16 * @rq: request to complete 16 * @rq: request to complete
17 * @error: end I/O status of the request 17 * @error: end I/O status of the request
18 */ 18 */
19static void blk_end_sync_rq(struct request *rq, int error) 19static void blk_end_sync_rq(struct request *rq, blk_status_t error)
20{ 20{
21 struct completion *waiting = rq->end_io_data; 21 struct completion *waiting = rq->end_io_data;
22 22
@@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
69 69
70 if (unlikely(blk_queue_dying(q))) { 70 if (unlikely(blk_queue_dying(q))) {
71 rq->rq_flags |= RQF_QUIET; 71 rq->rq_flags |= RQF_QUIET;
72 __blk_end_request_all(rq, -ENXIO); 72 __blk_end_request_all(rq, BLK_STS_IOERR);
73 spin_unlock_irq(q->queue_lock); 73 spin_unlock_irq(q->queue_lock);
74 return; 74 return;
75 } 75 }
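
The rq_end_io callback type changes the same way. A hedged sketch of a callback under the new signature (the mydrv_* name is invented):

    /*
     * Sketch only: an end_io callback now receives a blk_status_t rather
     * than an errno; blk_status_to_errno() converts back when an errno is
     * still needed, e.g. for logging.
     */
    #include <linux/blkdev.h>
    #include <linux/completion.h>

    static void mydrv_end_io(struct request *rq, blk_status_t error)
    {
    	struct completion *done = rq->end_io_data;

    	if (error)
    		pr_warn("mydrv: request failed, err %d\n",
    			blk_status_to_errno(error));
    	complete(done);
    }
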
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c4e0880b54bb..ed5fe322abba 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
164 */ 164 */
165static bool blk_flush_complete_seq(struct request *rq, 165static bool blk_flush_complete_seq(struct request *rq,
166 struct blk_flush_queue *fq, 166 struct blk_flush_queue *fq,
167 unsigned int seq, int error) 167 unsigned int seq, blk_status_t error)
168{ 168{
169 struct request_queue *q = rq->q; 169 struct request_queue *q = rq->q;
170 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; 170 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq,
216 return kicked | queued; 216 return kicked | queued;
217} 217}
218 218
219static void flush_end_io(struct request *flush_rq, int error) 219static void flush_end_io(struct request *flush_rq, blk_status_t error)
220{ 220{
221 struct request_queue *q = flush_rq->q; 221 struct request_queue *q = flush_rq->q;
222 struct list_head *running; 222 struct list_head *running;
@@ -341,11 +341,13 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
341 return blk_flush_queue_rq(flush_rq, false); 341 return blk_flush_queue_rq(flush_rq, false);
342} 342}
343 343
344static void flush_data_end_io(struct request *rq, int error) 344static void flush_data_end_io(struct request *rq, blk_status_t error)
345{ 345{
346 struct request_queue *q = rq->q; 346 struct request_queue *q = rq->q;
347 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); 347 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
348 348
349 lockdep_assert_held(q->queue_lock);
350
349 /* 351 /*
350 * Updating q->in_flight[] here for making this tag usable 352 * Updating q->in_flight[] here for making this tag usable
351 * early. Because in blk_queue_start_tag(), 353 * early. Because in blk_queue_start_tag(),
@@ -382,7 +384,7 @@ static void flush_data_end_io(struct request *rq, int error)
382 blk_run_queue_async(q); 384 blk_run_queue_async(q);
383} 385}
384 386
385static void mq_flush_data_end_io(struct request *rq, int error) 387static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
386{ 388{
387 struct request_queue *q = rq->q; 389 struct request_queue *q = rq->q;
388 struct blk_mq_hw_ctx *hctx; 390 struct blk_mq_hw_ctx *hctx;
@@ -411,9 +413,6 @@ static void mq_flush_data_end_io(struct request *rq, int error)
411 * or __blk_mq_run_hw_queue() to dispatch request. 413 * or __blk_mq_run_hw_queue() to dispatch request.
412 * @rq is being submitted. Analyze what needs to be done and put it on the 414 * @rq is being submitted. Analyze what needs to be done and put it on the
413 * right queue. 415 * right queue.
414 *
415 * CONTEXT:
416 * spin_lock_irq(q->queue_lock) in !mq case
417 */ 416 */
418void blk_insert_flush(struct request *rq) 417void blk_insert_flush(struct request *rq)
419{ 418{
@@ -422,6 +421,9 @@ void blk_insert_flush(struct request *rq)
422 unsigned int policy = blk_flush_policy(fflags, rq); 421 unsigned int policy = blk_flush_policy(fflags, rq);
423 struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); 422 struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
424 423
424 if (!q->mq_ops)
425 lockdep_assert_held(q->queue_lock);
426
425 /* 427 /*
426 * @policy now records what operations need to be done. Adjust 428 * @policy now records what operations need to be done. Adjust
427 * REQ_PREFLUSH and FUA for the driver. 429 * REQ_PREFLUSH and FUA for the driver.
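
Throughout these hunks the free-form "CONTEXT: queue_lock" comments are replaced by runtime checks. A sketch of the recurring pattern (the helper name is illustrative):

    /*
     * Sketch of the locking-check pattern: document the rule with lockdep
     * instead of a comment, and assert that a legacy-only helper is never
     * reached on a blk-mq queue.
     */
    #include <linux/blkdev.h>
    #include <linux/lockdep.h>

    static void mydrv_legacy_only_helper(struct request_queue *q)
    {
    	lockdep_assert_held(q->queue_lock);	/* caller holds queue_lock */
    	WARN_ON_ONCE(q->mq_ops);		/* legacy request_fn path only */

    	/* ... */
    }
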
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 0f891a9aff4d..feb30570eaf5 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = {
384 .sysfs_ops = &integrity_ops, 384 .sysfs_ops = &integrity_ops,
385}; 385};
386 386
387static int blk_integrity_nop_fn(struct blk_integrity_iter *iter) 387static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
388{ 388{
389 return 0; 389 return BLK_STS_OK;
390} 390}
391 391
392static const struct blk_integrity_profile nop_profile = { 392static const struct blk_integrity_profile nop_profile = {
diff --git a/block/blk-map.c b/block/blk-map.c
index 3b5cb863318f..2547016aa7aa 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -16,6 +16,8 @@
16 */ 16 */
17int blk_rq_append_bio(struct request *rq, struct bio *bio) 17int blk_rq_append_bio(struct request *rq, struct bio *bio)
18{ 18{
19 blk_queue_bounce(rq->q, &bio);
20
19 if (!rq->bio) { 21 if (!rq->bio) {
20 blk_rq_bio_prep(rq->q, rq, bio); 22 blk_rq_bio_prep(rq->q, rq, bio);
21 } else { 23 } else {
@@ -72,15 +74,13 @@ static int __blk_rq_map_user_iov(struct request *rq,
72 map_data->offset += bio->bi_iter.bi_size; 74 map_data->offset += bio->bi_iter.bi_size;
73 75
74 orig_bio = bio; 76 orig_bio = bio;
75 blk_queue_bounce(q, &bio);
76 77
77 /* 78 /*
78 * We link the bounce buffer in and could have to traverse it 79 * We link the bounce buffer in and could have to traverse it
79 * later so we have to get a ref to prevent it from being freed 80 * later so we have to get a ref to prevent it from being freed
80 */ 81 */
81 bio_get(bio);
82
83 ret = blk_rq_append_bio(rq, bio); 82 ret = blk_rq_append_bio(rq, bio);
83 bio_get(bio);
84 if (ret) { 84 if (ret) {
85 bio_endio(bio); 85 bio_endio(bio);
86 __blk_rq_unmap_user(orig_bio); 86 __blk_rq_unmap_user(orig_bio);
@@ -249,7 +249,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
249 return ret; 249 return ret;
250 } 250 }
251 251
252 blk_queue_bounce(q, &rq->bio);
253 return 0; 252 return 0;
254} 253}
255EXPORT_SYMBOL(blk_rq_map_kern); 254EXPORT_SYMBOL(blk_rq_map_kern);
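
With bouncing moved into blk_rq_append_bio(), callers no longer bounce the bio themselves. A hedged sketch of a caller after this change (name invented):

    /*
     * Sketch: blk_queue_bounce() now runs inside blk_rq_append_bio(), so a
     * caller simply appends the bio and checks the return value.
     */
    #include <linux/blkdev.h>

    static int mydrv_add_bio(struct request *rq, struct bio *bio)
    {
    	return blk_rq_append_bio(rq, bio);	/* bounces internally if needed */
    }
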
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3990ae406341..99038830fb42 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -108,31 +108,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
108 bool do_split = true; 108 bool do_split = true;
109 struct bio *new = NULL; 109 struct bio *new = NULL;
110 const unsigned max_sectors = get_max_io_size(q, bio); 110 const unsigned max_sectors = get_max_io_size(q, bio);
111 unsigned bvecs = 0;
112 111
113 bio_for_each_segment(bv, bio, iter) { 112 bio_for_each_segment(bv, bio, iter) {
114 /* 113 /*
115 * With arbitrary bio size, the incoming bio may be very
116 * big. We have to split the bio into small bios so that
117 * each holds at most BIO_MAX_PAGES bvecs because
118 * bio_clone() can fail to allocate big bvecs.
119 *
120 * It should have been better to apply the limit per
121 * request queue in which bio_clone() is involved,
122 * instead of globally. The biggest blocker is the
123 * bio_clone() in bio bounce.
124 *
125 * If bio is splitted by this reason, we should have
126 * allowed to continue bios merging, but don't do
127 * that now for making the change simple.
128 *
129 * TODO: deal with bio bounce's bio_clone() gracefully
130 * and convert the global limit into per-queue limit.
131 */
132 if (bvecs++ >= BIO_MAX_PAGES)
133 goto split;
134
135 /*
136 * If the queue doesn't support SG gaps and adding this 114 * If the queue doesn't support SG gaps and adding this
137 * offset would create a gap, disallow it. 115 * offset would create a gap, disallow it.
138 */ 116 */
@@ -202,8 +180,7 @@ split:
202 return do_split ? new : NULL; 180 return do_split ? new : NULL;
203} 181}
204 182
205void blk_queue_split(struct request_queue *q, struct bio **bio, 183void blk_queue_split(struct request_queue *q, struct bio **bio)
206 struct bio_set *bs)
207{ 184{
208 struct bio *split, *res; 185 struct bio *split, *res;
209 unsigned nsegs; 186 unsigned nsegs;
@@ -211,13 +188,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
211 switch (bio_op(*bio)) { 188 switch (bio_op(*bio)) {
212 case REQ_OP_DISCARD: 189 case REQ_OP_DISCARD:
213 case REQ_OP_SECURE_ERASE: 190 case REQ_OP_SECURE_ERASE:
214 split = blk_bio_discard_split(q, *bio, bs, &nsegs); 191 split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
215 break; 192 break;
216 case REQ_OP_WRITE_ZEROES: 193 case REQ_OP_WRITE_ZEROES:
217 split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs); 194 split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
218 break; 195 break;
219 case REQ_OP_WRITE_SAME: 196 case REQ_OP_WRITE_SAME:
220 split = blk_bio_write_same_split(q, *bio, bs, &nsegs); 197 split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
221 break; 198 break;
222 default: 199 default:
223 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); 200 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -671,6 +648,9 @@ static void blk_account_io_merge(struct request *req)
671static struct request *attempt_merge(struct request_queue *q, 648static struct request *attempt_merge(struct request_queue *q,
672 struct request *req, struct request *next) 649 struct request *req, struct request *next)
673{ 650{
651 if (!q->mq_ops)
652 lockdep_assert_held(q->queue_lock);
653
674 if (!rq_mergeable(req) || !rq_mergeable(next)) 654 if (!rq_mergeable(req) || !rq_mergeable(next))
675 return NULL; 655 return NULL;
676 656
@@ -693,6 +673,13 @@ static struct request *attempt_merge(struct request_queue *q,
693 return NULL; 673 return NULL;
694 674
695 /* 675 /*
676 * Don't allow merge of different write hints, or for a hint with
677 * non-hint IO.
678 */
679 if (req->write_hint != next->write_hint)
680 return NULL;
681
682 /*
696 * If we are allowed to merge, then append bio list 683 * If we are allowed to merge, then append bio list
697 * from next to rq and release next. merge_requests_fn 684 * from next to rq and release next. merge_requests_fn
698 * will have updated segment counts, update sector 685 * will have updated segment counts, update sector
@@ -811,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
811 !blk_write_same_mergeable(rq->bio, bio)) 798 !blk_write_same_mergeable(rq->bio, bio))
812 return false; 799 return false;
813 800
801 /*
802 * Don't allow merge of different write hints, or for a hint with
803 * non-hint IO.
804 */
805 if (rq->write_hint != bio->bi_write_hint)
806 return false;
807
814 return true; 808 return true;
815} 809}
816 810
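
blk_queue_split() loses its bio_set argument and always uses q->bio_split. A sketch of a bio-based driver after the change (driver name invented, body elided):

    /*
     * Sketch of a make_request-based driver calling the new two-argument
     * blk_queue_split().
     */
    #include <linux/blkdev.h>

    static blk_qc_t mydrv_make_request(struct request_queue *q, struct bio *bio)
    {
    	blk_queue_split(q, &bio);	/* was: blk_queue_split(q, &bio, q->bio_split) */

    	/* ... map and submit *bio ... */
    	return BLK_QC_T_NONE;
    }
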
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8e61e8640e17..2cca4fc43f45 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,10 +14,15 @@
14#include "blk.h" 14#include "blk.h"
15#include "blk-mq.h" 15#include "blk-mq.h"
16 16
17static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues, 17static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
18 const int cpu) 18 const struct cpumask *online_mask)
19{ 19{
20 return cpu * nr_queues / nr_cpus; 20 /*
21 * Non online CPU will be mapped to queue index 0.
22 */
23 if (!cpumask_test_cpu(cpu, online_mask))
24 return 0;
25 return cpu % nr_queues;
21} 26}
22 27
23static int get_first_sibling(unsigned int cpu) 28static int get_first_sibling(unsigned int cpu)
@@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
36 unsigned int *map = set->mq_map; 41 unsigned int *map = set->mq_map;
37 unsigned int nr_queues = set->nr_hw_queues; 42 unsigned int nr_queues = set->nr_hw_queues;
38 const struct cpumask *online_mask = cpu_online_mask; 43 const struct cpumask *online_mask = cpu_online_mask;
39 unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling; 44 unsigned int cpu, first_sibling;
40 cpumask_var_t cpus;
41
42 if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
43 return -ENOMEM;
44
45 cpumask_clear(cpus);
46 nr_cpus = nr_uniq_cpus = 0;
47 for_each_cpu(i, online_mask) {
48 nr_cpus++;
49 first_sibling = get_first_sibling(i);
50 if (!cpumask_test_cpu(first_sibling, cpus))
51 nr_uniq_cpus++;
52 cpumask_set_cpu(i, cpus);
53 }
54
55 queue = 0;
56 for_each_possible_cpu(i) {
57 if (!cpumask_test_cpu(i, online_mask)) {
58 map[i] = 0;
59 continue;
60 }
61 45
46 for_each_possible_cpu(cpu) {
62 /* 47 /*
63 * Easy case - we have equal or more hardware queues. Or 48 * First do sequential mapping between CPUs and queues.
64 * there are no thread siblings to take into account. Do 49 * In case we still have CPUs to map, and we have some number of
65 * 1:1 if enough, or sequential mapping if less. 50 * threads per core, then map sibling threads to the same queue for
 51 * performance optimizations.
66 */ 52 */
67 if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) { 53 if (cpu < nr_queues) {
68 map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue); 54 map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
69 queue++; 55 } else {
70 continue; 56 first_sibling = get_first_sibling(cpu);
57 if (first_sibling == cpu)
58 map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
59 else
60 map[cpu] = map[first_sibling];
71 } 61 }
72
73 /*
74 * Less then nr_cpus queues, and we have some number of
75 * threads per cores. Map sibling threads to the same
76 * queue.
77 */
78 first_sibling = get_first_sibling(i);
79 if (first_sibling == i) {
80 map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
81 queue);
82 queue++;
83 } else
84 map[i] = map[first_sibling];
85 } 62 }
86 63
87 free_cpumask_var(cpus);
88 return 0; 64 return 0;
89} 65}
90EXPORT_SYMBOL_GPL(blk_mq_map_queues); 66EXPORT_SYMBOL_GPL(blk_mq_map_queues);
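
A worked example of the new mapping (illustrative topology, not from the patch):

    /*
     * Example: nr_hw_queues = 4, eight possible CPUs, all online, with SMT
     * siblings paired as (0,4) (1,5) (2,6) (3,7).
     *
     *   cpu 0..3: cpu < nr_queues       -> sequential, queues 0,1,2,3
     *   cpu 4..7: not the first sibling -> inherit map[first_sibling]
     *
     * Resulting mq_map: { 0, 1, 2, 3, 0, 1, 2, 3 }.  An offline CPU would
     * be mapped to queue 0 by cpu_to_queue_index().
     */
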
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 803aed4d7221..9ebc2945f991 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
114 blk_mq_run_hw_queues(q, true); 114 blk_mq_run_hw_queues(q, true);
115 } else if (strcmp(op, "start") == 0) { 115 } else if (strcmp(op, "start") == 0) {
116 blk_mq_start_stopped_hw_queues(q, true); 116 blk_mq_start_stopped_hw_queues(q, true);
117 } else if (strcmp(op, "kick") == 0) {
118 blk_mq_kick_requeue_list(q);
117 } else { 119 } else {
118 pr_err("%s: unsupported operation '%s'\n", __func__, op); 120 pr_err("%s: unsupported operation '%s'\n", __func__, op);
119inval: 121inval:
120 pr_err("%s: use either 'run' or 'start'\n", __func__); 122 pr_err("%s: use 'run', 'start' or 'kick'\n", __func__);
121 return -EINVAL; 123 return -EINVAL;
122 } 124 }
123 return count; 125 return count;
@@ -133,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
133 } 135 }
134} 136}
135 137
138static int queue_write_hint_show(void *data, struct seq_file *m)
139{
140 struct request_queue *q = data;
141 int i;
142
143 for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
144 seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
145
146 return 0;
147}
148
149static ssize_t queue_write_hint_store(void *data, const char __user *buf,
150 size_t count, loff_t *ppos)
151{
152 struct request_queue *q = data;
153 int i;
154
155 for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
156 q->write_hints[i] = 0;
157
158 return count;
159}
160
136static int queue_poll_stat_show(void *data, struct seq_file *m) 161static int queue_poll_stat_show(void *data, struct seq_file *m)
137{ 162{
138 struct request_queue *q = data; 163 struct request_queue *q = data;
@@ -267,6 +292,14 @@ static const char *const rqf_name[] = {
267}; 292};
268#undef RQF_NAME 293#undef RQF_NAME
269 294
295#define RQAF_NAME(name) [REQ_ATOM_##name] = #name
296static const char *const rqaf_name[] = {
297 RQAF_NAME(COMPLETE),
298 RQAF_NAME(STARTED),
299 RQAF_NAME(POLL_SLEPT),
300};
301#undef RQAF_NAME
302
270int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) 303int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
271{ 304{
272 const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; 305 const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -283,6 +316,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
283 seq_puts(m, ", .rq_flags="); 316 seq_puts(m, ", .rq_flags=");
284 blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, 317 blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
285 ARRAY_SIZE(rqf_name)); 318 ARRAY_SIZE(rqf_name));
319 seq_puts(m, ", .atomic_flags=");
320 blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name));
286 seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, 321 seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
287 rq->internal_tag); 322 rq->internal_tag);
288 if (mq_ops->show_rq) 323 if (mq_ops->show_rq)
@@ -298,6 +333,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
298} 333}
299EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); 334EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
300 335
336static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
337 __acquires(&q->requeue_lock)
338{
339 struct request_queue *q = m->private;
340
341 spin_lock_irq(&q->requeue_lock);
342 return seq_list_start(&q->requeue_list, *pos);
343}
344
345static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
346{
347 struct request_queue *q = m->private;
348
349 return seq_list_next(v, &q->requeue_list, pos);
350}
351
352static void queue_requeue_list_stop(struct seq_file *m, void *v)
353 __releases(&q->requeue_lock)
354{
355 struct request_queue *q = m->private;
356
357 spin_unlock_irq(&q->requeue_lock);
358}
359
360static const struct seq_operations queue_requeue_list_seq_ops = {
361 .start = queue_requeue_list_start,
362 .next = queue_requeue_list_next,
363 .stop = queue_requeue_list_stop,
364 .show = blk_mq_debugfs_rq_show,
365};
366
301static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) 367static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
302 __acquires(&hctx->lock) 368 __acquires(&hctx->lock)
303{ 369{
@@ -329,6 +395,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = {
329 .show = blk_mq_debugfs_rq_show, 395 .show = blk_mq_debugfs_rq_show,
330}; 396};
331 397
398struct show_busy_params {
399 struct seq_file *m;
400 struct blk_mq_hw_ctx *hctx;
401};
402
403/*
404 * Note: the state of a request may change while this function is in progress,
405 * e.g. due to a concurrent blk_mq_finish_request() call.
406 */
407static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
408{
409 const struct show_busy_params *params = data;
410
411 if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
412 test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
413 __blk_mq_debugfs_rq_show(params->m,
414 list_entry_rq(&rq->queuelist));
415}
416
417static int hctx_busy_show(void *data, struct seq_file *m)
418{
419 struct blk_mq_hw_ctx *hctx = data;
420 struct show_busy_params params = { .m = m, .hctx = hctx };
421
422 blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
423 &params);
424
425 return 0;
426}
427
332static int hctx_ctx_map_show(void *data, struct seq_file *m) 428static int hctx_ctx_map_show(void *data, struct seq_file *m)
333{ 429{
334 struct blk_mq_hw_ctx *hctx = data; 430 struct blk_mq_hw_ctx *hctx = data;
@@ -655,7 +751,9 @@ const struct file_operations blk_mq_debugfs_fops = {
655 751
656static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { 752static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
657 {"poll_stat", 0400, queue_poll_stat_show}, 753 {"poll_stat", 0400, queue_poll_stat_show},
754 {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
658 {"state", 0600, queue_state_show, queue_state_write}, 755 {"state", 0600, queue_state_show, queue_state_write},
756 {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
659 {}, 757 {},
660}; 758};
661 759
@@ -663,6 +761,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
663 {"state", 0400, hctx_state_show}, 761 {"state", 0400, hctx_state_show},
664 {"flags", 0400, hctx_flags_show}, 762 {"flags", 0400, hctx_flags_show},
665 {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, 763 {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
764 {"busy", 0400, hctx_busy_show},
666 {"ctx_map", 0400, hctx_ctx_map_show}, 765 {"ctx_map", 0400, hctx_ctx_map_show},
667 {"tags", 0400, hctx_tags_show}, 766 {"tags", 0400, hctx_tags_show},
668 {"tags_bitmap", 0400, hctx_tags_bitmap_show}, 767 {"tags_bitmap", 0400, hctx_tags_bitmap_show},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 0ded5e846335..7f0dc48ffb40 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,11 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
31} 31}
32EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); 32EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
33 33
34static void __blk_mq_sched_assign_ioc(struct request_queue *q, 34void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
35 struct request *rq,
36 struct bio *bio,
37 struct io_context *ioc)
38{ 35{
36 struct request_queue *q = rq->q;
37 struct io_context *ioc = rq_ioc(bio);
39 struct io_cq *icq; 38 struct io_cq *icq;
40 39
41 spin_lock_irq(q->queue_lock); 40 spin_lock_irq(q->queue_lock);
@@ -47,25 +46,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
47 if (!icq) 46 if (!icq)
48 return; 47 return;
49 } 48 }
50 49 get_io_context(icq->ioc);
51 rq->elv.icq = icq; 50 rq->elv.icq = icq;
52 if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
53 rq->rq_flags |= RQF_ELVPRIV;
54 get_io_context(icq->ioc);
55 return;
56 }
57
58 rq->elv.icq = NULL;
59}
60
61static void blk_mq_sched_assign_ioc(struct request_queue *q,
62 struct request *rq, struct bio *bio)
63{
64 struct io_context *ioc;
65
66 ioc = rq_ioc(bio);
67 if (ioc)
68 __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
69} 51}
70 52
71/* 53/*
@@ -107,71 +89,6 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
107 return false; 89 return false;
108} 90}
109 91
110struct request *blk_mq_sched_get_request(struct request_queue *q,
111 struct bio *bio,
112 unsigned int op,
113 struct blk_mq_alloc_data *data)
114{
115 struct elevator_queue *e = q->elevator;
116 struct request *rq;
117
118 blk_queue_enter_live(q);
119 data->q = q;
120 if (likely(!data->ctx))
121 data->ctx = blk_mq_get_ctx(q);
122 if (likely(!data->hctx))
123 data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
124
125 if (e) {
126 data->flags |= BLK_MQ_REQ_INTERNAL;
127
128 /*
129 * Flush requests are special and go directly to the
130 * dispatch list.
131 */
132 if (!op_is_flush(op) && e->type->ops.mq.get_request) {
133 rq = e->type->ops.mq.get_request(q, op, data);
134 if (rq)
135 rq->rq_flags |= RQF_QUEUED;
136 } else
137 rq = __blk_mq_alloc_request(data, op);
138 } else {
139 rq = __blk_mq_alloc_request(data, op);
140 }
141
142 if (rq) {
143 if (!op_is_flush(op)) {
144 rq->elv.icq = NULL;
145 if (e && e->type->icq_cache)
146 blk_mq_sched_assign_ioc(q, rq, bio);
147 }
148 data->hctx->queued++;
149 return rq;
150 }
151
152 blk_queue_exit(q);
153 return NULL;
154}
155
156void blk_mq_sched_put_request(struct request *rq)
157{
158 struct request_queue *q = rq->q;
159 struct elevator_queue *e = q->elevator;
160
161 if (rq->rq_flags & RQF_ELVPRIV) {
162 blk_mq_sched_put_rq_priv(rq->q, rq);
163 if (rq->elv.icq) {
164 put_io_context(rq->elv.icq->ioc);
165 rq->elv.icq = NULL;
166 }
167 }
168
169 if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
170 e->type->ops.mq.put_request(rq);
171 else
172 blk_mq_finish_request(rq);
173}
174
175void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) 92void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
176{ 93{
177 struct request_queue *q = hctx->queue; 94 struct request_queue *q = hctx->queue;
@@ -180,7 +97,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
180 bool did_work = false; 97 bool did_work = false;
181 LIST_HEAD(rq_list); 98 LIST_HEAD(rq_list);
182 99
183 if (unlikely(blk_mq_hctx_stopped(hctx))) 100 /* RCU or SRCU read lock is needed before checking quiesced flag */
101 if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
184 return; 102 return;
185 103
186 hctx->run++; 104 hctx->run++;
@@ -260,19 +178,73 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
260} 178}
261EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); 179EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
262 180
181/*
182 * Reverse check our software queue for entries that we could potentially
183 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
184 * too much time checking for merges.
185 */
186static bool blk_mq_attempt_merge(struct request_queue *q,
187 struct blk_mq_ctx *ctx, struct bio *bio)
188{
189 struct request *rq;
190 int checked = 8;
191
192 lockdep_assert_held(&ctx->lock);
193
194 list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
195 bool merged = false;
196
197 if (!checked--)
198 break;
199
200 if (!blk_rq_merge_ok(rq, bio))
201 continue;
202
203 switch (blk_try_merge(rq, bio)) {
204 case ELEVATOR_BACK_MERGE:
205 if (blk_mq_sched_allow_merge(q, rq, bio))
206 merged = bio_attempt_back_merge(q, rq, bio);
207 break;
208 case ELEVATOR_FRONT_MERGE:
209 if (blk_mq_sched_allow_merge(q, rq, bio))
210 merged = bio_attempt_front_merge(q, rq, bio);
211 break;
212 case ELEVATOR_DISCARD_MERGE:
213 merged = bio_attempt_discard_merge(q, rq, bio);
214 break;
215 default:
216 continue;
217 }
218
219 if (merged)
220 ctx->rq_merged++;
221 return merged;
222 }
223
224 return false;
225}
226
263bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) 227bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
264{ 228{
265 struct elevator_queue *e = q->elevator; 229 struct elevator_queue *e = q->elevator;
230 struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
231 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
232 bool ret = false;
266 233
267 if (e->type->ops.mq.bio_merge) { 234 if (e && e->type->ops.mq.bio_merge) {
268 struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
269 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
270
271 blk_mq_put_ctx(ctx); 235 blk_mq_put_ctx(ctx);
272 return e->type->ops.mq.bio_merge(hctx, bio); 236 return e->type->ops.mq.bio_merge(hctx, bio);
273 } 237 }
274 238
275 return false; 239 if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
240 /* default per sw-queue merge */
241 spin_lock(&ctx->lock);
242 ret = blk_mq_attempt_merge(q, ctx, bio);
243 spin_unlock(&ctx->lock);
244 }
245
246 blk_mq_put_ctx(ctx);
247 return ret;
276} 248}
277 249
278bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) 250bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
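
With the merge logic moved here, a queue without an I/O scheduler gets per-software-queue bio merging whenever the driver advertises BLK_MQ_F_SHOULD_MERGE. A hedged sketch of a driver's tag-set setup (names and sizes invented):

    /*
     * Sketch: opting in to the default software-queue merging is simply a
     * matter of keeping BLK_MQ_F_SHOULD_MERGE in the tag set flags.
     */
    #include <linux/blk-mq.h>

    static int mydrv_init_tag_set(struct blk_mq_tag_set *set,
    			      struct blk_mq_ops *ops)
    {
    	memset(set, 0, sizeof(*set));
    	set->ops = ops;
    	set->nr_hw_queues = 1;
    	set->queue_depth = 64;
    	set->numa_node = NUMA_NO_NODE;
    	set->flags = BLK_MQ_F_SHOULD_MERGE;

    	return blk_mq_alloc_tag_set(set);
    }
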
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 5007edece51a..9267d0b7c197 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -7,8 +7,7 @@
7void blk_mq_sched_free_hctx_data(struct request_queue *q, 7void blk_mq_sched_free_hctx_data(struct request_queue *q,
8 void (*exit)(struct blk_mq_hw_ctx *)); 8 void (*exit)(struct blk_mq_hw_ctx *));
9 9
10struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data); 10void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
11void blk_mq_sched_put_request(struct request *rq);
12 11
13void blk_mq_sched_request_inserted(struct request *rq); 12void blk_mq_sched_request_inserted(struct request *rq);
14bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, 13bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -38,35 +37,12 @@ int blk_mq_sched_init(struct request_queue *q);
38static inline bool 37static inline bool
39blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) 38blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
40{ 39{
41 struct elevator_queue *e = q->elevator; 40 if (blk_queue_nomerges(q) || !bio_mergeable(bio))
42
43 if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
44 return false; 41 return false;
45 42
46 return __blk_mq_sched_bio_merge(q, bio); 43 return __blk_mq_sched_bio_merge(q, bio);
47} 44}
48 45
49static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
50 struct request *rq,
51 struct bio *bio)
52{
53 struct elevator_queue *e = q->elevator;
54
55 if (e && e->type->ops.mq.get_rq_priv)
56 return e->type->ops.mq.get_rq_priv(q, rq, bio);
57
58 return 0;
59}
60
61static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
62 struct request *rq)
63{
64 struct elevator_queue *e = q->elevator;
65
66 if (e && e->type->ops.mq.put_rq_priv)
67 e->type->ops.mq.put_rq_priv(q, rq);
68}
69
70static inline bool 46static inline bool
71blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, 47blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
72 struct bio *bio) 48 struct bio *bio)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 958cedaff8b8..05dfa3f270ae 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list);
42 42
43static void blk_mq_poll_stats_start(struct request_queue *q); 43static void blk_mq_poll_stats_start(struct request_queue *q);
44static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); 44static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
45static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
46 45
47static int blk_mq_poll_stats_bkt(const struct request *rq) 46static int blk_mq_poll_stats_bkt(const struct request *rq)
48{ 47{
@@ -154,13 +153,28 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
154} 153}
155EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); 154EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
156 155
156/*
157 * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
158 * mpt3sas driver such that this function can be removed.
159 */
160void blk_mq_quiesce_queue_nowait(struct request_queue *q)
161{
162 unsigned long flags;
163
164 spin_lock_irqsave(q->queue_lock, flags);
165 queue_flag_set(QUEUE_FLAG_QUIESCED, q);
166 spin_unlock_irqrestore(q->queue_lock, flags);
167}
168EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
169
157/** 170/**
158 * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished 171 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
159 * @q: request queue. 172 * @q: request queue.
160 * 173 *
161 * Note: this function does not prevent that the struct request end_io() 174 * Note: this function does not prevent that the struct request end_io()
162 * callback function is invoked. Additionally, it is not prevented that 175 * callback function is invoked. Once this function returns, no
 163 * new queue_rq() calls occur unless the queue has been stopped first. 176 * dispatch can happen until the queue is unquiesced via
177 * blk_mq_unquiesce_queue().
164 */ 178 */
165void blk_mq_quiesce_queue(struct request_queue *q) 179void blk_mq_quiesce_queue(struct request_queue *q)
166{ 180{
@@ -168,11 +182,11 @@ void blk_mq_quiesce_queue(struct request_queue *q)
168 unsigned int i; 182 unsigned int i;
169 bool rcu = false; 183 bool rcu = false;
170 184
171 __blk_mq_stop_hw_queues(q, true); 185 blk_mq_quiesce_queue_nowait(q);
172 186
173 queue_for_each_hw_ctx(q, hctx, i) { 187 queue_for_each_hw_ctx(q, hctx, i) {
174 if (hctx->flags & BLK_MQ_F_BLOCKING) 188 if (hctx->flags & BLK_MQ_F_BLOCKING)
175 synchronize_srcu(&hctx->queue_rq_srcu); 189 synchronize_srcu(hctx->queue_rq_srcu);
176 else 190 else
177 rcu = true; 191 rcu = true;
178 } 192 }
@@ -181,6 +195,26 @@ void blk_mq_quiesce_queue(struct request_queue *q)
181} 195}
182EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); 196EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
183 197
198/*
199 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
200 * @q: request queue.
201 *
202 * This function restores the queue to the state it was in before
 203 * blk_mq_quiesce_queue() was called.
204 */
205void blk_mq_unquiesce_queue(struct request_queue *q)
206{
207 unsigned long flags;
208
209 spin_lock_irqsave(q->queue_lock, flags);
210 queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
211 spin_unlock_irqrestore(q->queue_lock, flags);
212
213 /* dispatch requests which are inserted during quiescing */
214 blk_mq_run_hw_queues(q, true);
215}
216EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
217
184void blk_mq_wake_waiters(struct request_queue *q) 218void blk_mq_wake_waiters(struct request_queue *q)
185{ 219{
186 struct blk_mq_hw_ctx *hctx; 220 struct blk_mq_hw_ctx *hctx;
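
A hedged usage sketch of the new quiesce/unquiesce pair (driver code invented for illustration):

    /*
     * Sketch: pausing dispatch around a controller reset.  Unlike
     * blk_mq_stop_hw_queues(), blk_mq_quiesce_queue() waits for in-flight
     * ->queue_rq() calls and guarantees no new dispatch until unquiesce.
     */
    #include <linux/blk-mq.h>

    static void mydrv_reset_ctrl(struct request_queue *q)
    {
    	blk_mq_quiesce_queue(q);

    	/* ... reinitialize the hardware; nothing is dispatched here ... */

    	blk_mq_unquiesce_queue(q);	/* also reruns the hw queues */
    }
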
@@ -204,15 +238,33 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
204} 238}
205EXPORT_SYMBOL(blk_mq_can_queue); 239EXPORT_SYMBOL(blk_mq_can_queue);
206 240
207void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, 241static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
208 struct request *rq, unsigned int op) 242 unsigned int tag, unsigned int op)
209{ 243{
244 struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
245 struct request *rq = tags->static_rqs[tag];
246
247 rq->rq_flags = 0;
248
249 if (data->flags & BLK_MQ_REQ_INTERNAL) {
250 rq->tag = -1;
251 rq->internal_tag = tag;
252 } else {
253 if (blk_mq_tag_busy(data->hctx)) {
254 rq->rq_flags = RQF_MQ_INFLIGHT;
255 atomic_inc(&data->hctx->nr_active);
256 }
257 rq->tag = tag;
258 rq->internal_tag = -1;
259 data->hctx->tags->rqs[rq->tag] = rq;
260 }
261
210 INIT_LIST_HEAD(&rq->queuelist); 262 INIT_LIST_HEAD(&rq->queuelist);
211 /* csd/requeue_work/fifo_time is initialized before use */ 263 /* csd/requeue_work/fifo_time is initialized before use */
212 rq->q = q; 264 rq->q = data->q;
213 rq->mq_ctx = ctx; 265 rq->mq_ctx = data->ctx;
214 rq->cmd_flags = op; 266 rq->cmd_flags = op;
215 if (blk_queue_io_stat(q)) 267 if (blk_queue_io_stat(data->q))
216 rq->rq_flags |= RQF_IO_STAT; 268 rq->rq_flags |= RQF_IO_STAT;
217 /* do not touch atomic flags, it needs atomic ops against the timer */ 269 /* do not touch atomic flags, it needs atomic ops against the timer */
218 rq->cpu = -1; 270 rq->cpu = -1;
@@ -241,44 +293,60 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
241 rq->end_io_data = NULL; 293 rq->end_io_data = NULL;
242 rq->next_rq = NULL; 294 rq->next_rq = NULL;
243 295
244 ctx->rq_dispatched[op_is_sync(op)]++; 296 data->ctx->rq_dispatched[op_is_sync(op)]++;
297 return rq;
245} 298}
246EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
247 299
248struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, 300static struct request *blk_mq_get_request(struct request_queue *q,
249 unsigned int op) 301 struct bio *bio, unsigned int op,
302 struct blk_mq_alloc_data *data)
250{ 303{
304 struct elevator_queue *e = q->elevator;
251 struct request *rq; 305 struct request *rq;
252 unsigned int tag; 306 unsigned int tag;
253 307
254 tag = blk_mq_get_tag(data); 308 blk_queue_enter_live(q);
255 if (tag != BLK_MQ_TAG_FAIL) { 309 data->q = q;
256 struct blk_mq_tags *tags = blk_mq_tags_from_data(data); 310 if (likely(!data->ctx))
311 data->ctx = blk_mq_get_ctx(q);
312 if (likely(!data->hctx))
313 data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
314 if (op & REQ_NOWAIT)
315 data->flags |= BLK_MQ_REQ_NOWAIT;
257 316
258 rq = tags->static_rqs[tag]; 317 if (e) {
318 data->flags |= BLK_MQ_REQ_INTERNAL;
259 319
260 if (data->flags & BLK_MQ_REQ_INTERNAL) { 320 /*
261 rq->tag = -1; 321 * Flush requests are special and go directly to the
262 rq->internal_tag = tag; 322 * dispatch list.
263 } else { 323 */
264 if (blk_mq_tag_busy(data->hctx)) { 324 if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
265 rq->rq_flags = RQF_MQ_INFLIGHT; 325 e->type->ops.mq.limit_depth(op, data);
266 atomic_inc(&data->hctx->nr_active); 326 }
267 }
268 rq->tag = tag;
269 rq->internal_tag = -1;
270 data->hctx->tags->rqs[rq->tag] = rq;
271 }
272 327
273 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); 328 tag = blk_mq_get_tag(data);
274 return rq; 329 if (tag == BLK_MQ_TAG_FAIL) {
330 blk_queue_exit(q);
331 return NULL;
275 } 332 }
276 333
277 return NULL; 334 rq = blk_mq_rq_ctx_init(data, tag, op);
335 if (!op_is_flush(op)) {
336 rq->elv.icq = NULL;
337 if (e && e->type->ops.mq.prepare_request) {
338 if (e->type->icq_cache && rq_ioc(bio))
339 blk_mq_sched_assign_ioc(rq, bio);
340
341 e->type->ops.mq.prepare_request(rq, bio);
342 rq->rq_flags |= RQF_ELVPRIV;
343 }
344 }
345 data->hctx->queued++;
346 return rq;
278} 347}
279EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
280 348
281struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 349struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
282 unsigned int flags) 350 unsigned int flags)
283{ 351{
284 struct blk_mq_alloc_data alloc_data = { .flags = flags }; 352 struct blk_mq_alloc_data alloc_data = { .flags = flags };
@@ -289,7 +357,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
289 if (ret) 357 if (ret)
290 return ERR_PTR(ret); 358 return ERR_PTR(ret);
291 359
292 rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); 360 rq = blk_mq_get_request(q, NULL, op, &alloc_data);
293 361
294 blk_mq_put_ctx(alloc_data.ctx); 362 blk_mq_put_ctx(alloc_data.ctx);
295 blk_queue_exit(q); 363 blk_queue_exit(q);
@@ -304,8 +372,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
304} 372}
305EXPORT_SYMBOL(blk_mq_alloc_request); 373EXPORT_SYMBOL(blk_mq_alloc_request);
306 374
307struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, 375struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
308 unsigned int flags, unsigned int hctx_idx) 376 unsigned int op, unsigned int flags, unsigned int hctx_idx)
309{ 377{
310 struct blk_mq_alloc_data alloc_data = { .flags = flags }; 378 struct blk_mq_alloc_data alloc_data = { .flags = flags };
311 struct request *rq; 379 struct request *rq;
@@ -340,7 +408,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
340 cpu = cpumask_first(alloc_data.hctx->cpumask); 408 cpu = cpumask_first(alloc_data.hctx->cpumask);
341 alloc_data.ctx = __blk_mq_get_ctx(q, cpu); 409 alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
342 410
343 rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); 411 rq = blk_mq_get_request(q, NULL, op, &alloc_data);
344 412
345 blk_queue_exit(q); 413 blk_queue_exit(q);
346 414
@@ -351,17 +419,28 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
351} 419}
352EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); 420EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
353 421
354void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, 422void blk_mq_free_request(struct request *rq)
355 struct request *rq)
356{ 423{
357 const int sched_tag = rq->internal_tag;
358 struct request_queue *q = rq->q; 424 struct request_queue *q = rq->q;
425 struct elevator_queue *e = q->elevator;
426 struct blk_mq_ctx *ctx = rq->mq_ctx;
427 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
428 const int sched_tag = rq->internal_tag;
359 429
430 if (rq->rq_flags & RQF_ELVPRIV) {
431 if (e && e->type->ops.mq.finish_request)
432 e->type->ops.mq.finish_request(rq);
433 if (rq->elv.icq) {
434 put_io_context(rq->elv.icq->ioc);
435 rq->elv.icq = NULL;
436 }
437 }
438
439 ctx->rq_completed[rq_is_sync(rq)]++;
360 if (rq->rq_flags & RQF_MQ_INFLIGHT) 440 if (rq->rq_flags & RQF_MQ_INFLIGHT)
361 atomic_dec(&hctx->nr_active); 441 atomic_dec(&hctx->nr_active);
362 442
363 wbt_done(q->rq_wb, &rq->issue_stat); 443 wbt_done(q->rq_wb, &rq->issue_stat);
364 rq->rq_flags = 0;
365 444
366 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 445 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
367 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); 446 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
@@ -372,29 +451,9 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
372 blk_mq_sched_restart(hctx); 451 blk_mq_sched_restart(hctx);
373 blk_queue_exit(q); 452 blk_queue_exit(q);
374} 453}
375
376static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
377 struct request *rq)
378{
379 struct blk_mq_ctx *ctx = rq->mq_ctx;
380
381 ctx->rq_completed[rq_is_sync(rq)]++;
382 __blk_mq_finish_request(hctx, ctx, rq);
383}
384
385void blk_mq_finish_request(struct request *rq)
386{
387 blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
388}
389EXPORT_SYMBOL_GPL(blk_mq_finish_request);
390
391void blk_mq_free_request(struct request *rq)
392{
393 blk_mq_sched_put_request(rq);
394}
395EXPORT_SYMBOL_GPL(blk_mq_free_request); 454EXPORT_SYMBOL_GPL(blk_mq_free_request);
396 455
397inline void __blk_mq_end_request(struct request *rq, int error) 456inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
398{ 457{
399 blk_account_io_done(rq); 458 blk_account_io_done(rq);
400 459
@@ -409,7 +468,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
409} 468}
410EXPORT_SYMBOL(__blk_mq_end_request); 469EXPORT_SYMBOL(__blk_mq_end_request);
411 470
412void blk_mq_end_request(struct request *rq, int error) 471void blk_mq_end_request(struct request *rq, blk_status_t error)
413{ 472{
414 if (blk_update_request(rq, error, blk_rq_bytes(rq))) 473 if (blk_update_request(rq, error, blk_rq_bytes(rq)))
415 BUG(); 474 BUG();
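
The scheduler interface follows the same split: get_request/put_request give way to prepare_request/finish_request, invoked from blk_mq_get_request() and blk_mq_free_request() above. A hedged sketch of an elevator wiring up the new hooks (the mysched names and the per-request payload are invented):

    /* Sketch only: minimal use of the new elevator hooks. */
    #include <linux/elevator.h>
    #include <linux/module.h>

    static void mysched_prepare_request(struct request *rq, struct bio *bio)
    {
    	/* per-request scheduler state lives in rq->elv.priv[] */
    	rq->elv.priv[0] = NULL;
    }

    static void mysched_finish_request(struct request *rq)
    {
    	rq->elv.priv[0] = NULL;
    }

    static struct elevator_type mysched = {
    	.ops.mq = {
    		.prepare_request	= mysched_prepare_request,
    		.finish_request		= mysched_finish_request,
    		/* .insert_requests, .dispatch_request, ... */
    	},
    	.uses_mq	= true,
    	.elevator_name	= "mysched-sketch",
    	.elevator_owner	= THIS_MODULE,
    };
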
@@ -753,50 +812,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
753 blk_queue_exit(q); 812 blk_queue_exit(q);
754} 813}
755 814
756/*
757 * Reverse check our software queue for entries that we could potentially
758 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
759 * too much time checking for merges.
760 */
761static bool blk_mq_attempt_merge(struct request_queue *q,
762 struct blk_mq_ctx *ctx, struct bio *bio)
763{
764 struct request *rq;
765 int checked = 8;
766
767 list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
768 bool merged = false;
769
770 if (!checked--)
771 break;
772
773 if (!blk_rq_merge_ok(rq, bio))
774 continue;
775
776 switch (blk_try_merge(rq, bio)) {
777 case ELEVATOR_BACK_MERGE:
778 if (blk_mq_sched_allow_merge(q, rq, bio))
779 merged = bio_attempt_back_merge(q, rq, bio);
780 break;
781 case ELEVATOR_FRONT_MERGE:
782 if (blk_mq_sched_allow_merge(q, rq, bio))
783 merged = bio_attempt_front_merge(q, rq, bio);
784 break;
785 case ELEVATOR_DISCARD_MERGE:
786 merged = bio_attempt_discard_merge(q, rq, bio);
787 break;
788 default:
789 continue;
790 }
791
792 if (merged)
793 ctx->rq_merged++;
794 return merged;
795 }
796
797 return false;
798}
799
800struct flush_busy_ctx_data { 815struct flush_busy_ctx_data {
801 struct blk_mq_hw_ctx *hctx; 816 struct blk_mq_hw_ctx *hctx;
802 struct list_head *list; 817 struct list_head *list;
@@ -968,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
968{ 983{
969 struct blk_mq_hw_ctx *hctx; 984 struct blk_mq_hw_ctx *hctx;
970 struct request *rq; 985 struct request *rq;
971 int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; 986 int errors, queued;
972 987
973 if (list_empty(list)) 988 if (list_empty(list))
974 return false; 989 return false;
@@ -979,6 +994,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
979 errors = queued = 0; 994 errors = queued = 0;
980 do { 995 do {
981 struct blk_mq_queue_data bd; 996 struct blk_mq_queue_data bd;
997 blk_status_t ret;
982 998
983 rq = list_first_entry(list, struct request, queuelist); 999 rq = list_first_entry(list, struct request, queuelist);
984 if (!blk_mq_get_driver_tag(rq, &hctx, false)) { 1000 if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1019,25 +1035,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
1019 } 1035 }
1020 1036
1021 ret = q->mq_ops->queue_rq(hctx, &bd); 1037 ret = q->mq_ops->queue_rq(hctx, &bd);
1022 switch (ret) { 1038 if (ret == BLK_STS_RESOURCE) {
1023 case BLK_MQ_RQ_QUEUE_OK:
1024 queued++;
1025 break;
1026 case BLK_MQ_RQ_QUEUE_BUSY:
1027 blk_mq_put_driver_tag_hctx(hctx, rq); 1039 blk_mq_put_driver_tag_hctx(hctx, rq);
1028 list_add(&rq->queuelist, list); 1040 list_add(&rq->queuelist, list);
1029 __blk_mq_requeue_request(rq); 1041 __blk_mq_requeue_request(rq);
1030 break; 1042 break;
1031 default: 1043 }
1032 pr_err("blk-mq: bad return on queue: %d\n", ret); 1044
1033 case BLK_MQ_RQ_QUEUE_ERROR: 1045 if (unlikely(ret != BLK_STS_OK)) {
1034 errors++; 1046 errors++;
1035 blk_mq_end_request(rq, -EIO); 1047 blk_mq_end_request(rq, BLK_STS_IOERR);
1036 break; 1048 continue;
1037 } 1049 }
1038 1050
1039 if (ret == BLK_MQ_RQ_QUEUE_BUSY) 1051 queued++;
1040 break;
1041 } while (!list_empty(list)); 1052 } while (!list_empty(list));
1042 1053
1043 hctx->dispatched[queued_to_index(queued)]++; 1054 hctx->dispatched[queued_to_index(queued)]++;
@@ -1075,7 +1086,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
1075 * - blk_mq_run_hw_queue() checks whether or not a queue has 1086 * - blk_mq_run_hw_queue() checks whether or not a queue has
1076 * been stopped before rerunning a queue. 1087 * been stopped before rerunning a queue.
1077 * - Some but not all block drivers stop a queue before 1088 * - Some but not all block drivers stop a queue before
1078 * returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq 1089 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
1079 * and dm-rq. 1090 * and dm-rq.
1080 */ 1091 */
1081 if (!blk_mq_sched_needs_restart(hctx) && 1092 if (!blk_mq_sched_needs_restart(hctx) &&
@@ -1100,9 +1111,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
1100 } else { 1111 } else {
1101 might_sleep(); 1112 might_sleep();
1102 1113
1103 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); 1114 srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
1104 blk_mq_sched_dispatch_requests(hctx); 1115 blk_mq_sched_dispatch_requests(hctx);
1105 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); 1116 srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
1106 } 1117 }
1107} 1118}
1108 1119
@@ -1134,8 +1145,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
1134static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, 1145static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
1135 unsigned long msecs) 1146 unsigned long msecs)
1136{ 1147{
1137 if (unlikely(blk_mq_hctx_stopped(hctx) || 1148 if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
1138 !blk_mq_hw_queue_mapped(hctx))) 1149 return;
1150
1151 if (unlikely(blk_mq_hctx_stopped(hctx)))
1139 return; 1152 return;
1140 1153
1141 if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { 1154 if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
@@ -1201,34 +1214,39 @@ bool blk_mq_queue_stopped(struct request_queue *q)
1201} 1214}
1202EXPORT_SYMBOL(blk_mq_queue_stopped); 1215EXPORT_SYMBOL(blk_mq_queue_stopped);
1203 1216
1204static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) 1217/*
 1218 * This function is often used by a driver to pause .queue_rq() when
 1219 * there aren't enough resources or some conditions aren't satisfied, and
 1220 * BLK_STS_RESOURCE is usually returned.
1221 *
1222 * We do not guarantee that dispatch can be drained or blocked
1223 * after blk_mq_stop_hw_queue() returns. Please use
1224 * blk_mq_quiesce_queue() for that requirement.
1225 */
1226void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
1205{ 1227{
1206 if (sync) 1228 cancel_delayed_work(&hctx->run_work);
1207 cancel_delayed_work_sync(&hctx->run_work);
1208 else
1209 cancel_delayed_work(&hctx->run_work);
1210 1229
1211 set_bit(BLK_MQ_S_STOPPED, &hctx->state); 1230 set_bit(BLK_MQ_S_STOPPED, &hctx->state);
1212} 1231}
1213
1214void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
1215{
1216 __blk_mq_stop_hw_queue(hctx, false);
1217}
1218EXPORT_SYMBOL(blk_mq_stop_hw_queue); 1232EXPORT_SYMBOL(blk_mq_stop_hw_queue);
1219 1233
1220static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) 1234/*
 1235 * This function is often used by a driver to pause .queue_rq() when
 1236 * there aren't enough resources or some conditions aren't satisfied, and
 1237 * BLK_STS_RESOURCE is usually returned.
1238 *
1239 * We do not guarantee that dispatch can be drained or blocked
1240 * after blk_mq_stop_hw_queues() returns. Please use
1241 * blk_mq_quiesce_queue() for that requirement.
1242 */
1243void blk_mq_stop_hw_queues(struct request_queue *q)
1221{ 1244{
1222 struct blk_mq_hw_ctx *hctx; 1245 struct blk_mq_hw_ctx *hctx;
1223 int i; 1246 int i;
1224 1247
1225 queue_for_each_hw_ctx(q, hctx, i) 1248 queue_for_each_hw_ctx(q, hctx, i)
1226 __blk_mq_stop_hw_queue(hctx, sync); 1249 blk_mq_stop_hw_queue(hctx);
1227}
1228
1229void blk_mq_stop_hw_queues(struct request_queue *q)
1230{
1231 __blk_mq_stop_hw_queues(q, false);
1232} 1250}
1233EXPORT_SYMBOL(blk_mq_stop_hw_queues); 1251EXPORT_SYMBOL(blk_mq_stop_hw_queues);
1234 1252
@@ -1295,7 +1313,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
1295 1313
1296void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) 1314void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
1297{ 1315{
1298 if (unlikely(!blk_mq_hw_queue_mapped(hctx))) 1316 if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
1299 return; 1317 return;
1300 1318
1301 /* 1319 /*
@@ -1317,6 +1335,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
1317{ 1335{
1318 struct blk_mq_ctx *ctx = rq->mq_ctx; 1336 struct blk_mq_ctx *ctx = rq->mq_ctx;
1319 1337
1338 lockdep_assert_held(&ctx->lock);
1339
1320 trace_block_rq_insert(hctx->queue, rq); 1340 trace_block_rq_insert(hctx->queue, rq);
1321 1341
1322 if (at_head) 1342 if (at_head)
@@ -1330,6 +1350,8 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
1330{ 1350{
1331 struct blk_mq_ctx *ctx = rq->mq_ctx; 1351 struct blk_mq_ctx *ctx = rq->mq_ctx;
1332 1352
1353 lockdep_assert_held(&ctx->lock);
1354
1333 __blk_mq_insert_req_list(hctx, rq, at_head); 1355 __blk_mq_insert_req_list(hctx, rq, at_head);
1334 blk_mq_hctx_mark_pending(hctx, ctx); 1356 blk_mq_hctx_mark_pending(hctx, ctx);
1335} 1357}
@@ -1427,30 +1449,13 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
1427 !blk_queue_nomerges(hctx->queue); 1449 !blk_queue_nomerges(hctx->queue);
1428} 1450}
1429 1451
1430static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, 1452static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
1431 struct blk_mq_ctx *ctx, 1453 struct blk_mq_ctx *ctx,
1432 struct request *rq, struct bio *bio) 1454 struct request *rq)
1433{ 1455{
1434 if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) { 1456 spin_lock(&ctx->lock);
1435 blk_mq_bio_to_request(rq, bio); 1457 __blk_mq_insert_request(hctx, rq, false);
1436 spin_lock(&ctx->lock); 1458 spin_unlock(&ctx->lock);
1437insert_rq:
1438 __blk_mq_insert_request(hctx, rq, false);
1439 spin_unlock(&ctx->lock);
1440 return false;
1441 } else {
1442 struct request_queue *q = hctx->queue;
1443
1444 spin_lock(&ctx->lock);
1445 if (!blk_mq_attempt_merge(q, ctx, bio)) {
1446 blk_mq_bio_to_request(rq, bio);
1447 goto insert_rq;
1448 }
1449
1450 spin_unlock(&ctx->lock);
1451 __blk_mq_finish_request(hctx, ctx, rq);
1452 return true;
1453 }
1454} 1459}
1455 1460
1456static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) 1461static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
@@ -1471,10 +1476,11 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
1471 .last = true, 1476 .last = true,
1472 }; 1477 };
1473 blk_qc_t new_cookie; 1478 blk_qc_t new_cookie;
1474 int ret; 1479 blk_status_t ret;
1475 bool run_queue = true; 1480 bool run_queue = true;
1476 1481
1477 if (blk_mq_hctx_stopped(hctx)) { 1482 /* RCU or SRCU read lock is needed before checking quiesced flag */
1483 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
1478 run_queue = false; 1484 run_queue = false;
1479 goto insert; 1485 goto insert;
1480 } 1486 }
@@ -1493,18 +1499,19 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
1493 * would have done 1499 * would have done
1494 */ 1500 */
1495 ret = q->mq_ops->queue_rq(hctx, &bd); 1501 ret = q->mq_ops->queue_rq(hctx, &bd);
1496 if (ret == BLK_MQ_RQ_QUEUE_OK) { 1502 switch (ret) {
1503 case BLK_STS_OK:
1497 *cookie = new_cookie; 1504 *cookie = new_cookie;
1498 return; 1505 return;
1499 } 1506 case BLK_STS_RESOURCE:
1500 1507 __blk_mq_requeue_request(rq);
1501 if (ret == BLK_MQ_RQ_QUEUE_ERROR) { 1508 goto insert;
1509 default:
1502 *cookie = BLK_QC_T_NONE; 1510 *cookie = BLK_QC_T_NONE;
1503 blk_mq_end_request(rq, -EIO); 1511 blk_mq_end_request(rq, ret);
1504 return; 1512 return;
1505 } 1513 }
1506 1514
1507 __blk_mq_requeue_request(rq);
1508insert: 1515insert:
1509 blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep); 1516 blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
1510} 1517}
@@ -1521,9 +1528,9 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
1521 1528
1522 might_sleep(); 1529 might_sleep();
1523 1530
1524 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); 1531 srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
1525 __blk_mq_try_issue_directly(hctx, rq, cookie, true); 1532 __blk_mq_try_issue_directly(hctx, rq, cookie, true);
1526 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); 1533 srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
1527 } 1534 }
1528} 1535}
1529 1536
@@ -1541,7 +1548,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1541 1548
1542 blk_queue_bounce(q, &bio); 1549 blk_queue_bounce(q, &bio);
1543 1550
1544 blk_queue_split(q, &bio, q->bio_split); 1551 blk_queue_split(q, &bio);
1545 1552
1546 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 1553 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1547 bio_io_error(bio); 1554 bio_io_error(bio);
@@ -1559,9 +1566,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1559 1566
1560 trace_block_getrq(q, bio, bio->bi_opf); 1567 trace_block_getrq(q, bio, bio->bi_opf);
1561 1568
1562 rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data); 1569 rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
1563 if (unlikely(!rq)) { 1570 if (unlikely(!rq)) {
1564 __wbt_done(q->rq_wb, wb_acct); 1571 __wbt_done(q->rq_wb, wb_acct);
1572 if (bio->bi_opf & REQ_NOWAIT)
1573 bio_wouldblock_error(bio);
1565 return BLK_QC_T_NONE; 1574 return BLK_QC_T_NONE;
1566 } 1575 }
1567 1576
@@ -1639,11 +1648,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1639 blk_mq_put_ctx(data.ctx); 1648 blk_mq_put_ctx(data.ctx);
1640 blk_mq_bio_to_request(rq, bio); 1649 blk_mq_bio_to_request(rq, bio);
1641 blk_mq_sched_insert_request(rq, false, true, true, true); 1650 blk_mq_sched_insert_request(rq, false, true, true, true);
1642 } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1651 } else {
1643 blk_mq_put_ctx(data.ctx); 1652 blk_mq_put_ctx(data.ctx);
1653 blk_mq_bio_to_request(rq, bio);
1654 blk_mq_queue_io(data.hctx, data.ctx, rq);
1644 blk_mq_run_hw_queue(data.hctx, true); 1655 blk_mq_run_hw_queue(data.hctx, true);
1645 } else 1656 }
1646 blk_mq_put_ctx(data.ctx);
1647 1657
1648 return cookie; 1658 return cookie;
1649} 1659}
@@ -1866,7 +1876,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
1866 set->ops->exit_hctx(hctx, hctx_idx); 1876 set->ops->exit_hctx(hctx, hctx_idx);
1867 1877
1868 if (hctx->flags & BLK_MQ_F_BLOCKING) 1878 if (hctx->flags & BLK_MQ_F_BLOCKING)
1869 cleanup_srcu_struct(&hctx->queue_rq_srcu); 1879 cleanup_srcu_struct(hctx->queue_rq_srcu);
1870 1880
1871 blk_mq_remove_cpuhp(hctx); 1881 blk_mq_remove_cpuhp(hctx);
1872 blk_free_flush_queue(hctx->fq); 1882 blk_free_flush_queue(hctx->fq);
@@ -1900,7 +1910,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
1900 spin_lock_init(&hctx->lock); 1910 spin_lock_init(&hctx->lock);
1901 INIT_LIST_HEAD(&hctx->dispatch); 1911 INIT_LIST_HEAD(&hctx->dispatch);
1902 hctx->queue = q; 1912 hctx->queue = q;
1903 hctx->queue_num = hctx_idx;
1904 hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; 1913 hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
1905 1914
1906 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); 1915 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
@@ -1939,7 +1948,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
1939 goto free_fq; 1948 goto free_fq;
1940 1949
1941 if (hctx->flags & BLK_MQ_F_BLOCKING) 1950 if (hctx->flags & BLK_MQ_F_BLOCKING)
1942 init_srcu_struct(&hctx->queue_rq_srcu); 1951 init_srcu_struct(hctx->queue_rq_srcu);
1943 1952
1944 blk_mq_debugfs_register_hctx(q, hctx); 1953 blk_mq_debugfs_register_hctx(q, hctx);
1945 1954
@@ -2224,6 +2233,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
2224} 2233}
2225EXPORT_SYMBOL(blk_mq_init_queue); 2234EXPORT_SYMBOL(blk_mq_init_queue);
2226 2235
2236static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
2237{
2238 int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
2239
2240 BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
2241 __alignof__(struct blk_mq_hw_ctx)) !=
2242 sizeof(struct blk_mq_hw_ctx));
2243
2244 if (tag_set->flags & BLK_MQ_F_BLOCKING)
2245 hw_ctx_size += sizeof(struct srcu_struct);
2246
2247 return hw_ctx_size;
2248}
2249
2227static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, 2250static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
2228 struct request_queue *q) 2251 struct request_queue *q)
2229{ 2252{
@@ -2238,7 +2261,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
2238 continue; 2261 continue;
2239 2262
2240 node = blk_mq_hw_queue_to_node(q->mq_map, i); 2263 node = blk_mq_hw_queue_to_node(q->mq_map, i);
2241 hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx), 2264 hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
2242 GFP_KERNEL, node); 2265 GFP_KERNEL, node);
2243 if (!hctxs[i]) 2266 if (!hctxs[i])
2244 break; 2267 break;
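
The hunks above replace the BLK_MQ_RQ_QUEUE_* return codes with blk_status_t throughout the dispatch path: BLK_STS_RESOURCE makes the core requeue the request, and any other non-zero status ends it with an error. Below is a minimal sketch of what a driver's ->queue_rq() looks like against this interface; "mydrv", mydrv_has_free_slot() and mydrv_submit() are hypothetical names, not part of this series.

#include <linux/blk-mq.h>

static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
                                   const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct mydrv *drv = hctx->queue->queuedata;     /* hypothetical driver data */

        blk_mq_start_request(rq);

        if (!mydrv_has_free_slot(drv)) {
                /*
                 * Out of resources: stop the hw queue and let
                 * blk_mq_dispatch_rq_list() above put the request back on
                 * the dispatch list when it sees BLK_STS_RESOURCE.
                 */
                blk_mq_stop_hw_queue(hctx);
                return BLK_STS_RESOURCE;
        }

        if (mydrv_submit(drv, rq))
                return BLK_STS_IOERR;   /* core ends it via blk_mq_end_request() */

        return BLK_STS_OK;
}

When the shortage clears, the driver would restart dispatch, for example with blk_mq_start_stopped_hw_queues(q, true); that part is not shown here.
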
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48e3551..1a06fdf9fd4d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,17 +128,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
128 return data->hctx->tags; 128 return data->hctx->tags;
129} 129}
130 130
131/*
132 * Internal helpers for request allocation/init/free
133 */
134void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
135 struct request *rq, unsigned int op);
136void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
137 struct request *rq);
138void blk_mq_finish_request(struct request *rq);
139struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
140 unsigned int op);
141
142static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) 131static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
143{ 132{
144 return test_bit(BLK_MQ_S_STOPPED, &hctx->state); 133 return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4fa81ed383ca..be1f115b538b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -172,11 +172,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
172 q->nr_batching = BLK_BATCH_REQ; 172 q->nr_batching = BLK_BATCH_REQ;
173 173
174 blk_set_default_limits(&q->limits); 174 blk_set_default_limits(&q->limits);
175
176 /*
177 * by default assume old behaviour and bounce for any highmem page
178 */
179 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
180} 175}
181EXPORT_SYMBOL(blk_queue_make_request); 176EXPORT_SYMBOL(blk_queue_make_request);
182 177
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 07cc329fa4b0..2290f65b9d73 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -258,15 +258,14 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
258 * all transfers have been done for a request. It's important to call 258 * all transfers have been done for a request. It's important to call
259 * this function before end_that_request_last(), as that will put the 259 * this function before end_that_request_last(), as that will put the
260 * request back on the free list thus corrupting the internal tag list. 260 * request back on the free list thus corrupting the internal tag list.
261 *
262 * Notes:
263 * queue lock must be held.
264 **/ 261 **/
265void blk_queue_end_tag(struct request_queue *q, struct request *rq) 262void blk_queue_end_tag(struct request_queue *q, struct request *rq)
266{ 263{
267 struct blk_queue_tag *bqt = q->queue_tags; 264 struct blk_queue_tag *bqt = q->queue_tags;
268 unsigned tag = rq->tag; /* negative tags invalid */ 265 unsigned tag = rq->tag; /* negative tags invalid */
269 266
267 lockdep_assert_held(q->queue_lock);
268
270 BUG_ON(tag >= bqt->real_max_depth); 269 BUG_ON(tag >= bqt->real_max_depth);
271 270
272 list_del_init(&rq->queuelist); 271 list_del_init(&rq->queuelist);
@@ -307,9 +306,6 @@ EXPORT_SYMBOL(blk_queue_end_tag);
307 * calling this function. The request will also be removed from 306 * calling this function. The request will also be removed from
308 * the request queue, so it's the driver's responsibility to re-add 307
309 * it if it should need to be restarted for some reason. 308 * it if it should need to be restarted for some reason.
310 *
311 * Notes:
312 * queue lock must be held.
313 **/ 309 **/
314int blk_queue_start_tag(struct request_queue *q, struct request *rq) 310int blk_queue_start_tag(struct request_queue *q, struct request *rq)
315{ 311{
@@ -317,6 +313,8 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
317 unsigned max_depth; 313 unsigned max_depth;
318 int tag; 314 int tag;
319 315
316 lockdep_assert_held(q->queue_lock);
317
320 if (unlikely((rq->rq_flags & RQF_QUEUED))) { 318 if (unlikely((rq->rq_flags & RQF_QUEUED))) {
321 printk(KERN_ERR 319 printk(KERN_ERR
322 "%s: request %p for device [%s] already tagged %d", 320 "%s: request %p for device [%s] already tagged %d",
@@ -389,14 +387,13 @@ EXPORT_SYMBOL(blk_queue_start_tag);
389 * Hardware conditions may dictate a need to stop all pending requests. 387 * Hardware conditions may dictate a need to stop all pending requests.
390 * In this case, we will safely clear the block side of the tag queue and 388 * In this case, we will safely clear the block side of the tag queue and
391 * re-add all requests to the request queue in the right order. 389
392 *
393 * Notes:
394 * queue lock must be held.
395 **/ 390 **/
396void blk_queue_invalidate_tags(struct request_queue *q) 391void blk_queue_invalidate_tags(struct request_queue *q)
397{ 392{
398 struct list_head *tmp, *n; 393 struct list_head *tmp, *n;
399 394
395 lockdep_assert_held(q->queue_lock);
396
400 list_for_each_safe(tmp, n, &q->tag_busy_list) 397 list_for_each_safe(tmp, n, &q->tag_busy_list)
401 blk_requeue_request(q, list_entry_rq(tmp)); 398 blk_requeue_request(q, list_entry_rq(tmp));
402} 399}
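
The "queue lock must be held" notes are now enforced with lockdep_assert_held() rather than documented only in the kerneldoc. For a legacy (non-mq) driver nothing changes in practice, because the block core already calls the request_fn with q->queue_lock held. A hedged sketch, with mydrv_issue() as a hypothetical helper:

static void mydrv_request_fn(struct request_queue *q)
{
        struct request *rq;

        /* Called by the core with q->queue_lock held, satisfying the asserts. */
        while ((rq = blk_peek_request(q)) != NULL) {
                if (blk_queue_start_tag(q, rq))
                        break;                  /* no tag free, try again later */

                spin_unlock_irq(q->queue_lock);
                mydrv_issue(rq);                /* hand the tagged request to hardware */
                spin_lock_irq(q->queue_lock);
        }
}

The completion side calls blk_queue_end_tag(q, rq) under the same lock before ending the request.
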
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index cbff183f3d9f..17ec83bb0900 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -189,13 +189,15 @@ unsigned long blk_rq_timeout(unsigned long timeout)
189 * Notes: 189 * Notes:
190 * Each request has its own timer, and as it is added to the queue, we 190 * Each request has its own timer, and as it is added to the queue, we
191 * set up the timer. When the request completes, we cancel the timer. 191 * set up the timer. When the request completes, we cancel the timer.
192 * Queue lock must be held for the non-mq case, mq case doesn't care.
193 */ 192 */
194void blk_add_timer(struct request *req) 193void blk_add_timer(struct request *req)
195{ 194{
196 struct request_queue *q = req->q; 195 struct request_queue *q = req->q;
197 unsigned long expiry; 196 unsigned long expiry;
198 197
198 if (!q->mq_ops)
199 lockdep_assert_held(q->queue_lock);
200
199 /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */ 201 /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
200 if (!q->mq_ops && !q->rq_timed_out_fn) 202 if (!q->mq_ops && !q->rq_timed_out_fn)
201 return; 203 return;
diff --git a/block/blk.h b/block/blk.h
index 83c8e1100525..01ebb8185f6b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -143,6 +143,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
143 struct request *rq; 143 struct request *rq;
144 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); 144 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
145 145
146 WARN_ON_ONCE(q->mq_ops);
147
146 while (1) { 148 while (1) {
147 if (!list_empty(&q->queue_head)) { 149 if (!list_empty(&q->queue_head)) {
148 rq = list_entry_rq(q->queue_head.next); 150 rq = list_entry_rq(q->queue_head.next);
@@ -334,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
334static inline void blk_throtl_stat_add(struct request *rq, u64 time) { } 336static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
335#endif 337#endif
336 338
339#ifdef CONFIG_BOUNCE
340extern int init_emergency_isa_pool(void);
341extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
342#else
343static inline int init_emergency_isa_pool(void)
344{
345 return 0;
346}
347static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
348{
349}
350#endif /* CONFIG_BOUNCE */
351
337#endif /* BLK_INTERNAL_H */ 352#endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 1cb5dd3a5da1..5793c2dc1a15 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -22,10 +22,12 @@
22#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
23 23
24#include <trace/events/block.h> 24#include <trace/events/block.h>
25#include "blk.h"
25 26
26#define POOL_SIZE 64 27#define POOL_SIZE 64
27#define ISA_POOL_SIZE 16 28#define ISA_POOL_SIZE 16
28 29
30static struct bio_set *bounce_bio_set, *bounce_bio_split;
29static mempool_t *page_pool, *isa_page_pool; 31static mempool_t *page_pool, *isa_page_pool;
30 32
31#if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL) 33#if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
@@ -40,6 +42,14 @@ static __init int init_emergency_pool(void)
40 BUG_ON(!page_pool); 42 BUG_ON(!page_pool);
41 pr_info("pool size: %d pages\n", POOL_SIZE); 43 pr_info("pool size: %d pages\n", POOL_SIZE);
42 44
45 bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
46 BUG_ON(!bounce_bio_set);
47 if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
48 BUG_ON(1);
49
50 bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
51 BUG_ON(!bounce_bio_split);
52
43 return 0; 53 return 0;
44} 54}
45 55
@@ -143,7 +153,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
143 mempool_free(bvec->bv_page, pool); 153 mempool_free(bvec->bv_page, pool);
144 } 154 }
145 155
146 bio_orig->bi_error = bio->bi_error; 156 bio_orig->bi_status = bio->bi_status;
147 bio_endio(bio_orig); 157 bio_endio(bio_orig);
148 bio_put(bio); 158 bio_put(bio);
149} 159}
@@ -163,7 +173,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
163{ 173{
164 struct bio *bio_orig = bio->bi_private; 174 struct bio *bio_orig = bio->bi_private;
165 175
166 if (!bio->bi_error) 176 if (!bio->bi_status)
167 copy_to_high_bio_irq(bio_orig, bio); 177 copy_to_high_bio_irq(bio_orig, bio);
168 178
169 bounce_end_io(bio, pool); 179 bounce_end_io(bio, pool);
@@ -186,20 +196,31 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
186 int rw = bio_data_dir(*bio_orig); 196 int rw = bio_data_dir(*bio_orig);
187 struct bio_vec *to, from; 197 struct bio_vec *to, from;
188 struct bvec_iter iter; 198 struct bvec_iter iter;
189 unsigned i; 199 unsigned i = 0;
190 200 bool bounce = false;
191 bio_for_each_segment(from, *bio_orig, iter) 201 int sectors = 0;
192 if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q)) 202
193 goto bounce; 203 bio_for_each_segment(from, *bio_orig, iter) {
204 if (i++ < BIO_MAX_PAGES)
205 sectors += from.bv_len >> 9;
206 if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
207 bounce = true;
208 }
209 if (!bounce)
210 return;
194 211
195 return; 212 if (sectors < bio_sectors(*bio_orig)) {
196bounce: 213 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
197 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set); 214 bio_chain(bio, *bio_orig);
215 generic_make_request(*bio_orig);
216 *bio_orig = bio;
217 }
218 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
198 219
199 bio_for_each_segment_all(to, bio, i) { 220 bio_for_each_segment_all(to, bio, i) {
200 struct page *page = to->bv_page; 221 struct page *page = to->bv_page;
201 222
202 if (page_to_pfn(page) <= queue_bounce_pfn(q)) 223 if (page_to_pfn(page) <= q->limits.bounce_pfn)
203 continue; 224 continue;
204 225
205 to->bv_page = mempool_alloc(pool, q->bounce_gfp); 226 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
@@ -251,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
251 * don't waste time iterating over bio segments 272 * don't waste time iterating over bio segments
252 */ 273 */
253 if (!(q->bounce_gfp & GFP_DMA)) { 274 if (!(q->bounce_gfp & GFP_DMA)) {
254 if (queue_bounce_pfn(q) >= blk_max_pfn) 275 if (q->limits.bounce_pfn >= blk_max_pfn)
255 return; 276 return;
256 pool = page_pool; 277 pool = page_pool;
257 } else { 278 } else {
@@ -264,5 +285,3 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
264 */ 285 */
265 __blk_queue_bounce(q, bio_orig, pool); 286 __blk_queue_bounce(q, bio_orig, pool);
266} 287}
267
268EXPORT_SYMBOL(blk_queue_bounce);
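
__blk_queue_bounce() now caps the cloned bio at BIO_MAX_PAGES segments by splitting the original and chaining the pieces, instead of cloning from fs_bio_set. The split-and-chain pattern, as a hedged sketch ("my_bio_set" is a hypothetical, pre-created bio_set):

static void handle_front_piece_only(struct bio **bio_orig, int sectors,
                                    struct bio_set *my_bio_set)
{
        struct bio *front = bio_split(*bio_orig, sectors, GFP_NOIO, my_bio_set);

        /* The original's completion now also waits for the front piece. */
        bio_chain(front, *bio_orig);
        /* Resubmit the remainder; the caller keeps working on the front. */
        generic_make_request(*bio_orig);
        *bio_orig = front;
}
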
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 0a23dbba2d30..c4513b23f57a 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
37 struct bsg_job *job = container_of(kref, struct bsg_job, kref); 37 struct bsg_job *job = container_of(kref, struct bsg_job, kref);
38 struct request *rq = job->req; 38 struct request *rq = job->req;
39 39
40 blk_end_request_all(rq, scsi_req(rq)->result); 40 blk_end_request_all(rq, BLK_STS_OK);
41 41
42 put_device(job->dev); /* release reference for the request */ 42 put_device(job->dev); /* release reference for the request */
43 43
@@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q)
202 ret = bsg_create_job(dev, req); 202 ret = bsg_create_job(dev, req);
203 if (ret) { 203 if (ret) {
204 scsi_req(req)->result = ret; 204 scsi_req(req)->result = ret;
205 blk_end_request_all(req, ret); 205 blk_end_request_all(req, BLK_STS_OK);
206 spin_lock_irq(q->queue_lock); 206 spin_lock_irq(q->queue_lock);
207 continue; 207 continue;
208 } 208 }
@@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
246 q->bsg_job_size = dd_job_size; 246 q->bsg_job_size = dd_job_size;
247 q->bsg_job_fn = job_fn; 247 q->bsg_job_fn = job_fn;
248 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); 248 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
249 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
249 blk_queue_softirq_done(q, bsg_softirq_done); 250 blk_queue_softirq_done(q, bsg_softirq_done);
250 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); 251 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
251 252
diff --git a/block/bsg.c b/block/bsg.c
index 6fd08544d77e..37663b664666 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -236,7 +236,6 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
236 rq = blk_get_request(q, op, GFP_KERNEL); 236 rq = blk_get_request(q, op, GFP_KERNEL);
237 if (IS_ERR(rq)) 237 if (IS_ERR(rq))
238 return rq; 238 return rq;
239 scsi_req_init(rq);
240 239
241 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); 240 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
242 if (ret) 241 if (ret)
@@ -294,14 +293,14 @@ out:
294 * async completion call-back from the block layer, when scsi/ide/whatever 293 * async completion call-back from the block layer, when scsi/ide/whatever
295 * calls end_that_request_last() on a request 294 * calls end_that_request_last() on a request
296 */ 295 */
297static void bsg_rq_end_io(struct request *rq, int uptodate) 296static void bsg_rq_end_io(struct request *rq, blk_status_t status)
298{ 297{
299 struct bsg_command *bc = rq->end_io_data; 298 struct bsg_command *bc = rq->end_io_data;
300 struct bsg_device *bd = bc->bd; 299 struct bsg_device *bd = bc->bd;
301 unsigned long flags; 300 unsigned long flags;
302 301
303 dprintk("%s: finished rq %p bc %p, bio %p stat %d\n", 302 dprintk("%s: finished rq %p bc %p, bio %p\n",
304 bd->name, rq, bc, bc->bio, uptodate); 303 bd->name, rq, bc, bc->bio);
305 304
306 bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); 305 bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
307 306
@@ -750,6 +749,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
750#ifdef BSG_DEBUG 749#ifdef BSG_DEBUG
751 unsigned char buf[32]; 750 unsigned char buf[32];
752#endif 751#endif
752
753 if (!blk_queue_scsi_passthrough(rq)) {
754 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
755 return ERR_PTR(-EINVAL);
756 }
757
753 if (!blk_get_queue(rq)) 758 if (!blk_get_queue(rq))
754 return ERR_PTR(-ENXIO); 759 return ERR_PTR(-ENXIO);
755 760
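
bsg now refuses queues that are not flagged for SCSI passthrough, so a driver has to set QUEUE_FLAG_SCSI_PASSTHROUGH before registering (the bsg-lib and cciss hunks do exactly that). A hedged sketch, with "mydrv_bsg" as a made-up device name:

static int mydrv_register_bsg(struct request_queue *q, struct device *parent)
{
        /* Without this flag, bsg_add_device() above returns -EINVAL. */
        queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);

        return bsg_register_queue(q, parent, "mydrv_bsg", NULL);
}
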
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b7e9c7feeab2..3d5c28945719 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -982,15 +982,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
982 return min_vdisktime; 982 return min_vdisktime;
983} 983}
984 984
985static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
986{
987 s64 delta = (s64)(vdisktime - min_vdisktime);
988 if (delta < 0)
989 min_vdisktime = vdisktime;
990
991 return min_vdisktime;
992}
993
994static void update_min_vdisktime(struct cfq_rb_root *st) 985static void update_min_vdisktime(struct cfq_rb_root *st)
995{ 986{
996 struct cfq_group *cfqg; 987 struct cfq_group *cfqg;
diff --git a/block/elevator.c b/block/elevator.c
index dac99fbfc273..4bb2f0c93fa6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -681,6 +681,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
681 */ 681 */
682 if (elv_attempt_insert_merge(q, rq)) 682 if (elv_attempt_insert_merge(q, rq))
683 break; 683 break;
684 /* fall through */
684 case ELEVATOR_INSERT_SORT: 685 case ELEVATOR_INSERT_SORT:
685 BUG_ON(blk_rq_is_passthrough(rq)); 686 BUG_ON(blk_rq_is_passthrough(rq));
686 rq->rq_flags |= RQF_SORTED; 687 rq->rq_flags |= RQF_SORTED;
diff --git a/block/genhd.c b/block/genhd.c
index d252d29fe837..7f520fa25d16 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -36,7 +36,7 @@ struct kobject *block_depr;
36static DEFINE_SPINLOCK(ext_devt_lock); 36static DEFINE_SPINLOCK(ext_devt_lock);
37static DEFINE_IDR(ext_devt_idr); 37static DEFINE_IDR(ext_devt_idr);
38 38
39static struct device_type disk_type; 39static const struct device_type disk_type;
40 40
41static void disk_check_events(struct disk_events *ev, 41static void disk_check_events(struct disk_events *ev,
42 unsigned int *clearing_ptr); 42 unsigned int *clearing_ptr);
@@ -1183,7 +1183,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
1183 return NULL; 1183 return NULL;
1184} 1184}
1185 1185
1186static struct device_type disk_type = { 1186static const struct device_type disk_type = {
1187 .name = "disk", 1187 .name = "disk",
1188 .groups = disk_attr_groups, 1188 .groups = disk_attr_groups,
1189 .release = disk_release, 1189 .release = disk_release,
diff --git a/block/ioprio.c b/block/ioprio.c
index 4b120c9cf7e8..6f5d0b6625e3 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -75,7 +75,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
75 case IOPRIO_CLASS_RT: 75 case IOPRIO_CLASS_RT:
76 if (!capable(CAP_SYS_ADMIN)) 76 if (!capable(CAP_SYS_ADMIN))
77 return -EPERM; 77 return -EPERM;
78 /* fall through, rt has prio field too */ 78 /* fall through */
79 /* rt has prio field too */
79 case IOPRIO_CLASS_BE: 80 case IOPRIO_CLASS_BE:
80 if (data >= IOPRIO_BE_NR || data < 0) 81 if (data >= IOPRIO_BE_NR || data < 0)
81 return -EINVAL; 82 return -EINVAL;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc75fdb..a9f6fd3fab8e 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -426,33 +426,29 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
426 } 426 }
427} 427}
428 428
429static struct request *kyber_get_request(struct request_queue *q, 429static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
430 unsigned int op,
431 struct blk_mq_alloc_data *data)
432{ 430{
433 struct kyber_queue_data *kqd = q->elevator->elevator_data;
434 struct request *rq;
435
436 /* 431 /*
437 * We use the scheduler tags as per-hardware queue queueing tokens. 432 * We use the scheduler tags as per-hardware queue queueing tokens.
438 * Async requests can be limited at this stage. 433 * Async requests can be limited at this stage.
439 */ 434 */
440 if (!op_is_sync(op)) 435 if (!op_is_sync(op)) {
436 struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
437
441 data->shallow_depth = kqd->async_depth; 438 data->shallow_depth = kqd->async_depth;
439 }
440}
442 441
443 rq = __blk_mq_alloc_request(data, op); 442static void kyber_prepare_request(struct request *rq, struct bio *bio)
444 if (rq) 443{
445 rq_set_domain_token(rq, -1); 444 rq_set_domain_token(rq, -1);
446 return rq;
447} 445}
448 446
449static void kyber_put_request(struct request *rq) 447static void kyber_finish_request(struct request *rq)
450{ 448{
451 struct request_queue *q = rq->q; 449 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
452 struct kyber_queue_data *kqd = q->elevator->elevator_data;
453 450
454 rq_clear_domain_token(kqd, rq); 451 rq_clear_domain_token(kqd, rq);
455 blk_mq_finish_request(rq);
456} 452}
457 453
458static void kyber_completed_request(struct request *rq) 454static void kyber_completed_request(struct request *rq)
@@ -815,8 +811,9 @@ static struct elevator_type kyber_sched = {
815 .exit_sched = kyber_exit_sched, 811 .exit_sched = kyber_exit_sched,
816 .init_hctx = kyber_init_hctx, 812 .init_hctx = kyber_init_hctx,
817 .exit_hctx = kyber_exit_hctx, 813 .exit_hctx = kyber_exit_hctx,
818 .get_request = kyber_get_request, 814 .limit_depth = kyber_limit_depth,
819 .put_request = kyber_put_request, 815 .prepare_request = kyber_prepare_request,
816 .finish_request = kyber_finish_request,
820 .completed_request = kyber_completed_request, 817 .completed_request = kyber_completed_request,
821 .dispatch_request = kyber_dispatch_request, 818 .dispatch_request = kyber_dispatch_request,
822 .has_work = kyber_has_work, 819 .has_work = kyber_has_work,
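
The per-request scheduler interface drops .get_request/.put_request: the core now allocates the request itself and calls .limit_depth before allocation, .prepare_request right after it, and .finish_request on the way out. A hedged sketch of the three hooks for a toy scheduler built in-tree next to kyber (all "toy_" names are hypothetical):

static void toy_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
        if (!op_is_sync(op))
                data->shallow_depth = 16;       /* throttle async allocations */
}

static void toy_prepare_request(struct request *rq, struct bio *bio)
{
        rq->elv.priv[0] = NULL;                 /* per-request scheduler state */
}

static void toy_finish_request(struct request *rq)
{
        kfree(rq->elv.priv[0]);                 /* kfree(NULL) is a no-op */
}

These slot into the elevator ops exactly where the kyber hunk above wires up kyber_limit_depth, kyber_prepare_request and kyber_finish_request.
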
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4a294a5f7fab..7440de44dd85 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -326,7 +326,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
326 if (IS_ERR(rq)) 326 if (IS_ERR(rq))
327 return PTR_ERR(rq); 327 return PTR_ERR(rq);
328 req = scsi_req(rq); 328 req = scsi_req(rq);
329 scsi_req_init(rq);
330 329
331 if (hdr->cmd_len > BLK_MAX_CDB) { 330 if (hdr->cmd_len > BLK_MAX_CDB) {
332 req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); 331 req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
@@ -456,7 +455,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
456 goto error_free_buffer; 455 goto error_free_buffer;
457 } 456 }
458 req = scsi_req(rq); 457 req = scsi_req(rq);
459 scsi_req_init(rq);
460 458
461 cmdlen = COMMAND_SIZE(opcode); 459 cmdlen = COMMAND_SIZE(opcode);
462 460
@@ -542,7 +540,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
542 rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM); 540 rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
543 if (IS_ERR(rq)) 541 if (IS_ERR(rq))
544 return PTR_ERR(rq); 542 return PTR_ERR(rq);
545 scsi_req_init(rq);
546 rq->timeout = BLK_DEFAULT_SG_TIMEOUT; 543 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
547 scsi_req(rq)->cmd[0] = cmd; 544 scsi_req(rq)->cmd[0] = cmd;
548 scsi_req(rq)->cmd[4] = data; 545 scsi_req(rq)->cmd[4] = data;
@@ -744,10 +741,14 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
744} 741}
745EXPORT_SYMBOL(scsi_cmd_blk_ioctl); 742EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
746 743
747void scsi_req_init(struct request *rq) 744/**
745 * scsi_req_init - initialize certain fields of a scsi_request structure
746 * @req: Pointer to a scsi_request structure.
747 * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
748 * of struct scsi_request.
749 */
750void scsi_req_init(struct scsi_request *req)
748{ 751{
749 struct scsi_request *req = scsi_req(rq);
750
751 memset(req->__cmd, 0, sizeof(req->__cmd)); 752 memset(req->__cmd, 0, sizeof(req->__cmd));
752 req->cmd = req->__cmd; 753 req->cmd = req->__cmd;
753 req->cmd_len = BLK_MAX_CDB; 754 req->cmd_len = BLK_MAX_CDB;
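
With scsi_req_init() now operating on the scsi_request itself and being run when the request is set up by the queue (which is why the explicit calls after blk_get_request() are deleted above), a passthrough submitter only fills in the CDB. A hedged sketch modeled on __blk_send_generic(); mydrv_test_unit_ready() and its use of TEST_UNIT_READY are illustrative, not from this series:

static int mydrv_test_unit_ready(struct request_queue *q)
{
        struct request *rq;
        struct scsi_request *req;
        int result;

        rq = blk_get_request(q, REQ_OP_SCSI_OUT, GFP_KERNEL);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        req = scsi_req(rq);             /* already initialized for us */
        req->cmd[0] = TEST_UNIT_READY;  /* cmd points at the zeroed __cmd[] */
        rq->timeout = BLK_DEFAULT_SG_TIMEOUT;

        blk_execute_rq(q, NULL, rq, 0);
        result = req->result;
        blk_put_request(rq);
        return result;
}
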
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 680c6d636298..3416dadf7b15 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
46 * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref 46 * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
47 * tag. 47 * tag.
48 */ 48 */
49static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn, 49static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
50 unsigned int type) 50 csum_fn *fn, unsigned int type)
51{ 51{
52 unsigned int i; 52 unsigned int i;
53 53
@@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
67 iter->seed++; 67 iter->seed++;
68 } 68 }
69 69
70 return 0; 70 return BLK_STS_OK;
71} 71}
72 72
73static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn, 73static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
74 unsigned int type) 74 csum_fn *fn, unsigned int type)
75{ 75{
76 unsigned int i; 76 unsigned int i;
77 77
@@ -91,7 +91,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
91 "(rcvd %u)\n", iter->disk_name, 91 "(rcvd %u)\n", iter->disk_name,
92 (unsigned long long) 92 (unsigned long long)
93 iter->seed, be32_to_cpu(pi->ref_tag)); 93 iter->seed, be32_to_cpu(pi->ref_tag));
94 return -EILSEQ; 94 return BLK_STS_PROTECTION;
95 } 95 }
96 break; 96 break;
97 case 3: 97 case 3:
@@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
108 "(rcvd %04x, want %04x)\n", iter->disk_name, 108 "(rcvd %04x, want %04x)\n", iter->disk_name,
109 (unsigned long long)iter->seed, 109 (unsigned long long)iter->seed,
110 be16_to_cpu(pi->guard_tag), be16_to_cpu(csum)); 110 be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
111 return -EILSEQ; 111 return BLK_STS_PROTECTION;
112 } 112 }
113 113
114next: 114next:
@@ -117,45 +117,45 @@ next:
117 iter->seed++; 117 iter->seed++;
118 } 118 }
119 119
120 return 0; 120 return BLK_STS_OK;
121} 121}
122 122
123static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) 123static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
124{ 124{
125 return t10_pi_generate(iter, t10_pi_crc_fn, 1); 125 return t10_pi_generate(iter, t10_pi_crc_fn, 1);
126} 126}
127 127
128static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) 128static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
129{ 129{
130 return t10_pi_generate(iter, t10_pi_ip_fn, 1); 130 return t10_pi_generate(iter, t10_pi_ip_fn, 1);
131} 131}
132 132
133static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) 133static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
134{ 134{
135 return t10_pi_verify(iter, t10_pi_crc_fn, 1); 135 return t10_pi_verify(iter, t10_pi_crc_fn, 1);
136} 136}
137 137
138static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) 138static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
139{ 139{
140 return t10_pi_verify(iter, t10_pi_ip_fn, 1); 140 return t10_pi_verify(iter, t10_pi_ip_fn, 1);
141} 141}
142 142
143static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) 143static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
144{ 144{
145 return t10_pi_generate(iter, t10_pi_crc_fn, 3); 145 return t10_pi_generate(iter, t10_pi_crc_fn, 3);
146} 146}
147 147
148static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) 148static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
149{ 149{
150 return t10_pi_generate(iter, t10_pi_ip_fn, 3); 150 return t10_pi_generate(iter, t10_pi_ip_fn, 3);
151} 151}
152 152
153static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) 153static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
154{ 154{
155 return t10_pi_verify(iter, t10_pi_crc_fn, 3); 155 return t10_pi_verify(iter, t10_pi_crc_fn, 3);
156} 156}
157 157
158static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) 158static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
159{ 159{
160 return t10_pi_verify(iter, t10_pi_ip_fn, 3); 160 return t10_pi_verify(iter, t10_pi_ip_fn, 3);
161} 161}
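
The integrity generate/verify hooks now speak blk_status_t as well, with BLK_STS_PROTECTION taking the place of the old -EILSEQ. Callers that still need an errno can convert; a hedged sketch (the -EILSEQ equivalence reflects the core's status table as of this series, and the callback pointer stands in for any profile verify function):

static int run_verify(blk_status_t (*verify_fn)(struct blk_integrity_iter *),
                      struct blk_integrity_iter *iter)
{
        blk_status_t sts = verify_fn(iter);

        if (sts == BLK_STS_OK)
                return 0;
        /* BLK_STS_PROTECTION converts back to the old -EILSEQ here. */
        return blk_status_to_errno(sts);
}
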
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 26a51be77227..245a879b036e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
3464 bool SuccessfulIO) 3464 bool SuccessfulIO)
3465{ 3465{
3466 struct request *Request = Command->Request; 3466 struct request *Request = Command->Request;
3467 int Error = SuccessfulIO ? 0 : -EIO; 3467 blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
3468 3468
3469 pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist, 3469 pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
3470 Command->SegmentCount, Command->DmaDirection); 3470 Command->SegmentCount, Command->DmaDirection);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a328f673adfe..49908c74bfcb 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
1378 struct amiga_floppy_struct *floppy; 1378 struct amiga_floppy_struct *floppy;
1379 char *data; 1379 char *data;
1380 unsigned long flags; 1380 unsigned long flags;
1381 int err; 1381 blk_status_t err;
1382 1382
1383next_req: 1383next_req:
1384 rq = set_next_request(); 1384 rq = set_next_request();
@@ -1392,7 +1392,7 @@ next_req:
1392 1392
1393next_segment: 1393next_segment:
1394 /* Here someone could investigate to be more efficient */ 1394 /* Here someone could investigate to be more efficient */
1395 for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) { 1395 for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
1396#ifdef DEBUG 1396#ifdef DEBUG
1397 printk("fd: sector %ld + %d requested for %s\n", 1397 printk("fd: sector %ld + %d requested for %s\n",
1398 blk_rq_pos(rq), cnt, 1398 blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ next_segment:
1400#endif 1400#endif
1401 block = blk_rq_pos(rq) + cnt; 1401 block = blk_rq_pos(rq) + cnt;
1402 if ((int)block > floppy->blocks) { 1402 if ((int)block > floppy->blocks) {
1403 err = -EIO; 1403 err = BLK_STS_IOERR;
1404 break; 1404 break;
1405 } 1405 }
1406 1406
@@ -1413,7 +1413,7 @@ next_segment:
1413#endif 1413#endif
1414 1414
1415 if (get_track(drive, track) == -1) { 1415 if (get_track(drive, track) == -1) {
1416 err = -EIO; 1416 err = BLK_STS_IOERR;
1417 break; 1417 break;
1418 } 1418 }
1419 1419
@@ -1424,7 +1424,7 @@ next_segment:
1424 1424
1425 /* keep the drive spinning while writes are scheduled */ 1425 /* keep the drive spinning while writes are scheduled */
1426 if (!fd_motor_on(drive)) { 1426 if (!fd_motor_on(drive)) {
1427 err = -EIO; 1427 err = BLK_STS_IOERR;
1428 break; 1428 break;
1429 } 1429 }
1430 /* 1430 /*
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 027b876370bc..6797e6c23c8a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp)
388 d->aoemajor, d->aoeminor); 388 d->aoemajor, d->aoeminor);
389 goto err_mempool; 389 goto err_mempool;
390 } 390 }
391 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
391 392
392 spin_lock_irqsave(&d->lock, flags); 393 spin_lock_irqsave(&d->lock, flags);
393 WARN_ON(!(d->flags & DEVFL_GD_NOW)); 394 WARN_ON(!(d->flags & DEVFL_GD_NOW));
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3c606c09fd5a..dc43254e05a4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1070,8 +1070,8 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
1070 d->ip.rq = NULL; 1070 d->ip.rq = NULL;
1071 do { 1071 do {
1072 bio = rq->bio; 1072 bio = rq->bio;
1073 bok = !fastfail && !bio->bi_error; 1073 bok = !fastfail && !bio->bi_status;
1074 } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size)); 1074 } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
1075 1075
1076 /* cf. http://lkml.org/lkml/2006/10/31/28 */ 1076 /* cf. http://lkml.org/lkml/2006/10/31/28 */
1077 if (!fastfail) 1077 if (!fastfail)
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
1131 ahout->cmdstat, ahin->cmdstat, 1131 ahout->cmdstat, ahin->cmdstat,
1132 d->aoemajor, d->aoeminor); 1132 d->aoemajor, d->aoeminor);
1133noskb: if (buf) 1133noskb: if (buf)
1134 buf->bio->bi_error = -EIO; 1134 buf->bio->bi_status = BLK_STS_IOERR;
1135 goto out; 1135 goto out;
1136 } 1136 }
1137 1137
@@ -1144,7 +1144,7 @@ noskb: if (buf)
1144 "aoe: runt data size in read from", 1144 "aoe: runt data size in read from",
1145 (long) d->aoemajor, d->aoeminor, 1145 (long) d->aoemajor, d->aoeminor,
1146 skb->len, n); 1146 skb->len, n);
1147 buf->bio->bi_error = -EIO; 1147 buf->bio->bi_status = BLK_STS_IOERR;
1148 break; 1148 break;
1149 } 1149 }
1150 if (n > f->iter.bi_size) { 1150 if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb: if (buf)
1152 "aoe: too-large data size in read from", 1152 "aoe: too-large data size in read from",
1153 (long) d->aoemajor, d->aoeminor, 1153 (long) d->aoemajor, d->aoeminor,
1154 n, f->iter.bi_size); 1154 n, f->iter.bi_size);
1155 buf->bio->bi_error = -EIO; 1155 buf->bio->bi_status = BLK_STS_IOERR;
1156 break; 1156 break;
1157 } 1157 }
1158 bvcpy(skb, f->buf->bio, f->iter, n); 1158 bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
1654 if (buf == NULL) 1654 if (buf == NULL)
1655 return; 1655 return;
1656 buf->iter.bi_size = 0; 1656 buf->iter.bi_size = 0;
1657 buf->bio->bi_error = -EIO; 1657 buf->bio->bi_status = BLK_STS_IOERR;
1658 if (buf->nframesout == 0) 1658 if (buf->nframesout == 0)
1659 aoe_end_buf(d, buf); 1659 aoe_end_buf(d, buf);
1660} 1660}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index ffd1947500c6..b28fefb90391 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
170 if (rq == NULL) 170 if (rq == NULL)
171 return; 171 return;
172 while ((bio = d->ip.nxbio)) { 172 while ((bio = d->ip.nxbio)) {
173 bio->bi_error = -EIO; 173 bio->bi_status = BLK_STS_IOERR;
174 d->ip.nxbio = bio->bi_next; 174 d->ip.nxbio = bio->bi_next;
175 n = (unsigned long) rq->special; 175 n = (unsigned long) rq->special;
176 rq->special = (void *) --n; 176 rq->special = (void *) --n;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index fa69ecd52cb5..92da886180aa 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
378static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0); 378static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
379static DEFINE_TIMER(fd_timer, check_change, 0, 0); 379static DEFINE_TIMER(fd_timer, check_change, 0, 0);
380 380
381static void fd_end_request_cur(int err) 381static void fd_end_request_cur(blk_status_t err)
382{ 382{
383 if (!__blk_end_request_cur(fd_request, err)) 383 if (!__blk_end_request_cur(fd_request, err))
384 fd_request = NULL; 384 fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
620 fd_request->error_count++; 620 fd_request->error_count++;
621 if (fd_request->error_count >= MAX_ERRORS) { 621 if (fd_request->error_count >= MAX_ERRORS) {
622 printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); 622 printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
623 fd_end_request_cur(-EIO); 623 fd_end_request_cur(BLK_STS_IOERR);
624 } 624 }
625 else if (fd_request->error_count == RECALIBRATE_ERRORS) { 625 else if (fd_request->error_count == RECALIBRATE_ERRORS) {
626 printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); 626 printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
739 } 739 }
740 else { 740 else {
741 /* all sectors finished */ 741 /* all sectors finished */
742 fd_end_request_cur(0); 742 fd_end_request_cur(BLK_STS_OK);
743 redo_fd_request(); 743 redo_fd_request();
744 return; 744 return;
745 } 745 }
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
1144 } 1144 }
1145 else { 1145 else {
1146 /* all sectors finished */ 1146 /* all sectors finished */
1147 fd_end_request_cur(0); 1147 fd_end_request_cur(BLK_STS_OK);
1148 redo_fd_request(); 1148 redo_fd_request();
1149 } 1149 }
1150 return; 1150 return;
@@ -1445,7 +1445,7 @@ repeat:
1445 if (!UD.connected) { 1445 if (!UD.connected) {
1446 /* drive not connected */ 1446 /* drive not connected */
1447 printk(KERN_ERR "Unknown Device: fd%d\n", drive ); 1447 printk(KERN_ERR "Unknown Device: fd%d\n", drive );
1448 fd_end_request_cur(-EIO); 1448 fd_end_request_cur(BLK_STS_IOERR);
1449 goto repeat; 1449 goto repeat;
1450 } 1450 }
1451 1451
@@ -1461,12 +1461,12 @@ repeat:
1461 /* user supplied disk type */ 1461 /* user supplied disk type */
1462 if (--type >= NUM_DISK_MINORS) { 1462 if (--type >= NUM_DISK_MINORS) {
1463 printk(KERN_WARNING "fd%d: invalid disk format", drive ); 1463 printk(KERN_WARNING "fd%d: invalid disk format", drive );
1464 fd_end_request_cur(-EIO); 1464 fd_end_request_cur(BLK_STS_IOERR);
1465 goto repeat; 1465 goto repeat;
1466 } 1466 }
1467 if (minor2disktype[type].drive_types > DriveType) { 1467 if (minor2disktype[type].drive_types > DriveType) {
1468 printk(KERN_WARNING "fd%d: unsupported disk format", drive ); 1468 printk(KERN_WARNING "fd%d: unsupported disk format", drive );
1469 fd_end_request_cur(-EIO); 1469 fd_end_request_cur(BLK_STS_IOERR);
1470 goto repeat; 1470 goto repeat;
1471 } 1471 }
1472 type = minor2disktype[type].index; 1472 type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ repeat:
1476 } 1476 }
1477 1477
1478 if (blk_rq_pos(fd_request) + 1 > UDT->blocks) { 1478 if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
1479 fd_end_request_cur(-EIO); 1479 fd_end_request_cur(BLK_STS_IOERR);
1480 goto repeat; 1480 goto repeat;
1481 } 1481 }
1482 1482
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 57b574f2f66a..6112e99bedf7 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i)
418 418
419 blk_queue_make_request(brd->brd_queue, brd_make_request); 419 blk_queue_make_request(brd->brd_queue, brd_make_request);
420 blk_queue_max_hw_sectors(brd->brd_queue, 1024); 420 blk_queue_max_hw_sectors(brd->brd_queue, 1024);
421 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
422 421
423 /* This is so fdisk will align partitions on 4k, because of 422 /* This is so fdisk will align partitions on 4k, because of
424 * direct_access API needing 4k alignment, returning a PFN 423 * direct_access API needing 4k alignment, returning a PFN
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd375503f7b0..02a611993bb4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
1864 /* set the residual count for pc requests */ 1864 /* set the residual count for pc requests */
1865 if (blk_rq_is_passthrough(rq)) 1865 if (blk_rq_is_passthrough(rq))
1866 scsi_req(rq)->resid_len = c->err_info->ResidualCnt; 1866 scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
1867 blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0); 1867 blk_end_request_all(rq, scsi_req(rq)->result ?
1868 BLK_STS_IOERR : BLK_STS_OK);
1868 1869
1869 spin_lock_irqsave(&h->lock, flags); 1870 spin_lock_irqsave(&h->lock, flags);
1870 cmd_free(h, c); 1871 cmd_free(h, c);
@@ -1956,6 +1957,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
1956 disk->queue->cmd_size = sizeof(struct scsi_request); 1957 disk->queue->cmd_size = sizeof(struct scsi_request);
1957 disk->queue->request_fn = do_cciss_request; 1958 disk->queue->request_fn = do_cciss_request;
1958 disk->queue->queue_lock = &h->lock; 1959 disk->queue->queue_lock = &h->lock;
1960 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
1959 if (blk_init_allocated_queue(disk->queue) < 0) 1961 if (blk_init_allocated_queue(disk->queue) < 0)
1960 goto cleanup_queue; 1962 goto cleanup_queue;
1961 1963
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 8d7bcfa49c12..e02c45cd3c5a 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
178 else 178 else
179 submit_bio(bio); 179 submit_bio(bio);
180 wait_until_done_or_force_detached(device, bdev, &device->md_io.done); 180 wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
181 if (!bio->bi_error) 181 if (!bio->bi_status)
182 err = device->md_io.error; 182 err = device->md_io.error;
183 183
184 out: 184 out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a804a4107fbc..809fd245c3dc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
959 !bm_test_page_unchanged(b->bm_pages[idx])) 959 !bm_test_page_unchanged(b->bm_pages[idx]))
960 drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx); 960 drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
961 961
962 if (bio->bi_error) { 962 if (bio->bi_status) {
963 /* ctx error will hold the completed-last non-zero error code, 963 /* ctx error will hold the completed-last non-zero error code,
964 * in case error codes differ. */ 964 * in case error codes differ. */
965 ctx->error = bio->bi_error; 965 ctx->error = blk_status_to_errno(bio->bi_status);
966 bm_set_page_io_err(b->bm_pages[idx]); 966 bm_set_page_io_err(b->bm_pages[idx]);
967 /* Not identical to on disk version of it. 967 /* Not identical to on disk version of it.
968 * Is BM_PAGE_IO_ERROR enough? */ 968 * Is BM_PAGE_IO_ERROR enough? */
969 if (__ratelimit(&drbd_ratelimit_state)) 969 if (__ratelimit(&drbd_ratelimit_state))
970 drbd_err(device, "IO ERROR %d on bitmap page idx %u\n", 970 drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
971 bio->bi_error, idx); 971 bio->bi_status, idx);
972 } else { 972 } else {
973 bm_clear_page_io_err(b->bm_pages[idx]); 973 bm_clear_page_io_err(b->bm_pages[idx]);
974 dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx); 974 dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45bb03a6..d17b6e6393c7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
1441/* to allocate from that set */ 1441/* to allocate from that set */
1442extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); 1442extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
1443 1443
1444/* And a bio_set for cloning */
1445extern struct bio_set *drbd_io_bio_set;
1446
1444extern struct mutex resources_mutex; 1447extern struct mutex resources_mutex;
1445 1448
1446extern int conn_lowest_minor(struct drbd_connection *connection); 1449extern int conn_lowest_minor(struct drbd_connection *connection);
@@ -1627,7 +1630,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
1627 __release(local); 1630 __release(local);
1628 if (!bio->bi_bdev) { 1631 if (!bio->bi_bdev) {
1629 drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); 1632 drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
1630 bio->bi_error = -ENODEV; 1633 bio->bi_status = BLK_STS_IOERR;
1631 bio_endio(bio); 1634 bio_endio(bio);
1632 return; 1635 return;
1633 } 1636 }
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c365f57..5fb99e06ebe4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
128mempool_t *drbd_ee_mempool; 128mempool_t *drbd_ee_mempool;
129mempool_t *drbd_md_io_page_pool; 129mempool_t *drbd_md_io_page_pool;
130struct bio_set *drbd_md_io_bio_set; 130struct bio_set *drbd_md_io_bio_set;
131struct bio_set *drbd_io_bio_set;
131 132
132/* I do not use a standard mempool, because: 133/* I do not use a standard mempool, because:
133 1) I want to hand out the pre-allocated objects first. 134 1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
2098 2099
2099 /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ 2100 /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
2100 2101
2102 if (drbd_io_bio_set)
2103 bioset_free(drbd_io_bio_set);
2101 if (drbd_md_io_bio_set) 2104 if (drbd_md_io_bio_set)
2102 bioset_free(drbd_md_io_bio_set); 2105 bioset_free(drbd_md_io_bio_set);
2103 if (drbd_md_io_page_pool) 2106 if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
2115 if (drbd_al_ext_cache) 2118 if (drbd_al_ext_cache)
2116 kmem_cache_destroy(drbd_al_ext_cache); 2119 kmem_cache_destroy(drbd_al_ext_cache);
2117 2120
2121 drbd_io_bio_set = NULL;
2118 drbd_md_io_bio_set = NULL; 2122 drbd_md_io_bio_set = NULL;
2119 drbd_md_io_page_pool = NULL; 2123 drbd_md_io_page_pool = NULL;
2120 drbd_ee_mempool = NULL; 2124 drbd_ee_mempool = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
2142 drbd_pp_pool = NULL; 2146 drbd_pp_pool = NULL;
2143 drbd_md_io_page_pool = NULL; 2147 drbd_md_io_page_pool = NULL;
2144 drbd_md_io_bio_set = NULL; 2148 drbd_md_io_bio_set = NULL;
2149 drbd_io_bio_set = NULL;
2145 2150
2146 /* caches */ 2151 /* caches */
2147 drbd_request_cache = kmem_cache_create( 2152 drbd_request_cache = kmem_cache_create(
@@ -2165,7 +2170,13 @@ static int drbd_create_mempools(void)
2165 goto Enomem; 2170 goto Enomem;
2166 2171
2167 /* mempools */ 2172 /* mempools */
2168 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); 2173 drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
2174 if (drbd_io_bio_set == NULL)
2175 goto Enomem;
2176
2177 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
2178 BIOSET_NEED_BVECS |
2179 BIOSET_NEED_RESCUER);
2169 if (drbd_md_io_bio_set == NULL) 2180 if (drbd_md_io_bio_set == NULL)
2170 goto Enomem; 2181 goto Enomem;
2171 2182
@@ -2839,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2839 /* Setting the max_hw_sectors to an odd value of 8kibyte here 2850 /* Setting the max_hw_sectors to an odd value of 8kibyte here
2840 This triggers a max_bio_size message upon first attach or connect */ 2851 This triggers a max_bio_size message upon first attach or connect */
2841 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); 2852 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
2842 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2843 q->queue_lock = &resource->req_lock; 2853 q->queue_lock = &resource->req_lock;
2844 2854
2845 device->md_io.page = alloc_page(GFP_KERNEL); 2855 device->md_io.page = alloc_page(GFP_KERNEL);
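
The mempool hunk above reflects the new bioset_create() signature, which takes an explicit flags argument, plus the addition of a separate bio_set used only for cloning data bios. A sketch under those assumptions (io_bio_set/md_bio_set are illustrative names):

#include <linux/bio.h>
#include <linux/errno.h>

static struct bio_set *io_bio_set;	/* for fast clones of data bios */
static struct bio_set *md_bio_set;	/* for metadata bios, needs bvecs */

static int create_biosets(void)
{
	/* front_pad = 0; the flags select which mempools the set allocates */
	io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
	if (!io_bio_set)
		return -ENOMEM;

	md_bio_set = bioset_create(BIO_POOL_SIZE, 0,
				   BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
	if (!md_bio_set) {
		bioset_free(io_bio_set);
		return -ENOMEM;
	}
	return 0;
}

Both sets are released with bioset_free() on module teardown, as the drbd_destroy_mempools() hunk does.
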
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 02255a0d68b9..ad0fcb43e45c 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2294,7 +2294,7 @@ _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_
2294static enum drbd_ret_code 2294static enum drbd_ret_code
2295check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf) 2295check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2296{ 2296{
2297 static enum drbd_ret_code rv; 2297 enum drbd_ret_code rv;
2298 struct drbd_peer_device *peer_device; 2298 struct drbd_peer_device *peer_device;
2299 int i; 2299 int i;
2300 2300
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1b0a2be24f39..c7e95e6380fb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
1229 struct drbd_device *device = octx->device; 1229 struct drbd_device *device = octx->device;
1230 struct issue_flush_context *ctx = octx->ctx; 1230 struct issue_flush_context *ctx = octx->ctx;
1231 1231
1232 if (bio->bi_error) { 1232 if (bio->bi_status) {
1233 ctx->error = bio->bi_error; 1233 ctx->error = blk_status_to_errno(bio->bi_status);
1234 drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error); 1234 drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
1235 } 1235 }
1236 kfree(octx); 1236 kfree(octx);
1237 bio_put(bio); 1237 bio_put(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 656624314f0d..f6e865b2d543 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
203void complete_master_bio(struct drbd_device *device, 203void complete_master_bio(struct drbd_device *device,
204 struct bio_and_error *m) 204 struct bio_and_error *m)
205{ 205{
206 m->bio->bi_error = m->error; 206 m->bio->bi_status = errno_to_blk_status(m->error);
207 bio_endio(m->bio); 207 bio_endio(m->bio);
208 dec_ap_bio(device); 208 dec_ap_bio(device);
209} 209}
@@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
1157 1157
1158 if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, 1158 if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
1159 GFP_NOIO, 0)) 1159 GFP_NOIO, 0))
1160 req->private_bio->bi_error = -EIO; 1160 req->private_bio->bi_status = BLK_STS_IOERR;
1161 bio_endio(req->private_bio); 1161 bio_endio(req->private_bio);
1162} 1162}
1163 1163
@@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
1225 /* only pass the error to the upper layers. 1225 /* only pass the error to the upper layers.
1226 * if user cannot handle io errors, that's not our business. */ 1226 * if user cannot handle io errors, that's not our business. */
1227 drbd_err(device, "could not kmalloc() req\n"); 1227 drbd_err(device, "could not kmalloc() req\n");
1228 bio->bi_error = -ENOMEM; 1228 bio->bi_status = BLK_STS_RESOURCE;
1229 bio_endio(bio); 1229 bio_endio(bio);
1230 return ERR_PTR(-ENOMEM); 1230 return ERR_PTR(-ENOMEM);
1231 } 1231 }
@@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
1560 struct drbd_device *device = (struct drbd_device *) q->queuedata; 1560 struct drbd_device *device = (struct drbd_device *) q->queuedata;
1561 unsigned long start_jif; 1561 unsigned long start_jif;
1562 1562
1563 blk_queue_split(q, &bio, q->bio_split); 1563 blk_queue_split(q, &bio);
1564 1564
1565 start_jif = jiffies; 1565 start_jif = jiffies;
1566 1566
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index eb49e7f2da91..9e1866ab238f 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
263static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) 263static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
264{ 264{
265 struct bio *bio; 265 struct bio *bio;
266 bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ 266 bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
267 267
268 req->private_bio = bio; 268 req->private_bio = bio;
269 269
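
bio_clone() is replaced here by bio_clone_fast(), which shares the source bio's bvec table instead of copying it and therefore needs a caller-supplied bio_set. A sketch reusing the io_bio_set name from the previous example:

#include <linux/bio.h>

static struct bio_set *io_bio_set;	/* created as in the sketch above */

static struct bio *clone_for_submit(struct bio *bio_src)
{
	/* the clone references bio_src's bvec table rather than copying it */
	return bio_clone_fast(bio_src, GFP_NOIO, io_bio_set);
}
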
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1afcb4e02d8d..1d8726a8df34 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
63 struct drbd_device *device; 63 struct drbd_device *device;
64 64
65 device = bio->bi_private; 65 device = bio->bi_private;
66 device->md_io.error = bio->bi_error; 66 device->md_io.error = blk_status_to_errno(bio->bi_status);
67 67
68 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 68 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
69 * to timeout on the lower level device, and eventually detach from it. 69 * to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
177 bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || 177 bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
178 bio_op(bio) == REQ_OP_DISCARD; 178 bio_op(bio) == REQ_OP_DISCARD;
179 179
180 if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) 180 if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
181 drbd_warn(device, "%s: error=%d s=%llus\n", 181 drbd_warn(device, "%s: error=%d s=%llus\n",
182 is_write ? (is_discard ? "discard" : "write") 182 is_write ? (is_discard ? "discard" : "write")
183 : "read", bio->bi_error, 183 : "read", bio->bi_status,
184 (unsigned long long)peer_req->i.sector); 184 (unsigned long long)peer_req->i.sector);
185 185
186 if (bio->bi_error) 186 if (bio->bi_status)
187 set_bit(__EE_WAS_ERROR, &peer_req->flags); 187 set_bit(__EE_WAS_ERROR, &peer_req->flags);
188 188
189 bio_put(bio); /* no need for the bio anymore */ 189 bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
243 if (__ratelimit(&drbd_ratelimit_state)) 243 if (__ratelimit(&drbd_ratelimit_state))
244 drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); 244 drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
245 245
246 if (!bio->bi_error) 246 if (!bio->bi_status)
247 drbd_panic_after_delayed_completion_of_aborted_request(device); 247 drbd_panic_after_delayed_completion_of_aborted_request(device);
248 } 248 }
249 249
250 /* to avoid recursion in __req_mod */ 250 /* to avoid recursion in __req_mod */
251 if (unlikely(bio->bi_error)) { 251 if (unlikely(bio->bi_status)) {
252 switch (bio_op(bio)) { 252 switch (bio_op(bio)) {
253 case REQ_OP_WRITE_ZEROES: 253 case REQ_OP_WRITE_ZEROES:
254 case REQ_OP_DISCARD: 254 case REQ_OP_DISCARD:
255 if (bio->bi_error == -EOPNOTSUPP) 255 if (bio->bi_status == BLK_STS_NOTSUPP)
256 what = DISCARD_COMPLETED_NOTSUPP; 256 what = DISCARD_COMPLETED_NOTSUPP;
257 else 257 else
258 what = DISCARD_COMPLETED_WITH_ERROR; 258 what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
272 } 272 }
273 273
274 bio_put(req->private_bio); 274 bio_put(req->private_bio);
275 req->private_bio = ERR_PTR(bio->bi_error); 275 req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
276 276
277 /* not req_mod(), we need irqsave here! */ 277 /* not req_mod(), we need irqsave here! */
278 spin_lock_irqsave(&device->resource->req_lock, flags); 278 spin_lock_irqsave(&device->resource->req_lock, flags);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 60d4c7653178..ce823647a9c4 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
2202 * ============================= 2202 * =============================
2203 */ 2203 */
2204 2204
2205static void floppy_end_request(struct request *req, int error) 2205static void floppy_end_request(struct request *req, blk_status_t error)
2206{ 2206{
2207 unsigned int nr_sectors = current_count_sectors; 2207 unsigned int nr_sectors = current_count_sectors;
2208 unsigned int drive = (unsigned long)req->rq_disk->private_data; 2208 unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
2263 DRWE->last_error_generation = DRS->generation; 2263 DRWE->last_error_generation = DRS->generation;
2264 } 2264 }
2265 spin_lock_irqsave(q->queue_lock, flags); 2265 spin_lock_irqsave(q->queue_lock, flags);
2266 floppy_end_request(req, -EIO); 2266 floppy_end_request(req, BLK_STS_IOERR);
2267 spin_unlock_irqrestore(q->queue_lock, flags); 2267 spin_unlock_irqrestore(q->queue_lock, flags);
2268 } 2268 }
2269} 2269}
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
3780 struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private; 3780 struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
3781 int drive = cbdata->drive; 3781 int drive = cbdata->drive;
3782 3782
3783 if (bio->bi_error) { 3783 if (bio->bi_status) {
3784 pr_info("floppy: error %d while reading block 0\n", 3784 pr_info("floppy: error %d while reading block 0\n",
3785 bio->bi_error); 3785 bio->bi_status);
3786 set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); 3786 set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
3787 } 3787 }
3788 complete(&cbdata->complete); 3788 complete(&cbdata->complete);
@@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void)
4203 goto out_put_disk; 4203 goto out_put_disk;
4204 } 4204 }
4205 4205
4206 blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
4206 blk_queue_max_hw_sectors(disks[drive]->queue, 64); 4207 blk_queue_max_hw_sectors(disks[drive]->queue, 64);
4207 disks[drive]->major = FLOPPY_MAJOR; 4208 disks[drive]->major = FLOPPY_MAJOR;
4208 disks[drive]->first_minor = TOMINOR(drive); 4209 disks[drive]->first_minor = TOMINOR(drive);
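
As the floppy hunk (and the paride hunks further down) suggest, the block core no longer applies a highmem bounce limit on behalf of drivers, so drivers that cannot address highmem now request it explicitly during queue setup. A minimal sketch:

#include <linux/blkdev.h>

static void setup_queue_limits(struct request_queue *q)
{
	/* opt in to bouncing highmem pages; no longer the core's default */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
	blk_queue_max_hw_sectors(q, 64);
}
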
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ebbd0c3fe0ed..0de11444e317 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
221} 221}
222 222
223static int 223static int
224figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) 224figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
225 loff_t logical_blocksize)
225{ 226{
226 loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); 227 loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
227 sector_t x = (sector_t)size; 228 sector_t x = (sector_t)size;
@@ -233,6 +234,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
233 lo->lo_offset = offset; 234 lo->lo_offset = offset;
234 if (lo->lo_sizelimit != sizelimit) 235 if (lo->lo_sizelimit != sizelimit)
235 lo->lo_sizelimit = sizelimit; 236 lo->lo_sizelimit = sizelimit;
237 if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
238 lo->lo_logical_blocksize = logical_blocksize;
239 blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
240 blk_queue_logical_block_size(lo->lo_queue,
241 lo->lo_logical_blocksize);
242 }
236 set_capacity(lo->lo_disk, x); 243 set_capacity(lo->lo_disk, x);
237 bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); 244 bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
238 /* let user-space know about the new size */ 245 /* let user-space know about the new size */
@@ -457,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
457 zero_fill_bio(bio); 464 zero_fill_bio(bio);
458 } 465 }
459 466
460 blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0); 467 blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
461} 468}
462 469
463static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) 470static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -813,6 +820,7 @@ static void loop_config_discard(struct loop_device *lo)
813 struct file *file = lo->lo_backing_file; 820 struct file *file = lo->lo_backing_file;
814 struct inode *inode = file->f_mapping->host; 821 struct inode *inode = file->f_mapping->host;
815 struct request_queue *q = lo->lo_queue; 822 struct request_queue *q = lo->lo_queue;
823 int lo_bits = 9;
816 824
817 /* 825 /*
818 * We use punch hole to reclaim the free space used by the 826 * We use punch hole to reclaim the free space used by the
@@ -832,8 +840,11 @@ static void loop_config_discard(struct loop_device *lo)
832 840
833 q->limits.discard_granularity = inode->i_sb->s_blocksize; 841 q->limits.discard_granularity = inode->i_sb->s_blocksize;
834 q->limits.discard_alignment = 0; 842 q->limits.discard_alignment = 0;
835 blk_queue_max_discard_sectors(q, UINT_MAX >> 9); 843 if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
836 blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); 844 lo_bits = blksize_bits(lo->lo_logical_blocksize);
845
846 blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
847 blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
837 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 848 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
838} 849}
839 850
@@ -843,10 +854,16 @@ static void loop_unprepare_queue(struct loop_device *lo)
843 kthread_stop(lo->worker_task); 854 kthread_stop(lo->worker_task);
844} 855}
845 856
857static int loop_kthread_worker_fn(void *worker_ptr)
858{
859 current->flags |= PF_LESS_THROTTLE;
860 return kthread_worker_fn(worker_ptr);
861}
862
846static int loop_prepare_queue(struct loop_device *lo) 863static int loop_prepare_queue(struct loop_device *lo)
847{ 864{
848 kthread_init_worker(&lo->worker); 865 kthread_init_worker(&lo->worker);
849 lo->worker_task = kthread_run(kthread_worker_fn, 866 lo->worker_task = kthread_run(loop_kthread_worker_fn,
850 &lo->worker, "loop%d", lo->lo_number); 867 &lo->worker, "loop%d", lo->lo_number);
851 if (IS_ERR(lo->worker_task)) 868 if (IS_ERR(lo->worker_task))
852 return -ENOMEM; 869 return -ENOMEM;
@@ -921,6 +938,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
921 938
922 lo->use_dio = false; 939 lo->use_dio = false;
923 lo->lo_blocksize = lo_blocksize; 940 lo->lo_blocksize = lo_blocksize;
941 lo->lo_logical_blocksize = 512;
924 lo->lo_device = bdev; 942 lo->lo_device = bdev;
925 lo->lo_flags = lo_flags; 943 lo->lo_flags = lo_flags;
926 lo->lo_backing_file = file; 944 lo->lo_backing_file = file;
@@ -1086,6 +1104,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1086 int err; 1104 int err;
1087 struct loop_func_table *xfer; 1105 struct loop_func_table *xfer;
1088 kuid_t uid = current_uid(); 1106 kuid_t uid = current_uid();
1107 int lo_flags = lo->lo_flags;
1089 1108
1090 if (lo->lo_encrypt_key_size && 1109 if (lo->lo_encrypt_key_size &&
1091 !uid_eq(lo->lo_key_owner, uid) && 1110 !uid_eq(lo->lo_key_owner, uid) &&
@@ -1118,12 +1137,30 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1118 if (err) 1137 if (err)
1119 goto exit; 1138 goto exit;
1120 1139
1140 if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
1141 if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
1142 lo->lo_logical_blocksize = 512;
1143 lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
1144 if (LO_INFO_BLOCKSIZE(info) != 512 &&
1145 LO_INFO_BLOCKSIZE(info) != 1024 &&
1146 LO_INFO_BLOCKSIZE(info) != 2048 &&
1147 LO_INFO_BLOCKSIZE(info) != 4096)
1148 return -EINVAL;
1149 if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
1150 return -EINVAL;
1151 }
1152
1121 if (lo->lo_offset != info->lo_offset || 1153 if (lo->lo_offset != info->lo_offset ||
1122 lo->lo_sizelimit != info->lo_sizelimit) 1154 lo->lo_sizelimit != info->lo_sizelimit ||
1123 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { 1155 lo->lo_flags != lo_flags ||
1156 ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
1157 lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
1158 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
1159 LO_INFO_BLOCKSIZE(info))) {
1124 err = -EFBIG; 1160 err = -EFBIG;
1125 goto exit; 1161 goto exit;
1126 } 1162 }
1163 }
1127 1164
1128 loop_config_discard(lo); 1165 loop_config_discard(lo);
1129 1166
@@ -1306,12 +1343,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
1306 return err; 1343 return err;
1307} 1344}
1308 1345
1309static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) 1346static int loop_set_capacity(struct loop_device *lo)
1310{ 1347{
1311 if (unlikely(lo->lo_state != Lo_bound)) 1348 if (unlikely(lo->lo_state != Lo_bound))
1312 return -ENXIO; 1349 return -ENXIO;
1313 1350
1314 return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); 1351 return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
1352 lo->lo_logical_blocksize);
1315} 1353}
1316 1354
1317static int loop_set_dio(struct loop_device *lo, unsigned long arg) 1355static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1369,7 +1407,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1369 case LOOP_SET_CAPACITY: 1407 case LOOP_SET_CAPACITY:
1370 err = -EPERM; 1408 err = -EPERM;
1371 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1409 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
1372 err = loop_set_capacity(lo, bdev); 1410 err = loop_set_capacity(lo);
1373 break; 1411 break;
1374 case LOOP_SET_DIRECT_IO: 1412 case LOOP_SET_DIRECT_IO:
1375 err = -EPERM; 1413 err = -EPERM;
@@ -1645,7 +1683,7 @@ int loop_unregister_transfer(int number)
1645EXPORT_SYMBOL(loop_register_transfer); 1683EXPORT_SYMBOL(loop_register_transfer);
1646EXPORT_SYMBOL(loop_unregister_transfer); 1684EXPORT_SYMBOL(loop_unregister_transfer);
1647 1685
1648static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, 1686static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1649 const struct blk_mq_queue_data *bd) 1687 const struct blk_mq_queue_data *bd)
1650{ 1688{
1651 struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 1689 struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1654,7 +1692,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1654 blk_mq_start_request(bd->rq); 1692 blk_mq_start_request(bd->rq);
1655 1693
1656 if (lo->lo_state != Lo_bound) 1694 if (lo->lo_state != Lo_bound)
1657 return BLK_MQ_RQ_QUEUE_ERROR; 1695 return BLK_STS_IOERR;
1658 1696
1659 switch (req_op(cmd->rq)) { 1697 switch (req_op(cmd->rq)) {
1660 case REQ_OP_FLUSH: 1698 case REQ_OP_FLUSH:
@@ -1669,7 +1707,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1669 1707
1670 kthread_queue_work(&lo->worker, &cmd->work); 1708 kthread_queue_work(&lo->worker, &cmd->work);
1671 1709
1672 return BLK_MQ_RQ_QUEUE_OK; 1710 return BLK_STS_OK;
1673} 1711}
1674 1712
1675static void loop_handle_cmd(struct loop_cmd *cmd) 1713static void loop_handle_cmd(struct loop_cmd *cmd)
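
The loop conversion illustrates the new ->queue_rq() contract: it returns a blk_status_t (BLK_STS_OK, BLK_STS_RESOURCE to ask for a later retry, or an error status) instead of the old BLK_MQ_RQ_QUEUE_* codes. A minimal sketch with hypothetical stub helpers (my_dev_ready, my_dispatch):

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

static bool my_dev_ready(void *driver_data)	/* hypothetical stub */
{
	return driver_data != NULL;
}

static int my_dispatch(struct request *rq)	/* hypothetical stub */
{
	return 0;
}

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	if (!my_dev_ready(hctx->queue->queuedata))
		return BLK_STS_IOERR;

	blk_mq_start_request(rq);
	if (my_dispatch(rq))
		return BLK_STS_RESOURCE;	/* temporarily out of resources */
	return BLK_STS_OK;
}
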
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fecd3f97ef8c..2c096b9a17b8 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,6 +49,7 @@ struct loop_device {
49 struct file * lo_backing_file; 49 struct file * lo_backing_file;
50 struct block_device *lo_device; 50 struct block_device *lo_device;
51 unsigned lo_blocksize; 51 unsigned lo_blocksize;
52 unsigned lo_logical_blocksize;
52 void *key_data; 53 void *key_data;
53 54
54 gfp_t old_gfp_mask; 55 gfp_t old_gfp_mask;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3a779a4f5653..61b046f256ca 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
532static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, 532static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
533 struct smart_attr *attrib); 533 struct smart_attr *attrib);
534 534
535static void mtip_complete_command(struct mtip_cmd *cmd, int status) 535static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
536{ 536{
537 struct request *req = blk_mq_rq_from_pdu(cmd); 537 struct request *req = blk_mq_rq_from_pdu(cmd);
538 538
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
568 if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { 568 if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
569 cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); 569 cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
570 dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); 570 dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
571 mtip_complete_command(cmd, -EIO); 571 mtip_complete_command(cmd, BLK_STS_IOERR);
572 return; 572 return;
573 } 573 }
574 574
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
667 tag, 667 tag,
668 fail_reason != NULL ? 668 fail_reason != NULL ?
669 fail_reason : "unknown"); 669 fail_reason : "unknown");
670 mtip_complete_command(cmd, -ENODATA); 670 mtip_complete_command(cmd, BLK_STS_MEDIUM);
671 continue; 671 continue;
672 } 672 }
673 } 673 }
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
690 dev_warn(&port->dd->pdev->dev, 690 dev_warn(&port->dd->pdev->dev,
691 "retiring tag %d\n", tag); 691 "retiring tag %d\n", tag);
692 692
693 mtip_complete_command(cmd, -EIO); 693 mtip_complete_command(cmd, BLK_STS_IOERR);
694 } 694 }
695 } 695 }
696 print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); 696 print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -1063,23 +1063,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
1063 /* insert request and run queue */ 1063 /* insert request and run queue */
1064 blk_execute_rq(rq->q, NULL, rq, true); 1064 blk_execute_rq(rq->q, NULL, rq, true);
1065 1065
1066 rv = int_cmd->status; 1066 if (int_cmd->status) {
1067 if (rv < 0) { 1067 dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
1068 if (rv == -ERESTARTSYS) { /* interrupted */ 1068 fis->command, int_cmd->status);
1069 dev_err(&dd->pdev->dev, 1069 rv = -EIO;
1070 "Internal command [%02X] was interrupted after %u ms\n",
1071 fis->command,
1072 jiffies_to_msecs(jiffies - start));
1073 rv = -EINTR;
1074 goto exec_ic_exit;
1075 } else if (rv == 0) /* timeout */
1076 dev_err(&dd->pdev->dev,
1077 "Internal command did not complete [%02X] within timeout of %lu ms\n",
1078 fis->command, timeout);
1079 else
1080 dev_err(&dd->pdev->dev,
1081 "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
1082 fis->command, rv, timeout);
1083 1070
1084 if (mtip_check_surprise_removal(dd->pdev) || 1071 if (mtip_check_surprise_removal(dd->pdev) ||
1085 test_bit(MTIP_DDF_REMOVE_PENDING_BIT, 1072 test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
@@ -2753,7 +2740,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
2753 dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); 2740 dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
2754 2741
2755 clear_bit(req->tag, dd->port->cmds_to_issue); 2742 clear_bit(req->tag, dd->port->cmds_to_issue);
2756 cmd->status = -EIO; 2743 cmd->status = BLK_STS_IOERR;
2757 mtip_softirq_done_fn(req); 2744 mtip_softirq_done_fn(req);
2758} 2745}
2759 2746
@@ -3597,7 +3584,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
3597 int err; 3584 int err;
3598 3585
3599 err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); 3586 err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
3600 blk_mq_end_request(rq, err); 3587 blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
3601 return 0; 3588 return 0;
3602 } 3589 }
3603 3590
@@ -3633,8 +3620,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
3633 return false; 3620 return false;
3634} 3621}
3635 3622
3636static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, 3623static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
3637 struct request *rq) 3624 struct request *rq)
3638{ 3625{
3639 struct driver_data *dd = hctx->queue->queuedata; 3626 struct driver_data *dd = hctx->queue->queuedata;
3640 struct mtip_int_cmd *icmd = rq->special; 3627 struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3629,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
3642 struct mtip_cmd_sg *command_sg; 3629 struct mtip_cmd_sg *command_sg;
3643 3630
3644 if (mtip_commands_active(dd->port)) 3631 if (mtip_commands_active(dd->port))
3645 return BLK_MQ_RQ_QUEUE_BUSY; 3632 return BLK_STS_RESOURCE;
3646 3633
3647 /* Populate the SG list */ 3634 /* Populate the SG list */
3648 cmd->command_header->opts = 3635 cmd->command_header->opts =
@@ -3666,10 +3653,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
3666 3653
3667 blk_mq_start_request(rq); 3654 blk_mq_start_request(rq);
3668 mtip_issue_non_ncq_command(dd->port, rq->tag); 3655 mtip_issue_non_ncq_command(dd->port, rq->tag);
3669 return BLK_MQ_RQ_QUEUE_OK; 3656 return 0;
3670} 3657}
3671 3658
3672static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, 3659static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
3673 const struct blk_mq_queue_data *bd) 3660 const struct blk_mq_queue_data *bd)
3674{ 3661{
3675 struct request *rq = bd->rq; 3662 struct request *rq = bd->rq;
@@ -3681,15 +3668,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
3681 return mtip_issue_reserved_cmd(hctx, rq); 3668 return mtip_issue_reserved_cmd(hctx, rq);
3682 3669
3683 if (unlikely(mtip_check_unal_depth(hctx, rq))) 3670 if (unlikely(mtip_check_unal_depth(hctx, rq)))
3684 return BLK_MQ_RQ_QUEUE_BUSY; 3671 return BLK_STS_RESOURCE;
3685 3672
3686 blk_mq_start_request(rq); 3673 blk_mq_start_request(rq);
3687 3674
3688 ret = mtip_submit_request(hctx, rq); 3675 ret = mtip_submit_request(hctx, rq);
3689 if (likely(!ret)) 3676 if (likely(!ret))
3690 return BLK_MQ_RQ_QUEUE_OK; 3677 return BLK_STS_OK;
3691 3678 return BLK_STS_IOERR;
3692 return BLK_MQ_RQ_QUEUE_ERROR;
3693} 3679}
3694 3680
3695static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, 3681static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3730,7 +3716,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
3730 if (reserved) { 3716 if (reserved) {
3731 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); 3717 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
3732 3718
3733 cmd->status = -ETIME; 3719 cmd->status = BLK_STS_TIMEOUT;
3734 return BLK_EH_HANDLED; 3720 return BLK_EH_HANDLED;
3735 } 3721 }
3736 3722
@@ -3961,7 +3947,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
3961{ 3947{
3962 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); 3948 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3963 3949
3964 cmd->status = -ENODEV; 3950 cmd->status = BLK_STS_IOERR;
3965 blk_mq_complete_request(rq); 3951 blk_mq_complete_request(rq);
3966} 3952}
3967 3953
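
The mtip32xx changes show per-command status fields moving from int errnos to blk_status_t, with the rough mapping visible in the hunks: -EIO -> BLK_STS_IOERR, -ENODATA -> BLK_STS_MEDIUM, -ETIME -> BLK_STS_TIMEOUT. Where a legacy path still produces an errno, it can be translated once at the blk-mq boundary; a sketch (complete_with_errno is hypothetical):

#include <linux/blk-mq.h>

static void complete_with_errno(struct request *rq, int err)
{
	/* translate a legacy negative errno once, at completion time */
	blk_mq_end_request(rq, errno_to_blk_status(err));
}
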
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e0bb78..e8286af50e16 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -342,7 +342,7 @@ struct mtip_cmd {
342 int retries; /* The number of retries left for this command. */ 342 int retries; /* The number of retries left for this command. */
343 343
344 int direction; /* Data transfer direction */ 344 int direction; /* Data transfer direction */
345 int status; 345 blk_status_t status;
346}; 346};
347 347
348/* Structure used to describe a port. */ 348/* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f3f191ba8ca4..977ec960dd2f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -116,7 +116,7 @@ struct nbd_cmd {
116 int index; 116 int index;
117 int cookie; 117 int cookie;
118 struct completion send_complete; 118 struct completion send_complete;
119 int status; 119 blk_status_t status;
120}; 120};
121 121
122#if IS_ENABLED(CONFIG_DEBUG_FS) 122#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
286 struct nbd_config *config; 286 struct nbd_config *config;
287 287
288 if (!refcount_inc_not_zero(&nbd->config_refs)) { 288 if (!refcount_inc_not_zero(&nbd->config_refs)) {
289 cmd->status = -EIO; 289 cmd->status = BLK_STS_TIMEOUT;
290 return BLK_EH_HANDLED; 290 return BLK_EH_HANDLED;
291 } 291 }
292 292
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
331 "Connection timed out\n"); 331 "Connection timed out\n");
332 } 332 }
333 set_bit(NBD_TIMEDOUT, &config->runtime_flags); 333 set_bit(NBD_TIMEDOUT, &config->runtime_flags);
334 cmd->status = -EIO; 334 cmd->status = BLK_STS_IOERR;
335 sock_shutdown(nbd); 335 sock_shutdown(nbd);
336 nbd_config_put(nbd); 336 nbd_config_put(nbd);
337 337
@@ -400,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
400 unsigned long size = blk_rq_bytes(req); 400 unsigned long size = blk_rq_bytes(req);
401 struct bio *bio; 401 struct bio *bio;
402 u32 type; 402 u32 type;
403 u32 nbd_cmd_flags = 0;
403 u32 tag = blk_mq_unique_tag(req); 404 u32 tag = blk_mq_unique_tag(req);
404 int sent = nsock->sent, skip = 0; 405 int sent = nsock->sent, skip = 0;
405 406
@@ -429,6 +430,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
429 return -EIO; 430 return -EIO;
430 } 431 }
431 432
433 if (req->cmd_flags & REQ_FUA)
434 nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
435
432 /* We did a partial send previously, and we at least sent the whole 436 /* We did a partial send previously, and we at least sent the whole
433 * request struct, so just go and send the rest of the pages in the 437 * request struct, so just go and send the rest of the pages in the
434 * request. 438 * request.
@@ -442,7 +446,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
442 } 446 }
443 cmd->index = index; 447 cmd->index = index;
444 cmd->cookie = nsock->cookie; 448 cmd->cookie = nsock->cookie;
445 request.type = htonl(type); 449 request.type = htonl(type | nbd_cmd_flags);
446 if (type != NBD_CMD_FLUSH) { 450 if (type != NBD_CMD_FLUSH) {
447 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 451 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
448 request.len = htonl(size); 452 request.len = htonl(size);
@@ -465,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
465 nsock->pending = req; 469 nsock->pending = req;
466 nsock->sent = sent; 470 nsock->sent = sent;
467 } 471 }
468 return BLK_MQ_RQ_QUEUE_BUSY; 472 return BLK_STS_RESOURCE;
469 } 473 }
470 dev_err_ratelimited(disk_to_dev(nbd->disk), 474 dev_err_ratelimited(disk_to_dev(nbd->disk),
471 "Send control failed (result %d)\n", result); 475 "Send control failed (result %d)\n", result);
@@ -506,7 +510,7 @@ send_pages:
506 */ 510 */
507 nsock->pending = req; 511 nsock->pending = req;
508 nsock->sent = sent; 512 nsock->sent = sent;
509 return BLK_MQ_RQ_QUEUE_BUSY; 513 return BLK_STS_RESOURCE;
510 } 514 }
511 dev_err(disk_to_dev(nbd->disk), 515 dev_err(disk_to_dev(nbd->disk),
512 "Send data failed (result %d)\n", 516 "Send data failed (result %d)\n",
@@ -574,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
574 if (ntohl(reply.error)) { 578 if (ntohl(reply.error)) {
575 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", 579 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
576 ntohl(reply.error)); 580 ntohl(reply.error));
577 cmd->status = -EIO; 581 cmd->status = BLK_STS_IOERR;
578 return cmd; 582 return cmd;
579 } 583 }
580 584
@@ -599,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
599 */ 603 */
600 if (nbd_disconnected(config) || 604 if (nbd_disconnected(config) ||
601 config->num_connections <= 1) { 605 config->num_connections <= 1) {
602 cmd->status = -EIO; 606 cmd->status = BLK_STS_IOERR;
603 return cmd; 607 return cmd;
604 } 608 }
605 return ERR_PTR(-EIO); 609 return ERR_PTR(-EIO);
@@ -651,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
651 if (!blk_mq_request_started(req)) 655 if (!blk_mq_request_started(req))
652 return; 656 return;
653 cmd = blk_mq_rq_to_pdu(req); 657 cmd = blk_mq_rq_to_pdu(req);
654 cmd->status = -EIO; 658 cmd->status = BLK_STS_IOERR;
655 blk_mq_complete_request(req); 659 blk_mq_complete_request(req);
656} 660}
657 661
@@ -740,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
740 nbd_config_put(nbd); 744 nbd_config_put(nbd);
741 return -EINVAL; 745 return -EINVAL;
742 } 746 }
743 cmd->status = 0; 747 cmd->status = BLK_STS_OK;
744again: 748again:
745 nsock = config->socks[index]; 749 nsock = config->socks[index];
746 mutex_lock(&nsock->tx_lock); 750 mutex_lock(&nsock->tx_lock);
@@ -794,7 +798,7 @@ out:
794 return ret; 798 return ret;
795} 799}
796 800
797static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, 801static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
798 const struct blk_mq_queue_data *bd) 802 const struct blk_mq_queue_data *bd)
799{ 803{
800 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 804 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -818,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
818 * appropriate. 822 * appropriate.
819 */ 823 */
820 ret = nbd_handle_cmd(cmd, hctx->queue_num); 824 ret = nbd_handle_cmd(cmd, hctx->queue_num);
821 if (ret < 0)
822 ret = BLK_MQ_RQ_QUEUE_ERROR;
823 if (!ret)
824 ret = BLK_MQ_RQ_QUEUE_OK;
825 complete(&cmd->send_complete); 825 complete(&cmd->send_complete);
826 826
827 return ret; 827 return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
828} 828}
829 829
830static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, 830static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@ -910,6 +910,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
910 continue; 910 continue;
911 } 911 }
912 sk_set_memalloc(sock->sk); 912 sk_set_memalloc(sock->sk);
913 sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
913 atomic_inc(&config->recv_threads); 914 atomic_inc(&config->recv_threads);
914 refcount_inc(&nbd->config_refs); 915 refcount_inc(&nbd->config_refs);
915 old = nsock->sock; 916 old = nsock->sock;
@@ -957,8 +958,12 @@ static void nbd_parse_flags(struct nbd_device *nbd)
957 set_disk_ro(nbd->disk, false); 958 set_disk_ro(nbd->disk, false);
958 if (config->flags & NBD_FLAG_SEND_TRIM) 959 if (config->flags & NBD_FLAG_SEND_TRIM)
959 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); 960 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
960 if (config->flags & NBD_FLAG_SEND_FLUSH) 961 if (config->flags & NBD_FLAG_SEND_FLUSH) {
961 blk_queue_write_cache(nbd->disk->queue, true, false); 962 if (config->flags & NBD_FLAG_SEND_FUA)
963 blk_queue_write_cache(nbd->disk->queue, true, true);
964 else
965 blk_queue_write_cache(nbd->disk->queue, true, false);
966 }
962 else 967 else
963 blk_queue_write_cache(nbd->disk->queue, false, false); 968 blk_queue_write_cache(nbd->disk->queue, false, false);
964} 969}
@@ -1071,6 +1076,7 @@ static int nbd_start_device(struct nbd_device *nbd)
1071 return -ENOMEM; 1076 return -ENOMEM;
1072 } 1077 }
1073 sk_set_memalloc(config->socks[i]->sock->sk); 1078 sk_set_memalloc(config->socks[i]->sock->sk);
1079 config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
1074 atomic_inc(&config->recv_threads); 1080 atomic_inc(&config->recv_threads);
1075 refcount_inc(&nbd->config_refs); 1081 refcount_inc(&nbd->config_refs);
1076 INIT_WORK(&args->work, recv_work); 1082 INIT_WORK(&args->work, recv_work);
@@ -1305,6 +1311,8 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
1305 seq_puts(s, "NBD_FLAG_READ_ONLY\n"); 1311 seq_puts(s, "NBD_FLAG_READ_ONLY\n");
1306 if (flags & NBD_FLAG_SEND_FLUSH) 1312 if (flags & NBD_FLAG_SEND_FLUSH)
1307 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n"); 1313 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
1314 if (flags & NBD_FLAG_SEND_FUA)
1315 seq_puts(s, "NBD_FLAG_SEND_FUA\n");
1308 if (flags & NBD_FLAG_SEND_TRIM) 1316 if (flags & NBD_FLAG_SEND_TRIM)
1309 seq_puts(s, "NBD_FLAG_SEND_TRIM\n"); 1317 seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
1310 1318
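
The nbd hunks add FUA pass-through: when the server advertises both flush and FUA, the queue is configured for both via blk_queue_write_cache(). A sketch of that setup (the *_supported flags are assumptions standing in for the NBD_FLAG_* checks):

#include <linux/blkdev.h>

static void setup_write_cache(struct request_queue *q,
			      bool flush_supported, bool fua_supported)
{
	/* second argument: volatile write cache present; third: FUA honoured */
	blk_queue_write_cache(q, flush_supported,
			      flush_supported && fua_supported);
}
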
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1eeac8e..71f4422eba81 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,7 +35,8 @@ struct nullb {
35 struct request_queue *q; 35 struct request_queue *q;
36 struct gendisk *disk; 36 struct gendisk *disk;
37 struct nvm_dev *ndev; 37 struct nvm_dev *ndev;
38 struct blk_mq_tag_set tag_set; 38 struct blk_mq_tag_set *tag_set;
39 struct blk_mq_tag_set __tag_set;
39 struct hrtimer timer; 40 struct hrtimer timer;
40 unsigned int queue_depth; 41 unsigned int queue_depth;
41 spinlock_t lock; 42 spinlock_t lock;
@@ -50,6 +51,7 @@ static struct mutex lock;
50static int null_major; 51static int null_major;
51static int nullb_indexes; 52static int nullb_indexes;
52static struct kmem_cache *ppa_cache; 53static struct kmem_cache *ppa_cache;
54static struct blk_mq_tag_set tag_set;
53 55
54enum { 56enum {
55 NULL_IRQ_NONE = 0, 57 NULL_IRQ_NONE = 0,
@@ -109,7 +111,7 @@ static int bs = 512;
109module_param(bs, int, S_IRUGO); 111module_param(bs, int, S_IRUGO);
110MODULE_PARM_DESC(bs, "Block size (in bytes)"); 112MODULE_PARM_DESC(bs, "Block size (in bytes)");
111 113
112static int nr_devices = 2; 114static int nr_devices = 1;
113module_param(nr_devices, int, S_IRUGO); 115module_param(nr_devices, int, S_IRUGO);
114MODULE_PARM_DESC(nr_devices, "Number of devices to register"); 116MODULE_PARM_DESC(nr_devices, "Number of devices to register");
115 117
@@ -121,6 +123,10 @@ static bool blocking;
121module_param(blocking, bool, S_IRUGO); 123module_param(blocking, bool, S_IRUGO);
122MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); 124MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
123 125
126static bool shared_tags;
127module_param(shared_tags, bool, S_IRUGO);
128MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
129
124static int irqmode = NULL_IRQ_SOFTIRQ; 130static int irqmode = NULL_IRQ_SOFTIRQ;
125 131
126static int null_set_irqmode(const char *str, const struct kernel_param *kp) 132static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -229,11 +235,11 @@ static void end_cmd(struct nullb_cmd *cmd)
229 235
230 switch (queue_mode) { 236 switch (queue_mode) {
231 case NULL_Q_MQ: 237 case NULL_Q_MQ:
232 blk_mq_end_request(cmd->rq, 0); 238 blk_mq_end_request(cmd->rq, BLK_STS_OK);
233 return; 239 return;
234 case NULL_Q_RQ: 240 case NULL_Q_RQ:
235 INIT_LIST_HEAD(&cmd->rq->queuelist); 241 INIT_LIST_HEAD(&cmd->rq->queuelist);
236 blk_end_request_all(cmd->rq, 0); 242 blk_end_request_all(cmd->rq, BLK_STS_OK);
237 break; 243 break;
238 case NULL_Q_BIO: 244 case NULL_Q_BIO:
239 bio_endio(cmd->bio); 245 bio_endio(cmd->bio);
@@ -356,7 +362,7 @@ static void null_request_fn(struct request_queue *q)
356 } 362 }
357} 363}
358 364
359static int null_queue_rq(struct blk_mq_hw_ctx *hctx, 365static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
360 const struct blk_mq_queue_data *bd) 366 const struct blk_mq_queue_data *bd)
361{ 367{
362 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 368 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,34 +379,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
373 blk_mq_start_request(bd->rq); 379 blk_mq_start_request(bd->rq);
374 380
375 null_handle_cmd(cmd); 381 null_handle_cmd(cmd);
376 return BLK_MQ_RQ_QUEUE_OK; 382 return BLK_STS_OK;
377}
378
379static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
380{
381 BUG_ON(!nullb);
382 BUG_ON(!nq);
383
384 init_waitqueue_head(&nq->wait);
385 nq->queue_depth = nullb->queue_depth;
386}
387
388static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
389 unsigned int index)
390{
391 struct nullb *nullb = data;
392 struct nullb_queue *nq = &nullb->queues[index];
393
394 hctx->driver_data = nq;
395 null_init_queue(nullb, nq);
396 nullb->nr_queues++;
397
398 return 0;
399} 383}
400 384
401static const struct blk_mq_ops null_mq_ops = { 385static const struct blk_mq_ops null_mq_ops = {
402 .queue_rq = null_queue_rq, 386 .queue_rq = null_queue_rq,
403 .init_hctx = null_init_hctx,
404 .complete = null_softirq_done_fn, 387 .complete = null_softirq_done_fn,
405}; 388};
406 389
@@ -422,11 +405,12 @@ static void cleanup_queues(struct nullb *nullb)
422 405
423#ifdef CONFIG_NVM 406#ifdef CONFIG_NVM
424 407
425static void null_lnvm_end_io(struct request *rq, int error) 408static void null_lnvm_end_io(struct request *rq, blk_status_t status)
426{ 409{
427 struct nvm_rq *rqd = rq->end_io_data; 410 struct nvm_rq *rqd = rq->end_io_data;
428 411
429 rqd->error = error; 412 /* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
413 rqd->error = status ? -EIO : 0;
430 nvm_end_io(rqd); 414 nvm_end_io(rqd);
431 415
432 blk_put_request(rq); 416 blk_put_request(rq);
@@ -591,8 +575,8 @@ static void null_del_dev(struct nullb *nullb)
591 else 575 else
592 del_gendisk(nullb->disk); 576 del_gendisk(nullb->disk);
593 blk_cleanup_queue(nullb->q); 577 blk_cleanup_queue(nullb->q);
594 if (queue_mode == NULL_Q_MQ) 578 if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
595 blk_mq_free_tag_set(&nullb->tag_set); 579 blk_mq_free_tag_set(nullb->tag_set);
596 if (!use_lightnvm) 580 if (!use_lightnvm)
597 put_disk(nullb->disk); 581 put_disk(nullb->disk);
598 cleanup_queues(nullb); 582 cleanup_queues(nullb);
@@ -614,6 +598,32 @@ static const struct block_device_operations null_fops = {
614 .release = null_release, 598 .release = null_release,
615}; 599};
616 600
601static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
602{
603 BUG_ON(!nullb);
604 BUG_ON(!nq);
605
606 init_waitqueue_head(&nq->wait);
607 nq->queue_depth = nullb->queue_depth;
608}
609
610static void null_init_queues(struct nullb *nullb)
611{
612 struct request_queue *q = nullb->q;
613 struct blk_mq_hw_ctx *hctx;
614 struct nullb_queue *nq;
615 int i;
616
617 queue_for_each_hw_ctx(q, hctx, i) {
618 if (!hctx->nr_ctx || !hctx->tags)
619 continue;
620 nq = &nullb->queues[i];
621 hctx->driver_data = nq;
622 null_init_queue(nullb, nq);
623 nullb->nr_queues++;
624 }
625}
626
617static int setup_commands(struct nullb_queue *nq) 627static int setup_commands(struct nullb_queue *nq)
618{ 628{
619 struct nullb_cmd *cmd; 629 struct nullb_cmd *cmd;
@@ -694,6 +704,22 @@ static int null_gendisk_register(struct nullb *nullb)
694 return 0; 704 return 0;
695} 705}
696 706
707static int null_init_tag_set(struct blk_mq_tag_set *set)
708{
709 set->ops = &null_mq_ops;
710 set->nr_hw_queues = submit_queues;
711 set->queue_depth = hw_queue_depth;
712 set->numa_node = home_node;
713 set->cmd_size = sizeof(struct nullb_cmd);
714 set->flags = BLK_MQ_F_SHOULD_MERGE;
715 set->driver_data = NULL;
716
717 if (blocking)
718 set->flags |= BLK_MQ_F_BLOCKING;
719
720 return blk_mq_alloc_tag_set(set);
721}
722
697static int null_add_dev(void) 723static int null_add_dev(void)
698{ 724{
699 struct nullb *nullb; 725 struct nullb *nullb;
@@ -715,26 +741,23 @@ static int null_add_dev(void)
715 goto out_free_nullb; 741 goto out_free_nullb;
716 742
717 if (queue_mode == NULL_Q_MQ) { 743 if (queue_mode == NULL_Q_MQ) {
718 nullb->tag_set.ops = &null_mq_ops; 744 if (shared_tags) {
719 nullb->tag_set.nr_hw_queues = submit_queues; 745 nullb->tag_set = &tag_set;
720 nullb->tag_set.queue_depth = hw_queue_depth; 746 rv = 0;
721 nullb->tag_set.numa_node = home_node; 747 } else {
722 nullb->tag_set.cmd_size = sizeof(struct nullb_cmd); 748 nullb->tag_set = &nullb->__tag_set;
723 nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 749 rv = null_init_tag_set(nullb->tag_set);
724 nullb->tag_set.driver_data = nullb; 750 }
725 751
726 if (blocking)
727 nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
728
729 rv = blk_mq_alloc_tag_set(&nullb->tag_set);
730 if (rv) 752 if (rv)
731 goto out_cleanup_queues; 753 goto out_cleanup_queues;
732 754
733 nullb->q = blk_mq_init_queue(&nullb->tag_set); 755 nullb->q = blk_mq_init_queue(nullb->tag_set);
734 if (IS_ERR(nullb->q)) { 756 if (IS_ERR(nullb->q)) {
735 rv = -ENOMEM; 757 rv = -ENOMEM;
736 goto out_cleanup_tags; 758 goto out_cleanup_tags;
737 } 759 }
760 null_init_queues(nullb);
738 } else if (queue_mode == NULL_Q_BIO) { 761 } else if (queue_mode == NULL_Q_BIO) {
739 nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); 762 nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
740 if (!nullb->q) { 763 if (!nullb->q) {
@@ -787,8 +810,8 @@ static int null_add_dev(void)
787out_cleanup_blk_queue: 810out_cleanup_blk_queue:
788 blk_cleanup_queue(nullb->q); 811 blk_cleanup_queue(nullb->q);
789out_cleanup_tags: 812out_cleanup_tags:
790 if (queue_mode == NULL_Q_MQ) 813 if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
791 blk_mq_free_tag_set(&nullb->tag_set); 814 blk_mq_free_tag_set(nullb->tag_set);
792out_cleanup_queues: 815out_cleanup_queues:
793 cleanup_queues(nullb); 816 cleanup_queues(nullb);
794out_free_nullb: 817out_free_nullb:
@@ -821,6 +844,9 @@ static int __init null_init(void)
821 queue_mode = NULL_Q_MQ; 844 queue_mode = NULL_Q_MQ;
822 } 845 }
823 846
847 if (queue_mode == NULL_Q_MQ && shared_tags)
848 null_init_tag_set(&tag_set);
849
824 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { 850 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
825 if (submit_queues < nr_online_nodes) { 851 if (submit_queues < nr_online_nodes) {
826 pr_warn("null_blk: submit_queues param is set to %u.", 852 pr_warn("null_blk: submit_queues param is set to %u.",
@@ -881,6 +907,9 @@ static void __exit null_exit(void)
881 } 907 }
882 mutex_unlock(&lock); 908 mutex_unlock(&lock);
883 909
910 if (queue_mode == NULL_Q_MQ && shared_tags)
911 blk_mq_free_tag_set(&tag_set);
912
884 kmem_cache_destroy(ppa_cache); 913 kmem_cache_destroy(ppa_cache);
885} 914}
886 915
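
null_blk now optionally shares one blk_mq_tag_set across all devices instead of allocating one per device. A rough sketch of the shared-set pattern, with error handling trimmed, sizes chosen arbitrarily, and callers expected to check the returned queue with IS_ERR():

#include <linux/blk-mq.h>
#include <linux/numa.h>

static struct blk_mq_tag_set shared_set;	/* one set for all devices */

static struct request_queue *add_device_queue(const struct blk_mq_ops *ops)
{
	if (!shared_set.ops) {			/* first device: initialise once */
		shared_set.ops = ops;
		shared_set.nr_hw_queues = 1;
		shared_set.queue_depth = 64;
		shared_set.numa_node = NUMA_NO_NODE;
		shared_set.flags = BLK_MQ_F_SHOULD_MERGE;
		if (blk_mq_alloc_tag_set(&shared_set))
			return NULL;
	}
	/* every device's queue is built on the same tag set */
	return blk_mq_init_queue(&shared_set);
}

The shared set is freed once with blk_mq_free_tag_set() at module exit, mirroring the null_exit() hunk above.
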
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef34d5a..7b8c6368beb7 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -305,6 +305,7 @@ static void pcd_init_units(void)
305 put_disk(disk); 305 put_disk(disk);
306 continue; 306 continue;
307 } 307 }
308 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
308 cd->disk = disk; 309 cd->disk = disk;
309 cd->pi = &cd->pia; 310 cd->pi = &cd->pia;
310 cd->present = 0; 311 cd->present = 0;
@@ -783,7 +784,7 @@ static void pcd_request(void)
783 ps_set_intr(do_pcd_read, NULL, 0, nice); 784 ps_set_intr(do_pcd_read, NULL, 0, nice);
784 return; 785 return;
785 } else { 786 } else {
786 __blk_end_request_all(pcd_req, -EIO); 787 __blk_end_request_all(pcd_req, BLK_STS_IOERR);
787 pcd_req = NULL; 788 pcd_req = NULL;
788 } 789 }
789 } 790 }
@@ -794,7 +795,7 @@ static void do_pcd_request(struct request_queue *q)
794 pcd_request(); 795 pcd_request();
795} 796}
796 797
797static inline void next_request(int err) 798static inline void next_request(blk_status_t err)
798{ 799{
799 unsigned long saved_flags; 800 unsigned long saved_flags;
800 801
@@ -837,7 +838,7 @@ static void pcd_start(void)
837 838
838 if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) { 839 if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
839 pcd_bufblk = -1; 840 pcd_bufblk = -1;
840 next_request(-EIO); 841 next_request(BLK_STS_IOERR);
841 return; 842 return;
842 } 843 }
843 844
@@ -871,7 +872,7 @@ static void do_pcd_read_drq(void)
871 return; 872 return;
872 } 873 }
873 pcd_bufblk = -1; 874 pcd_bufblk = -1;
874 next_request(-EIO); 875 next_request(BLK_STS_IOERR);
875 return; 876 return;
876 } 877 }
877 878
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f90978..27a44b97393a 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -438,7 +438,7 @@ static void run_fsm(void)
438 phase = NULL; 438 phase = NULL;
439 spin_lock_irqsave(&pd_lock, saved_flags); 439 spin_lock_irqsave(&pd_lock, saved_flags);
440 if (!__blk_end_request_cur(pd_req, 440 if (!__blk_end_request_cur(pd_req,
441 res == Ok ? 0 : -EIO)) { 441 res == Ok ? 0 : BLK_STS_IOERR)) {
442 if (!set_next_request()) 442 if (!set_next_request())
443 stop = 1; 443 stop = 1;
444 } 444 }
@@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk)
863 return; 863 return;
864 } 864 }
865 blk_queue_max_hw_sectors(p->queue, cluster); 865 blk_queue_max_hw_sectors(p->queue, cluster);
866 blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
866 867
867 if (disk->drive == -1) { 868 if (disk->drive == -1) {
868 for (disk->drive = 0; disk->drive <= 1; disk->drive++) 869 for (disk->drive = 0; disk->drive <= 1; disk->drive++)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca7315ddc..eef7a91f667d 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -293,6 +293,7 @@ static void __init pf_init_units(void)
293 return; 293 return;
294 } 294 }
295 blk_queue_max_segments(disk->queue, cluster); 295 blk_queue_max_segments(disk->queue, cluster);
296 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
296 pf->disk = disk; 297 pf->disk = disk;
297 pf->pi = &pf->pia; 298 pf->pi = &pf->pia;
298 pf->media_status = PF_NM; 299 pf->media_status = PF_NM;
@@ -801,7 +802,7 @@ static int set_next_request(void)
801 return pf_req != NULL; 802 return pf_req != NULL;
802} 803}
803 804
804static void pf_end_request(int err) 805static void pf_end_request(blk_status_t err)
805{ 806{
806 if (pf_req && !__blk_end_request_cur(pf_req, err)) 807 if (pf_req && !__blk_end_request_cur(pf_req, err))
807 pf_req = NULL; 808 pf_req = NULL;
@@ -821,7 +822,7 @@ repeat:
821 pf_count = blk_rq_cur_sectors(pf_req); 822 pf_count = blk_rq_cur_sectors(pf_req);
822 823
823 if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) { 824 if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
824 pf_end_request(-EIO); 825 pf_end_request(BLK_STS_IOERR);
825 goto repeat; 826 goto repeat;
826 } 827 }
827 828
@@ -836,7 +837,7 @@ repeat:
836 pi_do_claimed(pf_current->pi, do_pf_write); 837 pi_do_claimed(pf_current->pi, do_pf_write);
837 else { 838 else {
838 pf_busy = 0; 839 pf_busy = 0;
839 pf_end_request(-EIO); 840 pf_end_request(BLK_STS_IOERR);
840 goto repeat; 841 goto repeat;
841 } 842 }
842} 843}
@@ -868,7 +869,7 @@ static int pf_next_buf(void)
868 return 0; 869 return 0;
869} 870}
870 871
871static inline void next_request(int err) 872static inline void next_request(blk_status_t err)
872{ 873{
873 unsigned long saved_flags; 874 unsigned long saved_flags;
874 875
@@ -896,7 +897,7 @@ static void do_pf_read_start(void)
896 pi_do_claimed(pf_current->pi, do_pf_read_start); 897 pi_do_claimed(pf_current->pi, do_pf_read_start);
897 return; 898 return;
898 } 899 }
899 next_request(-EIO); 900 next_request(BLK_STS_IOERR);
900 return; 901 return;
901 } 902 }
902 pf_mask = STAT_DRQ; 903 pf_mask = STAT_DRQ;
@@ -915,7 +916,7 @@ static void do_pf_read_drq(void)
915 pi_do_claimed(pf_current->pi, do_pf_read_start); 916 pi_do_claimed(pf_current->pi, do_pf_read_start);
916 return; 917 return;
917 } 918 }
918 next_request(-EIO); 919 next_request(BLK_STS_IOERR);
919 return; 920 return;
920 } 921 }
921 pi_read_block(pf_current->pi, pf_buf, 512); 922 pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +943,7 @@ static void do_pf_write_start(void)
942 pi_do_claimed(pf_current->pi, do_pf_write_start); 943 pi_do_claimed(pf_current->pi, do_pf_write_start);
943 return; 944 return;
944 } 945 }
945 next_request(-EIO); 946 next_request(BLK_STS_IOERR);
946 return; 947 return;
947 } 948 }
948 949
@@ -955,7 +956,7 @@ static void do_pf_write_start(void)
955 pi_do_claimed(pf_current->pi, do_pf_write_start); 956 pi_do_claimed(pf_current->pi, do_pf_write_start);
956 return; 957 return;
957 } 958 }
958 next_request(-EIO); 959 next_request(BLK_STS_IOERR);
959 return; 960 return;
960 } 961 }
961 pi_write_block(pf_current->pi, pf_buf, 512); 962 pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +976,7 @@ static void do_pf_write_done(void)
975 pi_do_claimed(pf_current->pi, do_pf_write_start); 976 pi_do_claimed(pf_current->pi, do_pf_write_start);
976 return; 977 return;
977 } 978 }
978 next_request(-EIO); 979 next_request(BLK_STS_IOERR);
979 return; 980 return;
980 } 981 }
981 pi_disconnect(pf_current->pi); 982 pi_disconnect(pf_current->pi);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 205b865ebeb9..467beca397a2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -98,6 +98,7 @@ static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
98static int write_congestion_off = PKT_WRITE_CONGESTION_OFF; 98static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
99static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */ 99static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
100static mempool_t *psd_pool; 100static mempool_t *psd_pool;
101static struct bio_set *pkt_bio_set;
101 102
102static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */ 103static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
103static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */ 104static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -707,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
707 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); 708 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
708 if (IS_ERR(rq)) 709 if (IS_ERR(rq))
709 return PTR_ERR(rq); 710 return PTR_ERR(rq);
710 scsi_req_init(rq);
711 711
712 if (cgc->buflen) { 712 if (cgc->buflen) {
713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, 713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
952 952
953 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", 953 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
954 bio, (unsigned long long)pkt->sector, 954 bio, (unsigned long long)pkt->sector,
955 (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error); 955 (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
956 956
957 if (bio->bi_error) 957 if (bio->bi_status)
958 atomic_inc(&pkt->io_errors); 958 atomic_inc(&pkt->io_errors);
959 if (atomic_dec_and_test(&pkt->io_wait)) { 959 if (atomic_dec_and_test(&pkt->io_wait)) {
960 atomic_inc(&pkt->run_sm); 960 atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
969 struct pktcdvd_device *pd = pkt->pd; 969 struct pktcdvd_device *pd = pkt->pd;
970 BUG_ON(!pd); 970 BUG_ON(!pd);
971 971
972 pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error); 972 pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
973 973
974 pd->stats.pkt_ended++; 974 pd->stats.pkt_ended++;
975 975
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1305 pkt_queue_bio(pd, pkt->w_bio); 1305 pkt_queue_bio(pd, pkt->w_bio);
1306} 1306}
1307 1307
1308static void pkt_finish_packet(struct packet_data *pkt, int error) 1308static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
1309{ 1309{
1310 struct bio *bio; 1310 struct bio *bio;
1311 1311
1312 if (error) 1312 if (status)
1313 pkt->cache_valid = 0; 1313 pkt->cache_valid = 0;
1314 1314
1315 /* Finish all bios corresponding to this packet */ 1315 /* Finish all bios corresponding to this packet */
1316 while ((bio = bio_list_pop(&pkt->orig_bios))) { 1316 while ((bio = bio_list_pop(&pkt->orig_bios))) {
1317 bio->bi_error = error; 1317 bio->bi_status = status;
1318 bio_endio(bio); 1318 bio_endio(bio);
1319 } 1319 }
1320} 1320}
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
1349 if (atomic_read(&pkt->io_wait) > 0) 1349 if (atomic_read(&pkt->io_wait) > 0)
1350 return; 1350 return;
1351 1351
1352 if (!pkt->w_bio->bi_error) { 1352 if (!pkt->w_bio->bi_status) {
1353 pkt_set_state(pkt, PACKET_FINISHED_STATE); 1353 pkt_set_state(pkt, PACKET_FINISHED_STATE);
1354 } else { 1354 } else {
1355 pkt_set_state(pkt, PACKET_RECOVERY_STATE); 1355 pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
1366 break; 1366 break;
1367 1367
1368 case PACKET_FINISHED_STATE: 1368 case PACKET_FINISHED_STATE:
1369 pkt_finish_packet(pkt, pkt->w_bio->bi_error); 1369 pkt_finish_packet(pkt, pkt->w_bio->bi_status);
1370 return; 1370 return;
1371 1371
1372 default: 1372 default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
2301 struct packet_stacked_data *psd = bio->bi_private; 2301 struct packet_stacked_data *psd = bio->bi_private;
2302 struct pktcdvd_device *pd = psd->pd; 2302 struct pktcdvd_device *pd = psd->pd;
2303 2303
2304 psd->bio->bi_error = bio->bi_error; 2304 psd->bio->bi_status = bio->bi_status;
2305 bio_put(bio); 2305 bio_put(bio);
2306 bio_endio(psd->bio); 2306 bio_endio(psd->bio);
2307 mempool_free(psd, psd_pool); 2307 mempool_free(psd, psd_pool);
@@ -2310,7 +2310,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
2310 2310
2311static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) 2311static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
2312{ 2312{
2313 struct bio *cloned_bio = bio_clone(bio, GFP_NOIO); 2313 struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
2314 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO); 2314 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
2315 2315
2316 psd->pd = pd; 2316 psd->pd = pd;
@@ -2412,9 +2412,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
2412 char b[BDEVNAME_SIZE]; 2412 char b[BDEVNAME_SIZE];
2413 struct bio *split; 2413 struct bio *split;
2414 2414
2415 blk_queue_bounce(q, &bio); 2415 blk_queue_split(q, &bio);
2416
2417 blk_queue_split(q, &bio, q->bio_split);
2418 2416
2419 pd = q->queuedata; 2417 pd = q->queuedata;
2420 if (!pd) { 2418 if (!pd) {
@@ -2455,7 +2453,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
2455 2453
2456 split = bio_split(bio, last_zone - 2454 split = bio_split(bio, last_zone -
2457 bio->bi_iter.bi_sector, 2455 bio->bi_iter.bi_sector,
2458 GFP_NOIO, fs_bio_set); 2456 GFP_NOIO, pkt_bio_set);
2459 bio_chain(split, bio); 2457 bio_chain(split, bio);
2460 } else { 2458 } else {
2461 split = bio; 2459 split = bio;
@@ -2583,6 +2581,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
2583 bdev = bdget(dev); 2581 bdev = bdget(dev);
2584 if (!bdev) 2582 if (!bdev)
2585 return -ENOMEM; 2583 return -ENOMEM;
2584 if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
2585 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
2586 bdput(bdev);
2587 return -EINVAL;
2588 }
2586 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); 2589 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
2587 if (ret) 2590 if (ret)
2588 return ret; 2591 return ret;
@@ -2919,6 +2922,11 @@ static int __init pkt_init(void)
2919 sizeof(struct packet_stacked_data)); 2922 sizeof(struct packet_stacked_data));
2920 if (!psd_pool) 2923 if (!psd_pool)
2921 return -ENOMEM; 2924 return -ENOMEM;
2925 pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
2926 if (!pkt_bio_set) {
2927 mempool_destroy(psd_pool);
2928 return -ENOMEM;
2929 }
2922 2930
2923 ret = register_blkdev(pktdev_major, DRIVER_NAME); 2931 ret = register_blkdev(pktdev_major, DRIVER_NAME);
2924 if (ret < 0) { 2932 if (ret < 0) {
@@ -2951,6 +2959,7 @@ out:
2951 unregister_blkdev(pktdev_major, DRIVER_NAME); 2959 unregister_blkdev(pktdev_major, DRIVER_NAME);
2952out2: 2960out2:
2953 mempool_destroy(psd_pool); 2961 mempool_destroy(psd_pool);
2962 bioset_free(pkt_bio_set);
2954 return ret; 2963 return ret;
2955} 2964}
2956 2965
@@ -2964,6 +2973,7 @@ static void __exit pkt_exit(void)
2964 2973
2965 unregister_blkdev(pktdev_major, DRIVER_NAME); 2974 unregister_blkdev(pktdev_major, DRIVER_NAME);
2966 mempool_destroy(psd_pool); 2975 mempool_destroy(psd_pool);
2976 bioset_free(pkt_bio_set);
2967} 2977}
2968 2978
2969MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives"); 2979MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
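Note: pktcdvd now clones from its own bio_set instead of fs_bio_set. A minimal sketch of the allocate/clone/free lifecycle for a hypothetical module (error handling trimmed; names illustrative):

#include <linux/module.h>
#include <linux/bio.h>

static struct bio_set *example_bio_set;	/* private clone pool */

static int __init example_init(void)
{
	/* Three-argument bioset_create(): pool size, front pad, flags. */
	example_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
	if (!example_bio_set)
		return -ENOMEM;
	return 0;
}

/* Clone a stacked bio from the private pool rather than fs_bio_set. */
static struct bio *example_clone(struct bio *src)
{
	return bio_clone_fast(src, GFP_NOIO, example_bio_set);
}

static void __exit example_exit(void)
{
	bioset_free(example_bio_set);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");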
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e9feb8..075662f2cf46 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
158 if (res) { 158 if (res) {
159 dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, 159 dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
160 __LINE__, op, res); 160 __LINE__, op, res);
161 __blk_end_request_all(req, -EIO); 161 __blk_end_request_all(req, BLK_STS_IOERR);
162 return 0; 162 return 0;
163 } 163 }
164 164
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
180 if (res) { 180 if (res) {
181 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", 181 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
182 __func__, __LINE__, res); 182 __func__, __LINE__, res);
183 __blk_end_request_all(req, -EIO); 183 __blk_end_request_all(req, BLK_STS_IOERR);
184 return 0; 184 return 0;
185 } 185 }
186 186
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
208 break; 208 break;
209 default: 209 default:
210 blk_dump_rq_flags(req, DEVICE_NAME " bad request"); 210 blk_dump_rq_flags(req, DEVICE_NAME " bad request");
211 __blk_end_request_all(req, -EIO); 211 __blk_end_request_all(req, BLK_STS_IOERR);
212 } 212 }
213 } 213 }
214} 214}
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
231 struct ps3_storage_device *dev = data; 231 struct ps3_storage_device *dev = data;
232 struct ps3disk_private *priv; 232 struct ps3disk_private *priv;
233 struct request *req; 233 struct request *req;
234 int res, read, error; 234 int res, read;
235 blk_status_t error;
235 u64 tag, status; 236 u64 tag, status;
236 const char *op; 237 const char *op;
237 238
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
269 if (status) { 270 if (status) {
270 dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, 271 dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
271 __LINE__, op, status); 272 __LINE__, op, status);
272 error = -EIO; 273 error = BLK_STS_IOERR;
273 } else { 274 } else {
274 dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__, 275 dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
275 __LINE__, op); 276 __LINE__, op);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe21559..e0e81cacd781 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
428 kfree(priv->cache.tags); 428 kfree(priv->cache.tags);
429} 429}
430 430
431static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from, 431static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
432 size_t len, size_t *retlen, u_char *buf) 432 size_t len, size_t *retlen, u_char *buf)
433{ 433{
434 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); 434 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
438 (unsigned int)from, len); 438 (unsigned int)from, len);
439 439
440 if (from >= priv->size) 440 if (from >= priv->size)
441 return -EIO; 441 return BLK_STS_IOERR;
442 442
443 if (len > priv->size - from) 443 if (len > priv->size - from)
444 len = priv->size - from; 444 len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
472 return 0; 472 return 0;
473} 473}
474 474
475static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to, 475static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
476 size_t len, size_t *retlen, const u_char *buf) 476 size_t len, size_t *retlen, const u_char *buf)
477{ 477{
478 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); 478 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
479 unsigned int cached, count; 479 unsigned int cached, count;
480 480
481 if (to >= priv->size) 481 if (to >= priv->size)
482 return -EIO; 482 return BLK_STS_IOERR;
483 483
484 if (len > priv->size - to) 484 if (len > priv->size - to)
485 len = priv->size - to; 485 len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
554 int write = bio_data_dir(bio) == WRITE; 554 int write = bio_data_dir(bio) == WRITE;
555 const char *op = write ? "write" : "read"; 555 const char *op = write ? "write" : "read";
556 loff_t offset = bio->bi_iter.bi_sector << 9; 556 loff_t offset = bio->bi_iter.bi_sector << 9;
557 int error = 0; 557 blk_status_t error = 0;
558 struct bio_vec bvec; 558 struct bio_vec bvec;
559 struct bvec_iter iter; 559 struct bvec_iter iter;
560 struct bio *next; 560 struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
578 578
579 if (retlen != len) { 579 if (retlen != len) {
580 dev_err(&dev->core, "Short %s\n", op); 580 dev_err(&dev->core, "Short %s\n", op);
581 error = -EIO; 581 error = BLK_STS_IOERR;
582 goto out; 582 goto out;
583 } 583 }
584 584
@@ -593,7 +593,7 @@ out:
593 next = bio_list_peek(&priv->list); 593 next = bio_list_peek(&priv->list);
594 spin_unlock_irq(&priv->lock); 594 spin_unlock_irq(&priv->lock);
595 595
596 bio->bi_error = error; 596 bio->bi_status = error;
597 bio_endio(bio); 597 bio_endio(bio);
598 return next; 598 return next;
599} 599}
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
606 606
607 dev_dbg(&dev->core, "%s\n", __func__); 607 dev_dbg(&dev->core, "%s\n", __func__);
608 608
609 blk_queue_split(q, &bio, q->bio_split); 609 blk_queue_split(q, &bio);
610 610
611 spin_lock_irq(&priv->lock); 611 spin_lock_irq(&priv->lock);
612 busy = !bio_list_empty(&priv->list); 612 busy = !bio_list_empty(&priv->list);
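Note: two conversions recur in the bio-based drivers above — blk_queue_split() drops its bio_set argument, and per-bio errors move from bio->bi_error (an errno) to bio->bi_status (a blk_status_t). A hedged sketch of a make_request_fn using both, with purely illustrative names:

#include <linux/blkdev.h>
#include <linux/bio.h>

struct example_dev {
	sector_t capacity;
};

static blk_qc_t example_make_request(struct request_queue *q, struct bio *bio)
{
	struct example_dev *dev = q->queuedata;

	/* Split oversized bios against the queue limits; two-argument form. */
	blk_queue_split(q, &bio);

	if (bio_end_sector(bio) > dev->capacity) {
		/* Errors are now blk_status_t values in bio->bi_status. */
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return BLK_QC_T_NONE;
	}

	/* ... hand the bio to the device here ... */
	bio_endio(bio);
	return BLK_QC_T_NONE;
}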
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c16f74547804..b008b6a98098 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -442,6 +442,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
442static struct kmem_cache *rbd_img_request_cache; 442static struct kmem_cache *rbd_img_request_cache;
443static struct kmem_cache *rbd_obj_request_cache; 443static struct kmem_cache *rbd_obj_request_cache;
444 444
445static struct bio_set *rbd_bio_clone;
446
445static int rbd_major; 447static int rbd_major;
446static DEFINE_IDA(rbd_dev_id_ida); 448static DEFINE_IDA(rbd_dev_id_ida);
447 449
@@ -1363,7 +1365,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
1363{ 1365{
1364 struct bio *bio; 1366 struct bio *bio;
1365 1367
1366 bio = bio_clone(bio_src, gfpmask); 1368 bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
1367 if (!bio) 1369 if (!bio)
1368 return NULL; /* ENOMEM */ 1370 return NULL; /* ENOMEM */
1369 1371
@@ -2293,11 +2295,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
2293 rbd_assert(img_request->obj_request != NULL); 2295 rbd_assert(img_request->obj_request != NULL);
2294 more = obj_request->which < img_request->obj_request_count - 1; 2296 more = obj_request->which < img_request->obj_request_count - 1;
2295 } else { 2297 } else {
2298 blk_status_t status = errno_to_blk_status(result);
2299
2296 rbd_assert(img_request->rq != NULL); 2300 rbd_assert(img_request->rq != NULL);
2297 2301
2298 more = blk_update_request(img_request->rq, result, xferred); 2302 more = blk_update_request(img_request->rq, status, xferred);
2299 if (!more) 2303 if (!more)
2300 __blk_mq_end_request(img_request->rq, result); 2304 __blk_mq_end_request(img_request->rq, status);
2301 } 2305 }
2302 2306
2303 return more; 2307 return more;
@@ -4150,17 +4154,17 @@ err_rq:
4150 obj_op_name(op_type), length, offset, result); 4154 obj_op_name(op_type), length, offset, result);
4151 ceph_put_snap_context(snapc); 4155 ceph_put_snap_context(snapc);
4152err: 4156err:
4153 blk_mq_end_request(rq, result); 4157 blk_mq_end_request(rq, errno_to_blk_status(result));
4154} 4158}
4155 4159
4156static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, 4160static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
4157 const struct blk_mq_queue_data *bd) 4161 const struct blk_mq_queue_data *bd)
4158{ 4162{
4159 struct request *rq = bd->rq; 4163 struct request *rq = bd->rq;
4160 struct work_struct *work = blk_mq_rq_to_pdu(rq); 4164 struct work_struct *work = blk_mq_rq_to_pdu(rq);
4161 4165
4162 queue_work(rbd_wq, work); 4166 queue_work(rbd_wq, work);
4163 return BLK_MQ_RQ_QUEUE_OK; 4167 return BLK_STS_OK;
4164} 4168}
4165 4169
4166static void rbd_free_disk(struct rbd_device *rbd_dev) 4170static void rbd_free_disk(struct rbd_device *rbd_dev)
@@ -6414,8 +6418,16 @@ static int rbd_slab_init(void)
6414 if (!rbd_obj_request_cache) 6418 if (!rbd_obj_request_cache)
6415 goto out_err; 6419 goto out_err;
6416 6420
6421 rbd_assert(!rbd_bio_clone);
6422 rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
6423 if (!rbd_bio_clone)
6424 goto out_err_clone;
6425
6417 return 0; 6426 return 0;
6418 6427
6428out_err_clone:
6429 kmem_cache_destroy(rbd_obj_request_cache);
6430 rbd_obj_request_cache = NULL;
6419out_err: 6431out_err:
6420 kmem_cache_destroy(rbd_img_request_cache); 6432 kmem_cache_destroy(rbd_img_request_cache);
6421 rbd_img_request_cache = NULL; 6433 rbd_img_request_cache = NULL;
@@ -6431,6 +6443,10 @@ static void rbd_slab_exit(void)
6431 rbd_assert(rbd_img_request_cache); 6443 rbd_assert(rbd_img_request_cache);
6432 kmem_cache_destroy(rbd_img_request_cache); 6444 kmem_cache_destroy(rbd_img_request_cache);
6433 rbd_img_request_cache = NULL; 6445 rbd_img_request_cache = NULL;
6446
6447 rbd_assert(rbd_bio_clone);
6448 bioset_free(rbd_bio_clone);
6449 rbd_bio_clone = NULL;
6434} 6450}
6435 6451
6436static int __init rbd_init(void) 6452static int __init rbd_init(void)
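Note: rbd keeps errnos internally and translates only at the block-layer boundary. A minimal sketch of that boundary for a hypothetical driver that completes requests from a work item (names are assumptions, not rbd internals):

#include <linux/blk-mq.h>

/* Internal paths still produce errnos; convert once when completing. */
static void example_complete(struct request *rq, int result, unsigned int bytes)
{
	blk_status_t status = errno_to_blk_status(result);

	if (!blk_update_request(rq, status, bytes))
		__blk_mq_end_request(rq, status);
}

/* queue_rq now returns a blk_status_t instead of BLK_MQ_RQ_QUEUE_*. */
static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct work_struct *work = blk_mq_rq_to_pdu(bd->rq);

	schedule_work(work);
	return BLK_STS_OK;
}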
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c566364ac9c..7f4acebf4657 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -149,9 +149,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
149{ 149{
150 struct rsxx_cardinfo *card = q->queuedata; 150 struct rsxx_cardinfo *card = q->queuedata;
151 struct rsxx_bio_meta *bio_meta; 151 struct rsxx_bio_meta *bio_meta;
152 int st = -EINVAL; 152 blk_status_t st = BLK_STS_IOERR;
153 153
154 blk_queue_split(q, &bio, q->bio_split); 154 blk_queue_split(q, &bio);
155 155
156 might_sleep(); 156 might_sleep();
157 157
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
161 if (bio_end_sector(bio) > get_capacity(card->gendisk)) 161 if (bio_end_sector(bio) > get_capacity(card->gendisk))
162 goto req_err; 162 goto req_err;
163 163
164 if (unlikely(card->halt)) { 164 if (unlikely(card->halt))
165 st = -EFAULT;
166 goto req_err; 165 goto req_err;
167 }
168 166
169 if (unlikely(card->dma_fault)) { 167 if (unlikely(card->dma_fault))
170 st = (-EFAULT);
171 goto req_err; 168 goto req_err;
172 }
173 169
174 if (bio->bi_iter.bi_size == 0) { 170 if (bio->bi_iter.bi_size == 0) {
175 dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); 171 dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
178 174
179 bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); 175 bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
180 if (!bio_meta) { 176 if (!bio_meta) {
181 st = -ENOMEM; 177 st = BLK_STS_RESOURCE;
182 goto req_err; 178 goto req_err;
183 } 179 }
184 180
@@ -205,7 +201,7 @@ queue_err:
205 kmem_cache_free(bio_meta_pool, bio_meta); 201 kmem_cache_free(bio_meta_pool, bio_meta);
206req_err: 202req_err:
207 if (st) 203 if (st)
208 bio->bi_error = st; 204 bio->bi_status = st;
209 bio_endio(bio); 205 bio_endio(bio);
210 return BLK_QC_T_NONE; 206 return BLK_QC_T_NONE;
211} 207}
@@ -288,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
288 } 284 }
289 285
290 blk_queue_make_request(card->queue, rsxx_make_request); 286 blk_queue_make_request(card->queue, rsxx_make_request);
291 blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
292 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); 287 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
293 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); 288 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
294 289
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385f87d0..6a1b2177951c 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
611 mutex_unlock(&ctrl->work_lock); 611 mutex_unlock(&ctrl->work_lock);
612} 612}
613 613
614static int rsxx_queue_discard(struct rsxx_cardinfo *card, 614static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
615 struct list_head *q, 615 struct list_head *q,
616 unsigned int laddr, 616 unsigned int laddr,
617 rsxx_dma_cb cb, 617 rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
621 621
622 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); 622 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
623 if (!dma) 623 if (!dma)
624 return -ENOMEM; 624 return BLK_STS_RESOURCE;
625 625
626 dma->cmd = HW_CMD_BLK_DISCARD; 626 dma->cmd = HW_CMD_BLK_DISCARD;
627 dma->laddr = laddr; 627 dma->laddr = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
640 return 0; 640 return 0;
641} 641}
642 642
643static int rsxx_queue_dma(struct rsxx_cardinfo *card, 643static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
644 struct list_head *q, 644 struct list_head *q,
645 int dir, 645 int dir,
646 unsigned int dma_off, 646 unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
655 655
656 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); 656 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
657 if (!dma) 657 if (!dma)
658 return -ENOMEM; 658 return BLK_STS_RESOURCE;
659 659
660 dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; 660 dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
661 dma->laddr = laddr; 661 dma->laddr = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
677 return 0; 677 return 0;
678} 678}
679 679
680int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, 680blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
681 struct bio *bio, 681 struct bio *bio,
682 atomic_t *n_dmas, 682 atomic_t *n_dmas,
683 rsxx_dma_cb cb, 683 rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
694 unsigned int dma_len; 694 unsigned int dma_len;
695 int dma_cnt[RSXX_MAX_TARGETS]; 695 int dma_cnt[RSXX_MAX_TARGETS];
696 int tgt; 696 int tgt;
697 int st; 697 blk_status_t st;
698 int i; 698 int i;
699 699
700 addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */ 700 addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ bvec_err:
769 for (i = 0; i < card->n_targets; i++) 769 for (i = 0; i < card->n_targets; i++)
770 rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i], 770 rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
771 FREE_DMA); 771 FREE_DMA);
772
773 return st; 772 return st;
774} 773}
775 774
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d0f690..277f27e673a2 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
391void rsxx_dma_cleanup(void); 391void rsxx_dma_cleanup(void);
392void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); 392void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
393int rsxx_dma_configure(struct rsxx_cardinfo *card); 393int rsxx_dma_configure(struct rsxx_cardinfo *card);
394int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, 394blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
395 struct bio *bio, 395 struct bio *bio,
396 atomic_t *n_dmas, 396 atomic_t *n_dmas,
397 rsxx_dma_cb cb, 397 rsxx_dma_cb cb,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4dae2a..d0368682bd43 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
451 struct skd_special_context *skspcl); 451 struct skd_special_context *skspcl);
452static void skd_request_fn(struct request_queue *rq); 452static void skd_request_fn(struct request_queue *rq);
453static void skd_end_request(struct skd_device *skdev, 453static void skd_end_request(struct skd_device *skdev,
454 struct skd_request_context *skreq, int error); 454 struct skd_request_context *skreq, blk_status_t status);
455static int skd_preop_sg_list(struct skd_device *skdev, 455static bool skd_preop_sg_list(struct skd_device *skdev,
456 struct skd_request_context *skreq); 456 struct skd_request_context *skreq);
457static void skd_postop_sg_list(struct skd_device *skdev, 457static void skd_postop_sg_list(struct skd_device *skdev,
458 struct skd_request_context *skreq); 458 struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
491 if (req == NULL) 491 if (req == NULL)
492 break; 492 break;
493 blk_start_request(req); 493 blk_start_request(req);
494 __blk_end_request_all(req, -EIO); 494 __blk_end_request_all(req, BLK_STS_IOERR);
495 } 495 }
496} 496}
497 497
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
545 struct request *req = NULL; 545 struct request *req = NULL;
546 struct skd_scsi_request *scsi_req; 546 struct skd_scsi_request *scsi_req;
547 unsigned long io_flags; 547 unsigned long io_flags;
548 int error;
549 u32 lba; 548 u32 lba;
550 u32 count; 549 u32 count;
551 int data_dir; 550 int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
716 if (!req->bio) 715 if (!req->bio)
717 goto skip_sg; 716 goto skip_sg;
718 717
719 error = skd_preop_sg_list(skdev, skreq); 718 if (!skd_preop_sg_list(skdev, skreq)) {
720
721 if (error != 0) {
722 /* 719 /*
723 * Complete the native request with error. 720 * Complete the native request with error.
724 * Note that the request context is still at the 721 * Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
730 */ 727 */
731 pr_debug("%s:%s:%d error Out\n", 728 pr_debug("%s:%s:%d error Out\n",
732 skdev->name, __func__, __LINE__); 729 skdev->name, __func__, __LINE__);
733 skd_end_request(skdev, skreq, error); 730 skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
734 continue; 731 continue;
735 } 732 }
736 733
@@ -805,7 +802,7 @@ skip_sg:
805} 802}
806 803
807static void skd_end_request(struct skd_device *skdev, 804static void skd_end_request(struct skd_device *skdev,
808 struct skd_request_context *skreq, int error) 805 struct skd_request_context *skreq, blk_status_t error)
809{ 806{
810 if (unlikely(error)) { 807 if (unlikely(error)) {
811 struct request *req = skreq->req; 808 struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
822 __blk_end_request_all(skreq->req, error); 819 __blk_end_request_all(skreq->req, error);
823} 820}
824 821
825static int skd_preop_sg_list(struct skd_device *skdev, 822static bool skd_preop_sg_list(struct skd_device *skdev,
826 struct skd_request_context *skreq) 823 struct skd_request_context *skreq)
827{ 824{
828 struct request *req = skreq->req; 825 struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
839 836
840 n_sg = blk_rq_map_sg(skdev->queue, req, sg); 837 n_sg = blk_rq_map_sg(skdev->queue, req, sg);
841 if (n_sg <= 0) 838 if (n_sg <= 0)
842 return -EINVAL; 839 return false;
843 840
844 /* 841 /*
845 * Map scatterlist to PCI bus addresses. 842 * Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
847 */ 844 */
848 n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir); 845 n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
849 if (n_sg <= 0) 846 if (n_sg <= 0)
850 return -EINVAL; 847 return false;
851 848
852 SKD_ASSERT(n_sg <= skdev->sgs_per_request); 849 SKD_ASSERT(n_sg <= skdev->sgs_per_request);
853 850
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
882 } 879 }
883 } 880 }
884 881
885 return 0; 882 return true;
886} 883}
887 884
888static void skd_postop_sg_list(struct skd_device *skdev, 885static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
2333 switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) { 2330 switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
2334 case SKD_CHECK_STATUS_REPORT_GOOD: 2331 case SKD_CHECK_STATUS_REPORT_GOOD:
2335 case SKD_CHECK_STATUS_REPORT_SMART_ALERT: 2332 case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
2336 skd_end_request(skdev, skreq, 0); 2333 skd_end_request(skdev, skreq, BLK_STS_OK);
2337 break; 2334 break;
2338 2335
2339 case SKD_CHECK_STATUS_BUSY_IMMINENT: 2336 case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
2355 2352
2356 case SKD_CHECK_STATUS_REPORT_ERROR: 2353 case SKD_CHECK_STATUS_REPORT_ERROR:
2357 default: 2354 default:
2358 skd_end_request(skdev, skreq, -EIO); 2355 skd_end_request(skdev, skreq, BLK_STS_IOERR);
2359 break; 2356 break;
2360 } 2357 }
2361} 2358}
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
2748 * native request. 2745 * native request.
2749 */ 2746 */
2750 if (likely(cmp_status == SAM_STAT_GOOD)) 2747 if (likely(cmp_status == SAM_STAT_GOOD))
2751 skd_end_request(skdev, skreq, 0); 2748 skd_end_request(skdev, skreq, BLK_STS_OK);
2752 else 2749 else
2753 skd_resolve_req_exception(skdev, skreq); 2750 skd_resolve_req_exception(skdev, skreq);
2754 } 2751 }
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
3190 SKD_MAX_RETRIES) 3187 SKD_MAX_RETRIES)
3191 blk_requeue_request(skdev->queue, skreq->req); 3188 blk_requeue_request(skdev->queue, skreq->req);
3192 else 3189 else
3193 skd_end_request(skdev, skreq, -EIO); 3190 skd_end_request(skdev, skreq, BLK_STS_IOERR);
3194 3191
3195 skreq->req = NULL; 3192 skreq->req = NULL;
3196 3193
@@ -4276,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev)
4276 rc = -ENOMEM; 4273 rc = -ENOMEM;
4277 goto err_out; 4274 goto err_out;
4278 } 4275 }
4276 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
4279 4277
4280 skdev->queue = q; 4278 skdev->queue = q;
4281 disk->queue = q; 4279 disk->queue = q;
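Note: several drivers in this series (skd above, swim/swim3 below) now call blk_queue_bounce_limit() themselves — with the core no longer applying a highmem bounce limit by default, hardware that cannot address highmem must opt in at queue setup. A hedged sketch, names illustrative:

#include <linux/blkdev.h>

static void example_setup_queue(struct request_queue *q)
{
	/* PIO-only hardware: ask the core to bounce highmem pages for us. */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
	blk_queue_max_hw_sectors(q, 256);
	blk_queue_logical_block_size(q, 512);
}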
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab3d50a..6b16ead1da58 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
316 316
317 rqe->req = NULL; 317 rqe->req = NULL;
318 318
319 __blk_end_request(req, (desc->status ? -EIO : 0), desc->size); 319 __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
320 320
321 vdc_blk_queue_start(port); 321 vdc_blk_queue_start(port);
322} 322}
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
1023 struct request *req; 1023 struct request *req;
1024 1024
1025 while ((req = blk_fetch_request(port->disk->queue)) != NULL) 1025 while ((req = blk_fetch_request(port->disk->queue)) != NULL)
1026 __blk_end_request_all(req, -EIO); 1026 __blk_end_request_all(req, BLK_STS_IOERR);
1027} 1027}
1028 1028
1029static void vdc_ldc_reset_timer(unsigned long _arg) 1029static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6cf375..84434d3ea19b 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
493 return ret; 493 return ret;
494} 494}
495 495
496static int floppy_read_sectors(struct floppy_state *fs, 496static blk_status_t floppy_read_sectors(struct floppy_state *fs,
497 int req_sector, int sectors_nb, 497 int req_sector, int sectors_nb,
498 unsigned char *buffer) 498 unsigned char *buffer)
499{ 499{
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
516 ret = swim_read_sector(fs, side, track, sector, 516 ret = swim_read_sector(fs, side, track, sector,
517 buffer); 517 buffer);
518 if (try-- == 0) 518 if (try-- == 0)
519 return -EIO; 519 return BLK_STS_IOERR;
520 } while (ret != 512); 520 } while (ret != 512);
521 521
522 buffer += ret; 522 buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
553 553
554 req = swim_next_request(swd); 554 req = swim_next_request(swd);
555 while (req) { 555 while (req) {
556 int err = -EIO; 556 blk_status_t err = BLK_STS_IOERR;
557 557
558 fs = req->rq_disk->private_data; 558 fs = req->rq_disk->private_data;
559 if (blk_rq_pos(req) >= fs->total_secs) 559 if (blk_rq_pos(req) >= fs->total_secs)
@@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd)
864 put_disk(swd->unit[drive].disk); 864 put_disk(swd->unit[drive].disk);
865 goto exit_put_disks; 865 goto exit_put_disks;
866 } 866 }
867 blk_queue_bounce_limit(swd->unit[drive].disk->queue,
868 BLK_BOUNCE_HIGH);
867 swd->unit[drive].disk->queue->queuedata = swd; 869 swd->unit[drive].disk->queue->queuedata = swd;
868 swd->unit[drive].swd = swd; 870 swd->unit[drive].swd = swd;
869 } 871 }
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c9bdba..9f931f8f6b4c 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
257 unsigned int clearing); 257 unsigned int clearing);
258static int floppy_revalidate(struct gendisk *disk); 258static int floppy_revalidate(struct gendisk *disk);
259 259
260static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes) 260static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
261{ 261{
262 struct request *req = fs->cur_req; 262 struct request *req = fs->cur_req;
263 int rc; 263 int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
334 if (fs->mdev->media_bay && 334 if (fs->mdev->media_bay &&
335 check_media_bay(fs->mdev->media_bay) != MB_FD) { 335 check_media_bay(fs->mdev->media_bay) != MB_FD) {
336 swim3_dbg("%s", " media bay absent, dropping req\n"); 336 swim3_dbg("%s", " media bay absent, dropping req\n");
337 swim3_end_request(fs, -ENODEV, 0); 337 swim3_end_request(fs, BLK_STS_IOERR, 0);
338 continue; 338 continue;
339 } 339 }
340 340
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
350 if (blk_rq_pos(req) >= fs->total_secs) { 350 if (blk_rq_pos(req) >= fs->total_secs) {
351 swim3_dbg(" pos out of bounds (%ld, max is %ld)\n", 351 swim3_dbg(" pos out of bounds (%ld, max is %ld)\n",
352 (long)blk_rq_pos(req), (long)fs->total_secs); 352 (long)blk_rq_pos(req), (long)fs->total_secs);
353 swim3_end_request(fs, -EIO, 0); 353 swim3_end_request(fs, BLK_STS_IOERR, 0);
354 continue; 354 continue;
355 } 355 }
356 if (fs->ejected) { 356 if (fs->ejected) {
357 swim3_dbg("%s", " disk ejected\n"); 357 swim3_dbg("%s", " disk ejected\n");
358 swim3_end_request(fs, -EIO, 0); 358 swim3_end_request(fs, BLK_STS_IOERR, 0);
359 continue; 359 continue;
360 } 360 }
361 361
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
364 fs->write_prot = swim3_readbit(fs, WRITE_PROT); 364 fs->write_prot = swim3_readbit(fs, WRITE_PROT);
365 if (fs->write_prot) { 365 if (fs->write_prot) {
366 swim3_dbg("%s", " try to write, disk write protected\n"); 366 swim3_dbg("%s", " try to write, disk write protected\n");
367 swim3_end_request(fs, -EIO, 0); 367 swim3_end_request(fs, BLK_STS_IOERR, 0);
368 continue; 368 continue;
369 } 369 }
370 } 370 }
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
548 if (fs->retries > 5) { 548 if (fs->retries > 5) {
549 swim3_err("Wrong cylinder in transfer, want: %d got %d\n", 549 swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
550 fs->req_cyl, fs->cur_cyl); 550 fs->req_cyl, fs->cur_cyl);
551 swim3_end_request(fs, -EIO, 0); 551 swim3_end_request(fs, BLK_STS_IOERR, 0);
552 fs->state = idle; 552 fs->state = idle;
553 return; 553 return;
554 } 554 }
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
584 out_8(&sw->intr_enable, 0); 584 out_8(&sw->intr_enable, 0);
585 fs->cur_cyl = -1; 585 fs->cur_cyl = -1;
586 if (fs->retries > 5) { 586 if (fs->retries > 5) {
587 swim3_end_request(fs, -EIO, 0); 587 swim3_end_request(fs, BLK_STS_IOERR, 0);
588 fs->state = idle; 588 fs->state = idle;
589 start_request(fs); 589 start_request(fs);
590 } else { 590 } else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
608 out_8(&sw->select, RELAX); 608 out_8(&sw->select, RELAX);
609 out_8(&sw->intr_enable, 0); 609 out_8(&sw->intr_enable, 0);
610 swim3_err("%s", "Seek timeout\n"); 610 swim3_err("%s", "Seek timeout\n");
611 swim3_end_request(fs, -EIO, 0); 611 swim3_end_request(fs, BLK_STS_IOERR, 0);
612 fs->state = idle; 612 fs->state = idle;
613 start_request(fs); 613 start_request(fs);
614 spin_unlock_irqrestore(&swim3_lock, flags); 614 spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
637 goto unlock; 637 goto unlock;
638 } 638 }
639 swim3_err("%s", "Seek settle timeout\n"); 639 swim3_err("%s", "Seek settle timeout\n");
640 swim3_end_request(fs, -EIO, 0); 640 swim3_end_request(fs, BLK_STS_IOERR, 0);
641 fs->state = idle; 641 fs->state = idle;
642 start_request(fs); 642 start_request(fs);
643 unlock: 643 unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
666 swim3_err("Timeout %sing sector %ld\n", 666 swim3_err("Timeout %sing sector %ld\n",
667 (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"), 667 (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
668 (long)blk_rq_pos(fs->cur_req)); 668 (long)blk_rq_pos(fs->cur_req));
669 swim3_end_request(fs, -EIO, 0); 669 swim3_end_request(fs, BLK_STS_IOERR, 0);
670 fs->state = idle; 670 fs->state = idle;
671 start_request(fs); 671 start_request(fs);
672 spin_unlock_irqrestore(&swim3_lock, flags); 672 spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
703 swim3_err("%s", "Seen sector but cyl=ff?\n"); 703 swim3_err("%s", "Seen sector but cyl=ff?\n");
704 fs->cur_cyl = -1; 704 fs->cur_cyl = -1;
705 if (fs->retries > 5) { 705 if (fs->retries > 5) {
706 swim3_end_request(fs, -EIO, 0); 706 swim3_end_request(fs, BLK_STS_IOERR, 0);
707 fs->state = idle; 707 fs->state = idle;
708 start_request(fs); 708 start_request(fs);
709 } else { 709 } else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
786 swim3_err("Error %sing block %ld (err=%x)\n", 786 swim3_err("Error %sing block %ld (err=%x)\n",
787 rq_data_dir(req) == WRITE? "writ": "read", 787 rq_data_dir(req) == WRITE? "writ": "read",
788 (long)blk_rq_pos(req), err); 788 (long)blk_rq_pos(req), err);
789 swim3_end_request(fs, -EIO, 0); 789 swim3_end_request(fs, BLK_STS_IOERR, 0);
790 fs->state = idle; 790 fs->state = idle;
791 } 791 }
792 } else { 792 } else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
795 swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid); 795 swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
796 swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n", 796 swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n",
797 fs->state, rq_data_dir(req), intr, err); 797 fs->state, rq_data_dir(req), intr, err);
798 swim3_end_request(fs, -EIO, 0); 798 swim3_end_request(fs, BLK_STS_IOERR, 0);
799 fs->state = idle; 799 fs->state = idle;
800 start_request(fs); 800 start_request(fs);
801 break; 801 break;
@@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev,
1223 put_disk(disk); 1223 put_disk(disk);
1224 return -ENOMEM; 1224 return -ENOMEM;
1225 } 1225 }
1226 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
1226 disk->queue->queuedata = &floppy_states[index]; 1227 disk->queue->queuedata = &floppy_states[index];
1227 1228
1228 if (index == 0) { 1229 if (index == 0) {
@@ -1245,7 +1246,7 @@ static int swim3_attach(struct macio_dev *mdev,
1245 return 0; 1246 return 0;
1246} 1247}
1247 1248
1248static struct of_device_id swim3_match[] = 1249static const struct of_device_id swim3_match[] =
1249{ 1250{
1250 { 1251 {
1251 .name = "swim3", 1252 .name = "swim3",
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index c8e072caf56f..08586dc14e85 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
745 745
746static inline void carm_end_request_queued(struct carm_host *host, 746static inline void carm_end_request_queued(struct carm_host *host,
747 struct carm_request *crq, 747 struct carm_request *crq,
748 int error) 748 blk_status_t error)
749{ 749{
750 struct request *req = crq->rq; 750 struct request *req = crq->rq;
751 int rc; 751 int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
791} 791}
792 792
793static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, 793static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
794 int error) 794 blk_status_t error)
795{ 795{
796 carm_end_request_queued(host, crq, error); 796 carm_end_request_queued(host, crq, error);
797 if (max_queue == 1) 797 if (max_queue == 1)
@@ -869,14 +869,14 @@ queue_one_request:
869 sg = &crq->sg[0]; 869 sg = &crq->sg[0];
870 n_elem = blk_rq_map_sg(q, rq, sg); 870 n_elem = blk_rq_map_sg(q, rq, sg);
871 if (n_elem <= 0) { 871 if (n_elem <= 0) {
872 carm_end_rq(host, crq, -EIO); 872 carm_end_rq(host, crq, BLK_STS_IOERR);
873 return; /* request with no s/g entries? */ 873 return; /* request with no s/g entries? */
874 } 874 }
875 875
876 /* map scatterlist to PCI bus addresses */ 876 /* map scatterlist to PCI bus addresses */
877 n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir); 877 n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
878 if (n_elem <= 0) { 878 if (n_elem <= 0) {
879 carm_end_rq(host, crq, -EIO); 879 carm_end_rq(host, crq, BLK_STS_IOERR);
880 return; /* request with no s/g entries? */ 880 return; /* request with no s/g entries? */
881 } 881 }
882 crq->n_elem = n_elem; 882 crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ queue_one_request:
937 937
938static void carm_handle_array_info(struct carm_host *host, 938static void carm_handle_array_info(struct carm_host *host,
939 struct carm_request *crq, u8 *mem, 939 struct carm_request *crq, u8 *mem,
940 int error) 940 blk_status_t error)
941{ 941{
942 struct carm_port *port; 942 struct carm_port *port;
943 u8 *msg_data = mem + sizeof(struct carm_array_info); 943 u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ out:
997 997
998static void carm_handle_scan_chan(struct carm_host *host, 998static void carm_handle_scan_chan(struct carm_host *host,
999 struct carm_request *crq, u8 *mem, 999 struct carm_request *crq, u8 *mem,
1000 int error) 1000 blk_status_t error)
1001{ 1001{
1002 u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET; 1002 u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
1003 unsigned int i, dev_count = 0; 1003 unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ out:
1029} 1029}
1030 1030
1031static void carm_handle_generic(struct carm_host *host, 1031static void carm_handle_generic(struct carm_host *host,
1032 struct carm_request *crq, int error, 1032 struct carm_request *crq, blk_status_t error,
1033 int cur_state, int next_state) 1033 int cur_state, int next_state)
1034{ 1034{
1035 DPRINTK("ENTER\n"); 1035 DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
1045} 1045}
1046 1046
1047static inline void carm_handle_rw(struct carm_host *host, 1047static inline void carm_handle_rw(struct carm_host *host,
1048 struct carm_request *crq, int error) 1048 struct carm_request *crq, blk_status_t error)
1049{ 1049{
1050 int pci_dir; 1050 int pci_dir;
1051 1051
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
1067 u32 handle = le32_to_cpu(ret_handle_le); 1067 u32 handle = le32_to_cpu(ret_handle_le);
1068 unsigned int msg_idx; 1068 unsigned int msg_idx;
1069 struct carm_request *crq; 1069 struct carm_request *crq;
1070 int error = (status == RMSG_OK) ? 0 : -EIO; 1070 blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
1071 u8 *mem; 1071 u8 *mem;
1072 1072
1073 VPRINTK("ENTER, handle == 0x%x\n", handle); 1073 VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
1155err_out: 1155err_out:
1156 printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n", 1156 printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
1157 pci_name(host->pdev), crq->msg_type, crq->msg_subtype); 1157 pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
1158 carm_end_rq(host, crq, -EIO); 1158 carm_end_rq(host, crq, BLK_STS_IOERR);
1159} 1159}
1160 1160
1161static inline void carm_handle_responses(struct carm_host *host) 1161static inline void carm_handle_responses(struct carm_host *host)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3be22b..0677d2514665 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
454 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); 454 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
455 if (control & DMASCR_HARD_ERROR) { 455 if (control & DMASCR_HARD_ERROR) {
456 /* error */ 456 /* error */
457 bio->bi_error = -EIO; 457 bio->bi_status = BLK_STS_IOERR;
458 dev_printk(KERN_WARNING, &card->dev->dev, 458 dev_printk(KERN_WARNING, &card->dev->dev,
459 "I/O error on sector %d/%d\n", 459 "I/O error on sector %d/%d\n",
460 le32_to_cpu(desc->local_addr)>>9, 460 le32_to_cpu(desc->local_addr)>>9,
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
529 (unsigned long long)bio->bi_iter.bi_sector, 529 (unsigned long long)bio->bi_iter.bi_sector,
530 bio->bi_iter.bi_size); 530 bio->bi_iter.bi_size);
531 531
532 blk_queue_split(q, &bio, q->bio_split); 532 blk_queue_split(q, &bio);
533 533
534 spin_lock_irq(&card->lock); 534 spin_lock_irq(&card->lock);
535 *card->biotail = bio; 535 *card->biotail = bio;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 553cc4c542b4..0297ad7c1452 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -64,15 +64,15 @@ struct virtblk_req {
64 struct scatterlist sg[]; 64 struct scatterlist sg[];
65}; 65};
66 66
67static inline int virtblk_result(struct virtblk_req *vbr) 67static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
68{ 68{
69 switch (vbr->status) { 69 switch (vbr->status) {
70 case VIRTIO_BLK_S_OK: 70 case VIRTIO_BLK_S_OK:
71 return 0; 71 return BLK_STS_OK;
72 case VIRTIO_BLK_S_UNSUPP: 72 case VIRTIO_BLK_S_UNSUPP:
73 return -ENOTTY; 73 return BLK_STS_NOTSUPP;
74 default: 74 default:
75 return -EIO; 75 return BLK_STS_IOERR;
76 } 76 }
77} 77}
78 78
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
214 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 214 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
215} 215}
216 216
217static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 217static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
218 const struct blk_mq_queue_data *bd) 218 const struct blk_mq_queue_data *bd)
219{ 219{
220 struct virtio_blk *vblk = hctx->queue->queuedata; 220 struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
246 break; 246 break;
247 default: 247 default:
248 WARN_ON_ONCE(1); 248 WARN_ON_ONCE(1);
249 return BLK_MQ_RQ_QUEUE_ERROR; 249 return BLK_STS_IOERR;
250 } 250 }
251 251
252 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 252 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
276 /* Out of mem doesn't actually happen, since we fall back 276 /* Out of mem doesn't actually happen, since we fall back
277 * to direct descriptors */ 277 * to direct descriptors */
278 if (err == -ENOMEM || err == -ENOSPC) 278 if (err == -ENOMEM || err == -ENOSPC)
279 return BLK_MQ_RQ_QUEUE_BUSY; 279 return BLK_STS_RESOURCE;
280 return BLK_MQ_RQ_QUEUE_ERROR; 280 return BLK_STS_IOERR;
281 } 281 }
282 282
283 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 283 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
286 286
287 if (notify) 287 if (notify)
288 virtqueue_notify(vblk->vqs[qid].vq); 288 virtqueue_notify(vblk->vqs[qid].vq);
289 return BLK_MQ_RQ_QUEUE_OK; 289 return BLK_STS_OK;
290} 290}
291 291
292/* return id (s/n) string for *disk to *id_str 292/* return id (s/n) string for *disk to *id_str
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
307 goto out; 307 goto out;
308 308
309 blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); 309 blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
310 err = virtblk_result(blk_mq_rq_to_pdu(req)); 310 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
311out: 311out:
312 blk_put_request(req); 312 blk_put_request(req);
313 return err; 313 return err;
@@ -720,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev)
720 /* We can handle whatever the host told us to handle. */ 720 /* We can handle whatever the host told us to handle. */
721 blk_queue_max_segments(q, vblk->sg_elems-2); 721 blk_queue_max_segments(q, vblk->sg_elems-2);
722 722
723 /* No need to bounce any requests */
724 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
725
726 /* No real sector limit. */ 723 /* No real sector limit. */
727 blk_queue_max_hw_sectors(q, -1U); 724 blk_queue_max_hw_sectors(q, -1U);
728 725
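Note: virtblk_result() now maps the device status byte straight to a blk_status_t, with BLK_STS_NOTSUPP replacing -ENOTTY, while passthrough callers such as virtblk_get_id() convert back with blk_status_to_errno(). A hedged sketch of that pattern for a hypothetical device (status values are assumptions):

#include <linux/blkdev.h>
#include <linux/blk_types.h>

/* Map a hypothetical device status byte to block-layer status codes. */
static blk_status_t example_result(u8 hw_status)
{
	switch (hw_status) {
	case 0:
		return BLK_STS_OK;
	case 1:				/* command not supported */
		return BLK_STS_NOTSUPP;
	default:
		return BLK_STS_IOERR;
	}
}

/* Callers that still want an errno convert at the boundary. */
static int example_result_errno(u8 hw_status)
{
	return blk_status_to_errno(example_result(hw_status));
}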
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0e824091a12f..fe7cd58c43d0 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1066,20 +1066,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
1066 atomic_set(&blkif->drain, 0); 1066 atomic_set(&blkif->drain, 0);
1067} 1067}
1068 1068
1069/* 1069static void __end_block_io_op(struct pending_req *pending_req,
1070 * Completion callback on the bio's. Called as bh->b_end_io() 1070 blk_status_t error)
1071 */
1072
1073static void __end_block_io_op(struct pending_req *pending_req, int error)
1074{ 1071{
1075 /* An error fails the entire request. */ 1072 /* An error fails the entire request. */
1076 if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && 1073 if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
1077 (error == -EOPNOTSUPP)) { 1074 error == BLK_STS_NOTSUPP) {
1078 pr_debug("flush diskcache op failed, not supported\n"); 1075 pr_debug("flush diskcache op failed, not supported\n");
1079 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0); 1076 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
1080 pending_req->status = BLKIF_RSP_EOPNOTSUPP; 1077 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
1081 } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && 1078 } else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
1082 (error == -EOPNOTSUPP)) { 1079 error == BLK_STS_NOTSUPP) {
1083 pr_debug("write barrier op failed, not supported\n"); 1080 pr_debug("write barrier op failed, not supported\n");
1084 xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0); 1081 xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
1085 pending_req->status = BLKIF_RSP_EOPNOTSUPP; 1082 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1103,7 +1100,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
1103 */ 1100 */
1104static void end_block_io_op(struct bio *bio) 1101static void end_block_io_op(struct bio *bio)
1105{ 1102{
1106 __end_block_io_op(bio->bi_private, bio->bi_error); 1103 __end_block_io_op(bio->bi_private, bio->bi_status);
1107 bio_put(bio); 1104 bio_put(bio);
1108} 1105}
1109 1106
@@ -1420,7 +1417,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
1420 for (i = 0; i < nbio; i++) 1417 for (i = 0; i < nbio; i++)
1421 bio_put(biolist[i]); 1418 bio_put(biolist[i]);
1422 atomic_set(&pending_req->pendcnt, 1); 1419 atomic_set(&pending_req->pendcnt, 1);
1423 __end_block_io_op(pending_req, -EINVAL); 1420 __end_block_io_op(pending_req, BLK_STS_RESOURCE);
1424 msleep(1); /* back off a bit */ 1421 msleep(1); /* back off a bit */
1425 return -EIO; 1422 return -EIO;
1426} 1423}
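Note: xen-blkback's completion path now reads bio->bi_status and compares against BLK_STS_NOTSUPP rather than -EOPNOTSUPP. A minimal sketch of a bi_end_io handler under the new scheme (the pending structure and its status field are hypothetical):

#include <linux/bio.h>
#include <linux/errno.h>

struct example_pending {
	int status;		/* value recorded for the caller */
};

/* bi_end_io handler: bio->bi_status is a blk_status_t, not an errno. */
static void example_end_io(struct bio *bio)
{
	struct example_pending *pending = bio->bi_private;

	if (bio->bi_status == BLK_STS_NOTSUPP)
		pending->status = -EOPNOTSUPP;
	else if (bio->bi_status)
		pending->status = -EIO;
	else
		pending->status = 0;

	bio_put(bio);
}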
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 39459631667c..c852ed3c01d5 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -110,11 +110,6 @@ struct blk_shadow {
110 unsigned long associated_id; 110 unsigned long associated_id;
111}; 111};
112 112
113struct split_bio {
114 struct bio *bio;
115 atomic_t pending;
116};
117
118struct blkif_req { 113struct blkif_req {
119 int error; 114 int error;
120}; 115};
@@ -881,7 +876,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
881 !info->feature_fua)); 876 !info->feature_fua));
882} 877}
883 878
884static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, 879static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
885 const struct blk_mq_queue_data *qd) 880 const struct blk_mq_queue_data *qd)
886{ 881{
887 unsigned long flags; 882 unsigned long flags;
@@ -904,16 +899,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
904 899
905 flush_requests(rinfo); 900 flush_requests(rinfo);
906 spin_unlock_irqrestore(&rinfo->ring_lock, flags); 901 spin_unlock_irqrestore(&rinfo->ring_lock, flags);
907 return BLK_MQ_RQ_QUEUE_OK; 902 return BLK_STS_OK;
908 903
909out_err: 904out_err:
910 spin_unlock_irqrestore(&rinfo->ring_lock, flags); 905 spin_unlock_irqrestore(&rinfo->ring_lock, flags);
911 return BLK_MQ_RQ_QUEUE_ERROR; 906 return BLK_STS_IOERR;
912 907
913out_busy: 908out_busy:
914 spin_unlock_irqrestore(&rinfo->ring_lock, flags); 909 spin_unlock_irqrestore(&rinfo->ring_lock, flags);
915 blk_mq_stop_hw_queue(hctx); 910 blk_mq_stop_hw_queue(hctx);
916 return BLK_MQ_RQ_QUEUE_BUSY; 911 return BLK_STS_RESOURCE;
917} 912}
918 913
919static void blkif_complete_rq(struct request *rq) 914static void blkif_complete_rq(struct request *rq)
@@ -958,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
958 953
959 /* Make sure buffer addresses are sector-aligned. */ 954 /* Make sure buffer addresses are sector-aligned. */
960 blk_queue_dma_alignment(rq, 511); 955 blk_queue_dma_alignment(rq, 511);
961
962 /* Make sure we don't use bounce buffers. */
963 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
964} 956}
965 957
966static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, 958static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -1601,14 +1593,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
1601 continue; 1593 continue;
1602 } 1594 }
1603 1595
1604 blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; 1596 if (bret->status == BLKIF_RSP_OKAY)
1597 blkif_req(req)->error = BLK_STS_OK;
1598 else
1599 blkif_req(req)->error = BLK_STS_IOERR;
1600
1605 switch (bret->operation) { 1601 switch (bret->operation) {
1606 case BLKIF_OP_DISCARD: 1602 case BLKIF_OP_DISCARD:
1607 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 1603 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
1608 struct request_queue *rq = info->rq; 1604 struct request_queue *rq = info->rq;
1609 printk(KERN_WARNING "blkfront: %s: %s op failed\n", 1605 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
1610 info->gd->disk_name, op_name(bret->operation)); 1606 info->gd->disk_name, op_name(bret->operation));
1611 blkif_req(req)->error = -EOPNOTSUPP; 1607 blkif_req(req)->error = BLK_STS_NOTSUPP;
1612 info->feature_discard = 0; 1608 info->feature_discard = 0;
1613 info->feature_secdiscard = 0; 1609 info->feature_secdiscard = 0;
1614 queue_flag_clear(QUEUE_FLAG_DISCARD, rq); 1610 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1622,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
1626 rinfo->shadow[id].req.u.rw.nr_segments == 0)) { 1622 rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
1627 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", 1623 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
1628 info->gd->disk_name, op_name(bret->operation)); 1624 info->gd->disk_name, op_name(bret->operation));
1629 blkif_req(req)->error = -EOPNOTSUPP; 1625 blkif_req(req)->error = BLK_STS_NOTSUPP;
1630 } 1626 }
1631 if (unlikely(blkif_req(req)->error)) { 1627 if (unlikely(blkif_req(req)->error)) {
1632 if (blkif_req(req)->error == -EOPNOTSUPP) 1628 if (blkif_req(req)->error == BLK_STS_NOTSUPP)
1633 blkif_req(req)->error = 0; 1629 blkif_req(req)->error = BLK_STS_OK;
1634 info->feature_fua = 0; 1630 info->feature_fua = 0;
1635 info->feature_flush = 0; 1631 info->feature_flush = 0;
1636 xlvbd_flush(info); 1632 xlvbd_flush(info);
@@ -1996,28 +1992,13 @@ static int blkfront_probe(struct xenbus_device *dev,
1996 return 0; 1992 return 0;
1997} 1993}
1998 1994
1999static void split_bio_end(struct bio *bio)
2000{
2001 struct split_bio *split_bio = bio->bi_private;
2002
2003 if (atomic_dec_and_test(&split_bio->pending)) {
2004 split_bio->bio->bi_phys_segments = 0;
2005 split_bio->bio->bi_error = bio->bi_error;
2006 bio_endio(split_bio->bio);
2007 kfree(split_bio);
2008 }
2009 bio_put(bio);
2010}
2011
2012static int blkif_recover(struct blkfront_info *info) 1995static int blkif_recover(struct blkfront_info *info)
2013{ 1996{
2014 unsigned int i, r_index; 1997 unsigned int r_index;
2015 struct request *req, *n; 1998 struct request *req, *n;
2016 int rc; 1999 int rc;
2017 struct bio *bio, *cloned_bio; 2000 struct bio *bio;
2018 unsigned int segs, offset; 2001 unsigned int segs;
2019 int pending, size;
2020 struct split_bio *split_bio;
2021 2002
2022 blkfront_gather_backend_features(info); 2003 blkfront_gather_backend_features(info);
2023 /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ 2004 /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2056,34 +2037,6 @@ static int blkif_recover(struct blkfront_info *info)
2056 2037
2057 while ((bio = bio_list_pop(&info->bio_list)) != NULL) { 2038 while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
2058 /* Traverse the list of pending bios and re-queue them */ 2039 /* Traverse the list of pending bios and re-queue them */
2059 if (bio_segments(bio) > segs) {
2060 /*
2061 * This bio has more segments than what we can
2062 * handle, we have to split it.
2063 */
2064 pending = (bio_segments(bio) + segs - 1) / segs;
2065 split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
2066 BUG_ON(split_bio == NULL);
2067 atomic_set(&split_bio->pending, pending);
2068 split_bio->bio = bio;
2069 for (i = 0; i < pending; i++) {
2070 offset = (i * segs * XEN_PAGE_SIZE) >> 9;
2071 size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
2072 (unsigned int)bio_sectors(bio) - offset);
2073 cloned_bio = bio_clone(bio, GFP_NOIO);
2074 BUG_ON(cloned_bio == NULL);
2075 bio_trim(cloned_bio, offset, size);
2076 cloned_bio->bi_private = split_bio;
2077 cloned_bio->bi_end_io = split_bio_end;
2078 submit_bio(cloned_bio);
2079 }
2080 /*
2081 * Now we have to wait for all those smaller bios to
2082 * end, so we can also end the "parent" bio.
2083 */
2084 continue;
2085 }
2086 /* We don't need to split this bio */
2087 submit_bio(bio); 2040 submit_bio(bio);
2088 } 2041 }
2089 2042
@@ -2137,7 +2090,7 @@ static int blkfront_resume(struct xenbus_device *dev)
2137 merge_bio.tail = shadow[j].request->biotail; 2090 merge_bio.tail = shadow[j].request->biotail;
2138 bio_list_merge(&info->bio_list, &merge_bio); 2091 bio_list_merge(&info->bio_list, &merge_bio);
2139 shadow[j].request->bio = NULL; 2092 shadow[j].request->bio = NULL;
2140 blk_mq_end_request(shadow[j].request, 0); 2093 blk_mq_end_request(shadow[j].request, BLK_STS_OK);
2141 } 2094 }
2142 } 2095 }
2143 2096
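
The xen-blkfront hunks above show the completion-path conversion that repeats throughout this series: request status is carried as a blk_status_t (BLK_STS_OK, BLK_STS_IOERR, BLK_STS_NOTSUPP, ...) instead of 0/-EIO/-EOPNOTSUPP, and it is tested as a boolean or against specific BLK_STS_* values rather than with signed comparisons. A minimal sketch of the pattern; mydrv_complete() is a hypothetical helper, not part of the patch, and where a driver still carries an errno internally the block layer's errno_to_blk_status()/blk_status_to_errno() helpers translate at the boundary.

        #include <linux/blkdev.h>
        #include <linux/blk-mq.h>
        #include <linux/blk_types.h>

        /* Illustrative only; mydrv_complete() is not part of the patch. */
        static void mydrv_complete(struct request *rq, bool hw_ok, bool op_supported)
        {
                blk_status_t sts = BLK_STS_OK;

                if (!op_supported)
                        sts = BLK_STS_NOTSUPP;  /* was -EOPNOTSUPP */
                else if (!hw_ok)
                        sts = BLK_STS_IOERR;    /* was -EIO */

                /* legacy request_fn drivers use __blk_end_request_all(rq, sts) */
                blk_mq_end_request(rq, sts);
        }
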
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 757dce2147e0..14459d66ef0c 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
471 if (!blk_rq_is_passthrough(req)) 471 if (!blk_rq_is_passthrough(req))
472 break; 472 break;
473 blk_start_request(req); 473 blk_start_request(req);
474 __blk_end_request_all(req, -EIO); 474 __blk_end_request_all(req, BLK_STS_IOERR);
475 } 475 }
476 return req; 476 return req;
477} 477}
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
499 499
500 /* Drop all in-flight and pending requests */ 500 /* Drop all in-flight and pending requests */
501 if (ace->req) { 501 if (ace->req) {
502 __blk_end_request_all(ace->req, -EIO); 502 __blk_end_request_all(ace->req, BLK_STS_IOERR);
503 ace->req = NULL; 503 ace->req = NULL;
504 } 504 }
505 while ((req = blk_fetch_request(ace->queue)) != NULL) 505 while ((req = blk_fetch_request(ace->queue)) != NULL)
506 __blk_end_request_all(req, -EIO); 506 __blk_end_request_all(req, BLK_STS_IOERR);
507 507
508 /* Drop back to IDLE state and notify waiters */ 508 /* Drop back to IDLE state and notify waiters */
509 ace->fsm_state = ACE_FSM_STATE_IDLE; 509 ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
728 } 728 }
729 729
730 /* bio finished; is there another one? */ 730 /* bio finished; is there another one? */
731 if (__blk_end_request_cur(ace->req, 0)) { 731 if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
732 /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", 732 /* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
733 * blk_rq_sectors(ace->req), 733 * blk_rq_sectors(ace->req),
734 * blk_rq_cur_sectors(ace->req)); 734 * blk_rq_cur_sectors(ace->req));
@@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace)
993 if (ace->queue == NULL) 993 if (ace->queue == NULL)
994 goto err_blk_initq; 994 goto err_blk_initq;
995 blk_queue_logical_block_size(ace->queue, 512); 995 blk_queue_logical_block_size(ace->queue, 512);
996 blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
996 997
997 /* 998 /*
998 * Allocate and initialize GD structure 999 * Allocate and initialize GD structure
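
The blk_queue_bounce_limit() call added to ace_setup() here (and to gdrom below), together with the removal of the BLK_BOUNCE_ANY call from xen-blkfront earlier, suggests that bouncing of highmem pages is now something a driver opts into at queue-setup time rather than a limit the core applies by default. A sketch of the opt-in for a hypothetical PIO driver that cannot address highmem pages directly; mydrv_setup_queue() is illustrative.

        #include <linux/blkdev.h>

        /* Hypothetical setup helper; only the bounce call is the point here. */
        static void mydrv_setup_queue(struct request_queue *q)
        {
                blk_queue_logical_block_size(q, 512);
                /* data is copied by the CPU (PIO), so keep pages below highmem */
                blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
        }
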
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 968f9e52effa..41c95c9b2ab4 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
74 while (req) { 74 while (req) {
75 unsigned long start = blk_rq_pos(req) << 9; 75 unsigned long start = blk_rq_pos(req) << 9;
76 unsigned long len = blk_rq_cur_bytes(req); 76 unsigned long len = blk_rq_cur_bytes(req);
77 int err = 0; 77 blk_status_t err = BLK_STS_OK;
78 78
79 if (start + len > z2ram_size) { 79 if (start + len > z2ram_size) {
80 pr_err(DEVICE_NAME ": bad access: block=%llu, " 80 pr_err(DEVICE_NAME ": bad access: block=%llu, "
81 "count=%u\n", 81 "count=%u\n",
82 (unsigned long long)blk_rq_pos(req), 82 (unsigned long long)blk_rq_pos(req),
83 blk_rq_cur_sectors(req)); 83 blk_rq_cur_sectors(req));
84 err = -EIO; 84 err = BLK_STS_IOERR;
85 goto done; 85 goto done;
86 } 86 }
87 while (len) { 87 while (len) {
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 76c952fd9ab9..e36d160c458f 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2178,6 +2178,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2178 if (!q) 2178 if (!q)
2179 return -ENXIO; 2179 return -ENXIO;
2180 2180
2181 if (!blk_queue_scsi_passthrough(q)) {
2182 WARN_ONCE(true,
2183 "Attempt read CDDA info through a non-SCSI queue\n");
2184 return -EINVAL;
2185 }
2186
2181 cdi->last_sense = 0; 2187 cdi->last_sense = 0;
2182 2188
2183 while (nframes) { 2189 while (nframes) {
@@ -2195,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2195 break; 2201 break;
2196 } 2202 }
2197 req = scsi_req(rq); 2203 req = scsi_req(rq);
2198 scsi_req_init(rq);
2199 2204
2200 ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); 2205 ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
2201 if (ret) { 2206 if (ret) {
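
The WARN_ONCE added to cdrom_read_cdda_bpc() pairs with the QUEUE_FLAG_SCSI_PASSTHROUGH flag that ide-probe.c sets later in this diff: scsi_req() is only meaningful on queues whose per-request payload begins with a struct scsi_request, and the flag lets a generic caller check that before building a passthrough command. A sketch of the consumer side; issue_passthrough() is an illustrative caller, not a real helper, and the opcode is just an example.

        #include <linux/blkdev.h>
        #include <linux/err.h>
        #include <scsi/scsi_request.h>

        /* The queue owner advertises the capability at init time, e.g.:
         *      q->cmd_size = sizeof(<pdu embedding struct scsi_request first>);
         *      queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
         */
        static struct request *issue_passthrough(struct request_queue *q, u8 opcode)
        {
                struct request *rq;

                if (!blk_queue_scsi_passthrough(q))     /* scsi_req(rq) would be bogus */
                        return ERR_PTR(-EINVAL);

                rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);
                if (IS_ERR(rq))
                        return rq;

                scsi_req(rq)->cmd[0] = opcode;
                scsi_req(rq)->cmd_len = 1;
                return rq;
        }
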
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1372763a948f..6495b03f576c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
583 */ 583 */
584static void gdrom_readdisk_dma(struct work_struct *work) 584static void gdrom_readdisk_dma(struct work_struct *work)
585{ 585{
586 int err, block, block_cnt; 586 int block, block_cnt;
587 blk_status_t err;
587 struct packet_command *read_command; 588 struct packet_command *read_command;
588 struct list_head *elem, *next; 589 struct list_head *elem, *next;
589 struct request *req; 590 struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
641 __raw_writeb(1, GDROM_DMA_STATUS_REG); 642 __raw_writeb(1, GDROM_DMA_STATUS_REG);
642 wait_event_interruptible_timeout(request_queue, 643 wait_event_interruptible_timeout(request_queue,
643 gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); 644 gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
644 err = gd.transfer ? -EIO : 0; 645 err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
645 gd.transfer = 0; 646 gd.transfer = 0;
646 gd.pending = 0; 647 gd.pending = 0;
647 /* now seek to take the request spinlock 648 /* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
670 break; 671 break;
671 case REQ_OP_WRITE: 672 case REQ_OP_WRITE:
672 pr_notice("Read only device - write request ignored\n"); 673 pr_notice("Read only device - write request ignored\n");
673 __blk_end_request_all(req, -EIO); 674 __blk_end_request_all(req, BLK_STS_IOERR);
674 break; 675 break;
675 default: 676 default:
676 printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); 677 printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
677 __blk_end_request_all(req, -EIO); 678 __blk_end_request_all(req, BLK_STS_IOERR);
678 break; 679 break;
679 } 680 }
680 } 681 }
@@ -812,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr)
812 err = -ENOMEM; 813 err = -ENOMEM;
813 goto probe_fail_requestq; 814 goto probe_fail_requestq;
814 } 815 }
816 blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
815 817
816 err = probe_gdrom_setupqueue(); 818 err = probe_gdrom_setupqueue();
817 if (err) 819 if (err)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5901937284e7..14d1e7d9a1d6 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
93 int error; 93 int error;
94 94
95 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 95 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
96 scsi_req_init(rq);
97 ide_req(rq)->type = ATA_PRIV_MISC; 96 ide_req(rq)->type = ATA_PRIV_MISC;
98 rq->special = (char *)pc; 97 rq->special = (char *)pc;
99 98
@@ -200,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
200 memset(sense, 0, sizeof(*sense)); 199 memset(sense, 0, sizeof(*sense));
201 200
202 blk_rq_init(rq->q, sense_rq); 201 blk_rq_init(rq->q, sense_rq);
203 scsi_req_init(sense_rq); 202 scsi_req_init(req);
204 203
205 err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, 204 err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
206 GFP_NOIO); 205 GFP_NOIO);
@@ -273,7 +272,7 @@ void ide_retry_pc(ide_drive_t *drive)
273 ide_requeue_and_plug(drive, failed_rq); 272 ide_requeue_and_plug(drive, failed_rq);
274 if (ide_queue_sense_rq(drive, pc)) { 273 if (ide_queue_sense_rq(drive, pc)) {
275 blk_start_request(failed_rq); 274 blk_start_request(failed_rq);
276 ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq)); 275 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
277 } 276 }
278} 277}
279EXPORT_SYMBOL_GPL(ide_retry_pc); 278EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +436,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
437 436
438 /* No more interrupts */ 437 /* No more interrupts */
439 if ((stat & ATA_DRQ) == 0) { 438 if ((stat & ATA_DRQ) == 0) {
440 int uptodate, error; 439 int uptodate;
440 blk_status_t error;
441 441
442 debug_log("Packet command completed, %d bytes transferred\n", 442 debug_log("Packet command completed, %d bytes transferred\n",
443 blk_rq_bytes(rq)); 443 blk_rq_bytes(rq));
@@ -490,7 +490,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
490 490
491 if (ata_misc_request(rq)) { 491 if (ata_misc_request(rq)) {
492 scsi_req(rq)->result = 0; 492 scsi_req(rq)->result = 0;
493 error = 0; 493 error = BLK_STS_OK;
494 } else { 494 } else {
495 495
496 if (blk_rq_is_passthrough(rq) && uptodate <= 0) { 496 if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +498,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
498 scsi_req(rq)->result = -EIO; 498 scsi_req(rq)->result = -EIO;
499 } 499 }
500 500
501 error = uptodate ? 0 : -EIO; 501 error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
502 } 502 }
503 503
504 ide_complete_rq(drive, error, blk_rq_bytes(rq)); 504 ide_complete_rq(drive, error, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 07e5ff3a64c3..81e18f9628d0 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
228 scsi_req(failed)->sense_len = scsi_req(rq)->sense_len; 228 scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
229 cdrom_analyze_sense_data(drive, failed); 229 cdrom_analyze_sense_data(drive, failed);
230 230
231 if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed))) 231 if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
232 BUG(); 232 BUG();
233 } else 233 } else
234 cdrom_analyze_sense_data(drive, NULL); 234 cdrom_analyze_sense_data(drive, NULL);
@@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
438 438
439 rq = blk_get_request(drive->queue, 439 rq = blk_get_request(drive->queue,
440 write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); 440 write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
441 scsi_req_init(rq);
442 memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB); 441 memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
443 ide_req(rq)->type = ATA_PRIV_PC; 442 ide_req(rq)->type = ATA_PRIV_PC;
444 rq->rq_flags |= rq_flags; 443 rq->rq_flags |= rq_flags;
@@ -508,7 +507,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
508 nr_bytes -= cmd->last_xfer_len; 507 nr_bytes -= cmd->last_xfer_len;
509 508
510 if (nr_bytes > 0) { 509 if (nr_bytes > 0) {
511 ide_complete_rq(drive, 0, nr_bytes); 510 ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
512 return true; 511 return true;
513 } 512 }
514 513
@@ -674,7 +673,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
674out_end: 673out_end:
675 if (blk_rq_is_scsi(rq) && rc == 0) { 674 if (blk_rq_is_scsi(rq) && rc == 0) {
676 scsi_req(rq)->resid_len = 0; 675 scsi_req(rq)->resid_len = 0;
677 blk_end_request_all(rq, 0); 676 blk_end_request_all(rq, BLK_STS_OK);
678 hwif->rq = NULL; 677 hwif->rq = NULL;
679 } else { 678 } else {
680 if (sense && uptodate) 679 if (sense && uptodate)
@@ -699,7 +698,7 @@ out_end:
699 scsi_req(rq)->resid_len += cmd->last_xfer_len; 698 scsi_req(rq)->resid_len += cmd->last_xfer_len;
700 } 699 }
701 700
702 ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq)); 701 ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
703 702
704 if (sense && rc == 2) 703 if (sense && rc == 2)
705 ide_error(drive, "request sense failure", stat); 704 ide_error(drive, "request sense failure", stat);
@@ -844,7 +843,7 @@ out_end:
844 if (nsectors == 0) 843 if (nsectors == 0)
845 nsectors = 1; 844 nsectors = 1;
846 845
847 ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9); 846 ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
848 847
849 return ide_stopped; 848 return ide_stopped;
850} 849}
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 55cd736c39c6..9d26c9737e21 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
304 int ret; 304 int ret;
305 305
306 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 306 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
307 scsi_req_init(rq);
308 ide_req(rq)->type = ATA_PRIV_MISC; 307 ide_req(rq)->type = ATA_PRIV_MISC;
309 rq->rq_flags = RQF_QUIET; 308 rq->rq_flags = RQF_QUIET;
310 blk_execute_rq(drive->queue, cd->disk, rq, 0); 309 blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 9b69c32ee560..ef7c8c43a380 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
166 return setting->set(drive, arg); 166 return setting->set(drive, arg);
167 167
168 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); 168 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
169 scsi_req_init(rq);
170 ide_req(rq)->type = ATA_PRIV_MISC; 169 ide_req(rq)->type = ATA_PRIV_MISC;
171 scsi_req(rq)->cmd_len = 5; 170 scsi_req(rq)->cmd_len = 5;
172 scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; 171 scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7c06237f3479..241983da5fc4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg)
478 return -EBUSY; 478 return -EBUSY;
479 479
480 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 480 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
481 scsi_req_init(rq);
482 ide_req(rq)->type = ATA_PRIV_TASKFILE; 481 ide_req(rq)->type = ATA_PRIV_TASKFILE;
483 482
484 drive->mult_req = arg; 483 drive->mult_req = arg;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 51c81223e56d..54d4d78ca46a 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
104 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) 104 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
105 ide_finish_cmd(drive, cmd, stat); 105 ide_finish_cmd(drive, cmd, stat);
106 else 106 else
107 ide_complete_rq(drive, 0, 107 ide_complete_rq(drive, BLK_STS_OK,
108 blk_rq_sectors(cmd->rq) << 9); 108 blk_rq_sectors(cmd->rq) << 9);
109 return ide_stopped; 109 return ide_stopped;
110 } 110 }
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 4b7ffd7d158d..47d5f3379748 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
135 return ide_stopped; 135 return ide_stopped;
136 } 136 }
137 scsi_req(rq)->result = err; 137 scsi_req(rq)->result = err;
138 ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); 138 ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
139 return ide_stopped; 139 return ide_stopped;
140 } 140 }
141 141
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
143} 143}
144EXPORT_SYMBOL_GPL(ide_error); 144EXPORT_SYMBOL_GPL(ide_error);
145 145
146static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) 146static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
147{ 147{
148 struct request *rq = drive->hwif->rq; 148 struct request *rq = drive->hwif->rq;
149 149
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
151 scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) { 151 scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
152 if (err <= 0 && scsi_req(rq)->result == 0) 152 if (err <= 0 && scsi_req(rq)->result == 0)
153 scsi_req(rq)->result = -EIO; 153 scsi_req(rq)->result = -EIO;
154 ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq)); 154 ide_complete_rq(drive, err, blk_rq_bytes(rq));
155 } 155 }
156} 156}
157 157
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
191 } 191 }
192 /* done polling */ 192 /* done polling */
193 hwif->polling = 0; 193 hwif->polling = 0;
194 ide_complete_drive_reset(drive, 0); 194 ide_complete_drive_reset(drive, BLK_STS_OK);
195 return ide_stopped; 195 return ide_stopped;
196} 196}
197 197
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
225 ide_hwif_t *hwif = drive->hwif; 225 ide_hwif_t *hwif = drive->hwif;
226 const struct ide_port_ops *port_ops = hwif->port_ops; 226 const struct ide_port_ops *port_ops = hwif->port_ops;
227 u8 tmp; 227 u8 tmp;
228 int err = 0; 228 blk_status_t err = BLK_STS_OK;
229 229
230 if (port_ops && port_ops->reset_poll) { 230 if (port_ops && port_ops->reset_poll) {
231 err = port_ops->reset_poll(drive); 231 err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
247 printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n", 247 printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
248 hwif->name, tmp); 248 hwif->name, tmp);
249 drive->failures++; 249 drive->failures++;
250 err = -EIO; 250 err = BLK_STS_IOERR;
251 } else { 251 } else {
252 tmp = ide_read_error(drive); 252 tmp = ide_read_error(drive);
253 253
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
257 } else { 257 } else {
258 ide_reset_report_error(hwif, tmp); 258 ide_reset_report_error(hwif, tmp);
259 drive->failures++; 259 drive->failures++;
260 err = -EIO; 260 err = BLK_STS_IOERR;
261 } 261 }
262 } 262 }
263out: 263out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
392 392
393 if (io_ports->ctl_addr == 0) { 393 if (io_ports->ctl_addr == 0) {
394 spin_unlock_irqrestore(&hwif->lock, flags); 394 spin_unlock_irqrestore(&hwif->lock, flags);
395 ide_complete_drive_reset(drive, -ENXIO); 395 ide_complete_drive_reset(drive, BLK_STS_IOERR);
396 return ide_stopped; 396 return ide_stopped;
397 } 397 }
398 398
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8ac6048cd2df..627b1f62a749 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
143 143
144 drive->failed_pc = NULL; 144 drive->failed_pc = NULL;
145 drive->pc_callback(drive, 0); 145 drive->pc_callback(drive, 0);
146 ide_complete_rq(drive, -EIO, done); 146 ide_complete_rq(drive, BLK_STS_IOERR, done);
147 return ide_stopped; 147 return ide_stopped;
148 } 148 }
149 149
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
248 248
249 if (ata_misc_request(rq)) { 249 if (ata_misc_request(rq)) {
250 scsi_req(rq)->result = 0; 250 scsi_req(rq)->result = 0;
251 ide_complete_rq(drive, 0, blk_rq_bytes(rq)); 251 ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
252 return ide_stopped; 252 return ide_stopped;
253 } else 253 } else
254 goto out_end; 254 goto out_end;
@@ -303,7 +303,7 @@ out_end:
303 drive->failed_pc = NULL; 303 drive->failed_pc = NULL;
304 if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0) 304 if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
305 scsi_req(rq)->result = -EIO; 305 scsi_req(rq)->result = -EIO;
306 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); 306 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
307 return ide_stopped; 307 return ide_stopped;
308} 308}
309 309
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 323af721f8cb..3a234701d92c 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,7 +54,7 @@
54#include <linux/uaccess.h> 54#include <linux/uaccess.h>
55#include <asm/io.h> 55#include <asm/io.h>
56 56
57int ide_end_rq(ide_drive_t *drive, struct request *rq, int error, 57int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
58 unsigned int nr_bytes) 58 unsigned int nr_bytes)
59{ 59{
60 /* 60 /*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
112 } 112 }
113} 113}
114 114
115int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) 115int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
116{ 116{
117 ide_hwif_t *hwif = drive->hwif; 117 ide_hwif_t *hwif = drive->hwif;
118 struct request *rq = hwif->rq; 118 struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
122 * if failfast is set on a request, override number of sectors 122 * if failfast is set on a request, override number of sectors
123 * and complete the whole request right now 123 * and complete the whole request right now
124 */ 124 */
125 if (blk_noretry_request(rq) && error <= 0) 125 if (blk_noretry_request(rq) && error)
126 nr_bytes = blk_rq_sectors(rq) << 9; 126 nr_bytes = blk_rq_sectors(rq) << 9;
127 127
128 rc = ide_end_rq(drive, rq, error, nr_bytes); 128 rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
149 scsi_req(rq)->result = -EIO; 149 scsi_req(rq)->result = -EIO;
150 } 150 }
151 151
152 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); 152 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
153} 153}
154 154
155static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) 155static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
272 printk("%s: DRIVE_CMD (null)\n", drive->name); 272 printk("%s: DRIVE_CMD (null)\n", drive->name);
273#endif 273#endif
274 scsi_req(rq)->result = 0; 274 scsi_req(rq)->result = 0;
275 ide_complete_rq(drive, 0, blk_rq_bytes(rq)); 275 ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
276 276
277 return ide_stopped; 277 return ide_stopped;
278} 278}
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 8c0d17297a7a..3661abb16a5f 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
126 struct request *rq; 126 struct request *rq;
127 127
128 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 128 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
129 scsi_req_init(rq);
130 ide_req(rq)->type = ATA_PRIV_TASKFILE; 129 ide_req(rq)->type = ATA_PRIV_TASKFILE;
131 blk_execute_rq(drive->queue, NULL, rq, 0); 130 blk_execute_rq(drive->queue, NULL, rq, 0);
132 err = scsi_req(rq)->result ? -EIO : 0; 131 err = scsi_req(rq)->result ? -EIO : 0;
@@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive)
224 int ret = 0; 223 int ret = 0;
225 224
226 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 225 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
227 scsi_req_init(rq);
228 ide_req(rq)->type = ATA_PRIV_MISC; 226 ide_req(rq)->type = ATA_PRIV_MISC;
229 scsi_req(rq)->cmd_len = 1; 227 scsi_req(rq)->cmd_len = 1;
230 scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET; 228 scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 94e3107f59b9..1f264d5d3f3f 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
32 spin_unlock_irq(&hwif->lock); 32 spin_unlock_irq(&hwif->lock);
33 33
34 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); 34 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
35 scsi_req_init(rq);
36 scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; 35 scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
37 scsi_req(rq)->cmd_len = 1; 36 scsi_req(rq)->cmd_len = 1;
38 ide_req(rq)->type = ATA_PRIV_MISC; 37 ide_req(rq)->type = ATA_PRIV_MISC;
@@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
48 * timeout has expired, so power management will be reenabled. 47 * timeout has expired, so power management will be reenabled.
49 */ 48 */
50 rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT); 49 rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
51 scsi_req_init(rq);
52 if (IS_ERR(rq)) 50 if (IS_ERR(rq))
53 goto out; 51 goto out;
54 52
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0977fc1f40ce..544f02d673ca 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
19 19
20 memset(&rqpm, 0, sizeof(rqpm)); 20 memset(&rqpm, 0, sizeof(rqpm));
21 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 21 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
22 scsi_req_init(rq);
23 ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; 22 ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
24 rq->special = &rqpm; 23 rq->special = &rqpm;
25 rqpm.pm_step = IDE_PM_START_SUSPEND; 24 rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -40,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
40 return ret; 39 return ret;
41} 40}
42 41
43static void ide_end_sync_rq(struct request *rq, int error) 42static void ide_end_sync_rq(struct request *rq, blk_status_t error)
44{ 43{
45 complete(rq->end_io_data); 44 complete(rq->end_io_data);
46} 45}
@@ -57,7 +56,7 @@ static int ide_pm_execute_rq(struct request *rq)
57 if (unlikely(blk_queue_dying(q))) { 56 if (unlikely(blk_queue_dying(q))) {
58 rq->rq_flags |= RQF_QUIET; 57 rq->rq_flags |= RQF_QUIET;
59 scsi_req(rq)->result = -ENXIO; 58 scsi_req(rq)->result = -ENXIO;
60 __blk_end_request_all(rq, 0); 59 __blk_end_request_all(rq, BLK_STS_OK);
61 spin_unlock_irq(q->queue_lock); 60 spin_unlock_irq(q->queue_lock);
62 return -ENXIO; 61 return -ENXIO;
63 } 62 }
@@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev)
91 90
92 memset(&rqpm, 0, sizeof(rqpm)); 91 memset(&rqpm, 0, sizeof(rqpm));
93 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 92 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
94 scsi_req_init(rq);
95 ide_req(rq)->type = ATA_PRIV_PM_RESUME; 93 ide_req(rq)->type = ATA_PRIV_PM_RESUME;
96 rq->rq_flags |= RQF_PREEMPT; 94 rq->rq_flags |= RQF_PREEMPT;
97 rq->special = &rqpm; 95 rq->special = &rqpm;
@@ -235,7 +233,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
235 233
236 drive->hwif->rq = NULL; 234 drive->hwif->rq = NULL;
237 235
238 if (blk_end_request(rq, 0, 0)) 236 if (blk_end_request(rq, BLK_STS_OK, 0))
239 BUG(); 237 BUG();
240} 238}
241 239
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 023562565d11..01b2adfd8226 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
741 } 741 }
742} 742}
743 743
744static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp) 744static void ide_initialize_rq(struct request *rq)
745{ 745{
746 struct ide_request *req = blk_mq_rq_to_pdu(rq); 746 struct ide_request *req = blk_mq_rq_to_pdu(rq);
747 747
748 scsi_req_init(&req->sreq);
748 req->sreq.sense = req->sense; 749 req->sreq.sense = req->sense;
749 return 0;
750} 750}
751 751
752/* 752/*
@@ -771,8 +771,9 @@ static int ide_init_queue(ide_drive_t *drive)
771 return 1; 771 return 1;
772 772
773 q->request_fn = do_ide_request; 773 q->request_fn = do_ide_request;
774 q->init_rq_fn = ide_init_rq; 774 q->initialize_rq_fn = ide_initialize_rq;
775 q->cmd_size = sizeof(struct ide_request); 775 q->cmd_size = sizeof(struct ide_request);
776 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
776 if (blk_init_allocated_queue(q) < 0) { 777 if (blk_init_allocated_queue(q) < 0) {
777 blk_cleanup_queue(q); 778 blk_cleanup_queue(q);
778 return 1; 779 return 1;
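
This ide-probe.c hunk is why the per-call-site scsi_req_init() lines disappear from the other IDE files in this diff: the queue's initialize_rq_fn hook runs for every request handed out, so the embedded scsi_request is initialized in one place instead of after each blk_get_request(). A sketch of the hookup under an assumed my_request layout; the names are illustrative, mirroring ide_initialize_rq() above.

        #include <linux/blkdev.h>
        #include <linux/blk-mq.h>
        #include <scsi/scsi_request.h>
        #include <scsi/scsi_cmnd.h>     /* SCSI_SENSE_BUFFERSIZE */

        struct my_request {
                struct scsi_request sreq;       /* must be first so scsi_req() works */
                u8 sense[SCSI_SENSE_BUFFERSIZE];
        };

        static void my_initialize_rq(struct request *rq)
        {
                struct my_request *req = blk_mq_rq_to_pdu(rq);

                scsi_req_init(&req->sreq);
                req->sreq.sense = req->sense;
        }

        /* wired up where the queue is created, as in ide_init_queue() above:
         *      q->cmd_size = sizeof(struct my_request);
         *      q->initialize_rq_fn = my_initialize_rq;
         */
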
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a0651f948b76..fd57e8ccc47a 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
474 474
475 drive->failed_pc = NULL; 475 drive->failed_pc = NULL;
476 drive->pc_callback(drive, 0); 476 drive->pc_callback(drive, 0);
477 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); 477 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
478 return ide_stopped; 478 return ide_stopped;
479 } 479 }
480 ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries, 480 ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
@@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
855 BUG_ON(size < 0 || size % tape->blk_size); 855 BUG_ON(size < 0 || size % tape->blk_size);
856 856
857 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 857 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
858 scsi_req_init(rq);
859 ide_req(rq)->type = ATA_PRIV_MISC; 858 ide_req(rq)->type = ATA_PRIV_MISC;
860 scsi_req(rq)->cmd[13] = cmd; 859 scsi_req(rq)->cmd[13] = cmd;
861 rq->rq_disk = tape->disk; 860 rq->rq_disk = tape->disk;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index d71199d23c9e..4efe4c6e956c 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
318 } 318 }
319 319
320 if (nr_bytes > 0) 320 if (nr_bytes > 0)
321 ide_complete_rq(drive, 0, nr_bytes); 321 ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
322 } 322 }
323} 323}
324 324
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
336 ide_driveid_update(drive); 336 ide_driveid_update(drive);
337 } 337 }
338 338
339 ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); 339 ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
340} 340}
341 341
342/* 342/*
@@ -394,7 +394,7 @@ out_end:
394 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) 394 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
395 ide_finish_cmd(drive, cmd, stat); 395 ide_finish_cmd(drive, cmd, stat);
396 else 396 else
397 ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9); 397 ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
398 return ide_stopped; 398 return ide_stopped;
399out_err: 399out_err:
400 ide_error_cmd(drive, cmd); 400 ide_error_cmd(drive, cmd);
@@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
433 rq = blk_get_request(drive->queue, 433 rq = blk_get_request(drive->queue,
434 (cmd->tf_flags & IDE_TFLAG_WRITE) ? 434 (cmd->tf_flags & IDE_TFLAG_WRITE) ?
435 REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); 435 REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
436 scsi_req_init(rq);
437 ide_req(rq)->type = ATA_PRIV_TASKFILE; 436 ide_req(rq)->type = ATA_PRIV_TASKFILE;
438 437
439 /* 438 /*
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 6a1849bb476c..57eea5a9047f 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
406 * yet. 406 * yet.
407 */ 407 */
408 408
409static int sil_sata_reset_poll(ide_drive_t *drive) 409static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
410{ 410{
411 ide_hwif_t *hwif = drive->hwif; 411 ide_hwif_t *hwif = drive->hwif;
412 void __iomem *sata_status_addr 412 void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
419 if ((sata_stat & 0x03) != 0x03) { 419 if ((sata_stat & 0x03) != 0x03) {
420 printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n", 420 printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
421 hwif->name, sata_stat); 421 hwif->name, sata_stat);
422 return -ENXIO; 422 return BLK_STS_IOERR;
423 } 423 }
424 } 424 }
425 425
426 return 0; 426 return BLK_STS_OK;
427} 427}
428 428
429/** 429/**
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa608ad95..ddae430b6eae 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
252 } 252 }
253 mutex_unlock(&dev->mlock); 253 mutex_unlock(&dev->mlock);
254 254
255 if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end)) 255 ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
256 return -ENOMEM; 256 if (ret)
257 return ret;
257 258
258 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); 259 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
259 if (!t) { 260 if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
640int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) 641int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
641{ 642{
642 struct nvm_dev *dev = tgt_dev->parent; 643 struct nvm_dev *dev = tgt_dev->parent;
644 int ret;
643 645
644 if (!dev->ops->submit_io) 646 if (!dev->ops->submit_io)
645 return -ENODEV; 647 return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
647 nvm_rq_tgt_to_dev(tgt_dev, rqd); 649 nvm_rq_tgt_to_dev(tgt_dev, rqd);
648 650
649 rqd->dev = tgt_dev; 651 rqd->dev = tgt_dev;
650 return dev->ops->submit_io(dev, rqd); 652
653 /* In case of error, fail with right address format */
654 ret = dev->ops->submit_io(dev, rqd);
655 if (ret)
656 nvm_rq_dev_to_tgt(tgt_dev, rqd);
657 return ret;
651} 658}
652EXPORT_SYMBOL(nvm_submit_io); 659EXPORT_SYMBOL(nvm_submit_io);
653 660
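
nvm_submit_io() converts the request's addresses to device format in place before calling into the driver, so the added error path converts them back, leaving the caller's target-format view intact when submission fails. The same undo-on-failure shape, sketched with hypothetical names; none of these identifiers are lightnvm APIs.

        /* Sketch only: in-place transform, submit, restore on failure. */
        struct io_desc { unsigned long addr; /* plus length, flags, ... */ };

        static void to_device_format(struct io_desc *io) { /* nvm_rq_tgt_to_dev() analogue */ }
        static void to_target_format(struct io_desc *io) { /* nvm_rq_dev_to_tgt() analogue */ }
        static int lower_submit(struct io_desc *io)      { return 0; /* driver submit_io */ }

        static int submit_in_device_format(struct io_desc *io)
        {
                int ret;

                to_device_format(io);
                ret = lower_submit(io);
                if (ret)        /* failed: hand addresses back in the caller's format */
                        to_target_format(io);
                return ret;
        }
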
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea88db84..024a8fc93069 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
31 */ 31 */
32retry: 32retry:
33 ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos); 33 ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
34 if (ret == NVM_IO_REQUEUE) { 34 switch (ret) {
35 case NVM_IO_REQUEUE:
35 io_schedule(); 36 io_schedule();
36 goto retry; 37 goto retry;
38 case NVM_IO_ERR:
39 pblk_pipeline_stop(pblk);
40 goto out;
37 } 41 }
38 42
39 if (unlikely(!bio_has_data(bio))) 43 if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
58 atomic_long_add(nr_entries, &pblk->req_writes); 62 atomic_long_add(nr_entries, &pblk->req_writes);
59#endif 63#endif
60 64
65 pblk_rl_inserted(&pblk->rl, nr_entries);
66
61out: 67out:
62 pblk_write_should_kick(pblk); 68 pblk_write_should_kick(pblk);
63 return ret; 69 return ret;
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768ccffa..11fe0c5b2a9c 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
17 */ 17 */
18 18
19#include "pblk.h" 19#include "pblk.h"
20#include <linux/time.h>
21 20
22static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, 21static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
23 struct ppa_addr *ppa) 22 struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
34 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", 33 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
35 line->id, pos); 34 line->id, pos);
36 35
37 pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb); 36 pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
38} 37}
39 38
40static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) 39static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
54 *ppa = rqd->ppa_addr; 53 *ppa = rqd->ppa_addr;
55 pblk_mark_bb(pblk, line, ppa); 54 pblk_mark_bb(pblk, line, ppa);
56 } 55 }
56
57 atomic_dec(&pblk->inflight_io);
57} 58}
58 59
59/* Erase completion assumes that only one block is erased at the time */ 60/* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
61{ 62{
62 struct pblk *pblk = rqd->private; 63 struct pblk *pblk = rqd->private;
63 64
64 up(&pblk->erase_sem);
65 __pblk_end_io_erase(pblk, rqd); 65 __pblk_end_io_erase(pblk, rqd);
66 mempool_free(rqd, pblk->r_rq_pool); 66 mempool_free(rqd, pblk->g_rq_pool);
67} 67}
68 68
69static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, 69void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
70 u64 paddr) 70 u64 paddr)
71{ 71{
72 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 72 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
73 struct list_head *move_list = NULL; 73 struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
88 spin_unlock(&line->lock); 88 spin_unlock(&line->lock);
89 return; 89 return;
90 } 90 }
91 line->vsc--; 91 le32_add_cpu(line->vsc, -1);
92 92
93 if (line->state == PBLK_LINESTATE_CLOSED) 93 if (line->state == PBLK_LINESTATE_CLOSED)
94 move_list = pblk_line_gc_list(pblk, line); 94 move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
130 __pblk_map_invalidate(pblk, line, paddr); 130 __pblk_map_invalidate(pblk, line, paddr);
131} 131}
132 132
133void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
134 u64 paddr)
135{
136 __pblk_map_invalidate(pblk, line, paddr);
137
138 pblk_rb_sync_init(&pblk->rwb, NULL);
139 line->left_ssecs--;
140 if (!line->left_ssecs)
141 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
142 pblk_rb_sync_end(&pblk->rwb, NULL);
143}
144
145static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, 133static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
146 unsigned int nr_secs) 134 unsigned int nr_secs)
147{ 135{
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
172 pool = pblk->w_rq_pool; 160 pool = pblk->w_rq_pool;
173 rq_size = pblk_w_rq_size; 161 rq_size = pblk_w_rq_size;
174 } else { 162 } else {
175 pool = pblk->r_rq_pool; 163 pool = pblk->g_rq_pool;
176 rq_size = pblk_r_rq_size; 164 rq_size = pblk_g_rq_size;
177 } 165 }
178 166
179 rqd = mempool_alloc(pool, GFP_KERNEL); 167 rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
189 if (rw == WRITE) 177 if (rw == WRITE)
190 pool = pblk->w_rq_pool; 178 pool = pblk->w_rq_pool;
191 else 179 else
192 pool = pblk->r_rq_pool; 180 pool = pblk->g_rq_pool;
193 181
194 mempool_free(rqd, pool); 182 mempool_free(rqd, pool);
195} 183}
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
271 complete(waiting); 259 complete(waiting);
272} 260}
273 261
274void pblk_flush_writer(struct pblk *pblk) 262void pblk_wait_for_meta(struct pblk *pblk)
275{ 263{
276 struct bio *bio; 264 do {
277 int ret; 265 if (!atomic_read(&pblk->inflight_io))
278 DECLARE_COMPLETION_ONSTACK(wait); 266 break;
279
280 bio = bio_alloc(GFP_KERNEL, 1);
281 if (!bio)
282 return;
283
284 bio->bi_iter.bi_sector = 0; /* internal bio */
285 bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
286 bio->bi_private = &wait;
287 bio->bi_end_io = pblk_end_bio_sync;
288 267
289 ret = pblk_write_to_cache(pblk, bio, 0); 268 schedule();
290 if (ret == NVM_IO_OK) { 269 } while (1);
291 if (!wait_for_completion_io_timeout(&wait, 270}
292 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
293 pr_err("pblk: flush cache timed out\n");
294 }
295 } else if (ret != NVM_IO_DONE) {
296 pr_err("pblk: tear down bio failed\n");
297 }
298 271
299 if (bio->bi_error) 272static void pblk_flush_writer(struct pblk *pblk)
300 pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error); 273{
274 pblk_rb_flush(&pblk->rwb);
275 do {
276 if (!pblk_rb_sync_count(&pblk->rwb))
277 break;
301 278
302 bio_put(bio); 279 pblk_write_kick(pblk);
280 schedule();
281 } while (1);
303} 282}
304 283
305struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) 284struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
307 struct pblk_line_meta *lm = &pblk->lm; 286 struct pblk_line_meta *lm = &pblk->lm;
308 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 287 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
309 struct list_head *move_list = NULL; 288 struct list_head *move_list = NULL;
289 int vsc = le32_to_cpu(*line->vsc);
310 290
311 if (!line->vsc) { 291 lockdep_assert_held(&line->lock);
292
293 if (!vsc) {
312 if (line->gc_group != PBLK_LINEGC_FULL) { 294 if (line->gc_group != PBLK_LINEGC_FULL) {
313 line->gc_group = PBLK_LINEGC_FULL; 295 line->gc_group = PBLK_LINEGC_FULL;
314 move_list = &l_mg->gc_full_list; 296 move_list = &l_mg->gc_full_list;
315 } 297 }
316 } else if (line->vsc < lm->mid_thrs) { 298 } else if (vsc < lm->high_thrs) {
317 if (line->gc_group != PBLK_LINEGC_HIGH) { 299 if (line->gc_group != PBLK_LINEGC_HIGH) {
318 line->gc_group = PBLK_LINEGC_HIGH; 300 line->gc_group = PBLK_LINEGC_HIGH;
319 move_list = &l_mg->gc_high_list; 301 move_list = &l_mg->gc_high_list;
320 } 302 }
321 } else if (line->vsc < lm->high_thrs) { 303 } else if (vsc < lm->mid_thrs) {
322 if (line->gc_group != PBLK_LINEGC_MID) { 304 if (line->gc_group != PBLK_LINEGC_MID) {
323 line->gc_group = PBLK_LINEGC_MID; 305 line->gc_group = PBLK_LINEGC_MID;
324 move_list = &l_mg->gc_mid_list; 306 move_list = &l_mg->gc_mid_list;
325 } 307 }
326 } else if (line->vsc < line->sec_in_line) { 308 } else if (vsc < line->sec_in_line) {
327 if (line->gc_group != PBLK_LINEGC_LOW) { 309 if (line->gc_group != PBLK_LINEGC_LOW) {
328 line->gc_group = PBLK_LINEGC_LOW; 310 line->gc_group = PBLK_LINEGC_LOW;
329 move_list = &l_mg->gc_low_list; 311 move_list = &l_mg->gc_low_list;
330 } 312 }
331 } else if (line->vsc == line->sec_in_line) { 313 } else if (vsc == line->sec_in_line) {
332 if (line->gc_group != PBLK_LINEGC_EMPTY) { 314 if (line->gc_group != PBLK_LINEGC_EMPTY) {
333 line->gc_group = PBLK_LINEGC_EMPTY; 315 line->gc_group = PBLK_LINEGC_EMPTY;
334 move_list = &l_mg->gc_empty_list; 316 move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
338 line->gc_group = PBLK_LINEGC_NONE; 320 line->gc_group = PBLK_LINEGC_NONE;
339 move_list = &l_mg->corrupt_list; 321 move_list = &l_mg->corrupt_list;
340 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", 322 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
341 line->id, line->vsc, 323 line->id, vsc,
342 line->sec_in_line, 324 line->sec_in_line,
343 lm->high_thrs, lm->mid_thrs); 325 lm->high_thrs, lm->mid_thrs);
344 } 326 }
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
397#endif 379#endif
398} 380}
399 381
382void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
383{
384 pblk->sec_per_write = sec_per_write;
385}
386
400int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) 387int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
401{ 388{
402 struct nvm_tgt_dev *dev = pblk->dev; 389 struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
431 } 418 }
432 } 419 }
433#endif 420#endif
421
422 atomic_inc(&pblk->inflight_io);
423
434 return nvm_submit_io(dev, rqd); 424 return nvm_submit_io(dev, rqd);
435} 425}
436 426
437struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, 427struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
438 unsigned int nr_secs, unsigned int len, 428 unsigned int nr_secs, unsigned int len,
439 gfp_t gfp_mask) 429 int alloc_type, gfp_t gfp_mask)
440{ 430{
441 struct nvm_tgt_dev *dev = pblk->dev; 431 struct nvm_tgt_dev *dev = pblk->dev;
442 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
443 void *kaddr = data; 432 void *kaddr = data;
444 struct page *page; 433 struct page *page;
445 struct bio *bio; 434 struct bio *bio;
446 int i, ret; 435 int i, ret;
447 436
448 if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META) 437 if (alloc_type == PBLK_KMALLOC_META)
449 return bio_map_kern(dev->q, kaddr, len, gfp_mask); 438 return bio_map_kern(dev->q, kaddr, len, gfp_mask);
450 439
451 bio = bio_kmalloc(gfp_mask, nr_secs); 440 bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
478int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, 467int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
479 unsigned long secs_to_flush) 468 unsigned long secs_to_flush)
480{ 469{
481 int max = pblk->max_write_pgs; 470 int max = pblk->sec_per_write;
482 int min = pblk->min_write_pgs; 471 int min = pblk->min_write_pgs;
483 int secs_to_sync = 0; 472 int secs_to_sync = 0;
484 473
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
492 return secs_to_sync; 481 return secs_to_sync;
493} 482}
494 483
495static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, 484void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
496 int nr_secs) 485{
486 u64 addr;
487 int i;
488
489 addr = find_next_zero_bit(line->map_bitmap,
490 pblk->lm.sec_per_line, line->cur_sec);
491 line->cur_sec = addr - nr_secs;
492
493 for (i = 0; i < nr_secs; i++, line->cur_sec--)
494 WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
495}
496
497u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
497{ 498{
498 u64 addr; 499 u64 addr;
499 int i; 500 int i;
500 501
502 lockdep_assert_held(&line->lock);
503
501 /* logic error: ppa out-of-bounds. Prevent generating bad address */ 504 /* logic error: ppa out-of-bounds. Prevent generating bad address */
502 if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) { 505 if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
503 WARN(1, "pblk: page allocation out of bounds\n"); 506 WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
528 return addr; 531 return addr;
529} 532}
530 533
534u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
535{
536 u64 paddr;
537
538 spin_lock(&line->lock);
539 paddr = find_next_zero_bit(line->map_bitmap,
540 pblk->lm.sec_per_line, line->cur_sec);
541 spin_unlock(&line->lock);
542
543 return paddr;
544}
545
531/* 546/*
532 * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when 547 * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
533 * taking the per LUN semaphore. 548 * taking the per LUN semaphore.
534 */ 549 */
535static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, 550static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
536 u64 paddr, int dir) 551 void *emeta_buf, u64 paddr, int dir)
537{ 552{
538 struct nvm_tgt_dev *dev = pblk->dev; 553 struct nvm_tgt_dev *dev = pblk->dev;
539 struct nvm_geo *geo = &dev->geo; 554 struct nvm_geo *geo = &dev->geo;
555 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
540 struct pblk_line_meta *lm = &pblk->lm; 556 struct pblk_line_meta *lm = &pblk->lm;
557 void *ppa_list, *meta_list;
541 struct bio *bio; 558 struct bio *bio;
542 struct nvm_rq rqd; 559 struct nvm_rq rqd;
543 struct ppa_addr *ppa_list; 560 dma_addr_t dma_ppa_list, dma_meta_list;
544 dma_addr_t dma_ppa_list;
545 void *emeta = line->emeta;
546 int min = pblk->min_write_pgs; 561 int min = pblk->min_write_pgs;
547 int left_ppas = lm->emeta_sec; 562 int left_ppas = lm->emeta_sec[0];
548 int id = line->id; 563 int id = line->id;
549 int rq_ppas, rq_len; 564 int rq_ppas, rq_len;
550 int cmd_op, bio_op; 565 int cmd_op, bio_op;
551 int flags;
552 int i, j; 566 int i, j;
553 int ret; 567 int ret;
554 DECLARE_COMPLETION_ONSTACK(wait); 568 DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
556 if (dir == WRITE) { 570 if (dir == WRITE) {
557 bio_op = REQ_OP_WRITE; 571 bio_op = REQ_OP_WRITE;
558 cmd_op = NVM_OP_PWRITE; 572 cmd_op = NVM_OP_PWRITE;
559 flags = pblk_set_progr_mode(pblk, WRITE);
560 } else if (dir == READ) { 573 } else if (dir == READ) {
561 bio_op = REQ_OP_READ; 574 bio_op = REQ_OP_READ;
562 cmd_op = NVM_OP_PREAD; 575 cmd_op = NVM_OP_PREAD;
563 flags = pblk_set_read_mode(pblk);
564 } else 576 } else
565 return -EINVAL; 577 return -EINVAL;
566 578
567 ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list); 579 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
568 if (!ppa_list) 580 &dma_meta_list);
581 if (!meta_list)
569 return -ENOMEM; 582 return -ENOMEM;
570 583
584 ppa_list = meta_list + pblk_dma_meta_size;
585 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
586
571next_rq: 587next_rq:
572 memset(&rqd, 0, sizeof(struct nvm_rq)); 588 memset(&rqd, 0, sizeof(struct nvm_rq));
573 589
574 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 590 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
575 rq_len = rq_ppas * geo->sec_size; 591 rq_len = rq_ppas * geo->sec_size;
576 592
577 bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL); 593 bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
594 l_mg->emeta_alloc_type, GFP_KERNEL);
578 if (IS_ERR(bio)) { 595 if (IS_ERR(bio)) {
579 ret = PTR_ERR(bio); 596 ret = PTR_ERR(bio);
580 goto free_rqd_dma; 597 goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
584 bio_set_op_attrs(bio, bio_op, 0); 601 bio_set_op_attrs(bio, bio_op, 0);
585 602
586 rqd.bio = bio; 603 rqd.bio = bio;
587 rqd.opcode = cmd_op; 604 rqd.meta_list = meta_list;
588 rqd.flags = flags;
589 rqd.nr_ppas = rq_ppas;
590 rqd.ppa_list = ppa_list; 605 rqd.ppa_list = ppa_list;
606 rqd.dma_meta_list = dma_meta_list;
591 rqd.dma_ppa_list = dma_ppa_list; 607 rqd.dma_ppa_list = dma_ppa_list;
608 rqd.opcode = cmd_op;
609 rqd.nr_ppas = rq_ppas;
592 rqd.end_io = pblk_end_io_sync; 610 rqd.end_io = pblk_end_io_sync;
593 rqd.private = &wait; 611 rqd.private = &wait;
594 612
595 if (dir == WRITE) { 613 if (dir == WRITE) {
614 struct pblk_sec_meta *meta_list = rqd.meta_list;
615
616 rqd.flags = pblk_set_progr_mode(pblk, WRITE);
596 for (i = 0; i < rqd.nr_ppas; ) { 617 for (i = 0; i < rqd.nr_ppas; ) {
597 spin_lock(&line->lock); 618 spin_lock(&line->lock);
598 paddr = __pblk_alloc_page(pblk, line, min); 619 paddr = __pblk_alloc_page(pblk, line, min);
599 spin_unlock(&line->lock); 620 spin_unlock(&line->lock);
600 for (j = 0; j < min; j++, i++, paddr++) 621 for (j = 0; j < min; j++, i++, paddr++) {
622 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
601 rqd.ppa_list[i] = 623 rqd.ppa_list[i] =
602 addr_to_gen_ppa(pblk, paddr, id); 624 addr_to_gen_ppa(pblk, paddr, id);
625 }
603 } 626 }
604 } else { 627 } else {
605 for (i = 0; i < rqd.nr_ppas; ) { 628 for (i = 0; i < rqd.nr_ppas; ) {
606 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); 629 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
607 int pos = pblk_dev_ppa_to_pos(geo, ppa); 630 int pos = pblk_dev_ppa_to_pos(geo, ppa);
631 int read_type = PBLK_READ_RANDOM;
632
633 if (pblk_io_aligned(pblk, rq_ppas))
634 read_type = PBLK_READ_SEQUENTIAL;
635 rqd.flags = pblk_set_read_mode(pblk, read_type);
608 636
609 while (test_bit(pos, line->blk_bitmap)) { 637 while (test_bit(pos, line->blk_bitmap)) {
610 paddr += min; 638 paddr += min;
@@ -645,9 +673,11 @@ next_rq:
645 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 673 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
646 pr_err("pblk: emeta I/O timed out\n"); 674 pr_err("pblk: emeta I/O timed out\n");
647 } 675 }
676 atomic_dec(&pblk->inflight_io);
648 reinit_completion(&wait); 677 reinit_completion(&wait);
649 678
650 bio_put(bio); 679 if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
680 bio_put(bio);
651 681
652 if (rqd.error) { 682 if (rqd.error) {
653 if (dir == WRITE) 683 if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
656 pblk_log_read_err(pblk, &rqd); 686 pblk_log_read_err(pblk, &rqd);
657 } 687 }
658 688
659 emeta += rq_len; 689 emeta_buf += rq_len;
660 left_ppas -= rq_ppas; 690 left_ppas -= rq_ppas;
661 if (left_ppas) 691 if (left_ppas)
662 goto next_rq; 692 goto next_rq;
663free_rqd_dma: 693free_rqd_dma:
664 nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list); 694 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
665 return ret; 695 return ret;
666} 696}
667 697
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
697 bio_op = REQ_OP_WRITE; 727 bio_op = REQ_OP_WRITE;
698 cmd_op = NVM_OP_PWRITE; 728 cmd_op = NVM_OP_PWRITE;
699 flags = pblk_set_progr_mode(pblk, WRITE); 729 flags = pblk_set_progr_mode(pblk, WRITE);
700 lba_list = pblk_line_emeta_to_lbas(line->emeta); 730 lba_list = emeta_to_lbas(pblk, line->emeta->buf);
701 } else if (dir == READ) { 731 } else if (dir == READ) {
702 bio_op = REQ_OP_READ; 732 bio_op = REQ_OP_READ;
703 cmd_op = NVM_OP_PREAD; 733 cmd_op = NVM_OP_PREAD;
704 flags = pblk_set_read_mode(pblk); 734 flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
705 } else 735 } else
706 return -EINVAL; 736 return -EINVAL;
707 737
708 memset(&rqd, 0, sizeof(struct nvm_rq)); 738 memset(&rqd, 0, sizeof(struct nvm_rq));
709 739
710 rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 740 rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
711 &rqd.dma_ppa_list); 741 &rqd.dma_meta_list);
712 if (!rqd.ppa_list) 742 if (!rqd.meta_list)
713 return -ENOMEM; 743 return -ENOMEM;
714 744
745 rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
746 rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
747
715 bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); 748 bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
716 if (IS_ERR(bio)) { 749 if (IS_ERR(bio)) {
717 ret = PTR_ERR(bio); 750 ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
729 rqd.private = &wait; 762 rqd.private = &wait;
730 763
731 for (i = 0; i < lm->smeta_sec; i++, paddr++) { 764 for (i = 0; i < lm->smeta_sec; i++, paddr++) {
765 struct pblk_sec_meta *meta_list = rqd.meta_list;
766
732 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); 767 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
733 if (dir == WRITE) 768
734 lba_list[paddr] = cpu_to_le64(ADDR_EMPTY); 769 if (dir == WRITE) {
770 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
771
772 meta_list[i].lba = lba_list[paddr] = addr_empty;
773 }
735 } 774 }
736 775
737 /* 776 /*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
750 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 789 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
751 pr_err("pblk: smeta I/O timed out\n"); 790 pr_err("pblk: smeta I/O timed out\n");
752 } 791 }
792 atomic_dec(&pblk->inflight_io);
753 793
754 if (rqd.error) { 794 if (rqd.error) {
755 if (dir == WRITE) 795 if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
759 } 799 }
760 800
761free_ppa_list: 801free_ppa_list:
762 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list); 802 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
763 803
764 return ret; 804 return ret;
765} 805}
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
771 return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ); 811 return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
772} 812}
773 813
774int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line) 814int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
815 void *emeta_buf)
775{ 816{
776 return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ); 817 return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
818 line->emeta_ssec, READ);
777} 819}
778 820
779static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, 821static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
789static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) 831static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
790{ 832{
791 struct nvm_rq rqd; 833 struct nvm_rq rqd;
792 int ret; 834 int ret = 0;
793 DECLARE_COMPLETION_ONSTACK(wait); 835 DECLARE_COMPLETION_ONSTACK(wait);
794 836
795 memset(&rqd, 0, sizeof(struct nvm_rq)); 837 memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
824 rqd.private = pblk; 866 rqd.private = pblk;
825 __pblk_end_io_erase(pblk, &rqd); 867 __pblk_end_io_erase(pblk, &rqd);
826 868
827 return 0; 869 return ret;
828} 870}
829 871
830int pblk_line_erase(struct pblk *pblk, struct pblk_line *line) 872int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
831{ 873{
832 struct pblk_line_meta *lm = &pblk->lm; 874 struct pblk_line_meta *lm = &pblk->lm;
833 struct ppa_addr ppa; 875 struct ppa_addr ppa;
834 int bit = -1; 876 int ret, bit = -1;
835 877
836 /* Erase only good blocks, one at a time */ 878 /* Erase only good blocks, one at a time */
837 do { 879 do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
850 WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); 892 WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
851 spin_unlock(&line->lock); 893 spin_unlock(&line->lock);
852 894
853 if (pblk_blk_erase_sync(pblk, ppa)) { 895 ret = pblk_blk_erase_sync(pblk, ppa);
896 if (ret) {
854 pr_err("pblk: failed to erase line %d\n", line->id); 897 pr_err("pblk: failed to erase line %d\n", line->id);
855 return -ENOMEM; 898 return ret;
856 } 899 }
857 } while (1); 900 } while (1);
858 901
859 return 0; 902 return 0;
860} 903}
861 904
905static void pblk_line_setup_metadata(struct pblk_line *line,
906 struct pblk_line_mgmt *l_mg,
907 struct pblk_line_meta *lm)
908{
909 int meta_line;
910
911 lockdep_assert_held(&l_mg->free_lock);
912
913retry_meta:
914 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
915 if (meta_line == PBLK_DATA_LINES) {
916 spin_unlock(&l_mg->free_lock);
917 io_schedule();
918 spin_lock(&l_mg->free_lock);
919 goto retry_meta;
920 }
921
922 set_bit(meta_line, &l_mg->meta_bitmap);
923 line->meta_line = meta_line;
924
925 line->smeta = l_mg->sline_meta[meta_line];
926 line->emeta = l_mg->eline_meta[meta_line];
927
928 memset(line->smeta, 0, lm->smeta_len);
929 memset(line->emeta->buf, 0, lm->emeta_len[0]);
930
931 line->emeta->mem = 0;
932 atomic_set(&line->emeta->sync, 0);
933}
934
862/* For now lines are always assumed full lines. Thus, smeta former and current 935/* For now lines are always assumed full lines. Thus, smeta former and current
863 * lun bitmaps are omitted. 936 * lun bitmaps are omitted.
864 */ 937 */
865static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line, 938static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
866 struct pblk_line *cur) 939 struct pblk_line *cur)
867{ 940{
868 struct nvm_tgt_dev *dev = pblk->dev; 941 struct nvm_tgt_dev *dev = pblk->dev;
869 struct nvm_geo *geo = &dev->geo; 942 struct nvm_geo *geo = &dev->geo;
870 struct pblk_line_meta *lm = &pblk->lm; 943 struct pblk_line_meta *lm = &pblk->lm;
871 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 944 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
872 struct line_smeta *smeta = line->smeta; 945 struct pblk_emeta *emeta = line->emeta;
873 struct line_emeta *emeta = line->emeta; 946 struct line_emeta *emeta_buf = emeta->buf;
947 struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
874 int nr_blk_line; 948 int nr_blk_line;
875 949
876 /* After erasing the line, new bad blocks might appear and we risk 950 /* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
893 } 967 }
894 968
895 /* Run-time metadata */ 969 /* Run-time metadata */
896 line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta); 970 line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
897 971
898 /* Mark LUNs allocated in this line (all for now) */ 972 /* Mark LUNs allocated in this line (all for now) */
899 bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len); 973 bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
900 974
901 smeta->header.identifier = cpu_to_le32(PBLK_MAGIC); 975 smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
902 memcpy(smeta->header.uuid, pblk->instance_uuid, 16); 976 memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
903 smeta->header.id = cpu_to_le32(line->id); 977 smeta_buf->header.id = cpu_to_le32(line->id);
904 smeta->header.type = cpu_to_le16(line->type); 978 smeta_buf->header.type = cpu_to_le16(line->type);
905 smeta->header.version = cpu_to_le16(1); 979 smeta_buf->header.version = cpu_to_le16(1);
906 980
907 /* Start metadata */ 981 /* Start metadata */
908 smeta->seq_nr = cpu_to_le64(line->seq_nr); 982 smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
909 smeta->window_wr_lun = cpu_to_le32(geo->nr_luns); 983 smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
910 984
911 /* Fill metadata among lines */ 985 /* Fill metadata among lines */
912 if (cur) { 986 if (cur) {
913 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len); 987 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
914 smeta->prev_id = cpu_to_le32(cur->id); 988 smeta_buf->prev_id = cpu_to_le32(cur->id);
915 cur->emeta->next_id = cpu_to_le32(line->id); 989 cur->emeta->buf->next_id = cpu_to_le32(line->id);
916 } else { 990 } else {
917 smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY); 991 smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
918 } 992 }
919 993
920 /* All smeta must be set at this point */ 994 /* All smeta must be set at this point */
921 smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta)); 995 smeta_buf->header.crc = cpu_to_le32(
922 smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta)); 996 pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
997 smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
923 998
924 /* End metadata */ 999 /* End metadata */
925 memcpy(&emeta->header, &smeta->header, sizeof(struct line_header)); 1000 memcpy(&emeta_buf->header, &smeta_buf->header,
926 emeta->seq_nr = cpu_to_le64(line->seq_nr); 1001 sizeof(struct line_header));
927 emeta->nr_lbas = cpu_to_le64(line->sec_in_line); 1002 emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
928 emeta->nr_valid_lbas = cpu_to_le64(0); 1003 emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
929 emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY); 1004 emeta_buf->nr_valid_lbas = cpu_to_le64(0);
930 emeta->crc = cpu_to_le32(0); 1005 emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
931 emeta->prev_id = smeta->prev_id; 1006 emeta_buf->crc = cpu_to_le32(0);
1007 emeta_buf->prev_id = smeta_buf->prev_id;
932 1008
933 return 1; 1009 return 1;
934} 1010}
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
965 /* Mark smeta metadata sectors as bad sectors */ 1041 /* Mark smeta metadata sectors as bad sectors */
966 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); 1042 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
967 off = bit * geo->sec_per_pl; 1043 off = bit * geo->sec_per_pl;
968retry_smeta:
969 bitmap_set(line->map_bitmap, off, lm->smeta_sec); 1044 bitmap_set(line->map_bitmap, off, lm->smeta_sec);
970 line->sec_in_line -= lm->smeta_sec; 1045 line->sec_in_line -= lm->smeta_sec;
971 line->smeta_ssec = off; 1046 line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
973 1048
974 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) { 1049 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
975 pr_debug("pblk: line smeta I/O failed. Retry\n"); 1050 pr_debug("pblk: line smeta I/O failed. Retry\n");
976 off += geo->sec_per_pl; 1051 return 1;
977 goto retry_smeta;
978 } 1052 }
979 1053
980 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); 1054 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
983 * blocks to make sure that there are enough sectors to store emeta 1057 * blocks to make sure that there are enough sectors to store emeta
984 */ 1058 */
985 bit = lm->sec_per_line; 1059 bit = lm->sec_per_line;
986 off = lm->sec_per_line - lm->emeta_sec; 1060 off = lm->sec_per_line - lm->emeta_sec[0];
987 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec); 1061 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
988 while (nr_bb) { 1062 while (nr_bb) {
989 off -= geo->sec_per_pl; 1063 off -= geo->sec_per_pl;
990 if (!test_bit(off, line->invalid_bitmap)) { 1064 if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
993 } 1067 }
994 } 1068 }
995 1069
996 line->sec_in_line -= lm->emeta_sec; 1070 line->sec_in_line -= lm->emeta_sec[0];
997 line->emeta_ssec = off; 1071 line->emeta_ssec = off;
998 line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line; 1072 line->nr_valid_lbas = 0;
1073 line->left_msecs = line->sec_in_line;
1074 *line->vsc = cpu_to_le32(line->sec_in_line);
999 1075
1000 if (lm->sec_per_line - line->sec_in_line != 1076 if (lm->sec_per_line - line->sec_in_line !=
1001 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { 1077 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1034 1110
1035 spin_lock(&line->lock); 1111 spin_lock(&line->lock);
1036 if (line->state != PBLK_LINESTATE_FREE) { 1112 if (line->state != PBLK_LINESTATE_FREE) {
1113 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1114 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1037 spin_unlock(&line->lock); 1115 spin_unlock(&line->lock);
1038 WARN(1, "pblk: corrupted line state\n"); 1116 WARN(1, "pblk: corrupted line %d, state %d\n",
1039 return -EINTR; 1117 line->id, line->state);
1118 return -EAGAIN;
1040 } 1119 }
1120
1041 line->state = PBLK_LINESTATE_OPEN; 1121 line->state = PBLK_LINESTATE_OPEN;
1042 1122
1043 atomic_set(&line->left_eblks, blk_in_line); 1123 atomic_set(&line->left_eblks, blk_in_line);
1044 atomic_set(&line->left_seblks, blk_in_line); 1124 atomic_set(&line->left_seblks, blk_in_line);
1125
1126 line->meta_distance = lm->meta_distance;
1045 spin_unlock(&line->lock); 1127 spin_unlock(&line->lock);
1046 1128
1047 /* Bad blocks do not need to be erased */ 1129 /* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
1091{ 1173{
1092 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1174 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1093 struct pblk_line_meta *lm = &pblk->lm; 1175 struct pblk_line_meta *lm = &pblk->lm;
1094 struct pblk_line *line = NULL; 1176 struct pblk_line *line;
1095 int bit; 1177 int ret, bit;
1096 1178
1097 lockdep_assert_held(&l_mg->free_lock); 1179 lockdep_assert_held(&l_mg->free_lock);
1098 1180
1099retry_get: 1181retry:
1100 if (list_empty(&l_mg->free_list)) { 1182 if (list_empty(&l_mg->free_list)) {
1101 pr_err("pblk: no free lines\n"); 1183 pr_err("pblk: no free lines\n");
1102 goto out; 1184 return NULL;
1103 } 1185 }
1104 1186
1105 line = list_first_entry(&l_mg->free_list, struct pblk_line, list); 1187 line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
1115 list_add_tail(&line->list, &l_mg->bad_list); 1197 list_add_tail(&line->list, &l_mg->bad_list);
1116 1198
1117 pr_debug("pblk: line %d is bad\n", line->id); 1199 pr_debug("pblk: line %d is bad\n", line->id);
1118 goto retry_get; 1200 goto retry;
1119 } 1201 }
1120 1202
1121 if (pblk_line_prepare(pblk, line)) { 1203 ret = pblk_line_prepare(pblk, line);
1122 pr_err("pblk: failed to prepare line %d\n", line->id); 1204 if (ret) {
1123 list_add(&line->list, &l_mg->free_list); 1205 if (ret == -EAGAIN) {
1124 return NULL; 1206 list_add(&line->list, &l_mg->corrupt_list);
1207 goto retry;
1208 } else {
1209 pr_err("pblk: failed to prepare line %d\n", line->id);
1210 list_add(&line->list, &l_mg->free_list);
1211 l_mg->nr_free_lines++;
1212 return NULL;
1213 }
1125 } 1214 }
1126 1215
1127out:
1128 return line; 1216 return line;
1129} 1217}
1130 1218
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1134 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1222 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1135 struct pblk_line *retry_line; 1223 struct pblk_line *retry_line;
1136 1224
1225retry:
1137 spin_lock(&l_mg->free_lock); 1226 spin_lock(&l_mg->free_lock);
1138 retry_line = pblk_line_get(pblk); 1227 retry_line = pblk_line_get(pblk);
1139 if (!retry_line) { 1228 if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1150 l_mg->data_line = retry_line; 1239 l_mg->data_line = retry_line;
1151 spin_unlock(&l_mg->free_lock); 1240 spin_unlock(&l_mg->free_lock);
1152 1241
1153 if (pblk_line_erase(pblk, retry_line)) {
1154 spin_lock(&l_mg->free_lock);
1155 l_mg->data_line = NULL;
1156 spin_unlock(&l_mg->free_lock);
1157 return NULL;
1158 }
1159
1160 pblk_rl_free_lines_dec(&pblk->rl, retry_line); 1242 pblk_rl_free_lines_dec(&pblk->rl, retry_line);
1161 1243
1244 if (pblk_line_erase(pblk, retry_line))
1245 goto retry;
1246
1162 return retry_line; 1247 return retry_line;
1163} 1248}
1164 1249
1250static void pblk_set_space_limit(struct pblk *pblk)
1251{
1252 struct pblk_rl *rl = &pblk->rl;
1253
1254 atomic_set(&rl->rb_space, 0);
1255}
1256
1165struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) 1257struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1166{ 1258{
1167 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1259 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1168 struct pblk_line *line; 1260 struct pblk_line *line;
1169 int meta_line;
1170 int is_next = 0; 1261 int is_next = 0;
1171 1262
1172 spin_lock(&l_mg->free_lock); 1263 spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1180 line->type = PBLK_LINETYPE_DATA; 1271 line->type = PBLK_LINETYPE_DATA;
1181 l_mg->data_line = line; 1272 l_mg->data_line = line;
1182 1273
1183 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); 1274 pblk_line_setup_metadata(line, l_mg, &pblk->lm);
1184 set_bit(meta_line, &l_mg->meta_bitmap);
1185 line->smeta = l_mg->sline_meta[meta_line].meta;
1186 line->emeta = l_mg->eline_meta[meta_line].meta;
1187 line->meta_line = meta_line;
1188 1275
1189 /* Allocate next line for preparation */ 1276 /* Allocate next line for preparation */
1190 l_mg->data_next = pblk_line_get(pblk); 1277 l_mg->data_next = pblk_line_get(pblk);
1191 if (l_mg->data_next) { 1278 if (!l_mg->data_next) {
1279 /* If we cannot get a new line, we need to stop the pipeline.
1280 * Only allow as many writes in as we can store safely and then
1281 * fail gracefully
1282 */
1283 pblk_set_space_limit(pblk);
1284
1285 l_mg->data_next = NULL;
1286 } else {
1192 l_mg->data_next->seq_nr = l_mg->d_seq_nr++; 1287 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1193 l_mg->data_next->type = PBLK_LINETYPE_DATA; 1288 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1194 is_next = 1; 1289 is_next = 1;
1195 } 1290 }
1196 spin_unlock(&l_mg->free_lock); 1291 spin_unlock(&l_mg->free_lock);
1197 1292
1293 if (pblk_line_erase(pblk, line)) {
1294 line = pblk_line_retry(pblk, line);
1295 if (!line)
1296 return NULL;
1297 }
1298
1198 pblk_rl_free_lines_dec(&pblk->rl, line); 1299 pblk_rl_free_lines_dec(&pblk->rl, line);
1199 if (is_next) 1300 if (is_next)
1200 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); 1301 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1201 1302
1202 if (pblk_line_erase(pblk, line))
1203 return NULL;
1204
1205retry_setup: 1303retry_setup:
1206 if (!pblk_line_set_metadata(pblk, line, NULL)) { 1304 if (!pblk_line_init_metadata(pblk, line, NULL)) {
1207 line = pblk_line_retry(pblk, line); 1305 line = pblk_line_retry(pblk, line);
1208 if (!line) 1306 if (!line)
1209 return NULL; 1307 return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
1222 return line; 1320 return line;
1223} 1321}
1224 1322
1225struct pblk_line *pblk_line_replace_data(struct pblk *pblk) 1323static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
1324{
1325 lockdep_assert_held(&pblk->l_mg.free_lock);
1326
1327 pblk_set_space_limit(pblk);
1328 pblk->state = PBLK_STATE_STOPPING;
1329}
1330
1331void pblk_pipeline_stop(struct pblk *pblk)
1332{
1333 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1334 int ret;
1335
1336 spin_lock(&l_mg->free_lock);
1337 if (pblk->state == PBLK_STATE_RECOVERING ||
1338 pblk->state == PBLK_STATE_STOPPED) {
1339 spin_unlock(&l_mg->free_lock);
1340 return;
1341 }
1342 pblk->state = PBLK_STATE_RECOVERING;
1343 spin_unlock(&l_mg->free_lock);
1344
1345 pblk_flush_writer(pblk);
1346 pblk_wait_for_meta(pblk);
1347
1348 ret = pblk_recov_pad(pblk);
1349 if (ret) {
1350 pr_err("pblk: could not close data on teardown(%d)\n", ret);
1351 return;
1352 }
1353
1354 flush_workqueue(pblk->bb_wq);
1355 pblk_line_close_meta_sync(pblk);
1356
1357 spin_lock(&l_mg->free_lock);
1358 pblk->state = PBLK_STATE_STOPPED;
1359 l_mg->data_line = NULL;
1360 l_mg->data_next = NULL;
1361 spin_unlock(&l_mg->free_lock);
1362}
1363
1364void pblk_line_replace_data(struct pblk *pblk)
1226{ 1365{
1227 struct pblk_line_meta *lm = &pblk->lm;
1228 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1366 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1229 struct pblk_line *cur, *new; 1367 struct pblk_line *cur, *new;
1230 unsigned int left_seblks; 1368 unsigned int left_seblks;
1231 int meta_line;
1232 int is_next = 0; 1369 int is_next = 0;
1233 1370
1234 cur = l_mg->data_line; 1371 cur = l_mg->data_line;
1235 new = l_mg->data_next; 1372 new = l_mg->data_next;
1236 if (!new) 1373 if (!new)
1237 return NULL; 1374 return;
1238 l_mg->data_line = new; 1375 l_mg->data_line = new;
1239 1376
1240retry_line: 1377 spin_lock(&l_mg->free_lock);
1378 if (pblk->state != PBLK_STATE_RUNNING) {
1379 l_mg->data_line = NULL;
1380 l_mg->data_next = NULL;
1381 spin_unlock(&l_mg->free_lock);
1382 return;
1383 }
1384
1385 pblk_line_setup_metadata(new, l_mg, &pblk->lm);
1386 spin_unlock(&l_mg->free_lock);
1387
1388retry_erase:
1241 left_seblks = atomic_read(&new->left_seblks); 1389 left_seblks = atomic_read(&new->left_seblks);
1242 if (left_seblks) { 1390 if (left_seblks) {
1243 /* If line is not fully erased, erase it */ 1391 /* If line is not fully erased, erase it */
1244 if (atomic_read(&new->left_eblks)) { 1392 if (atomic_read(&new->left_eblks)) {
1245 if (pblk_line_erase(pblk, new)) 1393 if (pblk_line_erase(pblk, new))
1246 return NULL; 1394 return;
1247 } else { 1395 } else {
1248 io_schedule(); 1396 io_schedule();
1249 } 1397 }
1250 goto retry_line; 1398 goto retry_erase;
1251 } 1399 }
1252 1400
1253 spin_lock(&l_mg->free_lock);
1254 /* Allocate next line for preparation */
1255 l_mg->data_next = pblk_line_get(pblk);
1256 if (l_mg->data_next) {
1257 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1258 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1259 is_next = 1;
1260 }
1261
1262retry_meta:
1263 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
1264 if (meta_line == PBLK_DATA_LINES) {
1265 spin_unlock(&l_mg->free_lock);
1266 io_schedule();
1267 spin_lock(&l_mg->free_lock);
1268 goto retry_meta;
1269 }
1270
1271 set_bit(meta_line, &l_mg->meta_bitmap);
1272 new->smeta = l_mg->sline_meta[meta_line].meta;
1273 new->emeta = l_mg->eline_meta[meta_line].meta;
1274 new->meta_line = meta_line;
1275
1276 memset(new->smeta, 0, lm->smeta_len);
1277 memset(new->emeta, 0, lm->emeta_len);
1278 spin_unlock(&l_mg->free_lock);
1279
1280 if (is_next)
1281 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1282
1283retry_setup: 1401retry_setup:
1284 if (!pblk_line_set_metadata(pblk, new, cur)) { 1402 if (!pblk_line_init_metadata(pblk, new, cur)) {
1285 new = pblk_line_retry(pblk, new); 1403 new = pblk_line_retry(pblk, new);
1286 if (!new) 1404 if (!new)
1287 return NULL; 1405 return;
1288 1406
1289 goto retry_setup; 1407 goto retry_setup;
1290 } 1408 }
@@ -1292,12 +1410,30 @@ retry_setup:
1292 if (!pblk_line_init_bb(pblk, new, 1)) { 1410 if (!pblk_line_init_bb(pblk, new, 1)) {
1293 new = pblk_line_retry(pblk, new); 1411 new = pblk_line_retry(pblk, new);
1294 if (!new) 1412 if (!new)
1295 return NULL; 1413 return;
1296 1414
1297 goto retry_setup; 1415 goto retry_setup;
1298 } 1416 }
1299 1417
1300 return new; 1418 /* Allocate next line for preparation */
1419 spin_lock(&l_mg->free_lock);
1420 l_mg->data_next = pblk_line_get(pblk);
1421 if (!l_mg->data_next) {
1422 /* If we cannot get a new line, we need to stop the pipeline.
1423 * Only allow as many writes in as we can store safely and then
1424 * fail gracefully
1425 */
1426 pblk_stop_writes(pblk, new);
1427 l_mg->data_next = NULL;
1428 } else {
1429 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1430 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1431 is_next = 1;
1432 }
1433 spin_unlock(&l_mg->free_lock);
1434
1435 if (is_next)
1436 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1301} 1437}
1302 1438
1303void pblk_line_free(struct pblk *pblk, struct pblk_line *line) 1439void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
1307 if (line->invalid_bitmap) 1443 if (line->invalid_bitmap)
1308 mempool_free(line->invalid_bitmap, pblk->line_meta_pool); 1444 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1309 1445
1446 *line->vsc = cpu_to_le32(EMPTY_ENTRY);
1447
1310 line->map_bitmap = NULL; 1448 line->map_bitmap = NULL;
1311 line->invalid_bitmap = NULL; 1449 line->invalid_bitmap = NULL;
1312 line->smeta = NULL; 1450 line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
1339 struct nvm_rq *rqd; 1477 struct nvm_rq *rqd;
1340 int err; 1478 int err;
1341 1479
1342 rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL); 1480 rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
1343 memset(rqd, 0, pblk_r_rq_size); 1481 memset(rqd, 0, pblk_g_rq_size);
1344 1482
1345 pblk_setup_e_rq(pblk, rqd, ppa); 1483 pblk_setup_e_rq(pblk, rqd, ppa);
1346 1484
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
1368 return pblk->l_mg.data_line; 1506 return pblk->l_mg.data_line;
1369} 1507}
1370 1508
1371struct pblk_line *pblk_line_get_data_next(struct pblk *pblk) 1509/* For now, always erase next line */
1510struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
1372{ 1511{
1373 return pblk->l_mg.data_next; 1512 return pblk->l_mg.data_next;
1374} 1513}
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
1378 return (line->left_msecs == 0); 1517 return (line->left_msecs == 0);
1379} 1518}
1380 1519
1520void pblk_line_close_meta_sync(struct pblk *pblk)
1521{
1522 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1523 struct pblk_line_meta *lm = &pblk->lm;
1524 struct pblk_line *line, *tline;
1525 LIST_HEAD(list);
1526
1527 spin_lock(&l_mg->close_lock);
1528 if (list_empty(&l_mg->emeta_list)) {
1529 spin_unlock(&l_mg->close_lock);
1530 return;
1531 }
1532
1533 list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
1534 spin_unlock(&l_mg->close_lock);
1535
1536 list_for_each_entry_safe(line, tline, &list, list) {
1537 struct pblk_emeta *emeta = line->emeta;
1538
1539 while (emeta->mem < lm->emeta_len[0]) {
1540 int ret;
1541
1542 ret = pblk_submit_meta_io(pblk, line);
1543 if (ret) {
1544 pr_err("pblk: sync meta line %d failed (%d)\n",
1545 line->id, ret);
1546 return;
1547 }
1548 }
1549 }
1550
1551 pblk_wait_for_meta(pblk);
1552 flush_workqueue(pblk->close_wq);
1553}
1554
1555static void pblk_line_should_sync_meta(struct pblk *pblk)
1556{
1557 if (pblk_rl_is_limit(&pblk->rl))
1558 pblk_line_close_meta_sync(pblk);
1559}
1560
1381void pblk_line_close(struct pblk *pblk, struct pblk_line *line) 1561void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1382{ 1562{
1383 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1563 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1384 struct list_head *move_list; 1564 struct list_head *move_list;
1385 1565
1386 line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta)); 1566#ifdef CONFIG_NVM_DEBUG
1387 1567 struct pblk_line_meta *lm = &pblk->lm;
1388 if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
1389 pr_err("pblk: line %d close I/O failed\n", line->id);
1390 1568
1391 WARN(!bitmap_full(line->map_bitmap, line->sec_in_line), 1569 WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
1392 "pblk: corrupt closed line %d\n", line->id); 1570 "pblk: corrupt closed line %d\n", line->id);
1571#endif
1393 1572
1394 spin_lock(&l_mg->free_lock); 1573 spin_lock(&l_mg->free_lock);
1395 WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap)); 1574 WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1410 1589
1411 spin_unlock(&line->lock); 1590 spin_unlock(&line->lock);
1412 spin_unlock(&l_mg->gc_lock); 1591 spin_unlock(&l_mg->gc_lock);
1592
1593 pblk_gc_should_kick(pblk);
1594}
1595
1596void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
1597{
1598 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1599 struct pblk_line_meta *lm = &pblk->lm;
1600 struct pblk_emeta *emeta = line->emeta;
1601 struct line_emeta *emeta_buf = emeta->buf;
1602
1603 /* No need for exact vsc value; avoid a big line lock and take aprox. */
1604 memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
1605 memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
1606
1607 emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
1608 emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
1609
1610 spin_lock(&l_mg->close_lock);
1611 spin_lock(&line->lock);
1612 list_add_tail(&line->list, &l_mg->emeta_list);
1613 spin_unlock(&line->lock);
1614 spin_unlock(&l_mg->close_lock);
1615
1616 pblk_line_should_sync_meta(pblk);
1413} 1617}
1414 1618
1415void pblk_line_close_ws(struct work_struct *work) 1619void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
1449} 1653}
1450 1654
1451void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, 1655void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1452 void (*work)(struct work_struct *)) 1656 void (*work)(struct work_struct *),
1657 struct workqueue_struct *wq)
1453{ 1658{
1454 struct pblk_line_ws *line_ws; 1659 struct pblk_line_ws *line_ws;
1455 1660
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1462 line_ws->priv = priv; 1667 line_ws->priv = priv;
1463 1668
1464 INIT_WORK(&line_ws->ws, work); 1669 INIT_WORK(&line_ws->ws, work);
1465 queue_work(pblk->kw_wq, &line_ws->ws); 1670 queue_work(wq, &line_ws->ws);
1466} 1671}
1467 1672
1468void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, 1673void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1471 struct nvm_tgt_dev *dev = pblk->dev; 1676 struct nvm_tgt_dev *dev = pblk->dev;
1472 struct nvm_geo *geo = &dev->geo; 1677 struct nvm_geo *geo = &dev->geo;
1473 struct pblk_lun *rlun; 1678 struct pblk_lun *rlun;
1474 int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun; 1679 int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
1475 int ret; 1680 int ret;
1476 1681
1477 /* 1682 /*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1488 /* If the LUN has been locked for this same request, do no attempt to 1693 /* If the LUN has been locked for this same request, do no attempt to
1489 * lock it again 1694 * lock it again
1490 */ 1695 */
1491 if (test_and_set_bit(lun_id, lun_bitmap)) 1696 if (test_and_set_bit(pos, lun_bitmap))
1492 return; 1697 return;
1493 1698
1494 rlun = &pblk->luns[lun_id]; 1699 rlun = &pblk->luns[pos];
1495 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000)); 1700 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
1496 if (ret) { 1701 if (ret) {
1497 switch (ret) { 1702 switch (ret) {
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c6b63c..6090d28f7995 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
20 20
21static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) 21static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
22{ 22{
23 kfree(gc_rq->data); 23 vfree(gc_rq->data);
24 kfree(gc_rq->lba_list);
25 kfree(gc_rq); 24 kfree(gc_rq);
26} 25}
27 26
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
37 return 1; 36 return 1;
38 } 37 }
39 38
40 list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) { 39 list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
41 list_move_tail(&gc_rq->list, &w_list); 40 gc->w_entries = 0;
42 gc->w_entries--;
43 }
44 spin_unlock(&gc->w_lock); 41 spin_unlock(&gc->w_lock);
45 42
46 list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) { 43 list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
48 gc_rq->nr_secs, gc_rq->secs_to_gc, 45 gc_rq->nr_secs, gc_rq->secs_to_gc,
49 gc_rq->line, PBLK_IOTYPE_GC); 46 gc_rq->line, PBLK_IOTYPE_GC);
50 47
51 kref_put(&gc_rq->line->ref, pblk_line_put);
52
53 list_del(&gc_rq->list); 48 list_del(&gc_rq->list);
49 kref_put(&gc_rq->line->ref, pblk_line_put);
54 pblk_gc_free_gc_rq(gc_rq); 50 pblk_gc_free_gc_rq(gc_rq);
55 } 51 }
56 52
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
66 * Responsible for managing all memory related to a gc request. Also in case of 62 * Responsible for managing all memory related to a gc request. Also in case of
67 * failure 63 * failure
68 */ 64 */
69static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line, 65static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
70 u64 *lba_list, unsigned int nr_secs)
71{ 66{
72 struct nvm_tgt_dev *dev = pblk->dev; 67 struct nvm_tgt_dev *dev = pblk->dev;
73 struct nvm_geo *geo = &dev->geo; 68 struct nvm_geo *geo = &dev->geo;
74 struct pblk_gc *gc = &pblk->gc; 69 struct pblk_gc *gc = &pblk->gc;
75 struct pblk_gc_rq *gc_rq; 70 struct pblk_line *line = gc_rq->line;
76 void *data; 71 void *data;
77 unsigned int secs_to_gc; 72 unsigned int secs_to_gc;
78 int ret = NVM_IO_OK; 73 int ret = 0;
79 74
80 data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL); 75 data = vmalloc(gc_rq->nr_secs * geo->sec_size);
81 if (!data) { 76 if (!data) {
82 ret = NVM_IO_ERR; 77 ret = -ENOMEM;
83 goto free_lba_list; 78 goto out;
84 } 79 }
85 80
86 /* Read from GC victim block */ 81 /* Read from GC victim block */
87 if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs, 82 if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
88 &secs_to_gc, line)) { 83 &secs_to_gc, line)) {
89 ret = NVM_IO_ERR; 84 ret = -EFAULT;
90 goto free_data; 85 goto free_data;
91 } 86 }
92 87
93 if (!secs_to_gc) 88 if (!secs_to_gc)
94 goto free_data; 89 goto free_rq;
95
96 gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
97 if (!gc_rq) {
98 ret = NVM_IO_ERR;
99 goto free_data;
100 }
101 90
102 gc_rq->line = line;
103 gc_rq->data = data; 91 gc_rq->data = data;
104 gc_rq->lba_list = lba_list;
105 gc_rq->nr_secs = nr_secs;
106 gc_rq->secs_to_gc = secs_to_gc; 92 gc_rq->secs_to_gc = secs_to_gc;
107 93
108 kref_get(&line->ref);
109
110retry: 94retry:
111 spin_lock(&gc->w_lock); 95 spin_lock(&gc->w_lock);
112 if (gc->w_entries > 256) { 96 if (gc->w_entries >= PBLK_GC_W_QD) {
113 spin_unlock(&gc->w_lock); 97 spin_unlock(&gc->w_lock);
114 usleep_range(256, 1024); 98 pblk_gc_writer_kick(&pblk->gc);
99 usleep_range(128, 256);
115 goto retry; 100 goto retry;
116 } 101 }
117 gc->w_entries++; 102 gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
120 105
121 pblk_gc_writer_kick(&pblk->gc); 106 pblk_gc_writer_kick(&pblk->gc);
122 107
123 return NVM_IO_OK; 108 return 0;
124 109
110free_rq:
111 kfree(gc_rq);
125free_data: 112free_data:
126 kfree(data); 113 vfree(data);
127free_lba_list: 114out:
128 kfree(lba_list); 115 kref_put(&line->ref, pblk_line_put);
129
130 return ret; 116 return ret;
131} 117}
132 118
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
150 136
151static void pblk_gc_line_ws(struct work_struct *work) 137static void pblk_gc_line_ws(struct work_struct *work)
152{ 138{
139 struct pblk_line_ws *line_rq_ws = container_of(work,
140 struct pblk_line_ws, ws);
141 struct pblk *pblk = line_rq_ws->pblk;
142 struct pblk_gc *gc = &pblk->gc;
143 struct pblk_line *line = line_rq_ws->line;
144 struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
145
146 up(&gc->gc_sem);
147
148 if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
149 pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
150 line->id, *line->vsc,
151 gc_rq->nr_secs);
152 }
153
154 mempool_free(line_rq_ws, pblk->line_ws_pool);
155}
156
157static void pblk_gc_line_prepare_ws(struct work_struct *work)
158{
153 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, 159 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
154 ws); 160 ws);
155 struct pblk *pblk = line_ws->pblk; 161 struct pblk *pblk = line_ws->pblk;
156 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
157 struct pblk_line *line = line_ws->line; 162 struct pblk_line *line = line_ws->line;
163 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
158 struct pblk_line_meta *lm = &pblk->lm; 164 struct pblk_line_meta *lm = &pblk->lm;
159 __le64 *lba_list = line_ws->priv; 165 struct pblk_gc *gc = &pblk->gc;
160 u64 *gc_list; 166 struct line_emeta *emeta_buf;
161 int sec_left; 167 struct pblk_line_ws *line_rq_ws;
162 int nr_ppas, bit; 168 struct pblk_gc_rq *gc_rq;
163 int put_line = 1; 169 __le64 *lba_list;
170 int sec_left, nr_secs, bit;
171 int ret;
164 172
165 pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id); 173 emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
174 GFP_KERNEL);
175 if (!emeta_buf) {
176 pr_err("pblk: cannot use GC emeta\n");
177 return;
178 }
166 179
167 spin_lock(&line->lock); 180 ret = pblk_line_read_emeta(pblk, line, emeta_buf);
168 sec_left = line->vsc; 181 if (ret) {
169 if (!sec_left) { 182 pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
170 /* Lines are erased before being used (l_mg->data_/log_next) */ 183 goto fail_free_emeta;
171 spin_unlock(&line->lock); 184 }
172 goto out; 185
186 /* If this read fails, it means that emeta is corrupted. For now, leave
187 * the line untouched. TODO: Implement a recovery routine that scans and
188 * moves all sectors on the line.
189 */
190 lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
191 if (!lba_list) {
192 pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
193 goto fail_free_emeta;
173 } 194 }
174 spin_unlock(&line->lock);
175 195
196 sec_left = pblk_line_vsc(line);
176 if (sec_left < 0) { 197 if (sec_left < 0) {
177 pr_err("pblk: corrupted GC line (%d)\n", line->id); 198 pr_err("pblk: corrupted GC line (%d)\n", line->id);
178 put_line = 0; 199 goto fail_free_emeta;
179 pblk_put_line_back(pblk, line);
180 goto out;
181 } 200 }
182 201
183 bit = -1; 202 bit = -1;
184next_rq: 203next_rq:
185 gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL); 204 gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
186 if (!gc_list) { 205 if (!gc_rq)
187 put_line = 0; 206 goto fail_free_emeta;
188 pblk_put_line_back(pblk, line);
189 goto out;
190 }
191 207
192 nr_ppas = 0; 208 nr_secs = 0;
193 do { 209 do {
194 bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line, 210 bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
195 bit + 1); 211 bit + 1);
196 if (bit > line->emeta_ssec) 212 if (bit > line->emeta_ssec)
197 break; 213 break;
198 214
199 gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]); 215 gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
200 } while (nr_ppas < pblk->max_write_pgs); 216 } while (nr_secs < pblk->max_write_pgs);
201 217
202 if (unlikely(!nr_ppas)) { 218 if (unlikely(!nr_secs)) {
203 kfree(gc_list); 219 kfree(gc_rq);
204 goto out; 220 goto out;
205 } 221 }
206 222
207 if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) { 223 gc_rq->nr_secs = nr_secs;
208 pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n", 224 gc_rq->line = line;
209 line->id, line->vsc, 225
210 nr_ppas, nr_ppas); 226 line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
211 put_line = 0; 227 if (!line_rq_ws)
212 pblk_put_line_back(pblk, line); 228 goto fail_free_gc_rq;
213 goto out;
214 }
215 229
216 sec_left -= nr_ppas; 230 line_rq_ws->pblk = pblk;
231 line_rq_ws->line = line;
232 line_rq_ws->priv = gc_rq;
233
234 down(&gc->gc_sem);
235 kref_get(&line->ref);
236
237 INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
238 queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
239
240 sec_left -= nr_secs;
217 if (sec_left > 0) 241 if (sec_left > 0)
218 goto next_rq; 242 goto next_rq;
219 243
220out: 244out:
221 pblk_mfree(line->emeta, l_mg->emeta_alloc_type); 245 pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
222 mempool_free(line_ws, pblk->line_ws_pool); 246 mempool_free(line_ws, pblk->line_ws_pool);
223 atomic_dec(&pblk->gc.inflight_gc); 247
224 if (put_line) 248 kref_put(&line->ref, pblk_line_put);
225 kref_put(&line->ref, pblk_line_put); 249 atomic_dec(&gc->inflight_gc);
250
251 return;
252
253fail_free_gc_rq:
254 kfree(gc_rq);
255fail_free_emeta:
256 pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
257 pblk_put_line_back(pblk, line);
258 kref_put(&line->ref, pblk_line_put);
259 mempool_free(line_ws, pblk->line_ws_pool);
260 atomic_dec(&gc->inflight_gc);
261
262 pr_err("pblk: Failed to GC line %d\n", line->id);
226} 263}
227 264
228static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) 265static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
229{ 266{
230 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 267 struct pblk_gc *gc = &pblk->gc;
231 struct pblk_line_meta *lm = &pblk->lm;
232 struct pblk_line_ws *line_ws; 268 struct pblk_line_ws *line_ws;
233 __le64 *lba_list;
234 int ret;
235 269
236 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL); 270 pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
237 line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
238 GFP_KERNEL);
239 if (!line->emeta) {
240 pr_err("pblk: cannot use GC emeta\n");
241 goto fail_free_ws;
242 }
243
244 ret = pblk_line_read_emeta(pblk, line);
245 if (ret) {
246 pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
247 goto fail_free_emeta;
248 }
249 271
250 /* If this read fails, it means that emeta is corrupted. For now, leave 272 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
251 * the line untouched. TODO: Implement a recovery routine that scans and 273 if (!line_ws)
252 * moves all sectors on the line. 274 return -ENOMEM;
253 */
254 lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
255 if (!lba_list) {
256 pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
257 goto fail_free_emeta;
258 }
259 275
260 line_ws->pblk = pblk; 276 line_ws->pblk = pblk;
261 line_ws->line = line; 277 line_ws->line = line;
262 line_ws->priv = lba_list;
263 278
264 INIT_WORK(&line_ws->ws, pblk_gc_line_ws); 279 INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
265 queue_work(pblk->gc.gc_reader_wq, &line_ws->ws); 280 queue_work(gc->gc_reader_wq, &line_ws->ws);
266 281
267 return 0; 282 return 0;
283}
268 284
269fail_free_emeta: 285static int pblk_gc_read(struct pblk *pblk)
270 pblk_mfree(line->emeta, l_mg->emeta_alloc_type); 286{
271fail_free_ws: 287 struct pblk_gc *gc = &pblk->gc;
272 mempool_free(line_ws, pblk->line_ws_pool); 288 struct pblk_line *line;
273 pblk_put_line_back(pblk, line); 289
290 spin_lock(&gc->r_lock);
291 if (list_empty(&gc->r_list)) {
292 spin_unlock(&gc->r_lock);
293 return 1;
294 }
295
296 line = list_first_entry(&gc->r_list, struct pblk_line, list);
297 list_del(&line->list);
298 spin_unlock(&gc->r_lock);
299
300 pblk_gc_kick(pblk);
274 301
275 return 1; 302 if (pblk_gc_line(pblk, line))
303 pr_err("pblk: failed to GC line %d\n", line->id);
304
305 return 0;
276} 306}
277 307
278static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list) 308static void pblk_gc_reader_kick(struct pblk_gc *gc)
279{ 309{
280 struct pblk_line *line, *tline; 310 wake_up_process(gc->gc_reader_ts);
311}
281 312
282 list_for_each_entry_safe(line, tline, gc_list, list) { 313static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
283 if (pblk_gc_line(pblk, line)) 314 struct list_head *group_list)
284 pr_err("pblk: failed to GC line %d\n", line->id); 315{
285 list_del(&line->list); 316 struct pblk_line *line, *victim;
317 int line_vsc, victim_vsc;
318
319 victim = list_first_entry(group_list, struct pblk_line, list);
320 list_for_each_entry(line, group_list, list) {
321 line_vsc = le32_to_cpu(*line->vsc);
322 victim_vsc = le32_to_cpu(*victim->vsc);
323 if (line_vsc < victim_vsc)
324 victim = line;
286 } 325 }
326
327 return victim;
328}
329
330static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
331{
332 unsigned int nr_blocks_free, nr_blocks_need;
333
334 nr_blocks_need = pblk_rl_high_thrs(rl);
335 nr_blocks_free = pblk_rl_nr_free_blks(rl);
336
337 /* This is not critical, no need to take lock here */
338 return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
287} 339}
288 340
289/* 341/*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
296{ 348{
297 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 349 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
298 struct pblk_gc *gc = &pblk->gc; 350 struct pblk_gc *gc = &pblk->gc;
299 struct pblk_line *line, *tline; 351 struct pblk_line *line;
300 unsigned int nr_blocks_free, nr_blocks_need;
301 struct list_head *group_list; 352 struct list_head *group_list;
302 int run_gc, gc_group = 0; 353 bool run_gc;
303 int prev_gc = 0; 354 int inflight_gc, gc_group = 0, prev_group = 0;
304 int inflight_gc = atomic_read(&gc->inflight_gc); 355
305 LIST_HEAD(gc_list); 356 do {
357 spin_lock(&l_mg->gc_lock);
358 if (list_empty(&l_mg->gc_full_list)) {
359 spin_unlock(&l_mg->gc_lock);
360 break;
361 }
362
363 line = list_first_entry(&l_mg->gc_full_list,
364 struct pblk_line, list);
306 365
307 spin_lock(&l_mg->gc_lock);
308 list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
309 spin_lock(&line->lock); 366 spin_lock(&line->lock);
310 WARN_ON(line->state != PBLK_LINESTATE_CLOSED); 367 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
311 line->state = PBLK_LINESTATE_GC; 368 line->state = PBLK_LINESTATE_GC;
312 spin_unlock(&line->lock); 369 spin_unlock(&line->lock);
313 370
314 list_del(&line->list); 371 list_del(&line->list);
372 spin_unlock(&l_mg->gc_lock);
373
315 kref_put(&line->ref, pblk_line_put); 374 kref_put(&line->ref, pblk_line_put);
316 } 375 } while (1);
317 spin_unlock(&l_mg->gc_lock);
318 376
319 nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl); 377 run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
320 nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl); 378 if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
321 run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); 379 return;
322 380
323next_gc_group: 381next_gc_group:
324 group_list = l_mg->gc_lists[gc_group++]; 382 group_list = l_mg->gc_lists[gc_group++];
325 spin_lock(&l_mg->gc_lock); 383
326 while (run_gc && !list_empty(group_list)) { 384 do {
327 /* No need to queue up more GC lines than we can handle */ 385 spin_lock(&l_mg->gc_lock);
328 if (!run_gc || inflight_gc > gc->gc_jobs_active) { 386 if (list_empty(group_list)) {
329 spin_unlock(&l_mg->gc_lock); 387 spin_unlock(&l_mg->gc_lock);
330 pblk_gc_lines(pblk, &gc_list); 388 break;
331 return;
332 } 389 }
333 390
334 line = list_first_entry(group_list, struct pblk_line, list); 391 line = pblk_gc_get_victim_line(pblk, group_list);
335 nr_blocks_free += atomic_read(&line->blk_in_line);
336 392
337 spin_lock(&line->lock); 393 spin_lock(&line->lock);
338 WARN_ON(line->state != PBLK_LINESTATE_CLOSED); 394 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
339 line->state = PBLK_LINESTATE_GC; 395 line->state = PBLK_LINESTATE_GC;
340 list_move_tail(&line->list, &gc_list);
341 atomic_inc(&gc->inflight_gc);
342 inflight_gc++;
343 spin_unlock(&line->lock); 396 spin_unlock(&line->lock);
344 397
345 prev_gc = 1; 398 list_del(&line->list);
346 run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); 399 spin_unlock(&l_mg->gc_lock);
347 } 400
348 spin_unlock(&l_mg->gc_lock); 401 spin_lock(&gc->r_lock);
402 list_add_tail(&line->list, &gc->r_list);
403 spin_unlock(&gc->r_lock);
349 404
350 pblk_gc_lines(pblk, &gc_list); 405 inflight_gc = atomic_inc_return(&gc->inflight_gc);
406 pblk_gc_reader_kick(gc);
351 407
352 if (!prev_gc && pblk->rl.rb_state > gc_group && 408 prev_group = 1;
353 gc_group < PBLK_NR_GC_LISTS) 409
410 /* No need to queue up more GC lines than we can handle */
411 run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
412 if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
413 break;
414 } while (1);
415
416 if (!prev_group && pblk->rl.rb_state > gc_group &&
417 gc_group < PBLK_GC_NR_LISTS)
354 goto next_gc_group; 418 goto next_gc_group;
355} 419}
356 420
357 421void pblk_gc_kick(struct pblk *pblk)
358static void pblk_gc_kick(struct pblk *pblk)
359{ 422{
360 struct pblk_gc *gc = &pblk->gc; 423 struct pblk_gc *gc = &pblk->gc;
361 424
362 wake_up_process(gc->gc_ts); 425 wake_up_process(gc->gc_ts);
363 pblk_gc_writer_kick(gc); 426 pblk_gc_writer_kick(gc);
427 pblk_gc_reader_kick(gc);
364 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); 428 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
365} 429}
366 430
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
398 return 0; 462 return 0;
399} 463}
400 464
401static void pblk_gc_start(struct pblk *pblk) 465static int pblk_gc_reader_ts(void *data)
402{ 466{
403 pblk->gc.gc_active = 1; 467 struct pblk *pblk = data;
404 468
405 pr_debug("pblk: gc start\n"); 469 while (!kthread_should_stop()) {
470 if (!pblk_gc_read(pblk))
471 continue;
472 set_current_state(TASK_INTERRUPTIBLE);
473 io_schedule();
474 }
475
476 return 0;
406} 477}
407 478
408int pblk_gc_status(struct pblk *pblk) 479static void pblk_gc_start(struct pblk *pblk)
409{ 480{
410 struct pblk_gc *gc = &pblk->gc; 481 pblk->gc.gc_active = 1;
411 int ret; 482 pr_debug("pblk: gc start\n");
412
413 spin_lock(&gc->lock);
414 ret = gc->gc_active;
415 spin_unlock(&gc->lock);
416
417 return ret;
418} 483}
419 484
420static void __pblk_gc_should_start(struct pblk *pblk) 485void pblk_gc_should_start(struct pblk *pblk)
421{ 486{
422 struct pblk_gc *gc = &pblk->gc; 487 struct pblk_gc *gc = &pblk->gc;
423 488
424 lockdep_assert_held(&gc->lock);
425
426 if (gc->gc_enabled && !gc->gc_active) 489 if (gc->gc_enabled && !gc->gc_active)
427 pblk_gc_start(pblk); 490 pblk_gc_start(pblk);
428}
429 491
430void pblk_gc_should_start(struct pblk *pblk) 492 pblk_gc_kick(pblk);
431{
432 struct pblk_gc *gc = &pblk->gc;
433
434 spin_lock(&gc->lock);
435 __pblk_gc_should_start(pblk);
436 spin_unlock(&gc->lock);
437} 493}
438 494
439/* 495/*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
442 */ 498 */
443static void pblk_gc_stop(struct pblk *pblk, int flush_wq) 499static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
444{ 500{
445 spin_lock(&pblk->gc.lock);
446 pblk->gc.gc_active = 0; 501 pblk->gc.gc_active = 0;
447 spin_unlock(&pblk->gc.lock);
448
449 pr_debug("pblk: gc stop\n"); 502 pr_debug("pblk: gc stop\n");
450} 503}
451 504
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
468 spin_unlock(&gc->lock); 521 spin_unlock(&gc->lock);
469} 522}
470 523
471void pblk_gc_sysfs_force(struct pblk *pblk, int force) 524int pblk_gc_sysfs_force(struct pblk *pblk, int force)
472{ 525{
473 struct pblk_gc *gc = &pblk->gc; 526 struct pblk_gc *gc = &pblk->gc;
474 int rsv = 0; 527
528 if (force < 0 || force > 1)
529 return -EINVAL;
475 530
476 spin_lock(&gc->lock); 531 spin_lock(&gc->lock);
477 if (force) {
478 gc->gc_enabled = 1;
479 rsv = 64;
480 }
481 pblk_rl_set_gc_rsc(&pblk->rl, rsv);
482 gc->gc_forced = force; 532 gc->gc_forced = force;
483 __pblk_gc_should_start(pblk); 533
534 if (force)
535 gc->gc_enabled = 1;
536 else
537 gc->gc_enabled = 0;
484 spin_unlock(&gc->lock); 538 spin_unlock(&gc->lock);
539
540 pblk_gc_should_start(pblk);
541
542 return 0;
485} 543}
486 544
487int pblk_gc_init(struct pblk *pblk) 545int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
503 goto fail_free_main_kthread; 561 goto fail_free_main_kthread;
504 } 562 }
505 563
564 gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
565 "pblk-gc-reader-ts");
566 if (IS_ERR(gc->gc_reader_ts)) {
567 pr_err("pblk: could not allocate GC reader kthread\n");
568 ret = PTR_ERR(gc->gc_reader_ts);
569 goto fail_free_writer_kthread;
570 }
571
506 setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk); 572 setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
507 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); 573 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
508 574
509 gc->gc_active = 0; 575 gc->gc_active = 0;
510 gc->gc_forced = 0; 576 gc->gc_forced = 0;
511 gc->gc_enabled = 1; 577 gc->gc_enabled = 1;
512 gc->gc_jobs_active = 8;
513 gc->w_entries = 0; 578 gc->w_entries = 0;
514 atomic_set(&gc->inflight_gc, 0); 579 atomic_set(&gc->inflight_gc, 0);
515 580
516 gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq", 581 /* Workqueue that reads valid sectors from a line and submit them to the
517 WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active); 582 * GC writer to be recycled.
583 */
584 gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
585 WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
586 if (!gc->gc_line_reader_wq) {
587 pr_err("pblk: could not allocate GC line reader workqueue\n");
588 ret = -ENOMEM;
589 goto fail_free_reader_kthread;
590 }
591
592 /* Workqueue that prepare lines for GC */
593 gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
594 WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
518 if (!gc->gc_reader_wq) { 595 if (!gc->gc_reader_wq) {
519 pr_err("pblk: could not allocate GC reader workqueue\n"); 596 pr_err("pblk: could not allocate GC reader workqueue\n");
520 ret = -ENOMEM; 597 ret = -ENOMEM;
521 goto fail_free_writer_kthread; 598 goto fail_free_reader_line_wq;
522 } 599 }
523 600
524 spin_lock_init(&gc->lock); 601 spin_lock_init(&gc->lock);
525 spin_lock_init(&gc->w_lock); 602 spin_lock_init(&gc->w_lock);
603 spin_lock_init(&gc->r_lock);
604
605 sema_init(&gc->gc_sem, 128);
606
526 INIT_LIST_HEAD(&gc->w_list); 607 INIT_LIST_HEAD(&gc->w_list);
608 INIT_LIST_HEAD(&gc->r_list);
527 609
528 return 0; 610 return 0;
529 611
612fail_free_reader_line_wq:
613 destroy_workqueue(gc->gc_line_reader_wq);
614fail_free_reader_kthread:
615 kthread_stop(gc->gc_reader_ts);
530fail_free_writer_kthread: 616fail_free_writer_kthread:
531 kthread_stop(gc->gc_writer_ts); 617 kthread_stop(gc->gc_writer_ts);
532fail_free_main_kthread: 618fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
540 struct pblk_gc *gc = &pblk->gc; 626 struct pblk_gc *gc = &pblk->gc;
541 627
542 flush_workqueue(gc->gc_reader_wq); 628 flush_workqueue(gc->gc_reader_wq);
629 flush_workqueue(gc->gc_line_reader_wq);
543 630
544 del_timer(&gc->gc_timer); 631 del_timer(&gc->gc_timer);
545 pblk_gc_stop(pblk, 1); 632 pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
547 if (gc->gc_ts) 634 if (gc->gc_ts)
548 kthread_stop(gc->gc_ts); 635 kthread_stop(gc->gc_ts);
549 636
550 if (pblk->gc.gc_reader_wq) 637 if (gc->gc_reader_wq)
551 destroy_workqueue(pblk->gc.gc_reader_wq); 638 destroy_workqueue(gc->gc_reader_wq);
639
640 if (gc->gc_line_reader_wq)
641 destroy_workqueue(gc->gc_line_reader_wq);
552 642
553 if (gc->gc_writer_ts) 643 if (gc->gc_writer_ts)
554 kthread_stop(gc->gc_writer_ts); 644 kthread_stop(gc->gc_writer_ts);
645
646 if (gc->gc_reader_ts)
647 kthread_stop(gc->gc_reader_ts);
555} 648}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d5af8b..1b0f61233c21 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
20 20
21#include "pblk.h" 21#include "pblk.h"
22 22
23static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache, 23static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
24 *pblk_w_rq_cache, *pblk_line_meta_cache; 24 *pblk_w_rq_cache, *pblk_line_meta_cache;
25static DECLARE_RWSEM(pblk_lock); 25static DECLARE_RWSEM(pblk_lock);
26struct bio_set *pblk_bio_set;
26 27
27static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, 28static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
28 struct bio *bio) 29 struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
33 * constraint. Writes can be of arbitrary size. 34 * constraint. Writes can be of arbitrary size.
34 */ 35 */
35 if (bio_data_dir(bio) == READ) { 36 if (bio_data_dir(bio) == READ) {
36 blk_queue_split(q, &bio, q->bio_split); 37 blk_queue_split(q, &bio);
37 ret = pblk_submit_read(pblk, bio); 38 ret = pblk_submit_read(pblk, bio);
38 if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) 39 if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
39 bio_put(bio); 40 bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
46 * available for user I/O. 47 * available for user I/O.
47 */ 48 */
48 if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) 49 if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
49 blk_queue_split(q, &bio, q->bio_split); 50 blk_queue_split(q, &bio);
50 51
51 return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); 52 return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
52} 53}
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
199 return -ENOMEM; 200 return -ENOMEM;
200 } 201 }
201 202
202 pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size, 203 pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
203 0, 0, NULL); 204 0, 0, NULL);
204 if (!pblk_r_rq_cache) { 205 if (!pblk_g_rq_cache) {
205 kmem_cache_destroy(pblk_blk_ws_cache); 206 kmem_cache_destroy(pblk_blk_ws_cache);
206 kmem_cache_destroy(pblk_rec_cache); 207 kmem_cache_destroy(pblk_rec_cache);
207 up_write(&pblk_lock); 208 up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
213 if (!pblk_w_rq_cache) { 214 if (!pblk_w_rq_cache) {
214 kmem_cache_destroy(pblk_blk_ws_cache); 215 kmem_cache_destroy(pblk_blk_ws_cache);
215 kmem_cache_destroy(pblk_rec_cache); 216 kmem_cache_destroy(pblk_rec_cache);
216 kmem_cache_destroy(pblk_r_rq_cache); 217 kmem_cache_destroy(pblk_g_rq_cache);
217 up_write(&pblk_lock); 218 up_write(&pblk_lock);
218 return -ENOMEM; 219 return -ENOMEM;
219 } 220 }
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
225 if (!pblk_line_meta_cache) { 226 if (!pblk_line_meta_cache) {
226 kmem_cache_destroy(pblk_blk_ws_cache); 227 kmem_cache_destroy(pblk_blk_ws_cache);
227 kmem_cache_destroy(pblk_rec_cache); 228 kmem_cache_destroy(pblk_rec_cache);
228 kmem_cache_destroy(pblk_r_rq_cache); 229 kmem_cache_destroy(pblk_g_rq_cache);
229 kmem_cache_destroy(pblk_w_rq_cache); 230 kmem_cache_destroy(pblk_w_rq_cache);
230 up_write(&pblk_lock); 231 up_write(&pblk_lock);
231 return -ENOMEM; 232 return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
239{ 240{
240 struct nvm_tgt_dev *dev = pblk->dev; 241 struct nvm_tgt_dev *dev = pblk->dev;
241 struct nvm_geo *geo = &dev->geo; 242 struct nvm_geo *geo = &dev->geo;
242 int max_write_ppas;
243 int mod;
244 243
245 pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
246 max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
247 pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
248 max_write_ppas : nvm_max_phys_sects(dev);
249 pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * 244 pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
250 geo->nr_planes * geo->nr_luns; 245 geo->nr_planes * geo->nr_luns;
251 246
252 if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
253 pr_err("pblk: cannot support device max_phys_sect\n");
254 return -EINVAL;
255 }
256
257 div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
258 if (mod) {
259 pr_err("pblk: bad configuration of sectors/pages\n");
260 return -EINVAL;
261 }
262
263 if (pblk_init_global_caches(pblk)) 247 if (pblk_init_global_caches(pblk))
264 return -ENOMEM; 248 return -ENOMEM;
265 249
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
267 if (!pblk->page_pool) 251 if (!pblk->page_pool)
268 return -ENOMEM; 252 return -ENOMEM;
269 253
270 pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns, 254 pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
271 pblk_blk_ws_cache); 255 pblk_blk_ws_cache);
272 if (!pblk->line_ws_pool) 256 if (!pblk->line_ws_pool)
273 goto free_page_pool; 257 goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
276 if (!pblk->rec_pool) 260 if (!pblk->rec_pool)
277 goto free_blk_ws_pool; 261 goto free_blk_ws_pool;
278 262
279 pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache); 263 pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
280 if (!pblk->r_rq_pool) 264 pblk_g_rq_cache);
265 if (!pblk->g_rq_pool)
281 goto free_rec_pool; 266 goto free_rec_pool;
282 267
283 pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache); 268 pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
269 pblk_w_rq_cache);
284 if (!pblk->w_rq_pool) 270 if (!pblk->w_rq_pool)
285 goto free_r_rq_pool; 271 goto free_g_rq_pool;
286 272
287 pblk->line_meta_pool = 273 pblk->line_meta_pool =
288 mempool_create_slab_pool(16, pblk_line_meta_cache); 274 mempool_create_slab_pool(PBLK_META_POOL_SIZE,
275 pblk_line_meta_cache);
289 if (!pblk->line_meta_pool) 276 if (!pblk->line_meta_pool)
290 goto free_w_rq_pool; 277 goto free_w_rq_pool;
291 278
292 pblk->kw_wq = alloc_workqueue("pblk-aux-wq", 279 pblk->close_wq = alloc_workqueue("pblk-close-wq",
293 WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 280 WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
294 if (!pblk->kw_wq) 281 if (!pblk->close_wq)
295 goto free_line_meta_pool; 282 goto free_line_meta_pool;
296 283
284 pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
285 WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
286 if (!pblk->bb_wq)
287 goto free_close_wq;
288
297 if (pblk_set_ppaf(pblk)) 289 if (pblk_set_ppaf(pblk))
298 goto free_kw_wq; 290 goto free_bb_wq;
299 291
300 if (pblk_rwb_init(pblk)) 292 if (pblk_rwb_init(pblk))
301 goto free_kw_wq; 293 goto free_bb_wq;
302 294
303 INIT_LIST_HEAD(&pblk->compl_list); 295 INIT_LIST_HEAD(&pblk->compl_list);
304 return 0; 296 return 0;
305 297
306free_kw_wq: 298free_bb_wq:
307 destroy_workqueue(pblk->kw_wq); 299 destroy_workqueue(pblk->bb_wq);
300free_close_wq:
301 destroy_workqueue(pblk->close_wq);
308free_line_meta_pool: 302free_line_meta_pool:
309 mempool_destroy(pblk->line_meta_pool); 303 mempool_destroy(pblk->line_meta_pool);
310free_w_rq_pool: 304free_w_rq_pool:
311 mempool_destroy(pblk->w_rq_pool); 305 mempool_destroy(pblk->w_rq_pool);
312free_r_rq_pool: 306free_g_rq_pool:
313 mempool_destroy(pblk->r_rq_pool); 307 mempool_destroy(pblk->g_rq_pool);
314free_rec_pool: 308free_rec_pool:
315 mempool_destroy(pblk->rec_pool); 309 mempool_destroy(pblk->rec_pool);
316free_blk_ws_pool: 310free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
322 316
323static void pblk_core_free(struct pblk *pblk) 317static void pblk_core_free(struct pblk *pblk)
324{ 318{
325 if (pblk->kw_wq) 319 if (pblk->close_wq)
326 destroy_workqueue(pblk->kw_wq); 320 destroy_workqueue(pblk->close_wq);
321
322 if (pblk->bb_wq)
323 destroy_workqueue(pblk->bb_wq);
327 324
328 mempool_destroy(pblk->page_pool); 325 mempool_destroy(pblk->page_pool);
329 mempool_destroy(pblk->line_ws_pool); 326 mempool_destroy(pblk->line_ws_pool);
330 mempool_destroy(pblk->rec_pool); 327 mempool_destroy(pblk->rec_pool);
331 mempool_destroy(pblk->r_rq_pool); 328 mempool_destroy(pblk->g_rq_pool);
332 mempool_destroy(pblk->w_rq_pool); 329 mempool_destroy(pblk->w_rq_pool);
333 mempool_destroy(pblk->line_meta_pool); 330 mempool_destroy(pblk->line_meta_pool);
334 331
335 kmem_cache_destroy(pblk_blk_ws_cache); 332 kmem_cache_destroy(pblk_blk_ws_cache);
336 kmem_cache_destroy(pblk_rec_cache); 333 kmem_cache_destroy(pblk_rec_cache);
337 kmem_cache_destroy(pblk_r_rq_cache); 334 kmem_cache_destroy(pblk_g_rq_cache);
338 kmem_cache_destroy(pblk_w_rq_cache); 335 kmem_cache_destroy(pblk_w_rq_cache);
339 kmem_cache_destroy(pblk_line_meta_cache); 336 kmem_cache_destroy(pblk_line_meta_cache);
340} 337}
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
344 kfree(pblk->luns); 341 kfree(pblk->luns);
345} 342}
346 343
344static void pblk_free_line_bitmaps(struct pblk_line *line)
345{
346 kfree(line->blk_bitmap);
347 kfree(line->erase_bitmap);
348}
349
347static void pblk_lines_free(struct pblk *pblk) 350static void pblk_lines_free(struct pblk *pblk)
348{ 351{
349 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 352 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
355 line = &pblk->lines[i]; 358 line = &pblk->lines[i];
356 359
357 pblk_line_free(pblk, line); 360 pblk_line_free(pblk, line);
358 kfree(line->blk_bitmap); 361 pblk_free_line_bitmaps(line);
359 kfree(line->erase_bitmap);
360 } 362 }
361 spin_unlock(&l_mg->free_lock); 363 spin_unlock(&l_mg->free_lock);
362} 364}
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
368 370
369 kfree(l_mg->bb_template); 371 kfree(l_mg->bb_template);
370 kfree(l_mg->bb_aux); 372 kfree(l_mg->bb_aux);
373 kfree(l_mg->vsc_list);
371 374
375 spin_lock(&l_mg->free_lock);
372 for (i = 0; i < PBLK_DATA_LINES; i++) { 376 for (i = 0; i < PBLK_DATA_LINES; i++) {
373 pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); 377 kfree(l_mg->sline_meta[i]);
374 pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); 378 pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
379 kfree(l_mg->eline_meta[i]);
375 } 380 }
381 spin_unlock(&l_mg->free_lock);
376 382
377 kfree(pblk->lines); 383 kfree(pblk->lines);
378} 384}
@@ -411,13 +417,31 @@ out:
411 return ret; 417 return ret;
412} 418}
413 419
414static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line) 420static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
421 int blk_per_line)
415{ 422{
416 struct pblk_line_meta *lm = &pblk->lm; 423 struct nvm_tgt_dev *dev = pblk->dev;
424 struct nvm_geo *geo = &dev->geo;
417 struct pblk_lun *rlun; 425 struct pblk_lun *rlun;
418 int bb_cnt = 0; 426 int bb_cnt = 0;
419 int i; 427 int i;
420 428
429 for (i = 0; i < blk_per_line; i++) {
430 rlun = &pblk->luns[i];
431 if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
432 continue;
433
434 set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
435 bb_cnt++;
436 }
437
438 return bb_cnt;
439}
440
441static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
442{
443 struct pblk_line_meta *lm = &pblk->lm;
444
421 line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); 445 line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
422 if (!line->blk_bitmap) 446 if (!line->blk_bitmap)
423 return -ENOMEM; 447 return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
428 return -ENOMEM; 452 return -ENOMEM;
429 } 453 }
430 454
431 for (i = 0; i < lm->blk_per_line; i++) { 455 return 0;
432 rlun = &pblk->luns[i];
433 if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
434 continue;
435
436 set_bit(i, line->blk_bitmap);
437 bb_cnt++;
438 }
439
440 return bb_cnt;
441} 456}
442 457
443static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) 458static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
505} 520}
506 521
507/* See comment over struct line_emeta definition */ 522/* See comment over struct line_emeta definition */
508static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm) 523static unsigned int calc_emeta_len(struct pblk *pblk)
509{ 524{
510 return (sizeof(struct line_emeta) + 525 struct pblk_line_meta *lm = &pblk->lm;
511 ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) + 526 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
512 (pblk->l_mg.nr_lines * sizeof(u32)) + 527 struct nvm_tgt_dev *dev = pblk->dev;
513 lm->blk_bitmap_len); 528 struct nvm_geo *geo = &dev->geo;
529
530 /* Round to sector size so that lba_list starts on its own sector */
531 lm->emeta_sec[1] = DIV_ROUND_UP(
532 sizeof(struct line_emeta) + lm->blk_bitmap_len,
533 geo->sec_size);
534 lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
535
536 /* Round to sector size so that vsc_list starts on its own sector */
537 lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
538 lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
539 geo->sec_size);
540 lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
541
542 lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
543 geo->sec_size);
544 lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
545
546 lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
547
548 return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
514} 549}
515 550
516static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) 551static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
534 atomic_set(&pblk->rl.free_blocks, nr_free_blks); 569 atomic_set(&pblk->rl.free_blocks, nr_free_blks);
535} 570}
536 571
572static int pblk_lines_alloc_metadata(struct pblk *pblk)
573{
574 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
575 struct pblk_line_meta *lm = &pblk->lm;
576 int i;
577
578 /* smeta is always small enough to fit on a kmalloc memory allocation,
579 * emeta depends on the number of LUNs allocated to the pblk instance
580 */
581 for (i = 0; i < PBLK_DATA_LINES; i++) {
582 l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
583 if (!l_mg->sline_meta[i])
584 goto fail_free_smeta;
585 }
586
587 /* emeta allocates three different buffers for managing metadata with
588 * in-memory and in-media layouts
589 */
590 for (i = 0; i < PBLK_DATA_LINES; i++) {
591 struct pblk_emeta *emeta;
592
593 emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
594 if (!emeta)
595 goto fail_free_emeta;
596
597 if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
598 l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
599
600 emeta->buf = vmalloc(lm->emeta_len[0]);
601 if (!emeta->buf) {
602 kfree(emeta);
603 goto fail_free_emeta;
604 }
605
606 emeta->nr_entries = lm->emeta_sec[0];
607 l_mg->eline_meta[i] = emeta;
608 } else {
609 l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
610
611 emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
612 if (!emeta->buf) {
613 kfree(emeta);
614 goto fail_free_emeta;
615 }
616
617 emeta->nr_entries = lm->emeta_sec[0];
618 l_mg->eline_meta[i] = emeta;
619 }
620 }
621
622 l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
623 if (!l_mg->vsc_list)
624 goto fail_free_emeta;
625
626 for (i = 0; i < l_mg->nr_lines; i++)
627 l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
628
629 return 0;
630
631fail_free_emeta:
632 while (--i >= 0) {
633 vfree(l_mg->eline_meta[i]->buf);
634 kfree(l_mg->eline_meta[i]);
635 }
636
637fail_free_smeta:
638 for (i = 0; i < PBLK_DATA_LINES; i++)
639 kfree(l_mg->sline_meta[i]);
640
641 return -ENOMEM;
642}
643
537static int pblk_lines_init(struct pblk *pblk) 644static int pblk_lines_init(struct pblk *pblk)
538{ 645{
539 struct nvm_tgt_dev *dev = pblk->dev; 646 struct nvm_tgt_dev *dev = pblk->dev;
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
542 struct pblk_line_meta *lm = &pblk->lm; 649 struct pblk_line_meta *lm = &pblk->lm;
543 struct pblk_line *line; 650 struct pblk_line *line;
544 unsigned int smeta_len, emeta_len; 651 unsigned int smeta_len, emeta_len;
545 long nr_bad_blks, nr_meta_blks, nr_free_blks; 652 long nr_bad_blks, nr_free_blks;
546 int bb_distance; 653 int bb_distance, max_write_ppas, mod;
547 int i; 654 int i, ret;
548 int ret; 655
656 pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
657 max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
658 pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
659 max_write_ppas : nvm_max_phys_sects(dev);
660 pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
661
662 if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
663 pr_err("pblk: cannot support device max_phys_sect\n");
664 return -EINVAL;
665 }
666
667 div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
668 if (mod) {
669 pr_err("pblk: bad configuration of sectors/pages\n");
670 return -EINVAL;
671 }
672
673 l_mg->nr_lines = geo->blks_per_lun;
674 l_mg->log_line = l_mg->data_line = NULL;
675 l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
676 l_mg->nr_free_lines = 0;
677 bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
549 678
550 lm->sec_per_line = geo->sec_per_blk * geo->nr_luns; 679 lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
551 lm->blk_per_line = geo->nr_luns; 680 lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
554 lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); 683 lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
555 lm->high_thrs = lm->sec_per_line / 2; 684 lm->high_thrs = lm->sec_per_line / 2;
556 lm->mid_thrs = lm->sec_per_line / 4; 685 lm->mid_thrs = lm->sec_per_line / 4;
686 lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
557 687
558 /* Calculate necessary pages for smeta. See comment over struct 688 /* Calculate necessary pages for smeta. See comment over struct
559 * line_smeta definition 689 * line_smeta definition
560 */ 690 */
561 lm->smeta_len = sizeof(struct line_smeta) +
562 PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
563
564 i = 1; 691 i = 1;
565add_smeta_page: 692add_smeta_page:
566 lm->smeta_sec = i * geo->sec_per_pl; 693 lm->smeta_sec = i * geo->sec_per_pl;
567 lm->smeta_len = lm->smeta_sec * geo->sec_size; 694 lm->smeta_len = lm->smeta_sec * geo->sec_size;
568 695
569 smeta_len = sizeof(struct line_smeta) + 696 smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
570 PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
571 if (smeta_len > lm->smeta_len) { 697 if (smeta_len > lm->smeta_len) {
572 i++; 698 i++;
573 goto add_smeta_page; 699 goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
578 */ 704 */
579 i = 1; 705 i = 1;
580add_emeta_page: 706add_emeta_page:
581 lm->emeta_sec = i * geo->sec_per_pl; 707 lm->emeta_sec[0] = i * geo->sec_per_pl;
582 lm->emeta_len = lm->emeta_sec * geo->sec_size; 708 lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
583 709
584 emeta_len = calc_emeta_len(pblk, lm); 710 emeta_len = calc_emeta_len(pblk);
585 if (emeta_len > lm->emeta_len) { 711 if (emeta_len > lm->emeta_len[0]) {
586 i++; 712 i++;
587 goto add_emeta_page; 713 goto add_emeta_page;
588 } 714 }
589 lm->emeta_bb = geo->nr_luns - i;
590
591 nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
592 (geo->sec_per_blk / 2)) / geo->sec_per_blk;
593 lm->min_blk_line = nr_meta_blks + 1;
594
595 l_mg->nr_lines = geo->blks_per_lun;
596 l_mg->log_line = l_mg->data_line = NULL;
597 l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
598 l_mg->nr_free_lines = 0;
599 bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
600 715
601 /* smeta is always small enough to fit on a kmalloc memory allocation, 716 lm->emeta_bb = geo->nr_luns - i;
602 * emeta depends on the number of LUNs allocated to the pblk instance 717 lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
603 */ 718 geo->sec_per_blk);
604 l_mg->smeta_alloc_type = PBLK_KMALLOC_META; 719 if (lm->min_blk_line > lm->blk_per_line) {
605 for (i = 0; i < PBLK_DATA_LINES; i++) { 720 pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
606 l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL); 721 lm->blk_per_line);
607 if (!l_mg->sline_meta[i].meta) 722 ret = -EINVAL;
608 while (--i >= 0) { 723 goto fail;
609 kfree(l_mg->sline_meta[i].meta);
610 ret = -ENOMEM;
611 goto fail;
612 }
613 } 724 }
614 725
615 if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) { 726 ret = pblk_lines_alloc_metadata(pblk);
616 l_mg->emeta_alloc_type = PBLK_VMALLOC_META; 727 if (ret)
617 728 goto fail;
618 for (i = 0; i < PBLK_DATA_LINES; i++) {
619 l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
620 if (!l_mg->eline_meta[i].meta)
621 while (--i >= 0) {
622 vfree(l_mg->eline_meta[i].meta);
623 ret = -ENOMEM;
624 goto fail;
625 }
626 }
627 } else {
628 l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
629
630 for (i = 0; i < PBLK_DATA_LINES; i++) {
631 l_mg->eline_meta[i].meta =
632 kmalloc(lm->emeta_len, GFP_KERNEL);
633 if (!l_mg->eline_meta[i].meta)
634 while (--i >= 0) {
635 kfree(l_mg->eline_meta[i].meta);
636 ret = -ENOMEM;
637 goto fail;
638 }
639 }
640 }
641 729
642 l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); 730 l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
643 if (!l_mg->bb_template) { 731 if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
664 INIT_LIST_HEAD(&l_mg->gc_low_list); 752 INIT_LIST_HEAD(&l_mg->gc_low_list);
665 INIT_LIST_HEAD(&l_mg->gc_empty_list); 753 INIT_LIST_HEAD(&l_mg->gc_empty_list);
666 754
755 INIT_LIST_HEAD(&l_mg->emeta_list);
756
667 l_mg->gc_lists[0] = &l_mg->gc_high_list; 757 l_mg->gc_lists[0] = &l_mg->gc_high_list;
668 l_mg->gc_lists[1] = &l_mg->gc_mid_list; 758 l_mg->gc_lists[1] = &l_mg->gc_mid_list;
669 l_mg->gc_lists[2] = &l_mg->gc_low_list; 759 l_mg->gc_lists[2] = &l_mg->gc_low_list;
670 760
671 spin_lock_init(&l_mg->free_lock); 761 spin_lock_init(&l_mg->free_lock);
762 spin_lock_init(&l_mg->close_lock);
672 spin_lock_init(&l_mg->gc_lock); 763 spin_lock_init(&l_mg->gc_lock);
673 764
674 pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), 765 pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
689 line->type = PBLK_LINETYPE_FREE; 780 line->type = PBLK_LINETYPE_FREE;
690 line->state = PBLK_LINESTATE_FREE; 781 line->state = PBLK_LINESTATE_FREE;
691 line->gc_group = PBLK_LINEGC_NONE; 782 line->gc_group = PBLK_LINEGC_NONE;
783 line->vsc = &l_mg->vsc_list[i];
692 spin_lock_init(&line->lock); 784 spin_lock_init(&line->lock);
693 785
694 nr_bad_blks = pblk_bb_line(pblk, line); 786 ret = pblk_alloc_line_bitmaps(pblk, line);
787 if (ret)
788 goto fail_free_lines;
789
790 nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
695 if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) { 791 if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
792 pblk_free_line_bitmaps(line);
696 ret = -EINVAL; 793 ret = -EINVAL;
697 goto fail_free_lines; 794 goto fail_free_lines;
698 } 795 }
@@ -713,24 +810,20 @@ add_emeta_page:
713 810
714 pblk_set_provision(pblk, nr_free_blks); 811 pblk_set_provision(pblk, nr_free_blks);
715 812
716 sema_init(&pblk->erase_sem, 1);
717
718 /* Cleanup per-LUN bad block lists - managed within lines on run-time */ 813 /* Cleanup per-LUN bad block lists - managed within lines on run-time */
719 for (i = 0; i < geo->nr_luns; i++) 814 for (i = 0; i < geo->nr_luns; i++)
720 kfree(pblk->luns[i].bb_list); 815 kfree(pblk->luns[i].bb_list);
721 816
722 return 0; 817 return 0;
723fail_free_lines: 818fail_free_lines:
724 kfree(pblk->lines); 819 while (--i >= 0)
820 pblk_free_line_bitmaps(&pblk->lines[i]);
725fail_free_bb_aux: 821fail_free_bb_aux:
726 kfree(l_mg->bb_aux); 822 kfree(l_mg->bb_aux);
727fail_free_bb_template: 823fail_free_bb_template:
728 kfree(l_mg->bb_template); 824 kfree(l_mg->bb_template);
729fail_free_meta: 825fail_free_meta:
730 for (i = 0; i < PBLK_DATA_LINES; i++) { 826 pblk_line_meta_free(pblk);
731 pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
732 pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
733 }
734fail: 827fail:
735 for (i = 0; i < geo->nr_luns; i++) 828 for (i = 0; i < geo->nr_luns; i++)
736 kfree(pblk->luns[i].bb_list); 829 kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
754 847
755static void pblk_writer_stop(struct pblk *pblk) 848static void pblk_writer_stop(struct pblk *pblk)
756{ 849{
850 /* The pipeline must be stopped and the write buffer emptied before the
851 * write thread is stopped
852 */
853 WARN(pblk_rb_read_count(&pblk->rwb),
854 "Stopping not fully persisted write buffer\n");
855
856 WARN(pblk_rb_sync_count(&pblk->rwb),
857 "Stopping not fully synced write buffer\n");
858
757 if (pblk->writer_ts) 859 if (pblk->writer_ts)
758 kthread_stop(pblk->writer_ts); 860 kthread_stop(pblk->writer_ts);
759 del_timer(&pblk->wtimer); 861 del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
772 874
773static void pblk_tear_down(struct pblk *pblk) 875static void pblk_tear_down(struct pblk *pblk)
774{ 876{
775 pblk_flush_writer(pblk); 877 pblk_pipeline_stop(pblk);
776 pblk_writer_stop(pblk); 878 pblk_writer_stop(pblk);
777 pblk_rb_sync_l2p(&pblk->rwb); 879 pblk_rb_sync_l2p(&pblk->rwb);
778 pblk_recov_pad(pblk);
779 pblk_rwb_free(pblk); 880 pblk_rwb_free(pblk);
780 pblk_rl_free(&pblk->rl); 881 pblk_rl_free(&pblk->rl);
781 882
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
821 922
822 pblk->dev = dev; 923 pblk->dev = dev;
823 pblk->disk = tdisk; 924 pblk->disk = tdisk;
925 pblk->state = PBLK_STATE_RUNNING;
824 926
825 spin_lock_init(&pblk->trans_lock); 927 spin_lock_init(&pblk->trans_lock);
826 spin_lock_init(&pblk->lock); 928 spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
836 atomic_long_set(&pblk->req_writes, 0); 938 atomic_long_set(&pblk->req_writes, 0);
837 atomic_long_set(&pblk->sub_writes, 0); 939 atomic_long_set(&pblk->sub_writes, 0);
838 atomic_long_set(&pblk->sync_writes, 0); 940 atomic_long_set(&pblk->sync_writes, 0);
839 atomic_long_set(&pblk->compl_writes, 0);
840 atomic_long_set(&pblk->inflight_reads, 0); 941 atomic_long_set(&pblk->inflight_reads, 0);
942 atomic_long_set(&pblk->cache_reads, 0);
841 atomic_long_set(&pblk->sync_reads, 0); 943 atomic_long_set(&pblk->sync_reads, 0);
842 atomic_long_set(&pblk->recov_writes, 0); 944 atomic_long_set(&pblk->recov_writes, 0);
843 atomic_long_set(&pblk->recov_writes, 0); 945 atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
946 1048
947static int __init pblk_module_init(void) 1049static int __init pblk_module_init(void)
948{ 1050{
949 return nvm_register_tgt_type(&tt_pblk); 1051 int ret;
1052
1053 pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
1054 if (!pblk_bio_set)
1055 return -ENOMEM;
1056 ret = nvm_register_tgt_type(&tt_pblk);
1057 if (ret)
1058 bioset_free(pblk_bio_set);
1059 return ret;
950} 1060}
951 1061
952static void pblk_module_exit(void) 1062static void pblk_module_exit(void)
953{ 1063{
1064 bioset_free(pblk_bio_set);
954 nvm_unregister_tgt_type(&tt_pblk); 1065 nvm_unregister_tgt_type(&tt_pblk);
955} 1066}
956 1067
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c16955284d..fddb924f6dde 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
25 unsigned int valid_secs) 25 unsigned int valid_secs)
26{ 26{
27 struct pblk_line *line = pblk_line_get_data(pblk); 27 struct pblk_line *line = pblk_line_get_data(pblk);
28 struct line_emeta *emeta = line->emeta; 28 struct pblk_emeta *emeta = line->emeta;
29 struct pblk_w_ctx *w_ctx; 29 struct pblk_w_ctx *w_ctx;
30 __le64 *lba_list = pblk_line_emeta_to_lbas(emeta); 30 __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
31 u64 paddr; 31 u64 paddr;
32 int nr_secs = pblk->min_write_pgs; 32 int nr_secs = pblk->min_write_pgs;
33 int i; 33 int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
51 w_ctx->ppa = ppa_list[i]; 51 w_ctx->ppa = ppa_list[i];
52 meta_list[i].lba = cpu_to_le64(w_ctx->lba); 52 meta_list[i].lba = cpu_to_le64(w_ctx->lba);
53 lba_list[paddr] = cpu_to_le64(w_ctx->lba); 53 lba_list[paddr] = cpu_to_le64(w_ctx->lba);
54 le64_add_cpu(&line->emeta->nr_valid_lbas, 1); 54 line->nr_valid_lbas++;
55 } else { 55 } else {
56 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); 56 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
57 lba_list[paddr] = cpu_to_le64(ADDR_EMPTY); 57
58 pblk_map_pad_invalidate(pblk, line, paddr); 58 lba_list[paddr] = meta_list[i].lba = addr_empty;
59 __pblk_map_invalidate(pblk, line, paddr);
59 } 60 }
60 } 61 }
61 62
62 if (pblk_line_is_full(line)) { 63 if (pblk_line_is_full(line)) {
63 line = pblk_line_replace_data(pblk); 64 struct pblk_line *prev_line = line;
64 if (!line) 65
65 return; 66 pblk_line_replace_data(pblk);
67 pblk_line_close_meta(pblk, prev_line);
66 } 68 }
67 69
68 pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); 70 pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
91{ 93{
92 struct nvm_tgt_dev *dev = pblk->dev; 94 struct nvm_tgt_dev *dev = pblk->dev;
93 struct nvm_geo *geo = &dev->geo; 95 struct nvm_geo *geo = &dev->geo;
94 struct pblk_line *e_line = pblk_line_get_data_next(pblk); 96 struct pblk_line_meta *lm = &pblk->lm;
95 struct pblk_sec_meta *meta_list = rqd->meta_list; 97 struct pblk_sec_meta *meta_list = rqd->meta_list;
98 struct pblk_line *e_line, *d_line;
96 unsigned int map_secs; 99 unsigned int map_secs;
97 int min = pblk->min_write_pgs; 100 int min = pblk->min_write_pgs;
98 int i, erase_lun; 101 int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
102 pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], 105 pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
103 lun_bitmap, &meta_list[i], map_secs); 106 lun_bitmap, &meta_list[i], map_secs);
104 107
105 erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls + 108 erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
106 rqd->ppa_list[i].g.ch;
107 109
108 if (!test_bit(erase_lun, e_line->erase_bitmap)) { 110 /* line can change after page map. We might also be writing the
109 if (down_trylock(&pblk->erase_sem)) 111 * last line.
110 continue; 112 */
113 e_line = pblk_line_get_erase(pblk);
114 if (!e_line)
115 return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
116 valid_secs, i + min);
111 117
118 spin_lock(&e_line->lock);
119 if (!test_bit(erase_lun, e_line->erase_bitmap)) {
112 set_bit(erase_lun, e_line->erase_bitmap); 120 set_bit(erase_lun, e_line->erase_bitmap);
113 atomic_dec(&e_line->left_eblks); 121 atomic_dec(&e_line->left_eblks);
122
114 *erase_ppa = rqd->ppa_list[i]; 123 *erase_ppa = rqd->ppa_list[i];
115 erase_ppa->g.blk = e_line->id; 124 erase_ppa->g.blk = e_line->id;
116 125
126 spin_unlock(&e_line->lock);
127
117 /* Avoid evaluating e_line->left_eblks */ 128 /* Avoid evaluating e_line->left_eblks */
118 return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, 129 return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
119 valid_secs, i + min); 130 valid_secs, i + min);
120 } 131 }
132 spin_unlock(&e_line->lock);
121 } 133 }
122 134
123 /* Erase blocks that are bad in this line but might not be in next */ 135 d_line = pblk_line_get_data(pblk);
124 if (unlikely(ppa_empty(*erase_ppa))) { 136
125 struct pblk_line_meta *lm = &pblk->lm; 137 /* line can change after page map. We might also be writing the
138 * last line.
139 */
140 e_line = pblk_line_get_erase(pblk);
141 if (!e_line)
142 return;
126 143
127 i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line); 144 /* Erase blocks that are bad in this line but might not be in next */
128 if (i == lm->blk_per_line) 145 if (unlikely(ppa_empty(*erase_ppa)) &&
146 bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
147 int bit = -1;
148
149retry:
150 bit = find_next_bit(d_line->blk_bitmap,
151 lm->blk_per_line, bit + 1);
152 if (bit >= lm->blk_per_line)
129 return; 153 return;
130 154
131 set_bit(i, e_line->erase_bitmap); 155 spin_lock(&e_line->lock);
156 if (test_bit(bit, e_line->erase_bitmap)) {
157 spin_unlock(&e_line->lock);
158 goto retry;
159 }
160 spin_unlock(&e_line->lock);
161
162 set_bit(bit, e_line->erase_bitmap);
132 atomic_dec(&e_line->left_eblks); 163 atomic_dec(&e_line->left_eblks);
133 *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */ 164 *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
134 erase_ppa->g.blk = e_line->id; 165 erase_ppa->g.blk = e_line->id;
135 } 166 }
136} 167}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384ddc1f9..5ecc154f6831 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ try:
150 /* Release flags on context. Protect from writes and reads */ 150 /* Release flags on context. Protect from writes and reads */
151 smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); 151 smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
152 pblk_ppa_set_empty(&w_ctx->ppa); 152 pblk_ppa_set_empty(&w_ctx->ppa);
153 w_ctx->lba = ADDR_EMPTY;
153} 154}
154 155
155#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size) 156#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
180 return pblk_rb_ring_count(mem, subm, rb->nr_entries); 181 return pblk_rb_ring_count(mem, subm, rb->nr_entries);
181} 182}
182 183
184unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
185{
186 unsigned int mem = READ_ONCE(rb->mem);
187 unsigned int sync = READ_ONCE(rb->sync);
188
189 return pblk_rb_ring_count(mem, sync, rb->nr_entries);
190}
191
183unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) 192unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
184{ 193{
185 unsigned int subm; 194 unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
199 struct pblk_line *line; 208 struct pblk_line *line;
200 struct pblk_rb_entry *entry; 209 struct pblk_rb_entry *entry;
201 struct pblk_w_ctx *w_ctx; 210 struct pblk_w_ctx *w_ctx;
211 unsigned int user_io = 0, gc_io = 0;
202 unsigned int i; 212 unsigned int i;
213 int flags;
203 214
204 for (i = 0; i < to_update; i++) { 215 for (i = 0; i < to_update; i++) {
205 entry = &rb->entries[*l2p_upd]; 216 entry = &rb->entries[*l2p_upd];
206 w_ctx = &entry->w_ctx; 217 w_ctx = &entry->w_ctx;
207 218
219 flags = READ_ONCE(entry->w_ctx.flags);
220 if (flags & PBLK_IOTYPE_USER)
221 user_io++;
222 else if (flags & PBLK_IOTYPE_GC)
223 gc_io++;
224 else
225 WARN(1, "pblk: unknown IO type\n");
226
208 pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, 227 pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
209 entry->cacheline); 228 entry->cacheline);
210 229
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
214 *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1); 233 *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
215 } 234 }
216 235
236 pblk_rl_out(&pblk->rl, user_io, gc_io);
237
217 return 0; 238 return 0;
218} 239}
219 240
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
357 /* Protect syncs */ 378 /* Protect syncs */
358 smp_store_release(&rb->sync_point, sync_point); 379 smp_store_release(&rb->sync_point, sync_point);
359 380
381 if (!bio)
382 return 0;
383
360 spin_lock_irq(&rb->s_lock); 384 spin_lock_irq(&rb->s_lock);
361 bio_list_add(&entry->w_ctx.bios, bio); 385 bio_list_add(&entry->w_ctx.bios, bio);
362 spin_unlock_irq(&rb->s_lock); 386 spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
395 return 1; 419 return 1;
396} 420}
397 421
422void pblk_rb_flush(struct pblk_rb *rb)
423{
424 struct pblk *pblk = container_of(rb, struct pblk, rwb);
425 unsigned int mem = READ_ONCE(rb->mem);
426
427 if (pblk_rb_sync_point_set(rb, NULL, mem))
428 return;
429
430 pblk_write_should_kick(pblk);
431}
432
398static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, 433static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
399 unsigned int *pos, struct bio *bio, 434 unsigned int *pos, struct bio *bio,
400 int *io_ret) 435 int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
431 unsigned int nr_entries, unsigned int *pos) 466 unsigned int nr_entries, unsigned int *pos)
432{ 467{
433 struct pblk *pblk = container_of(rb, struct pblk, rwb); 468 struct pblk *pblk = container_of(rb, struct pblk, rwb);
434 int flush_done; 469 int io_ret;
435 470
436 spin_lock(&rb->w_lock); 471 spin_lock(&rb->w_lock);
437 if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) { 472 io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
473 if (io_ret) {
438 spin_unlock(&rb->w_lock); 474 spin_unlock(&rb->w_lock);
439 return NVM_IO_REQUEUE; 475 return io_ret;
440 } 476 }
441 477
442 if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) { 478 if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
443 spin_unlock(&rb->w_lock); 479 spin_unlock(&rb->w_lock);
444 return NVM_IO_REQUEUE; 480 return NVM_IO_REQUEUE;
445 } 481 }
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
447 pblk_rl_user_in(&pblk->rl, nr_entries); 483 pblk_rl_user_in(&pblk->rl, nr_entries);
448 spin_unlock(&rb->w_lock); 484 spin_unlock(&rb->w_lock);
449 485
450 return flush_done; 486 return io_ret;
451} 487}
452 488
453/* 489/*
@@ -521,20 +557,18 @@ out:
521 * This function is used by the write thread to form the write bio that will 557 * This function is used by the write thread to form the write bio that will
522 * persist data on the write buffer to the media. 558 * persist data on the write buffer to the media.
523 */ 559 */
524unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, 560unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
525 struct pblk_c_ctx *c_ctx, 561 struct bio *bio, unsigned int pos,
526 unsigned int pos, 562 unsigned int nr_entries, unsigned int count)
527 unsigned int nr_entries,
528 unsigned int count)
529{ 563{
530 struct pblk *pblk = container_of(rb, struct pblk, rwb); 564 struct pblk *pblk = container_of(rb, struct pblk, rwb);
565 struct request_queue *q = pblk->dev->q;
566 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
531 struct pblk_rb_entry *entry; 567 struct pblk_rb_entry *entry;
532 struct page *page; 568 struct page *page;
533 unsigned int pad = 0, read = 0, to_read = nr_entries; 569 unsigned int pad = 0, to_read = nr_entries;
534 unsigned int user_io = 0, gc_io = 0;
535 unsigned int i; 570 unsigned int i;
536 int flags; 571 int flags;
537 int ret;
538 572
539 if (count < nr_entries) { 573 if (count < nr_entries) {
540 pad = nr_entries - count; 574 pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
553 */ 587 */
554try: 588try:
555 flags = READ_ONCE(entry->w_ctx.flags); 589 flags = READ_ONCE(entry->w_ctx.flags);
556 if (!(flags & PBLK_WRITTEN_DATA)) 590 if (!(flags & PBLK_WRITTEN_DATA)) {
591 io_schedule();
557 goto try; 592 goto try;
558 593 }
559 if (flags & PBLK_IOTYPE_USER)
560 user_io++;
561 else if (flags & PBLK_IOTYPE_GC)
562 gc_io++;
563 else
564 WARN(1, "pblk: unknown IO type\n");
565 594
566 page = virt_to_page(entry->data); 595 page = virt_to_page(entry->data);
567 if (!page) { 596 if (!page) {
@@ -570,17 +599,17 @@ try:
570 flags |= PBLK_SUBMITTED_ENTRY; 599 flags |= PBLK_SUBMITTED_ENTRY;
571 /* Release flags on context. Protect from writes */ 600 /* Release flags on context. Protect from writes */
572 smp_store_release(&entry->w_ctx.flags, flags); 601 smp_store_release(&entry->w_ctx.flags, flags);
573 goto out; 602 return NVM_IO_ERR;
574 } 603 }
575 604
576 ret = bio_add_page(bio, page, rb->seg_size, 0); 605 if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
577 if (ret != rb->seg_size) { 606 rb->seg_size) {
578 pr_err("pblk: could not add page to write bio\n"); 607 pr_err("pblk: could not add page to write bio\n");
579 flags &= ~PBLK_WRITTEN_DATA; 608 flags &= ~PBLK_WRITTEN_DATA;
580 flags |= PBLK_SUBMITTED_ENTRY; 609 flags |= PBLK_SUBMITTED_ENTRY;
581 /* Release flags on context. Protect from writes */ 610 /* Release flags on context. Protect from writes */
582 smp_store_release(&entry->w_ctx.flags, flags); 611 smp_store_release(&entry->w_ctx.flags, flags);
583 goto out; 612 return NVM_IO_ERR;
584 } 613 }
585 614
586 if (flags & PBLK_FLUSH_ENTRY) { 615 if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
607 pos = (pos + 1) & (rb->nr_entries - 1); 636 pos = (pos + 1) & (rb->nr_entries - 1);
608 } 637 }
609 638
610 read = to_read; 639 if (pad) {
611 pblk_rl_out(&pblk->rl, user_io, gc_io); 640 if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
641 pr_err("pblk: could not pad page in write bio\n");
642 return NVM_IO_ERR;
643 }
644 }
645
612#ifdef CONFIG_NVM_DEBUG 646#ifdef CONFIG_NVM_DEBUG
613 atomic_long_add(pad, &((struct pblk *) 647 atomic_long_add(pad, &((struct pblk *)
614 (container_of(rb, struct pblk, rwb)))->padded_writes); 648 (container_of(rb, struct pblk, rwb)))->padded_writes);
615#endif 649#endif
616out: 650
617 return read; 651 return NVM_IO_OK;
618} 652}
619 653
620/* 654/*
@@ -623,15 +657,17 @@ out:
623 * be directed to disk. 657 * be directed to disk.
624 */ 658 */
625int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 659int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
626 u64 pos, int bio_iter) 660 struct ppa_addr ppa, int bio_iter)
627{ 661{
662 struct pblk *pblk = container_of(rb, struct pblk, rwb);
628 struct pblk_rb_entry *entry; 663 struct pblk_rb_entry *entry;
629 struct pblk_w_ctx *w_ctx; 664 struct pblk_w_ctx *w_ctx;
665 struct ppa_addr l2p_ppa;
666 u64 pos = pblk_addr_to_cacheline(ppa);
630 void *data; 667 void *data;
631 int flags; 668 int flags;
632 int ret = 1; 669 int ret = 1;
633 670
634 spin_lock(&rb->w_lock);
635 671
636#ifdef CONFIG_NVM_DEBUG 672#ifdef CONFIG_NVM_DEBUG
637 /* Caller must ensure that the access will not cause an overflow */ 673 /* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
641 w_ctx = &entry->w_ctx; 677 w_ctx = &entry->w_ctx;
642 flags = READ_ONCE(w_ctx->flags); 678 flags = READ_ONCE(w_ctx->flags);
643 679
680 spin_lock(&rb->w_lock);
681 spin_lock(&pblk->trans_lock);
682 l2p_ppa = pblk_trans_map_get(pblk, lba);
683 spin_unlock(&pblk->trans_lock);
684
644 /* Check if the entry has been overwritten or is scheduled to be */ 685 /* Check if the entry has been overwritten or is scheduled to be */
645 if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) { 686 if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
687 flags & PBLK_WRITABLE_ENTRY) {
646 ret = 0; 688 ret = 0;
647 goto out; 689 goto out;
648 } 690 }
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14d78c6..4e5c48f3de62 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
34 BUG_ON(!pblk_addr_in_cache(ppa)); 34 BUG_ON(!pblk_addr_in_cache(ppa));
35#endif 35#endif
36 36
37 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, 37 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
38 pblk_addr_to_cacheline(ppa), bio_iter);
39} 38}
40 39
41static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, 40static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
76 } 75 }
77 WARN_ON(test_and_set_bit(i, read_bitmap)); 76 WARN_ON(test_and_set_bit(i, read_bitmap));
78 advanced_bio = 1; 77 advanced_bio = 1;
78#ifdef CONFIG_NVM_DEBUG
79 atomic_long_inc(&pblk->cache_reads);
80#endif
79 } else { 81 } else {
80 /* Read from media non-cached sectors */ 82 /* Read from media non-cached sectors */
81 rqd->ppa_list[j++] = p; 83 rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
85 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); 87 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
86 } 88 }
87 89
90 if (pblk_io_aligned(pblk, nr_secs))
91 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
92 else
93 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
94
88#ifdef CONFIG_NVM_DEBUG 95#ifdef CONFIG_NVM_DEBUG
89 atomic_long_add(nr_secs, &pblk->inflight_reads); 96 atomic_long_add(nr_secs, &pblk->inflight_reads);
90#endif 97#endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
94{ 101{
95 int err; 102 int err;
96 103
97 rqd->flags = pblk_set_read_mode(pblk);
98
99 err = pblk_submit_io(pblk, rqd); 104 err = pblk_submit_io(pblk, rqd);
100 if (err) 105 if (err)
101 return NVM_IO_ERR; 106 return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
107{ 112{
108 struct pblk *pblk = rqd->private; 113 struct pblk *pblk = rqd->private;
109 struct nvm_tgt_dev *dev = pblk->dev; 114 struct nvm_tgt_dev *dev = pblk->dev;
110 struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); 115 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
111 struct bio *bio = rqd->bio; 116 struct bio *bio = rqd->bio;
112 117
113 if (rqd->error) 118 if (rqd->error)
114 pblk_log_read_err(pblk, rqd); 119 pblk_log_read_err(pblk, rqd);
115#ifdef CONFIG_NVM_DEBUG 120#ifdef CONFIG_NVM_DEBUG
116 else 121 else
117 WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n"); 122 WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
118#endif 123#endif
119 124
120 if (rqd->nr_ppas > 1) 125 nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
121 nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
122 126
123 bio_put(bio); 127 bio_put(bio);
124 if (r_ctx->orig_bio) { 128 if (r_ctx->private) {
129 struct bio *orig_bio = r_ctx->private;
130
125#ifdef CONFIG_NVM_DEBUG 131#ifdef CONFIG_NVM_DEBUG
126 WARN_ONCE(r_ctx->orig_bio->bi_error, 132 WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
127 "pblk: corrupted read bio\n");
128#endif 133#endif
129 bio_endio(r_ctx->orig_bio); 134 bio_endio(orig_bio);
130 bio_put(r_ctx->orig_bio); 135 bio_put(orig_bio);
131 } 136 }
132 137
133#ifdef CONFIG_NVM_DEBUG 138#ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
136#endif 141#endif
137 142
138 pblk_free_rqd(pblk, rqd, READ); 143 pblk_free_rqd(pblk, rqd, READ);
144 atomic_dec(&pblk->inflight_io);
139} 145}
140 146
141static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, 147static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
173 179
174 rqd->bio = new_bio; 180 rqd->bio = new_bio;
175 rqd->nr_ppas = nr_holes; 181 rqd->nr_ppas = nr_holes;
182 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
176 rqd->end_io = NULL; 183 rqd->end_io = NULL;
177 184
178 if (unlikely(nr_secs > 1 && nr_holes == 1)) { 185 if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
280 goto retry; 287 goto retry;
281 } 288 }
282 WARN_ON(test_and_set_bit(0, read_bitmap)); 289 WARN_ON(test_and_set_bit(0, read_bitmap));
290#ifdef CONFIG_NVM_DEBUG
291 atomic_long_inc(&pblk->cache_reads);
292#endif
283 } else { 293 } else {
284 rqd->ppa_addr = ppa; 294 rqd->ppa_addr = ppa;
285 } 295 }
296
297 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
286} 298}
287 299
288int pblk_submit_read(struct pblk *pblk, struct bio *bio) 300int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
316 */ 328 */
317 bio_init_idx = pblk_get_bi_idx(bio); 329 bio_init_idx = pblk_get_bi_idx(bio);
318 330
331 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
332 &rqd->dma_meta_list);
333 if (!rqd->meta_list) {
334 pr_err("pblk: not able to allocate ppa list\n");
335 goto fail_rqd_free;
336 }
337
319 if (nr_secs > 1) { 338 if (nr_secs > 1) {
320 rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 339 rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
321 &rqd->dma_ppa_list); 340 rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
322 if (!rqd->ppa_list) {
323 pr_err("pblk: not able to allocate ppa list\n");
324 goto fail_rqd_free;
325 }
326 341
327 pblk_read_ppalist_rq(pblk, rqd, &read_bitmap); 342 pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
328 } else { 343 } else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
332 bio_get(bio); 347 bio_get(bio);
333 if (bitmap_full(&read_bitmap, nr_secs)) { 348 if (bitmap_full(&read_bitmap, nr_secs)) {
334 bio_endio(bio); 349 bio_endio(bio);
350 atomic_inc(&pblk->inflight_io);
335 pblk_end_io_read(rqd); 351 pblk_end_io_read(rqd);
336 return NVM_IO_OK; 352 return NVM_IO_OK;
337 } 353 }
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
339 /* All sectors are to be read from the device */ 355 /* All sectors are to be read from the device */
340 if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) { 356 if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
341 struct bio *int_bio = NULL; 357 struct bio *int_bio = NULL;
342 struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); 358 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
343 359
344 /* Clone read bio to deal with read errors internally */ 360 /* Clone read bio to deal with read errors internally */
345 int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set); 361 int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
346 if (!int_bio) { 362 if (!int_bio) {
347 pr_err("pblk: could not clone read bio\n"); 363 pr_err("pblk: could not clone read bio\n");
348 return NVM_IO_ERR; 364 return NVM_IO_ERR;
349 } 365 }
350 366
351 rqd->bio = int_bio; 367 rqd->bio = int_bio;
352 r_ctx->orig_bio = bio; 368 r_ctx->private = bio;
353 369
354 ret = pblk_submit_read_io(pblk, rqd); 370 ret = pblk_submit_read_io(pblk, rqd);
355 if (ret) { 371 if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
445{ 461{
446 struct nvm_tgt_dev *dev = pblk->dev; 462 struct nvm_tgt_dev *dev = pblk->dev;
447 struct nvm_geo *geo = &dev->geo; 463 struct nvm_geo *geo = &dev->geo;
448 struct request_queue *q = dev->q;
449 struct bio *bio; 464 struct bio *bio;
450 struct nvm_rq rqd; 465 struct nvm_rq rqd;
451 int ret, data_len; 466 int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
453 468
454 memset(&rqd, 0, sizeof(struct nvm_rq)); 469 memset(&rqd, 0, sizeof(struct nvm_rq));
455 470
471 rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
472 &rqd.dma_meta_list);
473 if (!rqd.meta_list)
474 return NVM_IO_ERR;
475
456 if (nr_secs > 1) { 476 if (nr_secs > 1) {
457 rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 477 rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
458 &rqd.dma_ppa_list); 478 rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
459 if (!rqd.ppa_list)
460 return NVM_IO_ERR;
461 479
462 *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list, 480 *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
463 nr_secs); 481 nr_secs);
464 if (*secs_to_gc == 1) { 482 if (*secs_to_gc == 1)
465 struct ppa_addr ppa; 483 rqd.ppa_addr = rqd.ppa_list[0];
466
467 ppa = rqd.ppa_list[0];
468 nvm_dev_dma_free(dev->parent, rqd.ppa_list,
469 rqd.dma_ppa_list);
470 rqd.ppa_addr = ppa;
471 }
472 } else { 484 } else {
473 *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]); 485 *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
474 } 486 }
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
477 goto out; 489 goto out;
478 490
479 data_len = (*secs_to_gc) * geo->sec_size; 491 data_len = (*secs_to_gc) * geo->sec_size;
480 bio = bio_map_kern(q, data, data_len, GFP_KERNEL); 492 bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
493 PBLK_KMALLOC_META, GFP_KERNEL);
481 if (IS_ERR(bio)) { 494 if (IS_ERR(bio)) {
482 pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); 495 pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
483 goto err_free_dma; 496 goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
490 rqd.end_io = pblk_end_io_sync; 503 rqd.end_io = pblk_end_io_sync;
491 rqd.private = &wait; 504 rqd.private = &wait;
492 rqd.nr_ppas = *secs_to_gc; 505 rqd.nr_ppas = *secs_to_gc;
506 rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
493 rqd.bio = bio; 507 rqd.bio = bio;
494 508
495 ret = pblk_submit_read_io(pblk, &rqd); 509 ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
503 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 517 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
504 pr_err("pblk: GC read I/O timed out\n"); 518 pr_err("pblk: GC read I/O timed out\n");
505 } 519 }
520 atomic_dec(&pblk->inflight_io);
506 521
507 if (rqd.error) { 522 if (rqd.error) {
508 atomic_long_inc(&pblk->read_failed_gc); 523 atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
518#endif 533#endif
519 534
520out: 535out:
521 if (rqd.nr_ppas > 1) 536 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
522 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
523 return NVM_IO_OK; 537 return NVM_IO_OK;
524 538
525err_free_dma: 539err_free_dma:
526 if (rqd.nr_ppas > 1) 540 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
527 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
528 return NVM_IO_ERR; 541 return NVM_IO_ERR;
529} 542}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f85087cd3c..0e48d3e4e143 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
120 return 0; 120 return 0;
121} 121}
122 122
123__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta) 123__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
124{ 124{
125 u32 crc; 125 u32 crc;
126 126
127 crc = pblk_calc_emeta_crc(pblk, emeta); 127 crc = pblk_calc_emeta_crc(pblk, emeta_buf);
128 if (le32_to_cpu(emeta->crc) != crc) 128 if (le32_to_cpu(emeta_buf->crc) != crc)
129 return NULL; 129 return NULL;
130 130
131 if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC) 131 if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
132 return NULL; 132 return NULL;
133 133
134 return pblk_line_emeta_to_lbas(emeta); 134 return emeta_to_lbas(pblk, emeta_buf);
135} 135}
136 136
137static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) 137static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
139 struct nvm_tgt_dev *dev = pblk->dev; 139 struct nvm_tgt_dev *dev = pblk->dev;
140 struct nvm_geo *geo = &dev->geo; 140 struct nvm_geo *geo = &dev->geo;
141 struct pblk_line_meta *lm = &pblk->lm; 141 struct pblk_line_meta *lm = &pblk->lm;
142 struct line_emeta *emeta = line->emeta; 142 struct pblk_emeta *emeta = line->emeta;
143 struct line_emeta *emeta_buf = emeta->buf;
143 __le64 *lba_list; 144 __le64 *lba_list;
144 int data_start; 145 int data_start;
145 int nr_data_lbas, nr_valid_lbas, nr_lbas = 0; 146 int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
146 int i; 147 int i;
147 148
148 lba_list = pblk_recov_get_lba_list(pblk, emeta); 149 lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
149 if (!lba_list) 150 if (!lba_list)
150 return 1; 151 return 1;
151 152
152 data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; 153 data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
153 nr_data_lbas = lm->sec_per_line - lm->emeta_sec; 154 nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
154 nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas); 155 nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
155 156
156 for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) { 157 for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
157 struct ppa_addr ppa; 158 struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
169 if (test_and_set_bit(i, line->invalid_bitmap)) 170 if (test_and_set_bit(i, line->invalid_bitmap))
170 WARN_ONCE(1, "pblk: rec. double invalidate:\n"); 171 WARN_ONCE(1, "pblk: rec. double invalidate:\n");
171 else 172 else
172 line->vsc--; 173 le32_add_cpu(line->vsc, -1);
173 spin_unlock(&line->lock); 174 spin_unlock(&line->lock);
174 175
175 continue; 176 continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
181 182
182 if (nr_valid_lbas != nr_lbas) 183 if (nr_valid_lbas != nr_lbas)
183 pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n", 184 pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
184 line->id, line->emeta->nr_valid_lbas, nr_lbas); 185 line->id, emeta_buf->nr_valid_lbas, nr_lbas);
185 186
186 line->left_msecs = 0; 187 line->left_msecs = 0;
187 188
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
195 struct pblk_line_meta *lm = &pblk->lm; 196 struct pblk_line_meta *lm = &pblk->lm;
196 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); 197 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
197 198
198 return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec - 199 return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
199 nr_bb * geo->sec_per_blk; 200 nr_bb * geo->sec_per_blk;
200} 201}
201 202
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
240 r_ptr_int = r_ptr; 241 r_ptr_int = r_ptr;
241 242
242next_read_rq: 243next_read_rq:
243 memset(rqd, 0, pblk_r_rq_size); 244 memset(rqd, 0, pblk_g_rq_size);
244 245
245 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 246 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
246 if (!rq_ppas) 247 if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
256 257
257 rqd->bio = bio; 258 rqd->bio = bio;
258 rqd->opcode = NVM_OP_PREAD; 259 rqd->opcode = NVM_OP_PREAD;
259 rqd->flags = pblk_set_read_mode(pblk);
260 rqd->meta_list = meta_list; 260 rqd->meta_list = meta_list;
261 rqd->nr_ppas = rq_ppas; 261 rqd->nr_ppas = rq_ppas;
262 rqd->ppa_list = ppa_list; 262 rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
265 rqd->end_io = pblk_end_io_sync; 265 rqd->end_io = pblk_end_io_sync;
266 rqd->private = &wait; 266 rqd->private = &wait;
267 267
268 if (pblk_io_aligned(pblk, rq_ppas))
269 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
270 else
271 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
272
268 for (i = 0; i < rqd->nr_ppas; ) { 273 for (i = 0; i < rqd->nr_ppas; ) {
269 struct ppa_addr ppa; 274 struct ppa_addr ppa;
270 int pos; 275 int pos;
@@ -295,7 +300,7 @@ next_read_rq:
295 pr_err("pblk: L2P recovery read timed out\n"); 300 pr_err("pblk: L2P recovery read timed out\n");
296 return -EINTR; 301 return -EINTR;
297 } 302 }
298 303 atomic_dec(&pblk->inflight_io);
299 reinit_completion(&wait); 304 reinit_completion(&wait);
300 305
301 /* At this point, the read should not fail. If it does, it is a problem 306 /* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ next_read_rq:
322 return 0; 327 return 0;
323} 328}
324 329
330static void pblk_recov_complete(struct kref *ref)
331{
332 struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
333
334 complete(&pad_rq->wait);
335}
336
337static void pblk_end_io_recov(struct nvm_rq *rqd)
338{
339 struct pblk_pad_rq *pad_rq = rqd->private;
340 struct pblk *pblk = pad_rq->pblk;
341 struct nvm_tgt_dev *dev = pblk->dev;
342
343 kref_put(&pad_rq->ref, pblk_recov_complete);
344 nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
345 pblk_free_rqd(pblk, rqd, WRITE);
346}
347
325static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, 348static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
326 struct pblk_recov_alloc p, int left_ppas) 349 int left_ppas)
327{ 350{
328 struct nvm_tgt_dev *dev = pblk->dev; 351 struct nvm_tgt_dev *dev = pblk->dev;
329 struct nvm_geo *geo = &dev->geo; 352 struct nvm_geo *geo = &dev->geo;
330 struct ppa_addr *ppa_list; 353 struct ppa_addr *ppa_list;
331 struct pblk_sec_meta *meta_list; 354 struct pblk_sec_meta *meta_list;
355 struct pblk_pad_rq *pad_rq;
332 struct nvm_rq *rqd; 356 struct nvm_rq *rqd;
333 struct bio *bio; 357 struct bio *bio;
334 void *data; 358 void *data;
335 dma_addr_t dma_ppa_list, dma_meta_list; 359 dma_addr_t dma_ppa_list, dma_meta_list;
336 __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta); 360 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
337 u64 w_ptr = line->cur_sec; 361 u64 w_ptr = line->cur_sec;
338 int left_line_ppas = line->left_msecs; 362 int left_line_ppas, rq_ppas, rq_len;
339 int rq_ppas, rq_len;
340 int i, j; 363 int i, j;
341 int ret = 0; 364 int ret = 0;
342 DECLARE_COMPLETION_ONSTACK(wait);
343 365
344 ppa_list = p.ppa_list; 366 spin_lock(&line->lock);
345 meta_list = p.meta_list; 367 left_line_ppas = line->left_msecs;
346 rqd = p.rqd; 368 spin_unlock(&line->lock);
347 data = p.data; 369
348 dma_ppa_list = p.dma_ppa_list; 370 pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
349 dma_meta_list = p.dma_meta_list; 371 if (!pad_rq)
372 return -ENOMEM;
373
374 data = vzalloc(pblk->max_write_pgs * geo->sec_size);
375 if (!data) {
376 ret = -ENOMEM;
377 goto free_rq;
378 }
379
380 pad_rq->pblk = pblk;
381 init_completion(&pad_rq->wait);
382 kref_init(&pad_rq->ref);
350 383
351next_pad_rq: 384next_pad_rq:
352 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 385 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
353 if (!rq_ppas) 386 if (rq_ppas < pblk->min_write_pgs) {
354 rq_ppas = pblk->min_write_pgs; 387 pr_err("pblk: corrupted pad line %d\n", line->id);
388 goto free_rq;
389 }
390
355 rq_len = rq_ppas * geo->sec_size; 391 rq_len = rq_ppas * geo->sec_size;
356 392
393 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
394 if (!meta_list) {
395 ret = -ENOMEM;
396 goto free_data;
397 }
398
399 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
400 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
401
402 rqd = pblk_alloc_rqd(pblk, WRITE);
403 if (IS_ERR(rqd)) {
404 ret = PTR_ERR(rqd);
405 goto fail_free_meta;
406 }
407 memset(rqd, 0, pblk_w_rq_size);
408
357 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); 409 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
358 if (IS_ERR(bio)) 410 if (IS_ERR(bio)) {
359 return PTR_ERR(bio); 411 ret = PTR_ERR(bio);
412 goto fail_free_rqd;
413 }
360 414
361 bio->bi_iter.bi_sector = 0; /* internal bio */ 415 bio->bi_iter.bi_sector = 0; /* internal bio */
362 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 416 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
363 417
364 memset(rqd, 0, pblk_r_rq_size);
365
366 rqd->bio = bio; 418 rqd->bio = bio;
367 rqd->opcode = NVM_OP_PWRITE; 419 rqd->opcode = NVM_OP_PWRITE;
368 rqd->flags = pblk_set_progr_mode(pblk, WRITE); 420 rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ next_pad_rq:
371 rqd->ppa_list = ppa_list; 423 rqd->ppa_list = ppa_list;
372 rqd->dma_ppa_list = dma_ppa_list; 424 rqd->dma_ppa_list = dma_ppa_list;
373 rqd->dma_meta_list = dma_meta_list; 425 rqd->dma_meta_list = dma_meta_list;
374 rqd->end_io = pblk_end_io_sync; 426 rqd->end_io = pblk_end_io_recov;
375 rqd->private = &wait; 427 rqd->private = pad_rq;
376 428
377 for (i = 0; i < rqd->nr_ppas; ) { 429 for (i = 0; i < rqd->nr_ppas; ) {
378 struct ppa_addr ppa; 430 struct ppa_addr ppa;
@@ -390,34 +442,51 @@ next_pad_rq:
390 442
391 for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { 443 for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
392 struct ppa_addr dev_ppa; 444 struct ppa_addr dev_ppa;
445 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
393 446
394 dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); 447 dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
395 448
396 pblk_map_invalidate(pblk, dev_ppa); 449 pblk_map_invalidate(pblk, dev_ppa);
397 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); 450 lba_list[w_ptr] = meta_list[i].lba = addr_empty;
398 lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
399 rqd->ppa_list[i] = dev_ppa; 451 rqd->ppa_list[i] = dev_ppa;
400 } 452 }
401 } 453 }
402 454
455 kref_get(&pad_rq->ref);
456
403 ret = pblk_submit_io(pblk, rqd); 457 ret = pblk_submit_io(pblk, rqd);
404 if (ret) { 458 if (ret) {
405 pr_err("pblk: I/O submission failed: %d\n", ret); 459 pr_err("pblk: I/O submission failed: %d\n", ret);
406 return ret; 460 goto free_data;
407 } 461 }
408 462
409 if (!wait_for_completion_io_timeout(&wait, 463 atomic_dec(&pblk->inflight_io);
410 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
411 pr_err("pblk: L2P recovery write timed out\n");
412 }
413 reinit_completion(&wait);
414 464
415 left_line_ppas -= rq_ppas; 465 left_line_ppas -= rq_ppas;
416 left_ppas -= rq_ppas; 466 left_ppas -= rq_ppas;
417 if (left_ppas > 0 && left_line_ppas) 467 if (left_ppas && left_line_ppas)
418 goto next_pad_rq; 468 goto next_pad_rq;
419 469
420 return 0; 470 kref_put(&pad_rq->ref, pblk_recov_complete);
471
472 if (!wait_for_completion_io_timeout(&pad_rq->wait,
473 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
474 pr_err("pblk: pad write timed out\n");
475 ret = -ETIME;
476 }
477
478free_rq:
479 kfree(pad_rq);
480free_data:
481 vfree(data);
482 return ret;
483
484fail_free_rqd:
485 pblk_free_rqd(pblk, rqd, WRITE);
486fail_free_meta:
487 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
488 kfree(pad_rq);
489 return ret;
421} 490}
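
The padding path above replaces the per-request on-stack completion with a reference-counted pad context: kref_init() accounts for the submitter, every issued pad write takes an extra reference, pblk_end_io_recov() drops one per completion, and the submitter's final kref_put() lets the last completion wake the waiter. Below is a minimal sketch of that pattern, not the driver's code: pad_ctx, pad_submit_all() and the simulated completion call are illustrative stand-ins for pblk_pad_rq and pblk_submit_io().

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

struct pad_ctx {
	struct kref ref;		/* one ref for the submitter, one per in-flight request */
	struct completion wait;		/* signalled when the last reference is dropped */
};

static void pad_ctx_release(struct kref *ref)
{
	struct pad_ctx *ctx = container_of(ref, struct pad_ctx, ref);

	complete(&ctx->wait);
}

/* what each request's end_io callback would do */
static void pad_end_io(struct pad_ctx *ctx)
{
	kref_put(&ctx->ref, pad_ctx_release);
}

static int pad_submit_all(unsigned int nr_reqs)
{
	struct pad_ctx *ctx;
	unsigned int i;
	int ret = 0;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	kref_init(&ctx->ref);			/* submitter's reference */
	init_completion(&ctx->wait);

	for (i = 0; i < nr_reqs; i++) {
		kref_get(&ctx->ref);		/* taken before the request is issued */
		/*
		 * A real submit (pblk_submit_io() in the patch) returns here
		 * and pad_end_io() runs later from the device's end_io
		 * callback; the direct call below only keeps the sketch
		 * balanced.
		 */
		pad_end_io(ctx);
	}

	kref_put(&ctx->ref, pad_ctx_release);	/* drop the submitter's reference */

	if (!wait_for_completion_io_timeout(&ctx->wait,
					    msecs_to_jiffies(30000)))
		ret = -ETIME;

	kfree(ctx);
	return ret;
}

Because the waiter only drops its own reference after the last request has been issued, completions that arrive early cannot signal the waiter prematurely.
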
422 491
423/* When this function is called, it means that not all upper pages have been 492/* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
456 rec_round = 0; 525 rec_round = 0;
457 526
458next_rq: 527next_rq:
459 memset(rqd, 0, pblk_r_rq_size); 528 memset(rqd, 0, pblk_g_rq_size);
460 529
461 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 530 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
462 if (!rq_ppas) 531 if (!rq_ppas)
@@ -472,7 +541,6 @@ next_rq:
472 541
473 rqd->bio = bio; 542 rqd->bio = bio;
474 rqd->opcode = NVM_OP_PREAD; 543 rqd->opcode = NVM_OP_PREAD;
475 rqd->flags = pblk_set_read_mode(pblk);
476 rqd->meta_list = meta_list; 544 rqd->meta_list = meta_list;
477 rqd->nr_ppas = rq_ppas; 545 rqd->nr_ppas = rq_ppas;
478 rqd->ppa_list = ppa_list; 546 rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ next_rq:
481 rqd->end_io = pblk_end_io_sync; 549 rqd->end_io = pblk_end_io_sync;
482 rqd->private = &wait; 550 rqd->private = &wait;
483 551
552 if (pblk_io_aligned(pblk, rq_ppas))
553 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
554 else
555 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
556
484 for (i = 0; i < rqd->nr_ppas; ) { 557 for (i = 0; i < rqd->nr_ppas; ) {
485 struct ppa_addr ppa; 558 struct ppa_addr ppa;
486 int pos; 559 int pos;
@@ -510,6 +583,7 @@ next_rq:
510 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 583 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
511 pr_err("pblk: L2P recovery read timed out\n"); 584 pr_err("pblk: L2P recovery read timed out\n");
512 } 585 }
586 atomic_dec(&pblk->inflight_io);
513 reinit_completion(&wait); 587 reinit_completion(&wait);
514 588
515 /* This should not happen since the read failed during normal recovery, 589 /* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ next_rq:
544 if (pad_secs > line->left_msecs) 618 if (pad_secs > line->left_msecs)
545 pad_secs = line->left_msecs; 619 pad_secs = line->left_msecs;
546 620
547 ret = pblk_recov_pad_oob(pblk, line, p, pad_secs); 621 ret = pblk_recov_pad_oob(pblk, line, pad_secs);
548 if (ret) 622 if (ret)
549 pr_err("pblk: OOB padding failed (err:%d)\n", ret); 623 pr_err("pblk: OOB padding failed (err:%d)\n", ret);
550 624
@@ -552,7 +626,6 @@ next_rq:
552 if (ret) 626 if (ret)
553 pr_err("pblk: OOB read failed (err:%d)\n", ret); 627 pr_err("pblk: OOB read failed (err:%d)\n", ret);
554 628
555 line->left_ssecs = line->left_msecs;
556 left_ppas = 0; 629 left_ppas = 0;
557 } 630 }
558 631
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
591 *done = 1; 664 *done = 1;
592 665
593next_rq: 666next_rq:
594 memset(rqd, 0, pblk_r_rq_size); 667 memset(rqd, 0, pblk_g_rq_size);
595 668
596 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 669 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
597 if (!rq_ppas) 670 if (!rq_ppas)
@@ -607,7 +680,6 @@ next_rq:
607 680
608 rqd->bio = bio; 681 rqd->bio = bio;
609 rqd->opcode = NVM_OP_PREAD; 682 rqd->opcode = NVM_OP_PREAD;
610 rqd->flags = pblk_set_read_mode(pblk);
611 rqd->meta_list = meta_list; 683 rqd->meta_list = meta_list;
612 rqd->nr_ppas = rq_ppas; 684 rqd->nr_ppas = rq_ppas;
613 rqd->ppa_list = ppa_list; 685 rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ next_rq:
616 rqd->end_io = pblk_end_io_sync; 688 rqd->end_io = pblk_end_io_sync;
617 rqd->private = &wait; 689 rqd->private = &wait;
618 690
691 if (pblk_io_aligned(pblk, rq_ppas))
692 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
693 else
694 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
695
619 for (i = 0; i < rqd->nr_ppas; ) { 696 for (i = 0; i < rqd->nr_ppas; ) {
620 struct ppa_addr ppa; 697 struct ppa_addr ppa;
621 int pos; 698 int pos;
@@ -646,6 +723,7 @@ next_rq:
646 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 723 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
647 pr_err("pblk: L2P recovery read timed out\n"); 724 pr_err("pblk: L2P recovery read timed out\n");
648 } 725 }
726 atomic_dec(&pblk->inflight_io);
649 reinit_completion(&wait); 727 reinit_completion(&wait);
650 728
651 /* Reached the end of the written line */ 729 /* Reached the end of the written line */
@@ -658,7 +736,6 @@ next_rq:
658 /* Roll back failed sectors */ 736 /* Roll back failed sectors */
659 line->cur_sec -= nr_error_bits; 737 line->cur_sec -= nr_error_bits;
660 line->left_msecs += nr_error_bits; 738 line->left_msecs += nr_error_bits;
661 line->left_ssecs = line->left_msecs;
662 bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); 739 bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
663 740
664 left_ppas = 0; 741 left_ppas = 0;
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
770 struct pblk_line_meta *lm = &pblk->lm; 847 struct pblk_line_meta *lm = &pblk->lm;
771 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 848 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
772 struct pblk_line *line, *tline, *data_line = NULL; 849 struct pblk_line *line, *tline, *data_line = NULL;
773 struct line_smeta *smeta; 850 struct pblk_smeta *smeta;
774 struct line_emeta *emeta; 851 struct pblk_emeta *emeta;
852 struct line_smeta *smeta_buf;
775 int found_lines = 0, recovered_lines = 0, open_lines = 0; 853 int found_lines = 0, recovered_lines = 0, open_lines = 0;
776 int is_next = 0; 854 int is_next = 0;
777 int meta_line; 855 int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
784 spin_lock(&l_mg->free_lock); 862 spin_lock(&l_mg->free_lock);
785 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); 863 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
786 set_bit(meta_line, &l_mg->meta_bitmap); 864 set_bit(meta_line, &l_mg->meta_bitmap);
787 smeta = l_mg->sline_meta[meta_line].meta; 865 smeta = l_mg->sline_meta[meta_line];
788 emeta = l_mg->eline_meta[meta_line].meta; 866 emeta = l_mg->eline_meta[meta_line];
867 smeta_buf = (struct line_smeta *)smeta;
789 spin_unlock(&l_mg->free_lock); 868 spin_unlock(&l_mg->free_lock);
790 869
791 /* Order data lines using their sequence number */ 870 /* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
796 875
797 memset(smeta, 0, lm->smeta_len); 876 memset(smeta, 0, lm->smeta_len);
798 line->smeta = smeta; 877 line->smeta = smeta;
799 line->lun_bitmap = ((void *)(smeta)) + 878 line->lun_bitmap = ((void *)(smeta_buf)) +
800 sizeof(struct line_smeta); 879 sizeof(struct line_smeta);
801 880
802 /* Lines that cannot be read are assumed as not written here */ 881 /* Lines that cannot be read are assumed as not written here */
803 if (pblk_line_read_smeta(pblk, line)) 882 if (pblk_line_read_smeta(pblk, line))
804 continue; 883 continue;
805 884
806 crc = pblk_calc_smeta_crc(pblk, smeta); 885 crc = pblk_calc_smeta_crc(pblk, smeta_buf);
807 if (le32_to_cpu(smeta->crc) != crc) 886 if (le32_to_cpu(smeta_buf->crc) != crc)
808 continue; 887 continue;
809 888
810 if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC) 889 if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
811 continue; 890 continue;
812 891
813 if (le16_to_cpu(smeta->header.version) != 1) { 892 if (le16_to_cpu(smeta_buf->header.version) != 1) {
814 pr_err("pblk: found incompatible line version %u\n", 893 pr_err("pblk: found incompatible line version %u\n",
815 smeta->header.version); 894 smeta_buf->header.version);
816 return ERR_PTR(-EINVAL); 895 return ERR_PTR(-EINVAL);
817 } 896 }
818 897
819 /* The first valid instance uuid is used for initialization */ 898 /* The first valid instance uuid is used for initialization */
820 if (!valid_uuid) { 899 if (!valid_uuid) {
821 memcpy(pblk->instance_uuid, smeta->header.uuid, 16); 900 memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
822 valid_uuid = 1; 901 valid_uuid = 1;
823 } 902 }
824 903
825 if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) { 904 if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
826 pr_debug("pblk: ignore line %u due to uuid mismatch\n", 905 pr_debug("pblk: ignore line %u due to uuid mismatch\n",
827 i); 906 i);
828 continue; 907 continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
830 909
831 /* Update line metadata */ 910 /* Update line metadata */
832 spin_lock(&line->lock); 911 spin_lock(&line->lock);
833 line->id = le32_to_cpu(line->smeta->header.id); 912 line->id = le32_to_cpu(smeta_buf->header.id);
834 line->type = le16_to_cpu(line->smeta->header.type); 913 line->type = le16_to_cpu(smeta_buf->header.type);
835 line->seq_nr = le64_to_cpu(line->smeta->seq_nr); 914 line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
836 spin_unlock(&line->lock); 915 spin_unlock(&line->lock);
837 916
838 /* Update general metadata */ 917 /* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
848 pblk_recov_line_add_ordered(&recov_list, line); 927 pblk_recov_line_add_ordered(&recov_list, line);
849 found_lines++; 928 found_lines++;
850 pr_debug("pblk: recovering data line %d, seq:%llu\n", 929 pr_debug("pblk: recovering data line %d, seq:%llu\n",
851 line->id, smeta->seq_nr); 930 line->id, smeta_buf->seq_nr);
852 } 931 }
853 932
854 if (!found_lines) { 933 if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
868 947
869 recovered_lines++; 948 recovered_lines++;
870 /* Calculate where emeta starts based on the line bb */ 949 /* Calculate where emeta starts based on the line bb */
871 off = lm->sec_per_line - lm->emeta_sec; 950 off = lm->sec_per_line - lm->emeta_sec[0];
872 nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); 951 nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
873 off -= nr_bb * geo->sec_per_pl; 952 off -= nr_bb * geo->sec_per_pl;
874 953
875 memset(emeta, 0, lm->emeta_len);
876 line->emeta = emeta;
877 line->emeta_ssec = off; 954 line->emeta_ssec = off;
955 line->emeta = emeta;
956 memset(line->emeta->buf, 0, lm->emeta_len[0]);
878 957
879 if (pblk_line_read_emeta(pblk, line)) { 958 if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
880 pblk_recov_l2p_from_oob(pblk, line); 959 pblk_recov_l2p_from_oob(pblk, line);
881 goto next; 960 goto next;
882 } 961 }
@@ -941,58 +1020,26 @@ out:
941} 1020}
942 1021
943/* 1022/*
944 * Pad until smeta can be read on current data line 1023 * Pad current line
945 */ 1024 */
946void pblk_recov_pad(struct pblk *pblk) 1025int pblk_recov_pad(struct pblk *pblk)
947{ 1026{
948 struct nvm_tgt_dev *dev = pblk->dev;
949 struct nvm_geo *geo = &dev->geo;
950 struct pblk_line *line; 1027 struct pblk_line *line;
951 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1028 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
952 struct nvm_rq *rqd; 1029 int left_msecs;
953 struct pblk_recov_alloc p; 1030 int ret = 0;
954 struct ppa_addr *ppa_list;
955 struct pblk_sec_meta *meta_list;
956 void *data;
957 dma_addr_t dma_ppa_list, dma_meta_list;
958 1031
959 spin_lock(&l_mg->free_lock); 1032 spin_lock(&l_mg->free_lock);
960 line = l_mg->data_line; 1033 line = l_mg->data_line;
1034 left_msecs = line->left_msecs;
961 spin_unlock(&l_mg->free_lock); 1035 spin_unlock(&l_mg->free_lock);
962 1036
963 rqd = pblk_alloc_rqd(pblk, READ); 1037 ret = pblk_recov_pad_oob(pblk, line, left_msecs);
964 if (IS_ERR(rqd)) 1038 if (ret) {
965 return; 1039 pr_err("pblk: Tear down padding failed (%d)\n", ret);
966 1040 return ret;
967 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
968 if (!meta_list)
969 goto free_rqd;
970
971 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
972 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
973
974 data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
975 if (!data)
976 goto free_meta_list;
977
978 p.ppa_list = ppa_list;
979 p.meta_list = meta_list;
980 p.rqd = rqd;
981 p.data = data;
982 p.dma_ppa_list = dma_ppa_list;
983 p.dma_meta_list = dma_meta_list;
984
985 if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
986 pr_err("pblk: Tear down padding failed\n");
987 goto free_data;
988 } 1041 }
989 1042
990 pblk_line_close(pblk, line); 1043 pblk_line_close_meta(pblk, line);
991 1044 return ret;
992free_data:
993 kfree(data);
994free_meta_list:
995 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
996free_rqd:
997 pblk_free_rqd(pblk, rqd, READ);
998} 1045}
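
Earlier in this file's diff, the recovery scan only admits a line whose start metadata passes four gates: the CRC over the smeta buffer, the PBLK_MAGIC identifier, a supported version, and a UUID that matches the first valid instance found. A minimal sketch of that gate follows; the struct layout, the CRC coverage and the demo_* names are simplified assumptions, not the driver's on-media format.

#include <linux/types.h>
#include <linux/string.h>
#include <linux/crc32.h>

#define DEMO_MAGIC	0x70626c6bU	/* stands in for PBLK_MAGIC */
#define DEMO_VERSION	1

/* the driver stores these little-endian on media; host order keeps it simple */
struct demo_smeta {
	u32 crc;
	u32 identifier;
	u16 version;
	u8 uuid[16];
	/* header id/type, seq_nr, lun bitmap, etc. omitted */
};

/* simplified CRC: covers everything after the crc field itself */
static u32 demo_smeta_crc(const struct demo_smeta *smeta)
{
	return crc32_le(~0U, (const u8 *)smeta + sizeof(smeta->crc),
			sizeof(*smeta) - sizeof(smeta->crc));
}

/*
 * Returns true if a scanned line may enter recovery. The first line that
 * passes the checks defines the instance UUID; later lines must match it,
 * mirroring pblk->instance_uuid/valid_uuid in the hunks above.
 */
static bool demo_smeta_ok(const struct demo_smeta *smeta,
			  u8 *instance_uuid, bool *uuid_valid)
{
	if (smeta->crc != demo_smeta_crc(smeta))
		return false;
	if (smeta->identifier != DEMO_MAGIC)
		return false;
	if (smeta->version != DEMO_VERSION)
		return false;

	if (!*uuid_valid) {
		memcpy(instance_uuid, smeta->uuid, 16);
		*uuid_valid = true;
	}
	return !memcmp(instance_uuid, smeta->uuid, 16);
}
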
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb144f3f..2e6a5361baf0 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
23 mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000)); 23 mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
24} 24}
25 25
26int pblk_rl_is_limit(struct pblk_rl *rl)
27{
28 int rb_space;
29
30 rb_space = atomic_read(&rl->rb_space);
31
32 return (rb_space == 0);
33}
34
26int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries) 35int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
27{ 36{
28 int rb_user_cnt = atomic_read(&rl->rb_user_cnt); 37 int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
38 int rb_space = atomic_read(&rl->rb_space);
29 39
30 return (!(rb_user_cnt + nr_entries > rl->rb_user_max)); 40 if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
41 return NVM_IO_ERR;
42
43 if (rb_user_cnt >= rl->rb_user_max)
44 return NVM_IO_REQUEUE;
45
46 return NVM_IO_OK;
47}
48
49void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
50{
51 int rb_space = atomic_read(&rl->rb_space);
52
53 if (unlikely(rb_space >= 0))
54 atomic_sub(nr_entries, &rl->rb_space);
31} 55}
32 56
33int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries) 57int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
37 61
38 /* If there is no user I/O let GC take over space on the write buffer */ 62 /* If there is no user I/O let GC take over space on the write buffer */
39 rb_user_active = READ_ONCE(rl->rb_user_active); 63 rb_user_active = READ_ONCE(rl->rb_user_active);
40 return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active)); 64 return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
41} 65}
42 66
43void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) 67void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
77 unsigned long free_blocks = pblk_rl_nr_free_blks(rl); 101 unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
78 102
79 if (free_blocks >= rl->high) { 103 if (free_blocks >= rl->high) {
80 rl->rb_user_max = max - rl->rb_gc_rsv; 104 rl->rb_user_max = max;
81 rl->rb_gc_max = rl->rb_gc_rsv; 105 rl->rb_gc_max = 0;
82 rl->rb_state = PBLK_RL_HIGH; 106 rl->rb_state = PBLK_RL_HIGH;
83 } else if (free_blocks < rl->high) { 107 } else if (free_blocks < rl->high) {
84 int shift = rl->high_pw - rl->rb_windows_pw; 108 int shift = rl->high_pw - rl->rb_windows_pw;
85 int user_windows = free_blocks >> shift; 109 int user_windows = free_blocks >> shift;
86 int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW; 110 int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
87 int gc_max;
88 111
89 rl->rb_user_max = user_max; 112 rl->rb_user_max = user_max;
90 gc_max = max - rl->rb_user_max; 113 rl->rb_gc_max = max - user_max;
91 rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv); 114
92 115 if (free_blocks <= rl->rsv_blocks) {
93 if (free_blocks > rl->low) 116 rl->rb_user_max = 0;
94 rl->rb_state = PBLK_RL_MID; 117 rl->rb_gc_max = max;
95 else 118 }
96 rl->rb_state = PBLK_RL_LOW; 119
120 /* In the worst case, we will need to GC lines in the low list
121 * (high valid sector count). If there are lines to GC on high
122 * or mid lists, these will be prioritized
123 */
124 rl->rb_state = PBLK_RL_LOW;
97 } 125 }
98 126
99 return rl->rb_state; 127 return rl->rb_state;
100} 128}
101 129
102void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
103{
104 rl->rb_gc_rsv = rl->rb_gc_max = rsv;
105}
106
107void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) 130void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
108{ 131{
109 struct pblk *pblk = container_of(rl, struct pblk, rl); 132 struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
122 145
123void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) 146void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
124{ 147{
125 struct pblk *pblk = container_of(rl, struct pblk, rl);
126 int blk_in_line = atomic_read(&line->blk_in_line); 148 int blk_in_line = atomic_read(&line->blk_in_line);
127 int ret;
128 149
129 atomic_sub(blk_in_line, &rl->free_blocks); 150 atomic_sub(blk_in_line, &rl->free_blocks);
151}
152
153void pblk_gc_should_kick(struct pblk *pblk)
154{
155 struct pblk_rl *rl = &pblk->rl;
156 int ret;
130 157
131 /* Rates will not change that often - no need to lock update */ 158 /* Rates will not change that often - no need to lock update */
132 ret = pblk_rl_update_rates(rl, rl->rb_budget); 159 ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
136 pblk_gc_should_stop(pblk); 163 pblk_gc_should_stop(pblk);
137} 164}
138 165
139int pblk_rl_gc_thrs(struct pblk_rl *rl) 166int pblk_rl_high_thrs(struct pblk_rl *rl)
140{ 167{
141 return rl->high; 168 return rl->high;
142} 169}
143 170
171int pblk_rl_low_thrs(struct pblk_rl *rl)
172{
173 return rl->low;
174}
175
144int pblk_rl_sysfs_rate_show(struct pblk_rl *rl) 176int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
145{ 177{
146 return rl->rb_user_max; 178 return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
161 193
162void pblk_rl_init(struct pblk_rl *rl, int budget) 194void pblk_rl_init(struct pblk_rl *rl, int budget)
163{ 195{
196 struct pblk *pblk = container_of(rl, struct pblk, rl);
197 struct pblk_line_meta *lm = &pblk->lm;
198 int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
164 unsigned int rb_windows; 199 unsigned int rb_windows;
165 200
166 rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS; 201 rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
167 rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
168 rl->high_pw = get_count_order(rl->high); 202 rl->high_pw = get_count_order(rl->high);
169 203
204 rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
205 if (rl->low < min_blocks)
206 rl->low = min_blocks;
207
208 rl->rsv_blocks = min_blocks;
209
170 /* This will always be a power-of-2 */ 210 /* This will always be a power-of-2 */
171 rb_windows = budget / PBLK_MAX_REQ_ADDRS; 211 rb_windows = budget / PBLK_MAX_REQ_ADDRS;
172 rl->rb_windows_pw = get_count_order(rb_windows) + 1; 212 rl->rb_windows_pw = get_count_order(rb_windows);
173 213
174 /* To start with, all buffer is available to user I/O writers */ 214 /* To start with, all buffer is available to user I/O writers */
175 rl->rb_budget = budget; 215 rl->rb_budget = budget;
176 rl->rb_user_max = budget; 216 rl->rb_user_max = budget;
177 atomic_set(&rl->rb_user_cnt, 0);
178 rl->rb_gc_max = 0; 217 rl->rb_gc_max = 0;
179 rl->rb_state = PBLK_RL_HIGH; 218 rl->rb_state = PBLK_RL_HIGH;
219
220 atomic_set(&rl->rb_user_cnt, 0);
180 atomic_set(&rl->rb_gc_cnt, 0); 221 atomic_set(&rl->rb_gc_cnt, 0);
222 atomic_set(&rl->rb_space, -1);
181 223
182 setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl); 224 setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
225
183 rl->rb_user_active = 0; 226 rl->rb_user_active = 0;
227 rl->rb_gc_active = 0;
184} 228}
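
The reworked pblk_rl_update_rates() in the hunk above derives the user/GC split of the write-buffer budget purely from the free-block count: above the high watermark user I/O owns the whole buffer, below it the user share shrinks with the number of free rate windows, and at the reserved-block floor user writes stop so GC gets everything. A minimal sketch of that split; demo_rl and DEMO_MAX_REQ_ADDRS_PW are stand-ins for the pblk_rl fields and PBLK_MAX_REQ_ADDRS_PW, and the rb_state bookkeeping is omitted.

/* illustrative mirror of the pblk_rl fields used by the hunk above */
struct demo_rl {
	unsigned long high;		/* free-block high watermark */
	unsigned long rsv_blocks;	/* reserved floor kept for GC */
	int high_pw;			/* get_count_order(high) */
	int rb_windows_pw;		/* buffer rate windows as a power of 2 */
	int rb_budget;			/* total write buffer entries */
	int rb_user_max;		/* entries available to user I/O */
	int rb_gc_max;			/* entries available to GC I/O */
};

#define DEMO_MAX_REQ_ADDRS_PW	6	/* stands in for PBLK_MAX_REQ_ADDRS_PW */

static void demo_update_rates(struct demo_rl *rl, unsigned long free_blocks)
{
	int max = rl->rb_budget;
	int shift, user_windows, user_max;

	if (free_blocks >= rl->high) {
		/* plenty of free lines: user I/O owns the whole buffer */
		rl->rb_user_max = max;
		rl->rb_gc_max = 0;
		return;
	}

	/* below the watermark: scale the user share with the free blocks
	 * (assumes high_pw >= rb_windows_pw, as in the driver)
	 */
	shift = rl->high_pw - rl->rb_windows_pw;
	user_windows = (int)(free_blocks >> shift);
	user_max = user_windows << DEMO_MAX_REQ_ADDRS_PW;

	rl->rb_user_max = user_max;
	rl->rb_gc_max = max - user_max;

	/* at the reserve floor, stop user writes and hand everything to GC */
	if (free_blocks <= rl->rsv_blocks) {
		rl->rb_user_max = 0;
		rl->rb_gc_max = max;
	}
}
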
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1ceeff..95fb434e2f01 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
49 49
50static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) 50static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
51{ 51{
52 struct nvm_tgt_dev *dev = pblk->dev;
53 struct nvm_geo *geo = &dev->geo;
54 int free_blocks, total_blocks; 52 int free_blocks, total_blocks;
55 int rb_user_max, rb_user_cnt; 53 int rb_user_max, rb_user_cnt;
56 int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state; 54 int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
57 55
58 free_blocks = atomic_read(&pblk->rl.free_blocks); 56 free_blocks = atomic_read(&pblk->rl.free_blocks);
59 rb_user_max = pblk->rl.rb_user_max; 57 rb_user_max = pblk->rl.rb_user_max;
60 rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); 58 rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
61 rb_gc_max = pblk->rl.rb_gc_max; 59 rb_gc_max = pblk->rl.rb_gc_max;
62 rb_gc_rsv = pblk->rl.rb_gc_rsv;
63 rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt); 60 rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
64 rb_budget = pblk->rl.rb_budget; 61 rb_budget = pblk->rl.rb_budget;
65 rb_state = pblk->rl.rb_state; 62 rb_state = pblk->rl.rb_state;
66 63
67 total_blocks = geo->blks_per_lun * geo->nr_luns; 64 total_blocks = pblk->rl.total_blocks;
68 65
69 return snprintf(page, PAGE_SIZE, 66 return snprintf(page, PAGE_SIZE,
70 "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n", 67 "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
71 rb_user_cnt, 68 rb_user_cnt,
72 rb_user_max, 69 rb_user_max,
73 rb_gc_cnt, 70 rb_gc_cnt,
74 rb_gc_max, 71 rb_gc_max,
75 rb_gc_rsv,
76 rb_state, 72 rb_state,
77 rb_budget, 73 rb_budget,
78 pblk->rl.low, 74 pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
150 ssize_t sz = 0; 146 ssize_t sz = 0;
151 int nr_free_lines; 147 int nr_free_lines;
152 int cur_data, cur_log; 148 int cur_data, cur_log;
153 int free_line_cnt = 0, closed_line_cnt = 0; 149 int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
154 int d_line_cnt = 0, l_line_cnt = 0; 150 int d_line_cnt = 0, l_line_cnt = 0;
155 int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; 151 int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
156 int free = 0, bad = 0, cor = 0; 152 int bad = 0, cor = 0;
157 int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; 153 int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
158 int map_weight = 0, meta_weight = 0; 154 int map_weight = 0, meta_weight = 0;
159 155
160 spin_lock(&l_mg->free_lock); 156 spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
166 free_line_cnt++; 162 free_line_cnt++;
167 spin_unlock(&l_mg->free_lock); 163 spin_unlock(&l_mg->free_lock);
168 164
165 spin_lock(&l_mg->close_lock);
166 list_for_each_entry(line, &l_mg->emeta_list, list)
167 emeta_line_cnt++;
168 spin_unlock(&l_mg->close_lock);
169
169 spin_lock(&l_mg->gc_lock); 170 spin_lock(&l_mg->gc_lock);
170 list_for_each_entry(line, &l_mg->gc_full_list, list) { 171 list_for_each_entry(line, &l_mg->gc_full_list, list) {
171 if (line->type == PBLK_LINETYPE_DATA) 172 if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
212 gc_empty++; 213 gc_empty++;
213 } 214 }
214 215
215 list_for_each_entry(line, &l_mg->free_list, list)
216 free++;
217 list_for_each_entry(line, &l_mg->bad_list, list) 216 list_for_each_entry(line, &l_mg->bad_list, list)
218 bad++; 217 bad++;
219 list_for_each_entry(line, &l_mg->corrupt_list, list) 218 list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
224 if (l_mg->data_line) { 223 if (l_mg->data_line) {
225 cur_sec = l_mg->data_line->cur_sec; 224 cur_sec = l_mg->data_line->cur_sec;
226 msecs = l_mg->data_line->left_msecs; 225 msecs = l_mg->data_line->left_msecs;
227 ssecs = l_mg->data_line->left_ssecs; 226 vsc = le32_to_cpu(*l_mg->data_line->vsc);
228 vsc = l_mg->data_line->vsc;
229 sec_in_line = l_mg->data_line->sec_in_line; 227 sec_in_line = l_mg->data_line->sec_in_line;
230 meta_weight = bitmap_weight(&l_mg->meta_bitmap, 228 meta_weight = bitmap_weight(&l_mg->meta_bitmap,
231 PBLK_DATA_LINES); 229 PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
235 spin_unlock(&l_mg->free_lock); 233 spin_unlock(&l_mg->free_lock);
236 234
237 if (nr_free_lines != free_line_cnt) 235 if (nr_free_lines != free_line_cnt)
238 pr_err("pblk: corrupted free line list\n"); 236 pr_err("pblk: corrupted free line list:%d/%d\n",
237 nr_free_lines, free_line_cnt);
239 238
240 sz = snprintf(page, PAGE_SIZE - sz, 239 sz = snprintf(page, PAGE_SIZE - sz,
241 "line: nluns:%d, nblks:%d, nsecs:%d\n", 240 "line: nluns:%d, nblks:%d, nsecs:%d\n",
242 geo->nr_luns, lm->blk_per_line, lm->sec_per_line); 241 geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
243 242
244 sz += snprintf(page + sz, PAGE_SIZE - sz, 243 sz += snprintf(page + sz, PAGE_SIZE - sz,
245 "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n", 244 "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
246 cur_data, cur_log, 245 cur_data, cur_log,
247 free, nr_free_lines, bad, cor, 246 nr_free_lines,
247 emeta_line_cnt, meta_weight,
248 closed_line_cnt, 248 closed_line_cnt,
249 bad, cor,
249 d_line_cnt, l_line_cnt, 250 d_line_cnt, l_line_cnt,
250 l_mg->nr_lines); 251 l_mg->nr_lines);
251 252
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
255 atomic_read(&pblk->gc.inflight_gc)); 256 atomic_read(&pblk->gc.inflight_gc));
256 257
257 sz += snprintf(page + sz, PAGE_SIZE - sz, 258 sz += snprintf(page + sz, PAGE_SIZE - sz,
258 "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n", 259 "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
259 cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line, 260 cur_data, cur_sec, msecs, vsc, sec_in_line,
260 map_weight, lm->sec_per_line, meta_weight); 261 map_weight, lm->sec_per_line,
262 atomic_read(&pblk->inflight_io));
261 263
262 return sz; 264 return sz;
263} 265}
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
274 lm->smeta_len, lm->smeta_sec); 276 lm->smeta_len, lm->smeta_sec);
275 sz += snprintf(page + sz, PAGE_SIZE - sz, 277 sz += snprintf(page + sz, PAGE_SIZE - sz,
276 "emeta - len:%d, sec:%d, bb_start:%d\n", 278 "emeta - len:%d, sec:%d, bb_start:%d\n",
277 lm->emeta_len, lm->emeta_sec, 279 lm->emeta_len[0], lm->emeta_sec[0],
278 lm->emeta_bb); 280 lm->emeta_bb);
279 sz += snprintf(page + sz, PAGE_SIZE - sz, 281 sz += snprintf(page + sz, PAGE_SIZE - sz,
280 "bitmap lengths: sec:%d, blk:%d, lun:%d\n", 282 "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
290 return sz; 292 return sz;
291} 293}
292 294
295static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
296{
297 return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
298}
299
293#ifdef CONFIG_NVM_DEBUG 300#ifdef CONFIG_NVM_DEBUG
294static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) 301static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
295{ 302{
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
303 atomic_long_read(&pblk->padded_wb), 310 atomic_long_read(&pblk->padded_wb),
304 atomic_long_read(&pblk->sub_writes), 311 atomic_long_read(&pblk->sub_writes),
305 atomic_long_read(&pblk->sync_writes), 312 atomic_long_read(&pblk->sync_writes),
306 atomic_long_read(&pblk->compl_writes),
307 atomic_long_read(&pblk->recov_writes), 313 atomic_long_read(&pblk->recov_writes),
308 atomic_long_read(&pblk->recov_gc_writes), 314 atomic_long_read(&pblk->recov_gc_writes),
309 atomic_long_read(&pblk->recov_gc_reads), 315 atomic_long_read(&pblk->recov_gc_reads),
316 atomic_long_read(&pblk->cache_reads),
310 atomic_long_read(&pblk->sync_reads)); 317 atomic_long_read(&pblk->sync_reads));
311} 318}
312#endif 319#endif
313 320
314static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page, 321static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
315 size_t len) 322 size_t len)
316{ 323{
317 struct pblk_gc *gc = &pblk->gc;
318 size_t c_len; 324 size_t c_len;
319 int value; 325 int force;
320 326
321 c_len = strcspn(page, "\n"); 327 c_len = strcspn(page, "\n");
322 if (c_len >= len) 328 if (c_len >= len)
323 return -EINVAL; 329 return -EINVAL;
324 330
325 if (kstrtouint(page, 0, &value)) 331 if (kstrtouint(page, 0, &force))
326 return -EINVAL; 332 return -EINVAL;
327 333
328 spin_lock(&gc->lock); 334 pblk_gc_sysfs_force(pblk, force);
329 pblk_rl_set_gc_rsc(&pblk->rl, value);
330 spin_unlock(&gc->lock);
331 335
332 return len; 336 return len;
333} 337}
334 338
335static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page, 339static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
336 size_t len) 340 const char *page, size_t len)
337{ 341{
338 size_t c_len; 342 size_t c_len;
339 int force; 343 int sec_per_write;
340 344
341 c_len = strcspn(page, "\n"); 345 c_len = strcspn(page, "\n");
342 if (c_len >= len) 346 if (c_len >= len)
343 return -EINVAL; 347 return -EINVAL;
344 348
345 if (kstrtouint(page, 0, &force)) 349 if (kstrtouint(page, 0, &sec_per_write))
346 return -EINVAL; 350 return -EINVAL;
347 351
348 if (force < 0 || force > 1) 352 if (sec_per_write < pblk->min_write_pgs
353 || sec_per_write > pblk->max_write_pgs
354 || sec_per_write % pblk->min_write_pgs != 0)
349 return -EINVAL; 355 return -EINVAL;
350 356
351 pblk_gc_sysfs_force(pblk, force); 357 pblk_set_sec_per_write(pblk, sec_per_write);
352 358
353 return len; 359 return len;
354} 360}
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
398 .mode = 0200, 404 .mode = 0200,
399}; 405};
400 406
401static struct attribute sys_gc_rl_max = { 407static struct attribute sys_max_sec_per_write = {
402 .name = "gc_rl_max", 408 .name = "max_sec_per_write",
403 .mode = 0200, 409 .mode = 0644,
404}; 410};
405 411
406#ifdef CONFIG_NVM_DEBUG 412#ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
416 &sys_errors_attr, 422 &sys_errors_attr,
417 &sys_gc_state, 423 &sys_gc_state,
418 &sys_gc_force, 424 &sys_gc_force,
419 &sys_gc_rl_max, 425 &sys_max_sec_per_write,
420 &sys_rb_attr, 426 &sys_rb_attr,
421 &sys_stats_ppaf_attr, 427 &sys_stats_ppaf_attr,
422 &sys_lines_attr, 428 &sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
448 return pblk_sysfs_lines(pblk, buf); 454 return pblk_sysfs_lines(pblk, buf);
449 else if (strcmp(attr->name, "lines_info") == 0) 455 else if (strcmp(attr->name, "lines_info") == 0)
450 return pblk_sysfs_lines_info(pblk, buf); 456 return pblk_sysfs_lines_info(pblk, buf);
457 else if (strcmp(attr->name, "max_sec_per_write") == 0)
458 return pblk_sysfs_get_sec_per_write(pblk, buf);
451#ifdef CONFIG_NVM_DEBUG 459#ifdef CONFIG_NVM_DEBUG
452 else if (strcmp(attr->name, "stats") == 0) 460 else if (strcmp(attr->name, "stats") == 0)
453 return pblk_sysfs_stats_debug(pblk, buf); 461 return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
460{ 468{
461 struct pblk *pblk = container_of(kobj, struct pblk, kobj); 469 struct pblk *pblk = container_of(kobj, struct pblk, kobj);
462 470
463 if (strcmp(attr->name, "gc_rl_max") == 0) 471 if (strcmp(attr->name, "gc_force") == 0)
464 return pblk_sysfs_rate_store(pblk, buf, len);
465 else if (strcmp(attr->name, "gc_force") == 0)
466 return pblk_sysfs_gc_force(pblk, buf, len); 472 return pblk_sysfs_gc_force(pblk, buf, len);
473 else if (strcmp(attr->name, "max_sec_per_write") == 0)
474 return pblk_sysfs_set_sec_per_write(pblk, buf, len);
467 475
468 return 0; 476 return 0;
469} 477}
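
The new max_sec_per_write attribute only accepts a value the write path can actually program: at least min_write_pgs, at most max_write_pgs, and a whole multiple of min_write_pgs. A minimal sketch of that store-side check, using hypothetical limits instead of the ones pblk reads from the device geometry:

#include <linux/errno.h>

/* hypothetical limits; pblk derives these from the device geometry */
#define DEMO_MIN_WRITE_PGS	8	/* minimum sectors per program operation */
#define DEMO_MAX_WRITE_PGS	64	/* largest request the write path builds */

/* mirrors the range and alignment checks in pblk_sysfs_set_sec_per_write() */
static int demo_validate_sec_per_write(int sec_per_write)
{
	if (sec_per_write < DEMO_MIN_WRITE_PGS ||
	    sec_per_write > DEMO_MAX_WRITE_PGS ||
	    sec_per_write % DEMO_MIN_WRITE_PGS != 0)
		return -EINVAL;

	return 0;
}

With these demo limits, 16 and 64 pass while 12 is rejected; only after this check does the driver call pblk_set_sec_per_write().
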
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7c4a0c..d62a8f4faaf4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
17 17
18#include "pblk.h" 18#include "pblk.h"
19 19
20static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
21{
22#ifdef CONFIG_NVM_DEBUG
23 atomic_long_inc(&pblk->sync_writes);
24#endif
25
26 /* Counter protected by rb sync lock */
27 line->left_ssecs--;
28 if (!line->left_ssecs)
29 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
30}
31
32static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, 20static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
33 struct pblk_c_ctx *c_ctx) 21 struct pblk_c_ctx *c_ctx)
34{ 22{
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
39 27
40 for (i = 0; i < c_ctx->nr_valid; i++) { 28 for (i = 0; i < c_ctx->nr_valid; i++) {
41 struct pblk_w_ctx *w_ctx; 29 struct pblk_w_ctx *w_ctx;
42 struct ppa_addr p;
43 struct pblk_line *line;
44 30
45 w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i); 31 w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
46
47 p = rqd->ppa_list[i];
48 line = &pblk->lines[pblk_dev_ppa_to_line(p)];
49 pblk_sync_line(pblk, line);
50
51 while ((original_bio = bio_list_pop(&w_ctx->bios))) 32 while ((original_bio = bio_list_pop(&w_ctx->bios)))
52 bio_endio(original_bio); 33 bio_endio(original_bio);
53 } 34 }
54 35
55#ifdef CONFIG_NVM_DEBUG 36#ifdef CONFIG_NVM_DEBUG
56 atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes); 37 atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
57#endif 38#endif
58 39
59 ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); 40 ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
169 } 150 }
170 151
171 INIT_WORK(&recovery->ws_rec, pblk_submit_rec); 152 INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
172 queue_work(pblk->kw_wq, &recovery->ws_rec); 153 queue_work(pblk->close_wq, &recovery->ws_rec);
173 154
174out: 155out:
175 pblk_complete_write(pblk, rqd, c_ctx); 156 pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
186 } 167 }
187#ifdef CONFIG_NVM_DEBUG 168#ifdef CONFIG_NVM_DEBUG
188 else 169 else
189 WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n"); 170 WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
190#endif 171#endif
191 172
192 pblk_complete_write(pblk, rqd, c_ctx); 173 pblk_complete_write(pblk, rqd, c_ctx);
174 atomic_dec(&pblk->inflight_io);
175}
176
177static void pblk_end_io_write_meta(struct nvm_rq *rqd)
178{
179 struct pblk *pblk = rqd->private;
180 struct nvm_tgt_dev *dev = pblk->dev;
181 struct nvm_geo *geo = &dev->geo;
182 struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
183 struct pblk_line *line = m_ctx->private;
184 struct pblk_emeta *emeta = line->emeta;
185 int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
186 struct pblk_lun *rlun = &pblk->luns[pos];
187 int sync;
188
189 up(&rlun->wr_sem);
190
191 if (rqd->error) {
192 pblk_log_write_err(pblk, rqd);
193 pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
194 }
195#ifdef CONFIG_NVM_DEBUG
196 else
197 WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
198#endif
199
200 sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
201 if (sync == emeta->nr_entries)
202 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
203 pblk->close_wq);
204
205 bio_put(rqd->bio);
206 pblk_free_rqd(pblk, rqd, READ);
207
208 atomic_dec(&pblk->inflight_io);
193} 209}
194 210
195static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, 211static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
196 unsigned int nr_secs) 212 unsigned int nr_secs,
213 nvm_end_io_fn(*end_io))
197{ 214{
198 struct nvm_tgt_dev *dev = pblk->dev; 215 struct nvm_tgt_dev *dev = pblk->dev;
199 216
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
202 rqd->nr_ppas = nr_secs; 219 rqd->nr_ppas = nr_secs;
203 rqd->flags = pblk_set_progr_mode(pblk, WRITE); 220 rqd->flags = pblk_set_progr_mode(pblk, WRITE);
204 rqd->private = pblk; 221 rqd->private = pblk;
205 rqd->end_io = pblk_end_io_write; 222 rqd->end_io = end_io;
206 223
207 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 224 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
208 &rqd->dma_meta_list); 225 &rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
219} 236}
220 237
221static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, 238static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
222 struct pblk_c_ctx *c_ctx) 239 struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
223{ 240{
224 struct pblk_line_meta *lm = &pblk->lm; 241 struct pblk_line_meta *lm = &pblk->lm;
225 struct pblk_line *e_line = pblk_line_get_data_next(pblk); 242 struct pblk_line *e_line = pblk_line_get_erase(pblk);
226 struct ppa_addr erase_ppa;
227 unsigned int valid = c_ctx->nr_valid; 243 unsigned int valid = c_ctx->nr_valid;
228 unsigned int padded = c_ctx->nr_padded; 244 unsigned int padded = c_ctx->nr_padded;
229 unsigned int nr_secs = valid + padded; 245 unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
231 int ret = 0; 247 int ret = 0;
232 248
233 lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); 249 lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
234 if (!lun_bitmap) { 250 if (!lun_bitmap)
235 ret = -ENOMEM; 251 return -ENOMEM;
236 goto out;
237 }
238 c_ctx->lun_bitmap = lun_bitmap; 252 c_ctx->lun_bitmap = lun_bitmap;
239 253
240 ret = pblk_alloc_w_rq(pblk, rqd, nr_secs); 254 ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
241 if (ret) { 255 if (ret) {
242 kfree(lun_bitmap); 256 kfree(lun_bitmap);
243 goto out; 257 return ret;
244 } 258 }
245 259
246 ppa_set_empty(&erase_ppa);
247 if (likely(!e_line || !atomic_read(&e_line->left_eblks))) 260 if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
248 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0); 261 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
249 else 262 else
250 pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, 263 pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
251 valid, &erase_ppa); 264 valid, erase_ppa);
252
253out:
254 if (unlikely(e_line && !ppa_empty(erase_ppa))) {
255 if (pblk_blk_erase_async(pblk, erase_ppa)) {
256 struct nvm_tgt_dev *dev = pblk->dev;
257 struct nvm_geo *geo = &dev->geo;
258 int bit;
259
260 atomic_inc(&e_line->left_eblks);
261 bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
262 WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
263 up(&pblk->erase_sem);
264 }
265 }
266 265
267 return ret; 266 return 0;
268} 267}
269 268
270int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, 269int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
280 279
281 c_ctx->lun_bitmap = lun_bitmap; 280 c_ctx->lun_bitmap = lun_bitmap;
282 281
283 ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas); 282 ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
284 if (ret) 283 if (ret)
285 return ret; 284 return ret;
286 285
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
311 return secs_to_sync; 310 return secs_to_sync;
312} 311}
313 312
313static inline int pblk_valid_meta_ppa(struct pblk *pblk,
314 struct pblk_line *meta_line,
315 struct ppa_addr *ppa_list, int nr_ppas)
316{
317 struct nvm_tgt_dev *dev = pblk->dev;
318 struct nvm_geo *geo = &dev->geo;
319 struct pblk_line *data_line;
320 struct ppa_addr ppa, ppa_opt;
321 u64 paddr;
322 int i;
323
324 data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
325 paddr = pblk_lookup_page(pblk, meta_line);
326 ppa = addr_to_gen_ppa(pblk, paddr, 0);
327
328 if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
329 return 1;
330
331 /* Schedule a metadata I/O that is half the distance from the data I/O
332 * with regards to the number of LUNs forming the pblk instance. This
333 * balances LUN conflicts across every I/O.
334 *
335 * When the LUN configuration changes (e.g., due to GC), this distance
 336 * can align, which would result in a LUN deadlock. In this case, modify
337 * the distance to not be optimal, but allow metadata I/Os to succeed.
338 */
339 ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
340 if (unlikely(ppa_opt.ppa == ppa.ppa)) {
341 data_line->meta_distance--;
342 return 0;
343 }
344
345 for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
346 if (ppa_list[i].g.ch == ppa_opt.g.ch &&
347 ppa_list[i].g.lun == ppa_opt.g.lun)
348 return 1;
349
350 if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
351 for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
352 if (ppa_list[i].g.ch == ppa.g.ch &&
353 ppa_list[i].g.lun == ppa.g.lun)
354 return 0;
355
356 return 1;
357 }
358
359 return 0;
360}
361
362int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
363{
364 struct nvm_tgt_dev *dev = pblk->dev;
365 struct nvm_geo *geo = &dev->geo;
366 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
367 struct pblk_line_meta *lm = &pblk->lm;
368 struct pblk_emeta *emeta = meta_line->emeta;
369 struct pblk_g_ctx *m_ctx;
370 struct pblk_lun *rlun;
371 struct bio *bio;
372 struct nvm_rq *rqd;
373 void *data;
374 u64 paddr;
375 int rq_ppas = pblk->min_write_pgs;
376 int id = meta_line->id;
377 int rq_len;
378 int i, j;
379 int ret;
380
381 rqd = pblk_alloc_rqd(pblk, READ);
382 if (IS_ERR(rqd)) {
383 pr_err("pblk: cannot allocate write req.\n");
384 return PTR_ERR(rqd);
385 }
386 m_ctx = nvm_rq_to_pdu(rqd);
387 m_ctx->private = meta_line;
388
389 rq_len = rq_ppas * geo->sec_size;
390 data = ((void *)emeta->buf) + emeta->mem;
391
392 bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
393 l_mg->emeta_alloc_type, GFP_KERNEL);
394 if (IS_ERR(bio)) {
395 ret = PTR_ERR(bio);
396 goto fail_free_rqd;
397 }
398 bio->bi_iter.bi_sector = 0; /* internal bio */
399 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
400 rqd->bio = bio;
401
402 ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
403 if (ret)
404 goto fail_free_bio;
405
406 for (i = 0; i < rqd->nr_ppas; ) {
407 spin_lock(&meta_line->lock);
408 paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
409 spin_unlock(&meta_line->lock);
410 for (j = 0; j < rq_ppas; j++, i++, paddr++)
411 rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
412 }
413
414 rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
415 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
416 if (ret) {
417 pr_err("pblk: lun semaphore timed out (%d)\n", ret);
418 goto fail_free_bio;
419 }
420
421 emeta->mem += rq_len;
422 if (emeta->mem >= lm->emeta_len[0]) {
423 spin_lock(&l_mg->close_lock);
424 list_del(&meta_line->list);
425 WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
426 "pblk: corrupt meta line %d\n", meta_line->id);
427 spin_unlock(&l_mg->close_lock);
428 }
429
430 ret = pblk_submit_io(pblk, rqd);
431 if (ret) {
432 pr_err("pblk: emeta I/O submission failed: %d\n", ret);
433 goto fail_rollback;
434 }
435
436 return NVM_IO_OK;
437
438fail_rollback:
439 spin_lock(&l_mg->close_lock);
440 pblk_dealloc_page(pblk, meta_line, rq_ppas);
441 list_add(&meta_line->list, &meta_line->list);
442 spin_unlock(&l_mg->close_lock);
443fail_free_bio:
444 if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
445 bio_put(bio);
446fail_free_rqd:
447 pblk_free_rqd(pblk, rqd, READ);
448 return ret;
449}
450
451static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
452 int prev_n)
453{
454 struct pblk_line_meta *lm = &pblk->lm;
455 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
456 struct pblk_line *meta_line;
457
458 spin_lock(&l_mg->close_lock);
459retry:
460 if (list_empty(&l_mg->emeta_list)) {
461 spin_unlock(&l_mg->close_lock);
462 return 0;
463 }
464 meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
465 if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
466 goto retry;
467 spin_unlock(&l_mg->close_lock);
468
469 if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
470 return 0;
471
472 return pblk_submit_meta_io(pblk, meta_line);
473}
474
475static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
476{
477 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
478 struct ppa_addr erase_ppa;
479 int err;
480
481 ppa_set_empty(&erase_ppa);
482
483 /* Assign lbas to ppas and populate request structure */
484 err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
485 if (err) {
486 pr_err("pblk: could not setup write request: %d\n", err);
487 return NVM_IO_ERR;
488 }
489
490 if (likely(ppa_empty(erase_ppa))) {
491 /* Submit metadata write for previous data line */
492 err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
493 if (err) {
494 pr_err("pblk: metadata I/O submission failed: %d", err);
495 return NVM_IO_ERR;
496 }
497
498 /* Submit data write for current data line */
499 err = pblk_submit_io(pblk, rqd);
500 if (err) {
501 pr_err("pblk: data I/O submission failed: %d\n", err);
502 return NVM_IO_ERR;
503 }
504 } else {
505 /* Submit data write for current data line */
506 err = pblk_submit_io(pblk, rqd);
507 if (err) {
508 pr_err("pblk: data I/O submission failed: %d\n", err);
509 return NVM_IO_ERR;
510 }
511
512 /* Submit available erase for next data line */
513 if (pblk_blk_erase_async(pblk, erase_ppa)) {
514 struct pblk_line *e_line = pblk_line_get_erase(pblk);
515 struct nvm_tgt_dev *dev = pblk->dev;
516 struct nvm_geo *geo = &dev->geo;
517 int bit;
518
519 atomic_inc(&e_line->left_eblks);
520 bit = pblk_ppa_to_pos(geo, erase_ppa);
521 WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
522 }
523 }
524
525 return NVM_IO_OK;
526}
527
528static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
529{
530 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
531 struct bio *bio = rqd->bio;
532
533 if (c_ctx->nr_padded)
534 pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
535}
536
314static int pblk_submit_write(struct pblk *pblk) 537static int pblk_submit_write(struct pblk *pblk)
315{ 538{
316 struct bio *bio; 539 struct bio *bio;
317 struct nvm_rq *rqd; 540 struct nvm_rq *rqd;
318 struct pblk_c_ctx *c_ctx;
319 unsigned int pgs_read;
320 unsigned int secs_avail, secs_to_sync, secs_to_com; 541 unsigned int secs_avail, secs_to_sync, secs_to_com;
321 unsigned int secs_to_flush; 542 unsigned int secs_to_flush;
322 unsigned long pos; 543 unsigned long pos;
323 int err;
324 544
325 /* If there are no sectors in the cache, flushes (bios without data) 545 /* If there are no sectors in the cache, flushes (bios without data)
326 * will be cleared on the cache threads 546 * will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
338 pr_err("pblk: cannot allocate write req.\n"); 558 pr_err("pblk: cannot allocate write req.\n");
339 return 1; 559 return 1;
340 } 560 }
341 c_ctx = nvm_rq_to_pdu(rqd);
342 561
343 bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs); 562 bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
344 if (!bio) { 563 if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
358 secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync; 577 secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
359 pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); 578 pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
360 579
361 pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos, 580 if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
362 secs_to_sync, secs_avail); 581 secs_avail)) {
363 if (!pgs_read) {
364 pr_err("pblk: corrupted write bio\n"); 582 pr_err("pblk: corrupted write bio\n");
365 goto fail_put_bio; 583 goto fail_put_bio;
366 } 584 }
367 585
368 if (c_ctx->nr_padded) 586 if (pblk_submit_io_set(pblk, rqd))
369 if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
370 goto fail_put_bio;
371
372 /* Assign lbas to ppas and populate request structure */
373 err = pblk_setup_w_rq(pblk, rqd, c_ctx);
374 if (err) {
375 pr_err("pblk: could not setup write request\n");
376 goto fail_free_bio;
377 }
378
379 err = pblk_submit_io(pblk, rqd);
380 if (err) {
381 pr_err("pblk: I/O submission failed: %d\n", err);
382 goto fail_free_bio; 587 goto fail_free_bio;
383 }
384 588
385#ifdef CONFIG_NVM_DEBUG 589#ifdef CONFIG_NVM_DEBUG
386 atomic_long_add(secs_to_sync, &pblk->sub_writes); 590 atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
389 return 0; 593 return 0;
390 594
391fail_free_bio: 595fail_free_bio:
392 if (c_ctx->nr_padded) 596 pblk_free_write_rqd(pblk, rqd);
393 pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
394fail_put_bio: 597fail_put_bio:
395 bio_put(bio); 598 bio_put(bio);
396fail_free_rqd: 599fail_free_rqd:
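
pblk_end_io_write_meta() in this file's diff closes a line from the completion path itself: every emeta write adds its sector count to an atomic counter, and the completion that brings the counter up to the number of emeta entries queues the line-close work. A minimal sketch of that last-completion trigger; demo_emeta, its field names and the explicit workqueue argument are illustrative, not the driver's types.

#include <linux/atomic.h>
#include <linux/workqueue.h>

struct demo_emeta {
	atomic_t sync;			/* sectors completed so far */
	int nr_entries;			/* total sectors of end-of-line metadata */
	struct work_struct close_ws;	/* line-close work, queued exactly once */
};

/* called from each metadata write's end_io path */
static void demo_emeta_end_io(struct demo_emeta *emeta, int nr_ppas,
			      struct workqueue_struct *close_wq)
{
	int sync;

	/*
	 * atomic_add_return() makes the update and the read one atomic step,
	 * so exactly one completion observes sync == nr_entries and queues
	 * the close work; earlier completions see a smaller value.
	 */
	sync = atomic_add_return(nr_ppas, &emeta->sync);
	if (sync == emeta->nr_entries)
		queue_work(close_wq, &emeta->close_ws);
}
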
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186b5288..15931381348c 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
40#define PBLK_MAX_REQ_ADDRS (64) 40#define PBLK_MAX_REQ_ADDRS (64)
41#define PBLK_MAX_REQ_ADDRS_PW (6) 41#define PBLK_MAX_REQ_ADDRS_PW (6)
42 42
43#define PBLK_WS_POOL_SIZE (128)
44#define PBLK_META_POOL_SIZE (128)
45#define PBLK_READ_REQ_POOL_SIZE (1024)
46
47#define PBLK_NR_CLOSE_JOBS (4)
48
43#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) 49#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
44 50
45#define PBLK_COMMAND_TIMEOUT_MS 30000 51#define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
72 PBLK_BLK_ST_CLOSED = 0x2, 78 PBLK_BLK_ST_CLOSED = 0x2,
73}; 79};
74 80
81struct pblk_sec_meta {
82 u64 reserved;
83 __le64 lba;
84};
85
75/* The number of GC lists and the rate-limiter states go together. This way the 86/* The number of GC lists and the rate-limiter states go together. This way the
76 * rate-limiter can dictate how much GC is needed based on resource utilization. 87 * rate-limiter can dictate how much GC is needed based on resource utilization.
77 */ 88 */
78#define PBLK_NR_GC_LISTS 3 89#define PBLK_GC_NR_LISTS 3
79#define PBLK_MAX_GC_JOBS 32
80 90
81enum { 91enum {
82 PBLK_RL_HIGH = 1, 92 PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
84 PBLK_RL_LOW = 3, 94 PBLK_RL_LOW = 3,
85}; 95};
86 96
87struct pblk_sec_meta {
88 u64 reserved;
89 __le64 lba;
90};
91
92#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) 97#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
93 98
94/* write completion context */ 99/* write buffer completion context */
95struct pblk_c_ctx { 100struct pblk_c_ctx {
96 struct list_head list; /* Head for out-of-order completion */ 101 struct list_head list; /* Head for out-of-order completion */
97 102
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
101 unsigned int nr_padded; 106 unsigned int nr_padded;
102}; 107};
103 108
104/* Read context */ 109/* generic context */
105struct pblk_r_ctx { 110struct pblk_g_ctx {
106 struct bio *orig_bio; 111 void *private;
112};
113
114/* Pad context */
115struct pblk_pad_rq {
116 struct pblk *pblk;
117 struct completion wait;
118 struct kref ref;
107}; 119};
108 120
109/* Recovery context */ 121/* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
195struct pblk_gc_rq { 207struct pblk_gc_rq {
196 struct pblk_line *line; 208 struct pblk_line *line;
197 void *data; 209 void *data;
198 u64 *lba_list; 210 u64 lba_list[PBLK_MAX_REQ_ADDRS];
199 int nr_secs; 211 int nr_secs;
200 int secs_to_gc; 212 int secs_to_gc;
201 struct list_head list; 213 struct list_head list;
202}; 214};
203 215
204struct pblk_gc { 216struct pblk_gc {
217 /* These states are not protected by a lock since (i) they are in the
218 * fast path, and (ii) they are not critical.
219 */
205 int gc_active; 220 int gc_active;
206 int gc_enabled; 221 int gc_enabled;
207 int gc_forced; 222 int gc_forced;
208 int gc_jobs_active;
209 atomic_t inflight_gc;
210 223
211 struct task_struct *gc_ts; 224 struct task_struct *gc_ts;
212 struct task_struct *gc_writer_ts; 225 struct task_struct *gc_writer_ts;
226 struct task_struct *gc_reader_ts;
227
228 struct workqueue_struct *gc_line_reader_wq;
213 struct workqueue_struct *gc_reader_wq; 229 struct workqueue_struct *gc_reader_wq;
230
214 struct timer_list gc_timer; 231 struct timer_list gc_timer;
215 232
233 struct semaphore gc_sem;
234 atomic_t inflight_gc;
216 int w_entries; 235 int w_entries;
236
217 struct list_head w_list; 237 struct list_head w_list;
238 struct list_head r_list;
218 239
219 spinlock_t lock; 240 spinlock_t lock;
220 spinlock_t w_lock; 241 spinlock_t w_lock;
242 spinlock_t r_lock;
221}; 243};
222 244
223struct pblk_rl { 245struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
229 */ 251 */
230 unsigned int high_pw; /* High rounded up as a power of 2 */ 252 unsigned int high_pw; /* High rounded up as a power of 2 */
231 253
232#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent 254#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
233 * available blks 255#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
234 */
235#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
236 256
237 int rb_windows_pw; /* Number of rate windows in the write buffer 257 int rb_windows_pw; /* Number of rate windows in the write buffer
238 * given as a power-of-2. This guarantees that 258 * given as a power-of-2. This guarantees that
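
Reading the new thresholds as divisors of the total block budget, which is how the removed values read as well (2 for 50%, 20 for 5%), PBLK_USER_HIGH_THRS = 8 starts throttling user writes once free blocks drop below 1/8 of the total (12.5%, rounded to the "12%" in the comment), and PBLK_USER_LOW_THRS = 10 switches to aggressive GC below 1/10 (10%). The comparison itself lives in pblk-rl.c, which is not part of this excerpt.
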
@@ -244,13 +264,19 @@ struct pblk_rl {
244 */ 264 */
245 int rb_budget; /* Total number of entries available for I/O */ 265 int rb_budget; /* Total number of entries available for I/O */
246 int rb_user_max; /* Max buffer entries available for user I/O */ 266 int rb_user_max; /* Max buffer entries available for user I/O */
247 atomic_t rb_user_cnt; /* User I/O buffer counter */
248 int rb_gc_max; /* Max buffer entries available for GC I/O */ 267 int rb_gc_max; /* Max buffer entries available for GC I/O */
249 int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ 268 int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
250 int rb_state; /* Rate-limiter current state */ 269 int rb_state; /* Rate-limiter current state */
270
271 atomic_t rb_user_cnt; /* User I/O buffer counter */
251 atomic_t rb_gc_cnt; /* GC I/O buffer counter */ 272 atomic_t rb_gc_cnt; /* GC I/O buffer counter */
273 atomic_t rb_space; /* Space limit in case of reaching capacity */
274
275 int rsv_blocks; /* Reserved blocks for GC */
252 276
253 int rb_user_active; 277 int rb_user_active;
278 int rb_gc_active;
279
254 struct timer_list u_timer; 280 struct timer_list u_timer;
255 281
256 unsigned long long nr_secs; 282 unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
258 atomic_t free_blocks; 284 atomic_t free_blocks;
259}; 285};
260 286
261#define PBLK_LINE_NR_LUN_BITMAP 2
262#define PBLK_LINE_NR_SEC_BITMAP 2
263#define PBLK_LINE_EMPTY (~0U) 287#define PBLK_LINE_EMPTY (~0U)
264 288
265enum { 289enum {
@@ -310,16 +334,19 @@ struct line_smeta {
310 __le32 window_wr_lun; /* Number of parallel LUNs to write */ 334 __le32 window_wr_lun; /* Number of parallel LUNs to write */
311 335
312 __le32 rsvd[2]; 336 __le32 rsvd[2];
337
338 __le64 lun_bitmap[];
313}; 339};
314 340
315/* 341/*
316 * Metadata Layout: 342 * Metadata layout in media:
317 * 1. struct pblk_emeta 343 * First sector:
318 * 2. nr_lbas u64 forming lba list 344 * 1. struct line_emeta
319 * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line) 345 * 2. bad block bitmap (u64 * window_wr_lun)
320 * 4. nr_luns bits (u64 format) forming line bad block bitmap 346 * Mid sectors (start at lbas_sector):
321 * 347 * 3. nr_lbas (u64) forming lba list
322 * 3. and 4. will be part of FTL log 348 * Last sectors (start at vsc_sector):
349 * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
323 */ 350 */
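
The comment above only names lbas_sector and vsc_sector; combining it with the emeta_sec[] split added to struct pblk_line_meta further down, the offsets would plausibly be derived as below. This is an inference from the "[1] header / [2] L2P / [3] vsc" layout, not something this hunk states:

	/* hypothetical offsets, inferred from the emeta_sec[] layout */
	u64 lbas_sector = line->emeta_ssec + lm->emeta_sec[1];
	u64 vsc_sector  = lbas_sector + lm->emeta_sec[2];
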
324struct line_emeta { 351struct line_emeta {
325 struct line_header header; 352 struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
339 __le32 next_id; /* Line id for next line */ 366 __le32 next_id; /* Line id for next line */
340 __le64 nr_lbas; /* Number of lbas mapped in line */ 367 __le64 nr_lbas; /* Number of lbas mapped in line */
341 __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ 368 __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
369 __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
370};
371
372struct pblk_emeta {
373 struct line_emeta *buf; /* emeta buffer in media format */
374 int mem; /* Write offset - points to next
375 * writable entry in memory
376 */
377 atomic_t sync; /* Synced - backpointer that signals the
378 * last entry that has been successfully
379 * persisted to media
380 */
381 unsigned int nr_entries; /* Number of emeta entries */
382};
383
384struct pblk_smeta {
385 struct line_smeta *buf; /* smeta buffer in persistent format */
342}; 386};
343 387
344struct pblk_line { 388struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
355 399
356 unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ 400 unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
357 401
358 struct line_smeta *smeta; /* Start metadata */ 402 struct pblk_smeta *smeta; /* Start metadata */
359 struct line_smeta *smeta; /* Start metadata */ 402 struct pblk_smeta *smeta; /* Start metadata */
404
360 int meta_line; /* Metadata line id */ 405 int meta_line; /* Metadata line id */
406 int meta_distance; /* Distance between data and metadata */
407
361 u64 smeta_ssec; /* Sector where smeta starts */ 408 u64 smeta_ssec; /* Sector where smeta starts */
362 u64 emeta_ssec; /* Sector where emeta starts */ 409 u64 emeta_ssec; /* Sector where emeta starts */
363 410
@@ -374,9 +421,10 @@ struct pblk_line {
374 atomic_t left_seblks; /* Blocks left for sync erasing */ 421 atomic_t left_seblks; /* Blocks left for sync erasing */
375 422
376 int left_msecs; /* Sectors left for mapping */ 423 int left_msecs; /* Sectors left for mapping */
377 int left_ssecs; /* Sectors left to sync */
378 unsigned int cur_sec; /* Sector map pointer */ 424 unsigned int cur_sec; /* Sector map pointer */
379 unsigned int vsc; /* Valid sector count in line */ 425 unsigned int nr_valid_lbas; /* Number of valid lbas in line */
426
427 __le32 *vsc; /* Valid sector count in line */
380 428
381 struct kref ref; /* Write buffer L2P references */ 429 struct kref ref; /* Write buffer L2P references */
382 430
@@ -385,13 +433,15 @@ struct pblk_line {
385 433
386#define PBLK_DATA_LINES 4 434#define PBLK_DATA_LINES 4
387 435
388enum{ 436enum {
389 PBLK_KMALLOC_META = 1, 437 PBLK_KMALLOC_META = 1,
390 PBLK_VMALLOC_META = 2, 438 PBLK_VMALLOC_META = 2,
391}; 439};
392 440
393struct pblk_line_metadata { 441enum {
394 void *meta; 442 PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
443 PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
444 PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
395}; 445};
396 446
397struct pblk_line_mgmt { 447struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
404 struct list_head bad_list; /* Full lines bad */ 454 struct list_head bad_list; /* Full lines bad */
405 455
406 /* GC lists - use gc_lock */ 456 /* GC lists - use gc_lock */
407 struct list_head *gc_lists[PBLK_NR_GC_LISTS]; 457 struct list_head *gc_lists[PBLK_GC_NR_LISTS];
408 struct list_head gc_high_list; /* Full lines ready to GC, high isc */ 458 struct list_head gc_high_list; /* Full lines ready to GC, high isc */
409 struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ 459 struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
410 struct list_head gc_low_list; /* Full lines ready to GC, low isc */ 460 struct list_head gc_low_list; /* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
417 struct pblk_line *log_next; /* Next FTL log line */ 467 struct pblk_line *log_next; /* Next FTL log line */
418 struct pblk_line *data_next; /* Next data line */ 468 struct pblk_line *data_next; /* Next data line */
419 469
470 struct list_head emeta_list; /* Lines queued to schedule emeta */
471
472 __le32 *vsc_list; /* Valid sector counts for all lines */
473
420 /* Metadata allocation type: VMALLOC | KMALLOC */ 474 /* Metadata allocation type: VMALLOC | KMALLOC */
421 int smeta_alloc_type;
422 int emeta_alloc_type; 475 int emeta_alloc_type;
423 476
424 /* Pre-allocated metadata for data lines */ 477 /* Pre-allocated metadata for data lines */
425 struct pblk_line_metadata sline_meta[PBLK_DATA_LINES]; 478 struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
426 struct pblk_line_metadata eline_meta[PBLK_DATA_LINES]; 479 struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
427 unsigned long meta_bitmap; 480 unsigned long meta_bitmap;
428 481
429 /* Helpers for fast bitmap calculations */ 482 /* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
434 unsigned long l_seq_nr; /* Log line unique sequence number */ 487 unsigned long l_seq_nr; /* Log line unique sequence number */
435 488
436 spinlock_t free_lock; 489 spinlock_t free_lock;
490 spinlock_t close_lock;
437 spinlock_t gc_lock; 491 spinlock_t gc_lock;
438}; 492};
439 493
440struct pblk_line_meta { 494struct pblk_line_meta {
441 unsigned int smeta_len; /* Total length for smeta */ 495 unsigned int smeta_len; /* Total length for smeta */
442 unsigned int smeta_sec; /* Sectors needed for smeta*/ 496 unsigned int smeta_sec; /* Sectors needed for smeta */
443 unsigned int emeta_len; /* Total length for emeta */ 497
444 unsigned int emeta_sec; /* Sectors needed for emeta*/ 498 unsigned int emeta_len[4]; /* Lengths for emeta:
499 * [0]: Total length
500 * [1]: struct line_emeta length
501 * [2]: L2P portion length
502 * [3]: vsc list length
503 */
504 unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
505 * as emeta_len
506 */
507
445 unsigned int emeta_bb; /* Boundary for bb that affects emeta */ 508 unsigned int emeta_bb; /* Boundary for bb that affects emeta */
509
510 unsigned int vsc_list_len; /* Length for vsc list */
446 unsigned int sec_bitmap_len; /* Length for sector bitmap in line */ 511 unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
447 unsigned int blk_bitmap_len; /* Length for block bitmap in line */ 512 unsigned int blk_bitmap_len; /* Length for block bitmap in line */
448 unsigned int lun_bitmap_len; /* Length for lun bitmap in line */ 513 unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
449 514
450 unsigned int blk_per_line; /* Number of blocks in a full line */ 515 unsigned int blk_per_line; /* Number of blocks in a full line */
451 unsigned int sec_per_line; /* Number of sectors in a line */ 516 unsigned int sec_per_line; /* Number of sectors in a line */
517 unsigned int dsec_per_line; /* Number of data sectors in a line */
452 unsigned int min_blk_line; /* Min. number of good blocks in line */ 518 unsigned int min_blk_line; /* Min. number of good blocks in line */
453 519
454 unsigned int mid_thrs; /* Threshold for GC mid list */ 520 unsigned int mid_thrs; /* Threshold for GC mid list */
455 unsigned int high_thrs; /* Threshold for GC high list */ 521 unsigned int high_thrs; /* Threshold for GC high list */
522
523 unsigned int meta_distance; /* Distance between data and metadata */
456}; 524};
457 525
458struct pblk_addr_format { 526struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
470 u8 sec_offset; 538 u8 sec_offset;
471}; 539};
472 540
541enum {
542 PBLK_STATE_RUNNING = 0,
543 PBLK_STATE_STOPPING = 1,
544 PBLK_STATE_RECOVERING = 2,
545 PBLK_STATE_STOPPED = 3,
546};
547
473struct pblk { 548struct pblk {
474 struct nvm_tgt_dev *dev; 549 struct nvm_tgt_dev *dev;
475 struct gendisk *disk; 550 struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
487 562
488 struct pblk_rb rwb; 563 struct pblk_rb rwb;
489 564
565 int state; /* pblk line state */
566
490 int min_write_pgs; /* Minimum amount of pages required by controller */ 567 int min_write_pgs; /* Minimum amount of pages required by controller */
491 int max_write_pgs; /* Maximum amount of pages supported by controller */ 568 int max_write_pgs; /* Maximum amount of pages supported by controller */
492 int pgs_in_buffer; /* Number of pages that need to be held in buffer to 569 int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
499 /* pblk provisioning values. Used by rate limiter */ 576 /* pblk provisioning values. Used by rate limiter */
500 struct pblk_rl rl; 577 struct pblk_rl rl;
501 578
502 struct semaphore erase_sem; 579 int sec_per_write;
503 580
504 unsigned char instance_uuid[16]; 581 unsigned char instance_uuid[16];
505#ifdef CONFIG_NVM_DEBUG 582#ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
511 atomic_long_t req_writes; /* Sectors stored on write buffer */ 588 atomic_long_t req_writes; /* Sectors stored on write buffer */
512 atomic_long_t sub_writes; /* Sectors submitted from buffer */ 589 atomic_long_t sub_writes; /* Sectors submitted from buffer */
513 atomic_long_t sync_writes; /* Sectors synced to media */ 590 atomic_long_t sync_writes; /* Sectors synced to media */
514 atomic_long_t compl_writes; /* Sectors completed in write bio */
515 atomic_long_t inflight_reads; /* Inflight sector read requests */ 591 atomic_long_t inflight_reads; /* Inflight sector read requests */
592 atomic_long_t cache_reads; /* Read requests that hit the cache */
516 atomic_long_t sync_reads; /* Completed sector read requests */ 593 atomic_long_t sync_reads; /* Completed sector read requests */
517 atomic_long_t recov_writes; /* Sectors submitted from recovery */ 594 atomic_long_t recov_writes; /* Sectors submitted from recovery */
518 atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */ 595 atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
528 atomic_long_t write_failed; 605 atomic_long_t write_failed;
529 atomic_long_t erase_failed; 606 atomic_long_t erase_failed;
530 607
608 atomic_t inflight_io; /* General inflight I/O counter */
609
531 struct task_struct *writer_ts; 610 struct task_struct *writer_ts;
532 611
533 /* Simple translation map of logical addresses to physical addresses. 612 /* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
542 mempool_t *page_pool; 621 mempool_t *page_pool;
543 mempool_t *line_ws_pool; 622 mempool_t *line_ws_pool;
544 mempool_t *rec_pool; 623 mempool_t *rec_pool;
545 mempool_t *r_rq_pool; 624 mempool_t *g_rq_pool;
546 mempool_t *w_rq_pool; 625 mempool_t *w_rq_pool;
547 mempool_t *line_meta_pool; 626 mempool_t *line_meta_pool;
548 627
549 struct workqueue_struct *kw_wq; 628 struct workqueue_struct *close_wq;
629 struct workqueue_struct *bb_wq;
630
550 struct timer_list wtimer; 631 struct timer_list wtimer;
551 632
552 struct pblk_gc gc; 633 struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
559 struct work_struct ws; 640 struct work_struct ws;
560}; 641};
561 642
562#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx)) 643#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
563#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx)) 644#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
564 645
565/* 646/*
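
A minimal sketch of how the renamed generic context is carried per request, assuming the usual nvm_rq_to_pdu() pattern already used by the write path above and that the READ side of pblk_alloc_rqd() now draws from g_rq_pool (the read-path hunks themselves are outside this excerpt):

	struct nvm_rq *rqd = pblk_alloc_rqd(pblk, READ);
	struct pblk_g_ctx *g_ctx = nvm_rq_to_pdu(rqd);

	g_ctx->private = bio;	/* what used to be pblk_r_ctx->orig_bio */
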
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
579 struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, 660 struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
580 unsigned int pos); 661 unsigned int pos);
581struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); 662struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
663void pblk_rb_flush(struct pblk_rb *rb);
582 664
583void pblk_rb_sync_l2p(struct pblk_rb *rb); 665void pblk_rb_sync_l2p(struct pblk_rb *rb);
584unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, 666unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
585 struct pblk_c_ctx *c_ctx, 667 struct bio *bio, unsigned int pos,
586 unsigned int pos, 668 unsigned int nr_entries, unsigned int count);
587 unsigned int nr_entries,
588 unsigned int count);
589unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, 669unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
590 struct list_head *list, 670 struct list_head *list,
591 unsigned int max); 671 unsigned int max);
592int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 672int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
593 u64 pos, int bio_iter); 673 struct ppa_addr ppa, int bio_iter);
594unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); 674unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
595 675
596unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); 676unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
601unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); 681unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
602 682
603unsigned int pblk_rb_read_count(struct pblk_rb *rb); 683unsigned int pblk_rb_read_count(struct pblk_rb *rb);
684unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
604unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); 685unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
605 686
606int pblk_rb_tear_down_check(struct pblk_rb *rb); 687int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
612 * pblk core 693 * pblk core
613 */ 694 */
614struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw); 695struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
696void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
615int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, 697int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
616 struct pblk_c_ctx *c_ctx); 698 struct pblk_c_ctx *c_ctx);
617void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw); 699void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
618void pblk_flush_writer(struct pblk *pblk); 700void pblk_wait_for_meta(struct pblk *pblk);
619struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); 701struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
620void pblk_discard(struct pblk *pblk, struct bio *bio); 702void pblk_discard(struct pblk *pblk, struct bio *bio);
621void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); 703void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
622void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); 704void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
623int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); 705int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
706int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
624struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, 707struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
625 unsigned int nr_secs, unsigned int len, 708 unsigned int nr_secs, unsigned int len,
626 gfp_t gfp_mask); 709 int alloc_type, gfp_t gfp_mask);
627struct pblk_line *pblk_line_get(struct pblk *pblk); 710struct pblk_line *pblk_line_get(struct pblk *pblk);
628struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); 711struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
629struct pblk_line *pblk_line_replace_data(struct pblk *pblk); 712void pblk_line_replace_data(struct pblk *pblk);
630int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); 713int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
631void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); 714void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
632struct pblk_line *pblk_line_get_data(struct pblk *pblk); 715struct pblk_line *pblk_line_get_data(struct pblk *pblk);
633struct pblk_line *pblk_line_get_data_next(struct pblk *pblk); 716struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
634int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); 717int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
635int pblk_line_is_full(struct pblk_line *line); 718int pblk_line_is_full(struct pblk_line *line);
636void pblk_line_free(struct pblk *pblk, struct pblk_line *line); 719void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
637void pblk_line_close_ws(struct work_struct *work); 720void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
638void pblk_line_close(struct pblk *pblk, struct pblk_line *line); 721void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
722void pblk_line_close_meta_sync(struct pblk *pblk);
723void pblk_line_close_ws(struct work_struct *work);
724void pblk_pipeline_stop(struct pblk *pblk);
639void pblk_line_mark_bb(struct work_struct *work); 725void pblk_line_mark_bb(struct work_struct *work);
640void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, 726void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
641 void (*work)(struct work_struct *)); 727 void (*work)(struct work_struct *),
728 struct workqueue_struct *wq);
642u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); 729u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
643int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); 730int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
644int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line); 731int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
732 void *emeta_buf);
645int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); 733int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
646void pblk_line_put(struct kref *ref); 734void pblk_line_put(struct kref *ref);
647struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); 735struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
736u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
737void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
648u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); 738u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
739u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
649int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, 740int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
650 unsigned long secs_to_flush); 741 unsigned long secs_to_flush);
651void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, 742void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
656void pblk_end_io_sync(struct nvm_rq *rqd); 747void pblk_end_io_sync(struct nvm_rq *rqd);
657int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, 748int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
658 int nr_pages); 749 int nr_pages);
659void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
660 u64 paddr);
661void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, 750void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
662 int nr_pages); 751 int nr_pages);
663void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa); 752void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
753void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
754 u64 paddr);
664void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa); 755void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
665void pblk_update_map_cache(struct pblk *pblk, sector_t lba, 756void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
666 struct ppa_addr ppa); 757 struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
702/* 793/*
703 * pblk read path 794 * pblk read path
704 */ 795 */
796extern struct bio_set *pblk_bio_set;
705int pblk_submit_read(struct pblk *pblk, struct bio *bio); 797int pblk_submit_read(struct pblk *pblk, struct bio *bio);
706int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, 798int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
707 unsigned int nr_secs, unsigned int *secs_to_gc, 799 unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
711 */ 803 */
712void pblk_submit_rec(struct work_struct *work); 804void pblk_submit_rec(struct work_struct *work);
713struct pblk_line *pblk_recov_l2p(struct pblk *pblk); 805struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
714void pblk_recov_pad(struct pblk *pblk); 806int pblk_recov_pad(struct pblk *pblk);
715__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta); 807__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
716int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, 808int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
717 struct pblk_rec_ctx *recovery, u64 *comp_bits, 809 struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
720/* 812/*
721 * pblk gc 813 * pblk gc
722 */ 814 */
723#define PBLK_GC_TRIES 3 815#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
816#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */
817#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
818#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
724 819
725int pblk_gc_init(struct pblk *pblk); 820int pblk_gc_init(struct pblk *pblk);
726void pblk_gc_exit(struct pblk *pblk); 821void pblk_gc_exit(struct pblk *pblk);
727void pblk_gc_should_start(struct pblk *pblk); 822void pblk_gc_should_start(struct pblk *pblk);
728void pblk_gc_should_stop(struct pblk *pblk); 823void pblk_gc_should_stop(struct pblk *pblk);
729int pblk_gc_status(struct pblk *pblk); 824void pblk_gc_should_kick(struct pblk *pblk);
825void pblk_gc_kick(struct pblk *pblk);
730void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, 826void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
731 int *gc_active); 827 int *gc_active);
732void pblk_gc_sysfs_force(struct pblk *pblk, int force); 828int pblk_gc_sysfs_force(struct pblk *pblk, int force);
733 829
734/* 830/*
735 * pblk rate limiter 831 * pblk rate limiter
736 */ 832 */
737void pblk_rl_init(struct pblk_rl *rl, int budget); 833void pblk_rl_init(struct pblk_rl *rl, int budget);
738void pblk_rl_free(struct pblk_rl *rl); 834void pblk_rl_free(struct pblk_rl *rl);
739int pblk_rl_gc_thrs(struct pblk_rl *rl); 835int pblk_rl_high_thrs(struct pblk_rl *rl);
836int pblk_rl_low_thrs(struct pblk_rl *rl);
740unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); 837unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
741int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); 838int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
839void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
742void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); 840void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
743int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); 841int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
744void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); 842void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
745void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); 843void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
746void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
747int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); 844int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
748void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); 845void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
749void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); 846void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
847void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
848int pblk_rl_is_limit(struct pblk_rl *rl);
750 849
751/* 850/*
752 * pblk sysfs 851 * pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
774 return c_ctx - sizeof(struct nvm_rq); 873 return c_ctx - sizeof(struct nvm_rq);
775} 874}
776 875
777static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta) 876static inline void *emeta_to_bb(struct line_emeta *emeta)
877{
878 return emeta->bb_bitmap;
879}
880
881static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
882{
883 return ((void *)emeta + pblk->lm.emeta_len[1]);
884}
885
886static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
778{ 887{
779 return (emeta) + 1; 888 return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
889}
890
891static inline int pblk_line_vsc(struct pblk_line *line)
892{
893 int vsc;
894
895 spin_lock(&line->lock);
896 vsc = le32_to_cpu(*line->vsc);
897 spin_unlock(&line->lock);
898
899 return vsc;
780} 900}
781 901
782#define NVM_MEM_PAGE_WRITE (8) 902#define NVM_MEM_PAGE_WRITE (8)
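
A short usage sketch for the new accessors, assuming an emeta buffer taken from line->emeta->buf as defined above (variable names are illustrative only):

	struct line_emeta *emeta_buf = line->emeta->buf;
	__le64 *lba_list = emeta_to_lbas(pblk, emeta_buf);
	__le32 *vsc_list = emeta_to_vsc(pblk, emeta_buf);
	__le64 *bb_bitmap = emeta_to_bb(emeta_buf);

	/* line->vsc is now a __le32 pointer (presumably into l_mg->vsc_list)
	 * and is read under the line lock by pblk_line_vsc() */
	int valid = pblk_line_vsc(line);
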
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
917 ppa_addr->ppa = ADDR_EMPTY; 1037 ppa_addr->ppa = ADDR_EMPTY;
918} 1038}
919 1039
1040static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
1041{
1042 if (lppa.ppa == rppa.ppa)
1043 return true;
1044
1045 return false;
1046}
1047
920static inline int pblk_addr_in_cache(struct ppa_addr ppa) 1048static inline int pblk_addr_in_cache(struct ppa_addr ppa)
921{ 1049{
922 return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached); 1050 return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
964} 1092}
965 1093
966static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk, 1094static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
967 struct line_smeta *smeta) 1095 struct line_header *header)
968{ 1096{
969 u32 crc = ~(u32)0; 1097 u32 crc = ~(u32)0;
970 1098
971 crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc), 1099 crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
972 sizeof(struct line_header) - sizeof(crc)); 1100 sizeof(struct line_header) - sizeof(crc));
973 1101
974 return crc; 1102 return crc;
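
Since the helper now takes the shared struct line_header, one call covers both metadata headers; a hedged example of the expected call shape, assuming both line_smeta and line_emeta begin with that header (which the old smeta-only version already relied on) and that crc is its first field (variable names are illustrative):

	smeta_buf->header.crc = cpu_to_le32(
			pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
	emeta_buf->header.crc = cpu_to_le32(
			pblk_calc_meta_header_crc(pblk, &emeta_buf->header));
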
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
996 1124
997 crc = crc32_le(crc, (unsigned char *)emeta + 1125 crc = crc32_le(crc, (unsigned char *)emeta +
998 sizeof(struct line_header) + sizeof(crc), 1126 sizeof(struct line_header) + sizeof(crc),
999 lm->emeta_len - 1127 lm->emeta_len[0] -
1000 sizeof(struct line_header) - sizeof(crc)); 1128 sizeof(struct line_header) - sizeof(crc));
1001 1129
1002 return crc; 1130 return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
1016 return flags; 1144 return flags;
1017} 1145}
1018 1146
1019static inline int pblk_set_read_mode(struct pblk *pblk) 1147enum {
1148 PBLK_READ_RANDOM = 0,
1149 PBLK_READ_SEQUENTIAL = 1,
1150};
1151
1152static inline int pblk_set_read_mode(struct pblk *pblk, int type)
1153{
1154 struct nvm_tgt_dev *dev = pblk->dev;
1155 struct nvm_geo *geo = &dev->geo;
1156 int flags;
1157
1158 flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
1159 if (type == PBLK_READ_SEQUENTIAL)
1160 flags |= geo->plane_mode >> 1;
1161
1162 return flags;
1163}
1164
1165static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
1020{ 1166{
1021 return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; 1167 return !(nr_secs % pblk->min_write_pgs);
1022} 1168}
1023 1169
1024#ifdef CONFIG_NVM_DEBUG 1170#ifdef CONFIG_NVM_DEBUG
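
A sketch of how the two new helpers above would plausibly be combined on the read path (hypothetical call site; the pblk-read.c hunks are outside this excerpt):

	if (pblk_io_aligned(pblk, nr_secs))
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
	else
		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
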
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..267f01ae87e4 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
279{ 279{
280 struct completion *waiting = bio->bi_private; 280 struct completion *waiting = bio->bi_private;
281 281
282 if (bio->bi_error) 282 if (bio->bi_status)
283 pr_err("nvm: gc request failed (%u).\n", bio->bi_error); 283 pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
284 284
285 complete(waiting); 285 complete(waiting);
286} 286}
@@ -359,7 +359,7 @@ try:
359 goto finished; 359 goto finished;
360 } 360 }
361 wait_for_completion_io(&wait); 361 wait_for_completion_io(&wait);
362 if (bio->bi_error) { 362 if (bio->bi_status) {
363 rrpc_inflight_laddr_release(rrpc, rqd); 363 rrpc_inflight_laddr_release(rrpc, rqd);
364 goto finished; 364 goto finished;
365 } 365 }
@@ -385,7 +385,7 @@ try:
385 wait_for_completion_io(&wait); 385 wait_for_completion_io(&wait);
386 386
387 rrpc_inflight_laddr_release(rrpc, rqd); 387 rrpc_inflight_laddr_release(rrpc, rqd);
388 if (bio->bi_error) 388 if (bio->bi_status)
389 goto finished; 389 goto finished;
390 390
391 bio_reset(bio); 391 bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
994 struct nvm_rq *rqd; 994 struct nvm_rq *rqd;
995 int err; 995 int err;
996 996
997 blk_queue_split(q, &bio, q->bio_split); 997 blk_queue_split(q, &bio);
998 998
999 if (bio_op(bio) == REQ_OP_DISCARD) { 999 if (bio_op(bio) == REQ_OP_DISCARD) {
1000 rrpc_discard(rrpc, bio); 1000 rrpc_discard(rrpc, bio);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c9a1a8..dee542fff68e 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
849 849
850/* Forward declarations */ 850/* Forward declarations */
851 851
852void bch_count_io_errors(struct cache *, int, const char *); 852void bch_count_io_errors(struct cache *, blk_status_t, const char *);
853void bch_bbio_count_io_errors(struct cache_set *, struct bio *, 853void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
854 int, const char *); 854 blk_status_t, const char *);
855void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); 855void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
856 const char *);
856void bch_bbio_free(struct bio *, struct cache_set *); 857void bch_bbio_free(struct bio *, struct cache_set *);
857struct bio *bch_bbio_alloc(struct cache_set *); 858struct bio *bch_bbio_alloc(struct cache_set *);
858 859
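
The bcache and device-mapper hunks that follow are one mechanical conversion: bio->bi_error (an errno) becomes bio->bi_status (a blk_status_t), and translation to or from errno happens only at boundaries that still speak errno. Reduced to a hypothetical endio handler, the pattern looks like this (example shape only, not a hunk from this series):

static void example_endio(struct bio *bio)
{
	/* bi_status replaces the old bi_error field */
	if (bio->bi_status)
		pr_err("example: I/O failed: %d\n",
		       blk_status_to_errno(bio->bi_status));

	/* errno_to_blk_status() covers the opposite direction, e.g. when
	 * forwarding a plain errno as dm-bufio does below */
}
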
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e848ae4..866dcf78ff8e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
307 bch_submit_bbio(bio, b->c, &b->key, 0); 307 bch_submit_bbio(bio, b->c, &b->key, 0);
308 closure_sync(&cl); 308 closure_sync(&cl);
309 309
310 if (bio->bi_error) 310 if (bio->bi_status)
311 set_btree_node_io_error(b); 311 set_btree_node_io_error(b);
312 312
313 bch_bbio_free(bio, b->c); 313 bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
374 struct closure *cl = bio->bi_private; 374 struct closure *cl = bio->bi_private;
375 struct btree *b = container_of(cl, struct btree, io); 375 struct btree *b = container_of(cl, struct btree, io);
376 376
377 if (bio->bi_error) 377 if (bio->bi_status)
378 set_btree_node_io_error(b); 378 set_btree_node_io_error(b);
379 379
380 bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree"); 380 bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
381 closure_put(cl); 381 closure_put(cl);
382} 382}
383 383
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f55056aaae..35a5a7210e51 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
110 struct bio_vec bv, cbv; 110 struct bio_vec bv, cbv;
111 struct bvec_iter iter, citer = { 0 }; 111 struct bvec_iter iter, citer = { 0 };
112 112
113 check = bio_clone(bio, GFP_NOIO); 113 check = bio_clone_kmalloc(bio, GFP_NOIO);
114 if (!check) 114 if (!check)
115 return; 115 return;
116 check->bi_opf = REQ_OP_READ; 116 check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88c0ce9..6a9b85095e7b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
50 50
51/* IO errors */ 51/* IO errors */
52 52
53void bch_count_io_errors(struct cache *ca, int error, const char *m) 53void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
54{ 54{
55 /* 55 /*
56 * The halflife of an error is: 56 * The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
103} 103}
104 104
105void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, 105void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
106 int error, const char *m) 106 blk_status_t error, const char *m)
107{ 107{
108 struct bbio *b = container_of(bio, struct bbio, bio); 108 struct bbio *b = container_of(bio, struct bbio, bio);
109 struct cache *ca = PTR_CACHE(c, &b->key, 0); 109 struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
132} 132}
133 133
134void bch_bbio_endio(struct cache_set *c, struct bio *bio, 134void bch_bbio_endio(struct cache_set *c, struct bio *bio,
135 int error, const char *m) 135 blk_status_t error, const char *m)
136{ 136{
137 struct closure *cl = bio->bi_private; 137 struct closure *cl = bio->bi_private;
138 138
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53d5670..0352d05e495c 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
549{ 549{
550 struct journal_write *w = bio->bi_private; 550 struct journal_write *w = bio->bi_private;
551 551
552 cache_set_err_on(bio->bi_error, w->c, "journal io error"); 552 cache_set_err_on(bio->bi_status, w->c, "journal io error");
553 closure_put(&w->c->journal.io); 553 closure_put(&w->c->journal.io);
554} 554}
555 555
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a907006d..f633b30c962e 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
63 struct moving_io *io = container_of(bio->bi_private, 63 struct moving_io *io = container_of(bio->bi_private,
64 struct moving_io, cl); 64 struct moving_io, cl);
65 65
66 if (bio->bi_error) 66 if (bio->bi_status)
67 io->op.error = bio->bi_error; 67 io->op.status = bio->bi_status;
68 else if (!KEY_DIRTY(&b->key) && 68 else if (!KEY_DIRTY(&b->key) &&
69 ptr_stale(io->op.c, &b->key, 0)) { 69 ptr_stale(io->op.c, &b->key, 0)) {
70 io->op.error = -EINTR; 70 io->op.status = BLK_STS_IOERR;
71 } 71 }
72 72
73 bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move"); 73 bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
74} 74}
75 75
76static void moving_init(struct moving_io *io) 76static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
92 struct moving_io *io = container_of(cl, struct moving_io, cl); 92 struct moving_io *io = container_of(cl, struct moving_io, cl);
93 struct data_insert_op *op = &io->op; 93 struct data_insert_op *op = &io->op;
94 94
95 if (!op->error) { 95 if (!op->status) {
96 moving_init(io); 96 moving_init(io);
97 97
98 io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key); 98 io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc34369..019b3df9f1c6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
81 if (ret == -ESRCH) { 81 if (ret == -ESRCH) {
82 op->replace_collision = true; 82 op->replace_collision = true;
83 } else if (ret) { 83 } else if (ret) {
84 op->error = -ENOMEM; 84 op->status = BLK_STS_RESOURCE;
85 op->insert_data_done = true; 85 op->insert_data_done = true;
86 } 86 }
87 87
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
178 struct closure *cl = bio->bi_private; 178 struct closure *cl = bio->bi_private;
179 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); 179 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
180 180
181 if (bio->bi_error) { 181 if (bio->bi_status) {
182 /* TODO: We could try to recover from this. */ 182 /* TODO: We could try to recover from this. */
183 if (op->writeback) 183 if (op->writeback)
184 op->error = bio->bi_error; 184 op->status = bio->bi_status;
185 else if (!op->replace) 185 else if (!op->replace)
186 set_closure_fn(cl, bch_data_insert_error, op->wq); 186 set_closure_fn(cl, bch_data_insert_error, op->wq);
187 else 187 else
188 set_closure_fn(cl, NULL, NULL); 188 set_closure_fn(cl, NULL, NULL);
189 } 189 }
190 190
191 bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache"); 191 bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
192} 192}
193 193
194static void bch_data_insert_start(struct closure *cl) 194static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
488 * from the backing device. 488 * from the backing device.
489 */ 489 */
490 490
491 if (bio->bi_error) 491 if (bio->bi_status)
492 s->iop.error = bio->bi_error; 492 s->iop.status = bio->bi_status;
493 else if (!KEY_DIRTY(&b->key) && 493 else if (!KEY_DIRTY(&b->key) &&
494 ptr_stale(s->iop.c, &b->key, 0)) { 494 ptr_stale(s->iop.c, &b->key, 0)) {
495 atomic_long_inc(&s->iop.c->cache_read_races); 495 atomic_long_inc(&s->iop.c->cache_read_races);
496 s->iop.error = -EINTR; 496 s->iop.status = BLK_STS_IOERR;
497 } 497 }
498 498
499 bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache"); 499 bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
500} 500}
501 501
502/* 502/*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
593{ 593{
594 struct closure *cl = bio->bi_private; 594 struct closure *cl = bio->bi_private;
595 595
596 if (bio->bi_error) { 596 if (bio->bi_status) {
597 struct search *s = container_of(cl, struct search, cl); 597 struct search *s = container_of(cl, struct search, cl);
598 s->iop.error = bio->bi_error; 598 s->iop.status = bio->bi_status;
599 /* Only cache read errors are recoverable */ 599 /* Only cache read errors are recoverable */
600 s->recoverable = false; 600 s->recoverable = false;
601 } 601 }
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
611 &s->d->disk->part0, s->start_time); 611 &s->d->disk->part0, s->start_time);
612 612
613 trace_bcache_request_end(s->d, s->orig_bio); 613 trace_bcache_request_end(s->d, s->orig_bio);
614 s->orig_bio->bi_error = s->iop.error; 614 s->orig_bio->bi_status = s->iop.status;
615 bio_endio(s->orig_bio); 615 bio_endio(s->orig_bio);
616 s->orig_bio = NULL; 616 s->orig_bio = NULL;
617 } 617 }
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
664 s->iop.inode = d->id; 664 s->iop.inode = d->id;
665 s->iop.write_point = hash_long((unsigned long) current, 16); 665 s->iop.write_point = hash_long((unsigned long) current, 16);
666 s->iop.write_prio = 0; 666 s->iop.write_prio = 0;
667 s->iop.error = 0; 667 s->iop.status = 0;
668 s->iop.flags = 0; 668 s->iop.flags = 0;
669 s->iop.flush_journal = op_is_flush(bio->bi_opf); 669 s->iop.flush_journal = op_is_flush(bio->bi_opf);
670 s->iop.wq = bcache_wq; 670 s->iop.wq = bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
707 /* Retry from the backing device: */ 707 /* Retry from the backing device: */
708 trace_bcache_read_retry(s->orig_bio); 708 trace_bcache_read_retry(s->orig_bio);
709 709
710 s->iop.error = 0; 710 s->iop.status = 0;
711 do_bio_hook(s, s->orig_bio); 711 do_bio_hook(s, s->orig_bio);
712 712
713 /* XXX: invalidate cache */ 713 /* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
767 !s->cache_miss, s->iop.bypass); 767 !s->cache_miss, s->iop.bypass);
768 trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); 768 trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
769 769
770 if (s->iop.error) 770 if (s->iop.status)
771 continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); 771 continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
772 else if (s->iop.bio || verify(dc, &s->bio.bio)) 772 else if (s->iop.bio || verify(dc, &s->bio.bio))
773 continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq); 773 continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff36875c2b3..7689176951ce 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
10 unsigned inode; 10 unsigned inode;
11 uint16_t write_point; 11 uint16_t write_point;
12 uint16_t write_prio; 12 uint16_t write_prio;
13 short error; 13 blk_status_t status;
14 14
15 union { 15 union {
16 uint16_t flags; 16 uint16_t flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e39168..8352fad765f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
271{ 271{
272 struct cache *ca = bio->bi_private; 272 struct cache *ca = bio->bi_private;
273 273
274 bch_count_io_errors(ca, bio->bi_error, "writing superblock"); 274 bch_count_io_errors(ca, bio->bi_status, "writing superblock");
275 closure_put(&ca->set->sb_write); 275 closure_put(&ca->set->sb_write);
276} 276}
277 277
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
321 struct closure *cl = bio->bi_private; 321 struct closure *cl = bio->bi_private;
322 struct cache_set *c = container_of(cl, struct cache_set, uuid_write); 322 struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
323 323
324 cache_set_err_on(bio->bi_error, c, "accessing uuids"); 324 cache_set_err_on(bio->bi_status, c, "accessing uuids");
325 bch_bbio_free(bio, c); 325 bch_bbio_free(bio, c);
326 closure_put(cl); 326 closure_put(cl);
327} 327}
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
494{ 494{
495 struct cache *ca = bio->bi_private; 495 struct cache *ca = bio->bi_private;
496 496
497 cache_set_err_on(bio->bi_error, ca->set, "accessing priorities"); 497 cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
498 bch_bbio_free(bio, ca->set); 498 bch_bbio_free(bio, ca->set);
499 closure_put(&ca->prio); 499 closure_put(&ca->prio);
500} 500}
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
782 782
783 minor *= BCACHE_MINORS; 783 minor *= BCACHE_MINORS;
784 784
785 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 785 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
786 BIOSET_NEED_BVECS |
787 BIOSET_NEED_RESCUER)) ||
786 !(d->disk = alloc_disk(BCACHE_MINORS))) { 788 !(d->disk = alloc_disk(BCACHE_MINORS))) {
787 ida_simple_remove(&bcache_minor, minor); 789 ida_simple_remove(&bcache_minor, minor);
788 return -ENOMEM; 790 return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1516 sizeof(struct bbio) + sizeof(struct bio_vec) * 1518 sizeof(struct bbio) + sizeof(struct bio_vec) *
1517 bucket_pages(c))) || 1519 bucket_pages(c))) ||
1518 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || 1520 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
1519 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 1521 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
1522 BIOSET_NEED_BVECS |
1523 BIOSET_NEED_RESCUER)) ||
1520 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || 1524 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
1521 !(c->moving_gc_wq = alloc_workqueue("bcache_gc", 1525 !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
1522 WQ_MEM_RECLAIM, 0)) || 1526 WQ_MEM_RECLAIM, 0)) ||
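
bioset_create() grew a flags argument in this series; BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER, as both bcache call sites above now pass, appears to be the combination that keeps the previous behaviour (embedded biovecs plus a rescuer workqueue). The minimal stand-alone shape:

	struct bio_set *bs;

	bs = bioset_create(4, offsetof(struct bbio, bio),
			   BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
	if (!bs)
		return -ENOMEM;
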
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48b9235..42c66e76f05e 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
167 struct keybuf_key *w = bio->bi_private; 167 struct keybuf_key *w = bio->bi_private;
168 struct dirty_io *io = w->private; 168 struct dirty_io *io = w->private;
169 169
170 if (bio->bi_error) 170 if (bio->bi_status)
171 SET_KEY_DIRTY(&w->key, false); 171 SET_KEY_DIRTY(&w->key, false);
172 172
173 closure_put(&io->cl); 173 closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
195 struct dirty_io *io = w->private; 195 struct dirty_io *io = w->private;
196 196
197 bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), 197 bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
198 bio->bi_error, "reading dirty data from cache"); 198 bio->bi_status, "reading dirty data from cache");
199 199
200 dirty_endio(bio); 200 dirty_endio(bio);
201} 201}
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c30a57..82d27384d31f 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
229EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); 229EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
230 230
231void dm_cell_error(struct dm_bio_prison *prison, 231void dm_cell_error(struct dm_bio_prison *prison,
232 struct dm_bio_prison_cell *cell, int error) 232 struct dm_bio_prison_cell *cell, blk_status_t error)
233{ 233{
234 struct bio_list bios; 234 struct bio_list bios;
235 struct bio *bio; 235 struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
238 dm_cell_release(prison, cell, &bios); 238 dm_cell_release(prison, cell, &bios);
239 239
240 while ((bio = bio_list_pop(&bios))) { 240 while ((bio = bio_list_pop(&bios))) {
241 bio->bi_error = error; 241 bio->bi_status = error;
242 bio_endio(bio); 242 bio_endio(bio);
243 } 243 }
244} 244}
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac07e2c..cec52ac5e1ae 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
91 struct dm_bio_prison_cell *cell, 91 struct dm_bio_prison_cell *cell,
92 struct bio_list *inmates); 92 struct bio_list *inmates);
93void dm_cell_error(struct dm_bio_prison *prison, 93void dm_cell_error(struct dm_bio_prison *prison,
94 struct dm_bio_prison_cell *cell, int error); 94 struct dm_bio_prison_cell *cell, blk_status_t error);
95 95
96/* 96/*
97 * Visits the cell and then releases. Guarantees no new inmates are 97 * Visits the cell and then releases. Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 840c1496b2b1..850ff6c67994 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
145 enum data_mode data_mode; 145 enum data_mode data_mode;
146 unsigned char list_mode; /* LIST_* */ 146 unsigned char list_mode; /* LIST_* */
147 unsigned hold_count; 147 unsigned hold_count;
148 int read_error; 148 blk_status_t read_error;
149 int write_error; 149 blk_status_t write_error;
150 unsigned long state; 150 unsigned long state;
151 unsigned long last_accessed; 151 unsigned long last_accessed;
152 struct dm_bufio_client *c; 152 struct dm_bufio_client *c;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
555{ 555{
556 struct dm_buffer *b = context; 556 struct dm_buffer *b = context;
557 557
558 b->bio.bi_error = error ? -EIO : 0; 558 b->bio.bi_status = error ? BLK_STS_IOERR : 0;
559 b->bio.bi_end_io(&b->bio); 559 b->bio.bi_end_io(&b->bio);
560} 560}
561 561
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
588 588
589 r = dm_io(&io_req, 1, &region, NULL); 589 r = dm_io(&io_req, 1, &region, NULL);
590 if (r) { 590 if (r) {
591 b->bio.bi_error = r; 591 b->bio.bi_status = errno_to_blk_status(r);
592 end_io(&b->bio); 592 end_io(&b->bio);
593 } 593 }
594} 594}
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
596static void inline_endio(struct bio *bio) 596static void inline_endio(struct bio *bio)
597{ 597{
598 bio_end_io_t *end_fn = bio->bi_private; 598 bio_end_io_t *end_fn = bio->bi_private;
599 int error = bio->bi_error; 599 blk_status_t status = bio->bi_status;
600 600
601 /* 601 /*
602 * Reset the bio to free any attached resources 602 * Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
604 */ 604 */
605 bio_reset(bio); 605 bio_reset(bio);
606 606
607 bio->bi_error = error; 607 bio->bi_status = status;
608 end_fn(bio); 608 end_fn(bio);
609} 609}
610 610
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
685{ 685{
686 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); 686 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
687 687
688 b->write_error = bio->bi_error; 688 b->write_error = bio->bi_status;
689 if (unlikely(bio->bi_error)) { 689 if (unlikely(bio->bi_status)) {
690 struct dm_bufio_client *c = b->c; 690 struct dm_bufio_client *c = b->c;
691 int error = bio->bi_error; 691
692 (void)cmpxchg(&c->async_write_error, 0, error); 692 (void)cmpxchg(&c->async_write_error, 0,
693 blk_status_to_errno(bio->bi_status));
693 } 694 }
694 695
695 BUG_ON(!test_bit(B_WRITING, &b->state)); 696 BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
1063{ 1064{
1064 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); 1065 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
1065 1066
1066 b->read_error = bio->bi_error; 1067 b->read_error = bio->bi_status;
1067 1068
1068 BUG_ON(!test_bit(B_READING, &b->state)); 1069 BUG_ON(!test_bit(B_READING, &b->state));
1069 1070
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
1107 wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); 1108 wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
1108 1109
1109 if (b->read_error) { 1110 if (b->read_error) {
1110 int error = b->read_error; 1111 int error = blk_status_to_errno(b->read_error);
1111 1112
1112 dm_bufio_release(b); 1113 dm_bufio_release(b);
1113 1114
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
1257 */ 1258 */
1258int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) 1259int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
1259{ 1260{
1260 int a, f; 1261 blk_status_t a;
1262 int f;
1261 unsigned long buffers_processed = 0; 1263 unsigned long buffers_processed = 0;
1262 struct dm_buffer *b, *tmp; 1264 struct dm_buffer *b, *tmp;
1263 1265
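
For reference, a minimal sketch of the conversion pattern the dm-bufio hunks above follow (hypothetical helpers, not taken from the patch): status values stay blk_status_t inside the driver and are translated to and from errnos only at the boundaries.

	/* Sketch only: errnos from lower layers become a blk_status_t on the
	 * bio, and a stored blk_status_t is turned back into an errno only
	 * when reported to callers that still expect one. */
	static void example_fail_bio(struct bio *bio, int err)
	{
		bio->bi_status = errno_to_blk_status(err);	/* e.g. -EIO becomes BLK_STS_IOERR */
		bio_endio(bio);
	}

	static int example_report_read_error(struct dm_buffer *b)
	{
		return blk_status_to_errno(b->read_error);	/* back to an errno for dm-bufio callers */
	}
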
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index d682a0511381..c5ea03fc7ee1 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
119 */ 119 */
120struct continuation { 120struct continuation {
121 struct work_struct ws; 121 struct work_struct ws;
122 int input; 122 blk_status_t input;
123}; 123};
124 124
125static inline void init_continuation(struct continuation *k, 125static inline void init_continuation(struct continuation *k,
@@ -145,7 +145,7 @@ struct batcher {
145 /* 145 /*
146 * The operation that everyone is waiting for. 146 * The operation that everyone is waiting for.
147 */ 147 */
148 int (*commit_op)(void *context); 148 blk_status_t (*commit_op)(void *context);
149 void *commit_context; 149 void *commit_context;
150 150
151 /* 151 /*
@@ -171,8 +171,7 @@ struct batcher {
171static void __commit(struct work_struct *_ws) 171static void __commit(struct work_struct *_ws)
172{ 172{
173 struct batcher *b = container_of(_ws, struct batcher, commit_work); 173 struct batcher *b = container_of(_ws, struct batcher, commit_work);
174 174 blk_status_t r;
175 int r;
176 unsigned long flags; 175 unsigned long flags;
177 struct list_head work_items; 176 struct list_head work_items;
178 struct work_struct *ws, *tmp; 177 struct work_struct *ws, *tmp;
@@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws)
205 204
206 while ((bio = bio_list_pop(&bios))) { 205 while ((bio = bio_list_pop(&bios))) {
207 if (r) { 206 if (r) {
208 bio->bi_error = r; 207 bio->bi_status = r;
209 bio_endio(bio); 208 bio_endio(bio);
210 } else 209 } else
211 b->issue_op(bio, b->issue_context); 210 b->issue_op(bio, b->issue_context);
@@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws)
213} 212}
214 213
215static void batcher_init(struct batcher *b, 214static void batcher_init(struct batcher *b,
216 int (*commit_op)(void *), 215 blk_status_t (*commit_op)(void *),
217 void *commit_context, 216 void *commit_context,
218 void (*issue_op)(struct bio *bio, void *), 217 void (*issue_op)(struct bio *bio, void *),
219 void *issue_context, 218 void *issue_context,
@@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
955 954
956 dm_unhook_bio(&pb->hook_info, bio); 955 dm_unhook_bio(&pb->hook_info, bio);
957 956
958 if (bio->bi_error) { 957 if (bio->bi_status) {
959 bio_endio(bio); 958 bio_endio(bio);
960 return; 959 return;
961 } 960 }
@@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
1220 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k); 1219 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
1221 1220
1222 if (read_err || write_err) 1221 if (read_err || write_err)
1223 mg->k.input = -EIO; 1222 mg->k.input = BLK_STS_IOERR;
1224 1223
1225 queue_continuation(mg->cache->wq, &mg->k); 1224 queue_continuation(mg->cache->wq, &mg->k);
1226} 1225}
@@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
1266 1265
1267 dm_unhook_bio(&pb->hook_info, bio); 1266 dm_unhook_bio(&pb->hook_info, bio);
1268 1267
1269 if (bio->bi_error) 1268 if (bio->bi_status)
1270 mg->k.input = bio->bi_error; 1269 mg->k.input = bio->bi_status;
1271 1270
1272 queue_continuation(mg->cache->wq, &mg->k); 1271 queue_continuation(mg->cache->wq, &mg->k);
1273} 1272}
@@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
1323 if (mg->overwrite_bio) { 1322 if (mg->overwrite_bio) {
1324 if (success) 1323 if (success)
1325 force_set_dirty(cache, cblock); 1324 force_set_dirty(cache, cblock);
1325 else if (mg->k.input)
1326 mg->overwrite_bio->bi_status = mg->k.input;
1326 else 1327 else
1327 mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO); 1328 mg->overwrite_bio->bi_status = BLK_STS_IOERR;
1328 bio_endio(mg->overwrite_bio); 1329 bio_endio(mg->overwrite_bio);
1329 } else { 1330 } else {
1330 if (success) 1331 if (success)
@@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
1504 r = copy(mg, is_policy_promote); 1505 r = copy(mg, is_policy_promote);
1505 if (r) { 1506 if (r) {
1506 DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); 1507 DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
1507 mg->k.input = -EIO; 1508 mg->k.input = BLK_STS_IOERR;
1508 mg_complete(mg, false); 1509 mg_complete(mg, false);
1509 } 1510 }
1510 } 1511 }
@@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
1907/* 1908/*
1908 * Used by the batcher. 1909 * Used by the batcher.
1909 */ 1910 */
1910static int commit_op(void *context) 1911static blk_status_t commit_op(void *context)
1911{ 1912{
1912 struct cache *cache = context; 1913 struct cache *cache = context;
1913 1914
1914 if (dm_cache_changed_this_transaction(cache->cmd)) 1915 if (dm_cache_changed_this_transaction(cache->cmd))
1915 return commit(cache, false); 1916 return errno_to_blk_status(commit(cache, false));
1916 1917
1917 return 0; 1918 return 0;
1918} 1919}
@@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
2018 bio_list_init(&cache->deferred_bios); 2019 bio_list_init(&cache->deferred_bios);
2019 2020
2020 while ((bio = bio_list_pop(&bios))) { 2021 while ((bio = bio_list_pop(&bios))) {
2021 bio->bi_error = DM_ENDIO_REQUEUE; 2022 bio->bi_status = BLK_STS_DM_REQUEUE;
2022 bio_endio(bio); 2023 bio_endio(bio);
2023 } 2024 }
2024} 2025}
@@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
2820 return r; 2821 return r;
2821} 2822}
2822 2823
2823static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 2824static int cache_end_io(struct dm_target *ti, struct bio *bio,
2825 blk_status_t *error)
2824{ 2826{
2825 struct cache *cache = ti->private; 2827 struct cache *cache = ti->private;
2826 unsigned long flags; 2828 unsigned long flags;
@@ -2838,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2838 bio_drop_shared_lock(cache, bio); 2840 bio_drop_shared_lock(cache, bio);
2839 accounted_complete(cache, bio); 2841 accounted_complete(cache, bio);
2840 2842
2841 return 0; 2843 return DM_ENDIO_DONE;
2842} 2844}
2843 2845
2844static int write_dirty_bitset(struct cache *cache) 2846static int write_dirty_bitset(struct cache *cache)
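
A minimal sketch of the continuation idiom the dm-cache hunks above converge on (hypothetical completion helper, assuming the struct continuation shown earlier): asynchronous steps record a blk_status_t in k.input, and the final completion either propagates it to the bio or falls back to BLK_STS_IOERR.

	static void example_finish(struct continuation *k, struct bio *bio, bool success)
	{
		if (!success)
			bio->bi_status = k->input ? k->input : BLK_STS_IOERR;
		bio_endio(bio);
	}
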
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ebf9e72d479b..9e1b72e8f7ef 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
71 struct convert_context ctx; 71 struct convert_context ctx;
72 72
73 atomic_t io_pending; 73 atomic_t io_pending;
74 int error; 74 blk_status_t error;
75 sector_t sector; 75 sector_t sector;
76 76
77 struct rb_node rb_node; 77 struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
1292/* 1292/*
1293 * Encrypt / decrypt data from one bio to another one (can be the same one) 1293 * Encrypt / decrypt data from one bio to another one (can be the same one)
1294 */ 1294 */
1295static int crypt_convert(struct crypt_config *cc, 1295static blk_status_t crypt_convert(struct crypt_config *cc,
1296 struct convert_context *ctx) 1296 struct convert_context *ctx)
1297{ 1297{
1298 unsigned int tag_offset = 0; 1298 unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
1343 */ 1343 */
1344 case -EBADMSG: 1344 case -EBADMSG:
1345 atomic_dec(&ctx->cc_pending); 1345 atomic_dec(&ctx->cc_pending);
1346 return -EILSEQ; 1346 return BLK_STS_PROTECTION;
1347 /* 1347 /*
1348 * There was an error while processing the request. 1348 * There was an error while processing the request.
1349 */ 1349 */
1350 default: 1350 default:
1351 atomic_dec(&ctx->cc_pending); 1351 atomic_dec(&ctx->cc_pending);
1352 return -EIO; 1352 return BLK_STS_IOERR;
1353 } 1353 }
1354 } 1354 }
1355 1355
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
1463{ 1463{
1464 struct crypt_config *cc = io->cc; 1464 struct crypt_config *cc = io->cc;
1465 struct bio *base_bio = io->base_bio; 1465 struct bio *base_bio = io->base_bio;
1466 int error = io->error; 1466 blk_status_t error = io->error;
1467 1467
1468 if (!atomic_dec_and_test(&io->io_pending)) 1468 if (!atomic_dec_and_test(&io->io_pending))
1469 return; 1469 return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
1476 else 1476 else
1477 kfree(io->integrity_metadata); 1477 kfree(io->integrity_metadata);
1478 1478
1479 base_bio->bi_error = error; 1479 base_bio->bi_status = error;
1480 bio_endio(base_bio); 1480 bio_endio(base_bio);
1481} 1481}
1482 1482
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
1502 struct dm_crypt_io *io = clone->bi_private; 1502 struct dm_crypt_io *io = clone->bi_private;
1503 struct crypt_config *cc = io->cc; 1503 struct crypt_config *cc = io->cc;
1504 unsigned rw = bio_data_dir(clone); 1504 unsigned rw = bio_data_dir(clone);
1505 int error; 1505 blk_status_t error;
1506 1506
1507 /* 1507 /*
1508 * free the processed pages 1508 * free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
1510 if (rw == WRITE) 1510 if (rw == WRITE)
1511 crypt_free_buffer_pages(cc, clone); 1511 crypt_free_buffer_pages(cc, clone);
1512 1512
1513 error = clone->bi_error; 1513 error = clone->bi_status;
1514 bio_put(clone); 1514 bio_put(clone);
1515 1515
1516 if (rw == READ && !error) { 1516 if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
1570 1570
1571 crypt_inc_pending(io); 1571 crypt_inc_pending(io);
1572 if (kcryptd_io_read(io, GFP_NOIO)) 1572 if (kcryptd_io_read(io, GFP_NOIO))
1573 io->error = -ENOMEM; 1573 io->error = BLK_STS_RESOURCE;
1574 crypt_dec_pending(io); 1574 crypt_dec_pending(io);
1575} 1575}
1576 1576
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
1656 sector_t sector; 1656 sector_t sector;
1657 struct rb_node **rbp, *parent; 1657 struct rb_node **rbp, *parent;
1658 1658
1659 if (unlikely(io->error < 0)) { 1659 if (unlikely(io->error)) {
1660 crypt_free_buffer_pages(cc, clone); 1660 crypt_free_buffer_pages(cc, clone);
1661 bio_put(clone); 1661 bio_put(clone);
1662 crypt_dec_pending(io); 1662 crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1697 struct bio *clone; 1697 struct bio *clone;
1698 int crypt_finished; 1698 int crypt_finished;
1699 sector_t sector = io->sector; 1699 sector_t sector = io->sector;
1700 int r; 1700 blk_status_t r;
1701 1701
1702 /* 1702 /*
1703 * Prevent io from disappearing until this function completes. 1703 * Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1707 1707
1708 clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); 1708 clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
1709 if (unlikely(!clone)) { 1709 if (unlikely(!clone)) {
1710 io->error = -EIO; 1710 io->error = BLK_STS_IOERR;
1711 goto dec; 1711 goto dec;
1712 } 1712 }
1713 1713
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1718 1718
1719 crypt_inc_pending(io); 1719 crypt_inc_pending(io);
1720 r = crypt_convert(cc, &io->ctx); 1720 r = crypt_convert(cc, &io->ctx);
1721 if (r < 0) 1721 if (r)
1722 io->error = r; 1722 io->error = r;
1723 crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); 1723 crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
1724 1724
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
1740static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) 1740static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
1741{ 1741{
1742 struct crypt_config *cc = io->cc; 1742 struct crypt_config *cc = io->cc;
1743 int r = 0; 1743 blk_status_t r;
1744 1744
1745 crypt_inc_pending(io); 1745 crypt_inc_pending(io);
1746 1746
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
1748 io->sector); 1748 io->sector);
1749 1749
1750 r = crypt_convert(cc, &io->ctx); 1750 r = crypt_convert(cc, &io->ctx);
1751 if (r < 0) 1751 if (r)
1752 io->error = r; 1752 io->error = r;
1753 1753
1754 if (atomic_dec_and_test(&io->ctx.cc_pending)) 1754 if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
1781 if (error == -EBADMSG) { 1781 if (error == -EBADMSG) {
1782 DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu", 1782 DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
1783 (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq))); 1783 (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
1784 io->error = -EILSEQ; 1784 io->error = BLK_STS_PROTECTION;
1785 } else if (error < 0) 1785 } else if (error < 0)
1786 io->error = -EIO; 1786 io->error = BLK_STS_IOERR;
1787 1787
1788 crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); 1788 crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
1789 1789
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2677 goto bad; 2677 goto bad;
2678 } 2678 }
2679 2679
2680 cc->bs = bioset_create(MIN_IOS, 0); 2680 cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
2681 BIOSET_NEED_RESCUER));
2681 if (!cc->bs) { 2682 if (!cc->bs) {
2682 ti->error = "Cannot allocate crypt bioset"; 2683 ti->error = "Cannot allocate crypt bioset";
2683 goto bad; 2684 goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
2795 * and is aligned to this size as defined in IO hints. 2796 * and is aligned to this size as defined in IO hints.
2796 */ 2797 */
2797 if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0)) 2798 if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
2798 return -EIO; 2799 return DM_MAPIO_KILL;
2799 2800
2800 if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1))) 2801 if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
2801 return -EIO; 2802 return DM_MAPIO_KILL;
2802 2803
2803 io = dm_per_bio_data(bio, cc->per_bio_data_size); 2804 io = dm_per_bio_data(bio, cc->per_bio_data_size);
2804 crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); 2805 crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
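
The dm-crypt hunks above funnel all error translation through one point; a minimal sketch of that mapping (hypothetical helper, covering only the cases visible in this patch):

	static blk_status_t example_crypt_err_to_status(int err)
	{
		switch (err) {
		case 0:
			return BLK_STS_OK;
		case -EBADMSG:		/* AEAD integrity failure */
			return BLK_STS_PROTECTION;
		case -ENOMEM:		/* could not allocate a clone */
			return BLK_STS_RESOURCE;
		default:
			return BLK_STS_IOERR;
		}
	}
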
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 13305a182611..3d04d5ce19d9 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
321 if (bio_data_dir(bio) == READ) { 321 if (bio_data_dir(bio) == READ) {
322 if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) && 322 if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
323 !test_bit(ERROR_WRITES, &fc->flags)) 323 !test_bit(ERROR_WRITES, &fc->flags))
324 return -EIO; 324 return DM_MAPIO_KILL;
325 goto map_bio; 325 goto map_bio;
326 } 326 }
327 327
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
349 /* 349 /*
350 * By default, error all I/O. 350 * By default, error all I/O.
351 */ 351 */
352 return -EIO; 352 return DM_MAPIO_KILL;
353 } 353 }
354 354
355map_bio: 355map_bio:
@@ -358,12 +358,13 @@ map_bio:
358 return DM_MAPIO_REMAPPED; 358 return DM_MAPIO_REMAPPED;
359} 359}
360 360
361static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) 361static int flakey_end_io(struct dm_target *ti, struct bio *bio,
362 blk_status_t *error)
362{ 363{
363 struct flakey_c *fc = ti->private; 364 struct flakey_c *fc = ti->private;
364 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); 365 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
365 366
366 if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { 367 if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
367 if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && 368 if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
368 all_corrupt_bio_flags_match(bio, fc)) { 369 all_corrupt_bio_flags_match(bio, fc)) {
369 /* 370 /*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
377 * Error read during the down_interval if drop_writes 378 * Error read during the down_interval if drop_writes
378 * and error_writes were not configured. 379 * and error_writes were not configured.
379 */ 380 */
380 return -EIO; 381 *error = BLK_STS_IOERR;
381 } 382 }
382 } 383 }
383 384
384 return error; 385 return DM_ENDIO_DONE;
385} 386}
386 387
387static void flakey_status(struct dm_target *ti, status_type_t type, 388static void flakey_status(struct dm_target *ti, status_type_t type,
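
A minimal sketch of the reworked bio ->end_io hook that flakey_end_io() above adopts (hypothetical target, hypothetical example_should_fail predicate): the status is passed by reference so the target can override it, and the return value is a DM_ENDIO_* code rather than an errno.

	static int example_end_io(struct dm_target *ti, struct bio *bio,
				  blk_status_t *error)
	{
		if (!*error && example_should_fail(bio))	/* hypothetical predicate */
			*error = BLK_STS_IOERR;

		return DM_ENDIO_DONE;	/* DM_ENDIO_INCOMPLETE if the target retries the bio itself */
	}
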
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 93b181088168..1b224aa9cf15 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
246 unsigned metadata_offset; 246 unsigned metadata_offset;
247 247
248 atomic_t in_flight; 248 atomic_t in_flight;
249 int bi_error; 249 blk_status_t bi_status;
250 250
251 struct completion *completion; 251 struct completion *completion;
252 252
@@ -1118,8 +1118,8 @@ static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *
1118static void do_endio(struct dm_integrity_c *ic, struct bio *bio) 1118static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
1119{ 1119{
1120 int r = dm_integrity_failed(ic); 1120 int r = dm_integrity_failed(ic);
1121 if (unlikely(r) && !bio->bi_error) 1121 if (unlikely(r) && !bio->bi_status)
1122 bio->bi_error = r; 1122 bio->bi_status = errno_to_blk_status(r);
1123 bio_endio(bio); 1123 bio_endio(bio);
1124} 1124}
1125 1125
@@ -1127,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
1127{ 1127{
1128 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1128 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1129 1129
1130 if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic))) 1130 if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
1131 submit_flush_bio(ic, dio); 1131 submit_flush_bio(ic, dio);
1132 else 1132 else
1133 do_endio(ic, bio); 1133 do_endio(ic, bio);
@@ -1146,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
1146 1146
1147 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1147 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1148 1148
1149 if (unlikely(dio->bi_error) && !bio->bi_error) 1149 if (unlikely(dio->bi_status) && !bio->bi_status)
1150 bio->bi_error = dio->bi_error; 1150 bio->bi_status = dio->bi_status;
1151 if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) { 1151 if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
1152 dio->range.logical_sector += dio->range.n_sectors; 1152 dio->range.logical_sector += dio->range.n_sectors;
1153 bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); 1153 bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
1154 INIT_WORK(&dio->work, integrity_bio_wait); 1154 INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1322,7 +1322,7 @@ skip_io:
1322 dec_in_flight(dio); 1322 dec_in_flight(dio);
1323 return; 1323 return;
1324error: 1324error:
1325 dio->bi_error = r; 1325 dio->bi_status = errno_to_blk_status(r);
1326 dec_in_flight(dio); 1326 dec_in_flight(dio);
1327} 1327}
1328 1328
@@ -1335,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1335 sector_t area, offset; 1335 sector_t area, offset;
1336 1336
1337 dio->ic = ic; 1337 dio->ic = ic;
1338 dio->bi_error = 0; 1338 dio->bi_status = 0;
1339 1339
1340 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 1340 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
1341 submit_flush_bio(ic, dio); 1341 submit_flush_bio(ic, dio);
@@ -1356,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1356 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", 1356 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
1357 (unsigned long long)dio->range.logical_sector, bio_sectors(bio), 1357 (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
1358 (unsigned long long)ic->provided_data_sectors); 1358 (unsigned long long)ic->provided_data_sectors);
1359 return -EIO; 1359 return DM_MAPIO_KILL;
1360 } 1360 }
1361 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) { 1361 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
1362 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", 1362 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
1363 ic->sectors_per_block, 1363 ic->sectors_per_block,
1364 (unsigned long long)dio->range.logical_sector, bio_sectors(bio)); 1364 (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
1365 return -EIO; 1365 return DM_MAPIO_KILL;
1366 } 1366 }
1367 1367
1368 if (ic->sectors_per_block > 1) { 1368 if (ic->sectors_per_block > 1) {
@@ -1372,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1372 if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { 1372 if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
1373 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", 1373 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
1374 bv.bv_offset, bv.bv_len, ic->sectors_per_block); 1374 bv.bv_offset, bv.bv_len, ic->sectors_per_block);
1375 return -EIO; 1375 return DM_MAPIO_KILL;
1376 } 1376 }
1377 } 1377 }
1378 } 1378 }
@@ -1387,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1387 wanted_tag_size *= ic->tag_size; 1387 wanted_tag_size *= ic->tag_size;
1388 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { 1388 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
1389 DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size); 1389 DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
1390 return -EIO; 1390 return DM_MAPIO_KILL;
1391 } 1391 }
1392 } 1392 }
1393 } else { 1393 } else {
1394 if (unlikely(bip != NULL)) { 1394 if (unlikely(bip != NULL)) {
1395 DMERR("Unexpected integrity data when using internal hash"); 1395 DMERR("Unexpected integrity data when using internal hash");
1396 return -EIO; 1396 return DM_MAPIO_KILL;
1397 } 1397 }
1398 } 1398 }
1399 1399
1400 if (unlikely(ic->mode == 'R') && unlikely(dio->write)) 1400 if (unlikely(ic->mode == 'R') && unlikely(dio->write))
1401 return -EIO; 1401 return DM_MAPIO_KILL;
1402 1402
1403 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1403 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
1404 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1404 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
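
For the map-path conversions above, a minimal sketch of the convention (hypothetical target, hypothetical example_bdev helper): validation failures no longer return -EIO from ->map; they return DM_MAPIO_KILL and DM core fails the bio with BLK_STS_IOERR.

	static int example_map(struct dm_target *ti, struct bio *bio)
	{
		if (bio->bi_iter.bi_size & 511)		/* hypothetical: reject sub-sector I/O */
			return DM_MAPIO_KILL;		/* was: return -EIO */

		bio->bi_bdev = example_bdev(ti);	/* hypothetical backing device */
		return DM_MAPIO_REMAPPED;
	}
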
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 8d5ca30f6551..25039607f3cb 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
58 if (!client->pool) 58 if (!client->pool)
59 goto bad; 59 goto bad;
60 60
61 client->bios = bioset_create(min_ios, 0); 61 client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
62 BIOSET_NEED_RESCUER));
62 if (!client->bios) 63 if (!client->bios)
63 goto bad; 64 goto bad;
64 65
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
124 fn(error_bits, context); 125 fn(error_bits, context);
125} 126}
126 127
127static void dec_count(struct io *io, unsigned int region, int error) 128static void dec_count(struct io *io, unsigned int region, blk_status_t error)
128{ 129{
129 if (error) 130 if (error)
130 set_bit(region, &io->error_bits); 131 set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
137{ 138{
138 struct io *io; 139 struct io *io;
139 unsigned region; 140 unsigned region;
140 int error; 141 blk_status_t error;
141 142
142 if (bio->bi_error && bio_data_dir(bio) == READ) 143 if (bio->bi_status && bio_data_dir(bio) == READ)
143 zero_fill_bio(bio); 144 zero_fill_bio(bio);
144 145
145 /* 146 /*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
147 */ 148 */
148 retrieve_io_and_region_from_bio(bio, &io, &region); 149 retrieve_io_and_region_from_bio(bio, &io, &region);
149 150
150 error = bio->bi_error; 151 error = bio->bi_status;
151 bio_put(bio); 152 bio_put(bio);
152 153
153 dec_count(io, region, error); 154 dec_count(io, region, error);
@@ -319,7 +320,7 @@ static void do_region(int op, int op_flags, unsigned region,
319 if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES || 320 if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
320 op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) { 321 op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
321 atomic_inc(&io->count); 322 atomic_inc(&io->count);
322 dec_count(io, region, -EOPNOTSUPP); 323 dec_count(io, region, BLK_STS_NOTSUPP);
323 return; 324 return;
324 } 325 }
325 326
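
A short sketch of the pattern do_region() follows above (hypothetical wrapper): an operation the device cannot honour is completed immediately with BLK_STS_NOTSUPP instead of -EOPNOTSUPP.

	static void example_reject_unsupported(struct io *io, unsigned region)
	{
		atomic_inc(&io->count);
		dec_count(io, region, BLK_STS_NOTSUPP);	/* previously -EOPNOTSUPP */
	}
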
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4dfe38655a49..a1da0eb58a93 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
150{ 150{
151 struct log_writes_c *lc = bio->bi_private; 151 struct log_writes_c *lc = bio->bi_private;
152 152
153 if (bio->bi_error) { 153 if (bio->bi_status) {
154 unsigned long flags; 154 unsigned long flags;
155 155
156 DMERR("Error writing log block, error=%d", bio->bi_error); 156 DMERR("Error writing log block, error=%d", bio->bi_status);
157 spin_lock_irqsave(&lc->blocks_lock, flags); 157 spin_lock_irqsave(&lc->blocks_lock, flags);
158 lc->logging_enabled = false; 158 lc->logging_enabled = false;
159 spin_unlock_irqrestore(&lc->blocks_lock, flags); 159 spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
586 spin_lock_irq(&lc->blocks_lock); 586 spin_lock_irq(&lc->blocks_lock);
587 lc->logging_enabled = false; 587 lc->logging_enabled = false;
588 spin_unlock_irq(&lc->blocks_lock); 588 spin_unlock_irq(&lc->blocks_lock);
589 return -ENOMEM; 589 return DM_MAPIO_KILL;
590 } 590 }
591 INIT_LIST_HEAD(&block->list); 591 INIT_LIST_HEAD(&block->list);
592 pb->block = block; 592 pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
639 spin_lock_irq(&lc->blocks_lock); 639 spin_lock_irq(&lc->blocks_lock);
640 lc->logging_enabled = false; 640 lc->logging_enabled = false;
641 spin_unlock_irq(&lc->blocks_lock); 641 spin_unlock_irq(&lc->blocks_lock);
642 return -ENOMEM; 642 return DM_MAPIO_KILL;
643 } 643 }
644 644
645 src = kmap_atomic(bv.bv_page); 645 src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ map_bio:
664 return DM_MAPIO_REMAPPED; 664 return DM_MAPIO_REMAPPED;
665} 665}
666 666
667static int normal_end_io(struct dm_target *ti, struct bio *bio, int error) 667static int normal_end_io(struct dm_target *ti, struct bio *bio,
668 blk_status_t *error)
668{ 669{
669 struct log_writes_c *lc = ti->private; 670 struct log_writes_c *lc = ti->private;
670 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); 671 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
686 spin_unlock_irqrestore(&lc->blocks_lock, flags); 687 spin_unlock_irqrestore(&lc->blocks_lock, flags);
687 } 688 }
688 689
689 return error; 690 return DM_ENDIO_DONE;
690} 691}
691 692
692/* 693/*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3df056b73b66..0e8ab5bb3575 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -559,13 +559,13 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
559 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 559 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
560 return DM_MAPIO_REQUEUE; 560 return DM_MAPIO_REQUEUE;
561 dm_report_EIO(m); 561 dm_report_EIO(m);
562 return -EIO; 562 return DM_MAPIO_KILL;
563 } 563 }
564 564
565 mpio->pgpath = pgpath; 565 mpio->pgpath = pgpath;
566 mpio->nr_bytes = nr_bytes; 566 mpio->nr_bytes = nr_bytes;
567 567
568 bio->bi_error = 0; 568 bio->bi_status = 0;
569 bio->bi_bdev = pgpath->path.dev->bdev; 569 bio->bi_bdev = pgpath->path.dev->bdev;
570 bio->bi_opf |= REQ_FAILFAST_TRANSPORT; 570 bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
571 571
@@ -621,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
621 blk_start_plug(&plug); 621 blk_start_plug(&plug);
622 while ((bio = bio_list_pop(&bios))) { 622 while ((bio = bio_list_pop(&bios))) {
623 r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio)); 623 r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
624 if (r < 0 || r == DM_MAPIO_REQUEUE) { 624 switch (r) {
625 bio->bi_error = r; 625 case DM_MAPIO_KILL:
626 bio->bi_status = BLK_STS_IOERR;
627 bio_endio(bio);
628 break;
629 case DM_MAPIO_REQUEUE:
630 bio->bi_status = BLK_STS_DM_REQUEUE;
626 bio_endio(bio); 631 bio_endio(bio);
627 } else if (r == DM_MAPIO_REMAPPED) 632 break;
633 case DM_MAPIO_REMAPPED:
628 generic_make_request(bio); 634 generic_make_request(bio);
635 break;
636 }
629 } 637 }
630 blk_finish_plug(&plug); 638 blk_finish_plug(&plug);
631} 639}
@@ -1442,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
1442 activate_or_offline_path(pgpath); 1450 activate_or_offline_path(pgpath);
1443} 1451}
1444 1452
1445static int noretry_error(int error) 1453static int noretry_error(blk_status_t error)
1446{ 1454{
1447 switch (error) { 1455 switch (error) {
1448 case -EBADE: 1456 case BLK_STS_NOTSUPP:
1449 /* 1457 case BLK_STS_NOSPC:
1450 * EBADE signals an reservation conflict. 1458 case BLK_STS_TARGET:
1451 * We shouldn't fail the path here as we can communicate with 1459 case BLK_STS_NEXUS:
1452 * the target. We should failover to the next path, but in 1460 case BLK_STS_MEDIUM:
1453 * doing so we might be causing a ping-pong between paths. 1461 case BLK_STS_RESOURCE:
1454 * So just return the reservation conflict error.
1455 */
1456 case -EOPNOTSUPP:
1457 case -EREMOTEIO:
1458 case -EILSEQ:
1459 case -ENODATA:
1460 case -ENOSPC:
1461 return 1; 1462 return 1;
1462 } 1463 }
1463 1464
@@ -1466,7 +1467,7 @@ static int noretry_error(int error)
1466} 1467}
1467 1468
1468static int multipath_end_io(struct dm_target *ti, struct request *clone, 1469static int multipath_end_io(struct dm_target *ti, struct request *clone,
1469 int error, union map_info *map_context) 1470 blk_status_t error, union map_info *map_context)
1470{ 1471{
1471 struct dm_mpath_io *mpio = get_mpio(map_context); 1472 struct dm_mpath_io *mpio = get_mpio(map_context);
1472 struct pgpath *pgpath = mpio->pgpath; 1473 struct pgpath *pgpath = mpio->pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
1493 1494
1494 if (atomic_read(&m->nr_valid_paths) == 0 && 1495 if (atomic_read(&m->nr_valid_paths) == 0 &&
1495 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 1496 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
1496 if (error == -EIO) 1497 if (error == BLK_STS_IOERR)
1497 dm_report_EIO(m); 1498 dm_report_EIO(m);
1498 /* complete with the original error */ 1499 /* complete with the original error */
1499 r = DM_ENDIO_DONE; 1500 r = DM_ENDIO_DONE;
@@ -1510,24 +1511,26 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
1510 return r; 1511 return r;
1511} 1512}
1512 1513
1513static int do_end_io_bio(struct multipath *m, struct bio *clone, 1514static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
1514 int error, struct dm_mpath_io *mpio) 1515 blk_status_t *error)
1515{ 1516{
1517 struct multipath *m = ti->private;
1518 struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
1519 struct pgpath *pgpath = mpio->pgpath;
1516 unsigned long flags; 1520 unsigned long flags;
1521 int r = DM_ENDIO_DONE;
1517 1522
1518 if (!error) 1523 if (!*error || noretry_error(*error))
1519 return 0; /* I/O complete */ 1524 goto done;
1520
1521 if (noretry_error(error))
1522 return error;
1523 1525
1524 if (mpio->pgpath) 1526 if (pgpath)
1525 fail_path(mpio->pgpath); 1527 fail_path(pgpath);
1526 1528
1527 if (atomic_read(&m->nr_valid_paths) == 0 && 1529 if (atomic_read(&m->nr_valid_paths) == 0 &&
1528 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 1530 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
1529 dm_report_EIO(m); 1531 dm_report_EIO(m);
1530 return -EIO; 1532 *error = BLK_STS_IOERR;
1533 goto done;
1531 } 1534 }
1532 1535
1533 /* Queue for the daemon to resubmit */ 1536 /* Queue for the daemon to resubmit */
@@ -1539,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
1539 if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) 1542 if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
1540 queue_work(kmultipathd, &m->process_queued_bios); 1543 queue_work(kmultipathd, &m->process_queued_bios);
1541 1544
1542 return DM_ENDIO_INCOMPLETE; 1545 r = DM_ENDIO_INCOMPLETE;
1543} 1546done:
1544
1545static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
1546{
1547 struct multipath *m = ti->private;
1548 struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
1549 struct pgpath *pgpath;
1550 struct path_selector *ps;
1551 int r;
1552
1553 BUG_ON(!mpio);
1554
1555 r = do_end_io_bio(m, clone, error, mpio);
1556 pgpath = mpio->pgpath;
1557 if (pgpath) { 1547 if (pgpath) {
1558 ps = &pgpath->pg->ps; 1548 struct path_selector *ps = &pgpath->pg->ps;
1549
1559 if (ps->type->end_io) 1550 if (ps->type->end_io)
1560 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); 1551 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1561 } 1552 }
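
The dispatch switch above also replaces the old trick of storing DM_ENDIO_REQUEUE in bi_error; a minimal sketch of the requeue convention now used across these targets (hypothetical helper):

	/* A bio that DM core should retry later is completed with the
	 * dedicated BLK_STS_DM_REQUEUE status rather than a private errno. */
	static void example_requeue_bio(struct bio *bio)
	{
		bio->bi_status = BLK_STS_DM_REQUEUE;
		bio_endio(bio);
	}
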
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4da8858856fb..a4fbd911d566 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -491,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
491 * If device is suspended, complete the bio. 491 * If device is suspended, complete the bio.
492 */ 492 */
493 if (dm_noflush_suspending(ms->ti)) 493 if (dm_noflush_suspending(ms->ti))
494 bio->bi_error = DM_ENDIO_REQUEUE; 494 bio->bi_status = BLK_STS_DM_REQUEUE;
495 else 495 else
496 bio->bi_error = -EIO; 496 bio->bi_status = BLK_STS_IOERR;
497 497
498 bio_endio(bio); 498 bio_endio(bio);
499 return; 499 return;
@@ -627,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
627 * degrade the array. 627 * degrade the array.
628 */ 628 */
629 if (bio_op(bio) == REQ_OP_DISCARD) { 629 if (bio_op(bio) == REQ_OP_DISCARD) {
630 bio->bi_error = -EOPNOTSUPP; 630 bio->bi_status = BLK_STS_NOTSUPP;
631 bio_endio(bio); 631 bio_endio(bio);
632 return; 632 return;
633 } 633 }
@@ -1210,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
1210 1210
1211 r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0); 1211 r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
1212 if (r < 0 && r != -EWOULDBLOCK) 1212 if (r < 0 && r != -EWOULDBLOCK)
1213 return r; 1213 return DM_MAPIO_KILL;
1214 1214
1215 /* 1215 /*
1216 * If region is not in-sync queue the bio. 1216 * If region is not in-sync queue the bio.
1217 */ 1217 */
1218 if (!r || (r == -EWOULDBLOCK)) { 1218 if (!r || (r == -EWOULDBLOCK)) {
1219 if (bio->bi_opf & REQ_RAHEAD) 1219 if (bio->bi_opf & REQ_RAHEAD)
1220 return -EWOULDBLOCK; 1220 return DM_MAPIO_KILL;
1221 1221
1222 queue_bio(ms, bio, rw); 1222 queue_bio(ms, bio, rw);
1223 return DM_MAPIO_SUBMITTED; 1223 return DM_MAPIO_SUBMITTED;
@@ -1229,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
1229 */ 1229 */
1230 m = choose_mirror(ms, bio->bi_iter.bi_sector); 1230 m = choose_mirror(ms, bio->bi_iter.bi_sector);
1231 if (unlikely(!m)) 1231 if (unlikely(!m))
1232 return -EIO; 1232 return DM_MAPIO_KILL;
1233 1233
1234 dm_bio_record(&bio_record->details, bio); 1234 dm_bio_record(&bio_record->details, bio);
1235 bio_record->m = m; 1235 bio_record->m = m;
@@ -1239,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
1239 return DM_MAPIO_REMAPPED; 1239 return DM_MAPIO_REMAPPED;
1240} 1240}
1241 1241
1242static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) 1242static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1243 blk_status_t *error)
1243{ 1244{
1244 int rw = bio_data_dir(bio); 1245 int rw = bio_data_dir(bio);
1245 struct mirror_set *ms = (struct mirror_set *) ti->private; 1246 struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1255,16 +1256,16 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1255 if (!(bio->bi_opf & REQ_PREFLUSH) && 1256 if (!(bio->bi_opf & REQ_PREFLUSH) &&
1256 bio_op(bio) != REQ_OP_DISCARD) 1257 bio_op(bio) != REQ_OP_DISCARD)
1257 dm_rh_dec(ms->rh, bio_record->write_region); 1258 dm_rh_dec(ms->rh, bio_record->write_region);
1258 return error; 1259 return DM_ENDIO_DONE;
1259 } 1260 }
1260 1261
1261 if (error == -EOPNOTSUPP) 1262 if (*error == BLK_STS_NOTSUPP)
1262 goto out; 1263 goto out;
1263 1264
1264 if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) 1265 if (bio->bi_opf & REQ_RAHEAD)
1265 goto out; 1266 goto out;
1266 1267
1267 if (unlikely(error)) { 1268 if (unlikely(*error)) {
1268 if (!bio_record->details.bi_bdev) { 1269 if (!bio_record->details.bi_bdev) {
1269 /* 1270 /*
1270 * There wasn't enough memory to record necessary 1271 * There wasn't enough memory to record necessary
@@ -1272,7 +1273,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1272 * mirror in-sync. 1273 * mirror in-sync.
1273 */ 1274 */
1274 DMERR_LIMIT("Mirror read failed."); 1275 DMERR_LIMIT("Mirror read failed.");
1275 return -EIO; 1276 return DM_ENDIO_DONE;
1276 } 1277 }
1277 1278
1278 m = bio_record->m; 1279 m = bio_record->m;
@@ -1291,7 +1292,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1291 1292
1292 dm_bio_restore(bd, bio); 1293 dm_bio_restore(bd, bio);
1293 bio_record->details.bi_bdev = NULL; 1294 bio_record->details.bi_bdev = NULL;
1294 bio->bi_error = 0; 1295 bio->bi_status = 0;
1295 1296
1296 queue_bio(ms, bio, rw); 1297 queue_bio(ms, bio, rw);
1297 return DM_ENDIO_INCOMPLETE; 1298 return DM_ENDIO_INCOMPLETE;
@@ -1302,7 +1303,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1302out: 1303out:
1303 bio_record->details.bi_bdev = NULL; 1304 bio_record->details.bi_bdev = NULL;
1304 1305
1305 return error; 1306 return DM_ENDIO_DONE;
1306} 1307}
1307 1308
1308static void mirror_presuspend(struct dm_target *ti) 1309static void mirror_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b639fa7246ee..c6ebc5b1e00e 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
71 71
72static void dm_mq_start_queue(struct request_queue *q) 72static void dm_mq_start_queue(struct request_queue *q)
73{ 73{
74 blk_mq_start_stopped_hw_queues(q, true); 74 blk_mq_unquiesce_queue(q);
75 blk_mq_kick_requeue_list(q); 75 blk_mq_kick_requeue_list(q);
76} 76}
77 77
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
119 struct dm_rq_target_io *tio = info->tio; 119 struct dm_rq_target_io *tio = info->tio;
120 struct bio *bio = info->orig; 120 struct bio *bio = info->orig;
121 unsigned int nr_bytes = info->orig->bi_iter.bi_size; 121 unsigned int nr_bytes = info->orig->bi_iter.bi_size;
122 int error = clone->bi_error; 122 blk_status_t error = clone->bi_status;
123 123
124 bio_put(clone); 124 bio_put(clone);
125 125
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
158 * Do not use blk_end_request() here, because it may complete 158 * Do not use blk_end_request() here, because it may complete
159 * the original request before the clone, and break the ordering. 159 * the original request before the clone, and break the ordering.
160 */ 160 */
161 blk_update_request(tio->orig, 0, nr_bytes); 161 blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
162} 162}
163 163
164static struct dm_rq_target_io *tio_from_request(struct request *rq) 164static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
216 * Must be called without clone's queue lock held, 216 * Must be called without clone's queue lock held,
217 * see end_clone_request() for more details. 217 * see end_clone_request() for more details.
218 */ 218 */
219static void dm_end_request(struct request *clone, int error) 219static void dm_end_request(struct request *clone, blk_status_t error)
220{ 220{
221 int rw = rq_data_dir(clone); 221 int rw = rq_data_dir(clone);
222 struct dm_rq_target_io *tio = clone->end_io_data; 222 struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
285 rq_completed(md, rw, false); 285 rq_completed(md, rw, false);
286} 286}
287 287
288static void dm_done(struct request *clone, int error, bool mapped) 288static void dm_done(struct request *clone, blk_status_t error, bool mapped)
289{ 289{
290 int r = DM_ENDIO_DONE; 290 int r = DM_ENDIO_DONE;
291 struct dm_rq_target_io *tio = clone->end_io_data; 291 struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
298 r = rq_end_io(tio->ti, clone, error, &tio->info); 298 r = rq_end_io(tio->ti, clone, error, &tio->info);
299 } 299 }
300 300
301 if (unlikely(error == -EREMOTEIO)) { 301 if (unlikely(error == BLK_STS_TARGET)) {
302 if (req_op(clone) == REQ_OP_WRITE_SAME && 302 if (req_op(clone) == REQ_OP_WRITE_SAME &&
303 !clone->q->limits.max_write_same_sectors) 303 !clone->q->limits.max_write_same_sectors)
304 disable_write_same(tio->md); 304 disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
358 * Complete the clone and the original request with the error status 358 * Complete the clone and the original request with the error status
359 * through softirq context. 359 * through softirq context.
360 */ 360 */
361static void dm_complete_request(struct request *rq, int error) 361static void dm_complete_request(struct request *rq, blk_status_t error)
362{ 362{
363 struct dm_rq_target_io *tio = tio_from_request(rq); 363 struct dm_rq_target_io *tio = tio_from_request(rq);
364 364
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
375 * Target's rq_end_io() function isn't called. 375 * Target's rq_end_io() function isn't called.
376 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. 376 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
377 */ 377 */
378static void dm_kill_unmapped_request(struct request *rq, int error) 378static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
379{ 379{
380 rq->rq_flags |= RQF_FAILED; 380 rq->rq_flags |= RQF_FAILED;
381 dm_complete_request(rq, error); 381 dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
384/* 384/*
385 * Called with the clone's queue lock held (in the case of .request_fn) 385 * Called with the clone's queue lock held (in the case of .request_fn)
386 */ 386 */
387static void end_clone_request(struct request *clone, int error) 387static void end_clone_request(struct request *clone, blk_status_t error)
388{ 388{
389 struct dm_rq_target_io *tio = clone->end_io_data; 389 struct dm_rq_target_io *tio = clone->end_io_data;
390 390
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
401 401
402static void dm_dispatch_clone_request(struct request *clone, struct request *rq) 402static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
403{ 403{
404 int r; 404 blk_status_t r;
405 405
406 if (blk_queue_io_stat(clone->q)) 406 if (blk_queue_io_stat(clone->q))
407 clone->rq_flags |= RQF_IO_STAT; 407 clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio)
506 break; 506 break;
507 case DM_MAPIO_KILL: 507 case DM_MAPIO_KILL:
508 /* The target wants to complete the I/O */ 508 /* The target wants to complete the I/O */
509 dm_kill_unmapped_request(rq, -EIO); 509 dm_kill_unmapped_request(rq, BLK_STS_IOERR);
510 break; 510 break;
511 default: 511 default:
512 DMWARN("unimplemented target map return value: %d", r); 512 DMWARN("unimplemented target map return value: %d", r);
@@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
727 return __dm_rq_init_rq(set->driver_data, rq); 727 return __dm_rq_init_rq(set->driver_data, rq);
728} 728}
729 729
730static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, 730static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
731 const struct blk_mq_queue_data *bd) 731 const struct blk_mq_queue_data *bd)
732{ 732{
733 struct request *rq = bd->rq; 733 struct request *rq = bd->rq;
@@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
744 } 744 }
745 745
746 if (ti->type->busy && ti->type->busy(ti)) 746 if (ti->type->busy && ti->type->busy(ti))
747 return BLK_MQ_RQ_QUEUE_BUSY; 747 return BLK_STS_RESOURCE;
748 748
749 dm_start_request(md, rq); 749 dm_start_request(md, rq);
750 750
@@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
762 rq_end_stats(md, rq); 762 rq_end_stats(md, rq);
763 rq_completed(md, rq_data_dir(rq), false); 763 rq_completed(md, rq_data_dir(rq), false);
764 blk_mq_delay_run_hw_queue(hctx, 100/*ms*/); 764 blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
765 return BLK_MQ_RQ_QUEUE_BUSY; 765 return BLK_STS_RESOURCE;
766 } 766 }
767 767
768 return BLK_MQ_RQ_QUEUE_OK; 768 return BLK_STS_OK;
769} 769}
770 770
771static const struct blk_mq_ops dm_mq_ops = { 771static const struct blk_mq_ops dm_mq_ops = {
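
The dm_mq_queue_rq() hunks above follow the blk-mq-wide change of returning a blk_status_t from ->queue_rq; a minimal sketch for a hypothetical driver (example_device_busy and example_dispatch are assumed helpers):

	static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
					     const struct blk_mq_queue_data *bd)
	{
		if (example_device_busy(hctx->driver_data))	/* hypothetical busy check */
			return BLK_STS_RESOURCE;		/* was BLK_MQ_RQ_QUEUE_BUSY */

		example_dispatch(bd->rq);			/* hypothetical dispatch */
		return BLK_STS_OK;				/* was BLK_MQ_RQ_QUEUE_OK */
	}
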
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d21b95f..9813922e4fe5 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
24 struct dm_target *ti; 24 struct dm_target *ti;
25 struct request *orig, *clone; 25 struct request *orig, *clone;
26 struct kthread_work work; 26 struct kthread_work work;
27 int error; 27 blk_status_t error;
28 union map_info info; 28 union map_info info;
29 struct dm_stats_aux stats_aux; 29 struct dm_stats_aux stats_aux;
30 unsigned long duration_jiffies; 30 unsigned long duration_jiffies;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e152d9817c81..1ba41048b438 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
1590{ 1590{
1591 void *callback_data = bio->bi_private; 1591 void *callback_data = bio->bi_private;
1592 1592
1593 dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0); 1593 dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
1594} 1594}
1595 1595
1596static void start_full_bio(struct dm_snap_pending_exception *pe, 1596static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1690 /* Full snapshots are not usable */ 1690 /* Full snapshots are not usable */
1691 /* To get here the table must be live so s->active is always set. */ 1691 /* To get here the table must be live so s->active is always set. */
1692 if (!s->valid) 1692 if (!s->valid)
1693 return -EIO; 1693 return DM_MAPIO_KILL;
1694 1694
1695 /* FIXME: should only take write lock if we need 1695 /* FIXME: should only take write lock if we need
1696 * to copy an exception */ 1696 * to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1698 1698
1699 if (!s->valid || (unlikely(s->snapshot_overflowed) && 1699 if (!s->valid || (unlikely(s->snapshot_overflowed) &&
1700 bio_data_dir(bio) == WRITE)) { 1700 bio_data_dir(bio) == WRITE)) {
1701 r = -EIO; 1701 r = DM_MAPIO_KILL;
1702 goto out_unlock; 1702 goto out_unlock;
1703 } 1703 }
1704 1704
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1723 1723
1724 if (!s->valid || s->snapshot_overflowed) { 1724 if (!s->valid || s->snapshot_overflowed) {
1725 free_pending_exception(pe); 1725 free_pending_exception(pe);
1726 r = -EIO; 1726 r = DM_MAPIO_KILL;
1727 goto out_unlock; 1727 goto out_unlock;
1728 } 1728 }
1729 1729
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1741 DMERR("Snapshot overflowed: Unable to allocate exception."); 1741 DMERR("Snapshot overflowed: Unable to allocate exception.");
1742 } else 1742 } else
1743 __invalidate_snapshot(s, -ENOMEM); 1743 __invalidate_snapshot(s, -ENOMEM);
1744 r = -EIO; 1744 r = DM_MAPIO_KILL;
1745 goto out_unlock; 1745 goto out_unlock;
1746 } 1746 }
1747 } 1747 }
@@ -1851,14 +1851,15 @@ out_unlock:
1851 return r; 1851 return r;
1852} 1852}
1853 1853
1854static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error) 1854static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
1855 blk_status_t *error)
1855{ 1856{
1856 struct dm_snapshot *s = ti->private; 1857 struct dm_snapshot *s = ti->private;
1857 1858
1858 if (is_bio_tracked(bio)) 1859 if (is_bio_tracked(bio))
1859 stop_tracking_chunk(s, bio); 1860 stop_tracking_chunk(s, bio);
1860 1861
1861 return 0; 1862 return DM_ENDIO_DONE;
1862} 1863}
1863 1864
1864static void snapshot_merge_presuspend(struct dm_target *ti) 1865static void snapshot_merge_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 75152482f3ad..11621a0af887 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
375 } 375 }
376} 376}
377 377
378static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) 378static int stripe_end_io(struct dm_target *ti, struct bio *bio,
379 blk_status_t *error)
379{ 380{
380 unsigned i; 381 unsigned i;
381 char major_minor[16]; 382 char major_minor[16];
382 struct stripe_c *sc = ti->private; 383 struct stripe_c *sc = ti->private;
383 384
384 if (!error) 385 if (!*error)
385 return 0; /* I/O complete */ 386 return DM_ENDIO_DONE; /* I/O complete */
386 387
387 if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) 388 if (bio->bi_opf & REQ_RAHEAD)
388 return error; 389 return DM_ENDIO_DONE;
389 390
390 if (error == -EOPNOTSUPP) 391 if (*error == BLK_STS_NOTSUPP)
391 return error; 392 return DM_ENDIO_DONE;
392 393
393 memset(major_minor, 0, sizeof(major_minor)); 394 memset(major_minor, 0, sizeof(major_minor));
394 sprintf(major_minor, "%d:%d", 395 sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
409 schedule_work(&sc->trigger_event); 410 schedule_work(&sc->trigger_event);
410 } 411 }
411 412
412 return error; 413 return DM_ENDIO_DONE;
413} 414}
414 415
415static int stripe_iterate_devices(struct dm_target *ti, 416static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index b242b750542f..c0d7e60820c4 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
128 128
129static int io_err_map(struct dm_target *tt, struct bio *bio) 129static int io_err_map(struct dm_target *tt, struct bio *bio)
130{ 130{
131 return -EIO; 131 return DM_MAPIO_KILL;
132} 132}
133 133
134static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq, 134static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 28808e5ec0fd..9dec2f8cc739 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
383 * Even if r is set, there could be sub discards in flight that we 383 * Even if r is set, there could be sub discards in flight that we
384 * need to wait for. 384 * need to wait for.
385 */ 385 */
386 if (r && !op->parent_bio->bi_error) 386 if (r && !op->parent_bio->bi_status)
387 op->parent_bio->bi_error = r; 387 op->parent_bio->bi_status = errno_to_blk_status(r);
388 bio_endio(op->parent_bio); 388 bio_endio(op->parent_bio);
389} 389}
390 390
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
450} 450}
451 451
452static void cell_error_with_code(struct pool *pool, 452static void cell_error_with_code(struct pool *pool,
453 struct dm_bio_prison_cell *cell, int error_code) 453 struct dm_bio_prison_cell *cell, blk_status_t error_code)
454{ 454{
455 dm_cell_error(pool->prison, cell, error_code); 455 dm_cell_error(pool->prison, cell, error_code);
456 dm_bio_prison_free_cell(pool->prison, cell); 456 dm_bio_prison_free_cell(pool->prison, cell);
457} 457}
458 458
459static int get_pool_io_error_code(struct pool *pool) 459static blk_status_t get_pool_io_error_code(struct pool *pool)
460{ 460{
461 return pool->out_of_data_space ? -ENOSPC : -EIO; 461 return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
462} 462}
463 463
464static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell) 464static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
465{ 465{
466 int error = get_pool_io_error_code(pool); 466 cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
467
468 cell_error_with_code(pool, cell, error);
469} 467}
470 468
471static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell) 469static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
475 473
476static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell) 474static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
477{ 475{
478 cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE); 476 cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
479} 477}
480 478
481/*----------------------------------------------------------------*/ 479/*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
555 bio_list_init(master); 553 bio_list_init(master);
556} 554}
557 555
558static void error_bio_list(struct bio_list *bios, int error) 556static void error_bio_list(struct bio_list *bios, blk_status_t error)
559{ 557{
560 struct bio *bio; 558 struct bio *bio;
561 559
562 while ((bio = bio_list_pop(bios))) { 560 while ((bio = bio_list_pop(bios))) {
563 bio->bi_error = error; 561 bio->bi_status = error;
564 bio_endio(bio); 562 bio_endio(bio);
565 } 563 }
566} 564}
567 565
568static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error) 566static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
567 blk_status_t error)
569{ 568{
570 struct bio_list bios; 569 struct bio_list bios;
571 unsigned long flags; 570 unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
608 __merge_bio_list(&bios, &tc->retry_on_resume_list); 607 __merge_bio_list(&bios, &tc->retry_on_resume_list);
609 spin_unlock_irqrestore(&tc->lock, flags); 608 spin_unlock_irqrestore(&tc->lock, flags);
610 609
611 error_bio_list(&bios, DM_ENDIO_REQUEUE); 610 error_bio_list(&bios, BLK_STS_DM_REQUEUE);
612 requeue_deferred_cells(tc); 611 requeue_deferred_cells(tc);
613} 612}
614 613
615static void error_retry_list_with_code(struct pool *pool, int error) 614static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
616{ 615{
617 struct thin_c *tc; 616 struct thin_c *tc;
618 617
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
624 623
625static void error_retry_list(struct pool *pool) 624static void error_retry_list(struct pool *pool)
626{ 625{
627 int error = get_pool_io_error_code(pool); 626 error_retry_list_with_code(pool, get_pool_io_error_code(pool));
628
629 error_retry_list_with_code(pool, error);
630} 627}
631 628
632/* 629/*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
774 */ 771 */
775 atomic_t prepare_actions; 772 atomic_t prepare_actions;
776 773
777 int err; 774 blk_status_t status;
778 struct thin_c *tc; 775 struct thin_c *tc;
779 dm_block_t virt_begin, virt_end; 776 dm_block_t virt_begin, virt_end;
780 dm_block_t data_block; 777 dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
814{ 811{
815 struct dm_thin_new_mapping *m = context; 812 struct dm_thin_new_mapping *m = context;
816 813
817 m->err = read_err || write_err ? -EIO : 0; 814 m->status = read_err || write_err ? BLK_STS_IOERR : 0;
818 complete_mapping_preparation(m); 815 complete_mapping_preparation(m);
819} 816}
820 817
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
825 822
826 bio->bi_end_io = m->saved_bi_end_io; 823 bio->bi_end_io = m->saved_bi_end_io;
827 824
828 m->err = bio->bi_error; 825 m->status = bio->bi_status;
829 complete_mapping_preparation(m); 826 complete_mapping_preparation(m);
830} 827}
831 828
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
925 struct bio *bio = m->bio; 922 struct bio *bio = m->bio;
926 int r; 923 int r;
927 924
928 if (m->err) { 925 if (m->status) {
929 cell_error(pool, m->cell); 926 cell_error(pool, m->cell);
930 goto out; 927 goto out;
931 } 928 }
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
1495 spin_unlock_irqrestore(&tc->lock, flags); 1492 spin_unlock_irqrestore(&tc->lock, flags);
1496} 1493}
1497 1494
1498static int should_error_unserviceable_bio(struct pool *pool) 1495static blk_status_t should_error_unserviceable_bio(struct pool *pool)
1499{ 1496{
1500 enum pool_mode m = get_pool_mode(pool); 1497 enum pool_mode m = get_pool_mode(pool);
1501 1498
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
1503 case PM_WRITE: 1500 case PM_WRITE:
1504 /* Shouldn't get here */ 1501 /* Shouldn't get here */
1505 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode"); 1502 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
1506 return -EIO; 1503 return BLK_STS_IOERR;
1507 1504
1508 case PM_OUT_OF_DATA_SPACE: 1505 case PM_OUT_OF_DATA_SPACE:
1509 return pool->pf.error_if_no_space ? -ENOSPC : 0; 1506 return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
1510 1507
1511 case PM_READ_ONLY: 1508 case PM_READ_ONLY:
1512 case PM_FAIL: 1509 case PM_FAIL:
1513 return -EIO; 1510 return BLK_STS_IOERR;
1514 default: 1511 default:
1515 /* Shouldn't get here */ 1512 /* Shouldn't get here */
1516 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode"); 1513 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
1517 return -EIO; 1514 return BLK_STS_IOERR;
1518 } 1515 }
1519} 1516}
1520 1517
1521static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) 1518static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
1522{ 1519{
1523 int error = should_error_unserviceable_bio(pool); 1520 blk_status_t error = should_error_unserviceable_bio(pool);
1524 1521
1525 if (error) { 1522 if (error) {
1526 bio->bi_error = error; 1523 bio->bi_status = error;
1527 bio_endio(bio); 1524 bio_endio(bio);
1528 } else 1525 } else
1529 retry_on_resume(bio); 1526 retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
1533{ 1530{
1534 struct bio *bio; 1531 struct bio *bio;
1535 struct bio_list bios; 1532 struct bio_list bios;
1536 int error; 1533 blk_status_t error;
1537 1534
1538 error = should_error_unserviceable_bio(pool); 1535 error = should_error_unserviceable_bio(pool);
1539 if (error) { 1536 if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
2071 unsigned count = 0; 2068 unsigned count = 0;
2072 2069
2073 if (tc->requeue_mode) { 2070 if (tc->requeue_mode) {
2074 error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE); 2071 error_thin_bio_list(tc, &tc->deferred_bio_list,
2072 BLK_STS_DM_REQUEUE);
2075 return; 2073 return;
2076 } 2074 }
2077 2075
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
2322 if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { 2320 if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
2323 pool->pf.error_if_no_space = true; 2321 pool->pf.error_if_no_space = true;
2324 notify_of_pool_mode_change_to_oods(pool); 2322 notify_of_pool_mode_change_to_oods(pool);
2325 error_retry_list_with_code(pool, -ENOSPC); 2323 error_retry_list_with_code(pool, BLK_STS_NOSPC);
2326 } 2324 }
2327} 2325}
2328 2326
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
2624 thin_hook_bio(tc, bio); 2622 thin_hook_bio(tc, bio);
2625 2623
2626 if (tc->requeue_mode) { 2624 if (tc->requeue_mode) {
2627 bio->bi_error = DM_ENDIO_REQUEUE; 2625 bio->bi_status = BLK_STS_DM_REQUEUE;
2628 bio_endio(bio); 2626 bio_endio(bio);
2629 return DM_MAPIO_SUBMITTED; 2627 return DM_MAPIO_SUBMITTED;
2630 } 2628 }
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
4177 return thin_bio_map(ti, bio); 4175 return thin_bio_map(ti, bio);
4178} 4176}
4179 4177
4180static int thin_endio(struct dm_target *ti, struct bio *bio, int err) 4178static int thin_endio(struct dm_target *ti, struct bio *bio,
4179 blk_status_t *err)
4181{ 4180{
4182 unsigned long flags; 4181 unsigned long flags;
4183 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); 4182 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
4212 if (h->cell) 4211 if (h->cell)
4213 cell_defer_no_holder(h->tc, h->cell); 4212 cell_defer_no_holder(h->tc, h->cell);
4214 4213
4215 return 0; 4214 return DM_ENDIO_DONE;
4216} 4215}
4217 4216
4218static void thin_presuspend(struct dm_target *ti) 4217static void thin_presuspend(struct dm_target *ti)
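The dm-thin hunks above show the conversion this series applies throughout: error codes that used to travel as negative errnos in bio->bi_error become blk_status_t values in bio->bi_status, with errno_to_blk_status() used wherever an integer errno still arrives from a lower layer (as in end_discard() above). A minimal sketch of that boundary, assuming the 4.13-era block helpers (errno_to_blk_status, the BLK_STS_* constants); the completion function itself is purely illustrative:

#include <linux/bio.h>
#include <linux/blkdev.h>

/*
 * Illustrative only: complete a bio from a callback that still reports a
 * negative errno (e.g. a kcopyd or legacy helper). bi_status is a
 * blk_status_t, not an errno: 0 (BLK_STS_OK) on success, BLK_STS_IOERR,
 * BLK_STS_NOSPC, ... on failure.
 */
static void example_complete_bio(struct bio *bio, int err)
{
	bio->bi_status = errno_to_blk_status(err);
	bio_endio(bio);
}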
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 1ec9b2c51c07..b46705ebf01f 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
538/* 538/*
539 * End one "io" structure with a given error. 539 * End one "io" structure with a given error.
540 */ 540 */
541static void verity_finish_io(struct dm_verity_io *io, int error) 541static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
542{ 542{
543 struct dm_verity *v = io->v; 543 struct dm_verity *v = io->v;
544 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); 544 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
545 545
546 bio->bi_end_io = io->orig_bi_end_io; 546 bio->bi_end_io = io->orig_bi_end_io;
547 bio->bi_error = error; 547 bio->bi_status = status;
548 548
549 verity_fec_finish_io(io); 549 verity_fec_finish_io(io);
550 550
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
555{ 555{
556 struct dm_verity_io *io = container_of(w, struct dm_verity_io, work); 556 struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
557 557
558 verity_finish_io(io, verity_verify_io(io)); 558 verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
559} 559}
560 560
561static void verity_end_io(struct bio *bio) 561static void verity_end_io(struct bio *bio)
562{ 562{
563 struct dm_verity_io *io = bio->bi_private; 563 struct dm_verity_io *io = bio->bi_private;
564 564
565 if (bio->bi_error && !verity_fec_is_enabled(io->v)) { 565 if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
566 verity_finish_io(io, bio->bi_error); 566 verity_finish_io(io, bio->bi_status);
567 return; 567 return;
568 } 568 }
569 569
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
643 if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & 643 if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
644 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) { 644 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
645 DMERR_LIMIT("unaligned io"); 645 DMERR_LIMIT("unaligned io");
646 return -EIO; 646 return DM_MAPIO_KILL;
647 } 647 }
648 648
649 if (bio_end_sector(bio) >> 649 if (bio_end_sector(bio) >>
650 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { 650 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
651 DMERR_LIMIT("io out of range"); 651 DMERR_LIMIT("io out of range");
652 return -EIO; 652 return DM_MAPIO_KILL;
653 } 653 }
654 654
655 if (bio_data_dir(bio) == WRITE) 655 if (bio_data_dir(bio) == WRITE)
656 return -EIO; 656 return DM_MAPIO_KILL;
657 657
658 io = dm_per_bio_data(bio, ti->per_io_data_size); 658 io = dm_per_bio_data(bio, ti->per_io_data_size);
659 io->v = v; 659 io->v = v;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b616f11d8473..b65ca8dcfbdc 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
39 case REQ_OP_READ: 39 case REQ_OP_READ:
40 if (bio->bi_opf & REQ_RAHEAD) { 40 if (bio->bi_opf & REQ_RAHEAD) {
41 /* readahead of null bytes only wastes buffer cache */ 41 /* readahead of null bytes only wastes buffer cache */
42 return -EIO; 42 return DM_MAPIO_KILL;
43 } 43 }
44 zero_fill_bio(bio); 44 zero_fill_bio(bio);
45 break; 45 break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
47 /* writes get silently dropped */ 47 /* writes get silently dropped */
48 break; 48 break;
49 default: 49 default:
50 return -EIO; 50 return DM_MAPIO_KILL;
51 } 51 }
52 52
53 bio_endio(bio); 53 bio_endio(bio);
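In both dm-verity and dm-zero above, a bio-based .map method no longer signals failure by returning -EIO; it returns DM_MAPIO_KILL and lets DM core end the bio with BLK_STS_IOERR (see the __map_bio() switch in the dm.c hunks further down). A sketch of the new convention with a made-up target; the DM_MAPIO_* codes are the real device-mapper return values, everything else is illustrative:

#include <linux/device-mapper.h>

static int example_map(struct dm_target *ti, struct bio *bio)
{
	/* was: return -EIO; errnos are no longer returned from ->map */
	if (bio_data_dir(bio) == WRITE)
		return DM_MAPIO_KILL;

	/* reads are served directly by the target, as dm-zero does */
	zero_fill_bio(bio);
	bio_endio(bio);
	return DM_MAPIO_SUBMITTED;
}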
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 37ccd73c79ec..402946035308 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
63 */ 63 */
64struct dm_io { 64struct dm_io {
65 struct mapped_device *md; 65 struct mapped_device *md;
66 int error; 66 blk_status_t status;
67 atomic_t io_count; 67 atomic_t io_count;
68 struct bio *bio; 68 struct bio *bio;
69 unsigned long start_time; 69 unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
768 * Decrements the number of outstanding ios that a bio has been 768 * Decrements the number of outstanding ios that a bio has been
769 * cloned into, completing the original io if necc. 769 * cloned into, completing the original io if necc.
770 */ 770 */
771static void dec_pending(struct dm_io *io, int error) 771static void dec_pending(struct dm_io *io, blk_status_t error)
772{ 772{
773 unsigned long flags; 773 unsigned long flags;
774 int io_error; 774 blk_status_t io_error;
775 struct bio *bio; 775 struct bio *bio;
776 struct mapped_device *md = io->md; 776 struct mapped_device *md = io->md;
777 777
778 /* Push-back supersedes any I/O errors */ 778 /* Push-back supersedes any I/O errors */
779 if (unlikely(error)) { 779 if (unlikely(error)) {
780 spin_lock_irqsave(&io->endio_lock, flags); 780 spin_lock_irqsave(&io->endio_lock, flags);
781 if (!(io->error > 0 && __noflush_suspending(md))) 781 if (!(io->status == BLK_STS_DM_REQUEUE &&
782 io->error = error; 782 __noflush_suspending(md)))
783 io->status = error;
783 spin_unlock_irqrestore(&io->endio_lock, flags); 784 spin_unlock_irqrestore(&io->endio_lock, flags);
784 } 785 }
785 786
786 if (atomic_dec_and_test(&io->io_count)) { 787 if (atomic_dec_and_test(&io->io_count)) {
787 if (io->error == DM_ENDIO_REQUEUE) { 788 if (io->status == BLK_STS_DM_REQUEUE) {
788 /* 789 /*
789 * Target requested pushing back the I/O. 790 * Target requested pushing back the I/O.
790 */ 791 */
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
793 bio_list_add_head(&md->deferred, io->bio); 794 bio_list_add_head(&md->deferred, io->bio);
794 else 795 else
795 /* noflush suspend was interrupted. */ 796 /* noflush suspend was interrupted. */
796 io->error = -EIO; 797 io->status = BLK_STS_IOERR;
797 spin_unlock_irqrestore(&md->deferred_lock, flags); 798 spin_unlock_irqrestore(&md->deferred_lock, flags);
798 } 799 }
799 800
800 io_error = io->error; 801 io_error = io->status;
801 bio = io->bio; 802 bio = io->bio;
802 end_io_acct(io); 803 end_io_acct(io);
803 free_io(md, io); 804 free_io(md, io);
804 805
805 if (io_error == DM_ENDIO_REQUEUE) 806 if (io_error == BLK_STS_DM_REQUEUE)
806 return; 807 return;
807 808
808 if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { 809 if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
814 queue_io(md, bio); 815 queue_io(md, bio);
815 } else { 816 } else {
816 /* done with normal IO or empty flush */ 817 /* done with normal IO or empty flush */
817 bio->bi_error = io_error; 818 bio->bi_status = io_error;
818 bio_endio(bio); 819 bio_endio(bio);
819 } 820 }
820 } 821 }
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
838 839
839static void clone_endio(struct bio *bio) 840static void clone_endio(struct bio *bio)
840{ 841{
841 int error = bio->bi_error; 842 blk_status_t error = bio->bi_status;
842 int r = error;
843 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); 843 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
844 struct dm_io *io = tio->io; 844 struct dm_io *io = tio->io;
845 struct mapped_device *md = tio->io->md; 845 struct mapped_device *md = tio->io->md;
846 dm_endio_fn endio = tio->ti->type->end_io; 846 dm_endio_fn endio = tio->ti->type->end_io;
847 847
848 if (endio) { 848 if (unlikely(error == BLK_STS_TARGET)) {
849 r = endio(tio->ti, bio, error);
850 if (r < 0 || r == DM_ENDIO_REQUEUE)
851 /*
852 * error and requeue request are handled
853 * in dec_pending().
854 */
855 error = r;
856 else if (r == DM_ENDIO_INCOMPLETE)
857 /* The target will handle the io */
858 return;
859 else if (r) {
860 DMWARN("unimplemented target endio return value: %d", r);
861 BUG();
862 }
863 }
864
865 if (unlikely(r == -EREMOTEIO)) {
866 if (bio_op(bio) == REQ_OP_WRITE_SAME && 849 if (bio_op(bio) == REQ_OP_WRITE_SAME &&
867 !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) 850 !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
868 disable_write_same(md); 851 disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
871 disable_write_zeroes(md); 854 disable_write_zeroes(md);
872 } 855 }
873 856
857 if (endio) {
858 int r = endio(tio->ti, bio, &error);
859 switch (r) {
860 case DM_ENDIO_REQUEUE:
861 error = BLK_STS_DM_REQUEUE;
862 /*FALLTHRU*/
863 case DM_ENDIO_DONE:
864 break;
865 case DM_ENDIO_INCOMPLETE:
866 /* The target will handle the io */
867 return;
868 default:
869 DMWARN("unimplemented target endio return value: %d", r);
870 BUG();
871 }
872 }
873
874 free_tio(tio); 874 free_tio(tio);
875 dec_pending(io, error); 875 dec_pending(io, error);
876} 876}
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
1036 1036
1037 while ((bio = bio_list_pop(&list))) { 1037 while ((bio = bio_list_pop(&list))) {
1038 struct bio_set *bs = bio->bi_pool; 1038 struct bio_set *bs = bio->bi_pool;
1039 if (unlikely(!bs) || bs == fs_bio_set) { 1039 if (unlikely(!bs) || bs == fs_bio_set ||
1040 !bs->rescue_workqueue) {
1040 bio_list_add(&current->bio_list[i], bio); 1041 bio_list_add(&current->bio_list[i], bio);
1041 continue; 1042 continue;
1042 } 1043 }
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
1084 r = ti->type->map(ti, clone); 1085 r = ti->type->map(ti, clone);
1085 dm_offload_end(&o); 1086 dm_offload_end(&o);
1086 1087
1087 if (r == DM_MAPIO_REMAPPED) { 1088 switch (r) {
1089 case DM_MAPIO_SUBMITTED:
1090 break;
1091 case DM_MAPIO_REMAPPED:
1088 /* the bio has been remapped so dispatch it */ 1092 /* the bio has been remapped so dispatch it */
1089
1090 trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, 1093 trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
1091 tio->io->bio->bi_bdev->bd_dev, sector); 1094 tio->io->bio->bi_bdev->bd_dev, sector);
1092
1093 generic_make_request(clone); 1095 generic_make_request(clone);
1094 } else if (r < 0 || r == DM_MAPIO_REQUEUE) { 1096 break;
1095 /* error the io and bail out, or requeue it if needed */ 1097 case DM_MAPIO_KILL:
1096 dec_pending(tio->io, r); 1098 dec_pending(tio->io, BLK_STS_IOERR);
1099 free_tio(tio);
1100 break;
1101 case DM_MAPIO_REQUEUE:
1102 dec_pending(tio->io, BLK_STS_DM_REQUEUE);
1097 free_tio(tio); 1103 free_tio(tio);
1098 } else if (r != DM_MAPIO_SUBMITTED) { 1104 break;
1105 default:
1099 DMWARN("unimplemented target map return value: %d", r); 1106 DMWARN("unimplemented target map return value: %d", r);
1100 BUG(); 1107 BUG();
1101 } 1108 }
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
1360 ci.map = map; 1367 ci.map = map;
1361 ci.md = md; 1368 ci.md = md;
1362 ci.io = alloc_io(md); 1369 ci.io = alloc_io(md);
1363 ci.io->error = 0; 1370 ci.io->status = 0;
1364 atomic_set(&ci.io->io_count, 1); 1371 atomic_set(&ci.io->io_count, 1);
1365 ci.io->bio = bio; 1372 ci.io->bio = bio;
1366 ci.io->md = md; 1373 ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
1527 * Initialize aspects of queue that aren't relevant for blk-mq 1534 * Initialize aspects of queue that aren't relevant for blk-mq
1528 */ 1535 */
1529 md->queue->backing_dev_info->congested_fn = dm_any_congested; 1536 md->queue->backing_dev_info->congested_fn = dm_any_congested;
1530 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1531} 1537}
1532 1538
1533static void cleanup_mapped_device(struct mapped_device *md) 1539static void cleanup_mapped_device(struct mapped_device *md)
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
2654 BUG(); 2660 BUG();
2655 } 2661 }
2656 2662
2657 pools->bs = bioset_create_nobvec(pool_size, front_pad); 2663 pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
2658 if (!pools->bs) 2664 if (!pools->bs)
2659 goto out; 2665 goto out;
2660 2666
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 84e76ebac4d4..31bcbfb09fef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
185static bool create_on_open = true; 185static bool create_on_open = true;
186 186
187/* bio_clone_mddev 187/* bio_clone_mddev
188 * like bio_clone, but with a local bio set 188 * like bio_clone_bioset, but with a local bio set
189 */ 189 */
190 190
191struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, 191struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
265 unsigned int sectors; 265 unsigned int sectors;
266 int cpu; 266 int cpu;
267 267
268 blk_queue_split(q, &bio, q->bio_split); 268 blk_queue_split(q, &bio);
269 269
270 if (mddev == NULL || mddev->pers == NULL) { 270 if (mddev == NULL || mddev->pers == NULL) {
271 bio_io_error(bio); 271 bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
273 } 273 }
274 if (mddev->ro == 1 && unlikely(rw == WRITE)) { 274 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
275 if (bio_sectors(bio) != 0) 275 if (bio_sectors(bio) != 0)
276 bio->bi_error = -EROFS; 276 bio->bi_status = BLK_STS_IOERR;
277 bio_endio(bio); 277 bio_endio(bio);
278 return BLK_QC_T_NONE; 278 return BLK_QC_T_NONE;
279 } 279 }
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
719 struct md_rdev *rdev = bio->bi_private; 719 struct md_rdev *rdev = bio->bi_private;
720 struct mddev *mddev = rdev->mddev; 720 struct mddev *mddev = rdev->mddev;
721 721
722 if (bio->bi_error) { 722 if (bio->bi_status) {
723 pr_err("md: super_written gets error=%d\n", bio->bi_error); 723 pr_err("md: super_written gets error=%d\n", bio->bi_status);
724 md_error(mddev, rdev); 724 md_error(mddev, rdev);
725 if (!test_bit(Faulty, &rdev->flags) 725 if (!test_bit(Faulty, &rdev->flags)
726 && (bio->bi_opf & MD_FAILFAST)) { 726 && (bio->bi_opf & MD_FAILFAST)) {
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
801 801
802 submit_bio_wait(bio); 802 submit_bio_wait(bio);
803 803
804 ret = !bio->bi_error; 804 ret = !bio->bi_status;
805 bio_put(bio); 805 bio_put(bio);
806 return ret; 806 return ret;
807} 807}
@@ -5428,7 +5428,7 @@ int md_run(struct mddev *mddev)
5428 } 5428 }
5429 5429
5430 if (mddev->bio_set == NULL) { 5430 if (mddev->bio_set == NULL) {
5431 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0); 5431 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5432 if (!mddev->bio_set) 5432 if (!mddev->bio_set)
5433 return -ENOMEM; 5433 return -ENOMEM;
5434 } 5434 }
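Two smaller API changes appear in the md.c hunks: blk_queue_split() lost its bio_set argument (the queue now splits into its own internal pool), and bioset_create() gained a flags argument that replaces bioset_create_nobvec() — BIOSET_NEED_BVECS requests a bvec pool, BIOSET_NEED_RESCUER a rescuer workqueue. A sketch of the new allocation call; the pool and init function are illustrative only:

#include <linux/bio.h>
#include <linux/errno.h>

static struct bio_set *example_bs;

static int example_init(void)
{
	/* was: bioset_create(BIO_POOL_SIZE, 0) or bioset_create_nobvec(...) */
	example_bs = bioset_create(BIO_POOL_SIZE, 0,
				   BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
	return example_bs ? 0 : -ENOMEM;
}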
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521d93e9..68d036e64041 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
73 * operation and are ready to return a success/failure code to the buffer 73 * operation and are ready to return a success/failure code to the buffer
74 * cache layer. 74 * cache layer.
75 */ 75 */
76static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) 76static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
77{ 77{
78 struct bio *bio = mp_bh->master_bio; 78 struct bio *bio = mp_bh->master_bio;
79 struct mpconf *conf = mp_bh->mddev->private; 79 struct mpconf *conf = mp_bh->mddev->private;
80 80
81 bio->bi_error = err; 81 bio->bi_status = status;
82 bio_endio(bio); 82 bio_endio(bio);
83 mempool_free(mp_bh, conf->pool); 83 mempool_free(mp_bh, conf->pool);
84} 84}
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
89 struct mpconf *conf = mp_bh->mddev->private; 89 struct mpconf *conf = mp_bh->mddev->private;
90 struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; 90 struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
91 91
92 if (!bio->bi_error) 92 if (!bio->bi_status)
93 multipath_end_bh_io(mp_bh, 0); 93 multipath_end_bh_io(mp_bh, 0);
94 else if (!(bio->bi_opf & REQ_RAHEAD)) { 94 else if (!(bio->bi_opf & REQ_RAHEAD)) {
95 /* 95 /*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
102 (unsigned long long)bio->bi_iter.bi_sector); 102 (unsigned long long)bio->bi_iter.bi_sector);
103 multipath_reschedule_retry(mp_bh); 103 multipath_reschedule_retry(mp_bh);
104 } else 104 } else
105 multipath_end_bh_io(mp_bh, bio->bi_error); 105 multipath_end_bh_io(mp_bh, bio->bi_status);
106 rdev_dec_pending(rdev, conf->mddev); 106 rdev_dec_pending(rdev, conf->mddev);
107} 107}
108 108
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
347 pr_err("multipath: %s: unrecoverable IO read error for block %llu\n", 347 pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
348 bdevname(bio->bi_bdev,b), 348 bdevname(bio->bi_bdev,b),
349 (unsigned long long)bio->bi_iter.bi_sector); 349 (unsigned long long)bio->bi_iter.bi_sector);
350 multipath_end_bh_io(mp_bh, -EIO); 350 multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
351 } else { 351 } else {
352 pr_err("multipath: %s: redirecting sector %llu to another IO path\n", 352 pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
353 bdevname(bio->bi_bdev,b), 353 bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e1a7e3d4c5e4..98ca2c1d3226 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
277 struct r1conf *conf = r1_bio->mddev->private; 277 struct r1conf *conf = r1_bio->mddev->private;
278 278
279 if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) 279 if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
280 bio->bi_error = -EIO; 280 bio->bi_status = BLK_STS_IOERR;
281 281
282 bio_endio(bio); 282 bio_endio(bio);
283 /* 283 /*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
335 335
336static void raid1_end_read_request(struct bio *bio) 336static void raid1_end_read_request(struct bio *bio)
337{ 337{
338 int uptodate = !bio->bi_error; 338 int uptodate = !bio->bi_status;
339 struct r1bio *r1_bio = bio->bi_private; 339 struct r1bio *r1_bio = bio->bi_private;
340 struct r1conf *conf = r1_bio->mddev->private; 340 struct r1conf *conf = r1_bio->mddev->private;
341 struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev; 341 struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
426 struct md_rdev *rdev = conf->mirrors[mirror].rdev; 426 struct md_rdev *rdev = conf->mirrors[mirror].rdev;
427 bool discard_error; 427 bool discard_error;
428 428
429 discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; 429 discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
430 430
431 /* 431 /*
432 * 'one mirror IO has finished' event handler: 432 * 'one mirror IO has finished' event handler:
433 */ 433 */
434 if (bio->bi_error && !discard_error) { 434 if (bio->bi_status && !discard_error) {
435 set_bit(WriteErrorSeen, &rdev->flags); 435 set_bit(WriteErrorSeen, &rdev->flags);
436 if (!test_and_set_bit(WantReplacement, &rdev->flags)) 436 if (!test_and_set_bit(WantReplacement, &rdev->flags))
437 set_bit(MD_RECOVERY_NEEDED, & 437 set_bit(MD_RECOVERY_NEEDED, &
@@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
802 bio->bi_next = NULL; 802 bio->bi_next = NULL;
803 bio->bi_bdev = rdev->bdev; 803 bio->bi_bdev = rdev->bdev;
804 if (test_bit(Faulty, &rdev->flags)) { 804 if (test_bit(Faulty, &rdev->flags)) {
805 bio->bi_error = -EIO; 805 bio->bi_status = BLK_STS_IOERR;
806 bio_endio(bio); 806 bio_endio(bio);
807 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 807 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
808 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 808 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
1856 * or re-read if the read failed. 1856 * or re-read if the read failed.
1857 * We don't do much here, just schedule handling by raid1d 1857 * We don't do much here, just schedule handling by raid1d
1858 */ 1858 */
1859 if (!bio->bi_error) 1859 if (!bio->bi_status)
1860 set_bit(R1BIO_Uptodate, &r1_bio->state); 1860 set_bit(R1BIO_Uptodate, &r1_bio->state);
1861 1861
1862 if (atomic_dec_and_test(&r1_bio->remaining)) 1862 if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
1865 1865
1866static void end_sync_write(struct bio *bio) 1866static void end_sync_write(struct bio *bio)
1867{ 1867{
1868 int uptodate = !bio->bi_error; 1868 int uptodate = !bio->bi_status;
1869 struct r1bio *r1_bio = get_resync_r1bio(bio); 1869 struct r1bio *r1_bio = get_resync_r1bio(bio);
1870 struct mddev *mddev = r1_bio->mddev; 1870 struct mddev *mddev = r1_bio->mddev;
1871 struct r1conf *conf = mddev->private; 1871 struct r1conf *conf = mddev->private;
@@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
2058 idx ++; 2058 idx ++;
2059 } 2059 }
2060 set_bit(R1BIO_Uptodate, &r1_bio->state); 2060 set_bit(R1BIO_Uptodate, &r1_bio->state);
2061 bio->bi_error = 0; 2061 bio->bi_status = 0;
2062 return 1; 2062 return 1;
2063} 2063}
2064 2064
@@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
2082 for (i = 0; i < conf->raid_disks * 2; i++) { 2082 for (i = 0; i < conf->raid_disks * 2; i++) {
2083 int j; 2083 int j;
2084 int size; 2084 int size;
2085 int error; 2085 blk_status_t status;
2086 struct bio_vec *bi; 2086 struct bio_vec *bi;
2087 struct bio *b = r1_bio->bios[i]; 2087 struct bio *b = r1_bio->bios[i];
2088 struct resync_pages *rp = get_resync_pages(b); 2088 struct resync_pages *rp = get_resync_pages(b);
2089 if (b->bi_end_io != end_sync_read) 2089 if (b->bi_end_io != end_sync_read)
2090 continue; 2090 continue;
2091 /* fixup the bio for reuse, but preserve errno */ 2091 /* fixup the bio for reuse, but preserve errno */
2092 error = b->bi_error; 2092 status = b->bi_status;
2093 bio_reset(b); 2093 bio_reset(b);
2094 b->bi_error = error; 2094 b->bi_status = status;
2095 b->bi_vcnt = vcnt; 2095 b->bi_vcnt = vcnt;
2096 b->bi_iter.bi_size = r1_bio->sectors << 9; 2096 b->bi_iter.bi_size = r1_bio->sectors << 9;
2097 b->bi_iter.bi_sector = r1_bio->sector + 2097 b->bi_iter.bi_sector = r1_bio->sector +
@@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
2113 } 2113 }
2114 for (primary = 0; primary < conf->raid_disks * 2; primary++) 2114 for (primary = 0; primary < conf->raid_disks * 2; primary++)
2115 if (r1_bio->bios[primary]->bi_end_io == end_sync_read && 2115 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
2116 !r1_bio->bios[primary]->bi_error) { 2116 !r1_bio->bios[primary]->bi_status) {
2117 r1_bio->bios[primary]->bi_end_io = NULL; 2117 r1_bio->bios[primary]->bi_end_io = NULL;
2118 rdev_dec_pending(conf->mirrors[primary].rdev, mddev); 2118 rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
2119 break; 2119 break;
@@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
2123 int j; 2123 int j;
2124 struct bio *pbio = r1_bio->bios[primary]; 2124 struct bio *pbio = r1_bio->bios[primary];
2125 struct bio *sbio = r1_bio->bios[i]; 2125 struct bio *sbio = r1_bio->bios[i];
2126 int error = sbio->bi_error; 2126 blk_status_t status = sbio->bi_status;
2127 struct page **ppages = get_resync_pages(pbio)->pages; 2127 struct page **ppages = get_resync_pages(pbio)->pages;
2128 struct page **spages = get_resync_pages(sbio)->pages; 2128 struct page **spages = get_resync_pages(sbio)->pages;
2129 struct bio_vec *bi; 2129 struct bio_vec *bi;
@@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
2132 if (sbio->bi_end_io != end_sync_read) 2132 if (sbio->bi_end_io != end_sync_read)
2133 continue; 2133 continue;
2134 /* Now we can 'fixup' the error value */ 2134 /* Now we can 'fixup' the error value */
2135 sbio->bi_error = 0; 2135 sbio->bi_status = 0;
2136 2136
2137 bio_for_each_segment_all(bi, sbio, j) 2137 bio_for_each_segment_all(bi, sbio, j)
2138 page_len[j] = bi->bv_len; 2138 page_len[j] = bi->bv_len;
2139 2139
2140 if (!error) { 2140 if (!status) {
2141 for (j = vcnt; j-- ; ) { 2141 for (j = vcnt; j-- ; ) {
2142 if (memcmp(page_address(ppages[j]), 2142 if (memcmp(page_address(ppages[j]),
2143 page_address(spages[j]), 2143 page_address(spages[j]),
@@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
2149 if (j >= 0) 2149 if (j >= 0)
2150 atomic64_add(r1_bio->sectors, &mddev->resync_mismatches); 2150 atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
2151 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) 2151 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
2152 && !error)) { 2152 && !status)) {
2153 /* No need to write to this device. */ 2153 /* No need to write to this device. */
2154 sbio->bi_end_io = NULL; 2154 sbio->bi_end_io = NULL;
2155 rdev_dec_pending(conf->mirrors[i].rdev, mddev); 2155 rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
2400 struct bio *bio = r1_bio->bios[m]; 2400 struct bio *bio = r1_bio->bios[m];
2401 if (bio->bi_end_io == NULL) 2401 if (bio->bi_end_io == NULL)
2402 continue; 2402 continue;
2403 if (!bio->bi_error && 2403 if (!bio->bi_status &&
2404 test_bit(R1BIO_MadeGood, &r1_bio->state)) { 2404 test_bit(R1BIO_MadeGood, &r1_bio->state)) {
2405 rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); 2405 rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
2406 } 2406 }
2407 if (bio->bi_error && 2407 if (bio->bi_status &&
2408 test_bit(R1BIO_WriteError, &r1_bio->state)) { 2408 test_bit(R1BIO_WriteError, &r1_bio->state)) {
2409 if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) 2409 if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
2410 md_error(conf->mddev, rdev); 2410 md_error(conf->mddev, rdev);
@@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2955 if (!conf->r1bio_pool) 2955 if (!conf->r1bio_pool)
2956 goto abort; 2956 goto abort;
2957 2957
2958 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); 2958 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
2959 if (!conf->bio_split) 2959 if (!conf->bio_split)
2960 goto abort; 2960 goto abort;
2961 2961
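The process_checks() hunks above illustrate one subtlety of the conversion: bio_reset() clears bi_status along with the rest of the bio, so a completion status that must survive a reset has to be saved and restored as a blk_status_t rather than an int. A minimal sketch of that save/restore, assuming only the generic bio API:

static void example_reuse_bio(struct bio *b)
{
	blk_status_t status = b->bi_status;	/* preserve the completion status */

	bio_reset(b);				/* clears bi_status, flags, iter, ... */
	b->bi_status = status;
}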
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 797ed60abd5e..57a250fdbbcc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
336 struct r10conf *conf = r10_bio->mddev->private; 336 struct r10conf *conf = r10_bio->mddev->private;
337 337
338 if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) 338 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
339 bio->bi_error = -EIO; 339 bio->bi_status = BLK_STS_IOERR;
340 340
341 bio_endio(bio); 341 bio_endio(bio);
342 /* 342 /*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
389 389
390static void raid10_end_read_request(struct bio *bio) 390static void raid10_end_read_request(struct bio *bio)
391{ 391{
392 int uptodate = !bio->bi_error; 392 int uptodate = !bio->bi_status;
393 struct r10bio *r10_bio = bio->bi_private; 393 struct r10bio *r10_bio = bio->bi_private;
394 int slot, dev; 394 int slot, dev;
395 struct md_rdev *rdev; 395 struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
477 struct bio *to_put = NULL; 477 struct bio *to_put = NULL;
478 bool discard_error; 478 bool discard_error;
479 479
480 discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; 480 discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
481 481
482 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); 482 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
483 483
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
491 /* 491 /*
492 * this branch is our 'one mirror IO has finished' event handler: 492 * this branch is our 'one mirror IO has finished' event handler:
493 */ 493 */
494 if (bio->bi_error && !discard_error) { 494 if (bio->bi_status && !discard_error) {
495 if (repl) 495 if (repl)
496 /* Never record new bad blocks to replacement, 496 /* Never record new bad blocks to replacement,
497 * just fail it. 497 * just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
913 bio->bi_next = NULL; 913 bio->bi_next = NULL;
914 bio->bi_bdev = rdev->bdev; 914 bio->bi_bdev = rdev->bdev;
915 if (test_bit(Faulty, &rdev->flags)) { 915 if (test_bit(Faulty, &rdev->flags)) {
916 bio->bi_error = -EIO; 916 bio->bi_status = BLK_STS_IOERR;
917 bio_endio(bio); 917 bio_endio(bio);
918 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 918 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
919 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 919 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
1098 bio->bi_next = NULL; 1098 bio->bi_next = NULL;
1099 bio->bi_bdev = rdev->bdev; 1099 bio->bi_bdev = rdev->bdev;
1100 if (test_bit(Faulty, &rdev->flags)) { 1100 if (test_bit(Faulty, &rdev->flags)) {
1101 bio->bi_error = -EIO; 1101 bio->bi_status = BLK_STS_IOERR;
1102 bio_endio(bio); 1102 bio_endio(bio);
1103 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 1103 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
1104 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 1104 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
1888{ 1888{
1889 struct r10conf *conf = r10_bio->mddev->private; 1889 struct r10conf *conf = r10_bio->mddev->private;
1890 1890
1891 if (!bio->bi_error) 1891 if (!bio->bi_status)
1892 set_bit(R10BIO_Uptodate, &r10_bio->state); 1892 set_bit(R10BIO_Uptodate, &r10_bio->state);
1893 else 1893 else
1894 /* The write handler will notice the lack of 1894 /* The write handler will notice the lack of
@@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
1972 else 1972 else
1973 rdev = conf->mirrors[d].rdev; 1973 rdev = conf->mirrors[d].rdev;
1974 1974
1975 if (bio->bi_error) { 1975 if (bio->bi_status) {
1976 if (repl) 1976 if (repl)
1977 md_error(mddev, rdev); 1977 md_error(mddev, rdev);
1978 else { 1978 else {
@@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2021 2021
2022 /* find the first device with a block */ 2022 /* find the first device with a block */
2023 for (i=0; i<conf->copies; i++) 2023 for (i=0; i<conf->copies; i++)
2024 if (!r10_bio->devs[i].bio->bi_error) 2024 if (!r10_bio->devs[i].bio->bi_status)
2025 break; 2025 break;
2026 2026
2027 if (i == conf->copies) 2027 if (i == conf->copies)
@@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2050 tpages = get_resync_pages(tbio)->pages; 2050 tpages = get_resync_pages(tbio)->pages;
2051 d = r10_bio->devs[i].devnum; 2051 d = r10_bio->devs[i].devnum;
2052 rdev = conf->mirrors[d].rdev; 2052 rdev = conf->mirrors[d].rdev;
2053 if (!r10_bio->devs[i].bio->bi_error) { 2053 if (!r10_bio->devs[i].bio->bi_status) {
2054 /* We know that the bi_io_vec layout is the same for 2054 /* We know that the bi_io_vec layout is the same for
2055 * both 'first' and 'i', so we just compare them. 2055 * both 'first' and 'i', so we just compare them.
2056 * All vec entries are PAGE_SIZE; 2056 * All vec entries are PAGE_SIZE;
@@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2633 rdev = conf->mirrors[dev].rdev; 2633 rdev = conf->mirrors[dev].rdev;
2634 if (r10_bio->devs[m].bio == NULL) 2634 if (r10_bio->devs[m].bio == NULL)
2635 continue; 2635 continue;
2636 if (!r10_bio->devs[m].bio->bi_error) { 2636 if (!r10_bio->devs[m].bio->bi_status) {
2637 rdev_clear_badblocks( 2637 rdev_clear_badblocks(
2638 rdev, 2638 rdev,
2639 r10_bio->devs[m].addr, 2639 r10_bio->devs[m].addr,
@@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2649 if (r10_bio->devs[m].repl_bio == NULL) 2649 if (r10_bio->devs[m].repl_bio == NULL)
2650 continue; 2650 continue;
2651 2651
2652 if (!r10_bio->devs[m].repl_bio->bi_error) { 2652 if (!r10_bio->devs[m].repl_bio->bi_status) {
2653 rdev_clear_badblocks( 2653 rdev_clear_badblocks(
2654 rdev, 2654 rdev,
2655 r10_bio->devs[m].addr, 2655 r10_bio->devs[m].addr,
@@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2675 r10_bio->devs[m].addr, 2675 r10_bio->devs[m].addr,
2676 r10_bio->sectors, 0); 2676 r10_bio->sectors, 0);
2677 rdev_dec_pending(rdev, conf->mddev); 2677 rdev_dec_pending(rdev, conf->mddev);
2678 } else if (bio != NULL && bio->bi_error) { 2678 } else if (bio != NULL && bio->bi_status) {
2679 fail = true; 2679 fail = true;
2680 if (!narrow_write_error(r10_bio, m)) { 2680 if (!narrow_write_error(r10_bio, m)) {
2681 md_error(conf->mddev, rdev); 2681 md_error(conf->mddev, rdev);
@@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
3267 r10_bio->devs[i].repl_bio->bi_end_io = NULL; 3267 r10_bio->devs[i].repl_bio->bi_end_io = NULL;
3268 3268
3269 bio = r10_bio->devs[i].bio; 3269 bio = r10_bio->devs[i].bio;
3270 bio->bi_error = -EIO; 3270 bio->bi_status = BLK_STS_IOERR;
3271 rcu_read_lock(); 3271 rcu_read_lock();
3272 rdev = rcu_dereference(conf->mirrors[d].rdev); 3272 rdev = rcu_dereference(conf->mirrors[d].rdev);
3273 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { 3273 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
3309 3309
3310 /* Need to set up for writing to the replacement */ 3310 /* Need to set up for writing to the replacement */
3311 bio = r10_bio->devs[i].repl_bio; 3311 bio = r10_bio->devs[i].repl_bio;
3312 bio->bi_error = -EIO; 3312 bio->bi_status = BLK_STS_IOERR;
3313 3313
3314 sector = r10_bio->devs[i].addr; 3314 sector = r10_bio->devs[i].addr;
3315 bio->bi_next = biolist; 3315 bio->bi_next = biolist;
@@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
3375 3375
3376 if (bio->bi_end_io == end_sync_read) { 3376 if (bio->bi_end_io == end_sync_read) {
3377 md_sync_acct(bio->bi_bdev, nr_sectors); 3377 md_sync_acct(bio->bi_bdev, nr_sectors);
3378 bio->bi_error = 0; 3378 bio->bi_status = 0;
3379 generic_make_request(bio); 3379 generic_make_request(bio);
3380 } 3380 }
3381 } 3381 }
@@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
3552 if (!conf->r10bio_pool) 3552 if (!conf->r10bio_pool)
3553 goto out; 3553 goto out;
3554 3554
3555 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); 3555 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
3556 if (!conf->bio_split) 3556 if (!conf->bio_split)
3557 goto out; 3557 goto out;
3558 3558
@@ -4397,7 +4397,7 @@ read_more:
4397 read_bio->bi_end_io = end_reshape_read; 4397 read_bio->bi_end_io = end_reshape_read;
4398 bio_set_op_attrs(read_bio, REQ_OP_READ, 0); 4398 bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
4399 read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); 4399 read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
4400 read_bio->bi_error = 0; 4400 read_bio->bi_status = 0;
4401 read_bio->bi_vcnt = 0; 4401 read_bio->bi_vcnt = 0;
4402 read_bio->bi_iter.bi_size = 0; 4402 read_bio->bi_iter.bi_size = 0;
4403 r10_bio->master_bio = read_bio; 4403 r10_bio->master_bio = read_bio;
@@ -4641,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
4641 rdev = conf->mirrors[d].rdev; 4641 rdev = conf->mirrors[d].rdev;
4642 } 4642 }
4643 4643
4644 if (bio->bi_error) { 4644 if (bio->bi_status) {
4645 /* FIXME should record badblock */ 4645 /* FIXME should record badblock */
4646 md_error(mddev, rdev); 4646 md_error(mddev, rdev);
4647 } 4647 }
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0a7af8b0a80a..bfa1e907c472 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
572 struct r5l_log *log = io->log; 572 struct r5l_log *log = io->log;
573 unsigned long flags; 573 unsigned long flags;
574 574
575 if (bio->bi_error) 575 if (bio->bi_status)
576 md_error(log->rdev->mddev, log->rdev); 576 md_error(log->rdev->mddev, log->rdev);
577 577
578 bio_put(bio); 578 bio_put(bio);
@@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
1247 unsigned long flags; 1247 unsigned long flags;
1248 struct r5l_io_unit *io; 1248 struct r5l_io_unit *io;
1249 1249
1250 if (bio->bi_error) 1250 if (bio->bi_status)
1251 md_error(log->rdev->mddev, log->rdev); 1251 md_error(log->rdev->mddev, log->rdev);
1252 1252
1253 spin_lock_irqsave(&log->io_list_lock, flags); 1253 spin_lock_irqsave(&log->io_list_lock, flags);
@@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
3063 if (!log->io_pool) 3063 if (!log->io_pool)
3064 goto io_pool; 3064 goto io_pool;
3065 3065
3066 log->bs = bioset_create(R5L_POOL_SIZE, 0); 3066 log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
3067 if (!log->bs) 3067 if (!log->bs)
3068 goto io_bs; 3068 goto io_bs;
3069 3069
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index ccce92e68d7f..77cce3573aa8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
397 397
398 pr_debug("%s: seq: %llu\n", __func__, io->seq); 398 pr_debug("%s: seq: %llu\n", __func__, io->seq);
399 399
400 if (bio->bi_error) 400 if (bio->bi_status)
401 md_error(ppl_conf->mddev, log->rdev); 401 md_error(ppl_conf->mddev, log->rdev);
402 402
403 list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { 403 list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
1150 goto err; 1150 goto err;
1151 } 1151 }
1152 1152
1153 ppl_conf->bs = bioset_create(conf->raid_disks, 0); 1153 ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
1154 if (!ppl_conf->bs) { 1154 if (!ppl_conf->bs) {
1155 ret = -ENOMEM; 1155 ret = -ENOMEM;
1156 goto err; 1156 goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ec0f951ae19f..62c965be97e1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
2476 2476
2477 pr_debug("end_read_request %llu/%d, count: %d, error %d.\n", 2477 pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
2478 (unsigned long long)sh->sector, i, atomic_read(&sh->count), 2478 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
2479 bi->bi_error); 2479 bi->bi_status);
2480 if (i == disks) { 2480 if (i == disks) {
2481 bio_reset(bi); 2481 bio_reset(bi);
2482 BUG(); 2482 BUG();
@@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
2496 s = sh->sector + rdev->new_data_offset; 2496 s = sh->sector + rdev->new_data_offset;
2497 else 2497 else
2498 s = sh->sector + rdev->data_offset; 2498 s = sh->sector + rdev->data_offset;
2499 if (!bi->bi_error) { 2499 if (!bi->bi_status) {
2500 set_bit(R5_UPTODATE, &sh->dev[i].flags); 2500 set_bit(R5_UPTODATE, &sh->dev[i].flags);
2501 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 2501 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
2502 /* Note that this cannot happen on a 2502 /* Note that this cannot happen on a
@@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
2613 } 2613 }
2614 pr_debug("end_write_request %llu/%d, count %d, error: %d.\n", 2614 pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
2615 (unsigned long long)sh->sector, i, atomic_read(&sh->count), 2615 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
2616 bi->bi_error); 2616 bi->bi_status);
2617 if (i == disks) { 2617 if (i == disks) {
2618 bio_reset(bi); 2618 bio_reset(bi);
2619 BUG(); 2619 BUG();
@@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
2621 } 2621 }
2622 2622
2623 if (replacement) { 2623 if (replacement) {
2624 if (bi->bi_error) 2624 if (bi->bi_status)
2625 md_error(conf->mddev, rdev); 2625 md_error(conf->mddev, rdev);
2626 else if (is_badblock(rdev, sh->sector, 2626 else if (is_badblock(rdev, sh->sector,
2627 STRIPE_SECTORS, 2627 STRIPE_SECTORS,
2628 &first_bad, &bad_sectors)) 2628 &first_bad, &bad_sectors))
2629 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); 2629 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
2630 } else { 2630 } else {
2631 if (bi->bi_error) { 2631 if (bi->bi_status) {
2632 set_bit(STRIPE_DEGRADED, &sh->state); 2632 set_bit(STRIPE_DEGRADED, &sh->state);
2633 set_bit(WriteErrorSeen, &rdev->flags); 2633 set_bit(WriteErrorSeen, &rdev->flags);
2634 set_bit(R5_WriteError, &sh->dev[i].flags); 2634 set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
2649 } 2649 }
2650 rdev_dec_pending(rdev, conf->mddev); 2650 rdev_dec_pending(rdev, conf->mddev);
2651 2651
2652 if (sh->batch_head && bi->bi_error && !replacement) 2652 if (sh->batch_head && bi->bi_status && !replacement)
2653 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); 2653 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
2654 2654
2655 bio_reset(bi); 2655 bio_reset(bi);
@@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3381 sh->dev[i].sector + STRIPE_SECTORS) { 3381 sh->dev[i].sector + STRIPE_SECTORS) {
3382 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 3382 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
3383 3383
3384 bi->bi_error = -EIO; 3384 bi->bi_status = BLK_STS_IOERR;
3385 md_write_end(conf->mddev); 3385 md_write_end(conf->mddev);
3386 bio_endio(bi); 3386 bio_endio(bi);
3387 bi = nextbi; 3387 bi = nextbi;
@@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3403 sh->dev[i].sector + STRIPE_SECTORS) { 3403 sh->dev[i].sector + STRIPE_SECTORS) {
3404 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 3404 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
3405 3405
3406 bi->bi_error = -EIO; 3406 bi->bi_status = BLK_STS_IOERR;
3407 md_write_end(conf->mddev); 3407 md_write_end(conf->mddev);
3408 bio_endio(bi); 3408 bio_endio(bi);
3409 bi = bi2; 3409 bi = bi2;
@@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3429 struct bio *nextbi = 3429 struct bio *nextbi =
3430 r5_next_bio(bi, sh->dev[i].sector); 3430 r5_next_bio(bi, sh->dev[i].sector);
3431 3431
3432 bi->bi_error = -EIO; 3432 bi->bi_status = BLK_STS_IOERR;
3433 bio_endio(bi); 3433 bio_endio(bi);
3434 bi = nextbi; 3434 bi = nextbi;
3435 } 3435 }
@@ -5154,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
5154 struct mddev *mddev; 5154 struct mddev *mddev;
5155 struct r5conf *conf; 5155 struct r5conf *conf;
5156 struct md_rdev *rdev; 5156 struct md_rdev *rdev;
5157 int error = bi->bi_error; 5157 blk_status_t error = bi->bi_status;
5158 5158
5159 bio_put(bi); 5159 bio_put(bi);
5160 5160
@@ -5731,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
5731 release_stripe_plug(mddev, sh); 5731 release_stripe_plug(mddev, sh);
5732 } else { 5732 } else {
5733 /* cannot get stripe for read-ahead, just give-up */ 5733 /* cannot get stripe for read-ahead, just give-up */
5734 bi->bi_error = -EIO; 5734 bi->bi_status = BLK_STS_IOERR;
5735 break; 5735 break;
5736 } 5736 }
5737 } 5737 }
@@ -6943,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
6943 goto abort; 6943 goto abort;
6944 } 6944 }
6945 6945
6946 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); 6946 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
6947 if (!conf->bio_split) 6947 if (!conf->bio_split)
6948 goto abort; 6948 goto abort;
6949 conf->mddev = mddev; 6949 conf->mddev = mddev;
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 99e651c27fb7..22de7f5ed032 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
1921 spin_lock_irqsave(&msb->q_lock, flags); 1921 spin_lock_irqsave(&msb->q_lock, flags);
1922 1922
1923 if (len) 1923 if (len)
1924 if (!__blk_end_request(msb->req, 0, len)) 1924 if (!__blk_end_request(msb->req, BLK_STS_OK, len))
1925 msb->req = NULL; 1925 msb->req = NULL;
1926 1926
1927 if (error && msb->req) { 1927 if (error && msb->req) {
1928 blk_status_t ret = errno_to_blk_status(error);
1928 dbg_verbose("IO: ending one sector of the request with error"); 1929 dbg_verbose("IO: ending one sector of the request with error");
1929 if (!__blk_end_request(msb->req, error, msb->page_size)) 1930 if (!__blk_end_request(msb->req, ret, msb->page_size))
1930 msb->req = NULL; 1931 msb->req = NULL;
1931 } 1932 }
1932 1933
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
2014 WARN_ON(!msb->io_queue_stopped); 2015 WARN_ON(!msb->io_queue_stopped);
2015 2016
2016 while ((req = blk_fetch_request(q)) != NULL) 2017 while ((req = blk_fetch_request(q)) != NULL)
2017 __blk_end_request_all(req, -ENODEV); 2018 __blk_end_request_all(req, BLK_STS_IOERR);
2018 return; 2019 return;
2019 } 2020 }
2020 2021
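The memstick hunks show the other half of the series: the legacy (non-blk-mq) completion helpers blk_end_request(), __blk_end_request() and __blk_end_request_all() now take a blk_status_t instead of an errno, so a driver that still tracks an int error internally converts it once at the completion call. A sketch of that boundary; the wrapper is illustrative, only the helper signatures are assumed from the 4.13 API:

#include <linux/blkdev.h>

/* caller holds the queue lock, as __blk_end_request() requires */
static bool example_end_request(struct request *req, int error,
				unsigned int bytes)
{
	/* returns true while the request still has bytes outstanding */
	return __blk_end_request(req, errno_to_blk_status(error), bytes);
}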
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c00d8a266878..8897962781bb 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -709,7 +709,8 @@ try_again:
709 msb->req_sg); 709 msb->req_sg);
710 710
711 if (!msb->seg_count) { 711 if (!msb->seg_count) {
712 chunk = __blk_end_request_cur(msb->block_req, -ENOMEM); 712 chunk = __blk_end_request_cur(msb->block_req,
713 BLK_STS_RESOURCE);
713 continue; 714 continue;
714 } 715 }
715 716
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
776 if (error && !t_len) 777 if (error && !t_len)
777 t_len = blk_rq_cur_bytes(msb->block_req); 778 t_len = blk_rq_cur_bytes(msb->block_req);
778 779
779 chunk = __blk_end_request(msb->block_req, error, t_len); 780 chunk = __blk_end_request(msb->block_req,
781 errno_to_blk_status(error), t_len);
780 782
781 error = mspro_block_issue_req(card, chunk); 783 error = mspro_block_issue_req(card, chunk);
782 784
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
838 840
839 if (msb->eject) { 841 if (msb->eject) {
840 while ((req = blk_fetch_request(q)) != NULL) 842 while ((req = blk_fetch_request(q)) != NULL)
841 __blk_end_request_all(req, -ENODEV); 843 __blk_end_request_all(req, BLK_STS_IOERR);
842 844
843 return; 845 return;
844 } 846 }
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b078686d..6ff94a948a4b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
1184 struct mmc_card *card = md->queue.card; 1184 struct mmc_card *card = md->queue.card;
1185 unsigned int from, nr, arg; 1185 unsigned int from, nr, arg;
1186 int err = 0, type = MMC_BLK_DISCARD; 1186 int err = 0, type = MMC_BLK_DISCARD;
1187 blk_status_t status = BLK_STS_OK;
1187 1188
1188 if (!mmc_can_erase(card)) { 1189 if (!mmc_can_erase(card)) {
1189 err = -EOPNOTSUPP; 1190 status = BLK_STS_NOTSUPP;
1190 goto fail; 1191 goto fail;
1191 } 1192 }
1192 1193
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
1212 if (!err) 1213 if (!err)
1213 err = mmc_erase(card, from, nr, arg); 1214 err = mmc_erase(card, from, nr, arg);
1214 } while (err == -EIO && !mmc_blk_reset(md, card->host, type)); 1215 } while (err == -EIO && !mmc_blk_reset(md, card->host, type));
1215 if (!err) 1216 if (err)
1217 status = BLK_STS_IOERR;
1218 else
1216 mmc_blk_reset_success(md, type); 1219 mmc_blk_reset_success(md, type);
1217fail: 1220fail:
1218 blk_end_request(req, err, blk_rq_bytes(req)); 1221 blk_end_request(req, status, blk_rq_bytes(req));
1219} 1222}
1220 1223
1221static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, 1224static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
1225 struct mmc_card *card = md->queue.card; 1228 struct mmc_card *card = md->queue.card;
1226 unsigned int from, nr, arg; 1229 unsigned int from, nr, arg;
1227 int err = 0, type = MMC_BLK_SECDISCARD; 1230 int err = 0, type = MMC_BLK_SECDISCARD;
1231 blk_status_t status = BLK_STS_OK;
1228 1232
1229 if (!(mmc_can_secure_erase_trim(card))) { 1233 if (!(mmc_can_secure_erase_trim(card))) {
1230 err = -EOPNOTSUPP; 1234 status = BLK_STS_NOTSUPP;
1231 goto out; 1235 goto out;
1232 } 1236 }
1233 1237
@@ -1254,8 +1258,10 @@ retry:
1254 err = mmc_erase(card, from, nr, arg); 1258 err = mmc_erase(card, from, nr, arg);
1255 if (err == -EIO) 1259 if (err == -EIO)
1256 goto out_retry; 1260 goto out_retry;
1257 if (err) 1261 if (err) {
1262 status = BLK_STS_IOERR;
1258 goto out; 1263 goto out;
1264 }
1259 1265
1260 if (arg == MMC_SECURE_TRIM1_ARG) { 1266 if (arg == MMC_SECURE_TRIM1_ARG) {
1261 if (card->quirks & MMC_QUIRK_INAND_CMD38) { 1267 if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ retry:
1270 err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG); 1276 err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
1271 if (err == -EIO) 1277 if (err == -EIO)
1272 goto out_retry; 1278 goto out_retry;
1273 if (err) 1279 if (err) {
1280 status = BLK_STS_IOERR;
1274 goto out; 1281 goto out;
1282 }
1275 } 1283 }
1276 1284
1277out_retry: 1285out_retry:
@@ -1280,7 +1288,7 @@ out_retry:
1280 if (!err) 1288 if (!err)
1281 mmc_blk_reset_success(md, type); 1289 mmc_blk_reset_success(md, type);
1282out: 1290out:
1283 blk_end_request(req, err, blk_rq_bytes(req)); 1291 blk_end_request(req, status, blk_rq_bytes(req));
1284} 1292}
1285 1293
1286static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) 1294static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
1290 int ret = 0; 1298 int ret = 0;
1291 1299
1292 ret = mmc_flush_cache(card); 1300 ret = mmc_flush_cache(card);
1293 if (ret) 1301 blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
1294 ret = -EIO;
1295
1296 blk_end_request_all(req, ret);
1297} 1302}
1298 1303
1299/* 1304/*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
1641{ 1646{
1642 if (mmc_card_removed(card)) 1647 if (mmc_card_removed(card))
1643 req->rq_flags |= RQF_QUIET; 1648 req->rq_flags |= RQF_QUIET;
1644 while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req))); 1649 while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
1645 mmc_queue_req_free(mq, mqrq); 1650 mmc_queue_req_free(mq, mqrq);
1646} 1651}
1647 1652
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
1661 */ 1666 */
1662 if (mmc_card_removed(mq->card)) { 1667 if (mmc_card_removed(mq->card)) {
1663 req->rq_flags |= RQF_QUIET; 1668 req->rq_flags |= RQF_QUIET;
1664 blk_end_request_all(req, -EIO); 1669 blk_end_request_all(req, BLK_STS_IOERR);
1665 mmc_queue_req_free(mq, mqrq); 1670 mmc_queue_req_free(mq, mqrq);
1666 return; 1671 return;
1667 } 1672 }
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
1743 */ 1748 */
1744 mmc_blk_reset_success(md, type); 1749 mmc_blk_reset_success(md, type);
1745 1750
1746 req_pending = blk_end_request(old_req, 0, 1751 req_pending = blk_end_request(old_req, BLK_STS_OK,
1747 brq->data.bytes_xfered); 1752 brq->data.bytes_xfered);
1748 /* 1753 /*
1749 * If the blk_end_request function returns non-zero even 1754 * If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
1811 * time, so we only reach here after trying to 1816 * time, so we only reach here after trying to
1812 * read a single sector. 1817 * read a single sector.
1813 */ 1818 */
1814 req_pending = blk_end_request(old_req, -EIO, 1819 req_pending = blk_end_request(old_req, BLK_STS_IOERR,
1815 brq->data.blksz); 1820 brq->data.blksz);
1816 if (!req_pending) { 1821 if (!req_pending) {
1817 mmc_queue_req_free(mq, mq_rq); 1822 mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
1860 ret = mmc_blk_part_switch(card, md); 1865 ret = mmc_blk_part_switch(card, md);
1861 if (ret) { 1866 if (ret) {
1862 if (req) { 1867 if (req) {
1863 blk_end_request_all(req, -EIO); 1868 blk_end_request_all(req, BLK_STS_IOERR);
1864 } 1869 }
1865 goto out; 1870 goto out;
1866 } 1871 }
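
A hedged illustration of the idiom the mmc discard/secdiscard paths above settle on: the driver keeps its internal errno for its own retry decisions while reporting a separate blk_status_t to the block layer (my_issue_discard() and my_do_discard() are invented names; the completion call matches the converted code):

#include <linux/blkdev.h>

/* hypothetical hardware hook returning 0 or a negative errno */
static int my_do_discard(struct request *req)
{
	return 0;
}

static void my_issue_discard(struct request *req)
{
	blk_status_t status = BLK_STS_OK;	/* what the block layer sees */
	int err;				/* driver-internal errno */

	err = my_do_discard(req);
	if (err == -EOPNOTSUPP)
		status = BLK_STS_NOTSUPP;
	else if (err)
		status = BLK_STS_IOERR;

	blk_end_request(req, status, blk_rq_bytes(req));
}
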
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6be3e7b..b659a28c8018 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
133 if (!mq) { 133 if (!mq) {
134 while ((req = blk_fetch_request(q)) != NULL) { 134 while ((req = blk_fetch_request(q)) != NULL) {
135 req->rq_flags |= RQF_QUIET; 135 req->rq_flags |= RQF_QUIET;
136 __blk_end_request_all(req, -EIO); 136 __blk_end_request_all(req, BLK_STS_IOERR);
137 } 137 }
138 return; 138 return;
139 } 139 }
@@ -388,7 +388,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
388 mmc_queue_setup_discard(mq->queue, card); 388 mmc_queue_setup_discard(mq->queue, card);
389 389
390 if (card->bouncesz) { 390 if (card->bouncesz) {
391 blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
392 blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512); 391 blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
393 blk_queue_max_segments(mq->queue, card->bouncesz / 512); 392 blk_queue_max_segments(mq->queue, card->bouncesz / 512);
394 blk_queue_max_segment_size(mq->queue, card->bouncesz); 393 blk_queue_max_segment_size(mq->queue, card->bouncesz);
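
The bounce-limit changes in this area are two sides of the same cleanup: the core no longer applies a highmem bounce limit by default, so the redundant BLK_BOUNCE_ANY call disappears from mmc here, while mtd_blkdevs below, which addresses request data through bio_data(), has to ask for bouncing explicitly. Under that reading, a sketch of what such a driver now does at queue setup (my_init_queue() is an invented name):

#include <linux/blkdev.h>

static void my_init_queue(struct request_queue *q)
{
	/* PIO-style drivers that touch request data through kernel
	 * virtual addresses must now request highmem bouncing themselves */
	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
}
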
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6b8d5cd7dbf6..f336a9b85576 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
73} 73}
74 74
75 75
76static int do_blktrans_request(struct mtd_blktrans_ops *tr, 76static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
77 struct mtd_blktrans_dev *dev, 77 struct mtd_blktrans_dev *dev,
78 struct request *req) 78 struct request *req)
79{ 79{
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
84 nsect = blk_rq_cur_bytes(req) >> tr->blkshift; 84 nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
85 buf = bio_data(req->bio); 85 buf = bio_data(req->bio);
86 86
87 if (req_op(req) == REQ_OP_FLUSH) 87 if (req_op(req) == REQ_OP_FLUSH) {
88 return tr->flush(dev); 88 if (tr->flush(dev))
89 return BLK_STS_IOERR;
90 return BLK_STS_OK;
91 }
89 92
90 if (blk_rq_pos(req) + blk_rq_cur_sectors(req) > 93 if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
91 get_capacity(req->rq_disk)) 94 get_capacity(req->rq_disk))
92 return -EIO; 95 return BLK_STS_IOERR;
93 96
94 switch (req_op(req)) { 97 switch (req_op(req)) {
95 case REQ_OP_DISCARD: 98 case REQ_OP_DISCARD:
96 return tr->discard(dev, block, nsect); 99 if (tr->discard(dev, block, nsect))
100 return BLK_STS_IOERR;
101 return BLK_STS_OK;
97 case REQ_OP_READ: 102 case REQ_OP_READ:
98 for (; nsect > 0; nsect--, block++, buf += tr->blksize) 103 for (; nsect > 0; nsect--, block++, buf += tr->blksize)
99 if (tr->readsect(dev, block, buf)) 104 if (tr->readsect(dev, block, buf))
100 return -EIO; 105 return BLK_STS_IOERR;
101 rq_flush_dcache_pages(req); 106 rq_flush_dcache_pages(req);
102 return 0; 107 return BLK_STS_OK;
103 case REQ_OP_WRITE: 108 case REQ_OP_WRITE:
104 if (!tr->writesect) 109 if (!tr->writesect)
105 return -EIO; 110 return BLK_STS_IOERR;
106 111
107 rq_flush_dcache_pages(req); 112 rq_flush_dcache_pages(req);
108 for (; nsect > 0; nsect--, block++, buf += tr->blksize) 113 for (; nsect > 0; nsect--, block++, buf += tr->blksize)
109 if (tr->writesect(dev, block, buf)) 114 if (tr->writesect(dev, block, buf))
110 return -EIO; 115 return BLK_STS_IOERR;
111 return 0;
112 default: 116 default:
113 return -EIO; 117 return BLK_STS_IOERR;
114 } 118 }
115} 119}
116 120
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
132 spin_lock_irq(rq->queue_lock); 136 spin_lock_irq(rq->queue_lock);
133 137
134 while (1) { 138 while (1) {
135 int res; 139 blk_status_t res;
136 140
137 dev->bg_stop = false; 141 dev->bg_stop = false;
138 if (!req && !(req = blk_fetch_request(rq))) { 142 if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
178 182
179 if (!dev) 183 if (!dev)
180 while ((req = blk_fetch_request(rq)) != NULL) 184 while ((req = blk_fetch_request(rq)) != NULL)
181 __blk_end_request_all(req, -ENODEV); 185 __blk_end_request_all(req, BLK_STS_IOERR);
182 else 186 else
183 queue_work(dev->wq, &dev->work); 187 queue_work(dev->wq, &dev->work);
184} 188}
@@ -413,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
413 new->rq->queuedata = new; 417 new->rq->queuedata = new;
414 blk_queue_logical_block_size(new->rq, tr->blksize); 418 blk_queue_logical_block_size(new->rq, tr->blksize);
415 419
420 blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
416 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq); 421 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
417 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq); 422 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
418 423
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5497e65439df..c3963f880448 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -313,10 +313,10 @@ static void ubiblock_do_work(struct work_struct *work)
313 ret = ubiblock_read(pdu); 313 ret = ubiblock_read(pdu);
314 rq_flush_dcache_pages(req); 314 rq_flush_dcache_pages(req);
315 315
316 blk_mq_end_request(req, ret); 316 blk_mq_end_request(req, errno_to_blk_status(ret));
317} 317}
318 318
319static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, 319static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
320 const struct blk_mq_queue_data *bd) 320 const struct blk_mq_queue_data *bd)
321{ 321{
322 struct request *req = bd->rq; 322 struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
327 case REQ_OP_READ: 327 case REQ_OP_READ:
328 ubi_sgl_init(&pdu->usgl); 328 ubi_sgl_init(&pdu->usgl);
329 queue_work(dev->wq, &pdu->work); 329 queue_work(dev->wq, &pdu->work);
330 return BLK_MQ_RQ_QUEUE_OK; 330 return BLK_STS_OK;
331 default: 331 default:
332 return BLK_MQ_RQ_QUEUE_ERROR; 332 return BLK_STS_IOERR;
333 } 333 }
334 334
335} 335}
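
The ubiblock hunk above shows the new ->queue_rq() contract for blk-mq drivers: return a blk_status_t instead of the old BLK_MQ_RQ_QUEUE_* codes. A minimal sketch, assuming invented helpers (my_queue_rq(), my_dev_busy() and my_submit() are not real functions; the blk-mq calls are):

#include <linux/blk-mq.h>

/* hypothetical stand-ins for real driver submission logic */
static bool my_dev_busy(void *queuedata)
{
	return false;
}

static int my_submit(struct request *req)
{
	return 0;
}

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;

	if (my_dev_busy(hctx->queue->queuedata))
		return BLK_STS_RESOURCE;	/* was BLK_MQ_RQ_QUEUE_BUSY */

	blk_mq_start_request(req);
	if (my_submit(req))
		return BLK_STS_IOERR;		/* was BLK_MQ_RQ_QUEUE_ERROR */

	return BLK_STS_OK;			/* was BLK_MQ_RQ_QUEUE_OK */
}

Returning BLK_STS_RESOURCE asks the core to requeue the request and retry later; any other non-OK status completes the request with that error.
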
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 822198a75e96..f12d23c49771 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
186 * another kernel subsystem, and we just pass it through. 186 * another kernel subsystem, and we just pass it through.
187 */ 187 */
188 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 188 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
189 bio->bi_error = -EIO; 189 bio->bi_status = BLK_STS_IOERR;
190 goto out; 190 goto out;
191 } 191 }
192 192
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
205 "io error in %s sector %lld, len %d,\n", 205 "io error in %s sector %lld, len %d,\n",
206 (rw == READ) ? "READ" : "WRITE", 206 (rw == READ) ? "READ" : "WRITE",
207 (unsigned long long) iter.bi_sector, len); 207 (unsigned long long) iter.bi_sector, len);
208 bio->bi_error = err; 208 bio->bi_status = errno_to_blk_status(err);
209 break; 209 break;
210 } 210 }
211 } 211 }
@@ -273,7 +273,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
273 273
274 blk_queue_make_request(q, nd_blk_make_request); 274 blk_queue_make_request(q, nd_blk_make_request);
275 blk_queue_max_hw_sectors(q, UINT_MAX); 275 blk_queue_max_hw_sectors(q, UINT_MAX);
276 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
277 blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); 276 blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
278 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 277 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
279 q->queuedata = nsblk; 278 q->queuedata = nsblk;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 983718b8fd9b..b6ba0618ea46 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
1210 * another kernel subsystem, and we just pass it through. 1210 * another kernel subsystem, and we just pass it through.
1211 */ 1211 */
1212 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 1212 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1213 bio->bi_error = -EIO; 1213 bio->bi_status = BLK_STS_IOERR;
1214 goto out; 1214 goto out;
1215 } 1215 }
1216 1216
@@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
1232 (op_is_write(bio_op(bio))) ? "WRITE" : 1232 (op_is_write(bio_op(bio))) ? "WRITE" :
1233 "READ", 1233 "READ",
1234 (unsigned long long) iter.bi_sector, len); 1234 (unsigned long long) iter.bi_sector, len);
1235 bio->bi_error = err; 1235 bio->bi_status = errno_to_blk_status(err);
1236 break; 1236 break;
1237 } 1237 }
1238 } 1238 }
@@ -1297,7 +1297,6 @@ static int btt_blk_init(struct btt *btt)
1297 blk_queue_make_request(btt->btt_queue, btt_make_request); 1297 blk_queue_make_request(btt->btt_queue, btt_make_request);
1298 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); 1298 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
1299 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); 1299 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
1300 blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
1301 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); 1300 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
1302 btt->btt_queue->queuedata = btt; 1301 btt->btt_queue->queuedata = btt;
1303 1302
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..6b577afb1d44 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
49 return to_nd_region(to_dev(pmem)->parent); 49 return to_nd_region(to_dev(pmem)->parent);
50} 50}
51 51
52static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, 52static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
53 unsigned int len) 53 phys_addr_t offset, unsigned int len)
54{ 54{
55 struct device *dev = to_dev(pmem); 55 struct device *dev = to_dev(pmem);
56 sector_t sector; 56 sector_t sector;
57 long cleared; 57 long cleared;
58 int rc = 0; 58 blk_status_t rc = BLK_STS_OK;
59 59
60 sector = (offset - pmem->data_offset) / 512; 60 sector = (offset - pmem->data_offset) / 512;
61 61
62 cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); 62 cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
63 if (cleared < len) 63 if (cleared < len)
64 rc = -EIO; 64 rc = BLK_STS_IOERR;
65 if (cleared > 0 && cleared / 512) { 65 if (cleared > 0 && cleared / 512) {
66 cleared /= 512; 66 cleared /= 512;
67 dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, 67 dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
84 kunmap_atomic(mem); 84 kunmap_atomic(mem);
85} 85}
86 86
87static int read_pmem(struct page *page, unsigned int off, 87static blk_status_t read_pmem(struct page *page, unsigned int off,
88 void *pmem_addr, unsigned int len) 88 void *pmem_addr, unsigned int len)
89{ 89{
90 int rc; 90 int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
93 rc = memcpy_mcsafe(mem + off, pmem_addr, len); 93 rc = memcpy_mcsafe(mem + off, pmem_addr, len);
94 kunmap_atomic(mem); 94 kunmap_atomic(mem);
95 if (rc) 95 if (rc)
96 return -EIO; 96 return BLK_STS_IOERR;
97 return 0; 97 return BLK_STS_OK;
98} 98}
99 99
100static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, 100static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
101 unsigned int len, unsigned int off, bool is_write, 101 unsigned int len, unsigned int off, bool is_write,
102 sector_t sector) 102 sector_t sector)
103{ 103{
104 int rc = 0; 104 blk_status_t rc = BLK_STS_OK;
105 bool bad_pmem = false; 105 bool bad_pmem = false;
106 phys_addr_t pmem_off = sector * 512 + pmem->data_offset; 106 phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
107 void *pmem_addr = pmem->virt_addr + pmem_off; 107 void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
111 111
112 if (!is_write) { 112 if (!is_write) {
113 if (unlikely(bad_pmem)) 113 if (unlikely(bad_pmem))
114 rc = -EIO; 114 rc = BLK_STS_IOERR;
115 else { 115 else {
116 rc = read_pmem(page, off, pmem_addr, len); 116 rc = read_pmem(page, off, pmem_addr, len);
117 flush_dcache_page(page); 117 flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
149 149
150static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) 150static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
151{ 151{
152 int rc = 0; 152 blk_status_t rc = 0;
153 bool do_acct; 153 bool do_acct;
154 unsigned long start; 154 unsigned long start;
155 struct bio_vec bvec; 155 struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
166 bvec.bv_offset, op_is_write(bio_op(bio)), 166 bvec.bv_offset, op_is_write(bio_op(bio)),
167 iter.bi_sector); 167 iter.bi_sector);
168 if (rc) { 168 if (rc) {
169 bio->bi_error = rc; 169 bio->bi_status = rc;
170 break; 170 break;
171 } 171 }
172 } 172 }
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
184 struct page *page, bool is_write) 184 struct page *page, bool is_write)
185{ 185{
186 struct pmem_device *pmem = bdev->bd_queue->queuedata; 186 struct pmem_device *pmem = bdev->bd_queue->queuedata;
187 int rc; 187 blk_status_t rc;
188 188
189 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector); 189 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
190 190
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
197 if (rc == 0) 197 if (rc == 0)
198 page_endio(page, is_write, 0); 198 page_endio(page, is_write, 0);
199 199
200 return rc; 200 return blk_status_to_errno(rc);
201} 201}
202 202
203/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ 203/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
@@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev,
343 blk_queue_make_request(q, pmem_make_request); 343 blk_queue_make_request(q, pmem_make_request);
344 blk_queue_physical_block_size(q, PAGE_SIZE); 344 blk_queue_physical_block_size(q, PAGE_SIZE);
345 blk_queue_max_hw_sectors(q, UINT_MAX); 345 blk_queue_max_hw_sectors(q, UINT_MAX);
346 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
347 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 346 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
348 queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); 347 queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
349 q->queuedata = pmem; 348 q->queuedata = pmem;
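
For bio-based drivers such as the nvdimm ones above, the equivalent change is that the per-bio error moves from the errno field bi_error to the blk_status_t field bi_status. A hedged sketch of a make_request function after the change (my_make_request() and my_do_bvec() are invented; the bio API calls are the ones used in the hunks above):

#include <linux/blkdev.h>
#include <linux/bio.h>

/* hypothetical per-segment worker, analogous to pmem_do_bvec() above */
static blk_status_t my_do_bvec(struct bio_vec *bv, sector_t sector)
{
	return BLK_STS_OK;
}

static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;

	bio_for_each_segment(bvec, bio, iter) {
		blk_status_t rc = my_do_bvec(&bvec, iter.bi_sector);

		if (rc) {
			bio->bi_status = rc;	/* replaces bio->bi_error = -EIO */
			break;
		}
	}
	bio_endio(bio);
	return BLK_QC_T_NONE;
}

Where a status still has to cross an errno-based boundary, as in pmem_rw_page() above, blk_status_to_errno() performs the reverse mapping.
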
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 90745a616df7..46d6cb1e03bd 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -13,18 +13,6 @@ config BLK_DEV_NVME
13 To compile this driver as a module, choose M here: the 13 To compile this driver as a module, choose M here: the
14 module will be called nvme. 14 module will be called nvme.
15 15
16config BLK_DEV_NVME_SCSI
17 bool "SCSI emulation for NVMe device nodes"
18 depends on NVME_CORE
19 ---help---
20 This adds support for the SG_IO ioctl on the NVMe character
21 and block devices nodes, as well as a translation for a small
22 number of selected SCSI commands to NVMe commands to the NVMe
23 driver. If you don't know what this means you probably want
24 to say N here, unless you run a distro that abuses the SCSI
25 emulation to provide stable device names for mount by id, like
26 some OpenSuSE and SLES versions.
27
28config NVME_FABRICS 16config NVME_FABRICS
29 tristate 17 tristate
30 18
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index f1a7d945fbb6..cc0aacb4c8b4 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -5,7 +5,6 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
5obj-$(CONFIG_NVME_FC) += nvme-fc.o 5obj-$(CONFIG_NVME_FC) += nvme-fc.o
6 6
7nvme-core-y := core.o 7nvme-core-y := core.o
8nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
9nvme-core-$(CONFIG_NVM) += lightnvm.o 8nvme-core-$(CONFIG_NVM) += lightnvm.o
10 9
11nvme-y += pci.o 10nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 903d5813023a..d70df1d0072d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -27,7 +27,6 @@
27#include <linux/nvme_ioctl.h> 27#include <linux/nvme_ioctl.h>
28#include <linux/t10-pi.h> 28#include <linux/t10-pi.h>
29#include <linux/pm_qos.h> 29#include <linux/pm_qos.h>
30#include <scsi/sg.h>
31#include <asm/unaligned.h> 30#include <asm/unaligned.h>
32 31
33#include "nvme.h" 32#include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
45MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); 44MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
46EXPORT_SYMBOL_GPL(nvme_io_timeout); 45EXPORT_SYMBOL_GPL(nvme_io_timeout);
47 46
48unsigned char shutdown_timeout = 5; 47static unsigned char shutdown_timeout = 5;
49module_param(shutdown_timeout, byte, 0644); 48module_param(shutdown_timeout, byte, 0644);
50MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); 49MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
51 50
@@ -65,34 +64,53 @@ static bool force_apst;
65module_param(force_apst, bool, 0644); 64module_param(force_apst, bool, 0644);
66MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); 65MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
67 66
67static bool streams;
68module_param(streams, bool, 0644);
69MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
70
71struct workqueue_struct *nvme_wq;
72EXPORT_SYMBOL_GPL(nvme_wq);
73
68static LIST_HEAD(nvme_ctrl_list); 74static LIST_HEAD(nvme_ctrl_list);
69static DEFINE_SPINLOCK(dev_list_lock); 75static DEFINE_SPINLOCK(dev_list_lock);
70 76
71static struct class *nvme_class; 77static struct class *nvme_class;
72 78
73static int nvme_error_status(struct request *req) 79int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
80{
81 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
82 return -EBUSY;
83 if (!queue_work(nvme_wq, &ctrl->reset_work))
84 return -EBUSY;
85 return 0;
86}
87EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
88
89static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
90{
91 int ret;
92
93 ret = nvme_reset_ctrl(ctrl);
94 if (!ret)
95 flush_work(&ctrl->reset_work);
96 return ret;
97}
98
99static blk_status_t nvme_error_status(struct request *req)
74{ 100{
75 switch (nvme_req(req)->status & 0x7ff) { 101 switch (nvme_req(req)->status & 0x7ff) {
76 case NVME_SC_SUCCESS: 102 case NVME_SC_SUCCESS:
77 return 0; 103 return BLK_STS_OK;
78 case NVME_SC_CAP_EXCEEDED: 104 case NVME_SC_CAP_EXCEEDED:
79 return -ENOSPC; 105 return BLK_STS_NOSPC;
80 default:
81 return -EIO;
82
83 /*
84 * XXX: these errors are a nasty side-band protocol to
85 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
86 * anywhere..
87 */
88 case NVME_SC_CMD_SEQ_ERROR:
89 return -EILSEQ;
90 case NVME_SC_ONCS_NOT_SUPPORTED: 106 case NVME_SC_ONCS_NOT_SUPPORTED:
91 return -EOPNOTSUPP; 107 return BLK_STS_NOTSUPP;
92 case NVME_SC_WRITE_FAULT: 108 case NVME_SC_WRITE_FAULT:
93 case NVME_SC_READ_ERROR: 109 case NVME_SC_READ_ERROR:
94 case NVME_SC_UNWRITTEN_BLOCK: 110 case NVME_SC_UNWRITTEN_BLOCK:
95 return -ENODATA; 111 return BLK_STS_MEDIUM;
112 default:
113 return BLK_STS_IOERR;
96 } 114 }
97} 115}
98 116
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
165 switch (old_state) { 183 switch (old_state) {
166 case NVME_CTRL_NEW: 184 case NVME_CTRL_NEW:
167 case NVME_CTRL_LIVE: 185 case NVME_CTRL_LIVE:
168 case NVME_CTRL_RECONNECTING:
169 changed = true; 186 changed = true;
170 /* FALLTHRU */ 187 /* FALLTHRU */
171 default: 188 default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
283} 300}
284EXPORT_SYMBOL_GPL(nvme_alloc_request); 301EXPORT_SYMBOL_GPL(nvme_alloc_request);
285 302
303static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
304{
305 struct nvme_command c;
306
307 memset(&c, 0, sizeof(c));
308
309 c.directive.opcode = nvme_admin_directive_send;
310 c.directive.nsid = cpu_to_le32(0xffffffff);
311 c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
312 c.directive.dtype = NVME_DIR_IDENTIFY;
313 c.directive.tdtype = NVME_DIR_STREAMS;
314 c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
315
316 return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
317}
318
319static int nvme_disable_streams(struct nvme_ctrl *ctrl)
320{
321 return nvme_toggle_streams(ctrl, false);
322}
323
324static int nvme_enable_streams(struct nvme_ctrl *ctrl)
325{
326 return nvme_toggle_streams(ctrl, true);
327}
328
329static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
330 struct streams_directive_params *s, u32 nsid)
331{
332 struct nvme_command c;
333
334 memset(&c, 0, sizeof(c));
335 memset(s, 0, sizeof(*s));
336
337 c.directive.opcode = nvme_admin_directive_recv;
338 c.directive.nsid = cpu_to_le32(nsid);
339 c.directive.numd = sizeof(*s);
340 c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
341 c.directive.dtype = NVME_DIR_STREAMS;
342
343 return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
344}
345
346static int nvme_configure_directives(struct nvme_ctrl *ctrl)
347{
348 struct streams_directive_params s;
349 int ret;
350
351 if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
352 return 0;
353 if (!streams)
354 return 0;
355
356 ret = nvme_enable_streams(ctrl);
357 if (ret)
358 return ret;
359
360 ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
361 if (ret)
362 return ret;
363
364 ctrl->nssa = le16_to_cpu(s.nssa);
365 if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
366 dev_info(ctrl->device, "too few streams (%u) available\n",
367 ctrl->nssa);
368 nvme_disable_streams(ctrl);
369 return 0;
370 }
371
372 ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
373 dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
374 return 0;
375}
376
377/*
378 * Check if 'req' has a write hint associated with it. If it does, assign
379 * a valid namespace stream to the write.
380 */
381static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
382 struct request *req, u16 *control,
383 u32 *dsmgmt)
384{
385 enum rw_hint streamid = req->write_hint;
386
387 if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
388 streamid = 0;
389 else {
390 streamid--;
391 if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
392 return;
393
394 *control |= NVME_RW_DTYPE_STREAMS;
395 *dsmgmt |= streamid << 16;
396 }
397
398 if (streamid < ARRAY_SIZE(req->q->write_hints))
399 req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
400}
401
286static inline void nvme_setup_flush(struct nvme_ns *ns, 402static inline void nvme_setup_flush(struct nvme_ns *ns,
287 struct nvme_command *cmnd) 403 struct nvme_command *cmnd)
288{ 404{
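
The streams plumbing added in the hunk above keys off req->write_hint, which userspace sets with the write-life fcntls merged in the same cycle; nvme_assign_write_stream() then turns the hint into an NVMe stream directive. A hedged userspace sketch, not part of this patch (the fallback constant values below are assumptions taken from the uapi headers of that era):

#include <stdint.h>
#include <fcntl.h>

#ifndef F_SET_RW_HINT			/* older libc headers lack these */
#define F_SET_RW_HINT		(1024 + 12)
#define RWH_WRITE_LIFE_SHORT	2
#endif

/* Tag an open file as holding short-lived data; subsequent writes carry
 * the hint down to the driver as a stream directive. */
static int tag_short_lived(int fd)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;

	return fcntl(fd, F_SET_RW_HINT, &hint);
}
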
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
291 cmnd->common.nsid = cpu_to_le32(ns->ns_id); 407 cmnd->common.nsid = cpu_to_le32(ns->ns_id);
292} 408}
293 409
294static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, 410static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
295 struct nvme_command *cmnd) 411 struct nvme_command *cmnd)
296{ 412{
297 unsigned short segments = blk_rq_nr_discard_segments(req), n = 0; 413 unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
300 416
301 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 417 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
302 if (!range) 418 if (!range)
303 return BLK_MQ_RQ_QUEUE_BUSY; 419 return BLK_STS_RESOURCE;
304 420
305 __rq_for_each_bio(bio, req) { 421 __rq_for_each_bio(bio, req) {
306 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); 422 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
314 430
315 if (WARN_ON_ONCE(n != segments)) { 431 if (WARN_ON_ONCE(n != segments)) {
316 kfree(range); 432 kfree(range);
317 return BLK_MQ_RQ_QUEUE_ERROR; 433 return BLK_STS_IOERR;
318 } 434 }
319 435
320 memset(cmnd, 0, sizeof(*cmnd)); 436 memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
328 req->special_vec.bv_len = sizeof(*range) * segments; 444 req->special_vec.bv_len = sizeof(*range) * segments;
329 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 445 req->rq_flags |= RQF_SPECIAL_PAYLOAD;
330 446
331 return BLK_MQ_RQ_QUEUE_OK; 447 return BLK_STS_OK;
332} 448}
333 449
334static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, 450static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
335 struct nvme_command *cmnd) 451 struct request *req, struct nvme_command *cmnd)
336{ 452{
453 struct nvme_ctrl *ctrl = ns->ctrl;
337 u16 control = 0; 454 u16 control = 0;
338 u32 dsmgmt = 0; 455 u32 dsmgmt = 0;
339 456
457 /*
 458 * If formatted with metadata, require that the block layer provide a buffer
 459 * unless this namespace is formatted such that the metadata can be
460 * stripped/generated by the controller with PRACT=1.
461 */
462 if (ns && ns->ms &&
463 (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
464 !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
465 return BLK_STS_NOTSUPP;
466
340 if (req->cmd_flags & REQ_FUA) 467 if (req->cmd_flags & REQ_FUA)
341 control |= NVME_RW_FUA; 468 control |= NVME_RW_FUA;
342 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) 469 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
351 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); 478 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
352 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); 479 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
353 480
481 if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
482 nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
483
354 if (ns->ms) { 484 if (ns->ms) {
355 switch (ns->pi_type) { 485 switch (ns->pi_type) {
356 case NVME_NS_DPS_PI_TYPE3: 486 case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
370 500
371 cmnd->rw.control = cpu_to_le16(control); 501 cmnd->rw.control = cpu_to_le16(control);
372 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); 502 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
503 return 0;
373} 504}
374 505
375int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, 506blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
376 struct nvme_command *cmd) 507 struct nvme_command *cmd)
377{ 508{
378 int ret = BLK_MQ_RQ_QUEUE_OK; 509 blk_status_t ret = BLK_STS_OK;
379 510
380 if (!(req->rq_flags & RQF_DONTPREP)) { 511 if (!(req->rq_flags & RQF_DONTPREP)) {
381 nvme_req(req)->retries = 0; 512 nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
398 break; 529 break;
399 case REQ_OP_READ: 530 case REQ_OP_READ:
400 case REQ_OP_WRITE: 531 case REQ_OP_WRITE:
401 nvme_setup_rw(ns, req, cmd); 532 ret = nvme_setup_rw(ns, req, cmd);
402 break; 533 break;
403 default: 534 default:
404 WARN_ON_ONCE(1); 535 WARN_ON_ONCE(1);
405 return BLK_MQ_RQ_QUEUE_ERROR; 536 return BLK_STS_IOERR;
406 } 537 }
407 538
408 cmd->common.command_id = req->tag; 539 cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
555 result, timeout); 686 result, timeout);
556} 687}
557 688
558static void nvme_keep_alive_end_io(struct request *rq, int error) 689static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
559{ 690{
560 struct nvme_ctrl *ctrl = rq->end_io_data; 691 struct nvme_ctrl *ctrl = rq->end_io_data;
561 692
562 blk_mq_free_request(rq); 693 blk_mq_free_request(rq);
563 694
564 if (error) { 695 if (status) {
565 dev_err(ctrl->device, 696 dev_err(ctrl->device,
566 "failed nvme_keep_alive_end_io error=%d\n", error); 697 "failed nvme_keep_alive_end_io error=%d\n",
698 status);
567 return; 699 return;
568 } 700 }
569 701
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
599 if (nvme_keep_alive(ctrl)) { 731 if (nvme_keep_alive(ctrl)) {
600 /* allocation failure, reset the controller */ 732 /* allocation failure, reset the controller */
601 dev_err(ctrl->device, "keep-alive failed\n"); 733 dev_err(ctrl->device, "keep-alive failed\n");
602 ctrl->ops->reset_ctrl(ctrl); 734 nvme_reset_ctrl(ctrl);
603 return; 735 return;
604 } 736 }
605} 737}
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
623} 755}
624EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); 756EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
625 757
626int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) 758static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
627{ 759{
628 struct nvme_command c = { }; 760 struct nvme_command c = { };
629 int error; 761 int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
643 return error; 775 return error;
644} 776}
645 777
778static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
779{
780 struct nvme_command c = { };
781 int status;
782 void *data;
783 int pos;
784 int len;
785
786 c.identify.opcode = nvme_admin_identify;
787 c.identify.nsid = cpu_to_le32(nsid);
788 c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
789
790 data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
791 if (!data)
792 return -ENOMEM;
793
794 status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
795 NVME_IDENTIFY_DATA_SIZE);
796 if (status)
797 goto free_data;
798
799 for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
800 struct nvme_ns_id_desc *cur = data + pos;
801
802 if (cur->nidl == 0)
803 break;
804
805 switch (cur->nidt) {
806 case NVME_NIDT_EUI64:
807 if (cur->nidl != NVME_NIDT_EUI64_LEN) {
808 dev_warn(ns->ctrl->device,
809 "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
810 cur->nidl);
811 goto free_data;
812 }
813 len = NVME_NIDT_EUI64_LEN;
814 memcpy(ns->eui, data + pos + sizeof(*cur), len);
815 break;
816 case NVME_NIDT_NGUID:
817 if (cur->nidl != NVME_NIDT_NGUID_LEN) {
818 dev_warn(ns->ctrl->device,
819 "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
820 cur->nidl);
821 goto free_data;
822 }
823 len = NVME_NIDT_NGUID_LEN;
824 memcpy(ns->nguid, data + pos + sizeof(*cur), len);
825 break;
826 case NVME_NIDT_UUID:
827 if (cur->nidl != NVME_NIDT_UUID_LEN) {
828 dev_warn(ns->ctrl->device,
829 "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
830 cur->nidl);
831 goto free_data;
832 }
833 len = NVME_NIDT_UUID_LEN;
834 uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
835 break;
836 default:
 837 /* Skip unknown types */
838 len = cur->nidl;
839 break;
840 }
841
842 len += sizeof(*cur);
843 }
844free_data:
845 kfree(data);
846 return status;
847}
848
646static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list) 849static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
647{ 850{
648 struct nvme_command c = { }; 851 struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
653 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); 856 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
654} 857}
655 858
656int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, 859static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
657 struct nvme_id_ns **id) 860 struct nvme_id_ns **id)
658{ 861{
659 struct nvme_command c = { }; 862 struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
675 return error; 878 return error;
676} 879}
677 880
678int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, 881static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
679 void *buffer, size_t buflen, u32 *result)
680{
681 struct nvme_command c;
682 union nvme_result res;
683 int ret;
684
685 memset(&c, 0, sizeof(c));
686 c.features.opcode = nvme_admin_get_features;
687 c.features.nsid = cpu_to_le32(nsid);
688 c.features.fid = cpu_to_le32(fid);
689
690 ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
691 NVME_QID_ANY, 0, 0);
692 if (ret >= 0 && result)
693 *result = le32_to_cpu(res.u32);
694 return ret;
695}
696
697int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
698 void *buffer, size_t buflen, u32 *result) 882 void *buffer, size_t buflen, u32 *result)
699{ 883{
700 struct nvme_command c; 884 struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
713 return ret; 897 return ret;
714} 898}
715 899
716int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
717{
718 struct nvme_command c = { };
719 int error;
720
721 c.common.opcode = nvme_admin_get_log_page,
722 c.common.nsid = cpu_to_le32(0xFFFFFFFF),
723 c.common.cdw10[0] = cpu_to_le32(
724 (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
725 NVME_LOG_SMART),
726
727 *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
728 if (!*log)
729 return -ENOMEM;
730
731 error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
732 sizeof(struct nvme_smart_log));
733 if (error)
734 kfree(*log);
735 return error;
736}
737
738int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) 900int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
739{ 901{
740 u32 q_count = (*count - 1) | ((*count - 1) << 16); 902 u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
752 * access to the admin queue, as that might be only way to fix them up. 914 * access to the admin queue, as that might be only way to fix them up.
753 */ 915 */
754 if (status > 0) { 916 if (status > 0) {
755 dev_err(ctrl->dev, "Could not set queue count (%d)\n", status); 917 dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
756 *count = 0; 918 *count = 0;
757 } else { 919 } else {
758 nr_io_queues = min(result & 0xffff, result >> 16) + 1; 920 nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
870 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); 1032 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
871 case NVME_IOCTL_SUBMIT_IO: 1033 case NVME_IOCTL_SUBMIT_IO:
872 return nvme_submit_io(ns, (void __user *)arg); 1034 return nvme_submit_io(ns, (void __user *)arg);
873#ifdef CONFIG_BLK_DEV_NVME_SCSI
874 case SG_GET_VERSION_NUM:
875 return nvme_sg_get_version_num((void __user *)arg);
876 case SG_IO:
877 return nvme_sg_io(ns, (void __user *)arg);
878#endif
879 default: 1035 default:
880#ifdef CONFIG_NVM 1036#ifdef CONFIG_NVM
881 if (ns->ndev) 1037 if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
892static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, 1048static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
893 unsigned int cmd, unsigned long arg) 1049 unsigned int cmd, unsigned long arg)
894{ 1050{
895 switch (cmd) {
896 case SG_IO:
897 return -ENOIOCTLCMD;
898 }
899 return nvme_ioctl(bdev, mode, cmd, arg); 1051 return nvme_ioctl(bdev, mode, cmd, arg);
900} 1052}
901#else 1053#else
@@ -983,6 +1135,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
983} 1135}
984#endif /* CONFIG_BLK_DEV_INTEGRITY */ 1136#endif /* CONFIG_BLK_DEV_INTEGRITY */
985 1137
1138static void nvme_set_chunk_size(struct nvme_ns *ns)
1139{
1140 u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
1141 blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
1142}
1143
986static void nvme_config_discard(struct nvme_ns *ns) 1144static void nvme_config_discard(struct nvme_ns *ns)
987{ 1145{
988 struct nvme_ctrl *ctrl = ns->ctrl; 1146 struct nvme_ctrl *ctrl = ns->ctrl;
@@ -991,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
991 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < 1149 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
992 NVME_DSM_MAX_RANGES); 1150 NVME_DSM_MAX_RANGES);
993 1151
994 ns->queue->limits.discard_alignment = logical_block_size; 1152 if (ctrl->nr_streams && ns->sws && ns->sgs) {
995 ns->queue->limits.discard_granularity = logical_block_size; 1153 unsigned int sz = logical_block_size * ns->sws * ns->sgs;
1154
1155 ns->queue->limits.discard_alignment = sz;
1156 ns->queue->limits.discard_granularity = sz;
1157 } else {
1158 ns->queue->limits.discard_alignment = logical_block_size;
1159 ns->queue->limits.discard_granularity = logical_block_size;
1160 }
996 blk_queue_max_discard_sectors(ns->queue, UINT_MAX); 1161 blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
997 blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); 1162 blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
998 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); 1163 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -1016,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
1016 if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) 1181 if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
1017 memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); 1182 memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
1018 if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) 1183 if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
1019 memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid)); 1184 memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
1185 if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
 1186 /* Don't treat an error as fatal; we potentially
 1187 * already have an NGUID or EUI-64
1188 */
1189 if (nvme_identify_ns_descs(ns, ns->ns_id))
1190 dev_warn(ns->ctrl->device,
1191 "%s: Identify Descriptors failed\n", __func__);
1192 }
1020 1193
1021 return 0; 1194 return 0;
1022} 1195}
@@ -1024,6 +1197,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
1024static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) 1197static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1025{ 1198{
1026 struct nvme_ns *ns = disk->private_data; 1199 struct nvme_ns *ns = disk->private_data;
1200 struct nvme_ctrl *ctrl = ns->ctrl;
1027 u16 bs; 1201 u16 bs;
1028 1202
1029 /* 1203 /*
@@ -1034,12 +1208,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1034 if (ns->lba_shift == 0) 1208 if (ns->lba_shift == 0)
1035 ns->lba_shift = 9; 1209 ns->lba_shift = 9;
1036 bs = 1 << ns->lba_shift; 1210 bs = 1 << ns->lba_shift;
1211 ns->noiob = le16_to_cpu(id->noiob);
1037 1212
1038 blk_mq_freeze_queue(disk->queue); 1213 blk_mq_freeze_queue(disk->queue);
1039 1214
1040 if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) 1215 if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
1041 nvme_prep_integrity(disk, id, bs); 1216 nvme_prep_integrity(disk, id, bs);
1042 blk_queue_logical_block_size(ns->queue, bs); 1217 blk_queue_logical_block_size(ns->queue, bs);
1218 if (ns->noiob)
1219 nvme_set_chunk_size(ns);
1043 if (ns->ms && !blk_get_integrity(disk) && !ns->ext) 1220 if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
1044 nvme_init_integrity(ns); 1221 nvme_init_integrity(ns);
1045 if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) 1222 if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1047,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1047 else 1224 else
1048 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); 1225 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
1049 1226
1050 if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) 1227 if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
1051 nvme_config_discard(ns); 1228 nvme_config_discard(ns);
1052 blk_mq_unfreeze_queue(disk->queue); 1229 blk_mq_unfreeze_queue(disk->queue);
1053} 1230}
@@ -1283,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
1283 1460
1284int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) 1461int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
1285{ 1462{
1286 unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies; 1463 unsigned long timeout = jiffies + (shutdown_timeout * HZ);
1287 u32 csts; 1464 u32 csts;
1288 int ret; 1465 int ret;
1289 1466
@@ -1372,7 +1549,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
1372 if (!table) 1549 if (!table)
1373 return; 1550 return;
1374 1551
1375 if (ctrl->ps_max_latency_us == 0) { 1552 if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
1376 /* Turn off APST. */ 1553 /* Turn off APST. */
1377 apste = 0; 1554 apste = 0;
1378 dev_dbg(ctrl->device, "APST disabled\n"); 1555 dev_dbg(ctrl->device, "APST disabled\n");
@@ -1528,6 +1705,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
1528 string_matches(id->fr, q->fr, sizeof(id->fr)); 1705 string_matches(id->fr, q->fr, sizeof(id->fr));
1529} 1706}
1530 1707
1708static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
1709{
1710 size_t nqnlen;
1711 int off;
1712
1713 nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
1714 if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
1715 strcpy(ctrl->subnqn, id->subnqn);
1716 return;
1717 }
1718
1719 if (ctrl->vs >= NVME_VS(1, 2, 1))
1720 dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
1721
1722 /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
1723 off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
1724 "nqn.2014.08.org.nvmexpress:%4x%4x",
1725 le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
1726 memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
1727 off += sizeof(id->sn);
1728 memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
1729 off += sizeof(id->mn);
1730 memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
1731}
1732
1531/* 1733/*
1532 * Initialize the cached copies of the Identify data and various controller 1734 * Initialize the cached copies of the Identify data and various controller
1533 * register in our nvme_ctrl structure. This should be called as soon as 1735 * register in our nvme_ctrl structure. This should be called as soon as
@@ -1539,7 +1741,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1539 u64 cap; 1741 u64 cap;
1540 int ret, page_shift; 1742 int ret, page_shift;
1541 u32 max_hw_sectors; 1743 u32 max_hw_sectors;
1542 u8 prev_apsta; 1744 bool prev_apst_enabled;
1543 1745
1544 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); 1746 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
1545 if (ret) { 1747 if (ret) {
@@ -1563,6 +1765,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1563 return -EIO; 1765 return -EIO;
1564 } 1766 }
1565 1767
1768 nvme_init_subnqn(ctrl, id);
1769
1566 if (!ctrl->identified) { 1770 if (!ctrl->identified) {
1567 /* 1771 /*
1568 * Check for quirks. Quirk can depend on firmware version, 1772 * Check for quirks. Quirk can depend on firmware version,
@@ -1582,7 +1786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1582 } 1786 }
1583 1787
1584 if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { 1788 if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
1585 dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); 1789 dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
1586 ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; 1790 ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
1587 } 1791 }
1588 1792
@@ -1607,16 +1811,17 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1607 ctrl->kas = le16_to_cpu(id->kas); 1811 ctrl->kas = le16_to_cpu(id->kas);
1608 1812
1609 ctrl->npss = id->npss; 1813 ctrl->npss = id->npss;
1610 prev_apsta = ctrl->apsta; 1814 ctrl->apsta = id->apsta;
1815 prev_apst_enabled = ctrl->apst_enabled;
1611 if (ctrl->quirks & NVME_QUIRK_NO_APST) { 1816 if (ctrl->quirks & NVME_QUIRK_NO_APST) {
1612 if (force_apst && id->apsta) { 1817 if (force_apst && id->apsta) {
1613 dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n"); 1818 dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
1614 ctrl->apsta = 1; 1819 ctrl->apst_enabled = true;
1615 } else { 1820 } else {
1616 ctrl->apsta = 0; 1821 ctrl->apst_enabled = false;
1617 } 1822 }
1618 } else { 1823 } else {
1619 ctrl->apsta = id->apsta; 1824 ctrl->apst_enabled = id->apsta;
1620 } 1825 }
1621 memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); 1826 memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
1622 1827
@@ -1634,22 +1839,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1634 ret = -EINVAL; 1839 ret = -EINVAL;
1635 1840
1636 if (!ctrl->opts->discovery_nqn && !ctrl->kas) { 1841 if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
1637 dev_err(ctrl->dev, 1842 dev_err(ctrl->device,
1638 "keep-alive support is mandatory for fabrics\n"); 1843 "keep-alive support is mandatory for fabrics\n");
1639 ret = -EINVAL; 1844 ret = -EINVAL;
1640 } 1845 }
1641 } else { 1846 } else {
1642 ctrl->cntlid = le16_to_cpu(id->cntlid); 1847 ctrl->cntlid = le16_to_cpu(id->cntlid);
1848 ctrl->hmpre = le32_to_cpu(id->hmpre);
1849 ctrl->hmmin = le32_to_cpu(id->hmmin);
1643 } 1850 }
1644 1851
1645 kfree(id); 1852 kfree(id);
1646 1853
1647 if (ctrl->apsta && !prev_apsta) 1854 if (ctrl->apst_enabled && !prev_apst_enabled)
1648 dev_pm_qos_expose_latency_tolerance(ctrl->device); 1855 dev_pm_qos_expose_latency_tolerance(ctrl->device);
1649 else if (!ctrl->apsta && prev_apsta) 1856 else if (!ctrl->apst_enabled && prev_apst_enabled)
1650 dev_pm_qos_hide_latency_tolerance(ctrl->device); 1857 dev_pm_qos_hide_latency_tolerance(ctrl->device);
1651 1858
1652 nvme_configure_apst(ctrl); 1859 nvme_configure_apst(ctrl);
1860 nvme_configure_directives(ctrl);
1653 1861
1654 ctrl->identified = true; 1862 ctrl->identified = true;
1655 1863
@@ -1735,7 +1943,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
1735 return nvme_dev_user_cmd(ctrl, argp); 1943 return nvme_dev_user_cmd(ctrl, argp);
1736 case NVME_IOCTL_RESET: 1944 case NVME_IOCTL_RESET:
1737 dev_warn(ctrl->device, "resetting controller\n"); 1945 dev_warn(ctrl->device, "resetting controller\n");
1738 return ctrl->ops->reset_ctrl(ctrl); 1946 return nvme_reset_ctrl_sync(ctrl);
1739 case NVME_IOCTL_SUBSYS_RESET: 1947 case NVME_IOCTL_SUBSYS_RESET:
1740 return nvme_reset_subsystem(ctrl); 1948 return nvme_reset_subsystem(ctrl);
1741 case NVME_IOCTL_RESCAN: 1949 case NVME_IOCTL_RESCAN:
@@ -1761,7 +1969,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
1761 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 1969 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1762 int ret; 1970 int ret;
1763 1971
1764 ret = ctrl->ops->reset_ctrl(ctrl); 1972 ret = nvme_reset_ctrl_sync(ctrl);
1765 if (ret < 0) 1973 if (ret < 0)
1766 return ret; 1974 return ret;
1767 return count; 1975 return count;
@@ -1787,8 +1995,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
1787 int serial_len = sizeof(ctrl->serial); 1995 int serial_len = sizeof(ctrl->serial);
1788 int model_len = sizeof(ctrl->model); 1996 int model_len = sizeof(ctrl->model);
1789 1997
1790 if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) 1998 if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
1791 return sprintf(buf, "eui.%16phN\n", ns->uuid); 1999 return sprintf(buf, "eui.%16phN\n", ns->nguid);
1792 2000
1793 if (memchr_inv(ns->eui, 0, sizeof(ns->eui))) 2001 if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
1794 return sprintf(buf, "eui.%8phN\n", ns->eui); 2002 return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1803,11 +2011,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
1803} 2011}
1804static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); 2012static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
1805 2013
2014static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
2015 char *buf)
2016{
2017 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
2018 return sprintf(buf, "%pU\n", ns->nguid);
2019}
2020static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
2021
1806static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, 2022static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
1807 char *buf) 2023 char *buf)
1808{ 2024{
1809 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); 2025 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
1810 return sprintf(buf, "%pU\n", ns->uuid); 2026
 2027 /* For backward compatibility, expose the NGUID to userspace if
2028 * we have no UUID set
2029 */
2030 if (uuid_is_null(&ns->uuid)) {
2031 printk_ratelimited(KERN_WARNING
 2032 "No UUID available, providing old NGUID\n");
2033 return sprintf(buf, "%pU\n", ns->nguid);
2034 }
2035 return sprintf(buf, "%pU\n", &ns->uuid);
1811} 2036}
1812static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); 2037static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
1813 2038
@@ -1830,6 +2055,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
1830static struct attribute *nvme_ns_attrs[] = { 2055static struct attribute *nvme_ns_attrs[] = {
1831 &dev_attr_wwid.attr, 2056 &dev_attr_wwid.attr,
1832 &dev_attr_uuid.attr, 2057 &dev_attr_uuid.attr,
2058 &dev_attr_nguid.attr,
1833 &dev_attr_eui.attr, 2059 &dev_attr_eui.attr,
1834 &dev_attr_nsid.attr, 2060 &dev_attr_nsid.attr,
1835 NULL, 2061 NULL,
@@ -1842,7 +2068,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
1842 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); 2068 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
1843 2069
1844 if (a == &dev_attr_uuid.attr) { 2070 if (a == &dev_attr_uuid.attr) {
1845 if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) 2071 if (uuid_is_null(&ns->uuid) ||
2072 !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
2073 return 0;
2074 }
2075 if (a == &dev_attr_nguid.attr) {
2076 if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
1846 return 0; 2077 return 0;
1847 } 2078 }
1848 if (a == &dev_attr_eui.attr) { 2079 if (a == &dev_attr_eui.attr) {
@@ -1931,8 +2162,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
1931{ 2162{
1932 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 2163 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1933 2164
1934 return snprintf(buf, PAGE_SIZE, "%s\n", 2165 return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
1935 ctrl->ops->get_subsysnqn(ctrl));
1936} 2166}
1937static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); 2167static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
1938 2168
@@ -1961,24 +2191,16 @@ static struct attribute *nvme_dev_attrs[] = {
1961 NULL 2191 NULL
1962}; 2192};
1963 2193
1964#define CHECK_ATTR(ctrl, a, name) \
1965 if ((a) == &dev_attr_##name.attr && \
1966 !(ctrl)->ops->get_##name) \
1967 return 0
1968
1969static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, 2194static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
1970 struct attribute *a, int n) 2195 struct attribute *a, int n)
1971{ 2196{
1972 struct device *dev = container_of(kobj, struct device, kobj); 2197 struct device *dev = container_of(kobj, struct device, kobj);
1973 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 2198 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1974 2199
1975 if (a == &dev_attr_delete_controller.attr) { 2200 if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
1976 if (!ctrl->ops->delete_ctrl) 2201 return 0;
1977 return 0; 2202 if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
1978 } 2203 return 0;
1979
1980 CHECK_ATTR(ctrl, a, subsysnqn);
1981 CHECK_ATTR(ctrl, a, address);
1982 2204
1983 return a->mode; 2205 return a->mode;
1984} 2206}
@@ -2019,6 +2241,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2019 return ret; 2241 return ret;
2020} 2242}
2021 2243
2244static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
2245{
2246 struct streams_directive_params s;
2247 int ret;
2248
2249 if (!ctrl->nr_streams)
2250 return 0;
2251
2252 ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
2253 if (ret)
2254 return ret;
2255
2256 ns->sws = le32_to_cpu(s.sws);
2257 ns->sgs = le16_to_cpu(s.sgs);
2258
2259 if (ns->sws) {
2260 unsigned int bs = 1 << ns->lba_shift;
2261
2262 blk_queue_io_min(ns->queue, bs * ns->sws);
2263 if (ns->sgs)
2264 blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
2265 }
2266
2267 return 0;
2268}
2269
2022static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) 2270static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2023{ 2271{
2024 struct nvme_ns *ns; 2272 struct nvme_ns *ns;
@@ -2048,6 +2296,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2048 2296
2049 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); 2297 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
2050 nvme_set_queue_limits(ctrl, ns->queue); 2298 nvme_set_queue_limits(ctrl, ns->queue);
2299 nvme_setup_streams_ns(ctrl, ns);
2051 2300
2052 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); 2301 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
2053 2302
@@ -2056,7 +2305,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2056 2305
2057 if (nvme_nvm_ns_supported(ns, id) && 2306 if (nvme_nvm_ns_supported(ns, id) &&
2058 nvme_nvm_register(ns, disk_name, node)) { 2307 nvme_nvm_register(ns, disk_name, node)) {
2059 dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__); 2308 dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
2060 goto out_free_id; 2309 goto out_free_id;
2061 } 2310 }
2062 2311
@@ -2231,7 +2480,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
2231 * removal. 2480 * removal.
2232 */ 2481 */
2233 if (ctrl->state == NVME_CTRL_LIVE) 2482 if (ctrl->state == NVME_CTRL_LIVE)
2234 schedule_work(&ctrl->scan_work); 2483 queue_work(nvme_wq, &ctrl->scan_work);
2235} 2484}
2236EXPORT_SYMBOL_GPL(nvme_queue_scan); 2485EXPORT_SYMBOL_GPL(nvme_queue_scan);
2237 2486
@@ -2286,7 +2535,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
2286 /*FALLTHRU*/ 2535 /*FALLTHRU*/
2287 case NVME_SC_ABORT_REQ: 2536 case NVME_SC_ABORT_REQ:
2288 ++ctrl->event_limit; 2537 ++ctrl->event_limit;
2289 schedule_work(&ctrl->async_event_work); 2538 queue_work(nvme_wq, &ctrl->async_event_work);
2290 break; 2539 break;
2291 default: 2540 default:
2292 break; 2541 break;
@@ -2309,7 +2558,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
2309void nvme_queue_async_events(struct nvme_ctrl *ctrl) 2558void nvme_queue_async_events(struct nvme_ctrl *ctrl)
2310{ 2559{
2311 ctrl->event_limit = NVME_NR_AERS; 2560 ctrl->event_limit = NVME_NR_AERS;
2312 schedule_work(&ctrl->async_event_work); 2561 queue_work(nvme_wq, &ctrl->async_event_work);
2313} 2562}
2314EXPORT_SYMBOL_GPL(nvme_queue_async_events); 2563EXPORT_SYMBOL_GPL(nvme_queue_async_events);
2315 2564
@@ -2442,6 +2691,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
2442 2691
2443 mutex_lock(&ctrl->namespaces_mutex); 2692 mutex_lock(&ctrl->namespaces_mutex);
2444 2693
2694 /* Forcibly unquiesce queues to avoid blocking dispatch */
2695 blk_mq_unquiesce_queue(ctrl->admin_q);
2696
2445 /* Forcibly start all queues to avoid having stuck requests */ 2697 /* Forcibly start all queues to avoid having stuck requests */
2446 blk_mq_start_hw_queues(ctrl->admin_q); 2698 blk_mq_start_hw_queues(ctrl->admin_q);
2447 2699
@@ -2455,6 +2707,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
2455 revalidate_disk(ns->disk); 2707 revalidate_disk(ns->disk);
2456 blk_set_queue_dying(ns->queue); 2708 blk_set_queue_dying(ns->queue);
2457 2709
2710 /* Forcibly unquiesce queues to avoid blocking dispatch */
2711 blk_mq_unquiesce_queue(ns->queue);
2712
2458 /* 2713 /*
2459 * Forcibly start all queues to avoid having stuck requests. 2714 * Forcibly start all queues to avoid having stuck requests.
2460 * Note that we must ensure the queues are not stopped 2715 * Note that we must ensure the queues are not stopped
@@ -2533,7 +2788,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
2533 2788
2534 mutex_lock(&ctrl->namespaces_mutex); 2789 mutex_lock(&ctrl->namespaces_mutex);
2535 list_for_each_entry(ns, &ctrl->namespaces, list) { 2790 list_for_each_entry(ns, &ctrl->namespaces, list) {
2536 blk_mq_start_stopped_hw_queues(ns->queue, true); 2791 blk_mq_unquiesce_queue(ns->queue);
2537 blk_mq_kick_requeue_list(ns->queue); 2792 blk_mq_kick_requeue_list(ns->queue);
2538 } 2793 }
2539 mutex_unlock(&ctrl->namespaces_mutex); 2794 mutex_unlock(&ctrl->namespaces_mutex);
@@ -2544,10 +2799,15 @@ int __init nvme_core_init(void)
2544{ 2799{
2545 int result; 2800 int result;
2546 2801
2802 nvme_wq = alloc_workqueue("nvme-wq",
2803 WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
2804 if (!nvme_wq)
2805 return -ENOMEM;
2806
2547 result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", 2807 result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
2548 &nvme_dev_fops); 2808 &nvme_dev_fops);
2549 if (result < 0) 2809 if (result < 0)
2550 return result; 2810 goto destroy_wq;
2551 else if (result > 0) 2811 else if (result > 0)
2552 nvme_char_major = result; 2812 nvme_char_major = result;
2553 2813
@@ -2559,8 +2819,10 @@ int __init nvme_core_init(void)
2559 2819
2560 return 0; 2820 return 0;
2561 2821
2562 unregister_chrdev: 2822unregister_chrdev:
2563 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 2823 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
2824destroy_wq:
2825 destroy_workqueue(nvme_wq);
2564 return result; 2826 return result;
2565} 2827}
2566 2828
@@ -2568,6 +2830,7 @@ void nvme_core_exit(void)
2568{ 2830{
2569 class_destroy(nvme_class); 2831 class_destroy(nvme_class);
2570 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 2832 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
2833 destroy_workqueue(nvme_wq);
2571} 2834}
2572 2835
2573MODULE_LICENSE("GPL"); 2836MODULE_LICENSE("GPL");
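Aside: with nvme_wq now allocated first in nvme_core_init(), every later failure has to unwind it, hence the new destroy_wq label. A hedged userspace sketch of the same goto-unwind shape; all helpers below are stand-ins, not kernel APIs:

    #include <stdio.h>
    #include <stdlib.h>

    struct wq { int dummy; };

    static struct wq *alloc_wq(void)       { return malloc(sizeof(struct wq)); }
    static void destroy_wq(struct wq *wq)  { free(wq); }

    static int register_chrdev_stub(void)  { return 0; }   /* 0 = success */
    static void unregister_chrdev_stub(void) { }
    static int create_class_stub(void)     { return -1; }  /* force a failure */

    static struct wq *wq;

    static int core_init(void)
    {
        int result;

        wq = alloc_wq();
        if (!wq)
            return -1;

        result = register_chrdev_stub();
        if (result < 0)
            goto destroy_wq;

        result = create_class_stub();
        if (result < 0)
            goto unregister_chrdev;

        return 0;

    unregister_chrdev:
        unregister_chrdev_stub();
    destroy_wq:
        destroy_wq(wq);
        return result;
    }

    int main(void)
    {
        printf("core_init: %d\n", core_init());
        return 0;
    }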
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index c190d7e36900..2e582a240943 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -58,7 +58,6 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
58 58
59 kref_init(&host->ref); 59 kref_init(&host->ref);
60 memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); 60 memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
61 uuid_gen(&host->id);
62 61
63 list_add_tail(&host->list, &nvmf_hosts); 62 list_add_tail(&host->list, &nvmf_hosts);
64out_unlock: 63out_unlock:
@@ -75,7 +74,6 @@ static struct nvmf_host *nvmf_host_default(void)
75 return NULL; 74 return NULL;
76 75
77 kref_init(&host->ref); 76 kref_init(&host->ref);
78 uuid_gen(&host->id);
79 snprintf(host->nqn, NVMF_NQN_SIZE, 77 snprintf(host->nqn, NVMF_NQN_SIZE,
80 "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); 78 "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
81 79
@@ -128,16 +126,6 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
128EXPORT_SYMBOL_GPL(nvmf_get_address); 126EXPORT_SYMBOL_GPL(nvmf_get_address);
129 127
130/** 128/**
131 * nvmf_get_subsysnqn() - Get subsystem NQN
132 * @ctrl: Host NVMe controller instance which we got the NQN
133 */
134const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
135{
136 return ctrl->opts->subsysnqn;
137}
138EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
139
140/**
141 * nvmf_reg_read32() - NVMe Fabrics "Property Get" API function. 129 * nvmf_reg_read32() - NVMe Fabrics "Property Get" API function.
142 * @ctrl: Host NVMe controller instance maintaining the admin 130 * @ctrl: Host NVMe controller instance maintaining the admin
143 * queue used to submit the property read command to 131 * queue used to submit the property read command to
@@ -337,6 +325,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
337 } 325 }
338 } 326 }
339 break; 327 break;
328
329 case NVME_SC_CONNECT_INVALID_HOST:
330 dev_err(ctrl->device,
331 "Connect for subsystem %s is not allowed, hostnqn: %s\n",
332 data->subsysnqn, data->hostnqn);
333 break;
334
335 case NVME_SC_CONNECT_CTRL_BUSY:
336 dev_err(ctrl->device,
337 "Connect command failed: controller is busy or not available\n");
338 break;
339
340 case NVME_SC_CONNECT_FORMAT:
341 dev_err(ctrl->device,
342 "Connect incompatible format: %d",
343 cmd->connect.recfmt);
344 break;
345
340 default: 346 default:
341 dev_err(ctrl->device, 347 dev_err(ctrl->device,
342 "Connect command failed, error wo/DNR bit: %d\n", 348 "Connect command failed, error wo/DNR bit: %d\n",
@@ -376,13 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
376 cmd.connect.opcode = nvme_fabrics_command; 382 cmd.connect.opcode = nvme_fabrics_command;
377 cmd.connect.fctype = nvme_fabrics_type_connect; 383 cmd.connect.fctype = nvme_fabrics_type_connect;
378 cmd.connect.qid = 0; 384 cmd.connect.qid = 0;
379 385 cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
380 /*
381 * fabrics spec sets a minimum of depth 32 for admin queue,
382 * so set the queue with this depth always until
383 * justification otherwise.
384 */
385 cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
386 386
387 /* 387 /*
388 * Set keep-alive timeout in seconds granularity (ms * 1000) 388 * Set keep-alive timeout in seconds granularity (ms * 1000)
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
474bool nvmf_should_reconnect(struct nvme_ctrl *ctrl) 474bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
475{ 475{
476 if (ctrl->opts->max_reconnects != -1 && 476 if (ctrl->opts->max_reconnects != -1 &&
477 ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects) 477 ctrl->nr_reconnects < ctrl->opts->max_reconnects)
478 return true; 478 return true;
479 479
480 return false; 480 return false;
@@ -547,6 +547,7 @@ static const match_table_t opt_tokens = {
547 { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, 547 { NVMF_OPT_KATO, "keep_alive_tmo=%d" },
548 { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, 548 { NVMF_OPT_HOSTNQN, "hostnqn=%s" },
549 { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, 549 { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
550 { NVMF_OPT_HOST_ID, "hostid=%s" },
550 { NVMF_OPT_ERR, NULL } 551 { NVMF_OPT_ERR, NULL }
551}; 552};
552 553
@@ -558,6 +559,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
558 int token, ret = 0; 559 int token, ret = 0;
559 size_t nqnlen = 0; 560 size_t nqnlen = 0;
560 int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; 561 int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
562 uuid_t hostid;
561 563
562 /* Set defaults */ 564 /* Set defaults */
563 opts->queue_size = NVMF_DEF_QUEUE_SIZE; 565 opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -568,6 +570,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
568 if (!options) 570 if (!options)
569 return -ENOMEM; 571 return -ENOMEM;
570 572
573 uuid_gen(&hostid);
574
571 while ((p = strsep(&o, ",\n")) != NULL) { 575 while ((p = strsep(&o, ",\n")) != NULL) {
572 if (!*p) 576 if (!*p)
573 continue; 577 continue;
@@ -724,6 +728,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
724 } 728 }
725 opts->host_traddr = p; 729 opts->host_traddr = p;
726 break; 730 break;
731 case NVMF_OPT_HOST_ID:
732 p = match_strdup(args);
733 if (!p) {
734 ret = -ENOMEM;
735 goto out;
736 }
737 if (uuid_parse(p, &hostid)) {
738 ret = -EINVAL;
739 goto out;
740 }
741 break;
727 default: 742 default:
728 pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", 743 pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
729 p); 744 p);
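Aside: the new hostid= option is pulled out of the same comma-separated option string as the other fabrics parameters and validated as a UUID before it overrides the generated default. A rough userspace sketch of that parsing loop; the layout check below stands in for the kernel's uuid_parse(), and all names are example-only:

    #define _DEFAULT_SOURCE
    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    /* Accept the canonical 8-4-4-4-12 hex layout. */
    static int looks_like_uuid(const char *s)
    {
        static const int dash[] = { 8, 13, 18, 23 };
        int i, d = 0;

        if (strlen(s) != 36)
            return 0;
        for (i = 0; i < 36; i++) {
            if (d < 4 && i == dash[d]) {
                if (s[i] != '-')
                    return 0;
                d++;
            } else if (!isxdigit((unsigned char)s[i])) {
                return 0;
            }
        }
        return 1;
    }

    int main(void)
    {
        char opts[] = "nqn=nqn.test,hostid=12345678-1234-1234-1234-123456789abc";
        char *o = opts, *p;

        while ((p = strsep(&o, ",\n")) != NULL) {
            if (!*p)
                continue;
            if (!strncmp(p, "hostid=", 7))
                printf("hostid %s: %s\n", p + 7,
                       looks_like_uuid(p + 7) ? "ok" : "invalid");
            else
                printf("option: %s\n", p);
        }
        return 0;
    }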
@@ -743,6 +758,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
743 opts->host = nvmf_default_host; 758 opts->host = nvmf_default_host;
744 } 759 }
745 760
761 uuid_copy(&opts->host->id, &hostid);
762
746out: 763out:
747 if (!opts->discovery_nqn && !opts->kato) 764 if (!opts->discovery_nqn && !opts->kato)
748 opts->kato = NVME_DEFAULT_KATO; 765 opts->kato = NVME_DEFAULT_KATO;
@@ -803,7 +820,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
803 820
804#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN) 821#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
805#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ 822#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
806 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN) 823 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
824 NVMF_OPT_HOST_ID)
807 825
808static struct nvme_ctrl * 826static struct nvme_ctrl *
809nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) 827nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -854,6 +872,15 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
854 goto out_unlock; 872 goto out_unlock;
855 } 873 }
856 874
875 if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
876 dev_warn(ctrl->device,
877 "controller returned incorrect NQN: \"%s\".\n",
878 ctrl->subnqn);
879 mutex_unlock(&nvmf_transports_mutex);
880 ctrl->ops->delete_ctrl(ctrl);
881 return ERR_PTR(-EINVAL);
882 }
883
857 mutex_unlock(&nvmf_transports_mutex); 884 mutex_unlock(&nvmf_transports_mutex);
858 return ctrl; 885 return ctrl;
859 886
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 29be7600689d..bf33663218cd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -56,6 +56,7 @@ enum {
56 NVMF_OPT_RECONNECT_DELAY = 1 << 9, 56 NVMF_OPT_RECONNECT_DELAY = 1 << 9,
57 NVMF_OPT_HOST_TRADDR = 1 << 10, 57 NVMF_OPT_HOST_TRADDR = 1 << 10,
58 NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, 58 NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
59 NVMF_OPT_HOST_ID = 1 << 12,
59}; 60};
60 61
61/** 62/**
@@ -80,7 +81,6 @@ enum {
80 * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN. 81 * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
81 * @kato: Keep-alive timeout. 82 * @kato: Keep-alive timeout.
82 * @host: Virtual NVMe host, contains the NQN and Host ID. 83 * @host: Virtual NVMe host, contains the NQN and Host ID.
83 * @nr_reconnects: number of reconnect attempted since the last ctrl failure
84 * @max_reconnects: maximum number of allowed reconnect attempts before removing 84 * @max_reconnects: maximum number of allowed reconnect attempts before removing
85 * the controller, (-1) means reconnect forever, zero means remove 85 * the controller, (-1) means reconnect forever, zero means remove
86 * immediately; 86 * immediately;
@@ -98,7 +98,6 @@ struct nvmf_ctrl_options {
98 bool discovery_nqn; 98 bool discovery_nqn;
99 unsigned int kato; 99 unsigned int kato;
100 struct nvmf_host *host; 100 struct nvmf_host *host;
101 int nr_reconnects;
102 int max_reconnects; 101 int max_reconnects;
103}; 102};
104 103
@@ -140,7 +139,6 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
140int nvmf_register_transport(struct nvmf_transport_ops *ops); 139int nvmf_register_transport(struct nvmf_transport_ops *ops);
141void nvmf_unregister_transport(struct nvmf_transport_ops *ops); 140void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
142void nvmf_free_options(struct nvmf_ctrl_options *opts); 141void nvmf_free_options(struct nvmf_ctrl_options *opts);
143const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
144int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); 142int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
145bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); 143bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
146 144
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5ee4c71d168d..ed87214fdc0e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -36,7 +36,7 @@
36 */ 36 */
37#define NVME_FC_NR_AEN_COMMANDS 1 37#define NVME_FC_NR_AEN_COMMANDS 1
38#define NVME_FC_AQ_BLKMQ_DEPTH \ 38#define NVME_FC_AQ_BLKMQ_DEPTH \
39 (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) 39 (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
40#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) 40#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1)
41 41
42enum nvme_fc_queue_flags { 42enum nvme_fc_queue_flags {
@@ -161,12 +161,12 @@ struct nvme_fc_ctrl {
161 struct blk_mq_tag_set tag_set; 161 struct blk_mq_tag_set tag_set;
162 162
163 struct work_struct delete_work; 163 struct work_struct delete_work;
164 struct work_struct reset_work;
165 struct delayed_work connect_work; 164 struct delayed_work connect_work;
166 165
167 struct kref ref; 166 struct kref ref;
168 u32 flags; 167 u32 flags;
169 u32 iocnt; 168 u32 iocnt;
169 wait_queue_head_t ioabort_wait;
170 170
171 struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; 171 struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS];
172 172
@@ -214,7 +214,6 @@ static LIST_HEAD(nvme_fc_lport_list);
214static DEFINE_IDA(nvme_fc_local_port_cnt); 214static DEFINE_IDA(nvme_fc_local_port_cnt);
215static DEFINE_IDA(nvme_fc_ctrl_cnt); 215static DEFINE_IDA(nvme_fc_ctrl_cnt);
216 216
217static struct workqueue_struct *nvme_fc_wq;
218 217
219 218
220 219
@@ -1241,8 +1240,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
1241 1240
1242 spin_lock_irqsave(&ctrl->lock, flags); 1241 spin_lock_irqsave(&ctrl->lock, flags);
1243 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1242 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
1244 if (ctrl->flags & FCCTRL_TERMIO) 1243 if (ctrl->flags & FCCTRL_TERMIO) {
1245 ctrl->iocnt--; 1244 if (!--ctrl->iocnt)
1245 wake_up(&ctrl->ioabort_wait);
1246 }
1246 } 1247 }
1247 if (op->flags & FCOP_FLAGS_RELEASED) 1248 if (op->flags & FCOP_FLAGS_RELEASED)
1248 complete_rq = true; 1249 complete_rq = true;
@@ -1449,18 +1450,8 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
1449{ 1450{
1450 struct nvme_fc_ctrl *ctrl = set->driver_data; 1451 struct nvme_fc_ctrl *ctrl = set->driver_data;
1451 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1452 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
1452 struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; 1453 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
1453 1454 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
1454 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
1455}
1456
1457static int
1458nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
1459 unsigned int hctx_idx, unsigned int numa_node)
1460{
1461 struct nvme_fc_ctrl *ctrl = set->driver_data;
1462 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
1463 struct nvme_fc_queue *queue = &ctrl->queues[0];
1464 1455
1465 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1456 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
1466} 1457}
@@ -1758,16 +1749,16 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
1758static void 1749static void
1759nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 1750nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
1760{ 1751{
1752 /* only proceed if in LIVE state - e.g. on first error */
1753 if (ctrl->ctrl.state != NVME_CTRL_LIVE)
1754 return;
1755
1761 dev_warn(ctrl->ctrl.device, 1756 dev_warn(ctrl->ctrl.device,
1762 "NVME-FC{%d}: transport association error detected: %s\n", 1757 "NVME-FC{%d}: transport association error detected: %s\n",
1763 ctrl->cnum, errmsg); 1758 ctrl->cnum, errmsg);
1764 dev_warn(ctrl->ctrl.device, 1759 dev_warn(ctrl->ctrl.device,
1765 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 1760 "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
1766 1761
1767 /* stop the queues on error, cleanup is in reset thread */
1768 if (ctrl->queue_count > 1)
1769 nvme_stop_queues(&ctrl->ctrl);
1770
1771 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 1762 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
1772 dev_err(ctrl->ctrl.device, 1763 dev_err(ctrl->ctrl.device,
1773 "NVME-FC{%d}: error_recovery: Couldn't change state " 1764 "NVME-FC{%d}: error_recovery: Couldn't change state "
@@ -1775,10 +1766,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
1775 return; 1766 return;
1776 } 1767 }
1777 1768
1778 if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) 1769 nvme_reset_ctrl(&ctrl->ctrl);
1779 dev_err(ctrl->ctrl.device,
1780 "NVME-FC{%d}: error_recovery: Failed to schedule "
1781 "reset work\n", ctrl->cnum);
1782} 1770}
1783 1771
1784static enum blk_eh_timer_return 1772static enum blk_eh_timer_return
@@ -1887,7 +1875,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
1887 * level FC exchange resource that is also outstanding. This must be 1875 * level FC exchange resource that is also outstanding. This must be
1888 * considered in all cleanup operations. 1876 * considered in all cleanup operations.
1889 */ 1877 */
1890static int 1878static blk_status_t
1891nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1879nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1892 struct nvme_fc_fcp_op *op, u32 data_len, 1880 struct nvme_fc_fcp_op *op, u32 data_len,
1893 enum nvmefc_fcp_datadir io_dir) 1881 enum nvmefc_fcp_datadir io_dir)
@@ -1902,10 +1890,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1902 * the target device is present 1890 * the target device is present
1903 */ 1891 */
1904 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1892 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
1905 return BLK_MQ_RQ_QUEUE_ERROR; 1893 return BLK_STS_IOERR;
1906 1894
1907 if (!nvme_fc_ctrl_get(ctrl)) 1895 if (!nvme_fc_ctrl_get(ctrl))
1908 return BLK_MQ_RQ_QUEUE_ERROR; 1896 return BLK_STS_IOERR;
1909 1897
1910 /* format the FC-NVME CMD IU and fcp_req */ 1898 /* format the FC-NVME CMD IU and fcp_req */
1911 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 1899 cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1953,8 +1941,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1953 if (ret < 0) { 1941 if (ret < 0) {
1954 nvme_cleanup_cmd(op->rq); 1942 nvme_cleanup_cmd(op->rq);
1955 nvme_fc_ctrl_put(ctrl); 1943 nvme_fc_ctrl_put(ctrl);
1956 return (ret == -ENOMEM || ret == -EAGAIN) ? 1944 if (ret == -ENOMEM || ret == -EAGAIN)
1957 BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; 1945 return BLK_STS_RESOURCE;
1946 return BLK_STS_IOERR;
1958 } 1947 }
1959 } 1948 }
1960 1949
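Aside: the hunk above is part of the driver-wide switch from the old BLK_MQ_RQ_QUEUE_* return codes to blk_status_t, where -ENOMEM/-EAGAIN become a retryable resource status and anything else a hard I/O error. A minimal sketch of that mapping; the enum is a stand-in defined only for this example:

    #include <errno.h>
    #include <stdio.h>

    enum example_status {
        EX_STS_OK = 0,
        EX_STS_RESOURCE,   /* temporarily out of resources, retry later */
        EX_STS_IOERR,      /* hard failure, complete the request with error */
    };

    static enum example_status errno_to_status(int err)
    {
        if (err == 0)
            return EX_STS_OK;
        if (err == -ENOMEM || err == -EAGAIN)
            return EX_STS_RESOURCE;
        return EX_STS_IOERR;
    }

    int main(void)
    {
        printf("-EAGAIN -> %d\n", errno_to_status(-EAGAIN));  /* resource */
        printf("-EINVAL -> %d\n", errno_to_status(-EINVAL));  /* ioerr */
        return 0;
    }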
@@ -1971,28 +1960,26 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1971 queue->lldd_handle, &op->fcp_req); 1960 queue->lldd_handle, &op->fcp_req);
1972 1961
1973 if (ret) { 1962 if (ret) {
1974 if (op->rq) { /* normal request */ 1963 if (op->rq) /* normal request */
1975 nvme_fc_unmap_data(ctrl, op->rq, op); 1964 nvme_fc_unmap_data(ctrl, op->rq, op);
1976 nvme_cleanup_cmd(op->rq);
1977 }
1978 /* else - aen. no cleanup needed */ 1965 /* else - aen. no cleanup needed */
1979 1966
1980 nvme_fc_ctrl_put(ctrl); 1967 nvme_fc_ctrl_put(ctrl);
1981 1968
1982 if (ret != -EBUSY) 1969 if (ret != -EBUSY)
1983 return BLK_MQ_RQ_QUEUE_ERROR; 1970 return BLK_STS_IOERR;
1984 1971
1985 if (op->rq) { 1972 if (op->rq) {
1986 blk_mq_stop_hw_queues(op->rq->q); 1973 blk_mq_stop_hw_queues(op->rq->q);
1987 blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); 1974 blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
1988 } 1975 }
1989 return BLK_MQ_RQ_QUEUE_BUSY; 1976 return BLK_STS_RESOURCE;
1990 } 1977 }
1991 1978
1992 return BLK_MQ_RQ_QUEUE_OK; 1979 return BLK_STS_OK;
1993} 1980}
1994 1981
1995static int 1982static blk_status_t
1996nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 1983nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
1997 const struct blk_mq_queue_data *bd) 1984 const struct blk_mq_queue_data *bd)
1998{ 1985{
@@ -2005,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
2005 struct nvme_command *sqe = &cmdiu->sqe; 1992 struct nvme_command *sqe = &cmdiu->sqe;
2006 enum nvmefc_fcp_datadir io_dir; 1993 enum nvmefc_fcp_datadir io_dir;
2007 u32 data_len; 1994 u32 data_len;
2008 int ret; 1995 blk_status_t ret;
2009 1996
2010 ret = nvme_setup_cmd(ns, rq, sqe); 1997 ret = nvme_setup_cmd(ns, rq, sqe);
2011 if (ret) 1998 if (ret)
@@ -2060,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
2060 struct nvme_fc_fcp_op *aen_op; 2047 struct nvme_fc_fcp_op *aen_op;
2061 unsigned long flags; 2048 unsigned long flags;
2062 bool terminating = false; 2049 bool terminating = false;
2063 int ret; 2050 blk_status_t ret;
2064 2051
2065 if (aer_idx > NVME_FC_NR_AEN_COMMANDS) 2052 if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
2066 return; 2053 return;
@@ -2092,7 +2079,6 @@ __nvme_fc_final_op_cleanup(struct request *rq)
2092 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | 2079 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
2093 FCOP_FLAGS_COMPLETE); 2080 FCOP_FLAGS_COMPLETE);
2094 2081
2095 nvme_cleanup_cmd(rq);
2096 nvme_fc_unmap_data(ctrl, rq, op); 2082 nvme_fc_unmap_data(ctrl, rq, op);
2097 nvme_complete_rq(rq); 2083 nvme_complete_rq(rq);
2098 nvme_fc_ctrl_put(ctrl); 2084 nvme_fc_ctrl_put(ctrl);
@@ -2310,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
2310 int ret; 2296 int ret;
2311 bool changed; 2297 bool changed;
2312 2298
2313 ++ctrl->ctrl.opts->nr_reconnects; 2299 ++ctrl->ctrl.nr_reconnects;
2314 2300
2315 /* 2301 /*
2316 * Create the admin queue 2302 * Create the admin queue
@@ -2407,7 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
2407 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2393 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
2408 WARN_ON_ONCE(!changed); 2394 WARN_ON_ONCE(!changed);
2409 2395
2410 ctrl->ctrl.opts->nr_reconnects = 0; 2396 ctrl->ctrl.nr_reconnects = 0;
2411 2397
2412 if (ctrl->queue_count > 1) { 2398 if (ctrl->queue_count > 1) {
2413 nvme_start_queues(&ctrl->ctrl); 2399 nvme_start_queues(&ctrl->ctrl);
@@ -2493,11 +2479,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
2493 2479
2494 /* wait for all io that had to be aborted */ 2480 /* wait for all io that had to be aborted */
2495 spin_lock_irqsave(&ctrl->lock, flags); 2481 spin_lock_irqsave(&ctrl->lock, flags);
2496 while (ctrl->iocnt) { 2482 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
2497 spin_unlock_irqrestore(&ctrl->lock, flags);
2498 msleep(1000);
2499 spin_lock_irqsave(&ctrl->lock, flags);
2500 }
2501 ctrl->flags &= ~FCCTRL_TERMIO; 2483 ctrl->flags &= ~FCCTRL_TERMIO;
2502 spin_unlock_irqrestore(&ctrl->lock, flags); 2484 spin_unlock_irqrestore(&ctrl->lock, flags);
2503 2485
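Aside: the busy-wait loop that slept in one-second steps is replaced by wait_event_lock_irq() on the new ioabort_wait queue, which the completion path wakes once iocnt drops to zero. A userspace pthread analogue of that pattern, illustrative only and not kernel code:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t ioabort_wait = PTHREAD_COND_INITIALIZER;
    static int iocnt = 3;                 /* pretend three I/Os are aborting */

    static void *complete_ios(void *arg)
    {
        (void)arg;
        for (;;) {
            usleep(1000);                 /* simulate completion latency */
            pthread_mutex_lock(&lock);
            if (--iocnt == 0)             /* last completion wakes the waiter */
                pthread_cond_signal(&ioabort_wait);
            if (iocnt == 0) {
                pthread_mutex_unlock(&lock);
                return NULL;
            }
            pthread_mutex_unlock(&lock);
        }
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, complete_ios, NULL);

        pthread_mutex_lock(&lock);
        while (iocnt != 0)                /* like wait_event_lock_irq() */
            pthread_cond_wait(&ioabort_wait, &lock);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        printf("all aborted I/O completed\n");
        return 0;
    }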
@@ -2527,7 +2509,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
2527 struct nvme_fc_ctrl *ctrl = 2509 struct nvme_fc_ctrl *ctrl =
2528 container_of(work, struct nvme_fc_ctrl, delete_work); 2510 container_of(work, struct nvme_fc_ctrl, delete_work);
2529 2511
2530 cancel_work_sync(&ctrl->reset_work); 2512 cancel_work_sync(&ctrl->ctrl.reset_work);
2531 cancel_delayed_work_sync(&ctrl->connect_work); 2513 cancel_delayed_work_sync(&ctrl->connect_work);
2532 2514
2533 /* 2515 /*
@@ -2554,7 +2536,7 @@ __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
2554 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 2536 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
2555 return true; 2537 return true;
2556 2538
2557 if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) 2539 if (!queue_work(nvme_wq, &ctrl->delete_work))
2558 return true; 2540 return true;
2559 2541
2560 return false; 2542 return false;
@@ -2581,7 +2563,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
2581 ret = __nvme_fc_del_ctrl(ctrl); 2563 ret = __nvme_fc_del_ctrl(ctrl);
2582 2564
2583 if (!ret) 2565 if (!ret)
2584 flush_workqueue(nvme_fc_wq); 2566 flush_workqueue(nvme_wq);
2585 2567
2586 nvme_put_ctrl(&ctrl->ctrl); 2568 nvme_put_ctrl(&ctrl->ctrl);
2587 2569
@@ -2606,13 +2588,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
2606 dev_info(ctrl->ctrl.device, 2588 dev_info(ctrl->ctrl.device,
2607 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", 2589 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
2608 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); 2590 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
2609 queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, 2591 queue_delayed_work(nvme_wq, &ctrl->connect_work,
2610 ctrl->ctrl.opts->reconnect_delay * HZ); 2592 ctrl->ctrl.opts->reconnect_delay * HZ);
2611 } else { 2593 } else {
2612 dev_warn(ctrl->ctrl.device, 2594 dev_warn(ctrl->ctrl.device,
2613 "NVME-FC{%d}: Max reconnect attempts (%d) " 2595 "NVME-FC{%d}: Max reconnect attempts (%d) "
2614 "reached. Removing controller\n", 2596 "reached. Removing controller\n",
2615 ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); 2597 ctrl->cnum, ctrl->ctrl.nr_reconnects);
2616 WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); 2598 WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
2617 } 2599 }
2618} 2600}
@@ -2621,7 +2603,7 @@ static void
2621nvme_fc_reset_ctrl_work(struct work_struct *work) 2603nvme_fc_reset_ctrl_work(struct work_struct *work)
2622{ 2604{
2623 struct nvme_fc_ctrl *ctrl = 2605 struct nvme_fc_ctrl *ctrl =
2624 container_of(work, struct nvme_fc_ctrl, reset_work); 2606 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
2625 int ret; 2607 int ret;
2626 2608
2627 /* will block waiting for io to terminate */ 2609
@@ -2635,29 +2617,6 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
2635 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); 2617 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
2636} 2618}
2637 2619
2638/*
2639 * called by the nvme core layer, for sysfs interface that requests
2640 * a reset of the nvme controller
2641 */
2642static int
2643nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
2644{
2645 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2646
2647 dev_info(ctrl->ctrl.device,
2648 "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
2649
2650 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
2651 return -EBUSY;
2652
2653 if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
2654 return -EBUSY;
2655
2656 flush_work(&ctrl->reset_work);
2657
2658 return 0;
2659}
2660
2661static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2620static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
2662 .name = "fc", 2621 .name = "fc",
2663 .module = THIS_MODULE, 2622 .module = THIS_MODULE,
@@ -2665,11 +2624,9 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
2665 .reg_read32 = nvmf_reg_read32, 2624 .reg_read32 = nvmf_reg_read32,
2666 .reg_read64 = nvmf_reg_read64, 2625 .reg_read64 = nvmf_reg_read64,
2667 .reg_write32 = nvmf_reg_write32, 2626 .reg_write32 = nvmf_reg_write32,
2668 .reset_ctrl = nvme_fc_reset_nvme_ctrl,
2669 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2627 .free_ctrl = nvme_fc_nvme_ctrl_freed,
2670 .submit_async_event = nvme_fc_submit_async_event, 2628 .submit_async_event = nvme_fc_submit_async_event,
2671 .delete_ctrl = nvme_fc_del_nvme_ctrl, 2629 .delete_ctrl = nvme_fc_del_nvme_ctrl,
2672 .get_subsysnqn = nvmf_get_subsysnqn,
2673 .get_address = nvmf_get_address, 2630 .get_address = nvmf_get_address,
2674}; 2631};
2675 2632
@@ -2695,7 +2652,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
2695static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2652static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
2696 .queue_rq = nvme_fc_queue_rq, 2653 .queue_rq = nvme_fc_queue_rq,
2697 .complete = nvme_fc_complete_rq, 2654 .complete = nvme_fc_complete_rq,
2698 .init_request = nvme_fc_init_admin_request, 2655 .init_request = nvme_fc_init_request,
2699 .exit_request = nvme_fc_exit_request, 2656 .exit_request = nvme_fc_exit_request,
2700 .reinit_request = nvme_fc_reinit_request, 2657 .reinit_request = nvme_fc_reinit_request,
2701 .init_hctx = nvme_fc_init_admin_hctx, 2658 .init_hctx = nvme_fc_init_admin_hctx,
@@ -2740,7 +2697,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
2740 kref_init(&ctrl->ref); 2697 kref_init(&ctrl->ref);
2741 2698
2742 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); 2699 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
2743 INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); 2700 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
2744 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 2701 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
2745 spin_lock_init(&ctrl->lock); 2702 spin_lock_init(&ctrl->lock);
2746 2703
@@ -2807,6 +2764,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
2807 nvme_uninit_ctrl(&ctrl->ctrl); 2764 nvme_uninit_ctrl(&ctrl->ctrl);
2808 nvme_put_ctrl(&ctrl->ctrl); 2765 nvme_put_ctrl(&ctrl->ctrl);
2809 2766
2767 /* Remove core ctrl ref. */
2768 nvme_put_ctrl(&ctrl->ctrl);
2769
2810 /* as we're past the point where we transition to the ref 2770 /* as we're past the point where we transition to the ref
2811 * counting teardown path, if we return a bad pointer here, 2771 * counting teardown path, if we return a bad pointer here,
2812 * the calling routine, thinking it's prior to the 2772 * the calling routine, thinking it's prior to the
@@ -2965,20 +2925,7 @@ static struct nvmf_transport_ops nvme_fc_transport = {
2965 2925
2966static int __init nvme_fc_init_module(void) 2926static int __init nvme_fc_init_module(void)
2967{ 2927{
2968 int ret; 2928 return nvmf_register_transport(&nvme_fc_transport);
2969
2970 nvme_fc_wq = create_workqueue("nvme_fc_wq");
2971 if (!nvme_fc_wq)
2972 return -ENOMEM;
2973
2974 ret = nvmf_register_transport(&nvme_fc_transport);
2975 if (ret)
2976 goto err;
2977
2978 return 0;
2979err:
2980 destroy_workqueue(nvme_fc_wq);
2981 return ret;
2982} 2929}
2983 2930
2984static void __exit nvme_fc_exit_module(void) 2931static void __exit nvme_fc_exit_module(void)
@@ -2989,8 +2936,6 @@ static void __exit nvme_fc_exit_module(void)
2989 2936
2990 nvmf_unregister_transport(&nvme_fc_transport); 2937 nvmf_unregister_transport(&nvme_fc_transport);
2991 2938
2992 destroy_workqueue(nvme_fc_wq);
2993
2994 ida_destroy(&nvme_fc_local_port_cnt); 2939 ida_destroy(&nvme_fc_local_port_cnt);
2995 ida_destroy(&nvme_fc_ctrl_cnt); 2940 ida_destroy(&nvme_fc_ctrl_cnt);
2996} 2941}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f5df78ed1e10..be8541335e31 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
242 BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); 242 BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
243 BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); 243 BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
244 BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); 244 BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
245 BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); 245 BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
246 BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); 246 BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
247} 247}
248 248
@@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
480 rqd->bio->bi_iter.bi_sector)); 480 rqd->bio->bi_iter.bi_sector));
481} 481}
482 482
483static void nvme_nvm_end_io(struct request *rq, int error) 483static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
484{ 484{
485 struct nvm_rq *rqd = rq->end_io_data; 485 struct nvm_rq *rqd = rq->end_io_data;
486 486
@@ -509,7 +509,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
509 rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); 509 rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
510 if (IS_ERR(rq)) { 510 if (IS_ERR(rq)) {
511 kfree(cmd); 511 kfree(cmd);
512 return -ENOMEM; 512 return PTR_ERR(rq);
513 } 513 }
514 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; 514 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
515 515
@@ -571,13 +571,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
571 .max_phys_sect = 64, 571 .max_phys_sect = 64,
572}; 572};
573 573
574static void nvme_nvm_end_user_vio(struct request *rq, int error)
575{
576 struct completion *waiting = rq->end_io_data;
577
578 complete(waiting);
579}
580
581static int nvme_nvm_submit_user_cmd(struct request_queue *q, 574static int nvme_nvm_submit_user_cmd(struct request_queue *q,
582 struct nvme_ns *ns, 575 struct nvme_ns *ns,
583 struct nvme_nvm_command *vcmd, 576 struct nvme_nvm_command *vcmd,
@@ -608,7 +601,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
608 rq->timeout = timeout ? timeout : ADMIN_TIMEOUT; 601 rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
609 602
610 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; 603 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
611 rq->end_io_data = &wait;
612 604
613 if (ppa_buf && ppa_len) { 605 if (ppa_buf && ppa_len) {
614 ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); 606 ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
@@ -662,9 +654,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
662 } 654 }
663 655
664submit: 656submit:
665 blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio); 657 blk_execute_rq(q, NULL, rq, 0);
666
667 wait_for_completion_io(&wait);
668 658
669 if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) 659 if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
670 ret = -EINTR; 660 ret = -EINTR;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9d6a070d4391..d70ff0fdd36b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -27,12 +27,11 @@ extern unsigned char nvme_io_timeout;
27extern unsigned char admin_timeout; 27extern unsigned char admin_timeout;
28#define ADMIN_TIMEOUT (admin_timeout * HZ) 28#define ADMIN_TIMEOUT (admin_timeout * HZ)
29 29
30extern unsigned char shutdown_timeout;
31#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
32
33#define NVME_DEFAULT_KATO 5 30#define NVME_DEFAULT_KATO 5
34#define NVME_KATO_GRACE 10 31#define NVME_KATO_GRACE 10
35 32
33extern struct workqueue_struct *nvme_wq;
34
36enum { 35enum {
37 NVME_NS_LBA = 0, 36 NVME_NS_LBA = 0,
38 NVME_NS_LIGHTNVM = 1, 37 NVME_NS_LIGHTNVM = 1,
@@ -131,6 +130,7 @@ struct nvme_ctrl {
131 struct device *device; /* char device */ 130 struct device *device; /* char device */
132 struct list_head node; 131 struct list_head node;
133 struct ida ns_ida; 132 struct ida ns_ida;
133 struct work_struct reset_work;
134 134
135 struct opal_dev *opal_dev; 135 struct opal_dev *opal_dev;
136 136
@@ -138,6 +138,7 @@ struct nvme_ctrl {
138 char serial[20]; 138 char serial[20];
139 char model[40]; 139 char model[40];
140 char firmware_rev[8]; 140 char firmware_rev[8];
141 char subnqn[NVMF_NQN_SIZE];
141 u16 cntlid; 142 u16 cntlid;
142 143
143 u32 ctrl_config; 144 u32 ctrl_config;
@@ -147,6 +148,8 @@ struct nvme_ctrl {
147 u16 oncs; 148 u16 oncs;
148 u16 vid; 149 u16 vid;
149 u16 oacs; 150 u16 oacs;
151 u16 nssa;
152 u16 nr_streams;
150 atomic_t abort_limit; 153 atomic_t abort_limit;
151 u8 event_limit; 154 u8 event_limit;
152 u8 vwc; 155 u8 vwc;
@@ -165,6 +168,10 @@ struct nvme_ctrl {
165 168
166 /* Power saving configuration */ 169 /* Power saving configuration */
167 u64 ps_max_latency_us; 170 u64 ps_max_latency_us;
171 bool apst_enabled;
172
173 u32 hmpre;
174 u32 hmmin;
168 175
169 /* Fabrics only */ 176 /* Fabrics only */
170 u16 sqsize; 177 u16 sqsize;
@@ -172,12 +179,10 @@ struct nvme_ctrl {
172 u32 iorcsz; 179 u32 iorcsz;
173 u16 icdoff; 180 u16 icdoff;
174 u16 maxcmd; 181 u16 maxcmd;
182 int nr_reconnects;
175 struct nvmf_ctrl_options *opts; 183 struct nvmf_ctrl_options *opts;
176}; 184};
177 185
178/*
179 * An NVM Express namespace is equivalent to a SCSI LUN
180 */
181struct nvme_ns { 186struct nvme_ns {
182 struct list_head list; 187 struct list_head list;
183 188
@@ -189,14 +194,18 @@ struct nvme_ns {
189 int instance; 194 int instance;
190 195
191 u8 eui[8]; 196 u8 eui[8];
192 u8 uuid[16]; 197 u8 nguid[16];
198 uuid_t uuid;
193 199
194 unsigned ns_id; 200 unsigned ns_id;
195 int lba_shift; 201 int lba_shift;
196 u16 ms; 202 u16 ms;
203 u16 sgs;
204 u32 sws;
197 bool ext; 205 bool ext;
198 u8 pi_type; 206 u8 pi_type;
199 unsigned long flags; 207 unsigned long flags;
208 u16 noiob;
200 209
201#define NVME_NS_REMOVING 0 210#define NVME_NS_REMOVING 0
202#define NVME_NS_DEAD 1 211#define NVME_NS_DEAD 1
@@ -214,11 +223,9 @@ struct nvme_ctrl_ops {
214 int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); 223 int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
215 int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); 224 int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
216 int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); 225 int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
217 int (*reset_ctrl)(struct nvme_ctrl *ctrl);
218 void (*free_ctrl)(struct nvme_ctrl *ctrl); 226 void (*free_ctrl)(struct nvme_ctrl *ctrl);
219 void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); 227 void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
220 int (*delete_ctrl)(struct nvme_ctrl *ctrl); 228 int (*delete_ctrl)(struct nvme_ctrl *ctrl);
221 const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
222 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); 229 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
223}; 230};
224 231
@@ -296,7 +303,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
296#define NVME_QID_ANY -1 303#define NVME_QID_ANY -1
297struct request *nvme_alloc_request(struct request_queue *q, 304struct request *nvme_alloc_request(struct request_queue *q,
298 struct nvme_command *cmd, unsigned int flags, int qid); 305 struct nvme_command *cmd, unsigned int flags, int qid);
299int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, 306blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
300 struct nvme_command *cmd); 307 struct nvme_command *cmd);
301int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, 308int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
302 void *buf, unsigned bufflen); 309 void *buf, unsigned bufflen);
@@ -310,23 +317,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
310 void __user *ubuffer, unsigned bufflen, 317 void __user *ubuffer, unsigned bufflen,
311 void __user *meta_buffer, unsigned meta_len, u32 meta_seed, 318 void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
312 u32 *result, unsigned timeout); 319 u32 *result, unsigned timeout);
313int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
314int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
315 struct nvme_id_ns **id);
316int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
317int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
318 void *buffer, size_t buflen, u32 *result);
319int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
320 void *buffer, size_t buflen, u32 *result);
321int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); 320int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
322void nvme_start_keep_alive(struct nvme_ctrl *ctrl); 321void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
323void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); 322void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
324 323int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
325struct sg_io_hdr;
326
327int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
328int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
329int nvme_sg_get_version_num(int __user *ip);
330 324
331#ifdef CONFIG_NVM 325#ifdef CONFIG_NVM
332int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); 326int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 40c7581caeb0..33c3b9db7d36 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -17,28 +17,15 @@
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/blk-mq.h> 18#include <linux/blk-mq.h>
19#include <linux/blk-mq-pci.h> 19#include <linux/blk-mq-pci.h>
20#include <linux/cpu.h>
21#include <linux/delay.h>
22#include <linux/dmi.h> 20#include <linux/dmi.h>
23#include <linux/errno.h>
24#include <linux/fs.h>
25#include <linux/genhd.h>
26#include <linux/hdreg.h>
27#include <linux/idr.h>
28#include <linux/init.h> 21#include <linux/init.h>
29#include <linux/interrupt.h> 22#include <linux/interrupt.h>
30#include <linux/io.h> 23#include <linux/io.h>
31#include <linux/kdev_t.h>
32#include <linux/kernel.h>
33#include <linux/mm.h> 24#include <linux/mm.h>
34#include <linux/module.h> 25#include <linux/module.h>
35#include <linux/moduleparam.h>
36#include <linux/mutex.h> 26#include <linux/mutex.h>
37#include <linux/pci.h> 27#include <linux/pci.h>
38#include <linux/poison.h> 28#include <linux/poison.h>
39#include <linux/ptrace.h>
40#include <linux/sched.h>
41#include <linux/slab.h>
42#include <linux/t10-pi.h> 29#include <linux/t10-pi.h>
43#include <linux/timer.h> 30#include <linux/timer.h>
44#include <linux/types.h> 31#include <linux/types.h>
@@ -49,7 +36,6 @@
49#include "nvme.h" 36#include "nvme.h"
50 37
51#define NVME_Q_DEPTH 1024 38#define NVME_Q_DEPTH 1024
52#define NVME_AQ_DEPTH 256
53#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) 39#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
54#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) 40#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
55 41
@@ -66,12 +52,14 @@ static bool use_cmb_sqes = true;
66module_param(use_cmb_sqes, bool, 0644); 52module_param(use_cmb_sqes, bool, 0644);
67MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); 53MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
68 54
69static struct workqueue_struct *nvme_workq; 55static unsigned int max_host_mem_size_mb = 128;
56module_param(max_host_mem_size_mb, uint, 0444);
57MODULE_PARM_DESC(max_host_mem_size_mb,
58 "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
70 59
71struct nvme_dev; 60struct nvme_dev;
72struct nvme_queue; 61struct nvme_queue;
73 62
74static int nvme_reset(struct nvme_dev *dev);
75static void nvme_process_cq(struct nvme_queue *nvmeq); 63static void nvme_process_cq(struct nvme_queue *nvmeq);
76static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); 64static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
77 65
@@ -92,9 +80,8 @@ struct nvme_dev {
92 int q_depth; 80 int q_depth;
93 u32 db_stride; 81 u32 db_stride;
94 void __iomem *bar; 82 void __iomem *bar;
95 struct work_struct reset_work; 83 unsigned long bar_mapped_size;
96 struct work_struct remove_work; 84 struct work_struct remove_work;
97 struct timer_list watchdog_timer;
98 struct mutex shutdown_lock; 85 struct mutex shutdown_lock;
99 bool subsystem; 86 bool subsystem;
100 void __iomem *cmb; 87 void __iomem *cmb;
@@ -104,10 +91,18 @@ struct nvme_dev {
104 u32 cmbloc; 91 u32 cmbloc;
105 struct nvme_ctrl ctrl; 92 struct nvme_ctrl ctrl;
106 struct completion ioq_wait; 93 struct completion ioq_wait;
94
95 /* shadow doorbell buffer support: */
107 u32 *dbbuf_dbs; 96 u32 *dbbuf_dbs;
108 dma_addr_t dbbuf_dbs_dma_addr; 97 dma_addr_t dbbuf_dbs_dma_addr;
109 u32 *dbbuf_eis; 98 u32 *dbbuf_eis;
110 dma_addr_t dbbuf_eis_dma_addr; 99 dma_addr_t dbbuf_eis_dma_addr;
100
101 /* host memory buffer support: */
102 u64 host_mem_size;
103 u32 nr_host_mem_descs;
104 struct nvme_host_mem_buf_desc *host_mem_descs;
105 void **host_mem_desc_bufs;
111}; 106};
112 107
113static inline unsigned int sq_idx(unsigned int qid, u32 stride) 108static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -185,8 +180,8 @@ static inline void _nvme_check_size(void)
185 BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); 180 BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
186 BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); 181 BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
187 BUILD_BUG_ON(sizeof(struct nvme_command) != 64); 182 BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
188 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); 183 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
189 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); 184 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
190 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); 185 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
191 BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); 186 BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
192 BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); 187 BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -350,19 +345,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
350 nvmeq->tags = NULL; 345 nvmeq->tags = NULL;
351} 346}
352 347
353static int nvme_admin_init_request(struct blk_mq_tag_set *set,
354 struct request *req, unsigned int hctx_idx,
355 unsigned int numa_node)
356{
357 struct nvme_dev *dev = set->driver_data;
358 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
359 struct nvme_queue *nvmeq = dev->queues[0];
360
361 BUG_ON(!nvmeq);
362 iod->nvmeq = nvmeq;
363 return 0;
364}
365
366static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 348static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
367 unsigned int hctx_idx) 349 unsigned int hctx_idx)
368{ 350{
@@ -382,7 +364,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
382{ 364{
383 struct nvme_dev *dev = set->driver_data; 365 struct nvme_dev *dev = set->driver_data;
384 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 366 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
385 struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; 367 int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
368 struct nvme_queue *nvmeq = dev->queues[queue_idx];
386 369
387 BUG_ON(!nvmeq); 370 BUG_ON(!nvmeq);
388 iod->nvmeq = nvmeq; 371 iod->nvmeq = nvmeq;
@@ -427,7 +410,7 @@ static __le64 **iod_list(struct request *req)
427 return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); 410 return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
428} 411}
429 412
430static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) 413static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
431{ 414{
432 struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); 415 struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
433 int nseg = blk_rq_nr_phys_segments(rq); 416 int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +419,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
436 if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { 419 if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
437 iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); 420 iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
438 if (!iod->sg) 421 if (!iod->sg)
439 return BLK_MQ_RQ_QUEUE_BUSY; 422 return BLK_STS_RESOURCE;
440 } else { 423 } else {
441 iod->sg = iod->inline_sg; 424 iod->sg = iod->inline_sg;
442 } 425 }
@@ -446,7 +429,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
446 iod->nents = 0; 429 iod->nents = 0;
447 iod->length = size; 430 iod->length = size;
448 431
449 return BLK_MQ_RQ_QUEUE_OK; 432 return BLK_STS_OK;
450} 433}
451 434
452static void nvme_free_iod(struct nvme_dev *dev, struct request *req) 435static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +599,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
616 return true; 599 return true;
617} 600}
618 601
619static int nvme_map_data(struct nvme_dev *dev, struct request *req, 602static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
620 struct nvme_command *cmnd) 603 struct nvme_command *cmnd)
621{ 604{
622 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 605 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
623 struct request_queue *q = req->q; 606 struct request_queue *q = req->q;
624 enum dma_data_direction dma_dir = rq_data_dir(req) ? 607 enum dma_data_direction dma_dir = rq_data_dir(req) ?
625 DMA_TO_DEVICE : DMA_FROM_DEVICE; 608 DMA_TO_DEVICE : DMA_FROM_DEVICE;
626 int ret = BLK_MQ_RQ_QUEUE_ERROR; 609 blk_status_t ret = BLK_STS_IOERR;
627 610
628 sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); 611 sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
629 iod->nents = blk_rq_map_sg(q, req, iod->sg); 612 iod->nents = blk_rq_map_sg(q, req, iod->sg);
630 if (!iod->nents) 613 if (!iod->nents)
631 goto out; 614 goto out;
632 615
633 ret = BLK_MQ_RQ_QUEUE_BUSY; 616 ret = BLK_STS_RESOURCE;
634 if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, 617 if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
635 DMA_ATTR_NO_WARN)) 618 DMA_ATTR_NO_WARN))
636 goto out; 619 goto out;
@@ -638,7 +621,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
638 if (!nvme_setup_prps(dev, req)) 621 if (!nvme_setup_prps(dev, req))
639 goto out_unmap; 622 goto out_unmap;
640 623
641 ret = BLK_MQ_RQ_QUEUE_ERROR; 624 ret = BLK_STS_IOERR;
642 if (blk_integrity_rq(req)) { 625 if (blk_integrity_rq(req)) {
643 if (blk_rq_count_integrity_sg(q, req->bio) != 1) 626 if (blk_rq_count_integrity_sg(q, req->bio) != 1)
644 goto out_unmap; 627 goto out_unmap;
@@ -658,7 +641,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
658 cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma); 641 cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
659 if (blk_integrity_rq(req)) 642 if (blk_integrity_rq(req))
660 cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); 643 cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
661 return BLK_MQ_RQ_QUEUE_OK; 644 return BLK_STS_OK;
662 645
663out_unmap: 646out_unmap:
664 dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); 647 dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +671,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
688/* 671/*
689 * NOTE: ns is NULL when called on the admin queue. 672 * NOTE: ns is NULL when called on the admin queue.
690 */ 673 */
691static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, 674static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
692 const struct blk_mq_queue_data *bd) 675 const struct blk_mq_queue_data *bd)
693{ 676{
694 struct nvme_ns *ns = hctx->queue->queuedata; 677 struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,47 +679,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
696 struct nvme_dev *dev = nvmeq->dev; 679 struct nvme_dev *dev = nvmeq->dev;
697 struct request *req = bd->rq; 680 struct request *req = bd->rq;
698 struct nvme_command cmnd; 681 struct nvme_command cmnd;
699 int ret = BLK_MQ_RQ_QUEUE_OK; 682 blk_status_t ret;
700
701 /*
702 * If formated with metadata, require the block layer provide a buffer
703 * unless this namespace is formated such that the metadata can be
704 * stripped/generated by the controller with PRACT=1.
705 */
706 if (ns && ns->ms && !blk_integrity_rq(req)) {
707 if (!(ns->pi_type && ns->ms == 8) &&
708 !blk_rq_is_passthrough(req)) {
709 blk_mq_end_request(req, -EFAULT);
710 return BLK_MQ_RQ_QUEUE_OK;
711 }
712 }
713 683
714 ret = nvme_setup_cmd(ns, req, &cmnd); 684 ret = nvme_setup_cmd(ns, req, &cmnd);
715 if (ret != BLK_MQ_RQ_QUEUE_OK) 685 if (ret)
716 return ret; 686 return ret;
717 687
718 ret = nvme_init_iod(req, dev); 688 ret = nvme_init_iod(req, dev);
719 if (ret != BLK_MQ_RQ_QUEUE_OK) 689 if (ret)
720 goto out_free_cmd; 690 goto out_free_cmd;
721 691
722 if (blk_rq_nr_phys_segments(req)) 692 if (blk_rq_nr_phys_segments(req)) {
723 ret = nvme_map_data(dev, req, &cmnd); 693 ret = nvme_map_data(dev, req, &cmnd);
724 694 if (ret)
725 if (ret != BLK_MQ_RQ_QUEUE_OK) 695 goto out_cleanup_iod;
726 goto out_cleanup_iod; 696 }
727 697
728 blk_mq_start_request(req); 698 blk_mq_start_request(req);
729 699
730 spin_lock_irq(&nvmeq->q_lock); 700 spin_lock_irq(&nvmeq->q_lock);
731 if (unlikely(nvmeq->cq_vector < 0)) { 701 if (unlikely(nvmeq->cq_vector < 0)) {
732 ret = BLK_MQ_RQ_QUEUE_ERROR; 702 ret = BLK_STS_IOERR;
733 spin_unlock_irq(&nvmeq->q_lock); 703 spin_unlock_irq(&nvmeq->q_lock);
734 goto out_cleanup_iod; 704 goto out_cleanup_iod;
735 } 705 }
736 __nvme_submit_cmd(nvmeq, &cmnd); 706 __nvme_submit_cmd(nvmeq, &cmnd);
737 nvme_process_cq(nvmeq); 707 nvme_process_cq(nvmeq);
738 spin_unlock_irq(&nvmeq->q_lock); 708 spin_unlock_irq(&nvmeq->q_lock);
739 return BLK_MQ_RQ_QUEUE_OK; 709 return BLK_STS_OK;
740out_cleanup_iod: 710out_cleanup_iod:
741 nvme_free_iod(dev, req); 711 nvme_free_iod(dev, req);
742out_free_cmd: 712out_free_cmd:
@@ -759,65 +729,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
759 return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; 729 return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
760} 730}
761 731
762static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) 732static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
763{ 733{
764 u16 head, phase; 734 u16 head = nvmeq->cq_head;
765
766 head = nvmeq->cq_head;
767 phase = nvmeq->cq_phase;
768
769 while (nvme_cqe_valid(nvmeq, head, phase)) {
770 struct nvme_completion cqe = nvmeq->cqes[head];
771 struct request *req;
772
773 if (++head == nvmeq->q_depth) {
774 head = 0;
775 phase = !phase;
776 }
777
778 if (tag && *tag == cqe.command_id)
779 *tag = -1;
780 735
781 if (unlikely(cqe.command_id >= nvmeq->q_depth)) { 736 if (likely(nvmeq->cq_vector >= 0)) {
782 dev_warn(nvmeq->dev->ctrl.device, 737 if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
783 "invalid id %d completed on queue %d\n", 738 nvmeq->dbbuf_cq_ei))
784 cqe.command_id, le16_to_cpu(cqe.sq_id)); 739 writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
785 continue; 740 }
786 } 741}
787 742
788 /* 743static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
789 * AEN requests are special as they don't time out and can 744 struct nvme_completion *cqe)
790 * survive any kind of queue freeze and often don't respond to 745{
791 * aborts. We don't even bother to allocate a struct request 746 struct request *req;
792 * for them but rather special case them here.
793 */
794 if (unlikely(nvmeq->qid == 0 &&
795 cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
796 nvme_complete_async_event(&nvmeq->dev->ctrl,
797 cqe.status, &cqe.result);
798 continue;
799 }
800 747
801 req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); 748 if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
802 nvme_end_request(req, cqe.status, cqe.result); 749 dev_warn(nvmeq->dev->ctrl.device,
750 "invalid id %d completed on queue %d\n",
751 cqe->command_id, le16_to_cpu(cqe->sq_id));
752 return;
803 } 753 }
804 754
805 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) 755 /*
756 * AEN requests are special as they don't time out and can
757 * survive any kind of queue freeze and often don't respond to
758 * aborts. We don't even bother to allocate a struct request
759 * for them but rather special case them here.
760 */
761 if (unlikely(nvmeq->qid == 0 &&
762 cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
763 nvme_complete_async_event(&nvmeq->dev->ctrl,
764 cqe->status, &cqe->result);
806 return; 765 return;
766 }
807 767
808 if (likely(nvmeq->cq_vector >= 0)) 768 req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
809 if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db, 769 nvme_end_request(req, cqe->status, cqe->result);
810 nvmeq->dbbuf_cq_ei)) 770}
811 writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
812 nvmeq->cq_head = head;
813 nvmeq->cq_phase = phase;
814 771
815 nvmeq->cqe_seen = 1; 772static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
773 struct nvme_completion *cqe)
774{
775 if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
776 *cqe = nvmeq->cqes[nvmeq->cq_head];
777
778 if (++nvmeq->cq_head == nvmeq->q_depth) {
779 nvmeq->cq_head = 0;
780 nvmeq->cq_phase = !nvmeq->cq_phase;
781 }
782 return true;
783 }
784 return false;
816} 785}
817 786
818static void nvme_process_cq(struct nvme_queue *nvmeq) 787static void nvme_process_cq(struct nvme_queue *nvmeq)
819{ 788{
820 __nvme_process_cq(nvmeq, NULL); 789 struct nvme_completion cqe;
790 int consumed = 0;
791
792 while (nvme_read_cqe(nvmeq, &cqe)) {
793 nvme_handle_cqe(nvmeq, &cqe);
794 consumed++;
795 }
796
797 if (consumed) {
798 nvme_ring_cq_doorbell(nvmeq);
799 nvmeq->cqe_seen = 1;
800 }
821} 801}
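Editor's note: the rewrite above splits the old monolithic __nvme_process_cq() into three small steps: nvme_read_cqe() consumes one entry when its phase bit matches, nvme_handle_cqe() dispatches it, and nvme_ring_cq_doorbell() is written once per batch. The sketch below is a plain-C model of the same phase-bit ring-consumer pattern, with invented types and no NVMe specifics.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define Q_DEPTH 4

struct cqe { uint16_t id; uint16_t phase; };	/* the phase bit marks "new" entries */

struct cq {
	struct cqe entries[Q_DEPTH];
	uint16_t head;
	uint16_t phase;		/* expected phase for not-yet-consumed entries */
	unsigned doorbell;	/* batched doorbell write goes here */
};

/* Consume one entry if its phase matches; wrap and flip the phase at the end. */
static bool read_cqe(struct cq *q, struct cqe *out)
{
	if (q->entries[q->head].phase != q->phase)
		return false;
	*out = q->entries[q->head];
	if (++q->head == Q_DEPTH) {
		q->head = 0;
		q->phase = !q->phase;
	}
	return true;
}

static void process_cq(struct cq *q)
{
	struct cqe cqe;
	unsigned consumed = 0;

	while (read_cqe(q, &cqe)) {
		printf("completed id %d\n", cqe.id);	/* handle_cqe() stand-in */
		consumed++;
	}
	if (consumed)
		q->doorbell = q->head;	/* ring the doorbell once per batch */
}

int main(void)
{
	struct cq q = { .phase = 1 };
	q.entries[0] = (struct cqe){ .id = 7, .phase = 1 };
	q.entries[1] = (struct cqe){ .id = 9, .phase = 1 };
	process_cq(&q);
	return 0;
}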
822 802
823static irqreturn_t nvme_irq(int irq, void *data) 803static irqreturn_t nvme_irq(int irq, void *data)
@@ -842,16 +822,28 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
842 822
843static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) 823static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
844{ 824{
845 if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { 825 struct nvme_completion cqe;
846 spin_lock_irq(&nvmeq->q_lock); 826 int found = 0, consumed = 0;
847 __nvme_process_cq(nvmeq, &tag);
848 spin_unlock_irq(&nvmeq->q_lock);
849 827
850 if (tag == -1) 828 if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
851 return 1; 829 return 0;
852 }
853 830
854 return 0; 831 spin_lock_irq(&nvmeq->q_lock);
832 while (nvme_read_cqe(nvmeq, &cqe)) {
833 nvme_handle_cqe(nvmeq, &cqe);
834 consumed++;
835
836 if (tag == cqe.command_id) {
837 found = 1;
838 break;
839 }
840 }
841
842 if (consumed)
843 nvme_ring_cq_doorbell(nvmeq);
844 spin_unlock_irq(&nvmeq->q_lock);
845
846 return found;
855} 847}
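Editor's note: with those helpers, __nvme_poll() above becomes: drain completions under the queue lock until the polled tag shows up (or the queue is empty), and ring the doorbell only if anything was consumed. A condensed standalone model of that found/consumed bookkeeping, using a simple array in place of the completion queue:

#include <stdbool.h>
#include <stdio.h>

/* Pending completion tags, 0-terminated; a stand-in for valid CQ entries. */
static int pending[] = { 3, 8, 5, 0 };
static int pos;

static bool read_completion(int *tag)
{
	if (!pending[pos])
		return false;
	*tag = pending[pos++];
	return true;
}

/* Returns 1 if @tag completed; stops early once it is found, and "rings
 * the doorbell" only when at least one entry was consumed. */
static int poll_for_tag(int tag)
{
	int found = 0, consumed = 0, id;

	while (read_completion(&id)) {
		consumed++;		/* every entry is still completed normally */
		if (id == tag) {
			found = 1;
			break;
		}
	}
	if (consumed)
		printf("doorbell: advanced by %d\n", consumed);
	return found;
}

int main(void)
{
	printf("tag 8 found: %d\n", poll_for_tag(8));
	return 0;
}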
856 848
857static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 849static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -939,7 +931,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
939 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); 931 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
940} 932}
941 933
942static void abort_endio(struct request *req, int error) 934static void abort_endio(struct request *req, blk_status_t error)
943{ 935{
944 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 936 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
945 struct nvme_queue *nvmeq = iod->nvmeq; 937 struct nvme_queue *nvmeq = iod->nvmeq;
@@ -950,6 +942,51 @@ static void abort_endio(struct request *req, int error)
950 blk_mq_free_request(req); 942 blk_mq_free_request(req);
951} 943}
952 944
945static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
946{
947
948 /* If true, indicates loss of adapter communication, possibly by a
949 * NVMe Subsystem reset.
950 */
951 bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
952
953 /* If there is a reset ongoing, we shouldn't reset again. */
954 if (dev->ctrl.state == NVME_CTRL_RESETTING)
955 return false;
956
957 /* We shouldn't reset unless the controller is on fatal error state
958 * _or_ if we lost the communication with it.
959 */
960 if (!(csts & NVME_CSTS_CFS) && !nssro)
961 return false;
962
963 /* If PCI error recovery process is happening, we cannot reset or
964 * the recovery mechanism will surely fail.
965 */
966 if (pci_channel_offline(to_pci_dev(dev->dev)))
967 return false;
968
969 return true;
970}
971
972static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
973{
974 /* Read a config register to help see what died. */
975 u16 pci_status;
976 int result;
977
978 result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
979 &pci_status);
980 if (result == PCIBIOS_SUCCESSFUL)
981 dev_warn(dev->ctrl.device,
982 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
983 csts, pci_status);
984 else
985 dev_warn(dev->ctrl.device,
986 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
987 csts, result);
988}
989
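Editor's note: nvme_should_reset() above keys off two CSTS fields: CFS (controller fatal status) and, when the controller advertises subsystem reset support, NSSRO (NVM subsystem reset occurred). The worked example below is only an illustration; the bit positions are taken from the NVMe CSTS register layout (RDY=0, CFS=1, NSSRO=4) and are an assumption here, not constants from this patch, and the reset-in-progress and PCI-error-recovery checks are left out.

#include <stdbool.h>
#include <stdio.h>

/* Assumed CSTS bit positions per the NVMe spec. */
#define CSTS_RDY   (1u << 0)
#define CSTS_CFS   (1u << 1)
#define CSTS_NSSRO (1u << 4)

static bool should_reset(unsigned csts, bool subsystem_reset_supported)
{
	bool nssro = subsystem_reset_supported && (csts & CSTS_NSSRO);

	return (csts & CSTS_CFS) || nssro;
}

int main(void)
{
	printf("%d\n", should_reset(CSTS_RDY, true));             /* 0: healthy */
	printf("%d\n", should_reset(CSTS_RDY | CSTS_CFS, false)); /* 1: fatal status */
	printf("%d\n", should_reset(CSTS_NSSRO, true));           /* 1: subsystem reset seen */
	return 0;
}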
953static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) 990static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
954{ 991{
955 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 992 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -957,6 +994,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
957 struct nvme_dev *dev = nvmeq->dev; 994 struct nvme_dev *dev = nvmeq->dev;
958 struct request *abort_req; 995 struct request *abort_req;
959 struct nvme_command cmd; 996 struct nvme_command cmd;
997 u32 csts = readl(dev->bar + NVME_REG_CSTS);
998
999 /*
1000 * Reset immediately if the controller is failed
1001 */
1002 if (nvme_should_reset(dev, csts)) {
1003 nvme_warn_reset(dev, csts);
1004 nvme_dev_disable(dev, false);
1005 nvme_reset_ctrl(&dev->ctrl);
1006 return BLK_EH_HANDLED;
1007 }
960 1008
961 /* 1009 /*
962 * Did we miss an interrupt? 1010 * Did we miss an interrupt?
@@ -993,7 +1041,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
993 "I/O %d QID %d timeout, reset controller\n", 1041 "I/O %d QID %d timeout, reset controller\n",
994 req->tag, nvmeq->qid); 1042 req->tag, nvmeq->qid);
995 nvme_dev_disable(dev, false); 1043 nvme_dev_disable(dev, false);
996 nvme_reset(dev); 1044 nvme_reset_ctrl(&dev->ctrl);
997 1045
998 /* 1046 /*
999 * Mark the request as handled, since the inline shutdown 1047 * Mark the request as handled, since the inline shutdown
@@ -1247,7 +1295,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
1247 .complete = nvme_pci_complete_rq, 1295 .complete = nvme_pci_complete_rq,
1248 .init_hctx = nvme_admin_init_hctx, 1296 .init_hctx = nvme_admin_init_hctx,
1249 .exit_hctx = nvme_admin_exit_hctx, 1297 .exit_hctx = nvme_admin_exit_hctx,
1250 .init_request = nvme_admin_init_request, 1298 .init_request = nvme_init_request,
1251 .timeout = nvme_timeout, 1299 .timeout = nvme_timeout,
1252}; 1300};
1253 1301
@@ -1311,6 +1359,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
1311 return 0; 1359 return 0;
1312} 1360}
1313 1361
1362static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
1363{
1364 return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
1365}
1366
1367static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
1368{
1369 struct pci_dev *pdev = to_pci_dev(dev->dev);
1370
1371 if (size <= dev->bar_mapped_size)
1372 return 0;
1373 if (size > pci_resource_len(pdev, 0))
1374 return -ENOMEM;
1375 if (dev->bar)
1376 iounmap(dev->bar);
1377 dev->bar = ioremap(pci_resource_start(pdev, 0), size);
1378 if (!dev->bar) {
1379 dev->bar_mapped_size = 0;
1380 return -ENOMEM;
1381 }
1382 dev->bar_mapped_size = size;
1383 dev->dbs = dev->bar + NVME_REG_DBS;
1384
1385 return 0;
1386}
1387
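Editor's note: db_bar_size() above sizes the mapping as the register block up to the doorbell area plus 8 bytes (one SQ-tail and one CQ-head doorbell) per queue, scaled by the controller's doorbell stride, with +1 for the admin queue; nvme_remap_bar() then grows the ioremap only when that size exceeds what is already mapped. A small worked computation, assuming NVME_REG_DBS is the usual 0x1000 offset:

#include <stdio.h>

#define NVME_REG_DBS 0x1000	/* doorbell registers start 4 KiB into BAR0 (assumed) */

static unsigned long db_bar_size(unsigned nr_io_queues, unsigned db_stride)
{
	return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * db_stride);
}

int main(void)
{
	/* Stride 1: 0x1000 + 5 * 8 = 4136 bytes for 4 I/O queues + admin. */
	printf("%lu\n", db_bar_size(4, 1));
	/* Stride 4 (CAP.DSTRD = 2) quadruples the doorbell area. */
	printf("%lu\n", db_bar_size(4, 4));
	return 0;
}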
1314static int nvme_configure_admin_queue(struct nvme_dev *dev) 1388static int nvme_configure_admin_queue(struct nvme_dev *dev)
1315{ 1389{
1316 int result; 1390 int result;
@@ -1318,6 +1392,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1318 u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); 1392 u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
1319 struct nvme_queue *nvmeq; 1393 struct nvme_queue *nvmeq;
1320 1394
1395 result = nvme_remap_bar(dev, db_bar_size(dev, 0));
1396 if (result < 0)
1397 return result;
1398
1321 dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? 1399 dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
1322 NVME_CAP_NSSRC(cap) : 0; 1400 NVME_CAP_NSSRC(cap) : 0;
1323 1401
@@ -1358,66 +1436,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1358 return result; 1436 return result;
1359} 1437}
1360 1438
1361static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
1362{
1363
1364 /* If true, indicates loss of adapter communication, possibly by a
1365 * NVMe Subsystem reset.
1366 */
1367 bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
1368
1369 /* If there is a reset ongoing, we shouldn't reset again. */
1370 if (dev->ctrl.state == NVME_CTRL_RESETTING)
1371 return false;
1372
1373 /* We shouldn't reset unless the controller is on fatal error state
1374 * _or_ if we lost the communication with it.
1375 */
1376 if (!(csts & NVME_CSTS_CFS) && !nssro)
1377 return false;
1378
1379 /* If PCI error recovery process is happening, we cannot reset or
1380 * the recovery mechanism will surely fail.
1381 */
1382 if (pci_channel_offline(to_pci_dev(dev->dev)))
1383 return false;
1384
1385 return true;
1386}
1387
1388static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
1389{
1390 /* Read a config register to help see what died. */
1391 u16 pci_status;
1392 int result;
1393
1394 result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
1395 &pci_status);
1396 if (result == PCIBIOS_SUCCESSFUL)
1397 dev_warn(dev->ctrl.device,
1398 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
1399 csts, pci_status);
1400 else
1401 dev_warn(dev->ctrl.device,
1402 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
1403 csts, result);
1404}
1405
1406static void nvme_watchdog_timer(unsigned long data)
1407{
1408 struct nvme_dev *dev = (struct nvme_dev *)data;
1409 u32 csts = readl(dev->bar + NVME_REG_CSTS);
1410
1411 /* Skip controllers under certain specific conditions. */
1412 if (nvme_should_reset(dev, csts)) {
1413 if (!nvme_reset(dev))
1414 nvme_warn_reset(dev, csts);
1415 return;
1416 }
1417
1418 mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
1419}
1420
1421static int nvme_create_io_queues(struct nvme_dev *dev) 1439static int nvme_create_io_queues(struct nvme_dev *dev)
1422{ 1440{
1423 unsigned i, max; 1441 unsigned i, max;
@@ -1514,16 +1532,168 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
1514 } 1532 }
1515} 1533}
1516 1534
1517static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) 1535static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
1536{
1537 size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
1538 struct nvme_command c;
1539 u64 dma_addr;
1540 int ret;
1541
1542 dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
1543 DMA_TO_DEVICE);
1544 if (dma_mapping_error(dev->dev, dma_addr))
1545 return -ENOMEM;
1546
1547 memset(&c, 0, sizeof(c));
1548 c.features.opcode = nvme_admin_set_features;
1549 c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
1550 c.features.dword11 = cpu_to_le32(bits);
1551 c.features.dword12 = cpu_to_le32(dev->host_mem_size >>
1552 ilog2(dev->ctrl.page_size));
1553 c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr));
1554 c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
1555 c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
1556
1557 ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
1558 if (ret) {
1559 dev_warn(dev->ctrl.device,
1560 "failed to set host mem (err %d, flags %#x).\n",
1561 ret, bits);
1562 }
1563 dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
1564 return ret;
1565}
1566
1567static void nvme_free_host_mem(struct nvme_dev *dev)
1568{
1569 int i;
1570
1571 for (i = 0; i < dev->nr_host_mem_descs; i++) {
1572 struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
1573 size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
1574
1575 dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
1576 le64_to_cpu(desc->addr));
1577 }
1578
1579 kfree(dev->host_mem_desc_bufs);
1580 dev->host_mem_desc_bufs = NULL;
1581 kfree(dev->host_mem_descs);
1582 dev->host_mem_descs = NULL;
1583}
1584
1585static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
1518{ 1586{
1519 return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); 1587 struct nvme_host_mem_buf_desc *descs;
1588 u32 chunk_size, max_entries, i = 0;
1589 void **bufs;
1590 u64 size, tmp;
1591
1592 /* start big and work our way down */
1593 chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
1594retry:
1595 tmp = (preferred + chunk_size - 1);
1596 do_div(tmp, chunk_size);
1597 max_entries = tmp;
1598 descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
1599 if (!descs)
1600 goto out;
1601
1602 bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
1603 if (!bufs)
1604 goto out_free_descs;
1605
1606 for (size = 0; size < preferred; size += chunk_size) {
1607 u32 len = min_t(u64, chunk_size, preferred - size);
1608 dma_addr_t dma_addr;
1609
1610 bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
1611 DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
1612 if (!bufs[i])
1613 break;
1614
1615 descs[i].addr = cpu_to_le64(dma_addr);
1616 descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
1617 i++;
1618 }
1619
1620 if (!size || (min && size < min)) {
1621 dev_warn(dev->ctrl.device,
1622 "failed to allocate host memory buffer.\n");
1623 goto out_free_bufs;
1624 }
1625
1626 dev_info(dev->ctrl.device,
1627 "allocated %lld MiB host memory buffer.\n",
1628 size >> ilog2(SZ_1M));
1629 dev->nr_host_mem_descs = i;
1630 dev->host_mem_size = size;
1631 dev->host_mem_descs = descs;
1632 dev->host_mem_desc_bufs = bufs;
1633 return 0;
1634
1635out_free_bufs:
1636 while (--i >= 0) {
1637 size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
1638
1639 dma_free_coherent(dev->dev, size, bufs[i],
1640 le64_to_cpu(descs[i].addr));
1641 }
1642
1643 kfree(bufs);
1644out_free_descs:
1645 kfree(descs);
1646out:
1647 /* try a smaller chunk size if we failed early */
1648 if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
1649 chunk_size /= 2;
1650 goto retry;
1651 }
1652 dev->host_mem_descs = NULL;
1653 return -ENOMEM;
1654}
1655
1656static void nvme_setup_host_mem(struct nvme_dev *dev)
1657{
1658 u64 max = (u64)max_host_mem_size_mb * SZ_1M;
1659 u64 preferred = (u64)dev->ctrl.hmpre * 4096;
1660 u64 min = (u64)dev->ctrl.hmmin * 4096;
1661 u32 enable_bits = NVME_HOST_MEM_ENABLE;
1662
1663 preferred = min(preferred, max);
1664 if (min > max) {
1665 dev_warn(dev->ctrl.device,
1666 "min host memory (%lld MiB) above limit (%d MiB).\n",
1667 min >> ilog2(SZ_1M), max_host_mem_size_mb);
1668 nvme_free_host_mem(dev);
1669 return;
1670 }
1671
1672 /*
1673 * If we already have a buffer allocated check if we can reuse it.
1674 */
1675 if (dev->host_mem_descs) {
1676 if (dev->host_mem_size >= min)
1677 enable_bits |= NVME_HOST_MEM_RETURN;
1678 else
1679 nvme_free_host_mem(dev);
1680 }
1681
1682 if (!dev->host_mem_descs) {
1683 if (nvme_alloc_host_mem(dev, min, preferred))
1684 return;
1685 }
1686
1687 if (nvme_set_host_mem(dev, enable_bits))
1688 nvme_free_host_mem(dev);
1520} 1689}
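Editor's note: nvme_setup_host_mem() above derives its targets from the controller-reported HMPRE (preferred host memory buffer size) and HMMIN (minimum size), both in 4 KiB units, and clamps them against the max_host_mem_size_mb module parameter. The arithmetic is worked through below with illustrative field values; the numbers are not taken from any real controller.

#include <stdint.h>
#include <stdio.h>

#define SZ_1M (1024 * 1024)

int main(void)
{
	/* Illustrative controller-reported values, both in 4 KiB units. */
	uint32_t hmpre = 65536;	/* prefers 65536 * 4 KiB = 256 MiB */
	uint32_t hmmin = 8192;	/* needs at least 8192 * 4 KiB = 32 MiB */
	uint64_t max_mb = 128;	/* module-parameter cap, in MiB */

	uint64_t max = max_mb * SZ_1M;
	uint64_t preferred = (uint64_t)hmpre * 4096;
	uint64_t min = (uint64_t)hmmin * 4096;

	if (preferred > max)
		preferred = max;	/* clamp to the host-side limit */

	if (min > max)
		printf("HMB disabled: min %llu MiB exceeds limit %llu MiB\n",
		       (unsigned long long)(min >> 20),
		       (unsigned long long)(max >> 20));
	else
		printf("allocate between %llu and %llu MiB\n",
		       (unsigned long long)(min >> 20),
		       (unsigned long long)(preferred >> 20));
	return 0;
}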
1521 1690
1522static int nvme_setup_io_queues(struct nvme_dev *dev) 1691static int nvme_setup_io_queues(struct nvme_dev *dev)
1523{ 1692{
1524 struct nvme_queue *adminq = dev->queues[0]; 1693 struct nvme_queue *adminq = dev->queues[0];
1525 struct pci_dev *pdev = to_pci_dev(dev->dev); 1694 struct pci_dev *pdev = to_pci_dev(dev->dev);
1526 int result, nr_io_queues, size; 1695 int result, nr_io_queues;
1696 unsigned long size;
1527 1697
1528 nr_io_queues = num_online_cpus(); 1698 nr_io_queues = num_online_cpus();
1529 result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); 1699 result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1542,20 +1712,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1542 nvme_release_cmb(dev); 1712 nvme_release_cmb(dev);
1543 } 1713 }
1544 1714
1545 size = db_bar_size(dev, nr_io_queues); 1715 do {
1546 if (size > 8192) { 1716 size = db_bar_size(dev, nr_io_queues);
1547 iounmap(dev->bar); 1717 result = nvme_remap_bar(dev, size);
1548 do { 1718 if (!result)
1549 dev->bar = ioremap(pci_resource_start(pdev, 0), size); 1719 break;
1550 if (dev->bar) 1720 if (!--nr_io_queues)
1551 break; 1721 return -ENOMEM;
1552 if (!--nr_io_queues) 1722 } while (1);
1553 return -ENOMEM; 1723 adminq->q_db = dev->dbs;
1554 size = db_bar_size(dev, nr_io_queues);
1555 } while (1);
1556 dev->dbs = dev->bar + 4096;
1557 adminq->q_db = dev->dbs;
1558 }
1559 1724
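Editor's note: the replacement loop above is a shrink-until-it-fits pattern: try to map doorbells for the requested queue count, and if the BAR window cannot cover them, drop one I/O queue and retry until the remap succeeds or no I/O queue is left. A compact standalone model of that loop, with a fixed pretend window in place of the PCI resource:

#include <stdio.h>

/* Pretend the mappable window is fixed; "remap" fails when size exceeds it. */
static int remap(unsigned long size, unsigned long window)
{
	return size <= window ? 0 : -1;
}

static int fit_io_queues(unsigned nr_io_queues, unsigned long window)
{
	unsigned long size;

	do {
		size = 0x1000 + (nr_io_queues + 1) * 8;	/* db_bar_size(), stride 1 */
		if (!remap(size, window))
			break;
		if (!--nr_io_queues)
			return -1;	/* -ENOMEM in the driver */
	} while (1);

	return nr_io_queues;
}

int main(void)
{
	/* A 4144-byte window fits the admin queue plus 5 I/O queues. */
	printf("queues: %d\n", fit_io_queues(32, 4144));
	return 0;
}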
1560 /* Deregister the admin queue's interrupt */ 1725 /* Deregister the admin queue's interrupt */
1561 pci_free_irq(pdev, 0, adminq); 1726 pci_free_irq(pdev, 0, adminq);
@@ -1586,7 +1751,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1586 return nvme_create_io_queues(dev); 1751 return nvme_create_io_queues(dev);
1587} 1752}
1588 1753
1589static void nvme_del_queue_end(struct request *req, int error) 1754static void nvme_del_queue_end(struct request *req, blk_status_t error)
1590{ 1755{
1591 struct nvme_queue *nvmeq = req->end_io_data; 1756 struct nvme_queue *nvmeq = req->end_io_data;
1592 1757
@@ -1594,7 +1759,7 @@ static void nvme_del_queue_end(struct request *req, int error)
1594 complete(&nvmeq->dev->ioq_wait); 1759 complete(&nvmeq->dev->ioq_wait);
1595} 1760}
1596 1761
1597static void nvme_del_cq_end(struct request *req, int error) 1762static void nvme_del_cq_end(struct request *req, blk_status_t error)
1598{ 1763{
1599 struct nvme_queue *nvmeq = req->end_io_data; 1764 struct nvme_queue *nvmeq = req->end_io_data;
1600 1765
@@ -1799,8 +1964,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
1799 bool dead = true; 1964 bool dead = true;
1800 struct pci_dev *pdev = to_pci_dev(dev->dev); 1965 struct pci_dev *pdev = to_pci_dev(dev->dev);
1801 1966
1802 del_timer_sync(&dev->watchdog_timer);
1803
1804 mutex_lock(&dev->shutdown_lock); 1967 mutex_lock(&dev->shutdown_lock);
1805 if (pci_is_enabled(pdev)) { 1968 if (pci_is_enabled(pdev)) {
1806 u32 csts = readl(dev->bar + NVME_REG_CSTS); 1969 u32 csts = readl(dev->bar + NVME_REG_CSTS);
@@ -1816,8 +1979,20 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
1816 * Give the controller a chance to complete all entered requests if 1979 * Give the controller a chance to complete all entered requests if
1817 * doing a safe shutdown. 1980 * doing a safe shutdown.
1818 */ 1981 */
1819 if (!dead && shutdown) 1982 if (!dead) {
1820 nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); 1983 if (shutdown)
1984 nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
1985
1986 /*
1987 * If the controller is still alive tell it to stop using the
1988 * host memory buffer. In theory the shutdown / reset should
1989 * make sure that it doesn't access the host memory anymore,
1990 * but I'd rather be safe than sorry.
1991 */
1992 if (dev->host_mem_descs)
1993 nvme_set_host_mem(dev, 0);
1994
1995 }
1821 nvme_stop_queues(&dev->ctrl); 1996 nvme_stop_queues(&dev->ctrl);
1822 1997
1823 queues = dev->online_queues - 1; 1998 queues = dev->online_queues - 1;
@@ -1900,7 +2075,8 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
1900 2075
1901static void nvme_reset_work(struct work_struct *work) 2076static void nvme_reset_work(struct work_struct *work)
1902{ 2077{
1903 struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); 2078 struct nvme_dev *dev =
2079 container_of(work, struct nvme_dev, ctrl.reset_work);
1904 bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); 2080 bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
1905 int result = -ENODEV; 2081 int result = -ENODEV;
1906 2082
@@ -1949,6 +2125,9 @@ static void nvme_reset_work(struct work_struct *work)
1949 "unable to allocate dma for dbbuf\n"); 2125 "unable to allocate dma for dbbuf\n");
1950 } 2126 }
1951 2127
2128 if (dev->ctrl.hmpre)
2129 nvme_setup_host_mem(dev);
2130
1952 result = nvme_setup_io_queues(dev); 2131 result = nvme_setup_io_queues(dev);
1953 if (result) 2132 if (result)
1954 goto out; 2133 goto out;
@@ -1962,8 +2141,6 @@ static void nvme_reset_work(struct work_struct *work)
1962 if (dev->online_queues > 1) 2141 if (dev->online_queues > 1)
1963 nvme_queue_async_events(&dev->ctrl); 2142 nvme_queue_async_events(&dev->ctrl);
1964 2143
1965 mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
1966
1967 /* 2144 /*
1968 * Keep the controller around but remove all namespaces if we don't have 2145 * Keep the controller around but remove all namespaces if we don't have
1969 * any working I/O queue. 2146 * any working I/O queue.
@@ -2003,17 +2180,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
2003 nvme_put_ctrl(&dev->ctrl); 2180 nvme_put_ctrl(&dev->ctrl);
2004} 2181}
2005 2182
2006static int nvme_reset(struct nvme_dev *dev)
2007{
2008 if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
2009 return -ENODEV;
2010 if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
2011 return -EBUSY;
2012 if (!queue_work(nvme_workq, &dev->reset_work))
2013 return -EBUSY;
2014 return 0;
2015}
2016
2017static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) 2183static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
2018{ 2184{
2019 *val = readl(to_nvme_dev(ctrl)->bar + off); 2185 *val = readl(to_nvme_dev(ctrl)->bar + off);
@@ -2032,16 +2198,6 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
2032 return 0; 2198 return 0;
2033} 2199}
2034 2200
2035static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
2036{
2037 struct nvme_dev *dev = to_nvme_dev(ctrl);
2038 int ret = nvme_reset(dev);
2039
2040 if (!ret)
2041 flush_work(&dev->reset_work);
2042 return ret;
2043}
2044
2045static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { 2201static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
2046 .name = "pcie", 2202 .name = "pcie",
2047 .module = THIS_MODULE, 2203 .module = THIS_MODULE,
@@ -2049,7 +2205,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
2049 .reg_read32 = nvme_pci_reg_read32, 2205 .reg_read32 = nvme_pci_reg_read32,
2050 .reg_write32 = nvme_pci_reg_write32, 2206 .reg_write32 = nvme_pci_reg_write32,
2051 .reg_read64 = nvme_pci_reg_read64, 2207 .reg_read64 = nvme_pci_reg_read64,
2052 .reset_ctrl = nvme_pci_reset_ctrl,
2053 .free_ctrl = nvme_pci_free_ctrl, 2208 .free_ctrl = nvme_pci_free_ctrl,
2054 .submit_async_event = nvme_pci_submit_async_event, 2209 .submit_async_event = nvme_pci_submit_async_event,
2055}; 2210};
@@ -2061,8 +2216,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
2061 if (pci_request_mem_regions(pdev, "nvme")) 2216 if (pci_request_mem_regions(pdev, "nvme"))
2062 return -ENODEV; 2217 return -ENODEV;
2063 2218
2064 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); 2219 if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
2065 if (!dev->bar)
2066 goto release; 2220 goto release;
2067 2221
2068 return 0; 2222 return 0;
@@ -2116,10 +2270,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2116 if (result) 2270 if (result)
2117 goto free; 2271 goto free;
2118 2272
2119 INIT_WORK(&dev->reset_work, nvme_reset_work); 2273 INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
2120 INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); 2274 INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
2121 setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
2122 (unsigned long)dev);
2123 mutex_init(&dev->shutdown_lock); 2275 mutex_init(&dev->shutdown_lock);
2124 init_completion(&dev->ioq_wait); 2276 init_completion(&dev->ioq_wait);
2125 2277
@@ -2137,7 +2289,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2137 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING); 2289 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
2138 dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); 2290 dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
2139 2291
2140 queue_work(nvme_workq, &dev->reset_work); 2292 queue_work(nvme_wq, &dev->ctrl.reset_work);
2141 return 0; 2293 return 0;
2142 2294
2143 release_pools: 2295 release_pools:
@@ -2158,7 +2310,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
2158 if (prepare) 2310 if (prepare)
2159 nvme_dev_disable(dev, false); 2311 nvme_dev_disable(dev, false);
2160 else 2312 else
2161 nvme_reset(dev); 2313 nvme_reset_ctrl(&dev->ctrl);
2162} 2314}
2163 2315
2164static void nvme_shutdown(struct pci_dev *pdev) 2316static void nvme_shutdown(struct pci_dev *pdev)
@@ -2178,7 +2330,7 @@ static void nvme_remove(struct pci_dev *pdev)
2178 2330
2179 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); 2331 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
2180 2332
2181 cancel_work_sync(&dev->reset_work); 2333 cancel_work_sync(&dev->ctrl.reset_work);
2182 pci_set_drvdata(pdev, NULL); 2334 pci_set_drvdata(pdev, NULL);
2183 2335
2184 if (!pci_device_is_present(pdev)) { 2336 if (!pci_device_is_present(pdev)) {
@@ -2186,9 +2338,10 @@ static void nvme_remove(struct pci_dev *pdev)
2186 nvme_dev_disable(dev, false); 2338 nvme_dev_disable(dev, false);
2187 } 2339 }
2188 2340
2189 flush_work(&dev->reset_work); 2341 flush_work(&dev->ctrl.reset_work);
2190 nvme_uninit_ctrl(&dev->ctrl); 2342 nvme_uninit_ctrl(&dev->ctrl);
2191 nvme_dev_disable(dev, true); 2343 nvme_dev_disable(dev, true);
2344 nvme_free_host_mem(dev);
2192 nvme_dev_remove_admin(dev); 2345 nvme_dev_remove_admin(dev);
2193 nvme_free_queues(dev, 0); 2346 nvme_free_queues(dev, 0);
2194 nvme_release_prp_pools(dev); 2347 nvme_release_prp_pools(dev);
@@ -2229,7 +2382,7 @@ static int nvme_resume(struct device *dev)
2229 struct pci_dev *pdev = to_pci_dev(dev); 2382 struct pci_dev *pdev = to_pci_dev(dev);
2230 struct nvme_dev *ndev = pci_get_drvdata(pdev); 2383 struct nvme_dev *ndev = pci_get_drvdata(pdev);
2231 2384
2232 nvme_reset(ndev); 2385 nvme_reset_ctrl(&ndev->ctrl);
2233 return 0; 2386 return 0;
2234} 2387}
2235#endif 2388#endif
@@ -2268,7 +2421,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
2268 2421
2269 dev_info(dev->ctrl.device, "restart after slot reset\n"); 2422 dev_info(dev->ctrl.device, "restart after slot reset\n");
2270 pci_restore_state(pdev); 2423 pci_restore_state(pdev);
2271 nvme_reset(dev); 2424 nvme_reset_ctrl(&dev->ctrl);
2272 return PCI_ERS_RESULT_RECOVERED; 2425 return PCI_ERS_RESULT_RECOVERED;
2273} 2426}
2274 2427
@@ -2324,22 +2477,12 @@ static struct pci_driver nvme_driver = {
2324 2477
2325static int __init nvme_init(void) 2478static int __init nvme_init(void)
2326{ 2479{
2327 int result; 2480 return pci_register_driver(&nvme_driver);
2328
2329 nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
2330 if (!nvme_workq)
2331 return -ENOMEM;
2332
2333 result = pci_register_driver(&nvme_driver);
2334 if (result)
2335 destroy_workqueue(nvme_workq);
2336 return result;
2337} 2481}
2338 2482
2339static void __exit nvme_exit(void) 2483static void __exit nvme_exit(void)
2340{ 2484{
2341 pci_unregister_driver(&nvme_driver); 2485 pci_unregister_driver(&nvme_driver);
2342 destroy_workqueue(nvme_workq);
2343 _nvme_check_size(); 2486 _nvme_check_size();
2344} 2487}
2345 2488
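Editor's note: both this file and the RDMA transport below stop maintaining their own reset helper and workqueue; the patch relies on the nvme core providing a shared nvme_wq and a generic nvme_reset_ctrl() that queues ctrl->reset_work after the RESETTING state transition. That core side is not part of these hunks, so the sketch below is only an assumption about its shape, modelled on the driver-local nvme_reset() removed above.

#include <stdbool.h>
#include <stdio.h>

enum ctrl_state { CTRL_LIVE, CTRL_RESETTING, CTRL_DELETING };

struct ctrl {
	enum ctrl_state state;
	bool reset_queued;	/* stands in for queue_work(nvme_wq, &ctrl->reset_work) */
};

/* Assumed shape of the core helper: flip the state first, then queue the
 * per-controller reset work on the shared workqueue; refuse otherwise. */
static int reset_ctrl(struct ctrl *ctrl)
{
	if (ctrl->state != CTRL_LIVE)
		return -1;		/* state change refused -> -EBUSY */
	ctrl->state = CTRL_RESETTING;
	if (ctrl->reset_queued)
		return -1;		/* already pending -> -EBUSY */
	ctrl->reset_queued = true;
	return 0;
}

int main(void)
{
	struct ctrl c = { .state = CTRL_LIVE };

	printf("first reset:  %d\n", reset_ctrl(&c));	/* 0 */
	printf("second reset: %d\n", reset_ctrl(&c));	/* refused while resetting */
	return 0;
}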
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 24397d306d53..6d4119dfbdaa 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,7 +48,7 @@
48 */ 48 */
49#define NVME_RDMA_NR_AEN_COMMANDS 1 49#define NVME_RDMA_NR_AEN_COMMANDS 1
50#define NVME_RDMA_AQ_BLKMQ_DEPTH \ 50#define NVME_RDMA_AQ_BLKMQ_DEPTH \
51 (NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS) 51 (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
52 52
53struct nvme_rdma_device { 53struct nvme_rdma_device {
54 struct ib_device *dev; 54 struct ib_device *dev;
@@ -80,10 +80,8 @@ struct nvme_rdma_request {
80}; 80};
81 81
82enum nvme_rdma_queue_flags { 82enum nvme_rdma_queue_flags {
83 NVME_RDMA_Q_CONNECTED = (1 << 0), 83 NVME_RDMA_Q_LIVE = 0,
84 NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), 84 NVME_RDMA_Q_DELETING = 1,
85 NVME_RDMA_Q_DELETING = (1 << 2),
86 NVME_RDMA_Q_LIVE = (1 << 3),
87}; 85};
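Editor's note: the flag rework above matters because these queue flags are manipulated with set_bit()/test_bit()/clear_bit(), which take a bit number rather than a mask, so plain indices (LIVE = 0, DELETING = 1) are the natural definition. The standalone illustration below uses ordinary integer operations in place of the kernel's atomic bitops.

#include <stdio.h>

#define Q_LIVE     0	/* bit numbers, for set_bit()/test_bit()-style helpers */
#define Q_DELETING 1

static void set_flag(unsigned long *flags, int bit)   { *flags |= 1UL << bit; }
static int  test_flag(unsigned long *flags, int bit)  { return !!(*flags & (1UL << bit)); }
static void clear_flag(unsigned long *flags, int bit) { *flags &= ~(1UL << bit); }

int main(void)
{
	unsigned long flags = 0;

	set_flag(&flags, Q_LIVE);
	printf("live: %d deleting: %d\n",
	       test_flag(&flags, Q_LIVE), test_flag(&flags, Q_DELETING));
	clear_flag(&flags, Q_LIVE);
	printf("live after clear: %d\n", test_flag(&flags, Q_LIVE));
	return 0;
}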
88 86
89struct nvme_rdma_queue { 87struct nvme_rdma_queue {
@@ -103,9 +101,6 @@ struct nvme_rdma_queue {
103}; 101};
104 102
105struct nvme_rdma_ctrl { 103struct nvme_rdma_ctrl {
106 /* read and written in the hot path */
107 spinlock_t lock;
108
109 /* read only in the hot path */ 104 /* read only in the hot path */
110 struct nvme_rdma_queue *queues; 105 struct nvme_rdma_queue *queues;
111 u32 queue_count; 106 u32 queue_count;
@@ -113,7 +108,6 @@ struct nvme_rdma_ctrl {
113 /* other member variables */ 108 /* other member variables */
114 struct blk_mq_tag_set tag_set; 109 struct blk_mq_tag_set tag_set;
115 struct work_struct delete_work; 110 struct work_struct delete_work;
116 struct work_struct reset_work;
117 struct work_struct err_work; 111 struct work_struct err_work;
118 112
119 struct nvme_rdma_qe async_event_sqe; 113 struct nvme_rdma_qe async_event_sqe;
@@ -145,8 +139,6 @@ static DEFINE_MUTEX(device_list_mutex);
145static LIST_HEAD(nvme_rdma_ctrl_list); 139static LIST_HEAD(nvme_rdma_ctrl_list);
146static DEFINE_MUTEX(nvme_rdma_ctrl_mutex); 140static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
147 141
148static struct workqueue_struct *nvme_rdma_wq;
149
150/* 142/*
151 * Disabling this option makes small I/O goes faster, but is fundamentally 143 * Disabling this option makes small I/O goes faster, but is fundamentally
152 * unsafe. With it turned off we will have to register a global rkey that 144 * unsafe. With it turned off we will have to register a global rkey that
@@ -301,10 +293,12 @@ out:
301 return ret; 293 return ret;
302} 294}
303 295
304static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl, 296static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
305 struct request *rq, unsigned int queue_idx) 297 struct request *rq, unsigned int hctx_idx)
306{ 298{
299 struct nvme_rdma_ctrl *ctrl = set->driver_data;
307 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 300 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
301 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
308 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; 302 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
309 struct nvme_rdma_device *dev = queue->device; 303 struct nvme_rdma_device *dev = queue->device;
310 304
@@ -315,22 +309,13 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
315 DMA_TO_DEVICE); 309 DMA_TO_DEVICE);
316} 310}
317 311
318static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, 312static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
319 struct request *rq, unsigned int hctx_idx) 313 struct request *rq, unsigned int hctx_idx,
320{ 314 unsigned int numa_node)
321 return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
322}
323
324static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
325 struct request *rq, unsigned int hctx_idx)
326{
327 return __nvme_rdma_exit_request(set->driver_data, rq, 0);
328}
329
330static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
331 struct request *rq, unsigned int queue_idx)
332{ 315{
316 struct nvme_rdma_ctrl *ctrl = set->driver_data;
333 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 317 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
318 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
334 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; 319 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
335 struct nvme_rdma_device *dev = queue->device; 320 struct nvme_rdma_device *dev = queue->device;
336 struct ib_device *ibdev = dev->dev; 321 struct ib_device *ibdev = dev->dev;
@@ -358,20 +343,6 @@ out_free_qe:
358 return -ENOMEM; 343 return -ENOMEM;
359} 344}
360 345
361static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
362 struct request *rq, unsigned int hctx_idx,
363 unsigned int numa_node)
364{
365 return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
366}
367
368static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
369 struct request *rq, unsigned int hctx_idx,
370 unsigned int numa_node)
371{
372 return __nvme_rdma_init_request(set->driver_data, rq, 0);
373}
374
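Editor's note: the consolidation above folds the separate admin and I/O init/exit callbacks into one pair by deriving the queue index from which tag set the request belongs to: the admin tag set always maps to queue 0, while I/O requests use hctx_idx + 1 because queue 0 is reserved for the admin queue. A minimal model of that lookup, with invented types:

#include <stdio.h>

struct tag_set { const char *name; };

struct ctrl {
	struct tag_set admin_set;
	struct tag_set io_set;
};

/* Admin requests always target queue 0; I/O hctx N targets queue N + 1. */
static int request_queue_idx(struct ctrl *ctrl, struct tag_set *set,
			     unsigned hctx_idx)
{
	return (set == &ctrl->io_set) ? (int)hctx_idx + 1 : 0;
}

int main(void)
{
	struct ctrl c = { { "admin" }, { "io" } };

	printf("admin -> %d\n", request_queue_idx(&c, &c.admin_set, 0));
	printf("io hctx 2 -> %d\n", request_queue_idx(&c, &c.io_set, 2));
	return 0;
}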
375static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 346static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
376 unsigned int hctx_idx) 347 unsigned int hctx_idx)
377{ 348{
@@ -469,9 +440,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
469 struct nvme_rdma_device *dev; 440 struct nvme_rdma_device *dev;
470 struct ib_device *ibdev; 441 struct ib_device *ibdev;
471 442
472 if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
473 return;
474
475 dev = queue->device; 443 dev = queue->device;
476 ibdev = dev->dev; 444 ibdev = dev->dev;
477 rdma_destroy_qp(queue->cm_id); 445 rdma_destroy_qp(queue->cm_id);
@@ -483,17 +451,21 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
483 nvme_rdma_dev_put(dev); 451 nvme_rdma_dev_put(dev);
484} 452}
485 453
486static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue, 454static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
487 struct nvme_rdma_device *dev)
488{ 455{
489 struct ib_device *ibdev = dev->dev; 456 struct ib_device *ibdev;
490 const int send_wr_factor = 3; /* MR, SEND, INV */ 457 const int send_wr_factor = 3; /* MR, SEND, INV */
491 const int cq_factor = send_wr_factor + 1; /* + RECV */ 458 const int cq_factor = send_wr_factor + 1; /* + RECV */
492 int comp_vector, idx = nvme_rdma_queue_idx(queue); 459 int comp_vector, idx = nvme_rdma_queue_idx(queue);
493
494 int ret; 460 int ret;
495 461
496 queue->device = dev; 462 queue->device = nvme_rdma_find_get_device(queue->cm_id);
463 if (!queue->device) {
464 dev_err(queue->cm_id->device->dev.parent,
465 "no client data found!\n");
466 return -ECONNREFUSED;
467 }
468 ibdev = queue->device->dev;
497 469
498 /* 470 /*
499 * The admin queue is barely used once the controller is live, so don't 471 * The admin queue is barely used once the controller is live, so don't
@@ -506,12 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
506 478
507 479
508 /* +1 for ib_stop_cq */ 480 /* +1 for ib_stop_cq */
509 queue->ib_cq = ib_alloc_cq(dev->dev, queue, 481 queue->ib_cq = ib_alloc_cq(ibdev, queue,
510 cq_factor * queue->queue_size + 1, comp_vector, 482 cq_factor * queue->queue_size + 1,
511 IB_POLL_SOFTIRQ); 483 comp_vector, IB_POLL_SOFTIRQ);
512 if (IS_ERR(queue->ib_cq)) { 484 if (IS_ERR(queue->ib_cq)) {
513 ret = PTR_ERR(queue->ib_cq); 485 ret = PTR_ERR(queue->ib_cq);
514 goto out; 486 goto out_put_dev;
515 } 487 }
516 488
517 ret = nvme_rdma_create_qp(queue, send_wr_factor); 489 ret = nvme_rdma_create_qp(queue, send_wr_factor);
@@ -524,7 +496,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
524 ret = -ENOMEM; 496 ret = -ENOMEM;
525 goto out_destroy_qp; 497 goto out_destroy_qp;
526 } 498 }
527 set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
528 499
529 return 0; 500 return 0;
530 501
@@ -532,7 +503,8 @@ out_destroy_qp:
532 ib_destroy_qp(queue->qp); 503 ib_destroy_qp(queue->qp);
533out_destroy_ib_cq: 504out_destroy_ib_cq:
534 ib_free_cq(queue->ib_cq); 505 ib_free_cq(queue->ib_cq);
535out: 506out_put_dev:
507 nvme_rdma_dev_put(queue->device);
536 return ret; 508 return ret;
537} 509}
538 510
@@ -583,12 +555,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
583 } 555 }
584 556
585 clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); 557 clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
586 set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
587 558
588 return 0; 559 return 0;
589 560
590out_destroy_cm_id: 561out_destroy_cm_id:
591 nvme_rdma_destroy_queue_ib(queue);
592 rdma_destroy_id(queue->cm_id); 562 rdma_destroy_id(queue->cm_id);
593 return ret; 563 return ret;
594} 564}
@@ -718,11 +688,11 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
718 if (nvmf_should_reconnect(&ctrl->ctrl)) { 688 if (nvmf_should_reconnect(&ctrl->ctrl)) {
719 dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", 689 dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
720 ctrl->ctrl.opts->reconnect_delay); 690 ctrl->ctrl.opts->reconnect_delay);
721 queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, 691 queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
722 ctrl->ctrl.opts->reconnect_delay * HZ); 692 ctrl->ctrl.opts->reconnect_delay * HZ);
723 } else { 693 } else {
724 dev_info(ctrl->ctrl.device, "Removing controller...\n"); 694 dev_info(ctrl->ctrl.device, "Removing controller...\n");
725 queue_work(nvme_rdma_wq, &ctrl->delete_work); 695 queue_work(nvme_wq, &ctrl->delete_work);
726 } 696 }
727} 697}
728 698
@@ -733,7 +703,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
733 bool changed; 703 bool changed;
734 int ret; 704 int ret;
735 705
736 ++ctrl->ctrl.opts->nr_reconnects; 706 ++ctrl->ctrl.nr_reconnects;
737 707
738 if (ctrl->queue_count > 1) { 708 if (ctrl->queue_count > 1) {
739 nvme_rdma_free_io_queues(ctrl); 709 nvme_rdma_free_io_queues(ctrl);
@@ -749,7 +719,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
749 if (ret) 719 if (ret)
750 goto requeue; 720 goto requeue;
751 721
752 ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH); 722 ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
753 if (ret) 723 if (ret)
754 goto requeue; 724 goto requeue;
755 725
@@ -777,7 +747,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
777 747
778 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 748 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
779 WARN_ON_ONCE(!changed); 749 WARN_ON_ONCE(!changed);
780 ctrl->ctrl.opts->nr_reconnects = 0; 750 ctrl->ctrl.nr_reconnects = 0;
781 751
782 if (ctrl->queue_count > 1) { 752 if (ctrl->queue_count > 1) {
783 nvme_queue_scan(&ctrl->ctrl); 753 nvme_queue_scan(&ctrl->ctrl);
@@ -790,7 +760,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
790 760
791requeue: 761requeue:
792 dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", 762 dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
793 ctrl->ctrl.opts->nr_reconnects); 763 ctrl->ctrl.nr_reconnects);
794 nvme_rdma_reconnect_or_remove(ctrl); 764 nvme_rdma_reconnect_or_remove(ctrl);
795} 765}
796 766
@@ -802,10 +772,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
802 772
803 nvme_stop_keep_alive(&ctrl->ctrl); 773 nvme_stop_keep_alive(&ctrl->ctrl);
804 774
805 for (i = 0; i < ctrl->queue_count; i++) { 775 for (i = 0; i < ctrl->queue_count; i++)
806 clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
807 clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); 776 clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
808 }
809 777
810 if (ctrl->queue_count > 1) 778 if (ctrl->queue_count > 1)
811 nvme_stop_queues(&ctrl->ctrl); 779 nvme_stop_queues(&ctrl->ctrl);
@@ -833,7 +801,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
833 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) 801 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
834 return; 802 return;
835 803
836 queue_work(nvme_rdma_wq, &ctrl->err_work); 804 queue_work(nvme_wq, &ctrl->err_work);
837} 805}
838 806
839static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc, 807static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1278,21 +1246,11 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
1278 1246
1279static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue) 1247static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
1280{ 1248{
1281 struct nvme_rdma_device *dev;
1282 int ret; 1249 int ret;
1283 1250
1284 dev = nvme_rdma_find_get_device(queue->cm_id); 1251 ret = nvme_rdma_create_queue_ib(queue);
1285 if (!dev) { 1252 if (ret)
1286 dev_err(queue->cm_id->device->dev.parent, 1253 return ret;
1287 "no client data found!\n");
1288 return -ECONNREFUSED;
1289 }
1290
1291 ret = nvme_rdma_create_queue_ib(queue, dev);
1292 if (ret) {
1293 nvme_rdma_dev_put(dev);
1294 goto out;
1295 }
1296 1254
1297 ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS); 1255 ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
1298 if (ret) { 1256 if (ret) {
@@ -1306,7 +1264,6 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
1306 1264
1307out_destroy_queue: 1265out_destroy_queue:
1308 nvme_rdma_destroy_queue_ib(queue); 1266 nvme_rdma_destroy_queue_ib(queue);
1309out:
1310 return ret; 1267 return ret;
1311} 1268}
1312 1269
@@ -1334,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
1334 * specified by the Fabrics standard. 1291 * specified by the Fabrics standard.
1335 */ 1292 */
1336 if (priv.qid == 0) { 1293 if (priv.qid == 0) {
1337 priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); 1294 priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
1338 priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); 1295 priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
1339 } else { 1296 } else {
1340 /* 1297 /*
1341 * current interpretation of the fabrics spec 1298 * current interpretation of the fabrics spec
@@ -1383,12 +1340,14 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
1383 complete(&queue->cm_done); 1340 complete(&queue->cm_done);
1384 return 0; 1341 return 0;
1385 case RDMA_CM_EVENT_REJECTED: 1342 case RDMA_CM_EVENT_REJECTED:
1343 nvme_rdma_destroy_queue_ib(queue);
1386 cm_error = nvme_rdma_conn_rejected(queue, ev); 1344 cm_error = nvme_rdma_conn_rejected(queue, ev);
1387 break; 1345 break;
1388 case RDMA_CM_EVENT_ADDR_ERROR:
1389 case RDMA_CM_EVENT_ROUTE_ERROR: 1346 case RDMA_CM_EVENT_ROUTE_ERROR:
1390 case RDMA_CM_EVENT_CONNECT_ERROR: 1347 case RDMA_CM_EVENT_CONNECT_ERROR:
1391 case RDMA_CM_EVENT_UNREACHABLE: 1348 case RDMA_CM_EVENT_UNREACHABLE:
1349 nvme_rdma_destroy_queue_ib(queue);
1350 case RDMA_CM_EVENT_ADDR_ERROR:
1392 dev_dbg(queue->ctrl->ctrl.device, 1351 dev_dbg(queue->ctrl->ctrl.device,
1393 "CM error event %d\n", ev->event); 1352 "CM error event %d\n", ev->event);
1394 cm_error = -ECONNRESET; 1353 cm_error = -ECONNRESET;
@@ -1435,8 +1394,8 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
1435/* 1394/*
1436 * We cannot accept any other command until the Connect command has completed. 1395 * We cannot accept any other command until the Connect command has completed.
1437 */ 1396 */
1438static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, 1397static inline blk_status_t
1439 struct request *rq) 1398nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
1440{ 1399{
1441 if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { 1400 if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
1442 struct nvme_command *cmd = nvme_req(rq)->cmd; 1401 struct nvme_command *cmd = nvme_req(rq)->cmd;
@@ -1452,16 +1411,15 @@ static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
1452 * failover. 1411 * failover.
1453 */ 1412 */
1454 if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING) 1413 if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
1455 return -EIO; 1414 return BLK_STS_IOERR;
1456 else 1415 return BLK_STS_RESOURCE; /* try again later */
1457 return -EAGAIN;
1458 } 1416 }
1459 } 1417 }
1460 1418
1461 return 0; 1419 return 0;
1462} 1420}
1463 1421
1464static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, 1422static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1465 const struct blk_mq_queue_data *bd) 1423 const struct blk_mq_queue_data *bd)
1466{ 1424{
1467 struct nvme_ns *ns = hctx->queue->queuedata; 1425 struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1472,28 +1430,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1472 struct nvme_command *c = sqe->data; 1430 struct nvme_command *c = sqe->data;
1473 bool flush = false; 1431 bool flush = false;
1474 struct ib_device *dev; 1432 struct ib_device *dev;
1475 int ret; 1433 blk_status_t ret;
1434 int err;
1476 1435
1477 WARN_ON_ONCE(rq->tag < 0); 1436 WARN_ON_ONCE(rq->tag < 0);
1478 1437
1479 ret = nvme_rdma_queue_is_ready(queue, rq); 1438 ret = nvme_rdma_queue_is_ready(queue, rq);
1480 if (unlikely(ret)) 1439 if (unlikely(ret))
1481 goto err; 1440 return ret;
1482 1441
1483 dev = queue->device->dev; 1442 dev = queue->device->dev;
1484 ib_dma_sync_single_for_cpu(dev, sqe->dma, 1443 ib_dma_sync_single_for_cpu(dev, sqe->dma,
1485 sizeof(struct nvme_command), DMA_TO_DEVICE); 1444 sizeof(struct nvme_command), DMA_TO_DEVICE);
1486 1445
1487 ret = nvme_setup_cmd(ns, rq, c); 1446 ret = nvme_setup_cmd(ns, rq, c);
1488 if (ret != BLK_MQ_RQ_QUEUE_OK) 1447 if (ret)
1489 return ret; 1448 return ret;
1490 1449
1491 blk_mq_start_request(rq); 1450 blk_mq_start_request(rq);
1492 1451
1493 ret = nvme_rdma_map_data(queue, rq, c); 1452 err = nvme_rdma_map_data(queue, rq, c);
1494 if (ret < 0) { 1453 if (err < 0) {
1495 dev_err(queue->ctrl->ctrl.device, 1454 dev_err(queue->ctrl->ctrl.device,
1496 "Failed to map data (%d)\n", ret); 1455 "Failed to map data (%d)\n", err);
1497 nvme_cleanup_cmd(rq); 1456 nvme_cleanup_cmd(rq);
1498 goto err; 1457 goto err;
1499 } 1458 }
@@ -1503,17 +1462,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1503 1462
1504 if (req_op(rq) == REQ_OP_FLUSH) 1463 if (req_op(rq) == REQ_OP_FLUSH)
1505 flush = true; 1464 flush = true;
1506 ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, 1465 err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
1507 req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); 1466 req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
1508 if (ret) { 1467 if (err) {
1509 nvme_rdma_unmap_data(queue, rq); 1468 nvme_rdma_unmap_data(queue, rq);
1510 goto err; 1469 goto err;
1511 } 1470 }
1512 1471
1513 return BLK_MQ_RQ_QUEUE_OK; 1472 return BLK_STS_OK;
1514err: 1473err:
1515 return (ret == -ENOMEM || ret == -EAGAIN) ? 1474 if (err == -ENOMEM || err == -EAGAIN)
1516 BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; 1475 return BLK_STS_RESOURCE;
1476 return BLK_STS_IOERR;
1517} 1477}
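Editor's note: the conversion above deliberately keeps two variables: ret carries block-layer blk_status_t codes that are returned as-is, while err holds negative errnos from the RDMA helpers and is translated at the single err: exit, with -ENOMEM/-EAGAIN mapping to "retry later" and everything else to an I/O error. A small standalone version of that translation, with the status values again only illustrative:

#include <errno.h>
#include <stdio.h>

typedef enum { BLK_STS_OK = 0, BLK_STS_RESOURCE, BLK_STS_IOERR } blk_status_t;

/* Map a negative errno from a transport helper onto a block-layer status:
 * transient allocation/backpressure failures become "retry later". */
static blk_status_t errno_to_blk_status(int err)
{
	if (err == -ENOMEM || err == -EAGAIN)
		return BLK_STS_RESOURCE;
	return BLK_STS_IOERR;
}

int main(void)
{
	printf("-ENOMEM -> %d\n", errno_to_blk_status(-ENOMEM));	/* RESOURCE */
	printf("-EIO    -> %d\n", errno_to_blk_status(-EIO));		/* IOERR */
	return 0;
}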
1518 1478
1519static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 1479static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -1523,7 +1483,6 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
1523 struct ib_wc wc; 1483 struct ib_wc wc;
1524 int found = 0; 1484 int found = 0;
1525 1485
1526 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1527 while (ib_poll_cq(cq, 1, &wc) > 0) { 1486 while (ib_poll_cq(cq, 1, &wc) > 0) {
1528 struct ib_cqe *cqe = wc.wr_cqe; 1487 struct ib_cqe *cqe = wc.wr_cqe;
1529 1488
@@ -1560,8 +1519,8 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
1560static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { 1519static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
1561 .queue_rq = nvme_rdma_queue_rq, 1520 .queue_rq = nvme_rdma_queue_rq,
1562 .complete = nvme_rdma_complete_rq, 1521 .complete = nvme_rdma_complete_rq,
1563 .init_request = nvme_rdma_init_admin_request, 1522 .init_request = nvme_rdma_init_request,
1564 .exit_request = nvme_rdma_exit_admin_request, 1523 .exit_request = nvme_rdma_exit_request,
1565 .reinit_request = nvme_rdma_reinit_request, 1524 .reinit_request = nvme_rdma_reinit_request,
1566 .init_hctx = nvme_rdma_init_admin_hctx, 1525 .init_hctx = nvme_rdma_init_admin_hctx,
1567 .timeout = nvme_rdma_timeout, 1526 .timeout = nvme_rdma_timeout,
@@ -1571,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
1571{ 1530{
1572 int error; 1531 int error;
1573 1532
1574 error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH); 1533 error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
1575 if (error) 1534 if (error)
1576 return error; 1535 return error;
1577 1536
@@ -1672,7 +1631,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
1672 nvme_rdma_free_io_queues(ctrl); 1631 nvme_rdma_free_io_queues(ctrl);
1673 } 1632 }
1674 1633
1675 if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags)) 1634 if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
1676 nvme_shutdown_ctrl(&ctrl->ctrl); 1635 nvme_shutdown_ctrl(&ctrl->ctrl);
1677 1636
1678 blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); 1637 blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1709,7 +1668,7 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
1709 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 1668 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
1710 return -EBUSY; 1669 return -EBUSY;
1711 1670
1712 if (!queue_work(nvme_rdma_wq, &ctrl->delete_work)) 1671 if (!queue_work(nvme_wq, &ctrl->delete_work))
1713 return -EBUSY; 1672 return -EBUSY;
1714 1673
1715 return 0; 1674 return 0;
@@ -1743,8 +1702,8 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
1743 1702
1744static void nvme_rdma_reset_ctrl_work(struct work_struct *work) 1703static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
1745{ 1704{
1746 struct nvme_rdma_ctrl *ctrl = container_of(work, 1705 struct nvme_rdma_ctrl *ctrl =
1747 struct nvme_rdma_ctrl, reset_work); 1706 container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
1748 int ret; 1707 int ret;
1749 bool changed; 1708 bool changed;
1750 1709
@@ -1785,22 +1744,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
1785del_dead_ctrl: 1744del_dead_ctrl:
1786 /* Deleting this dead controller... */ 1745 /* Deleting this dead controller... */
1787 dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); 1746 dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
1788 WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work)); 1747 WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
1789}
1790
1791static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
1792{
1793 struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
1794
1795 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
1796 return -EBUSY;
1797
1798 if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
1799 return -EBUSY;
1800
1801 flush_work(&ctrl->reset_work);
1802
1803 return 0;
1804} 1748}
1805 1749
1806static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { 1750static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1810,11 +1754,9 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
1810 .reg_read32 = nvmf_reg_read32, 1754 .reg_read32 = nvmf_reg_read32,
1811 .reg_read64 = nvmf_reg_read64, 1755 .reg_read64 = nvmf_reg_read64,
1812 .reg_write32 = nvmf_reg_write32, 1756 .reg_write32 = nvmf_reg_write32,
1813 .reset_ctrl = nvme_rdma_reset_ctrl,
1814 .free_ctrl = nvme_rdma_free_ctrl, 1757 .free_ctrl = nvme_rdma_free_ctrl,
1815 .submit_async_event = nvme_rdma_submit_async_event, 1758 .submit_async_event = nvme_rdma_submit_async_event,
1816 .delete_ctrl = nvme_rdma_del_ctrl, 1759 .delete_ctrl = nvme_rdma_del_ctrl,
1817 .get_subsysnqn = nvmf_get_subsysnqn,
1818 .get_address = nvmf_get_address, 1760 .get_address = nvmf_get_address,
1819}; 1761};
1820 1762
@@ -1919,8 +1861,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
1919 nvme_rdma_reconnect_ctrl_work); 1861 nvme_rdma_reconnect_ctrl_work);
1920 INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); 1862 INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
1921 INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work); 1863 INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
1922 INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work); 1864 INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
1923 spin_lock_init(&ctrl->lock);
1924 1865
1925 ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */ 1866 ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
1926 ctrl->ctrl.sqsize = opts->queue_size - 1; 1867 ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -1939,12 +1880,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
1939 /* sanity check icdoff */ 1880 /* sanity check icdoff */
1940 if (ctrl->ctrl.icdoff) { 1881 if (ctrl->ctrl.icdoff) {
1941 dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); 1882 dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
1883 ret = -EINVAL;
1942 goto out_remove_admin_queue; 1884 goto out_remove_admin_queue;
1943 } 1885 }
1944 1886
1945 /* sanity check keyed sgls */ 1887 /* sanity check keyed sgls */
1946 if (!(ctrl->ctrl.sgls & (1 << 20))) { 1888 if (!(ctrl->ctrl.sgls & (1 << 20))) {
1947 dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n"); 1889 dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n");
1890 ret = -EINVAL;
1948 goto out_remove_admin_queue; 1891 goto out_remove_admin_queue;
1949 } 1892 }
1950 1893
@@ -2033,7 +1976,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
2033 } 1976 }
2034 mutex_unlock(&nvme_rdma_ctrl_mutex); 1977 mutex_unlock(&nvme_rdma_ctrl_mutex);
2035 1978
2036 flush_workqueue(nvme_rdma_wq); 1979 flush_workqueue(nvme_wq);
2037} 1980}
2038 1981
2039static struct ib_client nvme_rdma_ib_client = { 1982static struct ib_client nvme_rdma_ib_client = {
@@ -2046,13 +1989,9 @@ static int __init nvme_rdma_init_module(void)
2046{ 1989{
2047 int ret; 1990 int ret;
2048 1991
2049 nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
2050 if (!nvme_rdma_wq)
2051 return -ENOMEM;
2052
2053 ret = ib_register_client(&nvme_rdma_ib_client); 1992 ret = ib_register_client(&nvme_rdma_ib_client);
2054 if (ret) 1993 if (ret)
2055 goto err_destroy_wq; 1994 return ret;
2056 1995
2057 ret = nvmf_register_transport(&nvme_rdma_transport); 1996 ret = nvmf_register_transport(&nvme_rdma_transport);
2058 if (ret) 1997 if (ret)
@@ -2062,8 +2001,6 @@ static int __init nvme_rdma_init_module(void)
2062 2001
2063err_unreg_client: 2002err_unreg_client:
2064 ib_unregister_client(&nvme_rdma_ib_client); 2003 ib_unregister_client(&nvme_rdma_ib_client);
2065err_destroy_wq:
2066 destroy_workqueue(nvme_rdma_wq);
2067 return ret; 2004 return ret;
2068} 2005}
2069 2006
@@ -2071,7 +2008,6 @@ static void __exit nvme_rdma_cleanup_module(void)
2071{ 2008{
2072 nvmf_unregister_transport(&nvme_rdma_transport); 2009 nvmf_unregister_transport(&nvme_rdma_transport);
2073 ib_unregister_client(&nvme_rdma_ib_client); 2010 ib_unregister_client(&nvme_rdma_ib_client);
2074 destroy_workqueue(nvme_rdma_wq);
2075} 2011}
2076 2012
2077module_init(nvme_rdma_init_module); 2013module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
deleted file mode 100644
index 1f7671e631dd..000000000000
--- a/drivers/nvme/host/scsi.c
+++ /dev/null
@@ -1,2460 +0,0 @@
1/*
2 * NVM Express device driver
3 * Copyright (c) 2011-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15/*
16 * Refer to the SCSI-NVMe Translation spec for details on how
17 * each command is translated.
18 */
19
20#include <linux/bio.h>
21#include <linux/bitops.h>
22#include <linux/blkdev.h>
23#include <linux/compat.h>
24#include <linux/delay.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/genhd.h>
28#include <linux/idr.h>
29#include <linux/init.h>
30#include <linux/interrupt.h>
31#include <linux/io.h>
32#include <linux/kdev_t.h>
33#include <linux/kthread.h>
34#include <linux/kernel.h>
35#include <linux/mm.h>
36#include <linux/module.h>
37#include <linux/moduleparam.h>
38#include <linux/pci.h>
39#include <linux/poison.h>
40#include <linux/sched.h>
41#include <linux/slab.h>
42#include <linux/types.h>
43#include <asm/unaligned.h>
44#include <scsi/sg.h>
45#include <scsi/scsi.h>
46#include <scsi/scsi_request.h>
47
48#include "nvme.h"
49
50static int sg_version_num = 30534; /* 2 digits for each component */
51
52/* VPD Page Codes */
53#define VPD_SUPPORTED_PAGES 0x00
54#define VPD_SERIAL_NUMBER 0x80
55#define VPD_DEVICE_IDENTIFIERS 0x83
56#define VPD_EXTENDED_INQUIRY 0x86
57#define VPD_BLOCK_LIMITS 0xB0
58#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1
59
60/* format unit parameter list offsets */
61#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4
62#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
63#define FORMAT_UNIT_PROT_INT_OFFSET 3
64#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0
65#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07
66
67/* Misc. defines */
68#define FIXED_SENSE_DATA 0x70
69#define DESC_FORMAT_SENSE_DATA 0x72
70#define FIXED_SENSE_DATA_ADD_LENGTH 10
71#define LUN_ENTRY_SIZE 8
72#define LUN_DATA_HEADER_SIZE 8
73#define ALL_LUNS_RETURNED 0x02
74#define ALL_WELL_KNOWN_LUNS_RETURNED 0x01
75#define RESTRICTED_LUNS_RETURNED 0x00
76#define DOWNLOAD_SAVE_ACTIVATE 0x05
77#define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E
78#define ACTIVATE_DEFERRED_MICROCODE 0x0F
79#define FORMAT_UNIT_IMMED_MASK 0x2
80#define FORMAT_UNIT_IMMED_OFFSET 1
81#define KELVIN_TEMP_FACTOR 273
82#define FIXED_FMT_SENSE_DATA_SIZE 18
83#define DESC_FMT_SENSE_DATA_SIZE 8
84
85/* SCSI/NVMe defines and bit masks */
86#define INQ_STANDARD_INQUIRY_PAGE 0x00
87#define INQ_SUPPORTED_VPD_PAGES_PAGE 0x00
88#define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80
89#define INQ_DEVICE_IDENTIFICATION_PAGE 0x83
90#define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86
91#define INQ_BDEV_LIMITS_PAGE 0xB0
92#define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1
93#define INQ_SERIAL_NUMBER_LENGTH 0x14
94#define INQ_NUM_SUPPORTED_VPD_PAGES 6
95#define VERSION_SPC_4 0x06
96#define ACA_UNSUPPORTED 0
97#define STANDARD_INQUIRY_LENGTH 36
98#define ADDITIONAL_STD_INQ_LENGTH 31
99#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C
100#define RESERVED_FIELD 0
101
102/* Mode Sense/Select defines */
103#define MODE_PAGE_INFO_EXCEP 0x1C
104#define MODE_PAGE_CACHING 0x08
105#define MODE_PAGE_CONTROL 0x0A
106#define MODE_PAGE_POWER_CONDITION 0x1A
107#define MODE_PAGE_RETURN_ALL 0x3F
108#define MODE_PAGE_BLK_DES_LEN 0x08
109#define MODE_PAGE_LLBAA_BLK_DES_LEN 0x10
110#define MODE_PAGE_CACHING_LEN 0x14
111#define MODE_PAGE_CONTROL_LEN 0x0C
112#define MODE_PAGE_POW_CND_LEN 0x28
113#define MODE_PAGE_INF_EXC_LEN 0x0C
114#define MODE_PAGE_ALL_LEN 0x54
115#define MODE_SENSE6_MPH_SIZE 4
116#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0
117#define MODE_SENSE_PAGE_CODE_OFFSET 2
118#define MODE_SENSE_PAGE_CODE_MASK 0x3F
119#define MODE_SENSE_LLBAA_MASK 0x10
120#define MODE_SENSE_LLBAA_SHIFT 4
121#define MODE_SENSE_DBD_MASK 8
122#define MODE_SENSE_DBD_SHIFT 3
123#define MODE_SENSE10_MPH_SIZE 8
124#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10
125#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1
126#define MODE_SELECT_6_BD_OFFSET 3
127#define MODE_SELECT_10_BD_OFFSET 6
128#define MODE_SELECT_10_LLBAA_OFFSET 4
129#define MODE_SELECT_10_LLBAA_MASK 1
130#define MODE_SELECT_6_MPH_SIZE 4
131#define MODE_SELECT_10_MPH_SIZE 8
132#define CACHING_MODE_PAGE_WCE_MASK 0x04
133#define MODE_SENSE_BLK_DESC_ENABLED 0
134#define MODE_SENSE_BLK_DESC_COUNT 1
135#define MODE_SELECT_PAGE_CODE_MASK 0x3F
136#define SHORT_DESC_BLOCK 8
137#define LONG_DESC_BLOCK 16
138#define MODE_PAGE_POW_CND_LEN_FIELD 0x26
139#define MODE_PAGE_INF_EXC_LEN_FIELD 0x0A
140#define MODE_PAGE_CACHING_LEN_FIELD 0x12
141#define MODE_PAGE_CONTROL_LEN_FIELD 0x0A
142#define MODE_SENSE_PC_CURRENT_VALUES 0
143
144/* Log Sense defines */
145#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE 0x00
146#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07
147#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F
148#define LOG_PAGE_TEMPERATURE_PAGE 0x0D
149#define LOG_SENSE_CDB_SP_NOT_ENABLED 0
150#define LOG_SENSE_CDB_PC_MASK 0xC0
151#define LOG_SENSE_CDB_PC_SHIFT 6
152#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1
153#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F
154#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8
155#define LOG_INFO_EXCP_PAGE_LENGTH 0xC
156#define REMAINING_TEMP_PAGE_LENGTH 0xC
157#define LOG_TEMP_PAGE_LENGTH 0x10
158#define LOG_TEMP_UNKNOWN 0xFF
159#define SUPPORTED_LOG_PAGES_PAGE_LENGTH 0x3
160
161/* Read Capacity defines */
162#define READ_CAP_10_RESP_SIZE 8
163#define READ_CAP_16_RESP_SIZE 32
164
165/* NVMe Namespace and Command Defines */
166#define BYTES_TO_DWORDS 4
167#define NVME_MAX_FIRMWARE_SLOT 7
168
169/* Report LUNs defines */
170#define REPORT_LUNS_FIRST_LUN_OFFSET 8
171
172/* SCSI ADDITIONAL SENSE Codes */
173
174#define SCSI_ASC_NO_SENSE 0x00
175#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT 0x03
176#define SCSI_ASC_LUN_NOT_READY 0x04
177#define SCSI_ASC_WARNING 0x0B
178#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED 0x10
179#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED 0x10
180#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED 0x10
181#define SCSI_ASC_UNRECOVERED_READ_ERROR 0x11
182#define SCSI_ASC_MISCOMPARE_DURING_VERIFY 0x1D
183#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID 0x20
184#define SCSI_ASC_ILLEGAL_COMMAND 0x20
185#define SCSI_ASC_ILLEGAL_BLOCK 0x21
186#define SCSI_ASC_INVALID_CDB 0x24
187#define SCSI_ASC_INVALID_LUN 0x25
188#define SCSI_ASC_INVALID_PARAMETER 0x26
189#define SCSI_ASC_FORMAT_COMMAND_FAILED 0x31
190#define SCSI_ASC_INTERNAL_TARGET_FAILURE 0x44
191
192/* SCSI ADDITIONAL SENSE Code Qualifiers */
193
194#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE 0x00
195#define SCSI_ASCQ_FORMAT_COMMAND_FAILED 0x01
196#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED 0x01
197#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED 0x02
198#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED 0x03
199#define SCSI_ASCQ_FORMAT_IN_PROGRESS 0x04
200#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08
201#define SCSI_ASCQ_INVALID_LUN_ID 0x09
202
203/* copied from drivers/usb/gadget/function/storage_common.h */
204static inline u32 get_unaligned_be24(u8 *buf)
205{
206 return 0xffffff & (u32) get_unaligned_be32(buf - 1);
207}
208
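The helper above reads a 32-bit big-endian word starting one byte before the buffer and masks the result down to 24 bits; nvme_trans_io() further down uses it to pull the LBA out of READ(6)/WRITE(6) CDBs. A standalone userspace sketch of the same extraction, assembling the value byte by byte instead of reading before the buffer (illustrative only, not driver code; the CDB bytes are made up):

#include <stdint.h>
#include <stdio.h>

static uint32_t be24_to_cpu(const uint8_t *buf)
{
	/* assemble byte by byte instead of reading before the buffer */
	return ((uint32_t)buf[0] << 16) | ((uint32_t)buf[1] << 8) | buf[2];
}

int main(void)
{
	/* READ(6) CDB: the LBA is carried in bytes 1..3 (illustrative values) */
	uint8_t cdb[6] = { 0x08, 0x12, 0x34, 0x56, 0x08, 0x00 };

	printf("lba = 0x%06x\n", be24_to_cpu(&cdb[1]));	/* lba = 0x123456 */
	return 0;
}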
209/* Struct to gather data that needs to be extracted from a SCSI CDB.
210 Not conforming to any particular CDB variant, but compatible with all. */
211
212struct nvme_trans_io_cdb {
213 u8 fua;
214 u8 prot_info;
215 u64 lba;
216 u32 xfer_len;
217};
218
219
220/* Internal Helper Functions */
221
222
223/* Copy data to userspace memory */
224
225static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
226 unsigned long n)
227{
228 int i;
229 void *index = from;
230 size_t remaining = n;
231 size_t xfer_len;
232
233 if (hdr->iovec_count > 0) {
234 struct sg_iovec sgl;
235
236 for (i = 0; i < hdr->iovec_count; i++) {
237 if (copy_from_user(&sgl, hdr->dxferp +
238 i * sizeof(struct sg_iovec),
239 sizeof(struct sg_iovec)))
240 return -EFAULT;
241 xfer_len = min(remaining, sgl.iov_len);
242 if (copy_to_user(sgl.iov_base, index, xfer_len))
243 return -EFAULT;
244
245 index += xfer_len;
246 remaining -= xfer_len;
247 if (remaining == 0)
248 break;
249 }
250 return 0;
251 }
252
253 if (copy_to_user(hdr->dxferp, from, n))
254 return -EFAULT;
255 return 0;
256}
257
258/* Copy data from userspace memory */
259
260static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
261 unsigned long n)
262{
263 int i;
264 void *index = to;
265 size_t remaining = n;
266 size_t xfer_len;
267
268 if (hdr->iovec_count > 0) {
269 struct sg_iovec sgl;
270
271 for (i = 0; i < hdr->iovec_count; i++) {
272 if (copy_from_user(&sgl, hdr->dxferp +
273 i * sizeof(struct sg_iovec),
274 sizeof(struct sg_iovec)))
275 return -EFAULT;
276 xfer_len = min(remaining, sgl.iov_len);
277 if (copy_from_user(index, sgl.iov_base, xfer_len))
278 return -EFAULT;
279 index += xfer_len;
280 remaining -= xfer_len;
281 if (remaining == 0)
282 break;
283 }
284 return 0;
285 }
286
287 if (copy_from_user(to, hdr->dxferp, n))
288 return -EFAULT;
289 return 0;
290}
291
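The two copy helpers above walk the caller's iovec list and copy into or out of each segment until the requested length is exhausted, falling back to a single flat copy when no iovecs are given. A minimal userspace sketch of the same scatter pattern, with memcpy() standing in for copy_to_user() (illustrative only):

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

/* Scatter a contiguous buffer across an iovec list, segment by segment. */
static int scatter_copy(const struct iovec *iov, int iovcnt,
			const void *from, size_t n)
{
	const char *src = from;
	int i;

	for (i = 0; i < iovcnt && n > 0; i++) {
		size_t xfer_len = n < iov[i].iov_len ? n : iov[i].iov_len;

		memcpy(iov[i].iov_base, src, xfer_len);	/* copy_to_user() in the driver */
		src += xfer_len;
		n -= xfer_len;
	}
	return n ? -1 : 0;	/* -1: ran out of iovec space */
}

int main(void)
{
	char a[4], b[8];
	struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

	return scatter_copy(iov, 2, "hello world", 12);
}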
292/* Status/Sense Buffer Writeback */
293
294static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
295 u8 asc, u8 ascq)
296{
297 u8 xfer_len;
298 u8 resp[DESC_FMT_SENSE_DATA_SIZE];
299
300 if (scsi_status_is_good(status)) {
301 hdr->status = SAM_STAT_GOOD;
302 hdr->masked_status = GOOD;
303 hdr->host_status = DID_OK;
304 hdr->driver_status = DRIVER_OK;
305 hdr->sb_len_wr = 0;
306 } else {
307 hdr->status = status;
308 hdr->masked_status = status >> 1;
309 hdr->host_status = DID_OK;
310 hdr->driver_status = DRIVER_OK;
311
312 memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
313 resp[0] = DESC_FORMAT_SENSE_DATA;
314 resp[1] = sense_key;
315 resp[2] = asc;
316 resp[3] = ascq;
317
318 xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
319 hdr->sb_len_wr = xfer_len;
320 if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
321 return -EFAULT;
322 }
323
324 return 0;
325}
326
327/*
328 * Take a status code from a lowlevel routine, and if it was a positive NVMe
329 * error code update the sense data based on it. In either case the passed
330 * in value is returned again, unless an -EFAULT from copy_to_user overrides
331 * it.
332 */
333static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
334{
335 u8 status, sense_key, asc, ascq;
336 int res;
337
338 /* For non-nvme (Linux) errors, simply return the error code */
339 if (nvme_sc < 0)
340 return nvme_sc;
341
342 /* Mask DNR, More, and reserved fields */
343 switch (nvme_sc & 0x7FF) {
344 /* Generic Command Status */
345 case NVME_SC_SUCCESS:
346 status = SAM_STAT_GOOD;
347 sense_key = NO_SENSE;
348 asc = SCSI_ASC_NO_SENSE;
349 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
350 break;
351 case NVME_SC_INVALID_OPCODE:
352 status = SAM_STAT_CHECK_CONDITION;
353 sense_key = ILLEGAL_REQUEST;
354 asc = SCSI_ASC_ILLEGAL_COMMAND;
355 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
356 break;
357 case NVME_SC_INVALID_FIELD:
358 status = SAM_STAT_CHECK_CONDITION;
359 sense_key = ILLEGAL_REQUEST;
360 asc = SCSI_ASC_INVALID_CDB;
361 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
362 break;
363 case NVME_SC_DATA_XFER_ERROR:
364 status = SAM_STAT_CHECK_CONDITION;
365 sense_key = MEDIUM_ERROR;
366 asc = SCSI_ASC_NO_SENSE;
367 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
368 break;
369 case NVME_SC_POWER_LOSS:
370 status = SAM_STAT_TASK_ABORTED;
371 sense_key = ABORTED_COMMAND;
372 asc = SCSI_ASC_WARNING;
373 ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
374 break;
375 case NVME_SC_INTERNAL:
376 status = SAM_STAT_CHECK_CONDITION;
377 sense_key = HARDWARE_ERROR;
378 asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
379 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
380 break;
381 case NVME_SC_ABORT_REQ:
382 status = SAM_STAT_TASK_ABORTED;
383 sense_key = ABORTED_COMMAND;
384 asc = SCSI_ASC_NO_SENSE;
385 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
386 break;
387 case NVME_SC_ABORT_QUEUE:
388 status = SAM_STAT_TASK_ABORTED;
389 sense_key = ABORTED_COMMAND;
390 asc = SCSI_ASC_NO_SENSE;
391 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
392 break;
393 case NVME_SC_FUSED_FAIL:
394 status = SAM_STAT_TASK_ABORTED;
395 sense_key = ABORTED_COMMAND;
396 asc = SCSI_ASC_NO_SENSE;
397 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
398 break;
399 case NVME_SC_FUSED_MISSING:
400 status = SAM_STAT_TASK_ABORTED;
401 sense_key = ABORTED_COMMAND;
402 asc = SCSI_ASC_NO_SENSE;
403 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
404 break;
405 case NVME_SC_INVALID_NS:
406 status = SAM_STAT_CHECK_CONDITION;
407 sense_key = ILLEGAL_REQUEST;
408 asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
409 ascq = SCSI_ASCQ_INVALID_LUN_ID;
410 break;
411 case NVME_SC_LBA_RANGE:
412 status = SAM_STAT_CHECK_CONDITION;
413 sense_key = ILLEGAL_REQUEST;
414 asc = SCSI_ASC_ILLEGAL_BLOCK;
415 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
416 break;
417 case NVME_SC_CAP_EXCEEDED:
418 status = SAM_STAT_CHECK_CONDITION;
419 sense_key = MEDIUM_ERROR;
420 asc = SCSI_ASC_NO_SENSE;
421 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
422 break;
423 case NVME_SC_NS_NOT_READY:
424 status = SAM_STAT_CHECK_CONDITION;
425 sense_key = NOT_READY;
426 asc = SCSI_ASC_LUN_NOT_READY;
427 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
428 break;
429
430 /* Command Specific Status */
431 case NVME_SC_INVALID_FORMAT:
432 status = SAM_STAT_CHECK_CONDITION;
433 sense_key = ILLEGAL_REQUEST;
434 asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
435 ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
436 break;
437 case NVME_SC_BAD_ATTRIBUTES:
438 status = SAM_STAT_CHECK_CONDITION;
439 sense_key = ILLEGAL_REQUEST;
440 asc = SCSI_ASC_INVALID_CDB;
441 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
442 break;
443
444 /* Media Errors */
445 case NVME_SC_WRITE_FAULT:
446 status = SAM_STAT_CHECK_CONDITION;
447 sense_key = MEDIUM_ERROR;
448 asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
449 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
450 break;
451 case NVME_SC_READ_ERROR:
452 status = SAM_STAT_CHECK_CONDITION;
453 sense_key = MEDIUM_ERROR;
454 asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
455 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
456 break;
457 case NVME_SC_GUARD_CHECK:
458 status = SAM_STAT_CHECK_CONDITION;
459 sense_key = MEDIUM_ERROR;
460 asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
461 ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
462 break;
463 case NVME_SC_APPTAG_CHECK:
464 status = SAM_STAT_CHECK_CONDITION;
465 sense_key = MEDIUM_ERROR;
466 asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
467 ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
468 break;
469 case NVME_SC_REFTAG_CHECK:
470 status = SAM_STAT_CHECK_CONDITION;
471 sense_key = MEDIUM_ERROR;
472 asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
473 ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
474 break;
475 case NVME_SC_COMPARE_FAILED:
476 status = SAM_STAT_CHECK_CONDITION;
477 sense_key = MISCOMPARE;
478 asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
479 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
480 break;
481 case NVME_SC_ACCESS_DENIED:
482 status = SAM_STAT_CHECK_CONDITION;
483 sense_key = ILLEGAL_REQUEST;
484 asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
485 ascq = SCSI_ASCQ_INVALID_LUN_ID;
486 break;
487
488 /* Unspecified/Default */
489 case NVME_SC_CMDID_CONFLICT:
490 case NVME_SC_CMD_SEQ_ERROR:
491 case NVME_SC_CQ_INVALID:
492 case NVME_SC_QID_INVALID:
493 case NVME_SC_QUEUE_SIZE:
494 case NVME_SC_ABORT_LIMIT:
495 case NVME_SC_ABORT_MISSING:
496 case NVME_SC_ASYNC_LIMIT:
497 case NVME_SC_FIRMWARE_SLOT:
498 case NVME_SC_FIRMWARE_IMAGE:
499 case NVME_SC_INVALID_VECTOR:
500 case NVME_SC_INVALID_LOG_PAGE:
501 default:
502 status = SAM_STAT_CHECK_CONDITION;
503 sense_key = ILLEGAL_REQUEST;
504 asc = SCSI_ASC_NO_SENSE;
505 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
506 break;
507 }
508
509 res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
510 return res ? res : nvme_sc;
511}
512
513/* INQUIRY Helper Functions */
514
515static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
516 struct sg_io_hdr *hdr, u8 *inq_response,
517 int alloc_len)
518{
519 struct nvme_ctrl *ctrl = ns->ctrl;
520 struct nvme_id_ns *id_ns;
521 int res;
522 int nvme_sc;
523 int xfer_len;
524 u8 resp_data_format = 0x02;
525 u8 protect;
526 u8 cmdque = 0x01 << 1;
527 u8 fw_offset = sizeof(ctrl->firmware_rev);
528
529 /* nvme ns identify - use DPS value for PROTECT field */
530 nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
531 res = nvme_trans_status_code(hdr, nvme_sc);
532 if (res)
533 return res;
534
535 if (id_ns->dps)
536 protect = 0x01;
537 else
538 protect = 0;
539 kfree(id_ns);
540
541 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
542 inq_response[2] = VERSION_SPC_4;
543 inq_response[3] = resp_data_format; /*normaca=0 | hisup=0 */
544 inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
545 inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
546 inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */
547 strncpy(&inq_response[8], "NVMe ", 8);
548 strncpy(&inq_response[16], ctrl->model, 16);
549
550 while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
551 fw_offset--;
552 fw_offset -= 4;
553 strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
554
555 xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
556 return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
557}
558
559static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
560 struct sg_io_hdr *hdr, u8 *inq_response,
561 int alloc_len)
562{
563 int xfer_len;
564
565 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
566 inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE; /* Page Code */
567 inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES; /* Page Length */
568 inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
569 inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
570 inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
571 inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
572 inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
573 inq_response[9] = INQ_BDEV_LIMITS_PAGE;
574
575 xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
576 return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
577}
578
579static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
580 struct sg_io_hdr *hdr, u8 *inq_response,
581 int alloc_len)
582{
583 int xfer_len;
584
585 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
586 inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
587 inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */
588 strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
589
590 xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
591 return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
592}
593
594static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
595 u8 *inq_response, int alloc_len)
596{
597 struct nvme_id_ns *id_ns;
598 int nvme_sc, res;
599 size_t len;
600 void *eui;
601
602 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
603 res = nvme_trans_status_code(hdr, nvme_sc);
604 if (res)
605 return res;
606
607 eui = id_ns->eui64;
608 len = sizeof(id_ns->eui64);
609
610 if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
611 if (bitmap_empty(eui, len * 8)) {
612 eui = id_ns->nguid;
613 len = sizeof(id_ns->nguid);
614 }
615 }
616
617 if (bitmap_empty(eui, len * 8)) {
618 res = -EOPNOTSUPP;
619 goto out_free_id;
620 }
621
622 memset(inq_response, 0, alloc_len);
623 inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
624 inq_response[3] = 4 + len; /* Page Length */
625
626 /* Designation Descriptor start */
627 inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
628 inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
629 inq_response[6] = 0x00; /* Rsvd */
630 inq_response[7] = len; /* Designator Length */
631 memcpy(&inq_response[8], eui, len);
632
633 res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
634out_free_id:
635 kfree(id_ns);
636 return res;
637}
638
639static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
640 struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
641{
642 struct nvme_ctrl *ctrl = ns->ctrl;
643 struct nvme_id_ctrl *id_ctrl;
644 int nvme_sc, res;
645
646 if (alloc_len < 72) {
647 return nvme_trans_completion(hdr,
648 SAM_STAT_CHECK_CONDITION,
649 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
650 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
651 }
652
653 nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
654 res = nvme_trans_status_code(hdr, nvme_sc);
655 if (res)
656 return res;
657
658 memset(inq_response, 0, alloc_len);
659 inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
660 inq_response[3] = 0x48; /* Page Length */
661
662 /* Designation Descriptor start */
663 inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
664 inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
665 inq_response[6] = 0x00; /* Rsvd */
666 inq_response[7] = 0x44; /* Designator Length */
667
668 sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
669 memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
670 sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
671 memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
672
673 res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
674 kfree(id_ctrl);
675 return res;
676}
677
678static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
679 u8 *resp, int alloc_len)
680{
681 int res;
682
683 if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
684 res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
685 if (res != -EOPNOTSUPP)
686 return res;
687 }
688
689 return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
690}
691
692static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
693 int alloc_len)
694{
695 u8 *inq_response;
696 int res;
697 int nvme_sc;
698 struct nvme_ctrl *ctrl = ns->ctrl;
699 struct nvme_id_ctrl *id_ctrl;
700 struct nvme_id_ns *id_ns;
701 int xfer_len;
702 u8 microcode = 0x80;
703 u8 spt;
704 u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
705 u8 grd_chk, app_chk, ref_chk, protect;
706 u8 uask_sup = 0x20;
707 u8 v_sup;
708 u8 luiclr = 0x01;
709
710 inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
711 if (inq_response == NULL)
712 return -ENOMEM;
713
714 nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
715 res = nvme_trans_status_code(hdr, nvme_sc);
716 if (res)
717 goto out_free_inq;
718
719 spt = spt_lut[id_ns->dpc & 0x07] << 3;
720 if (id_ns->dps)
721 protect = 0x01;
722 else
723 protect = 0;
724 kfree(id_ns);
725
726 grd_chk = protect << 2;
727 app_chk = protect << 1;
728 ref_chk = protect;
729
730 nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
731 res = nvme_trans_status_code(hdr, nvme_sc);
732 if (res)
733 goto out_free_inq;
734
735 v_sup = id_ctrl->vwc;
736 kfree(id_ctrl);
737
738 memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
739 inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */
740 inq_response[2] = 0x00; /* Page Length MSB */
741 inq_response[3] = 0x3C; /* Page Length LSB */
742 inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
743 inq_response[5] = uask_sup;
744 inq_response[6] = v_sup;
745 inq_response[7] = luiclr;
746 inq_response[8] = 0;
747 inq_response[9] = 0;
748
749 xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
750 res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
751
752 out_free_inq:
753 kfree(inq_response);
754 return res;
755}
756
757static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
758 u8 *inq_response, int alloc_len)
759{
760 __be32 max_sectors = cpu_to_be32(
761 nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
762 __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
763 __be32 discard_desc_count = cpu_to_be32(0x100);
764
765 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
766 inq_response[1] = VPD_BLOCK_LIMITS;
767 inq_response[3] = 0x3c; /* Page Length */
768 memcpy(&inq_response[8], &max_sectors, sizeof(u32));
769 memcpy(&inq_response[20], &max_discard, sizeof(u32));
770
771 if (max_discard)
772 memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
773
774 return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
775}
776
777static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
778 int alloc_len)
779{
780 u8 *inq_response;
781 int res;
782 int xfer_len;
783
784 inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
785 if (inq_response == NULL) {
786 res = -ENOMEM;
787 goto out_mem;
788 }
789
790 inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
791 inq_response[2] = 0x00; /* Page Length MSB */
792 inq_response[3] = 0x3C; /* Page Length LSB */
793 inq_response[4] = 0x00; /* Medium Rotation Rate MSB */
794 inq_response[5] = 0x01; /* Medium Rotation Rate LSB */
795 inq_response[6] = 0x00; /* Form Factor */
796
797 xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
798 res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
799
800 kfree(inq_response);
801 out_mem:
802 return res;
803}
804
805/* LOG SENSE Helper Functions */
806
807static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
808 int alloc_len)
809{
810 int res;
811 int xfer_len;
812 u8 *log_response;
813
814 log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
815 if (log_response == NULL) {
816 res = -ENOMEM;
817 goto out_mem;
818 }
819
820 log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
821 /* Subpage=0x00, Page Length MSB=0 */
822 log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
823 log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
824 log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
825 log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
826
827 xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
828 res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
829
830 kfree(log_response);
831 out_mem:
832 return res;
833}
834
835static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
836 struct sg_io_hdr *hdr, int alloc_len)
837{
838 int res;
839 int xfer_len;
840 u8 *log_response;
841 struct nvme_smart_log *smart_log;
842 u8 temp_c;
843 u16 temp_k;
844
845 log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
846 if (log_response == NULL)
847 return -ENOMEM;
848
849 res = nvme_get_log_page(ns->ctrl, &smart_log);
850 if (res < 0)
851 goto out_free_response;
852
853 if (res != NVME_SC_SUCCESS) {
854 temp_c = LOG_TEMP_UNKNOWN;
855 } else {
856 temp_k = (smart_log->temperature[1] << 8) +
857 (smart_log->temperature[0]);
858 temp_c = temp_k - KELVIN_TEMP_FACTOR;
859 }
860 kfree(smart_log);
861
862 log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
863 /* Subpage=0x00, Page Length MSB=0 */
864 log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
865 /* Informational Exceptions Log Parameter 1 Start */
866 /* Parameter Code=0x0000 bytes 4,5 */
867 log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
868 log_response[7] = 0x04; /* PARAMETER LENGTH */
869	/* Additional Sense Code and Qualifier = 0x00 each */
870 /* Use Temperature from NVMe Get Log Page, convert to C from K */
871 log_response[10] = temp_c;
872
873 xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
874 res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
875
876 out_free_response:
877 kfree(log_response);
878 return res;
879}
880
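The SMART temperature field used above is a 16-bit little-endian value in Kelvin: the two bytes are assembled and KELVIN_TEMP_FACTOR subtracted to get degrees Celsius. A standalone worked example, assuming a reading of 314 K:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* assumed raw SMART temperature bytes (little endian): 0x013A = 314 K */
	uint8_t temperature[2] = { 0x3A, 0x01 };
	uint16_t temp_k = (uint16_t)((temperature[1] << 8) | temperature[0]);
	int temp_c = temp_k - 273;	/* KELVIN_TEMP_FACTOR in the code above */

	printf("%d K = %d C\n", temp_k, temp_c);	/* 314 K = 41 C */
	return 0;
}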
881static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
882 int alloc_len)
883{
884 int res;
885 int xfer_len;
886 u8 *log_response;
887 struct nvme_smart_log *smart_log;
888 u32 feature_resp;
889 u8 temp_c_cur, temp_c_thresh;
890 u16 temp_k;
891
892 log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
893 if (log_response == NULL)
894 return -ENOMEM;
895
896 res = nvme_get_log_page(ns->ctrl, &smart_log);
897 if (res < 0)
898 goto out_free_response;
899
900 if (res != NVME_SC_SUCCESS) {
901 temp_c_cur = LOG_TEMP_UNKNOWN;
902 } else {
903 temp_k = (smart_log->temperature[1] << 8) +
904 (smart_log->temperature[0]);
905 temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
906 }
907 kfree(smart_log);
908
909 /* Get Features for Temp Threshold */
910 res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
911 &feature_resp);
912 if (res != NVME_SC_SUCCESS)
913 temp_c_thresh = LOG_TEMP_UNKNOWN;
914 else
915 temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
916
917 log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
918 /* Subpage=0x00, Page Length MSB=0 */
919 log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
920 /* Temperature Log Parameter 1 (Temperature) Start */
921 /* Parameter Code = 0x0000 */
922 log_response[6] = 0x01; /* Format and Linking = 01b */
923 log_response[7] = 0x02; /* Parameter Length */
924 /* Use Temperature from NVMe Get Log Page, convert to C from K */
925 log_response[9] = temp_c_cur;
926 /* Temperature Log Parameter 2 (Reference Temperature) Start */
927 log_response[11] = 0x01; /* Parameter Code = 0x0001 */
928 log_response[12] = 0x01; /* Format and Linking = 01b */
929 log_response[13] = 0x02; /* Parameter Length */
930 /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
931 log_response[15] = temp_c_thresh;
932
933 xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
934 res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
935
936 out_free_response:
937 kfree(log_response);
938 return res;
939}
940
941/* MODE SENSE Helper Functions */
942
943static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
944 u16 mode_data_length, u16 blk_desc_len)
945{
946 /* Quick check to make sure I don't stomp on my own memory... */
947 if ((cdb10 && len < 8) || (!cdb10 && len < 4))
948 return -EINVAL;
949
950 if (cdb10) {
951 resp[0] = (mode_data_length & 0xFF00) >> 8;
952 resp[1] = (mode_data_length & 0x00FF);
953 resp[3] = 0x10 /* DPOFUA */;
954 resp[4] = llbaa;
955 resp[5] = RESERVED_FIELD;
956 resp[6] = (blk_desc_len & 0xFF00) >> 8;
957 resp[7] = (blk_desc_len & 0x00FF);
958 } else {
959 resp[0] = (mode_data_length & 0x00FF);
960 resp[2] = 0x10 /* DPOFUA */;
961 resp[3] = (blk_desc_len & 0x00FF);
962 }
963
964 return 0;
965}
966
967static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
968 u8 *resp, int len, u8 llbaa)
969{
970 int res;
971 int nvme_sc;
972 struct nvme_id_ns *id_ns;
973 u8 flbas;
974 u32 lba_length;
975
976 if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
977 return -EINVAL;
978 else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
979 return -EINVAL;
980
981 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
982 res = nvme_trans_status_code(hdr, nvme_sc);
983 if (res)
984 return res;
985
986 flbas = (id_ns->flbas) & 0x0F;
987 lba_length = (1 << (id_ns->lbaf[flbas].ds));
988
989 if (llbaa == 0) {
990 __be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
991 /* Byte 4 is reserved */
992 __be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
993
994 memcpy(resp, &tmp_cap, sizeof(u32));
995 memcpy(&resp[4], &tmp_len, sizeof(u32));
996 } else {
997 __be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
998 __be32 tmp_len = cpu_to_be32(lba_length);
999
1000 memcpy(resp, &tmp_cap, sizeof(u64));
1001 /* Bytes 8, 9, 10, 11 are reserved */
1002 memcpy(&resp[12], &tmp_len, sizeof(u32));
1003 }
1004
1005 kfree(id_ns);
1006 return res;
1007}
1008
1009static int nvme_trans_fill_control_page(struct nvme_ns *ns,
1010 struct sg_io_hdr *hdr, u8 *resp,
1011 int len)
1012{
1013 if (len < MODE_PAGE_CONTROL_LEN)
1014 return -EINVAL;
1015
1016 resp[0] = MODE_PAGE_CONTROL;
1017 resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
1018 resp[2] = 0x0E; /* TST=000b, TMF_ONLY=0, DPICZ=1,
1019 * D_SENSE=1, GLTSD=1, RLEC=0 */
1020 resp[3] = 0x12; /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
1021 /* Byte 4: VS=0, RAC=0, UA_INT=0, SWP=0 */
1022 resp[5] = 0x40; /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
1023 /* resp[6] and [7] are obsolete, thus zero */
1024 resp[8] = 0xFF; /* Busy timeout period = 0xffff */
1025 resp[9] = 0xFF;
1026 /* Bytes 10,11: Extended selftest completion time = 0x0000 */
1027
1028 return 0;
1029}
1030
1031static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
1032 struct sg_io_hdr *hdr,
1033 u8 *resp, int len)
1034{
1035 int res = 0;
1036 int nvme_sc;
1037 u32 feature_resp;
1038 u8 vwc;
1039
1040 if (len < MODE_PAGE_CACHING_LEN)
1041 return -EINVAL;
1042
1043 nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
1044 &feature_resp);
1045 res = nvme_trans_status_code(hdr, nvme_sc);
1046 if (res)
1047 return res;
1048
1049 vwc = feature_resp & 0x00000001;
1050
1051 resp[0] = MODE_PAGE_CACHING;
1052 resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
1053 resp[2] = vwc << 2;
1054 return 0;
1055}
1056
1057static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
1058 struct sg_io_hdr *hdr, u8 *resp,
1059 int len)
1060{
1061 if (len < MODE_PAGE_POW_CND_LEN)
1062 return -EINVAL;
1063
1064 resp[0] = MODE_PAGE_POWER_CONDITION;
1065 resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
1066 /* All other bytes are zero */
1067
1068 return 0;
1069}
1070
1071static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
1072 struct sg_io_hdr *hdr, u8 *resp,
1073 int len)
1074{
1075 if (len < MODE_PAGE_INF_EXC_LEN)
1076 return -EINVAL;
1077
1078 resp[0] = MODE_PAGE_INFO_EXCEP;
1079 resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
1080 resp[2] = 0x88;
1081 /* All other bytes are zero */
1082
1083 return 0;
1084}
1085
1086static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1087 u8 *resp, int len)
1088{
1089 int res;
1090 u16 mode_pages_offset_1 = 0;
1091 u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
1092
1093 mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
1094 mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
1095 mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
1096
1097 res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
1098 MODE_PAGE_CACHING_LEN);
1099 if (res)
1100 return res;
1101 res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
1102 MODE_PAGE_CONTROL_LEN);
1103 if (res)
1104 return res;
1105 res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
1106 MODE_PAGE_POW_CND_LEN);
1107 if (res)
1108 return res;
1109 return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
1110 MODE_PAGE_INF_EXC_LEN);
1111}
1112
1113static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
1114{
1115 if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
1116 /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
1117 return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
1118 } else {
1119 return 0;
1120 }
1121}
1122
1123static int nvme_trans_mode_page_create(struct nvme_ns *ns,
1124 struct sg_io_hdr *hdr, u8 *cmd,
1125 u16 alloc_len, u8 cdb10,
1126 int (*mode_page_fill_func)
1127 (struct nvme_ns *,
1128 struct sg_io_hdr *hdr, u8 *, int),
1129 u16 mode_pages_tot_len)
1130{
1131 int res;
1132 int xfer_len;
1133 u8 *response;
1134 u8 dbd, llbaa;
1135 u16 resp_size;
1136 int mph_size;
1137 u16 mode_pages_offset_1;
1138 u16 blk_desc_len, blk_desc_offset, mode_data_length;
1139
1140 dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
1141 llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
1142 mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
1143
1144 blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
1145
1146 resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
1147 /* Refer spc4r34 Table 440 for calculation of Mode data Length field */
1148 mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
1149
1150 blk_desc_offset = mph_size;
1151 mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
1152
1153 response = kzalloc(resp_size, GFP_KERNEL);
1154 if (response == NULL) {
1155 res = -ENOMEM;
1156 goto out_mem;
1157 }
1158
1159 res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
1160 llbaa, mode_data_length, blk_desc_len);
1161 if (res)
1162 goto out_free;
1163 if (blk_desc_len > 0) {
1164 res = nvme_trans_fill_blk_desc(ns, hdr,
1165 &response[blk_desc_offset],
1166 blk_desc_len, llbaa);
1167 if (res)
1168 goto out_free;
1169 }
1170 res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
1171 mode_pages_tot_len);
1172 if (res)
1173 goto out_free;
1174
1175 xfer_len = min(alloc_len, resp_size);
1176 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
1177
1178 out_free:
1179 kfree(response);
1180 out_mem:
1181 return res;
1182}
1183
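For reference, the sizing arithmetic in nvme_trans_mode_page_create() for a MODE SENSE(10) of the caching page with one short block descriptor works out as below (standalone sketch; the constants mirror the defines above):

#include <stdio.h>

int main(void)
{
	int cdb10 = 1;			/* MODE SENSE(10) */
	int mph_size = cdb10 ? 8 : 4;	/* mode parameter header size */
	int blk_desc_len = 8;		/* one short block descriptor (DBD=0, LLBAA=0) */
	int mode_pages_tot_len = 0x14;	/* MODE_PAGE_CACHING_LEN */

	int resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
	int mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;

	/* resp_size=36 mode_data_length=34 */
	printf("resp_size=%d mode_data_length=%d\n", resp_size, mode_data_length);
	return 0;
}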
1184/* Read Capacity Helper Functions */
1185
1186static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
1187 u8 cdb16)
1188{
1189 u8 flbas;
1190 u32 lba_length;
1191 u64 rlba;
1192 u8 prot_en;
1193 u8 p_type_lut[4] = {0, 0, 1, 2};
1194 __be64 tmp_rlba;
1195 __be32 tmp_rlba_32;
1196 __be32 tmp_len;
1197
1198 flbas = (id_ns->flbas) & 0x0F;
1199 lba_length = (1 << (id_ns->lbaf[flbas].ds));
1200 rlba = le64_to_cpup(&id_ns->nsze) - 1;
1201 (id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0);
1202
1203 if (!cdb16) {
1204 if (rlba > 0xFFFFFFFF)
1205 rlba = 0xFFFFFFFF;
1206 tmp_rlba_32 = cpu_to_be32(rlba);
1207 tmp_len = cpu_to_be32(lba_length);
1208 memcpy(response, &tmp_rlba_32, sizeof(u32));
1209 memcpy(&response[4], &tmp_len, sizeof(u32));
1210 } else {
1211 tmp_rlba = cpu_to_be64(rlba);
1212 tmp_len = cpu_to_be32(lba_length);
1213 memcpy(response, &tmp_rlba, sizeof(u64));
1214 memcpy(&response[8], &tmp_len, sizeof(u32));
1215 response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
1216 /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
1217 /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
1218 /* Bytes 16-31 - Reserved */
1219 }
1220}
1221
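nvme_trans_fill_read_cap() clamps the returned LBA to 0xFFFFFFFF for READ CAPACITY(10), the conventional signal that the host should retry with READ CAPACITY(16). A standalone worked example, assuming a namespace of 2^33 512-byte blocks (4 TiB):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* assumed: 4 TiB namespace with 512-byte logical blocks */
	uint64_t nsze = 8589934592ULL;		/* number of logical blocks */
	uint64_t rlba = nsze - 1;		/* address of the last block */

	if (rlba > 0xFFFFFFFFULL)		/* READ CAPACITY(10) clamp */
		rlba = 0xFFFFFFFFULL;

	printf("returned LBA = %llu\n", (unsigned long long)rlba);	/* 4294967295 */
	return 0;
}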
1222/* Start Stop Unit Helper Functions */
1223
1224static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1225 u8 buffer_id)
1226{
1227 struct nvme_command c;
1228 int nvme_sc;
1229
1230 memset(&c, 0, sizeof(c));
1231 c.common.opcode = nvme_admin_activate_fw;
1232 c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
1233
1234 nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
1235 return nvme_trans_status_code(hdr, nvme_sc);
1236}
1237
1238static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1239 u8 opcode, u32 tot_len, u32 offset,
1240 u8 buffer_id)
1241{
1242 int nvme_sc;
1243 struct nvme_command c;
1244
1245 if (hdr->iovec_count > 0) {
1246 /* Assuming SGL is not allowed for this command */
1247 return nvme_trans_completion(hdr,
1248 SAM_STAT_CHECK_CONDITION,
1249 ILLEGAL_REQUEST,
1250 SCSI_ASC_INVALID_CDB,
1251 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1252 }
1253
1254 memset(&c, 0, sizeof(c));
1255 c.common.opcode = nvme_admin_download_fw;
1256 c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
1257 c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
1258
1259 nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
1260 hdr->dxferp, tot_len, NULL, 0);
1261 return nvme_trans_status_code(hdr, nvme_sc);
1262}
1263
1264/* Mode Select Helper Functions */
1265
1266static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
1267 u16 *bd_len, u8 *llbaa)
1268{
1269 if (cdb10) {
1270 /* 10 Byte CDB */
1271 *bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
1272 parm_list[MODE_SELECT_10_BD_OFFSET + 1];
1273 *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
1274 MODE_SELECT_10_LLBAA_MASK;
1275 } else {
1276 /* 6 Byte CDB */
1277 *bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
1278 }
1279}
1280
1281static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
1282 u16 idx, u16 bd_len, u8 llbaa)
1283{
1284 /* Store block descriptor info if a FORMAT UNIT comes later */
1285 /* TODO Saving 1st BD info; what to do if multiple BD received? */
1286 if (llbaa == 0) {
1287 /* Standard Block Descriptor - spc4r34 7.5.5.1 */
1288 ns->mode_select_num_blocks =
1289 (parm_list[idx + 1] << 16) +
1290 (parm_list[idx + 2] << 8) +
1291 (parm_list[idx + 3]);
1292
1293 ns->mode_select_block_len =
1294 (parm_list[idx + 5] << 16) +
1295 (parm_list[idx + 6] << 8) +
1296 (parm_list[idx + 7]);
1297 } else {
1298 /* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
1299 ns->mode_select_num_blocks =
1300 (((u64)parm_list[idx + 0]) << 56) +
1301 (((u64)parm_list[idx + 1]) << 48) +
1302 (((u64)parm_list[idx + 2]) << 40) +
1303 (((u64)parm_list[idx + 3]) << 32) +
1304 (((u64)parm_list[idx + 4]) << 24) +
1305 (((u64)parm_list[idx + 5]) << 16) +
1306 (((u64)parm_list[idx + 6]) << 8) +
1307 ((u64)parm_list[idx + 7]);
1308
1309 ns->mode_select_block_len =
1310 (parm_list[idx + 12] << 24) +
1311 (parm_list[idx + 13] << 16) +
1312 (parm_list[idx + 14] << 8) +
1313 (parm_list[idx + 15]);
1314 }
1315}
1316
1317static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1318 u8 *mode_page, u8 page_code)
1319{
1320 int res = 0;
1321 int nvme_sc;
1322 unsigned dword11;
1323
1324 switch (page_code) {
1325 case MODE_PAGE_CACHING:
1326 dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
1327 nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
1328 dword11, NULL, 0, NULL);
1329 res = nvme_trans_status_code(hdr, nvme_sc);
1330 break;
1331 case MODE_PAGE_CONTROL:
1332 break;
1333 case MODE_PAGE_POWER_CONDITION:
1334 /* Verify the OS is not trying to set timers */
1335 if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
1336 res = nvme_trans_completion(hdr,
1337 SAM_STAT_CHECK_CONDITION,
1338 ILLEGAL_REQUEST,
1339 SCSI_ASC_INVALID_PARAMETER,
1340 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1341 break;
1342 }
1343 break;
1344 default:
1345 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1346 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1347 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1348 break;
1349 }
1350
1351 return res;
1352}
1353
1354static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1355 u8 *cmd, u16 parm_list_len, u8 pf,
1356 u8 sp, u8 cdb10)
1357{
1358 int res;
1359 u8 *parm_list;
1360 u16 bd_len;
1361 u8 llbaa = 0;
1362 u16 index, saved_index;
1363 u8 page_code;
1364 u16 mp_size;
1365
1366 /* Get parm list from data-in/out buffer */
1367 parm_list = kmalloc(parm_list_len, GFP_KERNEL);
1368 if (parm_list == NULL) {
1369 res = -ENOMEM;
1370 goto out;
1371 }
1372
1373 res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
1374 if (res)
1375 goto out_mem;
1376
1377 nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
1378 index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
1379
1380 if (bd_len != 0) {
1381 /* Block Descriptors present, parse */
1382 nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
1383 index += bd_len;
1384 }
1385 saved_index = index;
1386
1387 /* Multiple mode pages may be present; iterate through all */
1388 /* In 1st Iteration, don't do NVME Command, only check for CDB errors */
1389 do {
1390 page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
1391 mp_size = parm_list[index + 1] + 2;
1392 if ((page_code != MODE_PAGE_CACHING) &&
1393 (page_code != MODE_PAGE_CONTROL) &&
1394 (page_code != MODE_PAGE_POWER_CONDITION)) {
1395 res = nvme_trans_completion(hdr,
1396 SAM_STAT_CHECK_CONDITION,
1397 ILLEGAL_REQUEST,
1398 SCSI_ASC_INVALID_CDB,
1399 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1400 goto out_mem;
1401 }
1402 index += mp_size;
1403 } while (index < parm_list_len);
1404
1405 /* In 2nd Iteration, do the NVME Commands */
1406 index = saved_index;
1407 do {
1408 page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
1409 mp_size = parm_list[index + 1] + 2;
1410 res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
1411 page_code);
1412 if (res)
1413 break;
1414 index += mp_size;
1415 } while (index < parm_list_len);
1416
1417 out_mem:
1418 kfree(parm_list);
1419 out:
1420 return res;
1421}
1422
1423/* Format Unit Helper Functions */
1424
1425static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
1426 struct sg_io_hdr *hdr)
1427{
1428 int res = 0;
1429 int nvme_sc;
1430 u8 flbas;
1431
1432 /*
1433 * SCSI Expects a MODE SELECT would have been issued prior to
1434 * a FORMAT UNIT, and the block size and number would be used
1435 * from the block descriptor in it. If a MODE SELECT had not
1436 * been issued, FORMAT shall use the current values for both.
1437 */
1438
1439 if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
1440 struct nvme_id_ns *id_ns;
1441
1442 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
1443 res = nvme_trans_status_code(hdr, nvme_sc);
1444 if (res)
1445 return res;
1446
1447 if (ns->mode_select_num_blocks == 0)
1448 ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
1449 if (ns->mode_select_block_len == 0) {
1450 flbas = (id_ns->flbas) & 0x0F;
1451 ns->mode_select_block_len =
1452 (1 << (id_ns->lbaf[flbas].ds));
1453 }
1454
1455 kfree(id_ns);
1456 }
1457
1458 return 0;
1459}
1460
1461static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
1462 u8 format_prot_info, u8 *nvme_pf_code)
1463{
1464 int res;
1465 u8 *parm_list;
1466 u8 pf_usage, pf_code;
1467
1468 parm_list = kmalloc(len, GFP_KERNEL);
1469 if (parm_list == NULL) {
1470 res = -ENOMEM;
1471 goto out;
1472 }
1473 res = nvme_trans_copy_from_user(hdr, parm_list, len);
1474 if (res)
1475 goto out_mem;
1476
1477 if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
1478 FORMAT_UNIT_IMMED_MASK) != 0) {
1479 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1480 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1481 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1482 goto out_mem;
1483 }
1484
1485 if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
1486 (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
1487 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1488 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1489 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1490 goto out_mem;
1491 }
1492 pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
1493 FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
1494 pf_code = (pf_usage << 2) | format_prot_info;
1495 switch (pf_code) {
1496 case 0:
1497 *nvme_pf_code = 0;
1498 break;
1499 case 2:
1500 *nvme_pf_code = 1;
1501 break;
1502 case 3:
1503 *nvme_pf_code = 2;
1504 break;
1505 case 7:
1506 *nvme_pf_code = 3;
1507 break;
1508 default:
1509 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1510 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1511 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1512 break;
1513 }
1514
1515 out_mem:
1516 kfree(parm_list);
1517 out:
1518 return res;
1519}
1520
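The switch above accepts only the four (pf_usage << 2) | format_prot_info combinations that have an NVMe equivalent and maps them to protection information settings 0-3. A standalone sketch of that mapping (illustrative only, not driver code):

#include <stdint.h>
#include <stdio.h>

struct pf_map {
	uint8_t pf_code;	/* (pf_usage << 2) | format_prot_info */
	uint8_t nvme_pi;	/* NVMe protection information setting (0 = none) */
};

int main(void)
{
	static const struct pf_map map[] = {
		{ 0, 0 },	/* no protection information */
		{ 2, 1 },	/* PI Type 1 */
		{ 3, 2 },	/* PI Type 2 */
		{ 7, 3 },	/* PI Type 3 */
	};
	int i;

	for (i = 0; i < 4; i++)
		printf("pf_code %d -> NVMe PI %d\n", map[i].pf_code, map[i].nvme_pi);
	return 0;
}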
1521static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1522 u8 prot_info)
1523{
1524 int res;
1525 int nvme_sc;
1526 struct nvme_id_ns *id_ns;
1527 u8 i;
1528 u8 nlbaf;
1529 u8 selected_lbaf = 0xFF;
1530 u32 cdw10 = 0;
1531 struct nvme_command c;
1532
1533 /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
1534 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
1535 res = nvme_trans_status_code(hdr, nvme_sc);
1536 if (res)
1537 return res;
1538
1539 nlbaf = id_ns->nlbaf;
1540
1541 for (i = 0; i < nlbaf; i++) {
1542 if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
1543 selected_lbaf = i;
1544 break;
1545 }
1546 }
1547 if (selected_lbaf > 0x0F) {
1548 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1549 ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
1550 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1551 }
1552 if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
1553 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1554 ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
1555 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1556 }
1557
1558 cdw10 |= prot_info << 5;
1559 cdw10 |= selected_lbaf & 0x0F;
1560 memset(&c, 0, sizeof(c));
1561 c.format.opcode = nvme_admin_format_nvm;
1562 c.format.nsid = cpu_to_le32(ns->ns_id);
1563 c.format.cdw10 = cpu_to_le32(cdw10);
1564
1565 nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
1566 res = nvme_trans_status_code(hdr, nvme_sc);
1567
1568 kfree(id_ns);
1569 return res;
1570}
1571
1572static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
1573 struct nvme_trans_io_cdb *cdb_info,
1574 u32 max_blocks)
1575{
1576 /* If using iovecs, send one nvme command per vector */
1577 if (hdr->iovec_count > 0)
1578 return hdr->iovec_count;
1579 else if (cdb_info->xfer_len > max_blocks)
1580 return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
1581 else
1582 return 1;
1583}
1584
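The expression ((cdb_info->xfer_len - 1) / max_blocks) + 1 above is a ceiling division: the transfer is split into as many NVMe commands as needed, each covering at most max_blocks logical blocks. A standalone sketch with assumed queue limits (1024 hardware sectors, 4 KiB logical blocks), using the equivalent (n + d - 1) / d form:

#include <stdint.h>
#include <stdio.h>

/* Standalone sketch of the splitting arithmetic used above (no iovecs). */
static uint32_t num_cmds(uint32_t xfer_len, uint32_t max_blocks)
{
	return (xfer_len + max_blocks - 1) / max_blocks;	/* ceiling division */
}

int main(void)
{
	uint32_t max_hw_sectors = 1024;	/* 512-byte units, assumed value */
	uint32_t lba_shift = 12;	/* 4 KiB logical blocks, assumed */
	uint32_t max_blocks = max_hw_sectors >> (lba_shift - 9);	/* 128 */

	printf("%u commands\n", num_cmds(1000, max_blocks));	/* 8 */
	return 0;
}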
1585static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
1586 struct nvme_trans_io_cdb *cdb_info)
1587{
1588 u16 control = 0;
1589
1590 /* When Protection information support is added, implement here */
1591
1592 if (cdb_info->fua > 0)
1593 control |= NVME_RW_FUA;
1594
1595 return control;
1596}
1597
1598static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1599 struct nvme_trans_io_cdb *cdb_info, u8 is_write)
1600{
1601 int nvme_sc = NVME_SC_SUCCESS;
1602 u32 num_cmds;
1603 u64 unit_len;
1604 u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */
1605 u32 retcode;
1606 u32 i = 0;
1607 u64 nvme_offset = 0;
1608 void __user *next_mapping_addr;
1609 struct nvme_command c;
1610 u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
1611 u16 control;
1612 u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
1613
1614 num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
1615
1616 /*
1617 * This loop handles two cases.
1618 * First, when an SGL is used in the form of an iovec list:
1619 * - Use iov_base as the next mapping address for the nvme command_id
1620 * - Use iov_len as the data transfer length for the command.
1621 * Second, when we have a single buffer
1622 * - If larger than max_blocks, split into chunks, offset
1623 * each nvme command accordingly.
1624 */
1625 for (i = 0; i < num_cmds; i++) {
1626 memset(&c, 0, sizeof(c));
1627 if (hdr->iovec_count > 0) {
1628 struct sg_iovec sgl;
1629
1630 retcode = copy_from_user(&sgl, hdr->dxferp +
1631 i * sizeof(struct sg_iovec),
1632 sizeof(struct sg_iovec));
1633 if (retcode)
1634 return -EFAULT;
1635 unit_len = sgl.iov_len;
1636 unit_num_blocks = unit_len >> ns->lba_shift;
1637 next_mapping_addr = sgl.iov_base;
1638 } else {
1639 unit_num_blocks = min((u64)max_blocks,
1640 (cdb_info->xfer_len - nvme_offset));
1641 unit_len = unit_num_blocks << ns->lba_shift;
1642 next_mapping_addr = hdr->dxferp +
1643 ((1 << ns->lba_shift) * nvme_offset);
1644 }
1645
1646 c.rw.opcode = opcode;
1647 c.rw.nsid = cpu_to_le32(ns->ns_id);
1648 c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
1649 c.rw.length = cpu_to_le16(unit_num_blocks - 1);
1650 control = nvme_trans_io_get_control(ns, cdb_info);
1651 c.rw.control = cpu_to_le16(control);
1652
1653 if (get_capacity(ns->disk) - unit_num_blocks <
1654 cdb_info->lba + nvme_offset) {
1655 nvme_sc = NVME_SC_LBA_RANGE;
1656 break;
1657 }
1658 nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
1659 next_mapping_addr, unit_len, NULL, 0);
1660 if (nvme_sc)
1661 break;
1662
1663 nvme_offset += unit_num_blocks;
1664 }
1665
1666 return nvme_trans_status_code(hdr, nvme_sc);
1667}
1668
1669
1670/* SCSI Command Translation Functions */
1671
1672static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
1673 u8 *cmd)
1674{
1675 int res = 0;
1676 struct nvme_trans_io_cdb cdb_info = { 0, };
1677 u8 opcode = cmd[0];
1678 u64 xfer_bytes;
1679 u64 sum_iov_len = 0;
1680 struct sg_iovec sgl;
1681 int i;
1682 size_t not_copied;
1683
1684 /*
1685 * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
1686 * but always in the same place for all others.
1687 */
1688 switch (opcode) {
1689 case WRITE_6:
1690 case READ_6:
1691 break;
1692 default:
1693 cdb_info.fua = cmd[1] & 0x8;
1694 cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
1695 if (cdb_info.prot_info && !ns->pi_type) {
1696 return nvme_trans_completion(hdr,
1697 SAM_STAT_CHECK_CONDITION,
1698 ILLEGAL_REQUEST,
1699 SCSI_ASC_INVALID_CDB,
1700 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1701 }
1702 }
1703
1704 switch (opcode) {
1705 case WRITE_6:
1706 case READ_6:
1707 cdb_info.lba = get_unaligned_be24(&cmd[1]);
1708 cdb_info.xfer_len = cmd[4];
1709 if (cdb_info.xfer_len == 0)
1710 cdb_info.xfer_len = 256;
1711 break;
1712 case WRITE_10:
1713 case READ_10:
1714 cdb_info.lba = get_unaligned_be32(&cmd[2]);
1715 cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
1716 break;
1717 case WRITE_12:
1718 case READ_12:
1719 cdb_info.lba = get_unaligned_be32(&cmd[2]);
1720 cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
1721 break;
1722 case WRITE_16:
1723 case READ_16:
1724 cdb_info.lba = get_unaligned_be64(&cmd[2]);
1725 cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
1726 break;
1727 default:
1728 /* Will never really reach here */
1729 res = -EIO;
1730 goto out;
1731 }
1732
1733 /* Calculate total length of transfer (in bytes) */
1734 if (hdr->iovec_count > 0) {
1735 for (i = 0; i < hdr->iovec_count; i++) {
1736 not_copied = copy_from_user(&sgl, hdr->dxferp +
1737 i * sizeof(struct sg_iovec),
1738 sizeof(struct sg_iovec));
1739 if (not_copied)
1740 return -EFAULT;
1741 sum_iov_len += sgl.iov_len;
1742 /* IO vector sizes should be multiples of block size */
1743 if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
1744 res = nvme_trans_completion(hdr,
1745 SAM_STAT_CHECK_CONDITION,
1746 ILLEGAL_REQUEST,
1747 SCSI_ASC_INVALID_PARAMETER,
1748 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1749 goto out;
1750 }
1751 }
1752 } else {
1753 sum_iov_len = hdr->dxfer_len;
1754 }
1755
1756 /* As Per sg ioctl howto, if the lengths differ, use the lower one */
1757 xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
1758
1759	/* If block count and actual data buffer size don't match, error out */
1760 if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
1761 res = -EINVAL;
1762 goto out;
1763 }
1764
1765 /* Check for 0 length transfer - it is not illegal */
1766 if (cdb_info.xfer_len == 0)
1767 goto out;
1768
1769 /* Send NVMe IO Command(s) */
1770 res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
1771 if (res)
1772 goto out;
1773
1774 out:
1775 return res;
1776}
1777
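The opcode switch above reads the LBA and transfer length from the fixed offsets each CDB size defines, with the READ(6)/WRITE(6) quirk that a transfer length of zero means 256 blocks. A standalone sketch parsing a READ(10) CDB (illustrative values only, not driver code):

#include <stdint.h>
#include <stdio.h>

static uint32_t be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

static uint16_t be16(const uint8_t *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);
}

int main(void)
{
	/* READ(10): opcode 0x28, LBA = 0x00001000, transfer length = 8 blocks */
	uint8_t cdb[10] = { 0x28, 0, 0x00, 0x00, 0x10, 0x00, 0, 0x00, 0x08, 0 };

	printf("lba=%u len=%u\n", be32(&cdb[2]), be16(&cdb[7]));	/* lba=4096 len=8 */
	return 0;
}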
1778static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1779 u8 *cmd)
1780{
1781 int res = 0;
1782 u8 evpd;
1783 u8 page_code;
1784 int alloc_len;
1785 u8 *inq_response;
1786
1787 evpd = cmd[1] & 0x01;
1788 page_code = cmd[2];
1789 alloc_len = get_unaligned_be16(&cmd[3]);
1790
1791 inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
1792 GFP_KERNEL);
1793 if (inq_response == NULL) {
1794 res = -ENOMEM;
1795 goto out_mem;
1796 }
1797
1798 if (evpd == 0) {
1799 if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
1800 res = nvme_trans_standard_inquiry_page(ns, hdr,
1801 inq_response, alloc_len);
1802 } else {
1803 res = nvme_trans_completion(hdr,
1804 SAM_STAT_CHECK_CONDITION,
1805 ILLEGAL_REQUEST,
1806 SCSI_ASC_INVALID_CDB,
1807 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1808 }
1809 } else {
1810 switch (page_code) {
1811 case VPD_SUPPORTED_PAGES:
1812 res = nvme_trans_supported_vpd_pages(ns, hdr,
1813 inq_response, alloc_len);
1814 break;
1815 case VPD_SERIAL_NUMBER:
1816 res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
1817 alloc_len);
1818 break;
1819 case VPD_DEVICE_IDENTIFIERS:
1820 res = nvme_trans_device_id_page(ns, hdr, inq_response,
1821 alloc_len);
1822 break;
1823 case VPD_EXTENDED_INQUIRY:
1824 res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
1825 break;
1826 case VPD_BLOCK_LIMITS:
1827 res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
1828 alloc_len);
1829 break;
1830 case VPD_BLOCK_DEV_CHARACTERISTICS:
1831 res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
1832 break;
1833 default:
1834 res = nvme_trans_completion(hdr,
1835 SAM_STAT_CHECK_CONDITION,
1836 ILLEGAL_REQUEST,
1837 SCSI_ASC_INVALID_CDB,
1838 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1839 break;
1840 }
1841 }
1842 kfree(inq_response);
1843 out_mem:
1844 return res;
1845}
1846
1847static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1848 u8 *cmd)
1849{
1850 int res;
1851 u16 alloc_len;
1852 u8 pc;
1853 u8 page_code;
1854
1855 if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
1856 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1857 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1858 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1859 goto out;
1860 }
1861
1862 page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
1863 pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
1864 if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
1865 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1866 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1867 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1868 goto out;
1869 }
1870 alloc_len = get_unaligned_be16(&cmd[7]);
1871 switch (page_code) {
1872 case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
1873 res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
1874 break;
1875 case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
1876 res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
1877 break;
1878 case LOG_PAGE_TEMPERATURE_PAGE:
1879 res = nvme_trans_log_temperature(ns, hdr, alloc_len);
1880 break;
1881 default:
1882 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1883 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1884 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1885 break;
1886 }
1887
1888 out:
1889 return res;
1890}
1891
1892static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1893 u8 *cmd)
1894{
1895 u8 cdb10 = 0;
1896 u16 parm_list_len;
1897 u8 page_format;
1898 u8 save_pages;
1899
1900 page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
1901 save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
1902
1903 if (cmd[0] == MODE_SELECT) {
1904 parm_list_len = cmd[4];
1905 } else {
1906 parm_list_len = cmd[7];
1907 cdb10 = 1;
1908 }
1909
1910 if (parm_list_len != 0) {
1911 /*
1912 * According to SPC-4 r24, a parameter list length field of 0
1913 * shall not be considered an error
1914 */
1915 return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
1916 page_format, save_pages, cdb10);
1917 }
1918
1919 return 0;
1920}
1921
1922static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1923 u8 *cmd)
1924{
1925 int res = 0;
1926 u16 alloc_len;
1927 u8 cdb10 = 0;
1928
1929 if (cmd[0] == MODE_SENSE) {
1930 alloc_len = cmd[4];
1931 } else {
1932 alloc_len = get_unaligned_be16(&cmd[7]);
1933 cdb10 = 1;
1934 }
1935
1936 if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
1937 MODE_SENSE_PC_CURRENT_VALUES) {
1938 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1939 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1940 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1941 goto out;
1942 }
1943
1944 switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
1945 case MODE_PAGE_CACHING:
1946 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1947 cdb10,
1948 &nvme_trans_fill_caching_page,
1949 MODE_PAGE_CACHING_LEN);
1950 break;
1951 case MODE_PAGE_CONTROL:
1952 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1953 cdb10,
1954 &nvme_trans_fill_control_page,
1955 MODE_PAGE_CONTROL_LEN);
1956 break;
1957 case MODE_PAGE_POWER_CONDITION:
1958 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1959 cdb10,
1960 &nvme_trans_fill_pow_cnd_page,
1961 MODE_PAGE_POW_CND_LEN);
1962 break;
1963 case MODE_PAGE_INFO_EXCEP:
1964 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1965 cdb10,
1966 &nvme_trans_fill_inf_exc_page,
1967 MODE_PAGE_INF_EXC_LEN);
1968 break;
1969 case MODE_PAGE_RETURN_ALL:
1970 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1971 cdb10,
1972 &nvme_trans_fill_all_pages,
1973 MODE_PAGE_ALL_LEN);
1974 break;
1975 default:
1976 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1977 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1978 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1979 break;
1980 }
1981
1982 out:
1983 return res;
1984}
1985
1986static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1987 u8 *cmd, u8 cdb16)
1988{
1989 int res;
1990 int nvme_sc;
1991 u32 alloc_len;
1992 u32 resp_size;
1993 u32 xfer_len;
1994 struct nvme_id_ns *id_ns;
1995 u8 *response;
1996
1997 if (cdb16) {
1998 alloc_len = get_unaligned_be32(&cmd[10]);
1999 resp_size = READ_CAP_16_RESP_SIZE;
2000 } else {
2001 alloc_len = READ_CAP_10_RESP_SIZE;
2002 resp_size = READ_CAP_10_RESP_SIZE;
2003 }
2004
2005 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
2006 res = nvme_trans_status_code(hdr, nvme_sc);
2007 if (res)
2008 return res;
2009
2010 response = kzalloc(resp_size, GFP_KERNEL);
2011 if (response == NULL) {
2012 res = -ENOMEM;
2013 goto out_free_id;
2014 }
2015 nvme_trans_fill_read_cap(response, id_ns, cdb16);
2016
2017 xfer_len = min(alloc_len, resp_size);
2018 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
2019
2020 kfree(response);
2021 out_free_id:
2022 kfree(id_ns);
2023 return res;
2024}
2025
2026static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2027 u8 *cmd)
2028{
2029 int res;
2030 int nvme_sc;
2031 u32 alloc_len, xfer_len, resp_size;
2032 u8 *response;
2033 struct nvme_id_ctrl *id_ctrl;
2034 u32 ll_length, lun_id;
2035 u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
2036 __be32 tmp_len;
2037
2038 switch (cmd[2]) {
2039 default:
2040 return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2041 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2042 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2043 case ALL_LUNS_RETURNED:
2044 case ALL_WELL_KNOWN_LUNS_RETURNED:
2045 case RESTRICTED_LUNS_RETURNED:
2046 nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
2047 res = nvme_trans_status_code(hdr, nvme_sc);
2048 if (res)
2049 return res;
2050
2051 ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
2052 resp_size = ll_length + LUN_DATA_HEADER_SIZE;
2053
2054 alloc_len = get_unaligned_be32(&cmd[6]);
2055 if (alloc_len < resp_size) {
2056 res = nvme_trans_completion(hdr,
2057 SAM_STAT_CHECK_CONDITION,
2058 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2059 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2060 goto out_free_id;
2061 }
2062
2063 response = kzalloc(resp_size, GFP_KERNEL);
2064 if (response == NULL) {
2065 res = -ENOMEM;
2066 goto out_free_id;
2067 }
2068
2069 /* The first LUN ID will always be 0 per the SAM spec */
2070 for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
2071 /*
2072 * Set the LUN Id and then increment to the next LUN
2073 * location in the parameter data.
2074 */
2075 __be64 tmp_id = cpu_to_be64(lun_id);
2076 memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
2077 lun_id_offset += LUN_ENTRY_SIZE;
2078 }
2079 tmp_len = cpu_to_be32(ll_length);
2080 memcpy(response, &tmp_len, sizeof(u32));
2081 }
2082
2083 xfer_len = min(alloc_len, resp_size);
2084 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
2085
2086 kfree(response);
2087 out_free_id:
2088 kfree(id_ctrl);
2089 return res;
2090}
2091
2092static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2093 u8 *cmd)
2094{
2095 int res;
2096 u8 alloc_len, xfer_len, resp_size;
2097 u8 desc_format;
2098 u8 *response;
2099
2100 desc_format = cmd[1] & 0x01;
2101 alloc_len = cmd[4];
2102
2103 resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
2104 (FIXED_FMT_SENSE_DATA_SIZE));
2105 response = kzalloc(resp_size, GFP_KERNEL);
2106 if (response == NULL) {
2107 res = -ENOMEM;
2108 goto out;
2109 }
2110
2111 if (desc_format) {
2112 /* Descriptor Format Sense Data */
2113 response[0] = DESC_FORMAT_SENSE_DATA;
2114 response[1] = NO_SENSE;
2115 /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
2116 response[2] = SCSI_ASC_NO_SENSE;
2117 response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
2118 /* SDAT_OVFL = 0 | Additional Sense Length = 0 */
2119 } else {
2120 /* Fixed Format Sense Data */
2121 response[0] = FIXED_SENSE_DATA;
2122 /* Byte 1 = Obsolete */
2123 response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
2124 /* Bytes 3-6 - Information - set to zero */
2125 response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
2126 /* Bytes 8-11 - Cmd Specific Information - set to zero */
2127 response[12] = SCSI_ASC_NO_SENSE;
2128 response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
2129 /* Byte 14 = Field Replaceable Unit Code = 0 */
2130 /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
2131 }
2132
2133 xfer_len = min(alloc_len, resp_size);
2134 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
2135
2136 kfree(response);
2137 out:
2138 return res;
2139}
2140
2141static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
2142 struct sg_io_hdr *hdr)
2143{
2144 int nvme_sc;
2145 struct nvme_command c;
2146
2147 memset(&c, 0, sizeof(c));
2148 c.common.opcode = nvme_cmd_flush;
2149 c.common.nsid = cpu_to_le32(ns->ns_id);
2150
2151 nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
2152 return nvme_trans_status_code(hdr, nvme_sc);
2153}
2154
2155static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2156 u8 *cmd)
2157{
2158 int res;
2159 u8 parm_hdr_len = 0;
2160 u8 nvme_pf_code = 0;
2161 u8 format_prot_info, long_list, format_data;
2162
2163 format_prot_info = (cmd[1] & 0xc0) >> 6;
2164 long_list = cmd[1] & 0x20;
2165 format_data = cmd[1] & 0x10;
2166
2167 if (format_data != 0) {
2168 if (format_prot_info != 0) {
2169 if (long_list == 0)
2170 parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
2171 else
2172 parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
2173 }
2174 } else if (format_data == 0 && format_prot_info != 0) {
2175 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2176 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2177 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2178 goto out;
2179 }
2180
2181 /* Get parm header from data-in/out buffer */
2182 /*
2183 * According to the translation spec, the only fields in the parameter
2184 * list we are concerned with are in the header. So allocate only that.
2185 */
2186 if (parm_hdr_len > 0) {
2187 res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
2188 format_prot_info, &nvme_pf_code);
2189 if (res)
2190 goto out;
2191 }
2192
2193 /* Attempt to activate any previously downloaded firmware image */
2194 res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
2195
2196 /* Determine Block size and count and send format command */
2197 res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
2198 if (res)
2199 goto out;
2200
2201 res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
2202
2203 out:
2204 return res;
2205}
2206
2207static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
2208 struct sg_io_hdr *hdr,
2209 u8 *cmd)
2210{
2211 if (nvme_ctrl_ready(ns->ctrl))
2212 return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2213 NOT_READY, SCSI_ASC_LUN_NOT_READY,
2214 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2215 else
2216 return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
2217}
2218
2219static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2220 u8 *cmd)
2221{
2222 int res = 0;
2223 u32 buffer_offset, parm_list_length;
2224 u8 buffer_id, mode;
2225
2226 parm_list_length = get_unaligned_be24(&cmd[6]);
2227 if (parm_list_length % BYTES_TO_DWORDS != 0) {
2228 /* NVMe expects Firmware file to be a whole number of DWORDS */
2229 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2230 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2231 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2232 goto out;
2233 }
2234 buffer_id = cmd[2];
2235 if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
2236 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2237 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2238 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2239 goto out;
2240 }
2241 mode = cmd[1] & 0x1f;
2242 buffer_offset = get_unaligned_be24(&cmd[3]);
2243
2244 switch (mode) {
2245 case DOWNLOAD_SAVE_ACTIVATE:
2246 res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
2247 parm_list_length, buffer_offset,
2248 buffer_id);
2249 if (res)
2250 goto out;
2251 res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
2252 break;
2253 case DOWNLOAD_SAVE_DEFER_ACTIVATE:
2254 res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
2255 parm_list_length, buffer_offset,
2256 buffer_id);
2257 break;
2258 case ACTIVATE_DEFERRED_MICROCODE:
2259 res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
2260 break;
2261 default:
2262 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2263 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2264 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2265 break;
2266 }
2267
2268 out:
2269 return res;
2270}
2271
2272struct scsi_unmap_blk_desc {
2273 __be64 slba;
2274 __be32 nlb;
2275 u32 resv;
2276};
2277
2278struct scsi_unmap_parm_list {
2279 __be16 unmap_data_len;
2280 __be16 unmap_blk_desc_data_len;
2281 u32 resv;
2282 struct scsi_unmap_blk_desc desc[0];
2283};
2284
2285static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2286 u8 *cmd)
2287{
2288 struct scsi_unmap_parm_list *plist;
2289 struct nvme_dsm_range *range;
2290 struct nvme_command c;
2291 int i, nvme_sc, res;
2292 u16 ndesc, list_len;
2293
2294 list_len = get_unaligned_be16(&cmd[7]);
2295 if (!list_len)
2296 return -EINVAL;
2297
2298 plist = kmalloc(list_len, GFP_KERNEL);
2299 if (!plist)
2300 return -ENOMEM;
2301
2302 res = nvme_trans_copy_from_user(hdr, plist, list_len);
2303 if (res)
2304 goto out;
2305
2306 ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
2307 if (!ndesc || ndesc > 256) {
2308 res = -EINVAL;
2309 goto out;
2310 }
2311
2312 range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
2313 if (!range) {
2314 res = -ENOMEM;
2315 goto out;
2316 }
2317
2318 for (i = 0; i < ndesc; i++) {
2319 range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
2320 range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
2321 range[i].cattr = 0;
2322 }
2323
2324 memset(&c, 0, sizeof(c));
2325 c.dsm.opcode = nvme_cmd_dsm;
2326 c.dsm.nsid = cpu_to_le32(ns->ns_id);
2327 c.dsm.nr = cpu_to_le32(ndesc - 1);
2328 c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
2329
2330 nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
2331 ndesc * sizeof(*range));
2332 res = nvme_trans_status_code(hdr, nvme_sc);
2333
2334 kfree(range);
2335 out:
2336 kfree(plist);
2337 return res;
2338}
2339
2340static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
2341{
2342 u8 cmd[16];
2343 int retcode;
2344 unsigned int opcode;
2345
2346 if (hdr->cmdp == NULL)
2347 return -EMSGSIZE;
2348 if (hdr->cmd_len > sizeof(cmd))
2349 return -EINVAL;
2350 if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
2351 return -EFAULT;
2352
2353 /*
2354 * Prime the hdr with good status for scsi commands that don't require
2355 * an nvme command for translation.
2356 */
2357 retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
2358 if (retcode)
2359 return retcode;
2360
2361 opcode = cmd[0];
2362
2363 switch (opcode) {
2364 case READ_6:
2365 case READ_10:
2366 case READ_12:
2367 case READ_16:
2368 retcode = nvme_trans_io(ns, hdr, 0, cmd);
2369 break;
2370 case WRITE_6:
2371 case WRITE_10:
2372 case WRITE_12:
2373 case WRITE_16:
2374 retcode = nvme_trans_io(ns, hdr, 1, cmd);
2375 break;
2376 case INQUIRY:
2377 retcode = nvme_trans_inquiry(ns, hdr, cmd);
2378 break;
2379 case LOG_SENSE:
2380 retcode = nvme_trans_log_sense(ns, hdr, cmd);
2381 break;
2382 case MODE_SELECT:
2383 case MODE_SELECT_10:
2384 retcode = nvme_trans_mode_select(ns, hdr, cmd);
2385 break;
2386 case MODE_SENSE:
2387 case MODE_SENSE_10:
2388 retcode = nvme_trans_mode_sense(ns, hdr, cmd);
2389 break;
2390 case READ_CAPACITY:
2391 retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
2392 break;
2393 case SERVICE_ACTION_IN_16:
2394 switch (cmd[1]) {
2395 case SAI_READ_CAPACITY_16:
2396 retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
2397 break;
2398 default:
2399 goto out;
2400 }
2401 break;
2402 case REPORT_LUNS:
2403 retcode = nvme_trans_report_luns(ns, hdr, cmd);
2404 break;
2405 case REQUEST_SENSE:
2406 retcode = nvme_trans_request_sense(ns, hdr, cmd);
2407 break;
2408 case SYNCHRONIZE_CACHE:
2409 retcode = nvme_trans_synchronize_cache(ns, hdr);
2410 break;
2411 case FORMAT_UNIT:
2412 retcode = nvme_trans_format_unit(ns, hdr, cmd);
2413 break;
2414 case TEST_UNIT_READY:
2415 retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
2416 break;
2417 case WRITE_BUFFER:
2418 retcode = nvme_trans_write_buffer(ns, hdr, cmd);
2419 break;
2420 case UNMAP:
2421 retcode = nvme_trans_unmap(ns, hdr, cmd);
2422 break;
2423 default:
2424 out:
2425 retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2426 ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
2427 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2428 break;
2429 }
2430 return retcode;
2431}
2432
2433int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
2434{
2435 struct sg_io_hdr hdr;
2436 int retcode;
2437
2438 if (!capable(CAP_SYS_ADMIN))
2439 return -EACCES;
2440 if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
2441 return -EFAULT;
2442 if (hdr.interface_id != 'S')
2443 return -EINVAL;
2444
2445 /*
2446 * A positive return code means an NVMe status, which has been
2447 * translated to sense data.
2448 */
2449 retcode = nvme_scsi_translate(ns, &hdr);
2450 if (retcode < 0)
2451 return retcode;
2452 if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
2453 return -EFAULT;
2454 return 0;
2455}
2456
2457int nvme_sg_get_version_num(int __user *ip)
2458{
2459 return put_user(sg_version_num, ip);
2460}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ff1f97006322..35f930db3c02 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -336,7 +336,7 @@ out:
336 336
337static void nvmet_execute_identify_nslist(struct nvmet_req *req) 337static void nvmet_execute_identify_nslist(struct nvmet_req *req)
338{ 338{
339 static const int buf_size = 4096; 339 static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
340 struct nvmet_ctrl *ctrl = req->sq->ctrl; 340 struct nvmet_ctrl *ctrl = req->sq->ctrl;
341 struct nvmet_ns *ns; 341 struct nvmet_ns *ns;
342 u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid); 342 u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -367,6 +367,64 @@ out:
367 nvmet_req_complete(req, status); 367 nvmet_req_complete(req, status);
368} 368}
369 369
370static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
371 void *id, off_t *off)
372{
373 struct nvme_ns_id_desc desc = {
374 .nidt = type,
375 .nidl = len,
376 };
377 u16 status;
378
379 status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
380 if (status)
381 return status;
382 *off += sizeof(desc);
383
384 status = nvmet_copy_to_sgl(req, *off, id, len);
385 if (status)
386 return status;
387 *off += len;
388
389 return 0;
390}
391
392static void nvmet_execute_identify_desclist(struct nvmet_req *req)
393{
394 struct nvmet_ns *ns;
395 u16 status = 0;
396 off_t off = 0;
397
398 ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
399 if (!ns) {
400 status = NVME_SC_INVALID_NS | NVME_SC_DNR;
401 goto out;
402 }
403
404 if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
405 status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
406 NVME_NIDT_UUID_LEN,
407 &ns->uuid, &off);
408 if (status)
409 goto out_put_ns;
410 }
411 if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
412 status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
413 NVME_NIDT_NGUID_LEN,
414 &ns->nguid, &off);
415 if (status)
416 goto out_put_ns;
417 }
418
419 if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
420 off) != NVME_IDENTIFY_DATA_SIZE - off)
421 status = NVME_SC_INTERNAL | NVME_SC_DNR;
422out_put_ns:
423 nvmet_put_namespace(ns);
424out:
425 nvmet_req_complete(req, status);
426}
427
370/* 428/*
371 * A "mimimum viable" abort implementation: the command is mandatory in the 429 * A "mimimum viable" abort implementation: the command is mandatory in the
372 * spec, but we are not required to do any useful work. We couldn't really 430 * spec, but we are not required to do any useful work. We couldn't really
@@ -504,7 +562,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
504 } 562 }
505 break; 563 break;
506 case nvme_admin_identify: 564 case nvme_admin_identify:
507 req->data_len = 4096; 565 req->data_len = NVME_IDENTIFY_DATA_SIZE;
508 switch (cmd->identify.cns) { 566 switch (cmd->identify.cns) {
509 case NVME_ID_CNS_NS: 567 case NVME_ID_CNS_NS:
510 req->execute = nvmet_execute_identify_ns; 568 req->execute = nvmet_execute_identify_ns;
@@ -515,6 +573,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
515 case NVME_ID_CNS_NS_ACTIVE_LIST: 573 case NVME_ID_CNS_NS_ACTIVE_LIST:
516 req->execute = nvmet_execute_identify_nslist; 574 req->execute = nvmet_execute_identify_nslist;
517 return 0; 575 return 0;
576 case NVME_ID_CNS_NS_DESC_LIST:
577 req->execute = nvmet_execute_identify_desclist;
578 return 0;
518 } 579 }
519 break; 580 break;
520 case nvme_admin_abort_cmd: 581 case nvme_admin_abort_cmd:
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index be8c800078e2..a358ecd93e11 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -305,11 +305,41 @@ out_unlock:
305 305
306CONFIGFS_ATTR(nvmet_ns_, device_path); 306CONFIGFS_ATTR(nvmet_ns_, device_path);
307 307
308static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
309{
310 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
311}
312
313static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
314 const char *page, size_t count)
315{
316 struct nvmet_ns *ns = to_nvmet_ns(item);
317 struct nvmet_subsys *subsys = ns->subsys;
318 int ret = 0;
319
320
321 mutex_lock(&subsys->lock);
322 if (ns->enabled) {
323 ret = -EBUSY;
324 goto out_unlock;
325 }
326
327
328 if (uuid_parse(page, &ns->uuid))
329 ret = -EINVAL;
330
331out_unlock:
332 mutex_unlock(&subsys->lock);
333 return ret ? ret : count;
334}
335
308static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page) 336static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
309{ 337{
310 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid); 338 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
311} 339}
312 340
341CONFIGFS_ATTR(nvmet_ns_, device_uuid);
342
313static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, 343static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
314 const char *page, size_t count) 344 const char *page, size_t count)
315{ 345{
@@ -379,6 +409,7 @@ CONFIGFS_ATTR(nvmet_ns_, enable);
379static struct configfs_attribute *nvmet_ns_attrs[] = { 409static struct configfs_attribute *nvmet_ns_attrs[] = {
380 &nvmet_ns_attr_device_path, 410 &nvmet_ns_attr_device_path,
381 &nvmet_ns_attr_device_nguid, 411 &nvmet_ns_attr_device_nguid,
412 &nvmet_ns_attr_device_uuid,
382 &nvmet_ns_attr_enable, 413 &nvmet_ns_attr_enable,
383 NULL, 414 NULL,
384}; 415};
@@ -619,8 +650,45 @@ out_unlock:
619 650
620CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host); 651CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
621 652
653static ssize_t nvmet_subsys_version_show(struct config_item *item,
654 char *page)
655{
656 struct nvmet_subsys *subsys = to_subsys(item);
657
658 if (NVME_TERTIARY(subsys->ver))
659 return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
660 (int)NVME_MAJOR(subsys->ver),
661 (int)NVME_MINOR(subsys->ver),
662 (int)NVME_TERTIARY(subsys->ver));
663 else
664 return snprintf(page, PAGE_SIZE, "%d.%d\n",
665 (int)NVME_MAJOR(subsys->ver),
666 (int)NVME_MINOR(subsys->ver));
667}
668
669static ssize_t nvmet_subsys_version_store(struct config_item *item,
670 const char *page, size_t count)
671{
672 struct nvmet_subsys *subsys = to_subsys(item);
673 int major, minor, tertiary = 0;
674 int ret;
675
676
677 ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
678 if (ret != 2 && ret != 3)
679 return -EINVAL;
680
681 down_write(&nvmet_config_sem);
682 subsys->ver = NVME_VS(major, minor, tertiary);
683 up_write(&nvmet_config_sem);
684
685 return count;
686}
687CONFIGFS_ATTR(nvmet_subsys_, version);
688
622static struct configfs_attribute *nvmet_subsys_attrs[] = { 689static struct configfs_attribute *nvmet_subsys_attrs[] = {
623 &nvmet_subsys_attr_attr_allow_any_host, 690 &nvmet_subsys_attr_attr_allow_any_host,
691 &nvmet_subsys_attr_version,
624 NULL, 692 NULL,
625}; 693};
626 694
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index eb9399ac97cf..b5b4ac103748 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -380,6 +380,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
380 380
381 ns->nsid = nsid; 381 ns->nsid = nsid;
382 ns->subsys = subsys; 382 ns->subsys = subsys;
383 uuid_gen(&ns->uuid);
383 384
384 return ns; 385 return ns;
385} 386}
@@ -926,7 +927,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
926 if (!subsys) 927 if (!subsys)
927 return NULL; 928 return NULL;
928 929
929 subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */ 930 subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
930 931
931 switch (type) { 932 switch (type) {
932 case NVME_NQN_NVME: 933 case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 1aaf597e81fc..8f3b57b4c97b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
53 e->portid = port->disc_addr.portid; 53 e->portid = port->disc_addr.portid;
54 /* we support only dynamic controllers */ 54 /* we support only dynamic controllers */
55 e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); 55 e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
56 e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH); 56 e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
57 e->subtype = type; 57 e->subtype = type;
58 memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); 58 memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
59 memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); 59 memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
185 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 185 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
186 } 186 }
187 case nvme_admin_identify: 187 case nvme_admin_identify:
188 req->data_len = 4096; 188 req->data_len = NVME_IDENTIFY_DATA_SIZE;
189 switch (cmd->identify.cns) { 189 switch (cmd->identify.cns) {
190 case NVME_ID_CNS_CTRL: 190 case NVME_ID_CNS_CTRL:
191 req->execute = 191 req->execute =
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 2006fae61980..7692a96c9065 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2096,20 +2096,22 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
2096 /* clear any response payload */ 2096 /* clear any response payload */
2097 memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); 2097 memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
2098 2098
2099 fod->data_sg = NULL;
2100 fod->data_sg_cnt = 0;
2101
2099 ret = nvmet_req_init(&fod->req, 2102 ret = nvmet_req_init(&fod->req,
2100 &fod->queue->nvme_cq, 2103 &fod->queue->nvme_cq,
2101 &fod->queue->nvme_sq, 2104 &fod->queue->nvme_sq,
2102 &nvmet_fc_tgt_fcp_ops); 2105 &nvmet_fc_tgt_fcp_ops);
2103 if (!ret) { /* bad SQE content or invalid ctrl state */ 2106 if (!ret) {
2104 nvmet_fc_abort_op(tgtport, fod); 2107 /* bad SQE content or invalid ctrl state */
2108 /* nvmet layer has already called op done to send rsp. */
2105 return; 2109 return;
2106 } 2110 }
2107 2111
2108 /* keep a running counter of tail position */ 2112 /* keep a running counter of tail position */
2109 atomic_inc(&fod->queue->sqtail); 2113 atomic_inc(&fod->queue->sqtail);
2110 2114
2111 fod->data_sg = NULL;
2112 fod->data_sg_cnt = 0;
2113 if (fod->total_length) { 2115 if (fod->total_length) {
2114 ret = nvmet_fc_alloc_tgt_pgs(fod); 2116 ret = nvmet_fc_alloc_tgt_pgs(fod);
2115 if (ret) { 2117 if (ret) {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 294a6611fb24..1bb9d5b311b1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -569,7 +569,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
569 struct nvmefc_tgt_fcp_req *tgt_fcpreq) 569 struct nvmefc_tgt_fcp_req *tgt_fcpreq)
570{ 570{
571 struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); 571 struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
572 int active;
573 572
574 /* 573 /*
575 * mark aborted only in case there were 2 threads in transport 574 * mark aborted only in case there were 2 threads in transport
@@ -577,7 +576,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
577 * after the abort request 576 * after the abort request
578 */ 577 */
579 spin_lock(&tfcp_req->reqlock); 578 spin_lock(&tfcp_req->reqlock);
580 active = tfcp_req->active;
581 tfcp_req->aborted = true; 579 tfcp_req->aborted = true;
582 spin_unlock(&tfcp_req->reqlock); 580 spin_unlock(&tfcp_req->reqlock);
583 581
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c77940d80fc8..40128793e613 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
21 struct nvmet_req *req = bio->bi_private; 21 struct nvmet_req *req = bio->bi_private;
22 22
23 nvmet_req_complete(req, 23 nvmet_req_complete(req,
24 bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); 24 bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
25 25
26 if (bio != &req->inline_bio) 26 if (bio != &req->inline_bio)
27 bio_put(bio); 27 bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
145 bio->bi_private = req; 145 bio->bi_private = req;
146 bio->bi_end_io = nvmet_bio_done; 146 bio->bi_end_io = nvmet_bio_done;
147 if (status) { 147 if (status) {
148 bio->bi_error = -EIO; 148 bio->bi_status = BLK_STS_IOERR;
149 bio_endio(bio); 149 bio_endio(bio);
150 } else { 150 } else {
151 submit_bio(bio); 151 submit_bio(bio);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e503cfff0337..5f55c683b338 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -21,8 +21,6 @@
21#include "../host/nvme.h" 21#include "../host/nvme.h"
22#include "../host/fabrics.h" 22#include "../host/fabrics.h"
23 23
24#define NVME_LOOP_AQ_DEPTH 256
25
26#define NVME_LOOP_MAX_SEGMENTS 256 24#define NVME_LOOP_MAX_SEGMENTS 256
27 25
28/* 26/*
@@ -31,7 +29,7 @@
31 */ 29 */
32#define NVME_LOOP_NR_AEN_COMMANDS 1 30#define NVME_LOOP_NR_AEN_COMMANDS 1
33#define NVME_LOOP_AQ_BLKMQ_DEPTH \ 31#define NVME_LOOP_AQ_BLKMQ_DEPTH \
34 (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS) 32 (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
35 33
36struct nvme_loop_iod { 34struct nvme_loop_iod {
37 struct nvme_request nvme_req; 35 struct nvme_request nvme_req;
@@ -45,7 +43,6 @@ struct nvme_loop_iod {
45}; 43};
46 44
47struct nvme_loop_ctrl { 45struct nvme_loop_ctrl {
48 spinlock_t lock;
49 struct nvme_loop_queue *queues; 46 struct nvme_loop_queue *queues;
50 u32 queue_count; 47 u32 queue_count;
51 48
@@ -59,7 +56,6 @@ struct nvme_loop_ctrl {
59 56
60 struct nvmet_ctrl *target_ctrl; 57 struct nvmet_ctrl *target_ctrl;
61 struct work_struct delete_work; 58 struct work_struct delete_work;
62 struct work_struct reset_work;
63}; 59};
64 60
65static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) 61static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -151,7 +147,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
151 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq); 147 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
152 148
153 /* queue error recovery */ 149 /* queue error recovery */
154 schedule_work(&iod->queue->ctrl->reset_work); 150 nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
155 151
156 /* fail with DNR on admin cmd timeout */ 152 /* fail with DNR on admin cmd timeout */
157 nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; 153 nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
@@ -159,17 +155,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
159 return BLK_EH_HANDLED; 155 return BLK_EH_HANDLED;
160} 156}
161 157
162static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, 158static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
163 const struct blk_mq_queue_data *bd) 159 const struct blk_mq_queue_data *bd)
164{ 160{
165 struct nvme_ns *ns = hctx->queue->queuedata; 161 struct nvme_ns *ns = hctx->queue->queuedata;
166 struct nvme_loop_queue *queue = hctx->driver_data; 162 struct nvme_loop_queue *queue = hctx->driver_data;
167 struct request *req = bd->rq; 163 struct request *req = bd->rq;
168 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); 164 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
169 int ret; 165 blk_status_t ret;
170 166
171 ret = nvme_setup_cmd(ns, req, &iod->cmd); 167 ret = nvme_setup_cmd(ns, req, &iod->cmd);
172 if (ret != BLK_MQ_RQ_QUEUE_OK) 168 if (ret)
173 return ret; 169 return ret;
174 170
175 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; 171 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +175,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
179 nvme_cleanup_cmd(req); 175 nvme_cleanup_cmd(req);
180 blk_mq_start_request(req); 176 blk_mq_start_request(req);
181 nvme_loop_queue_response(&iod->req); 177 nvme_loop_queue_response(&iod->req);
182 return BLK_MQ_RQ_QUEUE_OK; 178 return BLK_STS_OK;
183 } 179 }
184 180
185 if (blk_rq_bytes(req)) { 181 if (blk_rq_bytes(req)) {
186 iod->sg_table.sgl = iod->first_sgl; 182 iod->sg_table.sgl = iod->first_sgl;
187 ret = sg_alloc_table_chained(&iod->sg_table, 183 if (sg_alloc_table_chained(&iod->sg_table,
188 blk_rq_nr_phys_segments(req), 184 blk_rq_nr_phys_segments(req),
189 iod->sg_table.sgl); 185 iod->sg_table.sgl))
190 if (ret) 186 return BLK_STS_RESOURCE;
191 return BLK_MQ_RQ_QUEUE_BUSY;
192 187
193 iod->req.sg = iod->sg_table.sgl; 188 iod->req.sg = iod->sg_table.sgl;
194 iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); 189 iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +192,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
197 blk_mq_start_request(req); 192 blk_mq_start_request(req);
198 193
199 schedule_work(&iod->work); 194 schedule_work(&iod->work);
200 return BLK_MQ_RQ_QUEUE_OK; 195 return BLK_STS_OK;
201} 196}
202 197
203static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) 198static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
@@ -234,15 +229,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
234 struct request *req, unsigned int hctx_idx, 229 struct request *req, unsigned int hctx_idx,
235 unsigned int numa_node) 230 unsigned int numa_node)
236{ 231{
237 return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 232 struct nvme_loop_ctrl *ctrl = set->driver_data;
238 hctx_idx + 1);
239}
240 233
241static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set, 234 return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
242 struct request *req, unsigned int hctx_idx, 235 (set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
243 unsigned int numa_node)
244{
245 return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
246} 236}
247 237
248static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 238static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -280,7 +270,7 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
280static const struct blk_mq_ops nvme_loop_admin_mq_ops = { 270static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
281 .queue_rq = nvme_loop_queue_rq, 271 .queue_rq = nvme_loop_queue_rq,
282 .complete = nvme_loop_complete_rq, 272 .complete = nvme_loop_complete_rq,
283 .init_request = nvme_loop_init_admin_request, 273 .init_request = nvme_loop_init_request,
284 .init_hctx = nvme_loop_init_admin_hctx, 274 .init_hctx = nvme_loop_init_admin_hctx,
285 .timeout = nvme_loop_timeout, 275 .timeout = nvme_loop_timeout,
286}; 276};
@@ -467,7 +457,7 @@ static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
467 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 457 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
468 return -EBUSY; 458 return -EBUSY;
469 459
470 if (!schedule_work(&ctrl->delete_work)) 460 if (!queue_work(nvme_wq, &ctrl->delete_work))
471 return -EBUSY; 461 return -EBUSY;
472 462
473 return 0; 463 return 0;
@@ -501,8 +491,8 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
501 491
502static void nvme_loop_reset_ctrl_work(struct work_struct *work) 492static void nvme_loop_reset_ctrl_work(struct work_struct *work)
503{ 493{
504 struct nvme_loop_ctrl *ctrl = container_of(work, 494 struct nvme_loop_ctrl *ctrl =
505 struct nvme_loop_ctrl, reset_work); 495 container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
506 bool changed; 496 bool changed;
507 int ret; 497 int ret;
508 498
@@ -540,21 +530,6 @@ out_disable:
540 nvme_put_ctrl(&ctrl->ctrl); 530 nvme_put_ctrl(&ctrl->ctrl);
541} 531}
542 532
543static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
544{
545 struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
546
547 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
548 return -EBUSY;
549
550 if (!schedule_work(&ctrl->reset_work))
551 return -EBUSY;
552
553 flush_work(&ctrl->reset_work);
554
555 return 0;
556}
557
558static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { 533static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
559 .name = "loop", 534 .name = "loop",
560 .module = THIS_MODULE, 535 .module = THIS_MODULE,
@@ -562,11 +537,9 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
562 .reg_read32 = nvmf_reg_read32, 537 .reg_read32 = nvmf_reg_read32,
563 .reg_read64 = nvmf_reg_read64, 538 .reg_read64 = nvmf_reg_read64,
564 .reg_write32 = nvmf_reg_write32, 539 .reg_write32 = nvmf_reg_write32,
565 .reset_ctrl = nvme_loop_reset_ctrl,
566 .free_ctrl = nvme_loop_free_ctrl, 540 .free_ctrl = nvme_loop_free_ctrl,
567 .submit_async_event = nvme_loop_submit_async_event, 541 .submit_async_event = nvme_loop_submit_async_event,
568 .delete_ctrl = nvme_loop_del_ctrl, 542 .delete_ctrl = nvme_loop_del_ctrl,
569 .get_subsysnqn = nvmf_get_subsysnqn,
570}; 543};
571 544
572static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) 545static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -629,15 +602,13 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
629 INIT_LIST_HEAD(&ctrl->list); 602 INIT_LIST_HEAD(&ctrl->list);
630 603
631 INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work); 604 INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
632 INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work); 605 INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
633 606
634 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops, 607 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
635 0 /* no quirks, we're perfect! */); 608 0 /* no quirks, we're perfect! */);
636 if (ret) 609 if (ret)
637 goto out_put_ctrl; 610 goto out_put_ctrl;
638 611
639 spin_lock_init(&ctrl->lock);
640
641 ret = -ENOMEM; 612 ret = -ENOMEM;
642 613
643 ctrl->ctrl.sqsize = opts->queue_size - 1; 614 ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -766,7 +737,7 @@ static void __exit nvme_loop_cleanup_module(void)
766 __nvme_loop_del_ctrl(ctrl); 737 __nvme_loop_del_ctrl(ctrl);
767 mutex_unlock(&nvme_loop_ctrl_mutex); 738 mutex_unlock(&nvme_loop_ctrl_mutex);
768 739
769 flush_scheduled_work(); 740 flush_workqueue(nvme_wq);
770} 741}
771 742
772module_init(nvme_loop_init_module); 743module_init(nvme_loop_init_module);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8ff6e430b30a..747bbdb4f9c6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -47,6 +47,7 @@ struct nvmet_ns {
47 u32 blksize_shift; 47 u32 blksize_shift;
48 loff_t size; 48 loff_t size;
49 u8 nguid[16]; 49 u8 nguid[16];
50 uuid_t uuid;
50 51
51 bool enabled; 52 bool enabled;
52 struct nvmet_subsys *subsys; 53 struct nvmet_subsys *subsys;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9e45cde63376..56a4cba690b5 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
1027 queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; 1027 queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
1028 queue->send_queue_size = le16_to_cpu(req->hrqsize); 1028 queue->send_queue_size = le16_to_cpu(req->hrqsize);
1029 1029
1030 if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH) 1030 if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
1031 return NVME_RDMA_CM_INVALID_HSQSIZE; 1031 return NVME_RDMA_CM_INVALID_HSQSIZE;
1032 1032
1033 /* XXX: Should we enforce some kind of max for IO queues? */ 1033 /* XXX: Should we enforce some kind of max for IO queues? */
@@ -1307,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
1307 1307
1308/** 1308/**
1309 * nvme_rdma_device_removal() - Handle RDMA device removal 1309 * nvme_rdma_device_removal() - Handle RDMA device removal
1310 * @cm_id: rdma_cm id, used for nvmet port
1310 * @queue: nvmet rdma queue (cm id qp_context) 1311 * @queue: nvmet rdma queue (cm id qp_context)
1311 * @addr: nvmet address (cm_id context)
1312 * 1312 *
1313 * DEVICE_REMOVAL event notifies us that the RDMA device is about 1313 * DEVICE_REMOVAL event notifies us that the RDMA device is about
1314 * to unplug so we should take care of destroying our RDMA resources. 1314 * to unplug. Note that this event can be generated on a normal
1315 * This event will be generated for each allocated cm_id. 1315 * queue cm_id and/or a device bound listener cm_id (where in this
1316 * case queue will be null).
1316 * 1317 *
1317 * Note that this event can be generated on a normal queue cm_id 1318 * We registered an ib_client to handle device removal for queues,
1318 * and/or a device bound listener cm_id (where in this case 1319 * so we only need to handle the listening port cm_ids. In this case
1319 * queue will be null).
1320 *
1321 * we claim ownership on destroying the cm_id. For queues we move
1322 * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
1323 * we nullify the priv to prevent double cm_id destruction and destroying 1320 * we nullify the priv to prevent double cm_id destruction and destroying
1324 * the cm_id implicitly by returning a non-zero rc to the callout. 1321 * the cm_id implicitly by returning a non-zero rc to the callout.
1325 */ 1322 */
1326static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, 1323static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
1327 struct nvmet_rdma_queue *queue) 1324 struct nvmet_rdma_queue *queue)
1328{ 1325{
1329 unsigned long flags; 1326 struct nvmet_port *port;
1330
1331 if (!queue) {
1332 struct nvmet_port *port = cm_id->context;
1333 1327
1328 if (queue) {
1334 /* 1329 /*
1335 * This is a listener cm_id. Make sure that 1330 * This is a queue cm_id. We have registered
1336 * future remove_port won't invoke a double 1331 * an ib_client to handle queue removal,
1337 * cm_id destroy. use atomic xchg to make sure 1332 * so don't interfere and just return.
1338 * we don't compete with remove_port.
1339 */
1340 if (xchg(&port->priv, NULL) != cm_id)
1341 return 0;
1342 } else {
1343 /*
1344 * This is a queue cm_id. Make sure that
1345 * release queue will not destroy the cm_id
1346 * and schedule all ctrl queues removal (only
1347 * if the queue is not disconnecting already).
1348 */ 1333 */
1349 spin_lock_irqsave(&queue->state_lock, flags); 1334 return 0;
1350 if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
1351 queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
1352 spin_unlock_irqrestore(&queue->state_lock, flags);
1353 nvmet_rdma_queue_disconnect(queue);
1354 flush_scheduled_work();
1355 } 1335 }
1356 1336
1337 port = cm_id->context;
1338
1339 /*
1340 * This is a listener cm_id. Make sure that
1341 * future remove_port won't invoke a double
1342 * cm_id destroy. use atomic xchg to make sure
1343 * we don't compete with remove_port.
1344 */
1345 if (xchg(&port->priv, NULL) != cm_id)
1346 return 0;
1347
1357 /* 1348 /*
1358 * We need to return 1 so that the core will destroy 1349 * We need to return 1 so that the core will destroy
1359 * its own ID. What a great API design.. 1350 * its own ID. What a great API design..
@@ -1519,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
1519 .delete_ctrl = nvmet_rdma_delete_ctrl, 1510 .delete_ctrl = nvmet_rdma_delete_ctrl,
1520}; 1511};
1521 1512
1513static void nvmet_rdma_add_one(struct ib_device *ib_device)
1514{
1515}
1516
1517static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
1518{
1519 struct nvmet_rdma_queue *queue;
1520
1521 /* Device is being removed, delete all queues using this device */
1522 mutex_lock(&nvmet_rdma_queue_mutex);
1523 list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
1524 if (queue->dev->device != ib_device)
1525 continue;
1526
1527 pr_info("Removing queue %d\n", queue->idx);
1528 __nvmet_rdma_queue_disconnect(queue);
1529 }
1530 mutex_unlock(&nvmet_rdma_queue_mutex);
1531
1532 flush_scheduled_work();
1533}
1534
1535static struct ib_client nvmet_rdma_ib_client = {
1536 .name = "nvmet_rdma",
1537 .add = nvmet_rdma_add_one,
1538 .remove = nvmet_rdma_remove_one
1539};
1540
1522static int __init nvmet_rdma_init(void) 1541static int __init nvmet_rdma_init(void)
1523{ 1542{
1524 return nvmet_register_transport(&nvmet_rdma_ops); 1543 int ret;
1544
1545 ret = ib_register_client(&nvmet_rdma_ib_client);
1546 if (ret)
1547 return ret;
1548
1549 ret = nvmet_register_transport(&nvmet_rdma_ops);
1550 if (ret)
1551 goto err_ib_client;
1552
1553 return 0;
1554
1555err_ib_client:
1556 ib_unregister_client(&nvmet_rdma_ib_client);
1557 return ret;
1525} 1558}
1526 1559
1527static void __exit nvmet_rdma_exit(void) 1560static void __exit nvmet_rdma_exit(void)
@@ -1544,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void)
1544 mutex_unlock(&nvmet_rdma_queue_mutex); 1577 mutex_unlock(&nvmet_rdma_queue_mutex);
1545 1578
1546 flush_scheduled_work(); 1579 flush_scheduled_work();
1580 ib_unregister_client(&nvmet_rdma_ib_client);
1547 ida_destroy(&nvmet_rdma_queue_ida); 1581 ida_destroy(&nvmet_rdma_queue_ida);
1548} 1582}
1549 1583
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5efc11..b7cbd5d2cdea 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2672 */ 2672 */
2673 if (basedev->state < DASD_STATE_READY) { 2673 if (basedev->state < DASD_STATE_READY) {
2674 while ((req = blk_fetch_request(block->request_queue))) 2674 while ((req = blk_fetch_request(block->request_queue)))
2675 __blk_end_request_all(req, -EIO); 2675 __blk_end_request_all(req, BLK_STS_IOERR);
2676 return; 2676 return;
2677 } 2677 }
2678 2678
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2692 "Rejecting write request %p", 2692 "Rejecting write request %p",
2693 req); 2693 req);
2694 blk_start_request(req); 2694 blk_start_request(req);
2695 __blk_end_request_all(req, -EIO); 2695 __blk_end_request_all(req, BLK_STS_IOERR);
2696 continue; 2696 continue;
2697 } 2697 }
2698 if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) && 2698 if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2702 "Rejecting failfast request %p", 2702 "Rejecting failfast request %p",
2703 req); 2703 req);
2704 blk_start_request(req); 2704 blk_start_request(req);
2705 __blk_end_request_all(req, -ETIMEDOUT); 2705 __blk_end_request_all(req, BLK_STS_TIMEOUT);
2706 continue; 2706 continue;
2707 } 2707 }
2708 cqr = basedev->discipline->build_cp(basedev, block, req); 2708 cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2734 "on request %p", 2734 "on request %p",
2735 PTR_ERR(cqr), req); 2735 PTR_ERR(cqr), req);
2736 blk_start_request(req); 2736 blk_start_request(req);
2737 __blk_end_request_all(req, -EIO); 2737 __blk_end_request_all(req, BLK_STS_IOERR);
2738 continue; 2738 continue;
2739 } 2739 }
2740 /* 2740 /*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
2755{ 2755{
2756 struct request *req; 2756 struct request *req;
2757 int status; 2757 int status;
2758 int error = 0; 2758 blk_status_t error = BLK_STS_OK;
2759 2759
2760 req = (struct request *) cqr->callback_data; 2760 req = (struct request *) cqr->callback_data;
2761 dasd_profile_end(cqr->block, cqr, req); 2761 dasd_profile_end(cqr->block, cqr, req);
2762
2762 status = cqr->block->base->discipline->free_cp(cqr, req); 2763 status = cqr->block->base->discipline->free_cp(cqr, req);
2763 if (status < 0) 2764 if (status < 0)
2764 error = status; 2765 error = errno_to_blk_status(status);
2765 else if (status == 0) { 2766 else if (status == 0) {
2766 if (cqr->intrc == -EPERM) 2767 switch (cqr->intrc) {
2767 error = -EBADE; 2768 case -EPERM:
2768 else if (cqr->intrc == -ENOLINK || 2769 error = BLK_STS_NEXUS;
2769 cqr->intrc == -ETIMEDOUT) 2770 break;
2770 error = cqr->intrc; 2771 case -ENOLINK:
2771 else 2772 error = BLK_STS_TRANSPORT;
2772 error = -EIO; 2773 break;
2774 case -ETIMEDOUT:
2775 error = BLK_STS_TIMEOUT;
2776 break;
2777 default:
2778 error = BLK_STS_IOERR;
2779 break;
2780 }
2773 } 2781 }
2774 __blk_end_request_all(req, error); 2782 __blk_end_request_all(req, error);
2775} 2783}
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
3190 3198
3191 spin_lock_irq(&block->request_queue_lock); 3199 spin_lock_irq(&block->request_queue_lock);
3192 while ((req = blk_fetch_request(block->request_queue))) 3200 while ((req = blk_fetch_request(block->request_queue)))
3193 __blk_end_request_all(req, -EIO); 3201 __blk_end_request_all(req, BLK_STS_IOERR);
3194 spin_unlock_irq(&block->request_queue_lock); 3202 spin_unlock_irq(&block->request_queue_lock);
3195} 3203}
3196 3204
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280af3e4..06eb1de52d1c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
845 unsigned long source_addr; 845 unsigned long source_addr;
846 unsigned long bytes_done; 846 unsigned long bytes_done;
847 847
848 blk_queue_split(q, &bio, q->bio_split); 848 blk_queue_split(q, &bio);
849 849
850 bytes_done = 0; 850 bytes_done = 0;
851 dev_info = bio->bi_bdev->bd_disk->private_data; 851 dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 152de6817875..3c2c84b72877 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
231 aob->request.data = (u64) aobrq; 231 aob->request.data = (u64) aobrq;
232 scmrq->bdev = bdev; 232 scmrq->bdev = bdev;
233 scmrq->retries = 4; 233 scmrq->retries = 4;
234 scmrq->error = 0; 234 scmrq->error = BLK_STS_OK;
235 /* We don't use all msbs - place aidaws at the end of the aob page. */ 235 /* We don't use all msbs - place aidaws at the end of the aob page. */
236 scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io]; 236 scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
237 scm_request_cluster_init(scmrq); 237 scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
364{ 364{
365 struct aob *aob = scmrq->aob; 365 struct aob *aob = scmrq->aob;
366 366
367 if (scmrq->error == -ETIMEDOUT) 367 if (scmrq->error == BLK_STS_TIMEOUT)
368 SCM_LOG(1, "Request timeout"); 368 SCM_LOG(1, "Request timeout");
369 else { 369 else {
370 SCM_LOG(1, "Request error"); 370 SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
377 scmrq->error); 377 scmrq->error);
378} 378}
379 379
380void scm_blk_irq(struct scm_device *scmdev, void *data, int error) 380void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
381{ 381{
382 struct scm_request *scmrq = data; 382 struct scm_request *scmrq = data;
383 struct scm_blk_dev *bdev = scmrq->bdev; 383 struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
397 struct scm_blk_dev *bdev = scmrq->bdev; 397 struct scm_blk_dev *bdev = scmrq->bdev;
398 unsigned long flags; 398 unsigned long flags;
399 399
400 if (scmrq->error != -EIO) 400 if (scmrq->error != BLK_STS_IOERR)
401 goto restart; 401 goto restart;
402 402
403 /* For -EIO the response block is valid. */ 403 /* For -EIO the response block is valid. */
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 09218cdc5129..cd598d1a4eae 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -35,7 +35,7 @@ struct scm_request {
35 struct aob *aob; 35 struct aob *aob;
36 struct list_head list; 36 struct list_head list;
37 u8 retries; 37 u8 retries;
38 int error; 38 blk_status_t error;
39#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE 39#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
40 struct { 40 struct {
41 enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state; 41 enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
50int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *); 50int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
51void scm_blk_dev_cleanup(struct scm_blk_dev *); 51void scm_blk_dev_cleanup(struct scm_blk_dev *);
52void scm_blk_set_available(struct scm_blk_dev *); 52void scm_blk_set_available(struct scm_blk_dev *);
53void scm_blk_irq(struct scm_device *, void *, int); 53void scm_blk_irq(struct scm_device *, void *, blk_status_t);
54 54
55void scm_request_finish(struct scm_request *); 55void scm_request_finish(struct scm_request *);
56void scm_request_requeue(struct scm_request *); 56void scm_request_requeue(struct scm_request *);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e755c8a3..a48f0d40c1d2 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
190 unsigned long page_addr; 190 unsigned long page_addr;
191 unsigned long bytes; 191 unsigned long bytes;
192 192
193 blk_queue_split(q, &bio, q->bio_split); 193 blk_queue_split(q, &bio);
194 194
195 if ((bio->bi_iter.bi_sector & 7) != 0 || 195 if ((bio->bi_iter.bi_sector & 7) != 0 ||
196 (bio->bi_iter.bi_size & 4095) != 0) 196 (bio->bi_iter.bi_size & 4095) != 0)
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index b3f44bc7f644..0f11f3bcac82 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
135 struct eadm_private *private = get_eadm_private(sch); 135 struct eadm_private *private = get_eadm_private(sch);
136 struct eadm_scsw *scsw = &sch->schib.scsw.eadm; 136 struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
137 struct irb *irb = this_cpu_ptr(&cio_irb); 137 struct irb *irb = this_cpu_ptr(&cio_irb);
138 int error = 0; 138 blk_status_t error = BLK_STS_OK;
139 139
140 EADM_LOG(6, "irq"); 140 EADM_LOG(6, "irq");
141 EADM_LOG_HEX(6, irb, sizeof(*irb)); 141 EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
144 144
145 if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) 145 if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
146 && scsw->eswf == 1 && irb->esw.eadm.erw.r) 146 && scsw->eswf == 1 && irb->esw.eadm.erw.r)
147 error = -EIO; 147 error = BLK_STS_IOERR;
148 148
149 if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC) 149 if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
150 error = -ETIMEDOUT; 150 error = BLK_STS_TIMEOUT;
151 151
152 eadm_subchannel_set_timeout(sch, 0); 152 eadm_subchannel_set_timeout(sch, 0);
153 153
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 15268edc54ae..1fa53ecdc2aa 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
71} 71}
72EXPORT_SYMBOL_GPL(scm_driver_unregister); 72EXPORT_SYMBOL_GPL(scm_driver_unregister);
73 73
74void scm_irq_handler(struct aob *aob, int error) 74void scm_irq_handler(struct aob *aob, blk_status_t error)
75{ 75{
76 struct aob_rq_header *aobrq = (void *) aob->request.data; 76 struct aob_rq_header *aobrq = (void *) aob->request.data;
77 struct scm_device *scmdev = aobrq->scmdev; 77 struct scm_device *scmdev = aobrq->scmdev;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 62fed9dc893e..14f377ac1280 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -214,7 +214,7 @@ static void jsfd_request(void)
214 struct jsfd_part *jdp = req->rq_disk->private_data; 214 struct jsfd_part *jdp = req->rq_disk->private_data;
215 unsigned long offset = blk_rq_pos(req) << 9; 215 unsigned long offset = blk_rq_pos(req) << 9;
216 size_t len = blk_rq_cur_bytes(req); 216 size_t len = blk_rq_cur_bytes(req);
217 int err = -EIO; 217 blk_status_t err = BLK_STS_IOERR;
218 218
219 if ((offset + len) > jdp->dsize) 219 if ((offset + len) > jdp->dsize)
220 goto end; 220 goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
230 } 230 }
231 231
232 jsfd_read(bio_data(req->bio), jdp->dbase + offset, len); 232 jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
233 err = 0; 233 err = BLK_STS_OK;
234 end: 234 end:
235 if (!__blk_end_request_cur(req, err)) 235 if (!__blk_end_request_cur(req, err))
236 req = jsfd_next_request(); 236 req = jsfd_next_request();
@@ -592,6 +592,7 @@ static int jsfd_init(void)
592 put_disk(disk); 592 put_disk(disk);
593 goto out; 593 goto out;
594 } 594 }
595 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
595 jsfd_disk[i] = disk; 596 jsfd_disk[i] = disk;
596 } 597 }
597 598
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 8a1b94816419..a4f28b7e4c65 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
446 * code paths. 446 * code paths.
447 */ 447 */
448 if (unlikely(rq->bio)) 448 if (unlikely(rq->bio))
449 blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq)); 449 blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
450 else 450 else
451 blk_put_request(rq); 451 blk_put_request(rq);
452} 452}
@@ -474,10 +474,10 @@ void osd_end_request(struct osd_request *or)
474EXPORT_SYMBOL(osd_end_request); 474EXPORT_SYMBOL(osd_end_request);
475 475
476static void _set_error_resid(struct osd_request *or, struct request *req, 476static void _set_error_resid(struct osd_request *or, struct request *req,
477 int error) 477 blk_status_t error)
478{ 478{
479 or->async_error = error; 479 or->async_error = error;
480 or->req_errors = scsi_req(req)->result ? : error; 480 or->req_errors = scsi_req(req)->result;
481 or->sense_len = scsi_req(req)->sense_len; 481 or->sense_len = scsi_req(req)->sense_len;
482 if (or->sense_len) 482 if (or->sense_len)
483 memcpy(or->sense, scsi_req(req)->sense, or->sense_len); 483 memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
489 489
490int osd_execute_request(struct osd_request *or) 490int osd_execute_request(struct osd_request *or)
491{ 491{
492 int error;
493
494 blk_execute_rq(or->request->q, NULL, or->request, 0); 492 blk_execute_rq(or->request->q, NULL, or->request, 0);
495 error = scsi_req(or->request)->result ? -EIO : 0;
496 493
497 _set_error_resid(or, or->request, error); 494 if (scsi_req(or->request)->result) {
498 return error; 495 _set_error_resid(or, or->request, BLK_STS_IOERR);
496 return -EIO;
497 }
498
499 _set_error_resid(or, or->request, BLK_STS_OK);
500 return 0;
499} 501}
500EXPORT_SYMBOL(osd_execute_request); 502EXPORT_SYMBOL(osd_execute_request);
501 503
502static void osd_request_async_done(struct request *req, int error) 504static void osd_request_async_done(struct request *req, blk_status_t error)
503{ 505{
504 struct osd_request *or = req->end_io_data; 506 struct osd_request *or = req->end_io_data;
505 507
@@ -1572,13 +1574,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
1572 flags); 1574 flags);
1573 if (IS_ERR(req)) 1575 if (IS_ERR(req))
1574 return req; 1576 return req;
1575 scsi_req_init(req);
1576 1577
1577 for_each_bio(bio) { 1578 for_each_bio(bio) {
1578 struct bio *bounce_bio = bio; 1579 ret = blk_rq_append_bio(req, bio);
1579
1580 blk_queue_bounce(req->q, &bounce_bio);
1581 ret = blk_rq_append_bio(req, bounce_bio);
1582 if (ret) 1580 if (ret)
1583 return ERR_PTR(ret); 1581 return ERR_PTR(ret);
1584 } 1582 }
@@ -1617,7 +1615,6 @@ static int _init_blk_request(struct osd_request *or,
1617 ret = PTR_ERR(req); 1615 ret = PTR_ERR(req);
1618 goto out; 1616 goto out;
1619 } 1617 }
1620 scsi_req_init(req);
1621 or->in.req = or->request->next_rq = req; 1618 or->in.req = or->request->next_rq = req;
1622 } 1619 }
1623 } else if (has_in) 1620 } else if (has_in)
@@ -1914,7 +1911,7 @@ analyze:
1914 /* scsi sense is Empty, the request was never issued to target 1911 /* scsi sense is Empty, the request was never issued to target
1915 * linux return code might tell us what happened. 1912 * linux return code might tell us what happened.
1916 */ 1913 */
1917 if (or->async_error == -ENOMEM) 1914 if (or->async_error == BLK_STS_RESOURCE)
1918 osi->osd_err_pri = OSD_ERR_PRI_RESOURCE; 1915 osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
1919 else 1916 else
1920 osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE; 1917 osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 67cbed92f07d..929ee7e88120 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
320 320
321 321
322/* Wakeup from interrupt */ 322/* Wakeup from interrupt */
323static void osst_end_async(struct request *req, int update) 323static void osst_end_async(struct request *req, blk_status_t status)
324{ 324{
325 struct scsi_request *rq = scsi_req(req); 325 struct scsi_request *rq = scsi_req(req);
326 struct osst_request *SRpnt = req->end_io_data; 326 struct osst_request *SRpnt = req->end_io_data;
@@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
373 return DRIVER_ERROR << 24; 373 return DRIVER_ERROR << 24;
374 374
375 rq = scsi_req(req); 375 rq = scsi_req(req);
376 scsi_req_init(req);
377 req->rq_flags |= RQF_QUIET; 376 req->rq_flags |= RQF_QUIET;
378 377
379 SRpnt->bio = NULL; 378 SRpnt->bio = NULL;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07dab893d..304a7158540f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
1874 } 1874 }
1875} 1875}
1876 1876
1877static void eh_lock_door_done(struct request *req, int uptodate) 1877static void eh_lock_door_done(struct request *req, blk_status_t status)
1878{ 1878{
1879 __blk_put_request(req->q, req); 1879 __blk_put_request(req->q, req);
1880} 1880}
@@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
1903 if (IS_ERR(req)) 1903 if (IS_ERR(req))
1904 return; 1904 return;
1905 rq = scsi_req(req); 1905 rq = scsi_req(req);
1906 scsi_req_init(req);
1907 1906
1908 rq->cmd[0] = ALLOW_MEDIUM_REMOVAL; 1907 rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
1909 rq->cmd[1] = 0; 1908 rq->cmd[1] = 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 99e16ac479e3..550e29f903b7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -250,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
250 if (IS_ERR(req)) 250 if (IS_ERR(req))
251 return ret; 251 return ret;
252 rq = scsi_req(req); 252 rq = scsi_req(req);
253 scsi_req_init(req);
254 253
255 if (bufflen && blk_rq_map_kern(sdev->request_queue, req, 254 if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
256 buffer, bufflen, __GFP_RECLAIM)) 255 buffer, bufflen, __GFP_RECLAIM))
@@ -635,7 +634,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
635 cmd->request->next_rq->special = NULL; 634 cmd->request->next_rq->special = NULL;
636} 635}
637 636
638static bool scsi_end_request(struct request *req, int error, 637static bool scsi_end_request(struct request *req, blk_status_t error,
639 unsigned int bytes, unsigned int bidi_bytes) 638 unsigned int bytes, unsigned int bidi_bytes)
640{ 639{
641 struct scsi_cmnd *cmd = req->special; 640 struct scsi_cmnd *cmd = req->special;
@@ -694,45 +693,28 @@ static bool scsi_end_request(struct request *req, int error,
694 * @cmd: SCSI command (unused) 693 * @cmd: SCSI command (unused)
695 * @result: scsi error code 694 * @result: scsi error code
696 * 695 *
697 * Translate SCSI error code into standard UNIX errno. 696 * Translate SCSI error code into block errors.
698 * Return values:
699 * -ENOLINK temporary transport failure
700 * -EREMOTEIO permanent target failure, do not retry
701 * -EBADE permanent nexus failure, retry on other path
702 * -ENOSPC No write space available
703 * -ENODATA Medium error
704 * -EIO unspecified I/O error
705 */ 697 */
706static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) 698static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
699 int result)
707{ 700{
708 int error = 0; 701 switch (host_byte(result)) {
709
710 switch(host_byte(result)) {
711 case DID_TRANSPORT_FAILFAST: 702 case DID_TRANSPORT_FAILFAST:
712 error = -ENOLINK; 703 return BLK_STS_TRANSPORT;
713 break;
714 case DID_TARGET_FAILURE: 704 case DID_TARGET_FAILURE:
715 set_host_byte(cmd, DID_OK); 705 set_host_byte(cmd, DID_OK);
716 error = -EREMOTEIO; 706 return BLK_STS_TARGET;
717 break;
718 case DID_NEXUS_FAILURE: 707 case DID_NEXUS_FAILURE:
719 set_host_byte(cmd, DID_OK); 708 return BLK_STS_NEXUS;
720 error = -EBADE;
721 break;
722 case DID_ALLOC_FAILURE: 709 case DID_ALLOC_FAILURE:
723 set_host_byte(cmd, DID_OK); 710 set_host_byte(cmd, DID_OK);
724 error = -ENOSPC; 711 return BLK_STS_NOSPC;
725 break;
726 case DID_MEDIUM_ERROR: 712 case DID_MEDIUM_ERROR:
727 set_host_byte(cmd, DID_OK); 713 set_host_byte(cmd, DID_OK);
728 error = -ENODATA; 714 return BLK_STS_MEDIUM;
729 break;
730 default: 715 default:
731 error = -EIO; 716 return BLK_STS_IOERR;
732 break;
733 } 717 }
734
735 return error;
736} 718}
737 719
738/* 720/*
@@ -769,7 +751,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
769 int result = cmd->result; 751 int result = cmd->result;
770 struct request_queue *q = cmd->device->request_queue; 752 struct request_queue *q = cmd->device->request_queue;
771 struct request *req = cmd->request; 753 struct request *req = cmd->request;
772 int error = 0; 754 blk_status_t error = BLK_STS_OK;
773 struct scsi_sense_hdr sshdr; 755 struct scsi_sense_hdr sshdr;
774 bool sense_valid = false; 756 bool sense_valid = false;
775 int sense_deferred = 0, level = 0; 757 int sense_deferred = 0, level = 0;
@@ -808,7 +790,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
808 * both sides at once. 790 * both sides at once.
809 */ 791 */
810 scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid; 792 scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
811 if (scsi_end_request(req, 0, blk_rq_bytes(req), 793 if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
812 blk_rq_bytes(req->next_rq))) 794 blk_rq_bytes(req->next_rq)))
813 BUG(); 795 BUG();
814 return; 796 return;
@@ -850,7 +832,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
850 scsi_print_sense(cmd); 832 scsi_print_sense(cmd);
851 result = 0; 833 result = 0;
852 /* for passthrough error may be set */ 834 /* for passthrough error may be set */
853 error = 0; 835 error = BLK_STS_OK;
854 } 836 }
855 837
856 /* 838 /*
@@ -922,18 +904,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
922 action = ACTION_REPREP; 904 action = ACTION_REPREP;
923 } else if (sshdr.asc == 0x10) /* DIX */ { 905 } else if (sshdr.asc == 0x10) /* DIX */ {
924 action = ACTION_FAIL; 906 action = ACTION_FAIL;
925 error = -EILSEQ; 907 error = BLK_STS_PROTECTION;
926 /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ 908 /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
927 } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { 909 } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
928 action = ACTION_FAIL; 910 action = ACTION_FAIL;
929 error = -EREMOTEIO; 911 error = BLK_STS_TARGET;
930 } else 912 } else
931 action = ACTION_FAIL; 913 action = ACTION_FAIL;
932 break; 914 break;
933 case ABORTED_COMMAND: 915 case ABORTED_COMMAND:
934 action = ACTION_FAIL; 916 action = ACTION_FAIL;
935 if (sshdr.asc == 0x10) /* DIF */ 917 if (sshdr.asc == 0x10) /* DIF */
936 error = -EILSEQ; 918 error = BLK_STS_PROTECTION;
937 break; 919 break;
938 case NOT_READY: 920 case NOT_READY:
939 /* If the device is in the process of becoming 921 /* If the device is in the process of becoming
@@ -1134,6 +1116,20 @@ err_exit:
1134} 1116}
1135EXPORT_SYMBOL(scsi_init_io); 1117EXPORT_SYMBOL(scsi_init_io);
1136 1118
1119/**
1120 * scsi_initialize_rq - initialize struct scsi_cmnd.req
1121 *
1122 * Called from inside blk_get_request().
1123 */
1124void scsi_initialize_rq(struct request *rq)
1125{
1126 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
1127
1128 scsi_req_init(&cmd->req);
1129}
1130EXPORT_SYMBOL(scsi_initialize_rq);
1131
1132/* Called after a request has been started. */
1137void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) 1133void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
1138{ 1134{
1139 void *buf = cmd->sense_buffer; 1135 void *buf = cmd->sense_buffer;
@@ -1829,15 +1825,15 @@ out_delay:
1829 blk_delay_queue(q, SCSI_QUEUE_DELAY); 1825 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1830} 1826}
1831 1827
1832static inline int prep_to_mq(int ret) 1828static inline blk_status_t prep_to_mq(int ret)
1833{ 1829{
1834 switch (ret) { 1830 switch (ret) {
1835 case BLKPREP_OK: 1831 case BLKPREP_OK:
1836 return BLK_MQ_RQ_QUEUE_OK; 1832 return BLK_STS_OK;
1837 case BLKPREP_DEFER: 1833 case BLKPREP_DEFER:
1838 return BLK_MQ_RQ_QUEUE_BUSY; 1834 return BLK_STS_RESOURCE;
1839 default: 1835 default:
1840 return BLK_MQ_RQ_QUEUE_ERROR; 1836 return BLK_STS_IOERR;
1841 } 1837 }
1842} 1838}
1843 1839
@@ -1909,7 +1905,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
1909 blk_mq_complete_request(cmd->request); 1905 blk_mq_complete_request(cmd->request);
1910} 1906}
1911 1907
1912static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, 1908static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1913 const struct blk_mq_queue_data *bd) 1909 const struct blk_mq_queue_data *bd)
1914{ 1910{
1915 struct request *req = bd->rq; 1911 struct request *req = bd->rq;
@@ -1917,14 +1913,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1917 struct scsi_device *sdev = q->queuedata; 1913 struct scsi_device *sdev = q->queuedata;
1918 struct Scsi_Host *shost = sdev->host; 1914 struct Scsi_Host *shost = sdev->host;
1919 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); 1915 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1920 int ret; 1916 blk_status_t ret;
1921 int reason; 1917 int reason;
1922 1918
1923 ret = prep_to_mq(scsi_prep_state_check(sdev, req)); 1919 ret = prep_to_mq(scsi_prep_state_check(sdev, req));
1924 if (ret != BLK_MQ_RQ_QUEUE_OK) 1920 if (ret != BLK_STS_OK)
1925 goto out; 1921 goto out;
1926 1922
1927 ret = BLK_MQ_RQ_QUEUE_BUSY; 1923 ret = BLK_STS_RESOURCE;
1928 if (!get_device(&sdev->sdev_gendev)) 1924 if (!get_device(&sdev->sdev_gendev))
1929 goto out; 1925 goto out;
1930 1926
@@ -1937,7 +1933,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1937 1933
1938 if (!(req->rq_flags & RQF_DONTPREP)) { 1934 if (!(req->rq_flags & RQF_DONTPREP)) {
1939 ret = prep_to_mq(scsi_mq_prep_fn(req)); 1935 ret = prep_to_mq(scsi_mq_prep_fn(req));
1940 if (ret != BLK_MQ_RQ_QUEUE_OK) 1936 if (ret != BLK_STS_OK)
1941 goto out_dec_host_busy; 1937 goto out_dec_host_busy;
1942 req->rq_flags |= RQF_DONTPREP; 1938 req->rq_flags |= RQF_DONTPREP;
1943 } else { 1939 } else {
@@ -1955,11 +1951,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1955 reason = scsi_dispatch_cmd(cmd); 1951 reason = scsi_dispatch_cmd(cmd);
1956 if (reason) { 1952 if (reason) {
1957 scsi_set_blocked(cmd, reason); 1953 scsi_set_blocked(cmd, reason);
1958 ret = BLK_MQ_RQ_QUEUE_BUSY; 1954 ret = BLK_STS_RESOURCE;
1959 goto out_dec_host_busy; 1955 goto out_dec_host_busy;
1960 } 1956 }
1961 1957
1962 return BLK_MQ_RQ_QUEUE_OK; 1958 return BLK_STS_OK;
1963 1959
1964out_dec_host_busy: 1960out_dec_host_busy:
1965 atomic_dec(&shost->host_busy); 1961 atomic_dec(&shost->host_busy);
@@ -1972,12 +1968,14 @@ out_put_device:
1972 put_device(&sdev->sdev_gendev); 1968 put_device(&sdev->sdev_gendev);
1973out: 1969out:
1974 switch (ret) { 1970 switch (ret) {
1975 case BLK_MQ_RQ_QUEUE_BUSY: 1971 case BLK_STS_OK:
1972 break;
1973 case BLK_STS_RESOURCE:
1976 if (atomic_read(&sdev->device_busy) == 0 && 1974 if (atomic_read(&sdev->device_busy) == 0 &&
1977 !scsi_device_blocked(sdev)) 1975 !scsi_device_blocked(sdev))
1978 blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY); 1976 blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
1979 break; 1977 break;
1980 case BLK_MQ_RQ_QUEUE_ERROR: 1978 default:
1981 /* 1979 /*
1982 * Make sure to release all allocated ressources when 1980 * Make sure to release all allocated ressources when
1983 * we hit an error, as we will never see this command 1981 * we hit an error, as we will never see this command
@@ -1986,8 +1984,6 @@ out:
1986 if (req->rq_flags & RQF_DONTPREP) 1984 if (req->rq_flags & RQF_DONTPREP)
1987 scsi_mq_uninit_cmd(cmd); 1985 scsi_mq_uninit_cmd(cmd);
1988 break; 1986 break;
1989 default:
1990 break;
1991 } 1987 }
1992 return ret; 1988 return ret;
1993} 1989}
@@ -2057,6 +2053,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
2057{ 2053{
2058 struct device *dev = shost->dma_dev; 2054 struct device *dev = shost->dma_dev;
2059 2055
2056 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
2057
2060 /* 2058 /*
2061 * this limit is imposed by hardware restrictions 2059 * this limit is imposed by hardware restrictions
2062 */ 2060 */
@@ -2139,6 +2137,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
2139 q->request_fn = scsi_request_fn; 2137 q->request_fn = scsi_request_fn;
2140 q->init_rq_fn = scsi_init_rq; 2138 q->init_rq_fn = scsi_init_rq;
2141 q->exit_rq_fn = scsi_exit_rq; 2139 q->exit_rq_fn = scsi_exit_rq;
2140 q->initialize_rq_fn = scsi_initialize_rq;
2142 2141
2143 if (blk_init_allocated_queue(q) < 0) { 2142 if (blk_init_allocated_queue(q) < 0) {
2144 blk_cleanup_queue(q); 2143 blk_cleanup_queue(q);
@@ -2163,6 +2162,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
2163#endif 2162#endif
2164 .init_request = scsi_init_request, 2163 .init_request = scsi_init_request,
2165 .exit_request = scsi_exit_request, 2164 .exit_request = scsi_exit_request,
2165 .initialize_rq_fn = scsi_initialize_rq,
2166 .map_queues = scsi_map_queues, 2166 .map_queues = scsi_map_queues,
2167}; 2167};
2168 2168
@@ -2977,7 +2977,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
2977 if (wait) 2977 if (wait)
2978 blk_mq_quiesce_queue(q); 2978 blk_mq_quiesce_queue(q);
2979 else 2979 else
2980 blk_mq_stop_hw_queues(q); 2980 blk_mq_quiesce_queue_nowait(q);
2981 } else { 2981 } else {
2982 spin_lock_irqsave(q->queue_lock, flags); 2982 spin_lock_irqsave(q->queue_lock, flags);
2983 blk_stop_queue(q); 2983 blk_stop_queue(q);
@@ -3031,7 +3031,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
3031 return -EINVAL; 3031 return -EINVAL;
3032 3032
3033 if (q->mq_ops) { 3033 if (q->mq_ops) {
3034 blk_mq_start_stopped_hw_queues(q, false); 3034 blk_mq_unquiesce_queue(q);
3035 } else { 3035 } else {
3036 spin_lock_irqsave(q->queue_lock, flags); 3036 spin_lock_irqsave(q->queue_lock, flags);
3037 blk_start_queue(q); 3037 blk_start_queue(q);
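
The __scsi_error_from_host_byte() hunk above drops the old errno translation (the removed comment listed -ENOLINK, -EREMOTEIO, -EBADE, -ENOSPC, -ENODATA and -EIO) and returns blk_status_t codes directly. A small user-space sketch of just that mapping, with stand-in host-byte values and the status codes printed as names; the real definitions live in the SCSI and block headers and are not reproduced here, and the set_host_byte() side effects are left out.

#include <stdio.h>

/* Stand-ins for the kernel's host byte codes; only the cases from the hunk. */
enum host_byte { DID_OK, DID_TRANSPORT_FAILFAST, DID_TARGET_FAILURE,
		 DID_NEXUS_FAILURE, DID_ALLOC_FAILURE, DID_MEDIUM_ERROR };

/* Same table as the hunk above, expressed as block status names. */
static const char *status_from_host_byte(enum host_byte hb)
{
	switch (hb) {
	case DID_TRANSPORT_FAILFAST: return "BLK_STS_TRANSPORT";
	case DID_TARGET_FAILURE:     return "BLK_STS_TARGET";
	case DID_NEXUS_FAILURE:      return "BLK_STS_NEXUS";
	case DID_ALLOC_FAILURE:      return "BLK_STS_NOSPC";
	case DID_MEDIUM_ERROR:       return "BLK_STS_MEDIUM";
	default:                     return "BLK_STS_IOERR";
	}
}

int main(void)
{
	printf("%s\n", status_from_host_byte(DID_MEDIUM_ERROR)); /* prints BLK_STS_MEDIUM */
	return 0;
}
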
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0ebe2f1bb908..5006a656e16a 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
33#include <linux/bsg.h> 33#include <linux/bsg.h>
34 34
35#include <scsi/scsi.h> 35#include <scsi/scsi.h>
36#include <scsi/scsi_cmnd.h>
36#include <scsi/scsi_request.h> 37#include <scsi/scsi_request.h>
37#include <scsi/scsi_device.h> 38#include <scsi/scsi_device.h>
38#include <scsi/scsi_host.h> 39#include <scsi/scsi_host.h>
@@ -172,7 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
172 struct sas_rphy *rphy) 173 struct sas_rphy *rphy)
173{ 174{
174 struct request *req; 175 struct request *req;
175 int ret; 176 blk_status_t ret;
176 int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); 177 int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
177 178
178 while ((req = blk_fetch_request(q)) != NULL) { 179 while ((req = blk_fetch_request(q)) != NULL) {
@@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
230 q = blk_alloc_queue(GFP_KERNEL); 231 q = blk_alloc_queue(GFP_KERNEL);
231 if (!q) 232 if (!q)
232 return -ENOMEM; 233 return -ENOMEM;
234 q->initialize_rq_fn = scsi_initialize_rq;
233 q->cmd_size = sizeof(struct scsi_request); 235 q->cmd_size = sizeof(struct scsi_request);
234 236
235 if (rphy) { 237 if (rphy) {
@@ -249,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
249 if (error) 251 if (error)
250 goto out_cleanup_queue; 252 goto out_cleanup_queue;
251 253
254 /*
255 * by default assume old behaviour and bounce for any highmem page
256 */
257 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
258
252 error = bsg_register_queue(q, dev, name, release); 259 error = bsg_register_queue(q, dev, name, release);
253 if (error) 260 if (error)
254 goto out_cleanup_queue; 261 goto out_cleanup_queue;
@@ -264,6 +271,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
264 q->queuedata = shost; 271 q->queuedata = shost;
265 272
266 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); 273 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
274 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
267 return 0; 275 return 0;
268 276
269out_cleanup_queue: 277out_cleanup_queue:
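
A number of hunks in this diff delete open-coded scsi_req_init() calls (osd, osst, sg, st, scsi_error, scsi_lib, pscsi) and instead install scsi_initialize_rq() as the queue's initialize_rq_fn; the added kerneldoc above notes that the hook is called from inside blk_get_request(). A small user-space model of that constructor-hook pattern follows; the request/queue types here are hypothetical stand-ins for the block-layer structures, not the kernel definitions.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for struct request / struct request_queue. */
struct request {
	int initialized;
};

struct request_queue {
	void (*initialize_rq_fn)(struct request *rq);	/* optional per-queue constructor */
};

static void scsi_like_initialize_rq(struct request *rq)
{
	rq->initialized = 1;	/* the real hook runs scsi_req_init() on the request's payload */
}

/* Model of blk_get_request(): allocate, then run the queue's hook, so callers
 * no longer have to initialize the request themselves. */
static struct request *get_request(struct request_queue *q)
{
	struct request *rq = calloc(1, sizeof(*rq));

	if (rq && q->initialize_rq_fn)
		q->initialize_rq_fn(rq);
	return rq;
}

int main(void)
{
	struct request_queue q = { .initialize_rq_fn = scsi_like_initialize_rq };
	struct request *rq = get_request(&q);

	printf("%d\n", rq ? rq->initialized : -1);	/* prints 1 */
	free(rq);
	return 0;
}
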
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82c33a6edbea..21225d62b0c1 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
177} Sg_device; 177} Sg_device;
178 178
179/* tasklet or soft irq callback */ 179/* tasklet or soft irq callback */
180static void sg_rq_end_io(struct request *rq, int uptodate); 180static void sg_rq_end_io(struct request *rq, blk_status_t status);
181static int sg_start_req(Sg_request *srp, unsigned char *cmd); 181static int sg_start_req(Sg_request *srp, unsigned char *cmd);
182static int sg_finish_rem_req(Sg_request * srp); 182static int sg_finish_rem_req(Sg_request * srp);
183static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); 183static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
808 if (atomic_read(&sdp->detaching)) { 808 if (atomic_read(&sdp->detaching)) {
809 if (srp->bio) { 809 if (srp->bio) {
810 scsi_req_free_cmd(scsi_req(srp->rq)); 810 scsi_req_free_cmd(scsi_req(srp->rq));
811 blk_end_request_all(srp->rq, -EIO); 811 blk_end_request_all(srp->rq, BLK_STS_IOERR);
812 srp->rq = NULL; 812 srp->rq = NULL;
813 } 813 }
814 814
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
1300 * level when a command is completed (or has failed). 1300 * level when a command is completed (or has failed).
1301 */ 1301 */
1302static void 1302static void
1303sg_rq_end_io(struct request *rq, int uptodate) 1303sg_rq_end_io(struct request *rq, blk_status_t status)
1304{ 1304{
1305 struct sg_request *srp = rq->end_io_data; 1305 struct sg_request *srp = rq->end_io_data;
1306 struct scsi_request *req = scsi_req(rq); 1306 struct scsi_request *req = scsi_req(rq);
@@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
1732 } 1732 }
1733 req = scsi_req(rq); 1733 req = scsi_req(rq);
1734 1734
1735 scsi_req_init(rq);
1736
1737 if (hp->cmd_len > BLK_MAX_CDB) 1735 if (hp->cmd_len > BLK_MAX_CDB)
1738 req->cmd = long_cmdp; 1736 req->cmd = long_cmdp;
1739 memcpy(req->cmd, cmd, hp->cmd_len); 1737 memcpy(req->cmd, cmd, hp->cmd_len);
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1ea34d6f5437..8e5013d9cad4 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
511 atomic64_dec(&STp->stats->in_flight); 511 atomic64_dec(&STp->stats->in_flight);
512} 512}
513 513
514static void st_scsi_execute_end(struct request *req, int uptodate) 514static void st_scsi_execute_end(struct request *req, blk_status_t status)
515{ 515{
516 struct st_request *SRpnt = req->end_io_data; 516 struct st_request *SRpnt = req->end_io_data;
517 struct scsi_request *rq = scsi_req(req); 517 struct scsi_request *rq = scsi_req(req);
@@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
549 if (IS_ERR(req)) 549 if (IS_ERR(req))
550 return DRIVER_ERROR << 24; 550 return DRIVER_ERROR << 24;
551 rq = scsi_req(req); 551 rq = scsi_req(req);
552 scsi_req_init(req);
553 req->rq_flags |= RQF_QUIET; 552 req->rq_flags |= RQF_QUIET;
554 553
555 mdata->null_mapped = 1; 554 mdata->null_mapped = 1;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bb069ebe4aa6..c05d38016556 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
93 return -EINVAL; 93 return -EINVAL;
94 } 94 }
95 95
96 ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0); 96 ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
97 if (!ib_dev->ibd_bio_set) { 97 if (!ib_dev->ibd_bio_set) {
98 pr_err("IBLOCK: Unable to create bioset\n"); 98 pr_err("IBLOCK: Unable to create bioset\n");
99 goto out; 99 goto out;
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
296 struct se_cmd *cmd = bio->bi_private; 296 struct se_cmd *cmd = bio->bi_private;
297 struct iblock_req *ibr = cmd->priv; 297 struct iblock_req *ibr = cmd->priv;
298 298
299 if (bio->bi_error) { 299 if (bio->bi_status) {
300 pr_err("bio error: %p, err: %d\n", bio, bio->bi_error); 300 pr_err("bio error: %p, err: %d\n", bio, bio->bi_status);
301 /* 301 /*
302 * Bump the ib_bio_err_cnt and release bio. 302 * Bump the ib_bio_err_cnt and release bio.
303 */ 303 */
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
354{ 354{
355 struct se_cmd *cmd = bio->bi_private; 355 struct se_cmd *cmd = bio->bi_private;
356 356
357 if (bio->bi_error) 357 if (bio->bi_status)
358 pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error); 358 pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
359 359
360 if (cmd) { 360 if (cmd) {
361 if (bio->bi_error) 361 if (bio->bi_status)
362 target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION); 362 target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
363 else 363 else
364 target_complete_cmd(cmd, SAM_STAT_GOOD); 364 target_complete_cmd(cmd, SAM_STAT_GOOD);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 3e4abb13f8ea..ceec0211e84e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
55} 55}
56 56
57static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); 57static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
58static void pscsi_req_done(struct request *, int); 58static void pscsi_req_done(struct request *, blk_status_t);
59 59
60/* pscsi_attach_hba(): 60/* pscsi_attach_hba():
61 * 61 *
@@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
992 goto fail; 992 goto fail;
993 } 993 }
994 994
995 scsi_req_init(req);
996
997 if (sgl) { 995 if (sgl) {
998 ret = pscsi_map_sg(cmd, sgl, sgl_nents, req); 996 ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
999 if (ret) 997 if (ret)
@@ -1045,7 +1043,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
1045 return 0; 1043 return 0;
1046} 1044}
1047 1045
1048static void pscsi_req_done(struct request *req, int uptodate) 1046static void pscsi_req_done(struct request *req, blk_status_t status)
1049{ 1047{
1050 struct se_cmd *cmd = req->end_io_data; 1048 struct se_cmd *cmd = req->end_io_data;
1051 struct pscsi_plugin_task *pt = cmd->priv; 1049 struct pscsi_plugin_task *pt = cmd->priv;
diff --git a/fs/aio.c b/fs/aio.c
index f52d925ee259..dcad3a66748c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1541 ssize_t ret; 1541 ssize_t ret;
1542 1542
1543 /* enforce forwards compatibility on users */ 1543 /* enforce forwards compatibility on users */
1544 if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) { 1544 if (unlikely(iocb->aio_reserved2)) {
1545 pr_debug("EINVAL: reserve field set\n"); 1545 pr_debug("EINVAL: reserve field set\n");
1546 return -EINVAL; 1546 return -EINVAL;
1547 } 1547 }
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1568 req->common.ki_pos = iocb->aio_offset; 1568 req->common.ki_pos = iocb->aio_offset;
1569 req->common.ki_complete = aio_complete; 1569 req->common.ki_complete = aio_complete;
1570 req->common.ki_flags = iocb_flags(req->common.ki_filp); 1570 req->common.ki_flags = iocb_flags(req->common.ki_filp);
1571 req->common.ki_hint = file_write_hint(file);
1571 1572
1572 if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1573 if (iocb->aio_flags & IOCB_FLAG_RESFD) {
1573 /* 1574 /*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1586 req->common.ki_flags |= IOCB_EVENTFD; 1587 req->common.ki_flags |= IOCB_EVENTFD;
1587 } 1588 }
1588 1589
1590 ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
1591 if (unlikely(ret)) {
1592 pr_debug("EINVAL: aio_rw_flags\n");
1593 goto out_put_req;
1594 }
1595
1596 if ((req->common.ki_flags & IOCB_NOWAIT) &&
1597 !(req->common.ki_flags & IOCB_DIRECT)) {
1598 ret = -EOPNOTSUPP;
1599 goto out_put_req;
1600 }
1601
1589 ret = put_user(KIOCB_KEY, &user_iocb->aio_key); 1602 ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
1590 if (unlikely(ret)) { 1603 if (unlikely(ret)) {
1591 pr_debug("EFAULT: aio_key\n"); 1604 pr_debug("EFAULT: aio_key\n");
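
The io_submit_one() hunks above start honouring per-iocb flags: the field that used to be checked as aio_reserved1 now carries aio_rw_flags and is validated through kiocb_set_rw_flags(), the kiocb picks up a write hint, and a NOWAIT request is refused with -EOPNOTSUPP unless it is also direct I/O. A tiny user-space model of that last check only; the flag bits are stand-ins, not the kernel's IOCB_* definitions.

#include <errno.h>
#include <stdio.h>

/* Stand-ins for the kernel's IOCB_DIRECT / IOCB_NOWAIT kiocb flags. */
#define KI_DIRECT  (1u << 0)
#define KI_NOWAIT  (1u << 1)

/* Model of the check added above: NOWAIT is only honoured for direct I/O. */
static int validate_nowait(unsigned int ki_flags)
{
	if ((ki_flags & KI_NOWAIT) && !(ki_flags & KI_DIRECT))
		return -EOPNOTSUPP;
	return 0;
}

int main(void)
{
	printf("%d %d\n", validate_nowait(KI_NOWAIT),
	       validate_nowait(KI_NOWAIT | KI_DIRECT)); /* prints "-95 0" on Linux */
	return 0;
}
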
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0a7404ef9335..a7df151f8aba 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
225 bio_init(&bio, vecs, nr_pages); 225 bio_init(&bio, vecs, nr_pages);
226 bio.bi_bdev = bdev; 226 bio.bi_bdev = bdev;
227 bio.bi_iter.bi_sector = pos >> 9; 227 bio.bi_iter.bi_sector = pos >> 9;
228 bio.bi_write_hint = iocb->ki_hint;
228 bio.bi_private = current; 229 bio.bi_private = current;
229 bio.bi_end_io = blkdev_bio_end_io_simple; 230 bio.bi_end_io = blkdev_bio_end_io_simple;
230 231
@@ -262,8 +263,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
262 if (vecs != inline_vecs) 263 if (vecs != inline_vecs)
263 kfree(vecs); 264 kfree(vecs);
264 265
265 if (unlikely(bio.bi_error)) 266 if (unlikely(bio.bi_status))
266 ret = bio.bi_error; 267 ret = blk_status_to_errno(bio.bi_status);
267 268
268 bio_uninit(&bio); 269 bio_uninit(&bio);
269 270
@@ -291,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
291 bool should_dirty = dio->should_dirty; 292 bool should_dirty = dio->should_dirty;
292 293
293 if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { 294 if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
294 if (bio->bi_error && !dio->bio.bi_error) 295 if (bio->bi_status && !dio->bio.bi_status)
295 dio->bio.bi_error = bio->bi_error; 296 dio->bio.bi_status = bio->bi_status;
296 } else { 297 } else {
297 if (!dio->is_sync) { 298 if (!dio->is_sync) {
298 struct kiocb *iocb = dio->iocb; 299 struct kiocb *iocb = dio->iocb;
299 ssize_t ret = dio->bio.bi_error; 300 ssize_t ret;
300 301
301 if (likely(!ret)) { 302 if (likely(!dio->bio.bi_status)) {
302 ret = dio->size; 303 ret = dio->size;
303 iocb->ki_pos += ret; 304 iocb->ki_pos += ret;
305 } else {
306 ret = blk_status_to_errno(dio->bio.bi_status);
304 } 307 }
305 308
306 dio->iocb->ki_complete(iocb, ret, 0); 309 dio->iocb->ki_complete(iocb, ret, 0);
@@ -337,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
337 bool is_read = (iov_iter_rw(iter) == READ), is_sync; 340 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
338 loff_t pos = iocb->ki_pos; 341 loff_t pos = iocb->ki_pos;
339 blk_qc_t qc = BLK_QC_T_NONE; 342 blk_qc_t qc = BLK_QC_T_NONE;
340 int ret; 343 int ret = 0;
341 344
342 if ((pos | iov_iter_alignment(iter)) & 345 if ((pos | iov_iter_alignment(iter)) &
343 (bdev_logical_block_size(bdev) - 1)) 346 (bdev_logical_block_size(bdev) - 1))
@@ -361,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
361 for (;;) { 364 for (;;) {
362 bio->bi_bdev = bdev; 365 bio->bi_bdev = bdev;
363 bio->bi_iter.bi_sector = pos >> 9; 366 bio->bi_iter.bi_sector = pos >> 9;
367 bio->bi_write_hint = iocb->ki_hint;
364 bio->bi_private = dio; 368 bio->bi_private = dio;
365 bio->bi_end_io = blkdev_bio_end_io; 369 bio->bi_end_io = blkdev_bio_end_io;
366 370
367 ret = bio_iov_iter_get_pages(bio, iter); 371 ret = bio_iov_iter_get_pages(bio, iter);
368 if (unlikely(ret)) { 372 if (unlikely(ret)) {
369 bio->bi_error = ret; 373 bio->bi_status = BLK_STS_IOERR;
370 bio_endio(bio); 374 bio_endio(bio);
371 break; 375 break;
372 } 376 }
@@ -415,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
415 } 419 }
416 __set_current_state(TASK_RUNNING); 420 __set_current_state(TASK_RUNNING);
417 421
418 ret = dio->bio.bi_error; 422 if (!ret)
423 ret = blk_status_to_errno(dio->bio.bi_status);
419 if (likely(!ret)) 424 if (likely(!ret))
420 ret = dio->size; 425 ret = dio->size;
421 426
@@ -439,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
439 444
440static __init int blkdev_init(void) 445static __init int blkdev_init(void)
441{ 446{
442 blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio)); 447 blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
443 if (!blkdev_dio_pool) 448 if (!blkdev_dio_pool)
444 return -ENOMEM; 449 return -ENOMEM;
445 return 0; 450 return 0;
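
The block_dev.c hunks above keep the blk_status_t in bio->bi_status through the whole direct-I/O path and only convert it to a negative errno via blk_status_to_errno() when completing the kiocb. A user-space sketch of that boundary conversion; the table here is assembled only from errno/status pairs that appear elsewhere in this diff, not taken from the kernel's own table in the block core.

#include <errno.h>
#include <stdio.h>

/* Stand-ins for the kernel's blk_status_t codes. */
enum blk_status {
	BLK_STS_OK, BLK_STS_IOERR, BLK_STS_TIMEOUT, BLK_STS_RESOURCE,
	BLK_STS_TRANSPORT, BLK_STS_TARGET, BLK_STS_NEXUS, BLK_STS_NOSPC,
	BLK_STS_MEDIUM, BLK_STS_PROTECTION,
};

/* Sketch of blk_status_to_errno(): pairs mirror the conversions made in the
 * surrounding hunks (e.g. the removed comment block in scsi_lib.c). */
static int status_to_errno(enum blk_status sts)
{
	switch (sts) {
	case BLK_STS_OK:         return 0;
	case BLK_STS_TIMEOUT:    return -ETIMEDOUT;
	case BLK_STS_RESOURCE:   return -ENOMEM;
	case BLK_STS_TRANSPORT:  return -ENOLINK;
	case BLK_STS_TARGET:     return -EREMOTEIO;
	case BLK_STS_NEXUS:      return -EBADE;
	case BLK_STS_NOSPC:      return -ENOSPC;
	case BLK_STS_MEDIUM:     return -ENODATA;
	case BLK_STS_PROTECTION: return -EILSEQ;
	default:                 return -EIO;
	}
}

int main(void)
{
	printf("%d\n", status_to_errno(BLK_STS_NOSPC)); /* -ENOSPC, i.e. -28 on Linux */
	return 0;
}
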
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b8622e4d1744..d87ac27a5f2b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
310 * The original bio may be split to several sub-bios, this is 310 * The original bio may be split to several sub-bios, this is
311 * done during endio of sub-bios 311 * done during endio of sub-bios
312 */ 312 */
313 int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int); 313 blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
314 blk_status_t);
314}; 315};
315 316
316/* 317/*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e635ca..4ded1c3f92b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
2129 /* mutex is not held! This is not save if IO is not yet completed 2129 /* mutex is not held! This is not save if IO is not yet completed
2130 * on umount */ 2130 * on umount */
2131 iodone_w_error = 0; 2131 iodone_w_error = 0;
2132 if (bp->bi_error) 2132 if (bp->bi_status)
2133 iodone_w_error = 1; 2133 iodone_w_error = 1;
2134 2134
2135 BUG_ON(NULL == block); 2135 BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
2143 if ((dev_state->state->print_mask & 2143 if ((dev_state->state->print_mask &
2144 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2144 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2145 pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2145 pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2146 bp->bi_error, 2146 bp->bi_status,
2147 btrfsic_get_block_type(dev_state->state, block), 2147 btrfsic_get_block_type(dev_state->state, block),
2148 block->logical_bytenr, dev_state->name, 2148 block->logical_bytenr, dev_state->name,
2149 block->dev_bytenr, block->mirror_num); 2149 block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b282d09d..a2fad39f79ba 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio)
155 unsigned long index; 155 unsigned long index;
156 int ret; 156 int ret;
157 157
158 if (bio->bi_error) 158 if (bio->bi_status)
159 cb->errors = 1; 159 cb->errors = 1;
160 160
161 /* if there are more bios still pending for this compressed 161 /* if there are more bios still pending for this compressed
@@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio)
268 struct page *page; 268 struct page *page;
269 unsigned long index; 269 unsigned long index;
270 270
271 if (bio->bi_error) 271 if (bio->bi_status)
272 cb->errors = 1; 272 cb->errors = 1;
273 273
274 /* if there are more bios still pending for this compressed 274 /* if there are more bios still pending for this compressed
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
287 cb->start, 287 cb->start,
288 cb->start + cb->len - 1, 288 cb->start + cb->len - 1,
289 NULL, 289 NULL,
290 bio->bi_error ? 0 : 1); 290 bio->bi_status ? 0 : 1);
291 cb->compressed_pages[0]->mapping = NULL; 291 cb->compressed_pages[0]->mapping = NULL;
292 292
293 end_compressed_writeback(inode, cb); 293 end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ out:
320 * This also checksums the file bytes and gets things ready for 320 * This also checksums the file bytes and gets things ready for
321 * the end io hooks. 321 * the end io hooks.
322 */ 322 */
323int btrfs_submit_compressed_write(struct inode *inode, u64 start, 323blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
324 unsigned long len, u64 disk_start, 324 unsigned long len, u64 disk_start,
325 unsigned long compressed_len, 325 unsigned long compressed_len,
326 struct page **compressed_pages, 326 struct page **compressed_pages,
@@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
335 struct page *page; 335 struct page *page;
336 u64 first_byte = disk_start; 336 u64 first_byte = disk_start;
337 struct block_device *bdev; 337 struct block_device *bdev;
338 int ret; 338 blk_status_t ret;
339 int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 339 int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
340 340
341 WARN_ON(start & ((u64)PAGE_SIZE - 1)); 341 WARN_ON(start & ((u64)PAGE_SIZE - 1));
342 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); 342 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
343 if (!cb) 343 if (!cb)
344 return -ENOMEM; 344 return BLK_STS_RESOURCE;
345 refcount_set(&cb->pending_bios, 0); 345 refcount_set(&cb->pending_bios, 0);
346 cb->errors = 0; 346 cb->errors = 0;
347 cb->inode = inode; 347 cb->inode = inode;
@@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
358 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); 358 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
359 if (!bio) { 359 if (!bio) {
360 kfree(cb); 360 kfree(cb);
361 return -ENOMEM; 361 return BLK_STS_RESOURCE;
362 } 362 }
363 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 363 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
364 bio->bi_private = cb; 364 bio->bi_private = cb;
@@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
368 /* create and submit bios for the compressed pages */ 368 /* create and submit bios for the compressed pages */
369 bytes_left = compressed_len; 369 bytes_left = compressed_len;
370 for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { 370 for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
371 int submit = 0;
372
371 page = compressed_pages[pg_index]; 373 page = compressed_pages[pg_index];
372 page->mapping = inode->i_mapping; 374 page->mapping = inode->i_mapping;
373 if (bio->bi_iter.bi_size) 375 if (bio->bi_iter.bi_size)
374 ret = io_tree->ops->merge_bio_hook(page, 0, 376 submit = io_tree->ops->merge_bio_hook(page, 0,
375 PAGE_SIZE, 377 PAGE_SIZE,
376 bio, 0); 378 bio, 0);
377 else
378 ret = 0;
379 379
380 page->mapping = NULL; 380 page->mapping = NULL;
381 if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) < 381 if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
382 PAGE_SIZE) { 382 PAGE_SIZE) {
383 bio_get(bio); 383 bio_get(bio);
384 384
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
400 400
401 ret = btrfs_map_bio(fs_info, bio, 0, 1); 401 ret = btrfs_map_bio(fs_info, bio, 0, 1);
402 if (ret) { 402 if (ret) {
403 bio->bi_error = ret; 403 bio->bi_status = ret;
404 bio_endio(bio); 404 bio_endio(bio);
405 } 405 }
406 406
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
434 434
435 ret = btrfs_map_bio(fs_info, bio, 0, 1); 435 ret = btrfs_map_bio(fs_info, bio, 0, 1);
436 if (ret) { 436 if (ret) {
437 bio->bi_error = ret; 437 bio->bi_status = ret;
438 bio_endio(bio); 438 bio_endio(bio);
439 } 439 }
440 440
@@ -569,7 +569,7 @@ next:
569 * After the compressed pages are read, we copy the bytes into the 569 * After the compressed pages are read, we copy the bytes into the
570 * bio we were passed and then call the bio end_io calls 570 * bio we were passed and then call the bio end_io calls
571 */ 571 */
572int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 572blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
573 int mirror_num, unsigned long bio_flags) 573 int mirror_num, unsigned long bio_flags)
574{ 574{
575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
586 u64 em_len; 586 u64 em_len;
587 u64 em_start; 587 u64 em_start;
588 struct extent_map *em; 588 struct extent_map *em;
589 int ret = -ENOMEM; 589 blk_status_t ret = BLK_STS_RESOURCE;
590 int faili = 0; 590 int faili = 0;
591 u32 *sums; 591 u32 *sums;
592 592
@@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
600 PAGE_SIZE); 600 PAGE_SIZE);
601 read_unlock(&em_tree->lock); 601 read_unlock(&em_tree->lock);
602 if (!em) 602 if (!em)
603 return -EIO; 603 return BLK_STS_IOERR;
604 604
605 compressed_len = em->block_len; 605 compressed_len = em->block_len;
606 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); 606 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
638 __GFP_HIGHMEM); 638 __GFP_HIGHMEM);
639 if (!cb->compressed_pages[pg_index]) { 639 if (!cb->compressed_pages[pg_index]) {
640 faili = pg_index - 1; 640 faili = pg_index - 1;
641 ret = -ENOMEM; 641 ret = BLK_STS_RESOURCE;
642 goto fail2; 642 goto fail2;
643 } 643 }
644 } 644 }
@@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
659 refcount_set(&cb->pending_bios, 1); 659 refcount_set(&cb->pending_bios, 1);
660 660
661 for (pg_index = 0; pg_index < nr_pages; pg_index++) { 661 for (pg_index = 0; pg_index < nr_pages; pg_index++) {
662 int submit = 0;
663
662 page = cb->compressed_pages[pg_index]; 664 page = cb->compressed_pages[pg_index];
663 page->mapping = inode->i_mapping; 665 page->mapping = inode->i_mapping;
664 page->index = em_start >> PAGE_SHIFT; 666 page->index = em_start >> PAGE_SHIFT;
665 667
666 if (comp_bio->bi_iter.bi_size) 668 if (comp_bio->bi_iter.bi_size)
667 ret = tree->ops->merge_bio_hook(page, 0, 669 submit = tree->ops->merge_bio_hook(page, 0,
668 PAGE_SIZE, 670 PAGE_SIZE,
669 comp_bio, 0); 671 comp_bio, 0);
670 else
671 ret = 0;
672 672
673 page->mapping = NULL; 673 page->mapping = NULL;
674 if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < 674 if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
675 PAGE_SIZE) { 675 PAGE_SIZE) {
676 bio_get(comp_bio); 676 bio_get(comp_bio);
677 677
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
697 697
698 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); 698 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
699 if (ret) { 699 if (ret) {
700 comp_bio->bi_error = ret; 700 comp_bio->bi_status = ret;
701 bio_endio(comp_bio); 701 bio_endio(comp_bio);
702 } 702 }
703 703
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
726 726
727 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); 727 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
728 if (ret) { 728 if (ret) {
729 comp_bio->bi_error = ret; 729 comp_bio->bi_status = ret;
730 bio_endio(comp_bio); 730 bio_endio(comp_bio);
731 } 731 }
732 732
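
In the compression.c hunks above, ret now carries only the blk_status_t from btrfs_map_bio(), while a separate submit flag records the merge_bio_hook() decision that the current bio is full and must be sent off before the next page can be added. A generic user-space sketch of that flush-when-full loop shape follows; batch_full() and flush_batch() are hypothetical helpers standing in for merge_bio_hook() and btrfs_map_bio().

#include <stdbool.h>
#include <stdio.h>

#define BATCH_MAX 4

/* Hypothetical stand-ins for merge_bio_hook() and btrfs_map_bio(). */
static bool batch_full(int count) { return count >= BATCH_MAX; }

static void flush_batch(int count)
{
	printf("submitting batch of %d pages\n", count);
}

int main(void)
{
	int in_batch = 0;

	for (int page = 0; page < 10; page++) {
		/* roughly mirrors: submit = io_tree->ops->merge_bio_hook(...) */
		bool submit = in_batch > 0 && batch_full(in_batch);

		if (submit) {		/* flush the current batch before adding the page */
			flush_batch(in_batch);
			in_batch = 0;
		}
		in_batch++;		/* add the page to the (possibly new) batch */
	}
	if (in_batch)
		flush_batch(in_batch);	/* final submit, like the tail of the hunk */
	return 0;
}
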
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43ab8df1..680d4265d601 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
48 unsigned long total_out, u64 disk_start, 48 unsigned long total_out, u64 disk_start,
49 struct bio *bio); 49 struct bio *bio);
50 50
51int btrfs_submit_compressed_write(struct inode *inode, u64 start, 51blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
52 unsigned long len, u64 disk_start, 52 unsigned long len, u64 disk_start,
53 unsigned long compressed_len, 53 unsigned long compressed_len,
54 struct page **compressed_pages, 54 struct page **compressed_pages,
55 unsigned long nr_pages); 55 unsigned long nr_pages);
56int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 56blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
57 int mirror_num, unsigned long bio_flags); 57 int mirror_num, unsigned long bio_flags);
58 58
59enum btrfs_compression_type { 59enum btrfs_compression_type {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4f8f75d9e839..a0d0c79d95ed 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
3078struct btrfs_dio_private; 3078struct btrfs_dio_private;
3079int btrfs_del_csums(struct btrfs_trans_handle *trans, 3079int btrfs_del_csums(struct btrfs_trans_handle *trans,
3080 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); 3080 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
3081int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); 3081blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
3082int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, 3082blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
3083 u64 logical_offset); 3083 u64 logical_offset);
3084int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3084int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
3085 struct btrfs_root *root, 3085 struct btrfs_root *root,
@@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
3094int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 3094int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
3095 struct btrfs_root *root, 3095 struct btrfs_root *root,
3096 struct btrfs_ordered_sum *sums); 3096 struct btrfs_ordered_sum *sums);
3097int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, 3097blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
3098 u64 file_start, int contig); 3098 u64 file_start, int contig);
3099int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 3099int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
3100 struct list_head *list, int search_commit); 3100 struct list_head *list, int search_commit);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f678dcb20e6..6036d15b47b8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
87 bio_end_io_t *end_io; 87 bio_end_io_t *end_io;
88 void *private; 88 void *private;
89 struct btrfs_fs_info *info; 89 struct btrfs_fs_info *info;
90 int error; 90 blk_status_t status;
91 enum btrfs_wq_endio_type metadata; 91 enum btrfs_wq_endio_type metadata;
92 struct list_head list; 92 struct list_head list;
93 struct btrfs_work work; 93 struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
131 */ 131 */
132 u64 bio_offset; 132 u64 bio_offset;
133 struct btrfs_work work; 133 struct btrfs_work work;
134 int error; 134 blk_status_t status;
135}; 135};
136 136
137/* 137/*
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
799 btrfs_work_func_t func; 799 btrfs_work_func_t func;
800 800
801 fs_info = end_io_wq->info; 801 fs_info = end_io_wq->info;
802 end_io_wq->error = bio->bi_error; 802 end_io_wq->status = bio->bi_status;
803 803
804 if (bio_op(bio) == REQ_OP_WRITE) { 804 if (bio_op(bio) == REQ_OP_WRITE) {
805 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { 805 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
836 btrfs_queue_work(wq, &end_io_wq->work); 836 btrfs_queue_work(wq, &end_io_wq->work);
837} 837}
838 838
839int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 839blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
840 enum btrfs_wq_endio_type metadata) 840 enum btrfs_wq_endio_type metadata)
841{ 841{
842 struct btrfs_end_io_wq *end_io_wq; 842 struct btrfs_end_io_wq *end_io_wq;
843 843
844 end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); 844 end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
845 if (!end_io_wq) 845 if (!end_io_wq)
846 return -ENOMEM; 846 return BLK_STS_RESOURCE;
847 847
848 end_io_wq->private = bio->bi_private; 848 end_io_wq->private = bio->bi_private;
849 end_io_wq->end_io = bio->bi_end_io; 849 end_io_wq->end_io = bio->bi_end_io;
850 end_io_wq->info = info; 850 end_io_wq->info = info;
851 end_io_wq->error = 0; 851 end_io_wq->status = 0;
852 end_io_wq->bio = bio; 852 end_io_wq->bio = bio;
853 end_io_wq->metadata = metadata; 853 end_io_wq->metadata = metadata;
854 854
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
868static void run_one_async_start(struct btrfs_work *work) 868static void run_one_async_start(struct btrfs_work *work)
869{ 869{
870 struct async_submit_bio *async; 870 struct async_submit_bio *async;
871 int ret; 871 blk_status_t ret;
872 872
873 async = container_of(work, struct async_submit_bio, work); 873 async = container_of(work, struct async_submit_bio, work);
874 ret = async->submit_bio_start(async->inode, async->bio, 874 ret = async->submit_bio_start(async->inode, async->bio,
875 async->mirror_num, async->bio_flags, 875 async->mirror_num, async->bio_flags,
876 async->bio_offset); 876 async->bio_offset);
877 if (ret) 877 if (ret)
878 async->error = ret; 878 async->status = ret;
879} 879}
880 880
881static void run_one_async_done(struct btrfs_work *work) 881static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
898 wake_up(&fs_info->async_submit_wait); 898 wake_up(&fs_info->async_submit_wait);
899 899
900 /* If an error occurred we just want to clean up the bio and move on */ 900 /* If an error occurred we just want to clean up the bio and move on */
901 if (async->error) { 901 if (async->status) {
902 async->bio->bi_error = async->error; 902 async->bio->bi_status = async->status;
903 bio_endio(async->bio); 903 bio_endio(async->bio);
904 return; 904 return;
905 } 905 }
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
916 kfree(async); 916 kfree(async);
917} 917}
918 918
919int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 919blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
920 struct bio *bio, int mirror_num, 920 struct inode *inode, struct bio *bio, int mirror_num,
921 unsigned long bio_flags, 921 unsigned long bio_flags, u64 bio_offset,
922 u64 bio_offset, 922 extent_submit_bio_hook_t *submit_bio_start,
923 extent_submit_bio_hook_t *submit_bio_start, 923 extent_submit_bio_hook_t *submit_bio_done)
924 extent_submit_bio_hook_t *submit_bio_done)
925{ 924{
926 struct async_submit_bio *async; 925 struct async_submit_bio *async;
927 926
928 async = kmalloc(sizeof(*async), GFP_NOFS); 927 async = kmalloc(sizeof(*async), GFP_NOFS);
929 if (!async) 928 if (!async)
930 return -ENOMEM; 929 return BLK_STS_RESOURCE;
931 930
932 async->inode = inode; 931 async->inode = inode;
933 async->bio = bio; 932 async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
941 async->bio_flags = bio_flags; 940 async->bio_flags = bio_flags;
942 async->bio_offset = bio_offset; 941 async->bio_offset = bio_offset;
943 942
944 async->error = 0; 943 async->status = 0;
945 944
946 atomic_inc(&fs_info->nr_async_submits); 945 atomic_inc(&fs_info->nr_async_submits);
947 946
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
959 return 0; 958 return 0;
960} 959}
961 960
962static int btree_csum_one_bio(struct bio *bio) 961static blk_status_t btree_csum_one_bio(struct bio *bio)
963{ 962{
964 struct bio_vec *bvec; 963 struct bio_vec *bvec;
965 struct btrfs_root *root; 964 struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
972 break; 971 break;
973 } 972 }
974 973
975 return ret; 974 return errno_to_blk_status(ret);
976} 975}
977 976
978static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, 977static blk_status_t __btree_submit_bio_start(struct inode *inode,
979 int mirror_num, unsigned long bio_flags, 978 struct bio *bio, int mirror_num, unsigned long bio_flags,
980 u64 bio_offset) 979 u64 bio_offset)
981{ 980{
982 /* 981 /*
983 * when we're called for a write, we're already in the async 982 * when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
986 return btree_csum_one_bio(bio); 985 return btree_csum_one_bio(bio);
987} 986}
988 987
989static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, 988static blk_status_t __btree_submit_bio_done(struct inode *inode,
990 int mirror_num, unsigned long bio_flags, 989 struct bio *bio, int mirror_num, unsigned long bio_flags,
991 u64 bio_offset) 990 u64 bio_offset)
992{ 991{
993 int ret; 992 blk_status_t ret;
994 993
995 /* 994 /*
996 * when we're called for a write, we're already in the async 995 * when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
998 */ 997 */
999 ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1); 998 ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
1000 if (ret) { 999 if (ret) {
1001 bio->bi_error = ret; 1000 bio->bi_status = ret;
1002 bio_endio(bio); 1001 bio_endio(bio);
1003 } 1002 }
1004 return ret; 1003 return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
1015 return 1; 1014 return 1;
1016} 1015}
1017 1016
1018static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, 1017static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
1019 int mirror_num, unsigned long bio_flags, 1018 int mirror_num, unsigned long bio_flags,
1020 u64 bio_offset) 1019 u64 bio_offset)
1021{ 1020{
1022 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1021 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1023 int async = check_async_write(bio_flags); 1022 int async = check_async_write(bio_flags);
1024 int ret; 1023 blk_status_t ret;
1025 1024
1026 if (bio_op(bio) != REQ_OP_WRITE) { 1025 if (bio_op(bio) != REQ_OP_WRITE) {
1027 /* 1026 /*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
1054 return 0; 1053 return 0;
1055 1054
1056out_w_error: 1055out_w_error:
1057 bio->bi_error = ret; 1056 bio->bi_status = ret;
1058 bio_endio(bio); 1057 bio_endio(bio);
1059 return ret; 1058 return ret;
1060} 1059}
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
1820 end_io_wq = container_of(work, struct btrfs_end_io_wq, work); 1819 end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
1821 bio = end_io_wq->bio; 1820 bio = end_io_wq->bio;
1822 1821
1823 bio->bi_error = end_io_wq->error; 1822 bio->bi_status = end_io_wq->status;
1824 bio->bi_private = end_io_wq->private; 1823 bio->bi_private = end_io_wq->private;
1825 bio->bi_end_io = end_io_wq->end_io; 1824 bio->bi_end_io = end_io_wq->end_io;
1826 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); 1825 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3497,11 +3496,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
3497 * any device where the flush fails with eopnotsupp are flagged as not-barrier 3496 * any device where the flush fails with eopnotsupp are flagged as not-barrier
3498 * capable 3497 * capable
3499 */ 3498 */
3500static int write_dev_flush(struct btrfs_device *device, int wait) 3499static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
3501{ 3500{
3502 struct request_queue *q = bdev_get_queue(device->bdev); 3501 struct request_queue *q = bdev_get_queue(device->bdev);
3503 struct bio *bio; 3502 struct bio *bio;
3504 int ret = 0; 3503 blk_status_t ret = 0;
3505 3504
3506 if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) 3505 if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
3507 return 0; 3506 return 0;
@@ -3513,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3513 3512
3514 wait_for_completion(&device->flush_wait); 3513 wait_for_completion(&device->flush_wait);
3515 3514
3516 if (bio->bi_error) { 3515 if (bio->bi_status) {
3517 ret = bio->bi_error; 3516 ret = bio->bi_status;
3518 btrfs_dev_stat_inc_and_print(device, 3517 btrfs_dev_stat_inc_and_print(device,
3519 BTRFS_DEV_STAT_FLUSH_ERRS); 3518 BTRFS_DEV_STAT_FLUSH_ERRS);
3520 } 3519 }
@@ -3533,7 +3532,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3533 device->flush_bio = NULL; 3532 device->flush_bio = NULL;
3534 bio = btrfs_io_bio_alloc(GFP_NOFS, 0); 3533 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
3535 if (!bio) 3534 if (!bio)
3536 return -ENOMEM; 3535 return BLK_STS_RESOURCE;
3537 3536
3538 bio->bi_end_io = btrfs_end_empty_barrier; 3537 bio->bi_end_io = btrfs_end_empty_barrier;
3539 bio->bi_bdev = device->bdev; 3538 bio->bi_bdev = device->bdev;
@@ -3558,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
3558 struct btrfs_device *dev; 3557 struct btrfs_device *dev;
3559 int errors_send = 0; 3558 int errors_send = 0;
3560 int errors_wait = 0; 3559 int errors_wait = 0;
3561 int ret; 3560 blk_status_t ret;
3562 3561
3563 /* send down all the barriers */ 3562 /* send down all the barriers */
3564 head = &info->fs_devices->devices; 3563 head = &info->fs_devices->devices;
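[Editor's note] The disk-io.c hunks above all follow one pattern: submit hooks return blk_status_t instead of int and report failure through bio->bi_status before completing the bio. A minimal sketch of that shape, assuming illustrative names (my_map_bio, my_submit_hook) rather than the real btrfs helpers:

#include <linux/bio.h>
#include <linux/blk_types.h>
#include <linux/fs.h>

static blk_status_t my_map_bio(struct bio *bio)
{
	/* Placeholder for the real mapping/submission step. */
	return BLK_STS_OK;
}

static blk_status_t my_submit_hook(struct inode *inode, struct bio *bio,
				   int mirror_num, unsigned long bio_flags,
				   u64 bio_offset)
{
	blk_status_t ret = my_map_bio(bio);

	if (ret) {
		/* Report failure through bi_status, then complete the bio. */
		bio->bi_status = ret;
		bio_endio(bio);
	}
	return ret;
}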
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 21f1ceb85b76..c581927555f3 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
118int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 118int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
119u32 btrfs_csum_data(const char *data, u32 seed, size_t len); 119u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
120void btrfs_csum_final(u32 crc, u8 *result); 120void btrfs_csum_final(u32 crc, u8 *result);
121int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 121blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
122 enum btrfs_wq_endio_type metadata); 122 enum btrfs_wq_endio_type metadata);
123int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 123blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
124 struct bio *bio, int mirror_num, 124 struct inode *inode, struct bio *bio, int mirror_num,
125 unsigned long bio_flags, u64 bio_offset, 125 unsigned long bio_flags, u64 bio_offset,
126 extent_submit_bio_hook_t *submit_bio_start, 126 extent_submit_bio_hook_t *submit_bio_start,
127 extent_submit_bio_hook_t *submit_bio_done); 127 extent_submit_bio_hook_t *submit_bio_done);
128unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); 128unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
129int btrfs_write_tree_block(struct extent_buffer *buf); 129int btrfs_write_tree_block(struct extent_buffer *buf);
130int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); 130int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d3619e010005..d1cd60140817 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
174 goto free_state_cache; 174 goto free_state_cache;
175 175
176 btrfs_bioset = bioset_create(BIO_POOL_SIZE, 176 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
177 offsetof(struct btrfs_io_bio, bio)); 177 offsetof(struct btrfs_io_bio, bio),
178 BIOSET_NEED_BVECS);
178 if (!btrfs_bioset) 179 if (!btrfs_bioset)
179 goto free_buffer_cache; 180 goto free_buffer_cache;
180 181
@@ -2399,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2399 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2400 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2400 struct bio *bio; 2401 struct bio *bio;
2401 int read_mode = 0; 2402 int read_mode = 0;
2403 blk_status_t status;
2402 int ret; 2404 int ret;
2403 2405
2404 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); 2406 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2431,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2431 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", 2433 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2432 read_mode, failrec->this_mirror, failrec->in_validation); 2434 read_mode, failrec->this_mirror, failrec->in_validation);
2433 2435
2434 ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, 2436 status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
2435 failrec->bio_flags, 0); 2437 failrec->bio_flags, 0);
2436 if (ret) { 2438 if (status) {
2437 free_io_failure(BTRFS_I(inode), failrec); 2439 free_io_failure(BTRFS_I(inode), failrec);
2438 bio_put(bio); 2440 bio_put(bio);
2441 ret = blk_status_to_errno(status);
2439 } 2442 }
2440 2443
2441 return ret; 2444 return ret;
@@ -2474,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2474 */ 2477 */
2475static void end_bio_extent_writepage(struct bio *bio) 2478static void end_bio_extent_writepage(struct bio *bio)
2476{ 2479{
2480 int error = blk_status_to_errno(bio->bi_status);
2477 struct bio_vec *bvec; 2481 struct bio_vec *bvec;
2478 u64 start; 2482 u64 start;
2479 u64 end; 2483 u64 end;
@@ -2503,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio)
2503 start = page_offset(page); 2507 start = page_offset(page);
2504 end = start + bvec->bv_offset + bvec->bv_len - 1; 2508 end = start + bvec->bv_offset + bvec->bv_len - 1;
2505 2509
2506 end_extent_writepage(page, bio->bi_error, start, end); 2510 end_extent_writepage(page, error, start, end);
2507 end_page_writeback(page); 2511 end_page_writeback(page);
2508 } 2512 }
2509 2513
@@ -2536,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2536static void end_bio_extent_readpage(struct bio *bio) 2540static void end_bio_extent_readpage(struct bio *bio)
2537{ 2541{
2538 struct bio_vec *bvec; 2542 struct bio_vec *bvec;
2539 int uptodate = !bio->bi_error; 2543 int uptodate = !bio->bi_status;
2540 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 2544 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2541 struct extent_io_tree *tree; 2545 struct extent_io_tree *tree;
2542 u64 offset = 0; 2546 u64 offset = 0;
@@ -2556,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio)
2556 2560
2557 btrfs_debug(fs_info, 2561 btrfs_debug(fs_info,
2558 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", 2562 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2559 (u64)bio->bi_iter.bi_sector, bio->bi_error, 2563 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2560 io_bio->mirror_num); 2564 io_bio->mirror_num);
2561 tree = &BTRFS_I(inode)->io_tree; 2565 tree = &BTRFS_I(inode)->io_tree;
2562 2566
@@ -2615,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio)
2615 ret = bio_readpage_error(bio, offset, page, 2619 ret = bio_readpage_error(bio, offset, page,
2616 start, end, mirror); 2620 start, end, mirror);
2617 if (ret == 0) { 2621 if (ret == 0) {
2618 uptodate = !bio->bi_error; 2622 uptodate = !bio->bi_status;
2619 offset += len; 2623 offset += len;
2620 continue; 2624 continue;
2621 } 2625 }
@@ -2673,7 +2677,7 @@ readpage_ok:
2673 endio_readpage_release_extent(tree, extent_start, extent_len, 2677 endio_readpage_release_extent(tree, extent_start, extent_len,
2674 uptodate); 2678 uptodate);
2675 if (io_bio->end_io) 2679 if (io_bio->end_io)
2676 io_bio->end_io(io_bio, bio->bi_error); 2680 io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
2677 bio_put(bio); 2681 bio_put(bio);
2678} 2682}
2679 2683
@@ -2743,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2743static int __must_check submit_one_bio(struct bio *bio, int mirror_num, 2747static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2744 unsigned long bio_flags) 2748 unsigned long bio_flags)
2745{ 2749{
2746 int ret = 0; 2750 blk_status_t ret = 0;
2747 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 2751 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2748 struct page *page = bvec->bv_page; 2752 struct page *page = bvec->bv_page;
2749 struct extent_io_tree *tree = bio->bi_private; 2753 struct extent_io_tree *tree = bio->bi_private;
@@ -2761,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2761 btrfsic_submit_bio(bio); 2765 btrfsic_submit_bio(bio);
2762 2766
2763 bio_put(bio); 2767 bio_put(bio);
2764 return ret; 2768 return blk_status_to_errno(ret);
2765} 2769}
2766 2770
2767static int merge_bio(struct extent_io_tree *tree, struct page *page, 2771static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2826,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
2826 bio_add_page(bio, page, page_size, offset); 2830 bio_add_page(bio, page, page_size, offset);
2827 bio->bi_end_io = end_io_func; 2831 bio->bi_end_io = end_io_func;
2828 bio->bi_private = tree; 2832 bio->bi_private = tree;
2833 bio->bi_write_hint = page->mapping->host->i_write_hint;
2829 bio_set_op_attrs(bio, op, op_flags); 2834 bio_set_op_attrs(bio, op, op_flags);
2830 if (wbc) { 2835 if (wbc) {
2831 wbc_init_bio(wbc, bio); 2836 wbc_init_bio(wbc, bio);
@@ -3707,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
3707 BUG_ON(!eb); 3712 BUG_ON(!eb);
3708 done = atomic_dec_and_test(&eb->io_pages); 3713 done = atomic_dec_and_test(&eb->io_pages);
3709 3714
3710 if (bio->bi_error || 3715 if (bio->bi_status ||
3711 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { 3716 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3712 ClearPageUptodate(page); 3717 ClearPageUptodate(page);
3713 set_btree_ioerr(page); 3718 set_btree_ioerr(page);
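[Editor's note] extent_io.c sits on the boundary between bio status codes and callers that still expect errnos, hence the blk_status_to_errno() calls above. A sketch of that boundary, with hypothetical names:

#include <linux/bio.h>
#include <linux/blk_types.h>

/* my_submit_hook() stands in for a hook that now returns blk_status_t. */
static blk_status_t my_submit_hook(struct bio *bio)
{
	return BLK_STS_OK;
}

static int my_submit_one_bio(struct bio *bio)
{
	blk_status_t status = my_submit_hook(bio);

	/* Callers of this wrapper still expect a negative errno (or 0). */
	return blk_status_to_errno(status);
}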
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1eafa2f0ede3..487ca0207cb6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,9 +92,9 @@ struct btrfs_inode;
92struct btrfs_io_bio; 92struct btrfs_io_bio;
93struct io_failure_record; 93struct io_failure_record;
94 94
95typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, 95typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
96 int mirror_num, unsigned long bio_flags, 96 struct bio *bio, int mirror_num, unsigned long bio_flags,
97 u64 bio_offset); 97 u64 bio_offset);
98struct extent_io_ops { 98struct extent_io_ops {
99 /* 99 /*
100 * The following callbacks must be allways defined, the function 100 * The following callbacks must be allways defined, the function
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31d7163..5b1c7090e546 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
160 kfree(bio->csum_allocated); 160 kfree(bio->csum_allocated);
161} 161}
162 162
163static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, 163static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
164 u64 logical_offset, u32 *dst, int dio) 164 u64 logical_offset, u32 *dst, int dio)
165{ 165{
166 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 166 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
182 182
183 path = btrfs_alloc_path(); 183 path = btrfs_alloc_path();
184 if (!path) 184 if (!path)
185 return -ENOMEM; 185 return BLK_STS_RESOURCE;
186 186
187 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; 187 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
188 if (!dst) { 188 if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
191 csum_size, GFP_NOFS); 191 csum_size, GFP_NOFS);
192 if (!btrfs_bio->csum_allocated) { 192 if (!btrfs_bio->csum_allocated) {
193 btrfs_free_path(path); 193 btrfs_free_path(path);
194 return -ENOMEM; 194 return BLK_STS_RESOURCE;
195 } 195 }
196 btrfs_bio->csum = btrfs_bio->csum_allocated; 196 btrfs_bio->csum = btrfs_bio->csum_allocated;
197 btrfs_bio->end_io = btrfs_io_bio_endio_readpage; 197 btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ next:
303 return 0; 303 return 0;
304} 304}
305 305
306int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) 306blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
307{ 307{
308 return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); 308 return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
309} 309}
310 310
311int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) 311blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
312{ 312{
313 return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1); 313 return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
314} 314}
@@ -433,7 +433,7 @@ fail:
433 return ret; 433 return ret;
434} 434}
435 435
436int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, 436blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
437 u64 file_start, int contig) 437 u64 file_start, int contig)
438{ 438{
439 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 439 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
452 sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), 452 sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
453 GFP_NOFS); 453 GFP_NOFS);
454 if (!sums) 454 if (!sums)
455 return -ENOMEM; 455 return BLK_STS_RESOURCE;
456 456
457 sums->len = bio->bi_iter.bi_size; 457 sums->len = bio->bi_iter.bi_size;
458 INIT_LIST_HEAD(&sums->list); 458 INIT_LIST_HEAD(&sums->list);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index da1096eb1a40..59e2dccdf75b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1875,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1875 ssize_t num_written = 0; 1875 ssize_t num_written = 0;
1876 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); 1876 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1877 ssize_t err; 1877 ssize_t err;
1878 loff_t pos; 1878 loff_t pos = iocb->ki_pos;
1879 size_t count; 1879 size_t count = iov_iter_count(from);
1880 loff_t oldsize; 1880 loff_t oldsize;
1881 int clean_page = 0; 1881 int clean_page = 0;
1882 1882
1883 inode_lock(inode); 1883 if ((iocb->ki_flags & IOCB_NOWAIT) &&
1884 (iocb->ki_flags & IOCB_DIRECT)) {
1885 /* Don't sleep on inode rwsem */
1886 if (!inode_trylock(inode))
1887 return -EAGAIN;
1888 /*
1889 * We will allocate space in case nodatacow is not set,
1890 * so bail
1891 */
1892 if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1893 BTRFS_INODE_PREALLOC)) ||
1894 check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
1895 inode_unlock(inode);
1896 return -EAGAIN;
1897 }
1898 } else
1899 inode_lock(inode);
1900
1884 err = generic_write_checks(iocb, from); 1901 err = generic_write_checks(iocb, from);
1885 if (err <= 0) { 1902 if (err <= 0) {
1886 inode_unlock(inode); 1903 inode_unlock(inode);
@@ -1914,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1914 */ 1931 */
1915 update_time_for_write(inode); 1932 update_time_for_write(inode);
1916 1933
1917 pos = iocb->ki_pos;
1918 count = iov_iter_count(from);
1919 start_pos = round_down(pos, fs_info->sectorsize); 1934 start_pos = round_down(pos, fs_info->sectorsize);
1920 oldsize = i_size_read(inode); 1935 oldsize = i_size_read(inode);
1921 if (start_pos > oldsize) { 1936 if (start_pos > oldsize) {
@@ -3071,13 +3086,19 @@ out:
3071 return offset; 3086 return offset;
3072} 3087}
3073 3088
3089static int btrfs_file_open(struct inode *inode, struct file *filp)
3090{
3091 filp->f_mode |= FMODE_AIO_NOWAIT;
3092 return generic_file_open(inode, filp);
3093}
3094
3074const struct file_operations btrfs_file_operations = { 3095const struct file_operations btrfs_file_operations = {
3075 .llseek = btrfs_file_llseek, 3096 .llseek = btrfs_file_llseek,
3076 .read_iter = generic_file_read_iter, 3097 .read_iter = generic_file_read_iter,
3077 .splice_read = generic_file_splice_read, 3098 .splice_read = generic_file_splice_read,
3078 .write_iter = btrfs_file_write_iter, 3099 .write_iter = btrfs_file_write_iter,
3079 .mmap = btrfs_file_mmap, 3100 .mmap = btrfs_file_mmap,
3080 .open = generic_file_open, 3101 .open = btrfs_file_open,
3081 .release = btrfs_release_file, 3102 .release = btrfs_release_file,
3082 .fsync = btrfs_sync_file, 3103 .fsync = btrfs_sync_file,
3083 .fallocate = btrfs_fallocate, 3104 .fallocate = btrfs_fallocate,
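[Editor's note] The file.c changes add nowait AIO support: trylock the inode for IOCB_NOWAIT requests, return -EAGAIN instead of sleeping, and advertise the capability via FMODE_AIO_NOWAIT at open time. A compact sketch of that pattern with made-up function names:

#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t my_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* Never sleep on the inode lock for a nowait request. */
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else {
		inode_lock(inode);
	}

	/* ... the actual write path would run here ... */

	inode_unlock(inode);
	return 0;
}

static int my_open(struct inode *inode, struct file *filp)
{
	/* Advertise nowait AIO support so IOCB_NOWAIT requests are accepted. */
	filp->f_mode |= FMODE_AIO_NOWAIT;
	return generic_file_open(inode, filp);
}

The btrfs version additionally bails out with -EAGAIN when the write could allocate space (no NODATACOW/PREALLOC), since allocation may block.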
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ef3c98c527c1..556c93060606 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -842,13 +842,12 @@ retry:
842 NULL, EXTENT_LOCKED | EXTENT_DELALLOC, 842 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
843 PAGE_UNLOCK | PAGE_CLEAR_DIRTY | 843 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
844 PAGE_SET_WRITEBACK); 844 PAGE_SET_WRITEBACK);
845 ret = btrfs_submit_compressed_write(inode, 845 if (btrfs_submit_compressed_write(inode,
846 async_extent->start, 846 async_extent->start,
847 async_extent->ram_size, 847 async_extent->ram_size,
848 ins.objectid, 848 ins.objectid,
849 ins.offset, async_extent->pages, 849 ins.offset, async_extent->pages,
850 async_extent->nr_pages); 850 async_extent->nr_pages)) {
851 if (ret) {
852 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 851 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
853 struct page *p = async_extent->pages[0]; 852 struct page *p = async_extent->pages[0];
854 const u64 start = async_extent->start; 853 const u64 start = async_extent->start;
@@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1901 * At IO completion time the cums attached on the ordered extent record 1900 * At IO completion time the cums attached on the ordered extent record
1902 * are inserted into the btree 1901 * are inserted into the btree
1903 */ 1902 */
1904static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, 1903static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
1905 int mirror_num, unsigned long bio_flags, 1904 struct bio *bio, int mirror_num, unsigned long bio_flags,
1906 u64 bio_offset) 1905 u64 bio_offset)
1907{ 1906{
1908 int ret = 0; 1907 blk_status_t ret = 0;
1909 1908
1910 ret = btrfs_csum_one_bio(inode, bio, 0, 0); 1909 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
1911 BUG_ON(ret); /* -ENOMEM */ 1910 BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
1920 * At IO completion time the cums attached on the ordered extent record 1919 * At IO completion time the cums attached on the ordered extent record
1921 * are inserted into the btree 1920 * are inserted into the btree
1922 */ 1921 */
1923static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, 1922static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
1924 int mirror_num, unsigned long bio_flags, 1923 struct bio *bio, int mirror_num, unsigned long bio_flags,
1925 u64 bio_offset) 1924 u64 bio_offset)
1926{ 1925{
1927 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1926 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1928 int ret; 1927 blk_status_t ret;
1929 1928
1930 ret = btrfs_map_bio(fs_info, bio, mirror_num, 1); 1929 ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
1931 if (ret) { 1930 if (ret) {
1932 bio->bi_error = ret; 1931 bio->bi_status = ret;
1933 bio_endio(bio); 1932 bio_endio(bio);
1934 } 1933 }
1935 return ret; 1934 return ret;
@@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
1939 * extent_io.c submission hook. This does the right thing for csum calculation 1938 * extent_io.c submission hook. This does the right thing for csum calculation
1940 * on write, or reading the csums from the tree before a read 1939 * on write, or reading the csums from the tree before a read
1941 */ 1940 */
1942static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, 1941static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
1943 int mirror_num, unsigned long bio_flags, 1942 int mirror_num, unsigned long bio_flags,
1944 u64 bio_offset) 1943 u64 bio_offset)
1945{ 1944{
1946 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1945 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1947 struct btrfs_root *root = BTRFS_I(inode)->root; 1946 struct btrfs_root *root = BTRFS_I(inode)->root;
1948 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; 1947 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
1949 int ret = 0; 1948 blk_status_t ret = 0;
1950 int skip_sum; 1949 int skip_sum;
1951 int async = !atomic_read(&BTRFS_I(inode)->sync_writers); 1950 int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
1952 1951
@@ -1991,8 +1990,8 @@ mapit:
1991 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); 1990 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
1992 1991
1993out: 1992out:
1994 if (ret < 0) { 1993 if (ret) {
1995 bio->bi_error = ret; 1994 bio->bi_status = ret;
1996 bio_endio(bio); 1995 bio_endio(bio);
1997 } 1996 }
1998 return ret; 1997 return ret;
@@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
8037 struct bio_vec *bvec; 8036 struct bio_vec *bvec;
8038 int i; 8037 int i;
8039 8038
8040 if (bio->bi_error) 8039 if (bio->bi_status)
8041 goto end; 8040 goto end;
8042 8041
8043 ASSERT(bio->bi_vcnt == 1); 8042 ASSERT(bio->bi_vcnt == 1);
@@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
8116 int ret; 8115 int ret;
8117 int i; 8116 int i;
8118 8117
8119 if (bio->bi_error) 8118 if (bio->bi_status)
8120 goto end; 8119 goto end;
8121 8120
8122 uptodate = 1; 8121 uptodate = 1;
@@ -8141,8 +8140,8 @@ end:
8141 bio_put(bio); 8140 bio_put(bio);
8142} 8141}
8143 8142
8144static int __btrfs_subio_endio_read(struct inode *inode, 8143static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
8145 struct btrfs_io_bio *io_bio, int err) 8144 struct btrfs_io_bio *io_bio, blk_status_t err)
8146{ 8145{
8147 struct btrfs_fs_info *fs_info; 8146 struct btrfs_fs_info *fs_info;
8148 struct bio_vec *bvec; 8147 struct bio_vec *bvec;
@@ -8184,7 +8183,7 @@ try_again:
8184 io_bio->mirror_num, 8183 io_bio->mirror_num,
8185 btrfs_retry_endio, &done); 8184 btrfs_retry_endio, &done);
8186 if (ret) { 8185 if (ret) {
8187 err = ret; 8186 err = errno_to_blk_status(ret);
8188 goto next; 8187 goto next;
8189 } 8188 }
8190 8189
@@ -8211,8 +8210,8 @@ next:
8211 return err; 8210 return err;
8212} 8211}
8213 8212
8214static int btrfs_subio_endio_read(struct inode *inode, 8213static blk_status_t btrfs_subio_endio_read(struct inode *inode,
8215 struct btrfs_io_bio *io_bio, int err) 8214 struct btrfs_io_bio *io_bio, blk_status_t err)
8216{ 8215{
8217 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 8216 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
8218 8217
@@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
8232 struct inode *inode = dip->inode; 8231 struct inode *inode = dip->inode;
8233 struct bio *dio_bio; 8232 struct bio *dio_bio;
8234 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 8233 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8235 int err = bio->bi_error; 8234 blk_status_t err = bio->bi_status;
8236 8235
8237 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) 8236 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
8238 err = btrfs_subio_endio_read(inode, io_bio, err); 8237 err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
8243 8242
8244 kfree(dip); 8243 kfree(dip);
8245 8244
8246 dio_bio->bi_error = bio->bi_error; 8245 dio_bio->bi_status = bio->bi_status;
8247 dio_end_io(dio_bio, bio->bi_error); 8246 dio_end_io(dio_bio);
8248 8247
8249 if (io_bio->end_io) 8248 if (io_bio->end_io)
8250 io_bio->end_io(io_bio, err); 8249 io_bio->end_io(io_bio, blk_status_to_errno(err));
8251 bio_put(bio); 8250 bio_put(bio);
8252} 8251}
8253 8252
@@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio)
8299 struct bio *dio_bio = dip->dio_bio; 8298 struct bio *dio_bio = dip->dio_bio;
8300 8299
8301 __endio_write_update_ordered(dip->inode, dip->logical_offset, 8300 __endio_write_update_ordered(dip->inode, dip->logical_offset,
8302 dip->bytes, !bio->bi_error); 8301 dip->bytes, !bio->bi_status);
8303 8302
8304 kfree(dip); 8303 kfree(dip);
8305 8304
8306 dio_bio->bi_error = bio->bi_error; 8305 dio_bio->bi_status = bio->bi_status;
8307 dio_end_io(dio_bio, bio->bi_error); 8306 dio_end_io(dio_bio);
8308 bio_put(bio); 8307 bio_put(bio);
8309} 8308}
8310 8309
8311static int __btrfs_submit_bio_start_direct_io(struct inode *inode, 8310static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
8312 struct bio *bio, int mirror_num, 8311 struct bio *bio, int mirror_num,
8313 unsigned long bio_flags, u64 offset) 8312 unsigned long bio_flags, u64 offset)
8314{ 8313{
8315 int ret; 8314 blk_status_t ret;
8316 ret = btrfs_csum_one_bio(inode, bio, offset, 1); 8315 ret = btrfs_csum_one_bio(inode, bio, offset, 1);
8317 BUG_ON(ret); /* -ENOMEM */ 8316 BUG_ON(ret); /* -ENOMEM */
8318 return 0; 8317 return 0;
@@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
8321static void btrfs_end_dio_bio(struct bio *bio) 8320static void btrfs_end_dio_bio(struct bio *bio)
8322{ 8321{
8323 struct btrfs_dio_private *dip = bio->bi_private; 8322 struct btrfs_dio_private *dip = bio->bi_private;
8324 int err = bio->bi_error; 8323 blk_status_t err = bio->bi_status;
8325 8324
8326 if (err) 8325 if (err)
8327 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, 8326 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
8351 if (dip->errors) { 8350 if (dip->errors) {
8352 bio_io_error(dip->orig_bio); 8351 bio_io_error(dip->orig_bio);
8353 } else { 8352 } else {
8354 dip->dio_bio->bi_error = 0; 8353 dip->dio_bio->bi_status = 0;
8355 bio_endio(dip->orig_bio); 8354 bio_endio(dip->orig_bio);
8356 } 8355 }
8357out: 8356out:
@@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
8368 return bio; 8367 return bio;
8369} 8368}
8370 8369
8371static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode, 8370static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
8372 struct btrfs_dio_private *dip, 8371 struct btrfs_dio_private *dip,
8373 struct bio *bio, 8372 struct bio *bio,
8374 u64 file_offset) 8373 u64 file_offset)
8375{ 8374{
8376 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 8375 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8377 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); 8376 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
8378 int ret; 8377 blk_status_t ret;
8379 8378
8380 /* 8379 /*
8381 * We load all the csum data we need when we submit 8380 * We load all the csum data we need when we submit
@@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
8406 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 8405 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8407 struct btrfs_dio_private *dip = bio->bi_private; 8406 struct btrfs_dio_private *dip = bio->bi_private;
8408 bool write = bio_op(bio) == REQ_OP_WRITE; 8407 bool write = bio_op(bio) == REQ_OP_WRITE;
8409 int ret; 8408 blk_status_t ret;
8410 8409
8411 if (async_submit) 8410 if (async_submit)
8412 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); 8411 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8649,7 +8648,7 @@ free_ordered:
8649 * callbacks - they require an allocated dip and a clone of dio_bio. 8648 * callbacks - they require an allocated dip and a clone of dio_bio.
8650 */ 8649 */
8651 if (io_bio && dip) { 8650 if (io_bio && dip) {
8652 io_bio->bi_error = -EIO; 8651 io_bio->bi_status = BLK_STS_IOERR;
8653 bio_endio(io_bio); 8652 bio_endio(io_bio);
8654 /* 8653 /*
8655 * The end io callbacks free our dip, do the final put on io_bio 8654 * The end io callbacks free our dip, do the final put on io_bio
@@ -8668,12 +8667,12 @@ free_ordered:
8668 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, 8667 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
8669 file_offset + dio_bio->bi_iter.bi_size - 1); 8668 file_offset + dio_bio->bi_iter.bi_size - 1);
8670 8669
8671 dio_bio->bi_error = -EIO; 8670 dio_bio->bi_status = BLK_STS_IOERR;
8672 /* 8671 /*
8673 * Releases and cleans up our dio_bio, no need to bio_put() 8672 * Releases and cleans up our dio_bio, no need to bio_put()
8674 * nor bio_endio()/bio_io_error() against dio_bio. 8673 * nor bio_endio()/bio_io_error() against dio_bio.
8675 */ 8674 */
8676 dio_end_io(dio_bio, ret); 8675 dio_end_io(dio_bio);
8677 } 8676 }
8678 if (io_bio) 8677 if (io_bio)
8679 bio_put(io_bio); 8678 bio_put(io_bio);
@@ -8755,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
8755 dio_data.overwrite = 1; 8754 dio_data.overwrite = 1;
8756 inode_unlock(inode); 8755 inode_unlock(inode);
8757 relock = true; 8756 relock = true;
8757 } else if (iocb->ki_flags & IOCB_NOWAIT) {
8758 ret = -EAGAIN;
8759 goto out;
8758 } 8760 }
8759 ret = btrfs_delalloc_reserve_space(inode, offset, count); 8761 ret = btrfs_delalloc_reserve_space(inode, offset, count);
8760 if (ret) 8762 if (ret)
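[Editor's note] In the direct-IO completion paths above, dio_end_io() loses its error argument; the status is carried on the bio itself. A sketch of the new completion shape, names illustrative:

#include <linux/bio.h>
#include <linux/fs.h>

static void my_complete_dio_clone(struct bio *dio_bio, struct bio *bio)
{
	/* The status now travels on the bio; dio_end_io() takes no errno. */
	dio_bio->bi_status = bio->bi_status;
	dio_end_io(dio_bio);
	bio_put(bio);
}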
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb76325..f3d30d9ea8f9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
871 * this frees the rbio and runs through all the bios in the 871 * this frees the rbio and runs through all the bios in the
872 * bio_list and calls end_io on them 872 * bio_list and calls end_io on them
873 */ 873 */
874static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) 874static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
875{ 875{
876 struct bio *cur = bio_list_get(&rbio->bio_list); 876 struct bio *cur = bio_list_get(&rbio->bio_list);
877 struct bio *next; 877 struct bio *next;
@@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
884 while (cur) { 884 while (cur) {
885 next = cur->bi_next; 885 next = cur->bi_next;
886 cur->bi_next = NULL; 886 cur->bi_next = NULL;
887 cur->bi_error = err; 887 cur->bi_status = err;
888 bio_endio(cur); 888 bio_endio(cur);
889 cur = next; 889 cur = next;
890 } 890 }
@@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
897static void raid_write_end_io(struct bio *bio) 897static void raid_write_end_io(struct bio *bio)
898{ 898{
899 struct btrfs_raid_bio *rbio = bio->bi_private; 899 struct btrfs_raid_bio *rbio = bio->bi_private;
900 int err = bio->bi_error; 900 blk_status_t err = bio->bi_status;
901 int max_errors; 901 int max_errors;
902 902
903 if (err) 903 if (err)
@@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
914 max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? 914 max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
915 0 : rbio->bbio->max_errors; 915 0 : rbio->bbio->max_errors;
916 if (atomic_read(&rbio->error) > max_errors) 916 if (atomic_read(&rbio->error) > max_errors)
917 err = -EIO; 917 err = BLK_STS_IOERR;
918 918
919 rbio_orig_end_io(rbio, err); 919 rbio_orig_end_io(rbio, err);
920} 920}
@@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1092 * devices or if they are not contiguous 1092 * devices or if they are not contiguous
1093 */ 1093 */
1094 if (last_end == disk_start && stripe->dev->bdev && 1094 if (last_end == disk_start && stripe->dev->bdev &&
1095 !last->bi_error && 1095 !last->bi_status &&
1096 last->bi_bdev == stripe->dev->bdev) { 1096 last->bi_bdev == stripe->dev->bdev) {
1097 ret = bio_add_page(last, page, PAGE_SIZE, 0); 1097 ret = bio_add_page(last, page, PAGE_SIZE, 0);
1098 if (ret == PAGE_SIZE) 1098 if (ret == PAGE_SIZE)
@@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
1448{ 1448{
1449 struct btrfs_raid_bio *rbio = bio->bi_private; 1449 struct btrfs_raid_bio *rbio = bio->bi_private;
1450 1450
1451 if (bio->bi_error) 1451 if (bio->bi_status)
1452 fail_bio_stripe(rbio, bio); 1452 fail_bio_stripe(rbio, bio);
1453 else 1453 else
1454 set_bio_pages_uptodate(bio); 1454 set_bio_pages_uptodate(bio);
@@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
1991 * we only read stripe pages off the disk, set them 1991 * we only read stripe pages off the disk, set them
1992 * up to date if there were no errors 1992 * up to date if there were no errors
1993 */ 1993 */
1994 if (bio->bi_error) 1994 if (bio->bi_status)
1995 fail_bio_stripe(rbio, bio); 1995 fail_bio_stripe(rbio, bio);
1996 else 1996 else
1997 set_bio_pages_uptodate(bio); 1997 set_bio_pages_uptodate(bio);
@@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
2530{ 2530{
2531 struct btrfs_raid_bio *rbio = bio->bi_private; 2531 struct btrfs_raid_bio *rbio = bio->bi_private;
2532 2532
2533 if (bio->bi_error) 2533 if (bio->bi_status)
2534 fail_bio_stripe(rbio, bio); 2534 fail_bio_stripe(rbio, bio);
2535 else 2535 else
2536 set_bio_pages_uptodate(bio); 2536 set_bio_pages_uptodate(bio);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c7b45eb2403d..ba5595d19de1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -95,7 +95,7 @@ struct scrub_bio {
95 struct scrub_ctx *sctx; 95 struct scrub_ctx *sctx;
96 struct btrfs_device *dev; 96 struct btrfs_device *dev;
97 struct bio *bio; 97 struct bio *bio;
98 int err; 98 blk_status_t status;
99 u64 logical; 99 u64 logical;
100 u64 physical; 100 u64 physical;
101#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO 101#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -1668,14 +1668,14 @@ leave_nomem:
1668 1668
1669struct scrub_bio_ret { 1669struct scrub_bio_ret {
1670 struct completion event; 1670 struct completion event;
1671 int error; 1671 blk_status_t status;
1672}; 1672};
1673 1673
1674static void scrub_bio_wait_endio(struct bio *bio) 1674static void scrub_bio_wait_endio(struct bio *bio)
1675{ 1675{
1676 struct scrub_bio_ret *ret = bio->bi_private; 1676 struct scrub_bio_ret *ret = bio->bi_private;
1677 1677
1678 ret->error = bio->bi_error; 1678 ret->status = bio->bi_status;
1679 complete(&ret->event); 1679 complete(&ret->event);
1680} 1680}
1681 1681
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1693 int ret; 1693 int ret;
1694 1694
1695 init_completion(&done.event); 1695 init_completion(&done.event);
1696 done.error = 0; 1696 done.status = 0;
1697 bio->bi_iter.bi_sector = page->logical >> 9; 1697 bio->bi_iter.bi_sector = page->logical >> 9;
1698 bio->bi_private = &done; 1698 bio->bi_private = &done;
1699 bio->bi_end_io = scrub_bio_wait_endio; 1699 bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1705 return ret; 1705 return ret;
1706 1706
1707 wait_for_completion(&done.event); 1707 wait_for_completion(&done.event);
1708 if (done.error) 1708 if (done.status)
1709 return -EIO; 1709 return -EIO;
1710 1710
1711 return 0; 1711 return 0;
@@ -1937,7 +1937,7 @@ again:
1937 bio->bi_bdev = sbio->dev->bdev; 1937 bio->bi_bdev = sbio->dev->bdev;
1938 bio->bi_iter.bi_sector = sbio->physical >> 9; 1938 bio->bi_iter.bi_sector = sbio->physical >> 9;
1939 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1939 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1940 sbio->err = 0; 1940 sbio->status = 0;
1941 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1941 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1942 spage->physical_for_dev_replace || 1942 spage->physical_for_dev_replace ||
1943 sbio->logical + sbio->page_count * PAGE_SIZE != 1943 sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
1992 struct scrub_bio *sbio = bio->bi_private; 1992 struct scrub_bio *sbio = bio->bi_private;
1993 struct btrfs_fs_info *fs_info = sbio->dev->fs_info; 1993 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
1994 1994
1995 sbio->err = bio->bi_error; 1995 sbio->status = bio->bi_status;
1996 sbio->bio = bio; 1996 sbio->bio = bio;
1997 1997
1998 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, 1998 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
2007 int i; 2007 int i;
2008 2008
2009 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); 2009 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
2010 if (sbio->err) { 2010 if (sbio->status) {
2011 struct btrfs_dev_replace *dev_replace = 2011 struct btrfs_dev_replace *dev_replace =
2012 &sbio->sctx->fs_info->dev_replace; 2012 &sbio->sctx->fs_info->dev_replace;
2013 2013
@@ -2341,7 +2341,7 @@ again:
2341 bio->bi_bdev = sbio->dev->bdev; 2341 bio->bi_bdev = sbio->dev->bdev;
2342 bio->bi_iter.bi_sector = sbio->physical >> 9; 2342 bio->bi_iter.bi_sector = sbio->physical >> 9;
2343 bio_set_op_attrs(bio, REQ_OP_READ, 0); 2343 bio_set_op_attrs(bio, REQ_OP_READ, 0);
2344 sbio->err = 0; 2344 sbio->status = 0;
2345 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 2345 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2346 spage->physical || 2346 spage->physical ||
2347 sbio->logical + sbio->page_count * PAGE_SIZE != 2347 sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
2377 struct scrub_block *sblock = bio->bi_private; 2377 struct scrub_block *sblock = bio->bi_private;
2378 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info; 2378 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
2379 2379
2380 if (bio->bi_error) 2380 if (bio->bi_status)
2381 sblock->no_io_error_seen = 0; 2381 sblock->no_io_error_seen = 0;
2382 2382
2383 bio_put(bio); 2383 bio_put(bio);
@@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
2588 struct scrub_bio *sbio = bio->bi_private; 2588 struct scrub_bio *sbio = bio->bi_private;
2589 struct btrfs_fs_info *fs_info = sbio->dev->fs_info; 2589 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2590 2590
2591 sbio->err = bio->bi_error; 2591 sbio->status = bio->bi_status;
2592 sbio->bio = bio; 2592 sbio->bio = bio;
2593 2593
2594 btrfs_queue_work(fs_info->scrub_workers, &sbio->work); 2594 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
2601 int i; 2601 int i;
2602 2602
2603 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); 2603 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2604 if (sbio->err) { 2604 if (sbio->status) {
2605 for (i = 0; i < sbio->page_count; i++) { 2605 for (i = 0; i < sbio->page_count; i++) {
2606 struct scrub_page *spage = sbio->pagev[i]; 2606 struct scrub_page *spage = sbio->pagev[i];
2607 2607
@@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
3004 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; 3004 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
3005 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info; 3005 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
3006 3006
3007 if (bio->bi_error) 3007 if (bio->bi_status)
3008 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, 3008 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
3009 sparity->nsectors); 3009 sparity->nsectors);
3010 3010
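[Editor's note] The scrub.c changes rename the private err/error fields to status and give them type blk_status_t, matching what the end_io handler copies out of the bio. A sketch of such a wait context, under assumed names:

#include <linux/bio.h>
#include <linux/completion.h>

struct my_bio_wait {
	struct completion event;
	blk_status_t status;
};

static void my_bio_wait_endio(struct bio *bio)
{
	struct my_bio_wait *w = bio->bi_private;

	/* Record the raw block-layer status for the waiter to inspect. */
	w->status = bio->bi_status;
	complete(&w->event);
}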
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67daa3bb..84a495967e0a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
6042 struct btrfs_bio *bbio = bio->bi_private; 6042 struct btrfs_bio *bbio = bio->bi_private;
6043 int is_orig_bio = 0; 6043 int is_orig_bio = 0;
6044 6044
6045 if (bio->bi_error) { 6045 if (bio->bi_status) {
6046 atomic_inc(&bbio->error); 6046 atomic_inc(&bbio->error);
6047 if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) { 6047 if (bio->bi_status == BLK_STS_IOERR ||
6048 bio->bi_status == BLK_STS_TARGET) {
6048 unsigned int stripe_index = 6049 unsigned int stripe_index =
6049 btrfs_io_bio(bio)->stripe_index; 6050 btrfs_io_bio(bio)->stripe_index;
6050 struct btrfs_device *dev; 6051 struct btrfs_device *dev;
@@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
6082 * beyond the tolerance of the btrfs bio 6083 * beyond the tolerance of the btrfs bio
6083 */ 6084 */
6084 if (atomic_read(&bbio->error) > bbio->max_errors) { 6085 if (atomic_read(&bbio->error) > bbio->max_errors) {
6085 bio->bi_error = -EIO; 6086 bio->bi_status = BLK_STS_IOERR;
6086 } else { 6087 } else {
6087 /* 6088 /*
6088 * this bio is actually up to date, we didn't 6089 * this bio is actually up to date, we didn't
6089 * go over the max number of errors 6090 * go over the max number of errors
6090 */ 6091 */
6091 bio->bi_error = 0; 6092 bio->bi_status = 0;
6092 } 6093 }
6093 6094
6094 btrfs_end_bbio(bbio, bio); 6095 btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6199 6200
6200 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 6201 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6201 bio->bi_iter.bi_sector = logical >> 9; 6202 bio->bi_iter.bi_sector = logical >> 9;
6202 bio->bi_error = -EIO; 6203 bio->bi_status = BLK_STS_IOERR;
6203 btrfs_end_bbio(bbio, bio); 6204 btrfs_end_bbio(bbio, bio);
6204 } 6205 }
6205} 6206}
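[Editor's note] volumes.c shows how errno comparisons become BLK_STS_* comparisons (BLK_STS_IOERR/BLK_STS_TARGET in place of -EIO/-EREMOTEIO) and how errno_to_blk_status() converts in the other direction. A sketch with hypothetical helpers:

#include <linux/bio.h>
#include <linux/blk_types.h>
#include <linux/errno.h>

static bool my_is_media_error(struct bio *bio)
{
	/* BLK_STS_IOERR/BLK_STS_TARGET replace the old -EIO/-EREMOTEIO checks. */
	return bio->bi_status == BLK_STS_IOERR ||
	       bio->bi_status == BLK_STS_TARGET;
}

static void my_fail_bio(struct bio *bio, int error)
{
	/* Translate a kernel errno into the on-bio status representation. */
	bio->bi_status = errno_to_blk_status(error);
	bio_endio(bio);
}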
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..5c2cba8d2387 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
49 49
50static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 50static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
51static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, 51static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
52 struct writeback_control *wbc); 52 enum rw_hint hint, struct writeback_control *wbc);
53 53
54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) 54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
55 55
@@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
1829 do { 1829 do {
1830 struct buffer_head *next = bh->b_this_page; 1830 struct buffer_head *next = bh->b_this_page;
1831 if (buffer_async_write(bh)) { 1831 if (buffer_async_write(bh)) {
1832 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); 1832 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1833 inode->i_write_hint, wbc);
1833 nr_underway++; 1834 nr_underway++;
1834 } 1835 }
1835 bh = next; 1836 bh = next;
@@ -1883,7 +1884,8 @@ recover:
1883 struct buffer_head *next = bh->b_this_page; 1884 struct buffer_head *next = bh->b_this_page;
1884 if (buffer_async_write(bh)) { 1885 if (buffer_async_write(bh)) {
1885 clear_buffer_dirty(bh); 1886 clear_buffer_dirty(bh);
1886 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); 1887 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1888 inode->i_write_hint, wbc);
1887 nr_underway++; 1889 nr_underway++;
1888 } 1890 }
1889 bh = next; 1891 bh = next;
@@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
3038 if (unlikely(bio_flagged(bio, BIO_QUIET))) 3040 if (unlikely(bio_flagged(bio, BIO_QUIET)))
3039 set_bit(BH_Quiet, &bh->b_state); 3041 set_bit(BH_Quiet, &bh->b_state);
3040 3042
3041 bh->b_end_io(bh, !bio->bi_error); 3043 bh->b_end_io(bh, !bio->bi_status);
3042 bio_put(bio); 3044 bio_put(bio);
3043} 3045}
3044 3046
@@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio)
3091} 3093}
3092 3094
3093static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, 3095static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3094 struct writeback_control *wbc) 3096 enum rw_hint write_hint, struct writeback_control *wbc)
3095{ 3097{
3096 struct bio *bio; 3098 struct bio *bio;
3097 3099
@@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3120 3122
3121 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 3123 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3122 bio->bi_bdev = bh->b_bdev; 3124 bio->bi_bdev = bh->b_bdev;
3125 bio->bi_write_hint = write_hint;
3123 3126
3124 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); 3127 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
3125 BUG_ON(bio->bi_iter.bi_size != bh->b_size); 3128 BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3142 3145
3143int submit_bh(int op, int op_flags, struct buffer_head *bh) 3146int submit_bh(int op, int op_flags, struct buffer_head *bh)
3144{ 3147{
3145 return submit_bh_wbc(op, op_flags, bh, NULL); 3148 return submit_bh_wbc(op, op_flags, bh, 0, NULL);
3146} 3149}
3147EXPORT_SYMBOL(submit_bh); 3150EXPORT_SYMBOL(submit_bh);
3148 3151
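[Editor's note] The buffer.c hunks thread an enum rw_hint through submit_bh_wbc() so the inode's write-lifetime hint ends up in bio->bi_write_hint. A sketch of that propagation, with an invented wrapper name:

#include <linux/bio.h>
#include <linux/fs.h>

static void my_submit_write_bio(struct bio *bio, struct inode *inode,
				int op_flags)
{
	/* Carry the inode's lifetime hint so devices can separate streams. */
	bio->bi_write_hint = inode->i_write_hint;
	bio_set_op_attrs(bio, REQ_OP_WRITE, op_flags);
	submit_bio(bio);
}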
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84f1bca..6181e9526860 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
129 goto errout; 129 goto errout;
130 } 130 }
131 err = submit_bio_wait(bio); 131 err = submit_bio_wait(bio);
132 if ((err == 0) && bio->bi_error) 132 if (err == 0 && bio->bi_status)
133 err = -EIO; 133 err = -EIO;
134 bio_put(bio); 134 bio_put(bio);
135 if (err) 135 if (err)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..08cf27811e5a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
294 dio_complete(dio, 0, true); 294 dio_complete(dio, 0, true);
295} 295}
296 296
297static int dio_bio_complete(struct dio *dio, struct bio *bio); 297static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
298 298
299/* 299/*
300 * Asynchronous IO callback. 300 * Asynchronous IO callback.
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
348/** 348/**
349 * dio_end_io - handle the end io action for the given bio 349 * dio_end_io - handle the end io action for the given bio
350 * @bio: The direct io bio thats being completed 350 * @bio: The direct io bio thats being completed
351 * @error: Error if there was one
352 * 351 *
353 * This is meant to be called by any filesystem that uses their own dio_submit_t 352 * This is meant to be called by any filesystem that uses their own dio_submit_t
354 * so that the DIO specific endio actions are dealt with after the filesystem 353 * so that the DIO specific endio actions are dealt with after the filesystem
355 * has done it's completion work. 354 * has done it's completion work.
356 */ 355 */
357void dio_end_io(struct bio *bio, int error) 356void dio_end_io(struct bio *bio)
358{ 357{
359 struct dio *dio = bio->bi_private; 358 struct dio *dio = bio->bi_private;
360 359
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
386 else 385 else
387 bio->bi_end_io = dio_bio_end_io; 386 bio->bi_end_io = dio_bio_end_io;
388 387
388 bio->bi_write_hint = dio->iocb->ki_hint;
389
389 sdio->bio = bio; 390 sdio->bio = bio;
390 sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; 391 sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
391} 392}
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
474/* 475/*
475 * Process one completed BIO. No locks are held. 476 * Process one completed BIO. No locks are held.
476 */ 477 */
477static int dio_bio_complete(struct dio *dio, struct bio *bio) 478static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
478{ 479{
479 struct bio_vec *bvec; 480 struct bio_vec *bvec;
480 unsigned i; 481 unsigned i;
481 int err; 482 blk_status_t err = bio->bi_status;
482 483
483 if (bio->bi_error) 484 if (err) {
484 dio->io_error = -EIO; 485 if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
486 dio->io_error = -EAGAIN;
487 else
488 dio->io_error = -EIO;
489 }
485 490
486 if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { 491 if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
487 err = bio->bi_error;
488 bio_check_pages_dirty(bio); /* transfers ownership */ 492 bio_check_pages_dirty(bio); /* transfers ownership */
489 } else { 493 } else {
490 bio_for_each_segment_all(bvec, bio, i) { 494 bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
495 set_page_dirty_lock(page); 499 set_page_dirty_lock(page);
496 put_page(page); 500 put_page(page);
497 } 501 }
498 err = bio->bi_error;
499 bio_put(bio); 502 bio_put(bio);
500 } 503 }
501 return err; 504 return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
539 bio = dio->bio_list; 542 bio = dio->bio_list;
540 dio->bio_list = bio->bi_private; 543 dio->bio_list = bio->bi_private;
541 spin_unlock_irqrestore(&dio->bio_lock, flags); 544 spin_unlock_irqrestore(&dio->bio_lock, flags);
542 ret2 = dio_bio_complete(dio, bio); 545 ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
543 if (ret == 0) 546 if (ret == 0)
544 ret = ret2; 547 ret = ret2;
545 } 548 }
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1197 if (iov_iter_rw(iter) == WRITE) { 1200 if (iov_iter_rw(iter) == WRITE) {
1198 dio->op = REQ_OP_WRITE; 1201 dio->op = REQ_OP_WRITE;
1199 dio->op_flags = REQ_SYNC | REQ_IDLE; 1202 dio->op_flags = REQ_SYNC | REQ_IDLE;
1203 if (iocb->ki_flags & IOCB_NOWAIT)
1204 dio->op_flags |= REQ_NOWAIT;
1200 } else { 1205 } else {
1201 dio->op = REQ_OP_READ; 1206 dio->op = REQ_OP_READ;
1202 } 1207 }
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 02ce7e7bbdf5..58e2eeaa0bc4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
37 struct inode *inode = file_inode(iocb->ki_filp); 37 struct inode *inode = file_inode(iocb->ki_filp);
38 ssize_t ret; 38 ssize_t ret;
39 39
40 inode_lock_shared(inode); 40 if (!inode_trylock_shared(inode)) {
41 if (iocb->ki_flags & IOCB_NOWAIT)
42 return -EAGAIN;
43 inode_lock_shared(inode);
44 }
41 /* 45 /*
42 * Recheck under inode lock - at this point we are sure it cannot 46 * Recheck under inode lock - at this point we are sure it cannot
43 * change anymore 47 * change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
179 struct inode *inode = file_inode(iocb->ki_filp); 183 struct inode *inode = file_inode(iocb->ki_filp);
180 ssize_t ret; 184 ssize_t ret;
181 185
182 inode_lock(inode); 186 if (!inode_trylock(inode)) {
187 if (iocb->ki_flags & IOCB_NOWAIT)
188 return -EAGAIN;
189 inode_lock(inode);
190 }
183 ret = ext4_write_checks(iocb, from); 191 ret = ext4_write_checks(iocb, from);
184 if (ret <= 0) 192 if (ret <= 0)
185 goto out; 193 goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
216 return ext4_dax_write_iter(iocb, from); 224 return ext4_dax_write_iter(iocb, from);
217#endif 225#endif
218 226
219 inode_lock(inode); 227 if (!inode_trylock(inode)) {
228 if (iocb->ki_flags & IOCB_NOWAIT)
229 return -EAGAIN;
230 inode_lock(inode);
231 }
232
220 ret = ext4_write_checks(iocb, from); 233 ret = ext4_write_checks(iocb, from);
221 if (ret <= 0) 234 if (ret <= 0)
222 goto out; 235 goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
235 248
236 iocb->private = &overwrite; 249 iocb->private = &overwrite;
237 /* Check whether we do a DIO overwrite or not */ 250 /* Check whether we do a DIO overwrite or not */
238 if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio && 251 if (o_direct && !unaligned_aio) {
239 ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) 252 if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
240 overwrite = 1; 253 if (ext4_should_dioread_nolock(inode))
254 overwrite = 1;
255 } else if (iocb->ki_flags & IOCB_NOWAIT) {
256 ret = -EAGAIN;
257 goto out;
258 }
259 }
241 260
242 ret = __generic_file_write_iter(iocb, from); 261 ret = __generic_file_write_iter(iocb, from);
243 inode_unlock(inode); 262 inode_unlock(inode);
@@ -435,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
435 if (ret < 0) 454 if (ret < 0)
436 return ret; 455 return ret;
437 } 456 }
457
458 /* Set the flags to support nowait AIO */
459 filp->f_mode |= FMODE_AIO_NOWAIT;
460
438 return dquot_file_open(inode, filp); 461 return dquot_file_open(inode, filp);
439} 462}
440 463
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138ba739..c2fce4478cca 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
85 } 85 }
86#endif 86#endif
87 87
88 if (bio->bi_error) { 88 if (bio->bi_status) {
89 SetPageError(page); 89 SetPageError(page);
90 mapping_set_error(page->mapping, -EIO); 90 mapping_set_error(page->mapping, -EIO);
91 } 91 }
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
104 continue; 104 continue;
105 } 105 }
106 clear_buffer_async_write(bh); 106 clear_buffer_async_write(bh);
107 if (bio->bi_error) 107 if (bio->bi_status)
108 buffer_io_error(bh); 108 buffer_io_error(bh);
109 } while ((bh = bh->b_this_page) != head); 109 } while ((bh = bh->b_this_page) != head);
110 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); 110 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
303 bdevname(bio->bi_bdev, b), 303 bdevname(bio->bi_bdev, b),
304 (long long) bio->bi_iter.bi_sector, 304 (long long) bio->bi_iter.bi_sector,
305 (unsigned) bio_sectors(bio), 305 (unsigned) bio_sectors(bio),
306 bio->bi_error)) { 306 bio->bi_status)) {
307 ext4_finish_bio(bio); 307 ext4_finish_bio(bio);
308 bio_put(bio); 308 bio_put(bio);
309 return; 309 return;
310 } 310 }
311 bio->bi_end_io = NULL; 311 bio->bi_end_io = NULL;
312 312
313 if (bio->bi_error) { 313 if (bio->bi_status) {
314 struct inode *inode = io_end->inode; 314 struct inode *inode = io_end->inode;
315 315
316 ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " 316 ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
317 "(offset %llu size %ld starting block %llu)", 317 "(offset %llu size %ld starting block %llu)",
318 bio->bi_error, inode->i_ino, 318 bio->bi_status, inode->i_ino,
319 (unsigned long long) io_end->offset, 319 (unsigned long long) io_end->offset,
320 (long) io_end->size, 320 (long) io_end->size,
321 (unsigned long long) 321 (unsigned long long)
322 bi_sector >> (inode->i_blkbits - 9)); 322 bi_sector >> (inode->i_blkbits - 9));
323 mapping_set_error(inode->i_mapping, bio->bi_error); 323 mapping_set_error(inode->i_mapping,
324 blk_status_to_errno(bio->bi_status));
324 } 325 }
325 326
326 if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 327 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
349 if (bio) { 350 if (bio) {
350 int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? 351 int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
351 REQ_SYNC : 0; 352 REQ_SYNC : 0;
353 io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
352 bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); 354 bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
353 submit_bio(io->io_bio); 355 submit_bio(io->io_bio);
354 } 356 }
@@ -396,6 +398,7 @@ submit_and_retry:
396 ret = io_submit_init_bio(io, bh); 398 ret = io_submit_init_bio(io, bh);
397 if (ret) 399 if (ret)
398 return ret; 400 return ret;
401 io->io_bio->bi_write_hint = inode->i_write_hint;
399 } 402 }
400 ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); 403 ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
401 if (ret != bh->b_size) 404 if (ret != bh->b_size)
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829d56de..40a5497b0f60 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
73 int i; 73 int i;
74 74
75 if (ext4_bio_encrypted(bio)) { 75 if (ext4_bio_encrypted(bio)) {
76 if (bio->bi_error) { 76 if (bio->bi_status) {
77 fscrypt_release_ctx(bio->bi_private); 77 fscrypt_release_ctx(bio->bi_private);
78 } else { 78 } else {
79 fscrypt_decrypt_bio_pages(bio->bi_private, bio); 79 fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
83 bio_for_each_segment_all(bv, bio, i) { 83 bio_for_each_segment_all(bv, bio, i) {
84 struct page *page = bv->bv_page; 84 struct page *page = bv->bv_page;
85 85
86 if (!bio->bi_error) { 86 if (!bio->bi_status) {
87 SetPageUptodate(page); 87 SetPageUptodate(page);
88 } else { 88 } else {
89 ClearPageUptodate(page); 89 ClearPageUptodate(page);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bdf817d..36fe82012a33 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
58#ifdef CONFIG_F2FS_FAULT_INJECTION 58#ifdef CONFIG_F2FS_FAULT_INJECTION
59 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { 59 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
60 f2fs_show_injection_info(FAULT_IO); 60 f2fs_show_injection_info(FAULT_IO);
61 bio->bi_error = -EIO; 61 bio->bi_status = BLK_STS_IOERR;
62 } 62 }
63#endif 63#endif
64 64
65 if (f2fs_bio_encrypted(bio)) { 65 if (f2fs_bio_encrypted(bio)) {
66 if (bio->bi_error) { 66 if (bio->bi_status) {
67 fscrypt_release_ctx(bio->bi_private); 67 fscrypt_release_ctx(bio->bi_private);
68 } else { 68 } else {
69 fscrypt_decrypt_bio_pages(bio->bi_private, bio); 69 fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
74 bio_for_each_segment_all(bvec, bio, i) { 74 bio_for_each_segment_all(bvec, bio, i) {
75 struct page *page = bvec->bv_page; 75 struct page *page = bvec->bv_page;
76 76
77 if (!bio->bi_error) { 77 if (!bio->bi_status) {
78 if (!PageUptodate(page)) 78 if (!PageUptodate(page))
79 SetPageUptodate(page); 79 SetPageUptodate(page);
80 } else { 80 } else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
102 unlock_page(page); 102 unlock_page(page);
103 mempool_free(page, sbi->write_io_dummy); 103 mempool_free(page, sbi->write_io_dummy);
104 104
105 if (unlikely(bio->bi_error)) 105 if (unlikely(bio->bi_status))
106 f2fs_stop_checkpoint(sbi, true); 106 f2fs_stop_checkpoint(sbi, true);
107 continue; 107 continue;
108 } 108 }
109 109
110 fscrypt_pullback_bio_page(&page, true); 110 fscrypt_pullback_bio_page(&page, true);
111 111
112 if (unlikely(bio->bi_error)) { 112 if (unlikely(bio->bi_status)) {
113 mapping_set_error(page->mapping, -EIO); 113 mapping_set_error(page->mapping, -EIO);
114 f2fs_stop_checkpoint(sbi, true); 114 f2fs_stop_checkpoint(sbi, true);
115 } 115 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 96845854e7ee..ea9f455d94ba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
749{ 749{
750 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; 750 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
751 751
752 dc->error = bio->bi_error; 752 dc->error = blk_status_to_errno(bio->bi_status);
753 dc->state = D_DONE; 753 dc->state = D_DONE;
754 complete(&dc->wait); 754 complete(&dc->wait);
755 bio_put(bio); 755 bio_put(bio);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..ed051f825bad 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -243,6 +243,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
243} 243}
244#endif 244#endif
245 245
246static bool rw_hint_valid(enum rw_hint hint)
247{
248 switch (hint) {
249 case RWF_WRITE_LIFE_NOT_SET:
250 case RWH_WRITE_LIFE_NONE:
251 case RWH_WRITE_LIFE_SHORT:
252 case RWH_WRITE_LIFE_MEDIUM:
253 case RWH_WRITE_LIFE_LONG:
254 case RWH_WRITE_LIFE_EXTREME:
255 return true;
256 default:
257 return false;
258 }
259}
260
261static long fcntl_rw_hint(struct file *file, unsigned int cmd,
262 unsigned long arg)
263{
264 struct inode *inode = file_inode(file);
265 u64 *argp = (u64 __user *)arg;
266 enum rw_hint hint;
267 u64 h;
268
269 switch (cmd) {
270 case F_GET_FILE_RW_HINT:
271 h = file_write_hint(file);
272 if (copy_to_user(argp, &h, sizeof(*argp)))
273 return -EFAULT;
274 return 0;
275 case F_SET_FILE_RW_HINT:
276 if (copy_from_user(&h, argp, sizeof(h)))
277 return -EFAULT;
278 hint = (enum rw_hint) h;
279 if (!rw_hint_valid(hint))
280 return -EINVAL;
281
282 spin_lock(&file->f_lock);
283 file->f_write_hint = hint;
284 spin_unlock(&file->f_lock);
285 return 0;
286 case F_GET_RW_HINT:
287 h = inode->i_write_hint;
288 if (copy_to_user(argp, &h, sizeof(*argp)))
289 return -EFAULT;
290 return 0;
291 case F_SET_RW_HINT:
292 if (copy_from_user(&h, argp, sizeof(h)))
293 return -EFAULT;
294 hint = (enum rw_hint) h;
295 if (!rw_hint_valid(hint))
296 return -EINVAL;
297
298 inode_lock(inode);
299 inode->i_write_hint = hint;
300 inode_unlock(inode);
301 return 0;
302 default:
303 return -EINVAL;
304 }
305}
306
246static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 307static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
247 struct file *filp) 308 struct file *filp)
248{ 309{
@@ -337,6 +398,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
337 case F_GET_SEALS: 398 case F_GET_SEALS:
338 err = shmem_fcntl(filp, cmd, arg); 399 err = shmem_fcntl(filp, cmd, arg);
339 break; 400 break;
401 case F_GET_RW_HINT:
402 case F_SET_RW_HINT:
403 case F_GET_FILE_RW_HINT:
404 case F_SET_FILE_RW_HINT:
405 err = fcntl_rw_hint(filp, cmd, arg);
406 break;
340 default: 407 default:
341 break; 408 break;
342 } 409 }
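
The four new fcntl commands take a pointer to a 64-bit hint value, as the copy_{to,from}_user() of a u64 above implies. From user space that looks roughly like the sketch below; it assumes a libc and kernel headers that already expose F_SET_RW_HINT and the RWH_WRITE_LIFE_* constants:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>

/* mark data written through this inode as short-lived */
static int set_short_lived(int fd)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;

	/* F_SET_RW_HINT sets the per-inode hint;
	 * F_SET_FILE_RW_HINT would set the per-open-file one instead. */
	if (fcntl(fd, F_SET_RW_HINT, &hint) == -1) {
		perror("F_SET_RW_HINT");
		return -1;
	}
	return 0;
}
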
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d13561..aa3d44527fa2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -815,7 +815,6 @@ struct gfs2_sbd {
815 atomic_t sd_log_in_flight; 815 atomic_t sd_log_in_flight;
816 struct bio *sd_log_bio; 816 struct bio *sd_log_bio;
817 wait_queue_head_t sd_log_flush_wait; 817 wait_queue_head_t sd_log_flush_wait;
818 int sd_log_error;
819 818
820 atomic_t sd_reserving_log; 819 atomic_t sd_reserving_log;
821 wait_queue_head_t sd_reserving_log_wait; 820 wait_queue_head_t sd_reserving_log_wait;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144b42c7..885d36e7a29f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
170 */ 170 */
171 171
172static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, 172static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
173 int error) 173 blk_status_t error)
174{ 174{
175 struct buffer_head *bh, *next; 175 struct buffer_head *bh, *next;
176 struct page *page = bvec->bv_page; 176 struct page *page = bvec->bv_page;
@@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
209 struct page *page; 209 struct page *page;
210 int i; 210 int i;
211 211
212 if (bio->bi_error) { 212 if (bio->bi_status)
213 sdp->sd_log_error = bio->bi_error; 213 fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
214 fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
215 }
216 214
217 bio_for_each_segment_all(bvec, bio, i) { 215 bio_for_each_segment_all(bvec, bio, i) {
218 page = bvec->bv_page; 216 page = bvec->bv_page;
219 if (page_has_buffers(page)) 217 if (page_has_buffers(page))
220 gfs2_end_log_write_bh(sdp, bvec, bio->bi_error); 218 gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
221 else 219 else
222 mempool_free(page, gfs2_page_pool); 220 mempool_free(page, gfs2_page_pool);
223 } 221 }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc135ef3..fabe1614f879 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
201 do { 201 do {
202 struct buffer_head *next = bh->b_this_page; 202 struct buffer_head *next = bh->b_this_page;
203 len -= bh->b_size; 203 len -= bh->b_size;
204 bh->b_end_io(bh, !bio->bi_error); 204 bh->b_end_io(bh, !bio->bi_status);
205 bh = next; 205 bh = next;
206 } while (bh && len); 206 } while (bh && len);
207 } 207 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b92135c202c2..e76058d34b74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
176{ 176{
177 struct page *page = bio->bi_private; 177 struct page *page = bio->bi_private;
178 178
179 if (!bio->bi_error) 179 if (!bio->bi_status)
180 SetPageUptodate(page); 180 SetPageUptodate(page);
181 else 181 else
182 pr_warn("error %d reading superblock\n", bio->bi_error); 182 pr_warn("error %d reading superblock\n", bio->bi_status);
183 unlock_page(page); 183 unlock_page(page);
184} 184}
185 185
diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..f0e5fc77e6a4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
146 i_gid_write(inode, 0); 146 i_gid_write(inode, 0);
147 atomic_set(&inode->i_writecount, 0); 147 atomic_set(&inode->i_writecount, 0);
148 inode->i_size = 0; 148 inode->i_size = 0;
149 inode->i_write_hint = WRITE_LIFE_NOT_SET;
149 inode->i_blocks = 0; 150 inode->i_blocks = 0;
150 inode->i_bytes = 0; 151 inode->i_bytes = 0;
151 inode->i_generation = 0; 152 inode->i_generation = 0;
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5..fa6cd5b3f578 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
672 struct iomap_dio *dio = bio->bi_private; 672 struct iomap_dio *dio = bio->bi_private;
673 bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); 673 bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
674 674
675 if (bio->bi_error) 675 if (bio->bi_status)
676 iomap_dio_set_error(dio, bio->bi_error); 676 iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
677 677
678 if (atomic_dec_and_test(&dio->ref)) { 678 if (atomic_dec_and_test(&dio->ref)) {
679 if (is_sync_kiocb(dio->iocb)) { 679 if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
793 bio->bi_bdev = iomap->bdev; 793 bio->bi_bdev = iomap->bdev;
794 bio->bi_iter.bi_sector = 794 bio->bi_iter.bi_sector =
795 iomap->blkno + ((pos - iomap->offset) >> 9); 795 iomap->blkno + ((pos - iomap->offset) >> 9);
796 bio->bi_write_hint = dio->iocb->ki_hint;
796 bio->bi_private = dio; 797 bio->bi_private = dio;
797 bio->bi_end_io = iomap_dio_bio_end_io; 798 bio->bi_end_io = iomap_dio_bio_end_io;
798 799
@@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
881 flags |= IOMAP_WRITE; 882 flags |= IOMAP_WRITE;
882 } 883 }
883 884
885 if (iocb->ki_flags & IOCB_NOWAIT) {
886 if (filemap_range_has_page(mapping, start, end)) {
887 ret = -EAGAIN;
888 goto out_free_dio;
889 }
890 flags |= IOMAP_NOWAIT;
891 }
892
884 ret = filemap_write_and_wait_range(mapping, start, end); 893 ret = filemap_write_and_wait_range(mapping, start, end);
885 if (ret) 894 if (ret)
886 goto out_free_dio; 895 goto out_free_dio;
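
For IOCB_NOWAIT callers, iomap_dio_rw() now bails out with -EAGAIN rather than flushing any page cache over the range, and forwards IOMAP_NOWAIT so the filesystem's ->iomap_begin can refuse anything that would block. A filesystem-side sketch of honouring that flag; myfs_iomap_begin and myfs_would_allocate are illustrative, and the prototype reflects the iomap_ops of this series:

static int myfs_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
			    unsigned flags, struct iomap *iomap)
{
	if ((flags & IOMAP_NOWAIT) && myfs_would_allocate(inode, pos, length))
		return -EAGAIN;	/* retried from a context that may block */

	/* ... normal extent lookup and iomap fill-in ... */
	return 0;
}
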
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1feafeb..a21f0e9eecd4 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
2205 2205
2206 bp->l_flag |= lbmDONE; 2206 bp->l_flag |= lbmDONE;
2207 2207
2208 if (bio->bi_error) { 2208 if (bio->bi_status) {
2209 bp->l_flag |= lbmERROR; 2209 bp->l_flag |= lbmERROR;
2210 2210
2211 jfs_err("lbmIODone: I/O error in JFS log"); 2211 jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1403e5..ce93db3aef3c 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
280{ 280{
281 struct page *page = bio->bi_private; 281 struct page *page = bio->bi_private;
282 282
283 if (bio->bi_error) { 283 if (bio->bi_status) {
284 printk(KERN_ERR "metapage_read_end_io: I/O error\n"); 284 printk(KERN_ERR "metapage_read_end_io: I/O error\n");
285 SetPageError(page); 285 SetPageError(page);
286 } 286 }
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
337 337
338 BUG_ON(!PagePrivate(page)); 338 BUG_ON(!PagePrivate(page));
339 339
340 if (bio->bi_error) { 340 if (bio->bi_status) {
341 printk(KERN_ERR "metapage_write_end_io: I/O error\n"); 341 printk(KERN_ERR "metapage_write_end_io: I/O error\n");
342 SetPageError(page); 342 SetPageError(page);
343 } 343 }
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f820c29..d6d1486d6f99 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
50 50
51 bio_for_each_segment_all(bv, bio, i) { 51 bio_for_each_segment_all(bv, bio, i) {
52 struct page *page = bv->bv_page; 52 struct page *page = bv->bv_page;
53 page_endio(page, op_is_write(bio_op(bio)), bio->bi_error); 53 page_endio(page, op_is_write(bio_op(bio)),
54 blk_status_to_errno(bio->bi_status));
54 } 55 }
55 56
56 bio_put(bio); 57 bio_put(bio);
@@ -614,6 +615,7 @@ alloc_new:
614 goto confused; 615 goto confused;
615 616
616 wbc_init_bio(wbc, bio); 617 wbc_init_bio(wbc, bio);
618 bio->bi_write_hint = inode->i_write_hint;
617 } 619 }
618 620
619 /* 621 /*
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d23ddb..d8863a804b15 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
188{ 188{
189 struct parallel_io *par = bio->bi_private; 189 struct parallel_io *par = bio->bi_private;
190 190
191 if (bio->bi_error) { 191 if (bio->bi_status) {
192 struct nfs_pgio_header *header = par->data; 192 struct nfs_pgio_header *header = par->data;
193 193
194 if (!header->pnfs_error) 194 if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
319 struct parallel_io *par = bio->bi_private; 319 struct parallel_io *par = bio->bi_private;
320 struct nfs_pgio_header *header = par->data; 320 struct nfs_pgio_header *header = par->data;
321 321
322 if (bio->bi_error) { 322 if (bio->bi_status) {
323 if (!header->pnfs_error) 323 if (!header->pnfs_error)
324 header->pnfs_error = -EIO; 324 header->pnfs_error = -EIO;
325 pnfs_set_lo_fail(header->lseg); 325 pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fb5213afc854..c862c2489df0 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
219 u8 *buf, *d, type, assoc; 219 u8 *buf, *d, type, assoc;
220 int error; 220 int error;
221 221
222 if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
223 return -EINVAL;
224
222 buf = kzalloc(bufflen, GFP_KERNEL); 225 buf = kzalloc(bufflen, GFP_KERNEL);
223 if (!buf) 226 if (!buf)
224 return -ENOMEM; 227 return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
229 goto out_free_buf; 232 goto out_free_buf;
230 } 233 }
231 req = scsi_req(rq); 234 req = scsi_req(rq);
232 scsi_req_init(rq);
233 235
234 error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); 236 error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
235 if (error) 237 if (error)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2ac1aeb..e73c86d9855c 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
338{ 338{
339 struct nilfs_segment_buffer *segbuf = bio->bi_private; 339 struct nilfs_segment_buffer *segbuf = bio->bi_private;
340 340
341 if (bio->bi_error) 341 if (bio->bi_status)
342 atomic_inc(&segbuf->sb_err); 342 atomic_inc(&segbuf->sb_err);
343 343
344 bio_put(bio); 344 bio_put(bio);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332725aa..ffe003982d95 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
516{ 516{
517 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 517 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
518 518
519 if (bio->bi_error) { 519 if (bio->bi_status) {
520 mlog(ML_ERROR, "IO Error %d\n", bio->bi_error); 520 mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
521 wc->wc_error = bio->bi_error; 521 wc->wc_error = blk_status_to_errno(bio->bi_status);
522 } 522 }
523 523
524 o2hb_bio_wait_dec(wc, 1); 524 o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..3fe0c4aa7d27 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
759 likely(f->f_op->write || f->f_op->write_iter)) 759 likely(f->f_op->write || f->f_op->write_iter))
760 f->f_mode |= FMODE_CAN_WRITE; 760 f->f_mode |= FMODE_CAN_WRITE;
761 761
762 f->f_write_hint = WRITE_LIFE_NOT_SET;
762 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 763 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
763 764
764 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 765 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/fs/read_write.c b/fs/read_write.c
index 19d4d88fa285..d591eeed061f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
678 struct kiocb kiocb; 678 struct kiocb kiocb;
679 ssize_t ret; 679 ssize_t ret;
680 680
681 if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
682 return -EOPNOTSUPP;
683
684 init_sync_kiocb(&kiocb, filp); 681 init_sync_kiocb(&kiocb, filp);
685 if (flags & RWF_HIPRI) 682 ret = kiocb_set_rw_flags(&kiocb, flags);
686 kiocb.ki_flags |= IOCB_HIPRI; 683 if (ret)
687 if (flags & RWF_DSYNC) 684 return ret;
688 kiocb.ki_flags |= IOCB_DSYNC;
689 if (flags & RWF_SYNC)
690 kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
691 kiocb.ki_pos = *ppos; 685 kiocb.ki_pos = *ppos;
692 686
693 if (type == READ) 687 if (type == READ)
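
The open-coded RWF_* handling above becomes a call to kiocb_set_rw_flags(), whose body is not part of this hunk. Based on the flags that were removed and the IOCB_NOWAIT plumbing elsewhere in the series, it amounts to roughly the sketch below; treat it as a reconstruction, not the upstream helper:

static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
{
	if (unlikely(flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT)))
		return -EOPNOTSUPP;

	if (flags & RWF_NOWAIT) {
		if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
			return -EOPNOTSUPP;
		ki->ki_flags |= IOCB_NOWAIT;
	}
	if (flags & RWF_HIPRI)
		ki->ki_flags |= IOCB_HIPRI;
	if (flags & RWF_DSYNC)
		ki->ki_flags |= IOCB_DSYNC;
	if (flags & RWF_SYNC)
		ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
	return 0;
}
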
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3b91faacc1ba..d20c29b9c95b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
276 struct xfs_inode *ip = XFS_I(ioend->io_inode); 276 struct xfs_inode *ip = XFS_I(ioend->io_inode);
277 xfs_off_t offset = ioend->io_offset; 277 xfs_off_t offset = ioend->io_offset;
278 size_t size = ioend->io_size; 278 size_t size = ioend->io_size;
279 int error = ioend->io_bio->bi_error; 279 int error;
280 280
281 /* 281 /*
282 * Just clean up the in-memory strutures if the fs has been shut down. 282 * Just clean up the in-memory strutures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
289 /* 289 /*
290 * Clean up any COW blocks on an I/O error. 290 * Clean up any COW blocks on an I/O error.
291 */ 291 */
292 error = blk_status_to_errno(ioend->io_bio->bi_status);
292 if (unlikely(error)) { 293 if (unlikely(error)) {
293 switch (ioend->io_type) { 294 switch (ioend->io_type) {
294 case XFS_IO_COW: 295 case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
332 else if (ioend->io_append_trans) 333 else if (ioend->io_append_trans)
333 queue_work(mp->m_data_workqueue, &ioend->io_work); 334 queue_work(mp->m_data_workqueue, &ioend->io_work);
334 else 335 else
335 xfs_destroy_ioend(ioend, bio->bi_error); 336 xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
336} 337}
337 338
338STATIC int 339STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
500 * time. 501 * time.
501 */ 502 */
502 if (status) { 503 if (status) {
503 ioend->io_bio->bi_error = status; 504 ioend->io_bio->bi_status = errno_to_blk_status(status);
504 bio_endio(ioend->io_bio); 505 bio_endio(ioend->io_bio);
505 return status; 506 return status;
506 } 507 }
507 508
509 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
508 submit_bio(ioend->io_bio); 510 submit_bio(ioend->io_bio);
509 return 0; 511 return 0;
510} 512}
@@ -564,6 +566,7 @@ xfs_chain_bio(
564 bio_chain(ioend->io_bio, new); 566 bio_chain(ioend->io_bio, new);
565 bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 567 bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
566 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 568 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
569 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
567 submit_bio(ioend->io_bio); 570 submit_bio(ioend->io_bio);
568 ioend->io_bio = new; 571 ioend->io_bio = new;
569} 572}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 16d6a578fc16..438505f395e7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1227,8 +1227,11 @@ xfs_buf_bio_end_io(
1227 * don't overwrite existing errors - otherwise we can lose errors on 1227 * don't overwrite existing errors - otherwise we can lose errors on
1228 * buffers that require multiple bios to complete. 1228 * buffers that require multiple bios to complete.
1229 */ 1229 */
1230 if (bio->bi_error) 1230 if (bio->bi_status) {
1231 cmpxchg(&bp->b_io_error, 0, bio->bi_error); 1231 int error = blk_status_to_errno(bio->bi_status);
1232
1233 cmpxchg(&bp->b_io_error, 0, error);
1234 }
1232 1235
1233 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1236 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1234 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1237 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5fb5a0958a14..17f27a2fb5e2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,7 +237,11 @@ xfs_file_dax_read(
237 if (!count) 237 if (!count)
238 return 0; /* skip atime */ 238 return 0; /* skip atime */
239 239
240 xfs_ilock(ip, XFS_IOLOCK_SHARED); 240 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
241 if (iocb->ki_flags & IOCB_NOWAIT)
242 return -EAGAIN;
243 xfs_ilock(ip, XFS_IOLOCK_SHARED);
244 }
241 ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); 245 ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
242 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 246 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
243 247
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
541 iolock = XFS_IOLOCK_SHARED; 545 iolock = XFS_IOLOCK_SHARED;
542 } 546 }
543 547
544 xfs_ilock(ip, iolock); 548 if (!xfs_ilock_nowait(ip, iolock)) {
549 if (iocb->ki_flags & IOCB_NOWAIT)
550 return -EAGAIN;
551 xfs_ilock(ip, iolock);
552 }
545 553
546 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 554 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
547 if (ret) 555 if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
553 * otherwise demote the lock if we had to take the exclusive lock 561 * otherwise demote the lock if we had to take the exclusive lock
554 * for other reasons in xfs_file_aio_write_checks. 562 * for other reasons in xfs_file_aio_write_checks.
555 */ 563 */
556 if (unaligned_io) 564 if (unaligned_io) {
557 inode_dio_wait(inode); 565 /* If we are going to wait for other DIO to finish, bail */
558 else if (iolock == XFS_IOLOCK_EXCL) { 566 if (iocb->ki_flags & IOCB_NOWAIT) {
567 if (atomic_read(&inode->i_dio_count))
568 return -EAGAIN;
569 } else {
570 inode_dio_wait(inode);
571 }
572 } else if (iolock == XFS_IOLOCK_EXCL) {
559 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); 573 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
560 iolock = XFS_IOLOCK_SHARED; 574 iolock = XFS_IOLOCK_SHARED;
561 } 575 }
@@ -585,7 +599,12 @@ xfs_file_dax_write(
585 size_t count; 599 size_t count;
586 loff_t pos; 600 loff_t pos;
587 601
588 xfs_ilock(ip, iolock); 602 if (!xfs_ilock_nowait(ip, iolock)) {
603 if (iocb->ki_flags & IOCB_NOWAIT)
604 return -EAGAIN;
605 xfs_ilock(ip, iolock);
606 }
607
589 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 608 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
590 if (ret) 609 if (ret)
591 goto out; 610 goto out;
@@ -892,6 +911,7 @@ xfs_file_open(
892 return -EFBIG; 911 return -EFBIG;
893 if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) 912 if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
894 return -EIO; 913 return -EIO;
914 file->f_mode |= FMODE_AIO_NOWAIT;
895 return 0; 915 return 0;
896} 916}
897 917
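
Every xfs entry point touched above follows the same shape: trylock first, and only fall back to the sleeping lock when IOCB_NOWAIT is not set; xfs_file_open() advertises the capability via FMODE_AIO_NOWAIT. Reduced to the essentials for a generic direct-write path (my_trylock/my_lock/my_unlock are illustrative placeholders for the filesystem's own locking):

static ssize_t myfs_dio_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!my_trylock(inode))
			return -EAGAIN;
		if (atomic_read(&inode->i_dio_count)) {	/* would wait for DIO */
			my_unlock(inode);
			return -EAGAIN;
		}
	} else {
		my_lock(inode);
		inode_dio_wait(inode);
	}

	/* ... build and submit the direct I/O ... */

	my_unlock(inode);
	return 0;
}
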
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf7304c..05dc87e8c1f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
995 lockmode = xfs_ilock_data_map_shared(ip); 995 lockmode = xfs_ilock_data_map_shared(ip);
996 } 996 }
997 997
998 if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
999 error = -EAGAIN;
1000 goto out_unlock;
1001 }
1002
998 ASSERT(offset <= mp->m_super->s_maxbytes); 1003 ASSERT(offset <= mp->m_super->s_maxbytes);
999 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) 1004 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
1000 length = mp->m_super->s_maxbytes - offset; 1005 length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
1016 1021
1017 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { 1022 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
1018 if (flags & IOMAP_DIRECT) { 1023 if (flags & IOMAP_DIRECT) {
1024 /*
1025 * A reflinked inode will result in CoW alloc.
1026 * FIXME: It could still overwrite on unshared extents
1027 * and not need allocation.
1028 */
1029 if (flags & IOMAP_NOWAIT) {
1030 error = -EAGAIN;
1031 goto out_unlock;
1032 }
1019 /* may drop and re-acquire the ilock */ 1033 /* may drop and re-acquire the ilock */
1020 error = xfs_reflink_allocate_cow(ip, &imap, &shared, 1034 error = xfs_reflink_allocate_cow(ip, &imap, &shared,
1021 &lockmode); 1035 &lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
1033 1047
1034 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { 1048 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
1035 /* 1049 /*
1050 * If nowait is set bail since we are going to make
1051 * allocations.
1052 */
1053 if (flags & IOMAP_NOWAIT) {
1054 error = -EAGAIN;
1055 goto out_unlock;
1056 }
1057 /*
1036 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES 1058 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
1037 * pages to keep the chunks of work done where somewhat symmetric 1059 * pages to keep the chunks of work done where somewhat symmetric
1038 * with the work writeback does. This is a completely arbitrary 1060 * with the work writeback does. This is a completely arbitrary
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575f101d..97df4db13b2e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1766,7 +1766,8 @@ STATIC int __init
1766xfs_init_zones(void) 1766xfs_init_zones(void)
1767{ 1767{
1768 xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, 1768 xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
1769 offsetof(struct xfs_ioend, io_inline_bio)); 1769 offsetof(struct xfs_ioend, io_inline_bio),
1770 BIOSET_NEED_BVECS);
1770 if (!xfs_ioend_bioset) 1771 if (!xfs_ioend_bioset)
1771 goto out; 1772 goto out;
1772 1773
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a7e29fa0981f..664a27da276d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio)
118/* 118/*
119 * will die 119 * will die
120 */ 120 */
121#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
122#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) 121#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
123 122
124/* 123/*
@@ -373,8 +372,11 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
373 return bio_split(bio, sectors, gfp, bs); 372 return bio_split(bio, sectors, gfp, bs);
374} 373}
375 374
376extern struct bio_set *bioset_create(unsigned int, unsigned int); 375extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
377extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); 376enum {
377 BIOSET_NEED_BVECS = BIT(0),
378 BIOSET_NEED_RESCUER = BIT(1),
379};
378extern void bioset_free(struct bio_set *); 380extern void bioset_free(struct bio_set *);
379extern mempool_t *biovec_create_pool(int pool_entries); 381extern mempool_t *biovec_create_pool(int pool_entries);
380 382
@@ -392,11 +394,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
392 return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); 394 return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
393} 395}
394 396
395static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
396{
397 return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
398}
399
400static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) 397static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
401{ 398{
402 return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); 399 return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
@@ -414,7 +411,13 @@ extern void bio_endio(struct bio *);
414 411
415static inline void bio_io_error(struct bio *bio) 412static inline void bio_io_error(struct bio *bio)
416{ 413{
417 bio->bi_error = -EIO; 414 bio->bi_status = BLK_STS_IOERR;
415 bio_endio(bio);
416}
417
418static inline void bio_wouldblock_error(struct bio *bio)
419{
420 bio->bi_status = BLK_STS_AGAIN;
418 bio_endio(bio); 421 bio_endio(bio);
419} 422}
420 423
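
bioset_create() grows a flags argument and replaces the separate bioset_create_nobvec() variant; callers whose bios carry bvecs pass BIOSET_NEED_BVECS, and BIOSET_NEED_RESCUER keeps the old rescuer-workqueue behaviour where it is still needed. Adapting a caller is mechanical, as in this sketch modelled on the xfs_super.c hunk earlier in the diff (struct my_ioend is illustrative):

struct bio_set *bs;

bs = bioset_create(4 * MAX_BUF_PER_PAGE,
		   offsetof(struct my_ioend, inline_bio),	/* front pad */
		   BIOSET_NEED_BVECS);
if (!bs)
	return -ENOMEM;
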
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fcd641032f8d..23d32ff0b462 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -39,8 +39,6 @@ struct blk_mq_hw_ctx {
39 struct blk_mq_tags *tags; 39 struct blk_mq_tags *tags;
40 struct blk_mq_tags *sched_tags; 40 struct blk_mq_tags *sched_tags;
41 41
42 struct srcu_struct queue_rq_srcu;
43
44 unsigned long queued; 42 unsigned long queued;
45 unsigned long run; 43 unsigned long run;
46#define BLK_MQ_MAX_DISPATCH_ORDER 7 44#define BLK_MQ_MAX_DISPATCH_ORDER 7
@@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
62 struct dentry *debugfs_dir; 60 struct dentry *debugfs_dir;
63 struct dentry *sched_debugfs_dir; 61 struct dentry *sched_debugfs_dir;
64#endif 62#endif
63
64 /* Must be the last member - see also blk_mq_hw_ctx_size(). */
65 struct srcu_struct queue_rq_srcu[0];
65}; 66};
66 67
67struct blk_mq_tag_set { 68struct blk_mq_tag_set {
@@ -87,7 +88,8 @@ struct blk_mq_queue_data {
87 bool last; 88 bool last;
88}; 89};
89 90
90typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); 91typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
92 const struct blk_mq_queue_data *);
91typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); 93typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
92typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); 94typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
93typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); 95typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -142,6 +144,8 @@ struct blk_mq_ops {
142 init_request_fn *init_request; 144 init_request_fn *init_request;
143 exit_request_fn *exit_request; 145 exit_request_fn *exit_request;
144 reinit_request_fn *reinit_request; 146 reinit_request_fn *reinit_request;
147 /* Called from inside blk_get_request() */
148 void (*initialize_rq_fn)(struct request *rq);
145 149
146 map_queues_fn *map_queues; 150 map_queues_fn *map_queues;
147 151
@@ -155,10 +159,6 @@ struct blk_mq_ops {
155}; 159};
156 160
157enum { 161enum {
158 BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */
159 BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */
160 BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */
161
162 BLK_MQ_F_SHOULD_MERGE = 1 << 0, 162 BLK_MQ_F_SHOULD_MERGE = 1 << 0,
163 BLK_MQ_F_TAG_SHARED = 1 << 1, 163 BLK_MQ_F_TAG_SHARED = 1 << 1,
164 BLK_MQ_F_SG_MERGE = 1 << 2, 164 BLK_MQ_F_SG_MERGE = 1 << 2,
@@ -204,10 +204,10 @@ enum {
204 BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ 204 BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
205}; 205};
206 206
207struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 207struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
208 unsigned int flags); 208 unsigned int flags);
209struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op, 209struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
210 unsigned int flags, unsigned int hctx_idx); 210 unsigned int op, unsigned int flags, unsigned int hctx_idx);
211struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); 211struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
212 212
213enum { 213enum {
@@ -230,8 +230,8 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
230 230
231int blk_mq_request_started(struct request *rq); 231int blk_mq_request_started(struct request *rq);
232void blk_mq_start_request(struct request *rq); 232void blk_mq_start_request(struct request *rq);
233void blk_mq_end_request(struct request *rq, int error); 233void blk_mq_end_request(struct request *rq, blk_status_t error);
234void __blk_mq_end_request(struct request *rq, int error); 234void __blk_mq_end_request(struct request *rq, blk_status_t error);
235 235
236void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); 236void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
237void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, 237void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
@@ -247,6 +247,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
247void blk_mq_start_hw_queues(struct request_queue *q); 247void blk_mq_start_hw_queues(struct request_queue *q);
248void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 248void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
249void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); 249void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
250void blk_mq_quiesce_queue(struct request_queue *q);
251void blk_mq_unquiesce_queue(struct request_queue *q);
250void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 252void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
251void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 253void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
252void blk_mq_run_hw_queues(struct request_queue *q, bool async); 254void blk_mq_run_hw_queues(struct request_queue *q, bool async);
@@ -264,6 +266,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
264int blk_mq_map_queues(struct blk_mq_tag_set *set); 266int blk_mq_map_queues(struct blk_mq_tag_set *set);
265void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); 267void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
266 268
269void blk_mq_quiesce_queue_nowait(struct request_queue *q);
270
267/* 271/*
268 * Driver command data is immediately after the request. So subtract request 272 * Driver command data is immediately after the request. So subtract request
269 * size to get back to the original request, add request size to get the PDU. 273 * size to get back to the original request, add request size to get the PDU.
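
With BLK_MQ_RQ_QUEUE_{OK,BUSY,ERROR} gone, ->queue_rq() returns a blk_status_t directly: BLK_STS_OK for success, BLK_STS_RESOURCE where the old _BUSY asked for a requeue, and a specific error status where _ERROR was returned. A driver skeleton under the new contract; my_dev and my_submit are illustrative:

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct my_dev *dev = hctx->queue->queuedata;

	blk_mq_start_request(rq);

	if (!my_submit(dev, rq))		/* out of device resources */
		return BLK_STS_RESOURCE;	/* block layer requeues */

	return BLK_STS_OK;
}
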
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 61339bc44400..d2eb87c84d82 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,27 @@ struct io_context;
17struct cgroup_subsys_state; 17struct cgroup_subsys_state;
18typedef void (bio_end_io_t) (struct bio *); 18typedef void (bio_end_io_t) (struct bio *);
19 19
20/*
21 * Block error status values. See block/blk-core:blk_errors for the details.
22 */
23typedef u8 __bitwise blk_status_t;
24#define BLK_STS_OK 0
25#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
26#define BLK_STS_TIMEOUT ((__force blk_status_t)2)
27#define BLK_STS_NOSPC ((__force blk_status_t)3)
28#define BLK_STS_TRANSPORT ((__force blk_status_t)4)
29#define BLK_STS_TARGET ((__force blk_status_t)5)
30#define BLK_STS_NEXUS ((__force blk_status_t)6)
31#define BLK_STS_MEDIUM ((__force blk_status_t)7)
32#define BLK_STS_PROTECTION ((__force blk_status_t)8)
33#define BLK_STS_RESOURCE ((__force blk_status_t)9)
34#define BLK_STS_IOERR ((__force blk_status_t)10)
35
36/* hack for device mapper, don't use elsewhere: */
37#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11)
38
39#define BLK_STS_AGAIN ((__force blk_status_t)12)
40
20struct blk_issue_stat { 41struct blk_issue_stat {
21 u64 stat; 42 u64 stat;
22}; 43};
@@ -28,13 +49,14 @@ struct blk_issue_stat {
28struct bio { 49struct bio {
29 struct bio *bi_next; /* request queue link */ 50 struct bio *bi_next; /* request queue link */
30 struct block_device *bi_bdev; 51 struct block_device *bi_bdev;
31 int bi_error; 52 blk_status_t bi_status;
32 unsigned int bi_opf; /* bottom bits req flags, 53 unsigned int bi_opf; /* bottom bits req flags,
33 * top bits REQ_OP. Use 54 * top bits REQ_OP. Use
34 * accessors. 55 * accessors.
35 */ 56 */
36 unsigned short bi_flags; /* status, etc and bvec pool number */ 57 unsigned short bi_flags; /* status, etc and bvec pool number */
37 unsigned short bi_ioprio; 58 unsigned short bi_ioprio;
59 unsigned short bi_write_hint;
38 60
39 struct bvec_iter bi_iter; 61 struct bvec_iter bi_iter;
40 62
@@ -205,6 +227,7 @@ enum req_flag_bits {
205 /* command specific flags for REQ_OP_WRITE_ZEROES: */ 227 /* command specific flags for REQ_OP_WRITE_ZEROES: */
206 __REQ_NOUNMAP, /* do not free blocks when zeroing */ 228 __REQ_NOUNMAP, /* do not free blocks when zeroing */
207 229
230 __REQ_NOWAIT, /* Don't wait if request will block */
208 __REQ_NR_BITS, /* stops here */ 231 __REQ_NR_BITS, /* stops here */
209}; 232};
210 233
@@ -223,6 +246,7 @@ enum req_flag_bits {
223#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) 246#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
224 247
225#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) 248#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
249#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
226 250
227#define REQ_FAILFAST_MASK \ 251#define REQ_FAILFAST_MASK \
228 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) 252 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
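
blk_status_t is a narrow __bitwise type rather than an errno, so code compares against the BLK_STS_* constants and only crosses back into errno space through blk_status_to_errno(). The new REQ_NOWAIT flag pairs with BLK_STS_AGAIN: a submitter that must not sleep tags its bio, and a queue that would block completes it immediately with that status (see bio_wouldblock_error() in the bio.h hunk above). A submission-side fragment, with my_end_io standing in for the caller's completion handler:

	bio->bi_opf = REQ_OP_WRITE | REQ_NOWAIT;	/* do not sleep for a request */
	bio->bi_write_hint = inode->i_write_hint;	/* new per-bio lifetime hint */
	bio->bi_end_io = my_end_io;
	submit_bio(bio);
	/* in my_end_io(): bi_status == BLK_STS_AGAIN means "retry from a
	 * context that may block", typically surfaced to callers as -EAGAIN */
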
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ddd36bd2173..25f6a0cb27d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -55,7 +55,7 @@ struct blk_stat_callback;
55 */ 55 */
56#define BLKCG_MAX_POLS 3 56#define BLKCG_MAX_POLS 3
57 57
58typedef void (rq_end_io_fn)(struct request *, int); 58typedef void (rq_end_io_fn)(struct request *, blk_status_t);
59 59
60#define BLK_RL_SYNCFULL (1U << 0) 60#define BLK_RL_SYNCFULL (1U << 0)
61#define BLK_RL_ASYNCFULL (1U << 1) 61#define BLK_RL_ASYNCFULL (1U << 1)
@@ -225,6 +225,8 @@ struct request {
225 225
226 unsigned int extra_len; /* length of alignment and padding */ 226 unsigned int extra_len; /* length of alignment and padding */
227 227
228 unsigned short write_hint;
229
228 unsigned long deadline; 230 unsigned long deadline;
229 struct list_head timeout_list; 231 struct list_head timeout_list;
230 232
@@ -412,8 +414,12 @@ struct request_queue {
412 rq_timed_out_fn *rq_timed_out_fn; 414 rq_timed_out_fn *rq_timed_out_fn;
413 dma_drain_needed_fn *dma_drain_needed; 415 dma_drain_needed_fn *dma_drain_needed;
414 lld_busy_fn *lld_busy_fn; 416 lld_busy_fn *lld_busy_fn;
417 /* Called just after a request is allocated */
415 init_rq_fn *init_rq_fn; 418 init_rq_fn *init_rq_fn;
419 /* Called just before a request is freed */
416 exit_rq_fn *exit_rq_fn; 420 exit_rq_fn *exit_rq_fn;
421 /* Called from inside blk_get_request() */
422 void (*initialize_rq_fn)(struct request *rq);
417 423
418 const struct blk_mq_ops *mq_ops; 424 const struct blk_mq_ops *mq_ops;
419 425
@@ -590,6 +596,9 @@ struct request_queue {
590 void *rq_alloc_data; 596 void *rq_alloc_data;
591 597
592 struct work_struct release_work; 598 struct work_struct release_work;
599
600#define BLK_MAX_WRITE_HINTS 5
601 u64 write_hints[BLK_MAX_WRITE_HINTS];
593}; 602};
594 603
595#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 604#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -622,6 +631,8 @@ struct request_queue {
622#define QUEUE_FLAG_STATS 27 /* track rq completion times */ 631#define QUEUE_FLAG_STATS 27 /* track rq completion times */
623#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ 632#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */
624#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ 633#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */
634#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */
635#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */
625 636
626#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 637#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
627 (1 << QUEUE_FLAG_STACKABLE) | \ 638 (1 << QUEUE_FLAG_STACKABLE) | \
@@ -633,6 +644,13 @@ struct request_queue {
633 (1 << QUEUE_FLAG_SAME_COMP) | \ 644 (1 << QUEUE_FLAG_SAME_COMP) | \
634 (1 << QUEUE_FLAG_POLL)) 645 (1 << QUEUE_FLAG_POLL))
635 646
647/*
648 * @q->queue_lock is set while a queue is being initialized. Since we know
649 * that no other threads access the queue object before @q->queue_lock has
650 * been set, it is safe to manipulate queue flags without holding the
651 * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
652 * blk_init_allocated_queue().
653 */
636static inline void queue_lockdep_assert_held(struct request_queue *q) 654static inline void queue_lockdep_assert_held(struct request_queue *q)
637{ 655{
638 if (q->queue_lock) 656 if (q->queue_lock)
@@ -712,10 +730,13 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
712#define blk_queue_secure_erase(q) \ 730#define blk_queue_secure_erase(q) \
713 (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) 731 (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
714#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) 732#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
733#define blk_queue_scsi_passthrough(q) \
734 test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
715 735
716#define blk_noretry_request(rq) \ 736#define blk_noretry_request(rq) \
717 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ 737 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
718 REQ_FAILFAST_DRIVER)) 738 REQ_FAILFAST_DRIVER))
739#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
719 740
720static inline bool blk_account_rq(struct request *rq) 741static inline bool blk_account_rq(struct request *rq)
721{ 742{
@@ -814,7 +835,8 @@ static inline bool rq_mergeable(struct request *rq)
814 835
815static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) 836static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
816{ 837{
817 if (bio_data(a) == bio_data(b)) 838 if (bio_page(a) == bio_page(b) &&
839 bio_offset(a) == bio_offset(b))
818 return true; 840 return true;
819 841
820 return false; 842 return false;
@@ -862,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
862#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 884#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
863#define BLK_MIN_SG_TIMEOUT (7 * HZ) 885#define BLK_MIN_SG_TIMEOUT (7 * HZ)
864 886
865#ifdef CONFIG_BOUNCE
866extern int init_emergency_isa_pool(void);
867extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
868#else
869static inline int init_emergency_isa_pool(void)
870{
871 return 0;
872}
873static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
874{
875}
876#endif /* CONFIG_MMU */
877
878struct rq_map_data { 887struct rq_map_data {
879 struct page **pages; 888 struct page **pages;
880 int page_order; 889 int page_order;
@@ -933,7 +942,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
933extern void blk_init_request_from_bio(struct request *req, struct bio *bio); 942extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
934extern void blk_put_request(struct request *); 943extern void blk_put_request(struct request *);
935extern void __blk_put_request(struct request_queue *, struct request *); 944extern void __blk_put_request(struct request_queue *, struct request *);
936extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 945extern struct request *blk_get_request(struct request_queue *, unsigned int op,
946 gfp_t gfp_mask);
937extern void blk_requeue_request(struct request_queue *, struct request *); 947extern void blk_requeue_request(struct request_queue *, struct request *);
938extern int blk_lld_busy(struct request_queue *q); 948extern int blk_lld_busy(struct request_queue *q);
939extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 949extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -941,12 +951,11 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
941 int (*bio_ctr)(struct bio *, struct bio *, void *), 951 int (*bio_ctr)(struct bio *, struct bio *, void *),
942 void *data); 952 void *data);
943extern void blk_rq_unprep_clone(struct request *rq); 953extern void blk_rq_unprep_clone(struct request *rq);
944extern int blk_insert_cloned_request(struct request_queue *q, 954extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
945 struct request *rq); 955 struct request *rq);
946extern int blk_rq_append_bio(struct request *rq, struct bio *bio); 956extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
947extern void blk_delay_queue(struct request_queue *, unsigned long); 957extern void blk_delay_queue(struct request_queue *, unsigned long);
948extern void blk_queue_split(struct request_queue *, struct bio **, 958extern void blk_queue_split(struct request_queue *, struct bio **);
949 struct bio_set *);
950extern void blk_recount_segments(struct request_queue *, struct bio *); 959extern void blk_recount_segments(struct request_queue *, struct bio *);
951extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); 960extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
952extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, 961extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
@@ -967,7 +976,6 @@ extern void __blk_run_queue(struct request_queue *q);
967extern void __blk_run_queue_uncond(struct request_queue *q); 976extern void __blk_run_queue_uncond(struct request_queue *q);
968extern void blk_run_queue(struct request_queue *); 977extern void blk_run_queue(struct request_queue *);
969extern void blk_run_queue_async(struct request_queue *q); 978extern void blk_run_queue_async(struct request_queue *q);
970extern void blk_mq_quiesce_queue(struct request_queue *q);
971extern int blk_rq_map_user(struct request_queue *, struct request *, 979extern int blk_rq_map_user(struct request_queue *, struct request *,
972 struct rq_map_data *, void __user *, unsigned long, 980 struct rq_map_data *, void __user *, unsigned long,
973 gfp_t); 981 gfp_t);
@@ -981,6 +989,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *,
981extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 989extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
982 struct request *, int, rq_end_io_fn *); 990 struct request *, int, rq_end_io_fn *);
983 991
992int blk_status_to_errno(blk_status_t status);
993blk_status_t errno_to_blk_status(int errno);
994
984bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); 995bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
985 996
986static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 997static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -1113,16 +1124,16 @@ extern struct request *blk_fetch_request(struct request_queue *q);
1113 * blk_end_request() for parts of the original function. 1124 * blk_end_request() for parts of the original function.
1114 * This prevents code duplication in drivers. 1125 * This prevents code duplication in drivers.
1115 */ 1126 */
1116extern bool blk_update_request(struct request *rq, int error, 1127extern bool blk_update_request(struct request *rq, blk_status_t error,
1117 unsigned int nr_bytes); 1128 unsigned int nr_bytes);
1118extern void blk_finish_request(struct request *rq, int error); 1129extern void blk_finish_request(struct request *rq, blk_status_t error);
1119extern bool blk_end_request(struct request *rq, int error, 1130extern bool blk_end_request(struct request *rq, blk_status_t error,
1120 unsigned int nr_bytes); 1131 unsigned int nr_bytes);
1121extern void blk_end_request_all(struct request *rq, int error); 1132extern void blk_end_request_all(struct request *rq, blk_status_t error);
1122extern bool __blk_end_request(struct request *rq, int error, 1133extern bool __blk_end_request(struct request *rq, blk_status_t error,
1123 unsigned int nr_bytes); 1134 unsigned int nr_bytes);
1124extern void __blk_end_request_all(struct request *rq, int error); 1135extern void __blk_end_request_all(struct request *rq, blk_status_t error);
1125extern bool __blk_end_request_cur(struct request *rq, int error); 1136extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
1126 1137
1127extern void blk_complete_request(struct request *); 1138extern void blk_complete_request(struct request *);
1128extern void __blk_complete_request(struct request *); 1139extern void __blk_complete_request(struct request *);
@@ -1374,11 +1385,6 @@ enum blk_default_limits {
1374 1385
1375#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 1386#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
1376 1387
1377static inline unsigned long queue_bounce_pfn(struct request_queue *q)
1378{
1379 return q->limits.bounce_pfn;
1380}
1381
1382static inline unsigned long queue_segment_boundary(struct request_queue *q) 1388static inline unsigned long queue_segment_boundary(struct request_queue *q)
1383{ 1389{
1384 return q->limits.seg_boundary_mask; 1390 return q->limits.seg_boundary_mask;
@@ -1780,7 +1786,7 @@ struct blk_integrity_iter {
1780 const char *disk_name; 1786 const char *disk_name;
1781}; 1787};
1782 1788
1783typedef int (integrity_processing_fn) (struct blk_integrity_iter *); 1789typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
1784 1790
1785struct blk_integrity_profile { 1791struct blk_integrity_profile {
1786 integrity_processing_fn *generate_fn; 1792 integrity_processing_fn *generate_fn;
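
rq_end_io_fn and the blk_end_request() family carry blk_status_t as well, and the two helpers declared here are the intended way to cross between status codes and errnos. A completion callback for blk_execute_rq_nowait() that still wants an errno might look like this; struct my_cmd and my_rq_done are illustrative:

struct my_cmd {
	struct completion done;
	int result;
};

static void my_rq_done(struct request *rq, blk_status_t status)
{
	struct my_cmd *cmd = rq->end_io_data;

	cmd->result = blk_status_to_errno(status);	/* -EIO, -ENOSPC, ... */
	complete(&cmd->done);
	blk_put_request(rq);
}
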
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index f4c639c0c362..456da5017b32 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -72,9 +72,9 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
72 * 2 : The target wants to push back the io 72 * 2 : The target wants to push back the io
73 */ 73 */
74typedef int (*dm_endio_fn) (struct dm_target *ti, 74typedef int (*dm_endio_fn) (struct dm_target *ti,
75 struct bio *bio, int error); 75 struct bio *bio, blk_status_t *error);
76typedef int (*dm_request_endio_fn) (struct dm_target *ti, 76typedef int (*dm_request_endio_fn) (struct dm_target *ti,
77 struct request *clone, int error, 77 struct request *clone, blk_status_t error,
78 union map_info *map_context); 78 union map_info *map_context);
79 79
80typedef void (*dm_presuspend_fn) (struct dm_target *ti); 80typedef void (*dm_presuspend_fn) (struct dm_target *ti);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 0e306c5a86d6..5bc8f8682a3e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -104,8 +104,9 @@ struct elevator_mq_ops {
104 int (*request_merge)(struct request_queue *q, struct request **, struct bio *); 104 int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
105 void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); 105 void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
106 void (*requests_merged)(struct request_queue *, struct request *, struct request *); 106 void (*requests_merged)(struct request_queue *, struct request *, struct request *);
107 struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); 107 void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
108 void (*put_request)(struct request *); 108 void (*prepare_request)(struct request *, struct bio *bio);
109 void (*finish_request)(struct request *);
109 void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); 110 void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
110 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); 111 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
111 bool (*has_work)(struct blk_mq_hw_ctx *); 112 bool (*has_work)(struct blk_mq_hw_ctx *);
@@ -114,8 +115,6 @@ struct elevator_mq_ops {
114 void (*requeue_request)(struct request *); 115 void (*requeue_request)(struct request *);
115 struct request *(*former_request)(struct request_queue *, struct request *); 116 struct request *(*former_request)(struct request_queue *, struct request *);
116 struct request *(*next_request)(struct request_queue *, struct request *); 117 struct request *(*next_request)(struct request_queue *, struct request *);
117 int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
118 void (*put_rq_priv)(struct request_queue *, struct request *);
119 void (*init_icq)(struct io_cq *); 118 void (*init_icq)(struct io_cq *);
120 void (*exit_icq)(struct io_cq *); 119 void (*exit_icq)(struct io_cq *);
121}; 120};
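
The elevator hooks are reshaped: get_request/put_request and get_rq_priv/put_rq_priv are folded into limit_depth (called before tag allocation), prepare_request and finish_request. A scheduler's ops table then looks roughly like the sketch below; the my_sched_* functions are placeholders, and block/kyber-iosched.c and block/bfq-iosched.c in this same series are the real users:

static struct elevator_type my_sched = {
	.ops.mq = {
		.limit_depth		= my_sched_limit_depth,
		.prepare_request	= my_sched_prepare_request,
		.finish_request		= my_sched_finish_request,
		.insert_requests	= my_sched_insert_requests,
		.dispatch_request	= my_sched_dispatch_request,
		.has_work		= my_sched_has_work,
	},
	.uses_mq	= true,
	.elevator_name	= "my-sched",
	.elevator_owner	= THIS_MODULE,
};
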
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3e68cabb8457..65adbddb3163 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -20,6 +20,7 @@
20#include <linux/rwsem.h> 20#include <linux/rwsem.h>
21#include <linux/capability.h> 21#include <linux/capability.h>
22#include <linux/semaphore.h> 22#include <linux/semaphore.h>
23#include <linux/fcntl.h>
23#include <linux/fiemap.h> 24#include <linux/fiemap.h>
24#include <linux/rculist_bl.h> 25#include <linux/rculist_bl.h>
25#include <linux/atomic.h> 26#include <linux/atomic.h>
@@ -143,6 +144,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
143/* File was opened by fanotify and shouldn't generate fanotify events */ 144/* File was opened by fanotify and shouldn't generate fanotify events */
144#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) 145#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
145 146
147/* File is capable of returning -EAGAIN if AIO will block */
148#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000)
149
146/* 150/*
147 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector 151 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
148 * that indicates that they should check the contents of the iovec are 152 * that indicates that they should check the contents of the iovec are
@@ -262,6 +266,18 @@ struct page;
262struct address_space; 266struct address_space;
263struct writeback_control; 267struct writeback_control;
264 268
269/*
270 * Write life time hint values.
271 */
272enum rw_hint {
273 WRITE_LIFE_NOT_SET = 0,
274 WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
275 WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
276 WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
277 WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
278 WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
279};
280
265#define IOCB_EVENTFD (1 << 0) 281#define IOCB_EVENTFD (1 << 0)
266#define IOCB_APPEND (1 << 1) 282#define IOCB_APPEND (1 << 1)
267#define IOCB_DIRECT (1 << 2) 283#define IOCB_DIRECT (1 << 2)
@@ -269,6 +285,7 @@ struct writeback_control;
269#define IOCB_DSYNC (1 << 4) 285#define IOCB_DSYNC (1 << 4)
270#define IOCB_SYNC (1 << 5) 286#define IOCB_SYNC (1 << 5)
271#define IOCB_WRITE (1 << 6) 287#define IOCB_WRITE (1 << 6)
288#define IOCB_NOWAIT (1 << 7)
272 289
273struct kiocb { 290struct kiocb {
274 struct file *ki_filp; 291 struct file *ki_filp;
@@ -276,6 +293,7 @@ struct kiocb {
276 void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); 293 void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
277 void *private; 294 void *private;
278 int ki_flags; 295 int ki_flags;
296 enum rw_hint ki_hint;
279}; 297};
280 298
281static inline bool is_sync_kiocb(struct kiocb *kiocb) 299static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -283,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
283 return kiocb->ki_complete == NULL; 301 return kiocb->ki_complete == NULL;
284} 302}
285 303
286static inline int iocb_flags(struct file *file);
287
288static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
289{
290 *kiocb = (struct kiocb) {
291 .ki_filp = filp,
292 .ki_flags = iocb_flags(filp),
293 };
294}
295
296/* 304/*
297 * "descriptor" for what we're up to with a read. 305 * "descriptor" for what we're up to with a read.
298 * This allows us to use the same read code yet 306 * This allows us to use the same read code yet
@@ -593,6 +601,7 @@ struct inode {
593 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 601 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
594 unsigned short i_bytes; 602 unsigned short i_bytes;
595 unsigned int i_blkbits; 603 unsigned int i_blkbits;
604 enum rw_hint i_write_hint;
596 blkcnt_t i_blocks; 605 blkcnt_t i_blocks;
597 606
598#ifdef __NEED_I_SIZE_ORDERED 607#ifdef __NEED_I_SIZE_ORDERED
@@ -847,6 +856,7 @@ struct file {
847 * Must not be taken from IRQ context. 856 * Must not be taken from IRQ context.
848 */ 857 */
849 spinlock_t f_lock; 858 spinlock_t f_lock;
859 enum rw_hint f_write_hint;
850 atomic_long_t f_count; 860 atomic_long_t f_count;
851 unsigned int f_flags; 861 unsigned int f_flags;
852 fmode_t f_mode; 862 fmode_t f_mode;
@@ -1022,8 +1032,6 @@ struct file_lock_context {
1022#define OFFT_OFFSET_MAX INT_LIMIT(off_t) 1032#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
1023#endif 1033#endif
1024 1034
1025#include <linux/fcntl.h>
1026
1027extern void send_sigio(struct fown_struct *fown, int fd, int band); 1035extern void send_sigio(struct fown_struct *fown, int fd, int band);
1028 1036
1029/* 1037/*
@@ -1874,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
1874 return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid); 1882 return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
1875} 1883}
1876 1884
1885static inline enum rw_hint file_write_hint(struct file *file)
1886{
1887 if (file->f_write_hint != WRITE_LIFE_NOT_SET)
1888 return file->f_write_hint;
1889
1890 return file_inode(file)->i_write_hint;
1891}
1892
1893static inline int iocb_flags(struct file *file);
1894
1895static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
1896{
1897 *kiocb = (struct kiocb) {
1898 .ki_filp = filp,
1899 .ki_flags = iocb_flags(filp),
1900 .ki_hint = file_write_hint(filp),
1901 };
1902}
1903
1877/* 1904/*
1878 * Inode state bits. Protected by inode->i_lock 1905 * Inode state bits. Protected by inode->i_lock
1879 * 1906 *
@@ -2518,6 +2545,8 @@ extern int filemap_fdatawait(struct address_space *);
2518extern void filemap_fdatawait_keep_errors(struct address_space *); 2545extern void filemap_fdatawait_keep_errors(struct address_space *);
2519extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, 2546extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
2520 loff_t lend); 2547 loff_t lend);
2548extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
2549 loff_t lend);
2521extern int filemap_write_and_wait(struct address_space *mapping); 2550extern int filemap_write_and_wait(struct address_space *mapping);
2522extern int filemap_write_and_wait_range(struct address_space *mapping, 2551extern int filemap_write_and_wait_range(struct address_space *mapping,
2523 loff_t lstart, loff_t lend); 2552 loff_t lstart, loff_t lend);
@@ -2844,7 +2873,7 @@ enum {
2844 DIO_SKIP_DIO_COUNT = 0x08, 2873 DIO_SKIP_DIO_COUNT = 0x08,
2845}; 2874};
2846 2875
2847void dio_end_io(struct bio *bio, int error); 2876void dio_end_io(struct bio *bio);
2848 2877
2849ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, 2878ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
2850 struct block_device *bdev, struct iov_iter *iter, 2879 struct block_device *bdev, struct iov_iter *iter,
@@ -3057,6 +3086,25 @@ static inline int iocb_flags(struct file *file)
3057 return res; 3086 return res;
3058} 3087}
3059 3088
3089static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
3090{
3091 if (unlikely(flags & ~RWF_SUPPORTED))
3092 return -EOPNOTSUPP;
3093
3094 if (flags & RWF_NOWAIT) {
3095 if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
3096 return -EOPNOTSUPP;
3097 ki->ki_flags |= IOCB_NOWAIT;
3098 }
3099 if (flags & RWF_HIPRI)
3100 ki->ki_flags |= IOCB_HIPRI;
3101 if (flags & RWF_DSYNC)
3102 ki->ki_flags |= IOCB_DSYNC;
3103 if (flags & RWF_SYNC)
3104 ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
3105 return 0;
3106}
3107
3060static inline ino_t parent_ino(struct dentry *dentry) 3108static inline ino_t parent_ino(struct dentry *dentry)
3061{ 3109{
3062 ino_t res; 3110 ino_t res;
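/*
 * Usage sketch for the fs.h additions above (not part of the patch): a
 * synchronous submission path can seed the kiocb from the file -- which
 * now carries the write hint via file_write_hint() -- and translate
 * per-call RWF_* flags into IOCB_* flags with kiocb_set_rw_flags().
 * The wrapper below is hypothetical; only init_sync_kiocb(),
 * kiocb_set_rw_flags() and the IOCB_NOWAIT flag come from this diff.
 */
#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t sync_write_with_flags(struct file *filp, const char __user *buf,
                                     size_t len, loff_t *ppos, int rwf)
{
        struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
        struct iov_iter iter;
        struct kiocb kiocb;
        ssize_t ret;

        init_sync_kiocb(&kiocb, filp);          /* ki_hint = file_write_hint(filp) */
        ret = kiocb_set_rw_flags(&kiocb, rwf);  /* e.g. RWF_NOWAIT -> IOCB_NOWAIT */
        if (ret)
                return ret;
        kiocb.ki_pos = *ppos;

        iov_iter_init(&iter, WRITE, &iov, 1, len);
        ret = filp->f_op->write_iter(&kiocb, &iter);
        if (ret > 0)
                *ppos = kiocb.ki_pos;
        return ret;
}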
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6980ca322074..dc152e4b7f73 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -671,7 +671,7 @@ struct ide_port_ops {
671 void (*init_dev)(ide_drive_t *); 671 void (*init_dev)(ide_drive_t *);
672 void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); 672 void (*set_pio_mode)(struct hwif_s *, ide_drive_t *);
673 void (*set_dma_mode)(struct hwif_s *, ide_drive_t *); 673 void (*set_dma_mode)(struct hwif_s *, ide_drive_t *);
674 int (*reset_poll)(ide_drive_t *); 674 blk_status_t (*reset_poll)(ide_drive_t *);
675 void (*pre_reset)(ide_drive_t *); 675 void (*pre_reset)(ide_drive_t *);
676 void (*resetproc)(ide_drive_t *); 676 void (*resetproc)(ide_drive_t *);
677 void (*maskproc)(ide_drive_t *, int); 677 void (*maskproc)(ide_drive_t *, int);
@@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l
1092extern int ide_vlb_clk; 1092extern int ide_vlb_clk;
1093extern int ide_pci_clk; 1093extern int ide_pci_clk;
1094 1094
1095int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int); 1095int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
1096void ide_kill_rq(ide_drive_t *, struct request *); 1096void ide_kill_rq(ide_drive_t *, struct request *);
1097 1097
1098void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); 1098void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive,
1123 const struct ide_devset *setting, int arg); 1123 const struct ide_devset *setting, int arg);
1124 1124
1125void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8); 1125void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
1126int ide_complete_rq(ide_drive_t *, int, unsigned int); 1126int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
1127 1127
1128void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd); 1128void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
1129void ide_tf_dump(const char *, struct ide_cmd *); 1129void ide_tf_dump(const char *, struct ide_cmd *);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e788da31..69f4e9470084 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -52,6 +52,7 @@ struct iomap {
52#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ 52#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
53#define IOMAP_FAULT (1 << 3) /* mapping for page fault */ 53#define IOMAP_FAULT (1 << 3) /* mapping for page fault */
54#define IOMAP_DIRECT (1 << 4) /* direct I/O */ 54#define IOMAP_DIRECT (1 << 4) /* direct I/O */
55#define IOMAP_NOWAIT (1 << 5) /* Don't wait for writeback */
55 56
56struct iomap_ops { 57struct iomap_ops {
57 /* 58 /*
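/*
 * Sketch (not from the patch) of how a filesystem's direct I/O path might
 * use the new IOMAP_NOWAIT flag: propagate the per-kiocb IOCB_NOWAIT bit
 * so the iomap machinery knows it must not block.  "flags" and "iocb" are
 * assumed locals; IOMAP_WRITE and IOMAP_DIRECT are pre-existing flags.
 */
unsigned int flags = IOMAP_WRITE | IOMAP_DIRECT;

if (iocb->ki_flags & IOCB_NOWAIT)
        flags |= IOMAP_NOWAIT;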
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index e400a69fa1d3..6b8ee9e628e1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -87,7 +87,7 @@ enum {
87 NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ 87 NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
88}; 88};
89 89
90#define NVMF_AQ_DEPTH 32 90#define NVME_AQ_DEPTH 32
91 91
92enum { 92enum {
93 NVME_REG_CAP = 0x0000, /* Controller Capabilities */ 93 NVME_REG_CAP = 0x0000, /* Controller Capabilities */
@@ -102,6 +102,7 @@ enum {
102 NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ 102 NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
103 NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ 103 NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
104 NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ 104 NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
105 NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
105}; 106};
106 107
107#define NVME_CAP_MQES(cap) ((cap) & 0xffff) 108#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
@@ -208,9 +209,15 @@ struct nvme_id_ctrl {
208 __u8 tnvmcap[16]; 209 __u8 tnvmcap[16];
209 __u8 unvmcap[16]; 210 __u8 unvmcap[16];
210 __le32 rpmbs; 211 __le32 rpmbs;
211 __u8 rsvd316[4]; 212 __le16 edstt;
213 __u8 dsto;
214 __u8 fwug;
212 __le16 kas; 215 __le16 kas;
213 __u8 rsvd322[190]; 216 __le16 hctma;
217 __le16 mntmt;
218 __le16 mxtmt;
219 __le32 sanicap;
220 __u8 rsvd332[180];
214 __u8 sqes; 221 __u8 sqes;
215 __u8 cqes; 222 __u8 cqes;
216 __le16 maxcmd; 223 __le16 maxcmd;
@@ -246,6 +253,7 @@ enum {
246 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, 253 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
247 NVME_CTRL_VWC_PRESENT = 1 << 0, 254 NVME_CTRL_VWC_PRESENT = 1 << 0,
248 NVME_CTRL_OACS_SEC_SUPP = 1 << 0, 255 NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
256 NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
249 NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, 257 NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7,
250}; 258};
251 259
@@ -275,7 +283,7 @@ struct nvme_id_ns {
275 __le16 nabsn; 283 __le16 nabsn;
276 __le16 nabo; 284 __le16 nabo;
277 __le16 nabspf; 285 __le16 nabspf;
278 __u16 rsvd46; 286 __le16 noiob;
279 __u8 nvmcap[16]; 287 __u8 nvmcap[16];
280 __u8 rsvd64[40]; 288 __u8 rsvd64[40];
281 __u8 nguid[16]; 289 __u8 nguid[16];
@@ -289,6 +297,7 @@ enum {
289 NVME_ID_CNS_NS = 0x00, 297 NVME_ID_CNS_NS = 0x00,
290 NVME_ID_CNS_CTRL = 0x01, 298 NVME_ID_CNS_CTRL = 0x01,
291 NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, 299 NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
300 NVME_ID_CNS_NS_DESC_LIST = 0x03,
292 NVME_ID_CNS_NS_PRESENT_LIST = 0x10, 301 NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
293 NVME_ID_CNS_NS_PRESENT = 0x11, 302 NVME_ID_CNS_NS_PRESENT = 0x11,
294 NVME_ID_CNS_CTRL_NS_LIST = 0x12, 303 NVME_ID_CNS_CTRL_NS_LIST = 0x12,
@@ -296,6 +305,19 @@ enum {
296}; 305};
297 306
298enum { 307enum {
308 NVME_DIR_IDENTIFY = 0x00,
309 NVME_DIR_STREAMS = 0x01,
310 NVME_DIR_SND_ID_OP_ENABLE = 0x01,
311 NVME_DIR_SND_ST_OP_REL_ID = 0x01,
312 NVME_DIR_SND_ST_OP_REL_RSC = 0x02,
313 NVME_DIR_RCV_ID_OP_PARAM = 0x01,
314 NVME_DIR_RCV_ST_OP_PARAM = 0x01,
315 NVME_DIR_RCV_ST_OP_STATUS = 0x02,
316 NVME_DIR_RCV_ST_OP_RESOURCE = 0x03,
317 NVME_DIR_ENDIR = 0x01,
318};
319
320enum {
299 NVME_NS_FEAT_THIN = 1 << 0, 321 NVME_NS_FEAT_THIN = 1 << 0,
300 NVME_NS_FLBAS_LBA_MASK = 0xf, 322 NVME_NS_FLBAS_LBA_MASK = 0xf,
301 NVME_NS_FLBAS_META_EXT = 0x10, 323 NVME_NS_FLBAS_META_EXT = 0x10,
@@ -315,6 +337,22 @@ enum {
315 NVME_NS_DPS_PI_TYPE3 = 3, 337 NVME_NS_DPS_PI_TYPE3 = 3,
316}; 338};
317 339
340struct nvme_ns_id_desc {
341 __u8 nidt;
342 __u8 nidl;
343 __le16 reserved;
344};
345
346#define NVME_NIDT_EUI64_LEN 8
347#define NVME_NIDT_NGUID_LEN 16
348#define NVME_NIDT_UUID_LEN 16
349
350enum {
351 NVME_NIDT_EUI64 = 0x01,
352 NVME_NIDT_NGUID = 0x02,
353 NVME_NIDT_UUID = 0x03,
354};
355
318struct nvme_smart_log { 356struct nvme_smart_log {
319 __u8 critical_warning; 357 __u8 critical_warning;
320 __u8 temperature[2]; 358 __u8 temperature[2];
@@ -536,6 +574,7 @@ enum {
536 NVME_RW_PRINFO_PRCHK_APP = 1 << 11, 574 NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
537 NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, 575 NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
538 NVME_RW_PRINFO_PRACT = 1 << 13, 576 NVME_RW_PRINFO_PRACT = 1 << 13,
577 NVME_RW_DTYPE_STREAMS = 1 << 4,
539}; 578};
540 579
541struct nvme_dsm_cmd { 580struct nvme_dsm_cmd {
@@ -587,6 +626,11 @@ struct nvme_feat_auto_pst {
587 __le64 entries[32]; 626 __le64 entries[32];
588}; 627};
589 628
629enum {
630 NVME_HOST_MEM_ENABLE = (1 << 0),
631 NVME_HOST_MEM_RETURN = (1 << 1),
632};
633
590/* Admin commands */ 634/* Admin commands */
591 635
592enum nvme_admin_opcode { 636enum nvme_admin_opcode {
@@ -605,6 +649,8 @@ enum nvme_admin_opcode {
605 nvme_admin_download_fw = 0x11, 649 nvme_admin_download_fw = 0x11,
606 nvme_admin_ns_attach = 0x15, 650 nvme_admin_ns_attach = 0x15,
607 nvme_admin_keep_alive = 0x18, 651 nvme_admin_keep_alive = 0x18,
652 nvme_admin_directive_send = 0x19,
653 nvme_admin_directive_recv = 0x1a,
608 nvme_admin_dbbuf = 0x7C, 654 nvme_admin_dbbuf = 0x7C,
609 nvme_admin_format_nvm = 0x80, 655 nvme_admin_format_nvm = 0x80,
610 nvme_admin_security_send = 0x81, 656 nvme_admin_security_send = 0x81,
@@ -659,6 +705,8 @@ struct nvme_identify {
659 __u32 rsvd11[5]; 705 __u32 rsvd11[5];
660}; 706};
661 707
708#define NVME_IDENTIFY_DATA_SIZE 4096
709
662struct nvme_features { 710struct nvme_features {
663 __u8 opcode; 711 __u8 opcode;
664 __u8 flags; 712 __u8 flags;
@@ -668,7 +716,16 @@ struct nvme_features {
668 union nvme_data_ptr dptr; 716 union nvme_data_ptr dptr;
669 __le32 fid; 717 __le32 fid;
670 __le32 dword11; 718 __le32 dword11;
671 __u32 rsvd12[4]; 719 __le32 dword12;
720 __le32 dword13;
721 __le32 dword14;
722 __le32 dword15;
723};
724
725struct nvme_host_mem_buf_desc {
726 __le64 addr;
727 __le32 size;
728 __u32 rsvd;
672}; 729};
673 730
674struct nvme_create_cq { 731struct nvme_create_cq {
@@ -757,6 +814,24 @@ struct nvme_get_log_page_command {
757 __u32 rsvd14[2]; 814 __u32 rsvd14[2];
758}; 815};
759 816
817struct nvme_directive_cmd {
818 __u8 opcode;
819 __u8 flags;
820 __u16 command_id;
821 __le32 nsid;
822 __u64 rsvd2[2];
823 union nvme_data_ptr dptr;
824 __le32 numd;
825 __u8 doper;
826 __u8 dtype;
827 __le16 dspec;
828 __u8 endir;
829 __u8 tdtype;
830 __u16 rsvd15;
831
832 __u32 rsvd16[3];
833};
834
760/* 835/*
761 * Fabrics subcommands. 836 * Fabrics subcommands.
762 */ 837 */
@@ -887,6 +962,18 @@ struct nvme_dbbuf {
887 __u32 rsvd12[6]; 962 __u32 rsvd12[6];
888}; 963};
889 964
965struct streams_directive_params {
966 __u16 msl;
967 __u16 nssa;
968 __u16 nsso;
969 __u8 rsvd[10];
970 __u32 sws;
971 __u16 sgs;
972 __u16 nsa;
973 __u16 nso;
974 __u8 rsvd2[6];
975};
976
890struct nvme_command { 977struct nvme_command {
891 union { 978 union {
892 struct nvme_common_command common; 979 struct nvme_common_command common;
@@ -907,6 +994,7 @@ struct nvme_command {
907 struct nvmf_property_set_command prop_set; 994 struct nvmf_property_set_command prop_set;
908 struct nvmf_property_get_command prop_get; 995 struct nvmf_property_get_command prop_get;
909 struct nvme_dbbuf dbbuf; 996 struct nvme_dbbuf dbbuf;
997 struct nvme_directive_cmd directive;
910 }; 998 };
911}; 999};
912 1000
@@ -1051,4 +1139,8 @@ struct nvme_completion {
1051#define NVME_VS(major, minor, tertiary) \ 1139#define NVME_VS(major, minor, tertiary) \
1052 (((major) << 16) | ((minor) << 8) | (tertiary)) 1140 (((major) << 16) | ((minor) << 8) | (tertiary))
1053 1141
1142#define NVME_MAJOR(ver) ((ver) >> 16)
1143#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
1144#define NVME_TERTIARY(ver) ((ver) & 0xff)
1145
1054#endif /* _LINUX_NVME_H */ 1146#endif /* _LINUX_NVME_H */
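/*
 * Example for the new version helpers above (illustrative only): NVME_VS()
 * packs major/minor/tertiary as (major << 16) | (minor << 8) | tertiary,
 * and the new macros unpack a value such as the one read from NVME_REG_VS.
 */
u32 vs = NVME_VS(1, 3, 0);                              /* 0x00010300 */

pr_info("NVMe %u.%u.%u\n",
        NVME_MAJOR(vs), NVME_MINOR(vs), NVME_TERTIARY(vs));     /* "NVMe 1.3.0" */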
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index cb3c8fe6acd7..4b3286ac60c8 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
278 const void *buf, size_t buflen, off_t skip); 278 const void *buf, size_t buflen, off_t skip);
279size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, 279size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
280 void *buf, size_t buflen, off_t skip); 280 void *buf, size_t buflen, off_t skip);
281size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
282 size_t buflen, off_t skip);
281 283
282/* 284/*
283 * Maximum number of entries that will be allocated in one piece, if 285 * Maximum number of entries that will be allocated in one piece, if
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a09cca829082..a29d3086eb56 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -157,7 +157,7 @@ struct osd_request {
157 157
158 osd_req_done_fn *async_done; 158 osd_req_done_fn *async_done;
159 void *async_private; 159 void *async_private;
160 int async_error; 160 blk_status_t async_error;
161 int req_errors; 161 int req_errors;
162}; 162};
163 163
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index b379f93a2c48..da9bf2bcdf1a 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -166,6 +166,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
166extern void scsi_kunmap_atomic_sg(void *virt); 166extern void scsi_kunmap_atomic_sg(void *virt);
167 167
168extern int scsi_init_io(struct scsi_cmnd *cmd); 168extern int scsi_init_io(struct scsi_cmnd *cmd);
169extern void scsi_initialize_rq(struct request *rq);
169 170
170extern int scsi_dma_map(struct scsi_cmnd *cmd); 171extern int scsi_dma_map(struct scsi_cmnd *cmd);
171extern void scsi_dma_unmap(struct scsi_cmnd *cmd); 172extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index f0c76f9dc285..e0afa445ee4e 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -27,6 +27,6 @@ static inline void scsi_req_free_cmd(struct scsi_request *req)
27 kfree(req->cmd); 27 kfree(req->cmd);
28} 28}
29 29
30void scsi_req_init(struct request *); 30void scsi_req_init(struct scsi_request *req);
31 31
32#endif /* _SCSI_SCSI_REQUEST_H */ 32#endif /* _SCSI_SCSI_REQUEST_H */
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f7fbd1..a2d4a8ac94ca 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -79,7 +79,7 @@ struct io_event {
79struct iocb { 79struct iocb {
80 /* these are internal to the kernel/libc. */ 80 /* these are internal to the kernel/libc. */
81 __u64 aio_data; /* data to be returned in event's data */ 81 __u64 aio_data; /* data to be returned in event's data */
82 __u32 PADDED(aio_key, aio_reserved1); 82 __u32 PADDED(aio_key, aio_rw_flags);
83 /* the kernel sets aio_key to the req # */ 83 /* the kernel sets aio_key to the req # */
84 84
85 /* common fields */ 85 /* common fields */
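/*
 * Userspace sketch (assumption, not part of the patch): because the old
 * aio_reserved1 padding is now aio_rw_flags, per-request RWF_* flags such
 * as RWF_NOWAIT can be passed through io_submit().  Raw syscalls are used
 * so no particular libaio version is assumed; error handling and reaping
 * via io_getevents()/io_destroy() are omitted.
 */
#include <linux/aio_abi.h>
#include <linux/fs.h>           /* RWF_NOWAIT */
#include <sys/syscall.h>
#include <sys/types.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

static int submit_nowait_write(aio_context_t ctx, int fd,
                               const void *buf, size_t len, off_t off)
{
        struct iocb cb;
        struct iocb *cbs[1] = { &cb };

        memset(&cb, 0, sizeof(cb));
        cb.aio_fildes     = fd;
        cb.aio_lio_opcode = IOCB_CMD_PWRITE;
        cb.aio_buf        = (uint64_t)(uintptr_t)buf;
        cb.aio_nbytes     = len;
        cb.aio_offset     = off;
        cb.aio_rw_flags   = RWF_NOWAIT; /* older kernels reject non-zero here */

        return syscall(SYS_io_submit, ctx, 1, cbs);     /* 1 on success */
}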
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 4bf9f1eabffc..2f6c77aebe1a 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
267#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) 267#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
268 268
269#define DM_VERSION_MAJOR 4 269#define DM_VERSION_MAJOR 4
270#define DM_VERSION_MINOR 35 270#define DM_VERSION_MINOR 36
271#define DM_VERSION_PATCHLEVEL 0 271#define DM_VERSION_PATCHLEVEL 0
272#define DM_VERSION_EXTRA "-ioctl (2016-06-23)" 272#define DM_VERSION_EXTRA "-ioctl (2017-06-09)"
273 273
274/* Status bits */ 274/* Status bits */
275#define DM_READONLY_FLAG (1 << 0) /* In/Out */ 275#define DM_READONLY_FLAG (1 << 0) /* In/Out */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 813afd6eee71..ec69d55bcec7 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -43,6 +43,27 @@
43/* (1U << 31) is reserved for signed error codes */ 43/* (1U << 31) is reserved for signed error codes */
44 44
45/* 45/*
46 * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
47 * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
48 * the specific file.
49 */
50#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
51#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
52#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
53#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
54
55/*
56 * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
57 * used to clear any hints previously set.
58 */
59#define RWF_WRITE_LIFE_NOT_SET 0
60#define RWH_WRITE_LIFE_NONE 1
61#define RWH_WRITE_LIFE_SHORT 2
62#define RWH_WRITE_LIFE_MEDIUM 3
63#define RWH_WRITE_LIFE_LONG 4
64#define RWH_WRITE_LIFE_EXTREME 5
65
66/*
46 * Types of directory notifications that may be requested. 67 * Types of directory notifications that may be requested.
47 */ 68 */
48#define DN_ACCESS 0x00000001 /* File accessed */ 69#define DN_ACCESS 0x00000001 /* File accessed */
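/*
 * Userspace sketch for the new hint fcntl()s above (illustrative, assumes
 * headers new enough to define F_SET_RW_HINT and the RWH_* values): the
 * argument is a pointer to a 64-bit hint.  F_SET_RW_HINT applies to the
 * underlying inode, F_SET_FILE_RW_HINT only to this open file.
 */
#include <fcntl.h>
#include <stdint.h>

static int set_inode_write_hint(int fd, uint64_t hint)
{
        return fcntl(fd, F_SET_RW_HINT, &hint);
}

/* e.g. set_inode_write_hint(fd, RWH_WRITE_LIFE_SHORT); */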
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 24e61a54feaa..27d8c36c04af 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -360,5 +360,9 @@ struct fscrypt_key {
360#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */ 360#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */
361#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */ 361#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */
362#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */ 362#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */
363#define RWF_NOWAIT 0x00000008 /* per-IO, return -EAGAIN if operation would block */
364
365#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
366 RWF_NOWAIT)
363 367
364#endif /* _UAPI_LINUX_FS_H */ 368#endif /* _UAPI_LINUX_FS_H */
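/*
 * Userspace sketch for RWF_NOWAIT (illustrative, assumes a glibc exposing
 * pwritev2() and this flag): try a non-blocking write first and fall back
 * to a blocking one on EAGAIN.  In this series RWF_NOWAIT is only honoured
 * for direct I/O, so the fd is expected to be opened with O_DIRECT.
 */
#define _GNU_SOURCE
#include <sys/uio.h>
#include <errno.h>

static ssize_t write_try_nowait(int fd, const struct iovec *iov, int iovcnt,
                                off_t off)
{
        ssize_t ret = pwritev2(fd, iov, iovcnt, off, RWF_NOWAIT);

        if (ret < 0 && errno == EAGAIN)         /* would have blocked */
                ret = pwritev2(fd, iov, iovcnt, off, 0);
        return ret;
}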
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index c8125ec1f4f2..a3960f98679c 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,6 +22,7 @@ enum {
22 LO_FLAGS_AUTOCLEAR = 4, 22 LO_FLAGS_AUTOCLEAR = 4,
23 LO_FLAGS_PARTSCAN = 8, 23 LO_FLAGS_PARTSCAN = 8,
24 LO_FLAGS_DIRECT_IO = 16, 24 LO_FLAGS_DIRECT_IO = 16,
25 LO_FLAGS_BLOCKSIZE = 32,
25}; 26};
26 27
27#include <asm/posix_types.h> /* for __kernel_old_dev_t */ 28#include <asm/posix_types.h> /* for __kernel_old_dev_t */
@@ -59,6 +60,8 @@ struct loop_info64 {
59 __u64 lo_init[2]; 60 __u64 lo_init[2];
60}; 61};
61 62
63#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
64
62/* 65/*
63 * Loop filter types 66 * Loop filter types
64 */ 67 */
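/*
 * Userspace sketch (illustrative) for the new loop block-size control:
 * LO_FLAGS_BLOCKSIZE tells LOOP_SET_STATUS64 to take a logical block size
 * from lo_init[0], accessed via the LO_INFO_BLOCKSIZE() helper.  Whether
 * other status fields must be preserved first is driver policy and not
 * shown here.
 */
#include <linux/loop.h>
#include <sys/ioctl.h>
#include <string.h>

static int loop_set_block_size(int loop_fd, unsigned int bsize)
{
        struct loop_info64 info;

        memset(&info, 0, sizeof(info));
        info.lo_flags = LO_FLAGS_BLOCKSIZE;
        LO_INFO_BLOCKSIZE(&info) = bsize;       /* i.e. info.lo_init[0] = bsize */

        return ioctl(loop_fd, LOOP_SET_STATUS64, &info);
}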
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 155e33f81913..a50527ebf671 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -41,10 +41,14 @@ enum {
41#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ 41#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */
42#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ 42#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */
43#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ 43#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */
44#define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */
44/* there is a gap here to match userspace */ 45/* there is a gap here to match userspace */
45#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ 46#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */
46#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ 47#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */
47 48
49/* values for cmd flags in the upper 16 bits of request type */
50#define NBD_CMD_FLAG_FUA (1 << 16) /* FUA (forced unit access) op */
51
48/* These are client behavior specific flags. */ 52/* These are client behavior specific flags. */
49#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on 53#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on
50 disconnect. */ 54 disconnect. */
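/*
 * Sketch (not from the patch) of how an nbd client maps the new FUA
 * support: NBD_FLAG_SEND_FUA is advertised by the server at negotiation,
 * and per request the client sets NBD_CMD_FLAG_FUA in the upper 16 bits
 * of the command type.  "req" is an assumed struct request pointer.
 */
u32 nbd_type = NBD_CMD_WRITE;

if (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))
        nbd_type |= NBD_CMD_FLAG_FUA;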
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f80fd33639e0..57d22571f306 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev;
225struct hib_bio_batch { 225struct hib_bio_batch {
226 atomic_t count; 226 atomic_t count;
227 wait_queue_head_t wait; 227 wait_queue_head_t wait;
228 int error; 228 blk_status_t error;
229}; 229};
230 230
231static void hib_init_batch(struct hib_bio_batch *hb) 231static void hib_init_batch(struct hib_bio_batch *hb)
232{ 232{
233 atomic_set(&hb->count, 0); 233 atomic_set(&hb->count, 0);
234 init_waitqueue_head(&hb->wait); 234 init_waitqueue_head(&hb->wait);
235 hb->error = 0; 235 hb->error = BLK_STS_OK;
236} 236}
237 237
238static void hib_end_io(struct bio *bio) 238static void hib_end_io(struct bio *bio)
@@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio)
240 struct hib_bio_batch *hb = bio->bi_private; 240 struct hib_bio_batch *hb = bio->bi_private;
241 struct page *page = bio->bi_io_vec[0].bv_page; 241 struct page *page = bio->bi_io_vec[0].bv_page;
242 242
243 if (bio->bi_error) { 243 if (bio->bi_status) {
244 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", 244 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
245 imajor(bio->bi_bdev->bd_inode), 245 imajor(bio->bi_bdev->bd_inode),
246 iminor(bio->bi_bdev->bd_inode), 246 iminor(bio->bi_bdev->bd_inode),
@@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio)
253 flush_icache_range((unsigned long)page_address(page), 253 flush_icache_range((unsigned long)page_address(page),
254 (unsigned long)page_address(page) + PAGE_SIZE); 254 (unsigned long)page_address(page) + PAGE_SIZE);
255 255
256 if (bio->bi_error && !hb->error) 256 if (bio->bi_status && !hb->error)
257 hb->error = bio->bi_error; 257 hb->error = bio->bi_status;
258 if (atomic_dec_and_test(&hb->count)) 258 if (atomic_dec_and_test(&hb->count))
259 wake_up(&hb->wait); 259 wake_up(&hb->wait);
260 260
@@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
293 return error; 293 return error;
294} 294}
295 295
296static int hib_wait_io(struct hib_bio_batch *hb) 296static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
297{ 297{
298 wait_event(hb->wait, atomic_read(&hb->count) == 0); 298 wait_event(hb->wait, atomic_read(&hb->count) == 0);
299 return hb->error; 299 return blk_status_to_errno(hb->error);
300} 300}
301 301
302/* 302/*
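/*
 * Illustration (not from the patch) of the error-handling pattern this
 * series converts callers to: completion paths keep the blk_status_t from
 * bio->bi_status and translate it to a negative errno only at the boundary
 * to non-block code, as hib_wait_io() above now does.
 */
blk_status_t status = bio->bi_status;

if (status)
        pr_err("I/O failed: %d\n", blk_status_to_errno(status));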
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 193c5f5e3f79..bc364f86100a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore,
867 867
868 __blk_add_trace(bt, bio->bi_iter.bi_sector, 868 __blk_add_trace(bt, bio->bi_iter.bi_sector,
869 bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, 869 bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
870 BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu), 870 BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
871 &rpdu); 871 &rpdu);
872 } 872 }
873} 873}
@@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore,
900 r.sector_from = cpu_to_be64(from); 900 r.sector_from = cpu_to_be64(from);
901 901
902 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, 902 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
903 bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error, 903 bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
904 sizeof(r), &r); 904 sizeof(r), &r);
905} 905}
906 906
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c6cf82242d65..be7b4dd6b68d 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
751 return sg_copy_buffer(sgl, nents, buf, buflen, skip, true); 751 return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
752} 752}
753EXPORT_SYMBOL(sg_pcopy_to_buffer); 753EXPORT_SYMBOL(sg_pcopy_to_buffer);
754
755/**
756 * sg_zero_buffer - Zero-out a part of a SG list
757 * @sgl: The SG list
758 * @nents: Number of SG entries
759 * @buflen: The number of bytes to zero out
760 * @skip: Number of bytes to skip before zeroing
761 *
762 * Returns the number of bytes zeroed.
763 **/
764size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
765 size_t buflen, off_t skip)
766{
767 unsigned int offset = 0;
768 struct sg_mapping_iter miter;
769 unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
770
771 sg_miter_start(&miter, sgl, nents, sg_flags);
772
773 if (!sg_miter_skip(&miter, skip))
774 return false;
775
776 while (offset < buflen && sg_miter_next(&miter)) {
777 unsigned int len;
778
779 len = min(miter.length, buflen - offset);
780 memset(miter.addr, 0, len);
781
782 offset += len;
783 }
784
785 sg_miter_stop(&miter);
786 return offset;
787}
788EXPORT_SYMBOL(sg_zero_buffer);
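/*
 * Kernel-side usage sketch for sg_zero_buffer() (illustrative): zero the
 * untransferred tail of an SG list so stale buffer contents are not
 * exposed.  "sgl", "nents", "buflen" and "xferred" are assumed locals.
 */
if (xferred < buflen)
        sg_zero_buffer(sgl, nents, buflen - xferred, xferred);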
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..742034e56100 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
376} 376}
377EXPORT_SYMBOL(filemap_flush); 377EXPORT_SYMBOL(filemap_flush);
378 378
379/**
380 * filemap_range_has_page - check if a page exists in range.
381 * @mapping: address space within which to check
382 * @start_byte: offset in bytes where the range starts
383 * @end_byte: offset in bytes where the range ends (inclusive)
384 *
385 * Find at least one page in the range supplied, usually used to check if
386 * direct writing in this range will trigger a writeback.
387 */
388bool filemap_range_has_page(struct address_space *mapping,
389 loff_t start_byte, loff_t end_byte)
390{
391 pgoff_t index = start_byte >> PAGE_SHIFT;
392 pgoff_t end = end_byte >> PAGE_SHIFT;
393 struct pagevec pvec;
394 bool ret;
395
396 if (end_byte < start_byte)
397 return false;
398
399 if (mapping->nrpages == 0)
400 return false;
401
402 pagevec_init(&pvec, 0);
403 if (!pagevec_lookup(&pvec, mapping, index, 1))
404 return false;
405 ret = (pvec.pages[0]->index <= end);
406 pagevec_release(&pvec);
407 return ret;
408}
409EXPORT_SYMBOL(filemap_range_has_page);
410
379static int __filemap_fdatawait_range(struct address_space *mapping, 411static int __filemap_fdatawait_range(struct address_space *mapping,
380 loff_t start_byte, loff_t end_byte) 412 loff_t start_byte, loff_t end_byte)
381{ 413{
@@ -2038,10 +2070,17 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
2038 loff_t size; 2070 loff_t size;
2039 2071
2040 size = i_size_read(inode); 2072 size = i_size_read(inode);
2041 retval = filemap_write_and_wait_range(mapping, iocb->ki_pos, 2073 if (iocb->ki_flags & IOCB_NOWAIT) {
2042 iocb->ki_pos + count - 1); 2074 if (filemap_range_has_page(mapping, iocb->ki_pos,
2043 if (retval < 0) 2075 iocb->ki_pos + count - 1))
2044 goto out; 2076 return -EAGAIN;
2077 } else {
2078 retval = filemap_write_and_wait_range(mapping,
2079 iocb->ki_pos,
2080 iocb->ki_pos + count - 1);
2081 if (retval < 0)
2082 goto out;
2083 }
2045 2084
2046 file_accessed(file); 2085 file_accessed(file);
2047 2086
@@ -2642,6 +2681,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
2642 2681
2643 pos = iocb->ki_pos; 2682 pos = iocb->ki_pos;
2644 2683
2684 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
2685 return -EINVAL;
2686
2645 if (limit != RLIM_INFINITY) { 2687 if (limit != RLIM_INFINITY) {
2646 if (iocb->ki_pos >= limit) { 2688 if (iocb->ki_pos >= limit) {
2647 send_sig(SIGXFSZ, current, 0); 2689 send_sig(SIGXFSZ, current, 0);
@@ -2710,9 +2752,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
2710 write_len = iov_iter_count(from); 2752 write_len = iov_iter_count(from);
2711 end = (pos + write_len - 1) >> PAGE_SHIFT; 2753 end = (pos + write_len - 1) >> PAGE_SHIFT;
2712 2754
2713 written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); 2755 if (iocb->ki_flags & IOCB_NOWAIT) {
2714 if (written) 2756 /* If there are pages to writeback, return */
2715 goto out; 2757 if (filemap_range_has_page(inode->i_mapping, pos,
2758 pos + iov_iter_count(from)))
2759 return -EAGAIN;
2760 } else {
2761 written = filemap_write_and_wait_range(mapping, pos,
2762 pos + write_len - 1);
2763 if (written)
2764 goto out;
2765 }
2716 2766
2717 /* 2767 /*
2718 * After a write we want buffered reads to be sure to go to disk to get 2768 * After a write we want buffered reads to be sure to go to disk to get
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d3470f..2da71e627812 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
45{ 45{
46 struct page *page = bio->bi_io_vec[0].bv_page; 46 struct page *page = bio->bi_io_vec[0].bv_page;
47 47
48 if (bio->bi_error) { 48 if (bio->bi_status) {
49 SetPageError(page); 49 SetPageError(page);
50 /* 50 /*
51 * We failed to write the page out to swap-space. 51 * We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
118{ 118{
119 struct page *page = bio->bi_io_vec[0].bv_page; 119 struct page *page = bio->bi_io_vec[0].bv_page;
120 120
121 if (bio->bi_error) { 121 if (bio->bi_status) {
122 SetPageError(page); 122 SetPageError(page);
123 ClearPageUptodate(page); 123 ClearPageUptodate(page);
124 pr_alert("Read-error on swap-device (%u:%u:%llu)\n", 124 pr_alert("Read-error on swap-device (%u:%u:%llu)\n",