author     Linus Torvalds <torvalds@linux-foundation.org>  2017-07-03 13:34:51 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-07-03 13:34:51 -0400
commit     c6b1e36c8fa04a6680c44fe0321d0370400e90b6 (patch)
tree       5110f0639bfa803baa8d213cb21efe37beeaf742
parent     81e3e044897b0875a52953b3fb6241a33428e4f9 (diff)
parent     a84ebb837b419787c2ece74efa566c998929cead (diff)
Merge branch 'for-4.13/block' of git://git.kernel.dk/linux-block
Pull core block/IO updates from Jens Axboe:
 "This is the main pull request for the block layer for 4.13. Not a huge
  round in terms of features, but there's a lot of churn related to some
  core cleanups.

  Note this depends on the UUID tree pull request, that Christoph
  already sent out.

  This pull request contains:

   - A series from Christoph, unifying the error/stats codes in the
     block layer. We now use blk_status_t everywhere, instead of using
     different schemes for different places.

   - Also from Christoph, some cleanups around request allocation and
     IO scheduler interactions in blk-mq.

   - And yet another series from Christoph, cleaning up how we handle
     and do bounce buffering in the block layer.

   - A blk-mq debugfs series from Bart, further improving on the
     support we have for exporting internal information to aid
     debugging IO hangs or stalls.

   - Also from Bart, a series that cleans up the request initialization
     differences across types of devices.

   - A series from Goldwyn Rodrigues, allowing the block layer to
     return failure if we will block and the user asked for
     non-blocking.

   - Patch from Hannes for supporting setting loop devices block size
     to that of the underlying device.

   - Two series of patches from Javier, fixing various issues with
     lightnvm, particular around pblk.

   - A series from me, adding support for write hints. This comes with
     NVMe support as well, so applications can help guide data
     placement on flash to improve performance, latencies, and write
     amplification.

   - A series from Ming, improving and hardening blk-mq support for
     stopping/starting and quiescing hardware queues.

   - Two pull requests for NVMe updates. Nothing major on the feature
     side, but lots of cleanups and bug fixes. From the usual crew.

   - A series from Neil Brown, greatly improving the bio rescue set
     support. Most notably, this kills the bio rescue work queues, if
     we don't really need them.

   - Lots of other little bug fixes that are all over the place"

* 'for-4.13/block' of git://git.kernel.dk/linux-block: (217 commits)
  lightnvm: pblk: set line bitmap check under debug
  lightnvm: pblk: verify that cache read is still valid
  lightnvm: pblk: add initialization check
  lightnvm: pblk: remove target using async. I/Os
  lightnvm: pblk: use vmalloc for GC data buffer
  lightnvm: pblk: use right metadata buffer for recovery
  lightnvm: pblk: schedule if data is not ready
  lightnvm: pblk: remove unused return variable
  lightnvm: pblk: fix double-free on pblk init
  lightnvm: pblk: fix bad le64 assignations
  nvme: Makefile: remove dead build rule
  blk-mq: map all HWQ also in hyperthreaded system
  nvmet-rdma: register ib_client to not deadlock in device removal
  nvme_fc: fix error recovery on link down.
  nvmet_fc: fix crashes on bad opcodes
  nvme_fc: Fix crash when nvme controller connection fails.
  nvme_fc: replace ioabort msleep loop with completion
  nvme_fc: fix double calls to nvme_cleanup_cmd()
  nvme-fabrics: verify that a controller returns the correct NQN
  nvme: simplify nvme_dev_attrs_are_visible
  ...
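
As a hedged illustration of the blk_status_t unification described above, the sketch below shows how a driver completion path would report status with the new scheme instead of a raw negative errno. blk_end_request(), blk_rq_bytes(), errno_to_blk_status() and the BLK_STS_* codes all appear in the diff that follows; the my_complete_request() wrapper and its hw_error argument are invented for illustration only.

#include <linux/blkdev.h>

/*
 * Hedged sketch, not part of the patch: complete a request with the new
 * blk_status_t codes rather than a raw errno.  my_complete_request() and
 * hw_error are hypothetical driver context; the block-layer calls are the
 * ones this series converts.
 */
static void my_complete_request(struct request *req, int hw_error)
{
	blk_status_t status = hw_error ? errno_to_blk_status(hw_error)
				       : BLK_STS_OK;

	/* Complete the whole request; BLK_STS_IOERR etc. replace -EIO here. */
	blk_end_request(req, status, blk_rq_bytes(req));
}
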
-rw-r--r-- Documentation/block/biodoc.txt | 2
-rw-r--r-- arch/s390/include/asm/eadm.h | 6
-rw-r--r-- arch/um/drivers/ubd_kern.c | 2
-rw-r--r-- block/badblocks.c | 1
-rw-r--r-- block/bfq-iosched.c | 59
-rw-r--r-- block/bio-integrity.c | 8
-rw-r--r-- block/bio.c | 85
-rw-r--r-- block/blk-core.c | 331
-rw-r--r-- block/blk-exec.c | 4
-rw-r--r-- block/blk-flush.c | 16
-rw-r--r-- block/blk-integrity.c | 4
-rw-r--r-- block/blk-map.c | 7
-rw-r--r-- block/blk-merge.c | 48
-rw-r--r-- block/blk-mq-cpumap.c | 68
-rw-r--r-- block/blk-mq-debugfs.c | 101
-rw-r--r-- block/blk-mq-sched.c | 158
-rw-r--r-- block/blk-mq-sched.h | 28
-rw-r--r-- block/blk-mq.c | 399
-rw-r--r-- block/blk-mq.h | 11
-rw-r--r-- block/blk-settings.c | 5
-rw-r--r-- block/blk-tag.c | 15
-rw-r--r-- block/blk-timeout.c | 4
-rw-r--r-- block/blk.h | 15
-rw-r--r-- block/bounce.c | 47
-rw-r--r-- block/bsg-lib.c | 5
-rw-r--r-- block/bsg.c | 13
-rw-r--r-- block/cfq-iosched.c | 9
-rw-r--r-- block/elevator.c | 1
-rw-r--r-- block/genhd.c | 4
-rw-r--r-- block/ioprio.c | 3
-rw-r--r-- block/kyber-iosched.c | 31
-rw-r--r-- block/scsi_ioctl.c | 13
-rw-r--r-- block/t10-pi.c | 32
-rw-r--r-- drivers/block/DAC960.c | 2
-rw-r--r-- drivers/block/amiflop.c | 10
-rw-r--r-- drivers/block/aoe/aoeblk.c | 1
-rw-r--r-- drivers/block/aoe/aoecmd.c | 12
-rw-r--r-- drivers/block/aoe/aoedev.c | 2
-rw-r--r-- drivers/block/ataflop.c | 16
-rw-r--r-- drivers/block/brd.c | 1
-rw-r--r-- drivers/block/cciss.c | 4
-rw-r--r-- drivers/block/drbd/drbd_actlog.c | 2
-rw-r--r-- drivers/block/drbd/drbd_bitmap.c | 6
-rw-r--r-- drivers/block/drbd/drbd_int.h | 5
-rw-r--r-- drivers/block/drbd/drbd_main.c | 14
-rw-r--r-- drivers/block/drbd/drbd_nl.c | 2
-rw-r--r-- drivers/block/drbd/drbd_receiver.c | 6
-rw-r--r-- drivers/block/drbd/drbd_req.c | 8
-rw-r--r-- drivers/block/drbd/drbd_req.h | 2
-rw-r--r-- drivers/block/drbd/drbd_worker.c | 16
-rw-r--r-- drivers/block/floppy.c | 9
-rw-r--r-- drivers/block/loop.c | 64
-rw-r--r-- drivers/block/loop.h | 1
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.c | 54
-rw-r--r-- drivers/block/mtip32xx/mtip32xx.h | 2
-rw-r--r-- drivers/block/nbd.c | 44
-rw-r--r-- drivers/block/null_blk.c | 125
-rw-r--r-- drivers/block/paride/pcd.c | 9
-rw-r--r-- drivers/block/paride/pd.c | 3
-rw-r--r-- drivers/block/paride/pf.c | 19
-rw-r--r-- drivers/block/pktcdvd.c | 40
-rw-r--r-- drivers/block/ps3disk.c | 11
-rw-r--r-- drivers/block/ps3vram.c | 16
-rw-r--r-- drivers/block/rbd.c | 28
-rw-r--r-- drivers/block/rsxx/dev.c | 17
-rw-r--r-- drivers/block/rsxx/dma.c | 13
-rw-r--r-- drivers/block/rsxx/rsxx_priv.h | 2
-rw-r--r-- drivers/block/skd_main.c | 32
-rw-r--r-- drivers/block/sunvdc.c | 4
-rw-r--r-- drivers/block/swim.c | 8
-rw-r--r-- drivers/block/swim3.c | 29
-rw-r--r-- drivers/block/sx8.c | 20
-rw-r--r-- drivers/block/umem.c | 4
-rw-r--r-- drivers/block/virtio_blk.c | 23
-rw-r--r-- drivers/block/xen-blkback/blkback.c | 19
-rw-r--r-- drivers/block/xen-blkfront.c | 81
-rw-r--r-- drivers/block/xsysace.c | 9
-rw-r--r-- drivers/block/z2ram.c | 4
-rw-r--r-- drivers/cdrom/cdrom.c | 7
-rw-r--r-- drivers/cdrom/gdrom.c | 10
-rw-r--r-- drivers/ide/ide-atapi.c | 12
-rw-r--r-- drivers/ide/ide-cd.c | 11
-rw-r--r-- drivers/ide/ide-cd_ioctl.c | 1
-rw-r--r-- drivers/ide/ide-devsets.c | 1
-rw-r--r-- drivers/ide/ide-disk.c | 1
-rw-r--r-- drivers/ide/ide-dma.c | 2
-rw-r--r-- drivers/ide/ide-eh.c | 16
-rw-r--r-- drivers/ide/ide-floppy.c | 6
-rw-r--r-- drivers/ide/ide-io.c | 10
-rw-r--r-- drivers/ide/ide-ioctls.c | 2
-rw-r--r-- drivers/ide/ide-park.c | 2
-rw-r--r-- drivers/ide/ide-pm.c | 8
-rw-r--r-- drivers/ide/ide-probe.c | 7
-rw-r--r-- drivers/ide/ide-tape.c | 3
-rw-r--r-- drivers/ide/ide-taskfile.c | 7
-rw-r--r-- drivers/ide/siimage.c | 6
-rw-r--r-- drivers/lightnvm/core.c | 13
-rw-r--r-- drivers/lightnvm/pblk-cache.c | 8
-rw-r--r-- drivers/lightnvm/pblk-core.c | 617
-rw-r--r-- drivers/lightnvm/pblk-gc.c | 475
-rw-r--r-- drivers/lightnvm/pblk-init.c | 389
-rw-r--r-- drivers/lightnvm/pblk-map.c | 75
-rw-r--r-- drivers/lightnvm/pblk-rb.c | 106
-rw-r--r-- drivers/lightnvm/pblk-read.c | 93
-rw-r--r-- drivers/lightnvm/pblk-recovery.c | 275
-rw-r--r-- drivers/lightnvm/pblk-rl.c | 90
-rw-r--r-- drivers/lightnvm/pblk-sysfs.c | 94
-rw-r--r-- drivers/lightnvm/pblk-write.c | 353
-rw-r--r-- drivers/lightnvm/pblk.h | 296
-rw-r--r-- drivers/lightnvm/rrpc.c | 10
-rw-r--r-- drivers/md/bcache/bcache.h | 7
-rw-r--r-- drivers/md/bcache/btree.c | 6
-rw-r--r-- drivers/md/bcache/debug.c | 2
-rw-r--r-- drivers/md/bcache/io.c | 6
-rw-r--r-- drivers/md/bcache/journal.c | 2
-rw-r--r-- drivers/md/bcache/movinggc.c | 10
-rw-r--r-- drivers/md/bcache/request.c | 28
-rw-r--r-- drivers/md/bcache/request.h | 2
-rw-r--r-- drivers/md/bcache/super.c | 14
-rw-r--r-- drivers/md/bcache/writeback.c | 4
-rw-r--r-- drivers/md/dm-bio-prison-v1.c | 4
-rw-r--r-- drivers/md/dm-bio-prison-v1.h | 2
-rw-r--r-- drivers/md/dm-bufio.c | 28
-rw-r--r-- drivers/md/dm-cache-target.c | 36
-rw-r--r-- drivers/md/dm-crypt.c | 41
-rw-r--r-- drivers/md/dm-flakey.c | 13
-rw-r--r-- drivers/md/dm-integrity.c | 30
-rw-r--r-- drivers/md/dm-io.c | 13
-rw-r--r-- drivers/md/dm-log-writes.c | 13
-rw-r--r-- drivers/md/dm-mpath.c | 85
-rw-r--r-- drivers/md/dm-raid1.c | 29
-rw-r--r-- drivers/md/dm-rq.c | 30
-rw-r--r-- drivers/md/dm-rq.h | 2
-rw-r--r-- drivers/md/dm-snap.c | 15
-rw-r--r-- drivers/md/dm-stripe.c | 17
-rw-r--r-- drivers/md/dm-target.c | 2
-rw-r--r-- drivers/md/dm-thin.c | 67
-rw-r--r-- drivers/md/dm-verity-target.c | 16
-rw-r--r-- drivers/md/dm-zero.c | 4
-rw-r--r-- drivers/md/dm.c | 88
-rw-r--r-- drivers/md/md.c | 14
-rw-r--r-- drivers/md/multipath.c | 10
-rw-r--r-- drivers/md/raid1.c | 38
-rw-r--r-- drivers/md/raid10.c | 38
-rw-r--r-- drivers/md/raid5-cache.c | 6
-rw-r--r-- drivers/md/raid5-ppl.c | 4
-rw-r--r-- drivers/md/raid5.c | 24
-rw-r--r-- drivers/memstick/core/ms_block.c | 7
-rw-r--r-- drivers/memstick/core/mspro_block.c | 8
-rw-r--r-- drivers/mmc/core/block.c | 37
-rw-r--r-- drivers/mmc/core/queue.c | 3
-rw-r--r-- drivers/mtd/mtd_blkdevs.c | 31
-rw-r--r-- drivers/mtd/ubi/block.c | 8
-rw-r--r-- drivers/nvdimm/blk.c | 5
-rw-r--r-- drivers/nvdimm/btt.c | 5
-rw-r--r-- drivers/nvdimm/pmem.c | 29
-rw-r--r-- drivers/nvme/host/Kconfig | 12
-rw-r--r-- drivers/nvme/host/Makefile | 1
-rw-r--r-- drivers/nvme/host/core.c | 525
-rw-r--r-- drivers/nvme/host/fabrics.c | 69
-rw-r--r-- drivers/nvme/host/fabrics.h | 4
-rw-r--r-- drivers/nvme/host/fc.c | 137
-rw-r--r-- drivers/nvme/host/lightnvm.c | 18
-rw-r--r-- drivers/nvme/host/nvme.h | 42
-rw-r--r-- drivers/nvme/host/pci.c | 647
-rw-r--r-- drivers/nvme/host/rdma.c | 212
-rw-r--r-- drivers/nvme/host/scsi.c | 2460
-rw-r--r-- drivers/nvme/target/admin-cmd.c | 65
-rw-r--r-- drivers/nvme/target/configfs.c | 68
-rw-r--r-- drivers/nvme/target/core.c | 3
-rw-r--r-- drivers/nvme/target/discovery.c | 4
-rw-r--r-- drivers/nvme/target/fc.c | 10
-rw-r--r-- drivers/nvme/target/fcloop.c | 2
-rw-r--r-- drivers/nvme/target/io-cmd.c | 4
-rw-r--r-- drivers/nvme/target/loop.c | 67
-rw-r--r-- drivers/nvme/target/nvmet.h | 1
-rw-r--r-- drivers/nvme/target/rdma.c | 102
-rw-r--r-- drivers/s390/block/dasd.c | 36
-rw-r--r-- drivers/s390/block/dcssblk.c | 2
-rw-r--r-- drivers/s390/block/scm_blk.c | 8
-rw-r--r-- drivers/s390/block/scm_blk.h | 4
-rw-r--r-- drivers/s390/block/xpram.c | 2
-rw-r--r-- drivers/s390/cio/eadm_sch.c | 6
-rw-r--r-- drivers/s390/cio/scm.c | 2
-rw-r--r-- drivers/sbus/char/jsflash.c | 5
-rw-r--r-- drivers/scsi/osd/osd_initiator.c | 29
-rw-r--r-- drivers/scsi/osst.c | 3
-rw-r--r-- drivers/scsi/scsi_error.c | 3
-rw-r--r-- drivers/scsi/scsi_lib.c | 104
-rw-r--r-- drivers/scsi/scsi_transport_sas.c | 10
-rw-r--r-- drivers/scsi/sg.c | 8
-rw-r--r-- drivers/scsi/st.c | 3
-rw-r--r-- drivers/target/target_core_iblock.c | 12
-rw-r--r-- drivers/target/target_core_pscsi.c | 6
-rw-r--r-- fs/aio.c | 15
-rw-r--r-- fs/block_dev.c | 25
-rw-r--r-- fs/btrfs/btrfs_inode.h | 3
-rw-r--r-- fs/btrfs/check-integrity.c | 4
-rw-r--r-- fs/btrfs/compression.c | 46
-rw-r--r-- fs/btrfs/compression.h | 4
-rw-r--r-- fs/btrfs/ctree.h | 6
-rw-r--r-- fs/btrfs/disk-io.c | 75
-rw-r--r-- fs/btrfs/disk-io.h | 12
-rw-r--r-- fs/btrfs/extent_io.c | 27
-rw-r--r-- fs/btrfs/extent_io.h | 6
-rw-r--r-- fs/btrfs/file-item.c | 14
-rw-r--r-- fs/btrfs/file.c | 33
-rw-r--r-- fs/btrfs/inode.c | 82
-rw-r--r-- fs/btrfs/raid56.c | 16
-rw-r--r-- fs/btrfs/scrub.c | 26
-rw-r--r-- fs/btrfs/volumes.c | 11
-rw-r--r-- fs/buffer.c | 15
-rw-r--r-- fs/crypto/bio.c | 2
-rw-r--r-- fs/direct-io.c | 25
-rw-r--r-- fs/ext4/file.c | 35
-rw-r--r-- fs/ext4/page-io.c | 15
-rw-r--r-- fs/ext4/readpage.c | 4
-rw-r--r-- fs/f2fs/data.c | 10
-rw-r--r-- fs/f2fs/segment.c | 2
-rw-r--r-- fs/fcntl.c | 67
-rw-r--r-- fs/gfs2/incore.h | 1
-rw-r--r-- fs/gfs2/lops.c | 10
-rw-r--r-- fs/gfs2/meta_io.c | 2
-rw-r--r-- fs/gfs2/ops_fstype.c | 4
-rw-r--r-- fs/inode.c | 1
-rw-r--r-- fs/iomap.c | 13
-rw-r--r-- fs/jfs/jfs_logmgr.c | 2
-rw-r--r-- fs/jfs/jfs_metapage.c | 4
-rw-r--r-- fs/mpage.c | 4
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c | 4
-rw-r--r-- fs/nfsd/blocklayout.c | 4
-rw-r--r-- fs/nilfs2/segbuf.c | 2
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c | 6
-rw-r--r-- fs/open.c | 1
-rw-r--r-- fs/read_write.c | 12
-rw-r--r-- fs/xfs/xfs_aops.c | 9
-rw-r--r-- fs/xfs/xfs_buf.c | 7
-rw-r--r-- fs/xfs/xfs_file.c | 32
-rw-r--r-- fs/xfs/xfs_iomap.c | 22
-rw-r--r-- fs/xfs/xfs_super.c | 3
-rw-r--r-- include/linux/bio.h | 21
-rw-r--r-- include/linux/blk-mq.h | 28
-rw-r--r-- include/linux/blk_types.h | 26
-rw-r--r-- include/linux/blkdev.h | 72
-rw-r--r-- include/linux/device-mapper.h | 4
-rw-r--r-- include/linux/elevator.h | 7
-rw-r--r-- include/linux/fs.h | 74
-rw-r--r-- include/linux/ide.h | 6
-rw-r--r-- include/linux/iomap.h | 1
-rw-r--r-- include/linux/nvme.h | 102
-rw-r--r-- include/linux/scatterlist.h | 2
-rw-r--r-- include/scsi/osd_initiator.h | 2
-rw-r--r-- include/scsi/scsi_cmnd.h | 1
-rw-r--r-- include/scsi/scsi_request.h | 2
-rw-r--r-- include/uapi/linux/aio_abi.h | 2
-rw-r--r-- include/uapi/linux/dm-ioctl.h | 4
-rw-r--r-- include/uapi/linux/fcntl.h | 21
-rw-r--r-- include/uapi/linux/fs.h | 4
-rw-r--r-- include/uapi/linux/loop.h | 3
-rw-r--r-- include/uapi/linux/nbd.h | 4
-rw-r--r-- kernel/power/swap.c | 14
-rw-r--r-- kernel/trace/blktrace.c | 4
-rw-r--r-- lib/scatterlist.c | 35
-rw-r--r-- mm/filemap.c | 64
-rw-r--r-- mm/page_io.c | 4
265 files changed, 5912 insertions, 6237 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 01ddeaf64b0f..9490f2845f06 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -632,7 +632,7 @@ to i/o submission, if the bio fields are likely to be accessed after the
632i/o is issued (since the bio may otherwise get freed in case i/o completion 632i/o is issued (since the bio may otherwise get freed in case i/o completion
633happens in the meantime). 633happens in the meantime).
634 634
635The bio_clone() routine may be used to duplicate a bio, where the clone 635The bio_clone_fast() routine may be used to duplicate a bio, where the clone
636shares the bio_vec_list with the original bio (i.e. both point to the 636shares the bio_vec_list with the original bio (i.e. both point to the
637same bio_vec_list). This would typically be used for splitting i/o requests 637same bio_vec_list). This would typically be used for splitting i/o requests
638in lvm or md. 638in lvm or md.
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 67026300c88e..144809a3f4f6 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -3,6 +3,7 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <linux/device.h> 5#include <linux/device.h>
6#include <linux/blkdev.h>
6 7
7struct arqb { 8struct arqb {
8 u64 data; 9 u64 data;
@@ -105,13 +106,14 @@ struct scm_driver {
105 int (*probe) (struct scm_device *scmdev); 106 int (*probe) (struct scm_device *scmdev);
106 int (*remove) (struct scm_device *scmdev); 107 int (*remove) (struct scm_device *scmdev);
107 void (*notify) (struct scm_device *scmdev, enum scm_event event); 108 void (*notify) (struct scm_device *scmdev, enum scm_event event);
108 void (*handler) (struct scm_device *scmdev, void *data, int error); 109 void (*handler) (struct scm_device *scmdev, void *data,
110 blk_status_t error);
109}; 111};
110 112
111int scm_driver_register(struct scm_driver *scmdrv); 113int scm_driver_register(struct scm_driver *scmdrv);
112void scm_driver_unregister(struct scm_driver *scmdrv); 114void scm_driver_unregister(struct scm_driver *scmdrv);
113 115
114int eadm_start_aob(struct aob *aob); 116int eadm_start_aob(struct aob *aob);
115void scm_irq_handler(struct aob *aob, int error); 117void scm_irq_handler(struct aob *aob, blk_status_t error);
116 118
117#endif /* _ASM_S390_EADM_H */ 119#endif /* _ASM_S390_EADM_H */
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 85410279beab..b55fe9bf5d3e 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -534,7 +534,7 @@ static void ubd_handler(void)
534 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { 534 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
535 blk_end_request( 535 blk_end_request(
536 (*irq_req_buffer)[count]->req, 536 (*irq_req_buffer)[count]->req,
537 0, 537 BLK_STS_OK,
538 (*irq_req_buffer)[count]->length 538 (*irq_req_buffer)[count]->length
539 ); 539 );
540 kfree((*irq_req_buffer)[count]); 540 kfree((*irq_req_buffer)[count]);
diff --git a/block/badblocks.c b/block/badblocks.c
index 6ebcef282314..43c71166e1e2 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -533,6 +533,7 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
533 case 3: 533 case 3:
534 if (newline != '\n') 534 if (newline != '\n')
535 return -EINVAL; 535 return -EINVAL;
536 /* fall through */
536 case 2: 537 case 2:
537 if (length <= 0) 538 if (length <= 0)
538 return -EINVAL; 539 return -EINVAL;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index ed93da2462ab..12bbc6b8657d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -725,8 +725,12 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
725} 725}
726 726
727static void 727static void
728bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic) 728bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
729 struct bfq_io_cq *bic, bool bfq_already_existing)
729{ 730{
731 unsigned int old_wr_coeff = bfqq->wr_coeff;
732 bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq);
733
730 if (bic->saved_idle_window) 734 if (bic->saved_idle_window)
731 bfq_mark_bfqq_idle_window(bfqq); 735 bfq_mark_bfqq_idle_window(bfqq);
732 else 736 else
@@ -754,6 +758,14 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
754 758
755 /* make sure weight will be updated, however we got here */ 759 /* make sure weight will be updated, however we got here */
756 bfqq->entity.prio_changed = 1; 760 bfqq->entity.prio_changed = 1;
761
762 if (likely(!busy))
763 return;
764
765 if (old_wr_coeff == 1 && bfqq->wr_coeff > 1)
766 bfqd->wr_busy_queues++;
767 else if (old_wr_coeff > 1 && bfqq->wr_coeff == 1)
768 bfqd->wr_busy_queues--;
757} 769}
758 770
759static int bfqq_process_refs(struct bfq_queue *bfqq) 771static int bfqq_process_refs(struct bfq_queue *bfqq)
@@ -4290,10 +4302,16 @@ static void bfq_put_rq_priv_body(struct bfq_queue *bfqq)
4290 bfq_put_queue(bfqq); 4302 bfq_put_queue(bfqq);
4291} 4303}
4292 4304
4293static void bfq_put_rq_private(struct request_queue *q, struct request *rq) 4305static void bfq_finish_request(struct request *rq)
4294{ 4306{
4295 struct bfq_queue *bfqq = RQ_BFQQ(rq); 4307 struct bfq_queue *bfqq;
4296 struct bfq_data *bfqd = bfqq->bfqd; 4308 struct bfq_data *bfqd;
4309
4310 if (!rq->elv.icq)
4311 return;
4312
4313 bfqq = RQ_BFQQ(rq);
4314 bfqd = bfqq->bfqd;
4297 4315
4298 if (rq->rq_flags & RQF_STARTED) 4316 if (rq->rq_flags & RQF_STARTED)
4299 bfqg_stats_update_completion(bfqq_group(bfqq), 4317 bfqg_stats_update_completion(bfqq_group(bfqq),
@@ -4324,7 +4342,7 @@ static void bfq_put_rq_private(struct request_queue *q, struct request *rq)
4324 */ 4342 */
4325 4343
4326 if (!RB_EMPTY_NODE(&rq->rb_node)) 4344 if (!RB_EMPTY_NODE(&rq->rb_node))
4327 bfq_remove_request(q, rq); 4345 bfq_remove_request(rq->q, rq);
4328 bfq_put_rq_priv_body(bfqq); 4346 bfq_put_rq_priv_body(bfqq);
4329 } 4347 }
4330 4348
@@ -4394,20 +4412,21 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
4394/* 4412/*
4395 * Allocate bfq data structures associated with this request. 4413 * Allocate bfq data structures associated with this request.
4396 */ 4414 */
4397static int bfq_get_rq_private(struct request_queue *q, struct request *rq, 4415static void bfq_prepare_request(struct request *rq, struct bio *bio)
4398 struct bio *bio)
4399{ 4416{
4417 struct request_queue *q = rq->q;
4400 struct bfq_data *bfqd = q->elevator->elevator_data; 4418 struct bfq_data *bfqd = q->elevator->elevator_data;
4401 struct bfq_io_cq *bic = icq_to_bic(rq->elv.icq); 4419 struct bfq_io_cq *bic;
4402 const int is_sync = rq_is_sync(rq); 4420 const int is_sync = rq_is_sync(rq);
4403 struct bfq_queue *bfqq; 4421 struct bfq_queue *bfqq;
4404 bool new_queue = false; 4422 bool new_queue = false;
4405 bool split = false; 4423 bool bfqq_already_existing = false, split = false;
4406 4424
4407 spin_lock_irq(&bfqd->lock); 4425 if (!rq->elv.icq)
4426 return;
4427 bic = icq_to_bic(rq->elv.icq);
4408 4428
4409 if (!bic) 4429 spin_lock_irq(&bfqd->lock);
4410 goto queue_fail;
4411 4430
4412 bfq_check_ioprio_change(bic, bio); 4431 bfq_check_ioprio_change(bic, bio);
4413 4432
@@ -4432,6 +4451,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
4432 bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, 4451 bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio,
4433 true, is_sync, 4452 true, is_sync,
4434 NULL); 4453 NULL);
4454 else
4455 bfqq_already_existing = true;
4435 } 4456 }
4436 } 4457 }
4437 4458
@@ -4457,7 +4478,8 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
4457 * queue: restore the idle window and the 4478 * queue: restore the idle window and the
4458 * possible weight raising period. 4479 * possible weight raising period.
4459 */ 4480 */
4460 bfq_bfqq_resume_state(bfqq, bic); 4481 bfq_bfqq_resume_state(bfqq, bfqd, bic,
4482 bfqq_already_existing);
4461 } 4483 }
4462 } 4484 }
4463 4485
@@ -4465,13 +4487,6 @@ static int bfq_get_rq_private(struct request_queue *q, struct request *rq,
4465 bfq_handle_burst(bfqd, bfqq); 4487 bfq_handle_burst(bfqd, bfqq);
4466 4488
4467 spin_unlock_irq(&bfqd->lock); 4489 spin_unlock_irq(&bfqd->lock);
4468
4469 return 0;
4470
4471queue_fail:
4472 spin_unlock_irq(&bfqd->lock);
4473
4474 return 1;
4475} 4490}
4476 4491
4477static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) 4492static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -4950,8 +4965,8 @@ static struct elv_fs_entry bfq_attrs[] = {
4950 4965
4951static struct elevator_type iosched_bfq_mq = { 4966static struct elevator_type iosched_bfq_mq = {
4952 .ops.mq = { 4967 .ops.mq = {
4953 .get_rq_priv = bfq_get_rq_private, 4968 .prepare_request = bfq_prepare_request,
4954 .put_rq_priv = bfq_put_rq_private, 4969 .finish_request = bfq_finish_request,
4955 .exit_icq = bfq_exit_icq, 4970 .exit_icq = bfq_exit_icq,
4956 .insert_requests = bfq_insert_requests, 4971 .insert_requests = bfq_insert_requests,
4957 .dispatch_request = bfq_dispatch_request, 4972 .dispatch_request = bfq_dispatch_request,
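
For context, a bare-bones, hedged sketch of the renamed blk-mq elevator hooks used by bfq in the hunk above: .get_rq_priv/.put_rq_priv become .prepare_request/.finish_request, and finish_request now receives only the request (the queue is reachable as rq->q). Every my_* name below is hypothetical; a real scheduler also fills in the remaining elevator_mq_ops members the way iosched_bfq_mq does.

#include <linux/blkdev.h>
#include <linux/elevator.h>

/* Hypothetical hooks matching the new callback signatures shown above. */
static void my_sched_prepare_request(struct request *rq, struct bio *bio)
{
	/* allocate/attach per-request scheduler state (bfq picks its queue here) */
}

static void my_sched_finish_request(struct request *rq)
{
	/* release per-request scheduler state; only rq is passed, use rq->q */
}

static struct elevator_type my_sched_mq = {
	.ops.mq = {
		.prepare_request	= my_sched_prepare_request,
		.finish_request		= my_sched_finish_request,
		/* insert_requests, dispatch_request, ... as in iosched_bfq_mq */
	},
};
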
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index b5009a896a7f..b8a3a65f7364 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -224,7 +224,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
224 * @bio: bio to generate/verify integrity metadata for 224 * @bio: bio to generate/verify integrity metadata for
225 * @proc_fn: Pointer to the relevant processing function 225 * @proc_fn: Pointer to the relevant processing function
226 */ 226 */
227static int bio_integrity_process(struct bio *bio, 227static blk_status_t bio_integrity_process(struct bio *bio,
228 integrity_processing_fn *proc_fn) 228 integrity_processing_fn *proc_fn)
229{ 229{
230 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 230 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
@@ -232,7 +232,7 @@ static int bio_integrity_process(struct bio *bio,
232 struct bvec_iter bviter; 232 struct bvec_iter bviter;
233 struct bio_vec bv; 233 struct bio_vec bv;
234 struct bio_integrity_payload *bip = bio_integrity(bio); 234 struct bio_integrity_payload *bip = bio_integrity(bio);
235 unsigned int ret = 0; 235 blk_status_t ret = BLK_STS_OK;
236 void *prot_buf = page_address(bip->bip_vec->bv_page) + 236 void *prot_buf = page_address(bip->bip_vec->bv_page) +
237 bip->bip_vec->bv_offset; 237 bip->bip_vec->bv_offset;
238 238
@@ -369,7 +369,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
369 struct bio *bio = bip->bip_bio; 369 struct bio *bio = bip->bip_bio;
370 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); 370 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
371 371
372 bio->bi_error = bio_integrity_process(bio, bi->profile->verify_fn); 372 bio->bi_status = bio_integrity_process(bio, bi->profile->verify_fn);
373 373
374 /* Restore original bio completion handler */ 374 /* Restore original bio completion handler */
375 bio->bi_end_io = bip->bip_end_io; 375 bio->bi_end_io = bip->bip_end_io;
@@ -398,7 +398,7 @@ void bio_integrity_endio(struct bio *bio)
398 * integrity metadata. Restore original bio end_io handler 398 * integrity metadata. Restore original bio end_io handler
399 * and run it. 399 * and run it.
400 */ 400 */
401 if (bio->bi_error) { 401 if (bio->bi_status) {
402 bio->bi_end_io = bip->bip_end_io; 402 bio->bi_end_io = bip->bip_end_io;
403 bio_endio(bio); 403 bio_endio(bio);
404 404
diff --git a/block/bio.c b/block/bio.c
index 26b0810fb8ea..1cfcd0df3f30 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -315,8 +315,8 @@ static struct bio *__bio_chain_endio(struct bio *bio)
315{ 315{
316 struct bio *parent = bio->bi_private; 316 struct bio *parent = bio->bi_private;
317 317
318 if (!parent->bi_error) 318 if (!parent->bi_status)
319 parent->bi_error = bio->bi_error; 319 parent->bi_status = bio->bi_status;
320 bio_put(bio); 320 bio_put(bio);
321 return parent; 321 return parent;
322} 322}
@@ -369,6 +369,8 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
369 struct bio_list punt, nopunt; 369 struct bio_list punt, nopunt;
370 struct bio *bio; 370 struct bio *bio;
371 371
372 if (WARN_ON_ONCE(!bs->rescue_workqueue))
373 return;
372 /* 374 /*
373 * In order to guarantee forward progress we must punt only bios that 375 * In order to guarantee forward progress we must punt only bios that
374 * were allocated from this bio_set; otherwise, if there was a bio on 376 * were allocated from this bio_set; otherwise, if there was a bio on
@@ -480,7 +482,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, unsigned int nr_iovecs,
480 482
481 if (current->bio_list && 483 if (current->bio_list &&
482 (!bio_list_empty(&current->bio_list[0]) || 484 (!bio_list_empty(&current->bio_list[0]) ||
483 !bio_list_empty(&current->bio_list[1]))) 485 !bio_list_empty(&current->bio_list[1])) &&
486 bs->rescue_workqueue)
484 gfp_mask &= ~__GFP_DIRECT_RECLAIM; 487 gfp_mask &= ~__GFP_DIRECT_RECLAIM;
485 488
486 p = mempool_alloc(bs->bio_pool, gfp_mask); 489 p = mempool_alloc(bs->bio_pool, gfp_mask);
@@ -550,7 +553,7 @@ EXPORT_SYMBOL(zero_fill_bio);
550 * 553 *
551 * Description: 554 * Description:
552 * Put a reference to a &struct bio, either one you have gotten with 555 * Put a reference to a &struct bio, either one you have gotten with
553 * bio_alloc, bio_get or bio_clone. The last put of a bio will free it. 556 * bio_alloc, bio_get or bio_clone_*. The last put of a bio will free it.
554 **/ 557 **/
555void bio_put(struct bio *bio) 558void bio_put(struct bio *bio)
556{ 559{
@@ -599,6 +602,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
599 bio->bi_bdev = bio_src->bi_bdev; 602 bio->bi_bdev = bio_src->bi_bdev;
600 bio_set_flag(bio, BIO_CLONED); 603 bio_set_flag(bio, BIO_CLONED);
601 bio->bi_opf = bio_src->bi_opf; 604 bio->bi_opf = bio_src->bi_opf;
605 bio->bi_write_hint = bio_src->bi_write_hint;
602 bio->bi_iter = bio_src->bi_iter; 606 bio->bi_iter = bio_src->bi_iter;
603 bio->bi_io_vec = bio_src->bi_io_vec; 607 bio->bi_io_vec = bio_src->bi_io_vec;
604 608
@@ -682,6 +686,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
682 return NULL; 686 return NULL;
683 bio->bi_bdev = bio_src->bi_bdev; 687 bio->bi_bdev = bio_src->bi_bdev;
684 bio->bi_opf = bio_src->bi_opf; 688 bio->bi_opf = bio_src->bi_opf;
689 bio->bi_write_hint = bio_src->bi_write_hint;
685 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; 690 bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
686 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; 691 bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
687 692
@@ -924,7 +929,7 @@ static void submit_bio_wait_endio(struct bio *bio)
924{ 929{
925 struct submit_bio_ret *ret = bio->bi_private; 930 struct submit_bio_ret *ret = bio->bi_private;
926 931
927 ret->error = bio->bi_error; 932 ret->error = blk_status_to_errno(bio->bi_status);
928 complete(&ret->event); 933 complete(&ret->event);
929} 934}
930 935
@@ -1823,8 +1828,8 @@ again:
1823 } 1828 }
1824 1829
1825 if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { 1830 if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
1826 trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), 1831 trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio,
1827 bio, bio->bi_error); 1832 blk_status_to_errno(bio->bi_status));
1828 bio_clear_flag(bio, BIO_TRACE_COMPLETION); 1833 bio_clear_flag(bio, BIO_TRACE_COMPLETION);
1829 } 1834 }
1830 1835
@@ -1927,9 +1932,29 @@ void bioset_free(struct bio_set *bs)
1927} 1932}
1928EXPORT_SYMBOL(bioset_free); 1933EXPORT_SYMBOL(bioset_free);
1929 1934
1930static struct bio_set *__bioset_create(unsigned int pool_size, 1935/**
1931 unsigned int front_pad, 1936 * bioset_create - Create a bio_set
1932 bool create_bvec_pool) 1937 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1938 * @front_pad: Number of bytes to allocate in front of the returned bio
1939 * @flags: Flags to modify behavior, currently %BIOSET_NEED_BVECS
1940 * and %BIOSET_NEED_RESCUER
1941 *
1942 * Description:
1943 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1944 * to ask for a number of bytes to be allocated in front of the bio.
1945 * Front pad allocation is useful for embedding the bio inside
1946 * another structure, to avoid allocating extra data to go with the bio.
1947 * Note that the bio must be embedded at the END of that structure always,
1948 * or things will break badly.
1949 * If %BIOSET_NEED_BVECS is set in @flags, a separate pool will be allocated
1950 * for allocating iovecs. This pool is not needed e.g. for bio_clone_fast().
1951 * If %BIOSET_NEED_RESCUER is set, a workqueue is created which can be used to
1952 * dispatch queued requests when the mempool runs out of space.
1953 *
1954 */
1955struct bio_set *bioset_create(unsigned int pool_size,
1956 unsigned int front_pad,
1957 int flags)
1933{ 1958{
1934 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec); 1959 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1935 struct bio_set *bs; 1960 struct bio_set *bs;
@@ -1954,12 +1979,15 @@ static struct bio_set *__bioset_create(unsigned int pool_size,
1954 if (!bs->bio_pool) 1979 if (!bs->bio_pool)
1955 goto bad; 1980 goto bad;
1956 1981
1957 if (create_bvec_pool) { 1982 if (flags & BIOSET_NEED_BVECS) {
1958 bs->bvec_pool = biovec_create_pool(pool_size); 1983 bs->bvec_pool = biovec_create_pool(pool_size);
1959 if (!bs->bvec_pool) 1984 if (!bs->bvec_pool)
1960 goto bad; 1985 goto bad;
1961 } 1986 }
1962 1987
1988 if (!(flags & BIOSET_NEED_RESCUER))
1989 return bs;
1990
1963 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0); 1991 bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
1964 if (!bs->rescue_workqueue) 1992 if (!bs->rescue_workqueue)
1965 goto bad; 1993 goto bad;
@@ -1969,41 +1997,8 @@ bad:
1969 bioset_free(bs); 1997 bioset_free(bs);
1970 return NULL; 1998 return NULL;
1971} 1999}
1972
1973/**
1974 * bioset_create - Create a bio_set
1975 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1976 * @front_pad: Number of bytes to allocate in front of the returned bio
1977 *
1978 * Description:
1979 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1980 * to ask for a number of bytes to be allocated in front of the bio.
1981 * Front pad allocation is useful for embedding the bio inside
1982 * another structure, to avoid allocating extra data to go with the bio.
1983 * Note that the bio must be embedded at the END of that structure always,
1984 * or things will break badly.
1985 */
1986struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1987{
1988 return __bioset_create(pool_size, front_pad, true);
1989}
1990EXPORT_SYMBOL(bioset_create); 2000EXPORT_SYMBOL(bioset_create);
1991 2001
1992/**
1993 * bioset_create_nobvec - Create a bio_set without bio_vec mempool
1994 * @pool_size: Number of bio to cache in the mempool
1995 * @front_pad: Number of bytes to allocate in front of the returned bio
1996 *
1997 * Description:
1998 * Same functionality as bioset_create() except that mempool is not
1999 * created for bio_vecs. Saving some memory for bio_clone_fast() users.
2000 */
2001struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_pad)
2002{
2003 return __bioset_create(pool_size, front_pad, false);
2004}
2005EXPORT_SYMBOL(bioset_create_nobvec);
2006
2007#ifdef CONFIG_BLK_CGROUP 2002#ifdef CONFIG_BLK_CGROUP
2008 2003
2009/** 2004/**
@@ -2118,7 +2113,7 @@ static int __init init_bio(void)
2118 bio_integrity_init(); 2113 bio_integrity_init();
2119 biovec_init_slabs(); 2114 biovec_init_slabs();
2120 2115
2121 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); 2116 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
2122 if (!fs_bio_set) 2117 if (!fs_bio_set)
2123 panic("bio: can't allocate bios\n"); 2118 panic("bio: can't allocate bios\n");
2124 2119
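
A short, hedged sketch of the consolidated bioset_create() interface documented in the bio.c hunks above: the old bioset_create()/bioset_create_nobvec() pair becomes a single call whose third argument takes BIOSET_NEED_BVECS and/or BIOSET_NEED_RESCUER. my_bio_set and my_driver_init() are invented names; the flags are the ones added by this series.

#include <linux/bio.h>
#include <linux/errno.h>

static struct bio_set *my_bio_set;	/* hypothetical driver-owned pool */

static int my_driver_init(void)
{
	/*
	 * Hedged sketch: request a bvec mempool (we build full bios) and a
	 * rescuer workqueue (we may allocate from our own bio_set while a
	 * bio_list is active, so forward progress needs the rescuer).
	 */
	my_bio_set = bioset_create(4, 0,
				   BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
	if (!my_bio_set)
		return -ENOMEM;
	return 0;
}
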
diff --git a/block/blk-core.c b/block/blk-core.c
index a7421b772d0e..af393d5a9680 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -129,11 +129,70 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
129} 129}
130EXPORT_SYMBOL(blk_rq_init); 130EXPORT_SYMBOL(blk_rq_init);
131 131
132static const struct {
133 int errno;
134 const char *name;
135} blk_errors[] = {
136 [BLK_STS_OK] = { 0, "" },
137 [BLK_STS_NOTSUPP] = { -EOPNOTSUPP, "operation not supported" },
138 [BLK_STS_TIMEOUT] = { -ETIMEDOUT, "timeout" },
139 [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" },
140 [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" },
141 [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" },
142 [BLK_STS_NEXUS] = { -EBADE, "critical nexus" },
143 [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
144 [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
145 [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
146 [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
147
148 /* device mapper special case, should not leak out: */
149 [BLK_STS_DM_REQUEUE] = { -EREMCHG, "dm internal retry" },
150
151 /* everything else not covered above: */
152 [BLK_STS_IOERR] = { -EIO, "I/O" },
153};
154
155blk_status_t errno_to_blk_status(int errno)
156{
157 int i;
158
159 for (i = 0; i < ARRAY_SIZE(blk_errors); i++) {
160 if (blk_errors[i].errno == errno)
161 return (__force blk_status_t)i;
162 }
163
164 return BLK_STS_IOERR;
165}
166EXPORT_SYMBOL_GPL(errno_to_blk_status);
167
168int blk_status_to_errno(blk_status_t status)
169{
170 int idx = (__force int)status;
171
172 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
173 return -EIO;
174 return blk_errors[idx].errno;
175}
176EXPORT_SYMBOL_GPL(blk_status_to_errno);
177
178static void print_req_error(struct request *req, blk_status_t status)
179{
180 int idx = (__force int)status;
181
182 if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
183 return;
184
185 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
186 __func__, blk_errors[idx].name, req->rq_disk ?
187 req->rq_disk->disk_name : "?",
188 (unsigned long long)blk_rq_pos(req));
189}
190
132static void req_bio_endio(struct request *rq, struct bio *bio, 191static void req_bio_endio(struct request *rq, struct bio *bio,
133 unsigned int nbytes, int error) 192 unsigned int nbytes, blk_status_t error)
134{ 193{
135 if (error) 194 if (error)
136 bio->bi_error = error; 195 bio->bi_status = error;
137 196
138 if (unlikely(rq->rq_flags & RQF_QUIET)) 197 if (unlikely(rq->rq_flags & RQF_QUIET))
139 bio_set_flag(bio, BIO_QUIET); 198 bio_set_flag(bio, BIO_QUIET);
@@ -177,10 +236,13 @@ static void blk_delay_work(struct work_struct *work)
177 * Description: 236 * Description:
178 * Sometimes queueing needs to be postponed for a little while, to allow 237 * Sometimes queueing needs to be postponed for a little while, to allow
179 * resources to come back. This function will make sure that queueing is 238 * resources to come back. This function will make sure that queueing is
180 * restarted around the specified time. Queue lock must be held. 239 * restarted around the specified time.
181 */ 240 */
182void blk_delay_queue(struct request_queue *q, unsigned long msecs) 241void blk_delay_queue(struct request_queue *q, unsigned long msecs)
183{ 242{
243 lockdep_assert_held(q->queue_lock);
244 WARN_ON_ONCE(q->mq_ops);
245
184 if (likely(!blk_queue_dead(q))) 246 if (likely(!blk_queue_dead(q)))
185 queue_delayed_work(kblockd_workqueue, &q->delay_work, 247 queue_delayed_work(kblockd_workqueue, &q->delay_work,
186 msecs_to_jiffies(msecs)); 248 msecs_to_jiffies(msecs));
@@ -198,6 +260,9 @@ EXPORT_SYMBOL(blk_delay_queue);
198 **/ 260 **/
199void blk_start_queue_async(struct request_queue *q) 261void blk_start_queue_async(struct request_queue *q)
200{ 262{
263 lockdep_assert_held(q->queue_lock);
264 WARN_ON_ONCE(q->mq_ops);
265
201 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 266 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
202 blk_run_queue_async(q); 267 blk_run_queue_async(q);
203} 268}
@@ -210,11 +275,13 @@ EXPORT_SYMBOL(blk_start_queue_async);
210 * Description: 275 * Description:
211 * blk_start_queue() will clear the stop flag on the queue, and call 276 * blk_start_queue() will clear the stop flag on the queue, and call
212 * the request_fn for the queue if it was in a stopped state when 277 * the request_fn for the queue if it was in a stopped state when
213 * entered. Also see blk_stop_queue(). Queue lock must be held. 278 * entered. Also see blk_stop_queue().
214 **/ 279 **/
215void blk_start_queue(struct request_queue *q) 280void blk_start_queue(struct request_queue *q)
216{ 281{
282 lockdep_assert_held(q->queue_lock);
217 WARN_ON(!irqs_disabled()); 283 WARN_ON(!irqs_disabled());
284 WARN_ON_ONCE(q->mq_ops);
218 285
219 queue_flag_clear(QUEUE_FLAG_STOPPED, q); 286 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
220 __blk_run_queue(q); 287 __blk_run_queue(q);
@@ -233,10 +300,13 @@ EXPORT_SYMBOL(blk_start_queue);
233 * or if it simply chooses not to queue more I/O at one point, it can 300 * or if it simply chooses not to queue more I/O at one point, it can
234 * call this function to prevent the request_fn from being called until 301 * call this function to prevent the request_fn from being called until
235 * the driver has signalled it's ready to go again. This happens by calling 302 * the driver has signalled it's ready to go again. This happens by calling
236 * blk_start_queue() to restart queue operations. Queue lock must be held. 303 * blk_start_queue() to restart queue operations.
237 **/ 304 **/
238void blk_stop_queue(struct request_queue *q) 305void blk_stop_queue(struct request_queue *q)
239{ 306{
307 lockdep_assert_held(q->queue_lock);
308 WARN_ON_ONCE(q->mq_ops);
309
240 cancel_delayed_work(&q->delay_work); 310 cancel_delayed_work(&q->delay_work);
241 queue_flag_set(QUEUE_FLAG_STOPPED, q); 311 queue_flag_set(QUEUE_FLAG_STOPPED, q);
242} 312}
@@ -289,6 +359,9 @@ EXPORT_SYMBOL(blk_sync_queue);
289 */ 359 */
290inline void __blk_run_queue_uncond(struct request_queue *q) 360inline void __blk_run_queue_uncond(struct request_queue *q)
291{ 361{
362 lockdep_assert_held(q->queue_lock);
363 WARN_ON_ONCE(q->mq_ops);
364
292 if (unlikely(blk_queue_dead(q))) 365 if (unlikely(blk_queue_dead(q)))
293 return; 366 return;
294 367
@@ -310,11 +383,13 @@ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
310 * @q: The queue to run 383 * @q: The queue to run
311 * 384 *
312 * Description: 385 * Description:
313 * See @blk_run_queue. This variant must be called with the queue lock 386 * See @blk_run_queue.
314 * held and interrupts disabled.
315 */ 387 */
316void __blk_run_queue(struct request_queue *q) 388void __blk_run_queue(struct request_queue *q)
317{ 389{
390 lockdep_assert_held(q->queue_lock);
391 WARN_ON_ONCE(q->mq_ops);
392
318 if (unlikely(blk_queue_stopped(q))) 393 if (unlikely(blk_queue_stopped(q)))
319 return; 394 return;
320 395
@@ -328,10 +403,18 @@ EXPORT_SYMBOL(__blk_run_queue);
328 * 403 *
329 * Description: 404 * Description:
330 * Tells kblockd to perform the equivalent of @blk_run_queue on behalf 405 * Tells kblockd to perform the equivalent of @blk_run_queue on behalf
331 * of us. The caller must hold the queue lock. 406 * of us.
407 *
408 * Note:
409 * Since it is not allowed to run q->delay_work after blk_cleanup_queue()
410 * has canceled q->delay_work, callers must hold the queue lock to avoid
411 * race conditions between blk_cleanup_queue() and blk_run_queue_async().
332 */ 412 */
333void blk_run_queue_async(struct request_queue *q) 413void blk_run_queue_async(struct request_queue *q)
334{ 414{
415 lockdep_assert_held(q->queue_lock);
416 WARN_ON_ONCE(q->mq_ops);
417
335 if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q))) 418 if (likely(!blk_queue_stopped(q) && !blk_queue_dead(q)))
336 mod_delayed_work(kblockd_workqueue, &q->delay_work, 0); 419 mod_delayed_work(kblockd_workqueue, &q->delay_work, 0);
337} 420}
@@ -349,6 +432,8 @@ void blk_run_queue(struct request_queue *q)
349{ 432{
350 unsigned long flags; 433 unsigned long flags;
351 434
435 WARN_ON_ONCE(q->mq_ops);
436
352 spin_lock_irqsave(q->queue_lock, flags); 437 spin_lock_irqsave(q->queue_lock, flags);
353 __blk_run_queue(q); 438 __blk_run_queue(q);
354 spin_unlock_irqrestore(q->queue_lock, flags); 439 spin_unlock_irqrestore(q->queue_lock, flags);
@@ -377,6 +462,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
377 int i; 462 int i;
378 463
379 lockdep_assert_held(q->queue_lock); 464 lockdep_assert_held(q->queue_lock);
465 WARN_ON_ONCE(q->mq_ops);
380 466
381 while (true) { 467 while (true) {
382 bool drain = false; 468 bool drain = false;
@@ -455,6 +541,8 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
455 */ 541 */
456void blk_queue_bypass_start(struct request_queue *q) 542void blk_queue_bypass_start(struct request_queue *q)
457{ 543{
544 WARN_ON_ONCE(q->mq_ops);
545
458 spin_lock_irq(q->queue_lock); 546 spin_lock_irq(q->queue_lock);
459 q->bypass_depth++; 547 q->bypass_depth++;
460 queue_flag_set(QUEUE_FLAG_BYPASS, q); 548 queue_flag_set(QUEUE_FLAG_BYPASS, q);
@@ -481,6 +569,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
481 * @q: queue of interest 569 * @q: queue of interest
482 * 570 *
483 * Leave bypass mode and restore the normal queueing behavior. 571 * Leave bypass mode and restore the normal queueing behavior.
572 *
573 * Note: although blk_queue_bypass_start() is only called for blk-sq queues,
574 * this function is called for both blk-sq and blk-mq queues.
484 */ 575 */
485void blk_queue_bypass_end(struct request_queue *q) 576void blk_queue_bypass_end(struct request_queue *q)
486{ 577{
@@ -732,7 +823,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
732 if (q->id < 0) 823 if (q->id < 0)
733 goto fail_q; 824 goto fail_q;
734 825
735 q->bio_split = bioset_create(BIO_POOL_SIZE, 0); 826 q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
736 if (!q->bio_split) 827 if (!q->bio_split)
737 goto fail_id; 828 goto fail_id;
738 829
@@ -878,6 +969,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
878 969
879int blk_init_allocated_queue(struct request_queue *q) 970int blk_init_allocated_queue(struct request_queue *q)
880{ 971{
972 WARN_ON_ONCE(q->mq_ops);
973
881 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size); 974 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
882 if (!q->fq) 975 if (!q->fq)
883 return -ENOMEM; 976 return -ENOMEM;
@@ -1015,6 +1108,8 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
1015 struct request_list *rl; 1108 struct request_list *rl;
1016 int on_thresh, off_thresh; 1109 int on_thresh, off_thresh;
1017 1110
1111 WARN_ON_ONCE(q->mq_ops);
1112
1018 spin_lock_irq(q->queue_lock); 1113 spin_lock_irq(q->queue_lock);
1019 q->nr_requests = nr; 1114 q->nr_requests = nr;
1020 blk_queue_congestion_threshold(q); 1115 blk_queue_congestion_threshold(q);
@@ -1077,6 +1172,8 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
1077 int may_queue; 1172 int may_queue;
1078 req_flags_t rq_flags = RQF_ALLOCED; 1173 req_flags_t rq_flags = RQF_ALLOCED;
1079 1174
1175 lockdep_assert_held(q->queue_lock);
1176
1080 if (unlikely(blk_queue_dying(q))) 1177 if (unlikely(blk_queue_dying(q)))
1081 return ERR_PTR(-ENODEV); 1178 return ERR_PTR(-ENODEV);
1082 1179
@@ -1250,12 +1347,20 @@ static struct request *get_request(struct request_queue *q, unsigned int op,
1250 struct request_list *rl; 1347 struct request_list *rl;
1251 struct request *rq; 1348 struct request *rq;
1252 1349
1350 lockdep_assert_held(q->queue_lock);
1351 WARN_ON_ONCE(q->mq_ops);
1352
1253 rl = blk_get_rl(q, bio); /* transferred to @rq on success */ 1353 rl = blk_get_rl(q, bio); /* transferred to @rq on success */
1254retry: 1354retry:
1255 rq = __get_request(rl, op, bio, gfp_mask); 1355 rq = __get_request(rl, op, bio, gfp_mask);
1256 if (!IS_ERR(rq)) 1356 if (!IS_ERR(rq))
1257 return rq; 1357 return rq;
1258 1358
1359 if (op & REQ_NOWAIT) {
1360 blk_put_rl(rl);
1361 return ERR_PTR(-EAGAIN);
1362 }
1363
1259 if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) { 1364 if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) {
1260 blk_put_rl(rl); 1365 blk_put_rl(rl);
1261 return rq; 1366 return rq;
@@ -1283,16 +1388,18 @@ retry:
1283 goto retry; 1388 goto retry;
1284} 1389}
1285 1390
1286static struct request *blk_old_get_request(struct request_queue *q, int rw, 1391static struct request *blk_old_get_request(struct request_queue *q,
1287 gfp_t gfp_mask) 1392 unsigned int op, gfp_t gfp_mask)
1288{ 1393{
1289 struct request *rq; 1394 struct request *rq;
1290 1395
1396 WARN_ON_ONCE(q->mq_ops);
1397
1291 /* create ioc upfront */ 1398 /* create ioc upfront */
1292 create_io_context(gfp_mask, q->node); 1399 create_io_context(gfp_mask, q->node);
1293 1400
1294 spin_lock_irq(q->queue_lock); 1401 spin_lock_irq(q->queue_lock);
1295 rq = get_request(q, rw, NULL, gfp_mask); 1402 rq = get_request(q, op, NULL, gfp_mask);
1296 if (IS_ERR(rq)) { 1403 if (IS_ERR(rq)) {
1297 spin_unlock_irq(q->queue_lock); 1404 spin_unlock_irq(q->queue_lock);
1298 return rq; 1405 return rq;
@@ -1305,14 +1412,24 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1305 return rq; 1412 return rq;
1306} 1413}
1307 1414
1308struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 1415struct request *blk_get_request(struct request_queue *q, unsigned int op,
1416 gfp_t gfp_mask)
1309{ 1417{
1310 if (q->mq_ops) 1418 struct request *req;
1311 return blk_mq_alloc_request(q, rw, 1419
1420 if (q->mq_ops) {
1421 req = blk_mq_alloc_request(q, op,
1312 (gfp_mask & __GFP_DIRECT_RECLAIM) ? 1422 (gfp_mask & __GFP_DIRECT_RECLAIM) ?
1313 0 : BLK_MQ_REQ_NOWAIT); 1423 0 : BLK_MQ_REQ_NOWAIT);
1314 else 1424 if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
1315 return blk_old_get_request(q, rw, gfp_mask); 1425 q->mq_ops->initialize_rq_fn(req);
1426 } else {
1427 req = blk_old_get_request(q, op, gfp_mask);
1428 if (!IS_ERR(req) && q->initialize_rq_fn)
1429 q->initialize_rq_fn(req);
1430 }
1431
1432 return req;
1316} 1433}
1317EXPORT_SYMBOL(blk_get_request); 1434EXPORT_SYMBOL(blk_get_request);
1318 1435
@@ -1328,6 +1445,9 @@ EXPORT_SYMBOL(blk_get_request);
1328 */ 1445 */
1329void blk_requeue_request(struct request_queue *q, struct request *rq) 1446void blk_requeue_request(struct request_queue *q, struct request *rq)
1330{ 1447{
1448 lockdep_assert_held(q->queue_lock);
1449 WARN_ON_ONCE(q->mq_ops);
1450
1331 blk_delete_timer(rq); 1451 blk_delete_timer(rq);
1332 blk_clear_rq_complete(rq); 1452 blk_clear_rq_complete(rq);
1333 trace_block_rq_requeue(q, rq); 1453 trace_block_rq_requeue(q, rq);
@@ -1402,9 +1522,6 @@ static void blk_pm_put_request(struct request *rq)
1402static inline void blk_pm_put_request(struct request *rq) {} 1522static inline void blk_pm_put_request(struct request *rq) {}
1403#endif 1523#endif
1404 1524
1405/*
1406 * queue lock must be held
1407 */
1408void __blk_put_request(struct request_queue *q, struct request *req) 1525void __blk_put_request(struct request_queue *q, struct request *req)
1409{ 1526{
1410 req_flags_t rq_flags = req->rq_flags; 1527 req_flags_t rq_flags = req->rq_flags;
@@ -1417,6 +1534,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1417 return; 1534 return;
1418 } 1535 }
1419 1536
1537 lockdep_assert_held(q->queue_lock);
1538
1420 blk_pm_put_request(req); 1539 blk_pm_put_request(req);
1421 1540
1422 elv_completed_request(q, req); 1541 elv_completed_request(q, req);
@@ -1646,6 +1765,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
1646 req->ioprio = ioc->ioprio; 1765 req->ioprio = ioc->ioprio;
1647 else 1766 else
1648 req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); 1767 req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
1768 req->write_hint = bio->bi_write_hint;
1649 blk_rq_bio_prep(req->q, req, bio); 1769 blk_rq_bio_prep(req->q, req, bio);
1650} 1770}
1651EXPORT_SYMBOL_GPL(blk_init_request_from_bio); 1771EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
@@ -1665,10 +1785,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1665 */ 1785 */
1666 blk_queue_bounce(q, &bio); 1786 blk_queue_bounce(q, &bio);
1667 1787
1668 blk_queue_split(q, &bio, q->bio_split); 1788 blk_queue_split(q, &bio);
1669 1789
1670 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 1790 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1671 bio->bi_error = -EIO; 1791 bio->bi_status = BLK_STS_IOERR;
1672 bio_endio(bio); 1792 bio_endio(bio);
1673 return BLK_QC_T_NONE; 1793 return BLK_QC_T_NONE;
1674 } 1794 }
@@ -1726,7 +1846,10 @@ get_rq:
1726 req = get_request(q, bio->bi_opf, bio, GFP_NOIO); 1846 req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
1727 if (IS_ERR(req)) { 1847 if (IS_ERR(req)) {
1728 __wbt_done(q->rq_wb, wb_acct); 1848 __wbt_done(q->rq_wb, wb_acct);
1729 bio->bi_error = PTR_ERR(req); 1849 if (PTR_ERR(req) == -ENOMEM)
1850 bio->bi_status = BLK_STS_RESOURCE;
1851 else
1852 bio->bi_status = BLK_STS_IOERR;
1730 bio_endio(bio); 1853 bio_endio(bio);
1731 goto out_unlock; 1854 goto out_unlock;
1732 } 1855 }
@@ -1881,7 +2004,7 @@ generic_make_request_checks(struct bio *bio)
1881{ 2004{
1882 struct request_queue *q; 2005 struct request_queue *q;
1883 int nr_sectors = bio_sectors(bio); 2006 int nr_sectors = bio_sectors(bio);
1884 int err = -EIO; 2007 blk_status_t status = BLK_STS_IOERR;
1885 char b[BDEVNAME_SIZE]; 2008 char b[BDEVNAME_SIZE];
1886 struct hd_struct *part; 2009 struct hd_struct *part;
1887 2010
@@ -1900,6 +2023,14 @@ generic_make_request_checks(struct bio *bio)
1900 goto end_io; 2023 goto end_io;
1901 } 2024 }
1902 2025
2026 /*
2027 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
2028 * if queue is not a request based queue.
2029 */
2030
2031 if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q))
2032 goto not_supported;
2033
1903 part = bio->bi_bdev->bd_part; 2034 part = bio->bi_bdev->bd_part;
1904 if (should_fail_request(part, bio->bi_iter.bi_size) || 2035 if (should_fail_request(part, bio->bi_iter.bi_size) ||
1905 should_fail_request(&part_to_disk(part)->part0, 2036 should_fail_request(&part_to_disk(part)->part0,
@@ -1924,7 +2055,7 @@ generic_make_request_checks(struct bio *bio)
1924 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { 2055 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
1925 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); 2056 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
1926 if (!nr_sectors) { 2057 if (!nr_sectors) {
1927 err = 0; 2058 status = BLK_STS_OK;
1928 goto end_io; 2059 goto end_io;
1929 } 2060 }
1930 } 2061 }
@@ -1976,9 +2107,9 @@ generic_make_request_checks(struct bio *bio)
1976 return true; 2107 return true;
1977 2108
1978not_supported: 2109not_supported:
1979 err = -EOPNOTSUPP; 2110 status = BLK_STS_NOTSUPP;
1980end_io: 2111end_io:
1981 bio->bi_error = err; 2112 bio->bi_status = status;
1982 bio_endio(bio); 2113 bio_endio(bio);
1983 return false; 2114 return false;
1984} 2115}
@@ -2057,7 +2188,7 @@ blk_qc_t generic_make_request(struct bio *bio)
2057 do { 2188 do {
2058 struct request_queue *q = bdev_get_queue(bio->bi_bdev); 2189 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
2059 2190
2060 if (likely(blk_queue_enter(q, false) == 0)) { 2191 if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
2061 struct bio_list lower, same; 2192 struct bio_list lower, same;
2062 2193
2063 /* Create a fresh bio_list for all subordinate requests */ 2194 /* Create a fresh bio_list for all subordinate requests */
@@ -2082,7 +2213,11 @@ blk_qc_t generic_make_request(struct bio *bio)
2082 bio_list_merge(&bio_list_on_stack[0], &same); 2213 bio_list_merge(&bio_list_on_stack[0], &same);
2083 bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); 2214 bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
2084 } else { 2215 } else {
2085 bio_io_error(bio); 2216 if (unlikely(!blk_queue_dying(q) &&
2217 (bio->bi_opf & REQ_NOWAIT)))
2218 bio_wouldblock_error(bio);
2219 else
2220 bio_io_error(bio);
2086 } 2221 }
2087 bio = bio_list_pop(&bio_list_on_stack[0]); 2222 bio = bio_list_pop(&bio_list_on_stack[0]);
2088 } while (bio); 2223 } while (bio);
@@ -2183,29 +2318,29 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
2183 * @q: the queue to submit the request 2318 * @q: the queue to submit the request
2184 * @rq: the request being queued 2319 * @rq: the request being queued
2185 */ 2320 */
2186int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 2321blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2187{ 2322{
2188 unsigned long flags; 2323 unsigned long flags;
2189 int where = ELEVATOR_INSERT_BACK; 2324 int where = ELEVATOR_INSERT_BACK;
2190 2325
2191 if (blk_cloned_rq_check_limits(q, rq)) 2326 if (blk_cloned_rq_check_limits(q, rq))
2192 return -EIO; 2327 return BLK_STS_IOERR;
2193 2328
2194 if (rq->rq_disk && 2329 if (rq->rq_disk &&
2195 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) 2330 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
2196 return -EIO; 2331 return BLK_STS_IOERR;
2197 2332
2198 if (q->mq_ops) { 2333 if (q->mq_ops) {
2199 if (blk_queue_io_stat(q)) 2334 if (blk_queue_io_stat(q))
2200 blk_account_io_start(rq, true); 2335 blk_account_io_start(rq, true);
2201 blk_mq_sched_insert_request(rq, false, true, false, false); 2336 blk_mq_sched_insert_request(rq, false, true, false, false);
2202 return 0; 2337 return BLK_STS_OK;
2203 } 2338 }
2204 2339
2205 spin_lock_irqsave(q->queue_lock, flags); 2340 spin_lock_irqsave(q->queue_lock, flags);
2206 if (unlikely(blk_queue_dying(q))) { 2341 if (unlikely(blk_queue_dying(q))) {
2207 spin_unlock_irqrestore(q->queue_lock, flags); 2342 spin_unlock_irqrestore(q->queue_lock, flags);
2208 return -ENODEV; 2343 return BLK_STS_IOERR;
2209 } 2344 }
2210 2345
2211 /* 2346 /*
@@ -2222,7 +2357,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2222 __blk_run_queue(q); 2357 __blk_run_queue(q);
2223 spin_unlock_irqrestore(q->queue_lock, flags); 2358 spin_unlock_irqrestore(q->queue_lock, flags);
2224 2359
2225 return 0; 2360 return BLK_STS_OK;
2226} 2361}
2227EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 2362EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
2228 2363
@@ -2238,9 +2373,6 @@ EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
2238 * 2373 *
2239 * Return: 2374 * Return:
2240 * The number of bytes to fail. 2375 * The number of bytes to fail.
2241 *
2242 * Context:
2243 * queue_lock must be held.
2244 */ 2376 */
2245unsigned int blk_rq_err_bytes(const struct request *rq) 2377unsigned int blk_rq_err_bytes(const struct request *rq)
2246{ 2378{
@@ -2380,15 +2512,15 @@ void blk_account_io_start(struct request *rq, bool new_io)
2380 * Return: 2512 * Return:
2381 * Pointer to the request at the top of @q if available. Null 2513 * Pointer to the request at the top of @q if available. Null
2382 * otherwise. 2514 * otherwise.
2383 *
2384 * Context:
2385 * queue_lock must be held.
2386 */ 2515 */
2387struct request *blk_peek_request(struct request_queue *q) 2516struct request *blk_peek_request(struct request_queue *q)
2388{ 2517{
2389 struct request *rq; 2518 struct request *rq;
2390 int ret; 2519 int ret;
2391 2520
2521 lockdep_assert_held(q->queue_lock);
2522 WARN_ON_ONCE(q->mq_ops);
2523
2392 while ((rq = __elv_next_request(q)) != NULL) { 2524 while ((rq = __elv_next_request(q)) != NULL) {
2393 2525
2394 rq = blk_pm_peek_request(q, rq); 2526 rq = blk_pm_peek_request(q, rq);
@@ -2456,15 +2588,14 @@ struct request *blk_peek_request(struct request_queue *q)
2456 rq = NULL; 2588 rq = NULL;
2457 break; 2589 break;
2458 } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) { 2590 } else if (ret == BLKPREP_KILL || ret == BLKPREP_INVALID) {
2459 int err = (ret == BLKPREP_INVALID) ? -EREMOTEIO : -EIO;
2460
2461 rq->rq_flags |= RQF_QUIET; 2591 rq->rq_flags |= RQF_QUIET;
2462 /* 2592 /*
2463 * Mark this request as started so we don't trigger 2593 * Mark this request as started so we don't trigger
2464 * any debug logic in the end I/O path. 2594 * any debug logic in the end I/O path.
2465 */ 2595 */
2466 blk_start_request(rq); 2596 blk_start_request(rq);
2467 __blk_end_request_all(rq, err); 2597 __blk_end_request_all(rq, ret == BLKPREP_INVALID ?
2598 BLK_STS_TARGET : BLK_STS_IOERR);
2468 } else { 2599 } else {
2469 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 2600 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
2470 break; 2601 break;
@@ -2505,12 +2636,12 @@ void blk_dequeue_request(struct request *rq)
2505 * 2636 *
2506 * Block internal functions which don't want to start timer should 2637 * Block internal functions which don't want to start timer should
2507 * call blk_dequeue_request(). 2638 * call blk_dequeue_request().
2508 *
2509 * Context:
2510 * queue_lock must be held.
2511 */ 2639 */
2512void blk_start_request(struct request *req) 2640void blk_start_request(struct request *req)
2513{ 2641{
2642 lockdep_assert_held(req->q->queue_lock);
2643 WARN_ON_ONCE(req->q->mq_ops);
2644
2514 blk_dequeue_request(req); 2645 blk_dequeue_request(req);
2515 2646
2516 if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) { 2647 if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
@@ -2535,14 +2666,14 @@ EXPORT_SYMBOL(blk_start_request);
2535 * Return: 2666 * Return:
2536 * Pointer to the request at the top of @q if available. Null 2667 * Pointer to the request at the top of @q if available. Null
2537 * otherwise. 2668 * otherwise.
2538 *
2539 * Context:
2540 * queue_lock must be held.
2541 */ 2669 */
2542struct request *blk_fetch_request(struct request_queue *q) 2670struct request *blk_fetch_request(struct request_queue *q)
2543{ 2671{
2544 struct request *rq; 2672 struct request *rq;
2545 2673
2674 lockdep_assert_held(q->queue_lock);
2675 WARN_ON_ONCE(q->mq_ops);
2676
2546 rq = blk_peek_request(q); 2677 rq = blk_peek_request(q);
2547 if (rq) 2678 if (rq)
2548 blk_start_request(rq); 2679 blk_start_request(rq);
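The lockdep_assert_held()/WARN_ON_ONCE(q->mq_ops) pairs added to blk_peek_request(), blk_start_request() and blk_fetch_request() spell out the legacy calling convention rather than change it. A hedged sketch of the single-queue driver loop they guard, which runs with q->queue_lock held (the hardware submit helper is hypothetical):

    static void my_request_fn(struct request_queue *q)
    {
            struct request *rq;

            /* ->request_fn runs with q->queue_lock held, so the
             * lockdep assertions above are satisfied here */
            while ((rq = blk_fetch_request(q)) != NULL) {
                    if (my_hw_submit(rq) < 0)       /* hypothetical */
                            __blk_end_request_all(rq, BLK_STS_IOERR);
            }
    }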
@@ -2553,7 +2684,7 @@ EXPORT_SYMBOL(blk_fetch_request);
2553/** 2684/**
2554 * blk_update_request - Special helper function for request stacking drivers 2685 * blk_update_request - Special helper function for request stacking drivers
2555 * @req: the request being processed 2686 * @req: the request being processed
2556 * @error: %0 for success, < %0 for error 2687 * @error: block status code
2557 * @nr_bytes: number of bytes to complete @req 2688 * @nr_bytes: number of bytes to complete @req
2558 * 2689 *
2559 * Description: 2690 * Description:
@@ -2572,49 +2703,19 @@ EXPORT_SYMBOL(blk_fetch_request);
2572 * %false - this request doesn't have any more data 2703 * %false - this request doesn't have any more data
2573 * %true - this request has more data 2704 * %true - this request has more data
2574 **/ 2705 **/
2575bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) 2706bool blk_update_request(struct request *req, blk_status_t error,
2707 unsigned int nr_bytes)
2576{ 2708{
2577 int total_bytes; 2709 int total_bytes;
2578 2710
2579 trace_block_rq_complete(req, error, nr_bytes); 2711 trace_block_rq_complete(req, blk_status_to_errno(error), nr_bytes);
2580 2712
2581 if (!req->bio) 2713 if (!req->bio)
2582 return false; 2714 return false;
2583 2715
2584 if (error && !blk_rq_is_passthrough(req) && 2716 if (unlikely(error && !blk_rq_is_passthrough(req) &&
2585 !(req->rq_flags & RQF_QUIET)) { 2717 !(req->rq_flags & RQF_QUIET)))
2586 char *error_type; 2718 print_req_error(req, error);
2587
2588 switch (error) {
2589 case -ENOLINK:
2590 error_type = "recoverable transport";
2591 break;
2592 case -EREMOTEIO:
2593 error_type = "critical target";
2594 break;
2595 case -EBADE:
2596 error_type = "critical nexus";
2597 break;
2598 case -ETIMEDOUT:
2599 error_type = "timeout";
2600 break;
2601 case -ENOSPC:
2602 error_type = "critical space allocation";
2603 break;
2604 case -ENODATA:
2605 error_type = "critical medium";
2606 break;
2607 case -EIO:
2608 default:
2609 error_type = "I/O";
2610 break;
2611 }
2612 printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
2613 __func__, error_type, req->rq_disk ?
2614 req->rq_disk->disk_name : "?",
2615 (unsigned long long)blk_rq_pos(req));
2616
2617 }
2618 2719
2619 blk_account_io_completion(req, nr_bytes); 2720 blk_account_io_completion(req, nr_bytes);
2620 2721
@@ -2680,7 +2781,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2680} 2781}
2681EXPORT_SYMBOL_GPL(blk_update_request); 2782EXPORT_SYMBOL_GPL(blk_update_request);
2682 2783
2683static bool blk_update_bidi_request(struct request *rq, int error, 2784static bool blk_update_bidi_request(struct request *rq, blk_status_t error,
2684 unsigned int nr_bytes, 2785 unsigned int nr_bytes,
2685 unsigned int bidi_bytes) 2786 unsigned int bidi_bytes)
2686{ 2787{
@@ -2718,13 +2819,13 @@ void blk_unprep_request(struct request *req)
2718} 2819}
2719EXPORT_SYMBOL_GPL(blk_unprep_request); 2820EXPORT_SYMBOL_GPL(blk_unprep_request);
2720 2821
2721/* 2822void blk_finish_request(struct request *req, blk_status_t error)
2722 * queue lock must be held
2723 */
2724void blk_finish_request(struct request *req, int error)
2725{ 2823{
2726 struct request_queue *q = req->q; 2824 struct request_queue *q = req->q;
2727 2825
2826 lockdep_assert_held(req->q->queue_lock);
2827 WARN_ON_ONCE(q->mq_ops);
2828
2728 if (req->rq_flags & RQF_STATS) 2829 if (req->rq_flags & RQF_STATS)
2729 blk_stat_add(req); 2830 blk_stat_add(req);
2730 2831
@@ -2758,7 +2859,7 @@ EXPORT_SYMBOL(blk_finish_request);
2758/** 2859/**
2759 * blk_end_bidi_request - Complete a bidi request 2860 * blk_end_bidi_request - Complete a bidi request
2760 * @rq: the request to complete 2861 * @rq: the request to complete
2761 * @error: %0 for success, < %0 for error 2862 * @error: block status code
2762 * @nr_bytes: number of bytes to complete @rq 2863 * @nr_bytes: number of bytes to complete @rq
2763 * @bidi_bytes: number of bytes to complete @rq->next_rq 2864 * @bidi_bytes: number of bytes to complete @rq->next_rq
2764 * 2865 *
@@ -2772,12 +2873,14 @@ EXPORT_SYMBOL(blk_finish_request);
2772 * %false - we are done with this request 2873 * %false - we are done with this request
2773 * %true - still buffers pending for this request 2874 * %true - still buffers pending for this request
2774 **/ 2875 **/
2775static bool blk_end_bidi_request(struct request *rq, int error, 2876static bool blk_end_bidi_request(struct request *rq, blk_status_t error,
2776 unsigned int nr_bytes, unsigned int bidi_bytes) 2877 unsigned int nr_bytes, unsigned int bidi_bytes)
2777{ 2878{
2778 struct request_queue *q = rq->q; 2879 struct request_queue *q = rq->q;
2779 unsigned long flags; 2880 unsigned long flags;
2780 2881
2882 WARN_ON_ONCE(q->mq_ops);
2883
2781 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2884 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2782 return true; 2885 return true;
2783 2886
@@ -2791,7 +2894,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
2791/** 2894/**
2792 * __blk_end_bidi_request - Complete a bidi request with queue lock held 2895 * __blk_end_bidi_request - Complete a bidi request with queue lock held
2793 * @rq: the request to complete 2896 * @rq: the request to complete
2794 * @error: %0 for success, < %0 for error 2897 * @error: block status code
2795 * @nr_bytes: number of bytes to complete @rq 2898 * @nr_bytes: number of bytes to complete @rq
2796 * @bidi_bytes: number of bytes to complete @rq->next_rq 2899 * @bidi_bytes: number of bytes to complete @rq->next_rq
2797 * 2900 *
@@ -2803,9 +2906,12 @@ static bool blk_end_bidi_request(struct request *rq, int error,
2803 * %false - we are done with this request 2906 * %false - we are done with this request
2804 * %true - still buffers pending for this request 2907 * %true - still buffers pending for this request
2805 **/ 2908 **/
2806static bool __blk_end_bidi_request(struct request *rq, int error, 2909static bool __blk_end_bidi_request(struct request *rq, blk_status_t error,
2807 unsigned int nr_bytes, unsigned int bidi_bytes) 2910 unsigned int nr_bytes, unsigned int bidi_bytes)
2808{ 2911{
2912 lockdep_assert_held(rq->q->queue_lock);
2913 WARN_ON_ONCE(rq->q->mq_ops);
2914
2809 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2915 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2810 return true; 2916 return true;
2811 2917
@@ -2817,7 +2923,7 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
2817/** 2923/**
2818 * blk_end_request - Helper function for drivers to complete the request. 2924 * blk_end_request - Helper function for drivers to complete the request.
2819 * @rq: the request being processed 2925 * @rq: the request being processed
2820 * @error: %0 for success, < %0 for error 2926 * @error: block status code
2821 * @nr_bytes: number of bytes to complete 2927 * @nr_bytes: number of bytes to complete
2822 * 2928 *
2823 * Description: 2929 * Description:
@@ -2828,8 +2934,10 @@ static bool __blk_end_bidi_request(struct request *rq, int error,
2828 * %false - we are done with this request 2934 * %false - we are done with this request
2829 * %true - still buffers pending for this request 2935 * %true - still buffers pending for this request
2830 **/ 2936 **/
2831bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2937bool blk_end_request(struct request *rq, blk_status_t error,
2938 unsigned int nr_bytes)
2832{ 2939{
2940 WARN_ON_ONCE(rq->q->mq_ops);
2833 return blk_end_bidi_request(rq, error, nr_bytes, 0); 2941 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2834} 2942}
2835EXPORT_SYMBOL(blk_end_request); 2943EXPORT_SYMBOL(blk_end_request);
@@ -2837,12 +2945,12 @@ EXPORT_SYMBOL(blk_end_request);
2837/** 2945/**
2838 * blk_end_request_all - Helper function for drivers to finish the request. 2946 * blk_end_request_all - Helper function for drivers to finish the request.
2839 * @rq: the request to finish 2947 * @rq: the request to finish
2840 * @error: %0 for success, < %0 for error 2948 * @error: block status code
2841 * 2949 *
2842 * Description: 2950 * Description:
2843 * Completely finish @rq. 2951 * Completely finish @rq.
2844 */ 2952 */
2845void blk_end_request_all(struct request *rq, int error) 2953void blk_end_request_all(struct request *rq, blk_status_t error)
2846{ 2954{
2847 bool pending; 2955 bool pending;
2848 unsigned int bidi_bytes = 0; 2956 unsigned int bidi_bytes = 0;
@@ -2858,7 +2966,7 @@ EXPORT_SYMBOL(blk_end_request_all);
2858/** 2966/**
2859 * __blk_end_request - Helper function for drivers to complete the request. 2967 * __blk_end_request - Helper function for drivers to complete the request.
2860 * @rq: the request being processed 2968 * @rq: the request being processed
2861 * @error: %0 for success, < %0 for error 2969 * @error: block status code
2862 * @nr_bytes: number of bytes to complete 2970 * @nr_bytes: number of bytes to complete
2863 * 2971 *
2864 * Description: 2972 * Description:
@@ -2868,8 +2976,12 @@ EXPORT_SYMBOL(blk_end_request_all);
2868 * %false - we are done with this request 2976 * %false - we are done with this request
2869 * %true - still buffers pending for this request 2977 * %true - still buffers pending for this request
2870 **/ 2978 **/
2871bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) 2979bool __blk_end_request(struct request *rq, blk_status_t error,
2980 unsigned int nr_bytes)
2872{ 2981{
2982 lockdep_assert_held(rq->q->queue_lock);
2983 WARN_ON_ONCE(rq->q->mq_ops);
2984
2873 return __blk_end_bidi_request(rq, error, nr_bytes, 0); 2985 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2874} 2986}
2875EXPORT_SYMBOL(__blk_end_request); 2987EXPORT_SYMBOL(__blk_end_request);
@@ -2877,16 +2989,19 @@ EXPORT_SYMBOL(__blk_end_request);
2877/** 2989/**
2878 * __blk_end_request_all - Helper function for drivers to finish the request. 2990 * __blk_end_request_all - Helper function for drivers to finish the request.
2879 * @rq: the request to finish 2991 * @rq: the request to finish
2880 * @error: %0 for success, < %0 for error 2992 * @error: block status code
2881 * 2993 *
2882 * Description: 2994 * Description:
2883 * Completely finish @rq. Must be called with queue lock held. 2995 * Completely finish @rq. Must be called with queue lock held.
2884 */ 2996 */
2885void __blk_end_request_all(struct request *rq, int error) 2997void __blk_end_request_all(struct request *rq, blk_status_t error)
2886{ 2998{
2887 bool pending; 2999 bool pending;
2888 unsigned int bidi_bytes = 0; 3000 unsigned int bidi_bytes = 0;
2889 3001
3002 lockdep_assert_held(rq->q->queue_lock);
3003 WARN_ON_ONCE(rq->q->mq_ops);
3004
2890 if (unlikely(blk_bidi_rq(rq))) 3005 if (unlikely(blk_bidi_rq(rq)))
2891 bidi_bytes = blk_rq_bytes(rq->next_rq); 3006 bidi_bytes = blk_rq_bytes(rq->next_rq);
2892 3007
@@ -2898,7 +3013,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
2898/** 3013/**
2899 * __blk_end_request_cur - Helper function to finish the current request chunk. 3014 * __blk_end_request_cur - Helper function to finish the current request chunk.
2900 * @rq: the request to finish the current chunk for 3015 * @rq: the request to finish the current chunk for
2901 * @error: %0 for success, < %0 for error 3016 * @error: block status code
2902 * 3017 *
2903 * Description: 3018 * Description:
2904 * Complete the current consecutively mapped chunk from @rq. Must 3019 * Complete the current consecutively mapped chunk from @rq. Must
@@ -2908,7 +3023,7 @@ EXPORT_SYMBOL(__blk_end_request_all);
2908 * %false - we are done with this request 3023 * %false - we are done with this request
2909 * %true - still buffers pending for this request 3024 * %true - still buffers pending for this request
2910 */ 3025 */
2911bool __blk_end_request_cur(struct request *rq, int error) 3026bool __blk_end_request_cur(struct request *rq, blk_status_t error)
2912{ 3027{
2913 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq)); 3028 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2914} 3029}
@@ -3151,6 +3266,8 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth,
3151 bool from_schedule) 3266 bool from_schedule)
3152 __releases(q->queue_lock) 3267 __releases(q->queue_lock)
3153{ 3268{
3269 lockdep_assert_held(q->queue_lock);
3270
3154 trace_block_unplug(q, depth, !from_schedule); 3271 trace_block_unplug(q, depth, !from_schedule);
3155 3272
3156 if (from_schedule) 3273 if (from_schedule)
@@ -3249,7 +3366,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3249 * Short-circuit if @q is dead 3366 * Short-circuit if @q is dead
3250 */ 3367 */
3251 if (unlikely(blk_queue_dying(q))) { 3368 if (unlikely(blk_queue_dying(q))) {
3252 __blk_end_request_all(rq, -ENODEV); 3369 __blk_end_request_all(rq, BLK_STS_IOERR);
3253 continue; 3370 continue;
3254 } 3371 }
3255 3372
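Summing up the blk-core.c conversions: the old -EIO and -ENODEV completions collapse into BLK_STS_IOERR, and BLKPREP_INVALID now surfaces as BLK_STS_TARGET instead of -EREMOTEIO. Where a legacy errno still enters the picture, the same series provides translation helpers; a hedged sketch, assuming the companion errno_to_blk_status() helper from those patches (not shown in this hunk):

    int err = my_legacy_prepare(rq);        /* hypothetical, returns 0 or -Exxx */

    blk_mq_end_request(rq, err ? errno_to_blk_status(err) : BLK_STS_OK);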
diff --git a/block/blk-exec.c b/block/blk-exec.c
index a9451e3b8587..5c0f3dc446dc 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
16 * @rq: request to complete 16 * @rq: request to complete
17 * @error: end I/O status of the request 17 * @error: end I/O status of the request
18 */ 18 */
19static void blk_end_sync_rq(struct request *rq, int error) 19static void blk_end_sync_rq(struct request *rq, blk_status_t error)
20{ 20{
21 struct completion *waiting = rq->end_io_data; 21 struct completion *waiting = rq->end_io_data;
22 22
@@ -69,7 +69,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
69 69
70 if (unlikely(blk_queue_dying(q))) { 70 if (unlikely(blk_queue_dying(q))) {
71 rq->rq_flags |= RQF_QUIET; 71 rq->rq_flags |= RQF_QUIET;
72 __blk_end_request_all(rq, -ENXIO); 72 __blk_end_request_all(rq, BLK_STS_IOERR);
73 spin_unlock_irq(q->queue_lock); 73 spin_unlock_irq(q->queue_lock);
74 return; 74 return;
75 } 75 }
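blk_end_sync_rq() above is the template for every rq_end_io_fn after this change: the callback now receives a blk_status_t. A hedged sketch of a driver-private completion handed to blk_execute_rq_nowait() (the payload structure is hypothetical, and freeing the request is left to the submitter):

    static void my_rq_done(struct request *rq, blk_status_t error)
    {
            struct my_cmd *cmd = rq->end_io_data;   /* hypothetical payload */

            cmd->status = error;
            complete(&cmd->done);
    }

    /* ... */
    rq->end_io_data = cmd;
    blk_execute_rq_nowait(q, NULL, rq, 0, my_rq_done);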
diff --git a/block/blk-flush.c b/block/blk-flush.c
index c4e0880b54bb..ed5fe322abba 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -164,7 +164,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
164 */ 164 */
165static bool blk_flush_complete_seq(struct request *rq, 165static bool blk_flush_complete_seq(struct request *rq,
166 struct blk_flush_queue *fq, 166 struct blk_flush_queue *fq,
167 unsigned int seq, int error) 167 unsigned int seq, blk_status_t error)
168{ 168{
169 struct request_queue *q = rq->q; 169 struct request_queue *q = rq->q;
170 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; 170 struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx];
@@ -216,7 +216,7 @@ static bool blk_flush_complete_seq(struct request *rq,
216 return kicked | queued; 216 return kicked | queued;
217} 217}
218 218
219static void flush_end_io(struct request *flush_rq, int error) 219static void flush_end_io(struct request *flush_rq, blk_status_t error)
220{ 220{
221 struct request_queue *q = flush_rq->q; 221 struct request_queue *q = flush_rq->q;
222 struct list_head *running; 222 struct list_head *running;
@@ -341,11 +341,13 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
341 return blk_flush_queue_rq(flush_rq, false); 341 return blk_flush_queue_rq(flush_rq, false);
342} 342}
343 343
344static void flush_data_end_io(struct request *rq, int error) 344static void flush_data_end_io(struct request *rq, blk_status_t error)
345{ 345{
346 struct request_queue *q = rq->q; 346 struct request_queue *q = rq->q;
347 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); 347 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
348 348
349 lockdep_assert_held(q->queue_lock);
350
349 /* 351 /*
350 * Updating q->in_flight[] here for making this tag usable 352 * Updating q->in_flight[] here for making this tag usable
351 * early. Because in blk_queue_start_tag(), 353 * early. Because in blk_queue_start_tag(),
@@ -382,7 +384,7 @@ static void flush_data_end_io(struct request *rq, int error)
382 blk_run_queue_async(q); 384 blk_run_queue_async(q);
383} 385}
384 386
385static void mq_flush_data_end_io(struct request *rq, int error) 387static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
386{ 388{
387 struct request_queue *q = rq->q; 389 struct request_queue *q = rq->q;
388 struct blk_mq_hw_ctx *hctx; 390 struct blk_mq_hw_ctx *hctx;
@@ -411,9 +413,6 @@ static void mq_flush_data_end_io(struct request *rq, int error)
411 * or __blk_mq_run_hw_queue() to dispatch request. 413 * or __blk_mq_run_hw_queue() to dispatch request.
412 * @rq is being submitted. Analyze what needs to be done and put it on the 414 * @rq is being submitted. Analyze what needs to be done and put it on the
413 * right queue. 415 * right queue.
414 *
415 * CONTEXT:
416 * spin_lock_irq(q->queue_lock) in !mq case
417 */ 416 */
418void blk_insert_flush(struct request *rq) 417void blk_insert_flush(struct request *rq)
419{ 418{
@@ -422,6 +421,9 @@ void blk_insert_flush(struct request *rq)
422 unsigned int policy = blk_flush_policy(fflags, rq); 421 unsigned int policy = blk_flush_policy(fflags, rq);
423 struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx); 422 struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
424 423
424 if (!q->mq_ops)
425 lockdep_assert_held(q->queue_lock);
426
425 /* 427 /*
426 * @policy now records what operations need to be done. Adjust 428 * @policy now records what operations need to be done. Adjust
427 * REQ_PREFLUSH and FUA for the driver. 429 * REQ_PREFLUSH and FUA for the driver.
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 0f891a9aff4d..feb30570eaf5 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -384,9 +384,9 @@ static struct kobj_type integrity_ktype = {
384 .sysfs_ops = &integrity_ops, 384 .sysfs_ops = &integrity_ops,
385}; 385};
386 386
387static int blk_integrity_nop_fn(struct blk_integrity_iter *iter) 387static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
388{ 388{
389 return 0; 389 return BLK_STS_OK;
390} 390}
391 391
392static const struct blk_integrity_profile nop_profile = { 392static const struct blk_integrity_profile nop_profile = {
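blk_integrity_nop_fn() is the simplest instance of the new rule: integrity generate/verify callbacks return a block status rather than an errno. A hedged sketch of what a non-trivial verify hook looks like under the changed typedef (the iterator walk and the BLK_STS_PROTECTION code are assumptions consistent with this series, not taken from this hunk):

    static blk_status_t my_verify_fn(struct blk_integrity_iter *iter)
    {
            /* compare the protection buffer against the data here ... */
            if (!my_tags_match(iter))               /* hypothetical check */
                    return BLK_STS_PROTECTION;      /* replaces an -EILSEQ style errno */
            return BLK_STS_OK;
    }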
diff --git a/block/blk-map.c b/block/blk-map.c
index 3b5cb863318f..2547016aa7aa 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -16,6 +16,8 @@
16 */ 16 */
17int blk_rq_append_bio(struct request *rq, struct bio *bio) 17int blk_rq_append_bio(struct request *rq, struct bio *bio)
18{ 18{
19 blk_queue_bounce(rq->q, &bio);
20
19 if (!rq->bio) { 21 if (!rq->bio) {
20 blk_rq_bio_prep(rq->q, rq, bio); 22 blk_rq_bio_prep(rq->q, rq, bio);
21 } else { 23 } else {
@@ -72,15 +74,13 @@ static int __blk_rq_map_user_iov(struct request *rq,
72 map_data->offset += bio->bi_iter.bi_size; 74 map_data->offset += bio->bi_iter.bi_size;
73 75
74 orig_bio = bio; 76 orig_bio = bio;
75 blk_queue_bounce(q, &bio);
76 77
77 /* 78 /*
78 * We link the bounce buffer in and could have to traverse it 79 * We link the bounce buffer in and could have to traverse it
79 * later so we have to get a ref to prevent it from being freed 80 * later so we have to get a ref to prevent it from being freed
80 */ 81 */
81 bio_get(bio);
82
83 ret = blk_rq_append_bio(rq, bio); 82 ret = blk_rq_append_bio(rq, bio);
83 bio_get(bio);
84 if (ret) { 84 if (ret) {
85 bio_endio(bio); 85 bio_endio(bio);
86 __blk_rq_unmap_user(orig_bio); 86 __blk_rq_unmap_user(orig_bio);
@@ -249,7 +249,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
249 return ret; 249 return ret;
250 } 250 }
251 251
252 blk_queue_bounce(q, &rq->bio);
253 return 0; 252 return 0;
254} 253}
255EXPORT_SYMBOL(blk_rq_map_kern); 254EXPORT_SYMBOL(blk_rq_map_kern);
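With blk_queue_bounce() pulled into blk_rq_append_bio(), the mapping helpers stop bouncing on their own and callers keep their existing form. A hedged usage sketch (buffer, length and error label are illustrative):

    /* any bounce buffering now happens inside the helper */
    ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
    if (ret)
            goto out_free_rq;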
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3990ae406341..99038830fb42 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -108,31 +108,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
108 bool do_split = true; 108 bool do_split = true;
109 struct bio *new = NULL; 109 struct bio *new = NULL;
110 const unsigned max_sectors = get_max_io_size(q, bio); 110 const unsigned max_sectors = get_max_io_size(q, bio);
111 unsigned bvecs = 0;
112 111
113 bio_for_each_segment(bv, bio, iter) { 112 bio_for_each_segment(bv, bio, iter) {
114 /* 113 /*
115 * With arbitrary bio size, the incoming bio may be very
116 * big. We have to split the bio into small bios so that
117 * each holds at most BIO_MAX_PAGES bvecs because
118 * bio_clone() can fail to allocate big bvecs.
119 *
120 * It should have been better to apply the limit per
121 * request queue in which bio_clone() is involved,
122 * instead of globally. The biggest blocker is the
123 * bio_clone() in bio bounce.
124 *
125 * If bio is splitted by this reason, we should have
126 * allowed to continue bios merging, but don't do
127 * that now for making the change simple.
128 *
129 * TODO: deal with bio bounce's bio_clone() gracefully
130 * and convert the global limit into per-queue limit.
131 */
132 if (bvecs++ >= BIO_MAX_PAGES)
133 goto split;
134
135 /*
136 * If the queue doesn't support SG gaps and adding this 114 * If the queue doesn't support SG gaps and adding this
137 * offset would create a gap, disallow it. 115 * offset would create a gap, disallow it.
138 */ 116 */
@@ -202,8 +180,7 @@ split:
202 return do_split ? new : NULL; 180 return do_split ? new : NULL;
203} 181}
204 182
205void blk_queue_split(struct request_queue *q, struct bio **bio, 183void blk_queue_split(struct request_queue *q, struct bio **bio)
206 struct bio_set *bs)
207{ 184{
208 struct bio *split, *res; 185 struct bio *split, *res;
209 unsigned nsegs; 186 unsigned nsegs;
@@ -211,13 +188,13 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
211 switch (bio_op(*bio)) { 188 switch (bio_op(*bio)) {
212 case REQ_OP_DISCARD: 189 case REQ_OP_DISCARD:
213 case REQ_OP_SECURE_ERASE: 190 case REQ_OP_SECURE_ERASE:
214 split = blk_bio_discard_split(q, *bio, bs, &nsegs); 191 split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
215 break; 192 break;
216 case REQ_OP_WRITE_ZEROES: 193 case REQ_OP_WRITE_ZEROES:
217 split = blk_bio_write_zeroes_split(q, *bio, bs, &nsegs); 194 split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
218 break; 195 break;
219 case REQ_OP_WRITE_SAME: 196 case REQ_OP_WRITE_SAME:
220 split = blk_bio_write_same_split(q, *bio, bs, &nsegs); 197 split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
221 break; 198 break;
222 default: 199 default:
223 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); 200 split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -671,6 +648,9 @@ static void blk_account_io_merge(struct request *req)
671static struct request *attempt_merge(struct request_queue *q, 648static struct request *attempt_merge(struct request_queue *q,
672 struct request *req, struct request *next) 649 struct request *req, struct request *next)
673{ 650{
651 if (!q->mq_ops)
652 lockdep_assert_held(q->queue_lock);
653
674 if (!rq_mergeable(req) || !rq_mergeable(next)) 654 if (!rq_mergeable(req) || !rq_mergeable(next))
675 return NULL; 655 return NULL;
676 656
@@ -693,6 +673,13 @@ static struct request *attempt_merge(struct request_queue *q,
693 return NULL; 673 return NULL;
694 674
695 /* 675 /*
676 * Don't allow merge of different write hints, or for a hint with
677 * non-hint IO.
678 */
679 if (req->write_hint != next->write_hint)
680 return NULL;
681
682 /*
696 * If we are allowed to merge, then append bio list 683 * If we are allowed to merge, then append bio list
697 * from next to rq and release next. merge_requests_fn 684 * from next to rq and release next. merge_requests_fn
698 * will have updated segment counts, update sector 685 * will have updated segment counts, update sector
@@ -811,6 +798,13 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
811 !blk_write_same_mergeable(rq->bio, bio)) 798 !blk_write_same_mergeable(rq->bio, bio))
812 return false; 799 return false;
813 800
801 /*
802 * Don't allow merge of different write hints, or for a hint with
803 * non-hint IO.
804 */
805 if (rq->write_hint != bio->bi_write_hint)
806 return false;
807
814 return true; 808 return true;
815} 809}
816 810
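Both checks enforce the same invariant: requests and bios only merge when their write hints agree, so a lifetime hint never silently spreads over unrelated data. For completeness, a hedged sketch of where the hint originates, assuming the fcntl() interface added alongside this series (F_SET_RW_HINT with an RWH_WRITE_LIFE_* value passed by pointer; older libcs may need <linux/fcntl.h> for the definitions):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>

    static int mark_short_lived(int fd)
    {
            uint64_t hint = RWH_WRITE_LIFE_SHORT;

            if (fcntl(fd, F_SET_RW_HINT, &hint) < 0) {
                    perror("F_SET_RW_HINT");
                    return -1;
            }
            /* later writes on fd carry the hint down to bio->bi_write_hint */
            return 0;
    }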
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8e61e8640e17..2cca4fc43f45 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -14,10 +14,15 @@
14#include "blk.h" 14#include "blk.h"
15#include "blk-mq.h" 15#include "blk-mq.h"
16 16
17static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues, 17static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
18 const int cpu) 18 const struct cpumask *online_mask)
19{ 19{
20 return cpu * nr_queues / nr_cpus; 20 /*
21 * Non online CPU will be mapped to queue index 0.
22 */
23 if (!cpumask_test_cpu(cpu, online_mask))
24 return 0;
25 return cpu % nr_queues;
21} 26}
22 27
23static int get_first_sibling(unsigned int cpu) 28static int get_first_sibling(unsigned int cpu)
@@ -36,55 +41,26 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
36 unsigned int *map = set->mq_map; 41 unsigned int *map = set->mq_map;
37 unsigned int nr_queues = set->nr_hw_queues; 42 unsigned int nr_queues = set->nr_hw_queues;
38 const struct cpumask *online_mask = cpu_online_mask; 43 const struct cpumask *online_mask = cpu_online_mask;
39 unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling; 44 unsigned int cpu, first_sibling;
40 cpumask_var_t cpus;
41
42 if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
43 return -ENOMEM;
44
45 cpumask_clear(cpus);
46 nr_cpus = nr_uniq_cpus = 0;
47 for_each_cpu(i, online_mask) {
48 nr_cpus++;
49 first_sibling = get_first_sibling(i);
50 if (!cpumask_test_cpu(first_sibling, cpus))
51 nr_uniq_cpus++;
52 cpumask_set_cpu(i, cpus);
53 }
54
55 queue = 0;
56 for_each_possible_cpu(i) {
57 if (!cpumask_test_cpu(i, online_mask)) {
58 map[i] = 0;
59 continue;
60 }
61 45
46 for_each_possible_cpu(cpu) {
62 /* 47 /*
63 * Easy case - we have equal or more hardware queues. Or 48 * First do sequential mapping between CPUs and queues.
64 * there are no thread siblings to take into account. Do 49 * In case we still have CPUs to map, and we have some number of
 65 * 1:1 if enough, or sequential mapping if less. 50 * threads per core then map sibling threads to the same queue for
 51 * performance optimizations.
66 */ 52 */
67 if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) { 53 if (cpu < nr_queues) {
68 map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue); 54 map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
69 queue++; 55 } else {
70 continue; 56 first_sibling = get_first_sibling(cpu);
57 if (first_sibling == cpu)
58 map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
59 else
60 map[cpu] = map[first_sibling];
71 } 61 }
72
73 /*
74 * Less then nr_cpus queues, and we have some number of
75 * threads per cores. Map sibling threads to the same
76 * queue.
77 */
78 first_sibling = get_first_sibling(i);
79 if (first_sibling == i) {
80 map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
81 queue);
82 queue++;
83 } else
84 map[i] = map[first_sibling];
85 } 62 }
86 63
87 free_cpumask_var(cpus);
88 return 0; 64 return 0;
89} 65}
90EXPORT_SYMBOL_GPL(blk_mq_map_queues); 66EXPORT_SYMBOL_GPL(blk_mq_map_queues);
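The rewritten mapping is deliberately simple: CPUs below nr_queues map sequentially, higher-numbered CPUs either wrap modulo nr_queues (if they are the first sibling of their core) or inherit their sibling's queue, and offline CPUs fall back to queue 0. A hedged restatement of that decision tree, for illustration only (the real code uses the cpumask and topology helpers above):

    static unsigned int map_one_cpu(unsigned int cpu, unsigned int nr_queues,
                                    bool online, unsigned int first_sibling,
                                    const unsigned int *map)
    {
            if (cpu < nr_queues)
                    return online ? cpu % nr_queues : 0;    /* sequential prefix */
            if (first_sibling == cpu)
                    return online ? cpu % nr_queues : 0;    /* wrap around */
            return map[first_sibling];                      /* share the sibling's queue */
    }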
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 803aed4d7221..9ebc2945f991 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -114,10 +114,12 @@ static ssize_t queue_state_write(void *data, const char __user *buf,
114 blk_mq_run_hw_queues(q, true); 114 blk_mq_run_hw_queues(q, true);
115 } else if (strcmp(op, "start") == 0) { 115 } else if (strcmp(op, "start") == 0) {
116 blk_mq_start_stopped_hw_queues(q, true); 116 blk_mq_start_stopped_hw_queues(q, true);
117 } else if (strcmp(op, "kick") == 0) {
118 blk_mq_kick_requeue_list(q);
117 } else { 119 } else {
118 pr_err("%s: unsupported operation '%s'\n", __func__, op); 120 pr_err("%s: unsupported operation '%s'\n", __func__, op);
119inval: 121inval:
120 pr_err("%s: use either 'run' or 'start'\n", __func__); 122 pr_err("%s: use 'run', 'start' or 'kick'\n", __func__);
121 return -EINVAL; 123 return -EINVAL;
122 } 124 }
123 return count; 125 return count;
@@ -133,6 +135,29 @@ static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
133 } 135 }
134} 136}
135 137
138static int queue_write_hint_show(void *data, struct seq_file *m)
139{
140 struct request_queue *q = data;
141 int i;
142
143 for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
144 seq_printf(m, "hint%d: %llu\n", i, q->write_hints[i]);
145
146 return 0;
147}
148
149static ssize_t queue_write_hint_store(void *data, const char __user *buf,
150 size_t count, loff_t *ppos)
151{
152 struct request_queue *q = data;
153 int i;
154
155 for (i = 0; i < BLK_MAX_WRITE_HINTS; i++)
156 q->write_hints[i] = 0;
157
158 return count;
159}
160
136static int queue_poll_stat_show(void *data, struct seq_file *m) 161static int queue_poll_stat_show(void *data, struct seq_file *m)
137{ 162{
138 struct request_queue *q = data; 163 struct request_queue *q = data;
@@ -267,6 +292,14 @@ static const char *const rqf_name[] = {
267}; 292};
268#undef RQF_NAME 293#undef RQF_NAME
269 294
295#define RQAF_NAME(name) [REQ_ATOM_##name] = #name
296static const char *const rqaf_name[] = {
297 RQAF_NAME(COMPLETE),
298 RQAF_NAME(STARTED),
299 RQAF_NAME(POLL_SLEPT),
300};
301#undef RQAF_NAME
302
270int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) 303int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
271{ 304{
272 const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; 305 const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -283,6 +316,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
283 seq_puts(m, ", .rq_flags="); 316 seq_puts(m, ", .rq_flags=");
284 blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, 317 blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
285 ARRAY_SIZE(rqf_name)); 318 ARRAY_SIZE(rqf_name));
319 seq_puts(m, ", .atomic_flags=");
320 blk_flags_show(m, rq->atomic_flags, rqaf_name, ARRAY_SIZE(rqaf_name));
286 seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag, 321 seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
287 rq->internal_tag); 322 rq->internal_tag);
288 if (mq_ops->show_rq) 323 if (mq_ops->show_rq)
@@ -298,6 +333,37 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
298} 333}
299EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); 334EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
300 335
336static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
337 __acquires(&q->requeue_lock)
338{
339 struct request_queue *q = m->private;
340
341 spin_lock_irq(&q->requeue_lock);
342 return seq_list_start(&q->requeue_list, *pos);
343}
344
345static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
346{
347 struct request_queue *q = m->private;
348
349 return seq_list_next(v, &q->requeue_list, pos);
350}
351
352static void queue_requeue_list_stop(struct seq_file *m, void *v)
353 __releases(&q->requeue_lock)
354{
355 struct request_queue *q = m->private;
356
357 spin_unlock_irq(&q->requeue_lock);
358}
359
360static const struct seq_operations queue_requeue_list_seq_ops = {
361 .start = queue_requeue_list_start,
362 .next = queue_requeue_list_next,
363 .stop = queue_requeue_list_stop,
364 .show = blk_mq_debugfs_rq_show,
365};
366
301static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) 367static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
302 __acquires(&hctx->lock) 368 __acquires(&hctx->lock)
303{ 369{
@@ -329,6 +395,36 @@ static const struct seq_operations hctx_dispatch_seq_ops = {
329 .show = blk_mq_debugfs_rq_show, 395 .show = blk_mq_debugfs_rq_show,
330}; 396};
331 397
398struct show_busy_params {
399 struct seq_file *m;
400 struct blk_mq_hw_ctx *hctx;
401};
402
403/*
404 * Note: the state of a request may change while this function is in progress,
405 * e.g. due to a concurrent blk_mq_finish_request() call.
406 */
407static void hctx_show_busy_rq(struct request *rq, void *data, bool reserved)
408{
409 const struct show_busy_params *params = data;
410
411 if (blk_mq_map_queue(rq->q, rq->mq_ctx->cpu) == params->hctx &&
412 test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
413 __blk_mq_debugfs_rq_show(params->m,
414 list_entry_rq(&rq->queuelist));
415}
416
417static int hctx_busy_show(void *data, struct seq_file *m)
418{
419 struct blk_mq_hw_ctx *hctx = data;
420 struct show_busy_params params = { .m = m, .hctx = hctx };
421
422 blk_mq_tagset_busy_iter(hctx->queue->tag_set, hctx_show_busy_rq,
423 &params);
424
425 return 0;
426}
427
332static int hctx_ctx_map_show(void *data, struct seq_file *m) 428static int hctx_ctx_map_show(void *data, struct seq_file *m)
333{ 429{
334 struct blk_mq_hw_ctx *hctx = data; 430 struct blk_mq_hw_ctx *hctx = data;
@@ -655,7 +751,9 @@ const struct file_operations blk_mq_debugfs_fops = {
655 751
656static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = { 752static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
657 {"poll_stat", 0400, queue_poll_stat_show}, 753 {"poll_stat", 0400, queue_poll_stat_show},
754 {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
658 {"state", 0600, queue_state_show, queue_state_write}, 755 {"state", 0600, queue_state_show, queue_state_write},
756 {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
659 {}, 757 {},
660}; 758};
661 759
@@ -663,6 +761,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
663 {"state", 0400, hctx_state_show}, 761 {"state", 0400, hctx_state_show},
664 {"flags", 0400, hctx_flags_show}, 762 {"flags", 0400, hctx_flags_show},
665 {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops}, 763 {"dispatch", 0400, .seq_ops = &hctx_dispatch_seq_ops},
764 {"busy", 0400, hctx_busy_show},
666 {"ctx_map", 0400, hctx_ctx_map_show}, 765 {"ctx_map", 0400, hctx_ctx_map_show},
667 {"tags", 0400, hctx_tags_show}, 766 {"tags", 0400, hctx_tags_show},
668 {"tags_bitmap", 0400, hctx_tags_bitmap_show}, 767 {"tags_bitmap", 0400, hctx_tags_bitmap_show},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 0ded5e846335..7f0dc48ffb40 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -31,11 +31,10 @@ void blk_mq_sched_free_hctx_data(struct request_queue *q,
31} 31}
32EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); 32EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
33 33
34static void __blk_mq_sched_assign_ioc(struct request_queue *q, 34void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio)
35 struct request *rq,
36 struct bio *bio,
37 struct io_context *ioc)
38{ 35{
36 struct request_queue *q = rq->q;
37 struct io_context *ioc = rq_ioc(bio);
39 struct io_cq *icq; 38 struct io_cq *icq;
40 39
41 spin_lock_irq(q->queue_lock); 40 spin_lock_irq(q->queue_lock);
@@ -47,25 +46,8 @@ static void __blk_mq_sched_assign_ioc(struct request_queue *q,
47 if (!icq) 46 if (!icq)
48 return; 47 return;
49 } 48 }
50 49 get_io_context(icq->ioc);
51 rq->elv.icq = icq; 50 rq->elv.icq = icq;
52 if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
53 rq->rq_flags |= RQF_ELVPRIV;
54 get_io_context(icq->ioc);
55 return;
56 }
57
58 rq->elv.icq = NULL;
59}
60
61static void blk_mq_sched_assign_ioc(struct request_queue *q,
62 struct request *rq, struct bio *bio)
63{
64 struct io_context *ioc;
65
66 ioc = rq_ioc(bio);
67 if (ioc)
68 __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
69} 51}
70 52
71/* 53/*
@@ -107,71 +89,6 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
107 return false; 89 return false;
108} 90}
109 91
110struct request *blk_mq_sched_get_request(struct request_queue *q,
111 struct bio *bio,
112 unsigned int op,
113 struct blk_mq_alloc_data *data)
114{
115 struct elevator_queue *e = q->elevator;
116 struct request *rq;
117
118 blk_queue_enter_live(q);
119 data->q = q;
120 if (likely(!data->ctx))
121 data->ctx = blk_mq_get_ctx(q);
122 if (likely(!data->hctx))
123 data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
124
125 if (e) {
126 data->flags |= BLK_MQ_REQ_INTERNAL;
127
128 /*
129 * Flush requests are special and go directly to the
130 * dispatch list.
131 */
132 if (!op_is_flush(op) && e->type->ops.mq.get_request) {
133 rq = e->type->ops.mq.get_request(q, op, data);
134 if (rq)
135 rq->rq_flags |= RQF_QUEUED;
136 } else
137 rq = __blk_mq_alloc_request(data, op);
138 } else {
139 rq = __blk_mq_alloc_request(data, op);
140 }
141
142 if (rq) {
143 if (!op_is_flush(op)) {
144 rq->elv.icq = NULL;
145 if (e && e->type->icq_cache)
146 blk_mq_sched_assign_ioc(q, rq, bio);
147 }
148 data->hctx->queued++;
149 return rq;
150 }
151
152 blk_queue_exit(q);
153 return NULL;
154}
155
156void blk_mq_sched_put_request(struct request *rq)
157{
158 struct request_queue *q = rq->q;
159 struct elevator_queue *e = q->elevator;
160
161 if (rq->rq_flags & RQF_ELVPRIV) {
162 blk_mq_sched_put_rq_priv(rq->q, rq);
163 if (rq->elv.icq) {
164 put_io_context(rq->elv.icq->ioc);
165 rq->elv.icq = NULL;
166 }
167 }
168
169 if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
170 e->type->ops.mq.put_request(rq);
171 else
172 blk_mq_finish_request(rq);
173}
174
175void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) 92void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
176{ 93{
177 struct request_queue *q = hctx->queue; 94 struct request_queue *q = hctx->queue;
@@ -180,7 +97,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
180 bool did_work = false; 97 bool did_work = false;
181 LIST_HEAD(rq_list); 98 LIST_HEAD(rq_list);
182 99
183 if (unlikely(blk_mq_hctx_stopped(hctx))) 100 /* RCU or SRCU read lock is needed before checking quiesced flag */
101 if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
184 return; 102 return;
185 103
186 hctx->run++; 104 hctx->run++;
@@ -260,19 +178,73 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
260} 178}
261EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); 179EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
262 180
181/*
182 * Reverse check our software queue for entries that we could potentially
183 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
184 * too much time checking for merges.
185 */
186static bool blk_mq_attempt_merge(struct request_queue *q,
187 struct blk_mq_ctx *ctx, struct bio *bio)
188{
189 struct request *rq;
190 int checked = 8;
191
192 lockdep_assert_held(&ctx->lock);
193
194 list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
195 bool merged = false;
196
197 if (!checked--)
198 break;
199
200 if (!blk_rq_merge_ok(rq, bio))
201 continue;
202
203 switch (blk_try_merge(rq, bio)) {
204 case ELEVATOR_BACK_MERGE:
205 if (blk_mq_sched_allow_merge(q, rq, bio))
206 merged = bio_attempt_back_merge(q, rq, bio);
207 break;
208 case ELEVATOR_FRONT_MERGE:
209 if (blk_mq_sched_allow_merge(q, rq, bio))
210 merged = bio_attempt_front_merge(q, rq, bio);
211 break;
212 case ELEVATOR_DISCARD_MERGE:
213 merged = bio_attempt_discard_merge(q, rq, bio);
214 break;
215 default:
216 continue;
217 }
218
219 if (merged)
220 ctx->rq_merged++;
221 return merged;
222 }
223
224 return false;
225}
226
263bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) 227bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
264{ 228{
265 struct elevator_queue *e = q->elevator; 229 struct elevator_queue *e = q->elevator;
230 struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
231 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
232 bool ret = false;
266 233
267 if (e->type->ops.mq.bio_merge) { 234 if (e && e->type->ops.mq.bio_merge) {
268 struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
269 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
270
271 blk_mq_put_ctx(ctx); 235 blk_mq_put_ctx(ctx);
272 return e->type->ops.mq.bio_merge(hctx, bio); 236 return e->type->ops.mq.bio_merge(hctx, bio);
273 } 237 }
274 238
275 return false; 239 if (hctx->flags & BLK_MQ_F_SHOULD_MERGE) {
240 /* default per sw-queue merge */
241 spin_lock(&ctx->lock);
242 ret = blk_mq_attempt_merge(q, ctx, bio);
243 spin_unlock(&ctx->lock);
244 }
245
246 blk_mq_put_ctx(ctx);
247 return ret;
276} 248}
277 249
278bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) 250bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
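Because the software-queue merge now lives here and kicks in whenever no elevator bio_merge hook takes over, a driver opts in purely through its tag set flags. A hedged sketch of the relevant initialisation (all names and sizes are illustrative):

    struct blk_mq_tag_set *set = &my_dev->tag_set;  /* hypothetical driver */

    set->ops           = &my_mq_ops;
    set->nr_hw_queues  = 4;
    set->queue_depth   = 128;
    set->numa_node     = NUMA_NO_NODE;
    set->cmd_size      = sizeof(struct my_cmd);
    set->flags         = BLK_MQ_F_SHOULD_MERGE;     /* enables the sw-queue merge above */

    err = blk_mq_alloc_tag_set(set);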
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 5007edece51a..9267d0b7c197 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -7,8 +7,7 @@
7void blk_mq_sched_free_hctx_data(struct request_queue *q, 7void blk_mq_sched_free_hctx_data(struct request_queue *q,
8 void (*exit)(struct blk_mq_hw_ctx *)); 8 void (*exit)(struct blk_mq_hw_ctx *));
9 9
10struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data); 10void blk_mq_sched_assign_ioc(struct request *rq, struct bio *bio);
11void blk_mq_sched_put_request(struct request *rq);
12 11
13void blk_mq_sched_request_inserted(struct request *rq); 12void blk_mq_sched_request_inserted(struct request *rq);
14bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, 13bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
@@ -38,35 +37,12 @@ int blk_mq_sched_init(struct request_queue *q);
38static inline bool 37static inline bool
39blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) 38blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
40{ 39{
41 struct elevator_queue *e = q->elevator; 40 if (blk_queue_nomerges(q) || !bio_mergeable(bio))
42
43 if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
44 return false; 41 return false;
45 42
46 return __blk_mq_sched_bio_merge(q, bio); 43 return __blk_mq_sched_bio_merge(q, bio);
47} 44}
48 45
49static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
50 struct request *rq,
51 struct bio *bio)
52{
53 struct elevator_queue *e = q->elevator;
54
55 if (e && e->type->ops.mq.get_rq_priv)
56 return e->type->ops.mq.get_rq_priv(q, rq, bio);
57
58 return 0;
59}
60
61static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
62 struct request *rq)
63{
64 struct elevator_queue *e = q->elevator;
65
66 if (e && e->type->ops.mq.put_rq_priv)
67 e->type->ops.mq.put_rq_priv(q, rq);
68}
69
70static inline bool 46static inline bool
71blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, 47blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
72 struct bio *bio) 48 struct bio *bio)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 958cedaff8b8..05dfa3f270ae 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list);
42 42
43static void blk_mq_poll_stats_start(struct request_queue *q); 43static void blk_mq_poll_stats_start(struct request_queue *q);
44static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); 44static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
45static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
46 45
47static int blk_mq_poll_stats_bkt(const struct request *rq) 46static int blk_mq_poll_stats_bkt(const struct request *rq)
48{ 47{
@@ -154,13 +153,28 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
154} 153}
155EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); 154EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
156 155
156/*
157 * FIXME: replace the scsi_internal_device_*block_nowait() calls in the
158 * mpt3sas driver such that this function can be removed.
159 */
160void blk_mq_quiesce_queue_nowait(struct request_queue *q)
161{
162 unsigned long flags;
163
164 spin_lock_irqsave(q->queue_lock, flags);
165 queue_flag_set(QUEUE_FLAG_QUIESCED, q);
166 spin_unlock_irqrestore(q->queue_lock, flags);
167}
168EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
169
157/** 170/**
158 * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished 171 * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
159 * @q: request queue. 172 * @q: request queue.
160 * 173 *
161 * Note: this function does not prevent that the struct request end_io() 174 * Note: this function does not prevent that the struct request end_io()
 162 * callback function is invoked. Additionally, it is not prevented that 175 * callback function is invoked. Once this function has returned, we make
163 * new queue_rq() calls occur unless the queue has been stopped first. 176 * sure no dispatch can happen until the queue is unquiesced via
177 * blk_mq_unquiesce_queue().
164 */ 178 */
165void blk_mq_quiesce_queue(struct request_queue *q) 179void blk_mq_quiesce_queue(struct request_queue *q)
166{ 180{
@@ -168,11 +182,11 @@ void blk_mq_quiesce_queue(struct request_queue *q)
168 unsigned int i; 182 unsigned int i;
169 bool rcu = false; 183 bool rcu = false;
170 184
171 __blk_mq_stop_hw_queues(q, true); 185 blk_mq_quiesce_queue_nowait(q);
172 186
173 queue_for_each_hw_ctx(q, hctx, i) { 187 queue_for_each_hw_ctx(q, hctx, i) {
174 if (hctx->flags & BLK_MQ_F_BLOCKING) 188 if (hctx->flags & BLK_MQ_F_BLOCKING)
175 synchronize_srcu(&hctx->queue_rq_srcu); 189 synchronize_srcu(hctx->queue_rq_srcu);
176 else 190 else
177 rcu = true; 191 rcu = true;
178 } 192 }
@@ -181,6 +195,26 @@ void blk_mq_quiesce_queue(struct request_queue *q)
181} 195}
182EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue); 196EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
183 197
198/*
199 * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
200 * @q: request queue.
201 *
 202 * This function restores the queue to the state it was in before it was
 203 * quiesced by blk_mq_quiesce_queue.
204 */
205void blk_mq_unquiesce_queue(struct request_queue *q)
206{
207 unsigned long flags;
208
209 spin_lock_irqsave(q->queue_lock, flags);
210 queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
211 spin_unlock_irqrestore(q->queue_lock, flags);
212
213 /* dispatch requests which are inserted during quiescing */
214 blk_mq_run_hw_queues(q, true);
215}
216EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
217
184void blk_mq_wake_waiters(struct request_queue *q) 218void blk_mq_wake_waiters(struct request_queue *q)
185{ 219{
186 struct blk_mq_hw_ctx *hctx; 220 struct blk_mq_hw_ctx *hctx;
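A hedged sketch of the intended pairing on the driver side, e.g. around a reconfiguration that ->queue_rq() must not observe half-done (the device state swap is hypothetical):

    blk_mq_quiesce_queue(q);        /* waits for in-flight dispatches */
    swap_my_dispatch_state(dev);    /* hypothetical reconfiguration */
    blk_mq_unquiesce_queue(q);      /* clears QUIESCED and reruns the hw queues */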
@@ -204,15 +238,33 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
204} 238}
205EXPORT_SYMBOL(blk_mq_can_queue); 239EXPORT_SYMBOL(blk_mq_can_queue);
206 240
207void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, 241static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
208 struct request *rq, unsigned int op) 242 unsigned int tag, unsigned int op)
209{ 243{
244 struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
245 struct request *rq = tags->static_rqs[tag];
246
247 rq->rq_flags = 0;
248
249 if (data->flags & BLK_MQ_REQ_INTERNAL) {
250 rq->tag = -1;
251 rq->internal_tag = tag;
252 } else {
253 if (blk_mq_tag_busy(data->hctx)) {
254 rq->rq_flags = RQF_MQ_INFLIGHT;
255 atomic_inc(&data->hctx->nr_active);
256 }
257 rq->tag = tag;
258 rq->internal_tag = -1;
259 data->hctx->tags->rqs[rq->tag] = rq;
260 }
261
210 INIT_LIST_HEAD(&rq->queuelist); 262 INIT_LIST_HEAD(&rq->queuelist);
211 /* csd/requeue_work/fifo_time is initialized before use */ 263 /* csd/requeue_work/fifo_time is initialized before use */
212 rq->q = q; 264 rq->q = data->q;
213 rq->mq_ctx = ctx; 265 rq->mq_ctx = data->ctx;
214 rq->cmd_flags = op; 266 rq->cmd_flags = op;
215 if (blk_queue_io_stat(q)) 267 if (blk_queue_io_stat(data->q))
216 rq->rq_flags |= RQF_IO_STAT; 268 rq->rq_flags |= RQF_IO_STAT;
217 /* do not touch atomic flags, it needs atomic ops against the timer */ 269 /* do not touch atomic flags, it needs atomic ops against the timer */
218 rq->cpu = -1; 270 rq->cpu = -1;
@@ -241,44 +293,60 @@ void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
241 rq->end_io_data = NULL; 293 rq->end_io_data = NULL;
242 rq->next_rq = NULL; 294 rq->next_rq = NULL;
243 295
244 ctx->rq_dispatched[op_is_sync(op)]++; 296 data->ctx->rq_dispatched[op_is_sync(op)]++;
297 return rq;
245} 298}
246EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
247 299
248struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, 300static struct request *blk_mq_get_request(struct request_queue *q,
249 unsigned int op) 301 struct bio *bio, unsigned int op,
302 struct blk_mq_alloc_data *data)
250{ 303{
304 struct elevator_queue *e = q->elevator;
251 struct request *rq; 305 struct request *rq;
252 unsigned int tag; 306 unsigned int tag;
253 307
254 tag = blk_mq_get_tag(data); 308 blk_queue_enter_live(q);
255 if (tag != BLK_MQ_TAG_FAIL) { 309 data->q = q;
256 struct blk_mq_tags *tags = blk_mq_tags_from_data(data); 310 if (likely(!data->ctx))
311 data->ctx = blk_mq_get_ctx(q);
312 if (likely(!data->hctx))
313 data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
314 if (op & REQ_NOWAIT)
315 data->flags |= BLK_MQ_REQ_NOWAIT;
257 316
258 rq = tags->static_rqs[tag]; 317 if (e) {
318 data->flags |= BLK_MQ_REQ_INTERNAL;
259 319
260 if (data->flags & BLK_MQ_REQ_INTERNAL) { 320 /*
261 rq->tag = -1; 321 * Flush requests are special and go directly to the
262 rq->internal_tag = tag; 322 * dispatch list.
263 } else { 323 */
264 if (blk_mq_tag_busy(data->hctx)) { 324 if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
265 rq->rq_flags = RQF_MQ_INFLIGHT; 325 e->type->ops.mq.limit_depth(op, data);
266 atomic_inc(&data->hctx->nr_active); 326 }
267 }
268 rq->tag = tag;
269 rq->internal_tag = -1;
270 data->hctx->tags->rqs[rq->tag] = rq;
271 }
272 327
273 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); 328 tag = blk_mq_get_tag(data);
274 return rq; 329 if (tag == BLK_MQ_TAG_FAIL) {
330 blk_queue_exit(q);
331 return NULL;
275 } 332 }
276 333
277 return NULL; 334 rq = blk_mq_rq_ctx_init(data, tag, op);
335 if (!op_is_flush(op)) {
336 rq->elv.icq = NULL;
337 if (e && e->type->ops.mq.prepare_request) {
338 if (e->type->icq_cache && rq_ioc(bio))
339 blk_mq_sched_assign_ioc(rq, bio);
340
341 e->type->ops.mq.prepare_request(rq, bio);
342 rq->rq_flags |= RQF_ELVPRIV;
343 }
344 }
345 data->hctx->queued++;
346 return rq;
278} 347}
279EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
280 348
281struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 349struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
282 unsigned int flags) 350 unsigned int flags)
283{ 351{
284 struct blk_mq_alloc_data alloc_data = { .flags = flags }; 352 struct blk_mq_alloc_data alloc_data = { .flags = flags };
@@ -289,7 +357,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
289 if (ret) 357 if (ret)
290 return ERR_PTR(ret); 358 return ERR_PTR(ret);
291 359
292 rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); 360 rq = blk_mq_get_request(q, NULL, op, &alloc_data);
293 361
294 blk_mq_put_ctx(alloc_data.ctx); 362 blk_mq_put_ctx(alloc_data.ctx);
295 blk_queue_exit(q); 363 blk_queue_exit(q);
@@ -304,8 +372,8 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
304} 372}
305EXPORT_SYMBOL(blk_mq_alloc_request); 373EXPORT_SYMBOL(blk_mq_alloc_request);
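Callers now pass a complete operation rather than a bare read/write direction, which is also how REQ_NOWAIT turns into BLK_MQ_REQ_NOWAIT in blk_mq_get_request() above. A hedged sketch of a passthrough allocation (op choice and error handling are illustrative):

    struct request *rq;

    rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
    if (IS_ERR(rq))
            return PTR_ERR(rq);
    /* fill in the passthrough payload, execute it, then ... */
    blk_mq_free_request(rq);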
306 374
307struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, 375struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
308 unsigned int flags, unsigned int hctx_idx) 376 unsigned int op, unsigned int flags, unsigned int hctx_idx)
309{ 377{
310 struct blk_mq_alloc_data alloc_data = { .flags = flags }; 378 struct blk_mq_alloc_data alloc_data = { .flags = flags };
311 struct request *rq; 379 struct request *rq;
@@ -340,7 +408,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
340 cpu = cpumask_first(alloc_data.hctx->cpumask); 408 cpu = cpumask_first(alloc_data.hctx->cpumask);
341 alloc_data.ctx = __blk_mq_get_ctx(q, cpu); 409 alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
342 410
343 rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); 411 rq = blk_mq_get_request(q, NULL, op, &alloc_data);
344 412
345 blk_queue_exit(q); 413 blk_queue_exit(q);
346 414
@@ -351,17 +419,28 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
351} 419}
352EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); 420EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
353 421
354void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, 422void blk_mq_free_request(struct request *rq)
355 struct request *rq)
356{ 423{
357 const int sched_tag = rq->internal_tag;
358 struct request_queue *q = rq->q; 424 struct request_queue *q = rq->q;
425 struct elevator_queue *e = q->elevator;
426 struct blk_mq_ctx *ctx = rq->mq_ctx;
427 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
428 const int sched_tag = rq->internal_tag;
359 429
430 if (rq->rq_flags & RQF_ELVPRIV) {
431 if (e && e->type->ops.mq.finish_request)
432 e->type->ops.mq.finish_request(rq);
433 if (rq->elv.icq) {
434 put_io_context(rq->elv.icq->ioc);
435 rq->elv.icq = NULL;
436 }
437 }
438
439 ctx->rq_completed[rq_is_sync(rq)]++;
360 if (rq->rq_flags & RQF_MQ_INFLIGHT) 440 if (rq->rq_flags & RQF_MQ_INFLIGHT)
361 atomic_dec(&hctx->nr_active); 441 atomic_dec(&hctx->nr_active);
362 442
363 wbt_done(q->rq_wb, &rq->issue_stat); 443 wbt_done(q->rq_wb, &rq->issue_stat);
364 rq->rq_flags = 0;
365 444
366 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 445 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
367 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); 446 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
@@ -372,29 +451,9 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
372 blk_mq_sched_restart(hctx); 451 blk_mq_sched_restart(hctx);
373 blk_queue_exit(q); 452 blk_queue_exit(q);
374} 453}
375
376static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
377 struct request *rq)
378{
379 struct blk_mq_ctx *ctx = rq->mq_ctx;
380
381 ctx->rq_completed[rq_is_sync(rq)]++;
382 __blk_mq_finish_request(hctx, ctx, rq);
383}
384
385void blk_mq_finish_request(struct request *rq)
386{
387 blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
388}
389EXPORT_SYMBOL_GPL(blk_mq_finish_request);
390
391void blk_mq_free_request(struct request *rq)
392{
393 blk_mq_sched_put_request(rq);
394}
395EXPORT_SYMBOL_GPL(blk_mq_free_request); 454EXPORT_SYMBOL_GPL(blk_mq_free_request);
396 455
397inline void __blk_mq_end_request(struct request *rq, int error) 456inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
398{ 457{
399 blk_account_io_done(rq); 458 blk_account_io_done(rq);
400 459
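Scheduler-private request state now hangs off two symmetric hooks, prepare_request() and finish_request(), with limit_depth() shaping allocation, replacing the old get_request()/put_request() and get_rq_priv()/put_rq_priv() pairs. A hedged sketch of how an mq scheduler wires them up (hook names come from this diff; the surrounding elevator_type boilerplate is assumed):

    static struct elevator_type my_sched = {
            .ops.mq = {
                    .limit_depth      = my_limit_depth,     /* throttle async allocations */
                    .prepare_request  = my_prepare_request, /* attach per-rq private data */
                    .finish_request   = my_finish_request,  /* and release it again */
                    .bio_merge        = my_bio_merge,
                    .insert_requests  = my_insert_requests,
                    .dispatch_request = my_dispatch_request,
            },
            .uses_mq        = true,
            .elevator_name  = "my-sched",
            .elevator_owner = THIS_MODULE,
    };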
@@ -409,7 +468,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
409} 468}
410EXPORT_SYMBOL(__blk_mq_end_request); 469EXPORT_SYMBOL(__blk_mq_end_request);
411 470
412void blk_mq_end_request(struct request *rq, int error) 471void blk_mq_end_request(struct request *rq, blk_status_t error)
413{ 472{
414 if (blk_update_request(rq, error, blk_rq_bytes(rq))) 473 if (blk_update_request(rq, error, blk_rq_bytes(rq)))
415 BUG(); 474 BUG();
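
The completion helpers in this hunk now take a blk_status_t instead of a negative errno. A minimal driver-side sketch of the new convention (only <linux/blk-mq.h> assumed; the mydrv_* name is hypothetical, not from this series):

    #include <linux/blk-mq.h>

    /* Complete a request with the symbolic status codes rather than 0/-EIO. */
    static void mydrv_complete_rq(struct request *rq, bool success)
    {
            blk_mq_end_request(rq, success ? BLK_STS_OK : BLK_STS_IOERR);
    }
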
@@ -753,50 +812,6 @@ static void blk_mq_timeout_work(struct work_struct *work)
753 blk_queue_exit(q); 812 blk_queue_exit(q);
754} 813}
755 814
756/*
757 * Reverse check our software queue for entries that we could potentially
758 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
759 * too much time checking for merges.
760 */
761static bool blk_mq_attempt_merge(struct request_queue *q,
762 struct blk_mq_ctx *ctx, struct bio *bio)
763{
764 struct request *rq;
765 int checked = 8;
766
767 list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
768 bool merged = false;
769
770 if (!checked--)
771 break;
772
773 if (!blk_rq_merge_ok(rq, bio))
774 continue;
775
776 switch (blk_try_merge(rq, bio)) {
777 case ELEVATOR_BACK_MERGE:
778 if (blk_mq_sched_allow_merge(q, rq, bio))
779 merged = bio_attempt_back_merge(q, rq, bio);
780 break;
781 case ELEVATOR_FRONT_MERGE:
782 if (blk_mq_sched_allow_merge(q, rq, bio))
783 merged = bio_attempt_front_merge(q, rq, bio);
784 break;
785 case ELEVATOR_DISCARD_MERGE:
786 merged = bio_attempt_discard_merge(q, rq, bio);
787 break;
788 default:
789 continue;
790 }
791
792 if (merged)
793 ctx->rq_merged++;
794 return merged;
795 }
796
797 return false;
798}
799
800struct flush_busy_ctx_data { 815struct flush_busy_ctx_data {
801 struct blk_mq_hw_ctx *hctx; 816 struct blk_mq_hw_ctx *hctx;
802 struct list_head *list; 817 struct list_head *list;
@@ -968,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
968{ 983{
969 struct blk_mq_hw_ctx *hctx; 984 struct blk_mq_hw_ctx *hctx;
970 struct request *rq; 985 struct request *rq;
971 int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; 986 int errors, queued;
972 987
973 if (list_empty(list)) 988 if (list_empty(list))
974 return false; 989 return false;
@@ -979,6 +994,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
979 errors = queued = 0; 994 errors = queued = 0;
980 do { 995 do {
981 struct blk_mq_queue_data bd; 996 struct blk_mq_queue_data bd;
997 blk_status_t ret;
982 998
983 rq = list_first_entry(list, struct request, queuelist); 999 rq = list_first_entry(list, struct request, queuelist);
984 if (!blk_mq_get_driver_tag(rq, &hctx, false)) { 1000 if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1019,25 +1035,20 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
1019 } 1035 }
1020 1036
1021 ret = q->mq_ops->queue_rq(hctx, &bd); 1037 ret = q->mq_ops->queue_rq(hctx, &bd);
1022 switch (ret) { 1038 if (ret == BLK_STS_RESOURCE) {
1023 case BLK_MQ_RQ_QUEUE_OK:
1024 queued++;
1025 break;
1026 case BLK_MQ_RQ_QUEUE_BUSY:
1027 blk_mq_put_driver_tag_hctx(hctx, rq); 1039 blk_mq_put_driver_tag_hctx(hctx, rq);
1028 list_add(&rq->queuelist, list); 1040 list_add(&rq->queuelist, list);
1029 __blk_mq_requeue_request(rq); 1041 __blk_mq_requeue_request(rq);
1030 break; 1042 break;
1031 default: 1043 }
1032 pr_err("blk-mq: bad return on queue: %d\n", ret); 1044
1033 case BLK_MQ_RQ_QUEUE_ERROR: 1045 if (unlikely(ret != BLK_STS_OK)) {
1034 errors++; 1046 errors++;
1035 blk_mq_end_request(rq, -EIO); 1047 blk_mq_end_request(rq, BLK_STS_IOERR);
1036 break; 1048 continue;
1037 } 1049 }
1038 1050
1039 if (ret == BLK_MQ_RQ_QUEUE_BUSY) 1051 queued++;
1040 break;
1041 } while (!list_empty(list)); 1052 } while (!list_empty(list));
1042 1053
1043 hctx->dispatched[queued_to_index(queued)]++; 1054 hctx->dispatched[queued_to_index(queued)]++;
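
The dispatch loop above spells out the new driver contract: ->queue_rq returns a blk_status_t, BLK_STS_RESOURCE means "requeue and retry later", and any other non-OK value fails the request. A rough sketch of the driver side, with hypothetical mydrv_* helpers declared only for illustration:

    #include <linux/blk-mq.h>

    bool mydrv_can_issue(struct blk_mq_hw_ctx *hctx);   /* hypothetical */
    int mydrv_issue(struct request *rq);                /* hypothetical */

    static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
                                       const struct blk_mq_queue_data *bd)
    {
            struct request *rq = bd->rq;

            blk_mq_start_request(rq);

            if (!mydrv_can_issue(hctx))
                    return BLK_STS_RESOURCE;        /* core requeues the request */

            if (mydrv_issue(rq))
                    return BLK_STS_IOERR;           /* core ends it with an error */

            return BLK_STS_OK;
    }
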
@@ -1075,7 +1086,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
1075 * - blk_mq_run_hw_queue() checks whether or not a queue has 1086 * - blk_mq_run_hw_queue() checks whether or not a queue has
1076 * been stopped before rerunning a queue. 1087 * been stopped before rerunning a queue.
1077 * - Some but not all block drivers stop a queue before 1088 * - Some but not all block drivers stop a queue before
1078 * returning BLK_MQ_RQ_QUEUE_BUSY. Two exceptions are scsi-mq 1089 * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
1079 * and dm-rq. 1090 * and dm-rq.
1080 */ 1091 */
1081 if (!blk_mq_sched_needs_restart(hctx) && 1092 if (!blk_mq_sched_needs_restart(hctx) &&
@@ -1100,9 +1111,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
1100 } else { 1111 } else {
1101 might_sleep(); 1112 might_sleep();
1102 1113
1103 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); 1114 srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
1104 blk_mq_sched_dispatch_requests(hctx); 1115 blk_mq_sched_dispatch_requests(hctx);
1105 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); 1116 srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
1106 } 1117 }
1107} 1118}
1108 1119
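
For BLK_MQ_F_BLOCKING queues the dispatch above runs inside an SRCU read-side section; queue_rq_srcu is now reached through the hctx (see blk_mq_hw_ctx_size() further down). The general pattern it relies on, sketched with a standalone srcu_struct:

    #include <linux/srcu.h>

    static struct srcu_struct my_srcu;      /* init_srcu_struct(&my_srcu) at setup */

    static void my_reader(void)
    {
            int idx = srcu_read_lock(&my_srcu);
            /* ... may sleep here, unlike a plain RCU read section ... */
            srcu_read_unlock(&my_srcu, idx);
    }

    static void my_wait_for_readers(void)
    {
            /* Returns only after every reader that started earlier has finished;
             * loosely, this is what lets quiescing wait out in-flight dispatch. */
            synchronize_srcu(&my_srcu);
    }
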
@@ -1134,8 +1145,10 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
1134static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, 1145static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
1135 unsigned long msecs) 1146 unsigned long msecs)
1136{ 1147{
1137 if (unlikely(blk_mq_hctx_stopped(hctx) || 1148 if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
1138 !blk_mq_hw_queue_mapped(hctx))) 1149 return;
1150
1151 if (unlikely(blk_mq_hctx_stopped(hctx)))
1139 return; 1152 return;
1140 1153
1141 if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { 1154 if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
@@ -1201,34 +1214,39 @@ bool blk_mq_queue_stopped(struct request_queue *q)
1201} 1214}
1202EXPORT_SYMBOL(blk_mq_queue_stopped); 1215EXPORT_SYMBOL(blk_mq_queue_stopped);
1203 1216
1204static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync) 1217/*
1218 * This function is often used for pausing .queue_rq() by driver when
1219 * there isn't enough resource or some conditions aren't satisfied, and
1220 * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
1221 *
1222 * We do not guarantee that dispatch can be drained or blocked
1223 * after blk_mq_stop_hw_queue() returns. Please use
1224 * blk_mq_quiesce_queue() for that requirement.
1225 */
1226void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
1205{ 1227{
1206 if (sync) 1228 cancel_delayed_work(&hctx->run_work);
1207 cancel_delayed_work_sync(&hctx->run_work);
1208 else
1209 cancel_delayed_work(&hctx->run_work);
1210 1229
1211 set_bit(BLK_MQ_S_STOPPED, &hctx->state); 1230 set_bit(BLK_MQ_S_STOPPED, &hctx->state);
1212} 1231}
1213
1214void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
1215{
1216 __blk_mq_stop_hw_queue(hctx, false);
1217}
1218EXPORT_SYMBOL(blk_mq_stop_hw_queue); 1232EXPORT_SYMBOL(blk_mq_stop_hw_queue);
1219 1233
1220static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync) 1234/*
1235 * This function is often used for pausing .queue_rq() by driver when
1236 * there isn't enough resource or some conditions aren't satisfied, and
1237 * BLK_MQ_RQ_QUEUE_BUSY is usually returned.
1238 *
1239 * We do not guarantee that dispatch can be drained or blocked
1240 * after blk_mq_stop_hw_queues() returns. Please use
1241 * blk_mq_quiesce_queue() for that requirement.
1242 */
1243void blk_mq_stop_hw_queues(struct request_queue *q)
1221{ 1244{
1222 struct blk_mq_hw_ctx *hctx; 1245 struct blk_mq_hw_ctx *hctx;
1223 int i; 1246 int i;
1224 1247
1225 queue_for_each_hw_ctx(q, hctx, i) 1248 queue_for_each_hw_ctx(q, hctx, i)
1226 __blk_mq_stop_hw_queue(hctx, sync); 1249 blk_mq_stop_hw_queue(hctx);
1227}
1228
1229void blk_mq_stop_hw_queues(struct request_queue *q)
1230{
1231 __blk_mq_stop_hw_queues(q, false);
1232} 1250}
1233EXPORT_SYMBOL(blk_mq_stop_hw_queues); 1251EXPORT_SYMBOL(blk_mq_stop_hw_queues);
1234 1252
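
Per the comments added above, stopping a hardware queue only pauses future dispatch; it does not drain callers already inside .queue_rq(). A sketch of the two driver-side idioms (assuming blk_mq_unquiesce_queue() from this same series):

    #include <linux/blk-mq.h>

    /* Temporary back-pressure: cheap, but gives no drain guarantee. */
    static void mydrv_pause(struct request_queue *q)
    {
            blk_mq_stop_hw_queues(q);
    }

    static void mydrv_resume(struct request_queue *q)
    {
            blk_mq_start_stopped_hw_queues(q, true);        /* async re-run */
    }

    /* When no ->queue_rq may still be running, e.g. before a reset: */
    static void mydrv_freeze_dispatch(struct request_queue *q)
    {
            blk_mq_quiesce_queue(q);
            /* ... safely touch shared driver/hardware state ... */
            blk_mq_unquiesce_queue(q);
    }
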
@@ -1295,7 +1313,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
1295 1313
1296void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) 1314void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
1297{ 1315{
1298 if (unlikely(!blk_mq_hw_queue_mapped(hctx))) 1316 if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
1299 return; 1317 return;
1300 1318
1301 /* 1319 /*
@@ -1317,6 +1335,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
1317{ 1335{
1318 struct blk_mq_ctx *ctx = rq->mq_ctx; 1336 struct blk_mq_ctx *ctx = rq->mq_ctx;
1319 1337
1338 lockdep_assert_held(&ctx->lock);
1339
1320 trace_block_rq_insert(hctx->queue, rq); 1340 trace_block_rq_insert(hctx->queue, rq);
1321 1341
1322 if (at_head) 1342 if (at_head)
@@ -1330,6 +1350,8 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
1330{ 1350{
1331 struct blk_mq_ctx *ctx = rq->mq_ctx; 1351 struct blk_mq_ctx *ctx = rq->mq_ctx;
1332 1352
1353 lockdep_assert_held(&ctx->lock);
1354
1333 __blk_mq_insert_req_list(hctx, rq, at_head); 1355 __blk_mq_insert_req_list(hctx, rq, at_head);
1334 blk_mq_hctx_mark_pending(hctx, ctx); 1356 blk_mq_hctx_mark_pending(hctx, ctx);
1335} 1357}
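
The lockdep_assert_held() additions above turn the implicit "ctx->lock must be held" rule into something CONFIG_PROVE_LOCKING can check at runtime. The same generic pattern, sketched outside blk-mq with hypothetical names:

    #include <linux/lockdep.h>
    #include <linux/spinlock.h>

    struct my_ctx {
            spinlock_t lock;
            int count;
    };

    static void my_ctx_add(struct my_ctx *ctx)
    {
            lockdep_assert_held(&ctx->lock);        /* no-op unless lockdep is on */
            ctx->count++;
    }

    static void my_ctx_user(struct my_ctx *ctx)
    {
            spin_lock(&ctx->lock);
            my_ctx_add(ctx);
            spin_unlock(&ctx->lock);
    }
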
@@ -1427,30 +1449,13 @@ static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx)
1427 !blk_queue_nomerges(hctx->queue); 1449 !blk_queue_nomerges(hctx->queue);
1428} 1450}
1429 1451
1430static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, 1452static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
1431 struct blk_mq_ctx *ctx, 1453 struct blk_mq_ctx *ctx,
1432 struct request *rq, struct bio *bio) 1454 struct request *rq)
1433{ 1455{
1434 if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) { 1456 spin_lock(&ctx->lock);
1435 blk_mq_bio_to_request(rq, bio); 1457 __blk_mq_insert_request(hctx, rq, false);
1436 spin_lock(&ctx->lock); 1458 spin_unlock(&ctx->lock);
1437insert_rq:
1438 __blk_mq_insert_request(hctx, rq, false);
1439 spin_unlock(&ctx->lock);
1440 return false;
1441 } else {
1442 struct request_queue *q = hctx->queue;
1443
1444 spin_lock(&ctx->lock);
1445 if (!blk_mq_attempt_merge(q, ctx, bio)) {
1446 blk_mq_bio_to_request(rq, bio);
1447 goto insert_rq;
1448 }
1449
1450 spin_unlock(&ctx->lock);
1451 __blk_mq_finish_request(hctx, ctx, rq);
1452 return true;
1453 }
1454} 1459}
1455 1460
1456static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) 1461static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
@@ -1471,10 +1476,11 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
1471 .last = true, 1476 .last = true,
1472 }; 1477 };
1473 blk_qc_t new_cookie; 1478 blk_qc_t new_cookie;
1474 int ret; 1479 blk_status_t ret;
1475 bool run_queue = true; 1480 bool run_queue = true;
1476 1481
1477 if (blk_mq_hctx_stopped(hctx)) { 1482 /* RCU or SRCU read lock is needed before checking quiesced flag */
1483 if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
1478 run_queue = false; 1484 run_queue = false;
1479 goto insert; 1485 goto insert;
1480 } 1486 }
@@ -1493,18 +1499,19 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
1493 * would have done 1499 * would have done
1494 */ 1500 */
1495 ret = q->mq_ops->queue_rq(hctx, &bd); 1501 ret = q->mq_ops->queue_rq(hctx, &bd);
1496 if (ret == BLK_MQ_RQ_QUEUE_OK) { 1502 switch (ret) {
1503 case BLK_STS_OK:
1497 *cookie = new_cookie; 1504 *cookie = new_cookie;
1498 return; 1505 return;
1499 } 1506 case BLK_STS_RESOURCE:
1500 1507 __blk_mq_requeue_request(rq);
1501 if (ret == BLK_MQ_RQ_QUEUE_ERROR) { 1508 goto insert;
1509 default:
1502 *cookie = BLK_QC_T_NONE; 1510 *cookie = BLK_QC_T_NONE;
1503 blk_mq_end_request(rq, -EIO); 1511 blk_mq_end_request(rq, ret);
1504 return; 1512 return;
1505 } 1513 }
1506 1514
1507 __blk_mq_requeue_request(rq);
1508insert: 1515insert:
1509 blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep); 1516 blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
1510} 1517}
@@ -1521,9 +1528,9 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
1521 1528
1522 might_sleep(); 1529 might_sleep();
1523 1530
1524 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); 1531 srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
1525 __blk_mq_try_issue_directly(hctx, rq, cookie, true); 1532 __blk_mq_try_issue_directly(hctx, rq, cookie, true);
1526 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); 1533 srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
1527 } 1534 }
1528} 1535}
1529 1536
@@ -1541,7 +1548,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1541 1548
1542 blk_queue_bounce(q, &bio); 1549 blk_queue_bounce(q, &bio);
1543 1550
1544 blk_queue_split(q, &bio, q->bio_split); 1551 blk_queue_split(q, &bio);
1545 1552
1546 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 1553 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1547 bio_io_error(bio); 1554 bio_io_error(bio);
@@ -1559,9 +1566,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1559 1566
1560 trace_block_getrq(q, bio, bio->bi_opf); 1567 trace_block_getrq(q, bio, bio->bi_opf);
1561 1568
1562 rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data); 1569 rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
1563 if (unlikely(!rq)) { 1570 if (unlikely(!rq)) {
1564 __wbt_done(q->rq_wb, wb_acct); 1571 __wbt_done(q->rq_wb, wb_acct);
1572 if (bio->bi_opf & REQ_NOWAIT)
1573 bio_wouldblock_error(bio);
1565 return BLK_QC_T_NONE; 1574 return BLK_QC_T_NONE;
1566 } 1575 }
1567 1576
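
The hunk above wires REQ_NOWAIT into blk-mq: when a request cannot be allocated without blocking, the bio is failed via bio_wouldblock_error(). A sketch of the submitter side, assuming (per the rest of this series) that bio_wouldblock_error() reports BLK_STS_AGAIN through bi_status:

    #include <linux/bio.h>
    #include <linux/blk_types.h>

    static void my_end_io(struct bio *bio)
    {
            if (bio->bi_status == BLK_STS_AGAIN) {
                    /* Nothing failed on the media; retry later or report -EAGAIN. */
            }
            bio_put(bio);
    }

    static void my_submit_nowait(struct bio *bio)
    {
            bio->bi_opf |= REQ_NOWAIT;      /* opt in to "don't sleep for a request" */
            bio->bi_end_io = my_end_io;
            submit_bio(bio);
    }
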
@@ -1639,11 +1648,12 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1639 blk_mq_put_ctx(data.ctx); 1648 blk_mq_put_ctx(data.ctx);
1640 blk_mq_bio_to_request(rq, bio); 1649 blk_mq_bio_to_request(rq, bio);
1641 blk_mq_sched_insert_request(rq, false, true, true, true); 1650 blk_mq_sched_insert_request(rq, false, true, true, true);
1642 } else if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1651 } else {
1643 blk_mq_put_ctx(data.ctx); 1652 blk_mq_put_ctx(data.ctx);
1653 blk_mq_bio_to_request(rq, bio);
1654 blk_mq_queue_io(data.hctx, data.ctx, rq);
1644 blk_mq_run_hw_queue(data.hctx, true); 1655 blk_mq_run_hw_queue(data.hctx, true);
1645 } else 1656 }
1646 blk_mq_put_ctx(data.ctx);
1647 1657
1648 return cookie; 1658 return cookie;
1649} 1659}
@@ -1866,7 +1876,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
1866 set->ops->exit_hctx(hctx, hctx_idx); 1876 set->ops->exit_hctx(hctx, hctx_idx);
1867 1877
1868 if (hctx->flags & BLK_MQ_F_BLOCKING) 1878 if (hctx->flags & BLK_MQ_F_BLOCKING)
1869 cleanup_srcu_struct(&hctx->queue_rq_srcu); 1879 cleanup_srcu_struct(hctx->queue_rq_srcu);
1870 1880
1871 blk_mq_remove_cpuhp(hctx); 1881 blk_mq_remove_cpuhp(hctx);
1872 blk_free_flush_queue(hctx->fq); 1882 blk_free_flush_queue(hctx->fq);
@@ -1900,7 +1910,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
1900 spin_lock_init(&hctx->lock); 1910 spin_lock_init(&hctx->lock);
1901 INIT_LIST_HEAD(&hctx->dispatch); 1911 INIT_LIST_HEAD(&hctx->dispatch);
1902 hctx->queue = q; 1912 hctx->queue = q;
1903 hctx->queue_num = hctx_idx;
1904 hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; 1913 hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
1905 1914
1906 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); 1915 cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
@@ -1939,7 +1948,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
1939 goto free_fq; 1948 goto free_fq;
1940 1949
1941 if (hctx->flags & BLK_MQ_F_BLOCKING) 1950 if (hctx->flags & BLK_MQ_F_BLOCKING)
1942 init_srcu_struct(&hctx->queue_rq_srcu); 1951 init_srcu_struct(hctx->queue_rq_srcu);
1943 1952
1944 blk_mq_debugfs_register_hctx(q, hctx); 1953 blk_mq_debugfs_register_hctx(q, hctx);
1945 1954
@@ -2224,6 +2233,20 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
2224} 2233}
2225EXPORT_SYMBOL(blk_mq_init_queue); 2234EXPORT_SYMBOL(blk_mq_init_queue);
2226 2235
2236static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
2237{
2238 int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
2239
2240 BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, queue_rq_srcu),
2241 __alignof__(struct blk_mq_hw_ctx)) !=
2242 sizeof(struct blk_mq_hw_ctx));
2243
2244 if (tag_set->flags & BLK_MQ_F_BLOCKING)
2245 hw_ctx_size += sizeof(struct srcu_struct);
2246
2247 return hw_ctx_size;
2248}
2249
2227static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, 2250static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
2228 struct request_queue *q) 2251 struct request_queue *q)
2229{ 2252{
@@ -2238,7 +2261,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
2238 continue; 2261 continue;
2239 2262
2240 node = blk_mq_hw_queue_to_node(q->mq_map, i); 2263 node = blk_mq_hw_queue_to_node(q->mq_map, i);
2241 hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx), 2264 hctxs[i] = kzalloc_node(blk_mq_hw_ctx_size(set),
2242 GFP_KERNEL, node); 2265 GFP_KERNEL, node);
2243 if (!hctxs[i]) 2266 if (!hctxs[i])
2244 break; 2267 break;
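
blk_mq_hw_ctx_size() above sizes the hctx so the SRCU state is only paid for on BLK_MQ_F_BLOCKING queues, and the BUILD_BUG_ON checks that queue_rq_srcu really is the aligned tail of the structure. The same trailing-member trick in isolation, as a sketch:

    #include <linux/slab.h>
    #include <linux/srcu.h>

    struct my_obj {
            unsigned int flags;
            /* ... ordinary members ... */
            struct srcu_struct srcu[0];     /* storage exists only when asked for */
    };

    static struct my_obj *my_obj_alloc(bool blocking, int node)
    {
            size_t size = sizeof(struct my_obj);

            if (blocking)
                    size += sizeof(struct srcu_struct);

            return kzalloc_node(size, GFP_KERNEL, node);
    }
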
diff --git a/block/blk-mq.h b/block/blk-mq.h
index cc67b48e3551..1a06fdf9fd4d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,17 +128,6 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
128 return data->hctx->tags; 128 return data->hctx->tags;
129} 129}
130 130
131/*
132 * Internal helpers for request allocation/init/free
133 */
134void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
135 struct request *rq, unsigned int op);
136void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
137 struct request *rq);
138void blk_mq_finish_request(struct request *rq);
139struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
140 unsigned int op);
141
142static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) 131static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
143{ 132{
144 return test_bit(BLK_MQ_S_STOPPED, &hctx->state); 133 return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4fa81ed383ca..be1f115b538b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -172,11 +172,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
172 q->nr_batching = BLK_BATCH_REQ; 172 q->nr_batching = BLK_BATCH_REQ;
173 173
174 blk_set_default_limits(&q->limits); 174 blk_set_default_limits(&q->limits);
175
176 /*
177 * by default assume old behaviour and bounce for any highmem page
178 */
179 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
180} 175}
181EXPORT_SYMBOL(blk_queue_make_request); 176EXPORT_SYMBOL(blk_queue_make_request);
182 177
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 07cc329fa4b0..2290f65b9d73 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -258,15 +258,14 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
258 * all transfers have been done for a request. It's important to call 258 * all transfers have been done for a request. It's important to call
259 * this function before end_that_request_last(), as that will put the 259 * this function before end_that_request_last(), as that will put the
260 * request back on the free list thus corrupting the internal tag list. 260 * request back on the free list thus corrupting the internal tag list.
261 *
262 * Notes:
263 * queue lock must be held.
264 **/ 261 **/
265void blk_queue_end_tag(struct request_queue *q, struct request *rq) 262void blk_queue_end_tag(struct request_queue *q, struct request *rq)
266{ 263{
267 struct blk_queue_tag *bqt = q->queue_tags; 264 struct blk_queue_tag *bqt = q->queue_tags;
268 unsigned tag = rq->tag; /* negative tags invalid */ 265 unsigned tag = rq->tag; /* negative tags invalid */
269 266
267 lockdep_assert_held(q->queue_lock);
268
270 BUG_ON(tag >= bqt->real_max_depth); 269 BUG_ON(tag >= bqt->real_max_depth);
271 270
272 list_del_init(&rq->queuelist); 271 list_del_init(&rq->queuelist);
@@ -307,9 +306,6 @@ EXPORT_SYMBOL(blk_queue_end_tag);
307 * calling this function. The request will also be removed from 306 * calling this function. The request will also be removed from
308 * the request queue, so it's the drivers responsibility to readd 307 * the request queue, so it's the drivers responsibility to readd
309 * it if it should need to be restarted for some reason. 308 * it if it should need to be restarted for some reason.
310 *
311 * Notes:
312 * queue lock must be held.
313 **/ 309 **/
314int blk_queue_start_tag(struct request_queue *q, struct request *rq) 310int blk_queue_start_tag(struct request_queue *q, struct request *rq)
315{ 311{
@@ -317,6 +313,8 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
317 unsigned max_depth; 313 unsigned max_depth;
318 int tag; 314 int tag;
319 315
316 lockdep_assert_held(q->queue_lock);
317
320 if (unlikely((rq->rq_flags & RQF_QUEUED))) { 318 if (unlikely((rq->rq_flags & RQF_QUEUED))) {
321 printk(KERN_ERR 319 printk(KERN_ERR
322 "%s: request %p for device [%s] already tagged %d", 320 "%s: request %p for device [%s] already tagged %d",
@@ -389,14 +387,13 @@ EXPORT_SYMBOL(blk_queue_start_tag);
389 * Hardware conditions may dictate a need to stop all pending requests. 387 * Hardware conditions may dictate a need to stop all pending requests.
390 * In this case, we will safely clear the block side of the tag queue and 388 * In this case, we will safely clear the block side of the tag queue and
391 * readd all requests to the request queue in the right order. 389 * readd all requests to the request queue in the right order.
392 *
393 * Notes:
394 * queue lock must be held.
395 **/ 390 **/
396void blk_queue_invalidate_tags(struct request_queue *q) 391void blk_queue_invalidate_tags(struct request_queue *q)
397{ 392{
398 struct list_head *tmp, *n; 393 struct list_head *tmp, *n;
399 394
395 lockdep_assert_held(q->queue_lock);
396
400 list_for_each_safe(tmp, n, &q->tag_busy_list) 397 list_for_each_safe(tmp, n, &q->tag_busy_list)
401 blk_requeue_request(q, list_entry_rq(tmp)); 398 blk_requeue_request(q, list_entry_rq(tmp));
402} 399}
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index cbff183f3d9f..17ec83bb0900 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -189,13 +189,15 @@ unsigned long blk_rq_timeout(unsigned long timeout)
189 * Notes: 189 * Notes:
190 * Each request has its own timer, and as it is added to the queue, we 190 * Each request has its own timer, and as it is added to the queue, we
191 * set up the timer. When the request completes, we cancel the timer. 191 * set up the timer. When the request completes, we cancel the timer.
192 * Queue lock must be held for the non-mq case, mq case doesn't care.
193 */ 192 */
194void blk_add_timer(struct request *req) 193void blk_add_timer(struct request *req)
195{ 194{
196 struct request_queue *q = req->q; 195 struct request_queue *q = req->q;
197 unsigned long expiry; 196 unsigned long expiry;
198 197
198 if (!q->mq_ops)
199 lockdep_assert_held(q->queue_lock);
200
199 /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */ 201 /* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
200 if (!q->mq_ops && !q->rq_timed_out_fn) 202 if (!q->mq_ops && !q->rq_timed_out_fn)
201 return; 203 return;
diff --git a/block/blk.h b/block/blk.h
index 83c8e1100525..01ebb8185f6b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -143,6 +143,8 @@ static inline struct request *__elv_next_request(struct request_queue *q)
143 struct request *rq; 143 struct request *rq;
144 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); 144 struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
145 145
146 WARN_ON_ONCE(q->mq_ops);
147
146 while (1) { 148 while (1) {
147 if (!list_empty(&q->queue_head)) { 149 if (!list_empty(&q->queue_head)) {
148 rq = list_entry_rq(q->queue_head.next); 150 rq = list_entry_rq(q->queue_head.next);
@@ -334,4 +336,17 @@ static inline void blk_throtl_bio_endio(struct bio *bio) { }
334static inline void blk_throtl_stat_add(struct request *rq, u64 time) { } 336static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
335#endif 337#endif
336 338
339#ifdef CONFIG_BOUNCE
340extern int init_emergency_isa_pool(void);
341extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
342#else
343static inline int init_emergency_isa_pool(void)
344{
345 return 0;
346}
347static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
348{
349}
350#endif /* CONFIG_BOUNCE */
351
337#endif /* BLK_INTERNAL_H */ 352#endif /* BLK_INTERNAL_H */
diff --git a/block/bounce.c b/block/bounce.c
index 1cb5dd3a5da1..5793c2dc1a15 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -22,10 +22,12 @@
22#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
23 23
24#include <trace/events/block.h> 24#include <trace/events/block.h>
25#include "blk.h"
25 26
26#define POOL_SIZE 64 27#define POOL_SIZE 64
27#define ISA_POOL_SIZE 16 28#define ISA_POOL_SIZE 16
28 29
30static struct bio_set *bounce_bio_set, *bounce_bio_split;
29static mempool_t *page_pool, *isa_page_pool; 31static mempool_t *page_pool, *isa_page_pool;
30 32
31#if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL) 33#if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL)
@@ -40,6 +42,14 @@ static __init int init_emergency_pool(void)
40 BUG_ON(!page_pool); 42 BUG_ON(!page_pool);
41 pr_info("pool size: %d pages\n", POOL_SIZE); 43 pr_info("pool size: %d pages\n", POOL_SIZE);
42 44
45 bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
46 BUG_ON(!bounce_bio_set);
47 if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
48 BUG_ON(1);
49
50 bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
51 BUG_ON(!bounce_bio_split);
52
43 return 0; 53 return 0;
44} 54}
45 55
@@ -143,7 +153,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
143 mempool_free(bvec->bv_page, pool); 153 mempool_free(bvec->bv_page, pool);
144 } 154 }
145 155
146 bio_orig->bi_error = bio->bi_error; 156 bio_orig->bi_status = bio->bi_status;
147 bio_endio(bio_orig); 157 bio_endio(bio_orig);
148 bio_put(bio); 158 bio_put(bio);
149} 159}
@@ -163,7 +173,7 @@ static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
163{ 173{
164 struct bio *bio_orig = bio->bi_private; 174 struct bio *bio_orig = bio->bi_private;
165 175
166 if (!bio->bi_error) 176 if (!bio->bi_status)
167 copy_to_high_bio_irq(bio_orig, bio); 177 copy_to_high_bio_irq(bio_orig, bio);
168 178
169 bounce_end_io(bio, pool); 179 bounce_end_io(bio, pool);
@@ -186,20 +196,31 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
186 int rw = bio_data_dir(*bio_orig); 196 int rw = bio_data_dir(*bio_orig);
187 struct bio_vec *to, from; 197 struct bio_vec *to, from;
188 struct bvec_iter iter; 198 struct bvec_iter iter;
189 unsigned i; 199 unsigned i = 0;
190 200 bool bounce = false;
191 bio_for_each_segment(from, *bio_orig, iter) 201 int sectors = 0;
192 if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q)) 202
193 goto bounce; 203 bio_for_each_segment(from, *bio_orig, iter) {
204 if (i++ < BIO_MAX_PAGES)
205 sectors += from.bv_len >> 9;
206 if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
207 bounce = true;
208 }
209 if (!bounce)
210 return;
194 211
195 return; 212 if (sectors < bio_sectors(*bio_orig)) {
196bounce: 213 bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
197 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set); 214 bio_chain(bio, *bio_orig);
215 generic_make_request(*bio_orig);
216 *bio_orig = bio;
217 }
218 bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
198 219
199 bio_for_each_segment_all(to, bio, i) { 220 bio_for_each_segment_all(to, bio, i) {
200 struct page *page = to->bv_page; 221 struct page *page = to->bv_page;
201 222
202 if (page_to_pfn(page) <= queue_bounce_pfn(q)) 223 if (page_to_pfn(page) <= q->limits.bounce_pfn)
203 continue; 224 continue;
204 225
205 to->bv_page = mempool_alloc(pool, q->bounce_gfp); 226 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
@@ -251,7 +272,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
251 * don't waste time iterating over bio segments 272 * don't waste time iterating over bio segments
252 */ 273 */
253 if (!(q->bounce_gfp & GFP_DMA)) { 274 if (!(q->bounce_gfp & GFP_DMA)) {
254 if (queue_bounce_pfn(q) >= blk_max_pfn) 275 if (q->limits.bounce_pfn >= blk_max_pfn)
255 return; 276 return;
256 pool = page_pool; 277 pool = page_pool;
257 } else { 278 } else {
@@ -264,5 +285,3 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
264 */ 285 */
265 __blk_queue_bounce(q, bio_orig, pool); 286 __blk_queue_bounce(q, bio_orig, pool);
266} 287}
267
268EXPORT_SYMBOL(blk_queue_bounce);
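
The bounce rework above stops borrowing fs_bio_set and instead splits an over-long bio before cloning it into private bio_sets. The split-and-chain idiom it uses, as a standalone sketch (split_bs stands in for bounce_bio_split):

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    static void cap_bio(struct bio **bio_orig, int sectors, struct bio_set *split_bs)
    {
            struct bio *front;

            if (sectors >= bio_sectors(*bio_orig))
                    return;                         /* already small enough */

            /* Front part goes into 'front'; *bio_orig keeps the remainder. */
            front = bio_split(*bio_orig, sectors, GFP_NOIO, split_bs);
            /* The remainder only completes once the front part has. */
            bio_chain(front, *bio_orig);
            /* Submit the remainder now, keep working on the front part. */
            generic_make_request(*bio_orig);
            *bio_orig = front;
    }
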
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 0a23dbba2d30..c4513b23f57a 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -37,7 +37,7 @@ static void bsg_destroy_job(struct kref *kref)
37 struct bsg_job *job = container_of(kref, struct bsg_job, kref); 37 struct bsg_job *job = container_of(kref, struct bsg_job, kref);
38 struct request *rq = job->req; 38 struct request *rq = job->req;
39 39
40 blk_end_request_all(rq, scsi_req(rq)->result); 40 blk_end_request_all(rq, BLK_STS_OK);
41 41
42 put_device(job->dev); /* release reference for the request */ 42 put_device(job->dev); /* release reference for the request */
43 43
@@ -202,7 +202,7 @@ static void bsg_request_fn(struct request_queue *q)
202 ret = bsg_create_job(dev, req); 202 ret = bsg_create_job(dev, req);
203 if (ret) { 203 if (ret) {
204 scsi_req(req)->result = ret; 204 scsi_req(req)->result = ret;
205 blk_end_request_all(req, ret); 205 blk_end_request_all(req, BLK_STS_OK);
206 spin_lock_irq(q->queue_lock); 206 spin_lock_irq(q->queue_lock);
207 continue; 207 continue;
208 } 208 }
@@ -246,6 +246,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
246 q->bsg_job_size = dd_job_size; 246 q->bsg_job_size = dd_job_size;
247 q->bsg_job_fn = job_fn; 247 q->bsg_job_fn = job_fn;
248 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); 248 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
249 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
249 blk_queue_softirq_done(q, bsg_softirq_done); 250 blk_queue_softirq_done(q, bsg_softirq_done);
250 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); 251 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
251 252
diff --git a/block/bsg.c b/block/bsg.c
index 6fd08544d77e..37663b664666 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -236,7 +236,6 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
236 rq = blk_get_request(q, op, GFP_KERNEL); 236 rq = blk_get_request(q, op, GFP_KERNEL);
237 if (IS_ERR(rq)) 237 if (IS_ERR(rq))
238 return rq; 238 return rq;
239 scsi_req_init(rq);
240 239
241 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); 240 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
242 if (ret) 241 if (ret)
@@ -294,14 +293,14 @@ out:
294 * async completion call-back from the block layer, when scsi/ide/whatever 293 * async completion call-back from the block layer, when scsi/ide/whatever
295 * calls end_that_request_last() on a request 294 * calls end_that_request_last() on a request
296 */ 295 */
297static void bsg_rq_end_io(struct request *rq, int uptodate) 296static void bsg_rq_end_io(struct request *rq, blk_status_t status)
298{ 297{
299 struct bsg_command *bc = rq->end_io_data; 298 struct bsg_command *bc = rq->end_io_data;
300 struct bsg_device *bd = bc->bd; 299 struct bsg_device *bd = bc->bd;
301 unsigned long flags; 300 unsigned long flags;
302 301
303 dprintk("%s: finished rq %p bc %p, bio %p stat %d\n", 302 dprintk("%s: finished rq %p bc %p, bio %p\n",
304 bd->name, rq, bc, bc->bio, uptodate); 303 bd->name, rq, bc, bc->bio);
305 304
306 bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration); 305 bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
307 306
@@ -750,6 +749,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
750#ifdef BSG_DEBUG 749#ifdef BSG_DEBUG
751 unsigned char buf[32]; 750 unsigned char buf[32];
752#endif 751#endif
752
753 if (!blk_queue_scsi_passthrough(rq)) {
754 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
755 return ERR_PTR(-EINVAL);
756 }
757
753 if (!blk_get_queue(rq)) 758 if (!blk_get_queue(rq))
754 return ERR_PTR(-ENXIO); 759 return ERR_PTR(-ENXIO);
755 760
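
With the QUEUE_FLAG_SCSI_PASSTHROUGH check added above, bsg only binds to queues whose requests really embed a struct scsi_request, as bsg-lib and cciss now advertise. Provider-side sketch for a legacy request_fn queue (q is assumed to come from blk_alloc_queue() with ->request_fn and ->queue_lock already set, as in the cciss hunk earlier):

    #include <linux/blkdev.h>
    #include <scsi/scsi_request.h>

    static int mydrv_finish_queue_setup(struct request_queue *q)
    {
            q->cmd_size = sizeof(struct scsi_request);      /* per-request payload */
            queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
            return blk_init_allocated_queue(q);
    }
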
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b7e9c7feeab2..3d5c28945719 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -982,15 +982,6 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
982 return min_vdisktime; 982 return min_vdisktime;
983} 983}
984 984
985static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
986{
987 s64 delta = (s64)(vdisktime - min_vdisktime);
988 if (delta < 0)
989 min_vdisktime = vdisktime;
990
991 return min_vdisktime;
992}
993
994static void update_min_vdisktime(struct cfq_rb_root *st) 985static void update_min_vdisktime(struct cfq_rb_root *st)
995{ 986{
996 struct cfq_group *cfqg; 987 struct cfq_group *cfqg;
diff --git a/block/elevator.c b/block/elevator.c
index dac99fbfc273..4bb2f0c93fa6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -681,6 +681,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
681 */ 681 */
682 if (elv_attempt_insert_merge(q, rq)) 682 if (elv_attempt_insert_merge(q, rq))
683 break; 683 break;
684 /* fall through */
684 case ELEVATOR_INSERT_SORT: 685 case ELEVATOR_INSERT_SORT:
685 BUG_ON(blk_rq_is_passthrough(rq)); 686 BUG_ON(blk_rq_is_passthrough(rq));
686 rq->rq_flags |= RQF_SORTED; 687 rq->rq_flags |= RQF_SORTED;
diff --git a/block/genhd.c b/block/genhd.c
index d252d29fe837..7f520fa25d16 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -36,7 +36,7 @@ struct kobject *block_depr;
36static DEFINE_SPINLOCK(ext_devt_lock); 36static DEFINE_SPINLOCK(ext_devt_lock);
37static DEFINE_IDR(ext_devt_idr); 37static DEFINE_IDR(ext_devt_idr);
38 38
39static struct device_type disk_type; 39static const struct device_type disk_type;
40 40
41static void disk_check_events(struct disk_events *ev, 41static void disk_check_events(struct disk_events *ev,
42 unsigned int *clearing_ptr); 42 unsigned int *clearing_ptr);
@@ -1183,7 +1183,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
1183 return NULL; 1183 return NULL;
1184} 1184}
1185 1185
1186static struct device_type disk_type = { 1186static const struct device_type disk_type = {
1187 .name = "disk", 1187 .name = "disk",
1188 .groups = disk_attr_groups, 1188 .groups = disk_attr_groups,
1189 .release = disk_release, 1189 .release = disk_release,
diff --git a/block/ioprio.c b/block/ioprio.c
index 4b120c9cf7e8..6f5d0b6625e3 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -75,7 +75,8 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
75 case IOPRIO_CLASS_RT: 75 case IOPRIO_CLASS_RT:
76 if (!capable(CAP_SYS_ADMIN)) 76 if (!capable(CAP_SYS_ADMIN))
77 return -EPERM; 77 return -EPERM;
78 /* fall through, rt has prio field too */ 78 /* fall through */
79 /* rt has prio field too */
79 case IOPRIO_CLASS_BE: 80 case IOPRIO_CLASS_BE:
80 if (data >= IOPRIO_BE_NR || data < 0) 81 if (data >= IOPRIO_BE_NR || data < 0)
81 return -EINVAL; 82 return -EINVAL;
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b9faabc75fdb..a9f6fd3fab8e 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -426,33 +426,29 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd,
426 } 426 }
427} 427}
428 428
429static struct request *kyber_get_request(struct request_queue *q, 429static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
430 unsigned int op,
431 struct blk_mq_alloc_data *data)
432{ 430{
433 struct kyber_queue_data *kqd = q->elevator->elevator_data;
434 struct request *rq;
435
436 /* 431 /*
437 * We use the scheduler tags as per-hardware queue queueing tokens. 432 * We use the scheduler tags as per-hardware queue queueing tokens.
438 * Async requests can be limited at this stage. 433 * Async requests can be limited at this stage.
439 */ 434 */
440 if (!op_is_sync(op)) 435 if (!op_is_sync(op)) {
436 struct kyber_queue_data *kqd = data->q->elevator->elevator_data;
437
441 data->shallow_depth = kqd->async_depth; 438 data->shallow_depth = kqd->async_depth;
439 }
440}
442 441
443 rq = __blk_mq_alloc_request(data, op); 442static void kyber_prepare_request(struct request *rq, struct bio *bio)
444 if (rq) 443{
445 rq_set_domain_token(rq, -1); 444 rq_set_domain_token(rq, -1);
446 return rq;
447} 445}
448 446
449static void kyber_put_request(struct request *rq) 447static void kyber_finish_request(struct request *rq)
450{ 448{
451 struct request_queue *q = rq->q; 449 struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
452 struct kyber_queue_data *kqd = q->elevator->elevator_data;
453 450
454 rq_clear_domain_token(kqd, rq); 451 rq_clear_domain_token(kqd, rq);
455 blk_mq_finish_request(rq);
456} 452}
457 453
458static void kyber_completed_request(struct request *rq) 454static void kyber_completed_request(struct request *rq)
@@ -815,8 +811,9 @@ static struct elevator_type kyber_sched = {
815 .exit_sched = kyber_exit_sched, 811 .exit_sched = kyber_exit_sched,
816 .init_hctx = kyber_init_hctx, 812 .init_hctx = kyber_init_hctx,
817 .exit_hctx = kyber_exit_hctx, 813 .exit_hctx = kyber_exit_hctx,
818 .get_request = kyber_get_request, 814 .limit_depth = kyber_limit_depth,
819 .put_request = kyber_put_request, 815 .prepare_request = kyber_prepare_request,
816 .finish_request = kyber_finish_request,
820 .completed_request = kyber_completed_request, 817 .completed_request = kyber_completed_request,
821 .dispatch_request = kyber_dispatch_request, 818 .dispatch_request = kyber_dispatch_request,
822 .has_work = kyber_has_work, 819 .has_work = kyber_has_work,
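
Kyber's conversion above shows the new scheduler hooks: ->get_request/->put_request are gone, replaced by ->limit_depth (tweak the allocation), plus ->prepare_request and ->finish_request (per-request setup and teardown around the core allocator). The minimal shape of an mq scheduler after the split, with hypothetical names (struct blk_mq_alloc_data comes from the in-tree block/blk-mq.h header, so this sketch assumes an in-tree build):

    #include <linux/module.h>
    #include <linux/elevator.h>
    #include <linux/blk-mq.h>
    #include "blk-mq.h"                     /* in-tree: struct blk_mq_alloc_data */

    static void sketch_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
    {
            /* e.g. lower data->shallow_depth for async requests */
    }

    static void sketch_prepare_request(struct request *rq, struct bio *bio)
    {
            /* per-request scheduler state setup */
    }

    static void sketch_finish_request(struct request *rq)
    {
            /* release per-request scheduler state; the core frees rq itself */
    }

    static struct elevator_type sketch_sched = {
            .ops.mq = {
                    .limit_depth            = sketch_limit_depth,
                    .prepare_request        = sketch_prepare_request,
                    .finish_request         = sketch_finish_request,
            },
            .uses_mq        = true,
            .elevator_name  = "mq-sketch",
            .elevator_owner = THIS_MODULE,
    };
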
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 4a294a5f7fab..7440de44dd85 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -326,7 +326,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
326 if (IS_ERR(rq)) 326 if (IS_ERR(rq))
327 return PTR_ERR(rq); 327 return PTR_ERR(rq);
328 req = scsi_req(rq); 328 req = scsi_req(rq);
329 scsi_req_init(rq);
330 329
331 if (hdr->cmd_len > BLK_MAX_CDB) { 330 if (hdr->cmd_len > BLK_MAX_CDB) {
332 req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); 331 req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
@@ -456,7 +455,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
456 goto error_free_buffer; 455 goto error_free_buffer;
457 } 456 }
458 req = scsi_req(rq); 457 req = scsi_req(rq);
459 scsi_req_init(rq);
460 458
461 cmdlen = COMMAND_SIZE(opcode); 459 cmdlen = COMMAND_SIZE(opcode);
462 460
@@ -542,7 +540,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
542 rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM); 540 rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
543 if (IS_ERR(rq)) 541 if (IS_ERR(rq))
544 return PTR_ERR(rq); 542 return PTR_ERR(rq);
545 scsi_req_init(rq);
546 rq->timeout = BLK_DEFAULT_SG_TIMEOUT; 543 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
547 scsi_req(rq)->cmd[0] = cmd; 544 scsi_req(rq)->cmd[0] = cmd;
548 scsi_req(rq)->cmd[4] = data; 545 scsi_req(rq)->cmd[4] = data;
@@ -744,10 +741,14 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
744} 741}
745EXPORT_SYMBOL(scsi_cmd_blk_ioctl); 742EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
746 743
747void scsi_req_init(struct request *rq) 744/**
745 * scsi_req_init - initialize certain fields of a scsi_request structure
746 * @req: Pointer to a scsi_request structure.
747 * Initializes .__cmd[], .cmd, .cmd_len and .sense_len but no other members
748 * of struct scsi_request.
749 */
750void scsi_req_init(struct scsi_request *req)
748{ 751{
749 struct scsi_request *req = scsi_req(rq);
750
751 memset(req->__cmd, 0, sizeof(req->__cmd)); 752 memset(req->__cmd, 0, sizeof(req->__cmd));
752 req->cmd = req->__cmd; 753 req->cmd = req->__cmd;
753 req->cmd_len = BLK_MAX_CDB; 754 req->cmd_len = BLK_MAX_CDB;
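
With scsi_req_init() now taking the struct scsi_request itself, and the block core assumed to initialize the embedded scsi_request when the request is allocated, the explicit calls removed in this file become unnecessary. Caller-side sketch:

    #include <linux/blkdev.h>
    #include <linux/err.h>
    #include <scsi/scsi_request.h>

    static struct request *get_pc_request(struct request_queue *q)
    {
            struct request *rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);

            if (IS_ERR(rq))
                    return rq;
            /* No explicit scsi_req_init(rq) any more; re-initializing by hand
             * would now read scsi_req_init(scsi_req(rq)). */
            rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
            return rq;
    }
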
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 680c6d636298..3416dadf7b15 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -46,8 +46,8 @@ static __be16 t10_pi_ip_fn(void *data, unsigned int len)
46 * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref 46 * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
47 * tag. 47 * tag.
48 */ 48 */
49static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn, 49static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
50 unsigned int type) 50 csum_fn *fn, unsigned int type)
51{ 51{
52 unsigned int i; 52 unsigned int i;
53 53
@@ -67,11 +67,11 @@ static int t10_pi_generate(struct blk_integrity_iter *iter, csum_fn *fn,
67 iter->seed++; 67 iter->seed++;
68 } 68 }
69 69
70 return 0; 70 return BLK_STS_OK;
71} 71}
72 72
73static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn, 73static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
74 unsigned int type) 74 csum_fn *fn, unsigned int type)
75{ 75{
76 unsigned int i; 76 unsigned int i;
77 77
@@ -91,7 +91,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
91 "(rcvd %u)\n", iter->disk_name, 91 "(rcvd %u)\n", iter->disk_name,
92 (unsigned long long) 92 (unsigned long long)
93 iter->seed, be32_to_cpu(pi->ref_tag)); 93 iter->seed, be32_to_cpu(pi->ref_tag));
94 return -EILSEQ; 94 return BLK_STS_PROTECTION;
95 } 95 }
96 break; 96 break;
97 case 3: 97 case 3:
@@ -108,7 +108,7 @@ static int t10_pi_verify(struct blk_integrity_iter *iter, csum_fn *fn,
108 "(rcvd %04x, want %04x)\n", iter->disk_name, 108 "(rcvd %04x, want %04x)\n", iter->disk_name,
109 (unsigned long long)iter->seed, 109 (unsigned long long)iter->seed,
110 be16_to_cpu(pi->guard_tag), be16_to_cpu(csum)); 110 be16_to_cpu(pi->guard_tag), be16_to_cpu(csum));
111 return -EILSEQ; 111 return BLK_STS_PROTECTION;
112 } 112 }
113 113
114next: 114next:
@@ -117,45 +117,45 @@ next:
117 iter->seed++; 117 iter->seed++;
118 } 118 }
119 119
120 return 0; 120 return BLK_STS_OK;
121} 121}
122 122
123static int t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) 123static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
124{ 124{
125 return t10_pi_generate(iter, t10_pi_crc_fn, 1); 125 return t10_pi_generate(iter, t10_pi_crc_fn, 1);
126} 126}
127 127
128static int t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) 128static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
129{ 129{
130 return t10_pi_generate(iter, t10_pi_ip_fn, 1); 130 return t10_pi_generate(iter, t10_pi_ip_fn, 1);
131} 131}
132 132
133static int t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) 133static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
134{ 134{
135 return t10_pi_verify(iter, t10_pi_crc_fn, 1); 135 return t10_pi_verify(iter, t10_pi_crc_fn, 1);
136} 136}
137 137
138static int t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) 138static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
139{ 139{
140 return t10_pi_verify(iter, t10_pi_ip_fn, 1); 140 return t10_pi_verify(iter, t10_pi_ip_fn, 1);
141} 141}
142 142
143static int t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) 143static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
144{ 144{
145 return t10_pi_generate(iter, t10_pi_crc_fn, 3); 145 return t10_pi_generate(iter, t10_pi_crc_fn, 3);
146} 146}
147 147
148static int t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) 148static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
149{ 149{
150 return t10_pi_generate(iter, t10_pi_ip_fn, 3); 150 return t10_pi_generate(iter, t10_pi_ip_fn, 3);
151} 151}
152 152
153static int t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) 153static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
154{ 154{
155 return t10_pi_verify(iter, t10_pi_crc_fn, 3); 155 return t10_pi_verify(iter, t10_pi_crc_fn, 3);
156} 156}
157 157
158static int t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) 158static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
159{ 159{
160 return t10_pi_verify(iter, t10_pi_ip_fn, 3); 160 return t10_pi_verify(iter, t10_pi_ip_fn, 3);
161} 161}
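
The t10-pi conversion implies the integrity generate/verify callbacks now return blk_status_t rather than an errno, with BLK_STS_PROTECTION replacing -EILSEQ on a tag mismatch. A sketch of a verify callback under that assumption:

    #include <linux/blkdev.h>
    #include <linux/blk_types.h>

    static blk_status_t sketch_pi_verify(struct blk_integrity_iter *iter)
    {
            /* Walk iter->data_buf / iter->prot_buf one interval at a time and
             * compare guard/reference tags (checks omitted in this sketch). */
            bool mismatch = false;          /* placeholder for the real comparison */

            if (mismatch)
                    return BLK_STS_PROTECTION;
            return BLK_STS_OK;
    }
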
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 26a51be77227..245a879b036e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3464,7 +3464,7 @@ static inline bool DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
3464 bool SuccessfulIO) 3464 bool SuccessfulIO)
3465{ 3465{
3466 struct request *Request = Command->Request; 3466 struct request *Request = Command->Request;
3467 int Error = SuccessfulIO ? 0 : -EIO; 3467 blk_status_t Error = SuccessfulIO ? BLK_STS_OK : BLK_STS_IOERR;
3468 3468
3469 pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist, 3469 pci_unmap_sg(Command->Controller->PCIDevice, Command->cmd_sglist,
3470 Command->SegmentCount, Command->DmaDirection); 3470 Command->SegmentCount, Command->DmaDirection);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a328f673adfe..49908c74bfcb 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1378,7 +1378,7 @@ static void redo_fd_request(void)
1378 struct amiga_floppy_struct *floppy; 1378 struct amiga_floppy_struct *floppy;
1379 char *data; 1379 char *data;
1380 unsigned long flags; 1380 unsigned long flags;
1381 int err; 1381 blk_status_t err;
1382 1382
1383next_req: 1383next_req:
1384 rq = set_next_request(); 1384 rq = set_next_request();
@@ -1392,7 +1392,7 @@ next_req:
1392 1392
1393next_segment: 1393next_segment:
1394 /* Here someone could investigate to be more efficient */ 1394 /* Here someone could investigate to be more efficient */
1395 for (cnt = 0, err = 0; cnt < blk_rq_cur_sectors(rq); cnt++) { 1395 for (cnt = 0, err = BLK_STS_OK; cnt < blk_rq_cur_sectors(rq); cnt++) {
1396#ifdef DEBUG 1396#ifdef DEBUG
1397 printk("fd: sector %ld + %d requested for %s\n", 1397 printk("fd: sector %ld + %d requested for %s\n",
1398 blk_rq_pos(rq), cnt, 1398 blk_rq_pos(rq), cnt,
@@ -1400,7 +1400,7 @@ next_segment:
1400#endif 1400#endif
1401 block = blk_rq_pos(rq) + cnt; 1401 block = blk_rq_pos(rq) + cnt;
1402 if ((int)block > floppy->blocks) { 1402 if ((int)block > floppy->blocks) {
1403 err = -EIO; 1403 err = BLK_STS_IOERR;
1404 break; 1404 break;
1405 } 1405 }
1406 1406
@@ -1413,7 +1413,7 @@ next_segment:
1413#endif 1413#endif
1414 1414
1415 if (get_track(drive, track) == -1) { 1415 if (get_track(drive, track) == -1) {
1416 err = -EIO; 1416 err = BLK_STS_IOERR;
1417 break; 1417 break;
1418 } 1418 }
1419 1419
@@ -1424,7 +1424,7 @@ next_segment:
1424 1424
1425 /* keep the drive spinning while writes are scheduled */ 1425 /* keep the drive spinning while writes are scheduled */
1426 if (!fd_motor_on(drive)) { 1426 if (!fd_motor_on(drive)) {
1427 err = -EIO; 1427 err = BLK_STS_IOERR;
1428 break; 1428 break;
1429 } 1429 }
1430 /* 1430 /*
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 027b876370bc..6797e6c23c8a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -388,6 +388,7 @@ aoeblk_gdalloc(void *vp)
388 d->aoemajor, d->aoeminor); 388 d->aoemajor, d->aoeminor);
389 goto err_mempool; 389 goto err_mempool;
390 } 390 }
391 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
391 392
392 spin_lock_irqsave(&d->lock, flags); 393 spin_lock_irqsave(&d->lock, flags);
393 WARN_ON(!(d->flags & DEVFL_GD_NOW)); 394 WARN_ON(!(d->flags & DEVFL_GD_NOW));
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 3c606c09fd5a..dc43254e05a4 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1070,8 +1070,8 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
1070 d->ip.rq = NULL; 1070 d->ip.rq = NULL;
1071 do { 1071 do {
1072 bio = rq->bio; 1072 bio = rq->bio;
1073 bok = !fastfail && !bio->bi_error; 1073 bok = !fastfail && !bio->bi_status;
1074 } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size)); 1074 } while (__blk_end_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
1075 1075
1076 /* cf. http://lkml.org/lkml/2006/10/31/28 */ 1076 /* cf. http://lkml.org/lkml/2006/10/31/28 */
1077 if (!fastfail) 1077 if (!fastfail)
@@ -1131,7 +1131,7 @@ ktiocomplete(struct frame *f)
1131 ahout->cmdstat, ahin->cmdstat, 1131 ahout->cmdstat, ahin->cmdstat,
1132 d->aoemajor, d->aoeminor); 1132 d->aoemajor, d->aoeminor);
1133noskb: if (buf) 1133noskb: if (buf)
1134 buf->bio->bi_error = -EIO; 1134 buf->bio->bi_status = BLK_STS_IOERR;
1135 goto out; 1135 goto out;
1136 } 1136 }
1137 1137
@@ -1144,7 +1144,7 @@ noskb: if (buf)
1144 "aoe: runt data size in read from", 1144 "aoe: runt data size in read from",
1145 (long) d->aoemajor, d->aoeminor, 1145 (long) d->aoemajor, d->aoeminor,
1146 skb->len, n); 1146 skb->len, n);
1147 buf->bio->bi_error = -EIO; 1147 buf->bio->bi_status = BLK_STS_IOERR;
1148 break; 1148 break;
1149 } 1149 }
1150 if (n > f->iter.bi_size) { 1150 if (n > f->iter.bi_size) {
@@ -1152,7 +1152,7 @@ noskb: if (buf)
1152 "aoe: too-large data size in read from", 1152 "aoe: too-large data size in read from",
1153 (long) d->aoemajor, d->aoeminor, 1153 (long) d->aoemajor, d->aoeminor,
1154 n, f->iter.bi_size); 1154 n, f->iter.bi_size);
1155 buf->bio->bi_error = -EIO; 1155 buf->bio->bi_status = BLK_STS_IOERR;
1156 break; 1156 break;
1157 } 1157 }
1158 bvcpy(skb, f->buf->bio, f->iter, n); 1158 bvcpy(skb, f->buf->bio, f->iter, n);
@@ -1654,7 +1654,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf)
1654 if (buf == NULL) 1654 if (buf == NULL)
1655 return; 1655 return;
1656 buf->iter.bi_size = 0; 1656 buf->iter.bi_size = 0;
1657 buf->bio->bi_error = -EIO; 1657 buf->bio->bi_status = BLK_STS_IOERR;
1658 if (buf->nframesout == 0) 1658 if (buf->nframesout == 0)
1659 aoe_end_buf(d, buf); 1659 aoe_end_buf(d, buf);
1660} 1660}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index ffd1947500c6..b28fefb90391 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d)
170 if (rq == NULL) 170 if (rq == NULL)
171 return; 171 return;
172 while ((bio = d->ip.nxbio)) { 172 while ((bio = d->ip.nxbio)) {
173 bio->bi_error = -EIO; 173 bio->bi_status = BLK_STS_IOERR;
174 d->ip.nxbio = bio->bi_next; 174 d->ip.nxbio = bio->bi_next;
175 n = (unsigned long) rq->special; 175 n = (unsigned long) rq->special;
176 rq->special = (void *) --n; 176 rq->special = (void *) --n;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index fa69ecd52cb5..92da886180aa 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -378,7 +378,7 @@ static DEFINE_TIMER(readtrack_timer, fd_readtrack_check, 0, 0);
378static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0); 378static DEFINE_TIMER(timeout_timer, fd_times_out, 0, 0);
379static DEFINE_TIMER(fd_timer, check_change, 0, 0); 379static DEFINE_TIMER(fd_timer, check_change, 0, 0);
380 380
381static void fd_end_request_cur(int err) 381static void fd_end_request_cur(blk_status_t err)
382{ 382{
383 if (!__blk_end_request_cur(fd_request, err)) 383 if (!__blk_end_request_cur(fd_request, err))
384 fd_request = NULL; 384 fd_request = NULL;
@@ -620,7 +620,7 @@ static void fd_error( void )
620 fd_request->error_count++; 620 fd_request->error_count++;
621 if (fd_request->error_count >= MAX_ERRORS) { 621 if (fd_request->error_count >= MAX_ERRORS) {
622 printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); 622 printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive );
623 fd_end_request_cur(-EIO); 623 fd_end_request_cur(BLK_STS_IOERR);
624 } 624 }
625 else if (fd_request->error_count == RECALIBRATE_ERRORS) { 625 else if (fd_request->error_count == RECALIBRATE_ERRORS) {
626 printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); 626 printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive );
@@ -739,7 +739,7 @@ static void do_fd_action( int drive )
739 } 739 }
740 else { 740 else {
741 /* all sectors finished */ 741 /* all sectors finished */
742 fd_end_request_cur(0); 742 fd_end_request_cur(BLK_STS_OK);
743 redo_fd_request(); 743 redo_fd_request();
744 return; 744 return;
745 } 745 }
@@ -1144,7 +1144,7 @@ static void fd_rwsec_done1(int status)
1144 } 1144 }
1145 else { 1145 else {
1146 /* all sectors finished */ 1146 /* all sectors finished */
1147 fd_end_request_cur(0); 1147 fd_end_request_cur(BLK_STS_OK);
1148 redo_fd_request(); 1148 redo_fd_request();
1149 } 1149 }
1150 return; 1150 return;
@@ -1445,7 +1445,7 @@ repeat:
1445 if (!UD.connected) { 1445 if (!UD.connected) {
1446 /* drive not connected */ 1446 /* drive not connected */
1447 printk(KERN_ERR "Unknown Device: fd%d\n", drive ); 1447 printk(KERN_ERR "Unknown Device: fd%d\n", drive );
1448 fd_end_request_cur(-EIO); 1448 fd_end_request_cur(BLK_STS_IOERR);
1449 goto repeat; 1449 goto repeat;
1450 } 1450 }
1451 1451
@@ -1461,12 +1461,12 @@ repeat:
1461 /* user supplied disk type */ 1461 /* user supplied disk type */
1462 if (--type >= NUM_DISK_MINORS) { 1462 if (--type >= NUM_DISK_MINORS) {
1463 printk(KERN_WARNING "fd%d: invalid disk format", drive ); 1463 printk(KERN_WARNING "fd%d: invalid disk format", drive );
1464 fd_end_request_cur(-EIO); 1464 fd_end_request_cur(BLK_STS_IOERR);
1465 goto repeat; 1465 goto repeat;
1466 } 1466 }
1467 if (minor2disktype[type].drive_types > DriveType) { 1467 if (minor2disktype[type].drive_types > DriveType) {
1468 printk(KERN_WARNING "fd%d: unsupported disk format", drive ); 1468 printk(KERN_WARNING "fd%d: unsupported disk format", drive );
1469 fd_end_request_cur(-EIO); 1469 fd_end_request_cur(BLK_STS_IOERR);
1470 goto repeat; 1470 goto repeat;
1471 } 1471 }
1472 type = minor2disktype[type].index; 1472 type = minor2disktype[type].index;
@@ -1476,7 +1476,7 @@ repeat:
1476 } 1476 }
1477 1477
1478 if (blk_rq_pos(fd_request) + 1 > UDT->blocks) { 1478 if (blk_rq_pos(fd_request) + 1 > UDT->blocks) {
1479 fd_end_request_cur(-EIO); 1479 fd_end_request_cur(BLK_STS_IOERR);
1480 goto repeat; 1480 goto repeat;
1481 } 1481 }
1482 1482
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 57b574f2f66a..6112e99bedf7 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,7 +418,6 @@ static struct brd_device *brd_alloc(int i)
418 418
419 blk_queue_make_request(brd->brd_queue, brd_make_request); 419 blk_queue_make_request(brd->brd_queue, brd_make_request);
420 blk_queue_max_hw_sectors(brd->brd_queue, 1024); 420 blk_queue_max_hw_sectors(brd->brd_queue, 1024);
421 blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
422 421
423 /* This is so fdisk will align partitions on 4k, because of 422 /* This is so fdisk will align partitions on 4k, because of
424 * direct_access API needing 4k alignment, returning a PFN 423 * direct_access API needing 4k alignment, returning a PFN
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index cd375503f7b0..02a611993bb4 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1864,7 +1864,8 @@ static void cciss_softirq_done(struct request *rq)
1864 /* set the residual count for pc requests */ 1864 /* set the residual count for pc requests */
1865 if (blk_rq_is_passthrough(rq)) 1865 if (blk_rq_is_passthrough(rq))
1866 scsi_req(rq)->resid_len = c->err_info->ResidualCnt; 1866 scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
1867 blk_end_request_all(rq, scsi_req(rq)->result ? -EIO : 0); 1867 blk_end_request_all(rq, scsi_req(rq)->result ?
1868 BLK_STS_IOERR : BLK_STS_OK);
1868 1869
1869 spin_lock_irqsave(&h->lock, flags); 1870 spin_lock_irqsave(&h->lock, flags);
1870 cmd_free(h, c); 1871 cmd_free(h, c);
@@ -1956,6 +1957,7 @@ static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
1956 disk->queue->cmd_size = sizeof(struct scsi_request); 1957 disk->queue->cmd_size = sizeof(struct scsi_request);
1957 disk->queue->request_fn = do_cciss_request; 1958 disk->queue->request_fn = do_cciss_request;
1958 disk->queue->queue_lock = &h->lock; 1959 disk->queue->queue_lock = &h->lock;
1960 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, disk->queue);
1959 if (blk_init_allocated_queue(disk->queue) < 0) 1961 if (blk_init_allocated_queue(disk->queue) < 0)
1960 goto cleanup_queue; 1962 goto cleanup_queue;
1961 1963
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 8d7bcfa49c12..e02c45cd3c5a 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -178,7 +178,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
178 else 178 else
179 submit_bio(bio); 179 submit_bio(bio);
180 wait_until_done_or_force_detached(device, bdev, &device->md_io.done); 180 wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
181 if (!bio->bi_error) 181 if (!bio->bi_status)
182 err = device->md_io.error; 182 err = device->md_io.error;
183 183
184 out: 184 out:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a804a4107fbc..809fd245c3dc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -959,16 +959,16 @@ static void drbd_bm_endio(struct bio *bio)
959 !bm_test_page_unchanged(b->bm_pages[idx])) 959 !bm_test_page_unchanged(b->bm_pages[idx]))
960 drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx); 960 drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx);
961 961
962 if (bio->bi_error) { 962 if (bio->bi_status) {
963 /* ctx error will hold the completed-last non-zero error code, 963 /* ctx error will hold the completed-last non-zero error code,
964 * in case error codes differ. */ 964 * in case error codes differ. */
965 ctx->error = bio->bi_error; 965 ctx->error = blk_status_to_errno(bio->bi_status);
966 bm_set_page_io_err(b->bm_pages[idx]); 966 bm_set_page_io_err(b->bm_pages[idx]);
967 /* Not identical to on disk version of it. 967 /* Not identical to on disk version of it.
968 * Is BM_PAGE_IO_ERROR enough? */ 968 * Is BM_PAGE_IO_ERROR enough? */
969 if (__ratelimit(&drbd_ratelimit_state)) 969 if (__ratelimit(&drbd_ratelimit_state))
970 drbd_err(device, "IO ERROR %d on bitmap page idx %u\n", 970 drbd_err(device, "IO ERROR %d on bitmap page idx %u\n",
971 bio->bi_error, idx); 971 bio->bi_status, idx);
972 } else { 972 } else {
973 bm_clear_page_io_err(b->bm_pages[idx]); 973 bm_clear_page_io_err(b->bm_pages[idx]);
974 dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx); 974 dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d5da45bb03a6..d17b6e6393c7 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1441,6 +1441,9 @@ extern struct bio_set *drbd_md_io_bio_set;
1441/* to allocate from that set */ 1441/* to allocate from that set */
1442extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); 1442extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
1443 1443
1444/* And a bio_set for cloning */
1445extern struct bio_set *drbd_io_bio_set;
1446
1444extern struct mutex resources_mutex; 1447extern struct mutex resources_mutex;
1445 1448
1446extern int conn_lowest_minor(struct drbd_connection *connection); 1449extern int conn_lowest_minor(struct drbd_connection *connection);
@@ -1627,7 +1630,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
1627 __release(local); 1630 __release(local);
1628 if (!bio->bi_bdev) { 1631 if (!bio->bi_bdev) {
1629 drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); 1632 drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
1630 bio->bi_error = -ENODEV; 1633 bio->bi_status = BLK_STS_IOERR;
1631 bio_endio(bio); 1634 bio_endio(bio);
1632 return; 1635 return;
1633 } 1636 }
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 84455c365f57..5fb99e06ebe4 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -128,6 +128,7 @@ mempool_t *drbd_request_mempool;
128mempool_t *drbd_ee_mempool; 128mempool_t *drbd_ee_mempool;
129mempool_t *drbd_md_io_page_pool; 129mempool_t *drbd_md_io_page_pool;
130struct bio_set *drbd_md_io_bio_set; 130struct bio_set *drbd_md_io_bio_set;
131struct bio_set *drbd_io_bio_set;
131 132
132/* I do not use a standard mempool, because: 133/* I do not use a standard mempool, because:
133 1) I want to hand out the pre-allocated objects first. 134 1) I want to hand out the pre-allocated objects first.
@@ -2098,6 +2099,8 @@ static void drbd_destroy_mempools(void)
2098 2099
2099 /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */ 2100 /* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
2100 2101
2102 if (drbd_io_bio_set)
2103 bioset_free(drbd_io_bio_set);
2101 if (drbd_md_io_bio_set) 2104 if (drbd_md_io_bio_set)
2102 bioset_free(drbd_md_io_bio_set); 2105 bioset_free(drbd_md_io_bio_set);
2103 if (drbd_md_io_page_pool) 2106 if (drbd_md_io_page_pool)
@@ -2115,6 +2118,7 @@ static void drbd_destroy_mempools(void)
2115 if (drbd_al_ext_cache) 2118 if (drbd_al_ext_cache)
2116 kmem_cache_destroy(drbd_al_ext_cache); 2119 kmem_cache_destroy(drbd_al_ext_cache);
2117 2120
2121 drbd_io_bio_set = NULL;
2118 drbd_md_io_bio_set = NULL; 2122 drbd_md_io_bio_set = NULL;
2119 drbd_md_io_page_pool = NULL; 2123 drbd_md_io_page_pool = NULL;
2120 drbd_ee_mempool = NULL; 2124 drbd_ee_mempool = NULL;
@@ -2142,6 +2146,7 @@ static int drbd_create_mempools(void)
2142 drbd_pp_pool = NULL; 2146 drbd_pp_pool = NULL;
2143 drbd_md_io_page_pool = NULL; 2147 drbd_md_io_page_pool = NULL;
2144 drbd_md_io_bio_set = NULL; 2148 drbd_md_io_bio_set = NULL;
2149 drbd_io_bio_set = NULL;
2145 2150
2146 /* caches */ 2151 /* caches */
2147 drbd_request_cache = kmem_cache_create( 2152 drbd_request_cache = kmem_cache_create(
@@ -2165,7 +2170,13 @@ static int drbd_create_mempools(void)
2165 goto Enomem; 2170 goto Enomem;
2166 2171
2167 /* mempools */ 2172 /* mempools */
2168 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); 2173 drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_RESCUER);
2174 if (drbd_io_bio_set == NULL)
2175 goto Enomem;
2176
2177 drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
2178 BIOSET_NEED_BVECS |
2179 BIOSET_NEED_RESCUER);
2169 if (drbd_md_io_bio_set == NULL) 2180 if (drbd_md_io_bio_set == NULL)
2170 goto Enomem; 2181 goto Enomem;
2171 2182
@@ -2839,7 +2850,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2839 /* Setting the max_hw_sectors to an odd value of 8kibyte here 2850 /* Setting the max_hw_sectors to an odd value of 8kibyte here
2840 This triggers a max_bio_size message upon first attach or connect */ 2851 This triggers a max_bio_size message upon first attach or connect */
2841 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); 2852 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
2842 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
2843 q->queue_lock = &resource->req_lock; 2853 q->queue_lock = &resource->req_lock;
2844 2854
2845 device->md_io.page = alloc_page(GFP_KERNEL); 2855 device->md_io.page = alloc_page(GFP_KERNEL);
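drbd_main.c also picks up the new bio_set API: bioset_create() grew a flags argument, and the clone path uses bio_clone_fast() against a dedicated set. The sketch below is illustrative (the names are not drbd's) and assumes the set is only used for fast clones, which share the source bio's bvec table:

#include <linux/bio.h>

static struct bio_set *example_clone_set;

static int example_create_set(void)
{
        /*
         * BIOSET_NEED_RESCUER keeps the per-bioset rescue workqueue;
         * a set that backs bio_alloc_bioset() would also pass
         * BIOSET_NEED_BVECS.
         */
        example_clone_set = bioset_create(BIO_POOL_SIZE, 0,
                                          BIOSET_NEED_RESCUER);
        return example_clone_set ? 0 : -ENOMEM;
}

static struct bio *example_clone(struct bio *bio_src)
{
        /* Fast clone: no bvec copy, so the set needs no bvec pool. */
        return bio_clone_fast(bio_src, GFP_NOIO, example_clone_set);
}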
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 02255a0d68b9..ad0fcb43e45c 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2294,7 +2294,7 @@ _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_
2294static enum drbd_ret_code 2294static enum drbd_ret_code
2295check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf) 2295check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2296{ 2296{
2297 static enum drbd_ret_code rv; 2297 enum drbd_ret_code rv;
2298 struct drbd_peer_device *peer_device; 2298 struct drbd_peer_device *peer_device;
2299 int i; 2299 int i;
2300 2300
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 1b0a2be24f39..c7e95e6380fb 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1229,9 +1229,9 @@ void one_flush_endio(struct bio *bio)
1229 struct drbd_device *device = octx->device; 1229 struct drbd_device *device = octx->device;
1230 struct issue_flush_context *ctx = octx->ctx; 1230 struct issue_flush_context *ctx = octx->ctx;
1231 1231
1232 if (bio->bi_error) { 1232 if (bio->bi_status) {
1233 ctx->error = bio->bi_error; 1233 ctx->error = blk_status_to_errno(bio->bi_status);
1234 drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error); 1234 drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_status);
1235 } 1235 }
1236 kfree(octx); 1236 kfree(octx);
1237 bio_put(bio); 1237 bio_put(bio);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 656624314f0d..f6e865b2d543 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -203,7 +203,7 @@ void start_new_tl_epoch(struct drbd_connection *connection)
203void complete_master_bio(struct drbd_device *device, 203void complete_master_bio(struct drbd_device *device,
204 struct bio_and_error *m) 204 struct bio_and_error *m)
205{ 205{
206 m->bio->bi_error = m->error; 206 m->bio->bi_status = errno_to_blk_status(m->error);
207 bio_endio(m->bio); 207 bio_endio(m->bio);
208 dec_ap_bio(device); 208 dec_ap_bio(device);
209} 209}
@@ -1157,7 +1157,7 @@ static void drbd_process_discard_req(struct drbd_request *req)
1157 1157
1158 if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, 1158 if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
1159 GFP_NOIO, 0)) 1159 GFP_NOIO, 0))
1160 req->private_bio->bi_error = -EIO; 1160 req->private_bio->bi_status = BLK_STS_IOERR;
1161 bio_endio(req->private_bio); 1161 bio_endio(req->private_bio);
1162} 1162}
1163 1163
@@ -1225,7 +1225,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
1225 /* only pass the error to the upper layers. 1225 /* only pass the error to the upper layers.
1226 * if user cannot handle io errors, that's not our business. */ 1226 * if user cannot handle io errors, that's not our business. */
1227 drbd_err(device, "could not kmalloc() req\n"); 1227 drbd_err(device, "could not kmalloc() req\n");
1228 bio->bi_error = -ENOMEM; 1228 bio->bi_status = BLK_STS_RESOURCE;
1229 bio_endio(bio); 1229 bio_endio(bio);
1230 return ERR_PTR(-ENOMEM); 1230 return ERR_PTR(-ENOMEM);
1231 } 1231 }
@@ -1560,7 +1560,7 @@ blk_qc_t drbd_make_request(struct request_queue *q, struct bio *bio)
1560 struct drbd_device *device = (struct drbd_device *) q->queuedata; 1560 struct drbd_device *device = (struct drbd_device *) q->queuedata;
1561 unsigned long start_jif; 1561 unsigned long start_jif;
1562 1562
1563 blk_queue_split(q, &bio, q->bio_split); 1563 blk_queue_split(q, &bio);
1564 1564
1565 start_jif = jiffies; 1565 start_jif = jiffies;
1566 1566
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index eb49e7f2da91..9e1866ab238f 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -263,7 +263,7 @@ enum drbd_req_state_bits {
263static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) 263static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
264{ 264{
265 struct bio *bio; 265 struct bio *bio;
266 bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ 266 bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
267 267
268 req->private_bio = bio; 268 req->private_bio = bio;
269 269
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1afcb4e02d8d..1d8726a8df34 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -63,7 +63,7 @@ void drbd_md_endio(struct bio *bio)
63 struct drbd_device *device; 63 struct drbd_device *device;
64 64
65 device = bio->bi_private; 65 device = bio->bi_private;
66 device->md_io.error = bio->bi_error; 66 device->md_io.error = blk_status_to_errno(bio->bi_status);
67 67
68 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 68 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
69 * to timeout on the lower level device, and eventually detach from it. 69 * to timeout on the lower level device, and eventually detach from it.
@@ -177,13 +177,13 @@ void drbd_peer_request_endio(struct bio *bio)
177 bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES || 177 bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
178 bio_op(bio) == REQ_OP_DISCARD; 178 bio_op(bio) == REQ_OP_DISCARD;
179 179
180 if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) 180 if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
181 drbd_warn(device, "%s: error=%d s=%llus\n", 181 drbd_warn(device, "%s: error=%d s=%llus\n",
182 is_write ? (is_discard ? "discard" : "write") 182 is_write ? (is_discard ? "discard" : "write")
183 : "read", bio->bi_error, 183 : "read", bio->bi_status,
184 (unsigned long long)peer_req->i.sector); 184 (unsigned long long)peer_req->i.sector);
185 185
186 if (bio->bi_error) 186 if (bio->bi_status)
187 set_bit(__EE_WAS_ERROR, &peer_req->flags); 187 set_bit(__EE_WAS_ERROR, &peer_req->flags);
188 188
189 bio_put(bio); /* no need for the bio anymore */ 189 bio_put(bio); /* no need for the bio anymore */
@@ -243,16 +243,16 @@ void drbd_request_endio(struct bio *bio)
243 if (__ratelimit(&drbd_ratelimit_state)) 243 if (__ratelimit(&drbd_ratelimit_state))
244 drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); 244 drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
245 245
246 if (!bio->bi_error) 246 if (!bio->bi_status)
247 drbd_panic_after_delayed_completion_of_aborted_request(device); 247 drbd_panic_after_delayed_completion_of_aborted_request(device);
248 } 248 }
249 249
250 /* to avoid recursion in __req_mod */ 250 /* to avoid recursion in __req_mod */
251 if (unlikely(bio->bi_error)) { 251 if (unlikely(bio->bi_status)) {
252 switch (bio_op(bio)) { 252 switch (bio_op(bio)) {
253 case REQ_OP_WRITE_ZEROES: 253 case REQ_OP_WRITE_ZEROES:
254 case REQ_OP_DISCARD: 254 case REQ_OP_DISCARD:
255 if (bio->bi_error == -EOPNOTSUPP) 255 if (bio->bi_status == BLK_STS_NOTSUPP)
256 what = DISCARD_COMPLETED_NOTSUPP; 256 what = DISCARD_COMPLETED_NOTSUPP;
257 else 257 else
258 what = DISCARD_COMPLETED_WITH_ERROR; 258 what = DISCARD_COMPLETED_WITH_ERROR;
@@ -272,7 +272,7 @@ void drbd_request_endio(struct bio *bio)
272 } 272 }
273 273
274 bio_put(req->private_bio); 274 bio_put(req->private_bio);
275 req->private_bio = ERR_PTR(bio->bi_error); 275 req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
276 276
277 /* not req_mod(), we need irqsave here! */ 277 /* not req_mod(), we need irqsave here! */
278 spin_lock_irqsave(&device->resource->req_lock, flags); 278 spin_lock_irqsave(&device->resource->req_lock, flags);
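The drbd endio handlers above all follow the same pattern: the completion status now lives in bio->bi_status as a blk_status_t, and blk_status_to_errno() converts it wherever an errno is still stored internally. A minimal, hypothetical endio illustrating that pattern:

#include <linux/bio.h>
#include <linux/printk.h>

static void example_endio(struct bio *bio)
{
        /* bi_error (a negative errno) was replaced by bi_status. */
        if (bio->bi_status) {
                int err = blk_status_to_errno(bio->bi_status);

                pr_warn("example: I/O failed, blk status %d, errno %d\n",
                        bio->bi_status, err);
        }
        bio_put(bio);
}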
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 60d4c7653178..ce823647a9c4 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2202,7 +2202,7 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
2202 * ============================= 2202 * =============================
2203 */ 2203 */
2204 2204
2205static void floppy_end_request(struct request *req, int error) 2205static void floppy_end_request(struct request *req, blk_status_t error)
2206{ 2206{
2207 unsigned int nr_sectors = current_count_sectors; 2207 unsigned int nr_sectors = current_count_sectors;
2208 unsigned int drive = (unsigned long)req->rq_disk->private_data; 2208 unsigned int drive = (unsigned long)req->rq_disk->private_data;
@@ -2263,7 +2263,7 @@ static void request_done(int uptodate)
2263 DRWE->last_error_generation = DRS->generation; 2263 DRWE->last_error_generation = DRS->generation;
2264 } 2264 }
2265 spin_lock_irqsave(q->queue_lock, flags); 2265 spin_lock_irqsave(q->queue_lock, flags);
2266 floppy_end_request(req, -EIO); 2266 floppy_end_request(req, BLK_STS_IOERR);
2267 spin_unlock_irqrestore(q->queue_lock, flags); 2267 spin_unlock_irqrestore(q->queue_lock, flags);
2268 } 2268 }
2269} 2269}
@@ -3780,9 +3780,9 @@ static void floppy_rb0_cb(struct bio *bio)
3780 struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private; 3780 struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
3781 int drive = cbdata->drive; 3781 int drive = cbdata->drive;
3782 3782
3783 if (bio->bi_error) { 3783 if (bio->bi_status) {
3784 pr_info("floppy: error %d while reading block 0\n", 3784 pr_info("floppy: error %d while reading block 0\n",
3785 bio->bi_error); 3785 bio->bi_status);
3786 set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); 3786 set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
3787 } 3787 }
3788 complete(&cbdata->complete); 3788 complete(&cbdata->complete);
@@ -4203,6 +4203,7 @@ static int __init do_floppy_init(void)
4203 goto out_put_disk; 4203 goto out_put_disk;
4204 } 4204 }
4205 4205
4206 blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH);
4206 blk_queue_max_hw_sectors(disks[drive]->queue, 64); 4207 blk_queue_max_hw_sectors(disks[drive]->queue, 64);
4207 disks[drive]->major = FLOPPY_MAJOR; 4208 disks[drive]->major = FLOPPY_MAJOR;
4208 disks[drive]->first_minor = TOMINOR(drive); 4209 disks[drive]->first_minor = TOMINOR(drive);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ebbd0c3fe0ed..0de11444e317 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,7 +221,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
221} 221}
222 222
223static int 223static int
224figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) 224figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
225 loff_t logical_blocksize)
225{ 226{
226 loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); 227 loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
227 sector_t x = (sector_t)size; 228 sector_t x = (sector_t)size;
@@ -233,6 +234,12 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
233 lo->lo_offset = offset; 234 lo->lo_offset = offset;
234 if (lo->lo_sizelimit != sizelimit) 235 if (lo->lo_sizelimit != sizelimit)
235 lo->lo_sizelimit = sizelimit; 236 lo->lo_sizelimit = sizelimit;
237 if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
238 lo->lo_logical_blocksize = logical_blocksize;
239 blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
240 blk_queue_logical_block_size(lo->lo_queue,
241 lo->lo_logical_blocksize);
242 }
236 set_capacity(lo->lo_disk, x); 243 set_capacity(lo->lo_disk, x);
237 bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); 244 bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
238 /* let user-space know about the new size */ 245 /* let user-space know about the new size */
@@ -457,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
457 zero_fill_bio(bio); 464 zero_fill_bio(bio);
458 } 465 }
459 466
460 blk_mq_end_request(rq, cmd->ret < 0 ? -EIO : 0); 467 blk_mq_end_request(rq, cmd->ret < 0 ? BLK_STS_IOERR : BLK_STS_OK);
461} 468}
462 469
463static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) 470static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
@@ -813,6 +820,7 @@ static void loop_config_discard(struct loop_device *lo)
813 struct file *file = lo->lo_backing_file; 820 struct file *file = lo->lo_backing_file;
814 struct inode *inode = file->f_mapping->host; 821 struct inode *inode = file->f_mapping->host;
815 struct request_queue *q = lo->lo_queue; 822 struct request_queue *q = lo->lo_queue;
823 int lo_bits = 9;
816 824
817 /* 825 /*
818 * We use punch hole to reclaim the free space used by the 826 * We use punch hole to reclaim the free space used by the
@@ -832,8 +840,11 @@ static void loop_config_discard(struct loop_device *lo)
832 840
833 q->limits.discard_granularity = inode->i_sb->s_blocksize; 841 q->limits.discard_granularity = inode->i_sb->s_blocksize;
834 q->limits.discard_alignment = 0; 842 q->limits.discard_alignment = 0;
835 blk_queue_max_discard_sectors(q, UINT_MAX >> 9); 843 if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
836 blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); 844 lo_bits = blksize_bits(lo->lo_logical_blocksize);
845
846 blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
847 blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
837 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 848 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
838} 849}
839 850
@@ -843,10 +854,16 @@ static void loop_unprepare_queue(struct loop_device *lo)
843 kthread_stop(lo->worker_task); 854 kthread_stop(lo->worker_task);
844} 855}
845 856
857static int loop_kthread_worker_fn(void *worker_ptr)
858{
859 current->flags |= PF_LESS_THROTTLE;
860 return kthread_worker_fn(worker_ptr);
861}
862
846static int loop_prepare_queue(struct loop_device *lo) 863static int loop_prepare_queue(struct loop_device *lo)
847{ 864{
848 kthread_init_worker(&lo->worker); 865 kthread_init_worker(&lo->worker);
849 lo->worker_task = kthread_run(kthread_worker_fn, 866 lo->worker_task = kthread_run(loop_kthread_worker_fn,
850 &lo->worker, "loop%d", lo->lo_number); 867 &lo->worker, "loop%d", lo->lo_number);
851 if (IS_ERR(lo->worker_task)) 868 if (IS_ERR(lo->worker_task))
852 return -ENOMEM; 869 return -ENOMEM;
@@ -921,6 +938,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
921 938
922 lo->use_dio = false; 939 lo->use_dio = false;
923 lo->lo_blocksize = lo_blocksize; 940 lo->lo_blocksize = lo_blocksize;
941 lo->lo_logical_blocksize = 512;
924 lo->lo_device = bdev; 942 lo->lo_device = bdev;
925 lo->lo_flags = lo_flags; 943 lo->lo_flags = lo_flags;
926 lo->lo_backing_file = file; 944 lo->lo_backing_file = file;
@@ -1086,6 +1104,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1086 int err; 1104 int err;
1087 struct loop_func_table *xfer; 1105 struct loop_func_table *xfer;
1088 kuid_t uid = current_uid(); 1106 kuid_t uid = current_uid();
1107 int lo_flags = lo->lo_flags;
1089 1108
1090 if (lo->lo_encrypt_key_size && 1109 if (lo->lo_encrypt_key_size &&
1091 !uid_eq(lo->lo_key_owner, uid) && 1110 !uid_eq(lo->lo_key_owner, uid) &&
@@ -1118,12 +1137,30 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1118 if (err) 1137 if (err)
1119 goto exit; 1138 goto exit;
1120 1139
1140 if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
1141 if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
1142 lo->lo_logical_blocksize = 512;
1143 lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
1144 if (LO_INFO_BLOCKSIZE(info) != 512 &&
1145 LO_INFO_BLOCKSIZE(info) != 1024 &&
1146 LO_INFO_BLOCKSIZE(info) != 2048 &&
1147 LO_INFO_BLOCKSIZE(info) != 4096)
1148 return -EINVAL;
1149 if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
1150 return -EINVAL;
1151 }
1152
1121 if (lo->lo_offset != info->lo_offset || 1153 if (lo->lo_offset != info->lo_offset ||
1122 lo->lo_sizelimit != info->lo_sizelimit) 1154 lo->lo_sizelimit != info->lo_sizelimit ||
1123 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { 1155 lo->lo_flags != lo_flags ||
1156 ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
1157 lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
1158 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
1159 LO_INFO_BLOCKSIZE(info))) {
1124 err = -EFBIG; 1160 err = -EFBIG;
1125 goto exit; 1161 goto exit;
1126 } 1162 }
1163 }
1127 1164
1128 loop_config_discard(lo); 1165 loop_config_discard(lo);
1129 1166
@@ -1306,12 +1343,13 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
1306 return err; 1343 return err;
1307} 1344}
1308 1345
1309static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) 1346static int loop_set_capacity(struct loop_device *lo)
1310{ 1347{
1311 if (unlikely(lo->lo_state != Lo_bound)) 1348 if (unlikely(lo->lo_state != Lo_bound))
1312 return -ENXIO; 1349 return -ENXIO;
1313 1350
1314 return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); 1351 return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
1352 lo->lo_logical_blocksize);
1315} 1353}
1316 1354
1317static int loop_set_dio(struct loop_device *lo, unsigned long arg) 1355static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1369,7 +1407,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1369 case LOOP_SET_CAPACITY: 1407 case LOOP_SET_CAPACITY:
1370 err = -EPERM; 1408 err = -EPERM;
1371 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1409 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
1372 err = loop_set_capacity(lo, bdev); 1410 err = loop_set_capacity(lo);
1373 break; 1411 break;
1374 case LOOP_SET_DIRECT_IO: 1412 case LOOP_SET_DIRECT_IO:
1375 err = -EPERM; 1413 err = -EPERM;
@@ -1645,7 +1683,7 @@ int loop_unregister_transfer(int number)
1645EXPORT_SYMBOL(loop_register_transfer); 1683EXPORT_SYMBOL(loop_register_transfer);
1646EXPORT_SYMBOL(loop_unregister_transfer); 1684EXPORT_SYMBOL(loop_unregister_transfer);
1647 1685
1648static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, 1686static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1649 const struct blk_mq_queue_data *bd) 1687 const struct blk_mq_queue_data *bd)
1650{ 1688{
1651 struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 1689 struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -1654,7 +1692,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1654 blk_mq_start_request(bd->rq); 1692 blk_mq_start_request(bd->rq);
1655 1693
1656 if (lo->lo_state != Lo_bound) 1694 if (lo->lo_state != Lo_bound)
1657 return BLK_MQ_RQ_QUEUE_ERROR; 1695 return BLK_STS_IOERR;
1658 1696
1659 switch (req_op(cmd->rq)) { 1697 switch (req_op(cmd->rq)) {
1660 case REQ_OP_FLUSH: 1698 case REQ_OP_FLUSH:
@@ -1669,7 +1707,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1669 1707
1670 kthread_queue_work(&lo->worker, &cmd->work); 1708 kthread_queue_work(&lo->worker, &cmd->work);
1671 1709
1672 return BLK_MQ_RQ_QUEUE_OK; 1710 return BLK_STS_OK;
1673} 1711}
1674 1712
1675static void loop_handle_cmd(struct loop_cmd *cmd) 1713static void loop_handle_cmd(struct loop_cmd *cmd)
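loop is one of several drivers in this pull whose .queue_rq switches from the BLK_MQ_RQ_QUEUE_* return codes to returning a blk_status_t directly. A hedged, self-contained sketch of the new convention (the device-state flag is hypothetical):

#include <linux/blk-mq.h>

static bool example_device_online = true;   /* stands in for real driver state */

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                     const struct blk_mq_queue_data *bd)
{
        blk_mq_start_request(bd->rq);

        if (!example_device_online)
                return BLK_STS_IOERR;           /* was BLK_MQ_RQ_QUEUE_ERROR */

        /*
         * A driver that runs out of internal resources would return
         * BLK_STS_RESOURCE (was BLK_MQ_RQ_QUEUE_BUSY) to be retried later.
         */
        blk_mq_end_request(bd->rq, BLK_STS_OK); /* complete immediately */
        return BLK_STS_OK;                      /* was BLK_MQ_RQ_QUEUE_OK */
}

static const struct blk_mq_ops example_mq_ops = {
        .queue_rq = example_queue_rq,
};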
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index fecd3f97ef8c..2c096b9a17b8 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,6 +49,7 @@ struct loop_device {
49 struct file * lo_backing_file; 49 struct file * lo_backing_file;
50 struct block_device *lo_device; 50 struct block_device *lo_device;
51 unsigned lo_blocksize; 51 unsigned lo_blocksize;
52 unsigned lo_logical_blocksize;
52 void *key_data; 53 void *key_data;
53 54
54 gfp_t old_gfp_mask; 55 gfp_t old_gfp_mask;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3a779a4f5653..61b046f256ca 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -532,7 +532,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
532static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, 532static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
533 struct smart_attr *attrib); 533 struct smart_attr *attrib);
534 534
535static void mtip_complete_command(struct mtip_cmd *cmd, int status) 535static void mtip_complete_command(struct mtip_cmd *cmd, blk_status_t status)
536{ 536{
537 struct request *req = blk_mq_rq_from_pdu(cmd); 537 struct request *req = blk_mq_rq_from_pdu(cmd);
538 538
@@ -568,7 +568,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
568 if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { 568 if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
569 cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); 569 cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
570 dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); 570 dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
571 mtip_complete_command(cmd, -EIO); 571 mtip_complete_command(cmd, BLK_STS_IOERR);
572 return; 572 return;
573 } 573 }
574 574
@@ -667,7 +667,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
667 tag, 667 tag,
668 fail_reason != NULL ? 668 fail_reason != NULL ?
669 fail_reason : "unknown"); 669 fail_reason : "unknown");
670 mtip_complete_command(cmd, -ENODATA); 670 mtip_complete_command(cmd, BLK_STS_MEDIUM);
671 continue; 671 continue;
672 } 672 }
673 } 673 }
@@ -690,7 +690,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
690 dev_warn(&port->dd->pdev->dev, 690 dev_warn(&port->dd->pdev->dev,
691 "retiring tag %d\n", tag); 691 "retiring tag %d\n", tag);
692 692
693 mtip_complete_command(cmd, -EIO); 693 mtip_complete_command(cmd, BLK_STS_IOERR);
694 } 694 }
695 } 695 }
696 print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt); 696 print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
@@ -1063,23 +1063,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
1063 /* insert request and run queue */ 1063 /* insert request and run queue */
1064 blk_execute_rq(rq->q, NULL, rq, true); 1064 blk_execute_rq(rq->q, NULL, rq, true);
1065 1065
1066 rv = int_cmd->status; 1066 if (int_cmd->status) {
1067 if (rv < 0) { 1067 dev_err(&dd->pdev->dev, "Internal command [%02X] failed %d\n",
1068 if (rv == -ERESTARTSYS) { /* interrupted */ 1068 fis->command, int_cmd->status);
1069 dev_err(&dd->pdev->dev, 1069 rv = -EIO;
1070 "Internal command [%02X] was interrupted after %u ms\n",
1071 fis->command,
1072 jiffies_to_msecs(jiffies - start));
1073 rv = -EINTR;
1074 goto exec_ic_exit;
1075 } else if (rv == 0) /* timeout */
1076 dev_err(&dd->pdev->dev,
1077 "Internal command did not complete [%02X] within timeout of %lu ms\n",
1078 fis->command, timeout);
1079 else
1080 dev_err(&dd->pdev->dev,
1081 "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
1082 fis->command, rv, timeout);
1083 1070
1084 if (mtip_check_surprise_removal(dd->pdev) || 1071 if (mtip_check_surprise_removal(dd->pdev) ||
1085 test_bit(MTIP_DDF_REMOVE_PENDING_BIT, 1072 test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
@@ -2753,7 +2740,7 @@ static void mtip_abort_cmd(struct request *req, void *data,
2753 dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag); 2740 dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
2754 2741
2755 clear_bit(req->tag, dd->port->cmds_to_issue); 2742 clear_bit(req->tag, dd->port->cmds_to_issue);
2756 cmd->status = -EIO; 2743 cmd->status = BLK_STS_IOERR;
2757 mtip_softirq_done_fn(req); 2744 mtip_softirq_done_fn(req);
2758} 2745}
2759 2746
@@ -3597,7 +3584,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
3597 int err; 3584 int err;
3598 3585
3599 err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); 3586 err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
3600 blk_mq_end_request(rq, err); 3587 blk_mq_end_request(rq, err ? BLK_STS_IOERR : BLK_STS_OK);
3601 return 0; 3588 return 0;
3602 } 3589 }
3603 3590
@@ -3633,8 +3620,8 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
3633 return false; 3620 return false;
3634} 3621}
3635 3622
3636static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx, 3623static blk_status_t mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
3637 struct request *rq) 3624 struct request *rq)
3638{ 3625{
3639 struct driver_data *dd = hctx->queue->queuedata; 3626 struct driver_data *dd = hctx->queue->queuedata;
3640 struct mtip_int_cmd *icmd = rq->special; 3627 struct mtip_int_cmd *icmd = rq->special;
@@ -3642,7 +3629,7 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
3642 struct mtip_cmd_sg *command_sg; 3629 struct mtip_cmd_sg *command_sg;
3643 3630
3644 if (mtip_commands_active(dd->port)) 3631 if (mtip_commands_active(dd->port))
3645 return BLK_MQ_RQ_QUEUE_BUSY; 3632 return BLK_STS_RESOURCE;
3646 3633
3647 /* Populate the SG list */ 3634 /* Populate the SG list */
3648 cmd->command_header->opts = 3635 cmd->command_header->opts =
@@ -3666,10 +3653,10 @@ static int mtip_issue_reserved_cmd(struct blk_mq_hw_ctx *hctx,
3666 3653
3667 blk_mq_start_request(rq); 3654 blk_mq_start_request(rq);
3668 mtip_issue_non_ncq_command(dd->port, rq->tag); 3655 mtip_issue_non_ncq_command(dd->port, rq->tag);
3669 return BLK_MQ_RQ_QUEUE_OK; 3656 return 0;
3670} 3657}
3671 3658
3672static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, 3659static blk_status_t mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
3673 const struct blk_mq_queue_data *bd) 3660 const struct blk_mq_queue_data *bd)
3674{ 3661{
3675 struct request *rq = bd->rq; 3662 struct request *rq = bd->rq;
@@ -3681,15 +3668,14 @@ static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
3681 return mtip_issue_reserved_cmd(hctx, rq); 3668 return mtip_issue_reserved_cmd(hctx, rq);
3682 3669
3683 if (unlikely(mtip_check_unal_depth(hctx, rq))) 3670 if (unlikely(mtip_check_unal_depth(hctx, rq)))
3684 return BLK_MQ_RQ_QUEUE_BUSY; 3671 return BLK_STS_RESOURCE;
3685 3672
3686 blk_mq_start_request(rq); 3673 blk_mq_start_request(rq);
3687 3674
3688 ret = mtip_submit_request(hctx, rq); 3675 ret = mtip_submit_request(hctx, rq);
3689 if (likely(!ret)) 3676 if (likely(!ret))
3690 return BLK_MQ_RQ_QUEUE_OK; 3677 return BLK_STS_OK;
3691 3678 return BLK_STS_IOERR;
3692 return BLK_MQ_RQ_QUEUE_ERROR;
3693} 3679}
3694 3680
3695static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq, 3681static void mtip_free_cmd(struct blk_mq_tag_set *set, struct request *rq,
@@ -3730,7 +3716,7 @@ static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req,
3730 if (reserved) { 3716 if (reserved) {
3731 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); 3717 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
3732 3718
3733 cmd->status = -ETIME; 3719 cmd->status = BLK_STS_TIMEOUT;
3734 return BLK_EH_HANDLED; 3720 return BLK_EH_HANDLED;
3735 } 3721 }
3736 3722
@@ -3961,7 +3947,7 @@ static void mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv)
3961{ 3947{
3962 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); 3948 struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
3963 3949
3964 cmd->status = -ENODEV; 3950 cmd->status = BLK_STS_IOERR;
3965 blk_mq_complete_request(rq); 3951 blk_mq_complete_request(rq);
3966} 3952}
3967 3953
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 37b8e3e0bb78..e8286af50e16 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -342,7 +342,7 @@ struct mtip_cmd {
342 int retries; /* The number of retries left for this command. */ 342 int retries; /* The number of retries left for this command. */
343 343
344 int direction; /* Data transfer direction */ 344 int direction; /* Data transfer direction */
345 int status; 345 blk_status_t status;
346}; 346};
347 347
348/* Structure used to describe a port. */ 348/* Structure used to describe a port. */
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f3f191ba8ca4..977ec960dd2f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -116,7 +116,7 @@ struct nbd_cmd {
116 int index; 116 int index;
117 int cookie; 117 int cookie;
118 struct completion send_complete; 118 struct completion send_complete;
119 int status; 119 blk_status_t status;
120}; 120};
121 121
122#if IS_ENABLED(CONFIG_DEBUG_FS) 122#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -286,7 +286,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
286 struct nbd_config *config; 286 struct nbd_config *config;
287 287
288 if (!refcount_inc_not_zero(&nbd->config_refs)) { 288 if (!refcount_inc_not_zero(&nbd->config_refs)) {
289 cmd->status = -EIO; 289 cmd->status = BLK_STS_TIMEOUT;
290 return BLK_EH_HANDLED; 290 return BLK_EH_HANDLED;
291 } 291 }
292 292
@@ -331,7 +331,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
331 "Connection timed out\n"); 331 "Connection timed out\n");
332 } 332 }
333 set_bit(NBD_TIMEDOUT, &config->runtime_flags); 333 set_bit(NBD_TIMEDOUT, &config->runtime_flags);
334 cmd->status = -EIO; 334 cmd->status = BLK_STS_IOERR;
335 sock_shutdown(nbd); 335 sock_shutdown(nbd);
336 nbd_config_put(nbd); 336 nbd_config_put(nbd);
337 337
@@ -400,6 +400,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
400 unsigned long size = blk_rq_bytes(req); 400 unsigned long size = blk_rq_bytes(req);
401 struct bio *bio; 401 struct bio *bio;
402 u32 type; 402 u32 type;
403 u32 nbd_cmd_flags = 0;
403 u32 tag = blk_mq_unique_tag(req); 404 u32 tag = blk_mq_unique_tag(req);
404 int sent = nsock->sent, skip = 0; 405 int sent = nsock->sent, skip = 0;
405 406
@@ -429,6 +430,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
429 return -EIO; 430 return -EIO;
430 } 431 }
431 432
433 if (req->cmd_flags & REQ_FUA)
434 nbd_cmd_flags |= NBD_CMD_FLAG_FUA;
435
432 /* We did a partial send previously, and we at least sent the whole 436 /* We did a partial send previously, and we at least sent the whole
433 * request struct, so just go and send the rest of the pages in the 437 * request struct, so just go and send the rest of the pages in the
434 * request. 438 * request.
@@ -442,7 +446,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
442 } 446 }
443 cmd->index = index; 447 cmd->index = index;
444 cmd->cookie = nsock->cookie; 448 cmd->cookie = nsock->cookie;
445 request.type = htonl(type); 449 request.type = htonl(type | nbd_cmd_flags);
446 if (type != NBD_CMD_FLUSH) { 450 if (type != NBD_CMD_FLUSH) {
447 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 451 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
448 request.len = htonl(size); 452 request.len = htonl(size);
@@ -465,7 +469,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
465 nsock->pending = req; 469 nsock->pending = req;
466 nsock->sent = sent; 470 nsock->sent = sent;
467 } 471 }
468 return BLK_MQ_RQ_QUEUE_BUSY; 472 return BLK_STS_RESOURCE;
469 } 473 }
470 dev_err_ratelimited(disk_to_dev(nbd->disk), 474 dev_err_ratelimited(disk_to_dev(nbd->disk),
471 "Send control failed (result %d)\n", result); 475 "Send control failed (result %d)\n", result);
@@ -506,7 +510,7 @@ send_pages:
506 */ 510 */
507 nsock->pending = req; 511 nsock->pending = req;
508 nsock->sent = sent; 512 nsock->sent = sent;
509 return BLK_MQ_RQ_QUEUE_BUSY; 513 return BLK_STS_RESOURCE;
510 } 514 }
511 dev_err(disk_to_dev(nbd->disk), 515 dev_err(disk_to_dev(nbd->disk),
512 "Send data failed (result %d)\n", 516 "Send data failed (result %d)\n",
@@ -574,7 +578,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
574 if (ntohl(reply.error)) { 578 if (ntohl(reply.error)) {
575 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", 579 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
576 ntohl(reply.error)); 580 ntohl(reply.error));
577 cmd->status = -EIO; 581 cmd->status = BLK_STS_IOERR;
578 return cmd; 582 return cmd;
579 } 583 }
580 584
@@ -599,7 +603,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
599 */ 603 */
600 if (nbd_disconnected(config) || 604 if (nbd_disconnected(config) ||
601 config->num_connections <= 1) { 605 config->num_connections <= 1) {
602 cmd->status = -EIO; 606 cmd->status = BLK_STS_IOERR;
603 return cmd; 607 return cmd;
604 } 608 }
605 return ERR_PTR(-EIO); 609 return ERR_PTR(-EIO);
@@ -651,7 +655,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved)
651 if (!blk_mq_request_started(req)) 655 if (!blk_mq_request_started(req))
652 return; 656 return;
653 cmd = blk_mq_rq_to_pdu(req); 657 cmd = blk_mq_rq_to_pdu(req);
654 cmd->status = -EIO; 658 cmd->status = BLK_STS_IOERR;
655 blk_mq_complete_request(req); 659 blk_mq_complete_request(req);
656} 660}
657 661
@@ -740,7 +744,7 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
740 nbd_config_put(nbd); 744 nbd_config_put(nbd);
741 return -EINVAL; 745 return -EINVAL;
742 } 746 }
743 cmd->status = 0; 747 cmd->status = BLK_STS_OK;
744again: 748again:
745 nsock = config->socks[index]; 749 nsock = config->socks[index];
746 mutex_lock(&nsock->tx_lock); 750 mutex_lock(&nsock->tx_lock);
@@ -794,7 +798,7 @@ out:
794 return ret; 798 return ret;
795} 799}
796 800
797static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, 801static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
798 const struct blk_mq_queue_data *bd) 802 const struct blk_mq_queue_data *bd)
799{ 803{
800 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 804 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -818,13 +822,9 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
818 * appropriate. 822 * appropriate.
819 */ 823 */
820 ret = nbd_handle_cmd(cmd, hctx->queue_num); 824 ret = nbd_handle_cmd(cmd, hctx->queue_num);
821 if (ret < 0)
822 ret = BLK_MQ_RQ_QUEUE_ERROR;
823 if (!ret)
824 ret = BLK_MQ_RQ_QUEUE_OK;
825 complete(&cmd->send_complete); 825 complete(&cmd->send_complete);
826 826
827 return ret; 827 return ret < 0 ? BLK_STS_IOERR : BLK_STS_OK;
828} 828}
829 829
830static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, 830static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
@@ -910,6 +910,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
910 continue; 910 continue;
911 } 911 }
912 sk_set_memalloc(sock->sk); 912 sk_set_memalloc(sock->sk);
913 sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
913 atomic_inc(&config->recv_threads); 914 atomic_inc(&config->recv_threads);
914 refcount_inc(&nbd->config_refs); 915 refcount_inc(&nbd->config_refs);
915 old = nsock->sock; 916 old = nsock->sock;
@@ -957,8 +958,12 @@ static void nbd_parse_flags(struct nbd_device *nbd)
957 set_disk_ro(nbd->disk, false); 958 set_disk_ro(nbd->disk, false);
958 if (config->flags & NBD_FLAG_SEND_TRIM) 959 if (config->flags & NBD_FLAG_SEND_TRIM)
959 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); 960 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
960 if (config->flags & NBD_FLAG_SEND_FLUSH) 961 if (config->flags & NBD_FLAG_SEND_FLUSH) {
961 blk_queue_write_cache(nbd->disk->queue, true, false); 962 if (config->flags & NBD_FLAG_SEND_FUA)
963 blk_queue_write_cache(nbd->disk->queue, true, true);
964 else
965 blk_queue_write_cache(nbd->disk->queue, true, false);
966 }
962 else 967 else
963 blk_queue_write_cache(nbd->disk->queue, false, false); 968 blk_queue_write_cache(nbd->disk->queue, false, false);
964} 969}
@@ -1071,6 +1076,7 @@ static int nbd_start_device(struct nbd_device *nbd)
1071 return -ENOMEM; 1076 return -ENOMEM;
1072 } 1077 }
1073 sk_set_memalloc(config->socks[i]->sock->sk); 1078 sk_set_memalloc(config->socks[i]->sock->sk);
1079 config->socks[i]->sock->sk->sk_sndtimeo = nbd->tag_set.timeout;
1074 atomic_inc(&config->recv_threads); 1080 atomic_inc(&config->recv_threads);
1075 refcount_inc(&nbd->config_refs); 1081 refcount_inc(&nbd->config_refs);
1076 INIT_WORK(&args->work, recv_work); 1082 INIT_WORK(&args->work, recv_work);
@@ -1305,6 +1311,8 @@ static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
1305 seq_puts(s, "NBD_FLAG_READ_ONLY\n"); 1311 seq_puts(s, "NBD_FLAG_READ_ONLY\n");
1306 if (flags & NBD_FLAG_SEND_FLUSH) 1312 if (flags & NBD_FLAG_SEND_FLUSH)
1307 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n"); 1313 seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
1314 if (flags & NBD_FLAG_SEND_FUA)
1315 seq_puts(s, "NBD_FLAG_SEND_FUA\n");
1308 if (flags & NBD_FLAG_SEND_TRIM) 1316 if (flags & NBD_FLAG_SEND_TRIM)
1309 seq_puts(s, "NBD_FLAG_SEND_TRIM\n"); 1317 seq_puts(s, "NBD_FLAG_SEND_TRIM\n");
1310 1318
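The NBD_FLAG_SEND_FUA handling relies on blk_queue_write_cache() taking independent write-cache and FUA flags. A short illustrative sketch, with hypothetical feature bits standing in for the protocol flags:

#include <linux/blkdev.h>

#define EXAMPLE_FEAT_FLUSH      (1U << 0)   /* hypothetical backend feature */
#define EXAMPLE_FEAT_FUA        (1U << 1)   /* hypothetical backend feature */

static void example_setup_write_cache(struct request_queue *q,
                                      unsigned int features)
{
        bool wc  = features & EXAMPLE_FEAT_FLUSH;
        bool fua = features & EXAMPLE_FEAT_FUA;

        /* Advertise REQ_PREFLUSH and REQ_FUA support to the block layer. */
        blk_queue_write_cache(q, wc, fua);
}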
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index d946e1eeac8e..71f4422eba81 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -35,7 +35,8 @@ struct nullb {
35 struct request_queue *q; 35 struct request_queue *q;
36 struct gendisk *disk; 36 struct gendisk *disk;
37 struct nvm_dev *ndev; 37 struct nvm_dev *ndev;
38 struct blk_mq_tag_set tag_set; 38 struct blk_mq_tag_set *tag_set;
39 struct blk_mq_tag_set __tag_set;
39 struct hrtimer timer; 40 struct hrtimer timer;
40 unsigned int queue_depth; 41 unsigned int queue_depth;
41 spinlock_t lock; 42 spinlock_t lock;
@@ -50,6 +51,7 @@ static struct mutex lock;
50static int null_major; 51static int null_major;
51static int nullb_indexes; 52static int nullb_indexes;
52static struct kmem_cache *ppa_cache; 53static struct kmem_cache *ppa_cache;
54static struct blk_mq_tag_set tag_set;
53 55
54enum { 56enum {
55 NULL_IRQ_NONE = 0, 57 NULL_IRQ_NONE = 0,
@@ -109,7 +111,7 @@ static int bs = 512;
109module_param(bs, int, S_IRUGO); 111module_param(bs, int, S_IRUGO);
110MODULE_PARM_DESC(bs, "Block size (in bytes)"); 112MODULE_PARM_DESC(bs, "Block size (in bytes)");
111 113
112static int nr_devices = 2; 114static int nr_devices = 1;
113module_param(nr_devices, int, S_IRUGO); 115module_param(nr_devices, int, S_IRUGO);
114MODULE_PARM_DESC(nr_devices, "Number of devices to register"); 116MODULE_PARM_DESC(nr_devices, "Number of devices to register");
115 117
@@ -121,6 +123,10 @@ static bool blocking;
121module_param(blocking, bool, S_IRUGO); 123module_param(blocking, bool, S_IRUGO);
122MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device"); 124MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
123 125
126static bool shared_tags;
127module_param(shared_tags, bool, S_IRUGO);
128MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
129
124static int irqmode = NULL_IRQ_SOFTIRQ; 130static int irqmode = NULL_IRQ_SOFTIRQ;
125 131
126static int null_set_irqmode(const char *str, const struct kernel_param *kp) 132static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -229,11 +235,11 @@ static void end_cmd(struct nullb_cmd *cmd)
229 235
230 switch (queue_mode) { 236 switch (queue_mode) {
231 case NULL_Q_MQ: 237 case NULL_Q_MQ:
232 blk_mq_end_request(cmd->rq, 0); 238 blk_mq_end_request(cmd->rq, BLK_STS_OK);
233 return; 239 return;
234 case NULL_Q_RQ: 240 case NULL_Q_RQ:
235 INIT_LIST_HEAD(&cmd->rq->queuelist); 241 INIT_LIST_HEAD(&cmd->rq->queuelist);
236 blk_end_request_all(cmd->rq, 0); 242 blk_end_request_all(cmd->rq, BLK_STS_OK);
237 break; 243 break;
238 case NULL_Q_BIO: 244 case NULL_Q_BIO:
239 bio_endio(cmd->bio); 245 bio_endio(cmd->bio);
@@ -356,7 +362,7 @@ static void null_request_fn(struct request_queue *q)
356 } 362 }
357} 363}
358 364
359static int null_queue_rq(struct blk_mq_hw_ctx *hctx, 365static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
360 const struct blk_mq_queue_data *bd) 366 const struct blk_mq_queue_data *bd)
361{ 367{
362 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); 368 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
@@ -373,34 +379,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
373 blk_mq_start_request(bd->rq); 379 blk_mq_start_request(bd->rq);
374 380
375 null_handle_cmd(cmd); 381 null_handle_cmd(cmd);
376 return BLK_MQ_RQ_QUEUE_OK; 382 return BLK_STS_OK;
377}
378
379static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
380{
381 BUG_ON(!nullb);
382 BUG_ON(!nq);
383
384 init_waitqueue_head(&nq->wait);
385 nq->queue_depth = nullb->queue_depth;
386}
387
388static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
389 unsigned int index)
390{
391 struct nullb *nullb = data;
392 struct nullb_queue *nq = &nullb->queues[index];
393
394 hctx->driver_data = nq;
395 null_init_queue(nullb, nq);
396 nullb->nr_queues++;
397
398 return 0;
399} 383}
400 384
401static const struct blk_mq_ops null_mq_ops = { 385static const struct blk_mq_ops null_mq_ops = {
402 .queue_rq = null_queue_rq, 386 .queue_rq = null_queue_rq,
403 .init_hctx = null_init_hctx,
404 .complete = null_softirq_done_fn, 387 .complete = null_softirq_done_fn,
405}; 388};
406 389
@@ -422,11 +405,12 @@ static void cleanup_queues(struct nullb *nullb)
422 405
423#ifdef CONFIG_NVM 406#ifdef CONFIG_NVM
424 407
425static void null_lnvm_end_io(struct request *rq, int error) 408static void null_lnvm_end_io(struct request *rq, blk_status_t status)
426{ 409{
427 struct nvm_rq *rqd = rq->end_io_data; 410 struct nvm_rq *rqd = rq->end_io_data;
428 411
429 rqd->error = error; 412 /* XXX: lighnvm core seems to expect NVM_RSP_* values here.. */
413 rqd->error = status ? -EIO : 0;
430 nvm_end_io(rqd); 414 nvm_end_io(rqd);
431 415
432 blk_put_request(rq); 416 blk_put_request(rq);
@@ -591,8 +575,8 @@ static void null_del_dev(struct nullb *nullb)
591 else 575 else
592 del_gendisk(nullb->disk); 576 del_gendisk(nullb->disk);
593 blk_cleanup_queue(nullb->q); 577 blk_cleanup_queue(nullb->q);
594 if (queue_mode == NULL_Q_MQ) 578 if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
595 blk_mq_free_tag_set(&nullb->tag_set); 579 blk_mq_free_tag_set(nullb->tag_set);
596 if (!use_lightnvm) 580 if (!use_lightnvm)
597 put_disk(nullb->disk); 581 put_disk(nullb->disk);
598 cleanup_queues(nullb); 582 cleanup_queues(nullb);
@@ -614,6 +598,32 @@ static const struct block_device_operations null_fops = {
614 .release = null_release, 598 .release = null_release,
615}; 599};
616 600
601static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
602{
603 BUG_ON(!nullb);
604 BUG_ON(!nq);
605
606 init_waitqueue_head(&nq->wait);
607 nq->queue_depth = nullb->queue_depth;
608}
609
610static void null_init_queues(struct nullb *nullb)
611{
612 struct request_queue *q = nullb->q;
613 struct blk_mq_hw_ctx *hctx;
614 struct nullb_queue *nq;
615 int i;
616
617 queue_for_each_hw_ctx(q, hctx, i) {
618 if (!hctx->nr_ctx || !hctx->tags)
619 continue;
620 nq = &nullb->queues[i];
621 hctx->driver_data = nq;
622 null_init_queue(nullb, nq);
623 nullb->nr_queues++;
624 }
625}
626
617static int setup_commands(struct nullb_queue *nq) 627static int setup_commands(struct nullb_queue *nq)
618{ 628{
619 struct nullb_cmd *cmd; 629 struct nullb_cmd *cmd;
@@ -694,6 +704,22 @@ static int null_gendisk_register(struct nullb *nullb)
694 return 0; 704 return 0;
695} 705}
696 706
707static int null_init_tag_set(struct blk_mq_tag_set *set)
708{
709 set->ops = &null_mq_ops;
710 set->nr_hw_queues = submit_queues;
711 set->queue_depth = hw_queue_depth;
712 set->numa_node = home_node;
713 set->cmd_size = sizeof(struct nullb_cmd);
714 set->flags = BLK_MQ_F_SHOULD_MERGE;
715 set->driver_data = NULL;
716
717 if (blocking)
718 set->flags |= BLK_MQ_F_BLOCKING;
719
720 return blk_mq_alloc_tag_set(set);
721}
722
697static int null_add_dev(void) 723static int null_add_dev(void)
698{ 724{
699 struct nullb *nullb; 725 struct nullb *nullb;
@@ -715,26 +741,23 @@ static int null_add_dev(void)
715 goto out_free_nullb; 741 goto out_free_nullb;
716 742
717 if (queue_mode == NULL_Q_MQ) { 743 if (queue_mode == NULL_Q_MQ) {
718 nullb->tag_set.ops = &null_mq_ops; 744 if (shared_tags) {
719 nullb->tag_set.nr_hw_queues = submit_queues; 745 nullb->tag_set = &tag_set;
720 nullb->tag_set.queue_depth = hw_queue_depth; 746 rv = 0;
721 nullb->tag_set.numa_node = home_node; 747 } else {
722 nullb->tag_set.cmd_size = sizeof(struct nullb_cmd); 748 nullb->tag_set = &nullb->__tag_set;
723 nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 749 rv = null_init_tag_set(nullb->tag_set);
724 nullb->tag_set.driver_data = nullb; 750 }
725 751
726 if (blocking)
727 nullb->tag_set.flags |= BLK_MQ_F_BLOCKING;
728
729 rv = blk_mq_alloc_tag_set(&nullb->tag_set);
730 if (rv) 752 if (rv)
731 goto out_cleanup_queues; 753 goto out_cleanup_queues;
732 754
733 nullb->q = blk_mq_init_queue(&nullb->tag_set); 755 nullb->q = blk_mq_init_queue(nullb->tag_set);
734 if (IS_ERR(nullb->q)) { 756 if (IS_ERR(nullb->q)) {
735 rv = -ENOMEM; 757 rv = -ENOMEM;
736 goto out_cleanup_tags; 758 goto out_cleanup_tags;
737 } 759 }
760 null_init_queues(nullb);
738 } else if (queue_mode == NULL_Q_BIO) { 761 } else if (queue_mode == NULL_Q_BIO) {
739 nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); 762 nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
740 if (!nullb->q) { 763 if (!nullb->q) {
@@ -787,8 +810,8 @@ static int null_add_dev(void)
787out_cleanup_blk_queue: 810out_cleanup_blk_queue:
788 blk_cleanup_queue(nullb->q); 811 blk_cleanup_queue(nullb->q);
789out_cleanup_tags: 812out_cleanup_tags:
790 if (queue_mode == NULL_Q_MQ) 813 if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
791 blk_mq_free_tag_set(&nullb->tag_set); 814 blk_mq_free_tag_set(nullb->tag_set);
792out_cleanup_queues: 815out_cleanup_queues:
793 cleanup_queues(nullb); 816 cleanup_queues(nullb);
794out_free_nullb: 817out_free_nullb:
@@ -821,6 +844,9 @@ static int __init null_init(void)
821 queue_mode = NULL_Q_MQ; 844 queue_mode = NULL_Q_MQ;
822 } 845 }
823 846
847 if (queue_mode == NULL_Q_MQ && shared_tags)
848 null_init_tag_set(&tag_set);
849
824 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) { 850 if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
825 if (submit_queues < nr_online_nodes) { 851 if (submit_queues < nr_online_nodes) {
826 pr_warn("null_blk: submit_queues param is set to %u.", 852 pr_warn("null_blk: submit_queues param is set to %u.",
@@ -881,6 +907,9 @@ static void __exit null_exit(void)
881 } 907 }
882 mutex_unlock(&lock); 908 mutex_unlock(&lock);
883 909
910 if (queue_mode == NULL_Q_MQ && shared_tags)
911 blk_mq_free_tag_set(&tag_set);
912
884 kmem_cache_destroy(ppa_cache); 913 kmem_cache_destroy(ppa_cache);
885} 914}
886 915
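null_blk's new shared_tags option demonstrates that a single blk_mq_tag_set can back multiple request queues. A hedged sketch of that pattern with illustrative names; the depth and queue count are arbitrary:

#include <linux/blk-mq.h>
#include <linux/numa.h>

static struct blk_mq_tag_set example_shared_set;   /* one set for all devices */

static int example_setup_shared_set(const struct blk_mq_ops *ops)
{
        example_shared_set.ops = ops;
        example_shared_set.nr_hw_queues = 1;
        example_shared_set.queue_depth = 64;
        example_shared_set.numa_node = NUMA_NO_NODE;
        example_shared_set.flags = BLK_MQ_F_SHOULD_MERGE;

        /* Tags are allocated once; every queue made from this set shares them. */
        return blk_mq_alloc_tag_set(&example_shared_set);
}

static struct request_queue *example_add_device(void)
{
        return blk_mq_init_queue(&example_shared_set);
}

static void example_teardown(void)
{
        /* Freed once at module exit, not per device. */
        blk_mq_free_tag_set(&example_shared_set);
}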
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index b1267ef34d5a..7b8c6368beb7 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -305,6 +305,7 @@ static void pcd_init_units(void)
305 put_disk(disk); 305 put_disk(disk);
306 continue; 306 continue;
307 } 307 }
308 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
308 cd->disk = disk; 309 cd->disk = disk;
309 cd->pi = &cd->pia; 310 cd->pi = &cd->pia;
310 cd->present = 0; 311 cd->present = 0;
@@ -783,7 +784,7 @@ static void pcd_request(void)
783 ps_set_intr(do_pcd_read, NULL, 0, nice); 784 ps_set_intr(do_pcd_read, NULL, 0, nice);
784 return; 785 return;
785 } else { 786 } else {
786 __blk_end_request_all(pcd_req, -EIO); 787 __blk_end_request_all(pcd_req, BLK_STS_IOERR);
787 pcd_req = NULL; 788 pcd_req = NULL;
788 } 789 }
789 } 790 }
@@ -794,7 +795,7 @@ static void do_pcd_request(struct request_queue *q)
794 pcd_request(); 795 pcd_request();
795} 796}
796 797
797static inline void next_request(int err) 798static inline void next_request(blk_status_t err)
798{ 799{
799 unsigned long saved_flags; 800 unsigned long saved_flags;
800 801
@@ -837,7 +838,7 @@ static void pcd_start(void)
837 838
838 if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) { 839 if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) {
839 pcd_bufblk = -1; 840 pcd_bufblk = -1;
840 next_request(-EIO); 841 next_request(BLK_STS_IOERR);
841 return; 842 return;
842 } 843 }
843 844
@@ -871,7 +872,7 @@ static void do_pcd_read_drq(void)
871 return; 872 return;
872 } 873 }
873 pcd_bufblk = -1; 874 pcd_bufblk = -1;
874 next_request(-EIO); 875 next_request(BLK_STS_IOERR);
875 return; 876 return;
876 } 877 }
877 878
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 7d2402f90978..27a44b97393a 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -438,7 +438,7 @@ static void run_fsm(void)
438 phase = NULL; 438 phase = NULL;
439 spin_lock_irqsave(&pd_lock, saved_flags); 439 spin_lock_irqsave(&pd_lock, saved_flags);
440 if (!__blk_end_request_cur(pd_req, 440 if (!__blk_end_request_cur(pd_req,
441 res == Ok ? 0 : -EIO)) { 441 res == Ok ? 0 : BLK_STS_IOERR)) {
442 if (!set_next_request()) 442 if (!set_next_request())
443 stop = 1; 443 stop = 1;
444 } 444 }
@@ -863,6 +863,7 @@ static void pd_probe_drive(struct pd_unit *disk)
863 return; 863 return;
864 } 864 }
865 blk_queue_max_hw_sectors(p->queue, cluster); 865 blk_queue_max_hw_sectors(p->queue, cluster);
866 blk_queue_bounce_limit(p->queue, BLK_BOUNCE_HIGH);
866 867
867 if (disk->drive == -1) { 868 if (disk->drive == -1) {
868 for (disk->drive = 0; disk->drive <= 1; disk->drive++) 869 for (disk->drive = 0; disk->drive <= 1; disk->drive++)
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f24ca7315ddc..eef7a91f667d 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -293,6 +293,7 @@ static void __init pf_init_units(void)
293 return; 293 return;
294 } 294 }
295 blk_queue_max_segments(disk->queue, cluster); 295 blk_queue_max_segments(disk->queue, cluster);
296 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
296 pf->disk = disk; 297 pf->disk = disk;
297 pf->pi = &pf->pia; 298 pf->pi = &pf->pia;
298 pf->media_status = PF_NM; 299 pf->media_status = PF_NM;
@@ -801,7 +802,7 @@ static int set_next_request(void)
801 return pf_req != NULL; 802 return pf_req != NULL;
802} 803}
803 804
804static void pf_end_request(int err) 805static void pf_end_request(blk_status_t err)
805{ 806{
806 if (pf_req && !__blk_end_request_cur(pf_req, err)) 807 if (pf_req && !__blk_end_request_cur(pf_req, err))
807 pf_req = NULL; 808 pf_req = NULL;
@@ -821,7 +822,7 @@ repeat:
821 pf_count = blk_rq_cur_sectors(pf_req); 822 pf_count = blk_rq_cur_sectors(pf_req);
822 823
823 if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) { 824 if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) {
824 pf_end_request(-EIO); 825 pf_end_request(BLK_STS_IOERR);
825 goto repeat; 826 goto repeat;
826 } 827 }
827 828
@@ -836,7 +837,7 @@ repeat:
836 pi_do_claimed(pf_current->pi, do_pf_write); 837 pi_do_claimed(pf_current->pi, do_pf_write);
837 else { 838 else {
838 pf_busy = 0; 839 pf_busy = 0;
839 pf_end_request(-EIO); 840 pf_end_request(BLK_STS_IOERR);
840 goto repeat; 841 goto repeat;
841 } 842 }
842} 843}
@@ -868,7 +869,7 @@ static int pf_next_buf(void)
868 return 0; 869 return 0;
869} 870}
870 871
871static inline void next_request(int err) 872static inline void next_request(blk_status_t err)
872{ 873{
873 unsigned long saved_flags; 874 unsigned long saved_flags;
874 875
@@ -896,7 +897,7 @@ static void do_pf_read_start(void)
896 pi_do_claimed(pf_current->pi, do_pf_read_start); 897 pi_do_claimed(pf_current->pi, do_pf_read_start);
897 return; 898 return;
898 } 899 }
899 next_request(-EIO); 900 next_request(BLK_STS_IOERR);
900 return; 901 return;
901 } 902 }
902 pf_mask = STAT_DRQ; 903 pf_mask = STAT_DRQ;
@@ -915,7 +916,7 @@ static void do_pf_read_drq(void)
915 pi_do_claimed(pf_current->pi, do_pf_read_start); 916 pi_do_claimed(pf_current->pi, do_pf_read_start);
916 return; 917 return;
917 } 918 }
918 next_request(-EIO); 919 next_request(BLK_STS_IOERR);
919 return; 920 return;
920 } 921 }
921 pi_read_block(pf_current->pi, pf_buf, 512); 922 pi_read_block(pf_current->pi, pf_buf, 512);
@@ -942,7 +943,7 @@ static void do_pf_write_start(void)
942 pi_do_claimed(pf_current->pi, do_pf_write_start); 943 pi_do_claimed(pf_current->pi, do_pf_write_start);
943 return; 944 return;
944 } 945 }
945 next_request(-EIO); 946 next_request(BLK_STS_IOERR);
946 return; 947 return;
947 } 948 }
948 949
@@ -955,7 +956,7 @@ static void do_pf_write_start(void)
955 pi_do_claimed(pf_current->pi, do_pf_write_start); 956 pi_do_claimed(pf_current->pi, do_pf_write_start);
956 return; 957 return;
957 } 958 }
958 next_request(-EIO); 959 next_request(BLK_STS_IOERR);
959 return; 960 return;
960 } 961 }
961 pi_write_block(pf_current->pi, pf_buf, 512); 962 pi_write_block(pf_current->pi, pf_buf, 512);
@@ -975,7 +976,7 @@ static void do_pf_write_done(void)
975 pi_do_claimed(pf_current->pi, do_pf_write_start); 976 pi_do_claimed(pf_current->pi, do_pf_write_start);
976 return; 977 return;
977 } 978 }
978 next_request(-EIO); 979 next_request(BLK_STS_IOERR);
979 return; 980 return;
980 } 981 }
981 pi_disconnect(pf_current->pi); 982 pi_disconnect(pf_current->pi);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 205b865ebeb9..467beca397a2 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -98,6 +98,7 @@ static int write_congestion_on = PKT_WRITE_CONGESTION_ON;
98static int write_congestion_off = PKT_WRITE_CONGESTION_OFF; 98static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
99static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */ 99static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */
100static mempool_t *psd_pool; 100static mempool_t *psd_pool;
101static struct bio_set *pkt_bio_set;
101 102
102static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */ 103static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */
103static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */ 104static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -707,7 +708,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
707 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); 708 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
708 if (IS_ERR(rq)) 709 if (IS_ERR(rq))
709 return PTR_ERR(rq); 710 return PTR_ERR(rq);
710 scsi_req_init(rq);
711 711
712 if (cgc->buflen) { 712 if (cgc->buflen) {
713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, 713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -952,9 +952,9 @@ static void pkt_end_io_read(struct bio *bio)
952 952
953 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", 953 pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
954 bio, (unsigned long long)pkt->sector, 954 bio, (unsigned long long)pkt->sector,
955 (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error); 955 (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
956 956
957 if (bio->bi_error) 957 if (bio->bi_status)
958 atomic_inc(&pkt->io_errors); 958 atomic_inc(&pkt->io_errors);
959 if (atomic_dec_and_test(&pkt->io_wait)) { 959 if (atomic_dec_and_test(&pkt->io_wait)) {
960 atomic_inc(&pkt->run_sm); 960 atomic_inc(&pkt->run_sm);
@@ -969,7 +969,7 @@ static void pkt_end_io_packet_write(struct bio *bio)
969 struct pktcdvd_device *pd = pkt->pd; 969 struct pktcdvd_device *pd = pkt->pd;
970 BUG_ON(!pd); 970 BUG_ON(!pd);
971 971
972 pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error); 972 pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
973 973
974 pd->stats.pkt_ended++; 974 pd->stats.pkt_ended++;
975 975
@@ -1305,16 +1305,16 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1305 pkt_queue_bio(pd, pkt->w_bio); 1305 pkt_queue_bio(pd, pkt->w_bio);
1306} 1306}
1307 1307
1308static void pkt_finish_packet(struct packet_data *pkt, int error) 1308static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
1309{ 1309{
1310 struct bio *bio; 1310 struct bio *bio;
1311 1311
1312 if (error) 1312 if (status)
1313 pkt->cache_valid = 0; 1313 pkt->cache_valid = 0;
1314 1314
1315 /* Finish all bios corresponding to this packet */ 1315 /* Finish all bios corresponding to this packet */
1316 while ((bio = bio_list_pop(&pkt->orig_bios))) { 1316 while ((bio = bio_list_pop(&pkt->orig_bios))) {
1317 bio->bi_error = error; 1317 bio->bi_status = status;
1318 bio_endio(bio); 1318 bio_endio(bio);
1319 } 1319 }
1320} 1320}
@@ -1349,7 +1349,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
1349 if (atomic_read(&pkt->io_wait) > 0) 1349 if (atomic_read(&pkt->io_wait) > 0)
1350 return; 1350 return;
1351 1351
1352 if (!pkt->w_bio->bi_error) { 1352 if (!pkt->w_bio->bi_status) {
1353 pkt_set_state(pkt, PACKET_FINISHED_STATE); 1353 pkt_set_state(pkt, PACKET_FINISHED_STATE);
1354 } else { 1354 } else {
1355 pkt_set_state(pkt, PACKET_RECOVERY_STATE); 1355 pkt_set_state(pkt, PACKET_RECOVERY_STATE);
@@ -1366,7 +1366,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
1366 break; 1366 break;
1367 1367
1368 case PACKET_FINISHED_STATE: 1368 case PACKET_FINISHED_STATE:
1369 pkt_finish_packet(pkt, pkt->w_bio->bi_error); 1369 pkt_finish_packet(pkt, pkt->w_bio->bi_status);
1370 return; 1370 return;
1371 1371
1372 default: 1372 default:
@@ -2301,7 +2301,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
2301 struct packet_stacked_data *psd = bio->bi_private; 2301 struct packet_stacked_data *psd = bio->bi_private;
2302 struct pktcdvd_device *pd = psd->pd; 2302 struct pktcdvd_device *pd = psd->pd;
2303 2303
2304 psd->bio->bi_error = bio->bi_error; 2304 psd->bio->bi_status = bio->bi_status;
2305 bio_put(bio); 2305 bio_put(bio);
2306 bio_endio(psd->bio); 2306 bio_endio(psd->bio);
2307 mempool_free(psd, psd_pool); 2307 mempool_free(psd, psd_pool);
@@ -2310,7 +2310,7 @@ static void pkt_end_io_read_cloned(struct bio *bio)
2310 2310
2311static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) 2311static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
2312{ 2312{
2313 struct bio *cloned_bio = bio_clone(bio, GFP_NOIO); 2313 struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
2314 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO); 2314 struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
2315 2315
2316 psd->pd = pd; 2316 psd->pd = pd;
@@ -2412,9 +2412,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
2412 char b[BDEVNAME_SIZE]; 2412 char b[BDEVNAME_SIZE];
2413 struct bio *split; 2413 struct bio *split;
2414 2414
2415 blk_queue_bounce(q, &bio); 2415 blk_queue_split(q, &bio);
2416
2417 blk_queue_split(q, &bio, q->bio_split);
2418 2416
2419 pd = q->queuedata; 2417 pd = q->queuedata;
2420 if (!pd) { 2418 if (!pd) {
@@ -2455,7 +2453,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio)
2455 2453
2456 split = bio_split(bio, last_zone - 2454 split = bio_split(bio, last_zone -
2457 bio->bi_iter.bi_sector, 2455 bio->bi_iter.bi_sector,
2458 GFP_NOIO, fs_bio_set); 2456 GFP_NOIO, pkt_bio_set);
2459 bio_chain(split, bio); 2457 bio_chain(split, bio);
2460 } else { 2458 } else {
2461 split = bio; 2459 split = bio;
@@ -2583,6 +2581,11 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
2583 bdev = bdget(dev); 2581 bdev = bdget(dev);
2584 if (!bdev) 2582 if (!bdev)
2585 return -ENOMEM; 2583 return -ENOMEM;
2584 if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
2585 WARN_ONCE(true, "Attempt to register a non-SCSI queue\n");
2586 bdput(bdev);
2587 return -EINVAL;
2588 }
2586 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL); 2589 ret = blkdev_get(bdev, FMODE_READ | FMODE_NDELAY, NULL);
2587 if (ret) 2590 if (ret)
2588 return ret; 2591 return ret;
@@ -2919,6 +2922,11 @@ static int __init pkt_init(void)
2919 sizeof(struct packet_stacked_data)); 2922 sizeof(struct packet_stacked_data));
2920 if (!psd_pool) 2923 if (!psd_pool)
2921 return -ENOMEM; 2924 return -ENOMEM;
2925 pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
2926 if (!pkt_bio_set) {
2927 mempool_destroy(psd_pool);
2928 return -ENOMEM;
2929 }
2922 2930
2923 ret = register_blkdev(pktdev_major, DRIVER_NAME); 2931 ret = register_blkdev(pktdev_major, DRIVER_NAME);
2924 if (ret < 0) { 2932 if (ret < 0) {
@@ -2951,6 +2959,7 @@ out:
2951 unregister_blkdev(pktdev_major, DRIVER_NAME); 2959 unregister_blkdev(pktdev_major, DRIVER_NAME);
2952out2: 2960out2:
2953 mempool_destroy(psd_pool); 2961 mempool_destroy(psd_pool);
2962 bioset_free(pkt_bio_set);
2954 return ret; 2963 return ret;
2955} 2964}
2956 2965
@@ -2964,6 +2973,7 @@ static void __exit pkt_exit(void)
2964 2973
2965 unregister_blkdev(pktdev_major, DRIVER_NAME); 2974 unregister_blkdev(pktdev_major, DRIVER_NAME);
2966 mempool_destroy(psd_pool); 2975 mempool_destroy(psd_pool);
2976 bioset_free(pkt_bio_set);
2967} 2977}
2968 2978
2969MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives"); 2979MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
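
pktcdvd combines several of the series' themes in one driver: bio->bi_error becomes bio->bi_status, bio_clone() on the shared fs_bio_set is replaced by bio_clone_fast() on a driver-private pkt_bio_set, blk_queue_split() loses its bio_set argument along with the preceding blk_queue_bounce() call, and module init/exit now create and free the private bio_set. The new blk_queue_scsi_passthrough() check in pkt_new_dev() appears to go hand in hand with dropping the explicit scsi_req_init() call: passthrough requests are initialized when allocated from a queue flagged for SCSI passthrough, and sending them anywhere else is refused up front. A minimal sketch of the private-bioset lifecycle, assuming the 4.13 bioset_create(pool_size, front_pad, flags) signature; my_bio_set, my_init, my_clone_bio and my_exit are illustrative names only:

#include <linux/bio.h>

static struct bio_set *my_bio_set;

static int __init my_init(void)
{
        my_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
        if (!my_bio_set)
                return -ENOMEM;
        return 0;
}

static struct bio *my_clone_bio(struct bio *src)
{
        /* fast clones share the source's bvecs, so no bvec pool is needed */
        return bio_clone_fast(src, GFP_NOIO, my_bio_set);
}

static void __exit my_exit(void)
{
        bioset_free(my_bio_set);
}

A stacking driver that clones from the global fs_bio_set risks deadlocking against the very filesystem I/O it is servicing, which is why each driver now carries its own pool.
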
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index a809e3e9feb8..075662f2cf46 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev,
158 if (res) { 158 if (res) {
159 dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, 159 dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__,
160 __LINE__, op, res); 160 __LINE__, op, res);
161 __blk_end_request_all(req, -EIO); 161 __blk_end_request_all(req, BLK_STS_IOERR);
162 return 0; 162 return 0;
163 } 163 }
164 164
@@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev,
180 if (res) { 180 if (res) {
181 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", 181 dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n",
182 __func__, __LINE__, res); 182 __func__, __LINE__, res);
183 __blk_end_request_all(req, -EIO); 183 __blk_end_request_all(req, BLK_STS_IOERR);
184 return 0; 184 return 0;
185 } 185 }
186 186
@@ -208,7 +208,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
208 break; 208 break;
209 default: 209 default:
210 blk_dump_rq_flags(req, DEVICE_NAME " bad request"); 210 blk_dump_rq_flags(req, DEVICE_NAME " bad request");
211 __blk_end_request_all(req, -EIO); 211 __blk_end_request_all(req, BLK_STS_IOERR);
212 } 212 }
213 } 213 }
214} 214}
@@ -231,7 +231,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
231 struct ps3_storage_device *dev = data; 231 struct ps3_storage_device *dev = data;
232 struct ps3disk_private *priv; 232 struct ps3disk_private *priv;
233 struct request *req; 233 struct request *req;
234 int res, read, error; 234 int res, read;
235 blk_status_t error;
235 u64 tag, status; 236 u64 tag, status;
236 const char *op; 237 const char *op;
237 238
@@ -269,7 +270,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
269 if (status) { 270 if (status) {
270 dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, 271 dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__,
271 __LINE__, op, status); 272 __LINE__, op, status);
272 error = -EIO; 273 error = BLK_STS_IOERR;
273 } else { 274 } else {
274 dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__, 275 dev_dbg(&dev->sbd.core, "%s:%u: %s completed\n", __func__,
275 __LINE__, op); 276 __LINE__, op);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 456b4fe21559..e0e81cacd781 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -428,7 +428,7 @@ static void ps3vram_cache_cleanup(struct ps3_system_bus_device *dev)
428 kfree(priv->cache.tags); 428 kfree(priv->cache.tags);
429} 429}
430 430
431static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from, 431static blk_status_t ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
432 size_t len, size_t *retlen, u_char *buf) 432 size_t len, size_t *retlen, u_char *buf)
433{ 433{
434 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); 434 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -438,7 +438,7 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
438 (unsigned int)from, len); 438 (unsigned int)from, len);
439 439
440 if (from >= priv->size) 440 if (from >= priv->size)
441 return -EIO; 441 return BLK_STS_IOERR;
442 442
443 if (len > priv->size - from) 443 if (len > priv->size - from)
444 len = priv->size - from; 444 len = priv->size - from;
@@ -472,14 +472,14 @@ static int ps3vram_read(struct ps3_system_bus_device *dev, loff_t from,
472 return 0; 472 return 0;
473} 473}
474 474
475static int ps3vram_write(struct ps3_system_bus_device *dev, loff_t to, 475static blk_status_t ps3vram_write(struct ps3_system_bus_device *dev, loff_t to,
476 size_t len, size_t *retlen, const u_char *buf) 476 size_t len, size_t *retlen, const u_char *buf)
477{ 477{
478 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); 478 struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
479 unsigned int cached, count; 479 unsigned int cached, count;
480 480
481 if (to >= priv->size) 481 if (to >= priv->size)
482 return -EIO; 482 return BLK_STS_IOERR;
483 483
484 if (len > priv->size - to) 484 if (len > priv->size - to)
485 len = priv->size - to; 485 len = priv->size - to;
@@ -554,7 +554,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
554 int write = bio_data_dir(bio) == WRITE; 554 int write = bio_data_dir(bio) == WRITE;
555 const char *op = write ? "write" : "read"; 555 const char *op = write ? "write" : "read";
556 loff_t offset = bio->bi_iter.bi_sector << 9; 556 loff_t offset = bio->bi_iter.bi_sector << 9;
557 int error = 0; 557 blk_status_t error = 0;
558 struct bio_vec bvec; 558 struct bio_vec bvec;
559 struct bvec_iter iter; 559 struct bvec_iter iter;
560 struct bio *next; 560 struct bio *next;
@@ -578,7 +578,7 @@ static struct bio *ps3vram_do_bio(struct ps3_system_bus_device *dev,
578 578
579 if (retlen != len) { 579 if (retlen != len) {
580 dev_err(&dev->core, "Short %s\n", op); 580 dev_err(&dev->core, "Short %s\n", op);
581 error = -EIO; 581 error = BLK_STS_IOERR;
582 goto out; 582 goto out;
583 } 583 }
584 584
@@ -593,7 +593,7 @@ out:
593 next = bio_list_peek(&priv->list); 593 next = bio_list_peek(&priv->list);
594 spin_unlock_irq(&priv->lock); 594 spin_unlock_irq(&priv->lock);
595 595
596 bio->bi_error = error; 596 bio->bi_status = error;
597 bio_endio(bio); 597 bio_endio(bio);
598 return next; 598 return next;
599} 599}
@@ -606,7 +606,7 @@ static blk_qc_t ps3vram_make_request(struct request_queue *q, struct bio *bio)
606 606
607 dev_dbg(&dev->core, "%s\n", __func__); 607 dev_dbg(&dev->core, "%s\n", __func__);
608 608
609 blk_queue_split(q, &bio, q->bio_split); 609 blk_queue_split(q, &bio);
610 610
611 spin_lock_irq(&priv->lock); 611 spin_lock_irq(&priv->lock);
612 busy = !bio_list_empty(&priv->list); 612 busy = !bio_list_empty(&priv->list);
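
ps3disk and ps3vram show the same conversion on the two legacy submission models: the request-based path completes with __blk_end_request_all(req, BLK_STS_IOERR) and carries a blk_status_t error variable through the interrupt handler, while the bio-based path stores the status in bio->bi_status and calls the two-argument blk_queue_split(), which now draws on the queue's internal split bio_set. A compressed sketch of a bio-based make_request function under these conventions; my_make_request and my_do_io are hypothetical names:

#include <linux/blkdev.h>

static bool my_do_io(struct bio *bio);         /* hypothetical transfer helper */

static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
{
        blk_status_t st = BLK_STS_OK;

        blk_queue_split(q, &bio);              /* bio_set argument is gone */

        if (!my_do_io(bio))
                st = BLK_STS_IOERR;

        bio->bi_status = st;                   /* was: bio->bi_error = -EIO */
        bio_endio(bio);
        return BLK_QC_T_NONE;
}
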
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c16f74547804..b008b6a98098 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -442,6 +442,8 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
442static struct kmem_cache *rbd_img_request_cache; 442static struct kmem_cache *rbd_img_request_cache;
443static struct kmem_cache *rbd_obj_request_cache; 443static struct kmem_cache *rbd_obj_request_cache;
444 444
445static struct bio_set *rbd_bio_clone;
446
445static int rbd_major; 447static int rbd_major;
446static DEFINE_IDA(rbd_dev_id_ida); 448static DEFINE_IDA(rbd_dev_id_ida);
447 449
@@ -1363,7 +1365,7 @@ static struct bio *bio_clone_range(struct bio *bio_src,
1363{ 1365{
1364 struct bio *bio; 1366 struct bio *bio;
1365 1367
1366 bio = bio_clone(bio_src, gfpmask); 1368 bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
1367 if (!bio) 1369 if (!bio)
1368 return NULL; /* ENOMEM */ 1370 return NULL; /* ENOMEM */
1369 1371
@@ -2293,11 +2295,13 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
2293 rbd_assert(img_request->obj_request != NULL); 2295 rbd_assert(img_request->obj_request != NULL);
2294 more = obj_request->which < img_request->obj_request_count - 1; 2296 more = obj_request->which < img_request->obj_request_count - 1;
2295 } else { 2297 } else {
2298 blk_status_t status = errno_to_blk_status(result);
2299
2296 rbd_assert(img_request->rq != NULL); 2300 rbd_assert(img_request->rq != NULL);
2297 2301
2298 more = blk_update_request(img_request->rq, result, xferred); 2302 more = blk_update_request(img_request->rq, status, xferred);
2299 if (!more) 2303 if (!more)
2300 __blk_mq_end_request(img_request->rq, result); 2304 __blk_mq_end_request(img_request->rq, status);
2301 } 2305 }
2302 2306
2303 return more; 2307 return more;
@@ -4150,17 +4154,17 @@ err_rq:
4150 obj_op_name(op_type), length, offset, result); 4154 obj_op_name(op_type), length, offset, result);
4151 ceph_put_snap_context(snapc); 4155 ceph_put_snap_context(snapc);
4152err: 4156err:
4153 blk_mq_end_request(rq, result); 4157 blk_mq_end_request(rq, errno_to_blk_status(result));
4154} 4158}
4155 4159
4156static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, 4160static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
4157 const struct blk_mq_queue_data *bd) 4161 const struct blk_mq_queue_data *bd)
4158{ 4162{
4159 struct request *rq = bd->rq; 4163 struct request *rq = bd->rq;
4160 struct work_struct *work = blk_mq_rq_to_pdu(rq); 4164 struct work_struct *work = blk_mq_rq_to_pdu(rq);
4161 4165
4162 queue_work(rbd_wq, work); 4166 queue_work(rbd_wq, work);
4163 return BLK_MQ_RQ_QUEUE_OK; 4167 return BLK_STS_OK;
4164} 4168}
4165 4169
4166static void rbd_free_disk(struct rbd_device *rbd_dev) 4170static void rbd_free_disk(struct rbd_device *rbd_dev)
@@ -6414,8 +6418,16 @@ static int rbd_slab_init(void)
6414 if (!rbd_obj_request_cache) 6418 if (!rbd_obj_request_cache)
6415 goto out_err; 6419 goto out_err;
6416 6420
6421 rbd_assert(!rbd_bio_clone);
6422 rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
6423 if (!rbd_bio_clone)
6424 goto out_err_clone;
6425
6417 return 0; 6426 return 0;
6418 6427
6428out_err_clone:
6429 kmem_cache_destroy(rbd_obj_request_cache);
6430 rbd_obj_request_cache = NULL;
6419out_err: 6431out_err:
6420 kmem_cache_destroy(rbd_img_request_cache); 6432 kmem_cache_destroy(rbd_img_request_cache);
6421 rbd_img_request_cache = NULL; 6433 rbd_img_request_cache = NULL;
@@ -6431,6 +6443,10 @@ static void rbd_slab_exit(void)
6431 rbd_assert(rbd_img_request_cache); 6443 rbd_assert(rbd_img_request_cache);
6432 kmem_cache_destroy(rbd_img_request_cache); 6444 kmem_cache_destroy(rbd_img_request_cache);
6433 rbd_img_request_cache = NULL; 6445 rbd_img_request_cache = NULL;
6446
6447 rbd_assert(rbd_bio_clone);
6448 bioset_free(rbd_bio_clone);
6449 rbd_bio_clone = NULL;
6434} 6450}
6435 6451
6436static int __init rbd_init(void) 6452static int __init rbd_init(void)
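
rbd sits on the boundary between libceph, which still reports negative errnos, and the block layer, which now wants blk_status_t, so it leans on the translation helpers: errno_to_blk_status() when finishing a request, and blk_status_to_errno() where an errno is still expected (virtio_blk makes the reverse trip in virtblk_get_id() further down). A small sketch of the pattern at such a boundary; my_complete and result are illustrative:

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

static void my_complete(struct request *rq, int result)
{
        /* result is a negative errno from the lower layer, 0 on success */
        blk_mq_end_request(rq, errno_to_blk_status(result));
}
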
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 9c566364ac9c..7f4acebf4657 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -149,9 +149,9 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
149{ 149{
150 struct rsxx_cardinfo *card = q->queuedata; 150 struct rsxx_cardinfo *card = q->queuedata;
151 struct rsxx_bio_meta *bio_meta; 151 struct rsxx_bio_meta *bio_meta;
152 int st = -EINVAL; 152 blk_status_t st = BLK_STS_IOERR;
153 153
154 blk_queue_split(q, &bio, q->bio_split); 154 blk_queue_split(q, &bio);
155 155
156 might_sleep(); 156 might_sleep();
157 157
@@ -161,15 +161,11 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
161 if (bio_end_sector(bio) > get_capacity(card->gendisk)) 161 if (bio_end_sector(bio) > get_capacity(card->gendisk))
162 goto req_err; 162 goto req_err;
163 163
164 if (unlikely(card->halt)) { 164 if (unlikely(card->halt))
165 st = -EFAULT;
166 goto req_err; 165 goto req_err;
167 }
168 166
169 if (unlikely(card->dma_fault)) { 167 if (unlikely(card->dma_fault))
170 st = (-EFAULT);
171 goto req_err; 168 goto req_err;
172 }
173 169
174 if (bio->bi_iter.bi_size == 0) { 170 if (bio->bi_iter.bi_size == 0) {
175 dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); 171 dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
@@ -178,7 +174,7 @@ static blk_qc_t rsxx_make_request(struct request_queue *q, struct bio *bio)
178 174
179 bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); 175 bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL);
180 if (!bio_meta) { 176 if (!bio_meta) {
181 st = -ENOMEM; 177 st = BLK_STS_RESOURCE;
182 goto req_err; 178 goto req_err;
183 } 179 }
184 180
@@ -205,7 +201,7 @@ queue_err:
205 kmem_cache_free(bio_meta_pool, bio_meta); 201 kmem_cache_free(bio_meta_pool, bio_meta);
206req_err: 202req_err:
207 if (st) 203 if (st)
208 bio->bi_error = st; 204 bio->bi_status = st;
209 bio_endio(bio); 205 bio_endio(bio);
210 return BLK_QC_T_NONE; 206 return BLK_QC_T_NONE;
211} 207}
@@ -288,7 +284,6 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
288 } 284 }
289 285
290 blk_queue_make_request(card->queue, rsxx_make_request); 286 blk_queue_make_request(card->queue, rsxx_make_request);
291 blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
292 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); 287 blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors);
293 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); 288 blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE);
294 289
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 5a20385f87d0..6a1b2177951c 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -611,7 +611,7 @@ static void rsxx_schedule_done(struct work_struct *work)
611 mutex_unlock(&ctrl->work_lock); 611 mutex_unlock(&ctrl->work_lock);
612} 612}
613 613
614static int rsxx_queue_discard(struct rsxx_cardinfo *card, 614static blk_status_t rsxx_queue_discard(struct rsxx_cardinfo *card,
615 struct list_head *q, 615 struct list_head *q,
616 unsigned int laddr, 616 unsigned int laddr,
617 rsxx_dma_cb cb, 617 rsxx_dma_cb cb,
@@ -621,7 +621,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
621 621
622 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); 622 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
623 if (!dma) 623 if (!dma)
624 return -ENOMEM; 624 return BLK_STS_RESOURCE;
625 625
626 dma->cmd = HW_CMD_BLK_DISCARD; 626 dma->cmd = HW_CMD_BLK_DISCARD;
627 dma->laddr = laddr; 627 dma->laddr = laddr;
@@ -640,7 +640,7 @@ static int rsxx_queue_discard(struct rsxx_cardinfo *card,
640 return 0; 640 return 0;
641} 641}
642 642
643static int rsxx_queue_dma(struct rsxx_cardinfo *card, 643static blk_status_t rsxx_queue_dma(struct rsxx_cardinfo *card,
644 struct list_head *q, 644 struct list_head *q,
645 int dir, 645 int dir,
646 unsigned int dma_off, 646 unsigned int dma_off,
@@ -655,7 +655,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
655 655
656 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); 656 dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL);
657 if (!dma) 657 if (!dma)
658 return -ENOMEM; 658 return BLK_STS_RESOURCE;
659 659
660 dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; 660 dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ;
661 dma->laddr = laddr; 661 dma->laddr = laddr;
@@ -677,7 +677,7 @@ static int rsxx_queue_dma(struct rsxx_cardinfo *card,
677 return 0; 677 return 0;
678} 678}
679 679
680int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, 680blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
681 struct bio *bio, 681 struct bio *bio,
682 atomic_t *n_dmas, 682 atomic_t *n_dmas,
683 rsxx_dma_cb cb, 683 rsxx_dma_cb cb,
@@ -694,7 +694,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
694 unsigned int dma_len; 694 unsigned int dma_len;
695 int dma_cnt[RSXX_MAX_TARGETS]; 695 int dma_cnt[RSXX_MAX_TARGETS];
696 int tgt; 696 int tgt;
697 int st; 697 blk_status_t st;
698 int i; 698 int i;
699 699
700 addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */ 700 addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
@@ -769,7 +769,6 @@ bvec_err:
769 for (i = 0; i < card->n_targets; i++) 769 for (i = 0; i < card->n_targets; i++)
770 rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i], 770 rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i],
771 FREE_DMA); 771 FREE_DMA);
772
773 return st; 772 return st;
774} 773}
775 774
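
The rsxx queueing helpers also show which status code replaces which errno: kmem_cache_alloc() failures that used to return -ENOMEM become BLK_STS_RESOURCE, the code for a temporary resource shortage, while real transfer failures stay BLK_STS_IOERR. A tiny sketch of that distinction; struct my_dma, my_pool and my_queue_one are illustrative:

#include <linux/slab.h>
#include <linux/blkdev.h>

struct my_dma { struct list_head list; };
static struct kmem_cache *my_pool;

static blk_status_t my_queue_one(struct list_head *q)
{
        struct my_dma *dma = kmem_cache_alloc(my_pool, GFP_KERNEL);

        if (!dma)
                return BLK_STS_RESOURCE;       /* was: return -ENOMEM */
        list_add_tail(&dma->list, q);
        return BLK_STS_OK;
}
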
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 6bbc64d0f690..277f27e673a2 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -391,7 +391,7 @@ int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
391void rsxx_dma_cleanup(void); 391void rsxx_dma_cleanup(void);
392void rsxx_dma_queue_reset(struct rsxx_cardinfo *card); 392void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
393int rsxx_dma_configure(struct rsxx_cardinfo *card); 393int rsxx_dma_configure(struct rsxx_cardinfo *card);
394int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, 394blk_status_t rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
395 struct bio *bio, 395 struct bio *bio,
396 atomic_t *n_dmas, 396 atomic_t *n_dmas,
397 rsxx_dma_cb cb, 397 rsxx_dma_cb cb,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 27833e4dae2a..d0368682bd43 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -451,8 +451,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev,
451 struct skd_special_context *skspcl); 451 struct skd_special_context *skspcl);
452static void skd_request_fn(struct request_queue *rq); 452static void skd_request_fn(struct request_queue *rq);
453static void skd_end_request(struct skd_device *skdev, 453static void skd_end_request(struct skd_device *skdev,
454 struct skd_request_context *skreq, int error); 454 struct skd_request_context *skreq, blk_status_t status);
455static int skd_preop_sg_list(struct skd_device *skdev, 455static bool skd_preop_sg_list(struct skd_device *skdev,
456 struct skd_request_context *skreq); 456 struct skd_request_context *skreq);
457static void skd_postop_sg_list(struct skd_device *skdev, 457static void skd_postop_sg_list(struct skd_device *skdev,
458 struct skd_request_context *skreq); 458 struct skd_request_context *skreq);
@@ -491,7 +491,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
491 if (req == NULL) 491 if (req == NULL)
492 break; 492 break;
493 blk_start_request(req); 493 blk_start_request(req);
494 __blk_end_request_all(req, -EIO); 494 __blk_end_request_all(req, BLK_STS_IOERR);
495 } 495 }
496} 496}
497 497
@@ -545,7 +545,6 @@ static void skd_request_fn(struct request_queue *q)
545 struct request *req = NULL; 545 struct request *req = NULL;
546 struct skd_scsi_request *scsi_req; 546 struct skd_scsi_request *scsi_req;
547 unsigned long io_flags; 547 unsigned long io_flags;
548 int error;
549 u32 lba; 548 u32 lba;
550 u32 count; 549 u32 count;
551 int data_dir; 550 int data_dir;
@@ -716,9 +715,7 @@ static void skd_request_fn(struct request_queue *q)
716 if (!req->bio) 715 if (!req->bio)
717 goto skip_sg; 716 goto skip_sg;
718 717
719 error = skd_preop_sg_list(skdev, skreq); 718 if (!skd_preop_sg_list(skdev, skreq)) {
720
721 if (error != 0) {
722 /* 719 /*
723 * Complete the native request with error. 720 * Complete the native request with error.
724 * Note that the request context is still at the 721 * Note that the request context is still at the
@@ -730,7 +727,7 @@ static void skd_request_fn(struct request_queue *q)
730 */ 727 */
731 pr_debug("%s:%s:%d error Out\n", 728 pr_debug("%s:%s:%d error Out\n",
732 skdev->name, __func__, __LINE__); 729 skdev->name, __func__, __LINE__);
733 skd_end_request(skdev, skreq, error); 730 skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
734 continue; 731 continue;
735 } 732 }
736 733
@@ -805,7 +802,7 @@ skip_sg:
805} 802}
806 803
807static void skd_end_request(struct skd_device *skdev, 804static void skd_end_request(struct skd_device *skdev,
808 struct skd_request_context *skreq, int error) 805 struct skd_request_context *skreq, blk_status_t error)
809{ 806{
810 if (unlikely(error)) { 807 if (unlikely(error)) {
811 struct request *req = skreq->req; 808 struct request *req = skreq->req;
@@ -822,7 +819,7 @@ static void skd_end_request(struct skd_device *skdev,
822 __blk_end_request_all(skreq->req, error); 819 __blk_end_request_all(skreq->req, error);
823} 820}
824 821
825static int skd_preop_sg_list(struct skd_device *skdev, 822static bool skd_preop_sg_list(struct skd_device *skdev,
826 struct skd_request_context *skreq) 823 struct skd_request_context *skreq)
827{ 824{
828 struct request *req = skreq->req; 825 struct request *req = skreq->req;
@@ -839,7 +836,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
839 836
840 n_sg = blk_rq_map_sg(skdev->queue, req, sg); 837 n_sg = blk_rq_map_sg(skdev->queue, req, sg);
841 if (n_sg <= 0) 838 if (n_sg <= 0)
842 return -EINVAL; 839 return false;
843 840
844 /* 841 /*
845 * Map scatterlist to PCI bus addresses. 842 * Map scatterlist to PCI bus addresses.
@@ -847,7 +844,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
847 */ 844 */
848 n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir); 845 n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
849 if (n_sg <= 0) 846 if (n_sg <= 0)
850 return -EINVAL; 847 return false;
851 848
852 SKD_ASSERT(n_sg <= skdev->sgs_per_request); 849 SKD_ASSERT(n_sg <= skdev->sgs_per_request);
853 850
@@ -882,7 +879,7 @@ static int skd_preop_sg_list(struct skd_device *skdev,
882 } 879 }
883 } 880 }
884 881
885 return 0; 882 return true;
886} 883}
887 884
888static void skd_postop_sg_list(struct skd_device *skdev, 885static void skd_postop_sg_list(struct skd_device *skdev,
@@ -2333,7 +2330,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
2333 switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) { 2330 switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) {
2334 case SKD_CHECK_STATUS_REPORT_GOOD: 2331 case SKD_CHECK_STATUS_REPORT_GOOD:
2335 case SKD_CHECK_STATUS_REPORT_SMART_ALERT: 2332 case SKD_CHECK_STATUS_REPORT_SMART_ALERT:
2336 skd_end_request(skdev, skreq, 0); 2333 skd_end_request(skdev, skreq, BLK_STS_OK);
2337 break; 2334 break;
2338 2335
2339 case SKD_CHECK_STATUS_BUSY_IMMINENT: 2336 case SKD_CHECK_STATUS_BUSY_IMMINENT:
@@ -2355,7 +2352,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev,
2355 2352
2356 case SKD_CHECK_STATUS_REPORT_ERROR: 2353 case SKD_CHECK_STATUS_REPORT_ERROR:
2357 default: 2354 default:
2358 skd_end_request(skdev, skreq, -EIO); 2355 skd_end_request(skdev, skreq, BLK_STS_IOERR);
2359 break; 2356 break;
2360 } 2357 }
2361} 2358}
@@ -2748,7 +2745,7 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
2748 * native request. 2745 * native request.
2749 */ 2746 */
2750 if (likely(cmp_status == SAM_STAT_GOOD)) 2747 if (likely(cmp_status == SAM_STAT_GOOD))
2751 skd_end_request(skdev, skreq, 0); 2748 skd_end_request(skdev, skreq, BLK_STS_OK);
2752 else 2749 else
2753 skd_resolve_req_exception(skdev, skreq); 2750 skd_resolve_req_exception(skdev, skreq);
2754 } 2751 }
@@ -3190,7 +3187,7 @@ static void skd_recover_requests(struct skd_device *skdev, int requeue)
3190 SKD_MAX_RETRIES) 3187 SKD_MAX_RETRIES)
3191 blk_requeue_request(skdev->queue, skreq->req); 3188 blk_requeue_request(skdev->queue, skreq->req);
3192 else 3189 else
3193 skd_end_request(skdev, skreq, -EIO); 3190 skd_end_request(skdev, skreq, BLK_STS_IOERR);
3194 3191
3195 skreq->req = NULL; 3192 skreq->req = NULL;
3196 3193
@@ -4276,6 +4273,7 @@ static int skd_cons_disk(struct skd_device *skdev)
4276 rc = -ENOMEM; 4273 rc = -ENOMEM;
4277 goto err_out; 4274 goto err_out;
4278 } 4275 }
4276 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
4279 4277
4280 skdev->queue = q; 4278 skdev->queue = q;
4281 disk->queue = q; 4279 disk->queue = q;
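
skd also narrows skd_preop_sg_list() from an int errno to bool, since its only caller just needs to know whether scatter/gather mapping succeeded; on failure the request is now completed with BLK_STS_RESOURCE instead of the old -EINVAL. A sketch of the same shape with an illustrative name (my_prep_sg):

#include <linux/blkdev.h>
#include <linux/scatterlist.h>

static bool my_prep_sg(struct request_queue *q, struct request *req,
                       struct scatterlist *sg)
{
        int n_sg = blk_rq_map_sg(q, req, sg);

        /* caller completes the request with BLK_STS_RESOURCE on false */
        return n_sg > 0;
}
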
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 3f3a3ab3d50a..6b16ead1da58 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -316,7 +316,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
316 316
317 rqe->req = NULL; 317 rqe->req = NULL;
318 318
319 __blk_end_request(req, (desc->status ? -EIO : 0), desc->size); 319 __blk_end_request(req, (desc->status ? BLK_STS_IOERR : 0), desc->size);
320 320
321 vdc_blk_queue_start(port); 321 vdc_blk_queue_start(port);
322} 322}
@@ -1023,7 +1023,7 @@ static void vdc_queue_drain(struct vdc_port *port)
1023 struct request *req; 1023 struct request *req;
1024 1024
1025 while ((req = blk_fetch_request(port->disk->queue)) != NULL) 1025 while ((req = blk_fetch_request(port->disk->queue)) != NULL)
1026 __blk_end_request_all(req, -EIO); 1026 __blk_end_request_all(req, BLK_STS_IOERR);
1027} 1027}
1028 1028
1029static void vdc_ldc_reset_timer(unsigned long _arg) 1029static void vdc_ldc_reset_timer(unsigned long _arg)
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 3064be6cf375..84434d3ea19b 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -493,7 +493,7 @@ static inline int swim_read_sector(struct floppy_state *fs,
493 return ret; 493 return ret;
494} 494}
495 495
496static int floppy_read_sectors(struct floppy_state *fs, 496static blk_status_t floppy_read_sectors(struct floppy_state *fs,
497 int req_sector, int sectors_nb, 497 int req_sector, int sectors_nb,
498 unsigned char *buffer) 498 unsigned char *buffer)
499{ 499{
@@ -516,7 +516,7 @@ static int floppy_read_sectors(struct floppy_state *fs,
516 ret = swim_read_sector(fs, side, track, sector, 516 ret = swim_read_sector(fs, side, track, sector,
517 buffer); 517 buffer);
518 if (try-- == 0) 518 if (try-- == 0)
519 return -EIO; 519 return BLK_STS_IOERR;
520 } while (ret != 512); 520 } while (ret != 512);
521 521
522 buffer += ret; 522 buffer += ret;
@@ -553,7 +553,7 @@ static void do_fd_request(struct request_queue *q)
553 553
554 req = swim_next_request(swd); 554 req = swim_next_request(swd);
555 while (req) { 555 while (req) {
556 int err = -EIO; 556 blk_status_t err = BLK_STS_IOERR;
557 557
558 fs = req->rq_disk->private_data; 558 fs = req->rq_disk->private_data;
559 if (blk_rq_pos(req) >= fs->total_secs) 559 if (blk_rq_pos(req) >= fs->total_secs)
@@ -864,6 +864,8 @@ static int swim_floppy_init(struct swim_priv *swd)
864 put_disk(swd->unit[drive].disk); 864 put_disk(swd->unit[drive].disk);
865 goto exit_put_disks; 865 goto exit_put_disks;
866 } 866 }
867 blk_queue_bounce_limit(swd->unit[drive].disk->queue,
868 BLK_BOUNCE_HIGH);
867 swd->unit[drive].disk->queue->queuedata = swd; 869 swd->unit[drive].disk->queue->queuedata = swd;
868 swd->unit[drive].swd = swd; 870 swd->unit[drive].swd = swd;
869 } 871 }
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ba4809c9bdba..9f931f8f6b4c 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -257,7 +257,7 @@ static unsigned int floppy_check_events(struct gendisk *disk,
257 unsigned int clearing); 257 unsigned int clearing);
258static int floppy_revalidate(struct gendisk *disk); 258static int floppy_revalidate(struct gendisk *disk);
259 259
260static bool swim3_end_request(struct floppy_state *fs, int err, unsigned int nr_bytes) 260static bool swim3_end_request(struct floppy_state *fs, blk_status_t err, unsigned int nr_bytes)
261{ 261{
262 struct request *req = fs->cur_req; 262 struct request *req = fs->cur_req;
263 int rc; 263 int rc;
@@ -334,7 +334,7 @@ static void start_request(struct floppy_state *fs)
334 if (fs->mdev->media_bay && 334 if (fs->mdev->media_bay &&
335 check_media_bay(fs->mdev->media_bay) != MB_FD) { 335 check_media_bay(fs->mdev->media_bay) != MB_FD) {
336 swim3_dbg("%s", " media bay absent, dropping req\n"); 336 swim3_dbg("%s", " media bay absent, dropping req\n");
337 swim3_end_request(fs, -ENODEV, 0); 337 swim3_end_request(fs, BLK_STS_IOERR, 0);
338 continue; 338 continue;
339 } 339 }
340 340
@@ -350,12 +350,12 @@ static void start_request(struct floppy_state *fs)
350 if (blk_rq_pos(req) >= fs->total_secs) { 350 if (blk_rq_pos(req) >= fs->total_secs) {
351 swim3_dbg(" pos out of bounds (%ld, max is %ld)\n", 351 swim3_dbg(" pos out of bounds (%ld, max is %ld)\n",
352 (long)blk_rq_pos(req), (long)fs->total_secs); 352 (long)blk_rq_pos(req), (long)fs->total_secs);
353 swim3_end_request(fs, -EIO, 0); 353 swim3_end_request(fs, BLK_STS_IOERR, 0);
354 continue; 354 continue;
355 } 355 }
356 if (fs->ejected) { 356 if (fs->ejected) {
357 swim3_dbg("%s", " disk ejected\n"); 357 swim3_dbg("%s", " disk ejected\n");
358 swim3_end_request(fs, -EIO, 0); 358 swim3_end_request(fs, BLK_STS_IOERR, 0);
359 continue; 359 continue;
360 } 360 }
361 361
@@ -364,7 +364,7 @@ static void start_request(struct floppy_state *fs)
364 fs->write_prot = swim3_readbit(fs, WRITE_PROT); 364 fs->write_prot = swim3_readbit(fs, WRITE_PROT);
365 if (fs->write_prot) { 365 if (fs->write_prot) {
366 swim3_dbg("%s", " try to write, disk write protected\n"); 366 swim3_dbg("%s", " try to write, disk write protected\n");
367 swim3_end_request(fs, -EIO, 0); 367 swim3_end_request(fs, BLK_STS_IOERR, 0);
368 continue; 368 continue;
369 } 369 }
370 } 370 }
@@ -548,7 +548,7 @@ static void act(struct floppy_state *fs)
548 if (fs->retries > 5) { 548 if (fs->retries > 5) {
549 swim3_err("Wrong cylinder in transfer, want: %d got %d\n", 549 swim3_err("Wrong cylinder in transfer, want: %d got %d\n",
550 fs->req_cyl, fs->cur_cyl); 550 fs->req_cyl, fs->cur_cyl);
551 swim3_end_request(fs, -EIO, 0); 551 swim3_end_request(fs, BLK_STS_IOERR, 0);
552 fs->state = idle; 552 fs->state = idle;
553 return; 553 return;
554 } 554 }
@@ -584,7 +584,7 @@ static void scan_timeout(unsigned long data)
584 out_8(&sw->intr_enable, 0); 584 out_8(&sw->intr_enable, 0);
585 fs->cur_cyl = -1; 585 fs->cur_cyl = -1;
586 if (fs->retries > 5) { 586 if (fs->retries > 5) {
587 swim3_end_request(fs, -EIO, 0); 587 swim3_end_request(fs, BLK_STS_IOERR, 0);
588 fs->state = idle; 588 fs->state = idle;
589 start_request(fs); 589 start_request(fs);
590 } else { 590 } else {
@@ -608,7 +608,7 @@ static void seek_timeout(unsigned long data)
608 out_8(&sw->select, RELAX); 608 out_8(&sw->select, RELAX);
609 out_8(&sw->intr_enable, 0); 609 out_8(&sw->intr_enable, 0);
610 swim3_err("%s", "Seek timeout\n"); 610 swim3_err("%s", "Seek timeout\n");
611 swim3_end_request(fs, -EIO, 0); 611 swim3_end_request(fs, BLK_STS_IOERR, 0);
612 fs->state = idle; 612 fs->state = idle;
613 start_request(fs); 613 start_request(fs);
614 spin_unlock_irqrestore(&swim3_lock, flags); 614 spin_unlock_irqrestore(&swim3_lock, flags);
@@ -637,7 +637,7 @@ static void settle_timeout(unsigned long data)
637 goto unlock; 637 goto unlock;
638 } 638 }
639 swim3_err("%s", "Seek settle timeout\n"); 639 swim3_err("%s", "Seek settle timeout\n");
640 swim3_end_request(fs, -EIO, 0); 640 swim3_end_request(fs, BLK_STS_IOERR, 0);
641 fs->state = idle; 641 fs->state = idle;
642 start_request(fs); 642 start_request(fs);
643 unlock: 643 unlock:
@@ -666,7 +666,7 @@ static void xfer_timeout(unsigned long data)
666 swim3_err("Timeout %sing sector %ld\n", 666 swim3_err("Timeout %sing sector %ld\n",
667 (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"), 667 (rq_data_dir(fs->cur_req)==WRITE? "writ": "read"),
668 (long)blk_rq_pos(fs->cur_req)); 668 (long)blk_rq_pos(fs->cur_req));
669 swim3_end_request(fs, -EIO, 0); 669 swim3_end_request(fs, BLK_STS_IOERR, 0);
670 fs->state = idle; 670 fs->state = idle;
671 start_request(fs); 671 start_request(fs);
672 spin_unlock_irqrestore(&swim3_lock, flags); 672 spin_unlock_irqrestore(&swim3_lock, flags);
@@ -703,7 +703,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
703 swim3_err("%s", "Seen sector but cyl=ff?\n"); 703 swim3_err("%s", "Seen sector but cyl=ff?\n");
704 fs->cur_cyl = -1; 704 fs->cur_cyl = -1;
705 if (fs->retries > 5) { 705 if (fs->retries > 5) {
706 swim3_end_request(fs, -EIO, 0); 706 swim3_end_request(fs, BLK_STS_IOERR, 0);
707 fs->state = idle; 707 fs->state = idle;
708 start_request(fs); 708 start_request(fs);
709 } else { 709 } else {
@@ -786,7 +786,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
786 swim3_err("Error %sing block %ld (err=%x)\n", 786 swim3_err("Error %sing block %ld (err=%x)\n",
787 rq_data_dir(req) == WRITE? "writ": "read", 787 rq_data_dir(req) == WRITE? "writ": "read",
788 (long)blk_rq_pos(req), err); 788 (long)blk_rq_pos(req), err);
789 swim3_end_request(fs, -EIO, 0); 789 swim3_end_request(fs, BLK_STS_IOERR, 0);
790 fs->state = idle; 790 fs->state = idle;
791 } 791 }
792 } else { 792 } else {
@@ -795,7 +795,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
795 swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid); 795 swim3_err("fd dma error: stat=%x resid=%d\n", stat, resid);
796 swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n", 796 swim3_err(" state=%d, dir=%x, intr=%x, err=%x\n",
797 fs->state, rq_data_dir(req), intr, err); 797 fs->state, rq_data_dir(req), intr, err);
798 swim3_end_request(fs, -EIO, 0); 798 swim3_end_request(fs, BLK_STS_IOERR, 0);
799 fs->state = idle; 799 fs->state = idle;
800 start_request(fs); 800 start_request(fs);
801 break; 801 break;
@@ -1223,6 +1223,7 @@ static int swim3_attach(struct macio_dev *mdev,
1223 put_disk(disk); 1223 put_disk(disk);
1224 return -ENOMEM; 1224 return -ENOMEM;
1225 } 1225 }
1226 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
1226 disk->queue->queuedata = &floppy_states[index]; 1227 disk->queue->queuedata = &floppy_states[index];
1227 1228
1228 if (index == 0) { 1229 if (index == 0) {
@@ -1245,7 +1246,7 @@ static int swim3_attach(struct macio_dev *mdev,
1245 return 0; 1246 return 0;
1246} 1247}
1247 1248
1248static struct of_device_id swim3_match[] = 1249static const struct of_device_id swim3_match[] =
1249{ 1250{
1250 { 1251 {
1251 .name = "swim3", 1252 .name = "swim3",
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index c8e072caf56f..08586dc14e85 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -745,7 +745,7 @@ static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
745 745
746static inline void carm_end_request_queued(struct carm_host *host, 746static inline void carm_end_request_queued(struct carm_host *host,
747 struct carm_request *crq, 747 struct carm_request *crq,
748 int error) 748 blk_status_t error)
749{ 749{
750 struct request *req = crq->rq; 750 struct request *req = crq->rq;
751 int rc; 751 int rc;
@@ -791,7 +791,7 @@ static inline void carm_round_robin(struct carm_host *host)
791} 791}
792 792
793static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq, 793static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
794 int error) 794 blk_status_t error)
795{ 795{
796 carm_end_request_queued(host, crq, error); 796 carm_end_request_queued(host, crq, error);
797 if (max_queue == 1) 797 if (max_queue == 1)
@@ -869,14 +869,14 @@ queue_one_request:
869 sg = &crq->sg[0]; 869 sg = &crq->sg[0];
870 n_elem = blk_rq_map_sg(q, rq, sg); 870 n_elem = blk_rq_map_sg(q, rq, sg);
871 if (n_elem <= 0) { 871 if (n_elem <= 0) {
872 carm_end_rq(host, crq, -EIO); 872 carm_end_rq(host, crq, BLK_STS_IOERR);
873 return; /* request with no s/g entries? */ 873 return; /* request with no s/g entries? */
874 } 874 }
875 875
876 /* map scatterlist to PCI bus addresses */ 876 /* map scatterlist to PCI bus addresses */
877 n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir); 877 n_elem = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
878 if (n_elem <= 0) { 878 if (n_elem <= 0) {
879 carm_end_rq(host, crq, -EIO); 879 carm_end_rq(host, crq, BLK_STS_IOERR);
880 return; /* request with no s/g entries? */ 880 return; /* request with no s/g entries? */
881 } 881 }
882 crq->n_elem = n_elem; 882 crq->n_elem = n_elem;
@@ -937,7 +937,7 @@ queue_one_request:
937 937
938static void carm_handle_array_info(struct carm_host *host, 938static void carm_handle_array_info(struct carm_host *host,
939 struct carm_request *crq, u8 *mem, 939 struct carm_request *crq, u8 *mem,
940 int error) 940 blk_status_t error)
941{ 941{
942 struct carm_port *port; 942 struct carm_port *port;
943 u8 *msg_data = mem + sizeof(struct carm_array_info); 943 u8 *msg_data = mem + sizeof(struct carm_array_info);
@@ -997,7 +997,7 @@ out:
997 997
998static void carm_handle_scan_chan(struct carm_host *host, 998static void carm_handle_scan_chan(struct carm_host *host,
999 struct carm_request *crq, u8 *mem, 999 struct carm_request *crq, u8 *mem,
1000 int error) 1000 blk_status_t error)
1001{ 1001{
1002 u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET; 1002 u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
1003 unsigned int i, dev_count = 0; 1003 unsigned int i, dev_count = 0;
@@ -1029,7 +1029,7 @@ out:
1029} 1029}
1030 1030
1031static void carm_handle_generic(struct carm_host *host, 1031static void carm_handle_generic(struct carm_host *host,
1032 struct carm_request *crq, int error, 1032 struct carm_request *crq, blk_status_t error,
1033 int cur_state, int next_state) 1033 int cur_state, int next_state)
1034{ 1034{
1035 DPRINTK("ENTER\n"); 1035 DPRINTK("ENTER\n");
@@ -1045,7 +1045,7 @@ static void carm_handle_generic(struct carm_host *host,
1045} 1045}
1046 1046
1047static inline void carm_handle_rw(struct carm_host *host, 1047static inline void carm_handle_rw(struct carm_host *host,
1048 struct carm_request *crq, int error) 1048 struct carm_request *crq, blk_status_t error)
1049{ 1049{
1050 int pci_dir; 1050 int pci_dir;
1051 1051
@@ -1067,7 +1067,7 @@ static inline void carm_handle_resp(struct carm_host *host,
1067 u32 handle = le32_to_cpu(ret_handle_le); 1067 u32 handle = le32_to_cpu(ret_handle_le);
1068 unsigned int msg_idx; 1068 unsigned int msg_idx;
1069 struct carm_request *crq; 1069 struct carm_request *crq;
1070 int error = (status == RMSG_OK) ? 0 : -EIO; 1070 blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
1071 u8 *mem; 1071 u8 *mem;
1072 1072
1073 VPRINTK("ENTER, handle == 0x%x\n", handle); 1073 VPRINTK("ENTER, handle == 0x%x\n", handle);
@@ -1155,7 +1155,7 @@ static inline void carm_handle_resp(struct carm_host *host,
1155err_out: 1155err_out:
1156 printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n", 1156 printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
1157 pci_name(host->pdev), crq->msg_type, crq->msg_subtype); 1157 pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
1158 carm_end_rq(host, crq, -EIO); 1158 carm_end_rq(host, crq, BLK_STS_IOERR);
1159} 1159}
1160 1160
1161static inline void carm_handle_responses(struct carm_host *host) 1161static inline void carm_handle_responses(struct carm_host *host)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index c141cc3be22b..0677d2514665 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -454,7 +454,7 @@ static void process_page(unsigned long data)
454 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); 454 PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
455 if (control & DMASCR_HARD_ERROR) { 455 if (control & DMASCR_HARD_ERROR) {
456 /* error */ 456 /* error */
457 bio->bi_error = -EIO; 457 bio->bi_status = BLK_STS_IOERR;
458 dev_printk(KERN_WARNING, &card->dev->dev, 458 dev_printk(KERN_WARNING, &card->dev->dev,
459 "I/O error on sector %d/%d\n", 459 "I/O error on sector %d/%d\n",
460 le32_to_cpu(desc->local_addr)>>9, 460 le32_to_cpu(desc->local_addr)>>9,
@@ -529,7 +529,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio)
529 (unsigned long long)bio->bi_iter.bi_sector, 529 (unsigned long long)bio->bi_iter.bi_sector,
530 bio->bi_iter.bi_size); 530 bio->bi_iter.bi_size);
531 531
532 blk_queue_split(q, &bio, q->bio_split); 532 blk_queue_split(q, &bio);
533 533
534 spin_lock_irq(&card->lock); 534 spin_lock_irq(&card->lock);
535 *card->biotail = bio; 535 *card->biotail = bio;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 553cc4c542b4..0297ad7c1452 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -64,15 +64,15 @@ struct virtblk_req {
64 struct scatterlist sg[]; 64 struct scatterlist sg[];
65}; 65};
66 66
67static inline int virtblk_result(struct virtblk_req *vbr) 67static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
68{ 68{
69 switch (vbr->status) { 69 switch (vbr->status) {
70 case VIRTIO_BLK_S_OK: 70 case VIRTIO_BLK_S_OK:
71 return 0; 71 return BLK_STS_OK;
72 case VIRTIO_BLK_S_UNSUPP: 72 case VIRTIO_BLK_S_UNSUPP:
73 return -ENOTTY; 73 return BLK_STS_NOTSUPP;
74 default: 74 default:
75 return -EIO; 75 return BLK_STS_IOERR;
76 } 76 }
77} 77}
78 78
@@ -214,7 +214,7 @@ static void virtblk_done(struct virtqueue *vq)
214 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 214 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
215} 215}
216 216
217static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 217static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
218 const struct blk_mq_queue_data *bd) 218 const struct blk_mq_queue_data *bd)
219{ 219{
220 struct virtio_blk *vblk = hctx->queue->queuedata; 220 struct virtio_blk *vblk = hctx->queue->queuedata;
@@ -246,7 +246,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
246 break; 246 break;
247 default: 247 default:
248 WARN_ON_ONCE(1); 248 WARN_ON_ONCE(1);
249 return BLK_MQ_RQ_QUEUE_ERROR; 249 return BLK_STS_IOERR;
250 } 250 }
251 251
252 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 252 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
@@ -276,8 +276,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
276 /* Out of mem doesn't actually happen, since we fall back 276 /* Out of mem doesn't actually happen, since we fall back
277 * to direct descriptors */ 277 * to direct descriptors */
278 if (err == -ENOMEM || err == -ENOSPC) 278 if (err == -ENOMEM || err == -ENOSPC)
279 return BLK_MQ_RQ_QUEUE_BUSY; 279 return BLK_STS_RESOURCE;
280 return BLK_MQ_RQ_QUEUE_ERROR; 280 return BLK_STS_IOERR;
281 } 281 }
282 282
283 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 283 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
@@ -286,7 +286,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
286 286
287 if (notify) 287 if (notify)
288 virtqueue_notify(vblk->vqs[qid].vq); 288 virtqueue_notify(vblk->vqs[qid].vq);
289 return BLK_MQ_RQ_QUEUE_OK; 289 return BLK_STS_OK;
290} 290}
291 291
292/* return id (s/n) string for *disk to *id_str 292/* return id (s/n) string for *disk to *id_str
@@ -307,7 +307,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
307 goto out; 307 goto out;
308 308
309 blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); 309 blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
310 err = virtblk_result(blk_mq_rq_to_pdu(req)); 310 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
311out: 311out:
312 blk_put_request(req); 312 blk_put_request(req);
313 return err; 313 return err;
@@ -720,9 +720,6 @@ static int virtblk_probe(struct virtio_device *vdev)
720 /* We can handle whatever the host told us to handle. */ 720 /* We can handle whatever the host told us to handle. */
721 blk_queue_max_segments(q, vblk->sg_elems-2); 721 blk_queue_max_segments(q, vblk->sg_elems-2);
722 722
723 /* No need to bounce any requests */
724 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
725
726 /* No real sector limit. */ 723 /* No real sector limit. */
727 blk_queue_max_hw_sectors(q, -1U); 724 blk_queue_max_hw_sectors(q, -1U);
728 725
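
virtio_blk shows the blk-mq half of the conversion: ->queue_rq() now returns blk_status_t directly, so BLK_MQ_RQ_QUEUE_OK/BUSY/ERROR map onto BLK_STS_OK, BLK_STS_RESOURCE and BLK_STS_IOERR, and per-device result decoding (virtblk_result()) produces the same codes, including BLK_STS_NOTSUPP for unsupported commands. A condensed sketch of a queue_rq under the new contract; my_queue_rq and my_submit are hypothetical:

#include <linux/blk-mq.h>

static int my_submit(struct request *rq);      /* hypothetical, returns -errno */

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        int err;

        blk_mq_start_request(rq);

        err = my_submit(rq);
        if (err == -ENOMEM || err == -ENOSPC)
                return BLK_STS_RESOURCE;       /* was BLK_MQ_RQ_QUEUE_BUSY */
        if (err)
                return BLK_STS_IOERR;          /* was BLK_MQ_RQ_QUEUE_ERROR */
        return BLK_STS_OK;                     /* was BLK_MQ_RQ_QUEUE_OK */
}
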
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 0e824091a12f..fe7cd58c43d0 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1066,20 +1066,17 @@ static void xen_blk_drain_io(struct xen_blkif_ring *ring)
1066 atomic_set(&blkif->drain, 0); 1066 atomic_set(&blkif->drain, 0);
1067} 1067}
1068 1068
1069/* 1069static void __end_block_io_op(struct pending_req *pending_req,
1070 * Completion callback on the bio's. Called as bh->b_end_io() 1070 blk_status_t error)
1071 */
1072
1073static void __end_block_io_op(struct pending_req *pending_req, int error)
1074{ 1071{
1075 /* An error fails the entire request. */ 1072 /* An error fails the entire request. */
1076 if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) && 1073 if (pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE &&
1077 (error == -EOPNOTSUPP)) { 1074 error == BLK_STS_NOTSUPP) {
1078 pr_debug("flush diskcache op failed, not supported\n"); 1075 pr_debug("flush diskcache op failed, not supported\n");
1079 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0); 1076 xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
1080 pending_req->status = BLKIF_RSP_EOPNOTSUPP; 1077 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
1081 } else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && 1078 } else if (pending_req->operation == BLKIF_OP_WRITE_BARRIER &&
1082 (error == -EOPNOTSUPP)) { 1079 error == BLK_STS_NOTSUPP) {
1083 pr_debug("write barrier op failed, not supported\n"); 1080 pr_debug("write barrier op failed, not supported\n");
1084 xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0); 1081 xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
1085 pending_req->status = BLKIF_RSP_EOPNOTSUPP; 1082 pending_req->status = BLKIF_RSP_EOPNOTSUPP;
@@ -1103,7 +1100,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
1103 */ 1100 */
1104static void end_block_io_op(struct bio *bio) 1101static void end_block_io_op(struct bio *bio)
1105{ 1102{
1106 __end_block_io_op(bio->bi_private, bio->bi_error); 1103 __end_block_io_op(bio->bi_private, bio->bi_status);
1107 bio_put(bio); 1104 bio_put(bio);
1108} 1105}
1109 1106
@@ -1420,7 +1417,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
1420 for (i = 0; i < nbio; i++) 1417 for (i = 0; i < nbio; i++)
1421 bio_put(biolist[i]); 1418 bio_put(biolist[i]);
1422 atomic_set(&pending_req->pendcnt, 1); 1419 atomic_set(&pending_req->pendcnt, 1);
1423 __end_block_io_op(pending_req, -EINVAL); 1420 __end_block_io_op(pending_req, BLK_STS_RESOURCE);
1424 msleep(1); /* back off a bit */ 1421 msleep(1); /* back off a bit */
1425 return -EIO; 1422 return -EIO;
1426} 1423}
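
On the backend side, xen-blkback consumes the new status in its bio completion handler (bio->bi_status) and compares against BLK_STS_NOTSUPP rather than -EOPNOTSUPP before downgrading a failed flush or barrier to BLKIF_RSP_EOPNOTSUPP. A reduced sketch of that status-to-protocol mapping; my_status_to_blkif is an illustrative helper, not a function from the patch:

#include <linux/blk_types.h>
#include <xen/interface/io/blkif.h>

static int my_status_to_blkif(blk_status_t error)
{
        if (error == BLK_STS_NOTSUPP)
                return BLKIF_RSP_EOPNOTSUPP;   /* was: error == -EOPNOTSUPP */
        if (error)
                return BLKIF_RSP_ERROR;
        return BLKIF_RSP_OKAY;
}
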
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 39459631667c..c852ed3c01d5 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -110,11 +110,6 @@ struct blk_shadow {
110 unsigned long associated_id; 110 unsigned long associated_id;
111}; 111};
112 112
113struct split_bio {
114 struct bio *bio;
115 atomic_t pending;
116};
117
118struct blkif_req { 113struct blkif_req {
119 int error; 114 int error;
120}; 115};
@@ -881,7 +876,7 @@ static inline bool blkif_request_flush_invalid(struct request *req,
881 !info->feature_fua)); 876 !info->feature_fua));
882} 877}
883 878
884static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx, 879static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
885 const struct blk_mq_queue_data *qd) 880 const struct blk_mq_queue_data *qd)
886{ 881{
887 unsigned long flags; 882 unsigned long flags;
@@ -904,16 +899,16 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
904 899
905 flush_requests(rinfo); 900 flush_requests(rinfo);
906 spin_unlock_irqrestore(&rinfo->ring_lock, flags); 901 spin_unlock_irqrestore(&rinfo->ring_lock, flags);
907 return BLK_MQ_RQ_QUEUE_OK; 902 return BLK_STS_OK;
908 903
909out_err: 904out_err:
910 spin_unlock_irqrestore(&rinfo->ring_lock, flags); 905 spin_unlock_irqrestore(&rinfo->ring_lock, flags);
911 return BLK_MQ_RQ_QUEUE_ERROR; 906 return BLK_STS_IOERR;
912 907
913out_busy: 908out_busy:
914 spin_unlock_irqrestore(&rinfo->ring_lock, flags); 909 spin_unlock_irqrestore(&rinfo->ring_lock, flags);
915 blk_mq_stop_hw_queue(hctx); 910 blk_mq_stop_hw_queue(hctx);
916 return BLK_MQ_RQ_QUEUE_BUSY; 911 return BLK_STS_RESOURCE;
917} 912}
918 913
919static void blkif_complete_rq(struct request *rq) 914static void blkif_complete_rq(struct request *rq)
@@ -958,9 +953,6 @@ static void blkif_set_queue_limits(struct blkfront_info *info)
958 953
959 /* Make sure buffer addresses are sector-aligned. */ 954 /* Make sure buffer addresses are sector-aligned. */
960 blk_queue_dma_alignment(rq, 511); 955 blk_queue_dma_alignment(rq, 511);
961
962 /* Make sure we don't use bounce buffers. */
963 blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
964} 956}
965 957
966static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, 958static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -1601,14 +1593,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
1601 continue; 1593 continue;
1602 } 1594 }
1603 1595
1604 blkif_req(req)->error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; 1596 if (bret->status == BLKIF_RSP_OKAY)
1597 blkif_req(req)->error = BLK_STS_OK;
1598 else
1599 blkif_req(req)->error = BLK_STS_IOERR;
1600
1605 switch (bret->operation) { 1601 switch (bret->operation) {
1606 case BLKIF_OP_DISCARD: 1602 case BLKIF_OP_DISCARD:
1607 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 1603 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
1608 struct request_queue *rq = info->rq; 1604 struct request_queue *rq = info->rq;
1609 printk(KERN_WARNING "blkfront: %s: %s op failed\n", 1605 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
1610 info->gd->disk_name, op_name(bret->operation)); 1606 info->gd->disk_name, op_name(bret->operation));
1611 blkif_req(req)->error = -EOPNOTSUPP; 1607 blkif_req(req)->error = BLK_STS_NOTSUPP;
1612 info->feature_discard = 0; 1608 info->feature_discard = 0;
1613 info->feature_secdiscard = 0; 1609 info->feature_secdiscard = 0;
1614 queue_flag_clear(QUEUE_FLAG_DISCARD, rq); 1610 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
@@ -1626,11 +1622,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
1626 rinfo->shadow[id].req.u.rw.nr_segments == 0)) { 1622 rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
1627 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", 1623 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
1628 info->gd->disk_name, op_name(bret->operation)); 1624 info->gd->disk_name, op_name(bret->operation));
1629 blkif_req(req)->error = -EOPNOTSUPP; 1625 blkif_req(req)->error = BLK_STS_NOTSUPP;
1630 } 1626 }
1631 if (unlikely(blkif_req(req)->error)) { 1627 if (unlikely(blkif_req(req)->error)) {
1632 if (blkif_req(req)->error == -EOPNOTSUPP) 1628 if (blkif_req(req)->error == BLK_STS_NOTSUPP)
1633 blkif_req(req)->error = 0; 1629 blkif_req(req)->error = BLK_STS_OK;
1634 info->feature_fua = 0; 1630 info->feature_fua = 0;
1635 info->feature_flush = 0; 1631 info->feature_flush = 0;
1636 xlvbd_flush(info); 1632 xlvbd_flush(info);
@@ -1996,28 +1992,13 @@ static int blkfront_probe(struct xenbus_device *dev,
1996 return 0; 1992 return 0;
1997} 1993}
1998 1994
1999static void split_bio_end(struct bio *bio)
2000{
2001 struct split_bio *split_bio = bio->bi_private;
2002
2003 if (atomic_dec_and_test(&split_bio->pending)) {
2004 split_bio->bio->bi_phys_segments = 0;
2005 split_bio->bio->bi_error = bio->bi_error;
2006 bio_endio(split_bio->bio);
2007 kfree(split_bio);
2008 }
2009 bio_put(bio);
2010}
2011
2012static int blkif_recover(struct blkfront_info *info) 1995static int blkif_recover(struct blkfront_info *info)
2013{ 1996{
2014 unsigned int i, r_index; 1997 unsigned int r_index;
2015 struct request *req, *n; 1998 struct request *req, *n;
2016 int rc; 1999 int rc;
2017 struct bio *bio, *cloned_bio; 2000 struct bio *bio;
2018 unsigned int segs, offset; 2001 unsigned int segs;
2019 int pending, size;
2020 struct split_bio *split_bio;
2021 2002
2022 blkfront_gather_backend_features(info); 2003 blkfront_gather_backend_features(info);
2023 /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ 2004 /* Reset limits changed by blk_mq_update_nr_hw_queues(). */
@@ -2056,34 +2037,6 @@ static int blkif_recover(struct blkfront_info *info)
2056 2037
2057 while ((bio = bio_list_pop(&info->bio_list)) != NULL) { 2038 while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
2058 /* Traverse the list of pending bios and re-queue them */ 2039 /* Traverse the list of pending bios and re-queue them */
2059 if (bio_segments(bio) > segs) {
2060 /*
2061 * This bio has more segments than what we can
2062 * handle, we have to split it.
2063 */
2064 pending = (bio_segments(bio) + segs - 1) / segs;
2065 split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
2066 BUG_ON(split_bio == NULL);
2067 atomic_set(&split_bio->pending, pending);
2068 split_bio->bio = bio;
2069 for (i = 0; i < pending; i++) {
2070 offset = (i * segs * XEN_PAGE_SIZE) >> 9;
2071 size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
2072 (unsigned int)bio_sectors(bio) - offset);
2073 cloned_bio = bio_clone(bio, GFP_NOIO);
2074 BUG_ON(cloned_bio == NULL);
2075 bio_trim(cloned_bio, offset, size);
2076 cloned_bio->bi_private = split_bio;
2077 cloned_bio->bi_end_io = split_bio_end;
2078 submit_bio(cloned_bio);
2079 }
2080 /*
2081 * Now we have to wait for all those smaller bios to
2082 * end, so we can also end the "parent" bio.
2083 */
2084 continue;
2085 }
2086 /* We don't need to split this bio */
2087 submit_bio(bio); 2040 submit_bio(bio);
2088 } 2041 }
2089 2042
@@ -2137,7 +2090,7 @@ static int blkfront_resume(struct xenbus_device *dev)
2137 merge_bio.tail = shadow[j].request->biotail; 2090 merge_bio.tail = shadow[j].request->biotail;
2138 bio_list_merge(&info->bio_list, &merge_bio); 2091 bio_list_merge(&info->bio_list, &merge_bio);
2139 shadow[j].request->bio = NULL; 2092 shadow[j].request->bio = NULL;
2140 blk_mq_end_request(shadow[j].request, 0); 2093 blk_mq_end_request(shadow[j].request, BLK_STS_OK);
2141 } 2094 }
2142 } 2095 }
2143 2096
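
In xen-blkfront the blk-mq ->queue_rq() return convention changes along with the rest: BLK_MQ_RQ_QUEUE_OK/ERROR/BUSY become BLK_STS_OK/IOERR/RESOURCE, so queueing and completion share one status type. The driver-private split_bio machinery in blkif_recover() also goes away; the block core is expected to split over-sized bios when they are resubmitted, so the clone-and-trim loop is no longer needed. A minimal standalone sketch of the new return convention (simplified stand-in types, not the kernel API):

/* Sketch of a blk-mq ->queue_rq()-style function after the conversion:
 * the three BLK_MQ_RQ_QUEUE_* return codes map onto blk_status_t values.
 * Types and names here are simplified stand-ins for the kernel ones. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned char blk_status_t;
enum { BLK_STS_OK = 0, BLK_STS_RESOURCE = 9, BLK_STS_IOERR = 10 };

struct fake_ring { bool full; bool broken; };

static blk_status_t queue_rq(struct fake_ring *ring)
{
	if (ring->full)
		return BLK_STS_RESOURCE;  /* was BLK_MQ_RQ_QUEUE_BUSY: retry later */
	if (ring->broken)
		return BLK_STS_IOERR;     /* was BLK_MQ_RQ_QUEUE_ERROR: fail request */
	return BLK_STS_OK;                /* was BLK_MQ_RQ_QUEUE_OK */
}

int main(void)
{
	struct fake_ring r = { .full = false, .broken = false };
	printf("status=%u\n", queue_rq(&r));
	return 0;
}
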
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 757dce2147e0..14459d66ef0c 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -471,7 +471,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
471 if (!blk_rq_is_passthrough(req)) 471 if (!blk_rq_is_passthrough(req))
472 break; 472 break;
473 blk_start_request(req); 473 blk_start_request(req);
474 __blk_end_request_all(req, -EIO); 474 __blk_end_request_all(req, BLK_STS_IOERR);
475 } 475 }
476 return req; 476 return req;
477} 477}
@@ -499,11 +499,11 @@ static void ace_fsm_dostate(struct ace_device *ace)
499 499
500 /* Drop all in-flight and pending requests */ 500 /* Drop all in-flight and pending requests */
501 if (ace->req) { 501 if (ace->req) {
502 __blk_end_request_all(ace->req, -EIO); 502 __blk_end_request_all(ace->req, BLK_STS_IOERR);
503 ace->req = NULL; 503 ace->req = NULL;
504 } 504 }
505 while ((req = blk_fetch_request(ace->queue)) != NULL) 505 while ((req = blk_fetch_request(ace->queue)) != NULL)
506 __blk_end_request_all(req, -EIO); 506 __blk_end_request_all(req, BLK_STS_IOERR);
507 507
508 /* Drop back to IDLE state and notify waiters */ 508 /* Drop back to IDLE state and notify waiters */
509 ace->fsm_state = ACE_FSM_STATE_IDLE; 509 ace->fsm_state = ACE_FSM_STATE_IDLE;
@@ -728,7 +728,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
728 } 728 }
729 729
730 /* bio finished; is there another one? */ 730 /* bio finished; is there another one? */
731 if (__blk_end_request_cur(ace->req, 0)) { 731 if (__blk_end_request_cur(ace->req, BLK_STS_OK)) {
732 /* dev_dbg(ace->dev, "next block; h=%u c=%u\n", 732 /* dev_dbg(ace->dev, "next block; h=%u c=%u\n",
733 * blk_rq_sectors(ace->req), 733 * blk_rq_sectors(ace->req),
734 * blk_rq_cur_sectors(ace->req)); 734 * blk_rq_cur_sectors(ace->req));
@@ -993,6 +993,7 @@ static int ace_setup(struct ace_device *ace)
993 if (ace->queue == NULL) 993 if (ace->queue == NULL)
994 goto err_blk_initq; 994 goto err_blk_initq;
995 blk_queue_logical_block_size(ace->queue, 512); 995 blk_queue_logical_block_size(ace->queue, 512);
996 blk_queue_bounce_limit(ace->queue, BLK_BOUNCE_HIGH);
996 997
997 /* 998 /*
998 * Allocate and initialize GD structure 999 * Allocate and initialize GD structure
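
The one-line xsysace addition reflects the bounce-buffering cleanup from this merge: highmem bouncing is no longer applied by default, so a driver that cannot handle highmem pages directly has to ask for it with blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH) at setup time (gdrom below gains the same call). A toy sketch of the opt-in pattern, with stand-in names only:

/* Sketch of the "bounce buffering is now opt-in" change: the block core no
 * longer bounces highmem pages by default, so hardware that cannot reach
 * high memory must request it at queue-setup time.  The constants and the
 * setter below are simplified stand-ins, not the kernel API. */
#include <stdio.h>

enum bounce_limit { BOUNCE_NONE, BOUNCE_HIGH };

struct queue_limits { enum bounce_limit bounce; };

static void queue_set_bounce_limit(struct queue_limits *q, enum bounce_limit b)
{
	q->bounce = b;                       /* xsysace/gdrom now do this explicitly */
}

int main(void)
{
	struct queue_limits q = { BOUNCE_NONE }; /* new default: no bouncing */
	queue_set_bounce_limit(&q, BOUNCE_HIGH); /* highmem-limited hardware opts in */
	printf("bounce=%d\n", q.bounce);
	return 0;
}
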
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 968f9e52effa..41c95c9b2ab4 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -74,14 +74,14 @@ static void do_z2_request(struct request_queue *q)
74 while (req) { 74 while (req) {
75 unsigned long start = blk_rq_pos(req) << 9; 75 unsigned long start = blk_rq_pos(req) << 9;
76 unsigned long len = blk_rq_cur_bytes(req); 76 unsigned long len = blk_rq_cur_bytes(req);
77 int err = 0; 77 blk_status_t err = BLK_STS_OK;
78 78
79 if (start + len > z2ram_size) { 79 if (start + len > z2ram_size) {
80 pr_err(DEVICE_NAME ": bad access: block=%llu, " 80 pr_err(DEVICE_NAME ": bad access: block=%llu, "
81 "count=%u\n", 81 "count=%u\n",
82 (unsigned long long)blk_rq_pos(req), 82 (unsigned long long)blk_rq_pos(req),
83 blk_rq_cur_sectors(req)); 83 blk_rq_cur_sectors(req));
84 err = -EIO; 84 err = BLK_STS_IOERR;
85 goto done; 85 goto done;
86 } 86 }
87 while (len) { 87 while (len) {
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 76c952fd9ab9..e36d160c458f 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2178,6 +2178,12 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2178 if (!q) 2178 if (!q)
2179 return -ENXIO; 2179 return -ENXIO;
2180 2180
2181 if (!blk_queue_scsi_passthrough(q)) {
2182 WARN_ONCE(true,
2183 "Attempt read CDDA info through a non-SCSI queue\n");
2184 return -EINVAL;
2185 }
2186
2181 cdi->last_sense = 0; 2187 cdi->last_sense = 0;
2182 2188
2183 while (nframes) { 2189 while (nframes) {
@@ -2195,7 +2201,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2195 break; 2201 break;
2196 } 2202 }
2197 req = scsi_req(rq); 2203 req = scsi_req(rq);
2198 scsi_req_init(rq);
2199 2204
2200 ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); 2205 ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
2201 if (ret) { 2206 if (ret) {
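
The cdrom change adds a guard so CDDA reads are only built on queues flagged for SCSI passthrough, and drops the explicit scsi_req_init() call because per-request initialization is now handled by the core when the request is set up. A standalone sketch of that guard-then-use shape (fake types, not the kernel ones):

/* Sketch of the new guard in cdrom_read_cdda_bpc(): passthrough commands are
 * only built on queues flagged as SCSI-passthrough capable, and per-request
 * init happens once at setup rather than by hand at each call site.
 * The types and flag below are simplified stand-ins. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_queue { bool scsi_passthrough; };

struct fake_scsi_req { int result; };

/* setup-time initializer: replaces the scattered scsi_req_init() calls */
static void init_request(struct fake_scsi_req *req)
{
	req->result = 0;
}

static int read_cdda(struct fake_queue *q)
{
	struct fake_scsi_req req;

	if (!q->scsi_passthrough) {
		fprintf(stderr, "attempt to read CDDA through a non-SCSI queue\n");
		return -EINVAL;
	}
	init_request(&req);        /* done for every request automatically */
	return req.result;
}

int main(void)
{
	struct fake_queue good = { .scsi_passthrough = true };
	struct fake_queue bad  = { .scsi_passthrough = false };
	printf("%d %d\n", read_cdda(&good), read_cdda(&bad));
	return 0;
}
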
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1372763a948f..6495b03f576c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -583,7 +583,8 @@ static int gdrom_set_interrupt_handlers(void)
583 */ 583 */
584static void gdrom_readdisk_dma(struct work_struct *work) 584static void gdrom_readdisk_dma(struct work_struct *work)
585{ 585{
586 int err, block, block_cnt; 586 int block, block_cnt;
587 blk_status_t err;
587 struct packet_command *read_command; 588 struct packet_command *read_command;
588 struct list_head *elem, *next; 589 struct list_head *elem, *next;
589 struct request *req; 590 struct request *req;
@@ -641,7 +642,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
641 __raw_writeb(1, GDROM_DMA_STATUS_REG); 642 __raw_writeb(1, GDROM_DMA_STATUS_REG);
642 wait_event_interruptible_timeout(request_queue, 643 wait_event_interruptible_timeout(request_queue,
643 gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); 644 gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
644 err = gd.transfer ? -EIO : 0; 645 err = gd.transfer ? BLK_STS_IOERR : BLK_STS_OK;
645 gd.transfer = 0; 646 gd.transfer = 0;
646 gd.pending = 0; 647 gd.pending = 0;
647 /* now seek to take the request spinlock 648 /* now seek to take the request spinlock
@@ -670,11 +671,11 @@ static void gdrom_request(struct request_queue *rq)
670 break; 671 break;
671 case REQ_OP_WRITE: 672 case REQ_OP_WRITE:
672 pr_notice("Read only device - write request ignored\n"); 673 pr_notice("Read only device - write request ignored\n");
673 __blk_end_request_all(req, -EIO); 674 __blk_end_request_all(req, BLK_STS_IOERR);
674 break; 675 break;
675 default: 676 default:
676 printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); 677 printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
677 __blk_end_request_all(req, -EIO); 678 __blk_end_request_all(req, BLK_STS_IOERR);
678 break; 679 break;
679 } 680 }
680 } 681 }
@@ -812,6 +813,7 @@ static int probe_gdrom(struct platform_device *devptr)
812 err = -ENOMEM; 813 err = -ENOMEM;
813 goto probe_fail_requestq; 814 goto probe_fail_requestq;
814 } 815 }
816 blk_queue_bounce_limit(gd.gdrom_rq, BLK_BOUNCE_HIGH);
815 817
816 err = probe_gdrom_setupqueue(); 818 err = probe_gdrom_setupqueue();
817 if (err) 819 if (err)
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 5901937284e7..14d1e7d9a1d6 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -93,7 +93,6 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
93 int error; 93 int error;
94 94
95 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 95 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
96 scsi_req_init(rq);
97 ide_req(rq)->type = ATA_PRIV_MISC; 96 ide_req(rq)->type = ATA_PRIV_MISC;
98 rq->special = (char *)pc; 97 rq->special = (char *)pc;
99 98
@@ -200,7 +199,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
200 memset(sense, 0, sizeof(*sense)); 199 memset(sense, 0, sizeof(*sense));
201 200
202 blk_rq_init(rq->q, sense_rq); 201 blk_rq_init(rq->q, sense_rq);
203 scsi_req_init(sense_rq); 202 scsi_req_init(req);
204 203
205 err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, 204 err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
206 GFP_NOIO); 205 GFP_NOIO);
@@ -273,7 +272,7 @@ void ide_retry_pc(ide_drive_t *drive)
273 ide_requeue_and_plug(drive, failed_rq); 272 ide_requeue_and_plug(drive, failed_rq);
274 if (ide_queue_sense_rq(drive, pc)) { 273 if (ide_queue_sense_rq(drive, pc)) {
275 blk_start_request(failed_rq); 274 blk_start_request(failed_rq);
276 ide_complete_rq(drive, -EIO, blk_rq_bytes(failed_rq)); 275 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
277 } 276 }
278} 277}
279EXPORT_SYMBOL_GPL(ide_retry_pc); 278EXPORT_SYMBOL_GPL(ide_retry_pc);
@@ -437,7 +436,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
437 436
438 /* No more interrupts */ 437 /* No more interrupts */
439 if ((stat & ATA_DRQ) == 0) { 438 if ((stat & ATA_DRQ) == 0) {
440 int uptodate, error; 439 int uptodate;
440 blk_status_t error;
441 441
442 debug_log("Packet command completed, %d bytes transferred\n", 442 debug_log("Packet command completed, %d bytes transferred\n",
443 blk_rq_bytes(rq)); 443 blk_rq_bytes(rq));
@@ -490,7 +490,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
490 490
491 if (ata_misc_request(rq)) { 491 if (ata_misc_request(rq)) {
492 scsi_req(rq)->result = 0; 492 scsi_req(rq)->result = 0;
493 error = 0; 493 error = BLK_STS_OK;
494 } else { 494 } else {
495 495
496 if (blk_rq_is_passthrough(rq) && uptodate <= 0) { 496 if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
@@ -498,7 +498,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
498 scsi_req(rq)->result = -EIO; 498 scsi_req(rq)->result = -EIO;
499 } 499 }
500 500
501 error = uptodate ? 0 : -EIO; 501 error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
502 } 502 }
503 503
504 ide_complete_rq(drive, error, blk_rq_bytes(rq)); 504 ide_complete_rq(drive, error, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 07e5ff3a64c3..81e18f9628d0 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -228,7 +228,7 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
228 scsi_req(failed)->sense_len = scsi_req(rq)->sense_len; 228 scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
229 cdrom_analyze_sense_data(drive, failed); 229 cdrom_analyze_sense_data(drive, failed);
230 230
231 if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed))) 231 if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
232 BUG(); 232 BUG();
233 } else 233 } else
234 cdrom_analyze_sense_data(drive, NULL); 234 cdrom_analyze_sense_data(drive, NULL);
@@ -438,7 +438,6 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
438 438
439 rq = blk_get_request(drive->queue, 439 rq = blk_get_request(drive->queue,
440 write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); 440 write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
441 scsi_req_init(rq);
442 memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB); 441 memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
443 ide_req(rq)->type = ATA_PRIV_PC; 442 ide_req(rq)->type = ATA_PRIV_PC;
444 rq->rq_flags |= rq_flags; 443 rq->rq_flags |= rq_flags;
@@ -508,7 +507,7 @@ static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
508 nr_bytes -= cmd->last_xfer_len; 507 nr_bytes -= cmd->last_xfer_len;
509 508
510 if (nr_bytes > 0) { 509 if (nr_bytes > 0) {
511 ide_complete_rq(drive, 0, nr_bytes); 510 ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
512 return true; 511 return true;
513 } 512 }
514 513
@@ -674,7 +673,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
674out_end: 673out_end:
675 if (blk_rq_is_scsi(rq) && rc == 0) { 674 if (blk_rq_is_scsi(rq) && rc == 0) {
676 scsi_req(rq)->resid_len = 0; 675 scsi_req(rq)->resid_len = 0;
677 blk_end_request_all(rq, 0); 676 blk_end_request_all(rq, BLK_STS_OK);
678 hwif->rq = NULL; 677 hwif->rq = NULL;
679 } else { 678 } else {
680 if (sense && uptodate) 679 if (sense && uptodate)
@@ -699,7 +698,7 @@ out_end:
699 scsi_req(rq)->resid_len += cmd->last_xfer_len; 698 scsi_req(rq)->resid_len += cmd->last_xfer_len;
700 } 699 }
701 700
702 ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq)); 701 ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
703 702
704 if (sense && rc == 2) 703 if (sense && rc == 2)
705 ide_error(drive, "request sense failure", stat); 704 ide_error(drive, "request sense failure", stat);
@@ -844,7 +843,7 @@ out_end:
844 if (nsectors == 0) 843 if (nsectors == 0)
845 nsectors = 1; 844 nsectors = 1;
846 845
847 ide_complete_rq(drive, uptodate ? 0 : -EIO, nsectors << 9); 846 ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
848 847
849 return ide_stopped; 848 return ide_stopped;
850} 849}
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 55cd736c39c6..9d26c9737e21 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,6 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
304 int ret; 304 int ret;
305 305
306 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 306 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
307 scsi_req_init(rq);
308 ide_req(rq)->type = ATA_PRIV_MISC; 307 ide_req(rq)->type = ATA_PRIV_MISC;
309 rq->rq_flags = RQF_QUIET; 308 rq->rq_flags = RQF_QUIET;
310 blk_execute_rq(drive->queue, cd->disk, rq, 0); 309 blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 9b69c32ee560..ef7c8c43a380 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,6 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
166 return setting->set(drive, arg); 166 return setting->set(drive, arg);
167 167
168 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); 168 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
169 scsi_req_init(rq);
170 ide_req(rq)->type = ATA_PRIV_MISC; 169 ide_req(rq)->type = ATA_PRIV_MISC;
171 scsi_req(rq)->cmd_len = 5; 170 scsi_req(rq)->cmd_len = 5;
172 scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC; 171 scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7c06237f3479..241983da5fc4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,6 @@ static int set_multcount(ide_drive_t *drive, int arg)
478 return -EBUSY; 478 return -EBUSY;
479 479
480 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 480 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
481 scsi_req_init(rq);
482 ide_req(rq)->type = ATA_PRIV_TASKFILE; 481 ide_req(rq)->type = ATA_PRIV_TASKFILE;
483 482
484 drive->mult_req = arg; 483 drive->mult_req = arg;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 51c81223e56d..54d4d78ca46a 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -104,7 +104,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *drive)
104 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) 104 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
105 ide_finish_cmd(drive, cmd, stat); 105 ide_finish_cmd(drive, cmd, stat);
106 else 106 else
107 ide_complete_rq(drive, 0, 107 ide_complete_rq(drive, BLK_STS_OK,
108 blk_rq_sectors(cmd->rq) << 9); 108 blk_rq_sectors(cmd->rq) << 9);
109 return ide_stopped; 109 return ide_stopped;
110 } 110 }
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index 4b7ffd7d158d..47d5f3379748 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -135,7 +135,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
135 return ide_stopped; 135 return ide_stopped;
136 } 136 }
137 scsi_req(rq)->result = err; 137 scsi_req(rq)->result = err;
138 ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); 138 ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
139 return ide_stopped; 139 return ide_stopped;
140 } 140 }
141 141
@@ -143,7 +143,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
143} 143}
144EXPORT_SYMBOL_GPL(ide_error); 144EXPORT_SYMBOL_GPL(ide_error);
145 145
146static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) 146static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
147{ 147{
148 struct request *rq = drive->hwif->rq; 148 struct request *rq = drive->hwif->rq;
149 149
@@ -151,7 +151,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
151 scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) { 151 scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
152 if (err <= 0 && scsi_req(rq)->result == 0) 152 if (err <= 0 && scsi_req(rq)->result == 0)
153 scsi_req(rq)->result = -EIO; 153 scsi_req(rq)->result = -EIO;
154 ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq)); 154 ide_complete_rq(drive, err, blk_rq_bytes(rq));
155 } 155 }
156} 156}
157 157
@@ -191,7 +191,7 @@ static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
191 } 191 }
192 /* done polling */ 192 /* done polling */
193 hwif->polling = 0; 193 hwif->polling = 0;
194 ide_complete_drive_reset(drive, 0); 194 ide_complete_drive_reset(drive, BLK_STS_OK);
195 return ide_stopped; 195 return ide_stopped;
196} 196}
197 197
@@ -225,7 +225,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
225 ide_hwif_t *hwif = drive->hwif; 225 ide_hwif_t *hwif = drive->hwif;
226 const struct ide_port_ops *port_ops = hwif->port_ops; 226 const struct ide_port_ops *port_ops = hwif->port_ops;
227 u8 tmp; 227 u8 tmp;
228 int err = 0; 228 blk_status_t err = BLK_STS_OK;
229 229
230 if (port_ops && port_ops->reset_poll) { 230 if (port_ops && port_ops->reset_poll) {
231 err = port_ops->reset_poll(drive); 231 err = port_ops->reset_poll(drive);
@@ -247,7 +247,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
247 printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n", 247 printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
248 hwif->name, tmp); 248 hwif->name, tmp);
249 drive->failures++; 249 drive->failures++;
250 err = -EIO; 250 err = BLK_STS_IOERR;
251 } else { 251 } else {
252 tmp = ide_read_error(drive); 252 tmp = ide_read_error(drive);
253 253
@@ -257,7 +257,7 @@ static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
257 } else { 257 } else {
258 ide_reset_report_error(hwif, tmp); 258 ide_reset_report_error(hwif, tmp);
259 drive->failures++; 259 drive->failures++;
260 err = -EIO; 260 err = BLK_STS_IOERR;
261 } 261 }
262 } 262 }
263out: 263out:
@@ -392,7 +392,7 @@ static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
392 392
393 if (io_ports->ctl_addr == 0) { 393 if (io_ports->ctl_addr == 0) {
394 spin_unlock_irqrestore(&hwif->lock, flags); 394 spin_unlock_irqrestore(&hwif->lock, flags);
395 ide_complete_drive_reset(drive, -ENXIO); 395 ide_complete_drive_reset(drive, BLK_STS_IOERR);
396 return ide_stopped; 396 return ide_stopped;
397 } 397 }
398 398
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 8ac6048cd2df..627b1f62a749 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -143,7 +143,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
143 143
144 drive->failed_pc = NULL; 144 drive->failed_pc = NULL;
145 drive->pc_callback(drive, 0); 145 drive->pc_callback(drive, 0);
146 ide_complete_rq(drive, -EIO, done); 146 ide_complete_rq(drive, BLK_STS_IOERR, done);
147 return ide_stopped; 147 return ide_stopped;
148 } 148 }
149 149
@@ -248,7 +248,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
248 248
249 if (ata_misc_request(rq)) { 249 if (ata_misc_request(rq)) {
250 scsi_req(rq)->result = 0; 250 scsi_req(rq)->result = 0;
251 ide_complete_rq(drive, 0, blk_rq_bytes(rq)); 251 ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
252 return ide_stopped; 252 return ide_stopped;
253 } else 253 } else
254 goto out_end; 254 goto out_end;
@@ -303,7 +303,7 @@ out_end:
303 drive->failed_pc = NULL; 303 drive->failed_pc = NULL;
304 if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0) 304 if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
305 scsi_req(rq)->result = -EIO; 305 scsi_req(rq)->result = -EIO;
306 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); 306 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
307 return ide_stopped; 307 return ide_stopped;
308} 308}
309 309
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 323af721f8cb..3a234701d92c 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -54,7 +54,7 @@
54#include <linux/uaccess.h> 54#include <linux/uaccess.h>
55#include <asm/io.h> 55#include <asm/io.h>
56 56
57int ide_end_rq(ide_drive_t *drive, struct request *rq, int error, 57int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
58 unsigned int nr_bytes) 58 unsigned int nr_bytes)
59{ 59{
60 /* 60 /*
@@ -112,7 +112,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
112 } 112 }
113} 113}
114 114
115int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) 115int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
116{ 116{
117 ide_hwif_t *hwif = drive->hwif; 117 ide_hwif_t *hwif = drive->hwif;
118 struct request *rq = hwif->rq; 118 struct request *rq = hwif->rq;
@@ -122,7 +122,7 @@ int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes)
122 * if failfast is set on a request, override number of sectors 122 * if failfast is set on a request, override number of sectors
123 * and complete the whole request right now 123 * and complete the whole request right now
124 */ 124 */
125 if (blk_noretry_request(rq) && error <= 0) 125 if (blk_noretry_request(rq) && error)
126 nr_bytes = blk_rq_sectors(rq) << 9; 126 nr_bytes = blk_rq_sectors(rq) << 9;
127 127
128 rc = ide_end_rq(drive, rq, error, nr_bytes); 128 rc = ide_end_rq(drive, rq, error, nr_bytes);
@@ -149,7 +149,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
149 scsi_req(rq)->result = -EIO; 149 scsi_req(rq)->result = -EIO;
150 } 150 }
151 151
152 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); 152 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
153} 153}
154 154
155static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) 155static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
@@ -272,7 +272,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
272 printk("%s: DRIVE_CMD (null)\n", drive->name); 272 printk("%s: DRIVE_CMD (null)\n", drive->name);
273#endif 273#endif
274 scsi_req(rq)->result = 0; 274 scsi_req(rq)->result = 0;
275 ide_complete_rq(drive, 0, blk_rq_bytes(rq)); 275 ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
276 276
277 return ide_stopped; 277 return ide_stopped;
278} 278}
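
Throughout the IDE core, ide_end_rq() and ide_complete_rq() now take a blk_status_t. That is also why the failfast check changes from "error <= 0" to a plain "error" test: the old code keyed off the negative-errno convention, while with blk_status_t zero means success and any non-zero value is a failure code. A tiny standalone illustration of the two conventions (names are not the kernel's):

/* Sketch of why the failfast check becomes a plain truth test: with
 * negative-errno completion, failures were negative numbers; with
 * blk_status_t, 0 is success and any non-zero value is a failure code. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned char blk_status_t;
enum { BLK_STS_OK = 0, BLK_STS_IOERR = 10 };

static bool failed_errno(int error)            { return error < 0; }
static bool failed_status(blk_status_t error)  { return error != 0; }

int main(void)
{
	printf("errno  -5 failed: %d\n", failed_errno(-5));
	printf("status 10 failed: %d\n", failed_status(BLK_STS_IOERR));
	printf("status  0 failed: %d\n", failed_status(BLK_STS_OK));
	return 0;
}
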
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 8c0d17297a7a..3661abb16a5f 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -126,7 +126,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
126 struct request *rq; 126 struct request *rq;
127 127
128 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 128 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
129 scsi_req_init(rq);
130 ide_req(rq)->type = ATA_PRIV_TASKFILE; 129 ide_req(rq)->type = ATA_PRIV_TASKFILE;
131 blk_execute_rq(drive->queue, NULL, rq, 0); 130 blk_execute_rq(drive->queue, NULL, rq, 0);
132 err = scsi_req(rq)->result ? -EIO : 0; 131 err = scsi_req(rq)->result ? -EIO : 0;
@@ -224,7 +223,6 @@ static int generic_drive_reset(ide_drive_t *drive)
224 int ret = 0; 223 int ret = 0;
225 224
226 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 225 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
227 scsi_req_init(rq);
228 ide_req(rq)->type = ATA_PRIV_MISC; 226 ide_req(rq)->type = ATA_PRIV_MISC;
229 scsi_req(rq)->cmd_len = 1; 227 scsi_req(rq)->cmd_len = 1;
230 scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET; 228 scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 94e3107f59b9..1f264d5d3f3f 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
32 spin_unlock_irq(&hwif->lock); 32 spin_unlock_irq(&hwif->lock);
33 33
34 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM); 34 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
35 scsi_req_init(rq);
36 scsi_req(rq)->cmd[0] = REQ_PARK_HEADS; 35 scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
37 scsi_req(rq)->cmd_len = 1; 36 scsi_req(rq)->cmd_len = 1;
38 ide_req(rq)->type = ATA_PRIV_MISC; 37 ide_req(rq)->type = ATA_PRIV_MISC;
@@ -48,7 +47,6 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
48 * timeout has expired, so power management will be reenabled. 47 * timeout has expired, so power management will be reenabled.
49 */ 48 */
50 rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT); 49 rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
51 scsi_req_init(rq);
52 if (IS_ERR(rq)) 50 if (IS_ERR(rq))
53 goto out; 51 goto out;
54 52
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 0977fc1f40ce..544f02d673ca 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
19 19
20 memset(&rqpm, 0, sizeof(rqpm)); 20 memset(&rqpm, 0, sizeof(rqpm));
21 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 21 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
22 scsi_req_init(rq);
23 ide_req(rq)->type = ATA_PRIV_PM_SUSPEND; 22 ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
24 rq->special = &rqpm; 23 rq->special = &rqpm;
25 rqpm.pm_step = IDE_PM_START_SUSPEND; 24 rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -40,7 +39,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
40 return ret; 39 return ret;
41} 40}
42 41
43static void ide_end_sync_rq(struct request *rq, int error) 42static void ide_end_sync_rq(struct request *rq, blk_status_t error)
44{ 43{
45 complete(rq->end_io_data); 44 complete(rq->end_io_data);
46} 45}
@@ -57,7 +56,7 @@ static int ide_pm_execute_rq(struct request *rq)
57 if (unlikely(blk_queue_dying(q))) { 56 if (unlikely(blk_queue_dying(q))) {
58 rq->rq_flags |= RQF_QUIET; 57 rq->rq_flags |= RQF_QUIET;
59 scsi_req(rq)->result = -ENXIO; 58 scsi_req(rq)->result = -ENXIO;
60 __blk_end_request_all(rq, 0); 59 __blk_end_request_all(rq, BLK_STS_OK);
61 spin_unlock_irq(q->queue_lock); 60 spin_unlock_irq(q->queue_lock);
62 return -ENXIO; 61 return -ENXIO;
63 } 62 }
@@ -91,7 +90,6 @@ int generic_ide_resume(struct device *dev)
91 90
92 memset(&rqpm, 0, sizeof(rqpm)); 91 memset(&rqpm, 0, sizeof(rqpm));
93 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 92 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
94 scsi_req_init(rq);
95 ide_req(rq)->type = ATA_PRIV_PM_RESUME; 93 ide_req(rq)->type = ATA_PRIV_PM_RESUME;
96 rq->rq_flags |= RQF_PREEMPT; 94 rq->rq_flags |= RQF_PREEMPT;
97 rq->special = &rqpm; 95 rq->special = &rqpm;
@@ -235,7 +233,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
235 233
236 drive->hwif->rq = NULL; 234 drive->hwif->rq = NULL;
237 235
238 if (blk_end_request(rq, 0, 0)) 236 if (blk_end_request(rq, BLK_STS_OK, 0))
239 BUG(); 237 BUG();
240} 238}
241 239
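
ide-pm also shows the request end_io callback signature change: completion hooks such as ide_end_sync_rq() now receive a blk_status_t instead of an int errno, matching blk_end_request()/__blk_end_request_all() taking BLK_STS_* values. A standalone sketch of the callback typedef change (stand-in types, not the kernel's rq_end_io_fn):

/* Sketch of the end_io callback signature change: completion hooks now
 * receive a blk_status_t instead of an int errno. */
#include <stdio.h>

typedef unsigned char blk_status_t;
enum { BLK_STS_OK = 0, BLK_STS_IOERR = 10 };

struct fake_request { void *end_io_data; };

/* old style: void (*end_io)(struct request *, int error);  new style: */
typedef void (*end_io_fn)(struct fake_request *rq, blk_status_t error);

static void end_sync_rq(struct fake_request *rq, blk_status_t error)
{
	(void)rq;
	printf("request completed, status=%u\n", error);
}

int main(void)
{
	struct fake_request rq = { 0 };
	end_io_fn done = end_sync_rq;

	done(&rq, BLK_STS_OK);
	return 0;
}
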
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 023562565d11..01b2adfd8226 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,12 +741,12 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
741 } 741 }
742} 742}
743 743
744static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp) 744static void ide_initialize_rq(struct request *rq)
745{ 745{
746 struct ide_request *req = blk_mq_rq_to_pdu(rq); 746 struct ide_request *req = blk_mq_rq_to_pdu(rq);
747 747
748 scsi_req_init(&req->sreq);
748 req->sreq.sense = req->sense; 749 req->sreq.sense = req->sense;
749 return 0;
750} 750}
751 751
752/* 752/*
@@ -771,8 +771,9 @@ static int ide_init_queue(ide_drive_t *drive)
771 return 1; 771 return 1;
772 772
773 q->request_fn = do_ide_request; 773 q->request_fn = do_ide_request;
774 q->init_rq_fn = ide_init_rq; 774 q->initialize_rq_fn = ide_initialize_rq;
775 q->cmd_size = sizeof(struct ide_request); 775 q->cmd_size = sizeof(struct ide_request);
776 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
776 if (blk_init_allocated_queue(q) < 0) { 777 if (blk_init_allocated_queue(q) < 0) {
777 blk_cleanup_queue(q); 778 blk_cleanup_queue(q);
778 return 1; 779 return 1;
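
In ide-probe the per-request hook changes shape: the int-returning init_rq_fn is replaced by a void initialize_rq_fn, and the queue is flagged QUEUE_FLAG_SCSI_PASSTHROUGH so the core takes care of scsi_req_init(), leaving the driver hook to wire up its sense buffer only. A rough standalone sketch of that hook shape (simplified stand-ins, not the kernel structures):

/* Sketch of the init_rq_fn -> initialize_rq_fn change: the per-request hook
 * no longer returns an error, it only initializes the driver PDU; the
 * scsi_request init itself is handled by the core for passthrough queues. */
#include <stdio.h>
#include <string.h>

struct ide_pdu { char sense[32]; char *sense_ptr; };

struct fake_queue {
	void (*initialize_rq_fn)(struct ide_pdu *pdu);   /* was int (*)(...) */
	unsigned int scsi_passthrough : 1;               /* new queue flag   */
};

static void ide_initialize_rq(struct ide_pdu *pdu)
{
	memset(pdu->sense, 0, sizeof(pdu->sense));
	pdu->sense_ptr = pdu->sense;     /* point the request at its sense buffer */
}

int main(void)
{
	struct fake_queue q = { .initialize_rq_fn = ide_initialize_rq,
				.scsi_passthrough = 1 };
	struct ide_pdu pdu;

	q.initialize_rq_fn(&pdu);
	printf("passthrough=%d sense ready=%d\n", (int)q.scsi_passthrough,
	       pdu.sense_ptr == pdu.sense);
	return 0;
}
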
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index a0651f948b76..fd57e8ccc47a 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -474,7 +474,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
474 474
475 drive->failed_pc = NULL; 475 drive->failed_pc = NULL;
476 drive->pc_callback(drive, 0); 476 drive->pc_callback(drive, 0);
477 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); 477 ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
478 return ide_stopped; 478 return ide_stopped;
479 } 479 }
480 ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries, 480 ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
@@ -855,7 +855,6 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
855 BUG_ON(size < 0 || size % tape->blk_size); 855 BUG_ON(size < 0 || size % tape->blk_size);
856 856
857 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); 857 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
858 scsi_req_init(rq);
859 ide_req(rq)->type = ATA_PRIV_MISC; 858 ide_req(rq)->type = ATA_PRIV_MISC;
860 scsi_req(rq)->cmd[13] = cmd; 859 scsi_req(rq)->cmd[13] = cmd;
861 rq->rq_disk = tape->disk; 860 rq->rq_disk = tape->disk;
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index d71199d23c9e..4efe4c6e956c 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -318,7 +318,7 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
318 } 318 }
319 319
320 if (nr_bytes > 0) 320 if (nr_bytes > 0)
321 ide_complete_rq(drive, 0, nr_bytes); 321 ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
322 } 322 }
323} 323}
324 324
@@ -336,7 +336,7 @@ void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
336 ide_driveid_update(drive); 336 ide_driveid_update(drive);
337 } 337 }
338 338
339 ide_complete_rq(drive, err ? -EIO : 0, blk_rq_bytes(rq)); 339 ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
340} 340}
341 341
342/* 342/*
@@ -394,7 +394,7 @@ out_end:
394 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0) 394 if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
395 ide_finish_cmd(drive, cmd, stat); 395 ide_finish_cmd(drive, cmd, stat);
396 else 396 else
397 ide_complete_rq(drive, 0, blk_rq_sectors(cmd->rq) << 9); 397 ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
398 return ide_stopped; 398 return ide_stopped;
399out_err: 399out_err:
400 ide_error_cmd(drive, cmd); 400 ide_error_cmd(drive, cmd);
@@ -433,7 +433,6 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
433 rq = blk_get_request(drive->queue, 433 rq = blk_get_request(drive->queue,
434 (cmd->tf_flags & IDE_TFLAG_WRITE) ? 434 (cmd->tf_flags & IDE_TFLAG_WRITE) ?
435 REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM); 435 REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
436 scsi_req_init(rq);
437 ide_req(rq)->type = ATA_PRIV_TASKFILE; 436 ide_req(rq)->type = ATA_PRIV_TASKFILE;
438 437
439 /* 438 /*
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index 6a1849bb476c..57eea5a9047f 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -406,7 +406,7 @@ static int siimage_dma_test_irq(ide_drive_t *drive)
406 * yet. 406 * yet.
407 */ 407 */
408 408
409static int sil_sata_reset_poll(ide_drive_t *drive) 409static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
410{ 410{
411 ide_hwif_t *hwif = drive->hwif; 411 ide_hwif_t *hwif = drive->hwif;
412 void __iomem *sata_status_addr 412 void __iomem *sata_status_addr
@@ -419,11 +419,11 @@ static int sil_sata_reset_poll(ide_drive_t *drive)
419 if ((sata_stat & 0x03) != 0x03) { 419 if ((sata_stat & 0x03) != 0x03) {
420 printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n", 420 printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
421 hwif->name, sata_stat); 421 hwif->name, sata_stat);
422 return -ENXIO; 422 return BLK_STS_IOERR;
423 } 423 }
424 } 424 }
425 425
426 return 0; 426 return BLK_STS_OK;
427} 427}
428 428
429/** 429/**
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 6a4aa608ad95..ddae430b6eae 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -252,8 +252,9 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
252 } 252 }
253 mutex_unlock(&dev->mlock); 253 mutex_unlock(&dev->mlock);
254 254
255 if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end)) 255 ret = nvm_reserve_luns(dev, s->lun_begin, s->lun_end);
256 return -ENOMEM; 256 if (ret)
257 return ret;
257 258
258 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); 259 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
259 if (!t) { 260 if (!t) {
@@ -640,6 +641,7 @@ EXPORT_SYMBOL(nvm_max_phys_sects);
640int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) 641int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
641{ 642{
642 struct nvm_dev *dev = tgt_dev->parent; 643 struct nvm_dev *dev = tgt_dev->parent;
644 int ret;
643 645
644 if (!dev->ops->submit_io) 646 if (!dev->ops->submit_io)
645 return -ENODEV; 647 return -ENODEV;
@@ -647,7 +649,12 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
647 nvm_rq_tgt_to_dev(tgt_dev, rqd); 649 nvm_rq_tgt_to_dev(tgt_dev, rqd);
648 650
649 rqd->dev = tgt_dev; 651 rqd->dev = tgt_dev;
650 return dev->ops->submit_io(dev, rqd); 652
653 /* In case of error, fail with right address format */
654 ret = dev->ops->submit_io(dev, rqd);
655 if (ret)
656 nvm_rq_dev_to_tgt(tgt_dev, rqd);
657 return ret;
651} 658}
652EXPORT_SYMBOL(nvm_submit_io); 659EXPORT_SYMBOL(nvm_submit_io);
653 660
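
Two small lightnvm core fixes sit in the hunks above: nvm_create_tgt() now propagates the real return value of nvm_reserve_luns() instead of flattening every failure to -ENOMEM, and nvm_submit_io() converts the request's addresses back to target format when submission fails, so callers always see a consistent address format. A standalone sketch of the undo-on-error idea (trivial stand-in helpers):

/* Sketch of the undo-on-error pattern added to nvm_submit_io(): addresses are
 * converted to device format before submission and converted back if the
 * submission fails, so the caller always sees target-format addresses. */
#include <errno.h>
#include <stdio.h>

struct rq { int addr; };

static void tgt_to_dev(struct rq *r) { r->addr += 1000; }  /* fake remapping */
static void dev_to_tgt(struct rq *r) { r->addr -= 1000; }

static int do_submit(struct rq *r) { (void)r; return -EIO; } /* pretend failure */

static int submit_io(struct rq *r)
{
	int ret;

	tgt_to_dev(r);
	ret = do_submit(r);
	if (ret)
		dev_to_tgt(r);   /* fail with the right address format */
	return ret;
}

int main(void)
{
	struct rq r = { .addr = 42 };
	int ret = submit_io(&r);

	printf("ret=%d addr=%d\n", ret, r.addr);   /* addr restored on error */
	return 0;
}
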
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 59bcea88db84..024a8fc93069 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -31,9 +31,13 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
31 */ 31 */
32retry: 32retry:
33 ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos); 33 ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
34 if (ret == NVM_IO_REQUEUE) { 34 switch (ret) {
35 case NVM_IO_REQUEUE:
35 io_schedule(); 36 io_schedule();
36 goto retry; 37 goto retry;
38 case NVM_IO_ERR:
39 pblk_pipeline_stop(pblk);
40 goto out;
37 } 41 }
38 42
39 if (unlikely(!bio_has_data(bio))) 43 if (unlikely(!bio_has_data(bio)))
@@ -58,6 +62,8 @@ retry:
58 atomic_long_add(nr_entries, &pblk->req_writes); 62 atomic_long_add(nr_entries, &pblk->req_writes);
59#endif 63#endif
60 64
65 pblk_rl_inserted(&pblk->rl, nr_entries);
66
61out: 67out:
62 pblk_write_should_kick(pblk); 68 pblk_write_should_kick(pblk);
63 return ret; 69 return ret;
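
pblk_write_to_cache() now dispatches on the admission result: NVM_IO_REQUEUE yields and retries as before, while NVM_IO_ERR stops the write pipeline instead of being ignored, and successfully inserted entries are reported to the rate limiter. A toy sketch of that retry/abort switch (constants and helpers are stand-ins):

/* Sketch of the retry/abort handling added to pblk_write_to_cache(): a
 * REQUEUE result loops back after yielding, an error result tears the write
 * pipeline down instead of being dropped on the floor. */
#include <stdio.h>

enum { NVM_IO_OK = 0, NVM_IO_REQUEUE = 1, NVM_IO_ERR = 2 };

static int tries;

static int may_write(void)            /* fake ring-buffer admission check */
{
	return tries++ < 2 ? NVM_IO_REQUEUE : NVM_IO_OK;
}

static void pipeline_stop(void)       { printf("stopping write pipeline\n"); }

static int write_to_cache(void)
{
	int ret;
retry:
	ret = may_write();
	switch (ret) {
	case NVM_IO_REQUEUE:
		/* io_schedule() in the driver; just loop here */
		goto retry;
	case NVM_IO_ERR:
		pipeline_stop();
		return ret;
	}
	printf("admitted after %d attempts\n", tries);
	return ret;
}

int main(void)
{
	return write_to_cache();
}
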
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 5e44768ccffa..11fe0c5b2a9c 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -17,7 +17,6 @@
17 */ 17 */
18 18
19#include "pblk.h" 19#include "pblk.h"
20#include <linux/time.h>
21 20
22static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, 21static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
23 struct ppa_addr *ppa) 22 struct ppa_addr *ppa)
@@ -34,7 +33,7 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
34 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", 33 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
35 line->id, pos); 34 line->id, pos);
36 35
37 pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb); 36 pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq);
38} 37}
39 38
40static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) 39static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
@@ -54,6 +53,8 @@ static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
54 *ppa = rqd->ppa_addr; 53 *ppa = rqd->ppa_addr;
55 pblk_mark_bb(pblk, line, ppa); 54 pblk_mark_bb(pblk, line, ppa);
56 } 55 }
56
57 atomic_dec(&pblk->inflight_io);
57} 58}
58 59
59/* Erase completion assumes that only one block is erased at the time */ 60/* Erase completion assumes that only one block is erased at the time */
@@ -61,13 +62,12 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
61{ 62{
62 struct pblk *pblk = rqd->private; 63 struct pblk *pblk = rqd->private;
63 64
64 up(&pblk->erase_sem);
65 __pblk_end_io_erase(pblk, rqd); 65 __pblk_end_io_erase(pblk, rqd);
66 mempool_free(rqd, pblk->r_rq_pool); 66 mempool_free(rqd, pblk->g_rq_pool);
67} 67}
68 68
69static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, 69void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
70 u64 paddr) 70 u64 paddr)
71{ 71{
72 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 72 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
73 struct list_head *move_list = NULL; 73 struct list_head *move_list = NULL;
@@ -88,7 +88,7 @@ static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
88 spin_unlock(&line->lock); 88 spin_unlock(&line->lock);
89 return; 89 return;
90 } 90 }
91 line->vsc--; 91 le32_add_cpu(line->vsc, -1);
92 92
93 if (line->state == PBLK_LINESTATE_CLOSED) 93 if (line->state == PBLK_LINESTATE_CLOSED)
94 move_list = pblk_line_gc_list(pblk, line); 94 move_list = pblk_line_gc_list(pblk, line);
@@ -130,18 +130,6 @@ void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
130 __pblk_map_invalidate(pblk, line, paddr); 130 __pblk_map_invalidate(pblk, line, paddr);
131} 131}
132 132
133void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
134 u64 paddr)
135{
136 __pblk_map_invalidate(pblk, line, paddr);
137
138 pblk_rb_sync_init(&pblk->rwb, NULL);
139 line->left_ssecs--;
140 if (!line->left_ssecs)
141 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
142 pblk_rb_sync_end(&pblk->rwb, NULL);
143}
144
145static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, 133static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
146 unsigned int nr_secs) 134 unsigned int nr_secs)
147{ 135{
@@ -172,8 +160,8 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
172 pool = pblk->w_rq_pool; 160 pool = pblk->w_rq_pool;
173 rq_size = pblk_w_rq_size; 161 rq_size = pblk_w_rq_size;
174 } else { 162 } else {
175 pool = pblk->r_rq_pool; 163 pool = pblk->g_rq_pool;
176 rq_size = pblk_r_rq_size; 164 rq_size = pblk_g_rq_size;
177 } 165 }
178 166
179 rqd = mempool_alloc(pool, GFP_KERNEL); 167 rqd = mempool_alloc(pool, GFP_KERNEL);
@@ -189,7 +177,7 @@ void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
189 if (rw == WRITE) 177 if (rw == WRITE)
190 pool = pblk->w_rq_pool; 178 pool = pblk->w_rq_pool;
191 else 179 else
192 pool = pblk->r_rq_pool; 180 pool = pblk->g_rq_pool;
193 181
194 mempool_free(rqd, pool); 182 mempool_free(rqd, pool);
195} 183}
@@ -271,35 +259,26 @@ void pblk_end_io_sync(struct nvm_rq *rqd)
271 complete(waiting); 259 complete(waiting);
272} 260}
273 261
274void pblk_flush_writer(struct pblk *pblk) 262void pblk_wait_for_meta(struct pblk *pblk)
275{ 263{
276 struct bio *bio; 264 do {
277 int ret; 265 if (!atomic_read(&pblk->inflight_io))
278 DECLARE_COMPLETION_ONSTACK(wait); 266 break;
279
280 bio = bio_alloc(GFP_KERNEL, 1);
281 if (!bio)
282 return;
283
284 bio->bi_iter.bi_sector = 0; /* internal bio */
285 bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
286 bio->bi_private = &wait;
287 bio->bi_end_io = pblk_end_bio_sync;
288 267
289 ret = pblk_write_to_cache(pblk, bio, 0); 268 schedule();
290 if (ret == NVM_IO_OK) { 269 } while (1);
291 if (!wait_for_completion_io_timeout(&wait, 270}
292 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
293 pr_err("pblk: flush cache timed out\n");
294 }
295 } else if (ret != NVM_IO_DONE) {
296 pr_err("pblk: tear down bio failed\n");
297 }
298 271
299 if (bio->bi_error) 272static void pblk_flush_writer(struct pblk *pblk)
300 pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error); 273{
274 pblk_rb_flush(&pblk->rwb);
275 do {
276 if (!pblk_rb_sync_count(&pblk->rwb))
277 break;
301 278
302 bio_put(bio); 279 pblk_write_kick(pblk);
280 schedule();
281 } while (1);
303} 282}
304 283
305struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line) 284struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
@@ -307,28 +286,31 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
307 struct pblk_line_meta *lm = &pblk->lm; 286 struct pblk_line_meta *lm = &pblk->lm;
308 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 287 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
309 struct list_head *move_list = NULL; 288 struct list_head *move_list = NULL;
289 int vsc = le32_to_cpu(*line->vsc);
310 290
311 if (!line->vsc) { 291 lockdep_assert_held(&line->lock);
292
293 if (!vsc) {
312 if (line->gc_group != PBLK_LINEGC_FULL) { 294 if (line->gc_group != PBLK_LINEGC_FULL) {
313 line->gc_group = PBLK_LINEGC_FULL; 295 line->gc_group = PBLK_LINEGC_FULL;
314 move_list = &l_mg->gc_full_list; 296 move_list = &l_mg->gc_full_list;
315 } 297 }
316 } else if (line->vsc < lm->mid_thrs) { 298 } else if (vsc < lm->high_thrs) {
317 if (line->gc_group != PBLK_LINEGC_HIGH) { 299 if (line->gc_group != PBLK_LINEGC_HIGH) {
318 line->gc_group = PBLK_LINEGC_HIGH; 300 line->gc_group = PBLK_LINEGC_HIGH;
319 move_list = &l_mg->gc_high_list; 301 move_list = &l_mg->gc_high_list;
320 } 302 }
321 } else if (line->vsc < lm->high_thrs) { 303 } else if (vsc < lm->mid_thrs) {
322 if (line->gc_group != PBLK_LINEGC_MID) { 304 if (line->gc_group != PBLK_LINEGC_MID) {
323 line->gc_group = PBLK_LINEGC_MID; 305 line->gc_group = PBLK_LINEGC_MID;
324 move_list = &l_mg->gc_mid_list; 306 move_list = &l_mg->gc_mid_list;
325 } 307 }
326 } else if (line->vsc < line->sec_in_line) { 308 } else if (vsc < line->sec_in_line) {
327 if (line->gc_group != PBLK_LINEGC_LOW) { 309 if (line->gc_group != PBLK_LINEGC_LOW) {
328 line->gc_group = PBLK_LINEGC_LOW; 310 line->gc_group = PBLK_LINEGC_LOW;
329 move_list = &l_mg->gc_low_list; 311 move_list = &l_mg->gc_low_list;
330 } 312 }
331 } else if (line->vsc == line->sec_in_line) { 313 } else if (vsc == line->sec_in_line) {
332 if (line->gc_group != PBLK_LINEGC_EMPTY) { 314 if (line->gc_group != PBLK_LINEGC_EMPTY) {
333 line->gc_group = PBLK_LINEGC_EMPTY; 315 line->gc_group = PBLK_LINEGC_EMPTY;
334 move_list = &l_mg->gc_empty_list; 316 move_list = &l_mg->gc_empty_list;
@@ -338,7 +320,7 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
338 line->gc_group = PBLK_LINEGC_NONE; 320 line->gc_group = PBLK_LINEGC_NONE;
339 move_list = &l_mg->corrupt_list; 321 move_list = &l_mg->corrupt_list;
340 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n", 322 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
341 line->id, line->vsc, 323 line->id, vsc,
342 line->sec_in_line, 324 line->sec_in_line,
343 lm->high_thrs, lm->mid_thrs); 325 lm->high_thrs, lm->mid_thrs);
344 } 326 }
@@ -397,6 +379,11 @@ void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
397#endif 379#endif
398} 380}
399 381
382void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
383{
384 pblk->sec_per_write = sec_per_write;
385}
386
400int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) 387int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
401{ 388{
402 struct nvm_tgt_dev *dev = pblk->dev; 389 struct nvm_tgt_dev *dev = pblk->dev;
@@ -431,21 +418,23 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
431 } 418 }
432 } 419 }
433#endif 420#endif
421
422 atomic_inc(&pblk->inflight_io);
423
434 return nvm_submit_io(dev, rqd); 424 return nvm_submit_io(dev, rqd);
435} 425}
436 426
437struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, 427struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
438 unsigned int nr_secs, unsigned int len, 428 unsigned int nr_secs, unsigned int len,
439 gfp_t gfp_mask) 429 int alloc_type, gfp_t gfp_mask)
440{ 430{
441 struct nvm_tgt_dev *dev = pblk->dev; 431 struct nvm_tgt_dev *dev = pblk->dev;
442 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
443 void *kaddr = data; 432 void *kaddr = data;
444 struct page *page; 433 struct page *page;
445 struct bio *bio; 434 struct bio *bio;
446 int i, ret; 435 int i, ret;
447 436
448 if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META) 437 if (alloc_type == PBLK_KMALLOC_META)
449 return bio_map_kern(dev->q, kaddr, len, gfp_mask); 438 return bio_map_kern(dev->q, kaddr, len, gfp_mask);
450 439
451 bio = bio_kmalloc(gfp_mask, nr_secs); 440 bio = bio_kmalloc(gfp_mask, nr_secs);
@@ -478,7 +467,7 @@ out:
478int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, 467int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
479 unsigned long secs_to_flush) 468 unsigned long secs_to_flush)
480{ 469{
481 int max = pblk->max_write_pgs; 470 int max = pblk->sec_per_write;
482 int min = pblk->min_write_pgs; 471 int min = pblk->min_write_pgs;
483 int secs_to_sync = 0; 472 int secs_to_sync = 0;
484 473
@@ -492,12 +481,26 @@ int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
492 return secs_to_sync; 481 return secs_to_sync;
493} 482}
494 483
495static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, 484void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
496 int nr_secs) 485{
486 u64 addr;
487 int i;
488
489 addr = find_next_zero_bit(line->map_bitmap,
490 pblk->lm.sec_per_line, line->cur_sec);
491 line->cur_sec = addr - nr_secs;
492
493 for (i = 0; i < nr_secs; i++, line->cur_sec--)
494 WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
495}
496
497u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
497{ 498{
498 u64 addr; 499 u64 addr;
499 int i; 500 int i;
500 501
502 lockdep_assert_held(&line->lock);
503
501 /* logic error: ppa out-of-bounds. Prevent generating bad address */ 504 /* logic error: ppa out-of-bounds. Prevent generating bad address */
502 if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) { 505 if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
503 WARN(1, "pblk: page allocation out of bounds\n"); 506 WARN(1, "pblk: page allocation out of bounds\n");
@@ -528,27 +531,38 @@ u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
528 return addr; 531 return addr;
529} 532}
530 533
534u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
535{
536 u64 paddr;
537
538 spin_lock(&line->lock);
539 paddr = find_next_zero_bit(line->map_bitmap,
540 pblk->lm.sec_per_line, line->cur_sec);
541 spin_unlock(&line->lock);
542
543 return paddr;
544}
545
531/* 546/*
532 * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when 547 * Submit emeta to one LUN in the raid line at the time to avoid a deadlock when
533 * taking the per LUN semaphore. 548 * taking the per LUN semaphore.
534 */ 549 */
535static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, 550static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
536 u64 paddr, int dir) 551 void *emeta_buf, u64 paddr, int dir)
537{ 552{
538 struct nvm_tgt_dev *dev = pblk->dev; 553 struct nvm_tgt_dev *dev = pblk->dev;
539 struct nvm_geo *geo = &dev->geo; 554 struct nvm_geo *geo = &dev->geo;
555 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
540 struct pblk_line_meta *lm = &pblk->lm; 556 struct pblk_line_meta *lm = &pblk->lm;
557 void *ppa_list, *meta_list;
541 struct bio *bio; 558 struct bio *bio;
542 struct nvm_rq rqd; 559 struct nvm_rq rqd;
543 struct ppa_addr *ppa_list; 560 dma_addr_t dma_ppa_list, dma_meta_list;
544 dma_addr_t dma_ppa_list;
545 void *emeta = line->emeta;
546 int min = pblk->min_write_pgs; 561 int min = pblk->min_write_pgs;
547 int left_ppas = lm->emeta_sec; 562 int left_ppas = lm->emeta_sec[0];
548 int id = line->id; 563 int id = line->id;
549 int rq_ppas, rq_len; 564 int rq_ppas, rq_len;
550 int cmd_op, bio_op; 565 int cmd_op, bio_op;
551 int flags;
552 int i, j; 566 int i, j;
553 int ret; 567 int ret;
554 DECLARE_COMPLETION_ONSTACK(wait); 568 DECLARE_COMPLETION_ONSTACK(wait);
@@ -556,25 +570,28 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
556 if (dir == WRITE) { 570 if (dir == WRITE) {
557 bio_op = REQ_OP_WRITE; 571 bio_op = REQ_OP_WRITE;
558 cmd_op = NVM_OP_PWRITE; 572 cmd_op = NVM_OP_PWRITE;
559 flags = pblk_set_progr_mode(pblk, WRITE);
560 } else if (dir == READ) { 573 } else if (dir == READ) {
561 bio_op = REQ_OP_READ; 574 bio_op = REQ_OP_READ;
562 cmd_op = NVM_OP_PREAD; 575 cmd_op = NVM_OP_PREAD;
563 flags = pblk_set_read_mode(pblk);
564 } else 576 } else
565 return -EINVAL; 577 return -EINVAL;
566 578
567 ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list); 579 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
568 if (!ppa_list) 580 &dma_meta_list);
581 if (!meta_list)
569 return -ENOMEM; 582 return -ENOMEM;
570 583
584 ppa_list = meta_list + pblk_dma_meta_size;
585 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
586
571next_rq: 587next_rq:
572 memset(&rqd, 0, sizeof(struct nvm_rq)); 588 memset(&rqd, 0, sizeof(struct nvm_rq));
573 589
574 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 590 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
575 rq_len = rq_ppas * geo->sec_size; 591 rq_len = rq_ppas * geo->sec_size;
576 592
577 bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL); 593 bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
594 l_mg->emeta_alloc_type, GFP_KERNEL);
578 if (IS_ERR(bio)) { 595 if (IS_ERR(bio)) {
579 ret = PTR_ERR(bio); 596 ret = PTR_ERR(bio);
580 goto free_rqd_dma; 597 goto free_rqd_dma;
@@ -584,27 +601,38 @@ next_rq:
584 bio_set_op_attrs(bio, bio_op, 0); 601 bio_set_op_attrs(bio, bio_op, 0);
585 602
586 rqd.bio = bio; 603 rqd.bio = bio;
587 rqd.opcode = cmd_op; 604 rqd.meta_list = meta_list;
588 rqd.flags = flags;
589 rqd.nr_ppas = rq_ppas;
590 rqd.ppa_list = ppa_list; 605 rqd.ppa_list = ppa_list;
606 rqd.dma_meta_list = dma_meta_list;
591 rqd.dma_ppa_list = dma_ppa_list; 607 rqd.dma_ppa_list = dma_ppa_list;
608 rqd.opcode = cmd_op;
609 rqd.nr_ppas = rq_ppas;
592 rqd.end_io = pblk_end_io_sync; 610 rqd.end_io = pblk_end_io_sync;
593 rqd.private = &wait; 611 rqd.private = &wait;
594 612
595 if (dir == WRITE) { 613 if (dir == WRITE) {
614 struct pblk_sec_meta *meta_list = rqd.meta_list;
615
616 rqd.flags = pblk_set_progr_mode(pblk, WRITE);
596 for (i = 0; i < rqd.nr_ppas; ) { 617 for (i = 0; i < rqd.nr_ppas; ) {
597 spin_lock(&line->lock); 618 spin_lock(&line->lock);
598 paddr = __pblk_alloc_page(pblk, line, min); 619 paddr = __pblk_alloc_page(pblk, line, min);
599 spin_unlock(&line->lock); 620 spin_unlock(&line->lock);
600 for (j = 0; j < min; j++, i++, paddr++) 621 for (j = 0; j < min; j++, i++, paddr++) {
622 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
601 rqd.ppa_list[i] = 623 rqd.ppa_list[i] =
602 addr_to_gen_ppa(pblk, paddr, id); 624 addr_to_gen_ppa(pblk, paddr, id);
625 }
603 } 626 }
604 } else { 627 } else {
605 for (i = 0; i < rqd.nr_ppas; ) { 628 for (i = 0; i < rqd.nr_ppas; ) {
606 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id); 629 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
607 int pos = pblk_dev_ppa_to_pos(geo, ppa); 630 int pos = pblk_dev_ppa_to_pos(geo, ppa);
631 int read_type = PBLK_READ_RANDOM;
632
633 if (pblk_io_aligned(pblk, rq_ppas))
634 read_type = PBLK_READ_SEQUENTIAL;
635 rqd.flags = pblk_set_read_mode(pblk, read_type);
608 636
609 while (test_bit(pos, line->blk_bitmap)) { 637 while (test_bit(pos, line->blk_bitmap)) {
610 paddr += min; 638 paddr += min;
@@ -645,9 +673,11 @@ next_rq:
645 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 673 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
646 pr_err("pblk: emeta I/O timed out\n"); 674 pr_err("pblk: emeta I/O timed out\n");
647 } 675 }
676 atomic_dec(&pblk->inflight_io);
648 reinit_completion(&wait); 677 reinit_completion(&wait);
649 678
650 bio_put(bio); 679 if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
680 bio_put(bio);
651 681
652 if (rqd.error) { 682 if (rqd.error) {
653 if (dir == WRITE) 683 if (dir == WRITE)
@@ -656,12 +686,12 @@ next_rq:
656 pblk_log_read_err(pblk, &rqd); 686 pblk_log_read_err(pblk, &rqd);
657 } 687 }
658 688
659 emeta += rq_len; 689 emeta_buf += rq_len;
660 left_ppas -= rq_ppas; 690 left_ppas -= rq_ppas;
661 if (left_ppas) 691 if (left_ppas)
662 goto next_rq; 692 goto next_rq;
663free_rqd_dma: 693free_rqd_dma:
664 nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list); 694 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
665 return ret; 695 return ret;
666} 696}
667 697
@@ -697,21 +727,24 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
697 bio_op = REQ_OP_WRITE; 727 bio_op = REQ_OP_WRITE;
698 cmd_op = NVM_OP_PWRITE; 728 cmd_op = NVM_OP_PWRITE;
699 flags = pblk_set_progr_mode(pblk, WRITE); 729 flags = pblk_set_progr_mode(pblk, WRITE);
700 lba_list = pblk_line_emeta_to_lbas(line->emeta); 730 lba_list = emeta_to_lbas(pblk, line->emeta->buf);
701 } else if (dir == READ) { 731 } else if (dir == READ) {
702 bio_op = REQ_OP_READ; 732 bio_op = REQ_OP_READ;
703 cmd_op = NVM_OP_PREAD; 733 cmd_op = NVM_OP_PREAD;
704 flags = pblk_set_read_mode(pblk); 734 flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
705 } else 735 } else
706 return -EINVAL; 736 return -EINVAL;
707 737
708 memset(&rqd, 0, sizeof(struct nvm_rq)); 738 memset(&rqd, 0, sizeof(struct nvm_rq));
709 739
710 rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 740 rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
711 &rqd.dma_ppa_list); 741 &rqd.dma_meta_list);
712 if (!rqd.ppa_list) 742 if (!rqd.meta_list)
713 return -ENOMEM; 743 return -ENOMEM;
714 744
745 rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
746 rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
747
715 bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL); 748 bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
716 if (IS_ERR(bio)) { 749 if (IS_ERR(bio)) {
717 ret = PTR_ERR(bio); 750 ret = PTR_ERR(bio);
@@ -729,9 +762,15 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
729 rqd.private = &wait; 762 rqd.private = &wait;
730 763
731 for (i = 0; i < lm->smeta_sec; i++, paddr++) { 764 for (i = 0; i < lm->smeta_sec; i++, paddr++) {
765 struct pblk_sec_meta *meta_list = rqd.meta_list;
766
732 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); 767 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
733 if (dir == WRITE) 768
734 lba_list[paddr] = cpu_to_le64(ADDR_EMPTY); 769 if (dir == WRITE) {
770 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
771
772 meta_list[i].lba = lba_list[paddr] = addr_empty;
773 }
735 } 774 }
736 775
737 /* 776 /*
@@ -750,6 +789,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
750 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 789 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
751 pr_err("pblk: smeta I/O timed out\n"); 790 pr_err("pblk: smeta I/O timed out\n");
752 } 791 }
792 atomic_dec(&pblk->inflight_io);
753 793
754 if (rqd.error) { 794 if (rqd.error) {
755 if (dir == WRITE) 795 if (dir == WRITE)
@@ -759,7 +799,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
759 } 799 }
760 800
761free_ppa_list: 801free_ppa_list:
762 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list); 802 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
763 803
764 return ret; 804 return ret;
765} 805}
@@ -771,9 +811,11 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
771 return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ); 811 return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
772} 812}
773 813
774int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line) 814int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
815 void *emeta_buf)
775{ 816{
776 return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ); 817 return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
818 line->emeta_ssec, READ);
777} 819}
778 820
779static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, 821static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -789,7 +831,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
789static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) 831static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
790{ 832{
791 struct nvm_rq rqd; 833 struct nvm_rq rqd;
792 int ret; 834 int ret = 0;
793 DECLARE_COMPLETION_ONSTACK(wait); 835 DECLARE_COMPLETION_ONSTACK(wait);
794 836
795 memset(&rqd, 0, sizeof(struct nvm_rq)); 837 memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -824,14 +866,14 @@ out:
824 rqd.private = pblk; 866 rqd.private = pblk;
825 __pblk_end_io_erase(pblk, &rqd); 867 __pblk_end_io_erase(pblk, &rqd);
826 868
827 return 0; 869 return ret;
828} 870}
829 871
830int pblk_line_erase(struct pblk *pblk, struct pblk_line *line) 872int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
831{ 873{
832 struct pblk_line_meta *lm = &pblk->lm; 874 struct pblk_line_meta *lm = &pblk->lm;
833 struct ppa_addr ppa; 875 struct ppa_addr ppa;
834 int bit = -1; 876 int ret, bit = -1;
835 877
836 /* Erase only good blocks, one at a time */ 878 /* Erase only good blocks, one at a time */
837 do { 879 do {
@@ -850,27 +892,59 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
850 WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); 892 WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
851 spin_unlock(&line->lock); 893 spin_unlock(&line->lock);
852 894
853 if (pblk_blk_erase_sync(pblk, ppa)) { 895 ret = pblk_blk_erase_sync(pblk, ppa);
896 if (ret) {
854 pr_err("pblk: failed to erase line %d\n", line->id); 897 pr_err("pblk: failed to erase line %d\n", line->id);
855 return -ENOMEM; 898 return ret;
856 } 899 }
857 } while (1); 900 } while (1);
858 901
859 return 0; 902 return 0;
860} 903}
861 904
905static void pblk_line_setup_metadata(struct pblk_line *line,
906 struct pblk_line_mgmt *l_mg,
907 struct pblk_line_meta *lm)
908{
909 int meta_line;
910
911 lockdep_assert_held(&l_mg->free_lock);
912
913retry_meta:
914 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
915 if (meta_line == PBLK_DATA_LINES) {
916 spin_unlock(&l_mg->free_lock);
917 io_schedule();
918 spin_lock(&l_mg->free_lock);
919 goto retry_meta;
920 }
921
922 set_bit(meta_line, &l_mg->meta_bitmap);
923 line->meta_line = meta_line;
924
925 line->smeta = l_mg->sline_meta[meta_line];
926 line->emeta = l_mg->eline_meta[meta_line];
927
928 memset(line->smeta, 0, lm->smeta_len);
929 memset(line->emeta->buf, 0, lm->emeta_len[0]);
930
931 line->emeta->mem = 0;
932 atomic_set(&line->emeta->sync, 0);
933}
934
862/* For now lines are always assumed full lines. Thus, smeta former and current 935/* For now lines are always assumed full lines. Thus, smeta former and current
863 * lun bitmaps are omitted. 936 * lun bitmaps are omitted.
864 */ 937 */
865static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line, 938static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
866 struct pblk_line *cur) 939 struct pblk_line *cur)
867{ 940{
868 struct nvm_tgt_dev *dev = pblk->dev; 941 struct nvm_tgt_dev *dev = pblk->dev;
869 struct nvm_geo *geo = &dev->geo; 942 struct nvm_geo *geo = &dev->geo;
870 struct pblk_line_meta *lm = &pblk->lm; 943 struct pblk_line_meta *lm = &pblk->lm;
871 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 944 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
872 struct line_smeta *smeta = line->smeta; 945 struct pblk_emeta *emeta = line->emeta;
873 struct line_emeta *emeta = line->emeta; 946 struct line_emeta *emeta_buf = emeta->buf;
947 struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
874 int nr_blk_line; 948 int nr_blk_line;
875 949
876 /* After erasing the line, new bad blocks might appear and we risk 950 /* After erasing the line, new bad blocks might appear and we risk
@@ -893,42 +967,44 @@ static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
893 } 967 }
894 968
895 /* Run-time metadata */ 969 /* Run-time metadata */
896 line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta); 970 line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
897 971
898 /* Mark LUNs allocated in this line (all for now) */ 972 /* Mark LUNs allocated in this line (all for now) */
899 bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len); 973 bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
900 974
901 smeta->header.identifier = cpu_to_le32(PBLK_MAGIC); 975 smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
902 memcpy(smeta->header.uuid, pblk->instance_uuid, 16); 976 memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
903 smeta->header.id = cpu_to_le32(line->id); 977 smeta_buf->header.id = cpu_to_le32(line->id);
904 smeta->header.type = cpu_to_le16(line->type); 978 smeta_buf->header.type = cpu_to_le16(line->type);
905 smeta->header.version = cpu_to_le16(1); 979 smeta_buf->header.version = cpu_to_le16(1);
906 980
907 /* Start metadata */ 981 /* Start metadata */
908 smeta->seq_nr = cpu_to_le64(line->seq_nr); 982 smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
909 smeta->window_wr_lun = cpu_to_le32(geo->nr_luns); 983 smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
910 984
911 /* Fill metadata among lines */ 985 /* Fill metadata among lines */
912 if (cur) { 986 if (cur) {
913 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len); 987 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
914 smeta->prev_id = cpu_to_le32(cur->id); 988 smeta_buf->prev_id = cpu_to_le32(cur->id);
915 cur->emeta->next_id = cpu_to_le32(line->id); 989 cur->emeta->buf->next_id = cpu_to_le32(line->id);
916 } else { 990 } else {
917 smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY); 991 smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
918 } 992 }
919 993
920 /* All smeta must be set at this point */ 994 /* All smeta must be set at this point */
921 smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta)); 995 smeta_buf->header.crc = cpu_to_le32(
922 smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta)); 996 pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
997 smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
923 998
924 /* End metadata */ 999 /* End metadata */
925 memcpy(&emeta->header, &smeta->header, sizeof(struct line_header)); 1000 memcpy(&emeta_buf->header, &smeta_buf->header,
926 emeta->seq_nr = cpu_to_le64(line->seq_nr); 1001 sizeof(struct line_header));
927 emeta->nr_lbas = cpu_to_le64(line->sec_in_line); 1002 emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
928 emeta->nr_valid_lbas = cpu_to_le64(0); 1003 emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
929 emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY); 1004 emeta_buf->nr_valid_lbas = cpu_to_le64(0);
930 emeta->crc = cpu_to_le32(0); 1005 emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
931 emeta->prev_id = smeta->prev_id; 1006 emeta_buf->crc = cpu_to_le32(0);
1007 emeta_buf->prev_id = smeta_buf->prev_id;
932 1008
933 return 1; 1009 return 1;
934} 1010}
@@ -965,7 +1041,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
965 /* Mark smeta metadata sectors as bad sectors */ 1041 /* Mark smeta metadata sectors as bad sectors */
966 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); 1042 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
967 off = bit * geo->sec_per_pl; 1043 off = bit * geo->sec_per_pl;
968retry_smeta:
969 bitmap_set(line->map_bitmap, off, lm->smeta_sec); 1044 bitmap_set(line->map_bitmap, off, lm->smeta_sec);
970 line->sec_in_line -= lm->smeta_sec; 1045 line->sec_in_line -= lm->smeta_sec;
971 line->smeta_ssec = off; 1046 line->smeta_ssec = off;
@@ -973,8 +1048,7 @@ retry_smeta:
973 1048
974 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) { 1049 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
975 pr_debug("pblk: line smeta I/O failed. Retry\n"); 1050 pr_debug("pblk: line smeta I/O failed. Retry\n");
976 off += geo->sec_per_pl; 1051 return 1;
977 goto retry_smeta;
978 } 1052 }
979 1053
980 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line); 1054 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -983,8 +1057,8 @@ retry_smeta:
983 * blocks to make sure that there are enough sectors to store emeta 1057 * blocks to make sure that there are enough sectors to store emeta
984 */ 1058 */
985 bit = lm->sec_per_line; 1059 bit = lm->sec_per_line;
986 off = lm->sec_per_line - lm->emeta_sec; 1060 off = lm->sec_per_line - lm->emeta_sec[0];
987 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec); 1061 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
988 while (nr_bb) { 1062 while (nr_bb) {
989 off -= geo->sec_per_pl; 1063 off -= geo->sec_per_pl;
990 if (!test_bit(off, line->invalid_bitmap)) { 1064 if (!test_bit(off, line->invalid_bitmap)) {
@@ -993,9 +1067,11 @@ retry_smeta:
993 } 1067 }
994 } 1068 }
995 1069
996 line->sec_in_line -= lm->emeta_sec; 1070 line->sec_in_line -= lm->emeta_sec[0];
997 line->emeta_ssec = off; 1071 line->emeta_ssec = off;
998 line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line; 1072 line->nr_valid_lbas = 0;
1073 line->left_msecs = line->sec_in_line;
1074 *line->vsc = cpu_to_le32(line->sec_in_line);
999 1075
1000 if (lm->sec_per_line - line->sec_in_line != 1076 if (lm->sec_per_line - line->sec_in_line !=
1001 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) { 1077 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
@@ -1034,14 +1110,20 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1034 1110
1035 spin_lock(&line->lock); 1111 spin_lock(&line->lock);
1036 if (line->state != PBLK_LINESTATE_FREE) { 1112 if (line->state != PBLK_LINESTATE_FREE) {
1113 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1114 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1037 spin_unlock(&line->lock); 1115 spin_unlock(&line->lock);
1038 WARN(1, "pblk: corrupted line state\n"); 1116 WARN(1, "pblk: corrupted line %d, state %d\n",
1039 return -EINTR; 1117 line->id, line->state);
1118 return -EAGAIN;
1040 } 1119 }
1120
1041 line->state = PBLK_LINESTATE_OPEN; 1121 line->state = PBLK_LINESTATE_OPEN;
1042 1122
1043 atomic_set(&line->left_eblks, blk_in_line); 1123 atomic_set(&line->left_eblks, blk_in_line);
1044 atomic_set(&line->left_seblks, blk_in_line); 1124 atomic_set(&line->left_seblks, blk_in_line);
1125
1126 line->meta_distance = lm->meta_distance;
1045 spin_unlock(&line->lock); 1127 spin_unlock(&line->lock);
1046 1128
1047 /* Bad blocks do not need to be erased */ 1129 /* Bad blocks do not need to be erased */
@@ -1091,15 +1173,15 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
1091{ 1173{
1092 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1174 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1093 struct pblk_line_meta *lm = &pblk->lm; 1175 struct pblk_line_meta *lm = &pblk->lm;
1094 struct pblk_line *line = NULL; 1176 struct pblk_line *line;
1095 int bit; 1177 int ret, bit;
1096 1178
1097 lockdep_assert_held(&l_mg->free_lock); 1179 lockdep_assert_held(&l_mg->free_lock);
1098 1180
1099retry_get: 1181retry:
1100 if (list_empty(&l_mg->free_list)) { 1182 if (list_empty(&l_mg->free_list)) {
1101 pr_err("pblk: no free lines\n"); 1183 pr_err("pblk: no free lines\n");
1102 goto out; 1184 return NULL;
1103 } 1185 }
1104 1186
1105 line = list_first_entry(&l_mg->free_list, struct pblk_line, list); 1187 line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
@@ -1115,16 +1197,22 @@ retry_get:
1115 list_add_tail(&line->list, &l_mg->bad_list); 1197 list_add_tail(&line->list, &l_mg->bad_list);
1116 1198
1117 pr_debug("pblk: line %d is bad\n", line->id); 1199 pr_debug("pblk: line %d is bad\n", line->id);
1118 goto retry_get; 1200 goto retry;
1119 } 1201 }
1120 1202
1121 if (pblk_line_prepare(pblk, line)) { 1203 ret = pblk_line_prepare(pblk, line);
1122 pr_err("pblk: failed to prepare line %d\n", line->id); 1204 if (ret) {
1123 list_add(&line->list, &l_mg->free_list); 1205 if (ret == -EAGAIN) {
1124 return NULL; 1206 list_add(&line->list, &l_mg->corrupt_list);
1207 goto retry;
1208 } else {
1209 pr_err("pblk: failed to prepare line %d\n", line->id);
1210 list_add(&line->list, &l_mg->free_list);
1211 l_mg->nr_free_lines++;
1212 return NULL;
1213 }
1125 } 1214 }
1126 1215
1127out:
1128 return line; 1216 return line;
1129} 1217}
1130 1218
@@ -1134,6 +1222,7 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1134 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1222 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1135 struct pblk_line *retry_line; 1223 struct pblk_line *retry_line;
1136 1224
1225retry:
1137 spin_lock(&l_mg->free_lock); 1226 spin_lock(&l_mg->free_lock);
1138 retry_line = pblk_line_get(pblk); 1227 retry_line = pblk_line_get(pblk);
1139 if (!retry_line) { 1228 if (!retry_line) {
@@ -1150,23 +1239,25 @@ static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1150 l_mg->data_line = retry_line; 1239 l_mg->data_line = retry_line;
1151 spin_unlock(&l_mg->free_lock); 1240 spin_unlock(&l_mg->free_lock);
1152 1241
1153 if (pblk_line_erase(pblk, retry_line)) {
1154 spin_lock(&l_mg->free_lock);
1155 l_mg->data_line = NULL;
1156 spin_unlock(&l_mg->free_lock);
1157 return NULL;
1158 }
1159
1160 pblk_rl_free_lines_dec(&pblk->rl, retry_line); 1242 pblk_rl_free_lines_dec(&pblk->rl, retry_line);
1161 1243
1244 if (pblk_line_erase(pblk, retry_line))
1245 goto retry;
1246
1162 return retry_line; 1247 return retry_line;
1163} 1248}
1164 1249
1250static void pblk_set_space_limit(struct pblk *pblk)
1251{
1252 struct pblk_rl *rl = &pblk->rl;
1253
1254 atomic_set(&rl->rb_space, 0);
1255}
1256
1165struct pblk_line *pblk_line_get_first_data(struct pblk *pblk) 1257struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1166{ 1258{
1167 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1259 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1168 struct pblk_line *line; 1260 struct pblk_line *line;
1169 int meta_line;
1170 int is_next = 0; 1261 int is_next = 0;
1171 1262
1172 spin_lock(&l_mg->free_lock); 1263 spin_lock(&l_mg->free_lock);
@@ -1180,30 +1271,37 @@ struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1180 line->type = PBLK_LINETYPE_DATA; 1271 line->type = PBLK_LINETYPE_DATA;
1181 l_mg->data_line = line; 1272 l_mg->data_line = line;
1182 1273
1183 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); 1274 pblk_line_setup_metadata(line, l_mg, &pblk->lm);
1184 set_bit(meta_line, &l_mg->meta_bitmap);
1185 line->smeta = l_mg->sline_meta[meta_line].meta;
1186 line->emeta = l_mg->eline_meta[meta_line].meta;
1187 line->meta_line = meta_line;
1188 1275
1189 /* Allocate next line for preparation */ 1276 /* Allocate next line for preparation */
1190 l_mg->data_next = pblk_line_get(pblk); 1277 l_mg->data_next = pblk_line_get(pblk);
1191 if (l_mg->data_next) { 1278 if (!l_mg->data_next) {
1279 /* If we cannot get a new line, we need to stop the pipeline.
1280 * Only allow as many writes in as we can store safely and then
1281 * fail gracefully
1282 */
1283 pblk_set_space_limit(pblk);
1284
1285 l_mg->data_next = NULL;
1286 } else {
1192 l_mg->data_next->seq_nr = l_mg->d_seq_nr++; 1287 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1193 l_mg->data_next->type = PBLK_LINETYPE_DATA; 1288 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1194 is_next = 1; 1289 is_next = 1;
1195 } 1290 }
1196 spin_unlock(&l_mg->free_lock); 1291 spin_unlock(&l_mg->free_lock);
1197 1292
1293 if (pblk_line_erase(pblk, line)) {
1294 line = pblk_line_retry(pblk, line);
1295 if (!line)
1296 return NULL;
1297 }
1298
1198 pblk_rl_free_lines_dec(&pblk->rl, line); 1299 pblk_rl_free_lines_dec(&pblk->rl, line);
1199 if (is_next) 1300 if (is_next)
1200 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); 1301 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1201 1302
1202 if (pblk_line_erase(pblk, line))
1203 return NULL;
1204
1205retry_setup: 1303retry_setup:
1206 if (!pblk_line_set_metadata(pblk, line, NULL)) { 1304 if (!pblk_line_init_metadata(pblk, line, NULL)) {
1207 line = pblk_line_retry(pblk, line); 1305 line = pblk_line_retry(pblk, line);
1208 if (!line) 1306 if (!line)
1209 return NULL; 1307 return NULL;
@@ -1222,69 +1320,89 @@ retry_setup:
1222 return line; 1320 return line;
1223} 1321}
1224 1322
1225struct pblk_line *pblk_line_replace_data(struct pblk *pblk) 1323static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line)
1324{
1325 lockdep_assert_held(&pblk->l_mg.free_lock);
1326
1327 pblk_set_space_limit(pblk);
1328 pblk->state = PBLK_STATE_STOPPING;
1329}
1330
1331void pblk_pipeline_stop(struct pblk *pblk)
1332{
1333 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1334 int ret;
1335
1336 spin_lock(&l_mg->free_lock);
1337 if (pblk->state == PBLK_STATE_RECOVERING ||
1338 pblk->state == PBLK_STATE_STOPPED) {
1339 spin_unlock(&l_mg->free_lock);
1340 return;
1341 }
1342 pblk->state = PBLK_STATE_RECOVERING;
1343 spin_unlock(&l_mg->free_lock);
1344
1345 pblk_flush_writer(pblk);
1346 pblk_wait_for_meta(pblk);
1347
1348 ret = pblk_recov_pad(pblk);
1349 if (ret) {
1350 pr_err("pblk: could not close data on teardown(%d)\n", ret);
1351 return;
1352 }
1353
1354 flush_workqueue(pblk->bb_wq);
1355 pblk_line_close_meta_sync(pblk);
1356
1357 spin_lock(&l_mg->free_lock);
1358 pblk->state = PBLK_STATE_STOPPED;
1359 l_mg->data_line = NULL;
1360 l_mg->data_next = NULL;
1361 spin_unlock(&l_mg->free_lock);
1362}
1363
1364void pblk_line_replace_data(struct pblk *pblk)
1226{ 1365{
1227 struct pblk_line_meta *lm = &pblk->lm;
1228 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1366 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1229 struct pblk_line *cur, *new; 1367 struct pblk_line *cur, *new;
1230 unsigned int left_seblks; 1368 unsigned int left_seblks;
1231 int meta_line;
1232 int is_next = 0; 1369 int is_next = 0;
1233 1370
1234 cur = l_mg->data_line; 1371 cur = l_mg->data_line;
1235 new = l_mg->data_next; 1372 new = l_mg->data_next;
1236 if (!new) 1373 if (!new)
1237 return NULL; 1374 return;
1238 l_mg->data_line = new; 1375 l_mg->data_line = new;
1239 1376
1240retry_line: 1377 spin_lock(&l_mg->free_lock);
1378 if (pblk->state != PBLK_STATE_RUNNING) {
1379 l_mg->data_line = NULL;
1380 l_mg->data_next = NULL;
1381 spin_unlock(&l_mg->free_lock);
1382 return;
1383 }
1384
1385 pblk_line_setup_metadata(new, l_mg, &pblk->lm);
1386 spin_unlock(&l_mg->free_lock);
1387
1388retry_erase:
1241 left_seblks = atomic_read(&new->left_seblks); 1389 left_seblks = atomic_read(&new->left_seblks);
1242 if (left_seblks) { 1390 if (left_seblks) {
1243 /* If line is not fully erased, erase it */ 1391 /* If line is not fully erased, erase it */
1244 if (atomic_read(&new->left_eblks)) { 1392 if (atomic_read(&new->left_eblks)) {
1245 if (pblk_line_erase(pblk, new)) 1393 if (pblk_line_erase(pblk, new))
1246 return NULL; 1394 return;
1247 } else { 1395 } else {
1248 io_schedule(); 1396 io_schedule();
1249 } 1397 }
1250 goto retry_line; 1398 goto retry_erase;
1251 } 1399 }
1252 1400
1253 spin_lock(&l_mg->free_lock);
1254 /* Allocate next line for preparation */
1255 l_mg->data_next = pblk_line_get(pblk);
1256 if (l_mg->data_next) {
1257 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1258 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1259 is_next = 1;
1260 }
1261
1262retry_meta:
1263 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
1264 if (meta_line == PBLK_DATA_LINES) {
1265 spin_unlock(&l_mg->free_lock);
1266 io_schedule();
1267 spin_lock(&l_mg->free_lock);
1268 goto retry_meta;
1269 }
1270
1271 set_bit(meta_line, &l_mg->meta_bitmap);
1272 new->smeta = l_mg->sline_meta[meta_line].meta;
1273 new->emeta = l_mg->eline_meta[meta_line].meta;
1274 new->meta_line = meta_line;
1275
1276 memset(new->smeta, 0, lm->smeta_len);
1277 memset(new->emeta, 0, lm->emeta_len);
1278 spin_unlock(&l_mg->free_lock);
1279
1280 if (is_next)
1281 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1282
1283retry_setup: 1401retry_setup:
1284 if (!pblk_line_set_metadata(pblk, new, cur)) { 1402 if (!pblk_line_init_metadata(pblk, new, cur)) {
1285 new = pblk_line_retry(pblk, new); 1403 new = pblk_line_retry(pblk, new);
1286 if (!new) 1404 if (!new)
1287 return NULL; 1405 return;
1288 1406
1289 goto retry_setup; 1407 goto retry_setup;
1290 } 1408 }
@@ -1292,12 +1410,30 @@ retry_setup:
1292 if (!pblk_line_init_bb(pblk, new, 1)) { 1410 if (!pblk_line_init_bb(pblk, new, 1)) {
1293 new = pblk_line_retry(pblk, new); 1411 new = pblk_line_retry(pblk, new);
1294 if (!new) 1412 if (!new)
1295 return NULL; 1413 return;
1296 1414
1297 goto retry_setup; 1415 goto retry_setup;
1298 } 1416 }
1299 1417
1300 return new; 1418 /* Allocate next line for preparation */
1419 spin_lock(&l_mg->free_lock);
1420 l_mg->data_next = pblk_line_get(pblk);
1421 if (!l_mg->data_next) {
1422 /* If we cannot get a new line, we need to stop the pipeline.
1423 * Only allow as many writes in as we can store safely and then
1424 * fail gracefully
1425 */
1426 pblk_stop_writes(pblk, new);
1427 l_mg->data_next = NULL;
1428 } else {
1429 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1430 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1431 is_next = 1;
1432 }
1433 spin_unlock(&l_mg->free_lock);
1434
1435 if (is_next)
1436 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1301} 1437}
1302 1438
1303void pblk_line_free(struct pblk *pblk, struct pblk_line *line) 1439void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
@@ -1307,6 +1443,8 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
1307 if (line->invalid_bitmap) 1443 if (line->invalid_bitmap)
1308 mempool_free(line->invalid_bitmap, pblk->line_meta_pool); 1444 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1309 1445
1446 *line->vsc = cpu_to_le32(EMPTY_ENTRY);
1447
1310 line->map_bitmap = NULL; 1448 line->map_bitmap = NULL;
1311 line->invalid_bitmap = NULL; 1449 line->invalid_bitmap = NULL;
1312 line->smeta = NULL; 1450 line->smeta = NULL;
@@ -1339,8 +1477,8 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
1339 struct nvm_rq *rqd; 1477 struct nvm_rq *rqd;
1340 int err; 1478 int err;
1341 1479
1342 rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL); 1480 rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
1343 memset(rqd, 0, pblk_r_rq_size); 1481 memset(rqd, 0, pblk_g_rq_size);
1344 1482
1345 pblk_setup_e_rq(pblk, rqd, ppa); 1483 pblk_setup_e_rq(pblk, rqd, ppa);
1346 1484
@@ -1368,7 +1506,8 @@ struct pblk_line *pblk_line_get_data(struct pblk *pblk)
1368 return pblk->l_mg.data_line; 1506 return pblk->l_mg.data_line;
1369} 1507}
1370 1508
1371struct pblk_line *pblk_line_get_data_next(struct pblk *pblk) 1509/* For now, always erase next line */
1510struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
1372{ 1511{
1373 return pblk->l_mg.data_next; 1512 return pblk->l_mg.data_next;
1374} 1513}
@@ -1378,18 +1517,58 @@ int pblk_line_is_full(struct pblk_line *line)
1378 return (line->left_msecs == 0); 1517 return (line->left_msecs == 0);
1379} 1518}
1380 1519
1520void pblk_line_close_meta_sync(struct pblk *pblk)
1521{
1522 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1523 struct pblk_line_meta *lm = &pblk->lm;
1524 struct pblk_line *line, *tline;
1525 LIST_HEAD(list);
1526
1527 spin_lock(&l_mg->close_lock);
1528 if (list_empty(&l_mg->emeta_list)) {
1529 spin_unlock(&l_mg->close_lock);
1530 return;
1531 }
1532
1533 list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev);
1534 spin_unlock(&l_mg->close_lock);
1535
1536 list_for_each_entry_safe(line, tline, &list, list) {
1537 struct pblk_emeta *emeta = line->emeta;
1538
1539 while (emeta->mem < lm->emeta_len[0]) {
1540 int ret;
1541
1542 ret = pblk_submit_meta_io(pblk, line);
1543 if (ret) {
1544 pr_err("pblk: sync meta line %d failed (%d)\n",
1545 line->id, ret);
1546 return;
1547 }
1548 }
1549 }
1550
1551 pblk_wait_for_meta(pblk);
1552 flush_workqueue(pblk->close_wq);
1553}
1554
1555static void pblk_line_should_sync_meta(struct pblk *pblk)
1556{
1557 if (pblk_rl_is_limit(&pblk->rl))
1558 pblk_line_close_meta_sync(pblk);
1559}
1560
1381void pblk_line_close(struct pblk *pblk, struct pblk_line *line) 1561void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1382{ 1562{
1383 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1563 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1384 struct list_head *move_list; 1564 struct list_head *move_list;
1385 1565
1386 line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta)); 1566#ifdef CONFIG_NVM_DEBUG
1387 1567 struct pblk_line_meta *lm = &pblk->lm;
1388 if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
1389 pr_err("pblk: line %d close I/O failed\n", line->id);
1390 1568
1391 WARN(!bitmap_full(line->map_bitmap, line->sec_in_line), 1569 WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
1392 "pblk: corrupt closed line %d\n", line->id); 1570 "pblk: corrupt closed line %d\n", line->id);
1571#endif
1393 1572
1394 spin_lock(&l_mg->free_lock); 1573 spin_lock(&l_mg->free_lock);
1395 WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap)); 1574 WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
@@ -1410,6 +1589,31 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1410 1589
1411 spin_unlock(&line->lock); 1590 spin_unlock(&line->lock);
1412 spin_unlock(&l_mg->gc_lock); 1591 spin_unlock(&l_mg->gc_lock);
1592
1593 pblk_gc_should_kick(pblk);
1594}
1595
1596void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
1597{
1598 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1599 struct pblk_line_meta *lm = &pblk->lm;
1600 struct pblk_emeta *emeta = line->emeta;
1601 struct line_emeta *emeta_buf = emeta->buf;
1602
1603 /* No need for exact vsc value; avoid a big line lock and take aprox. */
1604 memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
1605 memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
1606
1607 emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
1608 emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
1609
1610 spin_lock(&l_mg->close_lock);
1611 spin_lock(&line->lock);
1612 list_add_tail(&line->list, &l_mg->emeta_list);
1613 spin_unlock(&line->lock);
1614 spin_unlock(&l_mg->close_lock);
1615
1616 pblk_line_should_sync_meta(pblk);
1413} 1617}
1414 1618
1415void pblk_line_close_ws(struct work_struct *work) 1619void pblk_line_close_ws(struct work_struct *work)
@@ -1449,7 +1653,8 @@ void pblk_line_mark_bb(struct work_struct *work)
1449} 1653}
1450 1654
1451void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, 1655void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1452 void (*work)(struct work_struct *)) 1656 void (*work)(struct work_struct *),
1657 struct workqueue_struct *wq)
1453{ 1658{
1454 struct pblk_line_ws *line_ws; 1659 struct pblk_line_ws *line_ws;
1455 1660
@@ -1462,7 +1667,7 @@ void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1462 line_ws->priv = priv; 1667 line_ws->priv = priv;
1463 1668
1464 INIT_WORK(&line_ws->ws, work); 1669 INIT_WORK(&line_ws->ws, work);
1465 queue_work(pblk->kw_wq, &line_ws->ws); 1670 queue_work(wq, &line_ws->ws);
1466} 1671}
1467 1672
1468void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, 1673void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -1471,7 +1676,7 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1471 struct nvm_tgt_dev *dev = pblk->dev; 1676 struct nvm_tgt_dev *dev = pblk->dev;
1472 struct nvm_geo *geo = &dev->geo; 1677 struct nvm_geo *geo = &dev->geo;
1473 struct pblk_lun *rlun; 1678 struct pblk_lun *rlun;
1474 int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun; 1679 int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
1475 int ret; 1680 int ret;
1476 1681
1477 /* 1682 /*
@@ -1488,10 +1693,10 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1488 /* If the LUN has been locked for this same request, do no attempt to 1693 /* If the LUN has been locked for this same request, do no attempt to
1489 * lock it again 1694 * lock it again
1490 */ 1695 */
1491 if (test_and_set_bit(lun_id, lun_bitmap)) 1696 if (test_and_set_bit(pos, lun_bitmap))
1492 return; 1697 return;
1493 1698
1494 rlun = &pblk->luns[lun_id]; 1699 rlun = &pblk->luns[pos];
1495 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000)); 1700 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
1496 if (ret) { 1701 if (ret) {
1497 switch (ret) { 1702 switch (ret) {
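
A note on the recurring allocation change in the pblk-core.c hunks above: the separate ppa_list DMA allocation is replaced by a single meta_list buffer that is carved into a per-sector metadata region followed by a PPA region at a fixed byte offset (ppa_list = meta_list + pblk_dma_meta_size). Below is a minimal userspace C sketch of that carve-up; the sizes, names, and the plain malloc() stand-in for nvm_dev_dma_alloc() are assumptions for illustration, not the driver's API.

/*
 * Userspace sketch of the "one allocation, two views" pattern: a single
 * buffer holds the per-sector metadata followed by the PPA list, and the
 * second region is addressed at a fixed byte offset.  All names and sizes
 * here are illustrative stand-ins, not pblk's real definitions.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_SECS        64                        /* sectors per request (assumed) */
#define META_ENTRY_SZ  sizeof(uint64_t)          /* stand-in for struct pblk_sec_meta */
#define DMA_META_SIZE  (NR_SECS * META_ENTRY_SZ) /* plays the role of pblk_dma_meta_size */

int main(void)
{
	/* One allocation instead of two: metadata first, PPA list after it. */
	void *meta_list = malloc(DMA_META_SIZE + NR_SECS * sizeof(uint64_t));
	if (!meta_list)
		return 1;

	uint64_t *lbas = meta_list;                                    /* metadata view */
	uint64_t *ppa_list = (void *)((char *)meta_list + DMA_META_SIZE);

	for (int i = 0; i < NR_SECS; i++) {
		lbas[i] = UINT64_MAX;  /* sentinel for padded sectors, like ADDR_EMPTY above */
		ppa_list[i] = i;       /* fake physical addresses */
	}

	printf("meta at %p, ppas at %p (offset %zu bytes)\n",
	       meta_list, (void *)ppa_list, (size_t)DMA_META_SIZE);

	free(meta_list);               /* one free, mirroring the single allocation */
	return 0;
}

Keeping both regions in one allocation also means a single free on the error path, which is why the free_rqd_dma and free_ppa_list labels above now release rqd.meta_list / rqd.dma_meta_list instead of the old ppa_list pair.
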
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index eaf479c6b63c..6090d28f7995 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -20,8 +20,7 @@
20 20
21static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) 21static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
22{ 22{
23 kfree(gc_rq->data); 23 vfree(gc_rq->data);
24 kfree(gc_rq->lba_list);
25 kfree(gc_rq); 24 kfree(gc_rq);
26} 25}
27 26
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
37 return 1; 36 return 1;
38 } 37 }
39 38
40 list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) { 39 list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
41 list_move_tail(&gc_rq->list, &w_list); 40 gc->w_entries = 0;
42 gc->w_entries--;
43 }
44 spin_unlock(&gc->w_lock); 41 spin_unlock(&gc->w_lock);
45 42
46 list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) { 43 list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
48 gc_rq->nr_secs, gc_rq->secs_to_gc, 45 gc_rq->nr_secs, gc_rq->secs_to_gc,
49 gc_rq->line, PBLK_IOTYPE_GC); 46 gc_rq->line, PBLK_IOTYPE_GC);
50 47
51 kref_put(&gc_rq->line->ref, pblk_line_put);
52
53 list_del(&gc_rq->list); 48 list_del(&gc_rq->list);
49 kref_put(&gc_rq->line->ref, pblk_line_put);
54 pblk_gc_free_gc_rq(gc_rq); 50 pblk_gc_free_gc_rq(gc_rq);
55 } 51 }
56 52
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
66 * Responsible for managing all memory related to a gc request. Also in case of 62 * Responsible for managing all memory related to a gc request. Also in case of
67 * failure 63 * failure
68 */ 64 */
69static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line, 65static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
70 u64 *lba_list, unsigned int nr_secs)
71{ 66{
72 struct nvm_tgt_dev *dev = pblk->dev; 67 struct nvm_tgt_dev *dev = pblk->dev;
73 struct nvm_geo *geo = &dev->geo; 68 struct nvm_geo *geo = &dev->geo;
74 struct pblk_gc *gc = &pblk->gc; 69 struct pblk_gc *gc = &pblk->gc;
75 struct pblk_gc_rq *gc_rq; 70 struct pblk_line *line = gc_rq->line;
76 void *data; 71 void *data;
77 unsigned int secs_to_gc; 72 unsigned int secs_to_gc;
78 int ret = NVM_IO_OK; 73 int ret = 0;
79 74
80 data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL); 75 data = vmalloc(gc_rq->nr_secs * geo->sec_size);
81 if (!data) { 76 if (!data) {
82 ret = NVM_IO_ERR; 77 ret = -ENOMEM;
83 goto free_lba_list; 78 goto out;
84 } 79 }
85 80
86 /* Read from GC victim block */ 81 /* Read from GC victim block */
87 if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs, 82 if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
88 &secs_to_gc, line)) { 83 &secs_to_gc, line)) {
89 ret = NVM_IO_ERR; 84 ret = -EFAULT;
90 goto free_data; 85 goto free_data;
91 } 86 }
92 87
93 if (!secs_to_gc) 88 if (!secs_to_gc)
94 goto free_data; 89 goto free_rq;
95
96 gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
97 if (!gc_rq) {
98 ret = NVM_IO_ERR;
99 goto free_data;
100 }
101 90
102 gc_rq->line = line;
103 gc_rq->data = data; 91 gc_rq->data = data;
104 gc_rq->lba_list = lba_list;
105 gc_rq->nr_secs = nr_secs;
106 gc_rq->secs_to_gc = secs_to_gc; 92 gc_rq->secs_to_gc = secs_to_gc;
107 93
108 kref_get(&line->ref);
109
110retry: 94retry:
111 spin_lock(&gc->w_lock); 95 spin_lock(&gc->w_lock);
112 if (gc->w_entries > 256) { 96 if (gc->w_entries >= PBLK_GC_W_QD) {
113 spin_unlock(&gc->w_lock); 97 spin_unlock(&gc->w_lock);
114 usleep_range(256, 1024); 98 pblk_gc_writer_kick(&pblk->gc);
99 usleep_range(128, 256);
115 goto retry; 100 goto retry;
116 } 101 }
117 gc->w_entries++; 102 gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
120 105
121 pblk_gc_writer_kick(&pblk->gc); 106 pblk_gc_writer_kick(&pblk->gc);
122 107
123 return NVM_IO_OK; 108 return 0;
124 109
110free_rq:
111 kfree(gc_rq);
125free_data: 112free_data:
126 kfree(data); 113 vfree(data);
127free_lba_list: 114out:
128 kfree(lba_list); 115 kref_put(&line->ref, pblk_line_put);
129
130 return ret; 116 return ret;
131} 117}
132 118
@@ -150,140 +136,206 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
150 136
151static void pblk_gc_line_ws(struct work_struct *work) 137static void pblk_gc_line_ws(struct work_struct *work)
152{ 138{
139 struct pblk_line_ws *line_rq_ws = container_of(work,
140 struct pblk_line_ws, ws);
141 struct pblk *pblk = line_rq_ws->pblk;
142 struct pblk_gc *gc = &pblk->gc;
143 struct pblk_line *line = line_rq_ws->line;
144 struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
145
146 up(&gc->gc_sem);
147
148 if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
149 pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
150 line->id, *line->vsc,
151 gc_rq->nr_secs);
152 }
153
154 mempool_free(line_rq_ws, pblk->line_ws_pool);
155}
156
157static void pblk_gc_line_prepare_ws(struct work_struct *work)
158{
153 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, 159 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
154 ws); 160 ws);
155 struct pblk *pblk = line_ws->pblk; 161 struct pblk *pblk = line_ws->pblk;
156 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
157 struct pblk_line *line = line_ws->line; 162 struct pblk_line *line = line_ws->line;
163 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
158 struct pblk_line_meta *lm = &pblk->lm; 164 struct pblk_line_meta *lm = &pblk->lm;
159 __le64 *lba_list = line_ws->priv; 165 struct pblk_gc *gc = &pblk->gc;
160 u64 *gc_list; 166 struct line_emeta *emeta_buf;
161 int sec_left; 167 struct pblk_line_ws *line_rq_ws;
162 int nr_ppas, bit; 168 struct pblk_gc_rq *gc_rq;
163 int put_line = 1; 169 __le64 *lba_list;
170 int sec_left, nr_secs, bit;
171 int ret;
164 172
165 pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id); 173 emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
174 GFP_KERNEL);
175 if (!emeta_buf) {
176 pr_err("pblk: cannot use GC emeta\n");
177 return;
178 }
166 179
167 spin_lock(&line->lock); 180 ret = pblk_line_read_emeta(pblk, line, emeta_buf);
168 sec_left = line->vsc; 181 if (ret) {
169 if (!sec_left) { 182 pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
170 /* Lines are erased before being used (l_mg->data_/log_next) */ 183 goto fail_free_emeta;
171 spin_unlock(&line->lock); 184 }
172 goto out; 185
186 /* If this read fails, it means that emeta is corrupted. For now, leave
187 * the line untouched. TODO: Implement a recovery routine that scans and
188 * moves all sectors on the line.
189 */
190 lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
191 if (!lba_list) {
192 pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
193 goto fail_free_emeta;
173 } 194 }
174 spin_unlock(&line->lock);
175 195
196 sec_left = pblk_line_vsc(line);
176 if (sec_left < 0) { 197 if (sec_left < 0) {
177 pr_err("pblk: corrupted GC line (%d)\n", line->id); 198 pr_err("pblk: corrupted GC line (%d)\n", line->id);
178 put_line = 0; 199 goto fail_free_emeta;
179 pblk_put_line_back(pblk, line);
180 goto out;
181 } 200 }
182 201
183 bit = -1; 202 bit = -1;
184next_rq: 203next_rq:
185 gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL); 204 gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
186 if (!gc_list) { 205 if (!gc_rq)
187 put_line = 0; 206 goto fail_free_emeta;
188 pblk_put_line_back(pblk, line);
189 goto out;
190 }
191 207
192 nr_ppas = 0; 208 nr_secs = 0;
193 do { 209 do {
194 bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line, 210 bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
195 bit + 1); 211 bit + 1);
196 if (bit > line->emeta_ssec) 212 if (bit > line->emeta_ssec)
197 break; 213 break;
198 214
199 gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]); 215 gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
200 } while (nr_ppas < pblk->max_write_pgs); 216 } while (nr_secs < pblk->max_write_pgs);
201 217
202 if (unlikely(!nr_ppas)) { 218 if (unlikely(!nr_secs)) {
203 kfree(gc_list); 219 kfree(gc_rq);
204 goto out; 220 goto out;
205 } 221 }
206 222
207 if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) { 223 gc_rq->nr_secs = nr_secs;
208 pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n", 224 gc_rq->line = line;
209 line->id, line->vsc, 225
210 nr_ppas, nr_ppas); 226 line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
211 put_line = 0; 227 if (!line_rq_ws)
212 pblk_put_line_back(pblk, line); 228 goto fail_free_gc_rq;
213 goto out;
214 }
215 229
216 sec_left -= nr_ppas; 230 line_rq_ws->pblk = pblk;
231 line_rq_ws->line = line;
232 line_rq_ws->priv = gc_rq;
233
234 down(&gc->gc_sem);
235 kref_get(&line->ref);
236
237 INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
238 queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
239
240 sec_left -= nr_secs;
217 if (sec_left > 0) 241 if (sec_left > 0)
218 goto next_rq; 242 goto next_rq;
219 243
220out: 244out:
221 pblk_mfree(line->emeta, l_mg->emeta_alloc_type); 245 pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
222 mempool_free(line_ws, pblk->line_ws_pool); 246 mempool_free(line_ws, pblk->line_ws_pool);
223 atomic_dec(&pblk->gc.inflight_gc); 247
224 if (put_line) 248 kref_put(&line->ref, pblk_line_put);
225 kref_put(&line->ref, pblk_line_put); 249 atomic_dec(&gc->inflight_gc);
250
251 return;
252
253fail_free_gc_rq:
254 kfree(gc_rq);
255fail_free_emeta:
256 pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
257 pblk_put_line_back(pblk, line);
258 kref_put(&line->ref, pblk_line_put);
259 mempool_free(line_ws, pblk->line_ws_pool);
260 atomic_dec(&gc->inflight_gc);
261
262 pr_err("pblk: Failed to GC line %d\n", line->id);
226} 263}
227 264
228static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) 265static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
229{ 266{
230 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 267 struct pblk_gc *gc = &pblk->gc;
231 struct pblk_line_meta *lm = &pblk->lm;
232 struct pblk_line_ws *line_ws; 268 struct pblk_line_ws *line_ws;
233 __le64 *lba_list;
234 int ret;
235 269
236 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL); 270 pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
237 line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
238 GFP_KERNEL);
239 if (!line->emeta) {
240 pr_err("pblk: cannot use GC emeta\n");
241 goto fail_free_ws;
242 }
243
244 ret = pblk_line_read_emeta(pblk, line);
245 if (ret) {
246 pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
247 goto fail_free_emeta;
248 }
249 271
250 /* If this read fails, it means that emeta is corrupted. For now, leave 272 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
251 * the line untouched. TODO: Implement a recovery routine that scans and 273 if (!line_ws)
252 * moves all sectors on the line. 274 return -ENOMEM;
253 */
254 lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
255 if (!lba_list) {
256 pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
257 goto fail_free_emeta;
258 }
259 275
260 line_ws->pblk = pblk; 276 line_ws->pblk = pblk;
261 line_ws->line = line; 277 line_ws->line = line;
262 line_ws->priv = lba_list;
263 278
264 INIT_WORK(&line_ws->ws, pblk_gc_line_ws); 279 INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
265 queue_work(pblk->gc.gc_reader_wq, &line_ws->ws); 280 queue_work(gc->gc_reader_wq, &line_ws->ws);
266 281
267 return 0; 282 return 0;
283}
268 284
269fail_free_emeta: 285static int pblk_gc_read(struct pblk *pblk)
270 pblk_mfree(line->emeta, l_mg->emeta_alloc_type); 286{
271fail_free_ws: 287 struct pblk_gc *gc = &pblk->gc;
272 mempool_free(line_ws, pblk->line_ws_pool); 288 struct pblk_line *line;
273 pblk_put_line_back(pblk, line); 289
290 spin_lock(&gc->r_lock);
291 if (list_empty(&gc->r_list)) {
292 spin_unlock(&gc->r_lock);
293 return 1;
294 }
295
296 line = list_first_entry(&gc->r_list, struct pblk_line, list);
297 list_del(&line->list);
298 spin_unlock(&gc->r_lock);
299
300 pblk_gc_kick(pblk);
274 301
275 return 1; 302 if (pblk_gc_line(pblk, line))
303 pr_err("pblk: failed to GC line %d\n", line->id);
304
305 return 0;
276} 306}
277 307
278static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list) 308static void pblk_gc_reader_kick(struct pblk_gc *gc)
279{ 309{
280 struct pblk_line *line, *tline; 310 wake_up_process(gc->gc_reader_ts);
311}
281 312
282 list_for_each_entry_safe(line, tline, gc_list, list) { 313static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
283 if (pblk_gc_line(pblk, line)) 314 struct list_head *group_list)
284 pr_err("pblk: failed to GC line %d\n", line->id); 315{
285 list_del(&line->list); 316 struct pblk_line *line, *victim;
317 int line_vsc, victim_vsc;
318
319 victim = list_first_entry(group_list, struct pblk_line, list);
320 list_for_each_entry(line, group_list, list) {
321 line_vsc = le32_to_cpu(*line->vsc);
322 victim_vsc = le32_to_cpu(*victim->vsc);
323 if (line_vsc < victim_vsc)
324 victim = line;
286 } 325 }
326
327 return victim;
328}
329
330static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
331{
332 unsigned int nr_blocks_free, nr_blocks_need;
333
334 nr_blocks_need = pblk_rl_high_thrs(rl);
335 nr_blocks_free = pblk_rl_nr_free_blks(rl);
336
337 /* This is not critical, no need to take lock here */
338 return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
287} 339}
288 340
289/* 341/*
@@ -296,71 +348,83 @@ static void pblk_gc_run(struct pblk *pblk)
296{ 348{
297 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 349 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
298 struct pblk_gc *gc = &pblk->gc; 350 struct pblk_gc *gc = &pblk->gc;
299 struct pblk_line *line, *tline; 351 struct pblk_line *line;
300 unsigned int nr_blocks_free, nr_blocks_need;
301 struct list_head *group_list; 352 struct list_head *group_list;
302 int run_gc, gc_group = 0; 353 bool run_gc;
303 int prev_gc = 0; 354 int inflight_gc, gc_group = 0, prev_group = 0;
304 int inflight_gc = atomic_read(&gc->inflight_gc); 355
305 LIST_HEAD(gc_list); 356 do {
357 spin_lock(&l_mg->gc_lock);
358 if (list_empty(&l_mg->gc_full_list)) {
359 spin_unlock(&l_mg->gc_lock);
360 break;
361 }
362
363 line = list_first_entry(&l_mg->gc_full_list,
364 struct pblk_line, list);
306 365
307 spin_lock(&l_mg->gc_lock);
308 list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
309 spin_lock(&line->lock); 366 spin_lock(&line->lock);
310 WARN_ON(line->state != PBLK_LINESTATE_CLOSED); 367 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
311 line->state = PBLK_LINESTATE_GC; 368 line->state = PBLK_LINESTATE_GC;
312 spin_unlock(&line->lock); 369 spin_unlock(&line->lock);
313 370
314 list_del(&line->list); 371 list_del(&line->list);
372 spin_unlock(&l_mg->gc_lock);
373
315 kref_put(&line->ref, pblk_line_put); 374 kref_put(&line->ref, pblk_line_put);
316 } 375 } while (1);
317 spin_unlock(&l_mg->gc_lock);
318 376
319 nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl); 377 run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
320 nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl); 378 if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
321 run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); 379 return;
322 380
323next_gc_group: 381next_gc_group:
324 group_list = l_mg->gc_lists[gc_group++]; 382 group_list = l_mg->gc_lists[gc_group++];
325 spin_lock(&l_mg->gc_lock); 383
326 while (run_gc && !list_empty(group_list)) { 384 do {
327 /* No need to queue up more GC lines than we can handle */ 385 spin_lock(&l_mg->gc_lock);
328 if (!run_gc || inflight_gc > gc->gc_jobs_active) { 386 if (list_empty(group_list)) {
329 spin_unlock(&l_mg->gc_lock); 387 spin_unlock(&l_mg->gc_lock);
330 pblk_gc_lines(pblk, &gc_list); 388 break;
331 return;
332 } 389 }
333 390
334 line = list_first_entry(group_list, struct pblk_line, list); 391 line = pblk_gc_get_victim_line(pblk, group_list);
335 nr_blocks_free += atomic_read(&line->blk_in_line);
336 392
337 spin_lock(&line->lock); 393 spin_lock(&line->lock);
338 WARN_ON(line->state != PBLK_LINESTATE_CLOSED); 394 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
339 line->state = PBLK_LINESTATE_GC; 395 line->state = PBLK_LINESTATE_GC;
340 list_move_tail(&line->list, &gc_list);
341 atomic_inc(&gc->inflight_gc);
342 inflight_gc++;
343 spin_unlock(&line->lock); 396 spin_unlock(&line->lock);
344 397
345 prev_gc = 1; 398 list_del(&line->list);
346 run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced); 399 spin_unlock(&l_mg->gc_lock);
347 } 400
348 spin_unlock(&l_mg->gc_lock); 401 spin_lock(&gc->r_lock);
402 list_add_tail(&line->list, &gc->r_list);
403 spin_unlock(&gc->r_lock);
349 404
350 pblk_gc_lines(pblk, &gc_list); 405 inflight_gc = atomic_inc_return(&gc->inflight_gc);
406 pblk_gc_reader_kick(gc);
351 407
352 if (!prev_gc && pblk->rl.rb_state > gc_group && 408 prev_group = 1;
353 gc_group < PBLK_NR_GC_LISTS) 409
410 /* No need to queue up more GC lines than we can handle */
411 run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
412 if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
413 break;
414 } while (1);
415
416 if (!prev_group && pblk->rl.rb_state > gc_group &&
417 gc_group < PBLK_GC_NR_LISTS)
354 goto next_gc_group; 418 goto next_gc_group;
355} 419}
356 420
357 421void pblk_gc_kick(struct pblk *pblk)
358static void pblk_gc_kick(struct pblk *pblk)
359{ 422{
360 struct pblk_gc *gc = &pblk->gc; 423 struct pblk_gc *gc = &pblk->gc;
361 424
362 wake_up_process(gc->gc_ts); 425 wake_up_process(gc->gc_ts);
363 pblk_gc_writer_kick(gc); 426 pblk_gc_writer_kick(gc);
427 pblk_gc_reader_kick(gc);
364 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); 428 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
365} 429}
366 430
@@ -398,42 +462,34 @@ static int pblk_gc_writer_ts(void *data)
398 return 0; 462 return 0;
399} 463}
400 464
401static void pblk_gc_start(struct pblk *pblk) 465static int pblk_gc_reader_ts(void *data)
402{ 466{
403 pblk->gc.gc_active = 1; 467 struct pblk *pblk = data;
404 468
405 pr_debug("pblk: gc start\n"); 469 while (!kthread_should_stop()) {
470 if (!pblk_gc_read(pblk))
471 continue;
472 set_current_state(TASK_INTERRUPTIBLE);
473 io_schedule();
474 }
475
476 return 0;
406} 477}
407 478
408int pblk_gc_status(struct pblk *pblk) 479static void pblk_gc_start(struct pblk *pblk)
409{ 480{
410 struct pblk_gc *gc = &pblk->gc; 481 pblk->gc.gc_active = 1;
411 int ret; 482 pr_debug("pblk: gc start\n");
412
413 spin_lock(&gc->lock);
414 ret = gc->gc_active;
415 spin_unlock(&gc->lock);
416
417 return ret;
418} 483}
419 484
420static void __pblk_gc_should_start(struct pblk *pblk) 485void pblk_gc_should_start(struct pblk *pblk)
421{ 486{
422 struct pblk_gc *gc = &pblk->gc; 487 struct pblk_gc *gc = &pblk->gc;
423 488
424 lockdep_assert_held(&gc->lock);
425
426 if (gc->gc_enabled && !gc->gc_active) 489 if (gc->gc_enabled && !gc->gc_active)
427 pblk_gc_start(pblk); 490 pblk_gc_start(pblk);
428}
429 491
430void pblk_gc_should_start(struct pblk *pblk) 492 pblk_gc_kick(pblk);
431{
432 struct pblk_gc *gc = &pblk->gc;
433
434 spin_lock(&gc->lock);
435 __pblk_gc_should_start(pblk);
436 spin_unlock(&gc->lock);
437} 493}
438 494
439/* 495/*
@@ -442,10 +498,7 @@ void pblk_gc_should_start(struct pblk *pblk)
442 */ 498 */
443static void pblk_gc_stop(struct pblk *pblk, int flush_wq) 499static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
444{ 500{
445 spin_lock(&pblk->gc.lock);
446 pblk->gc.gc_active = 0; 501 pblk->gc.gc_active = 0;
447 spin_unlock(&pblk->gc.lock);
448
449 pr_debug("pblk: gc stop\n"); 502 pr_debug("pblk: gc stop\n");
450} 503}
451 504
@@ -468,20 +521,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
468 spin_unlock(&gc->lock); 521 spin_unlock(&gc->lock);
469} 522}
470 523
471void pblk_gc_sysfs_force(struct pblk *pblk, int force) 524int pblk_gc_sysfs_force(struct pblk *pblk, int force)
472{ 525{
473 struct pblk_gc *gc = &pblk->gc; 526 struct pblk_gc *gc = &pblk->gc;
474 int rsv = 0; 527
528 if (force < 0 || force > 1)
529 return -EINVAL;
475 530
476 spin_lock(&gc->lock); 531 spin_lock(&gc->lock);
477 if (force) {
478 gc->gc_enabled = 1;
479 rsv = 64;
480 }
481 pblk_rl_set_gc_rsc(&pblk->rl, rsv);
482 gc->gc_forced = force; 532 gc->gc_forced = force;
483 __pblk_gc_should_start(pblk); 533
534 if (force)
535 gc->gc_enabled = 1;
536 else
537 gc->gc_enabled = 0;
484 spin_unlock(&gc->lock); 538 spin_unlock(&gc->lock);
539
540 pblk_gc_should_start(pblk);
541
542 return 0;
485} 543}
486 544
487int pblk_gc_init(struct pblk *pblk) 545int pblk_gc_init(struct pblk *pblk)
@@ -503,30 +561,58 @@ int pblk_gc_init(struct pblk *pblk)
503 goto fail_free_main_kthread; 561 goto fail_free_main_kthread;
504 } 562 }
505 563
564 gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
565 "pblk-gc-reader-ts");
566 if (IS_ERR(gc->gc_reader_ts)) {
567 pr_err("pblk: could not allocate GC reader kthread\n");
568 ret = PTR_ERR(gc->gc_reader_ts);
569 goto fail_free_writer_kthread;
570 }
571
506 setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk); 572 setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
507 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); 573 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
508 574
509 gc->gc_active = 0; 575 gc->gc_active = 0;
510 gc->gc_forced = 0; 576 gc->gc_forced = 0;
511 gc->gc_enabled = 1; 577 gc->gc_enabled = 1;
512 gc->gc_jobs_active = 8;
513 gc->w_entries = 0; 578 gc->w_entries = 0;
514 atomic_set(&gc->inflight_gc, 0); 579 atomic_set(&gc->inflight_gc, 0);
515 580
516 gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq", 581 /* Workqueue that reads valid sectors from a line and submit them to the
517 WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active); 582 * GC writer to be recycled.
583 */
584 gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
585 WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
586 if (!gc->gc_line_reader_wq) {
587 pr_err("pblk: could not allocate GC line reader workqueue\n");
588 ret = -ENOMEM;
589 goto fail_free_reader_kthread;
590 }
591
592 /* Workqueue that prepare lines for GC */
593 gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
594 WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
518 if (!gc->gc_reader_wq) { 595 if (!gc->gc_reader_wq) {
519 pr_err("pblk: could not allocate GC reader workqueue\n"); 596 pr_err("pblk: could not allocate GC reader workqueue\n");
520 ret = -ENOMEM; 597 ret = -ENOMEM;
521 goto fail_free_writer_kthread; 598 goto fail_free_reader_line_wq;
522 } 599 }
523 600
524 spin_lock_init(&gc->lock); 601 spin_lock_init(&gc->lock);
525 spin_lock_init(&gc->w_lock); 602 spin_lock_init(&gc->w_lock);
603 spin_lock_init(&gc->r_lock);
604
605 sema_init(&gc->gc_sem, 128);
606
526 INIT_LIST_HEAD(&gc->w_list); 607 INIT_LIST_HEAD(&gc->w_list);
608 INIT_LIST_HEAD(&gc->r_list);
527 609
528 return 0; 610 return 0;
529 611
612fail_free_reader_line_wq:
613 destroy_workqueue(gc->gc_line_reader_wq);
614fail_free_reader_kthread:
615 kthread_stop(gc->gc_reader_ts);
530fail_free_writer_kthread: 616fail_free_writer_kthread:
531 kthread_stop(gc->gc_writer_ts); 617 kthread_stop(gc->gc_writer_ts);
532fail_free_main_kthread: 618fail_free_main_kthread:
@@ -540,6 +626,7 @@ void pblk_gc_exit(struct pblk *pblk)
540 struct pblk_gc *gc = &pblk->gc; 626 struct pblk_gc *gc = &pblk->gc;
541 627
542 flush_workqueue(gc->gc_reader_wq); 628 flush_workqueue(gc->gc_reader_wq);
629 flush_workqueue(gc->gc_line_reader_wq);
543 630
544 del_timer(&gc->gc_timer); 631 del_timer(&gc->gc_timer);
545 pblk_gc_stop(pblk, 1); 632 pblk_gc_stop(pblk, 1);
@@ -547,9 +634,15 @@ void pblk_gc_exit(struct pblk *pblk)
547 if (gc->gc_ts) 634 if (gc->gc_ts)
548 kthread_stop(gc->gc_ts); 635 kthread_stop(gc->gc_ts);
549 636
550 if (pblk->gc.gc_reader_wq) 637 if (gc->gc_reader_wq)
551 destroy_workqueue(pblk->gc.gc_reader_wq); 638 destroy_workqueue(gc->gc_reader_wq);
639
640 if (gc->gc_line_reader_wq)
641 destroy_workqueue(gc->gc_line_reader_wq);
552 642
553 if (gc->gc_writer_ts) 643 if (gc->gc_writer_ts)
554 kthread_stop(gc->gc_writer_ts); 644 kthread_stop(gc->gc_writer_ts);
645
646 if (gc->gc_reader_ts)
647 kthread_stop(gc->gc_reader_ts);
555} 648}
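
The reworked pblk_gc_init() above unwinds partially set-up state through a chain of goto labels (fail_free_reader_line_wq, fail_free_reader_kthread, fail_free_writer_kthread, ...) that release resources in the reverse order they were acquired. Below is a minimal userspace sketch of that unwinding idiom, assuming three invented resources a, b and c; it is an illustration of the pattern, not code from the patch.

/* Illustrative sketch only: goto-based unwind in reverse acquisition order.
 * acquire()/release() and the resources a, b, c are hypothetical stand-ins
 * for the kthreads and workqueues created in pblk_gc_init(). */
#include <stdio.h>
#include <stdlib.h>

static void *acquire(const char *name) { printf("acquire %s\n", name); return malloc(1); }
static void release(const char *name, void *r) { printf("release %s\n", name); free(r); }

static int init_example(int fail_at)
{
	void *a, *b, *c;
	int ret = 0;

	a = acquire("a");                            /* e.g. main GC kthread */
	if (!a)
		return -1;

	b = (fail_at == 2) ? NULL : acquire("b");    /* e.g. writer kthread */
	if (!b) {
		ret = -1;
		goto fail_free_a;
	}

	c = (fail_at == 3) ? NULL : acquire("c");    /* e.g. reader workqueue */
	if (!c) {
		ret = -1;
		goto fail_free_b;
	}

	return 0;           /* success: the caller now owns a, b and c */

fail_free_b:
	release("b", b);
fail_free_a:
	release("a", a);
	return ret;
}

int main(void)
{
	return init_example(3) ? 1 : 0;   /* simulate the third step failing */
}

On success the sketch hands ownership of all three resources to the caller, just as pblk_gc_init() leaves its kthreads and workqueues alive for pblk_gc_exit() to tear down later.
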
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index ae8cd6d5af8b..1b0f61233c21 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,9 +20,10 @@
20 20
21#include "pblk.h" 21#include "pblk.h"
22 22
23static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache, 23static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
24 *pblk_w_rq_cache, *pblk_line_meta_cache; 24 *pblk_w_rq_cache, *pblk_line_meta_cache;
25static DECLARE_RWSEM(pblk_lock); 25static DECLARE_RWSEM(pblk_lock);
26struct bio_set *pblk_bio_set;
26 27
27static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, 28static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
28 struct bio *bio) 29 struct bio *bio)
@@ -33,7 +34,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
33 * constraint. Writes can be of arbitrary size. 34 * constraint. Writes can be of arbitrary size.
34 */ 35 */
35 if (bio_data_dir(bio) == READ) { 36 if (bio_data_dir(bio) == READ) {
36 blk_queue_split(q, &bio, q->bio_split); 37 blk_queue_split(q, &bio);
37 ret = pblk_submit_read(pblk, bio); 38 ret = pblk_submit_read(pblk, bio);
38 if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) 39 if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
39 bio_put(bio); 40 bio_put(bio);
@@ -46,7 +47,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
46 * available for user I/O. 47 * available for user I/O.
47 */ 48 */
48 if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) 49 if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
49 blk_queue_split(q, &bio, q->bio_split); 50 blk_queue_split(q, &bio);
50 51
51 return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); 52 return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
52} 53}
@@ -199,9 +200,9 @@ static int pblk_init_global_caches(struct pblk *pblk)
199 return -ENOMEM; 200 return -ENOMEM;
200 } 201 }
201 202
202 pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size, 203 pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
203 0, 0, NULL); 204 0, 0, NULL);
204 if (!pblk_r_rq_cache) { 205 if (!pblk_g_rq_cache) {
205 kmem_cache_destroy(pblk_blk_ws_cache); 206 kmem_cache_destroy(pblk_blk_ws_cache);
206 kmem_cache_destroy(pblk_rec_cache); 207 kmem_cache_destroy(pblk_rec_cache);
207 up_write(&pblk_lock); 208 up_write(&pblk_lock);
@@ -213,7 +214,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
213 if (!pblk_w_rq_cache) { 214 if (!pblk_w_rq_cache) {
214 kmem_cache_destroy(pblk_blk_ws_cache); 215 kmem_cache_destroy(pblk_blk_ws_cache);
215 kmem_cache_destroy(pblk_rec_cache); 216 kmem_cache_destroy(pblk_rec_cache);
216 kmem_cache_destroy(pblk_r_rq_cache); 217 kmem_cache_destroy(pblk_g_rq_cache);
217 up_write(&pblk_lock); 218 up_write(&pblk_lock);
218 return -ENOMEM; 219 return -ENOMEM;
219 } 220 }
@@ -225,7 +226,7 @@ static int pblk_init_global_caches(struct pblk *pblk)
225 if (!pblk_line_meta_cache) { 226 if (!pblk_line_meta_cache) {
226 kmem_cache_destroy(pblk_blk_ws_cache); 227 kmem_cache_destroy(pblk_blk_ws_cache);
227 kmem_cache_destroy(pblk_rec_cache); 228 kmem_cache_destroy(pblk_rec_cache);
228 kmem_cache_destroy(pblk_r_rq_cache); 229 kmem_cache_destroy(pblk_g_rq_cache);
229 kmem_cache_destroy(pblk_w_rq_cache); 230 kmem_cache_destroy(pblk_w_rq_cache);
230 up_write(&pblk_lock); 231 up_write(&pblk_lock);
231 return -ENOMEM; 232 return -ENOMEM;
@@ -239,27 +240,10 @@ static int pblk_core_init(struct pblk *pblk)
239{ 240{
240 struct nvm_tgt_dev *dev = pblk->dev; 241 struct nvm_tgt_dev *dev = pblk->dev;
241 struct nvm_geo *geo = &dev->geo; 242 struct nvm_geo *geo = &dev->geo;
242 int max_write_ppas;
243 int mod;
244 243
245 pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
246 max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
247 pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
248 max_write_ppas : nvm_max_phys_sects(dev);
249 pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * 244 pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
250 geo->nr_planes * geo->nr_luns; 245 geo->nr_planes * geo->nr_luns;
251 246
252 if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
253 pr_err("pblk: cannot support device max_phys_sect\n");
254 return -EINVAL;
255 }
256
257 div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
258 if (mod) {
259 pr_err("pblk: bad configuration of sectors/pages\n");
260 return -EINVAL;
261 }
262
263 if (pblk_init_global_caches(pblk)) 247 if (pblk_init_global_caches(pblk))
264 return -ENOMEM; 248 return -ENOMEM;
265 249
@@ -267,7 +251,7 @@ static int pblk_core_init(struct pblk *pblk)
267 if (!pblk->page_pool) 251 if (!pblk->page_pool)
268 return -ENOMEM; 252 return -ENOMEM;
269 253
270 pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns, 254 pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE,
271 pblk_blk_ws_cache); 255 pblk_blk_ws_cache);
272 if (!pblk->line_ws_pool) 256 if (!pblk->line_ws_pool)
273 goto free_page_pool; 257 goto free_page_pool;
@@ -276,41 +260,51 @@ static int pblk_core_init(struct pblk *pblk)
276 if (!pblk->rec_pool) 260 if (!pblk->rec_pool)
277 goto free_blk_ws_pool; 261 goto free_blk_ws_pool;
278 262
279 pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache); 263 pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE,
280 if (!pblk->r_rq_pool) 264 pblk_g_rq_cache);
265 if (!pblk->g_rq_pool)
281 goto free_rec_pool; 266 goto free_rec_pool;
282 267
283 pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache); 268 pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2,
269 pblk_w_rq_cache);
284 if (!pblk->w_rq_pool) 270 if (!pblk->w_rq_pool)
285 goto free_r_rq_pool; 271 goto free_g_rq_pool;
286 272
287 pblk->line_meta_pool = 273 pblk->line_meta_pool =
288 mempool_create_slab_pool(16, pblk_line_meta_cache); 274 mempool_create_slab_pool(PBLK_META_POOL_SIZE,
275 pblk_line_meta_cache);
289 if (!pblk->line_meta_pool) 276 if (!pblk->line_meta_pool)
290 goto free_w_rq_pool; 277 goto free_w_rq_pool;
291 278
292 pblk->kw_wq = alloc_workqueue("pblk-aux-wq", 279 pblk->close_wq = alloc_workqueue("pblk-close-wq",
293 WQ_MEM_RECLAIM | WQ_UNBOUND, 1); 280 WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
294 if (!pblk->kw_wq) 281 if (!pblk->close_wq)
295 goto free_line_meta_pool; 282 goto free_line_meta_pool;
296 283
284 pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
285 WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
286 if (!pblk->bb_wq)
287 goto free_close_wq;
288
297 if (pblk_set_ppaf(pblk)) 289 if (pblk_set_ppaf(pblk))
298 goto free_kw_wq; 290 goto free_bb_wq;
299 291
300 if (pblk_rwb_init(pblk)) 292 if (pblk_rwb_init(pblk))
301 goto free_kw_wq; 293 goto free_bb_wq;
302 294
303 INIT_LIST_HEAD(&pblk->compl_list); 295 INIT_LIST_HEAD(&pblk->compl_list);
304 return 0; 296 return 0;
305 297
306free_kw_wq: 298free_bb_wq:
307 destroy_workqueue(pblk->kw_wq); 299 destroy_workqueue(pblk->bb_wq);
300free_close_wq:
301 destroy_workqueue(pblk->close_wq);
308free_line_meta_pool: 302free_line_meta_pool:
309 mempool_destroy(pblk->line_meta_pool); 303 mempool_destroy(pblk->line_meta_pool);
310free_w_rq_pool: 304free_w_rq_pool:
311 mempool_destroy(pblk->w_rq_pool); 305 mempool_destroy(pblk->w_rq_pool);
312free_r_rq_pool: 306free_g_rq_pool:
313 mempool_destroy(pblk->r_rq_pool); 307 mempool_destroy(pblk->g_rq_pool);
314free_rec_pool: 308free_rec_pool:
315 mempool_destroy(pblk->rec_pool); 309 mempool_destroy(pblk->rec_pool);
316free_blk_ws_pool: 310free_blk_ws_pool:
@@ -322,19 +316,22 @@ free_page_pool:
322 316
323static void pblk_core_free(struct pblk *pblk) 317static void pblk_core_free(struct pblk *pblk)
324{ 318{
325 if (pblk->kw_wq) 319 if (pblk->close_wq)
326 destroy_workqueue(pblk->kw_wq); 320 destroy_workqueue(pblk->close_wq);
321
322 if (pblk->bb_wq)
323 destroy_workqueue(pblk->bb_wq);
327 324
328 mempool_destroy(pblk->page_pool); 325 mempool_destroy(pblk->page_pool);
329 mempool_destroy(pblk->line_ws_pool); 326 mempool_destroy(pblk->line_ws_pool);
330 mempool_destroy(pblk->rec_pool); 327 mempool_destroy(pblk->rec_pool);
331 mempool_destroy(pblk->r_rq_pool); 328 mempool_destroy(pblk->g_rq_pool);
332 mempool_destroy(pblk->w_rq_pool); 329 mempool_destroy(pblk->w_rq_pool);
333 mempool_destroy(pblk->line_meta_pool); 330 mempool_destroy(pblk->line_meta_pool);
334 331
335 kmem_cache_destroy(pblk_blk_ws_cache); 332 kmem_cache_destroy(pblk_blk_ws_cache);
336 kmem_cache_destroy(pblk_rec_cache); 333 kmem_cache_destroy(pblk_rec_cache);
337 kmem_cache_destroy(pblk_r_rq_cache); 334 kmem_cache_destroy(pblk_g_rq_cache);
338 kmem_cache_destroy(pblk_w_rq_cache); 335 kmem_cache_destroy(pblk_w_rq_cache);
339 kmem_cache_destroy(pblk_line_meta_cache); 336 kmem_cache_destroy(pblk_line_meta_cache);
340} 337}
@@ -344,6 +341,12 @@ static void pblk_luns_free(struct pblk *pblk)
344 kfree(pblk->luns); 341 kfree(pblk->luns);
345} 342}
346 343
344static void pblk_free_line_bitmaps(struct pblk_line *line)
345{
346 kfree(line->blk_bitmap);
347 kfree(line->erase_bitmap);
348}
349
347static void pblk_lines_free(struct pblk *pblk) 350static void pblk_lines_free(struct pblk *pblk)
348{ 351{
349 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 352 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -355,8 +358,7 @@ static void pblk_lines_free(struct pblk *pblk)
355 line = &pblk->lines[i]; 358 line = &pblk->lines[i];
356 359
357 pblk_line_free(pblk, line); 360 pblk_line_free(pblk, line);
358 kfree(line->blk_bitmap); 361 pblk_free_line_bitmaps(line);
359 kfree(line->erase_bitmap);
360 } 362 }
361 spin_unlock(&l_mg->free_lock); 363 spin_unlock(&l_mg->free_lock);
362} 364}
@@ -368,11 +370,15 @@ static void pblk_line_meta_free(struct pblk *pblk)
368 370
369 kfree(l_mg->bb_template); 371 kfree(l_mg->bb_template);
370 kfree(l_mg->bb_aux); 372 kfree(l_mg->bb_aux);
373 kfree(l_mg->vsc_list);
371 374
375 spin_lock(&l_mg->free_lock);
372 for (i = 0; i < PBLK_DATA_LINES; i++) { 376 for (i = 0; i < PBLK_DATA_LINES; i++) {
373 pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); 377 kfree(l_mg->sline_meta[i]);
374 pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); 378 pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
379 kfree(l_mg->eline_meta[i]);
375 } 380 }
381 spin_unlock(&l_mg->free_lock);
376 382
377 kfree(pblk->lines); 383 kfree(pblk->lines);
378} 384}
@@ -411,13 +417,31 @@ out:
411 return ret; 417 return ret;
412} 418}
413 419
414static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line) 420static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
421 int blk_per_line)
415{ 422{
416 struct pblk_line_meta *lm = &pblk->lm; 423 struct nvm_tgt_dev *dev = pblk->dev;
424 struct nvm_geo *geo = &dev->geo;
417 struct pblk_lun *rlun; 425 struct pblk_lun *rlun;
418 int bb_cnt = 0; 426 int bb_cnt = 0;
419 int i; 427 int i;
420 428
429 for (i = 0; i < blk_per_line; i++) {
430 rlun = &pblk->luns[i];
431 if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
432 continue;
433
434 set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
435 bb_cnt++;
436 }
437
438 return bb_cnt;
439}
440
441static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line)
442{
443 struct pblk_line_meta *lm = &pblk->lm;
444
421 line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); 445 line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
422 if (!line->blk_bitmap) 446 if (!line->blk_bitmap)
423 return -ENOMEM; 447 return -ENOMEM;
@@ -428,16 +452,7 @@ static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
428 return -ENOMEM; 452 return -ENOMEM;
429 } 453 }
430 454
431 for (i = 0; i < lm->blk_per_line; i++) { 455 return 0;
432 rlun = &pblk->luns[i];
433 if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
434 continue;
435
436 set_bit(i, line->blk_bitmap);
437 bb_cnt++;
438 }
439
440 return bb_cnt;
441} 456}
442 457
443static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) 458static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
@@ -505,12 +520,32 @@ static int pblk_lines_configure(struct pblk *pblk, int flags)
505} 520}
506 521
507/* See comment over struct line_emeta definition */ 522/* See comment over struct line_emeta definition */
508static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm) 523static unsigned int calc_emeta_len(struct pblk *pblk)
509{ 524{
510 return (sizeof(struct line_emeta) + 525 struct pblk_line_meta *lm = &pblk->lm;
511 ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) + 526 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
512 (pblk->l_mg.nr_lines * sizeof(u32)) + 527 struct nvm_tgt_dev *dev = pblk->dev;
513 lm->blk_bitmap_len); 528 struct nvm_geo *geo = &dev->geo;
529
530 /* Round to sector size so that lba_list starts on its own sector */
531 lm->emeta_sec[1] = DIV_ROUND_UP(
532 sizeof(struct line_emeta) + lm->blk_bitmap_len,
533 geo->sec_size);
534 lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
535
536 /* Round to sector size so that vsc_list starts on its own sector */
537 lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
538 lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
539 geo->sec_size);
540 lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;
541
542 lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
543 geo->sec_size);
544 lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;
545
546 lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);
547
548 return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
514} 549}
515 550
516static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) 551static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
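
calc_emeta_len() now lays the end-of-line metadata out as three regions, each rounded up to whole sectors with DIV_ROUND_UP so that lba_list and vsc_list begin on their own sector, and returns the summed byte length. The standalone sketch below reproduces just that arithmetic; the geometry numbers (4096-byte sectors, header size, bitmap length, line count) are invented for illustration and not taken from any device.

/* Sketch of the sector-rounding done in calc_emeta_len(); all values below
 * are assumed for the example. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int sec_size = 4096;          /* assumed sector size */
	unsigned int hdr_len = 512;            /* sizeof(struct line_emeta), assumed */
	unsigned int blk_bitmap_len = 64;      /* assumed */
	unsigned int dsec_per_line = 12288;    /* data sectors per line, assumed */
	unsigned int nr_lines = 1020;          /* assumed */

	/* Region 1: header + block bitmap, rounded so lba_list starts on a sector */
	unsigned int sec1 = DIV_ROUND_UP(hdr_len + blk_bitmap_len, sec_size);
	/* Region 2: one u64 LBA per data sector in the line */
	unsigned int sec2 = DIV_ROUND_UP(dsec_per_line * sizeof(unsigned long long), sec_size);
	/* Region 3: one u32 valid-sector counter per line */
	unsigned int sec3 = DIV_ROUND_UP(nr_lines * sizeof(unsigned int), sec_size);

	printf("emeta sectors: %u + %u + %u = %u (%u bytes)\n",
	       sec1, sec2, sec3, sec1 + sec2 + sec3,
	       (sec1 + sec2 + sec3) * sec_size);
	return 0;
}
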
@@ -534,6 +569,78 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
534 atomic_set(&pblk->rl.free_blocks, nr_free_blks); 569 atomic_set(&pblk->rl.free_blocks, nr_free_blks);
535} 570}
536 571
572static int pblk_lines_alloc_metadata(struct pblk *pblk)
573{
574 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
575 struct pblk_line_meta *lm = &pblk->lm;
576 int i;
577
 578 /* smeta is always small enough to fit in a kmalloc allocation; emeta's
 579 * size depends on the number of LUNs allocated to the pblk instance
580 */
581 for (i = 0; i < PBLK_DATA_LINES; i++) {
582 l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
583 if (!l_mg->sline_meta[i])
584 goto fail_free_smeta;
585 }
586
587 /* emeta allocates three different buffers for managing metadata with
588 * in-memory and in-media layouts
589 */
590 for (i = 0; i < PBLK_DATA_LINES; i++) {
591 struct pblk_emeta *emeta;
592
593 emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
594 if (!emeta)
595 goto fail_free_emeta;
596
597 if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
598 l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
599
600 emeta->buf = vmalloc(lm->emeta_len[0]);
601 if (!emeta->buf) {
602 kfree(emeta);
603 goto fail_free_emeta;
604 }
605
606 emeta->nr_entries = lm->emeta_sec[0];
607 l_mg->eline_meta[i] = emeta;
608 } else {
609 l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
610
611 emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
612 if (!emeta->buf) {
613 kfree(emeta);
614 goto fail_free_emeta;
615 }
616
617 emeta->nr_entries = lm->emeta_sec[0];
618 l_mg->eline_meta[i] = emeta;
619 }
620 }
621
622 l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
623 if (!l_mg->vsc_list)
624 goto fail_free_emeta;
625
626 for (i = 0; i < l_mg->nr_lines; i++)
627 l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);
628
629 return 0;
630
631fail_free_emeta:
632 while (--i >= 0) {
633 vfree(l_mg->eline_meta[i]->buf);
634 kfree(l_mg->eline_meta[i]);
635 }
636
637fail_free_smeta:
638 for (i = 0; i < PBLK_DATA_LINES; i++)
639 kfree(l_mg->sline_meta[i]);
640
641 return -ENOMEM;
642}
643
537static int pblk_lines_init(struct pblk *pblk) 644static int pblk_lines_init(struct pblk *pblk)
538{ 645{
539 struct nvm_tgt_dev *dev = pblk->dev; 646 struct nvm_tgt_dev *dev = pblk->dev;
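
pblk_lines_alloc_metadata() above picks the emeta allocator per instance: buffers that fit under KMALLOC_MAX_CACHE_SIZE come from kmalloc, larger ones from vmalloc, and the choice is recorded in emeta_alloc_type so the free path can call the matching kfree() or vfree(). The userspace sketch below shows only that "pick an allocator by size and remember which one" idea; the threshold constant and the malloc-backed stand-ins are assumptions, not kernel APIs.

/* Illustrative only: allocator chosen by a size threshold, with the choice
 * recorded so teardown frees with the matching routine. Both paths are
 * backed by plain malloc() here. */
#include <stdio.h>
#include <stdlib.h>

#define SMALL_ALLOC_MAX (8 * 1024)   /* assumed stand-in for KMALLOC_MAX_CACHE_SIZE */

enum alloc_type { ALLOC_SMALL, ALLOC_LARGE };

struct emeta_buf {
	enum alloc_type type;
	void *buf;
};

static int emeta_alloc(struct emeta_buf *e, size_t len)
{
	if (len > SMALL_ALLOC_MAX) {
		e->type = ALLOC_LARGE;
		e->buf = malloc(len);     /* would be vmalloc() in the kernel */
	} else {
		e->type = ALLOC_SMALL;
		e->buf = malloc(len);     /* would be kmalloc() in the kernel */
	}
	return e->buf ? 0 : -1;
}

static void emeta_free(struct emeta_buf *e)
{
	/* Free with the routine matching the allocation path. */
	if (e->type == ALLOC_LARGE)
		free(e->buf);             /* would be vfree() */
	else
		free(e->buf);             /* would be kfree() */
}

int main(void)
{
	struct emeta_buf e;

	if (emeta_alloc(&e, 64 * 1024))   /* large: takes the "vmalloc" path */
		return 1;
	printf("allocated via %s path\n", e.type == ALLOC_LARGE ? "large" : "small");
	emeta_free(&e);
	return 0;
}
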
@@ -542,10 +649,32 @@ static int pblk_lines_init(struct pblk *pblk)
542 struct pblk_line_meta *lm = &pblk->lm; 649 struct pblk_line_meta *lm = &pblk->lm;
543 struct pblk_line *line; 650 struct pblk_line *line;
544 unsigned int smeta_len, emeta_len; 651 unsigned int smeta_len, emeta_len;
545 long nr_bad_blks, nr_meta_blks, nr_free_blks; 652 long nr_bad_blks, nr_free_blks;
546 int bb_distance; 653 int bb_distance, max_write_ppas, mod;
547 int i; 654 int i, ret;
548 int ret; 655
656 pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
657 max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
658 pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
659 max_write_ppas : nvm_max_phys_sects(dev);
660 pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
661
662 if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
663 pr_err("pblk: cannot support device max_phys_sect\n");
664 return -EINVAL;
665 }
666
667 div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
668 if (mod) {
669 pr_err("pblk: bad configuration of sectors/pages\n");
670 return -EINVAL;
671 }
672
673 l_mg->nr_lines = geo->blks_per_lun;
674 l_mg->log_line = l_mg->data_line = NULL;
675 l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
676 l_mg->nr_free_lines = 0;
677 bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
549 678
550 lm->sec_per_line = geo->sec_per_blk * geo->nr_luns; 679 lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
551 lm->blk_per_line = geo->nr_luns; 680 lm->blk_per_line = geo->nr_luns;
@@ -554,20 +683,17 @@ static int pblk_lines_init(struct pblk *pblk)
554 lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); 683 lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
555 lm->high_thrs = lm->sec_per_line / 2; 684 lm->high_thrs = lm->sec_per_line / 2;
556 lm->mid_thrs = lm->sec_per_line / 4; 685 lm->mid_thrs = lm->sec_per_line / 4;
686 lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs;
557 687
558 /* Calculate necessary pages for smeta. See comment over struct 688 /* Calculate necessary pages for smeta. See comment over struct
559 * line_smeta definition 689 * line_smeta definition
560 */ 690 */
561 lm->smeta_len = sizeof(struct line_smeta) +
562 PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
563
564 i = 1; 691 i = 1;
565add_smeta_page: 692add_smeta_page:
566 lm->smeta_sec = i * geo->sec_per_pl; 693 lm->smeta_sec = i * geo->sec_per_pl;
567 lm->smeta_len = lm->smeta_sec * geo->sec_size; 694 lm->smeta_len = lm->smeta_sec * geo->sec_size;
568 695
569 smeta_len = sizeof(struct line_smeta) + 696 smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
570 PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
571 if (smeta_len > lm->smeta_len) { 697 if (smeta_len > lm->smeta_len) {
572 i++; 698 i++;
573 goto add_smeta_page; 699 goto add_smeta_page;
@@ -578,66 +704,28 @@ add_smeta_page:
578 */ 704 */
579 i = 1; 705 i = 1;
580add_emeta_page: 706add_emeta_page:
581 lm->emeta_sec = i * geo->sec_per_pl; 707 lm->emeta_sec[0] = i * geo->sec_per_pl;
582 lm->emeta_len = lm->emeta_sec * geo->sec_size; 708 lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;
583 709
584 emeta_len = calc_emeta_len(pblk, lm); 710 emeta_len = calc_emeta_len(pblk);
585 if (emeta_len > lm->emeta_len) { 711 if (emeta_len > lm->emeta_len[0]) {
586 i++; 712 i++;
587 goto add_emeta_page; 713 goto add_emeta_page;
588 } 714 }
589 lm->emeta_bb = geo->nr_luns - i;
590
591 nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
592 (geo->sec_per_blk / 2)) / geo->sec_per_blk;
593 lm->min_blk_line = nr_meta_blks + 1;
594
595 l_mg->nr_lines = geo->blks_per_lun;
596 l_mg->log_line = l_mg->data_line = NULL;
597 l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
598 l_mg->nr_free_lines = 0;
599 bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
600 715
601 /* smeta is always small enough to fit on a kmalloc memory allocation, 716 lm->emeta_bb = geo->nr_luns - i;
602 * emeta depends on the number of LUNs allocated to the pblk instance 717 lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0],
603 */ 718 geo->sec_per_blk);
604 l_mg->smeta_alloc_type = PBLK_KMALLOC_META; 719 if (lm->min_blk_line > lm->blk_per_line) {
605 for (i = 0; i < PBLK_DATA_LINES; i++) { 720 pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
606 l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL); 721 lm->blk_per_line);
607 if (!l_mg->sline_meta[i].meta) 722 ret = -EINVAL;
608 while (--i >= 0) { 723 goto fail;
609 kfree(l_mg->sline_meta[i].meta);
610 ret = -ENOMEM;
611 goto fail;
612 }
613 } 724 }
614 725
615 if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) { 726 ret = pblk_lines_alloc_metadata(pblk);
616 l_mg->emeta_alloc_type = PBLK_VMALLOC_META; 727 if (ret)
617 728 goto fail;
618 for (i = 0; i < PBLK_DATA_LINES; i++) {
619 l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
620 if (!l_mg->eline_meta[i].meta)
621 while (--i >= 0) {
622 vfree(l_mg->eline_meta[i].meta);
623 ret = -ENOMEM;
624 goto fail;
625 }
626 }
627 } else {
628 l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
629
630 for (i = 0; i < PBLK_DATA_LINES; i++) {
631 l_mg->eline_meta[i].meta =
632 kmalloc(lm->emeta_len, GFP_KERNEL);
633 if (!l_mg->eline_meta[i].meta)
634 while (--i >= 0) {
635 kfree(l_mg->eline_meta[i].meta);
636 ret = -ENOMEM;
637 goto fail;
638 }
639 }
640 }
641 729
642 l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); 730 l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
643 if (!l_mg->bb_template) { 731 if (!l_mg->bb_template) {
@@ -664,11 +752,14 @@ add_emeta_page:
664 INIT_LIST_HEAD(&l_mg->gc_low_list); 752 INIT_LIST_HEAD(&l_mg->gc_low_list);
665 INIT_LIST_HEAD(&l_mg->gc_empty_list); 753 INIT_LIST_HEAD(&l_mg->gc_empty_list);
666 754
755 INIT_LIST_HEAD(&l_mg->emeta_list);
756
667 l_mg->gc_lists[0] = &l_mg->gc_high_list; 757 l_mg->gc_lists[0] = &l_mg->gc_high_list;
668 l_mg->gc_lists[1] = &l_mg->gc_mid_list; 758 l_mg->gc_lists[1] = &l_mg->gc_mid_list;
669 l_mg->gc_lists[2] = &l_mg->gc_low_list; 759 l_mg->gc_lists[2] = &l_mg->gc_low_list;
670 760
671 spin_lock_init(&l_mg->free_lock); 761 spin_lock_init(&l_mg->free_lock);
762 spin_lock_init(&l_mg->close_lock);
672 spin_lock_init(&l_mg->gc_lock); 763 spin_lock_init(&l_mg->gc_lock);
673 764
674 pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), 765 pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
@@ -689,10 +780,16 @@ add_emeta_page:
689 line->type = PBLK_LINETYPE_FREE; 780 line->type = PBLK_LINETYPE_FREE;
690 line->state = PBLK_LINESTATE_FREE; 781 line->state = PBLK_LINESTATE_FREE;
691 line->gc_group = PBLK_LINEGC_NONE; 782 line->gc_group = PBLK_LINEGC_NONE;
783 line->vsc = &l_mg->vsc_list[i];
692 spin_lock_init(&line->lock); 784 spin_lock_init(&line->lock);
693 785
694 nr_bad_blks = pblk_bb_line(pblk, line); 786 ret = pblk_alloc_line_bitmaps(pblk, line);
787 if (ret)
788 goto fail_free_lines;
789
790 nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line);
695 if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) { 791 if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
792 pblk_free_line_bitmaps(line);
696 ret = -EINVAL; 793 ret = -EINVAL;
697 goto fail_free_lines; 794 goto fail_free_lines;
698 } 795 }
@@ -713,24 +810,20 @@ add_emeta_page:
713 810
714 pblk_set_provision(pblk, nr_free_blks); 811 pblk_set_provision(pblk, nr_free_blks);
715 812
716 sema_init(&pblk->erase_sem, 1);
717
718 /* Cleanup per-LUN bad block lists - managed within lines on run-time */ 813 /* Cleanup per-LUN bad block lists - managed within lines on run-time */
719 for (i = 0; i < geo->nr_luns; i++) 814 for (i = 0; i < geo->nr_luns; i++)
720 kfree(pblk->luns[i].bb_list); 815 kfree(pblk->luns[i].bb_list);
721 816
722 return 0; 817 return 0;
723fail_free_lines: 818fail_free_lines:
724 kfree(pblk->lines); 819 while (--i >= 0)
820 pblk_free_line_bitmaps(&pblk->lines[i]);
725fail_free_bb_aux: 821fail_free_bb_aux:
726 kfree(l_mg->bb_aux); 822 kfree(l_mg->bb_aux);
727fail_free_bb_template: 823fail_free_bb_template:
728 kfree(l_mg->bb_template); 824 kfree(l_mg->bb_template);
729fail_free_meta: 825fail_free_meta:
730 for (i = 0; i < PBLK_DATA_LINES; i++) { 826 pblk_line_meta_free(pblk);
731 pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
732 pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
733 }
734fail: 827fail:
735 for (i = 0; i < geo->nr_luns; i++) 828 for (i = 0; i < geo->nr_luns; i++)
736 kfree(pblk->luns[i].bb_list); 829 kfree(pblk->luns[i].bb_list);
@@ -754,6 +847,15 @@ static int pblk_writer_init(struct pblk *pblk)
754 847
755static void pblk_writer_stop(struct pblk *pblk) 848static void pblk_writer_stop(struct pblk *pblk)
756{ 849{
850 /* The pipeline must be stopped and the write buffer emptied before the
851 * write thread is stopped
852 */
853 WARN(pblk_rb_read_count(&pblk->rwb),
854 "Stopping not fully persisted write buffer\n");
855
856 WARN(pblk_rb_sync_count(&pblk->rwb),
857 "Stopping not fully synced write buffer\n");
858
757 if (pblk->writer_ts) 859 if (pblk->writer_ts)
758 kthread_stop(pblk->writer_ts); 860 kthread_stop(pblk->writer_ts);
759 del_timer(&pblk->wtimer); 861 del_timer(&pblk->wtimer);
@@ -772,10 +874,9 @@ static void pblk_free(struct pblk *pblk)
772 874
773static void pblk_tear_down(struct pblk *pblk) 875static void pblk_tear_down(struct pblk *pblk)
774{ 876{
775 pblk_flush_writer(pblk); 877 pblk_pipeline_stop(pblk);
776 pblk_writer_stop(pblk); 878 pblk_writer_stop(pblk);
777 pblk_rb_sync_l2p(&pblk->rwb); 879 pblk_rb_sync_l2p(&pblk->rwb);
778 pblk_recov_pad(pblk);
779 pblk_rwb_free(pblk); 880 pblk_rwb_free(pblk);
780 pblk_rl_free(&pblk->rl); 881 pblk_rl_free(&pblk->rl);
781 882
@@ -821,6 +922,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
821 922
822 pblk->dev = dev; 923 pblk->dev = dev;
823 pblk->disk = tdisk; 924 pblk->disk = tdisk;
925 pblk->state = PBLK_STATE_RUNNING;
824 926
825 spin_lock_init(&pblk->trans_lock); 927 spin_lock_init(&pblk->trans_lock);
826 spin_lock_init(&pblk->lock); 928 spin_lock_init(&pblk->lock);
@@ -836,8 +938,8 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
836 atomic_long_set(&pblk->req_writes, 0); 938 atomic_long_set(&pblk->req_writes, 0);
837 atomic_long_set(&pblk->sub_writes, 0); 939 atomic_long_set(&pblk->sub_writes, 0);
838 atomic_long_set(&pblk->sync_writes, 0); 940 atomic_long_set(&pblk->sync_writes, 0);
839 atomic_long_set(&pblk->compl_writes, 0);
840 atomic_long_set(&pblk->inflight_reads, 0); 941 atomic_long_set(&pblk->inflight_reads, 0);
942 atomic_long_set(&pblk->cache_reads, 0);
841 atomic_long_set(&pblk->sync_reads, 0); 943 atomic_long_set(&pblk->sync_reads, 0);
842 atomic_long_set(&pblk->recov_writes, 0); 944 atomic_long_set(&pblk->recov_writes, 0);
843 atomic_long_set(&pblk->recov_writes, 0); 945 atomic_long_set(&pblk->recov_writes, 0);
@@ -946,11 +1048,20 @@ static struct nvm_tgt_type tt_pblk = {
946 1048
947static int __init pblk_module_init(void) 1049static int __init pblk_module_init(void)
948{ 1050{
949 return nvm_register_tgt_type(&tt_pblk); 1051 int ret;
1052
1053 pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
1054 if (!pblk_bio_set)
1055 return -ENOMEM;
1056 ret = nvm_register_tgt_type(&tt_pblk);
1057 if (ret)
1058 bioset_free(pblk_bio_set);
1059 return ret;
950} 1060}
951 1061
952static void pblk_module_exit(void) 1062static void pblk_module_exit(void)
953{ 1063{
1064 bioset_free(pblk_bio_set);
954 nvm_unregister_tgt_type(&tt_pblk); 1065 nvm_unregister_tgt_type(&tt_pblk);
955} 1066}
956 1067
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 17c16955284d..fddb924f6dde 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -25,9 +25,9 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
25 unsigned int valid_secs) 25 unsigned int valid_secs)
26{ 26{
27 struct pblk_line *line = pblk_line_get_data(pblk); 27 struct pblk_line *line = pblk_line_get_data(pblk);
28 struct line_emeta *emeta = line->emeta; 28 struct pblk_emeta *emeta = line->emeta;
29 struct pblk_w_ctx *w_ctx; 29 struct pblk_w_ctx *w_ctx;
30 __le64 *lba_list = pblk_line_emeta_to_lbas(emeta); 30 __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf);
31 u64 paddr; 31 u64 paddr;
32 int nr_secs = pblk->min_write_pgs; 32 int nr_secs = pblk->min_write_pgs;
33 int i; 33 int i;
@@ -51,18 +51,20 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
51 w_ctx->ppa = ppa_list[i]; 51 w_ctx->ppa = ppa_list[i];
52 meta_list[i].lba = cpu_to_le64(w_ctx->lba); 52 meta_list[i].lba = cpu_to_le64(w_ctx->lba);
53 lba_list[paddr] = cpu_to_le64(w_ctx->lba); 53 lba_list[paddr] = cpu_to_le64(w_ctx->lba);
54 le64_add_cpu(&line->emeta->nr_valid_lbas, 1); 54 line->nr_valid_lbas++;
55 } else { 55 } else {
56 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); 56 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
57 lba_list[paddr] = cpu_to_le64(ADDR_EMPTY); 57
58 pblk_map_pad_invalidate(pblk, line, paddr); 58 lba_list[paddr] = meta_list[i].lba = addr_empty;
59 __pblk_map_invalidate(pblk, line, paddr);
59 } 60 }
60 } 61 }
61 62
62 if (pblk_line_is_full(line)) { 63 if (pblk_line_is_full(line)) {
63 line = pblk_line_replace_data(pblk); 64 struct pblk_line *prev_line = line;
64 if (!line) 65
65 return; 66 pblk_line_replace_data(pblk);
67 pblk_line_close_meta(pblk, prev_line);
66 } 68 }
67 69
68 pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); 70 pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
@@ -91,8 +93,9 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
91{ 93{
92 struct nvm_tgt_dev *dev = pblk->dev; 94 struct nvm_tgt_dev *dev = pblk->dev;
93 struct nvm_geo *geo = &dev->geo; 95 struct nvm_geo *geo = &dev->geo;
94 struct pblk_line *e_line = pblk_line_get_data_next(pblk); 96 struct pblk_line_meta *lm = &pblk->lm;
95 struct pblk_sec_meta *meta_list = rqd->meta_list; 97 struct pblk_sec_meta *meta_list = rqd->meta_list;
98 struct pblk_line *e_line, *d_line;
96 unsigned int map_secs; 99 unsigned int map_secs;
97 int min = pblk->min_write_pgs; 100 int min = pblk->min_write_pgs;
98 int i, erase_lun; 101 int i, erase_lun;
@@ -102,35 +105,63 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
102 pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i], 105 pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
103 lun_bitmap, &meta_list[i], map_secs); 106 lun_bitmap, &meta_list[i], map_secs);
104 107
105 erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls + 108 erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
106 rqd->ppa_list[i].g.ch;
107 109
108 if (!test_bit(erase_lun, e_line->erase_bitmap)) { 110 /* line can change after page map. We might also be writing the
109 if (down_trylock(&pblk->erase_sem)) 111 * last line.
110 continue; 112 */
113 e_line = pblk_line_get_erase(pblk);
114 if (!e_line)
115 return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
116 valid_secs, i + min);
111 117
118 spin_lock(&e_line->lock);
119 if (!test_bit(erase_lun, e_line->erase_bitmap)) {
112 set_bit(erase_lun, e_line->erase_bitmap); 120 set_bit(erase_lun, e_line->erase_bitmap);
113 atomic_dec(&e_line->left_eblks); 121 atomic_dec(&e_line->left_eblks);
122
114 *erase_ppa = rqd->ppa_list[i]; 123 *erase_ppa = rqd->ppa_list[i];
115 erase_ppa->g.blk = e_line->id; 124 erase_ppa->g.blk = e_line->id;
116 125
126 spin_unlock(&e_line->lock);
127
117 /* Avoid evaluating e_line->left_eblks */ 128 /* Avoid evaluating e_line->left_eblks */
118 return pblk_map_rq(pblk, rqd, sentry, lun_bitmap, 129 return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
119 valid_secs, i + min); 130 valid_secs, i + min);
120 } 131 }
132 spin_unlock(&e_line->lock);
121 } 133 }
122 134
123 /* Erase blocks that are bad in this line but might not be in next */ 135 d_line = pblk_line_get_data(pblk);
124 if (unlikely(ppa_empty(*erase_ppa))) { 136
125 struct pblk_line_meta *lm = &pblk->lm; 137 /* line can change after page map. We might also be writing the
138 * last line.
139 */
140 e_line = pblk_line_get_erase(pblk);
141 if (!e_line)
142 return;
126 143
127 i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line); 144 /* Erase blocks that are bad in this line but might not be in next */
128 if (i == lm->blk_per_line) 145 if (unlikely(ppa_empty(*erase_ppa)) &&
146 bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
147 int bit = -1;
148
149retry:
150 bit = find_next_bit(d_line->blk_bitmap,
151 lm->blk_per_line, bit + 1);
152 if (bit >= lm->blk_per_line)
129 return; 153 return;
130 154
131 set_bit(i, e_line->erase_bitmap); 155 spin_lock(&e_line->lock);
156 if (test_bit(bit, e_line->erase_bitmap)) {
157 spin_unlock(&e_line->lock);
158 goto retry;
159 }
160 spin_unlock(&e_line->lock);
161
162 set_bit(bit, e_line->erase_bitmap);
132 atomic_dec(&e_line->left_eblks); 163 atomic_dec(&e_line->left_eblks);
133 *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */ 164 *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
134 erase_ppa->g.blk = e_line->id; 165 erase_ppa->g.blk = e_line->id;
135 } 166 }
136} 167}
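
In the new pblk_map_erase_rq() fallback path, an erase target is found by scanning the data line's bad-block bitmap with find_next_bit() and then claiming the matching bit in the erase line's bitmap under e_line->lock, retrying the scan if that bit was already taken. The sketch below mimics that claim-under-lock-and-retry shape in plain C; NBITS, next_set_bit() and the pthread mutex are invented stand-ins for lm->blk_per_line, find_next_bit() and the line spinlock.

/* Sketch of "scan a bitmap, claim the bit under a lock, retry on collision". */
#include <pthread.h>
#include <stdio.h>
#include <stdbool.h>

#define NBITS 16

static bool bad_blk[NBITS]   = { [2] = true, [5] = true, [9] = true };
static bool erase_map[NBITS] = { [2] = true };   /* bit 2 already claimed */
static pthread_mutex_t erase_lock = PTHREAD_MUTEX_INITIALIZER;

/* Return the index of the next set bit at or after 'from', or NBITS if none. */
static int next_set_bit(const bool *map, int from)
{
	int i;

	for (i = from; i < NBITS; i++)
		if (map[i])
			return i;
	return NBITS;
}

static int claim_erase_block(void)
{
	int bit = -1;

retry:
	bit = next_set_bit(bad_blk, bit + 1);
	if (bit >= NBITS)
		return -1;                 /* nothing left to erase */

	pthread_mutex_lock(&erase_lock);
	if (erase_map[bit]) {              /* already claimed; keep scanning */
		pthread_mutex_unlock(&erase_lock);
		goto retry;
	}
	erase_map[bit] = true;             /* claim it */
	pthread_mutex_unlock(&erase_lock);

	return bit;
}

int main(void)
{
	printf("claimed block %d\n", claim_erase_block());   /* prints 5 */
	return 0;
}

(Build with -pthread; block 2 is skipped because it was already claimed, so the example ends up with block 5.)
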
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 045384ddc1f9..5ecc154f6831 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -150,6 +150,7 @@ try:
150 /* Release flags on context. Protect from writes and reads */ 150 /* Release flags on context. Protect from writes and reads */
151 smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY); 151 smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
152 pblk_ppa_set_empty(&w_ctx->ppa); 152 pblk_ppa_set_empty(&w_ctx->ppa);
153 w_ctx->lba = ADDR_EMPTY;
153} 154}
154 155
155#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size) 156#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
@@ -180,6 +181,14 @@ unsigned int pblk_rb_read_count(struct pblk_rb *rb)
180 return pblk_rb_ring_count(mem, subm, rb->nr_entries); 181 return pblk_rb_ring_count(mem, subm, rb->nr_entries);
181} 182}
182 183
184unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
185{
186 unsigned int mem = READ_ONCE(rb->mem);
187 unsigned int sync = READ_ONCE(rb->sync);
188
189 return pblk_rb_ring_count(mem, sync, rb->nr_entries);
190}
191
183unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) 192unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
184{ 193{
185 unsigned int subm; 194 unsigned int subm;
@@ -199,12 +208,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
199 struct pblk_line *line; 208 struct pblk_line *line;
200 struct pblk_rb_entry *entry; 209 struct pblk_rb_entry *entry;
201 struct pblk_w_ctx *w_ctx; 210 struct pblk_w_ctx *w_ctx;
211 unsigned int user_io = 0, gc_io = 0;
202 unsigned int i; 212 unsigned int i;
213 int flags;
203 214
204 for (i = 0; i < to_update; i++) { 215 for (i = 0; i < to_update; i++) {
205 entry = &rb->entries[*l2p_upd]; 216 entry = &rb->entries[*l2p_upd];
206 w_ctx = &entry->w_ctx; 217 w_ctx = &entry->w_ctx;
207 218
219 flags = READ_ONCE(entry->w_ctx.flags);
220 if (flags & PBLK_IOTYPE_USER)
221 user_io++;
222 else if (flags & PBLK_IOTYPE_GC)
223 gc_io++;
224 else
225 WARN(1, "pblk: unknown IO type\n");
226
208 pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa, 227 pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
209 entry->cacheline); 228 entry->cacheline);
210 229
@@ -214,6 +233,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
214 *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1); 233 *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
215 } 234 }
216 235
236 pblk_rl_out(&pblk->rl, user_io, gc_io);
237
217 return 0; 238 return 0;
218} 239}
219 240
@@ -357,6 +378,9 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
357 /* Protect syncs */ 378 /* Protect syncs */
358 smp_store_release(&rb->sync_point, sync_point); 379 smp_store_release(&rb->sync_point, sync_point);
359 380
381 if (!bio)
382 return 0;
383
360 spin_lock_irq(&rb->s_lock); 384 spin_lock_irq(&rb->s_lock);
361 bio_list_add(&entry->w_ctx.bios, bio); 385 bio_list_add(&entry->w_ctx.bios, bio);
362 spin_unlock_irq(&rb->s_lock); 386 spin_unlock_irq(&rb->s_lock);
@@ -395,6 +419,17 @@ static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
395 return 1; 419 return 1;
396} 420}
397 421
422void pblk_rb_flush(struct pblk_rb *rb)
423{
424 struct pblk *pblk = container_of(rb, struct pblk, rwb);
425 unsigned int mem = READ_ONCE(rb->mem);
426
427 if (pblk_rb_sync_point_set(rb, NULL, mem))
428 return;
429
430 pblk_write_should_kick(pblk);
431}
432
398static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, 433static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
399 unsigned int *pos, struct bio *bio, 434 unsigned int *pos, struct bio *bio,
400 int *io_ret) 435 int *io_ret)
@@ -431,15 +466,16 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
431 unsigned int nr_entries, unsigned int *pos) 466 unsigned int nr_entries, unsigned int *pos)
432{ 467{
433 struct pblk *pblk = container_of(rb, struct pblk, rwb); 468 struct pblk *pblk = container_of(rb, struct pblk, rwb);
434 int flush_done; 469 int io_ret;
435 470
436 spin_lock(&rb->w_lock); 471 spin_lock(&rb->w_lock);
437 if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) { 472 io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
473 if (io_ret) {
438 spin_unlock(&rb->w_lock); 474 spin_unlock(&rb->w_lock);
439 return NVM_IO_REQUEUE; 475 return io_ret;
440 } 476 }
441 477
442 if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) { 478 if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
443 spin_unlock(&rb->w_lock); 479 spin_unlock(&rb->w_lock);
444 return NVM_IO_REQUEUE; 480 return NVM_IO_REQUEUE;
445 } 481 }
@@ -447,7 +483,7 @@ int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
447 pblk_rl_user_in(&pblk->rl, nr_entries); 483 pblk_rl_user_in(&pblk->rl, nr_entries);
448 spin_unlock(&rb->w_lock); 484 spin_unlock(&rb->w_lock);
449 485
450 return flush_done; 486 return io_ret;
451} 487}
452 488
453/* 489/*
@@ -521,20 +557,18 @@ out:
521 * This function is used by the write thread to form the write bio that will 557 * This function is used by the write thread to form the write bio that will
522 * persist data on the write buffer to the media. 558 * persist data on the write buffer to the media.
523 */ 559 */
524unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, 560unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
525 struct pblk_c_ctx *c_ctx, 561 struct bio *bio, unsigned int pos,
526 unsigned int pos, 562 unsigned int nr_entries, unsigned int count)
527 unsigned int nr_entries,
528 unsigned int count)
529{ 563{
530 struct pblk *pblk = container_of(rb, struct pblk, rwb); 564 struct pblk *pblk = container_of(rb, struct pblk, rwb);
565 struct request_queue *q = pblk->dev->q;
566 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
531 struct pblk_rb_entry *entry; 567 struct pblk_rb_entry *entry;
532 struct page *page; 568 struct page *page;
533 unsigned int pad = 0, read = 0, to_read = nr_entries; 569 unsigned int pad = 0, to_read = nr_entries;
534 unsigned int user_io = 0, gc_io = 0;
535 unsigned int i; 570 unsigned int i;
536 int flags; 571 int flags;
537 int ret;
538 572
539 if (count < nr_entries) { 573 if (count < nr_entries) {
540 pad = nr_entries - count; 574 pad = nr_entries - count;
@@ -553,15 +587,10 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
553 */ 587 */
554try: 588try:
555 flags = READ_ONCE(entry->w_ctx.flags); 589 flags = READ_ONCE(entry->w_ctx.flags);
556 if (!(flags & PBLK_WRITTEN_DATA)) 590 if (!(flags & PBLK_WRITTEN_DATA)) {
591 io_schedule();
557 goto try; 592 goto try;
558 593 }
559 if (flags & PBLK_IOTYPE_USER)
560 user_io++;
561 else if (flags & PBLK_IOTYPE_GC)
562 gc_io++;
563 else
564 WARN(1, "pblk: unknown IO type\n");
565 594
566 page = virt_to_page(entry->data); 595 page = virt_to_page(entry->data);
567 if (!page) { 596 if (!page) {
@@ -570,17 +599,17 @@ try:
570 flags |= PBLK_SUBMITTED_ENTRY; 599 flags |= PBLK_SUBMITTED_ENTRY;
571 /* Release flags on context. Protect from writes */ 600 /* Release flags on context. Protect from writes */
572 smp_store_release(&entry->w_ctx.flags, flags); 601 smp_store_release(&entry->w_ctx.flags, flags);
573 goto out; 602 return NVM_IO_ERR;
574 } 603 }
575 604
576 ret = bio_add_page(bio, page, rb->seg_size, 0); 605 if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
577 if (ret != rb->seg_size) { 606 rb->seg_size) {
578 pr_err("pblk: could not add page to write bio\n"); 607 pr_err("pblk: could not add page to write bio\n");
579 flags &= ~PBLK_WRITTEN_DATA; 608 flags &= ~PBLK_WRITTEN_DATA;
580 flags |= PBLK_SUBMITTED_ENTRY; 609 flags |= PBLK_SUBMITTED_ENTRY;
581 /* Release flags on context. Protect from writes */ 610 /* Release flags on context. Protect from writes */
582 smp_store_release(&entry->w_ctx.flags, flags); 611 smp_store_release(&entry->w_ctx.flags, flags);
583 goto out; 612 return NVM_IO_ERR;
584 } 613 }
585 614
586 if (flags & PBLK_FLUSH_ENTRY) { 615 if (flags & PBLK_FLUSH_ENTRY) {
@@ -607,14 +636,19 @@ try:
607 pos = (pos + 1) & (rb->nr_entries - 1); 636 pos = (pos + 1) & (rb->nr_entries - 1);
608 } 637 }
609 638
610 read = to_read; 639 if (pad) {
611 pblk_rl_out(&pblk->rl, user_io, gc_io); 640 if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
641 pr_err("pblk: could not pad page in write bio\n");
642 return NVM_IO_ERR;
643 }
644 }
645
612#ifdef CONFIG_NVM_DEBUG 646#ifdef CONFIG_NVM_DEBUG
613 atomic_long_add(pad, &((struct pblk *) 647 atomic_long_add(pad, &((struct pblk *)
614 (container_of(rb, struct pblk, rwb)))->padded_writes); 648 (container_of(rb, struct pblk, rwb)))->padded_writes);
615#endif 649#endif
616out: 650
617 return read; 651 return NVM_IO_OK;
618} 652}
619 653
620/* 654/*
@@ -623,15 +657,17 @@ out:
623 * be directed to disk. 657 * be directed to disk.
624 */ 658 */
625int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 659int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
626 u64 pos, int bio_iter) 660 struct ppa_addr ppa, int bio_iter)
627{ 661{
662 struct pblk *pblk = container_of(rb, struct pblk, rwb);
628 struct pblk_rb_entry *entry; 663 struct pblk_rb_entry *entry;
629 struct pblk_w_ctx *w_ctx; 664 struct pblk_w_ctx *w_ctx;
665 struct ppa_addr l2p_ppa;
666 u64 pos = pblk_addr_to_cacheline(ppa);
630 void *data; 667 void *data;
631 int flags; 668 int flags;
632 int ret = 1; 669 int ret = 1;
633 670
634 spin_lock(&rb->w_lock);
635 671
636#ifdef CONFIG_NVM_DEBUG 672#ifdef CONFIG_NVM_DEBUG
637 /* Caller must ensure that the access will not cause an overflow */ 673 /* Caller must ensure that the access will not cause an overflow */
@@ -641,8 +677,14 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
641 w_ctx = &entry->w_ctx; 677 w_ctx = &entry->w_ctx;
642 flags = READ_ONCE(w_ctx->flags); 678 flags = READ_ONCE(w_ctx->flags);
643 679
680 spin_lock(&rb->w_lock);
681 spin_lock(&pblk->trans_lock);
682 l2p_ppa = pblk_trans_map_get(pblk, lba);
683 spin_unlock(&pblk->trans_lock);
684
644 /* Check if the entry has been overwritten or is scheduled to be */ 685 /* Check if the entry has been overwritten or is scheduled to be */
645 if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) { 686 if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
687 flags & PBLK_WRITABLE_ENTRY) {
646 ret = 0; 688 ret = 0;
647 goto out; 689 goto out;
648 } 690 }
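
The new pblk_rb_sync_count() uses the same masked-subtraction ring arithmetic as pblk_rb_read_count(): with a power-of-two ring size, the number of entries between two positions is (head - tail) & (size - 1), which stays correct when the unsigned counters wrap. A standalone sketch with made-up pointer values; the macro mirrors the kernel's CIRC_CNT.

/* Standalone illustration of the masked-subtraction ring count; the
 * positions below are invented for the example. */
#include <stdio.h>

#define CIRC_CNT(head, tail, size) (((head) - (tail)) & ((size) - 1))

int main(void)
{
	unsigned int nr_entries = 1024;      /* must be a power of two */
	unsigned int mem  = 10;              /* producer position (wrapped past 1023) */
	unsigned int subm = 1000;            /* entries submitted to the device */
	unsigned int sync = 990;             /* entries fully persisted */

	/* pblk_rb_read_count(): written but not yet submitted */
	printf("read count: %u\n", CIRC_CNT(mem, subm, nr_entries));   /* 34 */
	/* pblk_rb_sync_count(): written but not yet synced */
	printf("sync count: %u\n", CIRC_CNT(mem, sync, nr_entries));   /* 44 */
	return 0;
}
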
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 4a12f14d78c6..4e5c48f3de62 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -34,8 +34,7 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
34 BUG_ON(!pblk_addr_in_cache(ppa)); 34 BUG_ON(!pblk_addr_in_cache(ppa));
35#endif 35#endif
36 36
37 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, 37 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, bio_iter);
38 pblk_addr_to_cacheline(ppa), bio_iter);
39} 38}
40 39
41static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, 40static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -76,6 +75,9 @@ retry:
76 } 75 }
77 WARN_ON(test_and_set_bit(i, read_bitmap)); 76 WARN_ON(test_and_set_bit(i, read_bitmap));
78 advanced_bio = 1; 77 advanced_bio = 1;
78#ifdef CONFIG_NVM_DEBUG
79 atomic_long_inc(&pblk->cache_reads);
80#endif
79 } else { 81 } else {
80 /* Read from media non-cached sectors */ 82 /* Read from media non-cached sectors */
81 rqd->ppa_list[j++] = p; 83 rqd->ppa_list[j++] = p;
@@ -85,6 +87,11 @@ retry:
85 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); 87 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
86 } 88 }
87 89
90 if (pblk_io_aligned(pblk, nr_secs))
91 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
92 else
93 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
94
88#ifdef CONFIG_NVM_DEBUG 95#ifdef CONFIG_NVM_DEBUG
89 atomic_long_add(nr_secs, &pblk->inflight_reads); 96 atomic_long_add(nr_secs, &pblk->inflight_reads);
90#endif 97#endif
@@ -94,8 +101,6 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
94{ 101{
95 int err; 102 int err;
96 103
97 rqd->flags = pblk_set_read_mode(pblk);
98
99 err = pblk_submit_io(pblk, rqd); 104 err = pblk_submit_io(pblk, rqd);
100 if (err) 105 if (err)
101 return NVM_IO_ERR; 106 return NVM_IO_ERR;
@@ -107,27 +112,27 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
107{ 112{
108 struct pblk *pblk = rqd->private; 113 struct pblk *pblk = rqd->private;
109 struct nvm_tgt_dev *dev = pblk->dev; 114 struct nvm_tgt_dev *dev = pblk->dev;
110 struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); 115 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
111 struct bio *bio = rqd->bio; 116 struct bio *bio = rqd->bio;
112 117
113 if (rqd->error) 118 if (rqd->error)
114 pblk_log_read_err(pblk, rqd); 119 pblk_log_read_err(pblk, rqd);
115#ifdef CONFIG_NVM_DEBUG 120#ifdef CONFIG_NVM_DEBUG
116 else 121 else
117 WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n"); 122 WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
118#endif 123#endif
119 124
120 if (rqd->nr_ppas > 1) 125 nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
121 nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
122 126
123 bio_put(bio); 127 bio_put(bio);
124 if (r_ctx->orig_bio) { 128 if (r_ctx->private) {
129 struct bio *orig_bio = r_ctx->private;
130
125#ifdef CONFIG_NVM_DEBUG 131#ifdef CONFIG_NVM_DEBUG
126 WARN_ONCE(r_ctx->orig_bio->bi_error, 132 WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n");
127 "pblk: corrupted read bio\n");
128#endif 133#endif
129 bio_endio(r_ctx->orig_bio); 134 bio_endio(orig_bio);
130 bio_put(r_ctx->orig_bio); 135 bio_put(orig_bio);
131 } 136 }
132 137
133#ifdef CONFIG_NVM_DEBUG 138#ifdef CONFIG_NVM_DEBUG
@@ -136,6 +141,7 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
136#endif 141#endif
137 142
138 pblk_free_rqd(pblk, rqd, READ); 143 pblk_free_rqd(pblk, rqd, READ);
144 atomic_dec(&pblk->inflight_io);
139} 145}
140 146
141static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, 147static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
@@ -173,6 +179,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
173 179
174 rqd->bio = new_bio; 180 rqd->bio = new_bio;
175 rqd->nr_ppas = nr_holes; 181 rqd->nr_ppas = nr_holes;
182 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
176 rqd->end_io = NULL; 183 rqd->end_io = NULL;
177 184
178 if (unlikely(nr_secs > 1 && nr_holes == 1)) { 185 if (unlikely(nr_secs > 1 && nr_holes == 1)) {
@@ -280,9 +287,14 @@ retry:
280 goto retry; 287 goto retry;
281 } 288 }
282 WARN_ON(test_and_set_bit(0, read_bitmap)); 289 WARN_ON(test_and_set_bit(0, read_bitmap));
290#ifdef CONFIG_NVM_DEBUG
291 atomic_long_inc(&pblk->cache_reads);
292#endif
283 } else { 293 } else {
284 rqd->ppa_addr = ppa; 294 rqd->ppa_addr = ppa;
285 } 295 }
296
297 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
286} 298}
287 299
288int pblk_submit_read(struct pblk *pblk, struct bio *bio) 300int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -316,13 +328,16 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
316 */ 328 */
317 bio_init_idx = pblk_get_bi_idx(bio); 329 bio_init_idx = pblk_get_bi_idx(bio);
318 330
331 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
332 &rqd->dma_meta_list);
333 if (!rqd->meta_list) {
334 pr_err("pblk: not able to allocate ppa list\n");
335 goto fail_rqd_free;
336 }
337
319 if (nr_secs > 1) { 338 if (nr_secs > 1) {
320 rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 339 rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
321 &rqd->dma_ppa_list); 340 rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
322 if (!rqd->ppa_list) {
323 pr_err("pblk: not able to allocate ppa list\n");
324 goto fail_rqd_free;
325 }
326 341
327 pblk_read_ppalist_rq(pblk, rqd, &read_bitmap); 342 pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
328 } else { 343 } else {
@@ -332,6 +347,7 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
332 bio_get(bio); 347 bio_get(bio);
333 if (bitmap_full(&read_bitmap, nr_secs)) { 348 if (bitmap_full(&read_bitmap, nr_secs)) {
334 bio_endio(bio); 349 bio_endio(bio);
350 atomic_inc(&pblk->inflight_io);
335 pblk_end_io_read(rqd); 351 pblk_end_io_read(rqd);
336 return NVM_IO_OK; 352 return NVM_IO_OK;
337 } 353 }
@@ -339,17 +355,17 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
339 /* All sectors are to be read from the device */ 355 /* All sectors are to be read from the device */
340 if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) { 356 if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
341 struct bio *int_bio = NULL; 357 struct bio *int_bio = NULL;
342 struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd); 358 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
343 359
344 /* Clone read bio to deal with read errors internally */ 360 /* Clone read bio to deal with read errors internally */
345 int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set); 361 int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
346 if (!int_bio) { 362 if (!int_bio) {
347 pr_err("pblk: could not clone read bio\n"); 363 pr_err("pblk: could not clone read bio\n");
348 return NVM_IO_ERR; 364 return NVM_IO_ERR;
349 } 365 }
350 366
351 rqd->bio = int_bio; 367 rqd->bio = int_bio;
352 r_ctx->orig_bio = bio; 368 r_ctx->private = bio;
353 369
354 ret = pblk_submit_read_io(pblk, rqd); 370 ret = pblk_submit_read_io(pblk, rqd);
355 if (ret) { 371 if (ret) {
@@ -445,7 +461,6 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
445{ 461{
446 struct nvm_tgt_dev *dev = pblk->dev; 462 struct nvm_tgt_dev *dev = pblk->dev;
447 struct nvm_geo *geo = &dev->geo; 463 struct nvm_geo *geo = &dev->geo;
448 struct request_queue *q = dev->q;
449 struct bio *bio; 464 struct bio *bio;
450 struct nvm_rq rqd; 465 struct nvm_rq rqd;
451 int ret, data_len; 466 int ret, data_len;
@@ -453,22 +468,19 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
453 468
454 memset(&rqd, 0, sizeof(struct nvm_rq)); 469 memset(&rqd, 0, sizeof(struct nvm_rq));
455 470
471 rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
472 &rqd.dma_meta_list);
473 if (!rqd.meta_list)
474 return NVM_IO_ERR;
475
456 if (nr_secs > 1) { 476 if (nr_secs > 1) {
457 rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 477 rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
458 &rqd.dma_ppa_list); 478 rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
459 if (!rqd.ppa_list)
460 return NVM_IO_ERR;
461 479
462 *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list, 480 *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
463 nr_secs); 481 nr_secs);
464 if (*secs_to_gc == 1) { 482 if (*secs_to_gc == 1)
465 struct ppa_addr ppa; 483 rqd.ppa_addr = rqd.ppa_list[0];
466
467 ppa = rqd.ppa_list[0];
468 nvm_dev_dma_free(dev->parent, rqd.ppa_list,
469 rqd.dma_ppa_list);
470 rqd.ppa_addr = ppa;
471 }
472 } else { 484 } else {
473 *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]); 485 *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
474 } 486 }
@@ -477,7 +489,8 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
477 goto out; 489 goto out;
478 490
479 data_len = (*secs_to_gc) * geo->sec_size; 491 data_len = (*secs_to_gc) * geo->sec_size;
480 bio = bio_map_kern(q, data, data_len, GFP_KERNEL); 492 bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len,
493 PBLK_KMALLOC_META, GFP_KERNEL);
481 if (IS_ERR(bio)) { 494 if (IS_ERR(bio)) {
482 pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); 495 pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
483 goto err_free_dma; 496 goto err_free_dma;
@@ -490,6 +503,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
490 rqd.end_io = pblk_end_io_sync; 503 rqd.end_io = pblk_end_io_sync;
491 rqd.private = &wait; 504 rqd.private = &wait;
492 rqd.nr_ppas = *secs_to_gc; 505 rqd.nr_ppas = *secs_to_gc;
506 rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
493 rqd.bio = bio; 507 rqd.bio = bio;
494 508
495 ret = pblk_submit_read_io(pblk, &rqd); 509 ret = pblk_submit_read_io(pblk, &rqd);
@@ -503,6 +517,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
503 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 517 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
504 pr_err("pblk: GC read I/O timed out\n"); 518 pr_err("pblk: GC read I/O timed out\n");
505 } 519 }
520 atomic_dec(&pblk->inflight_io);
506 521
507 if (rqd.error) { 522 if (rqd.error) {
508 atomic_long_inc(&pblk->read_failed_gc); 523 atomic_long_inc(&pblk->read_failed_gc);
@@ -518,12 +533,10 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
518#endif 533#endif
519 534
520out: 535out:
521 if (rqd.nr_ppas > 1) 536 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
522 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
523 return NVM_IO_OK; 537 return NVM_IO_OK;
524 538
525err_free_dma: 539err_free_dma:
526 if (rqd.nr_ppas > 1) 540 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
527 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
528 return NVM_IO_ERR; 541 return NVM_IO_ERR;
529} 542}
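
pblk_submit_read() and pblk_submit_read_gc() now make a single DMA allocation per request and derive the PPA list from it at a fixed offset (rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size, with the same offset applied to the DMA address), so the completion path frees one buffer instead of per-list frees. Below is a userspace sketch of carving two regions out of one allocation; the region sizes are invented and plain malloc() stands in for nvm_dev_dma_alloc().

/* Sketch only: one backing allocation split into two sub-regions at a fixed
 * offset. The "DMA address" is simulated as the CPU address. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define META_REGION_SIZE 1024           /* stands in for pblk_dma_meta_size */
#define PPA_REGION_SIZE  512

struct fake_rq {
	void     *meta_list;
	void     *ppa_list;
	uintptr_t dma_meta_list;            /* device-visible address (simulated) */
	uintptr_t dma_ppa_list;
};

static int rq_alloc(struct fake_rq *rq)
{
	rq->meta_list = malloc(META_REGION_SIZE + PPA_REGION_SIZE);
	if (!rq->meta_list)
		return -1;

	rq->dma_meta_list = (uintptr_t)rq->meta_list;   /* pretend CPU addr == DMA addr */

	/* Carve the PPA list out of the same allocation at a fixed offset. */
	rq->ppa_list     = (char *)rq->meta_list + META_REGION_SIZE;
	rq->dma_ppa_list = rq->dma_meta_list + META_REGION_SIZE;
	return 0;
}

static void rq_free(struct fake_rq *rq)
{
	free(rq->meta_list);                /* one free releases both regions */
}

int main(void)
{
	struct fake_rq rq;

	if (rq_alloc(&rq))
		return 1;
	printf("ppa list starts %ld bytes into the meta allocation\n",
	       (long)((char *)rq.ppa_list - (char *)rq.meta_list));
	rq_free(&rq);
	return 0;
}
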
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index f8f85087cd3c..0e48d3e4e143 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -120,18 +120,18 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
120 return 0; 120 return 0;
121} 121}
122 122
123__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta) 123__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf)
124{ 124{
125 u32 crc; 125 u32 crc;
126 126
127 crc = pblk_calc_emeta_crc(pblk, emeta); 127 crc = pblk_calc_emeta_crc(pblk, emeta_buf);
128 if (le32_to_cpu(emeta->crc) != crc) 128 if (le32_to_cpu(emeta_buf->crc) != crc)
129 return NULL; 129 return NULL;
130 130
131 if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC) 131 if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC)
132 return NULL; 132 return NULL;
133 133
134 return pblk_line_emeta_to_lbas(emeta); 134 return emeta_to_lbas(pblk, emeta_buf);
135} 135}
136 136
137static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) 137static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
@@ -139,19 +139,20 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
139 struct nvm_tgt_dev *dev = pblk->dev; 139 struct nvm_tgt_dev *dev = pblk->dev;
140 struct nvm_geo *geo = &dev->geo; 140 struct nvm_geo *geo = &dev->geo;
141 struct pblk_line_meta *lm = &pblk->lm; 141 struct pblk_line_meta *lm = &pblk->lm;
142 struct line_emeta *emeta = line->emeta; 142 struct pblk_emeta *emeta = line->emeta;
143 struct line_emeta *emeta_buf = emeta->buf;
143 __le64 *lba_list; 144 __le64 *lba_list;
144 int data_start; 145 int data_start;
145 int nr_data_lbas, nr_valid_lbas, nr_lbas = 0; 146 int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
146 int i; 147 int i;
147 148
148 lba_list = pblk_recov_get_lba_list(pblk, emeta); 149 lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
149 if (!lba_list) 150 if (!lba_list)
150 return 1; 151 return 1;
151 152
152 data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; 153 data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
153 nr_data_lbas = lm->sec_per_line - lm->emeta_sec; 154 nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0];
154 nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas); 155 nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas);
155 156
156 for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) { 157 for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
157 struct ppa_addr ppa; 158 struct ppa_addr ppa;
@@ -169,7 +170,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
169 if (test_and_set_bit(i, line->invalid_bitmap)) 170 if (test_and_set_bit(i, line->invalid_bitmap))
170 WARN_ONCE(1, "pblk: rec. double invalidate:\n"); 171 WARN_ONCE(1, "pblk: rec. double invalidate:\n");
171 else 172 else
172 line->vsc--; 173 le32_add_cpu(line->vsc, -1);
173 spin_unlock(&line->lock); 174 spin_unlock(&line->lock);
174 175
175 continue; 176 continue;
@@ -181,7 +182,7 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
181 182
182 if (nr_valid_lbas != nr_lbas) 183 if (nr_valid_lbas != nr_lbas)
183 pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n", 184 pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
184 line->id, line->emeta->nr_valid_lbas, nr_lbas); 185 line->id, emeta_buf->nr_valid_lbas, nr_lbas);
185 186
186 line->left_msecs = 0; 187 line->left_msecs = 0;
187 188
@@ -195,7 +196,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
195 struct pblk_line_meta *lm = &pblk->lm; 196 struct pblk_line_meta *lm = &pblk->lm;
196 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); 197 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
197 198
198 return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec - 199 return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] -
199 nr_bb * geo->sec_per_blk; 200 nr_bb * geo->sec_per_blk;
200} 201}
201 202
@@ -240,7 +241,7 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
240 r_ptr_int = r_ptr; 241 r_ptr_int = r_ptr;
241 242
242next_read_rq: 243next_read_rq:
243 memset(rqd, 0, pblk_r_rq_size); 244 memset(rqd, 0, pblk_g_rq_size);
244 245
245 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 246 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
246 if (!rq_ppas) 247 if (!rq_ppas)
@@ -256,7 +257,6 @@ next_read_rq:
256 257
257 rqd->bio = bio; 258 rqd->bio = bio;
258 rqd->opcode = NVM_OP_PREAD; 259 rqd->opcode = NVM_OP_PREAD;
259 rqd->flags = pblk_set_read_mode(pblk);
260 rqd->meta_list = meta_list; 260 rqd->meta_list = meta_list;
261 rqd->nr_ppas = rq_ppas; 261 rqd->nr_ppas = rq_ppas;
262 rqd->ppa_list = ppa_list; 262 rqd->ppa_list = ppa_list;
@@ -265,6 +265,11 @@ next_read_rq:
265 rqd->end_io = pblk_end_io_sync; 265 rqd->end_io = pblk_end_io_sync;
266 rqd->private = &wait; 266 rqd->private = &wait;
267 267
268 if (pblk_io_aligned(pblk, rq_ppas))
269 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
270 else
271 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
272
268 for (i = 0; i < rqd->nr_ppas; ) { 273 for (i = 0; i < rqd->nr_ppas; ) {
269 struct ppa_addr ppa; 274 struct ppa_addr ppa;
270 int pos; 275 int pos;
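
The recovery reads above (and the GC read earlier) now pick the read mode per request: sequential when the transfer fills whole minimum-write units, random otherwise. pblk_io_aligned() itself is not part of this diff, so the helper below is an assumption based on how it is called, meant only to illustrate the selection:

#include <stdbool.h>
#include <stdio.h>

#define READ_RANDOM     0
#define READ_SEQUENTIAL 1

static bool io_aligned(int nr_secs, int min_write_pgs)
{
        return (nr_secs % min_write_pgs) == 0;
}

static int pick_read_mode(int nr_secs, int min_write_pgs)
{
        return io_aligned(nr_secs, min_write_pgs) ? READ_SEQUENTIAL : READ_RANDOM;
}

int main(void)
{
        printf("8 sectors -> %s\n", pick_read_mode(8, 8) ? "sequential" : "random");
        printf("5 sectors -> %s\n", pick_read_mode(5, 8) ? "sequential" : "random");
        return 0;
}
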
@@ -295,7 +300,7 @@ next_read_rq:
295 pr_err("pblk: L2P recovery read timed out\n"); 300 pr_err("pblk: L2P recovery read timed out\n");
296 return -EINTR; 301 return -EINTR;
297 } 302 }
298 303 atomic_dec(&pblk->inflight_io);
299 reinit_completion(&wait); 304 reinit_completion(&wait);
300 305
301 /* At this point, the read should not fail. If it does, it is a problem 306 /* At this point, the read should not fail. If it does, it is a problem
@@ -322,47 +327,94 @@ next_read_rq:
322 return 0; 327 return 0;
323} 328}
324 329
330static void pblk_recov_complete(struct kref *ref)
331{
332 struct pblk_pad_rq *pad_rq = container_of(ref, struct pblk_pad_rq, ref);
333
334 complete(&pad_rq->wait);
335}
336
337static void pblk_end_io_recov(struct nvm_rq *rqd)
338{
339 struct pblk_pad_rq *pad_rq = rqd->private;
340 struct pblk *pblk = pad_rq->pblk;
341 struct nvm_tgt_dev *dev = pblk->dev;
342
343 kref_put(&pad_rq->ref, pblk_recov_complete);
344 nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
345 pblk_free_rqd(pblk, rqd, WRITE);
346}
347
325static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, 348static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
326 struct pblk_recov_alloc p, int left_ppas) 349 int left_ppas)
327{ 350{
328 struct nvm_tgt_dev *dev = pblk->dev; 351 struct nvm_tgt_dev *dev = pblk->dev;
329 struct nvm_geo *geo = &dev->geo; 352 struct nvm_geo *geo = &dev->geo;
330 struct ppa_addr *ppa_list; 353 struct ppa_addr *ppa_list;
331 struct pblk_sec_meta *meta_list; 354 struct pblk_sec_meta *meta_list;
355 struct pblk_pad_rq *pad_rq;
332 struct nvm_rq *rqd; 356 struct nvm_rq *rqd;
333 struct bio *bio; 357 struct bio *bio;
334 void *data; 358 void *data;
335 dma_addr_t dma_ppa_list, dma_meta_list; 359 dma_addr_t dma_ppa_list, dma_meta_list;
336 __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta); 360 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
337 u64 w_ptr = line->cur_sec; 361 u64 w_ptr = line->cur_sec;
338 int left_line_ppas = line->left_msecs; 362 int left_line_ppas, rq_ppas, rq_len;
339 int rq_ppas, rq_len;
340 int i, j; 363 int i, j;
341 int ret = 0; 364 int ret = 0;
342 DECLARE_COMPLETION_ONSTACK(wait);
343 365
344 ppa_list = p.ppa_list; 366 spin_lock(&line->lock);
345 meta_list = p.meta_list; 367 left_line_ppas = line->left_msecs;
346 rqd = p.rqd; 368 spin_unlock(&line->lock);
347 data = p.data; 369
348 dma_ppa_list = p.dma_ppa_list; 370 pad_rq = kmalloc(sizeof(struct pblk_pad_rq), GFP_KERNEL);
349 dma_meta_list = p.dma_meta_list; 371 if (!pad_rq)
372 return -ENOMEM;
373
374 data = vzalloc(pblk->max_write_pgs * geo->sec_size);
375 if (!data) {
376 ret = -ENOMEM;
377 goto free_rq;
378 }
379
380 pad_rq->pblk = pblk;
381 init_completion(&pad_rq->wait);
382 kref_init(&pad_rq->ref);
350 383
351next_pad_rq: 384next_pad_rq:
352 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 385 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
353 if (!rq_ppas) 386 if (rq_ppas < pblk->min_write_pgs) {
354 rq_ppas = pblk->min_write_pgs; 387 pr_err("pblk: corrupted pad line %d\n", line->id);
388 goto free_rq;
389 }
390
355 rq_len = rq_ppas * geo->sec_size; 391 rq_len = rq_ppas * geo->sec_size;
356 392
393 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
394 if (!meta_list) {
395 ret = -ENOMEM;
396 goto free_data;
397 }
398
399 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
400 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
401
402 rqd = pblk_alloc_rqd(pblk, WRITE);
403 if (IS_ERR(rqd)) {
404 ret = PTR_ERR(rqd);
405 goto fail_free_meta;
406 }
407 memset(rqd, 0, pblk_w_rq_size);
408
357 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); 409 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
358 if (IS_ERR(bio)) 410 if (IS_ERR(bio)) {
359 return PTR_ERR(bio); 411 ret = PTR_ERR(bio);
412 goto fail_free_rqd;
413 }
360 414
361 bio->bi_iter.bi_sector = 0; /* internal bio */ 415 bio->bi_iter.bi_sector = 0; /* internal bio */
362 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 416 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
363 417
364 memset(rqd, 0, pblk_r_rq_size);
365
366 rqd->bio = bio; 418 rqd->bio = bio;
367 rqd->opcode = NVM_OP_PWRITE; 419 rqd->opcode = NVM_OP_PWRITE;
368 rqd->flags = pblk_set_progr_mode(pblk, WRITE); 420 rqd->flags = pblk_set_progr_mode(pblk, WRITE);
@@ -371,8 +423,8 @@ next_pad_rq:
371 rqd->ppa_list = ppa_list; 423 rqd->ppa_list = ppa_list;
372 rqd->dma_ppa_list = dma_ppa_list; 424 rqd->dma_ppa_list = dma_ppa_list;
373 rqd->dma_meta_list = dma_meta_list; 425 rqd->dma_meta_list = dma_meta_list;
374 rqd->end_io = pblk_end_io_sync; 426 rqd->end_io = pblk_end_io_recov;
375 rqd->private = &wait; 427 rqd->private = pad_rq;
376 428
377 for (i = 0; i < rqd->nr_ppas; ) { 429 for (i = 0; i < rqd->nr_ppas; ) {
378 struct ppa_addr ppa; 430 struct ppa_addr ppa;
@@ -390,34 +442,51 @@ next_pad_rq:
390 442
391 for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { 443 for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
392 struct ppa_addr dev_ppa; 444 struct ppa_addr dev_ppa;
445 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
393 446
394 dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); 447 dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
395 448
396 pblk_map_invalidate(pblk, dev_ppa); 449 pblk_map_invalidate(pblk, dev_ppa);
397 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); 450 lba_list[w_ptr] = meta_list[i].lba = addr_empty;
398 lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
399 rqd->ppa_list[i] = dev_ppa; 451 rqd->ppa_list[i] = dev_ppa;
400 } 452 }
401 } 453 }
402 454
455 kref_get(&pad_rq->ref);
456
403 ret = pblk_submit_io(pblk, rqd); 457 ret = pblk_submit_io(pblk, rqd);
404 if (ret) { 458 if (ret) {
405 pr_err("pblk: I/O submission failed: %d\n", ret); 459 pr_err("pblk: I/O submission failed: %d\n", ret);
406 return ret; 460 goto free_data;
407 } 461 }
408 462
409 if (!wait_for_completion_io_timeout(&wait, 463 atomic_dec(&pblk->inflight_io);
410 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
411 pr_err("pblk: L2P recovery write timed out\n");
412 }
413 reinit_completion(&wait);
414 464
415 left_line_ppas -= rq_ppas; 465 left_line_ppas -= rq_ppas;
416 left_ppas -= rq_ppas; 466 left_ppas -= rq_ppas;
417 if (left_ppas > 0 && left_line_ppas) 467 if (left_ppas && left_line_ppas)
418 goto next_pad_rq; 468 goto next_pad_rq;
419 469
420 return 0; 470 kref_put(&pad_rq->ref, pblk_recov_complete);
471
472 if (!wait_for_completion_io_timeout(&pad_rq->wait,
473 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
474 pr_err("pblk: pad write timed out\n");
475 ret = -ETIME;
476 }
477
478free_rq:
479 kfree(pad_rq);
480free_data:
481 vfree(data);
482 return ret;
483
484fail_free_rqd:
485 pblk_free_rqd(pblk, rqd, WRITE);
486fail_free_meta:
487 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
488 kfree(pad_rq);
489 return ret;
421} 490}
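
The rewritten pad path above replaces the per-request synchronous wait with a reference-counted completion: the submitter holds one reference, takes another for every write it puts in flight, drops its own after the loop, and sleeps until the last end_io drops the final reference. A userspace sketch of that pattern, with pthreads and C11 atomics standing in for kref_t and struct completion and a short sleep standing in for the device I/O (build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct pad_rq {
        atomic_int ref;
        pthread_mutex_t lock;
        pthread_cond_t done;
        int completed;
};

static void pad_rq_put(struct pad_rq *p)
{
        if (atomic_fetch_sub(&p->ref, 1) == 1) {        /* dropped the last reference */
                pthread_mutex_lock(&p->lock);
                p->completed = 1;                       /* complete(&pad_rq->wait) */
                pthread_cond_signal(&p->done);
                pthread_mutex_unlock(&p->lock);
        }
}

static void *pad_io(void *arg)                          /* models the end_io callback */
{
        usleep(1000);
        pad_rq_put(arg);
        return NULL;
}

int main(void)
{
        struct pad_rq p = { .ref = 1,                   /* submitter's own reference */
                            .lock = PTHREAD_MUTEX_INITIALIZER,
                            .done = PTHREAD_COND_INITIALIZER };
        pthread_t io[4];

        for (int i = 0; i < 4; i++) {
                atomic_fetch_add(&p.ref, 1);            /* kref_get() per in-flight write */
                pthread_create(&io[i], NULL, pad_io, &p);
        }

        pad_rq_put(&p);                                 /* drop the submitter's reference */

        pthread_mutex_lock(&p.lock);
        while (!p.completed)                            /* wait_for_completion() */
                pthread_cond_wait(&p.done, &p.lock);
        pthread_mutex_unlock(&p.lock);

        for (int i = 0; i < 4; i++)
                pthread_join(io[i], NULL);
        printf("all pad writes completed\n");
        return 0;
}
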
422 491
423/* When this function is called, it means that not all upper pages have been 492/* When this function is called, it means that not all upper pages have been
@@ -456,7 +525,7 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
456 rec_round = 0; 525 rec_round = 0;
457 526
458next_rq: 527next_rq:
459 memset(rqd, 0, pblk_r_rq_size); 528 memset(rqd, 0, pblk_g_rq_size);
460 529
461 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 530 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
462 if (!rq_ppas) 531 if (!rq_ppas)
@@ -472,7 +541,6 @@ next_rq:
472 541
473 rqd->bio = bio; 542 rqd->bio = bio;
474 rqd->opcode = NVM_OP_PREAD; 543 rqd->opcode = NVM_OP_PREAD;
475 rqd->flags = pblk_set_read_mode(pblk);
476 rqd->meta_list = meta_list; 544 rqd->meta_list = meta_list;
477 rqd->nr_ppas = rq_ppas; 545 rqd->nr_ppas = rq_ppas;
478 rqd->ppa_list = ppa_list; 546 rqd->ppa_list = ppa_list;
@@ -481,6 +549,11 @@ next_rq:
481 rqd->end_io = pblk_end_io_sync; 549 rqd->end_io = pblk_end_io_sync;
482 rqd->private = &wait; 550 rqd->private = &wait;
483 551
552 if (pblk_io_aligned(pblk, rq_ppas))
553 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
554 else
555 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
556
484 for (i = 0; i < rqd->nr_ppas; ) { 557 for (i = 0; i < rqd->nr_ppas; ) {
485 struct ppa_addr ppa; 558 struct ppa_addr ppa;
486 int pos; 559 int pos;
@@ -510,6 +583,7 @@ next_rq:
510 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 583 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
511 pr_err("pblk: L2P recovery read timed out\n"); 584 pr_err("pblk: L2P recovery read timed out\n");
512 } 585 }
586 atomic_dec(&pblk->inflight_io);
513 reinit_completion(&wait); 587 reinit_completion(&wait);
514 588
515 /* This should not happen since the read failed during normal recovery, 589 /* This should not happen since the read failed during normal recovery,
@@ -544,7 +618,7 @@ next_rq:
544 if (pad_secs > line->left_msecs) 618 if (pad_secs > line->left_msecs)
545 pad_secs = line->left_msecs; 619 pad_secs = line->left_msecs;
546 620
547 ret = pblk_recov_pad_oob(pblk, line, p, pad_secs); 621 ret = pblk_recov_pad_oob(pblk, line, pad_secs);
548 if (ret) 622 if (ret)
549 pr_err("pblk: OOB padding failed (err:%d)\n", ret); 623 pr_err("pblk: OOB padding failed (err:%d)\n", ret);
550 624
@@ -552,7 +626,6 @@ next_rq:
552 if (ret) 626 if (ret)
553 pr_err("pblk: OOB read failed (err:%d)\n", ret); 627 pr_err("pblk: OOB read failed (err:%d)\n", ret);
554 628
555 line->left_ssecs = line->left_msecs;
556 left_ppas = 0; 629 left_ppas = 0;
557 } 630 }
558 631
@@ -591,7 +664,7 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
591 *done = 1; 664 *done = 1;
592 665
593next_rq: 666next_rq:
594 memset(rqd, 0, pblk_r_rq_size); 667 memset(rqd, 0, pblk_g_rq_size);
595 668
596 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); 669 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
597 if (!rq_ppas) 670 if (!rq_ppas)
@@ -607,7 +680,6 @@ next_rq:
607 680
608 rqd->bio = bio; 681 rqd->bio = bio;
609 rqd->opcode = NVM_OP_PREAD; 682 rqd->opcode = NVM_OP_PREAD;
610 rqd->flags = pblk_set_read_mode(pblk);
611 rqd->meta_list = meta_list; 683 rqd->meta_list = meta_list;
612 rqd->nr_ppas = rq_ppas; 684 rqd->nr_ppas = rq_ppas;
613 rqd->ppa_list = ppa_list; 685 rqd->ppa_list = ppa_list;
@@ -616,6 +688,11 @@ next_rq:
616 rqd->end_io = pblk_end_io_sync; 688 rqd->end_io = pblk_end_io_sync;
617 rqd->private = &wait; 689 rqd->private = &wait;
618 690
691 if (pblk_io_aligned(pblk, rq_ppas))
692 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
693 else
694 rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
695
619 for (i = 0; i < rqd->nr_ppas; ) { 696 for (i = 0; i < rqd->nr_ppas; ) {
620 struct ppa_addr ppa; 697 struct ppa_addr ppa;
621 int pos; 698 int pos;
@@ -646,6 +723,7 @@ next_rq:
646 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { 723 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
647 pr_err("pblk: L2P recovery read timed out\n"); 724 pr_err("pblk: L2P recovery read timed out\n");
648 } 725 }
726 atomic_dec(&pblk->inflight_io);
649 reinit_completion(&wait); 727 reinit_completion(&wait);
650 728
651 /* Reached the end of the written line */ 729 /* Reached the end of the written line */
@@ -658,7 +736,6 @@ next_rq:
658 /* Roll back failed sectors */ 736 /* Roll back failed sectors */
659 line->cur_sec -= nr_error_bits; 737 line->cur_sec -= nr_error_bits;
660 line->left_msecs += nr_error_bits; 738 line->left_msecs += nr_error_bits;
661 line->left_ssecs = line->left_msecs;
662 bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); 739 bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
663 740
664 left_ppas = 0; 741 left_ppas = 0;
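
When a recovery read runs past the written area, the hunk above hands the failed sectors back to the line: the write pointer is rewound, left_msecs is credited, and the corresponding bits are cleared from the map bitmap (the separate left_ssecs copy is gone in this series). A toy version of that roll-back, with a plain byte array standing in for the kernel bitmap:

#include <stdio.h>
#include <string.h>

int main(void)
{
        unsigned char map_bitmap[64];           /* one byte per sector, for simplicity */
        int cur_sec = 40, left_msecs = 24, nr_error_bits = 8;

        memset(map_bitmap, 0, sizeof(map_bitmap));
        memset(map_bitmap, 1, cur_sec);         /* sectors mapped so far */

        cur_sec -= nr_error_bits;               /* rewind the write pointer */
        left_msecs += nr_error_bits;            /* those sectors become writable again */
        memset(map_bitmap + cur_sec, 0, nr_error_bits);

        printf("cur_sec=%d left_msecs=%d\n", cur_sec, left_msecs);
        return 0;
}
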
@@ -770,8 +847,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
770 struct pblk_line_meta *lm = &pblk->lm; 847 struct pblk_line_meta *lm = &pblk->lm;
771 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 848 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
772 struct pblk_line *line, *tline, *data_line = NULL; 849 struct pblk_line *line, *tline, *data_line = NULL;
773 struct line_smeta *smeta; 850 struct pblk_smeta *smeta;
774 struct line_emeta *emeta; 851 struct pblk_emeta *emeta;
852 struct line_smeta *smeta_buf;
775 int found_lines = 0, recovered_lines = 0, open_lines = 0; 853 int found_lines = 0, recovered_lines = 0, open_lines = 0;
776 int is_next = 0; 854 int is_next = 0;
777 int meta_line; 855 int meta_line;
@@ -784,8 +862,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
784 spin_lock(&l_mg->free_lock); 862 spin_lock(&l_mg->free_lock);
785 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); 863 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
786 set_bit(meta_line, &l_mg->meta_bitmap); 864 set_bit(meta_line, &l_mg->meta_bitmap);
787 smeta = l_mg->sline_meta[meta_line].meta; 865 smeta = l_mg->sline_meta[meta_line];
788 emeta = l_mg->eline_meta[meta_line].meta; 866 emeta = l_mg->eline_meta[meta_line];
867 smeta_buf = (struct line_smeta *)smeta;
789 spin_unlock(&l_mg->free_lock); 868 spin_unlock(&l_mg->free_lock);
790 869
791 /* Order data lines using their sequence number */ 870 /* Order data lines using their sequence number */
@@ -796,33 +875,33 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
796 875
797 memset(smeta, 0, lm->smeta_len); 876 memset(smeta, 0, lm->smeta_len);
798 line->smeta = smeta; 877 line->smeta = smeta;
799 line->lun_bitmap = ((void *)(smeta)) + 878 line->lun_bitmap = ((void *)(smeta_buf)) +
800 sizeof(struct line_smeta); 879 sizeof(struct line_smeta);
801 880
802 /* Lines that cannot be read are assumed not to have been written here */ 881 /* Lines that cannot be read are assumed not to have been written here */
803 if (pblk_line_read_smeta(pblk, line)) 882 if (pblk_line_read_smeta(pblk, line))
804 continue; 883 continue;
805 884
806 crc = pblk_calc_smeta_crc(pblk, smeta); 885 crc = pblk_calc_smeta_crc(pblk, smeta_buf);
807 if (le32_to_cpu(smeta->crc) != crc) 886 if (le32_to_cpu(smeta_buf->crc) != crc)
808 continue; 887 continue;
809 888
810 if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC) 889 if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC)
811 continue; 890 continue;
812 891
813 if (le16_to_cpu(smeta->header.version) != 1) { 892 if (le16_to_cpu(smeta_buf->header.version) != 1) {
814 pr_err("pblk: found incompatible line version %u\n", 893 pr_err("pblk: found incompatible line version %u\n",
815 smeta->header.version); 894 smeta_buf->header.version);
816 return ERR_PTR(-EINVAL); 895 return ERR_PTR(-EINVAL);
817 } 896 }
818 897
819 /* The first valid instance uuid is used for initialization */ 898 /* The first valid instance uuid is used for initialization */
820 if (!valid_uuid) { 899 if (!valid_uuid) {
821 memcpy(pblk->instance_uuid, smeta->header.uuid, 16); 900 memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16);
822 valid_uuid = 1; 901 valid_uuid = 1;
823 } 902 }
824 903
825 if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) { 904 if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) {
826 pr_debug("pblk: ignore line %u due to uuid mismatch\n", 905 pr_debug("pblk: ignore line %u due to uuid mismatch\n",
827 i); 906 i);
828 continue; 907 continue;
@@ -830,9 +909,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
830 909
831 /* Update line metadata */ 910 /* Update line metadata */
832 spin_lock(&line->lock); 911 spin_lock(&line->lock);
833 line->id = le32_to_cpu(line->smeta->header.id); 912 line->id = le32_to_cpu(smeta_buf->header.id);
834 line->type = le16_to_cpu(line->smeta->header.type); 913 line->type = le16_to_cpu(smeta_buf->header.type);
835 line->seq_nr = le64_to_cpu(line->smeta->seq_nr); 914 line->seq_nr = le64_to_cpu(smeta_buf->seq_nr);
836 spin_unlock(&line->lock); 915 spin_unlock(&line->lock);
837 916
838 /* Update general metadata */ 917 /* Update general metadata */
@@ -848,7 +927,7 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
848 pblk_recov_line_add_ordered(&recov_list, line); 927 pblk_recov_line_add_ordered(&recov_list, line);
849 found_lines++; 928 found_lines++;
850 pr_debug("pblk: recovering data line %d, seq:%llu\n", 929 pr_debug("pblk: recovering data line %d, seq:%llu\n",
851 line->id, smeta->seq_nr); 930 line->id, smeta_buf->seq_nr);
852 } 931 }
853 932
854 if (!found_lines) { 933 if (!found_lines) {
@@ -868,15 +947,15 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
868 947
869 recovered_lines++; 948 recovered_lines++;
870 /* Calculate where emeta starts based on the line bb */ 949 /* Calculate where emeta starts based on the line bb */
871 off = lm->sec_per_line - lm->emeta_sec; 950 off = lm->sec_per_line - lm->emeta_sec[0];
872 nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); 951 nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
873 off -= nr_bb * geo->sec_per_pl; 952 off -= nr_bb * geo->sec_per_pl;
874 953
875 memset(emeta, 0, lm->emeta_len);
876 line->emeta = emeta;
877 line->emeta_ssec = off; 954 line->emeta_ssec = off;
955 line->emeta = emeta;
956 memset(line->emeta->buf, 0, lm->emeta_len[0]);
878 957
879 if (pblk_line_read_emeta(pblk, line)) { 958 if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) {
880 pblk_recov_l2p_from_oob(pblk, line); 959 pblk_recov_l2p_from_oob(pblk, line);
881 goto next; 960 goto next;
882 } 961 }
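
The emeta start sector computed above sits at the tail of the line: the full line size minus the emeta footprint, pulled further back by one plane's worth of sectors for every bad block in the line. A worked example with made-up geometry:

#include <stdio.h>

int main(void)
{
        int sec_per_line = 4096;        /* lm->sec_per_line (illustrative) */
        int emeta_sec    = 64;          /* lm->emeta_sec[0]                */
        int sec_per_pl   = 16;          /* geo->sec_per_pl                 */
        int nr_bb        = 2;           /* bad blocks in this line         */

        int off = sec_per_line - emeta_sec - nr_bb * sec_per_pl;

        printf("emeta_ssec = %d\n", off);       /* 4096 - 64 - 32 = 4000 */
        return 0;
}
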
@@ -941,58 +1020,26 @@ out:
941} 1020}
942 1021
943/* 1022/*
944 * Pad until smeta can be read on current data line 1023 * Pad current line
945 */ 1024 */
946void pblk_recov_pad(struct pblk *pblk) 1025int pblk_recov_pad(struct pblk *pblk)
947{ 1026{
948 struct nvm_tgt_dev *dev = pblk->dev;
949 struct nvm_geo *geo = &dev->geo;
950 struct pblk_line *line; 1027 struct pblk_line *line;
951 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 1028 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
952 struct nvm_rq *rqd; 1029 int left_msecs;
953 struct pblk_recov_alloc p; 1030 int ret = 0;
954 struct ppa_addr *ppa_list;
955 struct pblk_sec_meta *meta_list;
956 void *data;
957 dma_addr_t dma_ppa_list, dma_meta_list;
958 1031
959 spin_lock(&l_mg->free_lock); 1032 spin_lock(&l_mg->free_lock);
960 line = l_mg->data_line; 1033 line = l_mg->data_line;
1034 left_msecs = line->left_msecs;
961 spin_unlock(&l_mg->free_lock); 1035 spin_unlock(&l_mg->free_lock);
962 1036
963 rqd = pblk_alloc_rqd(pblk, READ); 1037 ret = pblk_recov_pad_oob(pblk, line, left_msecs);
964 if (IS_ERR(rqd)) 1038 if (ret) {
965 return; 1039 pr_err("pblk: Tear down padding failed (%d)\n", ret);
966 1040 return ret;
967 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
968 if (!meta_list)
969 goto free_rqd;
970
971 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
972 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
973
974 data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
975 if (!data)
976 goto free_meta_list;
977
978 p.ppa_list = ppa_list;
979 p.meta_list = meta_list;
980 p.rqd = rqd;
981 p.data = data;
982 p.dma_ppa_list = dma_ppa_list;
983 p.dma_meta_list = dma_meta_list;
984
985 if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
986 pr_err("pblk: Tear down padding failed\n");
987 goto free_data;
988 } 1041 }
989 1042
990 pblk_line_close(pblk, line); 1043 pblk_line_close_meta(pblk, line);
991 1044 return ret;
992free_data:
993 kfree(data);
994free_meta_list:
995 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
996free_rqd:
997 pblk_free_rqd(pblk, rqd, READ);
998} 1045}
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb144f3f..2e6a5361baf0 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -23,11 +23,35 @@ static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
23 mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000)); 23 mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
24} 24}
25 25
26int pblk_rl_is_limit(struct pblk_rl *rl)
27{
28 int rb_space;
29
30 rb_space = atomic_read(&rl->rb_space);
31
32 return (rb_space == 0);
33}
34
26int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries) 35int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
27{ 36{
28 int rb_user_cnt = atomic_read(&rl->rb_user_cnt); 37 int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
38 int rb_space = atomic_read(&rl->rb_space);
29 39
30 return (!(rb_user_cnt + nr_entries > rl->rb_user_max)); 40 if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
41 return NVM_IO_ERR;
42
43 if (rb_user_cnt >= rl->rb_user_max)
44 return NVM_IO_REQUEUE;
45
46 return NVM_IO_OK;
47}
48
49void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
50{
51 int rb_space = atomic_read(&rl->rb_space);
52
53 if (unlikely(rb_space >= 0))
54 atomic_sub(nr_entries, &rl->rb_space);
31} 55}
32 56
33int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries) 57int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
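
The reworked pblk_rl_user_may_insert() above distinguishes a hard failure (the remaining rb_space cannot take the request) from a soft one (the user budget is exhausted, so the request is requeued), and pblk_rl_inserted() only consumes rb_space while it is being enforced; rb_space starts at -1 (unlimited) in pblk_rl_init() further down. A standalone sketch of that admission logic, with local IO_* constants standing in for the NVM_IO_* codes:

#include <stdatomic.h>
#include <stdio.h>

enum { IO_OK = 0, IO_REQUEUE = 1, IO_ERR = 2 };

struct rl {
        atomic_int rb_space;            /* -1: not enforced, >= 0: hard budget */
        atomic_int rb_user_cnt;
        int rb_user_max;
};

static int user_may_insert(struct rl *rl, int nr_entries)
{
        int space = atomic_load(&rl->rb_space);
        int cnt = atomic_load(&rl->rb_user_cnt);

        if (space >= 0 && space - nr_entries < 0)
                return IO_ERR;          /* hard space limit in effect */
        if (cnt >= rl->rb_user_max)
                return IO_REQUEUE;      /* over the user rate budget, retry later */
        return IO_OK;
}

static void inserted(struct rl *rl, int nr_entries)
{
        if (atomic_load(&rl->rb_space) >= 0)
                atomic_fetch_sub(&rl->rb_space, nr_entries);
}

int main(void)
{
        struct rl rl = { .rb_space = -1, .rb_user_cnt = 0, .rb_user_max = 8 };

        printf("unlimited space: %d\n", user_may_insert(&rl, 4));   /* IO_OK      */
        atomic_store(&rl.rb_space, 2);
        printf("space too small: %d\n", user_may_insert(&rl, 4));   /* IO_ERR     */
        atomic_store(&rl.rb_space, 16);
        atomic_store(&rl.rb_user_cnt, 8);
        printf("over user max:   %d\n", user_may_insert(&rl, 4));   /* IO_REQUEUE */
        inserted(&rl, 4);
        printf("space left:      %d\n", atomic_load(&rl.rb_space)); /* 12 */
        return 0;
}
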
@@ -37,7 +61,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
37 61
38 /* If there is no user I/O let GC take over space on the write buffer */ 62 /* If there is no user I/O let GC take over space on the write buffer */
39 rb_user_active = READ_ONCE(rl->rb_user_active); 63 rb_user_active = READ_ONCE(rl->rb_user_active);
40 return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active)); 64 return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
41} 65}
42 66
43void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries) 67void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +101,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
77 unsigned long free_blocks = pblk_rl_nr_free_blks(rl); 101 unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
78 102
79 if (free_blocks >= rl->high) { 103 if (free_blocks >= rl->high) {
80 rl->rb_user_max = max - rl->rb_gc_rsv; 104 rl->rb_user_max = max;
81 rl->rb_gc_max = rl->rb_gc_rsv; 105 rl->rb_gc_max = 0;
82 rl->rb_state = PBLK_RL_HIGH; 106 rl->rb_state = PBLK_RL_HIGH;
83 } else if (free_blocks < rl->high) { 107 } else if (free_blocks < rl->high) {
84 int shift = rl->high_pw - rl->rb_windows_pw; 108 int shift = rl->high_pw - rl->rb_windows_pw;
85 int user_windows = free_blocks >> shift; 109 int user_windows = free_blocks >> shift;
86 int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW; 110 int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
87 int gc_max;
88 111
89 rl->rb_user_max = user_max; 112 rl->rb_user_max = user_max;
90 gc_max = max - rl->rb_user_max; 113 rl->rb_gc_max = max - user_max;
91 rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv); 114
92 115 if (free_blocks <= rl->rsv_blocks) {
93 if (free_blocks > rl->low) 116 rl->rb_user_max = 0;
94 rl->rb_state = PBLK_RL_MID; 117 rl->rb_gc_max = max;
95 else 118 }
96 rl->rb_state = PBLK_RL_LOW; 119
120 /* In the worst case, we will need to GC lines in the low list
121 * (high valid sector count). If there are lines to GC on high
122 * or mid lists, these will be prioritized
123 */
124 rl->rb_state = PBLK_RL_LOW;
97 } 125 }
98 126
99 return rl->rb_state; 127 return rl->rb_state;
100} 128}
101 129
102void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
103{
104 rl->rb_gc_rsv = rl->rb_gc_max = rsv;
105}
106
107void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) 130void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
108{ 131{
109 struct pblk *pblk = container_of(rl, struct pblk, rl); 132 struct pblk *pblk = container_of(rl, struct pblk, rl);
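
pblk_rl_update_rates() above splits the write-buffer budget between user and GC writers: below the high watermark the user share is counted in PBLK_MAX_REQ_ADDRS-sized windows proportional to the free blocks, GC gets the remainder, and once the reserve is reached user I/O is shut off entirely. A worked example with made-up numbers (the real thresholds come from the device geometry):

#include <stdio.h>

#define PBLK_MAX_REQ_ADDRS_PW 6         /* 64-entry windows, from pblk.h */

int main(void)
{
        int budget = 1024;              /* rl->rb_budget (illustrative)        */
        int high_pw = 10;               /* get_count_order(rl->high)           */
        int rb_windows_pw = 4;          /* get_count_order(budget / 64)        */
        int rsv_blocks = 32;            /* rl->rsv_blocks                      */

        for (int free_blocks = 512; free_blocks >= 16; free_blocks /= 2) {
                int shift = high_pw - rb_windows_pw;
                int user_max = (free_blocks >> shift) << PBLK_MAX_REQ_ADDRS_PW;
                int gc_max = budget - user_max;

                if (free_blocks <= rsv_blocks) {
                        user_max = 0;   /* reserve reached: GC takes the buffer */
                        gc_max = budget;
                }
                printf("free=%3d user_max=%4d gc_max=%4d\n",
                       free_blocks, user_max, gc_max);
        }
        return 0;
}
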
@@ -122,11 +145,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
122 145
123void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) 146void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
124{ 147{
125 struct pblk *pblk = container_of(rl, struct pblk, rl);
126 int blk_in_line = atomic_read(&line->blk_in_line); 148 int blk_in_line = atomic_read(&line->blk_in_line);
127 int ret;
128 149
129 atomic_sub(blk_in_line, &rl->free_blocks); 150 atomic_sub(blk_in_line, &rl->free_blocks);
151}
152
153void pblk_gc_should_kick(struct pblk *pblk)
154{
155 struct pblk_rl *rl = &pblk->rl;
156 int ret;
130 157
131 /* Rates will not change that often - no need to lock update */ 158 /* Rates will not change that often - no need to lock update */
132 ret = pblk_rl_update_rates(rl, rl->rb_budget); 159 ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +163,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
136 pblk_gc_should_stop(pblk); 163 pblk_gc_should_stop(pblk);
137} 164}
138 165
139int pblk_rl_gc_thrs(struct pblk_rl *rl) 166int pblk_rl_high_thrs(struct pblk_rl *rl)
140{ 167{
141 return rl->high; 168 return rl->high;
142} 169}
143 170
171int pblk_rl_low_thrs(struct pblk_rl *rl)
172{
173 return rl->low;
174}
175
144int pblk_rl_sysfs_rate_show(struct pblk_rl *rl) 176int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
145{ 177{
146 return rl->rb_user_max; 178 return rl->rb_user_max;
@@ -161,24 +193,36 @@ void pblk_rl_free(struct pblk_rl *rl)
161 193
162void pblk_rl_init(struct pblk_rl *rl, int budget) 194void pblk_rl_init(struct pblk_rl *rl, int budget)
163{ 195{
196 struct pblk *pblk = container_of(rl, struct pblk, rl);
197 struct pblk_line_meta *lm = &pblk->lm;
198 int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
164 unsigned int rb_windows; 199 unsigned int rb_windows;
165 200
166 rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS; 201 rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
167 rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
168 rl->high_pw = get_count_order(rl->high); 202 rl->high_pw = get_count_order(rl->high);
169 203
204 rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
205 if (rl->low < min_blocks)
206 rl->low = min_blocks;
207
208 rl->rsv_blocks = min_blocks;
209
170 /* This will always be a power-of-2 */ 210 /* This will always be a power-of-2 */
171 rb_windows = budget / PBLK_MAX_REQ_ADDRS; 211 rb_windows = budget / PBLK_MAX_REQ_ADDRS;
172 rl->rb_windows_pw = get_count_order(rb_windows) + 1; 212 rl->rb_windows_pw = get_count_order(rb_windows);
173 213
174 /* To start with, all buffer is available to user I/O writers */ 214 /* To start with, all buffer is available to user I/O writers */
175 rl->rb_budget = budget; 215 rl->rb_budget = budget;
176 rl->rb_user_max = budget; 216 rl->rb_user_max = budget;
177 atomic_set(&rl->rb_user_cnt, 0);
178 rl->rb_gc_max = 0; 217 rl->rb_gc_max = 0;
179 rl->rb_state = PBLK_RL_HIGH; 218 rl->rb_state = PBLK_RL_HIGH;
219
220 atomic_set(&rl->rb_user_cnt, 0);
180 atomic_set(&rl->rb_gc_cnt, 0); 221 atomic_set(&rl->rb_gc_cnt, 0);
222 atomic_set(&rl->rb_space, -1);
181 223
182 setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl); 224 setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
225
183 rl->rb_user_active = 0; 226 rl->rb_user_active = 0;
227 rl->rb_gc_active = 0;
184} 228}
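
pblk_rl_init() above now derives rb_windows_pw directly from get_count_order(rb_windows), without the old "+ 1". get_count_order() is not part of this diff; the model below assumes its usual ceil(log2(n)) behaviour for n > 0, just to show the value feeding the window shift:

#include <stdio.h>

static int count_order(unsigned int n)          /* assumed get_count_order() model */
{
        int order = 31 - __builtin_clz(n);      /* fls(n) - 1 */

        if (n & (n - 1))                        /* not a power of two: round up */
                order++;
        return order;
}

int main(void)
{
        unsigned int rb_windows = 1024 / 64;    /* budget / PBLK_MAX_REQ_ADDRS */

        printf("rb_windows=%u rb_windows_pw=%d\n",
               rb_windows, count_order(rb_windows));    /* 16 -> 4 */
        return 0;
}
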
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index f0af1d1ceeff..95fb434e2f01 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
49 49
50static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page) 50static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
51{ 51{
52 struct nvm_tgt_dev *dev = pblk->dev;
53 struct nvm_geo *geo = &dev->geo;
54 int free_blocks, total_blocks; 52 int free_blocks, total_blocks;
55 int rb_user_max, rb_user_cnt; 53 int rb_user_max, rb_user_cnt;
56 int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state; 54 int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
57 55
58 free_blocks = atomic_read(&pblk->rl.free_blocks); 56 free_blocks = atomic_read(&pblk->rl.free_blocks);
59 rb_user_max = pblk->rl.rb_user_max; 57 rb_user_max = pblk->rl.rb_user_max;
60 rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt); 58 rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
61 rb_gc_max = pblk->rl.rb_gc_max; 59 rb_gc_max = pblk->rl.rb_gc_max;
62 rb_gc_rsv = pblk->rl.rb_gc_rsv;
63 rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt); 60 rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
64 rb_budget = pblk->rl.rb_budget; 61 rb_budget = pblk->rl.rb_budget;
65 rb_state = pblk->rl.rb_state; 62 rb_state = pblk->rl.rb_state;
66 63
67 total_blocks = geo->blks_per_lun * geo->nr_luns; 64 total_blocks = pblk->rl.total_blocks;
68 65
69 return snprintf(page, PAGE_SIZE, 66 return snprintf(page, PAGE_SIZE,
70 "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n", 67 "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
71 rb_user_cnt, 68 rb_user_cnt,
72 rb_user_max, 69 rb_user_max,
73 rb_gc_cnt, 70 rb_gc_cnt,
74 rb_gc_max, 71 rb_gc_max,
75 rb_gc_rsv,
76 rb_state, 72 rb_state,
77 rb_budget, 73 rb_budget,
78 pblk->rl.low, 74 pblk->rl.low,
@@ -150,11 +146,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
150 ssize_t sz = 0; 146 ssize_t sz = 0;
151 int nr_free_lines; 147 int nr_free_lines;
152 int cur_data, cur_log; 148 int cur_data, cur_log;
153 int free_line_cnt = 0, closed_line_cnt = 0; 149 int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
154 int d_line_cnt = 0, l_line_cnt = 0; 150 int d_line_cnt = 0, l_line_cnt = 0;
155 int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0; 151 int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
156 int free = 0, bad = 0, cor = 0; 152 int bad = 0, cor = 0;
157 int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0; 153 int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
158 int map_weight = 0, meta_weight = 0; 154 int map_weight = 0, meta_weight = 0;
159 155
160 spin_lock(&l_mg->free_lock); 156 spin_lock(&l_mg->free_lock);
@@ -166,6 +162,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
166 free_line_cnt++; 162 free_line_cnt++;
167 spin_unlock(&l_mg->free_lock); 163 spin_unlock(&l_mg->free_lock);
168 164
165 spin_lock(&l_mg->close_lock);
166 list_for_each_entry(line, &l_mg->emeta_list, list)
167 emeta_line_cnt++;
168 spin_unlock(&l_mg->close_lock);
169
169 spin_lock(&l_mg->gc_lock); 170 spin_lock(&l_mg->gc_lock);
170 list_for_each_entry(line, &l_mg->gc_full_list, list) { 171 list_for_each_entry(line, &l_mg->gc_full_list, list) {
171 if (line->type == PBLK_LINETYPE_DATA) 172 if (line->type == PBLK_LINETYPE_DATA)
@@ -212,8 +213,6 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
212 gc_empty++; 213 gc_empty++;
213 } 214 }
214 215
215 list_for_each_entry(line, &l_mg->free_list, list)
216 free++;
217 list_for_each_entry(line, &l_mg->bad_list, list) 216 list_for_each_entry(line, &l_mg->bad_list, list)
218 bad++; 217 bad++;
219 list_for_each_entry(line, &l_mg->corrupt_list, list) 218 list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -224,8 +223,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
224 if (l_mg->data_line) { 223 if (l_mg->data_line) {
225 cur_sec = l_mg->data_line->cur_sec; 224 cur_sec = l_mg->data_line->cur_sec;
226 msecs = l_mg->data_line->left_msecs; 225 msecs = l_mg->data_line->left_msecs;
227 ssecs = l_mg->data_line->left_ssecs; 226 vsc = le32_to_cpu(*l_mg->data_line->vsc);
228 vsc = l_mg->data_line->vsc;
229 sec_in_line = l_mg->data_line->sec_in_line; 227 sec_in_line = l_mg->data_line->sec_in_line;
230 meta_weight = bitmap_weight(&l_mg->meta_bitmap, 228 meta_weight = bitmap_weight(&l_mg->meta_bitmap,
231 PBLK_DATA_LINES); 229 PBLK_DATA_LINES);
@@ -235,17 +233,20 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
235 spin_unlock(&l_mg->free_lock); 233 spin_unlock(&l_mg->free_lock);
236 234
237 if (nr_free_lines != free_line_cnt) 235 if (nr_free_lines != free_line_cnt)
238 pr_err("pblk: corrupted free line list\n"); 236 pr_err("pblk: corrupted free line list:%d/%d\n",
237 nr_free_lines, free_line_cnt);
239 238
240 sz = snprintf(page, PAGE_SIZE - sz, 239 sz = snprintf(page, PAGE_SIZE - sz,
241 "line: nluns:%d, nblks:%d, nsecs:%d\n", 240 "line: nluns:%d, nblks:%d, nsecs:%d\n",
242 geo->nr_luns, lm->blk_per_line, lm->sec_per_line); 241 geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
243 242
244 sz += snprintf(page + sz, PAGE_SIZE - sz, 243 sz += snprintf(page + sz, PAGE_SIZE - sz,
245 "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n", 244 "lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
246 cur_data, cur_log, 245 cur_data, cur_log,
247 free, nr_free_lines, bad, cor, 246 nr_free_lines,
247 emeta_line_cnt, meta_weight,
248 closed_line_cnt, 248 closed_line_cnt,
249 bad, cor,
249 d_line_cnt, l_line_cnt, 250 d_line_cnt, l_line_cnt,
250 l_mg->nr_lines); 251 l_mg->nr_lines);
251 252
@@ -255,9 +256,10 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
255 atomic_read(&pblk->gc.inflight_gc)); 256 atomic_read(&pblk->gc.inflight_gc));
256 257
257 sz += snprintf(page + sz, PAGE_SIZE - sz, 258 sz += snprintf(page + sz, PAGE_SIZE - sz,
258 "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n", 259 "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
259 cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line, 260 cur_data, cur_sec, msecs, vsc, sec_in_line,
260 map_weight, lm->sec_per_line, meta_weight); 261 map_weight, lm->sec_per_line,
262 atomic_read(&pblk->inflight_io));
261 263
262 return sz; 264 return sz;
263} 265}
@@ -274,7 +276,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
274 lm->smeta_len, lm->smeta_sec); 276 lm->smeta_len, lm->smeta_sec);
275 sz += snprintf(page + sz, PAGE_SIZE - sz, 277 sz += snprintf(page + sz, PAGE_SIZE - sz,
276 "emeta - len:%d, sec:%d, bb_start:%d\n", 278 "emeta - len:%d, sec:%d, bb_start:%d\n",
277 lm->emeta_len, lm->emeta_sec, 279 lm->emeta_len[0], lm->emeta_sec[0],
278 lm->emeta_bb); 280 lm->emeta_bb);
279 sz += snprintf(page + sz, PAGE_SIZE - sz, 281 sz += snprintf(page + sz, PAGE_SIZE - sz,
280 "bitmap lengths: sec:%d, blk:%d, lun:%d\n", 282 "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
@@ -290,6 +292,11 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
290 return sz; 292 return sz;
291} 293}
292 294
295static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
296{
297 return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
298}
299
293#ifdef CONFIG_NVM_DEBUG 300#ifdef CONFIG_NVM_DEBUG
294static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) 301static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
295{ 302{
@@ -303,52 +310,51 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
303 atomic_long_read(&pblk->padded_wb), 310 atomic_long_read(&pblk->padded_wb),
304 atomic_long_read(&pblk->sub_writes), 311 atomic_long_read(&pblk->sub_writes),
305 atomic_long_read(&pblk->sync_writes), 312 atomic_long_read(&pblk->sync_writes),
306 atomic_long_read(&pblk->compl_writes),
307 atomic_long_read(&pblk->recov_writes), 313 atomic_long_read(&pblk->recov_writes),
308 atomic_long_read(&pblk->recov_gc_writes), 314 atomic_long_read(&pblk->recov_gc_writes),
309 atomic_long_read(&pblk->recov_gc_reads), 315 atomic_long_read(&pblk->recov_gc_reads),
316 atomic_long_read(&pblk->cache_reads),
310 atomic_long_read(&pblk->sync_reads)); 317 atomic_long_read(&pblk->sync_reads));
311} 318}
312#endif 319#endif
313 320
314static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page, 321static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
315 size_t len) 322 size_t len)
316{ 323{
317 struct pblk_gc *gc = &pblk->gc;
318 size_t c_len; 324 size_t c_len;
319 int value; 325 int force;
320 326
321 c_len = strcspn(page, "\n"); 327 c_len = strcspn(page, "\n");
322 if (c_len >= len) 328 if (c_len >= len)
323 return -EINVAL; 329 return -EINVAL;
324 330
325 if (kstrtouint(page, 0, &value)) 331 if (kstrtouint(page, 0, &force))
326 return -EINVAL; 332 return -EINVAL;
327 333
328 spin_lock(&gc->lock); 334 pblk_gc_sysfs_force(pblk, force);
329 pblk_rl_set_gc_rsc(&pblk->rl, value);
330 spin_unlock(&gc->lock);
331 335
332 return len; 336 return len;
333} 337}
334 338
335static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page, 339static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
336 size_t len) 340 const char *page, size_t len)
337{ 341{
338 size_t c_len; 342 size_t c_len;
339 int force; 343 int sec_per_write;
340 344
341 c_len = strcspn(page, "\n"); 345 c_len = strcspn(page, "\n");
342 if (c_len >= len) 346 if (c_len >= len)
343 return -EINVAL; 347 return -EINVAL;
344 348
345 if (kstrtouint(page, 0, &force)) 349 if (kstrtouint(page, 0, &sec_per_write))
346 return -EINVAL; 350 return -EINVAL;
347 351
348 if (force < 0 || force > 1) 352 if (sec_per_write < pblk->min_write_pgs
353 || sec_per_write > pblk->max_write_pgs
354 || sec_per_write % pblk->min_write_pgs != 0)
349 return -EINVAL; 355 return -EINVAL;
350 356
351 pblk_gc_sysfs_force(pblk, force); 357 pblk_set_sec_per_write(pblk, sec_per_write);
352 358
353 return len; 359 return len;
354} 360}
@@ -398,9 +404,9 @@ static struct attribute sys_gc_force = {
398 .mode = 0200, 404 .mode = 0200,
399}; 405};
400 406
401static struct attribute sys_gc_rl_max = { 407static struct attribute sys_max_sec_per_write = {
402 .name = "gc_rl_max", 408 .name = "max_sec_per_write",
403 .mode = 0200, 409 .mode = 0644,
404}; 410};
405 411
406#ifdef CONFIG_NVM_DEBUG 412#ifdef CONFIG_NVM_DEBUG
@@ -416,7 +422,7 @@ static struct attribute *pblk_attrs[] = {
416 &sys_errors_attr, 422 &sys_errors_attr,
417 &sys_gc_state, 423 &sys_gc_state,
418 &sys_gc_force, 424 &sys_gc_force,
419 &sys_gc_rl_max, 425 &sys_max_sec_per_write,
420 &sys_rb_attr, 426 &sys_rb_attr,
421 &sys_stats_ppaf_attr, 427 &sys_stats_ppaf_attr,
422 &sys_lines_attr, 428 &sys_lines_attr,
@@ -448,6 +454,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
448 return pblk_sysfs_lines(pblk, buf); 454 return pblk_sysfs_lines(pblk, buf);
449 else if (strcmp(attr->name, "lines_info") == 0) 455 else if (strcmp(attr->name, "lines_info") == 0)
450 return pblk_sysfs_lines_info(pblk, buf); 456 return pblk_sysfs_lines_info(pblk, buf);
457 else if (strcmp(attr->name, "max_sec_per_write") == 0)
458 return pblk_sysfs_get_sec_per_write(pblk, buf);
451#ifdef CONFIG_NVM_DEBUG 459#ifdef CONFIG_NVM_DEBUG
452 else if (strcmp(attr->name, "stats") == 0) 460 else if (strcmp(attr->name, "stats") == 0)
453 return pblk_sysfs_stats_debug(pblk, buf); 461 return pblk_sysfs_stats_debug(pblk, buf);
@@ -460,10 +468,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
460{ 468{
461 struct pblk *pblk = container_of(kobj, struct pblk, kobj); 469 struct pblk *pblk = container_of(kobj, struct pblk, kobj);
462 470
463 if (strcmp(attr->name, "gc_rl_max") == 0) 471 if (strcmp(attr->name, "gc_force") == 0)
464 return pblk_sysfs_rate_store(pblk, buf, len);
465 else if (strcmp(attr->name, "gc_force") == 0)
466 return pblk_sysfs_gc_force(pblk, buf, len); 472 return pblk_sysfs_gc_force(pblk, buf, len);
473 else if (strcmp(attr->name, "max_sec_per_write") == 0)
474 return pblk_sysfs_set_sec_per_write(pblk, buf, len);
467 475
468 return 0; 476 return 0;
469} 477}
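
The new max_sec_per_write store routine above only accepts values between the device's minimum and maximum write sizes that are whole multiples of the minimum. The same predicate, spelled out with illustrative limits:

#include <stdbool.h>
#include <stdio.h>

static bool sec_per_write_valid(int val, int min_write_pgs, int max_write_pgs)
{
        return val >= min_write_pgs &&
               val <= max_write_pgs &&
               (val % min_write_pgs) == 0;
}

int main(void)
{
        int tests[] = { 4, 6, 8, 64, 128 };

        for (unsigned int i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
                printf("%3d -> %s\n", tests[i],
                       sec_per_write_valid(tests[i], 8, 64) ? "ok" : "-EINVAL");
        return 0;
}
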
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index aef6fd7c4a0c..d62a8f4faaf4 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -17,18 +17,6 @@
17 17
18#include "pblk.h" 18#include "pblk.h"
19 19
20static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
21{
22#ifdef CONFIG_NVM_DEBUG
23 atomic_long_inc(&pblk->sync_writes);
24#endif
25
26 /* Counter protected by rb sync lock */
27 line->left_ssecs--;
28 if (!line->left_ssecs)
29 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
30}
31
32static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, 20static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
33 struct pblk_c_ctx *c_ctx) 21 struct pblk_c_ctx *c_ctx)
34{ 22{
@@ -39,21 +27,14 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
39 27
40 for (i = 0; i < c_ctx->nr_valid; i++) { 28 for (i = 0; i < c_ctx->nr_valid; i++) {
41 struct pblk_w_ctx *w_ctx; 29 struct pblk_w_ctx *w_ctx;
42 struct ppa_addr p;
43 struct pblk_line *line;
44 30
45 w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i); 31 w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
46
47 p = rqd->ppa_list[i];
48 line = &pblk->lines[pblk_dev_ppa_to_line(p)];
49 pblk_sync_line(pblk, line);
50
51 while ((original_bio = bio_list_pop(&w_ctx->bios))) 32 while ((original_bio = bio_list_pop(&w_ctx->bios)))
52 bio_endio(original_bio); 33 bio_endio(original_bio);
53 } 34 }
54 35
55#ifdef CONFIG_NVM_DEBUG 36#ifdef CONFIG_NVM_DEBUG
56 atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes); 37 atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes);
57#endif 38#endif
58 39
59 ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); 40 ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
@@ -169,7 +150,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
169 } 150 }
170 151
171 INIT_WORK(&recovery->ws_rec, pblk_submit_rec); 152 INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
172 queue_work(pblk->kw_wq, &recovery->ws_rec); 153 queue_work(pblk->close_wq, &recovery->ws_rec);
173 154
174out: 155out:
175 pblk_complete_write(pblk, rqd, c_ctx); 156 pblk_complete_write(pblk, rqd, c_ctx);
@@ -186,14 +167,50 @@ static void pblk_end_io_write(struct nvm_rq *rqd)
186 } 167 }
187#ifdef CONFIG_NVM_DEBUG 168#ifdef CONFIG_NVM_DEBUG
188 else 169 else
189 WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n"); 170 WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
190#endif 171#endif
191 172
192 pblk_complete_write(pblk, rqd, c_ctx); 173 pblk_complete_write(pblk, rqd, c_ctx);
174 atomic_dec(&pblk->inflight_io);
175}
176
177static void pblk_end_io_write_meta(struct nvm_rq *rqd)
178{
179 struct pblk *pblk = rqd->private;
180 struct nvm_tgt_dev *dev = pblk->dev;
181 struct nvm_geo *geo = &dev->geo;
182 struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
183 struct pblk_line *line = m_ctx->private;
184 struct pblk_emeta *emeta = line->emeta;
185 int pos = pblk_ppa_to_pos(geo, rqd->ppa_list[0]);
186 struct pblk_lun *rlun = &pblk->luns[pos];
187 int sync;
188
189 up(&rlun->wr_sem);
190
191 if (rqd->error) {
192 pblk_log_write_err(pblk, rqd);
193 pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
194 }
195#ifdef CONFIG_NVM_DEBUG
196 else
197 WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n");
198#endif
199
200 sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
201 if (sync == emeta->nr_entries)
202 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws,
203 pblk->close_wq);
204
205 bio_put(rqd->bio);
206 pblk_free_rqd(pblk, rqd, READ);
207
208 atomic_dec(&pblk->inflight_io);
193} 209}
194 210
195static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, 211static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
196 unsigned int nr_secs) 212 unsigned int nr_secs,
213 nvm_end_io_fn(*end_io))
197{ 214{
198 struct nvm_tgt_dev *dev = pblk->dev; 215 struct nvm_tgt_dev *dev = pblk->dev;
199 216
@@ -202,7 +219,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
202 rqd->nr_ppas = nr_secs; 219 rqd->nr_ppas = nr_secs;
203 rqd->flags = pblk_set_progr_mode(pblk, WRITE); 220 rqd->flags = pblk_set_progr_mode(pblk, WRITE);
204 rqd->private = pblk; 221 rqd->private = pblk;
205 rqd->end_io = pblk_end_io_write; 222 rqd->end_io = end_io;
206 223
207 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, 224 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
208 &rqd->dma_meta_list); 225 &rqd->dma_meta_list);
@@ -219,11 +236,10 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
219} 236}
220 237
221static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, 238static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
222 struct pblk_c_ctx *c_ctx) 239 struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
223{ 240{
224 struct pblk_line_meta *lm = &pblk->lm; 241 struct pblk_line_meta *lm = &pblk->lm;
225 struct pblk_line *e_line = pblk_line_get_data_next(pblk); 242 struct pblk_line *e_line = pblk_line_get_erase(pblk);
226 struct ppa_addr erase_ppa;
227 unsigned int valid = c_ctx->nr_valid; 243 unsigned int valid = c_ctx->nr_valid;
228 unsigned int padded = c_ctx->nr_padded; 244 unsigned int padded = c_ctx->nr_padded;
229 unsigned int nr_secs = valid + padded; 245 unsigned int nr_secs = valid + padded;
@@ -231,40 +247,23 @@ static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
231 int ret = 0; 247 int ret = 0;
232 248
233 lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); 249 lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
234 if (!lun_bitmap) { 250 if (!lun_bitmap)
235 ret = -ENOMEM; 251 return -ENOMEM;
236 goto out;
237 }
238 c_ctx->lun_bitmap = lun_bitmap; 252 c_ctx->lun_bitmap = lun_bitmap;
239 253
240 ret = pblk_alloc_w_rq(pblk, rqd, nr_secs); 254 ret = pblk_alloc_w_rq(pblk, rqd, nr_secs, pblk_end_io_write);
241 if (ret) { 255 if (ret) {
242 kfree(lun_bitmap); 256 kfree(lun_bitmap);
243 goto out; 257 return ret;
244 } 258 }
245 259
246 ppa_set_empty(&erase_ppa);
247 if (likely(!e_line || !atomic_read(&e_line->left_eblks))) 260 if (likely(!e_line || !atomic_read(&e_line->left_eblks)))
248 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0); 261 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
249 else 262 else
250 pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, 263 pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
251 valid, &erase_ppa); 264 valid, erase_ppa);
252
253out:
254 if (unlikely(e_line && !ppa_empty(erase_ppa))) {
255 if (pblk_blk_erase_async(pblk, erase_ppa)) {
256 struct nvm_tgt_dev *dev = pblk->dev;
257 struct nvm_geo *geo = &dev->geo;
258 int bit;
259
260 atomic_inc(&e_line->left_eblks);
261 bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
262 WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
263 up(&pblk->erase_sem);
264 }
265 }
266 265
267 return ret; 266 return 0;
268} 267}
269 268
270int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, 269int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
@@ -280,7 +279,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
280 279
281 c_ctx->lun_bitmap = lun_bitmap; 280 c_ctx->lun_bitmap = lun_bitmap;
282 281
283 ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas); 282 ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
284 if (ret) 283 if (ret)
285 return ret; 284 return ret;
286 285
@@ -311,16 +310,237 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
311 return secs_to_sync; 310 return secs_to_sync;
312} 311}
313 312
313static inline int pblk_valid_meta_ppa(struct pblk *pblk,
314 struct pblk_line *meta_line,
315 struct ppa_addr *ppa_list, int nr_ppas)
316{
317 struct nvm_tgt_dev *dev = pblk->dev;
318 struct nvm_geo *geo = &dev->geo;
319 struct pblk_line *data_line;
320 struct ppa_addr ppa, ppa_opt;
321 u64 paddr;
322 int i;
323
324 data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
325 paddr = pblk_lookup_page(pblk, meta_line);
326 ppa = addr_to_gen_ppa(pblk, paddr, 0);
327
328 if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
329 return 1;
330
331 /* Schedule a metadata I/O that is half the distance from the data I/O
332 * with regards to the number of LUNs forming the pblk instance. This
333 * balances LUN conflicts across every I/O.
334 *
335 * When the LUN configuration changes (e.g., due to GC), this distance
336 * can align, which would result in a LUN deadlock. In this case, modify
337 * the distance to not be optimal, but allow metadata I/Os to succeed.
338 */
339 ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
340 if (unlikely(ppa_opt.ppa == ppa.ppa)) {
341 data_line->meta_distance--;
342 return 0;
343 }
344
345 for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
346 if (ppa_list[i].g.ch == ppa_opt.g.ch &&
347 ppa_list[i].g.lun == ppa_opt.g.lun)
348 return 1;
349
350 if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
351 for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
352 if (ppa_list[i].g.ch == ppa.g.ch &&
353 ppa_list[i].g.lun == ppa.g.lun)
354 return 0;
355
356 return 1;
357 }
358
359 return 0;
360}
361
362int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
363{
364 struct nvm_tgt_dev *dev = pblk->dev;
365 struct nvm_geo *geo = &dev->geo;
366 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
367 struct pblk_line_meta *lm = &pblk->lm;
368 struct pblk_emeta *emeta = meta_line->emeta;
369 struct pblk_g_ctx *m_ctx;
370 struct pblk_lun *rlun;
371 struct bio *bio;
372 struct nvm_rq *rqd;
373 void *data;
374 u64 paddr;
375 int rq_ppas = pblk->min_write_pgs;
376 int id = meta_line->id;
377 int rq_len;
378 int i, j;
379 int ret;
380
381 rqd = pblk_alloc_rqd(pblk, READ);
382 if (IS_ERR(rqd)) {
383 pr_err("pblk: cannot allocate write req.\n");
384 return PTR_ERR(rqd);
385 }
386 m_ctx = nvm_rq_to_pdu(rqd);
387 m_ctx->private = meta_line;
388
389 rq_len = rq_ppas * geo->sec_size;
390 data = ((void *)emeta->buf) + emeta->mem;
391
392 bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
393 l_mg->emeta_alloc_type, GFP_KERNEL);
394 if (IS_ERR(bio)) {
395 ret = PTR_ERR(bio);
396 goto fail_free_rqd;
397 }
398 bio->bi_iter.bi_sector = 0; /* internal bio */
399 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
400 rqd->bio = bio;
401
402 ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
403 if (ret)
404 goto fail_free_bio;
405
406 for (i = 0; i < rqd->nr_ppas; ) {
407 spin_lock(&meta_line->lock);
408 paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
409 spin_unlock(&meta_line->lock);
410 for (j = 0; j < rq_ppas; j++, i++, paddr++)
411 rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
412 }
413
414 rlun = &pblk->luns[pblk_ppa_to_pos(geo, rqd->ppa_list[0])];
415 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
416 if (ret) {
417 pr_err("pblk: lun semaphore timed out (%d)\n", ret);
418 goto fail_free_bio;
419 }
420
421 emeta->mem += rq_len;
422 if (emeta->mem >= lm->emeta_len[0]) {
423 spin_lock(&l_mg->close_lock);
424 list_del(&meta_line->list);
425 WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line),
426 "pblk: corrupt meta line %d\n", meta_line->id);
427 spin_unlock(&l_mg->close_lock);
428 }
429
430 ret = pblk_submit_io(pblk, rqd);
431 if (ret) {
432 pr_err("pblk: emeta I/O submission failed: %d\n", ret);
433 goto fail_rollback;
434 }
435
436 return NVM_IO_OK;
437
438fail_rollback:
439 spin_lock(&l_mg->close_lock);
440 pblk_dealloc_page(pblk, meta_line, rq_ppas);
441 list_add(&meta_line->list, &l_mg->emeta_list);
442 spin_unlock(&l_mg->close_lock);
443fail_free_bio:
444 if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META))
445 bio_put(bio);
446fail_free_rqd:
447 pblk_free_rqd(pblk, rqd, READ);
448 return ret;
449}
450
451static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
452 int prev_n)
453{
454 struct pblk_line_meta *lm = &pblk->lm;
455 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
456 struct pblk_line *meta_line;
457
458 spin_lock(&l_mg->close_lock);
459retry:
460 if (list_empty(&l_mg->emeta_list)) {
461 spin_unlock(&l_mg->close_lock);
462 return 0;
463 }
464 meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
465 if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line))
466 goto retry;
467 spin_unlock(&l_mg->close_lock);
468
469 if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
470 return 0;
471
472 return pblk_submit_meta_io(pblk, meta_line);
473}
474
475static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
476{
477 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
478 struct ppa_addr erase_ppa;
479 int err;
480
481 ppa_set_empty(&erase_ppa);
482
483 /* Assign lbas to ppas and populate request structure */
484 err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
485 if (err) {
486 pr_err("pblk: could not setup write request: %d\n", err);
487 return NVM_IO_ERR;
488 }
489
490 if (likely(ppa_empty(erase_ppa))) {
491 /* Submit metadata write for previous data line */
492 err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
493 if (err) {
494 pr_err("pblk: metadata I/O submission failed: %d\n", err);
495 return NVM_IO_ERR;
496 }
497
498 /* Submit data write for current data line */
499 err = pblk_submit_io(pblk, rqd);
500 if (err) {
501 pr_err("pblk: data I/O submission failed: %d\n", err);
502 return NVM_IO_ERR;
503 }
504 } else {
505 /* Submit data write for current data line */
506 err = pblk_submit_io(pblk, rqd);
507 if (err) {
508 pr_err("pblk: data I/O submission failed: %d\n", err);
509 return NVM_IO_ERR;
510 }
511
512 /* Submit available erase for next data line */
513 if (pblk_blk_erase_async(pblk, erase_ppa)) {
514 struct pblk_line *e_line = pblk_line_get_erase(pblk);
515 struct nvm_tgt_dev *dev = pblk->dev;
516 struct nvm_geo *geo = &dev->geo;
517 int bit;
518
519 atomic_inc(&e_line->left_eblks);
520 bit = pblk_ppa_to_pos(geo, erase_ppa);
521 WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
522 }
523 }
524
525 return NVM_IO_OK;
526}
527
528static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd)
529{
530 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
531 struct bio *bio = rqd->bio;
532
533 if (c_ctx->nr_padded)
534 pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded);
535}
536
314static int pblk_submit_write(struct pblk *pblk) 537static int pblk_submit_write(struct pblk *pblk)
315{ 538{
316 struct bio *bio; 539 struct bio *bio;
317 struct nvm_rq *rqd; 540 struct nvm_rq *rqd;
318 struct pblk_c_ctx *c_ctx;
319 unsigned int pgs_read;
320 unsigned int secs_avail, secs_to_sync, secs_to_com; 541 unsigned int secs_avail, secs_to_sync, secs_to_com;
321 unsigned int secs_to_flush; 542 unsigned int secs_to_flush;
322 unsigned long pos; 543 unsigned long pos;
323 int err;
324 544
325 /* If there are no sectors in the cache, flushes (bios without data) 545 /* If there are no sectors in the cache, flushes (bios without data)
326 * will be cleared on the cache threads 546 * will be cleared on the cache threads
@@ -338,7 +558,6 @@ static int pblk_submit_write(struct pblk *pblk)
338 pr_err("pblk: cannot allocate write req.\n"); 558 pr_err("pblk: cannot allocate write req.\n");
339 return 1; 559 return 1;
340 } 560 }
341 c_ctx = nvm_rq_to_pdu(rqd);
342 561
343 bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs); 562 bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
344 if (!bio) { 563 if (!bio) {
@@ -358,29 +577,14 @@ static int pblk_submit_write(struct pblk *pblk)
358 secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync; 577 secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
359 pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); 578 pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
360 579
361 pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos, 580 if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
362 secs_to_sync, secs_avail); 581 secs_avail)) {
363 if (!pgs_read) {
364 pr_err("pblk: corrupted write bio\n"); 582 pr_err("pblk: corrupted write bio\n");
365 goto fail_put_bio; 583 goto fail_put_bio;
366 } 584 }
367 585
368 if (c_ctx->nr_padded) 586 if (pblk_submit_io_set(pblk, rqd))
369 if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
370 goto fail_put_bio;
371
372 /* Assign lbas to ppas and populate request structure */
373 err = pblk_setup_w_rq(pblk, rqd, c_ctx);
374 if (err) {
375 pr_err("pblk: could not setup write request\n");
376 goto fail_free_bio;
377 }
378
379 err = pblk_submit_io(pblk, rqd);
380 if (err) {
381 pr_err("pblk: I/O submission failed: %d\n", err);
382 goto fail_free_bio; 587 goto fail_free_bio;
383 }
384 588
385#ifdef CONFIG_NVM_DEBUG 589#ifdef CONFIG_NVM_DEBUG
386 atomic_long_add(secs_to_sync, &pblk->sub_writes); 590 atomic_long_add(secs_to_sync, &pblk->sub_writes);
@@ -389,8 +593,7 @@ static int pblk_submit_write(struct pblk *pblk)
389 return 0; 593 return 0;
390 594
391fail_free_bio: 595fail_free_bio:
392 if (c_ctx->nr_padded) 596 pblk_free_write_rqd(pblk, rqd);
393 pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
394fail_put_bio: 597fail_put_bio:
395 bio_put(bio); 598 bio_put(bio);
396fail_free_rqd: 599fail_free_rqd:
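
A rough sketch of the refactored flow above, for orientation only (it is not part of the patch; pblk_writer_example is a made-up name, while the helpers it calls are the ones declared in this series): the writer commits ring-buffer entries, lets pblk_rb_read_to_bio() populate the request's completion context through the nvm_rq, and hands the request to pblk_submit_io_set(), which interleaves emeta and erase I/O with the data write.

/* Sketch only: mirrors the new pblk_submit_write() flow above. */
static int pblk_writer_example(struct pblk *pblk, struct nvm_rq *rqd,
			       struct bio *bio, unsigned int secs_to_sync,
			       unsigned int secs_avail)
{
	unsigned int secs_to_com = min(secs_to_sync, secs_avail);
	unsigned int pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);

	/* Fills the request's pblk_c_ctx and maps buffer entries into the bio. */
	if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync,
				secs_avail))
		return NVM_IO_ERR;	/* corrupted write bio */

	/* Schedules emeta or erase I/O as needed, then submits the data. */
	return pblk_submit_io_set(pblk, rqd);
}

Moving the pblk_c_ctx handling behind the nvm_rq is what lets pblk_submit_write() drop its local c_ctx and pgs_read bookkeeping, as seen in the hunk above.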
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 99f3186b5288..15931381348c 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -40,6 +40,12 @@
40#define PBLK_MAX_REQ_ADDRS (64) 40#define PBLK_MAX_REQ_ADDRS (64)
41#define PBLK_MAX_REQ_ADDRS_PW (6) 41#define PBLK_MAX_REQ_ADDRS_PW (6)
42 42
43#define PBLK_WS_POOL_SIZE (128)
44#define PBLK_META_POOL_SIZE (128)
45#define PBLK_READ_REQ_POOL_SIZE (1024)
46
47#define PBLK_NR_CLOSE_JOBS (4)
48
43#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) 49#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
44 50
45#define PBLK_COMMAND_TIMEOUT_MS 30000 51#define PBLK_COMMAND_TIMEOUT_MS 30000
@@ -72,11 +78,15 @@ enum {
72 PBLK_BLK_ST_CLOSED = 0x2, 78 PBLK_BLK_ST_CLOSED = 0x2,
73}; 79};
74 80
81struct pblk_sec_meta {
82 u64 reserved;
83 __le64 lba;
84};
85
75/* The number of GC lists and the rate-limiter states go together. This way the 86/* The number of GC lists and the rate-limiter states go together. This way the
76 * rate-limiter can dictate how much GC is needed based on resource utilization. 87 * rate-limiter can dictate how much GC is needed based on resource utilization.
77 */ 88 */
78#define PBLK_NR_GC_LISTS 3 89#define PBLK_GC_NR_LISTS 3
79#define PBLK_MAX_GC_JOBS 32
80 90
81enum { 91enum {
82 PBLK_RL_HIGH = 1, 92 PBLK_RL_HIGH = 1,
@@ -84,14 +94,9 @@ enum {
84 PBLK_RL_LOW = 3, 94 PBLK_RL_LOW = 3,
85}; 95};
86 96
87struct pblk_sec_meta {
88 u64 reserved;
89 __le64 lba;
90};
91
92#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) 97#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
93 98
94/* write completion context */ 99/* write buffer completion context */
95struct pblk_c_ctx { 100struct pblk_c_ctx {
96 struct list_head list; /* Head for out-of-order completion */ 101 struct list_head list; /* Head for out-of-order completion */
97 102
@@ -101,9 +106,16 @@ struct pblk_c_ctx {
101 unsigned int nr_padded; 106 unsigned int nr_padded;
102}; 107};
103 108
104/* Read context */ 109/* generic context */
105struct pblk_r_ctx { 110struct pblk_g_ctx {
106 struct bio *orig_bio; 111 void *private;
112};
113
114/* Pad context */
115struct pblk_pad_rq {
116 struct pblk *pblk;
117 struct completion wait;
118 struct kref ref;
107}; 119};
108 120
109/* Recovery context */ 121/* Recovery context */
@@ -195,29 +207,39 @@ struct pblk_lun {
195struct pblk_gc_rq { 207struct pblk_gc_rq {
196 struct pblk_line *line; 208 struct pblk_line *line;
197 void *data; 209 void *data;
198 u64 *lba_list; 210 u64 lba_list[PBLK_MAX_REQ_ADDRS];
199 int nr_secs; 211 int nr_secs;
200 int secs_to_gc; 212 int secs_to_gc;
201 struct list_head list; 213 struct list_head list;
202}; 214};
203 215
204struct pblk_gc { 216struct pblk_gc {
217 /* These states are not protected by a lock since (i) they are in the
218 * fast path, and (ii) they are not critical.
219 */
205 int gc_active; 220 int gc_active;
206 int gc_enabled; 221 int gc_enabled;
207 int gc_forced; 222 int gc_forced;
208 int gc_jobs_active;
209 atomic_t inflight_gc;
210 223
211 struct task_struct *gc_ts; 224 struct task_struct *gc_ts;
212 struct task_struct *gc_writer_ts; 225 struct task_struct *gc_writer_ts;
226 struct task_struct *gc_reader_ts;
227
228 struct workqueue_struct *gc_line_reader_wq;
213 struct workqueue_struct *gc_reader_wq; 229 struct workqueue_struct *gc_reader_wq;
230
214 struct timer_list gc_timer; 231 struct timer_list gc_timer;
215 232
233 struct semaphore gc_sem;
234 atomic_t inflight_gc;
216 int w_entries; 235 int w_entries;
236
217 struct list_head w_list; 237 struct list_head w_list;
238 struct list_head r_list;
218 239
219 spinlock_t lock; 240 spinlock_t lock;
220 spinlock_t w_lock; 241 spinlock_t w_lock;
242 spinlock_t r_lock;
221}; 243};
222 244
223struct pblk_rl { 245struct pblk_rl {
@@ -229,10 +251,8 @@ struct pblk_rl {
229 */ 251 */
230 unsigned int high_pw; /* High rounded up as a power of 2 */ 252 unsigned int high_pw; /* High rounded up as a power of 2 */
231 253
232#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent 254#define PBLK_USER_HIGH_THRS 8 /* Begin write limit at 12% available blks */
233 * available blks 255#define PBLK_USER_LOW_THRS 10 /* Aggressive GC at 10% available blocks */
234 */
235#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
236 256
237 int rb_windows_pw; /* Number of rate windows in the write buffer 257 int rb_windows_pw; /* Number of rate windows in the write buffer
238 * given as a power-of-2. This guarantees that 258 * given as a power-of-2. This guarantees that
@@ -244,13 +264,19 @@ struct pblk_rl {
244 */ 264 */
245 int rb_budget; /* Total number of entries available for I/O */ 265 int rb_budget; /* Total number of entries available for I/O */
246 int rb_user_max; /* Max buffer entries available for user I/O */ 266 int rb_user_max; /* Max buffer entries available for user I/O */
247 atomic_t rb_user_cnt; /* User I/O buffer counter */
248 int rb_gc_max; /* Max buffer entries available for GC I/O */ 267 int rb_gc_max; /* Max buffer entries available for GC I/O */
249 int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ 268 int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
250 int rb_state; /* Rate-limiter current state */ 269 int rb_state; /* Rate-limiter current state */
270
271 atomic_t rb_user_cnt; /* User I/O buffer counter */
251 atomic_t rb_gc_cnt; /* GC I/O buffer counter */ 272 atomic_t rb_gc_cnt; /* GC I/O buffer counter */
273 atomic_t rb_space; /* Space limit in case of reaching capacity */
274
275 int rsv_blocks; /* Reserved blocks for GC */
252 276
253 int rb_user_active; 277 int rb_user_active;
278 int rb_gc_active;
279
254 struct timer_list u_timer; 280 struct timer_list u_timer;
255 281
256 unsigned long long nr_secs; 282 unsigned long long nr_secs;
@@ -258,8 +284,6 @@ struct pblk_rl {
258 atomic_t free_blocks; 284 atomic_t free_blocks;
259}; 285};
260 286
261#define PBLK_LINE_NR_LUN_BITMAP 2
262#define PBLK_LINE_NR_SEC_BITMAP 2
263#define PBLK_LINE_EMPTY (~0U) 287#define PBLK_LINE_EMPTY (~0U)
264 288
265enum { 289enum {
@@ -310,16 +334,19 @@ struct line_smeta {
310 __le32 window_wr_lun; /* Number of parallel LUNs to write */ 334 __le32 window_wr_lun; /* Number of parallel LUNs to write */
311 335
312 __le32 rsvd[2]; 336 __le32 rsvd[2];
337
338 __le64 lun_bitmap[];
313}; 339};
314 340
315/* 341/*
316 * Metadata Layout: 342 * Metadata layout in media:
317 * 1. struct pblk_emeta 343 * First sector:
318 * 2. nr_lbas u64 forming lba list 344 * 1. struct line_emeta
319 * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line) 345 * 2. bad block bitmap (u64 * window_wr_lun)
320 * 4. nr_luns bits (u64 format) forming line bad block bitmap 346 * Mid sectors (start at lbas_sector):
321 * 347 * 3. nr_lbas (u64) forming lba list
322 * 3. and 4. will be part of FTL log 348 * Last sectors (start at vsc_sector):
349 * 4. u32 valid sector count (vsc) for all lines (~0U: free line)
323 */ 350 */
324struct line_emeta { 351struct line_emeta {
325 struct line_header header; 352 struct line_header header;
@@ -339,6 +366,23 @@ struct line_emeta {
339 __le32 next_id; /* Line id for next line */ 366 __le32 next_id; /* Line id for next line */
340 __le64 nr_lbas; /* Number of lbas mapped in line */ 367 __le64 nr_lbas; /* Number of lbas mapped in line */
341 __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ 368 __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
369 __le64 bb_bitmap[]; /* Updated bad block bitmap for line */
370};
371
372struct pblk_emeta {
373 struct line_emeta *buf; /* emeta buffer in media format */
374 int mem; /* Write offset - points to next
375 * writable entry in memory
376 */
377 atomic_t sync; /* Synced - backpointer that signals the
378 * last entry that has been successfully
379 * persisted to media
380 */
381 unsigned int nr_entries; /* Number of emeta entries */
382};
383
384struct pblk_smeta {
385 struct line_smeta *buf; /* smeta buffer in persistent format */
342}; 386};
343 387
344struct pblk_line { 388struct pblk_line {
@@ -355,9 +399,12 @@ struct pblk_line {
355 399
356 unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ 400 unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
357 401
358 struct line_smeta *smeta; /* Start metadata */ 402 struct pblk_smeta *smeta; /* Start metadata */
359	struct line_emeta *emeta;	/* End metadata */ 403	struct pblk_emeta *emeta;	/* End metadata */
404
360 int meta_line; /* Metadata line id */ 405 int meta_line; /* Metadata line id */
406 int meta_distance; /* Distance between data and metadata */
407
361 u64 smeta_ssec; /* Sector where smeta starts */ 408 u64 smeta_ssec; /* Sector where smeta starts */
362 u64 emeta_ssec; /* Sector where emeta starts */ 409 u64 emeta_ssec; /* Sector where emeta starts */
363 410
@@ -374,9 +421,10 @@ struct pblk_line {
374 atomic_t left_seblks; /* Blocks left for sync erasing */ 421 atomic_t left_seblks; /* Blocks left for sync erasing */
375 422
376 int left_msecs; /* Sectors left for mapping */ 423 int left_msecs; /* Sectors left for mapping */
377 int left_ssecs; /* Sectors left to sync */
378 unsigned int cur_sec; /* Sector map pointer */ 424 unsigned int cur_sec; /* Sector map pointer */
379 unsigned int vsc; /* Valid sector count in line */ 425 unsigned int nr_valid_lbas; /* Number of valid lbas in line */
426
427 __le32 *vsc; /* Valid sector count in line */
380 428
381 struct kref ref; /* Write buffer L2P references */ 429 struct kref ref; /* Write buffer L2P references */
382 430
@@ -385,13 +433,15 @@ struct pblk_line {
385 433
386#define PBLK_DATA_LINES 4 434#define PBLK_DATA_LINES 4
387 435
388enum{ 436enum {
389 PBLK_KMALLOC_META = 1, 437 PBLK_KMALLOC_META = 1,
390 PBLK_VMALLOC_META = 2, 438 PBLK_VMALLOC_META = 2,
391}; 439};
392 440
393struct pblk_line_metadata { 441enum {
394 void *meta; 442 PBLK_EMETA_TYPE_HEADER = 1, /* struct line_emeta first sector */
443 PBLK_EMETA_TYPE_LLBA = 2, /* lba list - type: __le64 */
444 PBLK_EMETA_TYPE_VSC = 3, /* vsc list - type: __le32 */
395}; 445};
396 446
397struct pblk_line_mgmt { 447struct pblk_line_mgmt {
@@ -404,7 +454,7 @@ struct pblk_line_mgmt {
404 struct list_head bad_list; /* Full lines bad */ 454 struct list_head bad_list; /* Full lines bad */
405 455
406 /* GC lists - use gc_lock */ 456 /* GC lists - use gc_lock */
407 struct list_head *gc_lists[PBLK_NR_GC_LISTS]; 457 struct list_head *gc_lists[PBLK_GC_NR_LISTS];
408 struct list_head gc_high_list; /* Full lines ready to GC, high isc */ 458 struct list_head gc_high_list; /* Full lines ready to GC, high isc */
409 struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */ 459 struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
410 struct list_head gc_low_list; /* Full lines ready to GC, low isc */ 460 struct list_head gc_low_list; /* Full lines ready to GC, low isc */
@@ -417,13 +467,16 @@ struct pblk_line_mgmt {
417 struct pblk_line *log_next; /* Next FTL log line */ 467 struct pblk_line *log_next; /* Next FTL log line */
418 struct pblk_line *data_next; /* Next data line */ 468 struct pblk_line *data_next; /* Next data line */
419 469
470 struct list_head emeta_list; /* Lines queued to schedule emeta */
471
472 __le32 *vsc_list; /* Valid sector counts for all lines */
473
420 /* Metadata allocation type: VMALLOC | KMALLOC */ 474 /* Metadata allocation type: VMALLOC | KMALLOC */
421 int smeta_alloc_type;
422 int emeta_alloc_type; 475 int emeta_alloc_type;
423 476
424 /* Pre-allocated metadata for data lines */ 477 /* Pre-allocated metadata for data lines */
425 struct pblk_line_metadata sline_meta[PBLK_DATA_LINES]; 478 struct pblk_smeta *sline_meta[PBLK_DATA_LINES];
426 struct pblk_line_metadata eline_meta[PBLK_DATA_LINES]; 479 struct pblk_emeta *eline_meta[PBLK_DATA_LINES];
427 unsigned long meta_bitmap; 480 unsigned long meta_bitmap;
428 481
429 /* Helpers for fast bitmap calculations */ 482 /* Helpers for fast bitmap calculations */
@@ -434,25 +487,40 @@ struct pblk_line_mgmt {
434 unsigned long l_seq_nr; /* Log line unique sequence number */ 487 unsigned long l_seq_nr; /* Log line unique sequence number */
435 488
436 spinlock_t free_lock; 489 spinlock_t free_lock;
490 spinlock_t close_lock;
437 spinlock_t gc_lock; 491 spinlock_t gc_lock;
438}; 492};
439 493
440struct pblk_line_meta { 494struct pblk_line_meta {
441 unsigned int smeta_len; /* Total length for smeta */ 495 unsigned int smeta_len; /* Total length for smeta */
442 unsigned int smeta_sec; /* Sectors needed for smeta*/ 496 unsigned int smeta_sec; /* Sectors needed for smeta */
443 unsigned int emeta_len; /* Total length for emeta */ 497
444 unsigned int emeta_sec; /* Sectors needed for emeta*/ 498 unsigned int emeta_len[4]; /* Lengths for emeta:
499 * [0]: Total length
500 * [1]: struct line_emeta length
501 * [2]: L2P portion length
502 * [3]: vsc list length
503 */
504 unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout
505 * as emeta_len
506 */
507
445 unsigned int emeta_bb; /* Boundary for bb that affects emeta */ 508 unsigned int emeta_bb; /* Boundary for bb that affects emeta */
509
510 unsigned int vsc_list_len; /* Length for vsc list */
446 unsigned int sec_bitmap_len; /* Length for sector bitmap in line */ 511 unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
447 unsigned int blk_bitmap_len; /* Length for block bitmap in line */ 512 unsigned int blk_bitmap_len; /* Length for block bitmap in line */
448 unsigned int lun_bitmap_len; /* Length for lun bitmap in line */ 513 unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
449 514
450 unsigned int blk_per_line; /* Number of blocks in a full line */ 515 unsigned int blk_per_line; /* Number of blocks in a full line */
451 unsigned int sec_per_line; /* Number of sectors in a line */ 516 unsigned int sec_per_line; /* Number of sectors in a line */
517 unsigned int dsec_per_line; /* Number of data sectors in a line */
452 unsigned int min_blk_line; /* Min. number of good blocks in line */ 518 unsigned int min_blk_line; /* Min. number of good blocks in line */
453 519
454 unsigned int mid_thrs; /* Threshold for GC mid list */ 520 unsigned int mid_thrs; /* Threshold for GC mid list */
455 unsigned int high_thrs; /* Threshold for GC high list */ 521 unsigned int high_thrs; /* Threshold for GC high list */
522
523 unsigned int meta_distance; /* Distance between data and metadata */
456}; 524};
457 525
458struct pblk_addr_format { 526struct pblk_addr_format {
@@ -470,6 +538,13 @@ struct pblk_addr_format {
470 u8 sec_offset; 538 u8 sec_offset;
471}; 539};
472 540
541enum {
542 PBLK_STATE_RUNNING = 0,
543 PBLK_STATE_STOPPING = 1,
544 PBLK_STATE_RECOVERING = 2,
545 PBLK_STATE_STOPPED = 3,
546};
547
473struct pblk { 548struct pblk {
474 struct nvm_tgt_dev *dev; 549 struct nvm_tgt_dev *dev;
475 struct gendisk *disk; 550 struct gendisk *disk;
@@ -487,6 +562,8 @@ struct pblk {
487 562
488 struct pblk_rb rwb; 563 struct pblk_rb rwb;
489 564
565 int state; /* pblk line state */
566
490 int min_write_pgs; /* Minimum amount of pages required by controller */ 567 int min_write_pgs; /* Minimum amount of pages required by controller */
491 int max_write_pgs; /* Maximum amount of pages supported by controller */ 568 int max_write_pgs; /* Maximum amount of pages supported by controller */
492 int pgs_in_buffer; /* Number of pages that need to be held in buffer to 569 int pgs_in_buffer; /* Number of pages that need to be held in buffer to
@@ -499,7 +576,7 @@ struct pblk {
499 /* pblk provisioning values. Used by rate limiter */ 576 /* pblk provisioning values. Used by rate limiter */
500 struct pblk_rl rl; 577 struct pblk_rl rl;
501 578
502 struct semaphore erase_sem; 579 int sec_per_write;
503 580
504 unsigned char instance_uuid[16]; 581 unsigned char instance_uuid[16];
505#ifdef CONFIG_NVM_DEBUG 582#ifdef CONFIG_NVM_DEBUG
@@ -511,8 +588,8 @@ struct pblk {
511 atomic_long_t req_writes; /* Sectors stored on write buffer */ 588 atomic_long_t req_writes; /* Sectors stored on write buffer */
512 atomic_long_t sub_writes; /* Sectors submitted from buffer */ 589 atomic_long_t sub_writes; /* Sectors submitted from buffer */
513 atomic_long_t sync_writes; /* Sectors synced to media */ 590 atomic_long_t sync_writes; /* Sectors synced to media */
514 atomic_long_t compl_writes; /* Sectors completed in write bio */
515 atomic_long_t inflight_reads; /* Inflight sector read requests */ 591 atomic_long_t inflight_reads; /* Inflight sector read requests */
592 atomic_long_t cache_reads; /* Read requests that hit the cache */
516 atomic_long_t sync_reads; /* Completed sector read requests */ 593 atomic_long_t sync_reads; /* Completed sector read requests */
517 atomic_long_t recov_writes; /* Sectors submitted from recovery */ 594 atomic_long_t recov_writes; /* Sectors submitted from recovery */
518 atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */ 595 atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
@@ -528,6 +605,8 @@ struct pblk {
528 atomic_long_t write_failed; 605 atomic_long_t write_failed;
529 atomic_long_t erase_failed; 606 atomic_long_t erase_failed;
530 607
608 atomic_t inflight_io; /* General inflight I/O counter */
609
531 struct task_struct *writer_ts; 610 struct task_struct *writer_ts;
532 611
533 /* Simple translation map of logical addresses to physical addresses. 612 /* Simple translation map of logical addresses to physical addresses.
@@ -542,11 +621,13 @@ struct pblk {
542 mempool_t *page_pool; 621 mempool_t *page_pool;
543 mempool_t *line_ws_pool; 622 mempool_t *line_ws_pool;
544 mempool_t *rec_pool; 623 mempool_t *rec_pool;
545 mempool_t *r_rq_pool; 624 mempool_t *g_rq_pool;
546 mempool_t *w_rq_pool; 625 mempool_t *w_rq_pool;
547 mempool_t *line_meta_pool; 626 mempool_t *line_meta_pool;
548 627
549 struct workqueue_struct *kw_wq; 628 struct workqueue_struct *close_wq;
629 struct workqueue_struct *bb_wq;
630
550 struct timer_list wtimer; 631 struct timer_list wtimer;
551 632
552 struct pblk_gc gc; 633 struct pblk_gc gc;
@@ -559,7 +640,7 @@ struct pblk_line_ws {
559 struct work_struct ws; 640 struct work_struct ws;
560}; 641};
561 642
562#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx)) 643#define pblk_g_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_g_ctx))
563#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx)) 644#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
564 645
565/* 646/*
@@ -579,18 +660,17 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
579 struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, 660 struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
580 unsigned int pos); 661 unsigned int pos);
581struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); 662struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
663void pblk_rb_flush(struct pblk_rb *rb);
582 664
583void pblk_rb_sync_l2p(struct pblk_rb *rb); 665void pblk_rb_sync_l2p(struct pblk_rb *rb);
584unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio, 666unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
585 struct pblk_c_ctx *c_ctx, 667 struct bio *bio, unsigned int pos,
586 unsigned int pos, 668 unsigned int nr_entries, unsigned int count);
587 unsigned int nr_entries,
588 unsigned int count);
589unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, 669unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
590 struct list_head *list, 670 struct list_head *list,
591 unsigned int max); 671 unsigned int max);
592int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 672int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
593 u64 pos, int bio_iter); 673 struct ppa_addr ppa, int bio_iter);
594unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); 674unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
595 675
596unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); 676unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -601,6 +681,7 @@ void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
601unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb); 681unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
602 682
603unsigned int pblk_rb_read_count(struct pblk_rb *rb); 683unsigned int pblk_rb_read_count(struct pblk_rb *rb);
684unsigned int pblk_rb_sync_count(struct pblk_rb *rb);
604unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos); 685unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
605 686
606int pblk_rb_tear_down_check(struct pblk_rb *rb); 687int pblk_rb_tear_down_check(struct pblk_rb *rb);
@@ -612,40 +693,50 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
612 * pblk core 693 * pblk core
613 */ 694 */
614struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw); 695struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
696void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
615int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, 697int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
616 struct pblk_c_ctx *c_ctx); 698 struct pblk_c_ctx *c_ctx);
617void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw); 699void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
618void pblk_flush_writer(struct pblk *pblk); 700void pblk_wait_for_meta(struct pblk *pblk);
619struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); 701struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
620void pblk_discard(struct pblk *pblk, struct bio *bio); 702void pblk_discard(struct pblk *pblk, struct bio *bio);
621void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); 703void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
622void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); 704void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
623int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); 705int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
706int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
624struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, 707struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
625 unsigned int nr_secs, unsigned int len, 708 unsigned int nr_secs, unsigned int len,
626 gfp_t gfp_mask); 709 int alloc_type, gfp_t gfp_mask);
627struct pblk_line *pblk_line_get(struct pblk *pblk); 710struct pblk_line *pblk_line_get(struct pblk *pblk);
628struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); 711struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
629struct pblk_line *pblk_line_replace_data(struct pblk *pblk); 712void pblk_line_replace_data(struct pblk *pblk);
630int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); 713int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
631void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); 714void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
632struct pblk_line *pblk_line_get_data(struct pblk *pblk); 715struct pblk_line *pblk_line_get_data(struct pblk *pblk);
633struct pblk_line *pblk_line_get_data_next(struct pblk *pblk); 716struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
634int pblk_line_erase(struct pblk *pblk, struct pblk_line *line); 717int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
635int pblk_line_is_full(struct pblk_line *line); 718int pblk_line_is_full(struct pblk_line *line);
636void pblk_line_free(struct pblk *pblk, struct pblk_line *line); 719void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
637void pblk_line_close_ws(struct work_struct *work); 720void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
638void pblk_line_close(struct pblk *pblk, struct pblk_line *line); 721void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
722void pblk_line_close_meta_sync(struct pblk *pblk);
723void pblk_line_close_ws(struct work_struct *work);
724void pblk_pipeline_stop(struct pblk *pblk);
639void pblk_line_mark_bb(struct work_struct *work); 725void pblk_line_mark_bb(struct work_struct *work);
640void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, 726void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
641 void (*work)(struct work_struct *)); 727 void (*work)(struct work_struct *),
728 struct workqueue_struct *wq);
642u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); 729u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
643int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); 730int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
644int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line); 731int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
732 void *emeta_buf);
645int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); 733int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
646void pblk_line_put(struct kref *ref); 734void pblk_line_put(struct kref *ref);
647struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); 735struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
736u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line);
737void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
648u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); 738u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
739u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
649int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail, 740int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
650 unsigned long secs_to_flush); 741 unsigned long secs_to_flush);
651void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, 742void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
@@ -656,11 +747,11 @@ void pblk_end_bio_sync(struct bio *bio);
656void pblk_end_io_sync(struct nvm_rq *rqd); 747void pblk_end_io_sync(struct nvm_rq *rqd);
657int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, 748int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
658 int nr_pages); 749 int nr_pages);
659void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
660 u64 paddr);
661void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, 750void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
662 int nr_pages); 751 int nr_pages);
663void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa); 752void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
753void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
754 u64 paddr);
664void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa); 755void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
665void pblk_update_map_cache(struct pblk *pblk, sector_t lba, 756void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
666 struct ppa_addr ppa); 757 struct ppa_addr ppa);
@@ -702,6 +793,7 @@ void pblk_write_should_kick(struct pblk *pblk);
702/* 793/*
703 * pblk read path 794 * pblk read path
704 */ 795 */
796extern struct bio_set *pblk_bio_set;
705int pblk_submit_read(struct pblk *pblk, struct bio *bio); 797int pblk_submit_read(struct pblk *pblk, struct bio *bio);
706int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, 798int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
707 unsigned int nr_secs, unsigned int *secs_to_gc, 799 unsigned int nr_secs, unsigned int *secs_to_gc,
@@ -711,7 +803,7 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
711 */ 803 */
712void pblk_submit_rec(struct work_struct *work); 804void pblk_submit_rec(struct work_struct *work);
713struct pblk_line *pblk_recov_l2p(struct pblk *pblk); 805struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
714void pblk_recov_pad(struct pblk *pblk); 806int pblk_recov_pad(struct pblk *pblk);
715__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta); 807__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
716int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, 808int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
717 struct pblk_rec_ctx *recovery, u64 *comp_bits, 809 struct pblk_rec_ctx *recovery, u64 *comp_bits,
@@ -720,33 +812,40 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
720/* 812/*
721 * pblk gc 813 * pblk gc
722 */ 814 */
723#define PBLK_GC_TRIES 3 815#define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */
816#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */
817#define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */
818#define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */
724 819
725int pblk_gc_init(struct pblk *pblk); 820int pblk_gc_init(struct pblk *pblk);
726void pblk_gc_exit(struct pblk *pblk); 821void pblk_gc_exit(struct pblk *pblk);
727void pblk_gc_should_start(struct pblk *pblk); 822void pblk_gc_should_start(struct pblk *pblk);
728void pblk_gc_should_stop(struct pblk *pblk); 823void pblk_gc_should_stop(struct pblk *pblk);
729int pblk_gc_status(struct pblk *pblk); 824void pblk_gc_should_kick(struct pblk *pblk);
825void pblk_gc_kick(struct pblk *pblk);
730void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, 826void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
731 int *gc_active); 827 int *gc_active);
732void pblk_gc_sysfs_force(struct pblk *pblk, int force); 828int pblk_gc_sysfs_force(struct pblk *pblk, int force);
733 829
734/* 830/*
735 * pblk rate limiter 831 * pblk rate limiter
736 */ 832 */
737void pblk_rl_init(struct pblk_rl *rl, int budget); 833void pblk_rl_init(struct pblk_rl *rl, int budget);
738void pblk_rl_free(struct pblk_rl *rl); 834void pblk_rl_free(struct pblk_rl *rl);
739int pblk_rl_gc_thrs(struct pblk_rl *rl); 835int pblk_rl_high_thrs(struct pblk_rl *rl);
836int pblk_rl_low_thrs(struct pblk_rl *rl);
740unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); 837unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
741int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); 838int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
839void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries);
742void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); 840void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
743int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); 841int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
744void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); 842void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
745void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); 843void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
746void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
747int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); 844int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
748void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); 845void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
749void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); 846void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
847void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left);
848int pblk_rl_is_limit(struct pblk_rl *rl);
750 849
751/* 850/*
752 * pblk sysfs 851 * pblk sysfs
@@ -774,9 +873,30 @@ static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
774 return c_ctx - sizeof(struct nvm_rq); 873 return c_ctx - sizeof(struct nvm_rq);
775} 874}
776 875
777static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta) 876static inline void *emeta_to_bb(struct line_emeta *emeta)
877{
878 return emeta->bb_bitmap;
879}
880
881static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
882{
883 return ((void *)emeta + pblk->lm.emeta_len[1]);
884}
885
886static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
778{ 887{
779 return (emeta) + 1; 888 return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
889}
890
891static inline int pblk_line_vsc(struct pblk_line *line)
892{
893 int vsc;
894
895 spin_lock(&line->lock);
896 vsc = le32_to_cpu(*line->vsc);
897 spin_unlock(&line->lock);
898
899 return vsc;
780} 900}
781 901
782#define NVM_MEM_PAGE_WRITE (8) 902#define NVM_MEM_PAGE_WRITE (8)
@@ -917,6 +1037,14 @@ static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
917 ppa_addr->ppa = ADDR_EMPTY; 1037 ppa_addr->ppa = ADDR_EMPTY;
918} 1038}
919 1039
1040static inline bool pblk_ppa_comp(struct ppa_addr lppa, struct ppa_addr rppa)
1041{
1042 if (lppa.ppa == rppa.ppa)
1043 return true;
1044
1045 return false;
1046}
1047
920static inline int pblk_addr_in_cache(struct ppa_addr ppa) 1048static inline int pblk_addr_in_cache(struct ppa_addr ppa)
921{ 1049{
922 return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached); 1050 return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
@@ -964,11 +1092,11 @@ static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
964} 1092}
965 1093
966static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk, 1094static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
967 struct line_smeta *smeta) 1095 struct line_header *header)
968{ 1096{
969 u32 crc = ~(u32)0; 1097 u32 crc = ~(u32)0;
970 1098
971 crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc), 1099 crc = crc32_le(crc, (unsigned char *)header + sizeof(crc),
972 sizeof(struct line_header) - sizeof(crc)); 1100 sizeof(struct line_header) - sizeof(crc));
973 1101
974 return crc; 1102 return crc;
@@ -996,7 +1124,7 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
996 1124
997 crc = crc32_le(crc, (unsigned char *)emeta + 1125 crc = crc32_le(crc, (unsigned char *)emeta +
998 sizeof(struct line_header) + sizeof(crc), 1126 sizeof(struct line_header) + sizeof(crc),
999 lm->emeta_len - 1127 lm->emeta_len[0] -
1000 sizeof(struct line_header) - sizeof(crc)); 1128 sizeof(struct line_header) - sizeof(crc));
1001 1129
1002 return crc; 1130 return crc;
@@ -1016,9 +1144,27 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
1016 return flags; 1144 return flags;
1017} 1145}
1018 1146
1019static inline int pblk_set_read_mode(struct pblk *pblk) 1147enum {
1148 PBLK_READ_RANDOM = 0,
1149 PBLK_READ_SEQUENTIAL = 1,
1150};
1151
1152static inline int pblk_set_read_mode(struct pblk *pblk, int type)
1153{
1154 struct nvm_tgt_dev *dev = pblk->dev;
1155 struct nvm_geo *geo = &dev->geo;
1156 int flags;
1157
1158 flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
1159 if (type == PBLK_READ_SEQUENTIAL)
1160 flags |= geo->plane_mode >> 1;
1161
1162 return flags;
1163}
1164
1165static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
1020{ 1166{
1021 return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; 1167 return !(nr_secs % pblk->min_write_pgs);
1022} 1168}
1023 1169
1024#ifdef CONFIG_NVM_DEBUG 1170#ifdef CONFIG_NVM_DEBUG
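
To make the new emeta layout above concrete, here is a hedged sketch of walking an end-of-line metadata buffer with the accessors introduced in this hunk; pblk_emeta_dump_example is a made-up name, and the buffer is assumed to have been read with pblk_line_read_emeta() into an allocation of lm.emeta_len[0] bytes.

/* Sketch only: uses emeta_to_lbas()/emeta_to_vsc() from the hunk above. */
static void pblk_emeta_dump_example(struct pblk *pblk, struct line_emeta *emeta)
{
	__le64 *lba_list = emeta_to_lbas(pblk, emeta);	/* starts after emeta_len[1] bytes */
	__le32 *vsc_list = emeta_to_vsc(pblk, emeta);	/* starts after the L2P portion */
	u64 nr_lbas = le64_to_cpu(emeta->nr_lbas);
	u64 i;

	for (i = 0; i < nr_lbas; i++)
		pr_debug("pblk: lba[%llu] = %llu\n", i,
			 le64_to_cpu(lba_list[i]));

	/* The vsc list covers all lines; ~0U marks a free line. */
	pr_debug("pblk: vsc[0] = %u\n", le32_to_cpu(vsc_list[0]));
}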
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf0e28a0ff61..267f01ae87e4 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -279,8 +279,8 @@ static void rrpc_end_sync_bio(struct bio *bio)
279{ 279{
280 struct completion *waiting = bio->bi_private; 280 struct completion *waiting = bio->bi_private;
281 281
282 if (bio->bi_error) 282 if (bio->bi_status)
283 pr_err("nvm: gc request failed (%u).\n", bio->bi_error); 283 pr_err("nvm: gc request failed (%u).\n", bio->bi_status);
284 284
285 complete(waiting); 285 complete(waiting);
286} 286}
@@ -359,7 +359,7 @@ try:
359 goto finished; 359 goto finished;
360 } 360 }
361 wait_for_completion_io(&wait); 361 wait_for_completion_io(&wait);
362 if (bio->bi_error) { 362 if (bio->bi_status) {
363 rrpc_inflight_laddr_release(rrpc, rqd); 363 rrpc_inflight_laddr_release(rrpc, rqd);
364 goto finished; 364 goto finished;
365 } 365 }
@@ -385,7 +385,7 @@ try:
385 wait_for_completion_io(&wait); 385 wait_for_completion_io(&wait);
386 386
387 rrpc_inflight_laddr_release(rrpc, rqd); 387 rrpc_inflight_laddr_release(rrpc, rqd);
388 if (bio->bi_error) 388 if (bio->bi_status)
389 goto finished; 389 goto finished;
390 390
391 bio_reset(bio); 391 bio_reset(bio);
@@ -994,7 +994,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
994 struct nvm_rq *rqd; 994 struct nvm_rq *rqd;
995 int err; 995 int err;
996 996
997 blk_queue_split(q, &bio, q->bio_split); 997 blk_queue_split(q, &bio);
998 998
999 if (bio_op(bio) == REQ_OP_DISCARD) { 999 if (bio_op(bio) == REQ_OP_DISCARD) {
1000 rrpc_discard(rrpc, bio); 1000 rrpc_discard(rrpc, bio);
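
The rrpc.c hunk above, and the bcache and device-mapper hunks that follow, repeat one conversion: completion handlers read bio->bi_status (a blk_status_t such as BLK_STS_IOERR) instead of the old negative-errno bio->bi_error. A minimal handler in the new style, for illustration only (example_endio is a made-up name):

/* Sketch only: blk_status_to_errno() translates back to an errno where
 * callers still expect one; errno_to_blk_status() goes the other way.
 */
static void example_endio(struct bio *bio)
{
	struct completion *waiting = bio->bi_private;

	if (bio->bi_status)
		pr_err("example: I/O failed (%d)\n",
		       blk_status_to_errno(bio->bi_status));

	complete(waiting);
}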
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index c3ea03c9a1a8..dee542fff68e 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -849,10 +849,11 @@ static inline void wake_up_allocators(struct cache_set *c)
849 849
850/* Forward declarations */ 850/* Forward declarations */
851 851
852void bch_count_io_errors(struct cache *, int, const char *); 852void bch_count_io_errors(struct cache *, blk_status_t, const char *);
853void bch_bbio_count_io_errors(struct cache_set *, struct bio *, 853void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
854 int, const char *); 854 blk_status_t, const char *);
855void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); 855void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
856 const char *);
856void bch_bbio_free(struct bio *, struct cache_set *); 857void bch_bbio_free(struct bio *, struct cache_set *);
857struct bio *bch_bbio_alloc(struct cache_set *); 858struct bio *bch_bbio_alloc(struct cache_set *);
858 859
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 450d0e848ae4..866dcf78ff8e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -307,7 +307,7 @@ static void bch_btree_node_read(struct btree *b)
307 bch_submit_bbio(bio, b->c, &b->key, 0); 307 bch_submit_bbio(bio, b->c, &b->key, 0);
308 closure_sync(&cl); 308 closure_sync(&cl);
309 309
310 if (bio->bi_error) 310 if (bio->bi_status)
311 set_btree_node_io_error(b); 311 set_btree_node_io_error(b);
312 312
313 bch_bbio_free(bio, b->c); 313 bch_bbio_free(bio, b->c);
@@ -374,10 +374,10 @@ static void btree_node_write_endio(struct bio *bio)
374 struct closure *cl = bio->bi_private; 374 struct closure *cl = bio->bi_private;
375 struct btree *b = container_of(cl, struct btree, io); 375 struct btree *b = container_of(cl, struct btree, io);
376 376
377 if (bio->bi_error) 377 if (bio->bi_status)
378 set_btree_node_io_error(b); 378 set_btree_node_io_error(b);
379 379
380 bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree"); 380 bch_bbio_count_io_errors(b->c, bio, bio->bi_status, "writing btree");
381 closure_put(cl); 381 closure_put(cl);
382} 382}
383 383
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 06f55056aaae..35a5a7210e51 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -110,7 +110,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
110 struct bio_vec bv, cbv; 110 struct bio_vec bv, cbv;
111 struct bvec_iter iter, citer = { 0 }; 111 struct bvec_iter iter, citer = { 0 };
112 112
113 check = bio_clone(bio, GFP_NOIO); 113 check = bio_clone_kmalloc(bio, GFP_NOIO);
114 if (!check) 114 if (!check)
115 return; 115 return;
116 check->bi_opf = REQ_OP_READ; 116 check->bi_opf = REQ_OP_READ;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index db45a88c0ce9..6a9b85095e7b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -50,7 +50,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
50 50
51/* IO errors */ 51/* IO errors */
52 52
53void bch_count_io_errors(struct cache *ca, int error, const char *m) 53void bch_count_io_errors(struct cache *ca, blk_status_t error, const char *m)
54{ 54{
55 /* 55 /*
56 * The halflife of an error is: 56 * The halflife of an error is:
@@ -103,7 +103,7 @@ void bch_count_io_errors(struct cache *ca, int error, const char *m)
103} 103}
104 104
105void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio, 105void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
106 int error, const char *m) 106 blk_status_t error, const char *m)
107{ 107{
108 struct bbio *b = container_of(bio, struct bbio, bio); 108 struct bbio *b = container_of(bio, struct bbio, bio);
109 struct cache *ca = PTR_CACHE(c, &b->key, 0); 109 struct cache *ca = PTR_CACHE(c, &b->key, 0);
@@ -132,7 +132,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
132} 132}
133 133
134void bch_bbio_endio(struct cache_set *c, struct bio *bio, 134void bch_bbio_endio(struct cache_set *c, struct bio *bio,
135 int error, const char *m) 135 blk_status_t error, const char *m)
136{ 136{
137 struct closure *cl = bio->bi_private; 137 struct closure *cl = bio->bi_private;
138 138
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1198e53d5670..0352d05e495c 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -549,7 +549,7 @@ static void journal_write_endio(struct bio *bio)
549{ 549{
550 struct journal_write *w = bio->bi_private; 550 struct journal_write *w = bio->bi_private;
551 551
552 cache_set_err_on(bio->bi_error, w->c, "journal io error"); 552 cache_set_err_on(bio->bi_status, w->c, "journal io error");
553 closure_put(&w->c->journal.io); 553 closure_put(&w->c->journal.io);
554} 554}
555 555
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 13b8a907006d..f633b30c962e 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -63,14 +63,14 @@ static void read_moving_endio(struct bio *bio)
63 struct moving_io *io = container_of(bio->bi_private, 63 struct moving_io *io = container_of(bio->bi_private,
64 struct moving_io, cl); 64 struct moving_io, cl);
65 65
66 if (bio->bi_error) 66 if (bio->bi_status)
67 io->op.error = bio->bi_error; 67 io->op.status = bio->bi_status;
68 else if (!KEY_DIRTY(&b->key) && 68 else if (!KEY_DIRTY(&b->key) &&
69 ptr_stale(io->op.c, &b->key, 0)) { 69 ptr_stale(io->op.c, &b->key, 0)) {
70 io->op.error = -EINTR; 70 io->op.status = BLK_STS_IOERR;
71 } 71 }
72 72
73 bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move"); 73 bch_bbio_endio(io->op.c, bio, bio->bi_status, "reading data to move");
74} 74}
75 75
76static void moving_init(struct moving_io *io) 76static void moving_init(struct moving_io *io)
@@ -92,7 +92,7 @@ static void write_moving(struct closure *cl)
92 struct moving_io *io = container_of(cl, struct moving_io, cl); 92 struct moving_io *io = container_of(cl, struct moving_io, cl);
93 struct data_insert_op *op = &io->op; 93 struct data_insert_op *op = &io->op;
94 94
95 if (!op->error) { 95 if (!op->status) {
96 moving_init(io); 96 moving_init(io);
97 97
98 io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key); 98 io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 709c9cc34369..019b3df9f1c6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -81,7 +81,7 @@ static void bch_data_insert_keys(struct closure *cl)
81 if (ret == -ESRCH) { 81 if (ret == -ESRCH) {
82 op->replace_collision = true; 82 op->replace_collision = true;
83 } else if (ret) { 83 } else if (ret) {
84 op->error = -ENOMEM; 84 op->status = BLK_STS_RESOURCE;
85 op->insert_data_done = true; 85 op->insert_data_done = true;
86 } 86 }
87 87
@@ -178,17 +178,17 @@ static void bch_data_insert_endio(struct bio *bio)
178 struct closure *cl = bio->bi_private; 178 struct closure *cl = bio->bi_private;
179 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); 179 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
180 180
181 if (bio->bi_error) { 181 if (bio->bi_status) {
182 /* TODO: We could try to recover from this. */ 182 /* TODO: We could try to recover from this. */
183 if (op->writeback) 183 if (op->writeback)
184 op->error = bio->bi_error; 184 op->status = bio->bi_status;
185 else if (!op->replace) 185 else if (!op->replace)
186 set_closure_fn(cl, bch_data_insert_error, op->wq); 186 set_closure_fn(cl, bch_data_insert_error, op->wq);
187 else 187 else
188 set_closure_fn(cl, NULL, NULL); 188 set_closure_fn(cl, NULL, NULL);
189 } 189 }
190 190
191 bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache"); 191 bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
192} 192}
193 193
194static void bch_data_insert_start(struct closure *cl) 194static void bch_data_insert_start(struct closure *cl)
@@ -488,15 +488,15 @@ static void bch_cache_read_endio(struct bio *bio)
488 * from the backing device. 488 * from the backing device.
489 */ 489 */
490 490
491 if (bio->bi_error) 491 if (bio->bi_status)
492 s->iop.error = bio->bi_error; 492 s->iop.status = bio->bi_status;
493 else if (!KEY_DIRTY(&b->key) && 493 else if (!KEY_DIRTY(&b->key) &&
494 ptr_stale(s->iop.c, &b->key, 0)) { 494 ptr_stale(s->iop.c, &b->key, 0)) {
495 atomic_long_inc(&s->iop.c->cache_read_races); 495 atomic_long_inc(&s->iop.c->cache_read_races);
496 s->iop.error = -EINTR; 496 s->iop.status = BLK_STS_IOERR;
497 } 497 }
498 498
499 bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache"); 499 bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
500} 500}
501 501
502/* 502/*
@@ -593,9 +593,9 @@ static void request_endio(struct bio *bio)
593{ 593{
594 struct closure *cl = bio->bi_private; 594 struct closure *cl = bio->bi_private;
595 595
596 if (bio->bi_error) { 596 if (bio->bi_status) {
597 struct search *s = container_of(cl, struct search, cl); 597 struct search *s = container_of(cl, struct search, cl);
598 s->iop.error = bio->bi_error; 598 s->iop.status = bio->bi_status;
599 /* Only cache read errors are recoverable */ 599 /* Only cache read errors are recoverable */
600 s->recoverable = false; 600 s->recoverable = false;
601 } 601 }
@@ -611,7 +611,7 @@ static void bio_complete(struct search *s)
611 &s->d->disk->part0, s->start_time); 611 &s->d->disk->part0, s->start_time);
612 612
613 trace_bcache_request_end(s->d, s->orig_bio); 613 trace_bcache_request_end(s->d, s->orig_bio);
614 s->orig_bio->bi_error = s->iop.error; 614 s->orig_bio->bi_status = s->iop.status;
615 bio_endio(s->orig_bio); 615 bio_endio(s->orig_bio);
616 s->orig_bio = NULL; 616 s->orig_bio = NULL;
617 } 617 }
@@ -664,7 +664,7 @@ static inline struct search *search_alloc(struct bio *bio,
664 s->iop.inode = d->id; 664 s->iop.inode = d->id;
665 s->iop.write_point = hash_long((unsigned long) current, 16); 665 s->iop.write_point = hash_long((unsigned long) current, 16);
666 s->iop.write_prio = 0; 666 s->iop.write_prio = 0;
667 s->iop.error = 0; 667 s->iop.status = 0;
668 s->iop.flags = 0; 668 s->iop.flags = 0;
669 s->iop.flush_journal = op_is_flush(bio->bi_opf); 669 s->iop.flush_journal = op_is_flush(bio->bi_opf);
670 s->iop.wq = bcache_wq; 670 s->iop.wq = bcache_wq;
@@ -707,7 +707,7 @@ static void cached_dev_read_error(struct closure *cl)
707 /* Retry from the backing device: */ 707 /* Retry from the backing device: */
708 trace_bcache_read_retry(s->orig_bio); 708 trace_bcache_read_retry(s->orig_bio);
709 709
710 s->iop.error = 0; 710 s->iop.status = 0;
711 do_bio_hook(s, s->orig_bio); 711 do_bio_hook(s, s->orig_bio);
712 712
713 /* XXX: invalidate cache */ 713 /* XXX: invalidate cache */
@@ -767,7 +767,7 @@ static void cached_dev_read_done_bh(struct closure *cl)
767 !s->cache_miss, s->iop.bypass); 767 !s->cache_miss, s->iop.bypass);
768 trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); 768 trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);
769 769
770 if (s->iop.error) 770 if (s->iop.status)
771 continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); 771 continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
772 else if (s->iop.bio || verify(dc, &s->bio.bio)) 772 else if (s->iop.bio || verify(dc, &s->bio.bio))
773 continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq); 773 continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 1ff36875c2b3..7689176951ce 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -10,7 +10,7 @@ struct data_insert_op {
10 unsigned inode; 10 unsigned inode;
11 uint16_t write_point; 11 uint16_t write_point;
12 uint16_t write_prio; 12 uint16_t write_prio;
13 short error; 13 blk_status_t status;
14 14
15 union { 15 union {
16 uint16_t flags; 16 uint16_t flags;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e57353e39168..8352fad765f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -271,7 +271,7 @@ static void write_super_endio(struct bio *bio)
271{ 271{
272 struct cache *ca = bio->bi_private; 272 struct cache *ca = bio->bi_private;
273 273
274 bch_count_io_errors(ca, bio->bi_error, "writing superblock"); 274 bch_count_io_errors(ca, bio->bi_status, "writing superblock");
275 closure_put(&ca->set->sb_write); 275 closure_put(&ca->set->sb_write);
276} 276}
277 277
@@ -321,7 +321,7 @@ static void uuid_endio(struct bio *bio)
321 struct closure *cl = bio->bi_private; 321 struct closure *cl = bio->bi_private;
322 struct cache_set *c = container_of(cl, struct cache_set, uuid_write); 322 struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
323 323
324 cache_set_err_on(bio->bi_error, c, "accessing uuids"); 324 cache_set_err_on(bio->bi_status, c, "accessing uuids");
325 bch_bbio_free(bio, c); 325 bch_bbio_free(bio, c);
326 closure_put(cl); 326 closure_put(cl);
327} 327}
@@ -494,7 +494,7 @@ static void prio_endio(struct bio *bio)
494{ 494{
495 struct cache *ca = bio->bi_private; 495 struct cache *ca = bio->bi_private;
496 496
497 cache_set_err_on(bio->bi_error, ca->set, "accessing priorities"); 497 cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
498 bch_bbio_free(bio, ca->set); 498 bch_bbio_free(bio, ca->set);
499 closure_put(&ca->prio); 499 closure_put(&ca->prio);
500} 500}
@@ -782,7 +782,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
782 782
783 minor *= BCACHE_MINORS; 783 minor *= BCACHE_MINORS;
784 784
785 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 785 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
786 BIOSET_NEED_BVECS |
787 BIOSET_NEED_RESCUER)) ||
786 !(d->disk = alloc_disk(BCACHE_MINORS))) { 788 !(d->disk = alloc_disk(BCACHE_MINORS))) {
787 ida_simple_remove(&bcache_minor, minor); 789 ida_simple_remove(&bcache_minor, minor);
788 return -ENOMEM; 790 return -ENOMEM;
@@ -1516,7 +1518,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1516 sizeof(struct bbio) + sizeof(struct bio_vec) * 1518 sizeof(struct bbio) + sizeof(struct bio_vec) *
1517 bucket_pages(c))) || 1519 bucket_pages(c))) ||
1518 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) || 1520 !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
1519 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 1521 !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
1522 BIOSET_NEED_BVECS |
1523 BIOSET_NEED_RESCUER)) ||
1520 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) || 1524 !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
1521 !(c->moving_gc_wq = alloc_workqueue("bcache_gc", 1525 !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
1522 WQ_MEM_RECLAIM, 0)) || 1526 WQ_MEM_RECLAIM, 0)) ||
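
The bioset_create() calls above also pick up the new third argument from this series: pool behaviour that used to be implicit is now requested through flags. A hedged sketch of an equivalent allocation follows; example_bioset is a made-up name and the zero front_pad is an arbitrary value, not bcache's offsetof(struct bbio, bio).

/* Sketch only: BIOSET_NEED_BVECS keeps a bvec pool for cloned data bios,
 * BIOSET_NEED_RESCUER keeps the per-bioset rescuer workqueue.
 */
static struct bio_set *example_bioset(void)
{
	return bioset_create(4, 0, BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
}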
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6ac2e48b9235..42c66e76f05e 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -167,7 +167,7 @@ static void dirty_endio(struct bio *bio)
167 struct keybuf_key *w = bio->bi_private; 167 struct keybuf_key *w = bio->bi_private;
168 struct dirty_io *io = w->private; 168 struct dirty_io *io = w->private;
169 169
170 if (bio->bi_error) 170 if (bio->bi_status)
171 SET_KEY_DIRTY(&w->key, false); 171 SET_KEY_DIRTY(&w->key, false);
172 172
173 closure_put(&io->cl); 173 closure_put(&io->cl);
@@ -195,7 +195,7 @@ static void read_dirty_endio(struct bio *bio)
195 struct dirty_io *io = w->private; 195 struct dirty_io *io = w->private;
196 196
197 bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), 197 bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0),
198 bio->bi_error, "reading dirty data from cache"); 198 bio->bi_status, "reading dirty data from cache");
199 199
200 dirty_endio(bio); 200 dirty_endio(bio);
201} 201}
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index ae7da2c30a57..82d27384d31f 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -229,7 +229,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
229EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); 229EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
230 230
231void dm_cell_error(struct dm_bio_prison *prison, 231void dm_cell_error(struct dm_bio_prison *prison,
232 struct dm_bio_prison_cell *cell, int error) 232 struct dm_bio_prison_cell *cell, blk_status_t error)
233{ 233{
234 struct bio_list bios; 234 struct bio_list bios;
235 struct bio *bio; 235 struct bio *bio;
@@ -238,7 +238,7 @@ void dm_cell_error(struct dm_bio_prison *prison,
238 dm_cell_release(prison, cell, &bios); 238 dm_cell_release(prison, cell, &bios);
239 239
240 while ((bio = bio_list_pop(&bios))) { 240 while ((bio = bio_list_pop(&bios))) {
241 bio->bi_error = error; 241 bio->bi_status = error;
242 bio_endio(bio); 242 bio_endio(bio);
243 } 243 }
244} 244}
diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h
index cddd4ac07e2c..cec52ac5e1ae 100644
--- a/drivers/md/dm-bio-prison-v1.h
+++ b/drivers/md/dm-bio-prison-v1.h
@@ -91,7 +91,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison,
91 struct dm_bio_prison_cell *cell, 91 struct dm_bio_prison_cell *cell,
92 struct bio_list *inmates); 92 struct bio_list *inmates);
93void dm_cell_error(struct dm_bio_prison *prison, 93void dm_cell_error(struct dm_bio_prison *prison,
94 struct dm_bio_prison_cell *cell, int error); 94 struct dm_bio_prison_cell *cell, blk_status_t error);
95 95
96/* 96/*
97 * Visits the cell and then releases. Guarantees no new inmates are 97 * Visits the cell and then releases. Guarantees no new inmates are
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 840c1496b2b1..850ff6c67994 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -145,8 +145,8 @@ struct dm_buffer {
145 enum data_mode data_mode; 145 enum data_mode data_mode;
146 unsigned char list_mode; /* LIST_* */ 146 unsigned char list_mode; /* LIST_* */
147 unsigned hold_count; 147 unsigned hold_count;
148 int read_error; 148 blk_status_t read_error;
149 int write_error; 149 blk_status_t write_error;
150 unsigned long state; 150 unsigned long state;
151 unsigned long last_accessed; 151 unsigned long last_accessed;
152 struct dm_bufio_client *c; 152 struct dm_bufio_client *c;
@@ -555,7 +555,7 @@ static void dmio_complete(unsigned long error, void *context)
555{ 555{
556 struct dm_buffer *b = context; 556 struct dm_buffer *b = context;
557 557
558 b->bio.bi_error = error ? -EIO : 0; 558 b->bio.bi_status = error ? BLK_STS_IOERR : 0;
559 b->bio.bi_end_io(&b->bio); 559 b->bio.bi_end_io(&b->bio);
560} 560}
561 561
@@ -588,7 +588,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
588 588
589 r = dm_io(&io_req, 1, &region, NULL); 589 r = dm_io(&io_req, 1, &region, NULL);
590 if (r) { 590 if (r) {
591 b->bio.bi_error = r; 591 b->bio.bi_status = errno_to_blk_status(r);
592 end_io(&b->bio); 592 end_io(&b->bio);
593 } 593 }
594} 594}
@@ -596,7 +596,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
596static void inline_endio(struct bio *bio) 596static void inline_endio(struct bio *bio)
597{ 597{
598 bio_end_io_t *end_fn = bio->bi_private; 598 bio_end_io_t *end_fn = bio->bi_private;
599 int error = bio->bi_error; 599 blk_status_t status = bio->bi_status;
600 600
601 /* 601 /*
602 * Reset the bio to free any attached resources 602 * Reset the bio to free any attached resources
@@ -604,7 +604,7 @@ static void inline_endio(struct bio *bio)
604 */ 604 */
605 bio_reset(bio); 605 bio_reset(bio);
606 606
607 bio->bi_error = error; 607 bio->bi_status = status;
608 end_fn(bio); 608 end_fn(bio);
609} 609}
610 610
@@ -685,11 +685,12 @@ static void write_endio(struct bio *bio)
685{ 685{
686 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); 686 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
687 687
688 b->write_error = bio->bi_error; 688 b->write_error = bio->bi_status;
689 if (unlikely(bio->bi_error)) { 689 if (unlikely(bio->bi_status)) {
690 struct dm_bufio_client *c = b->c; 690 struct dm_bufio_client *c = b->c;
691 int error = bio->bi_error; 691
692 (void)cmpxchg(&c->async_write_error, 0, error); 692 (void)cmpxchg(&c->async_write_error, 0,
693 blk_status_to_errno(bio->bi_status));
693 } 694 }
694 695
695 BUG_ON(!test_bit(B_WRITING, &b->state)); 696 BUG_ON(!test_bit(B_WRITING, &b->state));
@@ -1063,7 +1064,7 @@ static void read_endio(struct bio *bio)
1063{ 1064{
1064 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); 1065 struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
1065 1066
1066 b->read_error = bio->bi_error; 1067 b->read_error = bio->bi_status;
1067 1068
1068 BUG_ON(!test_bit(B_READING, &b->state)); 1069 BUG_ON(!test_bit(B_READING, &b->state));
1069 1070
@@ -1107,7 +1108,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
1107 wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); 1108 wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
1108 1109
1109 if (b->read_error) { 1110 if (b->read_error) {
1110 int error = b->read_error; 1111 int error = blk_status_to_errno(b->read_error);
1111 1112
1112 dm_bufio_release(b); 1113 dm_bufio_release(b);
1113 1114
@@ -1257,7 +1258,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers_async);
1257 */ 1258 */
1258int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c) 1259int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c)
1259{ 1260{
1260 int a, f; 1261 blk_status_t a;
1262 int f;
1261 unsigned long buffers_processed = 0; 1263 unsigned long buffers_processed = 0;
1262 struct dm_buffer *b, *tmp; 1264 struct dm_buffer *b, *tmp;
1263 1265
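
dm-bufio keeps blk_status_t in its per-buffer state but still hands plain errnos back to its callers, so the conversion leans on the two block-layer helpers added earlier in the series: errno_to_blk_status() when an errno result is stored into a status field, and blk_status_to_errno() when a stored status is returned to a caller. A small sketch, with hypothetical wrapper names:

/*
 * Sketch of the boundary conversions used above; the two helpers are
 * real block-layer API, the wrappers around them are made up.
 */
static blk_status_t example_store_result(int err)
{
        return errno_to_blk_status(err);        /* e.g. -EIO -> BLK_STS_IOERR */
}

static int example_report_result(blk_status_t status)
{
        return blk_status_to_errno(status);     /* e.g. BLK_STS_IOERR -> -EIO */
}
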
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index d682a0511381..c5ea03fc7ee1 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -119,7 +119,7 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
119 */ 119 */
120struct continuation { 120struct continuation {
121 struct work_struct ws; 121 struct work_struct ws;
122 int input; 122 blk_status_t input;
123}; 123};
124 124
125static inline void init_continuation(struct continuation *k, 125static inline void init_continuation(struct continuation *k,
@@ -145,7 +145,7 @@ struct batcher {
145 /* 145 /*
146 * The operation that everyone is waiting for. 146 * The operation that everyone is waiting for.
147 */ 147 */
148 int (*commit_op)(void *context); 148 blk_status_t (*commit_op)(void *context);
149 void *commit_context; 149 void *commit_context;
150 150
151 /* 151 /*
@@ -171,8 +171,7 @@ struct batcher {
171static void __commit(struct work_struct *_ws) 171static void __commit(struct work_struct *_ws)
172{ 172{
173 struct batcher *b = container_of(_ws, struct batcher, commit_work); 173 struct batcher *b = container_of(_ws, struct batcher, commit_work);
174 174 blk_status_t r;
175 int r;
176 unsigned long flags; 175 unsigned long flags;
177 struct list_head work_items; 176 struct list_head work_items;
178 struct work_struct *ws, *tmp; 177 struct work_struct *ws, *tmp;
@@ -205,7 +204,7 @@ static void __commit(struct work_struct *_ws)
205 204
206 while ((bio = bio_list_pop(&bios))) { 205 while ((bio = bio_list_pop(&bios))) {
207 if (r) { 206 if (r) {
208 bio->bi_error = r; 207 bio->bi_status = r;
209 bio_endio(bio); 208 bio_endio(bio);
210 } else 209 } else
211 b->issue_op(bio, b->issue_context); 210 b->issue_op(bio, b->issue_context);
@@ -213,7 +212,7 @@ static void __commit(struct work_struct *_ws)
213} 212}
214 213
215static void batcher_init(struct batcher *b, 214static void batcher_init(struct batcher *b,
216 int (*commit_op)(void *), 215 blk_status_t (*commit_op)(void *),
217 void *commit_context, 216 void *commit_context,
218 void (*issue_op)(struct bio *bio, void *), 217 void (*issue_op)(struct bio *bio, void *),
219 void *issue_context, 218 void *issue_context,
@@ -955,7 +954,7 @@ static void writethrough_endio(struct bio *bio)
955 954
956 dm_unhook_bio(&pb->hook_info, bio); 955 dm_unhook_bio(&pb->hook_info, bio);
957 956
958 if (bio->bi_error) { 957 if (bio->bi_status) {
959 bio_endio(bio); 958 bio_endio(bio);
960 return; 959 return;
961 } 960 }
@@ -1220,7 +1219,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
1220 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k); 1219 struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
1221 1220
1222 if (read_err || write_err) 1221 if (read_err || write_err)
1223 mg->k.input = -EIO; 1222 mg->k.input = BLK_STS_IOERR;
1224 1223
1225 queue_continuation(mg->cache->wq, &mg->k); 1224 queue_continuation(mg->cache->wq, &mg->k);
1226} 1225}
@@ -1266,8 +1265,8 @@ static void overwrite_endio(struct bio *bio)
1266 1265
1267 dm_unhook_bio(&pb->hook_info, bio); 1266 dm_unhook_bio(&pb->hook_info, bio);
1268 1267
1269 if (bio->bi_error) 1268 if (bio->bi_status)
1270 mg->k.input = bio->bi_error; 1269 mg->k.input = bio->bi_status;
1271 1270
1272 queue_continuation(mg->cache->wq, &mg->k); 1271 queue_continuation(mg->cache->wq, &mg->k);
1273} 1272}
@@ -1323,8 +1322,10 @@ static void mg_complete(struct dm_cache_migration *mg, bool success)
1323 if (mg->overwrite_bio) { 1322 if (mg->overwrite_bio) {
1324 if (success) 1323 if (success)
1325 force_set_dirty(cache, cblock); 1324 force_set_dirty(cache, cblock);
1325 else if (mg->k.input)
1326 mg->overwrite_bio->bi_status = mg->k.input;
1326 else 1327 else
1327 mg->overwrite_bio->bi_error = (mg->k.input ? : -EIO); 1328 mg->overwrite_bio->bi_status = BLK_STS_IOERR;
1328 bio_endio(mg->overwrite_bio); 1329 bio_endio(mg->overwrite_bio);
1329 } else { 1330 } else {
1330 if (success) 1331 if (success)
@@ -1504,7 +1505,7 @@ static void mg_copy(struct work_struct *ws)
1504 r = copy(mg, is_policy_promote); 1505 r = copy(mg, is_policy_promote);
1505 if (r) { 1506 if (r) {
1506 DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); 1507 DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
1507 mg->k.input = -EIO; 1508 mg->k.input = BLK_STS_IOERR;
1508 mg_complete(mg, false); 1509 mg_complete(mg, false);
1509 } 1510 }
1510 } 1511 }
@@ -1907,12 +1908,12 @@ static int commit(struct cache *cache, bool clean_shutdown)
1907/* 1908/*
1908 * Used by the batcher. 1909 * Used by the batcher.
1909 */ 1910 */
1910static int commit_op(void *context) 1911static blk_status_t commit_op(void *context)
1911{ 1912{
1912 struct cache *cache = context; 1913 struct cache *cache = context;
1913 1914
1914 if (dm_cache_changed_this_transaction(cache->cmd)) 1915 if (dm_cache_changed_this_transaction(cache->cmd))
1915 return commit(cache, false); 1916 return errno_to_blk_status(commit(cache, false));
1916 1917
1917 return 0; 1918 return 0;
1918} 1919}
@@ -2018,7 +2019,7 @@ static void requeue_deferred_bios(struct cache *cache)
2018 bio_list_init(&cache->deferred_bios); 2019 bio_list_init(&cache->deferred_bios);
2019 2020
2020 while ((bio = bio_list_pop(&bios))) { 2021 while ((bio = bio_list_pop(&bios))) {
2021 bio->bi_error = DM_ENDIO_REQUEUE; 2022 bio->bi_status = BLK_STS_DM_REQUEUE;
2022 bio_endio(bio); 2023 bio_endio(bio);
2023 } 2024 }
2024} 2025}
@@ -2820,7 +2821,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
2820 return r; 2821 return r;
2821} 2822}
2822 2823
2823static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 2824static int cache_end_io(struct dm_target *ti, struct bio *bio,
2825 blk_status_t *error)
2824{ 2826{
2825 struct cache *cache = ti->private; 2827 struct cache *cache = ti->private;
2826 unsigned long flags; 2828 unsigned long flags;
@@ -2838,7 +2840,7 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2838 bio_drop_shared_lock(cache, bio); 2840 bio_drop_shared_lock(cache, bio);
2839 accounted_complete(cache, bio); 2841 accounted_complete(cache, bio);
2840 2842
2841 return 0; 2843 return DM_ENDIO_DONE;
2842} 2844}
2843 2845
2844static int write_dirty_bitset(struct cache *cache) 2846static int write_dirty_bitset(struct cache *cache)
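
One detail in the cache-target hunks worth calling out: deferred bios are no longer requeued by writing the DM_ENDIO_REQUEUE constant into bi_error; the dedicated BLK_STS_DM_REQUEUE status is set on bi_status and bio_endio() takes care of the rest. A hedged sketch of that requeue loop, with a made-up function name:

/* Sketch only; "example_requeue_bios" is not a function from the patch. */
static void example_requeue_bios(struct bio_list *bios)
{
        struct bio *bio;

        while ((bio = bio_list_pop(bios))) {
                bio->bi_status = BLK_STS_DM_REQUEUE;
                bio_endio(bio);
        }
}
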
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ebf9e72d479b..9e1b72e8f7ef 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -71,7 +71,7 @@ struct dm_crypt_io {
71 struct convert_context ctx; 71 struct convert_context ctx;
72 72
73 atomic_t io_pending; 73 atomic_t io_pending;
74 int error; 74 blk_status_t error;
75 sector_t sector; 75 sector_t sector;
76 76
77 struct rb_node rb_node; 77 struct rb_node rb_node;
@@ -1292,7 +1292,7 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
1292/* 1292/*
1293 * Encrypt / decrypt data from one bio to another one (can be the same one) 1293 * Encrypt / decrypt data from one bio to another one (can be the same one)
1294 */ 1294 */
1295static int crypt_convert(struct crypt_config *cc, 1295static blk_status_t crypt_convert(struct crypt_config *cc,
1296 struct convert_context *ctx) 1296 struct convert_context *ctx)
1297{ 1297{
1298 unsigned int tag_offset = 0; 1298 unsigned int tag_offset = 0;
@@ -1343,13 +1343,13 @@ static int crypt_convert(struct crypt_config *cc,
1343 */ 1343 */
1344 case -EBADMSG: 1344 case -EBADMSG:
1345 atomic_dec(&ctx->cc_pending); 1345 atomic_dec(&ctx->cc_pending);
1346 return -EILSEQ; 1346 return BLK_STS_PROTECTION;
1347 /* 1347 /*
1348 * There was an error while processing the request. 1348 * There was an error while processing the request.
1349 */ 1349 */
1350 default: 1350 default:
1351 atomic_dec(&ctx->cc_pending); 1351 atomic_dec(&ctx->cc_pending);
1352 return -EIO; 1352 return BLK_STS_IOERR;
1353 } 1353 }
1354 } 1354 }
1355 1355
@@ -1463,7 +1463,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
1463{ 1463{
1464 struct crypt_config *cc = io->cc; 1464 struct crypt_config *cc = io->cc;
1465 struct bio *base_bio = io->base_bio; 1465 struct bio *base_bio = io->base_bio;
1466 int error = io->error; 1466 blk_status_t error = io->error;
1467 1467
1468 if (!atomic_dec_and_test(&io->io_pending)) 1468 if (!atomic_dec_and_test(&io->io_pending))
1469 return; 1469 return;
@@ -1476,7 +1476,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
1476 else 1476 else
1477 kfree(io->integrity_metadata); 1477 kfree(io->integrity_metadata);
1478 1478
1479 base_bio->bi_error = error; 1479 base_bio->bi_status = error;
1480 bio_endio(base_bio); 1480 bio_endio(base_bio);
1481} 1481}
1482 1482
@@ -1502,7 +1502,7 @@ static void crypt_endio(struct bio *clone)
1502 struct dm_crypt_io *io = clone->bi_private; 1502 struct dm_crypt_io *io = clone->bi_private;
1503 struct crypt_config *cc = io->cc; 1503 struct crypt_config *cc = io->cc;
1504 unsigned rw = bio_data_dir(clone); 1504 unsigned rw = bio_data_dir(clone);
1505 int error; 1505 blk_status_t error;
1506 1506
1507 /* 1507 /*
1508 * free the processed pages 1508 * free the processed pages
@@ -1510,7 +1510,7 @@ static void crypt_endio(struct bio *clone)
1510 if (rw == WRITE) 1510 if (rw == WRITE)
1511 crypt_free_buffer_pages(cc, clone); 1511 crypt_free_buffer_pages(cc, clone);
1512 1512
1513 error = clone->bi_error; 1513 error = clone->bi_status;
1514 bio_put(clone); 1514 bio_put(clone);
1515 1515
1516 if (rw == READ && !error) { 1516 if (rw == READ && !error) {
@@ -1570,7 +1570,7 @@ static void kcryptd_io_read_work(struct work_struct *work)
1570 1570
1571 crypt_inc_pending(io); 1571 crypt_inc_pending(io);
1572 if (kcryptd_io_read(io, GFP_NOIO)) 1572 if (kcryptd_io_read(io, GFP_NOIO))
1573 io->error = -ENOMEM; 1573 io->error = BLK_STS_RESOURCE;
1574 crypt_dec_pending(io); 1574 crypt_dec_pending(io);
1575} 1575}
1576 1576
@@ -1656,7 +1656,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
1656 sector_t sector; 1656 sector_t sector;
1657 struct rb_node **rbp, *parent; 1657 struct rb_node **rbp, *parent;
1658 1658
1659 if (unlikely(io->error < 0)) { 1659 if (unlikely(io->error)) {
1660 crypt_free_buffer_pages(cc, clone); 1660 crypt_free_buffer_pages(cc, clone);
1661 bio_put(clone); 1661 bio_put(clone);
1662 crypt_dec_pending(io); 1662 crypt_dec_pending(io);
@@ -1697,7 +1697,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1697 struct bio *clone; 1697 struct bio *clone;
1698 int crypt_finished; 1698 int crypt_finished;
1699 sector_t sector = io->sector; 1699 sector_t sector = io->sector;
1700 int r; 1700 blk_status_t r;
1701 1701
1702 /* 1702 /*
1703 * Prevent io from disappearing until this function completes. 1703 * Prevent io from disappearing until this function completes.
@@ -1707,7 +1707,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1707 1707
1708 clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size); 1708 clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
1709 if (unlikely(!clone)) { 1709 if (unlikely(!clone)) {
1710 io->error = -EIO; 1710 io->error = BLK_STS_IOERR;
1711 goto dec; 1711 goto dec;
1712 } 1712 }
1713 1713
@@ -1718,7 +1718,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1718 1718
1719 crypt_inc_pending(io); 1719 crypt_inc_pending(io);
1720 r = crypt_convert(cc, &io->ctx); 1720 r = crypt_convert(cc, &io->ctx);
1721 if (r < 0) 1721 if (r)
1722 io->error = r; 1722 io->error = r;
1723 crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); 1723 crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending);
1724 1724
@@ -1740,7 +1740,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
1740static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) 1740static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
1741{ 1741{
1742 struct crypt_config *cc = io->cc; 1742 struct crypt_config *cc = io->cc;
1743 int r = 0; 1743 blk_status_t r;
1744 1744
1745 crypt_inc_pending(io); 1745 crypt_inc_pending(io);
1746 1746
@@ -1748,7 +1748,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
1748 io->sector); 1748 io->sector);
1749 1749
1750 r = crypt_convert(cc, &io->ctx); 1750 r = crypt_convert(cc, &io->ctx);
1751 if (r < 0) 1751 if (r)
1752 io->error = r; 1752 io->error = r;
1753 1753
1754 if (atomic_dec_and_test(&io->ctx.cc_pending)) 1754 if (atomic_dec_and_test(&io->ctx.cc_pending))
@@ -1781,9 +1781,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
1781 if (error == -EBADMSG) { 1781 if (error == -EBADMSG) {
1782 DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu", 1782 DMERR_LIMIT("INTEGRITY AEAD ERROR, sector %llu",
1783 (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq))); 1783 (unsigned long long)le64_to_cpu(*org_sector_of_dmreq(cc, dmreq)));
1784 io->error = -EILSEQ; 1784 io->error = BLK_STS_PROTECTION;
1785 } else if (error < 0) 1785 } else if (error < 0)
1786 io->error = -EIO; 1786 io->error = BLK_STS_IOERR;
1787 1787
1788 crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); 1788 crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
1789 1789
@@ -2677,7 +2677,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2677 goto bad; 2677 goto bad;
2678 } 2678 }
2679 2679
2680 cc->bs = bioset_create(MIN_IOS, 0); 2680 cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
2681 BIOSET_NEED_RESCUER));
2681 if (!cc->bs) { 2682 if (!cc->bs) {
2682 ti->error = "Cannot allocate crypt bioset"; 2683 ti->error = "Cannot allocate crypt bioset";
2683 goto bad; 2684 goto bad;
@@ -2795,10 +2796,10 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
2795 * and is aligned to this size as defined in IO hints. 2796 * and is aligned to this size as defined in IO hints.
2796 */ 2797 */
2797 if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0)) 2798 if (unlikely((bio->bi_iter.bi_sector & ((cc->sector_size >> SECTOR_SHIFT) - 1)) != 0))
2798 return -EIO; 2799 return DM_MAPIO_KILL;
2799 2800
2800 if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1))) 2801 if (unlikely(bio->bi_iter.bi_size & (cc->sector_size - 1)))
2801 return -EIO; 2802 return DM_MAPIO_KILL;
2802 2803
2803 io = dm_per_bio_data(bio, cc->per_bio_data_size); 2804 io = dm_per_bio_data(bio, cc->per_bio_data_size);
2804 crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); 2805 crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
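
The crypt_map() changes show the map-side half of the new convention: bio-based .map hooks no longer fail a bio by returning a negative errno; they return DM_MAPIO_KILL and let the DM core complete the bio with BLK_STS_IOERR. A minimal sketch of a .map hook under that convention; the target context and alignment check below are hypothetical:

struct example_tgt {
        struct dm_dev   *dev;
        unsigned int    sectors_per_block;      /* assumed power of two */
};

static int example_map(struct dm_target *ti, struct bio *bio)
{
        struct example_tgt *et = ti->private;

        if (bio->bi_iter.bi_sector & (et->sectors_per_block - 1))
                return DM_MAPIO_KILL;   /* core ends the bio as BLK_STS_IOERR */

        bio->bi_bdev = et->dev->bdev;
        return DM_MAPIO_REMAPPED;
}
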
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 13305a182611..3d04d5ce19d9 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -321,7 +321,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
321 if (bio_data_dir(bio) == READ) { 321 if (bio_data_dir(bio) == READ) {
322 if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) && 322 if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags) &&
323 !test_bit(ERROR_WRITES, &fc->flags)) 323 !test_bit(ERROR_WRITES, &fc->flags))
324 return -EIO; 324 return DM_MAPIO_KILL;
325 goto map_bio; 325 goto map_bio;
326 } 326 }
327 327
@@ -349,7 +349,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
349 /* 349 /*
350 * By default, error all I/O. 350 * By default, error all I/O.
351 */ 351 */
352 return -EIO; 352 return DM_MAPIO_KILL;
353 } 353 }
354 354
355map_bio: 355map_bio:
@@ -358,12 +358,13 @@ map_bio:
358 return DM_MAPIO_REMAPPED; 358 return DM_MAPIO_REMAPPED;
359} 359}
360 360
361static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) 361static int flakey_end_io(struct dm_target *ti, struct bio *bio,
362 blk_status_t *error)
362{ 363{
363 struct flakey_c *fc = ti->private; 364 struct flakey_c *fc = ti->private;
364 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); 365 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
365 366
366 if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { 367 if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
367 if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && 368 if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
368 all_corrupt_bio_flags_match(bio, fc)) { 369 all_corrupt_bio_flags_match(bio, fc)) {
369 /* 370 /*
@@ -377,11 +378,11 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
377 * Error read during the down_interval if drop_writes 378 * Error read during the down_interval if drop_writes
378 * and error_writes were not configured. 379 * and error_writes were not configured.
379 */ 380 */
380 return -EIO; 381 *error = BLK_STS_IOERR;
381 } 382 }
382 } 383 }
383 384
384 return error; 385 return DM_ENDIO_DONE;
385} 386}
386 387
387static void flakey_status(struct dm_target *ti, status_type_t type, 388static void flakey_status(struct dm_target *ti, status_type_t type,
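
flakey_end_io() illustrates the new bio-based .end_io prototype: the status arrives by pointer, the hook may overwrite it, and the return value is one of the DM_ENDIO_* codes rather than an errno. A hedged sketch with hypothetical per-bio data:

/* Sketch only; "example_pb" and the inject_error flag are made up. */
struct example_pb {
        bool    inject_error;
};

static int example_end_io(struct dm_target *ti, struct bio *bio,
                          blk_status_t *error)
{
        struct example_pb *pb = dm_per_bio_data(bio, sizeof(struct example_pb));

        if (!*error && pb->inject_error)
                *error = BLK_STS_IOERR;         /* override the completion status */

        return DM_ENDIO_DONE;
}
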
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 93b181088168..1b224aa9cf15 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -246,7 +246,7 @@ struct dm_integrity_io {
246 unsigned metadata_offset; 246 unsigned metadata_offset;
247 247
248 atomic_t in_flight; 248 atomic_t in_flight;
249 int bi_error; 249 blk_status_t bi_status;
250 250
251 struct completion *completion; 251 struct completion *completion;
252 252
@@ -1118,8 +1118,8 @@ static void submit_flush_bio(struct dm_integrity_c *ic, struct dm_integrity_io *
1118static void do_endio(struct dm_integrity_c *ic, struct bio *bio) 1118static void do_endio(struct dm_integrity_c *ic, struct bio *bio)
1119{ 1119{
1120 int r = dm_integrity_failed(ic); 1120 int r = dm_integrity_failed(ic);
1121 if (unlikely(r) && !bio->bi_error) 1121 if (unlikely(r) && !bio->bi_status)
1122 bio->bi_error = r; 1122 bio->bi_status = errno_to_blk_status(r);
1123 bio_endio(bio); 1123 bio_endio(bio);
1124} 1124}
1125 1125
@@ -1127,7 +1127,7 @@ static void do_endio_flush(struct dm_integrity_c *ic, struct dm_integrity_io *di
1127{ 1127{
1128 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1128 struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1129 1129
1130 if (unlikely(dio->fua) && likely(!bio->bi_error) && likely(!dm_integrity_failed(ic))) 1130 if (unlikely(dio->fua) && likely(!bio->bi_status) && likely(!dm_integrity_failed(ic)))
1131 submit_flush_bio(ic, dio); 1131 submit_flush_bio(ic, dio);
1132 else 1132 else
1133 do_endio(ic, bio); 1133 do_endio(ic, bio);
@@ -1146,9 +1146,9 @@ static void dec_in_flight(struct dm_integrity_io *dio)
1146 1146
1147 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io)); 1147 bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
1148 1148
1149 if (unlikely(dio->bi_error) && !bio->bi_error) 1149 if (unlikely(dio->bi_status) && !bio->bi_status)
1150 bio->bi_error = dio->bi_error; 1150 bio->bi_status = dio->bi_status;
1151 if (likely(!bio->bi_error) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) { 1151 if (likely(!bio->bi_status) && unlikely(bio_sectors(bio) != dio->range.n_sectors)) {
1152 dio->range.logical_sector += dio->range.n_sectors; 1152 dio->range.logical_sector += dio->range.n_sectors;
1153 bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); 1153 bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT);
1154 INIT_WORK(&dio->work, integrity_bio_wait); 1154 INIT_WORK(&dio->work, integrity_bio_wait);
@@ -1322,7 +1322,7 @@ skip_io:
1322 dec_in_flight(dio); 1322 dec_in_flight(dio);
1323 return; 1323 return;
1324error: 1324error:
1325 dio->bi_error = r; 1325 dio->bi_status = errno_to_blk_status(r);
1326 dec_in_flight(dio); 1326 dec_in_flight(dio);
1327} 1327}
1328 1328
@@ -1335,7 +1335,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1335 sector_t area, offset; 1335 sector_t area, offset;
1336 1336
1337 dio->ic = ic; 1337 dio->ic = ic;
1338 dio->bi_error = 0; 1338 dio->bi_status = 0;
1339 1339
1340 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 1340 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
1341 submit_flush_bio(ic, dio); 1341 submit_flush_bio(ic, dio);
@@ -1356,13 +1356,13 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1356 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx", 1356 DMERR("Too big sector number: 0x%llx + 0x%x > 0x%llx",
1357 (unsigned long long)dio->range.logical_sector, bio_sectors(bio), 1357 (unsigned long long)dio->range.logical_sector, bio_sectors(bio),
1358 (unsigned long long)ic->provided_data_sectors); 1358 (unsigned long long)ic->provided_data_sectors);
1359 return -EIO; 1359 return DM_MAPIO_KILL;
1360 } 1360 }
1361 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) { 1361 if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
1362 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x", 1362 DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
1363 ic->sectors_per_block, 1363 ic->sectors_per_block,
1364 (unsigned long long)dio->range.logical_sector, bio_sectors(bio)); 1364 (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
1365 return -EIO; 1365 return DM_MAPIO_KILL;
1366 } 1366 }
1367 1367
1368 if (ic->sectors_per_block > 1) { 1368 if (ic->sectors_per_block > 1) {
@@ -1372,7 +1372,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1372 if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { 1372 if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
1373 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", 1373 DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
1374 bv.bv_offset, bv.bv_len, ic->sectors_per_block); 1374 bv.bv_offset, bv.bv_len, ic->sectors_per_block);
1375 return -EIO; 1375 return DM_MAPIO_KILL;
1376 } 1376 }
1377 } 1377 }
1378 } 1378 }
@@ -1387,18 +1387,18 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio)
1387 wanted_tag_size *= ic->tag_size; 1387 wanted_tag_size *= ic->tag_size;
1388 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) { 1388 if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
1389 DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size); 1389 DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
1390 return -EIO; 1390 return DM_MAPIO_KILL;
1391 } 1391 }
1392 } 1392 }
1393 } else { 1393 } else {
1394 if (unlikely(bip != NULL)) { 1394 if (unlikely(bip != NULL)) {
1395 DMERR("Unexpected integrity data when using internal hash"); 1395 DMERR("Unexpected integrity data when using internal hash");
1396 return -EIO; 1396 return DM_MAPIO_KILL;
1397 } 1397 }
1398 } 1398 }
1399 1399
1400 if (unlikely(ic->mode == 'R') && unlikely(dio->write)) 1400 if (unlikely(ic->mode == 'R') && unlikely(dio->write))
1401 return -EIO; 1401 return DM_MAPIO_KILL;
1402 1402
1403 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset); 1403 get_area_and_offset(ic, dio->range.logical_sector, &area, &offset);
1404 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset); 1404 dio->metadata_block = get_metadata_sector_and_offset(ic, area, offset, &dio->metadata_offset);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 8d5ca30f6551..25039607f3cb 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,7 +58,8 @@ struct dm_io_client *dm_io_client_create(void)
58 if (!client->pool) 58 if (!client->pool)
59 goto bad; 59 goto bad;
60 60
61 client->bios = bioset_create(min_ios, 0); 61 client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
62 BIOSET_NEED_RESCUER));
62 if (!client->bios) 63 if (!client->bios)
63 goto bad; 64 goto bad;
64 65
@@ -124,7 +125,7 @@ static void complete_io(struct io *io)
124 fn(error_bits, context); 125 fn(error_bits, context);
125} 126}
126 127
127static void dec_count(struct io *io, unsigned int region, int error) 128static void dec_count(struct io *io, unsigned int region, blk_status_t error)
128{ 129{
129 if (error) 130 if (error)
130 set_bit(region, &io->error_bits); 131 set_bit(region, &io->error_bits);
@@ -137,9 +138,9 @@ static void endio(struct bio *bio)
137{ 138{
138 struct io *io; 139 struct io *io;
139 unsigned region; 140 unsigned region;
140 int error; 141 blk_status_t error;
141 142
142 if (bio->bi_error && bio_data_dir(bio) == READ) 143 if (bio->bi_status && bio_data_dir(bio) == READ)
143 zero_fill_bio(bio); 144 zero_fill_bio(bio);
144 145
145 /* 146 /*
@@ -147,7 +148,7 @@ static void endio(struct bio *bio)
147 */ 148 */
148 retrieve_io_and_region_from_bio(bio, &io, &region); 149 retrieve_io_and_region_from_bio(bio, &io, &region);
149 150
150 error = bio->bi_error; 151 error = bio->bi_status;
151 bio_put(bio); 152 bio_put(bio);
152 153
153 dec_count(io, region, error); 154 dec_count(io, region, error);
@@ -319,7 +320,7 @@ static void do_region(int op, int op_flags, unsigned region,
319 if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES || 320 if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
320 op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) { 321 op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
321 atomic_inc(&io->count); 322 atomic_inc(&io->count);
322 dec_count(io, region, -EOPNOTSUPP); 323 dec_count(io, region, BLK_STS_NOTSUPP);
323 return; 324 return;
324 } 325 }
325 326
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4dfe38655a49..a1da0eb58a93 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -150,10 +150,10 @@ static void log_end_io(struct bio *bio)
150{ 150{
151 struct log_writes_c *lc = bio->bi_private; 151 struct log_writes_c *lc = bio->bi_private;
152 152
153 if (bio->bi_error) { 153 if (bio->bi_status) {
154 unsigned long flags; 154 unsigned long flags;
155 155
156 DMERR("Error writing log block, error=%d", bio->bi_error); 156 DMERR("Error writing log block, error=%d", bio->bi_status);
157 spin_lock_irqsave(&lc->blocks_lock, flags); 157 spin_lock_irqsave(&lc->blocks_lock, flags);
158 lc->logging_enabled = false; 158 lc->logging_enabled = false;
159 spin_unlock_irqrestore(&lc->blocks_lock, flags); 159 spin_unlock_irqrestore(&lc->blocks_lock, flags);
@@ -586,7 +586,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
586 spin_lock_irq(&lc->blocks_lock); 586 spin_lock_irq(&lc->blocks_lock);
587 lc->logging_enabled = false; 587 lc->logging_enabled = false;
588 spin_unlock_irq(&lc->blocks_lock); 588 spin_unlock_irq(&lc->blocks_lock);
589 return -ENOMEM; 589 return DM_MAPIO_KILL;
590 } 590 }
591 INIT_LIST_HEAD(&block->list); 591 INIT_LIST_HEAD(&block->list);
592 pb->block = block; 592 pb->block = block;
@@ -639,7 +639,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
639 spin_lock_irq(&lc->blocks_lock); 639 spin_lock_irq(&lc->blocks_lock);
640 lc->logging_enabled = false; 640 lc->logging_enabled = false;
641 spin_unlock_irq(&lc->blocks_lock); 641 spin_unlock_irq(&lc->blocks_lock);
642 return -ENOMEM; 642 return DM_MAPIO_KILL;
643 } 643 }
644 644
645 src = kmap_atomic(bv.bv_page); 645 src = kmap_atomic(bv.bv_page);
@@ -664,7 +664,8 @@ map_bio:
664 return DM_MAPIO_REMAPPED; 664 return DM_MAPIO_REMAPPED;
665} 665}
666 666
667static int normal_end_io(struct dm_target *ti, struct bio *bio, int error) 667static int normal_end_io(struct dm_target *ti, struct bio *bio,
668 blk_status_t *error)
668{ 669{
669 struct log_writes_c *lc = ti->private; 670 struct log_writes_c *lc = ti->private;
670 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); 671 struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
@@ -686,7 +687,7 @@ static int normal_end_io(struct dm_target *ti, struct bio *bio, int error)
686 spin_unlock_irqrestore(&lc->blocks_lock, flags); 687 spin_unlock_irqrestore(&lc->blocks_lock, flags);
687 } 688 }
688 689
689 return error; 690 return DM_ENDIO_DONE;
690} 691}
691 692
692/* 693/*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3df056b73b66..0e8ab5bb3575 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -559,13 +559,13 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
559 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 559 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
560 return DM_MAPIO_REQUEUE; 560 return DM_MAPIO_REQUEUE;
561 dm_report_EIO(m); 561 dm_report_EIO(m);
562 return -EIO; 562 return DM_MAPIO_KILL;
563 } 563 }
564 564
565 mpio->pgpath = pgpath; 565 mpio->pgpath = pgpath;
566 mpio->nr_bytes = nr_bytes; 566 mpio->nr_bytes = nr_bytes;
567 567
568 bio->bi_error = 0; 568 bio->bi_status = 0;
569 bio->bi_bdev = pgpath->path.dev->bdev; 569 bio->bi_bdev = pgpath->path.dev->bdev;
570 bio->bi_opf |= REQ_FAILFAST_TRANSPORT; 570 bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
571 571
@@ -621,11 +621,19 @@ static void process_queued_bios(struct work_struct *work)
621 blk_start_plug(&plug); 621 blk_start_plug(&plug);
622 while ((bio = bio_list_pop(&bios))) { 622 while ((bio = bio_list_pop(&bios))) {
623 r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio)); 623 r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
624 if (r < 0 || r == DM_MAPIO_REQUEUE) { 624 switch (r) {
625 bio->bi_error = r; 625 case DM_MAPIO_KILL:
626 bio->bi_status = BLK_STS_IOERR;
627 bio_endio(bio);
628 break;
629 case DM_MAPIO_REQUEUE:
630 bio->bi_status = BLK_STS_DM_REQUEUE;
626 bio_endio(bio); 631 bio_endio(bio);
627 } else if (r == DM_MAPIO_REMAPPED) 632 break;
633 case DM_MAPIO_REMAPPED:
628 generic_make_request(bio); 634 generic_make_request(bio);
635 break;
636 }
629 } 637 }
630 blk_finish_plug(&plug); 638 blk_finish_plug(&plug);
631} 639}
@@ -1442,22 +1450,15 @@ static void activate_path_work(struct work_struct *work)
1442 activate_or_offline_path(pgpath); 1450 activate_or_offline_path(pgpath);
1443} 1451}
1444 1452
1445static int noretry_error(int error) 1453static int noretry_error(blk_status_t error)
1446{ 1454{
1447 switch (error) { 1455 switch (error) {
1448 case -EBADE: 1456 case BLK_STS_NOTSUPP:
1449 /* 1457 case BLK_STS_NOSPC:
1450 * EBADE signals an reservation conflict. 1458 case BLK_STS_TARGET:
1451 * We shouldn't fail the path here as we can communicate with 1459 case BLK_STS_NEXUS:
1452 * the target. We should failover to the next path, but in 1460 case BLK_STS_MEDIUM:
1453 * doing so we might be causing a ping-pong between paths. 1461 case BLK_STS_RESOURCE:
1454 * So just return the reservation conflict error.
1455 */
1456 case -EOPNOTSUPP:
1457 case -EREMOTEIO:
1458 case -EILSEQ:
1459 case -ENODATA:
1460 case -ENOSPC:
1461 return 1; 1462 return 1;
1462 } 1463 }
1463 1464
@@ -1466,7 +1467,7 @@ static int noretry_error(int error)
1466} 1467}
1467 1468
1468static int multipath_end_io(struct dm_target *ti, struct request *clone, 1469static int multipath_end_io(struct dm_target *ti, struct request *clone,
1469 int error, union map_info *map_context) 1470 blk_status_t error, union map_info *map_context)
1470{ 1471{
1471 struct dm_mpath_io *mpio = get_mpio(map_context); 1472 struct dm_mpath_io *mpio = get_mpio(map_context);
1472 struct pgpath *pgpath = mpio->pgpath; 1473 struct pgpath *pgpath = mpio->pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
1493 1494
1494 if (atomic_read(&m->nr_valid_paths) == 0 && 1495 if (atomic_read(&m->nr_valid_paths) == 0 &&
1495 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 1496 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
1496 if (error == -EIO) 1497 if (error == BLK_STS_IOERR)
1497 dm_report_EIO(m); 1498 dm_report_EIO(m);
1498 /* complete with the original error */ 1499 /* complete with the original error */
1499 r = DM_ENDIO_DONE; 1500 r = DM_ENDIO_DONE;
@@ -1510,24 +1511,26 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
1510 return r; 1511 return r;
1511} 1512}
1512 1513
1513static int do_end_io_bio(struct multipath *m, struct bio *clone, 1514static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
1514 int error, struct dm_mpath_io *mpio) 1515 blk_status_t *error)
1515{ 1516{
1517 struct multipath *m = ti->private;
1518 struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
1519 struct pgpath *pgpath = mpio->pgpath;
1516 unsigned long flags; 1520 unsigned long flags;
1521 int r = DM_ENDIO_DONE;
1517 1522
1518 if (!error) 1523 if (!*error || noretry_error(*error))
1519 return 0; /* I/O complete */ 1524 goto done;
1520
1521 if (noretry_error(error))
1522 return error;
1523 1525
1524 if (mpio->pgpath) 1526 if (pgpath)
1525 fail_path(mpio->pgpath); 1527 fail_path(pgpath);
1526 1528
1527 if (atomic_read(&m->nr_valid_paths) == 0 && 1529 if (atomic_read(&m->nr_valid_paths) == 0 &&
1528 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 1530 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
1529 dm_report_EIO(m); 1531 dm_report_EIO(m);
1530 return -EIO; 1532 *error = BLK_STS_IOERR;
1533 goto done;
1531 } 1534 }
1532 1535
1533 /* Queue for the daemon to resubmit */ 1536 /* Queue for the daemon to resubmit */
@@ -1539,23 +1542,11 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
1539 if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) 1542 if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
1540 queue_work(kmultipathd, &m->process_queued_bios); 1543 queue_work(kmultipathd, &m->process_queued_bios);
1541 1544
1542 return DM_ENDIO_INCOMPLETE; 1545 r = DM_ENDIO_INCOMPLETE;
1543} 1546done:
1544
1545static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
1546{
1547 struct multipath *m = ti->private;
1548 struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
1549 struct pgpath *pgpath;
1550 struct path_selector *ps;
1551 int r;
1552
1553 BUG_ON(!mpio);
1554
1555 r = do_end_io_bio(m, clone, error, mpio);
1556 pgpath = mpio->pgpath;
1557 if (pgpath) { 1547 if (pgpath) {
1558 ps = &pgpath->pg->ps; 1548 struct path_selector *ps = &pgpath->pg->ps;
1549
1559 if (ps->type->end_io) 1550 if (ps->type->end_io)
1560 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); 1551 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1561 } 1552 }
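
The rewritten noretry_error() above now classifies blk_status_t values instead of errnos. The sketch below restates that classification for readers mapping the old list to the new one; the comments give the errno each status corresponds to in the block layer's conversion table.

/* Sketch: statuses multipath treats as not worth failing over a path. */
static bool example_noretry(blk_status_t error)
{
        switch (error) {
        case BLK_STS_NOTSUPP:   /* -EOPNOTSUPP */
        case BLK_STS_NOSPC:     /* -ENOSPC */
        case BLK_STS_TARGET:    /* -EREMOTEIO */
        case BLK_STS_NEXUS:     /* -EBADE */
        case BLK_STS_MEDIUM:    /* -ENODATA */
        case BLK_STS_RESOURCE:  /* -ENOMEM */
                return true;
        default:
                return false;
        }
}
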
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4da8858856fb..a4fbd911d566 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -491,9 +491,9 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio)
491 * If device is suspended, complete the bio. 491 * If device is suspended, complete the bio.
492 */ 492 */
493 if (dm_noflush_suspending(ms->ti)) 493 if (dm_noflush_suspending(ms->ti))
494 bio->bi_error = DM_ENDIO_REQUEUE; 494 bio->bi_status = BLK_STS_DM_REQUEUE;
495 else 495 else
496 bio->bi_error = -EIO; 496 bio->bi_status = BLK_STS_IOERR;
497 497
498 bio_endio(bio); 498 bio_endio(bio);
499 return; 499 return;
@@ -627,7 +627,7 @@ static void write_callback(unsigned long error, void *context)
627 * degrade the array. 627 * degrade the array.
628 */ 628 */
629 if (bio_op(bio) == REQ_OP_DISCARD) { 629 if (bio_op(bio) == REQ_OP_DISCARD) {
630 bio->bi_error = -EOPNOTSUPP; 630 bio->bi_status = BLK_STS_NOTSUPP;
631 bio_endio(bio); 631 bio_endio(bio);
632 return; 632 return;
633 } 633 }
@@ -1210,14 +1210,14 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
1210 1210
1211 r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0); 1211 r = log->type->in_sync(log, dm_rh_bio_to_region(ms->rh, bio), 0);
1212 if (r < 0 && r != -EWOULDBLOCK) 1212 if (r < 0 && r != -EWOULDBLOCK)
1213 return r; 1213 return DM_MAPIO_KILL;
1214 1214
1215 /* 1215 /*
1216 * If region is not in-sync queue the bio. 1216 * If region is not in-sync queue the bio.
1217 */ 1217 */
1218 if (!r || (r == -EWOULDBLOCK)) { 1218 if (!r || (r == -EWOULDBLOCK)) {
1219 if (bio->bi_opf & REQ_RAHEAD) 1219 if (bio->bi_opf & REQ_RAHEAD)
1220 return -EWOULDBLOCK; 1220 return DM_MAPIO_KILL;
1221 1221
1222 queue_bio(ms, bio, rw); 1222 queue_bio(ms, bio, rw);
1223 return DM_MAPIO_SUBMITTED; 1223 return DM_MAPIO_SUBMITTED;
@@ -1229,7 +1229,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
1229 */ 1229 */
1230 m = choose_mirror(ms, bio->bi_iter.bi_sector); 1230 m = choose_mirror(ms, bio->bi_iter.bi_sector);
1231 if (unlikely(!m)) 1231 if (unlikely(!m))
1232 return -EIO; 1232 return DM_MAPIO_KILL;
1233 1233
1234 dm_bio_record(&bio_record->details, bio); 1234 dm_bio_record(&bio_record->details, bio);
1235 bio_record->m = m; 1235 bio_record->m = m;
@@ -1239,7 +1239,8 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
1239 return DM_MAPIO_REMAPPED; 1239 return DM_MAPIO_REMAPPED;
1240} 1240}
1241 1241
1242static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) 1242static int mirror_end_io(struct dm_target *ti, struct bio *bio,
1243 blk_status_t *error)
1243{ 1244{
1244 int rw = bio_data_dir(bio); 1245 int rw = bio_data_dir(bio);
1245 struct mirror_set *ms = (struct mirror_set *) ti->private; 1246 struct mirror_set *ms = (struct mirror_set *) ti->private;
@@ -1255,16 +1256,16 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1255 if (!(bio->bi_opf & REQ_PREFLUSH) && 1256 if (!(bio->bi_opf & REQ_PREFLUSH) &&
1256 bio_op(bio) != REQ_OP_DISCARD) 1257 bio_op(bio) != REQ_OP_DISCARD)
1257 dm_rh_dec(ms->rh, bio_record->write_region); 1258 dm_rh_dec(ms->rh, bio_record->write_region);
1258 return error; 1259 return DM_ENDIO_DONE;
1259 } 1260 }
1260 1261
1261 if (error == -EOPNOTSUPP) 1262 if (*error == BLK_STS_NOTSUPP)
1262 goto out; 1263 goto out;
1263 1264
1264 if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) 1265 if (bio->bi_opf & REQ_RAHEAD)
1265 goto out; 1266 goto out;
1266 1267
1267 if (unlikely(error)) { 1268 if (unlikely(*error)) {
1268 if (!bio_record->details.bi_bdev) { 1269 if (!bio_record->details.bi_bdev) {
1269 /* 1270 /*
1270 * There wasn't enough memory to record necessary 1271 * There wasn't enough memory to record necessary
@@ -1272,7 +1273,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1272 * mirror in-sync. 1273 * mirror in-sync.
1273 */ 1274 */
1274 DMERR_LIMIT("Mirror read failed."); 1275 DMERR_LIMIT("Mirror read failed.");
1275 return -EIO; 1276 return DM_ENDIO_DONE;
1276 } 1277 }
1277 1278
1278 m = bio_record->m; 1279 m = bio_record->m;
@@ -1291,7 +1292,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1291 1292
1292 dm_bio_restore(bd, bio); 1293 dm_bio_restore(bd, bio);
1293 bio_record->details.bi_bdev = NULL; 1294 bio_record->details.bi_bdev = NULL;
1294 bio->bi_error = 0; 1295 bio->bi_status = 0;
1295 1296
1296 queue_bio(ms, bio, rw); 1297 queue_bio(ms, bio, rw);
1297 return DM_ENDIO_INCOMPLETE; 1298 return DM_ENDIO_INCOMPLETE;
@@ -1302,7 +1303,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
1302out: 1303out:
1303 bio_record->details.bi_bdev = NULL; 1304 bio_record->details.bi_bdev = NULL;
1304 1305
1305 return error; 1306 return DM_ENDIO_DONE;
1306} 1307}
1307 1308
1308static void mirror_presuspend(struct dm_target *ti) 1309static void mirror_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b639fa7246ee..c6ebc5b1e00e 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
71 71
72static void dm_mq_start_queue(struct request_queue *q) 72static void dm_mq_start_queue(struct request_queue *q)
73{ 73{
74 blk_mq_start_stopped_hw_queues(q, true); 74 blk_mq_unquiesce_queue(q);
75 blk_mq_kick_requeue_list(q); 75 blk_mq_kick_requeue_list(q);
76} 76}
77 77
@@ -119,7 +119,7 @@ static void end_clone_bio(struct bio *clone)
119 struct dm_rq_target_io *tio = info->tio; 119 struct dm_rq_target_io *tio = info->tio;
120 struct bio *bio = info->orig; 120 struct bio *bio = info->orig;
121 unsigned int nr_bytes = info->orig->bi_iter.bi_size; 121 unsigned int nr_bytes = info->orig->bi_iter.bi_size;
122 int error = clone->bi_error; 122 blk_status_t error = clone->bi_status;
123 123
124 bio_put(clone); 124 bio_put(clone);
125 125
@@ -158,7 +158,7 @@ static void end_clone_bio(struct bio *clone)
158 * Do not use blk_end_request() here, because it may complete 158 * Do not use blk_end_request() here, because it may complete
159 * the original request before the clone, and break the ordering. 159 * the original request before the clone, and break the ordering.
160 */ 160 */
161 blk_update_request(tio->orig, 0, nr_bytes); 161 blk_update_request(tio->orig, BLK_STS_OK, nr_bytes);
162} 162}
163 163
164static struct dm_rq_target_io *tio_from_request(struct request *rq) 164static struct dm_rq_target_io *tio_from_request(struct request *rq)
@@ -216,7 +216,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
216 * Must be called without clone's queue lock held, 216 * Must be called without clone's queue lock held,
217 * see end_clone_request() for more details. 217 * see end_clone_request() for more details.
218 */ 218 */
219static void dm_end_request(struct request *clone, int error) 219static void dm_end_request(struct request *clone, blk_status_t error)
220{ 220{
221 int rw = rq_data_dir(clone); 221 int rw = rq_data_dir(clone);
222 struct dm_rq_target_io *tio = clone->end_io_data; 222 struct dm_rq_target_io *tio = clone->end_io_data;
@@ -285,7 +285,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
285 rq_completed(md, rw, false); 285 rq_completed(md, rw, false);
286} 286}
287 287
288static void dm_done(struct request *clone, int error, bool mapped) 288static void dm_done(struct request *clone, blk_status_t error, bool mapped)
289{ 289{
290 int r = DM_ENDIO_DONE; 290 int r = DM_ENDIO_DONE;
291 struct dm_rq_target_io *tio = clone->end_io_data; 291 struct dm_rq_target_io *tio = clone->end_io_data;
@@ -298,7 +298,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
298 r = rq_end_io(tio->ti, clone, error, &tio->info); 298 r = rq_end_io(tio->ti, clone, error, &tio->info);
299 } 299 }
300 300
301 if (unlikely(error == -EREMOTEIO)) { 301 if (unlikely(error == BLK_STS_TARGET)) {
302 if (req_op(clone) == REQ_OP_WRITE_SAME && 302 if (req_op(clone) == REQ_OP_WRITE_SAME &&
303 !clone->q->limits.max_write_same_sectors) 303 !clone->q->limits.max_write_same_sectors)
304 disable_write_same(tio->md); 304 disable_write_same(tio->md);
@@ -358,7 +358,7 @@ static void dm_softirq_done(struct request *rq)
358 * Complete the clone and the original request with the error status 358 * Complete the clone and the original request with the error status
359 * through softirq context. 359 * through softirq context.
360 */ 360 */
361static void dm_complete_request(struct request *rq, int error) 361static void dm_complete_request(struct request *rq, blk_status_t error)
362{ 362{
363 struct dm_rq_target_io *tio = tio_from_request(rq); 363 struct dm_rq_target_io *tio = tio_from_request(rq);
364 364
@@ -375,7 +375,7 @@ static void dm_complete_request(struct request *rq, int error)
375 * Target's rq_end_io() function isn't called. 375 * Target's rq_end_io() function isn't called.
376 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. 376 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
377 */ 377 */
378static void dm_kill_unmapped_request(struct request *rq, int error) 378static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
379{ 379{
380 rq->rq_flags |= RQF_FAILED; 380 rq->rq_flags |= RQF_FAILED;
381 dm_complete_request(rq, error); 381 dm_complete_request(rq, error);
@@ -384,7 +384,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
384/* 384/*
385 * Called with the clone's queue lock held (in the case of .request_fn) 385 * Called with the clone's queue lock held (in the case of .request_fn)
386 */ 386 */
387static void end_clone_request(struct request *clone, int error) 387static void end_clone_request(struct request *clone, blk_status_t error)
388{ 388{
389 struct dm_rq_target_io *tio = clone->end_io_data; 389 struct dm_rq_target_io *tio = clone->end_io_data;
390 390
@@ -401,7 +401,7 @@ static void end_clone_request(struct request *clone, int error)
401 401
402static void dm_dispatch_clone_request(struct request *clone, struct request *rq) 402static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
403{ 403{
404 int r; 404 blk_status_t r;
405 405
406 if (blk_queue_io_stat(clone->q)) 406 if (blk_queue_io_stat(clone->q))
407 clone->rq_flags |= RQF_IO_STAT; 407 clone->rq_flags |= RQF_IO_STAT;
@@ -506,7 +506,7 @@ static int map_request(struct dm_rq_target_io *tio)
506 break; 506 break;
507 case DM_MAPIO_KILL: 507 case DM_MAPIO_KILL:
508 /* The target wants to complete the I/O */ 508 /* The target wants to complete the I/O */
509 dm_kill_unmapped_request(rq, -EIO); 509 dm_kill_unmapped_request(rq, BLK_STS_IOERR);
510 break; 510 break;
511 default: 511 default:
512 DMWARN("unimplemented target map return value: %d", r); 512 DMWARN("unimplemented target map return value: %d", r);
@@ -727,7 +727,7 @@ static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
727 return __dm_rq_init_rq(set->driver_data, rq); 727 return __dm_rq_init_rq(set->driver_data, rq);
728} 728}
729 729
730static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, 730static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
731 const struct blk_mq_queue_data *bd) 731 const struct blk_mq_queue_data *bd)
732{ 732{
733 struct request *rq = bd->rq; 733 struct request *rq = bd->rq;
@@ -744,7 +744,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
744 } 744 }
745 745
746 if (ti->type->busy && ti->type->busy(ti)) 746 if (ti->type->busy && ti->type->busy(ti))
747 return BLK_MQ_RQ_QUEUE_BUSY; 747 return BLK_STS_RESOURCE;
748 748
749 dm_start_request(md, rq); 749 dm_start_request(md, rq);
750 750
@@ -762,10 +762,10 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
762 rq_end_stats(md, rq); 762 rq_end_stats(md, rq);
763 rq_completed(md, rq_data_dir(rq), false); 763 rq_completed(md, rq_data_dir(rq), false);
764 blk_mq_delay_run_hw_queue(hctx, 100/*ms*/); 764 blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
765 return BLK_MQ_RQ_QUEUE_BUSY; 765 return BLK_STS_RESOURCE;
766 } 766 }
767 767
768 return BLK_MQ_RQ_QUEUE_OK; 768 return BLK_STS_OK;
769} 769}
770 770
771static const struct blk_mq_ops dm_mq_ops = { 771static const struct blk_mq_ops dm_mq_ops = {
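
dm_mq_queue_rq() picks up the blk-mq side of the series: .queue_rq now returns a blk_status_t, with BLK_STS_OK and BLK_STS_RESOURCE replacing BLK_MQ_RQ_QUEUE_OK and BLK_MQ_RQ_QUEUE_BUSY. Below is a hedged sketch of a queue_rq handler under the new convention; the busy check is a placeholder, not anything from this patch.

static bool example_device_busy(void *queuedata)
{
        return false;   /* placeholder: a real driver would check its own state */
}

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
                                     const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;

        if (example_device_busy(hctx->queue->queuedata))
                return BLK_STS_RESOURCE;        /* blk-mq will retry the request */

        blk_mq_start_request(rq);
        /* ...hand the request to the hardware; completion happens later... */
        return BLK_STS_OK;
}
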
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index f0020d21b95f..9813922e4fe5 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -24,7 +24,7 @@ struct dm_rq_target_io {
24 struct dm_target *ti; 24 struct dm_target *ti;
25 struct request *orig, *clone; 25 struct request *orig, *clone;
26 struct kthread_work work; 26 struct kthread_work work;
27 int error; 27 blk_status_t error;
28 union map_info info; 28 union map_info info;
29 struct dm_stats_aux stats_aux; 29 struct dm_stats_aux stats_aux;
30 unsigned long duration_jiffies; 30 unsigned long duration_jiffies;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e152d9817c81..1ba41048b438 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1590,7 +1590,7 @@ static void full_bio_end_io(struct bio *bio)
1590{ 1590{
1591 void *callback_data = bio->bi_private; 1591 void *callback_data = bio->bi_private;
1592 1592
1593 dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 1 : 0); 1593 dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
1594} 1594}
1595 1595
1596static void start_full_bio(struct dm_snap_pending_exception *pe, 1596static void start_full_bio(struct dm_snap_pending_exception *pe,
@@ -1690,7 +1690,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1690 /* Full snapshots are not usable */ 1690 /* Full snapshots are not usable */
1691 /* To get here the table must be live so s->active is always set. */ 1691 /* To get here the table must be live so s->active is always set. */
1692 if (!s->valid) 1692 if (!s->valid)
1693 return -EIO; 1693 return DM_MAPIO_KILL;
1694 1694
1695 /* FIXME: should only take write lock if we need 1695 /* FIXME: should only take write lock if we need
1696 * to copy an exception */ 1696 * to copy an exception */
@@ -1698,7 +1698,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1698 1698
1699 if (!s->valid || (unlikely(s->snapshot_overflowed) && 1699 if (!s->valid || (unlikely(s->snapshot_overflowed) &&
1700 bio_data_dir(bio) == WRITE)) { 1700 bio_data_dir(bio) == WRITE)) {
1701 r = -EIO; 1701 r = DM_MAPIO_KILL;
1702 goto out_unlock; 1702 goto out_unlock;
1703 } 1703 }
1704 1704
@@ -1723,7 +1723,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1723 1723
1724 if (!s->valid || s->snapshot_overflowed) { 1724 if (!s->valid || s->snapshot_overflowed) {
1725 free_pending_exception(pe); 1725 free_pending_exception(pe);
1726 r = -EIO; 1726 r = DM_MAPIO_KILL;
1727 goto out_unlock; 1727 goto out_unlock;
1728 } 1728 }
1729 1729
@@ -1741,7 +1741,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
1741 DMERR("Snapshot overflowed: Unable to allocate exception."); 1741 DMERR("Snapshot overflowed: Unable to allocate exception.");
1742 } else 1742 } else
1743 __invalidate_snapshot(s, -ENOMEM); 1743 __invalidate_snapshot(s, -ENOMEM);
1744 r = -EIO; 1744 r = DM_MAPIO_KILL;
1745 goto out_unlock; 1745 goto out_unlock;
1746 } 1746 }
1747 } 1747 }
@@ -1851,14 +1851,15 @@ out_unlock:
1851 return r; 1851 return r;
1852} 1852}
1853 1853
1854static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error) 1854static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
1855 blk_status_t *error)
1855{ 1856{
1856 struct dm_snapshot *s = ti->private; 1857 struct dm_snapshot *s = ti->private;
1857 1858
1858 if (is_bio_tracked(bio)) 1859 if (is_bio_tracked(bio))
1859 stop_tracking_chunk(s, bio); 1860 stop_tracking_chunk(s, bio);
1860 1861
1861 return 0; 1862 return DM_ENDIO_DONE;
1862} 1863}
1863 1864
1864static void snapshot_merge_presuspend(struct dm_target *ti) 1865static void snapshot_merge_presuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 75152482f3ad..11621a0af887 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -375,20 +375,21 @@ static void stripe_status(struct dm_target *ti, status_type_t type,
375 } 375 }
376} 376}
377 377
378static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) 378static int stripe_end_io(struct dm_target *ti, struct bio *bio,
379 blk_status_t *error)
379{ 380{
380 unsigned i; 381 unsigned i;
381 char major_minor[16]; 382 char major_minor[16];
382 struct stripe_c *sc = ti->private; 383 struct stripe_c *sc = ti->private;
383 384
384 if (!error) 385 if (!*error)
385 return 0; /* I/O complete */ 386 return DM_ENDIO_DONE; /* I/O complete */
386 387
387 if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) 388 if (bio->bi_opf & REQ_RAHEAD)
388 return error; 389 return DM_ENDIO_DONE;
389 390
390 if (error == -EOPNOTSUPP) 391 if (*error == BLK_STS_NOTSUPP)
391 return error; 392 return DM_ENDIO_DONE;
392 393
393 memset(major_minor, 0, sizeof(major_minor)); 394 memset(major_minor, 0, sizeof(major_minor));
394 sprintf(major_minor, "%d:%d", 395 sprintf(major_minor, "%d:%d",
@@ -409,7 +410,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
409 schedule_work(&sc->trigger_event); 410 schedule_work(&sc->trigger_event);
410 } 411 }
411 412
412 return error; 413 return DM_ENDIO_DONE;
413} 414}
414 415
415static int stripe_iterate_devices(struct dm_target *ti, 416static int stripe_iterate_devices(struct dm_target *ti,
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index b242b750542f..c0d7e60820c4 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -128,7 +128,7 @@ static void io_err_dtr(struct dm_target *tt)
128 128
129static int io_err_map(struct dm_target *tt, struct bio *bio) 129static int io_err_map(struct dm_target *tt, struct bio *bio)
130{ 130{
131 return -EIO; 131 return DM_MAPIO_KILL;
132} 132}
133 133
134static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq, 134static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 28808e5ec0fd..9dec2f8cc739 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -383,8 +383,8 @@ static void end_discard(struct discard_op *op, int r)
383 * Even if r is set, there could be sub discards in flight that we 383 * Even if r is set, there could be sub discards in flight that we
384 * need to wait for. 384 * need to wait for.
385 */ 385 */
386 if (r && !op->parent_bio->bi_error) 386 if (r && !op->parent_bio->bi_status)
387 op->parent_bio->bi_error = r; 387 op->parent_bio->bi_status = errno_to_blk_status(r);
388 bio_endio(op->parent_bio); 388 bio_endio(op->parent_bio);
389} 389}
390 390
@@ -450,22 +450,20 @@ static void cell_release_no_holder(struct pool *pool,
450} 450}
451 451
452static void cell_error_with_code(struct pool *pool, 452static void cell_error_with_code(struct pool *pool,
453 struct dm_bio_prison_cell *cell, int error_code) 453 struct dm_bio_prison_cell *cell, blk_status_t error_code)
454{ 454{
455 dm_cell_error(pool->prison, cell, error_code); 455 dm_cell_error(pool->prison, cell, error_code);
456 dm_bio_prison_free_cell(pool->prison, cell); 456 dm_bio_prison_free_cell(pool->prison, cell);
457} 457}
458 458
459static int get_pool_io_error_code(struct pool *pool) 459static blk_status_t get_pool_io_error_code(struct pool *pool)
460{ 460{
461 return pool->out_of_data_space ? -ENOSPC : -EIO; 461 return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
462} 462}
463 463
464static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell) 464static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
465{ 465{
466 int error = get_pool_io_error_code(pool); 466 cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
467
468 cell_error_with_code(pool, cell, error);
469} 467}
470 468
471static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell) 469static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
@@ -475,7 +473,7 @@ static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
475 473
476static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell) 474static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
477{ 475{
478 cell_error_with_code(pool, cell, DM_ENDIO_REQUEUE); 476 cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
479} 477}
480 478
481/*----------------------------------------------------------------*/ 479/*----------------------------------------------------------------*/
@@ -555,17 +553,18 @@ static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
555 bio_list_init(master); 553 bio_list_init(master);
556} 554}
557 555
558static void error_bio_list(struct bio_list *bios, int error) 556static void error_bio_list(struct bio_list *bios, blk_status_t error)
559{ 557{
560 struct bio *bio; 558 struct bio *bio;
561 559
562 while ((bio = bio_list_pop(bios))) { 560 while ((bio = bio_list_pop(bios))) {
563 bio->bi_error = error; 561 bio->bi_status = error;
564 bio_endio(bio); 562 bio_endio(bio);
565 } 563 }
566} 564}
567 565
568static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error) 566static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
567 blk_status_t error)
569{ 568{
570 struct bio_list bios; 569 struct bio_list bios;
571 unsigned long flags; 570 unsigned long flags;
@@ -608,11 +607,11 @@ static void requeue_io(struct thin_c *tc)
608 __merge_bio_list(&bios, &tc->retry_on_resume_list); 607 __merge_bio_list(&bios, &tc->retry_on_resume_list);
609 spin_unlock_irqrestore(&tc->lock, flags); 608 spin_unlock_irqrestore(&tc->lock, flags);
610 609
611 error_bio_list(&bios, DM_ENDIO_REQUEUE); 610 error_bio_list(&bios, BLK_STS_DM_REQUEUE);
612 requeue_deferred_cells(tc); 611 requeue_deferred_cells(tc);
613} 612}
614 613
615static void error_retry_list_with_code(struct pool *pool, int error) 614static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
616{ 615{
617 struct thin_c *tc; 616 struct thin_c *tc;
618 617
@@ -624,9 +623,7 @@ static void error_retry_list_with_code(struct pool *pool, int error)
624 623
625static void error_retry_list(struct pool *pool) 624static void error_retry_list(struct pool *pool)
626{ 625{
627 int error = get_pool_io_error_code(pool); 626 error_retry_list_with_code(pool, get_pool_io_error_code(pool));
628
629 error_retry_list_with_code(pool, error);
630} 627}
631 628
632/* 629/*
@@ -774,7 +771,7 @@ struct dm_thin_new_mapping {
774 */ 771 */
775 atomic_t prepare_actions; 772 atomic_t prepare_actions;
776 773
777 int err; 774 blk_status_t status;
778 struct thin_c *tc; 775 struct thin_c *tc;
779 dm_block_t virt_begin, virt_end; 776 dm_block_t virt_begin, virt_end;
780 dm_block_t data_block; 777 dm_block_t data_block;
@@ -814,7 +811,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
814{ 811{
815 struct dm_thin_new_mapping *m = context; 812 struct dm_thin_new_mapping *m = context;
816 813
817 m->err = read_err || write_err ? -EIO : 0; 814 m->status = read_err || write_err ? BLK_STS_IOERR : 0;
818 complete_mapping_preparation(m); 815 complete_mapping_preparation(m);
819} 816}
820 817
@@ -825,7 +822,7 @@ static void overwrite_endio(struct bio *bio)
825 822
826 bio->bi_end_io = m->saved_bi_end_io; 823 bio->bi_end_io = m->saved_bi_end_io;
827 824
828 m->err = bio->bi_error; 825 m->status = bio->bi_status;
829 complete_mapping_preparation(m); 826 complete_mapping_preparation(m);
830} 827}
831 828
@@ -925,7 +922,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
925 struct bio *bio = m->bio; 922 struct bio *bio = m->bio;
926 int r; 923 int r;
927 924
928 if (m->err) { 925 if (m->status) {
929 cell_error(pool, m->cell); 926 cell_error(pool, m->cell);
930 goto out; 927 goto out;
931 } 928 }
@@ -1495,7 +1492,7 @@ static void retry_on_resume(struct bio *bio)
1495 spin_unlock_irqrestore(&tc->lock, flags); 1492 spin_unlock_irqrestore(&tc->lock, flags);
1496} 1493}
1497 1494
1498static int should_error_unserviceable_bio(struct pool *pool) 1495static blk_status_t should_error_unserviceable_bio(struct pool *pool)
1499{ 1496{
1500 enum pool_mode m = get_pool_mode(pool); 1497 enum pool_mode m = get_pool_mode(pool);
1501 1498
@@ -1503,27 +1500,27 @@ static int should_error_unserviceable_bio(struct pool *pool)
1503 case PM_WRITE: 1500 case PM_WRITE:
1504 /* Shouldn't get here */ 1501 /* Shouldn't get here */
1505 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode"); 1502 DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
1506 return -EIO; 1503 return BLK_STS_IOERR;
1507 1504
1508 case PM_OUT_OF_DATA_SPACE: 1505 case PM_OUT_OF_DATA_SPACE:
1509 return pool->pf.error_if_no_space ? -ENOSPC : 0; 1506 return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
1510 1507
1511 case PM_READ_ONLY: 1508 case PM_READ_ONLY:
1512 case PM_FAIL: 1509 case PM_FAIL:
1513 return -EIO; 1510 return BLK_STS_IOERR;
1514 default: 1511 default:
1515 /* Shouldn't get here */ 1512 /* Shouldn't get here */
1516 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode"); 1513 DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
1517 return -EIO; 1514 return BLK_STS_IOERR;
1518 } 1515 }
1519} 1516}
1520 1517
1521static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) 1518static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
1522{ 1519{
1523 int error = should_error_unserviceable_bio(pool); 1520 blk_status_t error = should_error_unserviceable_bio(pool);
1524 1521
1525 if (error) { 1522 if (error) {
1526 bio->bi_error = error; 1523 bio->bi_status = error;
1527 bio_endio(bio); 1524 bio_endio(bio);
1528 } else 1525 } else
1529 retry_on_resume(bio); 1526 retry_on_resume(bio);
@@ -1533,7 +1530,7 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c
1533{ 1530{
1534 struct bio *bio; 1531 struct bio *bio;
1535 struct bio_list bios; 1532 struct bio_list bios;
1536 int error; 1533 blk_status_t error;
1537 1534
1538 error = should_error_unserviceable_bio(pool); 1535 error = should_error_unserviceable_bio(pool);
1539 if (error) { 1536 if (error) {
@@ -2071,7 +2068,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
2071 unsigned count = 0; 2068 unsigned count = 0;
2072 2069
2073 if (tc->requeue_mode) { 2070 if (tc->requeue_mode) {
2074 error_thin_bio_list(tc, &tc->deferred_bio_list, DM_ENDIO_REQUEUE); 2071 error_thin_bio_list(tc, &tc->deferred_bio_list,
2072 BLK_STS_DM_REQUEUE);
2075 return; 2073 return;
2076 } 2074 }
2077 2075
@@ -2322,7 +2320,7 @@ static void do_no_space_timeout(struct work_struct *ws)
2322 if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { 2320 if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
2323 pool->pf.error_if_no_space = true; 2321 pool->pf.error_if_no_space = true;
2324 notify_of_pool_mode_change_to_oods(pool); 2322 notify_of_pool_mode_change_to_oods(pool);
2325 error_retry_list_with_code(pool, -ENOSPC); 2323 error_retry_list_with_code(pool, BLK_STS_NOSPC);
2326 } 2324 }
2327} 2325}
2328 2326
@@ -2624,7 +2622,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
2624 thin_hook_bio(tc, bio); 2622 thin_hook_bio(tc, bio);
2625 2623
2626 if (tc->requeue_mode) { 2624 if (tc->requeue_mode) {
2627 bio->bi_error = DM_ENDIO_REQUEUE; 2625 bio->bi_status = BLK_STS_DM_REQUEUE;
2628 bio_endio(bio); 2626 bio_endio(bio);
2629 return DM_MAPIO_SUBMITTED; 2627 return DM_MAPIO_SUBMITTED;
2630 } 2628 }
@@ -4177,7 +4175,8 @@ static int thin_map(struct dm_target *ti, struct bio *bio)
4177 return thin_bio_map(ti, bio); 4175 return thin_bio_map(ti, bio);
4178} 4176}
4179 4177
4180static int thin_endio(struct dm_target *ti, struct bio *bio, int err) 4178static int thin_endio(struct dm_target *ti, struct bio *bio,
4179 blk_status_t *err)
4181{ 4180{
4182 unsigned long flags; 4181 unsigned long flags;
4183 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); 4182 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -4212,7 +4211,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
4212 if (h->cell) 4211 if (h->cell)
4213 cell_defer_no_holder(h->tc, h->cell); 4212 cell_defer_no_holder(h->tc, h->cell);
4214 4213
4215 return 0; 4214 return DM_ENDIO_DONE;
4216} 4215}
4217 4216
4218static void thin_presuspend(struct dm_target *ti) 4217static void thin_presuspend(struct dm_target *ti)
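
The dm-thin hunks above keep negative errnos in the internal helpers and translate to blk_status_t exactly once, at the bio boundary; a small sketch of that boundary (mirroring end_discard() and handle_unserviceable_bio(); the function name is illustrative):

#include <linux/bio.h>
#include <linux/blkdev.h>

/* Complete 'bio' with an errno-style result 'r'; 0 means success. */
static void example_complete_bio(struct bio *bio, int r)
{
        if (r && !bio->bi_status)
                bio->bi_status = errno_to_blk_status(r);
        bio_endio(bio);
}
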
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 1ec9b2c51c07..b46705ebf01f 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -538,13 +538,13 @@ static int verity_verify_io(struct dm_verity_io *io)
538/* 538/*
539 * End one "io" structure with a given error. 539 * End one "io" structure with a given error.
540 */ 540 */
541static void verity_finish_io(struct dm_verity_io *io, int error) 541static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
542{ 542{
543 struct dm_verity *v = io->v; 543 struct dm_verity *v = io->v;
544 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); 544 struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
545 545
546 bio->bi_end_io = io->orig_bi_end_io; 546 bio->bi_end_io = io->orig_bi_end_io;
547 bio->bi_error = error; 547 bio->bi_status = status;
548 548
549 verity_fec_finish_io(io); 549 verity_fec_finish_io(io);
550 550
@@ -555,15 +555,15 @@ static void verity_work(struct work_struct *w)
555{ 555{
556 struct dm_verity_io *io = container_of(w, struct dm_verity_io, work); 556 struct dm_verity_io *io = container_of(w, struct dm_verity_io, work);
557 557
558 verity_finish_io(io, verity_verify_io(io)); 558 verity_finish_io(io, errno_to_blk_status(verity_verify_io(io)));
559} 559}
560 560
561static void verity_end_io(struct bio *bio) 561static void verity_end_io(struct bio *bio)
562{ 562{
563 struct dm_verity_io *io = bio->bi_private; 563 struct dm_verity_io *io = bio->bi_private;
564 564
565 if (bio->bi_error && !verity_fec_is_enabled(io->v)) { 565 if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
566 verity_finish_io(io, bio->bi_error); 566 verity_finish_io(io, bio->bi_status);
567 return; 567 return;
568 } 568 }
569 569
@@ -643,17 +643,17 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
643 if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & 643 if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
644 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) { 644 ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
645 DMERR_LIMIT("unaligned io"); 645 DMERR_LIMIT("unaligned io");
646 return -EIO; 646 return DM_MAPIO_KILL;
647 } 647 }
648 648
649 if (bio_end_sector(bio) >> 649 if (bio_end_sector(bio) >>
650 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) { 650 (v->data_dev_block_bits - SECTOR_SHIFT) > v->data_blocks) {
651 DMERR_LIMIT("io out of range"); 651 DMERR_LIMIT("io out of range");
652 return -EIO; 652 return DM_MAPIO_KILL;
653 } 653 }
654 654
655 if (bio_data_dir(bio) == WRITE) 655 if (bio_data_dir(bio) == WRITE)
656 return -EIO; 656 return DM_MAPIO_KILL;
657 657
658 io = dm_per_bio_data(bio, ti->per_io_data_size); 658 io = dm_per_bio_data(bio, ti->per_io_data_size);
659 io->v = v; 659 io->v = v;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b616f11d8473..b65ca8dcfbdc 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -39,7 +39,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
39 case REQ_OP_READ: 39 case REQ_OP_READ:
40 if (bio->bi_opf & REQ_RAHEAD) { 40 if (bio->bi_opf & REQ_RAHEAD) {
41 /* readahead of null bytes only wastes buffer cache */ 41 /* readahead of null bytes only wastes buffer cache */
42 return -EIO; 42 return DM_MAPIO_KILL;
43 } 43 }
44 zero_fill_bio(bio); 44 zero_fill_bio(bio);
45 break; 45 break;
@@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
47 /* writes get silently dropped */ 47 /* writes get silently dropped */
48 break; 48 break;
49 default: 49 default:
50 return -EIO; 50 return DM_MAPIO_KILL;
51 } 51 }
52 52
53 bio_endio(bio); 53 bio_endio(bio);
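
The same pattern in a nutshell, as a sketch rather than code from the patch: a bio-based map hook now reports a hard failure with DM_MAPIO_KILL instead of returning -EIO, and leaves the translation to a block status to the DM core (see the __map_bio() hunk below).

#include <linux/device-mapper.h>
#include <linux/bio.h>

static int example_map(struct dm_target *ti, struct bio *bio)
{
        if (bio_data_dir(bio) == WRITE)
                return DM_MAPIO_KILL;           /* previously: return -EIO; */

        /* otherwise handle the bio as the target normally would */
        zero_fill_bio(bio);
        bio_endio(bio);
        return DM_MAPIO_SUBMITTED;
}
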
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 37ccd73c79ec..402946035308 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -63,7 +63,7 @@ static struct workqueue_struct *deferred_remove_workqueue;
63 */ 63 */
64struct dm_io { 64struct dm_io {
65 struct mapped_device *md; 65 struct mapped_device *md;
66 int error; 66 blk_status_t status;
67 atomic_t io_count; 67 atomic_t io_count;
68 struct bio *bio; 68 struct bio *bio;
69 unsigned long start_time; 69 unsigned long start_time;
@@ -768,23 +768,24 @@ static int __noflush_suspending(struct mapped_device *md)
768 * Decrements the number of outstanding ios that a bio has been 768 * Decrements the number of outstanding ios that a bio has been
769 * cloned into, completing the original io if necc. 769 * cloned into, completing the original io if necc.
770 */ 770 */
771static void dec_pending(struct dm_io *io, int error) 771static void dec_pending(struct dm_io *io, blk_status_t error)
772{ 772{
773 unsigned long flags; 773 unsigned long flags;
774 int io_error; 774 blk_status_t io_error;
775 struct bio *bio; 775 struct bio *bio;
776 struct mapped_device *md = io->md; 776 struct mapped_device *md = io->md;
777 777
778 /* Push-back supersedes any I/O errors */ 778 /* Push-back supersedes any I/O errors */
779 if (unlikely(error)) { 779 if (unlikely(error)) {
780 spin_lock_irqsave(&io->endio_lock, flags); 780 spin_lock_irqsave(&io->endio_lock, flags);
781 if (!(io->error > 0 && __noflush_suspending(md))) 781 if (!(io->status == BLK_STS_DM_REQUEUE &&
782 io->error = error; 782 __noflush_suspending(md)))
783 io->status = error;
783 spin_unlock_irqrestore(&io->endio_lock, flags); 784 spin_unlock_irqrestore(&io->endio_lock, flags);
784 } 785 }
785 786
786 if (atomic_dec_and_test(&io->io_count)) { 787 if (atomic_dec_and_test(&io->io_count)) {
787 if (io->error == DM_ENDIO_REQUEUE) { 788 if (io->status == BLK_STS_DM_REQUEUE) {
788 /* 789 /*
789 * Target requested pushing back the I/O. 790 * Target requested pushing back the I/O.
790 */ 791 */
@@ -793,16 +794,16 @@ static void dec_pending(struct dm_io *io, int error)
793 bio_list_add_head(&md->deferred, io->bio); 794 bio_list_add_head(&md->deferred, io->bio);
794 else 795 else
795 /* noflush suspend was interrupted. */ 796 /* noflush suspend was interrupted. */
796 io->error = -EIO; 797 io->status = BLK_STS_IOERR;
797 spin_unlock_irqrestore(&md->deferred_lock, flags); 798 spin_unlock_irqrestore(&md->deferred_lock, flags);
798 } 799 }
799 800
800 io_error = io->error; 801 io_error = io->status;
801 bio = io->bio; 802 bio = io->bio;
802 end_io_acct(io); 803 end_io_acct(io);
803 free_io(md, io); 804 free_io(md, io);
804 805
805 if (io_error == DM_ENDIO_REQUEUE) 806 if (io_error == BLK_STS_DM_REQUEUE)
806 return; 807 return;
807 808
808 if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) { 809 if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
@@ -814,7 +815,7 @@ static void dec_pending(struct dm_io *io, int error)
814 queue_io(md, bio); 815 queue_io(md, bio);
815 } else { 816 } else {
816 /* done with normal IO or empty flush */ 817 /* done with normal IO or empty flush */
817 bio->bi_error = io_error; 818 bio->bi_status = io_error;
818 bio_endio(bio); 819 bio_endio(bio);
819 } 820 }
820 } 821 }
@@ -838,31 +839,13 @@ void disable_write_zeroes(struct mapped_device *md)
838 839
839static void clone_endio(struct bio *bio) 840static void clone_endio(struct bio *bio)
840{ 841{
841 int error = bio->bi_error; 842 blk_status_t error = bio->bi_status;
842 int r = error;
843 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); 843 struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
844 struct dm_io *io = tio->io; 844 struct dm_io *io = tio->io;
845 struct mapped_device *md = tio->io->md; 845 struct mapped_device *md = tio->io->md;
846 dm_endio_fn endio = tio->ti->type->end_io; 846 dm_endio_fn endio = tio->ti->type->end_io;
847 847
848 if (endio) { 848 if (unlikely(error == BLK_STS_TARGET)) {
849 r = endio(tio->ti, bio, error);
850 if (r < 0 || r == DM_ENDIO_REQUEUE)
851 /*
852 * error and requeue request are handled
853 * in dec_pending().
854 */
855 error = r;
856 else if (r == DM_ENDIO_INCOMPLETE)
857 /* The target will handle the io */
858 return;
859 else if (r) {
860 DMWARN("unimplemented target endio return value: %d", r);
861 BUG();
862 }
863 }
864
865 if (unlikely(r == -EREMOTEIO)) {
866 if (bio_op(bio) == REQ_OP_WRITE_SAME && 849 if (bio_op(bio) == REQ_OP_WRITE_SAME &&
867 !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) 850 !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)
868 disable_write_same(md); 851 disable_write_same(md);
@@ -871,6 +854,23 @@ static void clone_endio(struct bio *bio)
871 disable_write_zeroes(md); 854 disable_write_zeroes(md);
872 } 855 }
873 856
857 if (endio) {
858 int r = endio(tio->ti, bio, &error);
859 switch (r) {
860 case DM_ENDIO_REQUEUE:
861 error = BLK_STS_DM_REQUEUE;
862 /*FALLTHRU*/
863 case DM_ENDIO_DONE:
864 break;
865 case DM_ENDIO_INCOMPLETE:
866 /* The target will handle the io */
867 return;
868 default:
869 DMWARN("unimplemented target endio return value: %d", r);
870 BUG();
871 }
872 }
873
874 free_tio(tio); 874 free_tio(tio);
875 dec_pending(io, error); 875 dec_pending(io, error);
876} 876}
@@ -1036,7 +1036,8 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
1036 1036
1037 while ((bio = bio_list_pop(&list))) { 1037 while ((bio = bio_list_pop(&list))) {
1038 struct bio_set *bs = bio->bi_pool; 1038 struct bio_set *bs = bio->bi_pool;
1039 if (unlikely(!bs) || bs == fs_bio_set) { 1039 if (unlikely(!bs) || bs == fs_bio_set ||
1040 !bs->rescue_workqueue) {
1040 bio_list_add(&current->bio_list[i], bio); 1041 bio_list_add(&current->bio_list[i], bio);
1041 continue; 1042 continue;
1042 } 1043 }
@@ -1084,18 +1085,24 @@ static void __map_bio(struct dm_target_io *tio)
1084 r = ti->type->map(ti, clone); 1085 r = ti->type->map(ti, clone);
1085 dm_offload_end(&o); 1086 dm_offload_end(&o);
1086 1087
1087 if (r == DM_MAPIO_REMAPPED) { 1088 switch (r) {
1089 case DM_MAPIO_SUBMITTED:
1090 break;
1091 case DM_MAPIO_REMAPPED:
1088 /* the bio has been remapped so dispatch it */ 1092 /* the bio has been remapped so dispatch it */
1089
1090 trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, 1093 trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
1091 tio->io->bio->bi_bdev->bd_dev, sector); 1094 tio->io->bio->bi_bdev->bd_dev, sector);
1092
1093 generic_make_request(clone); 1095 generic_make_request(clone);
1094 } else if (r < 0 || r == DM_MAPIO_REQUEUE) { 1096 break;
1095 /* error the io and bail out, or requeue it if needed */ 1097 case DM_MAPIO_KILL:
1096 dec_pending(tio->io, r); 1098 dec_pending(tio->io, BLK_STS_IOERR);
1099 free_tio(tio);
1100 break;
1101 case DM_MAPIO_REQUEUE:
1102 dec_pending(tio->io, BLK_STS_DM_REQUEUE);
1097 free_tio(tio); 1103 free_tio(tio);
1098 } else if (r != DM_MAPIO_SUBMITTED) { 1104 break;
1105 default:
1099 DMWARN("unimplemented target map return value: %d", r); 1106 DMWARN("unimplemented target map return value: %d", r);
1100 BUG(); 1107 BUG();
1101 } 1108 }
@@ -1360,7 +1367,7 @@ static void __split_and_process_bio(struct mapped_device *md,
1360 ci.map = map; 1367 ci.map = map;
1361 ci.md = md; 1368 ci.md = md;
1362 ci.io = alloc_io(md); 1369 ci.io = alloc_io(md);
1363 ci.io->error = 0; 1370 ci.io->status = 0;
1364 atomic_set(&ci.io->io_count, 1); 1371 atomic_set(&ci.io->io_count, 1);
1365 ci.io->bio = bio; 1372 ci.io->bio = bio;
1366 ci.io->md = md; 1373 ci.io->md = md;
@@ -1527,7 +1534,6 @@ void dm_init_normal_md_queue(struct mapped_device *md)
1527 * Initialize aspects of queue that aren't relevant for blk-mq 1534 * Initialize aspects of queue that aren't relevant for blk-mq
1528 */ 1535 */
1529 md->queue->backing_dev_info->congested_fn = dm_any_congested; 1536 md->queue->backing_dev_info->congested_fn = dm_any_congested;
1530 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1531} 1537}
1532 1538
1533static void cleanup_mapped_device(struct mapped_device *md) 1539static void cleanup_mapped_device(struct mapped_device *md)
@@ -2654,7 +2660,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
2654 BUG(); 2660 BUG();
2655 } 2661 }
2656 2662
2657 pools->bs = bioset_create_nobvec(pool_size, front_pad); 2663 pools->bs = bioset_create(pool_size, front_pad, BIOSET_NEED_RESCUER);
2658 if (!pools->bs) 2664 if (!pools->bs)
2659 goto out; 2665 goto out;
2660 2666
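
Both translations now live in one place each in dm.c: __map_bio() folds the DM_MAPIO_* return values into a blk_status_t, and clone_endio() does the same for DM_ENDIO_*. A rough sketch of the map-side mapping (function name illustrative; the real code BUGs on unknown values rather than defaulting):

static blk_status_t example_map_return_to_status(int r)
{
        switch (r) {
        case DM_MAPIO_SUBMITTED:
        case DM_MAPIO_REMAPPED:
                return BLK_STS_OK;              /* bio dispatched or owned */
        case DM_MAPIO_REQUEUE:
                return BLK_STS_DM_REQUEUE;
        case DM_MAPIO_KILL:
        default:                                /* __map_bio() BUGs here */
                return BLK_STS_IOERR;
        }
}
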
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 84e76ebac4d4..31bcbfb09fef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -185,7 +185,7 @@ static int start_readonly;
185static bool create_on_open = true; 185static bool create_on_open = true;
186 186
187/* bio_clone_mddev 187/* bio_clone_mddev
188 * like bio_clone, but with a local bio set 188 * like bio_clone_bioset, but with a local bio set
189 */ 189 */
190 190
191struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, 191struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
@@ -265,7 +265,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
265 unsigned int sectors; 265 unsigned int sectors;
266 int cpu; 266 int cpu;
267 267
268 blk_queue_split(q, &bio, q->bio_split); 268 blk_queue_split(q, &bio);
269 269
270 if (mddev == NULL || mddev->pers == NULL) { 270 if (mddev == NULL || mddev->pers == NULL) {
271 bio_io_error(bio); 271 bio_io_error(bio);
@@ -273,7 +273,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
273 } 273 }
274 if (mddev->ro == 1 && unlikely(rw == WRITE)) { 274 if (mddev->ro == 1 && unlikely(rw == WRITE)) {
275 if (bio_sectors(bio) != 0) 275 if (bio_sectors(bio) != 0)
276 bio->bi_error = -EROFS; 276 bio->bi_status = BLK_STS_IOERR;
277 bio_endio(bio); 277 bio_endio(bio);
278 return BLK_QC_T_NONE; 278 return BLK_QC_T_NONE;
279 } 279 }
@@ -719,8 +719,8 @@ static void super_written(struct bio *bio)
719 struct md_rdev *rdev = bio->bi_private; 719 struct md_rdev *rdev = bio->bi_private;
720 struct mddev *mddev = rdev->mddev; 720 struct mddev *mddev = rdev->mddev;
721 721
722 if (bio->bi_error) { 722 if (bio->bi_status) {
723 pr_err("md: super_written gets error=%d\n", bio->bi_error); 723 pr_err("md: super_written gets error=%d\n", bio->bi_status);
724 md_error(mddev, rdev); 724 md_error(mddev, rdev);
725 if (!test_bit(Faulty, &rdev->flags) 725 if (!test_bit(Faulty, &rdev->flags)
726 && (bio->bi_opf & MD_FAILFAST)) { 726 && (bio->bi_opf & MD_FAILFAST)) {
@@ -801,7 +801,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
801 801
802 submit_bio_wait(bio); 802 submit_bio_wait(bio);
803 803
804 ret = !bio->bi_error; 804 ret = !bio->bi_status;
805 bio_put(bio); 805 bio_put(bio);
806 return ret; 806 return ret;
807} 807}
@@ -5428,7 +5428,7 @@ int md_run(struct mddev *mddev)
5428 } 5428 }
5429 5429
5430 if (mddev->bio_set == NULL) { 5430 if (mddev->bio_set == NULL) {
5431 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0); 5431 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
5432 if (!mddev->bio_set) 5432 if (!mddev->bio_set)
5433 return -ENOMEM; 5433 return -ENOMEM;
5434 } 5434 }
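
bioset_create() grows a flags argument in this series, visible in the md and raid hunks; a hedged sketch of the new call (the pool size and flag combination below are arbitrary examples, not values from the patch). BIOSET_NEED_BVECS asks for the bvec mempool the old default provided, BIOSET_NEED_RESCUER keeps the per-bioset rescuer workqueue for callers that still depend on it, and 0 gives a bare bioset as used for the bio_split sets below.

#include <linux/bio.h>

static struct bio_set *example_bioset(void)
{
        return bioset_create(BIO_POOL_SIZE, 0,
                             BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
}
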
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index e95d521d93e9..68d036e64041 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -73,12 +73,12 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
73 * operation and are ready to return a success/failure code to the buffer 73 * operation and are ready to return a success/failure code to the buffer
74 * cache layer. 74 * cache layer.
75 */ 75 */
76static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) 76static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
77{ 77{
78 struct bio *bio = mp_bh->master_bio; 78 struct bio *bio = mp_bh->master_bio;
79 struct mpconf *conf = mp_bh->mddev->private; 79 struct mpconf *conf = mp_bh->mddev->private;
80 80
81 bio->bi_error = err; 81 bio->bi_status = status;
82 bio_endio(bio); 82 bio_endio(bio);
83 mempool_free(mp_bh, conf->pool); 83 mempool_free(mp_bh, conf->pool);
84} 84}
@@ -89,7 +89,7 @@ static void multipath_end_request(struct bio *bio)
89 struct mpconf *conf = mp_bh->mddev->private; 89 struct mpconf *conf = mp_bh->mddev->private;
90 struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; 90 struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
91 91
92 if (!bio->bi_error) 92 if (!bio->bi_status)
93 multipath_end_bh_io(mp_bh, 0); 93 multipath_end_bh_io(mp_bh, 0);
94 else if (!(bio->bi_opf & REQ_RAHEAD)) { 94 else if (!(bio->bi_opf & REQ_RAHEAD)) {
95 /* 95 /*
@@ -102,7 +102,7 @@ static void multipath_end_request(struct bio *bio)
102 (unsigned long long)bio->bi_iter.bi_sector); 102 (unsigned long long)bio->bi_iter.bi_sector);
103 multipath_reschedule_retry(mp_bh); 103 multipath_reschedule_retry(mp_bh);
104 } else 104 } else
105 multipath_end_bh_io(mp_bh, bio->bi_error); 105 multipath_end_bh_io(mp_bh, bio->bi_status);
106 rdev_dec_pending(rdev, conf->mddev); 106 rdev_dec_pending(rdev, conf->mddev);
107} 107}
108 108
@@ -347,7 +347,7 @@ static void multipathd(struct md_thread *thread)
347 pr_err("multipath: %s: unrecoverable IO read error for block %llu\n", 347 pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
348 bdevname(bio->bi_bdev,b), 348 bdevname(bio->bi_bdev,b),
349 (unsigned long long)bio->bi_iter.bi_sector); 349 (unsigned long long)bio->bi_iter.bi_sector);
350 multipath_end_bh_io(mp_bh, -EIO); 350 multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
351 } else { 351 } else {
352 pr_err("multipath: %s: redirecting sector %llu to another IO path\n", 352 pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
353 bdevname(bio->bi_bdev,b), 353 bdevname(bio->bi_bdev,b),
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e1a7e3d4c5e4..98ca2c1d3226 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -277,7 +277,7 @@ static void call_bio_endio(struct r1bio *r1_bio)
277 struct r1conf *conf = r1_bio->mddev->private; 277 struct r1conf *conf = r1_bio->mddev->private;
278 278
279 if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) 279 if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
280 bio->bi_error = -EIO; 280 bio->bi_status = BLK_STS_IOERR;
281 281
282 bio_endio(bio); 282 bio_endio(bio);
283 /* 283 /*
@@ -335,7 +335,7 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
335 335
336static void raid1_end_read_request(struct bio *bio) 336static void raid1_end_read_request(struct bio *bio)
337{ 337{
338 int uptodate = !bio->bi_error; 338 int uptodate = !bio->bi_status;
339 struct r1bio *r1_bio = bio->bi_private; 339 struct r1bio *r1_bio = bio->bi_private;
340 struct r1conf *conf = r1_bio->mddev->private; 340 struct r1conf *conf = r1_bio->mddev->private;
341 struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev; 341 struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
@@ -426,12 +426,12 @@ static void raid1_end_write_request(struct bio *bio)
426 struct md_rdev *rdev = conf->mirrors[mirror].rdev; 426 struct md_rdev *rdev = conf->mirrors[mirror].rdev;
427 bool discard_error; 427 bool discard_error;
428 428
429 discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; 429 discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
430 430
431 /* 431 /*
432 * 'one mirror IO has finished' event handler: 432 * 'one mirror IO has finished' event handler:
433 */ 433 */
434 if (bio->bi_error && !discard_error) { 434 if (bio->bi_status && !discard_error) {
435 set_bit(WriteErrorSeen, &rdev->flags); 435 set_bit(WriteErrorSeen, &rdev->flags);
436 if (!test_and_set_bit(WantReplacement, &rdev->flags)) 436 if (!test_and_set_bit(WantReplacement, &rdev->flags))
437 set_bit(MD_RECOVERY_NEEDED, & 437 set_bit(MD_RECOVERY_NEEDED, &
@@ -802,7 +802,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
802 bio->bi_next = NULL; 802 bio->bi_next = NULL;
803 bio->bi_bdev = rdev->bdev; 803 bio->bi_bdev = rdev->bdev;
804 if (test_bit(Faulty, &rdev->flags)) { 804 if (test_bit(Faulty, &rdev->flags)) {
805 bio->bi_error = -EIO; 805 bio->bi_status = BLK_STS_IOERR;
806 bio_endio(bio); 806 bio_endio(bio);
807 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 807 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
808 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 808 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1856,7 +1856,7 @@ static void end_sync_read(struct bio *bio)
1856 * or re-read if the read failed. 1856 * or re-read if the read failed.
1857 * We don't do much here, just schedule handling by raid1d 1857 * We don't do much here, just schedule handling by raid1d
1858 */ 1858 */
1859 if (!bio->bi_error) 1859 if (!bio->bi_status)
1860 set_bit(R1BIO_Uptodate, &r1_bio->state); 1860 set_bit(R1BIO_Uptodate, &r1_bio->state);
1861 1861
1862 if (atomic_dec_and_test(&r1_bio->remaining)) 1862 if (atomic_dec_and_test(&r1_bio->remaining))
@@ -1865,7 +1865,7 @@ static void end_sync_read(struct bio *bio)
1865 1865
1866static void end_sync_write(struct bio *bio) 1866static void end_sync_write(struct bio *bio)
1867{ 1867{
1868 int uptodate = !bio->bi_error; 1868 int uptodate = !bio->bi_status;
1869 struct r1bio *r1_bio = get_resync_r1bio(bio); 1869 struct r1bio *r1_bio = get_resync_r1bio(bio);
1870 struct mddev *mddev = r1_bio->mddev; 1870 struct mddev *mddev = r1_bio->mddev;
1871 struct r1conf *conf = mddev->private; 1871 struct r1conf *conf = mddev->private;
@@ -2058,7 +2058,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
2058 idx ++; 2058 idx ++;
2059 } 2059 }
2060 set_bit(R1BIO_Uptodate, &r1_bio->state); 2060 set_bit(R1BIO_Uptodate, &r1_bio->state);
2061 bio->bi_error = 0; 2061 bio->bi_status = 0;
2062 return 1; 2062 return 1;
2063} 2063}
2064 2064
@@ -2082,16 +2082,16 @@ static void process_checks(struct r1bio *r1_bio)
2082 for (i = 0; i < conf->raid_disks * 2; i++) { 2082 for (i = 0; i < conf->raid_disks * 2; i++) {
2083 int j; 2083 int j;
2084 int size; 2084 int size;
2085 int error; 2085 blk_status_t status;
2086 struct bio_vec *bi; 2086 struct bio_vec *bi;
2087 struct bio *b = r1_bio->bios[i]; 2087 struct bio *b = r1_bio->bios[i];
2088 struct resync_pages *rp = get_resync_pages(b); 2088 struct resync_pages *rp = get_resync_pages(b);
2089 if (b->bi_end_io != end_sync_read) 2089 if (b->bi_end_io != end_sync_read)
2090 continue; 2090 continue;
2091 /* fixup the bio for reuse, but preserve errno */ 2091 /* fixup the bio for reuse, but preserve errno */
2092 error = b->bi_error; 2092 status = b->bi_status;
2093 bio_reset(b); 2093 bio_reset(b);
2094 b->bi_error = error; 2094 b->bi_status = status;
2095 b->bi_vcnt = vcnt; 2095 b->bi_vcnt = vcnt;
2096 b->bi_iter.bi_size = r1_bio->sectors << 9; 2096 b->bi_iter.bi_size = r1_bio->sectors << 9;
2097 b->bi_iter.bi_sector = r1_bio->sector + 2097 b->bi_iter.bi_sector = r1_bio->sector +
@@ -2113,7 +2113,7 @@ static void process_checks(struct r1bio *r1_bio)
2113 } 2113 }
2114 for (primary = 0; primary < conf->raid_disks * 2; primary++) 2114 for (primary = 0; primary < conf->raid_disks * 2; primary++)
2115 if (r1_bio->bios[primary]->bi_end_io == end_sync_read && 2115 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
2116 !r1_bio->bios[primary]->bi_error) { 2116 !r1_bio->bios[primary]->bi_status) {
2117 r1_bio->bios[primary]->bi_end_io = NULL; 2117 r1_bio->bios[primary]->bi_end_io = NULL;
2118 rdev_dec_pending(conf->mirrors[primary].rdev, mddev); 2118 rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
2119 break; 2119 break;
@@ -2123,7 +2123,7 @@ static void process_checks(struct r1bio *r1_bio)
2123 int j; 2123 int j;
2124 struct bio *pbio = r1_bio->bios[primary]; 2124 struct bio *pbio = r1_bio->bios[primary];
2125 struct bio *sbio = r1_bio->bios[i]; 2125 struct bio *sbio = r1_bio->bios[i];
2126 int error = sbio->bi_error; 2126 blk_status_t status = sbio->bi_status;
2127 struct page **ppages = get_resync_pages(pbio)->pages; 2127 struct page **ppages = get_resync_pages(pbio)->pages;
2128 struct page **spages = get_resync_pages(sbio)->pages; 2128 struct page **spages = get_resync_pages(sbio)->pages;
2129 struct bio_vec *bi; 2129 struct bio_vec *bi;
@@ -2132,12 +2132,12 @@ static void process_checks(struct r1bio *r1_bio)
2132 if (sbio->bi_end_io != end_sync_read) 2132 if (sbio->bi_end_io != end_sync_read)
2133 continue; 2133 continue;
2134 /* Now we can 'fixup' the error value */ 2134 /* Now we can 'fixup' the error value */
2135 sbio->bi_error = 0; 2135 sbio->bi_status = 0;
2136 2136
2137 bio_for_each_segment_all(bi, sbio, j) 2137 bio_for_each_segment_all(bi, sbio, j)
2138 page_len[j] = bi->bv_len; 2138 page_len[j] = bi->bv_len;
2139 2139
2140 if (!error) { 2140 if (!status) {
2141 for (j = vcnt; j-- ; ) { 2141 for (j = vcnt; j-- ; ) {
2142 if (memcmp(page_address(ppages[j]), 2142 if (memcmp(page_address(ppages[j]),
2143 page_address(spages[j]), 2143 page_address(spages[j]),
@@ -2149,7 +2149,7 @@ static void process_checks(struct r1bio *r1_bio)
2149 if (j >= 0) 2149 if (j >= 0)
2150 atomic64_add(r1_bio->sectors, &mddev->resync_mismatches); 2150 atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
2151 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) 2151 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
2152 && !error)) { 2152 && !status)) {
2153 /* No need to write to this device. */ 2153 /* No need to write to this device. */
2154 sbio->bi_end_io = NULL; 2154 sbio->bi_end_io = NULL;
2155 rdev_dec_pending(conf->mirrors[i].rdev, mddev); 2155 rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@ -2400,11 +2400,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
2400 struct bio *bio = r1_bio->bios[m]; 2400 struct bio *bio = r1_bio->bios[m];
2401 if (bio->bi_end_io == NULL) 2401 if (bio->bi_end_io == NULL)
2402 continue; 2402 continue;
2403 if (!bio->bi_error && 2403 if (!bio->bi_status &&
2404 test_bit(R1BIO_MadeGood, &r1_bio->state)) { 2404 test_bit(R1BIO_MadeGood, &r1_bio->state)) {
2405 rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); 2405 rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
2406 } 2406 }
2407 if (bio->bi_error && 2407 if (bio->bi_status &&
2408 test_bit(R1BIO_WriteError, &r1_bio->state)) { 2408 test_bit(R1BIO_WriteError, &r1_bio->state)) {
2409 if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) 2409 if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
2410 md_error(conf->mddev, rdev); 2410 md_error(conf->mddev, rdev);
@@ -2955,7 +2955,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
2955 if (!conf->r1bio_pool) 2955 if (!conf->r1bio_pool)
2956 goto abort; 2956 goto abort;
2957 2957
2958 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); 2958 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
2959 if (!conf->bio_split) 2959 if (!conf->bio_split)
2960 goto abort; 2960 goto abort;
2961 2961
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 797ed60abd5e..57a250fdbbcc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -336,7 +336,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio)
336 struct r10conf *conf = r10_bio->mddev->private; 336 struct r10conf *conf = r10_bio->mddev->private;
337 337
338 if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) 338 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
339 bio->bi_error = -EIO; 339 bio->bi_status = BLK_STS_IOERR;
340 340
341 bio_endio(bio); 341 bio_endio(bio);
342 /* 342 /*
@@ -389,7 +389,7 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
389 389
390static void raid10_end_read_request(struct bio *bio) 390static void raid10_end_read_request(struct bio *bio)
391{ 391{
392 int uptodate = !bio->bi_error; 392 int uptodate = !bio->bi_status;
393 struct r10bio *r10_bio = bio->bi_private; 393 struct r10bio *r10_bio = bio->bi_private;
394 int slot, dev; 394 int slot, dev;
395 struct md_rdev *rdev; 395 struct md_rdev *rdev;
@@ -477,7 +477,7 @@ static void raid10_end_write_request(struct bio *bio)
477 struct bio *to_put = NULL; 477 struct bio *to_put = NULL;
478 bool discard_error; 478 bool discard_error;
479 479
480 discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; 480 discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
481 481
482 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); 482 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
483 483
@@ -491,7 +491,7 @@ static void raid10_end_write_request(struct bio *bio)
491 /* 491 /*
492 * this branch is our 'one mirror IO has finished' event handler: 492 * this branch is our 'one mirror IO has finished' event handler:
493 */ 493 */
494 if (bio->bi_error && !discard_error) { 494 if (bio->bi_status && !discard_error) {
495 if (repl) 495 if (repl)
496 /* Never record new bad blocks to replacement, 496 /* Never record new bad blocks to replacement,
497 * just fail it. 497 * just fail it.
@@ -913,7 +913,7 @@ static void flush_pending_writes(struct r10conf *conf)
913 bio->bi_next = NULL; 913 bio->bi_next = NULL;
914 bio->bi_bdev = rdev->bdev; 914 bio->bi_bdev = rdev->bdev;
915 if (test_bit(Faulty, &rdev->flags)) { 915 if (test_bit(Faulty, &rdev->flags)) {
916 bio->bi_error = -EIO; 916 bio->bi_status = BLK_STS_IOERR;
917 bio_endio(bio); 917 bio_endio(bio);
918 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 918 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
919 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 919 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1098,7 +1098,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
1098 bio->bi_next = NULL; 1098 bio->bi_next = NULL;
1099 bio->bi_bdev = rdev->bdev; 1099 bio->bi_bdev = rdev->bdev;
1100 if (test_bit(Faulty, &rdev->flags)) { 1100 if (test_bit(Faulty, &rdev->flags)) {
1101 bio->bi_error = -EIO; 1101 bio->bi_status = BLK_STS_IOERR;
1102 bio_endio(bio); 1102 bio_endio(bio);
1103 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 1103 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
1104 !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) 1104 !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
@@ -1888,7 +1888,7 @@ static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
1888{ 1888{
1889 struct r10conf *conf = r10_bio->mddev->private; 1889 struct r10conf *conf = r10_bio->mddev->private;
1890 1890
1891 if (!bio->bi_error) 1891 if (!bio->bi_status)
1892 set_bit(R10BIO_Uptodate, &r10_bio->state); 1892 set_bit(R10BIO_Uptodate, &r10_bio->state);
1893 else 1893 else
1894 /* The write handler will notice the lack of 1894 /* The write handler will notice the lack of
@@ -1972,7 +1972,7 @@ static void end_sync_write(struct bio *bio)
1972 else 1972 else
1973 rdev = conf->mirrors[d].rdev; 1973 rdev = conf->mirrors[d].rdev;
1974 1974
1975 if (bio->bi_error) { 1975 if (bio->bi_status) {
1976 if (repl) 1976 if (repl)
1977 md_error(mddev, rdev); 1977 md_error(mddev, rdev);
1978 else { 1978 else {
@@ -2021,7 +2021,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2021 2021
2022 /* find the first device with a block */ 2022 /* find the first device with a block */
2023 for (i=0; i<conf->copies; i++) 2023 for (i=0; i<conf->copies; i++)
2024 if (!r10_bio->devs[i].bio->bi_error) 2024 if (!r10_bio->devs[i].bio->bi_status)
2025 break; 2025 break;
2026 2026
2027 if (i == conf->copies) 2027 if (i == conf->copies)
@@ -2050,7 +2050,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2050 tpages = get_resync_pages(tbio)->pages; 2050 tpages = get_resync_pages(tbio)->pages;
2051 d = r10_bio->devs[i].devnum; 2051 d = r10_bio->devs[i].devnum;
2052 rdev = conf->mirrors[d].rdev; 2052 rdev = conf->mirrors[d].rdev;
2053 if (!r10_bio->devs[i].bio->bi_error) { 2053 if (!r10_bio->devs[i].bio->bi_status) {
2054 /* We know that the bi_io_vec layout is the same for 2054 /* We know that the bi_io_vec layout is the same for
2055 * both 'first' and 'i', so we just compare them. 2055 * both 'first' and 'i', so we just compare them.
2056 * All vec entries are PAGE_SIZE; 2056 * All vec entries are PAGE_SIZE;
@@ -2633,7 +2633,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2633 rdev = conf->mirrors[dev].rdev; 2633 rdev = conf->mirrors[dev].rdev;
2634 if (r10_bio->devs[m].bio == NULL) 2634 if (r10_bio->devs[m].bio == NULL)
2635 continue; 2635 continue;
2636 if (!r10_bio->devs[m].bio->bi_error) { 2636 if (!r10_bio->devs[m].bio->bi_status) {
2637 rdev_clear_badblocks( 2637 rdev_clear_badblocks(
2638 rdev, 2638 rdev,
2639 r10_bio->devs[m].addr, 2639 r10_bio->devs[m].addr,
@@ -2649,7 +2649,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2649 if (r10_bio->devs[m].repl_bio == NULL) 2649 if (r10_bio->devs[m].repl_bio == NULL)
2650 continue; 2650 continue;
2651 2651
2652 if (!r10_bio->devs[m].repl_bio->bi_error) { 2652 if (!r10_bio->devs[m].repl_bio->bi_status) {
2653 rdev_clear_badblocks( 2653 rdev_clear_badblocks(
2654 rdev, 2654 rdev,
2655 r10_bio->devs[m].addr, 2655 r10_bio->devs[m].addr,
@@ -2675,7 +2675,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2675 r10_bio->devs[m].addr, 2675 r10_bio->devs[m].addr,
2676 r10_bio->sectors, 0); 2676 r10_bio->sectors, 0);
2677 rdev_dec_pending(rdev, conf->mddev); 2677 rdev_dec_pending(rdev, conf->mddev);
2678 } else if (bio != NULL && bio->bi_error) { 2678 } else if (bio != NULL && bio->bi_status) {
2679 fail = true; 2679 fail = true;
2680 if (!narrow_write_error(r10_bio, m)) { 2680 if (!narrow_write_error(r10_bio, m)) {
2681 md_error(conf->mddev, rdev); 2681 md_error(conf->mddev, rdev);
@@ -3267,7 +3267,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
3267 r10_bio->devs[i].repl_bio->bi_end_io = NULL; 3267 r10_bio->devs[i].repl_bio->bi_end_io = NULL;
3268 3268
3269 bio = r10_bio->devs[i].bio; 3269 bio = r10_bio->devs[i].bio;
3270 bio->bi_error = -EIO; 3270 bio->bi_status = BLK_STS_IOERR;
3271 rcu_read_lock(); 3271 rcu_read_lock();
3272 rdev = rcu_dereference(conf->mirrors[d].rdev); 3272 rdev = rcu_dereference(conf->mirrors[d].rdev);
3273 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) { 3273 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
@@ -3309,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
3309 3309
3310 /* Need to set up for writing to the replacement */ 3310 /* Need to set up for writing to the replacement */
3311 bio = r10_bio->devs[i].repl_bio; 3311 bio = r10_bio->devs[i].repl_bio;
3312 bio->bi_error = -EIO; 3312 bio->bi_status = BLK_STS_IOERR;
3313 3313
3314 sector = r10_bio->devs[i].addr; 3314 sector = r10_bio->devs[i].addr;
3315 bio->bi_next = biolist; 3315 bio->bi_next = biolist;
@@ -3375,7 +3375,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
3375 3375
3376 if (bio->bi_end_io == end_sync_read) { 3376 if (bio->bi_end_io == end_sync_read) {
3377 md_sync_acct(bio->bi_bdev, nr_sectors); 3377 md_sync_acct(bio->bi_bdev, nr_sectors);
3378 bio->bi_error = 0; 3378 bio->bi_status = 0;
3379 generic_make_request(bio); 3379 generic_make_request(bio);
3380 } 3380 }
3381 } 3381 }
@@ -3552,7 +3552,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
3552 if (!conf->r10bio_pool) 3552 if (!conf->r10bio_pool)
3553 goto out; 3553 goto out;
3554 3554
3555 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); 3555 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
3556 if (!conf->bio_split) 3556 if (!conf->bio_split)
3557 goto out; 3557 goto out;
3558 3558
@@ -4397,7 +4397,7 @@ read_more:
4397 read_bio->bi_end_io = end_reshape_read; 4397 read_bio->bi_end_io = end_reshape_read;
4398 bio_set_op_attrs(read_bio, REQ_OP_READ, 0); 4398 bio_set_op_attrs(read_bio, REQ_OP_READ, 0);
4399 read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); 4399 read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
4400 read_bio->bi_error = 0; 4400 read_bio->bi_status = 0;
4401 read_bio->bi_vcnt = 0; 4401 read_bio->bi_vcnt = 0;
4402 read_bio->bi_iter.bi_size = 0; 4402 read_bio->bi_iter.bi_size = 0;
4403 r10_bio->master_bio = read_bio; 4403 r10_bio->master_bio = read_bio;
@@ -4641,7 +4641,7 @@ static void end_reshape_write(struct bio *bio)
4641 rdev = conf->mirrors[d].rdev; 4641 rdev = conf->mirrors[d].rdev;
4642 } 4642 }
4643 4643
4644 if (bio->bi_error) { 4644 if (bio->bi_status) {
4645 /* FIXME should record badblock */ 4645 /* FIXME should record badblock */
4646 md_error(mddev, rdev); 4646 md_error(mddev, rdev);
4647 } 4647 }
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 0a7af8b0a80a..bfa1e907c472 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -572,7 +572,7 @@ static void r5l_log_endio(struct bio *bio)
572 struct r5l_log *log = io->log; 572 struct r5l_log *log = io->log;
573 unsigned long flags; 573 unsigned long flags;
574 574
575 if (bio->bi_error) 575 if (bio->bi_status)
576 md_error(log->rdev->mddev, log->rdev); 576 md_error(log->rdev->mddev, log->rdev);
577 577
578 bio_put(bio); 578 bio_put(bio);
@@ -1247,7 +1247,7 @@ static void r5l_log_flush_endio(struct bio *bio)
1247 unsigned long flags; 1247 unsigned long flags;
1248 struct r5l_io_unit *io; 1248 struct r5l_io_unit *io;
1249 1249
1250 if (bio->bi_error) 1250 if (bio->bi_status)
1251 md_error(log->rdev->mddev, log->rdev); 1251 md_error(log->rdev->mddev, log->rdev);
1252 1252
1253 spin_lock_irqsave(&log->io_list_lock, flags); 1253 spin_lock_irqsave(&log->io_list_lock, flags);
@@ -3063,7 +3063,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
3063 if (!log->io_pool) 3063 if (!log->io_pool)
3064 goto io_pool; 3064 goto io_pool;
3065 3065
3066 log->bs = bioset_create(R5L_POOL_SIZE, 0); 3066 log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
3067 if (!log->bs) 3067 if (!log->bs)
3068 goto io_bs; 3068 goto io_bs;
3069 3069
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index ccce92e68d7f..77cce3573aa8 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -397,7 +397,7 @@ static void ppl_log_endio(struct bio *bio)
397 397
398 pr_debug("%s: seq: %llu\n", __func__, io->seq); 398 pr_debug("%s: seq: %llu\n", __func__, io->seq);
399 399
400 if (bio->bi_error) 400 if (bio->bi_status)
401 md_error(ppl_conf->mddev, log->rdev); 401 md_error(ppl_conf->mddev, log->rdev);
402 402
403 list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { 403 list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) {
@@ -1150,7 +1150,7 @@ int ppl_init_log(struct r5conf *conf)
1150 goto err; 1150 goto err;
1151 } 1151 }
1152 1152
1153 ppl_conf->bs = bioset_create(conf->raid_disks, 0); 1153 ppl_conf->bs = bioset_create(conf->raid_disks, 0, 0);
1154 if (!ppl_conf->bs) { 1154 if (!ppl_conf->bs) {
1155 ret = -ENOMEM; 1155 ret = -ENOMEM;
1156 goto err; 1156 goto err;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ec0f951ae19f..62c965be97e1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2476,7 +2476,7 @@ static void raid5_end_read_request(struct bio * bi)
2476 2476
2477 pr_debug("end_read_request %llu/%d, count: %d, error %d.\n", 2477 pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
2478 (unsigned long long)sh->sector, i, atomic_read(&sh->count), 2478 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
2479 bi->bi_error); 2479 bi->bi_status);
2480 if (i == disks) { 2480 if (i == disks) {
2481 bio_reset(bi); 2481 bio_reset(bi);
2482 BUG(); 2482 BUG();
@@ -2496,7 +2496,7 @@ static void raid5_end_read_request(struct bio * bi)
2496 s = sh->sector + rdev->new_data_offset; 2496 s = sh->sector + rdev->new_data_offset;
2497 else 2497 else
2498 s = sh->sector + rdev->data_offset; 2498 s = sh->sector + rdev->data_offset;
2499 if (!bi->bi_error) { 2499 if (!bi->bi_status) {
2500 set_bit(R5_UPTODATE, &sh->dev[i].flags); 2500 set_bit(R5_UPTODATE, &sh->dev[i].flags);
2501 if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 2501 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
2502 /* Note that this cannot happen on a 2502 /* Note that this cannot happen on a
@@ -2613,7 +2613,7 @@ static void raid5_end_write_request(struct bio *bi)
2613 } 2613 }
2614 pr_debug("end_write_request %llu/%d, count %d, error: %d.\n", 2614 pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
2615 (unsigned long long)sh->sector, i, atomic_read(&sh->count), 2615 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
2616 bi->bi_error); 2616 bi->bi_status);
2617 if (i == disks) { 2617 if (i == disks) {
2618 bio_reset(bi); 2618 bio_reset(bi);
2619 BUG(); 2619 BUG();
@@ -2621,14 +2621,14 @@ static void raid5_end_write_request(struct bio *bi)
2621 } 2621 }
2622 2622
2623 if (replacement) { 2623 if (replacement) {
2624 if (bi->bi_error) 2624 if (bi->bi_status)
2625 md_error(conf->mddev, rdev); 2625 md_error(conf->mddev, rdev);
2626 else if (is_badblock(rdev, sh->sector, 2626 else if (is_badblock(rdev, sh->sector,
2627 STRIPE_SECTORS, 2627 STRIPE_SECTORS,
2628 &first_bad, &bad_sectors)) 2628 &first_bad, &bad_sectors))
2629 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); 2629 set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
2630 } else { 2630 } else {
2631 if (bi->bi_error) { 2631 if (bi->bi_status) {
2632 set_bit(STRIPE_DEGRADED, &sh->state); 2632 set_bit(STRIPE_DEGRADED, &sh->state);
2633 set_bit(WriteErrorSeen, &rdev->flags); 2633 set_bit(WriteErrorSeen, &rdev->flags);
2634 set_bit(R5_WriteError, &sh->dev[i].flags); 2634 set_bit(R5_WriteError, &sh->dev[i].flags);
@@ -2649,7 +2649,7 @@ static void raid5_end_write_request(struct bio *bi)
2649 } 2649 }
2650 rdev_dec_pending(rdev, conf->mddev); 2650 rdev_dec_pending(rdev, conf->mddev);
2651 2651
2652 if (sh->batch_head && bi->bi_error && !replacement) 2652 if (sh->batch_head && bi->bi_status && !replacement)
2653 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); 2653 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
2654 2654
2655 bio_reset(bi); 2655 bio_reset(bi);
@@ -3381,7 +3381,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3381 sh->dev[i].sector + STRIPE_SECTORS) { 3381 sh->dev[i].sector + STRIPE_SECTORS) {
3382 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 3382 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
3383 3383
3384 bi->bi_error = -EIO; 3384 bi->bi_status = BLK_STS_IOERR;
3385 md_write_end(conf->mddev); 3385 md_write_end(conf->mddev);
3386 bio_endio(bi); 3386 bio_endio(bi);
3387 bi = nextbi; 3387 bi = nextbi;
@@ -3403,7 +3403,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3403 sh->dev[i].sector + STRIPE_SECTORS) { 3403 sh->dev[i].sector + STRIPE_SECTORS) {
3404 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 3404 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
3405 3405
3406 bi->bi_error = -EIO; 3406 bi->bi_status = BLK_STS_IOERR;
3407 md_write_end(conf->mddev); 3407 md_write_end(conf->mddev);
3408 bio_endio(bi); 3408 bio_endio(bi);
3409 bi = bi2; 3409 bi = bi2;
@@ -3429,7 +3429,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
3429 struct bio *nextbi = 3429 struct bio *nextbi =
3430 r5_next_bio(bi, sh->dev[i].sector); 3430 r5_next_bio(bi, sh->dev[i].sector);
3431 3431
3432 bi->bi_error = -EIO; 3432 bi->bi_status = BLK_STS_IOERR;
3433 bio_endio(bi); 3433 bio_endio(bi);
3434 bi = nextbi; 3434 bi = nextbi;
3435 } 3435 }
@@ -5154,7 +5154,7 @@ static void raid5_align_endio(struct bio *bi)
5154 struct mddev *mddev; 5154 struct mddev *mddev;
5155 struct r5conf *conf; 5155 struct r5conf *conf;
5156 struct md_rdev *rdev; 5156 struct md_rdev *rdev;
5157 int error = bi->bi_error; 5157 blk_status_t error = bi->bi_status;
5158 5158
5159 bio_put(bi); 5159 bio_put(bi);
5160 5160
@@ -5731,7 +5731,7 @@ static void raid5_make_request(struct mddev *mddev, struct bio * bi)
5731 release_stripe_plug(mddev, sh); 5731 release_stripe_plug(mddev, sh);
5732 } else { 5732 } else {
5733 /* cannot get stripe for read-ahead, just give-up */ 5733 /* cannot get stripe for read-ahead, just give-up */
5734 bi->bi_error = -EIO; 5734 bi->bi_status = BLK_STS_IOERR;
5735 break; 5735 break;
5736 } 5736 }
5737 } 5737 }
@@ -6943,7 +6943,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
6943 goto abort; 6943 goto abort;
6944 } 6944 }
6945 6945
6946 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0); 6946 conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
6947 if (!conf->bio_split) 6947 if (!conf->bio_split)
6948 goto abort; 6948 goto abort;
6949 conf->mddev = mddev; 6949 conf->mddev = mddev;
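
Most of the raid1/raid10/raid5 churn above is the mechanical bi_error -> bi_status rename; as a sketch (names illustrative), a completion callback afterwards looks like this, with blk_status_to_errno() available where an errno is still wanted for logging:

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

static void example_end_io(struct bio *bio)
{
        if (bio->bi_status)     /* formerly bio->bi_error, a negative errno */
                pr_debug("I/O failed: %d\n",
                         blk_status_to_errno(bio->bi_status));
        bio_put(bio);
}
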
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 99e651c27fb7..22de7f5ed032 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -1921,12 +1921,13 @@ static void msb_io_work(struct work_struct *work)
1921 spin_lock_irqsave(&msb->q_lock, flags); 1921 spin_lock_irqsave(&msb->q_lock, flags);
1922 1922
1923 if (len) 1923 if (len)
1924 if (!__blk_end_request(msb->req, 0, len)) 1924 if (!__blk_end_request(msb->req, BLK_STS_OK, len))
1925 msb->req = NULL; 1925 msb->req = NULL;
1926 1926
1927 if (error && msb->req) { 1927 if (error && msb->req) {
1928 blk_status_t ret = errno_to_blk_status(error);
1928 dbg_verbose("IO: ending one sector of the request with error"); 1929 dbg_verbose("IO: ending one sector of the request with error");
1929 if (!__blk_end_request(msb->req, error, msb->page_size)) 1930 if (!__blk_end_request(msb->req, ret, msb->page_size))
1930 msb->req = NULL; 1931 msb->req = NULL;
1931 } 1932 }
1932 1933
@@ -2014,7 +2015,7 @@ static void msb_submit_req(struct request_queue *q)
2014 WARN_ON(!msb->io_queue_stopped); 2015 WARN_ON(!msb->io_queue_stopped);
2015 2016
2016 while ((req = blk_fetch_request(q)) != NULL) 2017 while ((req = blk_fetch_request(q)) != NULL)
2017 __blk_end_request_all(req, -ENODEV); 2018 __blk_end_request_all(req, BLK_STS_IOERR);
2018 return; 2019 return;
2019 } 2020 }
2020 2021
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index c00d8a266878..8897962781bb 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -709,7 +709,8 @@ try_again:
709 msb->req_sg); 709 msb->req_sg);
710 710
711 if (!msb->seg_count) { 711 if (!msb->seg_count) {
712 chunk = __blk_end_request_cur(msb->block_req, -ENOMEM); 712 chunk = __blk_end_request_cur(msb->block_req,
713 BLK_STS_RESOURCE);
713 continue; 714 continue;
714 } 715 }
715 716
@@ -776,7 +777,8 @@ static int mspro_block_complete_req(struct memstick_dev *card, int error)
776 if (error && !t_len) 777 if (error && !t_len)
777 t_len = blk_rq_cur_bytes(msb->block_req); 778 t_len = blk_rq_cur_bytes(msb->block_req);
778 779
779 chunk = __blk_end_request(msb->block_req, error, t_len); 780 chunk = __blk_end_request(msb->block_req,
781 errno_to_blk_status(error), t_len);
780 782
781 error = mspro_block_issue_req(card, chunk); 783 error = mspro_block_issue_req(card, chunk);
782 784
@@ -838,7 +840,7 @@ static void mspro_block_submit_req(struct request_queue *q)
838 840
839 if (msb->eject) { 841 if (msb->eject) {
840 while ((req = blk_fetch_request(q)) != NULL) 842 while ((req = blk_fetch_request(q)) != NULL)
841 __blk_end_request_all(req, -ENODEV); 843 __blk_end_request_all(req, BLK_STS_IOERR);
842 844
843 return; 845 return;
844 } 846 }
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 8273b078686d..6ff94a948a4b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1184,9 +1184,10 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
1184 struct mmc_card *card = md->queue.card; 1184 struct mmc_card *card = md->queue.card;
1185 unsigned int from, nr, arg; 1185 unsigned int from, nr, arg;
1186 int err = 0, type = MMC_BLK_DISCARD; 1186 int err = 0, type = MMC_BLK_DISCARD;
1187 blk_status_t status = BLK_STS_OK;
1187 1188
1188 if (!mmc_can_erase(card)) { 1189 if (!mmc_can_erase(card)) {
1189 err = -EOPNOTSUPP; 1190 status = BLK_STS_NOTSUPP;
1190 goto fail; 1191 goto fail;
1191 } 1192 }
1192 1193
@@ -1212,10 +1213,12 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
1212 if (!err) 1213 if (!err)
1213 err = mmc_erase(card, from, nr, arg); 1214 err = mmc_erase(card, from, nr, arg);
1214 } while (err == -EIO && !mmc_blk_reset(md, card->host, type)); 1215 } while (err == -EIO && !mmc_blk_reset(md, card->host, type));
1215 if (!err) 1216 if (err)
1217 status = BLK_STS_IOERR;
1218 else
1216 mmc_blk_reset_success(md, type); 1219 mmc_blk_reset_success(md, type);
1217fail: 1220fail:
1218 blk_end_request(req, err, blk_rq_bytes(req)); 1221 blk_end_request(req, status, blk_rq_bytes(req));
1219} 1222}
1220 1223
1221static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq, 1224static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
@@ -1225,9 +1228,10 @@ static void mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
1225 struct mmc_card *card = md->queue.card; 1228 struct mmc_card *card = md->queue.card;
1226 unsigned int from, nr, arg; 1229 unsigned int from, nr, arg;
1227 int err = 0, type = MMC_BLK_SECDISCARD; 1230 int err = 0, type = MMC_BLK_SECDISCARD;
1231 blk_status_t status = BLK_STS_OK;
1228 1232
1229 if (!(mmc_can_secure_erase_trim(card))) { 1233 if (!(mmc_can_secure_erase_trim(card))) {
1230 err = -EOPNOTSUPP; 1234 status = BLK_STS_NOTSUPP;
1231 goto out; 1235 goto out;
1232 } 1236 }
1233 1237
@@ -1254,8 +1258,10 @@ retry:
1254 err = mmc_erase(card, from, nr, arg); 1258 err = mmc_erase(card, from, nr, arg);
1255 if (err == -EIO) 1259 if (err == -EIO)
1256 goto out_retry; 1260 goto out_retry;
1257 if (err) 1261 if (err) {
1262 status = BLK_STS_IOERR;
1258 goto out; 1263 goto out;
1264 }
1259 1265
1260 if (arg == MMC_SECURE_TRIM1_ARG) { 1266 if (arg == MMC_SECURE_TRIM1_ARG) {
1261 if (card->quirks & MMC_QUIRK_INAND_CMD38) { 1267 if (card->quirks & MMC_QUIRK_INAND_CMD38) {
@@ -1270,8 +1276,10 @@ retry:
1270 err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG); 1276 err = mmc_erase(card, from, nr, MMC_SECURE_TRIM2_ARG);
1271 if (err == -EIO) 1277 if (err == -EIO)
1272 goto out_retry; 1278 goto out_retry;
1273 if (err) 1279 if (err) {
1280 status = BLK_STS_IOERR;
1274 goto out; 1281 goto out;
1282 }
1275 } 1283 }
1276 1284
1277out_retry: 1285out_retry:
@@ -1280,7 +1288,7 @@ out_retry:
1280 if (!err) 1288 if (!err)
1281 mmc_blk_reset_success(md, type); 1289 mmc_blk_reset_success(md, type);
1282out: 1290out:
1283 blk_end_request(req, err, blk_rq_bytes(req)); 1291 blk_end_request(req, status, blk_rq_bytes(req));
1284} 1292}
1285 1293
1286static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req) 1294static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
@@ -1290,10 +1298,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
1290 int ret = 0; 1298 int ret = 0;
1291 1299
1292 ret = mmc_flush_cache(card); 1300 ret = mmc_flush_cache(card);
1293 if (ret) 1301 blk_end_request_all(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
1294 ret = -EIO;
1295
1296 blk_end_request_all(req, ret);
1297} 1302}
1298 1303
1299/* 1304/*
@@ -1641,7 +1646,7 @@ static void mmc_blk_rw_cmd_abort(struct mmc_queue *mq, struct mmc_card *card,
1641{ 1646{
1642 if (mmc_card_removed(card)) 1647 if (mmc_card_removed(card))
1643 req->rq_flags |= RQF_QUIET; 1648 req->rq_flags |= RQF_QUIET;
1644 while (blk_end_request(req, -EIO, blk_rq_cur_bytes(req))); 1649 while (blk_end_request(req, BLK_STS_IOERR, blk_rq_cur_bytes(req)));
1645 mmc_queue_req_free(mq, mqrq); 1650 mmc_queue_req_free(mq, mqrq);
1646} 1651}
1647 1652
@@ -1661,7 +1666,7 @@ static void mmc_blk_rw_try_restart(struct mmc_queue *mq, struct request *req,
1661 */ 1666 */
1662 if (mmc_card_removed(mq->card)) { 1667 if (mmc_card_removed(mq->card)) {
1663 req->rq_flags |= RQF_QUIET; 1668 req->rq_flags |= RQF_QUIET;
1664 blk_end_request_all(req, -EIO); 1669 blk_end_request_all(req, BLK_STS_IOERR);
1665 mmc_queue_req_free(mq, mqrq); 1670 mmc_queue_req_free(mq, mqrq);
1666 return; 1671 return;
1667 } 1672 }
@@ -1743,7 +1748,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
1743 */ 1748 */
1744 mmc_blk_reset_success(md, type); 1749 mmc_blk_reset_success(md, type);
1745 1750
1746 req_pending = blk_end_request(old_req, 0, 1751 req_pending = blk_end_request(old_req, BLK_STS_OK,
1747 brq->data.bytes_xfered); 1752 brq->data.bytes_xfered);
1748 /* 1753 /*
1749 * If the blk_end_request function returns non-zero even 1754 * If the blk_end_request function returns non-zero even
@@ -1811,7 +1816,7 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req)
1811 * time, so we only reach here after trying to 1816 * time, so we only reach here after trying to
1812 * read a single sector. 1817 * read a single sector.
1813 */ 1818 */
1814 req_pending = blk_end_request(old_req, -EIO, 1819 req_pending = blk_end_request(old_req, BLK_STS_IOERR,
1815 brq->data.blksz); 1820 brq->data.blksz);
1816 if (!req_pending) { 1821 if (!req_pending) {
1817 mmc_queue_req_free(mq, mq_rq); 1822 mmc_queue_req_free(mq, mq_rq);
@@ -1860,7 +1865,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
1860 ret = mmc_blk_part_switch(card, md); 1865 ret = mmc_blk_part_switch(card, md);
1861 if (ret) { 1866 if (ret) {
1862 if (req) { 1867 if (req) {
1863 blk_end_request_all(req, -EIO); 1868 blk_end_request_all(req, BLK_STS_IOERR);
1864 } 1869 }
1865 goto out; 1870 goto out;
1866 } 1871 }
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 5c37b6be3e7b..b659a28c8018 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -133,7 +133,7 @@ static void mmc_request_fn(struct request_queue *q)
133 if (!mq) { 133 if (!mq) {
134 while ((req = blk_fetch_request(q)) != NULL) { 134 while ((req = blk_fetch_request(q)) != NULL) {
135 req->rq_flags |= RQF_QUIET; 135 req->rq_flags |= RQF_QUIET;
136 __blk_end_request_all(req, -EIO); 136 __blk_end_request_all(req, BLK_STS_IOERR);
137 } 137 }
138 return; 138 return;
139 } 139 }
@@ -388,7 +388,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
388 mmc_queue_setup_discard(mq->queue, card); 388 mmc_queue_setup_discard(mq->queue, card);
389 389
390 if (card->bouncesz) { 390 if (card->bouncesz) {
391 blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
392 blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512); 391 blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512);
393 blk_queue_max_segments(mq->queue, card->bouncesz / 512); 392 blk_queue_max_segments(mq->queue, card->bouncesz / 512);
394 blk_queue_max_segment_size(mq->queue, card->bouncesz); 393 blk_queue_max_segment_size(mq->queue, card->bouncesz);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6b8d5cd7dbf6..f336a9b85576 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -73,7 +73,7 @@ static void blktrans_dev_put(struct mtd_blktrans_dev *dev)
73} 73}
74 74
75 75
76static int do_blktrans_request(struct mtd_blktrans_ops *tr, 76static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr,
77 struct mtd_blktrans_dev *dev, 77 struct mtd_blktrans_dev *dev,
78 struct request *req) 78 struct request *req)
79{ 79{
@@ -84,33 +84,37 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
84 nsect = blk_rq_cur_bytes(req) >> tr->blkshift; 84 nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
85 buf = bio_data(req->bio); 85 buf = bio_data(req->bio);
86 86
87 if (req_op(req) == REQ_OP_FLUSH) 87 if (req_op(req) == REQ_OP_FLUSH) {
88 return tr->flush(dev); 88 if (tr->flush(dev))
89 return BLK_STS_IOERR;
90 return BLK_STS_OK;
91 }
89 92
90 if (blk_rq_pos(req) + blk_rq_cur_sectors(req) > 93 if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
91 get_capacity(req->rq_disk)) 94 get_capacity(req->rq_disk))
92 return -EIO; 95 return BLK_STS_IOERR;
93 96
94 switch (req_op(req)) { 97 switch (req_op(req)) {
95 case REQ_OP_DISCARD: 98 case REQ_OP_DISCARD:
96 return tr->discard(dev, block, nsect); 99 if (tr->discard(dev, block, nsect))
100 return BLK_STS_IOERR;
101 return BLK_STS_OK;
97 case REQ_OP_READ: 102 case REQ_OP_READ:
98 for (; nsect > 0; nsect--, block++, buf += tr->blksize) 103 for (; nsect > 0; nsect--, block++, buf += tr->blksize)
99 if (tr->readsect(dev, block, buf)) 104 if (tr->readsect(dev, block, buf))
100 return -EIO; 105 return BLK_STS_IOERR;
101 rq_flush_dcache_pages(req); 106 rq_flush_dcache_pages(req);
102 return 0; 107 return BLK_STS_OK;
103 case REQ_OP_WRITE: 108 case REQ_OP_WRITE:
104 if (!tr->writesect) 109 if (!tr->writesect)
105 return -EIO; 110 return BLK_STS_IOERR;
106 111
107 rq_flush_dcache_pages(req); 112 rq_flush_dcache_pages(req);
108 for (; nsect > 0; nsect--, block++, buf += tr->blksize) 113 for (; nsect > 0; nsect--, block++, buf += tr->blksize)
109 if (tr->writesect(dev, block, buf)) 114 if (tr->writesect(dev, block, buf))
110 return -EIO; 115 return BLK_STS_IOERR;
111 return 0;
112 default: 116 default:
113 return -EIO; 117 return BLK_STS_IOERR;
114 } 118 }
115} 119}
116 120
@@ -132,7 +136,7 @@ static void mtd_blktrans_work(struct work_struct *work)
132 spin_lock_irq(rq->queue_lock); 136 spin_lock_irq(rq->queue_lock);
133 137
134 while (1) { 138 while (1) {
135 int res; 139 blk_status_t res;
136 140
137 dev->bg_stop = false; 141 dev->bg_stop = false;
138 if (!req && !(req = blk_fetch_request(rq))) { 142 if (!req && !(req = blk_fetch_request(rq))) {
@@ -178,7 +182,7 @@ static void mtd_blktrans_request(struct request_queue *rq)
178 182
179 if (!dev) 183 if (!dev)
180 while ((req = blk_fetch_request(rq)) != NULL) 184 while ((req = blk_fetch_request(rq)) != NULL)
181 __blk_end_request_all(req, -ENODEV); 185 __blk_end_request_all(req, BLK_STS_IOERR);
182 else 186 else
183 queue_work(dev->wq, &dev->work); 187 queue_work(dev->wq, &dev->work);
184} 188}
@@ -413,6 +417,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
413 new->rq->queuedata = new; 417 new->rq->queuedata = new;
414 blk_queue_logical_block_size(new->rq, tr->blksize); 418 blk_queue_logical_block_size(new->rq, tr->blksize);
415 419
420 blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
416 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq); 421 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq);
417 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq); 422 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq);
418 423
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 5497e65439df..c3963f880448 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -313,10 +313,10 @@ static void ubiblock_do_work(struct work_struct *work)
313 ret = ubiblock_read(pdu); 313 ret = ubiblock_read(pdu);
314 rq_flush_dcache_pages(req); 314 rq_flush_dcache_pages(req);
315 315
316 blk_mq_end_request(req, ret); 316 blk_mq_end_request(req, errno_to_blk_status(ret));
317} 317}
318 318
319static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx, 319static blk_status_t ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
320 const struct blk_mq_queue_data *bd) 320 const struct blk_mq_queue_data *bd)
321{ 321{
322 struct request *req = bd->rq; 322 struct request *req = bd->rq;
@@ -327,9 +327,9 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
327 case REQ_OP_READ: 327 case REQ_OP_READ:
328 ubi_sgl_init(&pdu->usgl); 328 ubi_sgl_init(&pdu->usgl);
329 queue_work(dev->wq, &pdu->work); 329 queue_work(dev->wq, &pdu->work);
330 return BLK_MQ_RQ_QUEUE_OK; 330 return BLK_STS_OK;
331 default: 331 default:
332 return BLK_MQ_RQ_QUEUE_ERROR; 332 return BLK_STS_IOERR;
333 } 333 }
334 334
335} 335}
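The ubiblock hunks above show the new blk-mq contract in this series: ->queue_rq() returns a blk_status_t directly and completions go through blk_mq_end_request() with a blk_status_t, replacing the old BLK_MQ_RQ_QUEUE_* codes and negative errnos. A minimal sketch of that contract for a hypothetical driver (all names below are illustrative, not taken from this patch):

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Illustrative only: a trivial ->queue_rq() using the blk_status_t API. */
static blk_status_t sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
                                    const struct blk_mq_queue_data *bd)
{
        struct request *req = bd->rq;

        blk_mq_start_request(req);

        switch (req_op(req)) {
        case REQ_OP_READ:
        case REQ_OP_WRITE:
                /* A real driver would kick off I/O here and complete later;
                 * the sketch completes immediately with a status code. */
                blk_mq_end_request(req, BLK_STS_OK);
                return BLK_STS_OK;
        default:
                /* Unsupported operations map to a blk_status_t, not an errno. */
                return BLK_STS_IOERR;
        }
}

static const struct blk_mq_ops sketch_mq_ops = {
        .queue_rq       = sketch_queue_rq,
};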
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 822198a75e96..f12d23c49771 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -186,7 +186,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
186 * another kernel subsystem, and we just pass it through. 186 * another kernel subsystem, and we just pass it through.
187 */ 187 */
188 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 188 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
189 bio->bi_error = -EIO; 189 bio->bi_status = BLK_STS_IOERR;
190 goto out; 190 goto out;
191 } 191 }
192 192
@@ -205,7 +205,7 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
205 "io error in %s sector %lld, len %d,\n", 205 "io error in %s sector %lld, len %d,\n",
206 (rw == READ) ? "READ" : "WRITE", 206 (rw == READ) ? "READ" : "WRITE",
207 (unsigned long long) iter.bi_sector, len); 207 (unsigned long long) iter.bi_sector, len);
208 bio->bi_error = err; 208 bio->bi_status = errno_to_blk_status(err);
209 break; 209 break;
210 } 210 }
211 } 211 }
@@ -273,7 +273,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
273 273
274 blk_queue_make_request(q, nd_blk_make_request); 274 blk_queue_make_request(q, nd_blk_make_request);
275 blk_queue_max_hw_sectors(q, UINT_MAX); 275 blk_queue_max_hw_sectors(q, UINT_MAX);
276 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
277 blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); 276 blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
278 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 277 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
279 q->queuedata = nsblk; 278 q->queuedata = nsblk;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 983718b8fd9b..b6ba0618ea46 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1210,7 +1210,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
1210 * another kernel subsystem, and we just pass it through. 1210 * another kernel subsystem, and we just pass it through.
1211 */ 1211 */
1212 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { 1212 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
1213 bio->bi_error = -EIO; 1213 bio->bi_status = BLK_STS_IOERR;
1214 goto out; 1214 goto out;
1215 } 1215 }
1216 1216
@@ -1232,7 +1232,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
1232 (op_is_write(bio_op(bio))) ? "WRITE" : 1232 (op_is_write(bio_op(bio))) ? "WRITE" :
1233 "READ", 1233 "READ",
1234 (unsigned long long) iter.bi_sector, len); 1234 (unsigned long long) iter.bi_sector, len);
1235 bio->bi_error = err; 1235 bio->bi_status = errno_to_blk_status(err);
1236 break; 1236 break;
1237 } 1237 }
1238 } 1238 }
@@ -1297,7 +1297,6 @@ static int btt_blk_init(struct btt *btt)
1297 blk_queue_make_request(btt->btt_queue, btt_make_request); 1297 blk_queue_make_request(btt->btt_queue, btt_make_request);
1298 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); 1298 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
1299 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); 1299 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
1300 blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
1301 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); 1300 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
1302 btt->btt_queue->queuedata = btt; 1301 btt->btt_queue->queuedata = btt;
1303 1302
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..6b577afb1d44 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -49,19 +49,19 @@ static struct nd_region *to_region(struct pmem_device *pmem)
49 return to_nd_region(to_dev(pmem)->parent); 49 return to_nd_region(to_dev(pmem)->parent);
50} 50}
51 51
52static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, 52static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
53 unsigned int len) 53 phys_addr_t offset, unsigned int len)
54{ 54{
55 struct device *dev = to_dev(pmem); 55 struct device *dev = to_dev(pmem);
56 sector_t sector; 56 sector_t sector;
57 long cleared; 57 long cleared;
58 int rc = 0; 58 blk_status_t rc = BLK_STS_OK;
59 59
60 sector = (offset - pmem->data_offset) / 512; 60 sector = (offset - pmem->data_offset) / 512;
61 61
62 cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); 62 cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
63 if (cleared < len) 63 if (cleared < len)
64 rc = -EIO; 64 rc = BLK_STS_IOERR;
65 if (cleared > 0 && cleared / 512) { 65 if (cleared > 0 && cleared / 512) {
66 cleared /= 512; 66 cleared /= 512;
67 dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, 67 dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
@@ -84,7 +84,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
84 kunmap_atomic(mem); 84 kunmap_atomic(mem);
85} 85}
86 86
87static int read_pmem(struct page *page, unsigned int off, 87static blk_status_t read_pmem(struct page *page, unsigned int off,
88 void *pmem_addr, unsigned int len) 88 void *pmem_addr, unsigned int len)
89{ 89{
90 int rc; 90 int rc;
@@ -93,15 +93,15 @@ static int read_pmem(struct page *page, unsigned int off,
93 rc = memcpy_mcsafe(mem + off, pmem_addr, len); 93 rc = memcpy_mcsafe(mem + off, pmem_addr, len);
94 kunmap_atomic(mem); 94 kunmap_atomic(mem);
95 if (rc) 95 if (rc)
96 return -EIO; 96 return BLK_STS_IOERR;
97 return 0; 97 return BLK_STS_OK;
98} 98}
99 99
100static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, 100static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
101 unsigned int len, unsigned int off, bool is_write, 101 unsigned int len, unsigned int off, bool is_write,
102 sector_t sector) 102 sector_t sector)
103{ 103{
104 int rc = 0; 104 blk_status_t rc = BLK_STS_OK;
105 bool bad_pmem = false; 105 bool bad_pmem = false;
106 phys_addr_t pmem_off = sector * 512 + pmem->data_offset; 106 phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
107 void *pmem_addr = pmem->virt_addr + pmem_off; 107 void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -111,7 +111,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
111 111
112 if (!is_write) { 112 if (!is_write) {
113 if (unlikely(bad_pmem)) 113 if (unlikely(bad_pmem))
114 rc = -EIO; 114 rc = BLK_STS_IOERR;
115 else { 115 else {
116 rc = read_pmem(page, off, pmem_addr, len); 116 rc = read_pmem(page, off, pmem_addr, len);
117 flush_dcache_page(page); 117 flush_dcache_page(page);
@@ -149,7 +149,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
149 149
150static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) 150static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
151{ 151{
152 int rc = 0; 152 blk_status_t rc = 0;
153 bool do_acct; 153 bool do_acct;
154 unsigned long start; 154 unsigned long start;
155 struct bio_vec bvec; 155 struct bio_vec bvec;
@@ -166,7 +166,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
166 bvec.bv_offset, op_is_write(bio_op(bio)), 166 bvec.bv_offset, op_is_write(bio_op(bio)),
167 iter.bi_sector); 167 iter.bi_sector);
168 if (rc) { 168 if (rc) {
169 bio->bi_error = rc; 169 bio->bi_status = rc;
170 break; 170 break;
171 } 171 }
172 } 172 }
@@ -184,7 +184,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
184 struct page *page, bool is_write) 184 struct page *page, bool is_write)
185{ 185{
186 struct pmem_device *pmem = bdev->bd_queue->queuedata; 186 struct pmem_device *pmem = bdev->bd_queue->queuedata;
187 int rc; 187 blk_status_t rc;
188 188
189 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector); 189 rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
190 190
@@ -197,7 +197,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
197 if (rc == 0) 197 if (rc == 0)
198 page_endio(page, is_write, 0); 198 page_endio(page, is_write, 0);
199 199
200 return rc; 200 return blk_status_to_errno(rc);
201} 201}
202 202
203/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ 203/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
@@ -343,7 +343,6 @@ static int pmem_attach_disk(struct device *dev,
343 blk_queue_make_request(q, pmem_make_request); 343 blk_queue_make_request(q, pmem_make_request);
344 blk_queue_physical_block_size(q, PAGE_SIZE); 344 blk_queue_physical_block_size(q, PAGE_SIZE);
345 blk_queue_max_hw_sectors(q, UINT_MAX); 345 blk_queue_max_hw_sectors(q, UINT_MAX);
346 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
347 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 346 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
348 queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); 347 queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
349 q->queuedata = pmem; 348 q->queuedata = pmem;
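pmem_rw_page() above illustrates the boundary rule this series establishes: blk_status_t is used inside the block stack, while interfaces that still speak errnos convert at the edge with blk_status_to_errno()/errno_to_blk_status(), as the memstick and ubi hunks earlier also do. A short sketch of that pattern, with hypothetical helpers standing in for real driver code:

#include <linux/blkdev.h>

/* Illustrative only: internal paths return blk_status_t... */
static blk_status_t sketch_do_io(bool fail)
{
        return fail ? BLK_STS_IOERR : BLK_STS_OK;
}

/* ...and an errno-based entry point converts at the boundary. */
static int sketch_rw_page(bool fail)
{
        blk_status_t rc = sketch_do_io(fail);

        return blk_status_to_errno(rc); /* BLK_STS_IOERR -> -EIO, BLK_STS_OK -> 0 */
}

/* The reverse direction, e.g. when a legacy callback hands back an errno. */
static blk_status_t sketch_complete(struct request *req, int error)
{
        blk_status_t status = errno_to_blk_status(error);

        __blk_end_request_all(req, status);
        return status;
}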
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 90745a616df7..46d6cb1e03bd 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -13,18 +13,6 @@ config BLK_DEV_NVME
13 To compile this driver as a module, choose M here: the 13 To compile this driver as a module, choose M here: the
14 module will be called nvme. 14 module will be called nvme.
15 15
16config BLK_DEV_NVME_SCSI
17 bool "SCSI emulation for NVMe device nodes"
18 depends on NVME_CORE
19 ---help---
20 This adds support for the SG_IO ioctl on the NVMe character
21 and block devices nodes, as well as a translation for a small
22 number of selected SCSI commands to NVMe commands to the NVMe
23 driver. If you don't know what this means you probably want
24 to say N here, unless you run a distro that abuses the SCSI
25 emulation to provide stable device names for mount by id, like
26 some OpenSuSE and SLES versions.
27
28config NVME_FABRICS 16config NVME_FABRICS
29 tristate 17 tristate
30 18
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index f1a7d945fbb6..cc0aacb4c8b4 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -5,7 +5,6 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
5obj-$(CONFIG_NVME_FC) += nvme-fc.o 5obj-$(CONFIG_NVME_FC) += nvme-fc.o
6 6
7nvme-core-y := core.o 7nvme-core-y := core.o
8nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
9nvme-core-$(CONFIG_NVM) += lightnvm.o 8nvme-core-$(CONFIG_NVM) += lightnvm.o
10 9
11nvme-y += pci.o 10nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 903d5813023a..d70df1d0072d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -27,7 +27,6 @@
27#include <linux/nvme_ioctl.h> 27#include <linux/nvme_ioctl.h>
28#include <linux/t10-pi.h> 28#include <linux/t10-pi.h>
29#include <linux/pm_qos.h> 29#include <linux/pm_qos.h>
30#include <scsi/sg.h>
31#include <asm/unaligned.h> 30#include <asm/unaligned.h>
32 31
33#include "nvme.h" 32#include "nvme.h"
@@ -45,7 +44,7 @@ module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
45MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); 44MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
46EXPORT_SYMBOL_GPL(nvme_io_timeout); 45EXPORT_SYMBOL_GPL(nvme_io_timeout);
47 46
48unsigned char shutdown_timeout = 5; 47static unsigned char shutdown_timeout = 5;
49module_param(shutdown_timeout, byte, 0644); 48module_param(shutdown_timeout, byte, 0644);
50MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); 49MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
51 50
@@ -65,34 +64,53 @@ static bool force_apst;
65module_param(force_apst, bool, 0644); 64module_param(force_apst, bool, 0644);
66MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); 65MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
67 66
67static bool streams;
68module_param(streams, bool, 0644);
69MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
70
71struct workqueue_struct *nvme_wq;
72EXPORT_SYMBOL_GPL(nvme_wq);
73
68static LIST_HEAD(nvme_ctrl_list); 74static LIST_HEAD(nvme_ctrl_list);
69static DEFINE_SPINLOCK(dev_list_lock); 75static DEFINE_SPINLOCK(dev_list_lock);
70 76
71static struct class *nvme_class; 77static struct class *nvme_class;
72 78
73static int nvme_error_status(struct request *req) 79int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
80{
81 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
82 return -EBUSY;
83 if (!queue_work(nvme_wq, &ctrl->reset_work))
84 return -EBUSY;
85 return 0;
86}
87EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
88
89static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
90{
91 int ret;
92
93 ret = nvme_reset_ctrl(ctrl);
94 if (!ret)
95 flush_work(&ctrl->reset_work);
96 return ret;
97}
98
99static blk_status_t nvme_error_status(struct request *req)
74{ 100{
75 switch (nvme_req(req)->status & 0x7ff) { 101 switch (nvme_req(req)->status & 0x7ff) {
76 case NVME_SC_SUCCESS: 102 case NVME_SC_SUCCESS:
77 return 0; 103 return BLK_STS_OK;
78 case NVME_SC_CAP_EXCEEDED: 104 case NVME_SC_CAP_EXCEEDED:
79 return -ENOSPC; 105 return BLK_STS_NOSPC;
80 default:
81 return -EIO;
82
83 /*
84 * XXX: these errors are a nasty side-band protocol to
85 * drivers/md/dm-mpath.c:noretry_error() that aren't documented
86 * anywhere..
87 */
88 case NVME_SC_CMD_SEQ_ERROR:
89 return -EILSEQ;
90 case NVME_SC_ONCS_NOT_SUPPORTED: 106 case NVME_SC_ONCS_NOT_SUPPORTED:
91 return -EOPNOTSUPP; 107 return BLK_STS_NOTSUPP;
92 case NVME_SC_WRITE_FAULT: 108 case NVME_SC_WRITE_FAULT:
93 case NVME_SC_READ_ERROR: 109 case NVME_SC_READ_ERROR:
94 case NVME_SC_UNWRITTEN_BLOCK: 110 case NVME_SC_UNWRITTEN_BLOCK:
95 return -ENODATA; 111 return BLK_STS_MEDIUM;
112 default:
113 return BLK_STS_IOERR;
96 } 114 }
97} 115}
98 116
@@ -165,7 +183,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
165 switch (old_state) { 183 switch (old_state) {
166 case NVME_CTRL_NEW: 184 case NVME_CTRL_NEW:
167 case NVME_CTRL_LIVE: 185 case NVME_CTRL_LIVE:
168 case NVME_CTRL_RECONNECTING:
169 changed = true; 186 changed = true;
170 /* FALLTHRU */ 187 /* FALLTHRU */
171 default: 188 default:
@@ -283,6 +300,105 @@ struct request *nvme_alloc_request(struct request_queue *q,
283} 300}
284EXPORT_SYMBOL_GPL(nvme_alloc_request); 301EXPORT_SYMBOL_GPL(nvme_alloc_request);
285 302
303static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
304{
305 struct nvme_command c;
306
307 memset(&c, 0, sizeof(c));
308
309 c.directive.opcode = nvme_admin_directive_send;
310 c.directive.nsid = cpu_to_le32(0xffffffff);
311 c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE;
312 c.directive.dtype = NVME_DIR_IDENTIFY;
313 c.directive.tdtype = NVME_DIR_STREAMS;
314 c.directive.endir = enable ? NVME_DIR_ENDIR : 0;
315
316 return nvme_submit_sync_cmd(ctrl->admin_q, &c, NULL, 0);
317}
318
319static int nvme_disable_streams(struct nvme_ctrl *ctrl)
320{
321 return nvme_toggle_streams(ctrl, false);
322}
323
324static int nvme_enable_streams(struct nvme_ctrl *ctrl)
325{
326 return nvme_toggle_streams(ctrl, true);
327}
328
329static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
330 struct streams_directive_params *s, u32 nsid)
331{
332 struct nvme_command c;
333
334 memset(&c, 0, sizeof(c));
335 memset(s, 0, sizeof(*s));
336
337 c.directive.opcode = nvme_admin_directive_recv;
338 c.directive.nsid = cpu_to_le32(nsid);
339 c.directive.numd = sizeof(*s);
340 c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
341 c.directive.dtype = NVME_DIR_STREAMS;
342
343 return nvme_submit_sync_cmd(ctrl->admin_q, &c, s, sizeof(*s));
344}
345
346static int nvme_configure_directives(struct nvme_ctrl *ctrl)
347{
348 struct streams_directive_params s;
349 int ret;
350
351 if (!(ctrl->oacs & NVME_CTRL_OACS_DIRECTIVES))
352 return 0;
353 if (!streams)
354 return 0;
355
356 ret = nvme_enable_streams(ctrl);
357 if (ret)
358 return ret;
359
360 ret = nvme_get_stream_params(ctrl, &s, 0xffffffff);
361 if (ret)
362 return ret;
363
364 ctrl->nssa = le16_to_cpu(s.nssa);
365 if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
366 dev_info(ctrl->device, "too few streams (%u) available\n",
367 ctrl->nssa);
368 nvme_disable_streams(ctrl);
369 return 0;
370 }
371
372 ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
373 dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
374 return 0;
375}
376
377/*
378 * Check if 'req' has a write hint associated with it. If it does, assign
379 * a valid namespace stream to the write.
380 */
381static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
382 struct request *req, u16 *control,
383 u32 *dsmgmt)
384{
385 enum rw_hint streamid = req->write_hint;
386
387 if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
388 streamid = 0;
389 else {
390 streamid--;
391 if (WARN_ON_ONCE(streamid > ctrl->nr_streams))
392 return;
393
394 *control |= NVME_RW_DTYPE_STREAMS;
395 *dsmgmt |= streamid << 16;
396 }
397
398 if (streamid < ARRAY_SIZE(req->q->write_hints))
399 req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
400}
401
286static inline void nvme_setup_flush(struct nvme_ns *ns, 402static inline void nvme_setup_flush(struct nvme_ns *ns,
287 struct nvme_command *cmnd) 403 struct nvme_command *cmnd)
288{ 404{
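nvme_assign_write_stream() above is the in-kernel consumer of the per-write lifetime hints this series introduces; userspace supplies the hint via fcntl() and the driver maps it to an NVMe stream identifier. A hedged userspace sketch follows; the fcntl command and hint constants are taken from the 4.13 uapi headers and defined locally in case the libc headers in use do not export them yet:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Fallback definitions matching the 4.13 uapi values (assumption, see above). */
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE   1024
#endif
#ifndef F_SET_RW_HINT
#define F_SET_RW_HINT           (F_LINUX_SPECIFIC_BASE + 12)
#endif
#ifndef RWH_WRITE_LIFE_SHORT
#define RWH_WRITE_LIFE_SHORT    2
#endif

int main(void)
{
        uint64_t hint = RWH_WRITE_LIFE_SHORT;   /* data expected to be overwritten soon */
        int fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Subsequent writes on this inode carry the hint; on an NVMe device
         * with streams enabled it ends up as a stream ID in the write command. */
        if (fcntl(fd, F_SET_RW_HINT, &hint) < 0)
                perror("F_SET_RW_HINT");

        if (write(fd, "hello", 5) < 0)
                perror("write");

        close(fd);
        return 0;
}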
@@ -291,7 +407,7 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
291 cmnd->common.nsid = cpu_to_le32(ns->ns_id); 407 cmnd->common.nsid = cpu_to_le32(ns->ns_id);
292} 408}
293 409
294static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, 410static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
295 struct nvme_command *cmnd) 411 struct nvme_command *cmnd)
296{ 412{
297 unsigned short segments = blk_rq_nr_discard_segments(req), n = 0; 413 unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
@@ -300,7 +416,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
300 416
301 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 417 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
302 if (!range) 418 if (!range)
303 return BLK_MQ_RQ_QUEUE_BUSY; 419 return BLK_STS_RESOURCE;
304 420
305 __rq_for_each_bio(bio, req) { 421 __rq_for_each_bio(bio, req) {
306 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); 422 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
@@ -314,7 +430,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
314 430
315 if (WARN_ON_ONCE(n != segments)) { 431 if (WARN_ON_ONCE(n != segments)) {
316 kfree(range); 432 kfree(range);
317 return BLK_MQ_RQ_QUEUE_ERROR; 433 return BLK_STS_IOERR;
318 } 434 }
319 435
320 memset(cmnd, 0, sizeof(*cmnd)); 436 memset(cmnd, 0, sizeof(*cmnd));
@@ -328,15 +444,26 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
328 req->special_vec.bv_len = sizeof(*range) * segments; 444 req->special_vec.bv_len = sizeof(*range) * segments;
329 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 445 req->rq_flags |= RQF_SPECIAL_PAYLOAD;
330 446
331 return BLK_MQ_RQ_QUEUE_OK; 447 return BLK_STS_OK;
332} 448}
333 449
334static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, 450static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
335 struct nvme_command *cmnd) 451 struct request *req, struct nvme_command *cmnd)
336{ 452{
453 struct nvme_ctrl *ctrl = ns->ctrl;
337 u16 control = 0; 454 u16 control = 0;
338 u32 dsmgmt = 0; 455 u32 dsmgmt = 0;
339 456
457 /*
458 * If formated with metadata, require the block layer provide a buffer
459 * unless this namespace is formated such that the metadata can be
460 * stripped/generated by the controller with PRACT=1.
461 */
462 if (ns && ns->ms &&
463 (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
464 !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
465 return BLK_STS_NOTSUPP;
466
340 if (req->cmd_flags & REQ_FUA) 467 if (req->cmd_flags & REQ_FUA)
341 control |= NVME_RW_FUA; 468 control |= NVME_RW_FUA;
342 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) 469 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -351,6 +478,9 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
351 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); 478 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
352 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); 479 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
353 480
481 if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
482 nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
483
354 if (ns->ms) { 484 if (ns->ms) {
355 switch (ns->pi_type) { 485 switch (ns->pi_type) {
356 case NVME_NS_DPS_PI_TYPE3: 486 case NVME_NS_DPS_PI_TYPE3:
@@ -370,12 +500,13 @@ static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
370 500
371 cmnd->rw.control = cpu_to_le16(control); 501 cmnd->rw.control = cpu_to_le16(control);
372 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); 502 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
503 return 0;
373} 504}
374 505
375int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, 506blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
376 struct nvme_command *cmd) 507 struct nvme_command *cmd)
377{ 508{
378 int ret = BLK_MQ_RQ_QUEUE_OK; 509 blk_status_t ret = BLK_STS_OK;
379 510
380 if (!(req->rq_flags & RQF_DONTPREP)) { 511 if (!(req->rq_flags & RQF_DONTPREP)) {
381 nvme_req(req)->retries = 0; 512 nvme_req(req)->retries = 0;
@@ -398,11 +529,11 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
398 break; 529 break;
399 case REQ_OP_READ: 530 case REQ_OP_READ:
400 case REQ_OP_WRITE: 531 case REQ_OP_WRITE:
401 nvme_setup_rw(ns, req, cmd); 532 ret = nvme_setup_rw(ns, req, cmd);
402 break; 533 break;
403 default: 534 default:
404 WARN_ON_ONCE(1); 535 WARN_ON_ONCE(1);
405 return BLK_MQ_RQ_QUEUE_ERROR; 536 return BLK_STS_IOERR;
406 } 537 }
407 538
408 cmd->common.command_id = req->tag; 539 cmd->common.command_id = req->tag;
@@ -555,15 +686,16 @@ int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
555 result, timeout); 686 result, timeout);
556} 687}
557 688
558static void nvme_keep_alive_end_io(struct request *rq, int error) 689static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
559{ 690{
560 struct nvme_ctrl *ctrl = rq->end_io_data; 691 struct nvme_ctrl *ctrl = rq->end_io_data;
561 692
562 blk_mq_free_request(rq); 693 blk_mq_free_request(rq);
563 694
564 if (error) { 695 if (status) {
565 dev_err(ctrl->device, 696 dev_err(ctrl->device,
566 "failed nvme_keep_alive_end_io error=%d\n", error); 697 "failed nvme_keep_alive_end_io error=%d\n",
698 status);
567 return; 699 return;
568 } 700 }
569 701
@@ -599,7 +731,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
599 if (nvme_keep_alive(ctrl)) { 731 if (nvme_keep_alive(ctrl)) {
600 /* allocation failure, reset the controller */ 732 /* allocation failure, reset the controller */
601 dev_err(ctrl->device, "keep-alive failed\n"); 733 dev_err(ctrl->device, "keep-alive failed\n");
602 ctrl->ops->reset_ctrl(ctrl); 734 nvme_reset_ctrl(ctrl);
603 return; 735 return;
604 } 736 }
605} 737}
@@ -623,7 +755,7 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
623} 755}
624EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); 756EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
625 757
626int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) 758static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
627{ 759{
628 struct nvme_command c = { }; 760 struct nvme_command c = { };
629 int error; 761 int error;
@@ -643,6 +775,77 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
643 return error; 775 return error;
644} 776}
645 777
778static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid)
779{
780 struct nvme_command c = { };
781 int status;
782 void *data;
783 int pos;
784 int len;
785
786 c.identify.opcode = nvme_admin_identify;
787 c.identify.nsid = cpu_to_le32(nsid);
788 c.identify.cns = NVME_ID_CNS_NS_DESC_LIST;
789
790 data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
791 if (!data)
792 return -ENOMEM;
793
794 status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data,
795 NVME_IDENTIFY_DATA_SIZE);
796 if (status)
797 goto free_data;
798
799 for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
800 struct nvme_ns_id_desc *cur = data + pos;
801
802 if (cur->nidl == 0)
803 break;
804
805 switch (cur->nidt) {
806 case NVME_NIDT_EUI64:
807 if (cur->nidl != NVME_NIDT_EUI64_LEN) {
808 dev_warn(ns->ctrl->device,
809 "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n",
810 cur->nidl);
811 goto free_data;
812 }
813 len = NVME_NIDT_EUI64_LEN;
814 memcpy(ns->eui, data + pos + sizeof(*cur), len);
815 break;
816 case NVME_NIDT_NGUID:
817 if (cur->nidl != NVME_NIDT_NGUID_LEN) {
818 dev_warn(ns->ctrl->device,
819 "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n",
820 cur->nidl);
821 goto free_data;
822 }
823 len = NVME_NIDT_NGUID_LEN;
824 memcpy(ns->nguid, data + pos + sizeof(*cur), len);
825 break;
826 case NVME_NIDT_UUID:
827 if (cur->nidl != NVME_NIDT_UUID_LEN) {
828 dev_warn(ns->ctrl->device,
829 "ctrl returned bogus length: %d for NVME_NIDT_UUID\n",
830 cur->nidl);
831 goto free_data;
832 }
833 len = NVME_NIDT_UUID_LEN;
834 uuid_copy(&ns->uuid, data + pos + sizeof(*cur));
835 break;
836 default:
837 /* Skip unnkown types */
838 len = cur->nidl;
839 break;
840 }
841
842 len += sizeof(*cur);
843 }
844free_data:
845 kfree(data);
846 return status;
847}
848
646static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list) 849static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
647{ 850{
648 struct nvme_command c = { }; 851 struct nvme_command c = { };
@@ -653,7 +856,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
653 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); 856 return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
654} 857}
655 858
656int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, 859static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
657 struct nvme_id_ns **id) 860 struct nvme_id_ns **id)
658{ 861{
659 struct nvme_command c = { }; 862 struct nvme_command c = { };
@@ -675,26 +878,7 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
675 return error; 878 return error;
676} 879}
677 880
678int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, 881static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
679 void *buffer, size_t buflen, u32 *result)
680{
681 struct nvme_command c;
682 union nvme_result res;
683 int ret;
684
685 memset(&c, 0, sizeof(c));
686 c.features.opcode = nvme_admin_get_features;
687 c.features.nsid = cpu_to_le32(nsid);
688 c.features.fid = cpu_to_le32(fid);
689
690 ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, buffer, buflen, 0,
691 NVME_QID_ANY, 0, 0);
692 if (ret >= 0 && result)
693 *result = le32_to_cpu(res.u32);
694 return ret;
695}
696
697int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
698 void *buffer, size_t buflen, u32 *result) 882 void *buffer, size_t buflen, u32 *result)
699{ 883{
700 struct nvme_command c; 884 struct nvme_command c;
@@ -713,28 +897,6 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
713 return ret; 897 return ret;
714} 898}
715 899
716int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
717{
718 struct nvme_command c = { };
719 int error;
720
721 c.common.opcode = nvme_admin_get_log_page,
722 c.common.nsid = cpu_to_le32(0xFFFFFFFF),
723 c.common.cdw10[0] = cpu_to_le32(
724 (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
725 NVME_LOG_SMART),
726
727 *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
728 if (!*log)
729 return -ENOMEM;
730
731 error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
732 sizeof(struct nvme_smart_log));
733 if (error)
734 kfree(*log);
735 return error;
736}
737
738int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) 900int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
739{ 901{
740 u32 q_count = (*count - 1) | ((*count - 1) << 16); 902 u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -752,7 +914,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
752 * access to the admin queue, as that might be only way to fix them up. 914 * access to the admin queue, as that might be only way to fix them up.
753 */ 915 */
754 if (status > 0) { 916 if (status > 0) {
755 dev_err(ctrl->dev, "Could not set queue count (%d)\n", status); 917 dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
756 *count = 0; 918 *count = 0;
757 } else { 919 } else {
758 nr_io_queues = min(result & 0xffff, result >> 16) + 1; 920 nr_io_queues = min(result & 0xffff, result >> 16) + 1;
@@ -870,12 +1032,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
870 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); 1032 return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
871 case NVME_IOCTL_SUBMIT_IO: 1033 case NVME_IOCTL_SUBMIT_IO:
872 return nvme_submit_io(ns, (void __user *)arg); 1034 return nvme_submit_io(ns, (void __user *)arg);
873#ifdef CONFIG_BLK_DEV_NVME_SCSI
874 case SG_GET_VERSION_NUM:
875 return nvme_sg_get_version_num((void __user *)arg);
876 case SG_IO:
877 return nvme_sg_io(ns, (void __user *)arg);
878#endif
879 default: 1035 default:
880#ifdef CONFIG_NVM 1036#ifdef CONFIG_NVM
881 if (ns->ndev) 1037 if (ns->ndev)
@@ -892,10 +1048,6 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
892static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, 1048static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
893 unsigned int cmd, unsigned long arg) 1049 unsigned int cmd, unsigned long arg)
894{ 1050{
895 switch (cmd) {
896 case SG_IO:
897 return -ENOIOCTLCMD;
898 }
899 return nvme_ioctl(bdev, mode, cmd, arg); 1051 return nvme_ioctl(bdev, mode, cmd, arg);
900} 1052}
901#else 1053#else
@@ -983,6 +1135,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
983} 1135}
984#endif /* CONFIG_BLK_DEV_INTEGRITY */ 1136#endif /* CONFIG_BLK_DEV_INTEGRITY */
985 1137
1138static void nvme_set_chunk_size(struct nvme_ns *ns)
1139{
1140 u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
1141 blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
1142}
1143
986static void nvme_config_discard(struct nvme_ns *ns) 1144static void nvme_config_discard(struct nvme_ns *ns)
987{ 1145{
988 struct nvme_ctrl *ctrl = ns->ctrl; 1146 struct nvme_ctrl *ctrl = ns->ctrl;
@@ -991,8 +1149,15 @@ static void nvme_config_discard(struct nvme_ns *ns)
991 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < 1149 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
992 NVME_DSM_MAX_RANGES); 1150 NVME_DSM_MAX_RANGES);
993 1151
994 ns->queue->limits.discard_alignment = logical_block_size; 1152 if (ctrl->nr_streams && ns->sws && ns->sgs) {
995 ns->queue->limits.discard_granularity = logical_block_size; 1153 unsigned int sz = logical_block_size * ns->sws * ns->sgs;
1154
1155 ns->queue->limits.discard_alignment = sz;
1156 ns->queue->limits.discard_granularity = sz;
1157 } else {
1158 ns->queue->limits.discard_alignment = logical_block_size;
1159 ns->queue->limits.discard_granularity = logical_block_size;
1160 }
996 blk_queue_max_discard_sectors(ns->queue, UINT_MAX); 1161 blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
997 blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES); 1162 blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
998 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); 1163 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
@@ -1016,7 +1181,15 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
1016 if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) 1181 if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
1017 memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); 1182 memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
1018 if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) 1183 if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
1019 memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid)); 1184 memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid));
1185 if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) {
1186 /* Don't treat error as fatal we potentially
1187 * already have a NGUID or EUI-64
1188 */
1189 if (nvme_identify_ns_descs(ns, ns->ns_id))
1190 dev_warn(ns->ctrl->device,
1191 "%s: Identify Descriptors failed\n", __func__);
1192 }
1020 1193
1021 return 0; 1194 return 0;
1022} 1195}
@@ -1024,6 +1197,7 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
1024static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) 1197static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1025{ 1198{
1026 struct nvme_ns *ns = disk->private_data; 1199 struct nvme_ns *ns = disk->private_data;
1200 struct nvme_ctrl *ctrl = ns->ctrl;
1027 u16 bs; 1201 u16 bs;
1028 1202
1029 /* 1203 /*
@@ -1034,12 +1208,15 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1034 if (ns->lba_shift == 0) 1208 if (ns->lba_shift == 0)
1035 ns->lba_shift = 9; 1209 ns->lba_shift = 9;
1036 bs = 1 << ns->lba_shift; 1210 bs = 1 << ns->lba_shift;
1211 ns->noiob = le16_to_cpu(id->noiob);
1037 1212
1038 blk_mq_freeze_queue(disk->queue); 1213 blk_mq_freeze_queue(disk->queue);
1039 1214
1040 if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) 1215 if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
1041 nvme_prep_integrity(disk, id, bs); 1216 nvme_prep_integrity(disk, id, bs);
1042 blk_queue_logical_block_size(ns->queue, bs); 1217 blk_queue_logical_block_size(ns->queue, bs);
1218 if (ns->noiob)
1219 nvme_set_chunk_size(ns);
1043 if (ns->ms && !blk_get_integrity(disk) && !ns->ext) 1220 if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
1044 nvme_init_integrity(ns); 1221 nvme_init_integrity(ns);
1045 if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) 1222 if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1047,7 +1224,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1047 else 1224 else
1048 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); 1225 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
1049 1226
1050 if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) 1227 if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
1051 nvme_config_discard(ns); 1228 nvme_config_discard(ns);
1052 blk_mq_unfreeze_queue(disk->queue); 1229 blk_mq_unfreeze_queue(disk->queue);
1053} 1230}
@@ -1283,7 +1460,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
1283 1460
1284int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) 1461int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
1285{ 1462{
1286 unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies; 1463 unsigned long timeout = jiffies + (shutdown_timeout * HZ);
1287 u32 csts; 1464 u32 csts;
1288 int ret; 1465 int ret;
1289 1466
@@ -1372,7 +1549,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
1372 if (!table) 1549 if (!table)
1373 return; 1550 return;
1374 1551
1375 if (ctrl->ps_max_latency_us == 0) { 1552 if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) {
1376 /* Turn off APST. */ 1553 /* Turn off APST. */
1377 apste = 0; 1554 apste = 0;
1378 dev_dbg(ctrl->device, "APST disabled\n"); 1555 dev_dbg(ctrl->device, "APST disabled\n");
@@ -1528,6 +1705,31 @@ static bool quirk_matches(const struct nvme_id_ctrl *id,
1528 string_matches(id->fr, q->fr, sizeof(id->fr)); 1705 string_matches(id->fr, q->fr, sizeof(id->fr));
1529} 1706}
1530 1707
1708static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
1709{
1710 size_t nqnlen;
1711 int off;
1712
1713 nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE);
1714 if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) {
1715 strcpy(ctrl->subnqn, id->subnqn);
1716 return;
1717 }
1718
1719 if (ctrl->vs >= NVME_VS(1, 2, 1))
1720 dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n");
1721
1722 /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */
1723 off = snprintf(ctrl->subnqn, NVMF_NQN_SIZE,
1724 "nqn.2014.08.org.nvmexpress:%4x%4x",
1725 le16_to_cpu(id->vid), le16_to_cpu(id->ssvid));
1726 memcpy(ctrl->subnqn + off, id->sn, sizeof(id->sn));
1727 off += sizeof(id->sn);
1728 memcpy(ctrl->subnqn + off, id->mn, sizeof(id->mn));
1729 off += sizeof(id->mn);
1730 memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
1731}
1732
1531/* 1733/*
1532 * Initialize the cached copies of the Identify data and various controller 1734 * Initialize the cached copies of the Identify data and various controller
1533 * register in our nvme_ctrl structure. This should be called as soon as 1735 * register in our nvme_ctrl structure. This should be called as soon as
@@ -1539,7 +1741,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1539 u64 cap; 1741 u64 cap;
1540 int ret, page_shift; 1742 int ret, page_shift;
1541 u32 max_hw_sectors; 1743 u32 max_hw_sectors;
1542 u8 prev_apsta; 1744 bool prev_apst_enabled;
1543 1745
1544 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); 1746 ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
1545 if (ret) { 1747 if (ret) {
@@ -1563,6 +1765,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1563 return -EIO; 1765 return -EIO;
1564 } 1766 }
1565 1767
1768 nvme_init_subnqn(ctrl, id);
1769
1566 if (!ctrl->identified) { 1770 if (!ctrl->identified) {
1567 /* 1771 /*
1568 * Check for quirks. Quirk can depend on firmware version, 1772 * Check for quirks. Quirk can depend on firmware version,
@@ -1582,7 +1786,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1582 } 1786 }
1583 1787
1584 if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { 1788 if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) {
1585 dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); 1789 dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n");
1586 ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; 1790 ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS;
1587 } 1791 }
1588 1792
@@ -1607,16 +1811,17 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1607 ctrl->kas = le16_to_cpu(id->kas); 1811 ctrl->kas = le16_to_cpu(id->kas);
1608 1812
1609 ctrl->npss = id->npss; 1813 ctrl->npss = id->npss;
1610 prev_apsta = ctrl->apsta; 1814 ctrl->apsta = id->apsta;
1815 prev_apst_enabled = ctrl->apst_enabled;
1611 if (ctrl->quirks & NVME_QUIRK_NO_APST) { 1816 if (ctrl->quirks & NVME_QUIRK_NO_APST) {
1612 if (force_apst && id->apsta) { 1817 if (force_apst && id->apsta) {
1613 dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n"); 1818 dev_warn(ctrl->device, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n");
1614 ctrl->apsta = 1; 1819 ctrl->apst_enabled = true;
1615 } else { 1820 } else {
1616 ctrl->apsta = 0; 1821 ctrl->apst_enabled = false;
1617 } 1822 }
1618 } else { 1823 } else {
1619 ctrl->apsta = id->apsta; 1824 ctrl->apst_enabled = id->apsta;
1620 } 1825 }
1621 memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); 1826 memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
1622 1827
@@ -1634,22 +1839,25 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1634 ret = -EINVAL; 1839 ret = -EINVAL;
1635 1840
1636 if (!ctrl->opts->discovery_nqn && !ctrl->kas) { 1841 if (!ctrl->opts->discovery_nqn && !ctrl->kas) {
1637 dev_err(ctrl->dev, 1842 dev_err(ctrl->device,
1638 "keep-alive support is mandatory for fabrics\n"); 1843 "keep-alive support is mandatory for fabrics\n");
1639 ret = -EINVAL; 1844 ret = -EINVAL;
1640 } 1845 }
1641 } else { 1846 } else {
1642 ctrl->cntlid = le16_to_cpu(id->cntlid); 1847 ctrl->cntlid = le16_to_cpu(id->cntlid);
1848 ctrl->hmpre = le32_to_cpu(id->hmpre);
1849 ctrl->hmmin = le32_to_cpu(id->hmmin);
1643 } 1850 }
1644 1851
1645 kfree(id); 1852 kfree(id);
1646 1853
1647 if (ctrl->apsta && !prev_apsta) 1854 if (ctrl->apst_enabled && !prev_apst_enabled)
1648 dev_pm_qos_expose_latency_tolerance(ctrl->device); 1855 dev_pm_qos_expose_latency_tolerance(ctrl->device);
1649 else if (!ctrl->apsta && prev_apsta) 1856 else if (!ctrl->apst_enabled && prev_apst_enabled)
1650 dev_pm_qos_hide_latency_tolerance(ctrl->device); 1857 dev_pm_qos_hide_latency_tolerance(ctrl->device);
1651 1858
1652 nvme_configure_apst(ctrl); 1859 nvme_configure_apst(ctrl);
1860 nvme_configure_directives(ctrl);
1653 1861
1654 ctrl->identified = true; 1862 ctrl->identified = true;
1655 1863
@@ -1735,7 +1943,7 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
1735 return nvme_dev_user_cmd(ctrl, argp); 1943 return nvme_dev_user_cmd(ctrl, argp);
1736 case NVME_IOCTL_RESET: 1944 case NVME_IOCTL_RESET:
1737 dev_warn(ctrl->device, "resetting controller\n"); 1945 dev_warn(ctrl->device, "resetting controller\n");
1738 return ctrl->ops->reset_ctrl(ctrl); 1946 return nvme_reset_ctrl_sync(ctrl);
1739 case NVME_IOCTL_SUBSYS_RESET: 1947 case NVME_IOCTL_SUBSYS_RESET:
1740 return nvme_reset_subsystem(ctrl); 1948 return nvme_reset_subsystem(ctrl);
1741 case NVME_IOCTL_RESCAN: 1949 case NVME_IOCTL_RESCAN:
@@ -1761,7 +1969,7 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
1761 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 1969 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1762 int ret; 1970 int ret;
1763 1971
1764 ret = ctrl->ops->reset_ctrl(ctrl); 1972 ret = nvme_reset_ctrl_sync(ctrl);
1765 if (ret < 0) 1973 if (ret < 0)
1766 return ret; 1974 return ret;
1767 return count; 1975 return count;
@@ -1787,8 +1995,8 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
1787 int serial_len = sizeof(ctrl->serial); 1995 int serial_len = sizeof(ctrl->serial);
1788 int model_len = sizeof(ctrl->model); 1996 int model_len = sizeof(ctrl->model);
1789 1997
1790 if (memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) 1998 if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
1791 return sprintf(buf, "eui.%16phN\n", ns->uuid); 1999 return sprintf(buf, "eui.%16phN\n", ns->nguid);
1792 2000
1793 if (memchr_inv(ns->eui, 0, sizeof(ns->eui))) 2001 if (memchr_inv(ns->eui, 0, sizeof(ns->eui)))
1794 return sprintf(buf, "eui.%8phN\n", ns->eui); 2002 return sprintf(buf, "eui.%8phN\n", ns->eui);
@@ -1803,11 +2011,28 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
1803} 2011}
1804static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL); 2012static DEVICE_ATTR(wwid, S_IRUGO, wwid_show, NULL);
1805 2013
2014static ssize_t nguid_show(struct device *dev, struct device_attribute *attr,
2015 char *buf)
2016{
2017 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
2018 return sprintf(buf, "%pU\n", ns->nguid);
2019}
2020static DEVICE_ATTR(nguid, S_IRUGO, nguid_show, NULL);
2021
1806static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, 2022static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
1807 char *buf) 2023 char *buf)
1808{ 2024{
1809 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); 2025 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
1810 return sprintf(buf, "%pU\n", ns->uuid); 2026
2027 /* For backward compatibility expose the NGUID to userspace if
2028 * we have no UUID set
2029 */
2030 if (uuid_is_null(&ns->uuid)) {
2031 printk_ratelimited(KERN_WARNING
2032 "No UUID available providing old NGUID\n");
2033 return sprintf(buf, "%pU\n", ns->nguid);
2034 }
2035 return sprintf(buf, "%pU\n", &ns->uuid);
1811} 2036}
1812static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); 2037static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
1813 2038
@@ -1830,6 +2055,7 @@ static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
1830static struct attribute *nvme_ns_attrs[] = { 2055static struct attribute *nvme_ns_attrs[] = {
1831 &dev_attr_wwid.attr, 2056 &dev_attr_wwid.attr,
1832 &dev_attr_uuid.attr, 2057 &dev_attr_uuid.attr,
2058 &dev_attr_nguid.attr,
1833 &dev_attr_eui.attr, 2059 &dev_attr_eui.attr,
1834 &dev_attr_nsid.attr, 2060 &dev_attr_nsid.attr,
1835 NULL, 2061 NULL,
@@ -1842,7 +2068,12 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
1842 struct nvme_ns *ns = nvme_get_ns_from_dev(dev); 2068 struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
1843 2069
1844 if (a == &dev_attr_uuid.attr) { 2070 if (a == &dev_attr_uuid.attr) {
1845 if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) 2071 if (uuid_is_null(&ns->uuid) ||
2072 !memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
2073 return 0;
2074 }
2075 if (a == &dev_attr_nguid.attr) {
2076 if (!memchr_inv(ns->nguid, 0, sizeof(ns->nguid)))
1846 return 0; 2077 return 0;
1847 } 2078 }
1848 if (a == &dev_attr_eui.attr) { 2079 if (a == &dev_attr_eui.attr) {
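
The namespace-identifier hunks above split the old 16-byte ns->uuid into an NGUID (ns->nguid) plus a real RFC 4122 uuid_t, add a separate nguid sysfs attribute, and keep the uuid attribute backward compatible: when the namespace reported no UUID, uuid_show() falls back to the NGUID, and the visibility callback hides whichever attribute would otherwise be empty. A minimal userspace sketch of that selection logic; the struct and helper names are invented for illustration and are not the kernel's uuid_t API:

#include <stdio.h>
#include <string.h>

/* Invented stand-ins for ns->uuid / ns->nguid; not the kernel types. */
struct ns_ids {
    unsigned char uuid[16];   /* RFC 4122 UUID reported by the namespace */
    unsigned char nguid[16];  /* Namespace Globally Unique Identifier */
};

static int all_zero(const unsigned char *p, size_t len)
{
    static const unsigned char zero[16];
    return memcmp(p, zero, len) == 0;
}

/* Mirrors the fallback: report the UUID when present, else the NGUID. */
static const unsigned char *id_for_uuid_attr(const struct ns_ids *ids)
{
    return all_zero(ids->uuid, sizeof(ids->uuid)) ? ids->nguid : ids->uuid;
}

int main(void)
{
    struct ns_ids ids = { .nguid = { 0x01, 0x02, 0x03 } };  /* UUID left all-zero */
    const unsigned char *id = id_for_uuid_attr(&ids);
    size_t i;

    for (i = 0; i < sizeof(ids.nguid); i++)
        printf("%02x", id[i]);
    printf("\n");
    return 0;
}

With an all-zero UUID the program prints the NGUID, matching the ratelimited "No UUID available" warning path above.
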
@@ -1931,8 +2162,7 @@ static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
1931{ 2162{
1932 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 2163 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1933 2164
1934 return snprintf(buf, PAGE_SIZE, "%s\n", 2165 return snprintf(buf, PAGE_SIZE, "%s\n", ctrl->subnqn);
1935 ctrl->ops->get_subsysnqn(ctrl));
1936} 2166}
1937static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL); 2167static DEVICE_ATTR(subsysnqn, S_IRUGO, nvme_sysfs_show_subsysnqn, NULL);
1938 2168
@@ -1961,24 +2191,16 @@ static struct attribute *nvme_dev_attrs[] = {
1961 NULL 2191 NULL
1962}; 2192};
1963 2193
1964#define CHECK_ATTR(ctrl, a, name) \
1965 if ((a) == &dev_attr_##name.attr && \
1966 !(ctrl)->ops->get_##name) \
1967 return 0
1968
1969static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj, 2194static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
1970 struct attribute *a, int n) 2195 struct attribute *a, int n)
1971{ 2196{
1972 struct device *dev = container_of(kobj, struct device, kobj); 2197 struct device *dev = container_of(kobj, struct device, kobj);
1973 struct nvme_ctrl *ctrl = dev_get_drvdata(dev); 2198 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
1974 2199
1975 if (a == &dev_attr_delete_controller.attr) { 2200 if (a == &dev_attr_delete_controller.attr && !ctrl->ops->delete_ctrl)
1976 if (!ctrl->ops->delete_ctrl) 2201 return 0;
1977 return 0; 2202 if (a == &dev_attr_address.attr && !ctrl->ops->get_address)
1978 } 2203 return 0;
1979
1980 CHECK_ATTR(ctrl, a, subsysnqn);
1981 CHECK_ATTR(ctrl, a, address);
1982 2204
1983 return a->mode; 2205 return a->mode;
1984} 2206}
@@ -2019,6 +2241,32 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2019 return ret; 2241 return ret;
2020} 2242}
2021 2243
2244static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
2245{
2246 struct streams_directive_params s;
2247 int ret;
2248
2249 if (!ctrl->nr_streams)
2250 return 0;
2251
2252 ret = nvme_get_stream_params(ctrl, &s, ns->ns_id);
2253 if (ret)
2254 return ret;
2255
2256 ns->sws = le32_to_cpu(s.sws);
2257 ns->sgs = le16_to_cpu(s.sgs);
2258
2259 if (ns->sws) {
2260 unsigned int bs = 1 << ns->lba_shift;
2261
2262 blk_queue_io_min(ns->queue, bs * ns->sws);
2263 if (ns->sgs)
2264 blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
2265 }
2266
2267 return 0;
2268}
2269
2022static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) 2270static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2023{ 2271{
2024 struct nvme_ns *ns; 2272 struct nvme_ns *ns;
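
nvme_setup_streams_ns() above converts the per-namespace stream parameters into queue limits: io_min is the logical block size times the stream write size (sws), and io_opt additionally multiplies by the stream granularity (sgs), exactly as the two blk_queue_io_*() calls show. A small arithmetic sketch; the lba_shift, sws and sgs values are assumed examples, not taken from the patch:

#include <stdio.h>

int main(void)
{
    unsigned int lba_shift = 12;        /* 4096-byte logical blocks (assumed) */
    unsigned int sws = 16, sgs = 64;    /* example stream parameters (assumed) */
    unsigned int bs = 1u << lba_shift;

    unsigned long io_min = (unsigned long)bs * sws;        /* 65536   = 64 KiB */
    unsigned long io_opt = (unsigned long)bs * sws * sgs;  /* 4194304 = 4 MiB  */

    printf("io_min=%lu io_opt=%lu\n", io_min, io_opt);
    return 0;
}

Writes sized and aligned to io_opt then line up with the device's stream granularity, which is the point of exporting these limits.
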
@@ -2048,6 +2296,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2048 2296
2049 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); 2297 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
2050 nvme_set_queue_limits(ctrl, ns->queue); 2298 nvme_set_queue_limits(ctrl, ns->queue);
2299 nvme_setup_streams_ns(ctrl, ns);
2051 2300
2052 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); 2301 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
2053 2302
@@ -2056,7 +2305,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
2056 2305
2057 if (nvme_nvm_ns_supported(ns, id) && 2306 if (nvme_nvm_ns_supported(ns, id) &&
2058 nvme_nvm_register(ns, disk_name, node)) { 2307 nvme_nvm_register(ns, disk_name, node)) {
2059 dev_warn(ctrl->dev, "%s: LightNVM init failure\n", __func__); 2308 dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
2060 goto out_free_id; 2309 goto out_free_id;
2061 } 2310 }
2062 2311
@@ -2231,7 +2480,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
2231 * removal. 2480 * removal.
2232 */ 2481 */
2233 if (ctrl->state == NVME_CTRL_LIVE) 2482 if (ctrl->state == NVME_CTRL_LIVE)
2234 schedule_work(&ctrl->scan_work); 2483 queue_work(nvme_wq, &ctrl->scan_work);
2235} 2484}
2236EXPORT_SYMBOL_GPL(nvme_queue_scan); 2485EXPORT_SYMBOL_GPL(nvme_queue_scan);
2237 2486
@@ -2286,7 +2535,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
2286 /*FALLTHRU*/ 2535 /*FALLTHRU*/
2287 case NVME_SC_ABORT_REQ: 2536 case NVME_SC_ABORT_REQ:
2288 ++ctrl->event_limit; 2537 ++ctrl->event_limit;
2289 schedule_work(&ctrl->async_event_work); 2538 queue_work(nvme_wq, &ctrl->async_event_work);
2290 break; 2539 break;
2291 default: 2540 default:
2292 break; 2541 break;
@@ -2309,7 +2558,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
2309void nvme_queue_async_events(struct nvme_ctrl *ctrl) 2558void nvme_queue_async_events(struct nvme_ctrl *ctrl)
2310{ 2559{
2311 ctrl->event_limit = NVME_NR_AERS; 2560 ctrl->event_limit = NVME_NR_AERS;
2312 schedule_work(&ctrl->async_event_work); 2561 queue_work(nvme_wq, &ctrl->async_event_work);
2313} 2562}
2314EXPORT_SYMBOL_GPL(nvme_queue_async_events); 2563EXPORT_SYMBOL_GPL(nvme_queue_async_events);
2315 2564
@@ -2442,6 +2691,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
2442 2691
2443 mutex_lock(&ctrl->namespaces_mutex); 2692 mutex_lock(&ctrl->namespaces_mutex);
2444 2693
2694 /* Forcibly unquiesce queues to avoid blocking dispatch */
2695 blk_mq_unquiesce_queue(ctrl->admin_q);
2696
2445 /* Forcibly start all queues to avoid having stuck requests */ 2697 /* Forcibly start all queues to avoid having stuck requests */
2446 blk_mq_start_hw_queues(ctrl->admin_q); 2698 blk_mq_start_hw_queues(ctrl->admin_q);
2447 2699
@@ -2455,6 +2707,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
2455 revalidate_disk(ns->disk); 2707 revalidate_disk(ns->disk);
2456 blk_set_queue_dying(ns->queue); 2708 blk_set_queue_dying(ns->queue);
2457 2709
2710 /* Forcibly unquiesce queues to avoid blocking dispatch */
2711 blk_mq_unquiesce_queue(ns->queue);
2712
2458 /* 2713 /*
2459 * Forcibly start all queues to avoid having stuck requests. 2714 * Forcibly start all queues to avoid having stuck requests.
2460 * Note that we must ensure the queues are not stopped 2715 * Note that we must ensure the queues are not stopped
@@ -2533,7 +2788,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
2533 2788
2534 mutex_lock(&ctrl->namespaces_mutex); 2789 mutex_lock(&ctrl->namespaces_mutex);
2535 list_for_each_entry(ns, &ctrl->namespaces, list) { 2790 list_for_each_entry(ns, &ctrl->namespaces, list) {
2536 blk_mq_start_stopped_hw_queues(ns->queue, true); 2791 blk_mq_unquiesce_queue(ns->queue);
2537 blk_mq_kick_requeue_list(ns->queue); 2792 blk_mq_kick_requeue_list(ns->queue);
2538 } 2793 }
2539 mutex_unlock(&ctrl->namespaces_mutex); 2794 mutex_unlock(&ctrl->namespaces_mutex);
@@ -2544,10 +2799,15 @@ int __init nvme_core_init(void)
2544{ 2799{
2545 int result; 2800 int result;
2546 2801
2802 nvme_wq = alloc_workqueue("nvme-wq",
2803 WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
2804 if (!nvme_wq)
2805 return -ENOMEM;
2806
2547 result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", 2807 result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
2548 &nvme_dev_fops); 2808 &nvme_dev_fops);
2549 if (result < 0) 2809 if (result < 0)
2550 return result; 2810 goto destroy_wq;
2551 else if (result > 0) 2811 else if (result > 0)
2552 nvme_char_major = result; 2812 nvme_char_major = result;
2553 2813
@@ -2559,8 +2819,10 @@ int __init nvme_core_init(void)
2559 2819
2560 return 0; 2820 return 0;
2561 2821
2562 unregister_chrdev: 2822unregister_chrdev:
2563 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 2823 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
2824destroy_wq:
2825 destroy_workqueue(nvme_wq);
2564 return result; 2826 return result;
2565} 2827}
2566 2828
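
The nvme_core_init() hunk above allocates the new nvme_wq first and extends the error path so a failed character-device registration unwinds it through the destroy_wq label: the usual kernel goto ladder, where each successfully acquired resource gets a cleanup label executed in reverse order. A userspace sketch of that pattern with placeholder resources (not the driver's real init sequence):

#include <stdio.h>
#include <stdlib.h>

static int init_subsystem(void)
{
    void *wq, *chrdev;
    int ret;

    wq = malloc(64);             /* stands in for alloc_workqueue() */
    if (!wq)
        return -1;

    chrdev = malloc(64);         /* stands in for __register_chrdev() */
    if (!chrdev) {
        ret = -1;
        goto destroy_wq;         /* unwind only what was already set up */
    }

    /* On success both resources stay held, like a module init. */
    printf("initialized\n");
    return 0;

destroy_wq:
    free(wq);
    return ret;
}

int main(void)
{
    return init_subsystem() ? 1 : 0;
}

The same shape explains why nvme_core_exit() above gains a matching destroy_workqueue(nvme_wq) call.
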
@@ -2568,6 +2830,7 @@ void nvme_core_exit(void)
2568{ 2830{
2569 class_destroy(nvme_class); 2831 class_destroy(nvme_class);
2570 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 2832 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
2833 destroy_workqueue(nvme_wq);
2571} 2834}
2572 2835
2573MODULE_LICENSE("GPL"); 2836MODULE_LICENSE("GPL");
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index c190d7e36900..2e582a240943 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -58,7 +58,6 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn)
58 58
59 kref_init(&host->ref); 59 kref_init(&host->ref);
60 memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); 60 memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
61 uuid_gen(&host->id);
62 61
63 list_add_tail(&host->list, &nvmf_hosts); 62 list_add_tail(&host->list, &nvmf_hosts);
64out_unlock: 63out_unlock:
@@ -75,7 +74,6 @@ static struct nvmf_host *nvmf_host_default(void)
75 return NULL; 74 return NULL;
76 75
77 kref_init(&host->ref); 76 kref_init(&host->ref);
78 uuid_gen(&host->id);
79 snprintf(host->nqn, NVMF_NQN_SIZE, 77 snprintf(host->nqn, NVMF_NQN_SIZE,
80 "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); 78 "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
81 79
@@ -128,16 +126,6 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
128EXPORT_SYMBOL_GPL(nvmf_get_address); 126EXPORT_SYMBOL_GPL(nvmf_get_address);
129 127
130/** 128/**
131 * nvmf_get_subsysnqn() - Get subsystem NQN
132 * @ctrl: Host NVMe controller instance which we got the NQN
133 */
134const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl)
135{
136 return ctrl->opts->subsysnqn;
137}
138EXPORT_SYMBOL_GPL(nvmf_get_subsysnqn);
139
140/**
141 * nvmf_reg_read32() - NVMe Fabrics "Property Get" API function. 129 * nvmf_reg_read32() - NVMe Fabrics "Property Get" API function.
142 * @ctrl: Host NVMe controller instance maintaining the admin 130 * @ctrl: Host NVMe controller instance maintaining the admin
143 * queue used to submit the property read command to 131 * queue used to submit the property read command to
@@ -337,6 +325,24 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
337 } 325 }
338 } 326 }
339 break; 327 break;
328
329 case NVME_SC_CONNECT_INVALID_HOST:
330 dev_err(ctrl->device,
331 "Connect for subsystem %s is not allowed, hostnqn: %s\n",
332 data->subsysnqn, data->hostnqn);
333 break;
334
335 case NVME_SC_CONNECT_CTRL_BUSY:
336 dev_err(ctrl->device,
337 "Connect command failed: controller is busy or not available\n");
338 break;
339
340 case NVME_SC_CONNECT_FORMAT:
341 dev_err(ctrl->device,
342 "Connect incompatible format: %d",
343 cmd->connect.recfmt);
344 break;
345
340 default: 346 default:
341 dev_err(ctrl->device, 347 dev_err(ctrl->device,
342 "Connect command failed, error wo/DNR bit: %d\n", 348 "Connect command failed, error wo/DNR bit: %d\n",
@@ -376,13 +382,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
376 cmd.connect.opcode = nvme_fabrics_command; 382 cmd.connect.opcode = nvme_fabrics_command;
377 cmd.connect.fctype = nvme_fabrics_type_connect; 383 cmd.connect.fctype = nvme_fabrics_type_connect;
378 cmd.connect.qid = 0; 384 cmd.connect.qid = 0;
379 385 cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
380 /*
381 * fabrics spec sets a minimum of depth 32 for admin queue,
382 * so set the queue with this depth always until
383 * justification otherwise.
384 */
385 cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
386 386
387 /* 387 /*
388 * Set keep-alive timeout in seconds granularity (ms * 1000) 388 * Set keep-alive timeout in seconds granularity (ms * 1000)
@@ -474,7 +474,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
474bool nvmf_should_reconnect(struct nvme_ctrl *ctrl) 474bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
475{ 475{
476 if (ctrl->opts->max_reconnects == -1 || 476
477 ctrl->opts->nr_reconnects < ctrl->opts->max_reconnects) 477 ctrl->nr_reconnects < ctrl->opts->max_reconnects)
478 return true; 478 return true;
479 479
480 return false; 480 return false;
@@ -547,6 +547,7 @@ static const match_table_t opt_tokens = {
547 { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, 547 { NVMF_OPT_KATO, "keep_alive_tmo=%d" },
548 { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, 548 { NVMF_OPT_HOSTNQN, "hostnqn=%s" },
549 { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, 549 { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
550 { NVMF_OPT_HOST_ID, "hostid=%s" },
550 { NVMF_OPT_ERR, NULL } 551 { NVMF_OPT_ERR, NULL }
551}; 552};
552 553
@@ -558,6 +559,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
558 int token, ret = 0; 559 int token, ret = 0;
559 size_t nqnlen = 0; 560 size_t nqnlen = 0;
560 int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO; 561 int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
562 uuid_t hostid;
561 563
562 /* Set defaults */ 564 /* Set defaults */
563 opts->queue_size = NVMF_DEF_QUEUE_SIZE; 565 opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -568,6 +570,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
568 if (!options) 570 if (!options)
569 return -ENOMEM; 571 return -ENOMEM;
570 572
573 uuid_gen(&hostid);
574
571 while ((p = strsep(&o, ",\n")) != NULL) { 575 while ((p = strsep(&o, ",\n")) != NULL) {
572 if (!*p) 576 if (!*p)
573 continue; 577 continue;
@@ -724,6 +728,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
724 } 728 }
725 opts->host_traddr = p; 729 opts->host_traddr = p;
726 break; 730 break;
731 case NVMF_OPT_HOST_ID:
732 p = match_strdup(args);
733 if (!p) {
734 ret = -ENOMEM;
735 goto out;
736 }
737 if (uuid_parse(p, &hostid)) {
738 ret = -EINVAL;
739 goto out;
740 }
741 break;
727 default: 742 default:
728 pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", 743 pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
729 p); 744 p);
@@ -743,6 +758,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
743 opts->host = nvmf_default_host; 758 opts->host = nvmf_default_host;
744 } 759 }
745 760
761 uuid_copy(&opts->host->id, &hostid);
762
746out: 763out:
747 if (!opts->discovery_nqn && !opts->kato) 764 if (!opts->discovery_nqn && !opts->kato)
748 opts->kato = NVME_DEFAULT_KATO; 765 opts->kato = NVME_DEFAULT_KATO;
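
The new hostid= option is validated with uuid_parse() and copied into the host structure; when the option is absent, the freshly generated UUID from uuid_gen() remains in effect. A userspace sketch of the same validate-or-fall-back flow, assuming libuuid from util-linux is available (an assumption about the build environment, not something the patch uses):

/* Build with: cc -o hostid hostid.c -luuid */
#include <stdio.h>
#include <uuid/uuid.h>

int main(int argc, char **argv)
{
    uuid_t hostid;
    char out[37];

    /* Default: a randomly generated host ID, like the kernel's uuid_gen(). */
    uuid_generate(hostid);

    /* If a hostid string was supplied it must parse as a UUID. */
    if (argc > 1 && uuid_parse(argv[1], hostid)) {
        fprintf(stderr, "invalid hostid: %s\n", argv[1]);
        return 1;   /* the option parser returns -EINVAL in this case */
    }

    uuid_unparse(hostid, out);
    printf("hostid=%s\n", out);
    return 0;
}
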
@@ -803,7 +820,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
803 820
804#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN) 821#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
805#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ 822#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
806 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN) 823 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
824 NVMF_OPT_HOST_ID)
807 825
808static struct nvme_ctrl * 826static struct nvme_ctrl *
809nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) 827nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -854,6 +872,15 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
854 goto out_unlock; 872 goto out_unlock;
855 } 873 }
856 874
875 if (strcmp(ctrl->subnqn, opts->subsysnqn)) {
876 dev_warn(ctrl->device,
877 "controller returned incorrect NQN: \"%s\".\n",
878 ctrl->subnqn);
879 mutex_unlock(&nvmf_transports_mutex);
880 ctrl->ops->delete_ctrl(ctrl);
881 return ERR_PTR(-EINVAL);
882 }
883
857 mutex_unlock(&nvmf_transports_mutex); 884 mutex_unlock(&nvmf_transports_mutex);
858 return ctrl; 885 return ctrl;
859 886
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 29be7600689d..bf33663218cd 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -56,6 +56,7 @@ enum {
56 NVMF_OPT_RECONNECT_DELAY = 1 << 9, 56 NVMF_OPT_RECONNECT_DELAY = 1 << 9,
57 NVMF_OPT_HOST_TRADDR = 1 << 10, 57 NVMF_OPT_HOST_TRADDR = 1 << 10,
58 NVMF_OPT_CTRL_LOSS_TMO = 1 << 11, 58 NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
59 NVMF_OPT_HOST_ID = 1 << 12,
59}; 60};
60 61
61/** 62/**
@@ -80,7 +81,6 @@ enum {
80 * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN. 81 * @discovery_nqn: indicates if the subsysnqn is the well-known discovery NQN.
81 * @kato: Keep-alive timeout. 82 * @kato: Keep-alive timeout.
82 * @host: Virtual NVMe host, contains the NQN and Host ID. 83 * @host: Virtual NVMe host, contains the NQN and Host ID.
83 * @nr_reconnects: number of reconnect attempted since the last ctrl failure
84 * @max_reconnects: maximum number of allowed reconnect attempts before removing 84 * @max_reconnects: maximum number of allowed reconnect attempts before removing
85 * the controller, (-1) means reconnect forever, zero means remove 85 * the controller, (-1) means reconnect forever, zero means remove
86 * immediately; 86 * immediately;
@@ -98,7 +98,6 @@ struct nvmf_ctrl_options {
98 bool discovery_nqn; 98 bool discovery_nqn;
99 unsigned int kato; 99 unsigned int kato;
100 struct nvmf_host *host; 100 struct nvmf_host *host;
101 int nr_reconnects;
102 int max_reconnects; 101 int max_reconnects;
103}; 102};
104 103
@@ -140,7 +139,6 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
140int nvmf_register_transport(struct nvmf_transport_ops *ops); 139int nvmf_register_transport(struct nvmf_transport_ops *ops);
141void nvmf_unregister_transport(struct nvmf_transport_ops *ops); 140void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
142void nvmf_free_options(struct nvmf_ctrl_options *opts); 141void nvmf_free_options(struct nvmf_ctrl_options *opts);
143const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
144int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); 142int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
145bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); 143bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
146 144
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5ee4c71d168d..ed87214fdc0e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -36,7 +36,7 @@
36 */ 36 */
37#define NVME_FC_NR_AEN_COMMANDS 1 37#define NVME_FC_NR_AEN_COMMANDS 1
38#define NVME_FC_AQ_BLKMQ_DEPTH \ 38#define NVME_FC_AQ_BLKMQ_DEPTH \
39 (NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) 39 (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
40#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) 40#define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1)
41 41
42enum nvme_fc_queue_flags { 42enum nvme_fc_queue_flags {
@@ -161,12 +161,12 @@ struct nvme_fc_ctrl {
161 struct blk_mq_tag_set tag_set; 161 struct blk_mq_tag_set tag_set;
162 162
163 struct work_struct delete_work; 163 struct work_struct delete_work;
164 struct work_struct reset_work;
165 struct delayed_work connect_work; 164 struct delayed_work connect_work;
166 165
167 struct kref ref; 166 struct kref ref;
168 u32 flags; 167 u32 flags;
169 u32 iocnt; 168 u32 iocnt;
169 wait_queue_head_t ioabort_wait;
170 170
171 struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; 171 struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS];
172 172
@@ -214,7 +214,6 @@ static LIST_HEAD(nvme_fc_lport_list);
214static DEFINE_IDA(nvme_fc_local_port_cnt); 214static DEFINE_IDA(nvme_fc_local_port_cnt);
215static DEFINE_IDA(nvme_fc_ctrl_cnt); 215static DEFINE_IDA(nvme_fc_ctrl_cnt);
216 216
217static struct workqueue_struct *nvme_fc_wq;
218 217
219 218
220 219
@@ -1241,8 +1240,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
1241 1240
1242 spin_lock_irqsave(&ctrl->lock, flags); 1241 spin_lock_irqsave(&ctrl->lock, flags);
1243 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1242 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
1244 if (ctrl->flags & FCCTRL_TERMIO) 1243 if (ctrl->flags & FCCTRL_TERMIO) {
1245 ctrl->iocnt--; 1244 if (!--ctrl->iocnt)
1245 wake_up(&ctrl->ioabort_wait);
1246 }
1246 } 1247 }
1247 if (op->flags & FCOP_FLAGS_RELEASED) 1248 if (op->flags & FCOP_FLAGS_RELEASED)
1248 complete_rq = true; 1249 complete_rq = true;
@@ -1449,18 +1450,8 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
1449{ 1450{
1450 struct nvme_fc_ctrl *ctrl = set->driver_data; 1451 struct nvme_fc_ctrl *ctrl = set->driver_data;
1451 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1452 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
1452 struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; 1453 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
1453 1454 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
1454 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
1455}
1456
1457static int
1458nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq,
1459 unsigned int hctx_idx, unsigned int numa_node)
1460{
1461 struct nvme_fc_ctrl *ctrl = set->driver_data;
1462 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
1463 struct nvme_fc_queue *queue = &ctrl->queues[0];
1464 1455
1465 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1456 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
1466} 1457}
@@ -1758,16 +1749,16 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
1758static void 1749static void
1759nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 1750nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
1760{ 1751{
1752 /* only proceed if in LIVE state - e.g. on first error */
1753 if (ctrl->ctrl.state != NVME_CTRL_LIVE)
1754 return;
1755
1761 dev_warn(ctrl->ctrl.device, 1756 dev_warn(ctrl->ctrl.device,
1762 "NVME-FC{%d}: transport association error detected: %s\n", 1757 "NVME-FC{%d}: transport association error detected: %s\n",
1763 ctrl->cnum, errmsg); 1758 ctrl->cnum, errmsg);
1764 dev_warn(ctrl->ctrl.device, 1759 dev_warn(ctrl->ctrl.device,
1765 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 1760 "NVME-FC{%d}: resetting controller\n", ctrl->cnum);
1766 1761
1767 /* stop the queues on error, cleanup is in reset thread */
1768 if (ctrl->queue_count > 1)
1769 nvme_stop_queues(&ctrl->ctrl);
1770
1771 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 1762 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
1772 dev_err(ctrl->ctrl.device, 1763 dev_err(ctrl->ctrl.device,
1773 "NVME-FC{%d}: error_recovery: Couldn't change state " 1764 "NVME-FC{%d}: error_recovery: Couldn't change state "
@@ -1775,10 +1766,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
1775 return; 1766 return;
1776 } 1767 }
1777 1768
1778 if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) 1769 nvme_reset_ctrl(&ctrl->ctrl);
1779 dev_err(ctrl->ctrl.device,
1780 "NVME-FC{%d}: error_recovery: Failed to schedule "
1781 "reset work\n", ctrl->cnum);
1782} 1770}
1783 1771
1784static enum blk_eh_timer_return 1772static enum blk_eh_timer_return
@@ -1887,7 +1875,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
1887 * level FC exchange resource that is also outstanding. This must be 1875 * level FC exchange resource that is also outstanding. This must be
1888 * considered in all cleanup operations. 1876 * considered in all cleanup operations.
1889 */ 1877 */
1890static int 1878static blk_status_t
1891nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1879nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1892 struct nvme_fc_fcp_op *op, u32 data_len, 1880 struct nvme_fc_fcp_op *op, u32 data_len,
1893 enum nvmefc_fcp_datadir io_dir) 1881 enum nvmefc_fcp_datadir io_dir)
@@ -1902,10 +1890,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1902 * the target device is present 1890 * the target device is present
1903 */ 1891 */
1904 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1892 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
1905 return BLK_MQ_RQ_QUEUE_ERROR; 1893 return BLK_STS_IOERR;
1906 1894
1907 if (!nvme_fc_ctrl_get(ctrl)) 1895 if (!nvme_fc_ctrl_get(ctrl))
1908 return BLK_MQ_RQ_QUEUE_ERROR; 1896 return BLK_STS_IOERR;
1909 1897
1910 /* format the FC-NVME CMD IU and fcp_req */ 1898 /* format the FC-NVME CMD IU and fcp_req */
1911 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 1899 cmdiu->connection_id = cpu_to_be64(queue->connection_id);
@@ -1953,8 +1941,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1953 if (ret < 0) { 1941 if (ret < 0) {
1954 nvme_cleanup_cmd(op->rq); 1942 nvme_cleanup_cmd(op->rq);
1955 nvme_fc_ctrl_put(ctrl); 1943 nvme_fc_ctrl_put(ctrl);
1956 return (ret == -ENOMEM || ret == -EAGAIN) ? 1944 if (ret == -ENOMEM || ret == -EAGAIN)
1957 BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; 1945 return BLK_STS_RESOURCE;
1946 return BLK_STS_IOERR;
1958 } 1947 }
1959 } 1948 }
1960 1949
@@ -1971,28 +1960,26 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
1971 queue->lldd_handle, &op->fcp_req); 1960 queue->lldd_handle, &op->fcp_req);
1972 1961
1973 if (ret) { 1962 if (ret) {
1974 if (op->rq) { /* normal request */ 1963 if (op->rq) /* normal request */
1975 nvme_fc_unmap_data(ctrl, op->rq, op); 1964 nvme_fc_unmap_data(ctrl, op->rq, op);
1976 nvme_cleanup_cmd(op->rq);
1977 }
1978 /* else - aen. no cleanup needed */ 1965 /* else - aen. no cleanup needed */
1979 1966
1980 nvme_fc_ctrl_put(ctrl); 1967 nvme_fc_ctrl_put(ctrl);
1981 1968
1982 if (ret != -EBUSY) 1969 if (ret != -EBUSY)
1983 return BLK_MQ_RQ_QUEUE_ERROR; 1970 return BLK_STS_IOERR;
1984 1971
1985 if (op->rq) { 1972 if (op->rq) {
1986 blk_mq_stop_hw_queues(op->rq->q); 1973 blk_mq_stop_hw_queues(op->rq->q);
1987 blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); 1974 blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
1988 } 1975 }
1989 return BLK_MQ_RQ_QUEUE_BUSY; 1976 return BLK_STS_RESOURCE;
1990 } 1977 }
1991 1978
1992 return BLK_MQ_RQ_QUEUE_OK; 1979 return BLK_STS_OK;
1993} 1980}
1994 1981
1995static int 1982static blk_status_t
1996nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 1983nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
1997 const struct blk_mq_queue_data *bd) 1984 const struct blk_mq_queue_data *bd)
1998{ 1985{
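
These fc.c hunks convert the submission path from the old BLK_MQ_RQ_QUEUE_* integers to blk_status_t: errors the hunks above treat as transient (-ENOMEM, -EAGAIN, -EBUSY) become BLK_STS_RESOURCE so the request is requeued, anything else becomes BLK_STS_IOERR. A compact sketch of that errno-to-status mapping, using a local enum rather than the kernel's blk_status_t:

#include <errno.h>
#include <stdio.h>

/* Local stand-ins for BLK_STS_OK / BLK_STS_RESOURCE / BLK_STS_IOERR. */
enum blk_sts { STS_OK, STS_RESOURCE, STS_IOERR };

static enum blk_sts errno_to_sts(int err)
{
    if (!err)
        return STS_OK;
    if (err == -ENOMEM || err == -EAGAIN || err == -EBUSY)
        return STS_RESOURCE;   /* transient: requeue and retry later */
    return STS_IOERR;          /* fail the request */
}

int main(void)
{
    printf("%d %d %d\n",
           errno_to_sts(0), errno_to_sts(-EAGAIN), errno_to_sts(-EIO));
    return 0;
}
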
@@ -2005,7 +1992,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
2005 struct nvme_command *sqe = &cmdiu->sqe; 1992 struct nvme_command *sqe = &cmdiu->sqe;
2006 enum nvmefc_fcp_datadir io_dir; 1993 enum nvmefc_fcp_datadir io_dir;
2007 u32 data_len; 1994 u32 data_len;
2008 int ret; 1995 blk_status_t ret;
2009 1996
2010 ret = nvme_setup_cmd(ns, rq, sqe); 1997 ret = nvme_setup_cmd(ns, rq, sqe);
2011 if (ret) 1998 if (ret)
@@ -2060,7 +2047,7 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
2060 struct nvme_fc_fcp_op *aen_op; 2047 struct nvme_fc_fcp_op *aen_op;
2061 unsigned long flags; 2048 unsigned long flags;
2062 bool terminating = false; 2049 bool terminating = false;
2063 int ret; 2050 blk_status_t ret;
2064 2051
2065 if (aer_idx > NVME_FC_NR_AEN_COMMANDS) 2052 if (aer_idx > NVME_FC_NR_AEN_COMMANDS)
2066 return; 2053 return;
@@ -2092,7 +2079,6 @@ __nvme_fc_final_op_cleanup(struct request *rq)
2092 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | 2079 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
2093 FCOP_FLAGS_COMPLETE); 2080 FCOP_FLAGS_COMPLETE);
2094 2081
2095 nvme_cleanup_cmd(rq);
2096 nvme_fc_unmap_data(ctrl, rq, op); 2082 nvme_fc_unmap_data(ctrl, rq, op);
2097 nvme_complete_rq(rq); 2083 nvme_complete_rq(rq);
2098 nvme_fc_ctrl_put(ctrl); 2084 nvme_fc_ctrl_put(ctrl);
@@ -2310,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
2310 int ret; 2296 int ret;
2311 bool changed; 2297 bool changed;
2312 2298
2313 ++ctrl->ctrl.opts->nr_reconnects; 2299 ++ctrl->ctrl.nr_reconnects;
2314 2300
2315 /* 2301 /*
2316 * Create the admin queue 2302 * Create the admin queue
@@ -2407,7 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
2407 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2393 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
2408 WARN_ON_ONCE(!changed); 2394 WARN_ON_ONCE(!changed);
2409 2395
2410 ctrl->ctrl.opts->nr_reconnects = 0; 2396 ctrl->ctrl.nr_reconnects = 0;
2411 2397
2412 if (ctrl->queue_count > 1) { 2398 if (ctrl->queue_count > 1) {
2413 nvme_start_queues(&ctrl->ctrl); 2399 nvme_start_queues(&ctrl->ctrl);
@@ -2493,11 +2479,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
2493 2479
2494 /* wait for all io that had to be aborted */ 2480 /* wait for all io that had to be aborted */
2495 spin_lock_irqsave(&ctrl->lock, flags); 2481 spin_lock_irqsave(&ctrl->lock, flags);
2496 while (ctrl->iocnt) { 2482 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
2497 spin_unlock_irqrestore(&ctrl->lock, flags);
2498 msleep(1000);
2499 spin_lock_irqsave(&ctrl->lock, flags);
2500 }
2501 ctrl->flags &= ~FCCTRL_TERMIO; 2483 ctrl->flags &= ~FCCTRL_TERMIO;
2502 spin_unlock_irqrestore(&ctrl->lock, flags); 2484 spin_unlock_irqrestore(&ctrl->lock, flags);
2503 2485
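
The delete path above stops polling ctrl->iocnt with a one-second msleep() loop and instead sleeps on the new ctrl->ioabort_wait queue; the matching wake_up() was added in the __nvme_fc_fcpop_chk_teardowns() hunk earlier, fired when the count drops to zero. A userspace analogue of that wait-until-a-counter-reaches-zero pattern, using pthreads rather than the kernel waitqueue API (build with cc -pthread):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int iocnt = 3;                    /* outstanding aborted I/Os (example) */

static void *complete_io(void *arg)
{
    (void)arg;
    for (;;) {
        usleep(1000);                    /* pretend an abort completes */
        pthread_mutex_lock(&lock);
        if (iocnt && !--iocnt)           /* like: if (!--ctrl->iocnt) wake_up() */
            pthread_cond_signal(&done);
        if (!iocnt) {
            pthread_mutex_unlock(&lock);
            return NULL;
        }
        pthread_mutex_unlock(&lock);
    }
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, complete_io, NULL);

    pthread_mutex_lock(&lock);
    while (iocnt)                        /* like wait_event_lock_irq(...) */
        pthread_cond_wait(&done, &lock);
    pthread_mutex_unlock(&lock);

    pthread_join(t, NULL);
    printf("all aborted I/O completed\n");
    return 0;
}

Waking the sleeper exactly when the count hits zero avoids both the wasted wakeups and the up-to-one-second extra latency of the old poll.
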
@@ -2527,7 +2509,7 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
2527 struct nvme_fc_ctrl *ctrl = 2509 struct nvme_fc_ctrl *ctrl =
2528 container_of(work, struct nvme_fc_ctrl, delete_work); 2510 container_of(work, struct nvme_fc_ctrl, delete_work);
2529 2511
2530 cancel_work_sync(&ctrl->reset_work); 2512 cancel_work_sync(&ctrl->ctrl.reset_work);
2531 cancel_delayed_work_sync(&ctrl->connect_work); 2513 cancel_delayed_work_sync(&ctrl->connect_work);
2532 2514
2533 /* 2515 /*
@@ -2554,7 +2536,7 @@ __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
2554 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 2536 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
2555 return true; 2537 return true;
2556 2538
2557 if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) 2539 if (!queue_work(nvme_wq, &ctrl->delete_work))
2558 return true; 2540 return true;
2559 2541
2560 return false; 2542 return false;
@@ -2581,7 +2563,7 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
2581 ret = __nvme_fc_del_ctrl(ctrl); 2563 ret = __nvme_fc_del_ctrl(ctrl);
2582 2564
2583 if (!ret) 2565 if (!ret)
2584 flush_workqueue(nvme_fc_wq); 2566 flush_workqueue(nvme_wq);
2585 2567
2586 nvme_put_ctrl(&ctrl->ctrl); 2568 nvme_put_ctrl(&ctrl->ctrl);
2587 2569
@@ -2606,13 +2588,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
2606 dev_info(ctrl->ctrl.device, 2588 dev_info(ctrl->ctrl.device,
2607 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", 2589 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
2608 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); 2590 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
2609 queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, 2591 queue_delayed_work(nvme_wq, &ctrl->connect_work,
2610 ctrl->ctrl.opts->reconnect_delay * HZ); 2592 ctrl->ctrl.opts->reconnect_delay * HZ);
2611 } else { 2593 } else {
2612 dev_warn(ctrl->ctrl.device, 2594 dev_warn(ctrl->ctrl.device,
2613 "NVME-FC{%d}: Max reconnect attempts (%d) " 2595 "NVME-FC{%d}: Max reconnect attempts (%d) "
2614 "reached. Removing controller\n", 2596 "reached. Removing controller\n",
2615 ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); 2597 ctrl->cnum, ctrl->ctrl.nr_reconnects);
2616 WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); 2598 WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
2617 } 2599 }
2618} 2600}
@@ -2621,7 +2603,7 @@ static void
2621nvme_fc_reset_ctrl_work(struct work_struct *work) 2603nvme_fc_reset_ctrl_work(struct work_struct *work)
2622{ 2604{
2623 struct nvme_fc_ctrl *ctrl = 2605 struct nvme_fc_ctrl *ctrl =
2624 container_of(work, struct nvme_fc_ctrl, reset_work); 2606 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
2625 int ret; 2607 int ret;
2626 2608
2627 /* will block while waiting for io to terminate */ 2609
@@ -2635,29 +2617,6 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
2635 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); 2617 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
2636} 2618}
2637 2619
2638/*
2639 * called by the nvme core layer, for sysfs interface that requests
2640 * a reset of the nvme controller
2641 */
2642static int
2643nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
2644{
2645 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2646
2647 dev_info(ctrl->ctrl.device,
2648 "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
2649
2650 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
2651 return -EBUSY;
2652
2653 if (!queue_work(nvme_fc_wq, &ctrl->reset_work))
2654 return -EBUSY;
2655
2656 flush_work(&ctrl->reset_work);
2657
2658 return 0;
2659}
2660
2661static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2620static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
2662 .name = "fc", 2621 .name = "fc",
2663 .module = THIS_MODULE, 2622 .module = THIS_MODULE,
@@ -2665,11 +2624,9 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
2665 .reg_read32 = nvmf_reg_read32, 2624 .reg_read32 = nvmf_reg_read32,
2666 .reg_read64 = nvmf_reg_read64, 2625 .reg_read64 = nvmf_reg_read64,
2667 .reg_write32 = nvmf_reg_write32, 2626 .reg_write32 = nvmf_reg_write32,
2668 .reset_ctrl = nvme_fc_reset_nvme_ctrl,
2669 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2627 .free_ctrl = nvme_fc_nvme_ctrl_freed,
2670 .submit_async_event = nvme_fc_submit_async_event, 2628 .submit_async_event = nvme_fc_submit_async_event,
2671 .delete_ctrl = nvme_fc_del_nvme_ctrl, 2629 .delete_ctrl = nvme_fc_del_nvme_ctrl,
2672 .get_subsysnqn = nvmf_get_subsysnqn,
2673 .get_address = nvmf_get_address, 2630 .get_address = nvmf_get_address,
2674}; 2631};
2675 2632
@@ -2695,7 +2652,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
2695static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2652static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
2696 .queue_rq = nvme_fc_queue_rq, 2653 .queue_rq = nvme_fc_queue_rq,
2697 .complete = nvme_fc_complete_rq, 2654 .complete = nvme_fc_complete_rq,
2698 .init_request = nvme_fc_init_admin_request, 2655 .init_request = nvme_fc_init_request,
2699 .exit_request = nvme_fc_exit_request, 2656 .exit_request = nvme_fc_exit_request,
2700 .reinit_request = nvme_fc_reinit_request, 2657 .reinit_request = nvme_fc_reinit_request,
2701 .init_hctx = nvme_fc_init_admin_hctx, 2658 .init_hctx = nvme_fc_init_admin_hctx,
@@ -2740,7 +2697,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
2740 kref_init(&ctrl->ref); 2697 kref_init(&ctrl->ref);
2741 2698
2742 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); 2699 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
2743 INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); 2700 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
2744 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 2701 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
2745 spin_lock_init(&ctrl->lock); 2702 spin_lock_init(&ctrl->lock);
2746 2703
@@ -2807,6 +2764,9 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
2807 nvme_uninit_ctrl(&ctrl->ctrl); 2764 nvme_uninit_ctrl(&ctrl->ctrl);
2808 nvme_put_ctrl(&ctrl->ctrl); 2765 nvme_put_ctrl(&ctrl->ctrl);
2809 2766
2767 /* Remove core ctrl ref. */
2768 nvme_put_ctrl(&ctrl->ctrl);
2769
2810 /* as we're past the point where we transition to the ref 2770 /* as we're past the point where we transition to the ref
2811 * counting teardown path, if we return a bad pointer here, 2771 * counting teardown path, if we return a bad pointer here,
2812 * the calling routine, thinking it's prior to the 2772 * the calling routine, thinking it's prior to the
@@ -2965,20 +2925,7 @@ static struct nvmf_transport_ops nvme_fc_transport = {
2965 2925
2966static int __init nvme_fc_init_module(void) 2926static int __init nvme_fc_init_module(void)
2967{ 2927{
2968 int ret; 2928 return nvmf_register_transport(&nvme_fc_transport);
2969
2970 nvme_fc_wq = create_workqueue("nvme_fc_wq");
2971 if (!nvme_fc_wq)
2972 return -ENOMEM;
2973
2974 ret = nvmf_register_transport(&nvme_fc_transport);
2975 if (ret)
2976 goto err;
2977
2978 return 0;
2979err:
2980 destroy_workqueue(nvme_fc_wq);
2981 return ret;
2982} 2929}
2983 2930
2984static void __exit nvme_fc_exit_module(void) 2931static void __exit nvme_fc_exit_module(void)
@@ -2989,8 +2936,6 @@ static void __exit nvme_fc_exit_module(void)
2989 2936
2990 nvmf_unregister_transport(&nvme_fc_transport); 2937 nvmf_unregister_transport(&nvme_fc_transport);
2991 2938
2992 destroy_workqueue(nvme_fc_wq);
2993
2994 ida_destroy(&nvme_fc_local_port_cnt); 2939 ida_destroy(&nvme_fc_local_port_cnt);
2995 ida_destroy(&nvme_fc_ctrl_cnt); 2940 ida_destroy(&nvme_fc_ctrl_cnt);
2996} 2941}
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index f5df78ed1e10..be8541335e31 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -242,7 +242,7 @@ static inline void _nvme_nvm_check_size(void)
242 BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); 242 BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
243 BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); 243 BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
244 BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); 244 BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
245 BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096); 245 BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE);
246 BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); 246 BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64);
247} 247}
248 248
@@ -480,7 +480,7 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
480 rqd->bio->bi_iter.bi_sector)); 480 rqd->bio->bi_iter.bi_sector));
481} 481}
482 482
483static void nvme_nvm_end_io(struct request *rq, int error) 483static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
484{ 484{
485 struct nvm_rq *rqd = rq->end_io_data; 485 struct nvm_rq *rqd = rq->end_io_data;
486 486
@@ -509,7 +509,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
509 rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); 509 rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
510 if (IS_ERR(rq)) { 510 if (IS_ERR(rq)) {
511 kfree(cmd); 511 kfree(cmd);
512 return -ENOMEM; 512 return PTR_ERR(rq);
513 } 513 }
514 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; 514 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
515 515
@@ -571,13 +571,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
571 .max_phys_sect = 64, 571 .max_phys_sect = 64,
572}; 572};
573 573
574static void nvme_nvm_end_user_vio(struct request *rq, int error)
575{
576 struct completion *waiting = rq->end_io_data;
577
578 complete(waiting);
579}
580
581static int nvme_nvm_submit_user_cmd(struct request_queue *q, 574static int nvme_nvm_submit_user_cmd(struct request_queue *q,
582 struct nvme_ns *ns, 575 struct nvme_ns *ns,
583 struct nvme_nvm_command *vcmd, 576 struct nvme_nvm_command *vcmd,
@@ -608,7 +601,6 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
608 rq->timeout = timeout ? timeout : ADMIN_TIMEOUT; 601 rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
609 602
610 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; 603 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
611 rq->end_io_data = &wait;
612 604
613 if (ppa_buf && ppa_len) { 605 if (ppa_buf && ppa_len) {
614 ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); 606 ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
@@ -662,9 +654,7 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q,
662 } 654 }
663 655
664submit: 656submit:
665 blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio); 657 blk_execute_rq(q, NULL, rq, 0);
666
667 wait_for_completion_io(&wait);
668 658
669 if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) 659 if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
670 ret = -EINTR; 660 ret = -EINTR;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9d6a070d4391..d70ff0fdd36b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -27,12 +27,11 @@ extern unsigned char nvme_io_timeout;
27extern unsigned char admin_timeout; 27extern unsigned char admin_timeout;
28#define ADMIN_TIMEOUT (admin_timeout * HZ) 28#define ADMIN_TIMEOUT (admin_timeout * HZ)
29 29
30extern unsigned char shutdown_timeout;
31#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
32
33#define NVME_DEFAULT_KATO 5 30#define NVME_DEFAULT_KATO 5
34#define NVME_KATO_GRACE 10 31#define NVME_KATO_GRACE 10
35 32
33extern struct workqueue_struct *nvme_wq;
34
36enum { 35enum {
37 NVME_NS_LBA = 0, 36 NVME_NS_LBA = 0,
38 NVME_NS_LIGHTNVM = 1, 37 NVME_NS_LIGHTNVM = 1,
@@ -131,6 +130,7 @@ struct nvme_ctrl {
131 struct device *device; /* char device */ 130 struct device *device; /* char device */
132 struct list_head node; 131 struct list_head node;
133 struct ida ns_ida; 132 struct ida ns_ida;
133 struct work_struct reset_work;
134 134
135 struct opal_dev *opal_dev; 135 struct opal_dev *opal_dev;
136 136
@@ -138,6 +138,7 @@ struct nvme_ctrl {
138 char serial[20]; 138 char serial[20];
139 char model[40]; 139 char model[40];
140 char firmware_rev[8]; 140 char firmware_rev[8];
141 char subnqn[NVMF_NQN_SIZE];
141 u16 cntlid; 142 u16 cntlid;
142 143
143 u32 ctrl_config; 144 u32 ctrl_config;
@@ -147,6 +148,8 @@ struct nvme_ctrl {
147 u16 oncs; 148 u16 oncs;
148 u16 vid; 149 u16 vid;
149 u16 oacs; 150 u16 oacs;
151 u16 nssa;
152 u16 nr_streams;
150 atomic_t abort_limit; 153 atomic_t abort_limit;
151 u8 event_limit; 154 u8 event_limit;
152 u8 vwc; 155 u8 vwc;
@@ -165,6 +168,10 @@ struct nvme_ctrl {
165 168
166 /* Power saving configuration */ 169 /* Power saving configuration */
167 u64 ps_max_latency_us; 170 u64 ps_max_latency_us;
171 bool apst_enabled;
172
173 u32 hmpre;
174 u32 hmmin;
168 175
169 /* Fabrics only */ 176 /* Fabrics only */
170 u16 sqsize; 177 u16 sqsize;
@@ -172,12 +179,10 @@ struct nvme_ctrl {
172 u32 iorcsz; 179 u32 iorcsz;
173 u16 icdoff; 180 u16 icdoff;
174 u16 maxcmd; 181 u16 maxcmd;
182 int nr_reconnects;
175 struct nvmf_ctrl_options *opts; 183 struct nvmf_ctrl_options *opts;
176}; 184};
177 185
178/*
179 * An NVM Express namespace is equivalent to a SCSI LUN
180 */
181struct nvme_ns { 186struct nvme_ns {
182 struct list_head list; 187 struct list_head list;
183 188
@@ -189,14 +194,18 @@ struct nvme_ns {
189 int instance; 194 int instance;
190 195
191 u8 eui[8]; 196 u8 eui[8];
192 u8 uuid[16]; 197 u8 nguid[16];
198 uuid_t uuid;
193 199
194 unsigned ns_id; 200 unsigned ns_id;
195 int lba_shift; 201 int lba_shift;
196 u16 ms; 202 u16 ms;
203 u16 sgs;
204 u32 sws;
197 bool ext; 205 bool ext;
198 u8 pi_type; 206 u8 pi_type;
199 unsigned long flags; 207 unsigned long flags;
208 u16 noiob;
200 209
201#define NVME_NS_REMOVING 0 210#define NVME_NS_REMOVING 0
202#define NVME_NS_DEAD 1 211#define NVME_NS_DEAD 1
@@ -214,11 +223,9 @@ struct nvme_ctrl_ops {
214 int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); 223 int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
215 int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); 224 int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
216 int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); 225 int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
217 int (*reset_ctrl)(struct nvme_ctrl *ctrl);
218 void (*free_ctrl)(struct nvme_ctrl *ctrl); 226 void (*free_ctrl)(struct nvme_ctrl *ctrl);
219 void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx); 227 void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
220 int (*delete_ctrl)(struct nvme_ctrl *ctrl); 228 int (*delete_ctrl)(struct nvme_ctrl *ctrl);
221 const char *(*get_subsysnqn)(struct nvme_ctrl *ctrl);
222 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); 229 int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
223}; 230};
224 231
@@ -296,7 +303,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl);
296#define NVME_QID_ANY -1 303#define NVME_QID_ANY -1
297struct request *nvme_alloc_request(struct request_queue *q, 304struct request *nvme_alloc_request(struct request_queue *q,
298 struct nvme_command *cmd, unsigned int flags, int qid); 305 struct nvme_command *cmd, unsigned int flags, int qid);
299int nvme_setup_cmd(struct nvme_ns *ns, struct request *req, 306blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
300 struct nvme_command *cmd); 307 struct nvme_command *cmd);
301int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, 308int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
302 void *buf, unsigned bufflen); 309 void *buf, unsigned bufflen);
@@ -310,23 +317,10 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
310 void __user *ubuffer, unsigned bufflen, 317 void __user *ubuffer, unsigned bufflen,
311 void __user *meta_buffer, unsigned meta_len, u32 meta_seed, 318 void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
312 u32 *result, unsigned timeout); 319 u32 *result, unsigned timeout);
313int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
314int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
315 struct nvme_id_ns **id);
316int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
317int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
318 void *buffer, size_t buflen, u32 *result);
319int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
320 void *buffer, size_t buflen, u32 *result);
321int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); 320int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
322void nvme_start_keep_alive(struct nvme_ctrl *ctrl); 321void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
323void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); 322void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
324 323int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
325struct sg_io_hdr;
326
327int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
328int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
329int nvme_sg_get_version_num(int __user *ip);
330 324
331#ifdef CONFIG_NVM 325#ifdef CONFIG_NVM
332int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); 326int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 40c7581caeb0..33c3b9db7d36 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -17,28 +17,15 @@
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/blk-mq.h> 18#include <linux/blk-mq.h>
19#include <linux/blk-mq-pci.h> 19#include <linux/blk-mq-pci.h>
20#include <linux/cpu.h>
21#include <linux/delay.h>
22#include <linux/dmi.h> 20#include <linux/dmi.h>
23#include <linux/errno.h>
24#include <linux/fs.h>
25#include <linux/genhd.h>
26#include <linux/hdreg.h>
27#include <linux/idr.h>
28#include <linux/init.h> 21#include <linux/init.h>
29#include <linux/interrupt.h> 22#include <linux/interrupt.h>
30#include <linux/io.h> 23#include <linux/io.h>
31#include <linux/kdev_t.h>
32#include <linux/kernel.h>
33#include <linux/mm.h> 24#include <linux/mm.h>
34#include <linux/module.h> 25#include <linux/module.h>
35#include <linux/moduleparam.h>
36#include <linux/mutex.h> 26#include <linux/mutex.h>
37#include <linux/pci.h> 27#include <linux/pci.h>
38#include <linux/poison.h> 28#include <linux/poison.h>
39#include <linux/ptrace.h>
40#include <linux/sched.h>
41#include <linux/slab.h>
42#include <linux/t10-pi.h> 29#include <linux/t10-pi.h>
43#include <linux/timer.h> 30#include <linux/timer.h>
44#include <linux/types.h> 31#include <linux/types.h>
@@ -49,7 +36,6 @@
49#include "nvme.h" 36#include "nvme.h"
50 37
51#define NVME_Q_DEPTH 1024 38#define NVME_Q_DEPTH 1024
52#define NVME_AQ_DEPTH 256
53#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) 39#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
54#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) 40#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
55 41
@@ -66,12 +52,14 @@ static bool use_cmb_sqes = true;
66module_param(use_cmb_sqes, bool, 0644); 52module_param(use_cmb_sqes, bool, 0644);
67MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); 53MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
68 54
69static struct workqueue_struct *nvme_workq; 55static unsigned int max_host_mem_size_mb = 128;
56module_param(max_host_mem_size_mb, uint, 0444);
57MODULE_PARM_DESC(max_host_mem_size_mb,
58 "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
70 59
71struct nvme_dev; 60struct nvme_dev;
72struct nvme_queue; 61struct nvme_queue;
73 62
74static int nvme_reset(struct nvme_dev *dev);
75static void nvme_process_cq(struct nvme_queue *nvmeq); 63static void nvme_process_cq(struct nvme_queue *nvmeq);
76static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); 64static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
77 65
@@ -92,9 +80,8 @@ struct nvme_dev {
92 int q_depth; 80 int q_depth;
93 u32 db_stride; 81 u32 db_stride;
94 void __iomem *bar; 82 void __iomem *bar;
95 struct work_struct reset_work; 83 unsigned long bar_mapped_size;
96 struct work_struct remove_work; 84 struct work_struct remove_work;
97 struct timer_list watchdog_timer;
98 struct mutex shutdown_lock; 85 struct mutex shutdown_lock;
99 bool subsystem; 86 bool subsystem;
100 void __iomem *cmb; 87 void __iomem *cmb;
@@ -104,10 +91,18 @@ struct nvme_dev {
104 u32 cmbloc; 91 u32 cmbloc;
105 struct nvme_ctrl ctrl; 92 struct nvme_ctrl ctrl;
106 struct completion ioq_wait; 93 struct completion ioq_wait;
94
95 /* shadow doorbell buffer support: */
107 u32 *dbbuf_dbs; 96 u32 *dbbuf_dbs;
108 dma_addr_t dbbuf_dbs_dma_addr; 97 dma_addr_t dbbuf_dbs_dma_addr;
109 u32 *dbbuf_eis; 98 u32 *dbbuf_eis;
110 dma_addr_t dbbuf_eis_dma_addr; 99 dma_addr_t dbbuf_eis_dma_addr;
100
101 /* host memory buffer support: */
102 u64 host_mem_size;
103 u32 nr_host_mem_descs;
104 struct nvme_host_mem_buf_desc *host_mem_descs;
105 void **host_mem_desc_bufs;
111}; 106};
112 107
113static inline unsigned int sq_idx(unsigned int qid, u32 stride) 108static inline unsigned int sq_idx(unsigned int qid, u32 stride)
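
The pci.c hunks above add a max_host_mem_size_mb module parameter and per-device bookkeeping for Host Memory Buffer descriptors; the allocation code itself is not in this excerpt, but the parameter implies capping whatever size the device prefers at the module-parameter limit. A purely arithmetic sketch of such a clamp (the preferred size is an assumed example, not a value from the patch):

#include <stdio.h>

int main(void)
{
    unsigned long long max_mb = 128;                /* max_host_mem_size_mb default */
    unsigned long long preferred = 512ULL << 20;    /* device-preferred HMB, assumed */
    unsigned long long cap = max_mb << 20;

    unsigned long long hmb = preferred < cap ? preferred : cap;

    printf("host memory buffer: %llu MiB\n", hmb >> 20);
    return 0;
}
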
@@ -185,8 +180,8 @@ static inline void _nvme_check_size(void)
185 BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); 180 BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
186 BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); 181 BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
187 BUILD_BUG_ON(sizeof(struct nvme_command) != 64); 182 BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
188 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); 183 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
189 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); 184 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
190 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); 185 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
191 BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); 186 BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
192 BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); 187 BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
@@ -350,19 +345,6 @@ static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_i
350 nvmeq->tags = NULL; 345 nvmeq->tags = NULL;
351} 346}
352 347
353static int nvme_admin_init_request(struct blk_mq_tag_set *set,
354 struct request *req, unsigned int hctx_idx,
355 unsigned int numa_node)
356{
357 struct nvme_dev *dev = set->driver_data;
358 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
359 struct nvme_queue *nvmeq = dev->queues[0];
360
361 BUG_ON(!nvmeq);
362 iod->nvmeq = nvmeq;
363 return 0;
364}
365
366static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 348static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
367 unsigned int hctx_idx) 349 unsigned int hctx_idx)
368{ 350{
@@ -382,7 +364,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
382{ 364{
383 struct nvme_dev *dev = set->driver_data; 365 struct nvme_dev *dev = set->driver_data;
384 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 366 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
385 struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; 367 int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
368 struct nvme_queue *nvmeq = dev->queues[queue_idx];
386 369
387 BUG_ON(!nvmeq); 370 BUG_ON(!nvmeq);
388 iod->nvmeq = nvmeq; 371 iod->nvmeq = nvmeq;
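
As in the fc.c hunk earlier, the separate admin init_request callback is dropped and the shared callback derives the queue from the tag set it was invoked for: the I/O tag set maps hctx_idx to queue hctx_idx + 1, while the admin tag set always uses queue 0. A minimal sketch of that pointer-comparison dispatch, with invented structure names:

#include <stdio.h>

struct tag_set { const char *name; };

struct dev {
    struct tag_set admin_tagset;
    struct tag_set tagset;              /* I/O queues */
};

/* Admin requests always target queue 0; I/O hctx N maps to queue N + 1. */
static int queue_index(struct dev *dev, struct tag_set *set, int hctx_idx)
{
    return (set == &dev->tagset) ? hctx_idx + 1 : 0;
}

int main(void)
{
    struct dev d = { { "admin" }, { "io" } };

    printf("%d %d\n",
           queue_index(&d, &d.admin_tagset, 0),     /* 0 */
           queue_index(&d, &d.tagset, 2));          /* 3 */
    return 0;
}
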
@@ -427,7 +410,7 @@ static __le64 **iod_list(struct request *req)
427 return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); 410 return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
428} 411}
429 412
430static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) 413static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
431{ 414{
432 struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); 415 struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
433 int nseg = blk_rq_nr_phys_segments(rq); 416 int nseg = blk_rq_nr_phys_segments(rq);
@@ -436,7 +419,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
436 if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { 419 if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
437 iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); 420 iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
438 if (!iod->sg) 421 if (!iod->sg)
439 return BLK_MQ_RQ_QUEUE_BUSY; 422 return BLK_STS_RESOURCE;
440 } else { 423 } else {
441 iod->sg = iod->inline_sg; 424 iod->sg = iod->inline_sg;
442 } 425 }
@@ -446,7 +429,7 @@ static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
446 iod->nents = 0; 429 iod->nents = 0;
447 iod->length = size; 430 iod->length = size;
448 431
449 return BLK_MQ_RQ_QUEUE_OK; 432 return BLK_STS_OK;
450} 433}
451 434
452static void nvme_free_iod(struct nvme_dev *dev, struct request *req) 435static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
@@ -616,21 +599,21 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
616 return true; 599 return true;
617} 600}
618 601
619static int nvme_map_data(struct nvme_dev *dev, struct request *req, 602static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
620 struct nvme_command *cmnd) 603 struct nvme_command *cmnd)
621{ 604{
622 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 605 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
623 struct request_queue *q = req->q; 606 struct request_queue *q = req->q;
624 enum dma_data_direction dma_dir = rq_data_dir(req) ? 607 enum dma_data_direction dma_dir = rq_data_dir(req) ?
625 DMA_TO_DEVICE : DMA_FROM_DEVICE; 608 DMA_TO_DEVICE : DMA_FROM_DEVICE;
626 int ret = BLK_MQ_RQ_QUEUE_ERROR; 609 blk_status_t ret = BLK_STS_IOERR;
627 610
628 sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); 611 sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
629 iod->nents = blk_rq_map_sg(q, req, iod->sg); 612 iod->nents = blk_rq_map_sg(q, req, iod->sg);
630 if (!iod->nents) 613 if (!iod->nents)
631 goto out; 614 goto out;
632 615
633 ret = BLK_MQ_RQ_QUEUE_BUSY; 616 ret = BLK_STS_RESOURCE;
634 if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, 617 if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
635 DMA_ATTR_NO_WARN)) 618 DMA_ATTR_NO_WARN))
636 goto out; 619 goto out;
@@ -638,7 +621,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
638 if (!nvme_setup_prps(dev, req)) 621 if (!nvme_setup_prps(dev, req))
639 goto out_unmap; 622 goto out_unmap;
640 623
641 ret = BLK_MQ_RQ_QUEUE_ERROR; 624 ret = BLK_STS_IOERR;
642 if (blk_integrity_rq(req)) { 625 if (blk_integrity_rq(req)) {
643 if (blk_rq_count_integrity_sg(q, req->bio) != 1) 626 if (blk_rq_count_integrity_sg(q, req->bio) != 1)
644 goto out_unmap; 627 goto out_unmap;
@@ -658,7 +641,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
658 cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma); 641 cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
659 if (blk_integrity_rq(req)) 642 if (blk_integrity_rq(req))
660 cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); 643 cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
661 return BLK_MQ_RQ_QUEUE_OK; 644 return BLK_STS_OK;
662 645
663out_unmap: 646out_unmap:
664 dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); 647 dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
@@ -688,7 +671,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
688/* 671/*
689 * NOTE: ns is NULL when called on the admin queue. 672 * NOTE: ns is NULL when called on the admin queue.
690 */ 673 */
691static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, 674static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
692 const struct blk_mq_queue_data *bd) 675 const struct blk_mq_queue_data *bd)
693{ 676{
694 struct nvme_ns *ns = hctx->queue->queuedata; 677 struct nvme_ns *ns = hctx->queue->queuedata;
@@ -696,47 +679,34 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
696 struct nvme_dev *dev = nvmeq->dev; 679 struct nvme_dev *dev = nvmeq->dev;
697 struct request *req = bd->rq; 680 struct request *req = bd->rq;
698 struct nvme_command cmnd; 681 struct nvme_command cmnd;
699 int ret = BLK_MQ_RQ_QUEUE_OK; 682 blk_status_t ret;
700
701 /*
702 * If formatted with metadata, require the block layer to provide a buffer
703 * unless this namespace is formatted such that the metadata can be
704 * stripped/generated by the controller with PRACT=1.
705 */
706 if (ns && ns->ms && !blk_integrity_rq(req)) {
707 if (!(ns->pi_type && ns->ms == 8) &&
708 !blk_rq_is_passthrough(req)) {
709 blk_mq_end_request(req, -EFAULT);
710 return BLK_MQ_RQ_QUEUE_OK;
711 }
712 }
713 683
714 ret = nvme_setup_cmd(ns, req, &cmnd); 684 ret = nvme_setup_cmd(ns, req, &cmnd);
715 if (ret != BLK_MQ_RQ_QUEUE_OK) 685 if (ret)
716 return ret; 686 return ret;
717 687
718 ret = nvme_init_iod(req, dev); 688 ret = nvme_init_iod(req, dev);
719 if (ret != BLK_MQ_RQ_QUEUE_OK) 689 if (ret)
720 goto out_free_cmd; 690 goto out_free_cmd;
721 691
722 if (blk_rq_nr_phys_segments(req)) 692 if (blk_rq_nr_phys_segments(req)) {
723 ret = nvme_map_data(dev, req, &cmnd); 693 ret = nvme_map_data(dev, req, &cmnd);
724 694 if (ret)
725 if (ret != BLK_MQ_RQ_QUEUE_OK) 695 goto out_cleanup_iod;
726 goto out_cleanup_iod; 696 }
727 697
728 blk_mq_start_request(req); 698 blk_mq_start_request(req);
729 699
730 spin_lock_irq(&nvmeq->q_lock); 700 spin_lock_irq(&nvmeq->q_lock);
731 if (unlikely(nvmeq->cq_vector < 0)) { 701 if (unlikely(nvmeq->cq_vector < 0)) {
732 ret = BLK_MQ_RQ_QUEUE_ERROR; 702 ret = BLK_STS_IOERR;
733 spin_unlock_irq(&nvmeq->q_lock); 703 spin_unlock_irq(&nvmeq->q_lock);
734 goto out_cleanup_iod; 704 goto out_cleanup_iod;
735 } 705 }
736 __nvme_submit_cmd(nvmeq, &cmnd); 706 __nvme_submit_cmd(nvmeq, &cmnd);
737 nvme_process_cq(nvmeq); 707 nvme_process_cq(nvmeq);
738 spin_unlock_irq(&nvmeq->q_lock); 708 spin_unlock_irq(&nvmeq->q_lock);
739 return BLK_MQ_RQ_QUEUE_OK; 709 return BLK_STS_OK;
740out_cleanup_iod: 710out_cleanup_iod:
741 nvme_free_iod(dev, req); 711 nvme_free_iod(dev, req);
742out_free_cmd: 712out_free_cmd:
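
The hunk above is the heart of the status-code unification: nvme_queue_rq() now returns blk_status_t values straight through instead of translating driver-local BLK_MQ_RQ_QUEUE_* codes. A minimal userspace sketch of that convention follows; the numeric values and the *_stub helpers are illustrative stand-ins, not kernel definitions.

#include <stdio.h>

typedef unsigned char blk_status_t;
#define BLK_STS_OK       ((blk_status_t)0)
#define BLK_STS_RESOURCE ((blk_status_t)1)  /* illustrative: retry later (was ..._QUEUE_BUSY) */
#define BLK_STS_IOERR    ((blk_status_t)2)  /* illustrative: hard failure (was ..._QUEUE_ERROR) */

/* Hypothetical stand-ins for nvme_setup_cmd() and nvme_map_data(). */
static blk_status_t setup_cmd_stub(int cmd_ok)  { return cmd_ok ? BLK_STS_OK : BLK_STS_IOERR; }
static blk_status_t map_data_stub(int have_mem) { return have_mem ? BLK_STS_OK : BLK_STS_RESOURCE; }

/* Same shape as the rewritten nvme_queue_rq(): any non-zero status is passed
 * straight back to blk-mq, which decides whether to requeue or fail. */
static blk_status_t queue_rq_model(int cmd_ok, int have_mem)
{
	blk_status_t ret;

	ret = setup_cmd_stub(cmd_ok);
	if (ret)
		return ret;

	ret = map_data_stub(have_mem);
	if (ret)
		return ret;

	return BLK_STS_OK;
}

int main(void)
{
	printf("ok path   -> %u\n", (unsigned)queue_rq_model(1, 1));
	printf("no memory -> %u\n", (unsigned)queue_rq_model(1, 0));
	printf("bad cmd   -> %u\n", (unsigned)queue_rq_model(0, 1));
	return 0;
}

The design point is that the driver no longer needs a private error vocabulary; a single status type flows from command setup through data mapping back to the block layer.
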
@@ -759,65 +729,75 @@ static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
759 return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; 729 return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
760} 730}
761 731
762static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) 732static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
763{ 733{
764 u16 head, phase; 734 u16 head = nvmeq->cq_head;
765
766 head = nvmeq->cq_head;
767 phase = nvmeq->cq_phase;
768
769 while (nvme_cqe_valid(nvmeq, head, phase)) {
770 struct nvme_completion cqe = nvmeq->cqes[head];
771 struct request *req;
772
773 if (++head == nvmeq->q_depth) {
774 head = 0;
775 phase = !phase;
776 }
777
778 if (tag && *tag == cqe.command_id)
779 *tag = -1;
780 735
781 if (unlikely(cqe.command_id >= nvmeq->q_depth)) { 736 if (likely(nvmeq->cq_vector >= 0)) {
782 dev_warn(nvmeq->dev->ctrl.device, 737 if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
783 "invalid id %d completed on queue %d\n", 738 nvmeq->dbbuf_cq_ei))
784 cqe.command_id, le16_to_cpu(cqe.sq_id)); 739 writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
785 continue; 740 }
786 } 741}
787 742
788 /* 743static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
789 * AEN requests are special as they don't time out and can 744 struct nvme_completion *cqe)
790 * survive any kind of queue freeze and often don't respond to 745{
791 * aborts. We don't even bother to allocate a struct request 746 struct request *req;
792 * for them but rather special case them here.
793 */
794 if (unlikely(nvmeq->qid == 0 &&
795 cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
796 nvme_complete_async_event(&nvmeq->dev->ctrl,
797 cqe.status, &cqe.result);
798 continue;
799 }
800 747
801 req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); 748 if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
802 nvme_end_request(req, cqe.status, cqe.result); 749 dev_warn(nvmeq->dev->ctrl.device,
750 "invalid id %d completed on queue %d\n",
751 cqe->command_id, le16_to_cpu(cqe->sq_id));
752 return;
803 } 753 }
804 754
805 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) 755 /*
756 * AEN requests are special as they don't time out and can
757 * survive any kind of queue freeze and often don't respond to
758 * aborts. We don't even bother to allocate a struct request
759 * for them but rather special case them here.
760 */
761 if (unlikely(nvmeq->qid == 0 &&
762 cqe->command_id >= NVME_AQ_BLKMQ_DEPTH)) {
763 nvme_complete_async_event(&nvmeq->dev->ctrl,
764 cqe->status, &cqe->result);
806 return; 765 return;
766 }
807 767
808 if (likely(nvmeq->cq_vector >= 0)) 768 req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
809 if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db, 769 nvme_end_request(req, cqe->status, cqe->result);
810 nvmeq->dbbuf_cq_ei)) 770}
811 writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
812 nvmeq->cq_head = head;
813 nvmeq->cq_phase = phase;
814 771
815 nvmeq->cqe_seen = 1; 772static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
773 struct nvme_completion *cqe)
774{
775 if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
776 *cqe = nvmeq->cqes[nvmeq->cq_head];
777
778 if (++nvmeq->cq_head == nvmeq->q_depth) {
779 nvmeq->cq_head = 0;
780 nvmeq->cq_phase = !nvmeq->cq_phase;
781 }
782 return true;
783 }
784 return false;
816} 785}
817 786
818static void nvme_process_cq(struct nvme_queue *nvmeq) 787static void nvme_process_cq(struct nvme_queue *nvmeq)
819{ 788{
820 __nvme_process_cq(nvmeq, NULL); 789 struct nvme_completion cqe;
790 int consumed = 0;
791
792 while (nvme_read_cqe(nvmeq, &cqe)) {
793 nvme_handle_cqe(nvmeq, &cqe);
794 consumed++;
795 }
796
797 if (consumed) {
798 nvme_ring_cq_doorbell(nvmeq);
799 nvmeq->cqe_seen = 1;
800 }
821} 801}
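
Splitting the old __nvme_process_cq() into nvme_read_cqe(), nvme_handle_cqe() and nvme_ring_cq_doorbell() isolates the phase-tag bookkeeping: an entry is valid while bit 0 of its status matches the expected phase, and the phase flips each time the head wraps. The following is a simplified userspace model of that consumption loop, with a plain array standing in for the completion queue and a printf standing in for the doorbell write.

#include <stdbool.h>
#include <stdio.h>

#define Q_DEPTH 4

struct cqe { unsigned short status; int command_id; };  /* bit 0 of status = phase tag */
struct cq  { struct cqe entries[Q_DEPTH]; unsigned head; unsigned phase; };

/* Mirrors nvme_read_cqe(): copy the entry out, advance head, flip phase on wrap. */
static bool read_cqe(struct cq *q, struct cqe *out)
{
	if ((q->entries[q->head].status & 1) != q->phase)
		return false;
	*out = q->entries[q->head];
	if (++q->head == Q_DEPTH) {
		q->head = 0;
		q->phase = !q->phase;
	}
	return true;
}

int main(void)
{
	struct cq q = { .head = 0, .phase = 1 };
	struct cqe cqe;
	int consumed = 0;
	int i;

	/* The "device" posts three completions carrying the current phase (1). */
	for (i = 0; i < 3; i++) {
		q.entries[i].status = 1;
		q.entries[i].command_id = 100 + i;
	}

	while (read_cqe(&q, &cqe)) {            /* the nvme_process_cq() loop */
		printf("completed command %d\n", cqe.command_id);
		consumed++;
	}
	if (consumed)                           /* ring the CQ doorbell once, at the end */
		printf("write head=%u to the CQ doorbell\n", q.head);
	return 0;
}

The split also lets __nvme_poll() below reuse the same read/handle helpers while stopping early once the tag it cares about has completed.
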
822 802
823static irqreturn_t nvme_irq(int irq, void *data) 803static irqreturn_t nvme_irq(int irq, void *data)
@@ -842,16 +822,28 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
842 822
843static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) 823static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
844{ 824{
845 if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { 825 struct nvme_completion cqe;
846 spin_lock_irq(&nvmeq->q_lock); 826 int found = 0, consumed = 0;
847 __nvme_process_cq(nvmeq, &tag);
848 spin_unlock_irq(&nvmeq->q_lock);
849 827
850 if (tag == -1) 828 if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
851 return 1; 829 return 0;
852 }
853 830
854 return 0; 831 spin_lock_irq(&nvmeq->q_lock);
832 while (nvme_read_cqe(nvmeq, &cqe)) {
833 nvme_handle_cqe(nvmeq, &cqe);
834 consumed++;
835
836 if (tag == cqe.command_id) {
837 found = 1;
838 break;
839 }
840 }
841
842 if (consumed)
843 nvme_ring_cq_doorbell(nvmeq);
844 spin_unlock_irq(&nvmeq->q_lock);
845
846 return found;
855} 847}
856 848
857static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 849static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -939,7 +931,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
939 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); 931 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
940} 932}
941 933
942static void abort_endio(struct request *req, int error) 934static void abort_endio(struct request *req, blk_status_t error)
943{ 935{
944 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 936 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
945 struct nvme_queue *nvmeq = iod->nvmeq; 937 struct nvme_queue *nvmeq = iod->nvmeq;
@@ -950,6 +942,51 @@ static void abort_endio(struct request *req, int error)
950 blk_mq_free_request(req); 942 blk_mq_free_request(req);
951} 943}
952 944
945static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
946{
947
948 /* If true, indicates loss of adapter communication, possibly by an
949 * NVMe Subsystem reset.
950 */
951 bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
952
953 /* If there is a reset ongoing, we shouldn't reset again. */
954 if (dev->ctrl.state == NVME_CTRL_RESETTING)
955 return false;
956
957 /* We shouldn't reset unless the controller is in a fatal error state
958 * _or_ we lost communication with it.
959 */
960 if (!(csts & NVME_CSTS_CFS) && !nssro)
961 return false;
962
963 /* If the PCI error recovery process is happening, we cannot reset, or
964 * the recovery mechanism will surely fail.
965 */
966 if (pci_channel_offline(to_pci_dev(dev->dev)))
967 return false;
968
969 return true;
970}
971
972static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
973{
974 /* Read a config register to help see what died. */
975 u16 pci_status;
976 int result;
977
978 result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
979 &pci_status);
980 if (result == PCIBIOS_SUCCESSFUL)
981 dev_warn(dev->ctrl.device,
982 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
983 csts, pci_status);
984 else
985 dev_warn(dev->ctrl.device,
986 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
987 csts, result);
988}
989
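
With the watchdog timer gone, nvme_timeout() is now the caller of nvme_should_reset(), so the helper is moved above it unchanged. The decision reduces to a small predicate over CSTS bits and driver state; here is a standalone sketch of the same truth table, using placeholder bit positions rather than the real NVME_CSTS_* definitions.

#include <stdbool.h>
#include <stdio.h>

/* Placeholder bit positions; the real ones live in include/linux/nvme.h. */
#define CSTS_CFS   (1u << 1)   /* controller fatal status */
#define CSTS_NSSRO (1u << 4)   /* NVM subsystem reset occurred */

/* Same structure as nvme_should_reset(): reset only on fatal error or a
 * subsystem reset, and only if nobody else is already handling the device. */
static bool should_reset(unsigned csts, bool subsystem,
			 bool resetting, bool pci_offline)
{
	bool nssro = subsystem && (csts & CSTS_NSSRO);

	if (resetting)                     /* a reset is already in flight */
		return false;
	if (!(csts & CSTS_CFS) && !nssro)  /* controller looks healthy */
		return false;
	if (pci_offline)                   /* PCI error recovery owns the device */
		return false;
	return true;
}

int main(void)
{
	printf("fatal, idle      -> %d\n", should_reset(CSTS_CFS, true, false, false));
	printf("fatal, resetting -> %d\n", should_reset(CSTS_CFS, true, true, false));
	printf("healthy          -> %d\n", should_reset(0, true, false, false));
	return 0;
}
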
953static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) 990static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
954{ 991{
955 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 992 struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -957,6 +994,17 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
957 struct nvme_dev *dev = nvmeq->dev; 994 struct nvme_dev *dev = nvmeq->dev;
958 struct request *abort_req; 995 struct request *abort_req;
959 struct nvme_command cmd; 996 struct nvme_command cmd;
997 u32 csts = readl(dev->bar + NVME_REG_CSTS);
998
999 /*
1000 * Reset immediately if the controller is failed
1001 */
1002 if (nvme_should_reset(dev, csts)) {
1003 nvme_warn_reset(dev, csts);
1004 nvme_dev_disable(dev, false);
1005 nvme_reset_ctrl(&dev->ctrl);
1006 return BLK_EH_HANDLED;
1007 }
960 1008
961 /* 1009 /*
962 * Did we miss an interrupt? 1010 * Did we miss an interrupt?
@@ -993,7 +1041,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
993 "I/O %d QID %d timeout, reset controller\n", 1041 "I/O %d QID %d timeout, reset controller\n",
994 req->tag, nvmeq->qid); 1042 req->tag, nvmeq->qid);
995 nvme_dev_disable(dev, false); 1043 nvme_dev_disable(dev, false);
996 nvme_reset(dev); 1044 nvme_reset_ctrl(&dev->ctrl);
997 1045
998 /* 1046 /*
999 * Mark the request as handled, since the inline shutdown 1047 * Mark the request as handled, since the inline shutdown
@@ -1247,7 +1295,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
1247 .complete = nvme_pci_complete_rq, 1295 .complete = nvme_pci_complete_rq,
1248 .init_hctx = nvme_admin_init_hctx, 1296 .init_hctx = nvme_admin_init_hctx,
1249 .exit_hctx = nvme_admin_exit_hctx, 1297 .exit_hctx = nvme_admin_exit_hctx,
1250 .init_request = nvme_admin_init_request, 1298 .init_request = nvme_init_request,
1251 .timeout = nvme_timeout, 1299 .timeout = nvme_timeout,
1252}; 1300};
1253 1301
@@ -1311,6 +1359,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
1311 return 0; 1359 return 0;
1312} 1360}
1313 1361
1362static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
1363{
1364 return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
1365}
1366
1367static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
1368{
1369 struct pci_dev *pdev = to_pci_dev(dev->dev);
1370
1371 if (size <= dev->bar_mapped_size)
1372 return 0;
1373 if (size > pci_resource_len(pdev, 0))
1374 return -ENOMEM;
1375 if (dev->bar)
1376 iounmap(dev->bar);
1377 dev->bar = ioremap(pci_resource_start(pdev, 0), size);
1378 if (!dev->bar) {
1379 dev->bar_mapped_size = 0;
1380 return -ENOMEM;
1381 }
1382 dev->bar_mapped_size = size;
1383 dev->dbs = dev->bar + NVME_REG_DBS;
1384
1385 return 0;
1386}
1387
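
db_bar_size() and nvme_remap_bar() replace the fixed 4096/8192 arithmetic that used to live in nvme_setup_io_queues(): each queue, admin included, needs an 8-byte SQ-tail/CQ-head doorbell pair, scaled by the controller's doorbell stride, starting at the NVME_REG_DBS offset. A small worked example of that sizing, assuming the usual 0x1000 doorbell base:

#include <stdio.h>

#define REG_DBS 0x1000UL	/* first doorbell offset in BAR0 */

/* Same formula as db_bar_size(): admin queue + nr_io_queues, one 8-byte
 * doorbell pair per queue, scaled by the doorbell stride. */
static unsigned long db_bar_size(unsigned nr_io_queues, unsigned db_stride)
{
	return REG_DBS + ((nr_io_queues + 1) * 8UL * db_stride);
}

int main(void)
{
	/* Admin queue only: the minimal mapping nvme_configure_admin_queue() asks for. */
	printf("0 IO queues, stride 1   -> %lu bytes\n", db_bar_size(0, 1));
	/* A typical controller still fits in the historical 8 KiB mapping... */
	printf("32 IO queues, stride 1  -> %lu bytes\n", db_bar_size(32, 1));
	/* ...but a large stride or queue count does not, hence nvme_remap_bar(). */
	printf("128 IO queues, stride 4 -> %lu bytes\n", db_bar_size(128, 4));
	return 0;
}
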
1314static int nvme_configure_admin_queue(struct nvme_dev *dev) 1388static int nvme_configure_admin_queue(struct nvme_dev *dev)
1315{ 1389{
1316 int result; 1390 int result;
@@ -1318,6 +1392,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1318 u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); 1392 u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
1319 struct nvme_queue *nvmeq; 1393 struct nvme_queue *nvmeq;
1320 1394
1395 result = nvme_remap_bar(dev, db_bar_size(dev, 0));
1396 if (result < 0)
1397 return result;
1398
1321 dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? 1399 dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
1322 NVME_CAP_NSSRC(cap) : 0; 1400 NVME_CAP_NSSRC(cap) : 0;
1323 1401
@@ -1358,66 +1436,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1358 return result; 1436 return result;
1359} 1437}
1360 1438
1361static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
1362{
1363
1364 /* If true, indicates loss of adapter communication, possibly by an
1365 * NVMe Subsystem reset.
1366 */
1367 bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
1368
1369 /* If there is a reset ongoing, we shouldn't reset again. */
1370 if (dev->ctrl.state == NVME_CTRL_RESETTING)
1371 return false;
1372
1373 /* We shouldn't reset unless the controller is in a fatal error state
1374 * _or_ we lost communication with it.
1375 */
1376 if (!(csts & NVME_CSTS_CFS) && !nssro)
1377 return false;
1378
1379 /* If the PCI error recovery process is happening, we cannot reset, or
1380 * the recovery mechanism will surely fail.
1381 */
1382 if (pci_channel_offline(to_pci_dev(dev->dev)))
1383 return false;
1384
1385 return true;
1386}
1387
1388static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
1389{
1390 /* Read a config register to help see what died. */
1391 u16 pci_status;
1392 int result;
1393
1394 result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
1395 &pci_status);
1396 if (result == PCIBIOS_SUCCESSFUL)
1397 dev_warn(dev->ctrl.device,
1398 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
1399 csts, pci_status);
1400 else
1401 dev_warn(dev->ctrl.device,
1402 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
1403 csts, result);
1404}
1405
1406static void nvme_watchdog_timer(unsigned long data)
1407{
1408 struct nvme_dev *dev = (struct nvme_dev *)data;
1409 u32 csts = readl(dev->bar + NVME_REG_CSTS);
1410
1411 /* Skip controllers under certain specific conditions. */
1412 if (nvme_should_reset(dev, csts)) {
1413 if (!nvme_reset(dev))
1414 nvme_warn_reset(dev, csts);
1415 return;
1416 }
1417
1418 mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
1419}
1420
1421static int nvme_create_io_queues(struct nvme_dev *dev) 1439static int nvme_create_io_queues(struct nvme_dev *dev)
1422{ 1440{
1423 unsigned i, max; 1441 unsigned i, max;
@@ -1514,16 +1532,168 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
1514 } 1532 }
1515} 1533}
1516 1534
1517static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) 1535static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
1536{
1537 size_t len = dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs);
1538 struct nvme_command c;
1539 u64 dma_addr;
1540 int ret;
1541
1542 dma_addr = dma_map_single(dev->dev, dev->host_mem_descs, len,
1543 DMA_TO_DEVICE);
1544 if (dma_mapping_error(dev->dev, dma_addr))
1545 return -ENOMEM;
1546
1547 memset(&c, 0, sizeof(c));
1548 c.features.opcode = nvme_admin_set_features;
1549 c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
1550 c.features.dword11 = cpu_to_le32(bits);
1551 c.features.dword12 = cpu_to_le32(dev->host_mem_size >>
1552 ilog2(dev->ctrl.page_size));
1553 c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr));
1554 c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
1555 c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
1556
1557 ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
1558 if (ret) {
1559 dev_warn(dev->ctrl.device,
1560 "failed to set host mem (err %d, flags %#x).\n",
1561 ret, bits);
1562 }
1563 dma_unmap_single(dev->dev, dma_addr, len, DMA_TO_DEVICE);
1564 return ret;
1565}
1566
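
nvme_set_host_mem() drives the whole Host Memory Buffer feature through one Set Features command: the enable/return flags land in dword 11, the buffer size in controller pages in dword 12, the 64-bit descriptor-list DMA address is split across dwords 13 and 14, and the descriptor count goes in dword 15. A sketch of just that packing; the struct below is a local stand-in for illustration, not the kernel's struct nvme_command.

#include <stdint.h>
#include <stdio.h>

struct hmb_set_features {	/* illustrative subset of the admin command */
	uint32_t dword11;	/* enable / memory-return bits */
	uint32_t dword12;	/* buffer size in controller pages */
	uint32_t dword13;	/* descriptor list address, low 32 bits */
	uint32_t dword14;	/* descriptor list address, high 32 bits */
	uint32_t dword15;	/* number of descriptor entries */
};

static struct hmb_set_features pack_hmb(uint32_t bits, uint64_t host_mem_size,
					unsigned page_shift, uint64_t desc_dma,
					uint32_t nr_descs)
{
	struct hmb_set_features c = {
		.dword11 = bits,
		.dword12 = (uint32_t)(host_mem_size >> page_shift),
		.dword13 = (uint32_t)(desc_dma & 0xffffffffu),
		.dword14 = (uint32_t)(desc_dma >> 32),
		.dword15 = nr_descs,
	};
	return c;
}

int main(void)
{
	/* 32 MiB buffer, 4 KiB controller pages, descriptor list mapped above 4 GiB. */
	struct hmb_set_features c =
		pack_hmb(0x1, 32ull << 20, 12, 0x100000000ull + 0x2000, 8);

	printf("dw11=%#x dw12=%u pages dw13=%#x dw14=%#x dw15=%u\n",
	       (unsigned)c.dword11, (unsigned)c.dword12, (unsigned)c.dword13,
	       (unsigned)c.dword14, (unsigned)c.dword15);
	return 0;
}
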
1567static void nvme_free_host_mem(struct nvme_dev *dev)
1568{
1569 int i;
1570
1571 for (i = 0; i < dev->nr_host_mem_descs; i++) {
1572 struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
1573 size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
1574
1575 dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
1576 le64_to_cpu(desc->addr));
1577 }
1578
1579 kfree(dev->host_mem_desc_bufs);
1580 dev->host_mem_desc_bufs = NULL;
1581 kfree(dev->host_mem_descs);
1582 dev->host_mem_descs = NULL;
1583}
1584
1585static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
1518{ 1586{
1519 return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); 1587 struct nvme_host_mem_buf_desc *descs;
1588 u32 chunk_size, max_entries, i = 0;
1589 void **bufs;
1590 u64 size, tmp;
1591
1592 /* start big and work our way down */
1593 chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
1594retry:
1595 tmp = (preferred + chunk_size - 1);
1596 do_div(tmp, chunk_size);
1597 max_entries = tmp;
1598 descs = kcalloc(max_entries, sizeof(*descs), GFP_KERNEL);
1599 if (!descs)
1600 goto out;
1601
1602 bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
1603 if (!bufs)
1604 goto out_free_descs;
1605
1606 for (size = 0; size < preferred; size += chunk_size) {
1607 u32 len = min_t(u64, chunk_size, preferred - size);
1608 dma_addr_t dma_addr;
1609
1610 bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
1611 DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
1612 if (!bufs[i])
1613 break;
1614
1615 descs[i].addr = cpu_to_le64(dma_addr);
1616 descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
1617 i++;
1618 }
1619
1620 if (!size || (min && size < min)) {
1621 dev_warn(dev->ctrl.device,
1622 "failed to allocate host memory buffer.\n");
1623 goto out_free_bufs;
1624 }
1625
1626 dev_info(dev->ctrl.device,
1627 "allocated %lld MiB host memory buffer.\n",
1628 size >> ilog2(SZ_1M));
1629 dev->nr_host_mem_descs = i;
1630 dev->host_mem_size = size;
1631 dev->host_mem_descs = descs;
1632 dev->host_mem_desc_bufs = bufs;
1633 return 0;
1634
1635out_free_bufs:
1636 while (--i >= 0) {
1637 size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
1638
1639 dma_free_coherent(dev->dev, size, bufs[i],
1640 le64_to_cpu(descs[i].addr));
1641 }
1642
1643 kfree(bufs);
1644out_free_descs:
1645 kfree(descs);
1646out:
1647 /* try a smaller chunk size if we failed early */
1648 if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
1649 chunk_size /= 2;
1650 goto retry;
1651 }
1652 dev->host_mem_descs = NULL;
1653 return -ENOMEM;
1654}
1655
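
nvme_alloc_host_mem() uses a start-big, halve-on-failure strategy: try to cover the preferred size with the largest DMA chunks available, and if the result falls short of the controller's minimum, retry with half the chunk size before giving up. Below is a simplified userspace model of that retry loop, with malloc() standing in for dma_alloc_attrs() and the descriptor bookkeeping left out.

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096ull

/* Pretend DMA allocator: refuses chunks larger than 64 KiB to force the retry path. */
static void *chunk_alloc(unsigned long long len)
{
	return len > 64 * 1024 ? NULL : malloc((size_t)len);
}

/* Mirrors the shape of nvme_alloc_host_mem(): big chunks first, halve and retry. */
static int alloc_host_mem(unsigned long long min, unsigned long long preferred)
{
	unsigned long long chunk_size = 1024 * 1024;	/* start big */

	while (chunk_size >= PAGE_SIZE) {
		unsigned long long size = 0;
		unsigned nr_chunks = 0;

		while (size < preferred) {
			unsigned long long len = preferred - size;
			void *buf;

			if (len > chunk_size)
				len = chunk_size;
			buf = chunk_alloc(len);
			if (!buf)
				break;
			free(buf);	/* the driver keeps buf and its DMA address here */
			size += len;
			nr_chunks++;
		}

		if (size && size >= min) {
			printf("allocated %llu bytes in %u chunks of at most %llu\n",
			       size, nr_chunks, chunk_size);
			return 0;
		}
		chunk_size /= 2;	/* what the driver's retry: label does */
	}
	return -1;			/* -ENOMEM in the driver */
}

int main(void)
{
	return alloc_host_mem(128 * 1024, 256 * 1024) ? 1 : 0;
}
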
1656static void nvme_setup_host_mem(struct nvme_dev *dev)
1657{
1658 u64 max = (u64)max_host_mem_size_mb * SZ_1M;
1659 u64 preferred = (u64)dev->ctrl.hmpre * 4096;
1660 u64 min = (u64)dev->ctrl.hmmin * 4096;
1661 u32 enable_bits = NVME_HOST_MEM_ENABLE;
1662
1663 preferred = min(preferred, max);
1664 if (min > max) {
1665 dev_warn(dev->ctrl.device,
1666 "min host memory (%lld MiB) above limit (%d MiB).\n",
1667 min >> ilog2(SZ_1M), max_host_mem_size_mb);
1668 nvme_free_host_mem(dev);
1669 return;
1670 }
1671
1672 /*
1673 * If we already have a buffer allocated, check if we can reuse it.
1674 */
1675 if (dev->host_mem_descs) {
1676 if (dev->host_mem_size >= min)
1677 enable_bits |= NVME_HOST_MEM_RETURN;
1678 else
1679 nvme_free_host_mem(dev);
1680 }
1681
1682 if (!dev->host_mem_descs) {
1683 if (nvme_alloc_host_mem(dev, min, preferred))
1684 return;
1685 }
1686
1687 if (nvme_set_host_mem(dev, enable_bits))
1688 nvme_free_host_mem(dev);
1520} 1689}
1521 1690
1522static int nvme_setup_io_queues(struct nvme_dev *dev) 1691static int nvme_setup_io_queues(struct nvme_dev *dev)
1523{ 1692{
1524 struct nvme_queue *adminq = dev->queues[0]; 1693 struct nvme_queue *adminq = dev->queues[0];
1525 struct pci_dev *pdev = to_pci_dev(dev->dev); 1694 struct pci_dev *pdev = to_pci_dev(dev->dev);
1526 int result, nr_io_queues, size; 1695 int result, nr_io_queues;
1696 unsigned long size;
1527 1697
1528 nr_io_queues = num_online_cpus(); 1698 nr_io_queues = num_online_cpus();
1529 result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); 1699 result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1542,20 +1712,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1542 nvme_release_cmb(dev); 1712 nvme_release_cmb(dev);
1543 } 1713 }
1544 1714
1545 size = db_bar_size(dev, nr_io_queues); 1715 do {
1546 if (size > 8192) { 1716 size = db_bar_size(dev, nr_io_queues);
1547 iounmap(dev->bar); 1717 result = nvme_remap_bar(dev, size);
1548 do { 1718 if (!result)
1549 dev->bar = ioremap(pci_resource_start(pdev, 0), size); 1719 break;
1550 if (dev->bar) 1720 if (!--nr_io_queues)
1551 break; 1721 return -ENOMEM;
1552 if (!--nr_io_queues) 1722 } while (1);
1553 return -ENOMEM; 1723 adminq->q_db = dev->dbs;
1554 size = db_bar_size(dev, nr_io_queues);
1555 } while (1);
1556 dev->dbs = dev->bar + 4096;
1557 adminq->q_db = dev->dbs;
1558 }
1559 1724
1560 /* Deregister the admin queue's interrupt */ 1725 /* Deregister the admin queue's interrupt */
1561 pci_free_irq(pdev, 0, adminq); 1726 pci_free_irq(pdev, 0, adminq);
@@ -1586,7 +1751,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1586 return nvme_create_io_queues(dev); 1751 return nvme_create_io_queues(dev);
1587} 1752}
1588 1753
1589static void nvme_del_queue_end(struct request *req, int error) 1754static void nvme_del_queue_end(struct request *req, blk_status_t error)
1590{ 1755{
1591 struct nvme_queue *nvmeq = req->end_io_data; 1756 struct nvme_queue *nvmeq = req->end_io_data;
1592 1757
@@ -1594,7 +1759,7 @@ static void nvme_del_queue_end(struct request *req, int error)
1594 complete(&nvmeq->dev->ioq_wait); 1759 complete(&nvmeq->dev->ioq_wait);
1595} 1760}
1596 1761
1597static void nvme_del_cq_end(struct request *req, int error) 1762static void nvme_del_cq_end(struct request *req, blk_status_t error)
1598{ 1763{
1599 struct nvme_queue *nvmeq = req->end_io_data; 1764 struct nvme_queue *nvmeq = req->end_io_data;
1600 1765
@@ -1799,8 +1964,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
1799 bool dead = true; 1964 bool dead = true;
1800 struct pci_dev *pdev = to_pci_dev(dev->dev); 1965 struct pci_dev *pdev = to_pci_dev(dev->dev);
1801 1966
1802 del_timer_sync(&dev->watchdog_timer);
1803
1804 mutex_lock(&dev->shutdown_lock); 1967 mutex_lock(&dev->shutdown_lock);
1805 if (pci_is_enabled(pdev)) { 1968 if (pci_is_enabled(pdev)) {
1806 u32 csts = readl(dev->bar + NVME_REG_CSTS); 1969 u32 csts = readl(dev->bar + NVME_REG_CSTS);
@@ -1816,8 +1979,20 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
1816 * Give the controller a chance to complete all entered requests if 1979 * Give the controller a chance to complete all entered requests if
1817 * doing a safe shutdown. 1980 * doing a safe shutdown.
1818 */ 1981 */
1819 if (!dead && shutdown) 1982 if (!dead) {
1820 nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); 1983 if (shutdown)
1984 nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
1985
1986 /*
1987 * If the controller is still alive, tell it to stop using the
1988 * host memory buffer. In theory the shutdown / reset should
1989 * make sure that it doesn't access the host memory anymore,
1990 * but I'd rather be safe than sorry.
1991 */
1992 if (dev->host_mem_descs)
1993 nvme_set_host_mem(dev, 0);
1994
1995 }
1821 nvme_stop_queues(&dev->ctrl); 1996 nvme_stop_queues(&dev->ctrl);
1822 1997
1823 queues = dev->online_queues - 1; 1998 queues = dev->online_queues - 1;
@@ -1900,7 +2075,8 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
1900 2075
1901static void nvme_reset_work(struct work_struct *work) 2076static void nvme_reset_work(struct work_struct *work)
1902{ 2077{
1903 struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); 2078 struct nvme_dev *dev =
2079 container_of(work, struct nvme_dev, ctrl.reset_work);
1904 bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); 2080 bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
1905 int result = -ENODEV; 2081 int result = -ENODEV;
1906 2082
@@ -1949,6 +2125,9 @@ static void nvme_reset_work(struct work_struct *work)
1949 "unable to allocate dma for dbbuf\n"); 2125 "unable to allocate dma for dbbuf\n");
1950 } 2126 }
1951 2127
2128 if (dev->ctrl.hmpre)
2129 nvme_setup_host_mem(dev);
2130
1952 result = nvme_setup_io_queues(dev); 2131 result = nvme_setup_io_queues(dev);
1953 if (result) 2132 if (result)
1954 goto out; 2133 goto out;
@@ -1962,8 +2141,6 @@ static void nvme_reset_work(struct work_struct *work)
1962 if (dev->online_queues > 1) 2141 if (dev->online_queues > 1)
1963 nvme_queue_async_events(&dev->ctrl); 2142 nvme_queue_async_events(&dev->ctrl);
1964 2143
1965 mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + HZ));
1966
1967 /* 2144 /*
1968 * Keep the controller around but remove all namespaces if we don't have 2145 * Keep the controller around but remove all namespaces if we don't have
1969 * any working I/O queue. 2146 * any working I/O queue.
@@ -2003,17 +2180,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
2003 nvme_put_ctrl(&dev->ctrl); 2180 nvme_put_ctrl(&dev->ctrl);
2004} 2181}
2005 2182
2006static int nvme_reset(struct nvme_dev *dev)
2007{
2008 if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
2009 return -ENODEV;
2010 if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
2011 return -EBUSY;
2012 if (!queue_work(nvme_workq, &dev->reset_work))
2013 return -EBUSY;
2014 return 0;
2015}
2016
2017static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) 2183static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
2018{ 2184{
2019 *val = readl(to_nvme_dev(ctrl)->bar + off); 2185 *val = readl(to_nvme_dev(ctrl)->bar + off);
@@ -2032,16 +2198,6 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
2032 return 0; 2198 return 0;
2033} 2199}
2034 2200
2035static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
2036{
2037 struct nvme_dev *dev = to_nvme_dev(ctrl);
2038 int ret = nvme_reset(dev);
2039
2040 if (!ret)
2041 flush_work(&dev->reset_work);
2042 return ret;
2043}
2044
2045static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { 2201static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
2046 .name = "pcie", 2202 .name = "pcie",
2047 .module = THIS_MODULE, 2203 .module = THIS_MODULE,
@@ -2049,7 +2205,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
2049 .reg_read32 = nvme_pci_reg_read32, 2205 .reg_read32 = nvme_pci_reg_read32,
2050 .reg_write32 = nvme_pci_reg_write32, 2206 .reg_write32 = nvme_pci_reg_write32,
2051 .reg_read64 = nvme_pci_reg_read64, 2207 .reg_read64 = nvme_pci_reg_read64,
2052 .reset_ctrl = nvme_pci_reset_ctrl,
2053 .free_ctrl = nvme_pci_free_ctrl, 2208 .free_ctrl = nvme_pci_free_ctrl,
2054 .submit_async_event = nvme_pci_submit_async_event, 2209 .submit_async_event = nvme_pci_submit_async_event,
2055}; 2210};
@@ -2061,8 +2216,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
2061 if (pci_request_mem_regions(pdev, "nvme")) 2216 if (pci_request_mem_regions(pdev, "nvme"))
2062 return -ENODEV; 2217 return -ENODEV;
2063 2218
2064 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); 2219 if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
2065 if (!dev->bar)
2066 goto release; 2220 goto release;
2067 2221
2068 return 0; 2222 return 0;
@@ -2116,10 +2270,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2116 if (result) 2270 if (result)
2117 goto free; 2271 goto free;
2118 2272
2119 INIT_WORK(&dev->reset_work, nvme_reset_work); 2273 INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
2120 INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); 2274 INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
2121 setup_timer(&dev->watchdog_timer, nvme_watchdog_timer,
2122 (unsigned long)dev);
2123 mutex_init(&dev->shutdown_lock); 2275 mutex_init(&dev->shutdown_lock);
2124 init_completion(&dev->ioq_wait); 2276 init_completion(&dev->ioq_wait);
2125 2277
@@ -2137,7 +2289,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2137 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING); 2289 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
2138 dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); 2290 dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
2139 2291
2140 queue_work(nvme_workq, &dev->reset_work); 2292 queue_work(nvme_wq, &dev->ctrl.reset_work);
2141 return 0; 2293 return 0;
2142 2294
2143 release_pools: 2295 release_pools:
@@ -2158,7 +2310,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
2158 if (prepare) 2310 if (prepare)
2159 nvme_dev_disable(dev, false); 2311 nvme_dev_disable(dev, false);
2160 else 2312 else
2161 nvme_reset(dev); 2313 nvme_reset_ctrl(&dev->ctrl);
2162} 2314}
2163 2315
2164static void nvme_shutdown(struct pci_dev *pdev) 2316static void nvme_shutdown(struct pci_dev *pdev)
@@ -2178,7 +2330,7 @@ static void nvme_remove(struct pci_dev *pdev)
2178 2330
2179 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); 2331 nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
2180 2332
2181 cancel_work_sync(&dev->reset_work); 2333 cancel_work_sync(&dev->ctrl.reset_work);
2182 pci_set_drvdata(pdev, NULL); 2334 pci_set_drvdata(pdev, NULL);
2183 2335
2184 if (!pci_device_is_present(pdev)) { 2336 if (!pci_device_is_present(pdev)) {
@@ -2186,9 +2338,10 @@ static void nvme_remove(struct pci_dev *pdev)
2186 nvme_dev_disable(dev, false); 2338 nvme_dev_disable(dev, false);
2187 } 2339 }
2188 2340
2189 flush_work(&dev->reset_work); 2341 flush_work(&dev->ctrl.reset_work);
2190 nvme_uninit_ctrl(&dev->ctrl); 2342 nvme_uninit_ctrl(&dev->ctrl);
2191 nvme_dev_disable(dev, true); 2343 nvme_dev_disable(dev, true);
2344 nvme_free_host_mem(dev);
2192 nvme_dev_remove_admin(dev); 2345 nvme_dev_remove_admin(dev);
2193 nvme_free_queues(dev, 0); 2346 nvme_free_queues(dev, 0);
2194 nvme_release_prp_pools(dev); 2347 nvme_release_prp_pools(dev);
@@ -2229,7 +2382,7 @@ static int nvme_resume(struct device *dev)
2229 struct pci_dev *pdev = to_pci_dev(dev); 2382 struct pci_dev *pdev = to_pci_dev(dev);
2230 struct nvme_dev *ndev = pci_get_drvdata(pdev); 2383 struct nvme_dev *ndev = pci_get_drvdata(pdev);
2231 2384
2232 nvme_reset(ndev); 2385 nvme_reset_ctrl(&ndev->ctrl);
2233 return 0; 2386 return 0;
2234} 2387}
2235#endif 2388#endif
@@ -2268,7 +2421,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
2268 2421
2269 dev_info(dev->ctrl.device, "restart after slot reset\n"); 2422 dev_info(dev->ctrl.device, "restart after slot reset\n");
2270 pci_restore_state(pdev); 2423 pci_restore_state(pdev);
2271 nvme_reset(dev); 2424 nvme_reset_ctrl(&dev->ctrl);
2272 return PCI_ERS_RESULT_RECOVERED; 2425 return PCI_ERS_RESULT_RECOVERED;
2273} 2426}
2274 2427
@@ -2324,22 +2477,12 @@ static struct pci_driver nvme_driver = {
2324 2477
2325static int __init nvme_init(void) 2478static int __init nvme_init(void)
2326{ 2479{
2327 int result; 2480 return pci_register_driver(&nvme_driver);
2328
2329 nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
2330 if (!nvme_workq)
2331 return -ENOMEM;
2332
2333 result = pci_register_driver(&nvme_driver);
2334 if (result)
2335 destroy_workqueue(nvme_workq);
2336 return result;
2337} 2481}
2338 2482
2339static void __exit nvme_exit(void) 2483static void __exit nvme_exit(void)
2340{ 2484{
2341 pci_unregister_driver(&nvme_driver); 2485 pci_unregister_driver(&nvme_driver);
2342 destroy_workqueue(nvme_workq);
2343 _nvme_check_size(); 2486 _nvme_check_size();
2344} 2487}
2345 2488
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 24397d306d53..6d4119dfbdaa 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,7 +48,7 @@
48 */ 48 */
49#define NVME_RDMA_NR_AEN_COMMANDS 1 49#define NVME_RDMA_NR_AEN_COMMANDS 1
50#define NVME_RDMA_AQ_BLKMQ_DEPTH \ 50#define NVME_RDMA_AQ_BLKMQ_DEPTH \
51 (NVMF_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS) 51 (NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)
52 52
53struct nvme_rdma_device { 53struct nvme_rdma_device {
54 struct ib_device *dev; 54 struct ib_device *dev;
@@ -80,10 +80,8 @@ struct nvme_rdma_request {
80}; 80};
81 81
82enum nvme_rdma_queue_flags { 82enum nvme_rdma_queue_flags {
83 NVME_RDMA_Q_CONNECTED = (1 << 0), 83 NVME_RDMA_Q_LIVE = 0,
84 NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), 84 NVME_RDMA_Q_DELETING = 1,
85 NVME_RDMA_Q_DELETING = (1 << 2),
86 NVME_RDMA_Q_LIVE = (1 << 3),
87}; 85};
88 86
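
The queue-flags enum switches from mask values such as (1 << 0) to plain bit numbers because test_bit()/set_bit()/clear_bit() take a bit index, not a mask; with only LIVE and DELETING left, the old CONNECTED and IB_QUEUE_ALLOCATED states are tracked implicitly by the setup and teardown paths. A brief illustration of why the distinction matters, using non-atomic stand-ins for the kernel bitops:

#include <stdio.h>

enum queue_flags {		/* bit numbers, as in the new enum */
	Q_LIVE     = 0,
	Q_DELETING = 1,
};

/* Non-atomic stand-ins for set_bit()/test_bit(), which expect a bit index. */
static void set_bit_index(int nr, unsigned long *word)  { *word |= 1UL << nr; }
static int  test_bit_index(int nr, unsigned long *word) { return (int)((*word >> nr) & 1); }

int main(void)
{
	unsigned long flags = 0;

	set_bit_index(Q_LIVE, &flags);
	printf("live=%d deleting=%d\n",
	       test_bit_index(Q_LIVE, &flags), test_bit_index(Q_DELETING, &flags));

	/* Passing a mask such as (1 << 3) here would silently test bit 8 instead. */
	return 0;
}
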
89struct nvme_rdma_queue { 87struct nvme_rdma_queue {
@@ -103,9 +101,6 @@ struct nvme_rdma_queue {
103}; 101};
104 102
105struct nvme_rdma_ctrl { 103struct nvme_rdma_ctrl {
106 /* read and written in the hot path */
107 spinlock_t lock;
108
109 /* read only in the hot path */ 104 /* read only in the hot path */
110 struct nvme_rdma_queue *queues; 105 struct nvme_rdma_queue *queues;
111 u32 queue_count; 106 u32 queue_count;
@@ -113,7 +108,6 @@ struct nvme_rdma_ctrl {
113 /* other member variables */ 108 /* other member variables */
114 struct blk_mq_tag_set tag_set; 109 struct blk_mq_tag_set tag_set;
115 struct work_struct delete_work; 110 struct work_struct delete_work;
116 struct work_struct reset_work;
117 struct work_struct err_work; 111 struct work_struct err_work;
118 112
119 struct nvme_rdma_qe async_event_sqe; 113 struct nvme_rdma_qe async_event_sqe;
@@ -145,8 +139,6 @@ static DEFINE_MUTEX(device_list_mutex);
145static LIST_HEAD(nvme_rdma_ctrl_list); 139static LIST_HEAD(nvme_rdma_ctrl_list);
146static DEFINE_MUTEX(nvme_rdma_ctrl_mutex); 140static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
147 141
148static struct workqueue_struct *nvme_rdma_wq;
149
150/* 142/*
151 * Disabling this option makes small I/O goes faster, but is fundamentally 143 * Disabling this option makes small I/O goes faster, but is fundamentally
152 * unsafe. With it turned off we will have to register a global rkey that 144 * unsafe. With it turned off we will have to register a global rkey that
@@ -301,10 +293,12 @@ out:
301 return ret; 293 return ret;
302} 294}
303 295
304static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl, 296static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
305 struct request *rq, unsigned int queue_idx) 297 struct request *rq, unsigned int hctx_idx)
306{ 298{
299 struct nvme_rdma_ctrl *ctrl = set->driver_data;
307 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 300 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
301 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
308 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; 302 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
309 struct nvme_rdma_device *dev = queue->device; 303 struct nvme_rdma_device *dev = queue->device;
310 304
@@ -315,22 +309,13 @@ static void __nvme_rdma_exit_request(struct nvme_rdma_ctrl *ctrl,
315 DMA_TO_DEVICE); 309 DMA_TO_DEVICE);
316} 310}
317 311
318static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, 312static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
319 struct request *rq, unsigned int hctx_idx) 313 struct request *rq, unsigned int hctx_idx,
320{ 314 unsigned int numa_node)
321 return __nvme_rdma_exit_request(set->driver_data, rq, hctx_idx + 1);
322}
323
324static void nvme_rdma_exit_admin_request(struct blk_mq_tag_set *set,
325 struct request *rq, unsigned int hctx_idx)
326{
327 return __nvme_rdma_exit_request(set->driver_data, rq, 0);
328}
329
330static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl,
331 struct request *rq, unsigned int queue_idx)
332{ 315{
316 struct nvme_rdma_ctrl *ctrl = set->driver_data;
333 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 317 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
318 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
334 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; 319 struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
335 struct nvme_rdma_device *dev = queue->device; 320 struct nvme_rdma_device *dev = queue->device;
336 struct ib_device *ibdev = dev->dev; 321 struct ib_device *ibdev = dev->dev;
@@ -358,20 +343,6 @@ out_free_qe:
358 return -ENOMEM; 343 return -ENOMEM;
359} 344}
360 345
361static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
362 struct request *rq, unsigned int hctx_idx,
363 unsigned int numa_node)
364{
365 return __nvme_rdma_init_request(set->driver_data, rq, hctx_idx + 1);
366}
367
368static int nvme_rdma_init_admin_request(struct blk_mq_tag_set *set,
369 struct request *rq, unsigned int hctx_idx,
370 unsigned int numa_node)
371{
372 return __nvme_rdma_init_request(set->driver_data, rq, 0);
373}
374
375static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 346static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
376 unsigned int hctx_idx) 347 unsigned int hctx_idx)
377{ 348{
@@ -469,9 +440,6 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
469 struct nvme_rdma_device *dev; 440 struct nvme_rdma_device *dev;
470 struct ib_device *ibdev; 441 struct ib_device *ibdev;
471 442
472 if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
473 return;
474
475 dev = queue->device; 443 dev = queue->device;
476 ibdev = dev->dev; 444 ibdev = dev->dev;
477 rdma_destroy_qp(queue->cm_id); 445 rdma_destroy_qp(queue->cm_id);
@@ -483,17 +451,21 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
483 nvme_rdma_dev_put(dev); 451 nvme_rdma_dev_put(dev);
484} 452}
485 453
486static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue, 454static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
487 struct nvme_rdma_device *dev)
488{ 455{
489 struct ib_device *ibdev = dev->dev; 456 struct ib_device *ibdev;
490 const int send_wr_factor = 3; /* MR, SEND, INV */ 457 const int send_wr_factor = 3; /* MR, SEND, INV */
491 const int cq_factor = send_wr_factor + 1; /* + RECV */ 458 const int cq_factor = send_wr_factor + 1; /* + RECV */
492 int comp_vector, idx = nvme_rdma_queue_idx(queue); 459 int comp_vector, idx = nvme_rdma_queue_idx(queue);
493
494 int ret; 460 int ret;
495 461
496 queue->device = dev; 462 queue->device = nvme_rdma_find_get_device(queue->cm_id);
463 if (!queue->device) {
464 dev_err(queue->cm_id->device->dev.parent,
465 "no client data found!\n");
466 return -ECONNREFUSED;
467 }
468 ibdev = queue->device->dev;
497 469
498 /* 470 /*
499 * The admin queue is barely used once the controller is live, so don't 471 * The admin queue is barely used once the controller is live, so don't
@@ -506,12 +478,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
506 478
507 479
508 /* +1 for ib_stop_cq */ 480 /* +1 for ib_stop_cq */
509 queue->ib_cq = ib_alloc_cq(dev->dev, queue, 481 queue->ib_cq = ib_alloc_cq(ibdev, queue,
510 cq_factor * queue->queue_size + 1, comp_vector, 482 cq_factor * queue->queue_size + 1,
511 IB_POLL_SOFTIRQ); 483 comp_vector, IB_POLL_SOFTIRQ);
512 if (IS_ERR(queue->ib_cq)) { 484 if (IS_ERR(queue->ib_cq)) {
513 ret = PTR_ERR(queue->ib_cq); 485 ret = PTR_ERR(queue->ib_cq);
514 goto out; 486 goto out_put_dev;
515 } 487 }
516 488
517 ret = nvme_rdma_create_qp(queue, send_wr_factor); 489 ret = nvme_rdma_create_qp(queue, send_wr_factor);
@@ -524,7 +496,6 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue,
524 ret = -ENOMEM; 496 ret = -ENOMEM;
525 goto out_destroy_qp; 497 goto out_destroy_qp;
526 } 498 }
527 set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
528 499
529 return 0; 500 return 0;
530 501
@@ -532,7 +503,8 @@ out_destroy_qp:
532 ib_destroy_qp(queue->qp); 503 ib_destroy_qp(queue->qp);
533out_destroy_ib_cq: 504out_destroy_ib_cq:
534 ib_free_cq(queue->ib_cq); 505 ib_free_cq(queue->ib_cq);
535out: 506out_put_dev:
507 nvme_rdma_dev_put(queue->device);
536 return ret; 508 return ret;
537} 509}
538 510
@@ -583,12 +555,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
583 } 555 }
584 556
585 clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); 557 clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
586 set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
587 558
588 return 0; 559 return 0;
589 560
590out_destroy_cm_id: 561out_destroy_cm_id:
591 nvme_rdma_destroy_queue_ib(queue);
592 rdma_destroy_id(queue->cm_id); 562 rdma_destroy_id(queue->cm_id);
593 return ret; 563 return ret;
594} 564}
@@ -718,11 +688,11 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
718 if (nvmf_should_reconnect(&ctrl->ctrl)) { 688 if (nvmf_should_reconnect(&ctrl->ctrl)) {
719 dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", 689 dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
720 ctrl->ctrl.opts->reconnect_delay); 690 ctrl->ctrl.opts->reconnect_delay);
721 queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work, 691 queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
722 ctrl->ctrl.opts->reconnect_delay * HZ); 692 ctrl->ctrl.opts->reconnect_delay * HZ);
723 } else { 693 } else {
724 dev_info(ctrl->ctrl.device, "Removing controller...\n"); 694 dev_info(ctrl->ctrl.device, "Removing controller...\n");
725 queue_work(nvme_rdma_wq, &ctrl->delete_work); 695 queue_work(nvme_wq, &ctrl->delete_work);
726 } 696 }
727} 697}
728 698
@@ -733,7 +703,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
733 bool changed; 703 bool changed;
734 int ret; 704 int ret;
735 705
736 ++ctrl->ctrl.opts->nr_reconnects; 706 ++ctrl->ctrl.nr_reconnects;
737 707
738 if (ctrl->queue_count > 1) { 708 if (ctrl->queue_count > 1) {
739 nvme_rdma_free_io_queues(ctrl); 709 nvme_rdma_free_io_queues(ctrl);
@@ -749,7 +719,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
749 if (ret) 719 if (ret)
750 goto requeue; 720 goto requeue;
751 721
752 ret = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH); 722 ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
753 if (ret) 723 if (ret)
754 goto requeue; 724 goto requeue;
755 725
@@ -777,7 +747,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
777 747
778 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 748 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
779 WARN_ON_ONCE(!changed); 749 WARN_ON_ONCE(!changed);
780 ctrl->ctrl.opts->nr_reconnects = 0; 750 ctrl->ctrl.nr_reconnects = 0;
781 751
782 if (ctrl->queue_count > 1) { 752 if (ctrl->queue_count > 1) {
783 nvme_queue_scan(&ctrl->ctrl); 753 nvme_queue_scan(&ctrl->ctrl);
@@ -790,7 +760,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
790 760
791requeue: 761requeue:
792 dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", 762 dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
793 ctrl->ctrl.opts->nr_reconnects); 763 ctrl->ctrl.nr_reconnects);
794 nvme_rdma_reconnect_or_remove(ctrl); 764 nvme_rdma_reconnect_or_remove(ctrl);
795} 765}
796 766
@@ -802,10 +772,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
802 772
803 nvme_stop_keep_alive(&ctrl->ctrl); 773 nvme_stop_keep_alive(&ctrl->ctrl);
804 774
805 for (i = 0; i < ctrl->queue_count; i++) { 775 for (i = 0; i < ctrl->queue_count; i++)
806 clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
807 clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); 776 clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
808 }
809 777
810 if (ctrl->queue_count > 1) 778 if (ctrl->queue_count > 1)
811 nvme_stop_queues(&ctrl->ctrl); 779 nvme_stop_queues(&ctrl->ctrl);
@@ -833,7 +801,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
833 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) 801 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
834 return; 802 return;
835 803
836 queue_work(nvme_rdma_wq, &ctrl->err_work); 804 queue_work(nvme_wq, &ctrl->err_work);
837} 805}
838 806
839static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc, 807static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
@@ -1278,21 +1246,11 @@ static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
1278 1246
1279static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue) 1247static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
1280{ 1248{
1281 struct nvme_rdma_device *dev;
1282 int ret; 1249 int ret;
1283 1250
1284 dev = nvme_rdma_find_get_device(queue->cm_id); 1251 ret = nvme_rdma_create_queue_ib(queue);
1285 if (!dev) { 1252 if (ret)
1286 dev_err(queue->cm_id->device->dev.parent, 1253 return ret;
1287 "no client data found!\n");
1288 return -ECONNREFUSED;
1289 }
1290
1291 ret = nvme_rdma_create_queue_ib(queue, dev);
1292 if (ret) {
1293 nvme_rdma_dev_put(dev);
1294 goto out;
1295 }
1296 1254
1297 ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS); 1255 ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
1298 if (ret) { 1256 if (ret) {
@@ -1306,7 +1264,6 @@ static int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
1306 1264
1307out_destroy_queue: 1265out_destroy_queue:
1308 nvme_rdma_destroy_queue_ib(queue); 1266 nvme_rdma_destroy_queue_ib(queue);
1309out:
1310 return ret; 1267 return ret;
1311} 1268}
1312 1269
@@ -1334,8 +1291,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
1334 * specified by the Fabrics standard. 1291 * specified by the Fabrics standard.
1335 */ 1292 */
1336 if (priv.qid == 0) { 1293 if (priv.qid == 0) {
1337 priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH); 1294 priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
1338 priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1); 1295 priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
1339 } else { 1296 } else {
1340 /* 1297 /*
1341 * current interpretation of the fabrics spec 1298 * current interpretation of the fabrics spec
@@ -1383,12 +1340,14 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
1383 complete(&queue->cm_done); 1340 complete(&queue->cm_done);
1384 return 0; 1341 return 0;
1385 case RDMA_CM_EVENT_REJECTED: 1342 case RDMA_CM_EVENT_REJECTED:
1343 nvme_rdma_destroy_queue_ib(queue);
1386 cm_error = nvme_rdma_conn_rejected(queue, ev); 1344 cm_error = nvme_rdma_conn_rejected(queue, ev);
1387 break; 1345 break;
1388 case RDMA_CM_EVENT_ADDR_ERROR:
1389 case RDMA_CM_EVENT_ROUTE_ERROR: 1346 case RDMA_CM_EVENT_ROUTE_ERROR:
1390 case RDMA_CM_EVENT_CONNECT_ERROR: 1347 case RDMA_CM_EVENT_CONNECT_ERROR:
1391 case RDMA_CM_EVENT_UNREACHABLE: 1348 case RDMA_CM_EVENT_UNREACHABLE:
1349 nvme_rdma_destroy_queue_ib(queue);
1350 case RDMA_CM_EVENT_ADDR_ERROR:
1392 dev_dbg(queue->ctrl->ctrl.device, 1351 dev_dbg(queue->ctrl->ctrl.device,
1393 "CM error event %d\n", ev->event); 1352 "CM error event %d\n", ev->event);
1394 cm_error = -ECONNRESET; 1353 cm_error = -ECONNRESET;
@@ -1435,8 +1394,8 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
1435/* 1394/*
1436 * We cannot accept any other command until the Connect command has completed. 1395 * We cannot accept any other command until the Connect command has completed.
1437 */ 1396 */
1438static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, 1397static inline blk_status_t
1439 struct request *rq) 1398nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
1440{ 1399{
1441 if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { 1400 if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
1442 struct nvme_command *cmd = nvme_req(rq)->cmd; 1401 struct nvme_command *cmd = nvme_req(rq)->cmd;
@@ -1452,16 +1411,15 @@ static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
1452 * failover. 1411 * failover.
1453 */ 1412 */
1454 if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING) 1413 if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
1455 return -EIO; 1414 return BLK_STS_IOERR;
1456 else 1415 return BLK_STS_RESOURCE; /* try again later */
1457 return -EAGAIN;
1458 } 1416 }
1459 } 1417 }
1460 1418
1461 return 0; 1419 return 0;
1462} 1420}
1463 1421
1464static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, 1422static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1465 const struct blk_mq_queue_data *bd) 1423 const struct blk_mq_queue_data *bd)
1466{ 1424{
1467 struct nvme_ns *ns = hctx->queue->queuedata; 1425 struct nvme_ns *ns = hctx->queue->queuedata;
@@ -1472,28 +1430,29 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1472 struct nvme_command *c = sqe->data; 1430 struct nvme_command *c = sqe->data;
1473 bool flush = false; 1431 bool flush = false;
1474 struct ib_device *dev; 1432 struct ib_device *dev;
1475 int ret; 1433 blk_status_t ret;
1434 int err;
1476 1435
1477 WARN_ON_ONCE(rq->tag < 0); 1436 WARN_ON_ONCE(rq->tag < 0);
1478 1437
1479 ret = nvme_rdma_queue_is_ready(queue, rq); 1438 ret = nvme_rdma_queue_is_ready(queue, rq);
1480 if (unlikely(ret)) 1439 if (unlikely(ret))
1481 goto err; 1440 return ret;
1482 1441
1483 dev = queue->device->dev; 1442 dev = queue->device->dev;
1484 ib_dma_sync_single_for_cpu(dev, sqe->dma, 1443 ib_dma_sync_single_for_cpu(dev, sqe->dma,
1485 sizeof(struct nvme_command), DMA_TO_DEVICE); 1444 sizeof(struct nvme_command), DMA_TO_DEVICE);
1486 1445
1487 ret = nvme_setup_cmd(ns, rq, c); 1446 ret = nvme_setup_cmd(ns, rq, c);
1488 if (ret != BLK_MQ_RQ_QUEUE_OK) 1447 if (ret)
1489 return ret; 1448 return ret;
1490 1449
1491 blk_mq_start_request(rq); 1450 blk_mq_start_request(rq);
1492 1451
1493 ret = nvme_rdma_map_data(queue, rq, c); 1452 err = nvme_rdma_map_data(queue, rq, c);
1494 if (ret < 0) { 1453 if (err < 0) {
1495 dev_err(queue->ctrl->ctrl.device, 1454 dev_err(queue->ctrl->ctrl.device,
1496 "Failed to map data (%d)\n", ret); 1455 "Failed to map data (%d)\n", err);
1497 nvme_cleanup_cmd(rq); 1456 nvme_cleanup_cmd(rq);
1498 goto err; 1457 goto err;
1499 } 1458 }
@@ -1503,17 +1462,18 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1503 1462
1504 if (req_op(rq) == REQ_OP_FLUSH) 1463 if (req_op(rq) == REQ_OP_FLUSH)
1505 flush = true; 1464 flush = true;
1506 ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, 1465 err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
1507 req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); 1466 req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
1508 if (ret) { 1467 if (err) {
1509 nvme_rdma_unmap_data(queue, rq); 1468 nvme_rdma_unmap_data(queue, rq);
1510 goto err; 1469 goto err;
1511 } 1470 }
1512 1471
1513 return BLK_MQ_RQ_QUEUE_OK; 1472 return BLK_STS_OK;
1514err: 1473err:
1515 return (ret == -ENOMEM || ret == -EAGAIN) ? 1474 if (err == -ENOMEM || err == -EAGAIN)
1516 BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; 1475 return BLK_STS_RESOURCE;
1476 return BLK_STS_IOERR;
1517} 1477}
1518 1478
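
The rewritten nvme_rdma_queue_rq() keeps ordinary errnos in a local err and converts to blk_status_t only at the err: label: allocation-style failures (-ENOMEM, -EAGAIN) become BLK_STS_RESOURCE so blk-mq requeues the request, and everything else becomes BLK_STS_IOERR. A minimal sketch of that boundary mapping; the status values are illustrative, not the real blk_types.h constants.

#include <errno.h>
#include <stdio.h>

typedef unsigned char blk_status_t;
#define BLK_STS_OK       ((blk_status_t)0)
#define BLK_STS_RESOURCE ((blk_status_t)1)	/* illustrative: requeue later */
#define BLK_STS_IOERR    ((blk_status_t)2)	/* illustrative: fail the request */

/* Same policy as the err: label above: only -ENOMEM/-EAGAIN are retryable. */
static blk_status_t errno_to_status(int err)
{
	if (err == -ENOMEM || err == -EAGAIN)
		return BLK_STS_RESOURCE;
	return BLK_STS_IOERR;
}

int main(void)
{
	printf("-ENOMEM -> %u\n", (unsigned)errno_to_status(-ENOMEM));
	printf("-EAGAIN -> %u\n", (unsigned)errno_to_status(-EAGAIN));
	printf("-EIO    -> %u\n", (unsigned)errno_to_status(-EIO));
	return 0;
}
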
1519static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 1479static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
@@ -1523,7 +1483,6 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
1523 struct ib_wc wc; 1483 struct ib_wc wc;
1524 int found = 0; 1484 int found = 0;
1525 1485
1526 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1527 while (ib_poll_cq(cq, 1, &wc) > 0) { 1486 while (ib_poll_cq(cq, 1, &wc) > 0) {
1528 struct ib_cqe *cqe = wc.wr_cqe; 1487 struct ib_cqe *cqe = wc.wr_cqe;
1529 1488
@@ -1560,8 +1519,8 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
1560static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { 1519static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
1561 .queue_rq = nvme_rdma_queue_rq, 1520 .queue_rq = nvme_rdma_queue_rq,
1562 .complete = nvme_rdma_complete_rq, 1521 .complete = nvme_rdma_complete_rq,
1563 .init_request = nvme_rdma_init_admin_request, 1522 .init_request = nvme_rdma_init_request,
1564 .exit_request = nvme_rdma_exit_admin_request, 1523 .exit_request = nvme_rdma_exit_request,
1565 .reinit_request = nvme_rdma_reinit_request, 1524 .reinit_request = nvme_rdma_reinit_request,
1566 .init_hctx = nvme_rdma_init_admin_hctx, 1525 .init_hctx = nvme_rdma_init_admin_hctx,
1567 .timeout = nvme_rdma_timeout, 1526 .timeout = nvme_rdma_timeout,
@@ -1571,7 +1530,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
1571{ 1530{
1572 int error; 1531 int error;
1573 1532
1574 error = nvme_rdma_init_queue(ctrl, 0, NVMF_AQ_DEPTH); 1533 error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH);
1575 if (error) 1534 if (error)
1576 return error; 1535 return error;
1577 1536
@@ -1672,7 +1631,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl)
1672 nvme_rdma_free_io_queues(ctrl); 1631 nvme_rdma_free_io_queues(ctrl);
1673 } 1632 }
 
-	if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
+	if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags))
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1709,7 +1668,7 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
 		return -EBUSY;
 
-	if (!queue_work(nvme_rdma_wq, &ctrl->delete_work))
+	if (!queue_work(nvme_wq, &ctrl->delete_work))
 		return -EBUSY;
 
 	return 0;
@@ -1743,8 +1702,8 @@ static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
 
 static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 {
-	struct nvme_rdma_ctrl *ctrl = container_of(work,
-			struct nvme_rdma_ctrl, reset_work);
+	struct nvme_rdma_ctrl *ctrl =
+		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
 	int ret;
 	bool changed;
 
@@ -1785,22 +1744,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 del_dead_ctrl:
 	/* Deleting this dead controller... */
 	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-	WARN_ON(!queue_work(nvme_rdma_wq, &ctrl->delete_work));
-}
-
-static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
-		return -EBUSY;
-
-	if (!queue_work(nvme_rdma_wq, &ctrl->reset_work))
-		return -EBUSY;
-
-	flush_work(&ctrl->reset_work);
-
-	return 0;
+	WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work));
 }
 
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
@@ -1810,11 +1754,9 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.reg_read32 = nvmf_reg_read32,
 	.reg_read64 = nvmf_reg_read64,
 	.reg_write32 = nvmf_reg_write32,
-	.reset_ctrl = nvme_rdma_reset_ctrl,
 	.free_ctrl = nvme_rdma_free_ctrl,
 	.submit_async_event = nvme_rdma_submit_async_event,
 	.delete_ctrl = nvme_rdma_del_ctrl,
-	.get_subsysnqn = nvmf_get_subsysnqn,
 	.get_address = nvmf_get_address,
 };
 
@@ -1919,8 +1861,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 			nvme_rdma_reconnect_ctrl_work);
 	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
 	INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
-	INIT_WORK(&ctrl->reset_work, nvme_rdma_reset_ctrl_work);
-	spin_lock_init(&ctrl->lock);
+	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
 	ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -1939,12 +1880,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	/* sanity check icdoff */
 	if (ctrl->ctrl.icdoff) {
 		dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
+		ret = -EINVAL;
 		goto out_remove_admin_queue;
 	}
 
 	/* sanity check keyed sgls */
 	if (!(ctrl->ctrl.sgls & (1 << 20))) {
 		dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not support\n");
+		ret = -EINVAL;
 		goto out_remove_admin_queue;
 	}
 
@@ -2033,7 +1976,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
 	}
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	flush_workqueue(nvme_rdma_wq);
+	flush_workqueue(nvme_wq);
 }
 
 static struct ib_client nvme_rdma_ib_client = {
@@ -2046,13 +1989,9 @@ static int __init nvme_rdma_init_module(void)
 {
 	int ret;
 
-	nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
-	if (!nvme_rdma_wq)
-		return -ENOMEM;
-
 	ret = ib_register_client(&nvme_rdma_ib_client);
 	if (ret)
-		goto err_destroy_wq;
+		return ret;
 
 	ret = nvmf_register_transport(&nvme_rdma_transport);
 	if (ret)
@@ -2062,8 +2001,6 @@ static int __init nvme_rdma_init_module(void)
 
 err_unreg_client:
 	ib_unregister_client(&nvme_rdma_ib_client);
-err_destroy_wq:
-	destroy_workqueue(nvme_rdma_wq);
 	return ret;
 }
 
@@ -2071,7 +2008,6 @@ static void __exit nvme_rdma_cleanup_module(void)
 {
 	nvmf_unregister_transport(&nvme_rdma_transport);
 	ib_unregister_client(&nvme_rdma_ib_client);
-	destroy_workqueue(nvme_rdma_wq);
 }
 
 module_init(nvme_rdma_init_module);
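The hunks above fold the RDMA transport's reset handling into the core: the private nvme_rdma_wq workqueue is dropped in favour of the shared nvme_wq, .reset_ctrl and .get_subsysnqn disappear from nvme_rdma_ctrl_ops, and reset_work now lives in the embedded struct nvme_ctrl, so the handler recovers its nvme_rdma_ctrl with a single container_of() through the embedded member. A minimal standalone sketch of that container_of() pattern, using simplified stand-in types rather than the kernel structures:

/*
 * Illustration only, not part of the patch: how container_of() walks back
 * from a pointer to a member of an embedded struct to the outer object,
 * mirroring container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work).
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_work { int pending; };
struct fake_nvme_ctrl { int state; struct fake_work reset_work; };
struct fake_rdma_ctrl { int queue_count; struct fake_nvme_ctrl ctrl; };

static void reset_handler(struct fake_work *work)
{
	/* One container_of() through the nested member path "ctrl.reset_work". */
	struct fake_rdma_ctrl *c =
		container_of(work, struct fake_rdma_ctrl, ctrl.reset_work);

	printf("queue_count=%d\n", c->queue_count);
}

int main(void)
{
	struct fake_rdma_ctrl c = { .queue_count = 4 };

	reset_handler(&c.ctrl.reset_work);	/* prints queue_count=4 */
	return 0;
}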
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
deleted file mode 100644
index 1f7671e631dd..000000000000
--- a/drivers/nvme/host/scsi.c
+++ /dev/null
@@ -1,2460 +0,0 @@
1/*
2 * NVM Express device driver
3 * Copyright (c) 2011-2014, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14
15/*
16 * Refer to the SCSI-NVMe Translation spec for details on how
17 * each command is translated.
18 */
19
20#include <linux/bio.h>
21#include <linux/bitops.h>
22#include <linux/blkdev.h>
23#include <linux/compat.h>
24#include <linux/delay.h>
25#include <linux/errno.h>
26#include <linux/fs.h>
27#include <linux/genhd.h>
28#include <linux/idr.h>
29#include <linux/init.h>
30#include <linux/interrupt.h>
31#include <linux/io.h>
32#include <linux/kdev_t.h>
33#include <linux/kthread.h>
34#include <linux/kernel.h>
35#include <linux/mm.h>
36#include <linux/module.h>
37#include <linux/moduleparam.h>
38#include <linux/pci.h>
39#include <linux/poison.h>
40#include <linux/sched.h>
41#include <linux/slab.h>
42#include <linux/types.h>
43#include <asm/unaligned.h>
44#include <scsi/sg.h>
45#include <scsi/scsi.h>
46#include <scsi/scsi_request.h>
47
48#include "nvme.h"
49
50static int sg_version_num = 30534; /* 2 digits for each component */
51
52/* VPD Page Codes */
53#define VPD_SUPPORTED_PAGES 0x00
54#define VPD_SERIAL_NUMBER 0x80
55#define VPD_DEVICE_IDENTIFIERS 0x83
56#define VPD_EXTENDED_INQUIRY 0x86
57#define VPD_BLOCK_LIMITS 0xB0
58#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1
59
60/* format unit parameter list offsets */
61#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4
62#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
63#define FORMAT_UNIT_PROT_INT_OFFSET 3
64#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0
65#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07
66
67/* Misc. defines */
68#define FIXED_SENSE_DATA 0x70
69#define DESC_FORMAT_SENSE_DATA 0x72
70#define FIXED_SENSE_DATA_ADD_LENGTH 10
71#define LUN_ENTRY_SIZE 8
72#define LUN_DATA_HEADER_SIZE 8
73#define ALL_LUNS_RETURNED 0x02
74#define ALL_WELL_KNOWN_LUNS_RETURNED 0x01
75#define RESTRICTED_LUNS_RETURNED 0x00
76#define DOWNLOAD_SAVE_ACTIVATE 0x05
77#define DOWNLOAD_SAVE_DEFER_ACTIVATE 0x0E
78#define ACTIVATE_DEFERRED_MICROCODE 0x0F
79#define FORMAT_UNIT_IMMED_MASK 0x2
80#define FORMAT_UNIT_IMMED_OFFSET 1
81#define KELVIN_TEMP_FACTOR 273
82#define FIXED_FMT_SENSE_DATA_SIZE 18
83#define DESC_FMT_SENSE_DATA_SIZE 8
84
85/* SCSI/NVMe defines and bit masks */
86#define INQ_STANDARD_INQUIRY_PAGE 0x00
87#define INQ_SUPPORTED_VPD_PAGES_PAGE 0x00
88#define INQ_UNIT_SERIAL_NUMBER_PAGE 0x80
89#define INQ_DEVICE_IDENTIFICATION_PAGE 0x83
90#define INQ_EXTENDED_INQUIRY_DATA_PAGE 0x86
91#define INQ_BDEV_LIMITS_PAGE 0xB0
92#define INQ_BDEV_CHARACTERISTICS_PAGE 0xB1
93#define INQ_SERIAL_NUMBER_LENGTH 0x14
94#define INQ_NUM_SUPPORTED_VPD_PAGES 6
95#define VERSION_SPC_4 0x06
96#define ACA_UNSUPPORTED 0
97#define STANDARD_INQUIRY_LENGTH 36
98#define ADDITIONAL_STD_INQ_LENGTH 31
99#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C
100#define RESERVED_FIELD 0
101
102/* Mode Sense/Select defines */
103#define MODE_PAGE_INFO_EXCEP 0x1C
104#define MODE_PAGE_CACHING 0x08
105#define MODE_PAGE_CONTROL 0x0A
106#define MODE_PAGE_POWER_CONDITION 0x1A
107#define MODE_PAGE_RETURN_ALL 0x3F
108#define MODE_PAGE_BLK_DES_LEN 0x08
109#define MODE_PAGE_LLBAA_BLK_DES_LEN 0x10
110#define MODE_PAGE_CACHING_LEN 0x14
111#define MODE_PAGE_CONTROL_LEN 0x0C
112#define MODE_PAGE_POW_CND_LEN 0x28
113#define MODE_PAGE_INF_EXC_LEN 0x0C
114#define MODE_PAGE_ALL_LEN 0x54
115#define MODE_SENSE6_MPH_SIZE 4
116#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0
117#define MODE_SENSE_PAGE_CODE_OFFSET 2
118#define MODE_SENSE_PAGE_CODE_MASK 0x3F
119#define MODE_SENSE_LLBAA_MASK 0x10
120#define MODE_SENSE_LLBAA_SHIFT 4
121#define MODE_SENSE_DBD_MASK 8
122#define MODE_SENSE_DBD_SHIFT 3
123#define MODE_SENSE10_MPH_SIZE 8
124#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10
125#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1
126#define MODE_SELECT_6_BD_OFFSET 3
127#define MODE_SELECT_10_BD_OFFSET 6
128#define MODE_SELECT_10_LLBAA_OFFSET 4
129#define MODE_SELECT_10_LLBAA_MASK 1
130#define MODE_SELECT_6_MPH_SIZE 4
131#define MODE_SELECT_10_MPH_SIZE 8
132#define CACHING_MODE_PAGE_WCE_MASK 0x04
133#define MODE_SENSE_BLK_DESC_ENABLED 0
134#define MODE_SENSE_BLK_DESC_COUNT 1
135#define MODE_SELECT_PAGE_CODE_MASK 0x3F
136#define SHORT_DESC_BLOCK 8
137#define LONG_DESC_BLOCK 16
138#define MODE_PAGE_POW_CND_LEN_FIELD 0x26
139#define MODE_PAGE_INF_EXC_LEN_FIELD 0x0A
140#define MODE_PAGE_CACHING_LEN_FIELD 0x12
141#define MODE_PAGE_CONTROL_LEN_FIELD 0x0A
142#define MODE_SENSE_PC_CURRENT_VALUES 0
143
144/* Log Sense defines */
145#define LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE 0x00
146#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07
147#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F
148#define LOG_PAGE_TEMPERATURE_PAGE 0x0D
149#define LOG_SENSE_CDB_SP_NOT_ENABLED 0
150#define LOG_SENSE_CDB_PC_MASK 0xC0
151#define LOG_SENSE_CDB_PC_SHIFT 6
152#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1
153#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F
154#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8
155#define LOG_INFO_EXCP_PAGE_LENGTH 0xC
156#define REMAINING_TEMP_PAGE_LENGTH 0xC
157#define LOG_TEMP_PAGE_LENGTH 0x10
158#define LOG_TEMP_UNKNOWN 0xFF
159#define SUPPORTED_LOG_PAGES_PAGE_LENGTH 0x3
160
161/* Read Capacity defines */
162#define READ_CAP_10_RESP_SIZE 8
163#define READ_CAP_16_RESP_SIZE 32
164
165/* NVMe Namespace and Command Defines */
166#define BYTES_TO_DWORDS 4
167#define NVME_MAX_FIRMWARE_SLOT 7
168
169/* Report LUNs defines */
170#define REPORT_LUNS_FIRST_LUN_OFFSET 8
171
172/* SCSI ADDITIONAL SENSE Codes */
173
174#define SCSI_ASC_NO_SENSE 0x00
175#define SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT 0x03
176#define SCSI_ASC_LUN_NOT_READY 0x04
177#define SCSI_ASC_WARNING 0x0B
178#define SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED 0x10
179#define SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED 0x10
180#define SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED 0x10
181#define SCSI_ASC_UNRECOVERED_READ_ERROR 0x11
182#define SCSI_ASC_MISCOMPARE_DURING_VERIFY 0x1D
183#define SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID 0x20
184#define SCSI_ASC_ILLEGAL_COMMAND 0x20
185#define SCSI_ASC_ILLEGAL_BLOCK 0x21
186#define SCSI_ASC_INVALID_CDB 0x24
187#define SCSI_ASC_INVALID_LUN 0x25
188#define SCSI_ASC_INVALID_PARAMETER 0x26
189#define SCSI_ASC_FORMAT_COMMAND_FAILED 0x31
190#define SCSI_ASC_INTERNAL_TARGET_FAILURE 0x44
191
192/* SCSI ADDITIONAL SENSE Code Qualifiers */
193
194#define SCSI_ASCQ_CAUSE_NOT_REPORTABLE 0x00
195#define SCSI_ASCQ_FORMAT_COMMAND_FAILED 0x01
196#define SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED 0x01
197#define SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED 0x02
198#define SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED 0x03
199#define SCSI_ASCQ_FORMAT_IN_PROGRESS 0x04
200#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08
201#define SCSI_ASCQ_INVALID_LUN_ID 0x09
202
203/* copied from drivers/usb/gadget/function/storage_common.h */
204static inline u32 get_unaligned_be24(u8 *buf)
205{
206 return 0xffffff & (u32) get_unaligned_be32(buf - 1);
207}
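get_unaligned_be24() above borrows a trick from the USB gadget code: it loads four big-endian bytes starting one byte before the buffer and masks the result down to 24 bits, which is equivalent to assembling buf[0..2] by hand. nvme_trans_io() later uses it to pull the 24-bit LBA out of bytes 1-3 of a READ(6)/WRITE(6) CDB. A standalone sketch of the equivalent byte assembly (illustration only, not kernel code):

#include <assert.h>
#include <stdint.h>

/* Same result as get_unaligned_be24(), written as explicit byte assembly. */
static uint32_t be24_by_hand(const uint8_t *buf)
{
	return ((uint32_t)buf[0] << 16) | ((uint32_t)buf[1] << 8) | buf[2];
}

int main(void)
{
	/* READ(6)-style CDB: the LBA is carried big-endian in bytes 1..3. */
	const uint8_t cdb[6] = { 0x08, 0x01, 0x02, 0x03, 0x10, 0x00 };

	assert(be24_by_hand(&cdb[1]) == 0x010203);
	return 0;
}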
208
209/* Struct to gather data that needs to be extracted from a SCSI CDB.
210 Not conforming to any particular CDB variant, but compatible with all. */
211
212struct nvme_trans_io_cdb {
213 u8 fua;
214 u8 prot_info;
215 u64 lba;
216 u32 xfer_len;
217};
218
219
220/* Internal Helper Functions */
221
222
223/* Copy data to userspace memory */
224
225static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
226 unsigned long n)
227{
228 int i;
229 void *index = from;
230 size_t remaining = n;
231 size_t xfer_len;
232
233 if (hdr->iovec_count > 0) {
234 struct sg_iovec sgl;
235
236 for (i = 0; i < hdr->iovec_count; i++) {
237 if (copy_from_user(&sgl, hdr->dxferp +
238 i * sizeof(struct sg_iovec),
239 sizeof(struct sg_iovec)))
240 return -EFAULT;
241 xfer_len = min(remaining, sgl.iov_len);
242 if (copy_to_user(sgl.iov_base, index, xfer_len))
243 return -EFAULT;
244
245 index += xfer_len;
246 remaining -= xfer_len;
247 if (remaining == 0)
248 break;
249 }
250 return 0;
251 }
252
253 if (copy_to_user(hdr->dxferp, from, n))
254 return -EFAULT;
255 return 0;
256}
257
258/* Copy data from userspace memory */
259
260static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
261 unsigned long n)
262{
263 int i;
264 void *index = to;
265 size_t remaining = n;
266 size_t xfer_len;
267
268 if (hdr->iovec_count > 0) {
269 struct sg_iovec sgl;
270
271 for (i = 0; i < hdr->iovec_count; i++) {
272 if (copy_from_user(&sgl, hdr->dxferp +
273 i * sizeof(struct sg_iovec),
274 sizeof(struct sg_iovec)))
275 return -EFAULT;
276 xfer_len = min(remaining, sgl.iov_len);
277 if (copy_from_user(index, sgl.iov_base, xfer_len))
278 return -EFAULT;
279 index += xfer_len;
280 remaining -= xfer_len;
281 if (remaining == 0)
282 break;
283 }
284 return 0;
285 }
286
287 if (copy_from_user(to, hdr->dxferp, n))
288 return -EFAULT;
289 return 0;
290}
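Both copy helpers above walk the optional iovec list from the sg_io_hdr, copying min(remaining, iov_len) per segment and falling back to a single flat copy when no iovecs are supplied. A user-space sketch of the same scatter loop, with memcpy() standing in for copy_to_user() (illustration only, not kernel code):

#include <assert.h>
#include <string.h>
#include <sys/uio.h>

/* Spread n bytes from 'from' across the iovec list, one segment at a time. */
static void scatter_copy_to(const struct iovec *iov, int iovcnt,
			    const void *from, size_t n)
{
	const char *src = from;
	size_t remaining = n;

	for (int i = 0; i < iovcnt && remaining > 0; i++) {
		size_t xfer = remaining < iov[i].iov_len
				? remaining : iov[i].iov_len;

		memcpy(iov[i].iov_base, src, xfer);	/* kernel: copy_to_user() */
		src += xfer;
		remaining -= xfer;
	}
}

int main(void)
{
	char a[4], b[8];
	const char payload[] = "0123456789";
	struct iovec iov[2] = { { a, sizeof(a) }, { b, sizeof(b) } };

	scatter_copy_to(iov, 2, payload, sizeof(payload) - 1);
	assert(memcmp(a, "0123", 4) == 0);
	assert(memcmp(b, "456789", 6) == 0);
	return 0;
}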
291
292/* Status/Sense Buffer Writeback */
293
294static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
295 u8 asc, u8 ascq)
296{
297 u8 xfer_len;
298 u8 resp[DESC_FMT_SENSE_DATA_SIZE];
299
300 if (scsi_status_is_good(status)) {
301 hdr->status = SAM_STAT_GOOD;
302 hdr->masked_status = GOOD;
303 hdr->host_status = DID_OK;
304 hdr->driver_status = DRIVER_OK;
305 hdr->sb_len_wr = 0;
306 } else {
307 hdr->status = status;
308 hdr->masked_status = status >> 1;
309 hdr->host_status = DID_OK;
310 hdr->driver_status = DRIVER_OK;
311
312 memset(resp, 0, DESC_FMT_SENSE_DATA_SIZE);
313 resp[0] = DESC_FORMAT_SENSE_DATA;
314 resp[1] = sense_key;
315 resp[2] = asc;
316 resp[3] = ascq;
317
318 xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
319 hdr->sb_len_wr = xfer_len;
320 if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
321 return -EFAULT;
322 }
323
324 return 0;
325}
326
327/*
328 * Take a status code from a lowlevel routine, and if it was a positive NVMe
329 * error code update the sense data based on it. In either case the passed
330 * in value is returned again, unless an -EFAULT from copy_to_user overrides
331 * it.
332 */
333static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
334{
335 u8 status, sense_key, asc, ascq;
336 int res;
337
338 /* For non-nvme (Linux) errors, simply return the error code */
339 if (nvme_sc < 0)
340 return nvme_sc;
341
342 /* Mask DNR, More, and reserved fields */
343 switch (nvme_sc & 0x7FF) {
344 /* Generic Command Status */
345 case NVME_SC_SUCCESS:
346 status = SAM_STAT_GOOD;
347 sense_key = NO_SENSE;
348 asc = SCSI_ASC_NO_SENSE;
349 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
350 break;
351 case NVME_SC_INVALID_OPCODE:
352 status = SAM_STAT_CHECK_CONDITION;
353 sense_key = ILLEGAL_REQUEST;
354 asc = SCSI_ASC_ILLEGAL_COMMAND;
355 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
356 break;
357 case NVME_SC_INVALID_FIELD:
358 status = SAM_STAT_CHECK_CONDITION;
359 sense_key = ILLEGAL_REQUEST;
360 asc = SCSI_ASC_INVALID_CDB;
361 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
362 break;
363 case NVME_SC_DATA_XFER_ERROR:
364 status = SAM_STAT_CHECK_CONDITION;
365 sense_key = MEDIUM_ERROR;
366 asc = SCSI_ASC_NO_SENSE;
367 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
368 break;
369 case NVME_SC_POWER_LOSS:
370 status = SAM_STAT_TASK_ABORTED;
371 sense_key = ABORTED_COMMAND;
372 asc = SCSI_ASC_WARNING;
373 ascq = SCSI_ASCQ_POWER_LOSS_EXPECTED;
374 break;
375 case NVME_SC_INTERNAL:
376 status = SAM_STAT_CHECK_CONDITION;
377 sense_key = HARDWARE_ERROR;
378 asc = SCSI_ASC_INTERNAL_TARGET_FAILURE;
379 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
380 break;
381 case NVME_SC_ABORT_REQ:
382 status = SAM_STAT_TASK_ABORTED;
383 sense_key = ABORTED_COMMAND;
384 asc = SCSI_ASC_NO_SENSE;
385 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
386 break;
387 case NVME_SC_ABORT_QUEUE:
388 status = SAM_STAT_TASK_ABORTED;
389 sense_key = ABORTED_COMMAND;
390 asc = SCSI_ASC_NO_SENSE;
391 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
392 break;
393 case NVME_SC_FUSED_FAIL:
394 status = SAM_STAT_TASK_ABORTED;
395 sense_key = ABORTED_COMMAND;
396 asc = SCSI_ASC_NO_SENSE;
397 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
398 break;
399 case NVME_SC_FUSED_MISSING:
400 status = SAM_STAT_TASK_ABORTED;
401 sense_key = ABORTED_COMMAND;
402 asc = SCSI_ASC_NO_SENSE;
403 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
404 break;
405 case NVME_SC_INVALID_NS:
406 status = SAM_STAT_CHECK_CONDITION;
407 sense_key = ILLEGAL_REQUEST;
408 asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
409 ascq = SCSI_ASCQ_INVALID_LUN_ID;
410 break;
411 case NVME_SC_LBA_RANGE:
412 status = SAM_STAT_CHECK_CONDITION;
413 sense_key = ILLEGAL_REQUEST;
414 asc = SCSI_ASC_ILLEGAL_BLOCK;
415 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
416 break;
417 case NVME_SC_CAP_EXCEEDED:
418 status = SAM_STAT_CHECK_CONDITION;
419 sense_key = MEDIUM_ERROR;
420 asc = SCSI_ASC_NO_SENSE;
421 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
422 break;
423 case NVME_SC_NS_NOT_READY:
424 status = SAM_STAT_CHECK_CONDITION;
425 sense_key = NOT_READY;
426 asc = SCSI_ASC_LUN_NOT_READY;
427 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
428 break;
429
430 /* Command Specific Status */
431 case NVME_SC_INVALID_FORMAT:
432 status = SAM_STAT_CHECK_CONDITION;
433 sense_key = ILLEGAL_REQUEST;
434 asc = SCSI_ASC_FORMAT_COMMAND_FAILED;
435 ascq = SCSI_ASCQ_FORMAT_COMMAND_FAILED;
436 break;
437 case NVME_SC_BAD_ATTRIBUTES:
438 status = SAM_STAT_CHECK_CONDITION;
439 sense_key = ILLEGAL_REQUEST;
440 asc = SCSI_ASC_INVALID_CDB;
441 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
442 break;
443
444 /* Media Errors */
445 case NVME_SC_WRITE_FAULT:
446 status = SAM_STAT_CHECK_CONDITION;
447 sense_key = MEDIUM_ERROR;
448 asc = SCSI_ASC_PERIPHERAL_DEV_WRITE_FAULT;
449 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
450 break;
451 case NVME_SC_READ_ERROR:
452 status = SAM_STAT_CHECK_CONDITION;
453 sense_key = MEDIUM_ERROR;
454 asc = SCSI_ASC_UNRECOVERED_READ_ERROR;
455 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
456 break;
457 case NVME_SC_GUARD_CHECK:
458 status = SAM_STAT_CHECK_CONDITION;
459 sense_key = MEDIUM_ERROR;
460 asc = SCSI_ASC_LOG_BLOCK_GUARD_CHECK_FAILED;
461 ascq = SCSI_ASCQ_LOG_BLOCK_GUARD_CHECK_FAILED;
462 break;
463 case NVME_SC_APPTAG_CHECK:
464 status = SAM_STAT_CHECK_CONDITION;
465 sense_key = MEDIUM_ERROR;
466 asc = SCSI_ASC_LOG_BLOCK_APPTAG_CHECK_FAILED;
467 ascq = SCSI_ASCQ_LOG_BLOCK_APPTAG_CHECK_FAILED;
468 break;
469 case NVME_SC_REFTAG_CHECK:
470 status = SAM_STAT_CHECK_CONDITION;
471 sense_key = MEDIUM_ERROR;
472 asc = SCSI_ASC_LOG_BLOCK_REFTAG_CHECK_FAILED;
473 ascq = SCSI_ASCQ_LOG_BLOCK_REFTAG_CHECK_FAILED;
474 break;
475 case NVME_SC_COMPARE_FAILED:
476 status = SAM_STAT_CHECK_CONDITION;
477 sense_key = MISCOMPARE;
478 asc = SCSI_ASC_MISCOMPARE_DURING_VERIFY;
479 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
480 break;
481 case NVME_SC_ACCESS_DENIED:
482 status = SAM_STAT_CHECK_CONDITION;
483 sense_key = ILLEGAL_REQUEST;
484 asc = SCSI_ASC_ACCESS_DENIED_INVALID_LUN_ID;
485 ascq = SCSI_ASCQ_INVALID_LUN_ID;
486 break;
487
488 /* Unspecified/Default */
489 case NVME_SC_CMDID_CONFLICT:
490 case NVME_SC_CMD_SEQ_ERROR:
491 case NVME_SC_CQ_INVALID:
492 case NVME_SC_QID_INVALID:
493 case NVME_SC_QUEUE_SIZE:
494 case NVME_SC_ABORT_LIMIT:
495 case NVME_SC_ABORT_MISSING:
496 case NVME_SC_ASYNC_LIMIT:
497 case NVME_SC_FIRMWARE_SLOT:
498 case NVME_SC_FIRMWARE_IMAGE:
499 case NVME_SC_INVALID_VECTOR:
500 case NVME_SC_INVALID_LOG_PAGE:
501 default:
502 status = SAM_STAT_CHECK_CONDITION;
503 sense_key = ILLEGAL_REQUEST;
504 asc = SCSI_ASC_NO_SENSE;
505 ascq = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
506 break;
507 }
508
509 res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
510 return res ? res : nvme_sc;
511}
512
513/* INQUIRY Helper Functions */
514
515static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
516 struct sg_io_hdr *hdr, u8 *inq_response,
517 int alloc_len)
518{
519 struct nvme_ctrl *ctrl = ns->ctrl;
520 struct nvme_id_ns *id_ns;
521 int res;
522 int nvme_sc;
523 int xfer_len;
524 u8 resp_data_format = 0x02;
525 u8 protect;
526 u8 cmdque = 0x01 << 1;
527 u8 fw_offset = sizeof(ctrl->firmware_rev);
528
529 /* nvme ns identify - use DPS value for PROTECT field */
530 nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
531 res = nvme_trans_status_code(hdr, nvme_sc);
532 if (res)
533 return res;
534
535 if (id_ns->dps)
536 protect = 0x01;
537 else
538 protect = 0;
539 kfree(id_ns);
540
541 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
542 inq_response[2] = VERSION_SPC_4;
543 inq_response[3] = resp_data_format; /*normaca=0 | hisup=0 */
544 inq_response[4] = ADDITIONAL_STD_INQ_LENGTH;
545 inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
546 inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */
547 strncpy(&inq_response[8], "NVMe ", 8);
548 strncpy(&inq_response[16], ctrl->model, 16);
549
550 while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
551 fw_offset--;
552 fw_offset -= 4;
553 strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
554
555 xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
556 return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
557}
558
559static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
560 struct sg_io_hdr *hdr, u8 *inq_response,
561 int alloc_len)
562{
563 int xfer_len;
564
565 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
566 inq_response[1] = INQ_SUPPORTED_VPD_PAGES_PAGE; /* Page Code */
567 inq_response[3] = INQ_NUM_SUPPORTED_VPD_PAGES; /* Page Length */
568 inq_response[4] = INQ_SUPPORTED_VPD_PAGES_PAGE;
569 inq_response[5] = INQ_UNIT_SERIAL_NUMBER_PAGE;
570 inq_response[6] = INQ_DEVICE_IDENTIFICATION_PAGE;
571 inq_response[7] = INQ_EXTENDED_INQUIRY_DATA_PAGE;
572 inq_response[8] = INQ_BDEV_CHARACTERISTICS_PAGE;
573 inq_response[9] = INQ_BDEV_LIMITS_PAGE;
574
575 xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
576 return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
577}
578
579static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
580 struct sg_io_hdr *hdr, u8 *inq_response,
581 int alloc_len)
582{
583 int xfer_len;
584
585 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
586 inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
587 inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */
588 strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
589
590 xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
591 return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
592}
593
594static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
595 u8 *inq_response, int alloc_len)
596{
597 struct nvme_id_ns *id_ns;
598 int nvme_sc, res;
599 size_t len;
600 void *eui;
601
602 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
603 res = nvme_trans_status_code(hdr, nvme_sc);
604 if (res)
605 return res;
606
607 eui = id_ns->eui64;
608 len = sizeof(id_ns->eui64);
609
610 if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
611 if (bitmap_empty(eui, len * 8)) {
612 eui = id_ns->nguid;
613 len = sizeof(id_ns->nguid);
614 }
615 }
616
617 if (bitmap_empty(eui, len * 8)) {
618 res = -EOPNOTSUPP;
619 goto out_free_id;
620 }
621
622 memset(inq_response, 0, alloc_len);
623 inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
624 inq_response[3] = 4 + len; /* Page Length */
625
626 /* Designation Descriptor start */
627 inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
628 inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
629 inq_response[6] = 0x00; /* Rsvd */
630 inq_response[7] = len; /* Designator Length */
631 memcpy(&inq_response[8], eui, len);
632
633 res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
634out_free_id:
635 kfree(id_ns);
636 return res;
637}
638
639static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
640 struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
641{
642 struct nvme_ctrl *ctrl = ns->ctrl;
643 struct nvme_id_ctrl *id_ctrl;
644 int nvme_sc, res;
645
646 if (alloc_len < 72) {
647 return nvme_trans_completion(hdr,
648 SAM_STAT_CHECK_CONDITION,
649 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
650 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
651 }
652
653 nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
654 res = nvme_trans_status_code(hdr, nvme_sc);
655 if (res)
656 return res;
657
658 memset(inq_response, 0, alloc_len);
659 inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
660 inq_response[3] = 0x48; /* Page Length */
661
662 /* Designation Descriptor start */
663 inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
664 inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
665 inq_response[6] = 0x00; /* Rsvd */
666 inq_response[7] = 0x44; /* Designator Length */
667
668 sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
669 memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
670 sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
671 memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
672
673 res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
674 kfree(id_ctrl);
675 return res;
676}
677
678static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
679 u8 *resp, int alloc_len)
680{
681 int res;
682
683 if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
684 res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
685 if (res != -EOPNOTSUPP)
686 return res;
687 }
688
689 return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
690}
691
692static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
693 int alloc_len)
694{
695 u8 *inq_response;
696 int res;
697 int nvme_sc;
698 struct nvme_ctrl *ctrl = ns->ctrl;
699 struct nvme_id_ctrl *id_ctrl;
700 struct nvme_id_ns *id_ns;
701 int xfer_len;
702 u8 microcode = 0x80;
703 u8 spt;
704 u8 spt_lut[8] = {0, 0, 2, 1, 4, 6, 5, 7};
705 u8 grd_chk, app_chk, ref_chk, protect;
706 u8 uask_sup = 0x20;
707 u8 v_sup;
708 u8 luiclr = 0x01;
709
710 inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
711 if (inq_response == NULL)
712 return -ENOMEM;
713
714 nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
715 res = nvme_trans_status_code(hdr, nvme_sc);
716 if (res)
717 goto out_free_inq;
718
719 spt = spt_lut[id_ns->dpc & 0x07] << 3;
720 if (id_ns->dps)
721 protect = 0x01;
722 else
723 protect = 0;
724 kfree(id_ns);
725
726 grd_chk = protect << 2;
727 app_chk = protect << 1;
728 ref_chk = protect;
729
730 nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
731 res = nvme_trans_status_code(hdr, nvme_sc);
732 if (res)
733 goto out_free_inq;
734
735 v_sup = id_ctrl->vwc;
736 kfree(id_ctrl);
737
738 memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
739 inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */
740 inq_response[2] = 0x00; /* Page Length MSB */
741 inq_response[3] = 0x3C; /* Page Length LSB */
742 inq_response[4] = microcode | spt | grd_chk | app_chk | ref_chk;
743 inq_response[5] = uask_sup;
744 inq_response[6] = v_sup;
745 inq_response[7] = luiclr;
746 inq_response[8] = 0;
747 inq_response[9] = 0;
748
749 xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
750 res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
751
752 out_free_inq:
753 kfree(inq_response);
754 return res;
755}
756
757static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
758 u8 *inq_response, int alloc_len)
759{
760 __be32 max_sectors = cpu_to_be32(
761 nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
762 __be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
763 __be32 discard_desc_count = cpu_to_be32(0x100);
764
765 memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
766 inq_response[1] = VPD_BLOCK_LIMITS;
767 inq_response[3] = 0x3c; /* Page Length */
768 memcpy(&inq_response[8], &max_sectors, sizeof(u32));
769 memcpy(&inq_response[20], &max_discard, sizeof(u32));
770
771 if (max_discard)
772 memcpy(&inq_response[24], &discard_desc_count, sizeof(u32));
773
774 return nvme_trans_copy_to_user(hdr, inq_response, 0x3c);
775}
776
777static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
778 int alloc_len)
779{
780 u8 *inq_response;
781 int res;
782 int xfer_len;
783
784 inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
785 if (inq_response == NULL) {
786 res = -ENOMEM;
787 goto out_mem;
788 }
789
790 inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
791 inq_response[2] = 0x00; /* Page Length MSB */
792 inq_response[3] = 0x3C; /* Page Length LSB */
793 inq_response[4] = 0x00; /* Medium Rotation Rate MSB */
794 inq_response[5] = 0x01; /* Medium Rotation Rate LSB */
795 inq_response[6] = 0x00; /* Form Factor */
796
797 xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
798 res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
799
800 kfree(inq_response);
801 out_mem:
802 return res;
803}
804
805/* LOG SENSE Helper Functions */
806
807static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
808 int alloc_len)
809{
810 int res;
811 int xfer_len;
812 u8 *log_response;
813
814 log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
815 if (log_response == NULL) {
816 res = -ENOMEM;
817 goto out_mem;
818 }
819
820 log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
821 /* Subpage=0x00, Page Length MSB=0 */
822 log_response[3] = SUPPORTED_LOG_PAGES_PAGE_LENGTH;
823 log_response[4] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
824 log_response[5] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
825 log_response[6] = LOG_PAGE_TEMPERATURE_PAGE;
826
827 xfer_len = min(alloc_len, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
828 res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
829
830 kfree(log_response);
831 out_mem:
832 return res;
833}
834
835static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
836 struct sg_io_hdr *hdr, int alloc_len)
837{
838 int res;
839 int xfer_len;
840 u8 *log_response;
841 struct nvme_smart_log *smart_log;
842 u8 temp_c;
843 u16 temp_k;
844
845 log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
846 if (log_response == NULL)
847 return -ENOMEM;
848
849 res = nvme_get_log_page(ns->ctrl, &smart_log);
850 if (res < 0)
851 goto out_free_response;
852
853 if (res != NVME_SC_SUCCESS) {
854 temp_c = LOG_TEMP_UNKNOWN;
855 } else {
856 temp_k = (smart_log->temperature[1] << 8) +
857 (smart_log->temperature[0]);
858 temp_c = temp_k - KELVIN_TEMP_FACTOR;
859 }
860 kfree(smart_log);
861
862 log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
863 /* Subpage=0x00, Page Length MSB=0 */
864 log_response[3] = REMAINING_INFO_EXCP_PAGE_LENGTH;
865 /* Informational Exceptions Log Parameter 1 Start */
866 /* Parameter Code=0x0000 bytes 4,5 */
867 log_response[6] = 0x23; /* DU=0, TSD=1, ETC=0, TMC=0, FMT_AND_LNK=11b */
868 log_response[7] = 0x04; /* PARAMETER LENGTH */
869 /* Add sense Code and qualifier = 0x00 each */
870 /* Use Temperature from NVMe Get Log Page, convert to C from K */
871 log_response[10] = temp_c;
872
873 xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
874 res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
875
876 out_free_response:
877 kfree(log_response);
878 return res;
879}
880
881static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
882 int alloc_len)
883{
884 int res;
885 int xfer_len;
886 u8 *log_response;
887 struct nvme_smart_log *smart_log;
888 u32 feature_resp;
889 u8 temp_c_cur, temp_c_thresh;
890 u16 temp_k;
891
892 log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
893 if (log_response == NULL)
894 return -ENOMEM;
895
896 res = nvme_get_log_page(ns->ctrl, &smart_log);
897 if (res < 0)
898 goto out_free_response;
899
900 if (res != NVME_SC_SUCCESS) {
901 temp_c_cur = LOG_TEMP_UNKNOWN;
902 } else {
903 temp_k = (smart_log->temperature[1] << 8) +
904 (smart_log->temperature[0]);
905 temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
906 }
907 kfree(smart_log);
908
909 /* Get Features for Temp Threshold */
910 res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, NULL, 0,
911 &feature_resp);
912 if (res != NVME_SC_SUCCESS)
913 temp_c_thresh = LOG_TEMP_UNKNOWN;
914 else
915 temp_c_thresh = (feature_resp & 0xFFFF) - KELVIN_TEMP_FACTOR;
916
917 log_response[0] = LOG_PAGE_TEMPERATURE_PAGE;
918 /* Subpage=0x00, Page Length MSB=0 */
919 log_response[3] = REMAINING_TEMP_PAGE_LENGTH;
920 /* Temperature Log Parameter 1 (Temperature) Start */
921 /* Parameter Code = 0x0000 */
922 log_response[6] = 0x01; /* Format and Linking = 01b */
923 log_response[7] = 0x02; /* Parameter Length */
924 /* Use Temperature from NVMe Get Log Page, convert to C from K */
925 log_response[9] = temp_c_cur;
926 /* Temperature Log Parameter 2 (Reference Temperature) Start */
927 log_response[11] = 0x01; /* Parameter Code = 0x0001 */
928 log_response[12] = 0x01; /* Format and Linking = 01b */
929 log_response[13] = 0x02; /* Parameter Length */
930 /* Use Temperature Thresh from NVMe Get Log Page, convert to C from K */
931 log_response[15] = temp_c_thresh;
932
933 xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
934 res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
935
936 out_free_response:
937 kfree(log_response);
938 return res;
939}
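Both log-page translations above decode the controller temperature the same way: the NVMe SMART log stores it as a 16-bit little-endian value in Kelvin, so the two bytes are assembled and KELVIN_TEMP_FACTOR (273) is subtracted to report Celsius. A standalone sketch of that conversion (illustration only, not kernel code):

#include <assert.h>
#include <stdint.h>

#define KELVIN_TEMP_FACTOR 273

/* temperature[] is the two-byte little-endian Kelvin field from the SMART log. */
static int smart_temp_to_celsius(const uint8_t temperature[2])
{
	uint16_t temp_k = (uint16_t)(temperature[1] << 8) | temperature[0];

	return (int)temp_k - KELVIN_TEMP_FACTOR;
}

int main(void)
{
	const uint8_t t[2] = { 0x3b, 0x01 };	/* 0x013b = 315 K */

	assert(smart_temp_to_celsius(t) == 42);	/* 315 - 273 */
	return 0;
}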
940
941/* MODE SENSE Helper Functions */
942
943static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
944 u16 mode_data_length, u16 blk_desc_len)
945{
946 /* Quick check to make sure I don't stomp on my own memory... */
947 if ((cdb10 && len < 8) || (!cdb10 && len < 4))
948 return -EINVAL;
949
950 if (cdb10) {
951 resp[0] = (mode_data_length & 0xFF00) >> 8;
952 resp[1] = (mode_data_length & 0x00FF);
953 resp[3] = 0x10 /* DPOFUA */;
954 resp[4] = llbaa;
955 resp[5] = RESERVED_FIELD;
956 resp[6] = (blk_desc_len & 0xFF00) >> 8;
957 resp[7] = (blk_desc_len & 0x00FF);
958 } else {
959 resp[0] = (mode_data_length & 0x00FF);
960 resp[2] = 0x10 /* DPOFUA */;
961 resp[3] = (blk_desc_len & 0x00FF);
962 }
963
964 return 0;
965}
966
967static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
968 u8 *resp, int len, u8 llbaa)
969{
970 int res;
971 int nvme_sc;
972 struct nvme_id_ns *id_ns;
973 u8 flbas;
974 u32 lba_length;
975
976 if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
977 return -EINVAL;
978 else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
979 return -EINVAL;
980
981 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
982 res = nvme_trans_status_code(hdr, nvme_sc);
983 if (res)
984 return res;
985
986 flbas = (id_ns->flbas) & 0x0F;
987 lba_length = (1 << (id_ns->lbaf[flbas].ds));
988
989 if (llbaa == 0) {
990 __be32 tmp_cap = cpu_to_be32(le64_to_cpu(id_ns->ncap));
991 /* Byte 4 is reserved */
992 __be32 tmp_len = cpu_to_be32(lba_length & 0x00FFFFFF);
993
994 memcpy(resp, &tmp_cap, sizeof(u32));
995 memcpy(&resp[4], &tmp_len, sizeof(u32));
996 } else {
997 __be64 tmp_cap = cpu_to_be64(le64_to_cpu(id_ns->ncap));
998 __be32 tmp_len = cpu_to_be32(lba_length);
999
1000 memcpy(resp, &tmp_cap, sizeof(u64));
1001 /* Bytes 8, 9, 10, 11 are reserved */
1002 memcpy(&resp[12], &tmp_len, sizeof(u32));
1003 }
1004
1005 kfree(id_ns);
1006 return res;
1007}
1008
1009static int nvme_trans_fill_control_page(struct nvme_ns *ns,
1010 struct sg_io_hdr *hdr, u8 *resp,
1011 int len)
1012{
1013 if (len < MODE_PAGE_CONTROL_LEN)
1014 return -EINVAL;
1015
1016 resp[0] = MODE_PAGE_CONTROL;
1017 resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
1018 resp[2] = 0x0E; /* TST=000b, TMF_ONLY=0, DPICZ=1,
1019 * D_SENSE=1, GLTSD=1, RLEC=0 */
1020 resp[3] = 0x12; /* Q_ALGO_MODIFIER=1h, NUAR=0, QERR=01b */
1021 /* Byte 4: VS=0, RAC=0, UA_INT=0, SWP=0 */
1022 resp[5] = 0x40; /* ATO=0, TAS=1, ATMPE=0, RWWP=0, AUTOLOAD=0 */
1023 /* resp[6] and [7] are obsolete, thus zero */
1024 resp[8] = 0xFF; /* Busy timeout period = 0xffff */
1025 resp[9] = 0xFF;
1026 /* Bytes 10,11: Extended selftest completion time = 0x0000 */
1027
1028 return 0;
1029}
1030
1031static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
1032 struct sg_io_hdr *hdr,
1033 u8 *resp, int len)
1034{
1035 int res = 0;
1036 int nvme_sc;
1037 u32 feature_resp;
1038 u8 vwc;
1039
1040 if (len < MODE_PAGE_CACHING_LEN)
1041 return -EINVAL;
1042
1043 nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, NULL, 0,
1044 &feature_resp);
1045 res = nvme_trans_status_code(hdr, nvme_sc);
1046 if (res)
1047 return res;
1048
1049 vwc = feature_resp & 0x00000001;
1050
1051 resp[0] = MODE_PAGE_CACHING;
1052 resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
1053 resp[2] = vwc << 2;
1054 return 0;
1055}
1056
1057static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
1058 struct sg_io_hdr *hdr, u8 *resp,
1059 int len)
1060{
1061 if (len < MODE_PAGE_POW_CND_LEN)
1062 return -EINVAL;
1063
1064 resp[0] = MODE_PAGE_POWER_CONDITION;
1065 resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
1066 /* All other bytes are zero */
1067
1068 return 0;
1069}
1070
1071static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
1072 struct sg_io_hdr *hdr, u8 *resp,
1073 int len)
1074{
1075 if (len < MODE_PAGE_INF_EXC_LEN)
1076 return -EINVAL;
1077
1078 resp[0] = MODE_PAGE_INFO_EXCEP;
1079 resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
1080 resp[2] = 0x88;
1081 /* All other bytes are zero */
1082
1083 return 0;
1084}
1085
1086static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1087 u8 *resp, int len)
1088{
1089 int res;
1090 u16 mode_pages_offset_1 = 0;
1091 u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
1092
1093 mode_pages_offset_2 = mode_pages_offset_1 + MODE_PAGE_CACHING_LEN;
1094 mode_pages_offset_3 = mode_pages_offset_2 + MODE_PAGE_CONTROL_LEN;
1095 mode_pages_offset_4 = mode_pages_offset_3 + MODE_PAGE_POW_CND_LEN;
1096
1097 res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
1098 MODE_PAGE_CACHING_LEN);
1099 if (res)
1100 return res;
1101 res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
1102 MODE_PAGE_CONTROL_LEN);
1103 if (res)
1104 return res;
1105 res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
1106 MODE_PAGE_POW_CND_LEN);
1107 if (res)
1108 return res;
1109 return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
1110 MODE_PAGE_INF_EXC_LEN);
1111}
1112
1113static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
1114{
1115 if (dbd == MODE_SENSE_BLK_DESC_ENABLED) {
1116 /* SPC-4: len = 8 x Num_of_descriptors if llbaa = 0, 16x if 1 */
1117 return 8 * (llbaa + 1) * MODE_SENSE_BLK_DESC_COUNT;
1118 } else {
1119 return 0;
1120 }
1121}
1122
1123static int nvme_trans_mode_page_create(struct nvme_ns *ns,
1124 struct sg_io_hdr *hdr, u8 *cmd,
1125 u16 alloc_len, u8 cdb10,
1126 int (*mode_page_fill_func)
1127 (struct nvme_ns *,
1128 struct sg_io_hdr *hdr, u8 *, int),
1129 u16 mode_pages_tot_len)
1130{
1131 int res;
1132 int xfer_len;
1133 u8 *response;
1134 u8 dbd, llbaa;
1135 u16 resp_size;
1136 int mph_size;
1137 u16 mode_pages_offset_1;
1138 u16 blk_desc_len, blk_desc_offset, mode_data_length;
1139
1140 dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
1141 llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
1142 mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
1143
1144 blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
1145
1146 resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
1147 /* Refer spc4r34 Table 440 for calculation of Mode data Length field */
1148 mode_data_length = 3 + (3 * cdb10) + blk_desc_len + mode_pages_tot_len;
1149
1150 blk_desc_offset = mph_size;
1151 mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
1152
1153 response = kzalloc(resp_size, GFP_KERNEL);
1154 if (response == NULL) {
1155 res = -ENOMEM;
1156 goto out_mem;
1157 }
1158
1159 res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
1160 llbaa, mode_data_length, blk_desc_len);
1161 if (res)
1162 goto out_free;
1163 if (blk_desc_len > 0) {
1164 res = nvme_trans_fill_blk_desc(ns, hdr,
1165 &response[blk_desc_offset],
1166 blk_desc_len, llbaa);
1167 if (res)
1168 goto out_free;
1169 }
1170 res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
1171 mode_pages_tot_len);
1172 if (res)
1173 goto out_free;
1174
1175 xfer_len = min(alloc_len, resp_size);
1176 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
1177
1178 out_free:
1179 kfree(response);
1180 out_mem:
1181 return res;
1182}
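The size bookkeeping in nvme_trans_mode_page_create() is the usual MODE SENSE layout: header, optional block descriptor(s), then the mode pages, with the MODE DATA LENGTH field counting only the bytes that follow it (total minus 1 for the 6-byte-CDB header, minus 2 for the 10-byte one, which the code folds into the "3 + 3 * cdb10" term). A standalone sketch of that arithmetic, reusing the constants defined earlier in this file (illustration only):

#include <assert.h>

#define MODE_SENSE6_MPH_SIZE	4
#define MODE_SENSE10_MPH_SIZE	8

static void mode_sense_sizes(int cdb10, int blk_desc_len, int pages_len,
			     int *resp_size, int *mode_data_length)
{
	int mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;

	*resp_size = mph_size + blk_desc_len + pages_len;
	/* Bytes following the MODE DATA LENGTH field (spc4r34 table 440). */
	*mode_data_length = 3 + (3 * cdb10) + blk_desc_len + pages_len;
}

int main(void)
{
	int resp, mdl;

	/* MODE SENSE(10), one 8-byte block descriptor, 20-byte caching page. */
	mode_sense_sizes(1, 8, 20, &resp, &mdl);
	assert(resp == 36 && mdl == 34);
	return 0;
}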
1183
1184/* Read Capacity Helper Functions */
1185
1186static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
1187 u8 cdb16)
1188{
1189 u8 flbas;
1190 u32 lba_length;
1191 u64 rlba;
1192 u8 prot_en;
1193 u8 p_type_lut[4] = {0, 0, 1, 2};
1194 __be64 tmp_rlba;
1195 __be32 tmp_rlba_32;
1196 __be32 tmp_len;
1197
1198 flbas = (id_ns->flbas) & 0x0F;
1199 lba_length = (1 << (id_ns->lbaf[flbas].ds));
1200 rlba = le64_to_cpup(&id_ns->nsze) - 1;
1201 (id_ns->dps) ? (prot_en = 0x01) : (prot_en = 0);
1202
1203 if (!cdb16) {
1204 if (rlba > 0xFFFFFFFF)
1205 rlba = 0xFFFFFFFF;
1206 tmp_rlba_32 = cpu_to_be32(rlba);
1207 tmp_len = cpu_to_be32(lba_length);
1208 memcpy(response, &tmp_rlba_32, sizeof(u32));
1209 memcpy(&response[4], &tmp_len, sizeof(u32));
1210 } else {
1211 tmp_rlba = cpu_to_be64(rlba);
1212 tmp_len = cpu_to_be32(lba_length);
1213 memcpy(response, &tmp_rlba, sizeof(u64));
1214 memcpy(&response[8], &tmp_len, sizeof(u32));
1215 response[12] = (p_type_lut[id_ns->dps & 0x3] << 1) | prot_en;
1216 /* P_I_Exponent = 0x0 | LBPPBE = 0x0 */
1217 /* LBPME = 0 | LBPRZ = 0 | LALBA = 0x00 */
1218 /* Bytes 16-31 - Reserved */
1219 }
1220}
1221
1222/* Start Stop Unit Helper Functions */
1223
1224static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1225 u8 buffer_id)
1226{
1227 struct nvme_command c;
1228 int nvme_sc;
1229
1230 memset(&c, 0, sizeof(c));
1231 c.common.opcode = nvme_admin_activate_fw;
1232 c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
1233
1234 nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
1235 return nvme_trans_status_code(hdr, nvme_sc);
1236}
1237
1238static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1239 u8 opcode, u32 tot_len, u32 offset,
1240 u8 buffer_id)
1241{
1242 int nvme_sc;
1243 struct nvme_command c;
1244
1245 if (hdr->iovec_count > 0) {
1246 /* Assuming SGL is not allowed for this command */
1247 return nvme_trans_completion(hdr,
1248 SAM_STAT_CHECK_CONDITION,
1249 ILLEGAL_REQUEST,
1250 SCSI_ASC_INVALID_CDB,
1251 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1252 }
1253
1254 memset(&c, 0, sizeof(c));
1255 c.common.opcode = nvme_admin_download_fw;
1256 c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
1257 c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
1258
1259 nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
1260 hdr->dxferp, tot_len, NULL, 0);
1261 return nvme_trans_status_code(hdr, nvme_sc);
1262}
1263
1264/* Mode Select Helper Functions */
1265
1266static inline void nvme_trans_modesel_get_bd_len(u8 *parm_list, u8 cdb10,
1267 u16 *bd_len, u8 *llbaa)
1268{
1269 if (cdb10) {
1270 /* 10 Byte CDB */
1271 *bd_len = (parm_list[MODE_SELECT_10_BD_OFFSET] << 8) +
1272 parm_list[MODE_SELECT_10_BD_OFFSET + 1];
1273 *llbaa = parm_list[MODE_SELECT_10_LLBAA_OFFSET] &
1274 MODE_SELECT_10_LLBAA_MASK;
1275 } else {
1276 /* 6 Byte CDB */
1277 *bd_len = parm_list[MODE_SELECT_6_BD_OFFSET];
1278 }
1279}
1280
1281static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
1282 u16 idx, u16 bd_len, u8 llbaa)
1283{
1284 /* Store block descriptor info if a FORMAT UNIT comes later */
1285 /* TODO Saving 1st BD info; what to do if multiple BD received? */
1286 if (llbaa == 0) {
1287 /* Standard Block Descriptor - spc4r34 7.5.5.1 */
1288 ns->mode_select_num_blocks =
1289 (parm_list[idx + 1] << 16) +
1290 (parm_list[idx + 2] << 8) +
1291 (parm_list[idx + 3]);
1292
1293 ns->mode_select_block_len =
1294 (parm_list[idx + 5] << 16) +
1295 (parm_list[idx + 6] << 8) +
1296 (parm_list[idx + 7]);
1297 } else {
1298 /* Long LBA Block Descriptor - sbc3r27 6.4.2.3 */
1299 ns->mode_select_num_blocks =
1300 (((u64)parm_list[idx + 0]) << 56) +
1301 (((u64)parm_list[idx + 1]) << 48) +
1302 (((u64)parm_list[idx + 2]) << 40) +
1303 (((u64)parm_list[idx + 3]) << 32) +
1304 (((u64)parm_list[idx + 4]) << 24) +
1305 (((u64)parm_list[idx + 5]) << 16) +
1306 (((u64)parm_list[idx + 6]) << 8) +
1307 ((u64)parm_list[idx + 7]);
1308
1309 ns->mode_select_block_len =
1310 (parm_list[idx + 12] << 24) +
1311 (parm_list[idx + 13] << 16) +
1312 (parm_list[idx + 14] << 8) +
1313 (parm_list[idx + 15]);
1314 }
1315}
1316
1317static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1318 u8 *mode_page, u8 page_code)
1319{
1320 int res = 0;
1321 int nvme_sc;
1322 unsigned dword11;
1323
1324 switch (page_code) {
1325 case MODE_PAGE_CACHING:
1326 dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
1327 nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
1328 dword11, NULL, 0, NULL);
1329 res = nvme_trans_status_code(hdr, nvme_sc);
1330 break;
1331 case MODE_PAGE_CONTROL:
1332 break;
1333 case MODE_PAGE_POWER_CONDITION:
1334 /* Verify the OS is not trying to set timers */
1335 if ((mode_page[2] & 0x01) != 0 || (mode_page[3] & 0x0F) != 0) {
1336 res = nvme_trans_completion(hdr,
1337 SAM_STAT_CHECK_CONDITION,
1338 ILLEGAL_REQUEST,
1339 SCSI_ASC_INVALID_PARAMETER,
1340 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1341 break;
1342 }
1343 break;
1344 default:
1345 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1346 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1347 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1348 break;
1349 }
1350
1351 return res;
1352}
1353
1354static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1355 u8 *cmd, u16 parm_list_len, u8 pf,
1356 u8 sp, u8 cdb10)
1357{
1358 int res;
1359 u8 *parm_list;
1360 u16 bd_len;
1361 u8 llbaa = 0;
1362 u16 index, saved_index;
1363 u8 page_code;
1364 u16 mp_size;
1365
1366 /* Get parm list from data-in/out buffer */
1367 parm_list = kmalloc(parm_list_len, GFP_KERNEL);
1368 if (parm_list == NULL) {
1369 res = -ENOMEM;
1370 goto out;
1371 }
1372
1373 res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
1374 if (res)
1375 goto out_mem;
1376
1377 nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
1378 index = (cdb10) ? (MODE_SELECT_10_MPH_SIZE) : (MODE_SELECT_6_MPH_SIZE);
1379
1380 if (bd_len != 0) {
1381 /* Block Descriptors present, parse */
1382 nvme_trans_modesel_save_bd(ns, parm_list, index, bd_len, llbaa);
1383 index += bd_len;
1384 }
1385 saved_index = index;
1386
1387 /* Multiple mode pages may be present; iterate through all */
1388 /* In 1st Iteration, don't do NVME Command, only check for CDB errors */
1389 do {
1390 page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
1391 mp_size = parm_list[index + 1] + 2;
1392 if ((page_code != MODE_PAGE_CACHING) &&
1393 (page_code != MODE_PAGE_CONTROL) &&
1394 (page_code != MODE_PAGE_POWER_CONDITION)) {
1395 res = nvme_trans_completion(hdr,
1396 SAM_STAT_CHECK_CONDITION,
1397 ILLEGAL_REQUEST,
1398 SCSI_ASC_INVALID_CDB,
1399 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1400 goto out_mem;
1401 }
1402 index += mp_size;
1403 } while (index < parm_list_len);
1404
1405 /* In 2nd Iteration, do the NVME Commands */
1406 index = saved_index;
1407 do {
1408 page_code = parm_list[index] & MODE_SELECT_PAGE_CODE_MASK;
1409 mp_size = parm_list[index + 1] + 2;
1410 res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
1411 page_code);
1412 if (res)
1413 break;
1414 index += mp_size;
1415 } while (index < parm_list_len);
1416
1417 out_mem:
1418 kfree(parm_list);
1419 out:
1420 return res;
1421}
1422
1423/* Format Unit Helper Functions */
1424
1425static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
1426 struct sg_io_hdr *hdr)
1427{
1428 int res = 0;
1429 int nvme_sc;
1430 u8 flbas;
1431
1432 /*
1433 * SCSI Expects a MODE SELECT would have been issued prior to
1434 * a FORMAT UNIT, and the block size and number would be used
1435 * from the block descriptor in it. If a MODE SELECT had not
1436 * been issued, FORMAT shall use the current values for both.
1437 */
1438
1439 if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
1440 struct nvme_id_ns *id_ns;
1441
1442 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
1443 res = nvme_trans_status_code(hdr, nvme_sc);
1444 if (res)
1445 return res;
1446
1447 if (ns->mode_select_num_blocks == 0)
1448 ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
1449 if (ns->mode_select_block_len == 0) {
1450 flbas = (id_ns->flbas) & 0x0F;
1451 ns->mode_select_block_len =
1452 (1 << (id_ns->lbaf[flbas].ds));
1453 }
1454
1455 kfree(id_ns);
1456 }
1457
1458 return 0;
1459}
1460
1461static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
1462 u8 format_prot_info, u8 *nvme_pf_code)
1463{
1464 int res;
1465 u8 *parm_list;
1466 u8 pf_usage, pf_code;
1467
1468 parm_list = kmalloc(len, GFP_KERNEL);
1469 if (parm_list == NULL) {
1470 res = -ENOMEM;
1471 goto out;
1472 }
1473 res = nvme_trans_copy_from_user(hdr, parm_list, len);
1474 if (res)
1475 goto out_mem;
1476
1477 if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
1478 FORMAT_UNIT_IMMED_MASK) != 0) {
1479 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1480 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1481 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1482 goto out_mem;
1483 }
1484
1485 if (len == FORMAT_UNIT_LONG_PARM_LIST_LEN &&
1486 (parm_list[FORMAT_UNIT_PROT_INT_OFFSET] & 0x0F) != 0) {
1487 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1488 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1489 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1490 goto out_mem;
1491 }
1492 pf_usage = parm_list[FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET] &
1493 FORMAT_UNIT_PROT_FIELD_USAGE_MASK;
1494 pf_code = (pf_usage << 2) | format_prot_info;
1495 switch (pf_code) {
1496 case 0:
1497 *nvme_pf_code = 0;
1498 break;
1499 case 2:
1500 *nvme_pf_code = 1;
1501 break;
1502 case 3:
1503 *nvme_pf_code = 2;
1504 break;
1505 case 7:
1506 *nvme_pf_code = 3;
1507 break;
1508 default:
1509 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1510 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1511 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1512 break;
1513 }
1514
1515 out_mem:
1516 kfree(parm_list);
1517 out:
1518 return res;
1519}
1520
1521static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1522 u8 prot_info)
1523{
1524 int res;
1525 int nvme_sc;
1526 struct nvme_id_ns *id_ns;
1527 u8 i;
1528 u8 nlbaf;
1529 u8 selected_lbaf = 0xFF;
1530 u32 cdw10 = 0;
1531 struct nvme_command c;
1532
1533 /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
1534 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
1535 res = nvme_trans_status_code(hdr, nvme_sc);
1536 if (res)
1537 return res;
1538
1539 nlbaf = id_ns->nlbaf;
1540
1541 for (i = 0; i < nlbaf; i++) {
1542 if (ns->mode_select_block_len == (1 << (id_ns->lbaf[i].ds))) {
1543 selected_lbaf = i;
1544 break;
1545 }
1546 }
1547 if (selected_lbaf > 0x0F) {
1548 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1549 ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
1550 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1551 }
1552 if (ns->mode_select_num_blocks != le64_to_cpu(id_ns->ncap)) {
1553 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1554 ILLEGAL_REQUEST, SCSI_ASC_INVALID_PARAMETER,
1555 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1556 }
1557
1558 cdw10 |= prot_info << 5;
1559 cdw10 |= selected_lbaf & 0x0F;
1560 memset(&c, 0, sizeof(c));
1561 c.format.opcode = nvme_admin_format_nvm;
1562 c.format.nsid = cpu_to_le32(ns->ns_id);
1563 c.format.cdw10 = cpu_to_le32(cdw10);
1564
1565 nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
1566 res = nvme_trans_status_code(hdr, nvme_sc);
1567
1568 kfree(id_ns);
1569 return res;
1570}
1571
1572static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
1573 struct nvme_trans_io_cdb *cdb_info,
1574 u32 max_blocks)
1575{
1576 /* If using iovecs, send one nvme command per vector */
1577 if (hdr->iovec_count > 0)
1578 return hdr->iovec_count;
1579 else if (cdb_info->xfer_len > max_blocks)
1580 return ((cdb_info->xfer_len - 1) / max_blocks) + 1;
1581 else
1582 return 1;
1583}
1584
1585static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
1586 struct nvme_trans_io_cdb *cdb_info)
1587{
1588 u16 control = 0;
1589
1590 /* When Protection information support is added, implement here */
1591
1592 if (cdb_info->fua > 0)
1593 control |= NVME_RW_FUA;
1594
1595 return control;
1596}
1597
1598static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1599 struct nvme_trans_io_cdb *cdb_info, u8 is_write)
1600{
1601 int nvme_sc = NVME_SC_SUCCESS;
1602 u32 num_cmds;
1603 u64 unit_len;
1604 u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */
1605 u32 retcode;
1606 u32 i = 0;
1607 u64 nvme_offset = 0;
1608 void __user *next_mapping_addr;
1609 struct nvme_command c;
1610 u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read);
1611 u16 control;
1612 u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9);
1613
1614 num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks);
1615
1616 /*
1617 * This loop handles two cases.
1618 * First, when an SGL is used in the form of an iovec list:
1619 * - Use iov_base as the next mapping address for the nvme command_id
1620 * - Use iov_len as the data transfer length for the command.
1621 * Second, when we have a single buffer
1622 * - If larger than max_blocks, split into chunks, offset
1623 * each nvme command accordingly.
1624 */
1625 for (i = 0; i < num_cmds; i++) {
1626 memset(&c, 0, sizeof(c));
1627 if (hdr->iovec_count > 0) {
1628 struct sg_iovec sgl;
1629
1630 retcode = copy_from_user(&sgl, hdr->dxferp +
1631 i * sizeof(struct sg_iovec),
1632 sizeof(struct sg_iovec));
1633 if (retcode)
1634 return -EFAULT;
1635 unit_len = sgl.iov_len;
1636 unit_num_blocks = unit_len >> ns->lba_shift;
1637 next_mapping_addr = sgl.iov_base;
1638 } else {
1639 unit_num_blocks = min((u64)max_blocks,
1640 (cdb_info->xfer_len - nvme_offset));
1641 unit_len = unit_num_blocks << ns->lba_shift;
1642 next_mapping_addr = hdr->dxferp +
1643 ((1 << ns->lba_shift) * nvme_offset);
1644 }
1645
1646 c.rw.opcode = opcode;
1647 c.rw.nsid = cpu_to_le32(ns->ns_id);
1648 c.rw.slba = cpu_to_le64(cdb_info->lba + nvme_offset);
1649 c.rw.length = cpu_to_le16(unit_num_blocks - 1);
1650 control = nvme_trans_io_get_control(ns, cdb_info);
1651 c.rw.control = cpu_to_le16(control);
1652
1653 if (get_capacity(ns->disk) - unit_num_blocks <
1654 cdb_info->lba + nvme_offset) {
1655 nvme_sc = NVME_SC_LBA_RANGE;
1656 break;
1657 }
1658 nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
1659 next_mapping_addr, unit_len, NULL, 0);
1660 if (nvme_sc)
1661 break;
1662
1663 nvme_offset += unit_num_blocks;
1664 }
1665
1666 return nvme_trans_status_code(hdr, nvme_sc);
1667}
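For a flat (non-iovec) buffer, the loop above splits the request into ceil(xfer_len / max_blocks) NVMe commands, advancing the starting LBA by the blocks already issued and encoding each per-command length as "blocks minus one", which is how the read/write command length field above is filled in. A standalone sketch of just that chunking arithmetic (illustration only, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Zero-length transfers are filtered out earlier, as in nvme_trans_io(). */
static void split_io(uint64_t slba, uint32_t xfer_len, uint32_t max_blocks)
{
	uint32_t num_cmds = ((xfer_len - 1) / max_blocks) + 1;	/* ceiling */
	uint64_t offset = 0;

	for (uint32_t i = 0; i < num_cmds; i++) {
		uint32_t blocks = (xfer_len - offset < max_blocks)
				? (uint32_t)(xfer_len - offset) : max_blocks;

		printf("cmd %u: slba=%llu nlb(0-based)=%u\n", (unsigned)i,
		       (unsigned long long)(slba + offset),
		       (unsigned)(blocks - 1));
		offset += blocks;
	}
	assert(offset == xfer_len);
}

int main(void)
{
	split_io(1000, 2100, 1024);	/* 3 commands: 1024 + 1024 + 52 blocks */
	return 0;
}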
1668
1669
1670/* SCSI Command Translation Functions */
1671
1672static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
1673 u8 *cmd)
1674{
1675 int res = 0;
1676 struct nvme_trans_io_cdb cdb_info = { 0, };
1677 u8 opcode = cmd[0];
1678 u64 xfer_bytes;
1679 u64 sum_iov_len = 0;
1680 struct sg_iovec sgl;
1681 int i;
1682 size_t not_copied;
1683
1684 /*
1685 * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
1686 * but always in the same place for all others.
1687 */
1688 switch (opcode) {
1689 case WRITE_6:
1690 case READ_6:
1691 break;
1692 default:
1693 cdb_info.fua = cmd[1] & 0x8;
1694 cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
1695 if (cdb_info.prot_info && !ns->pi_type) {
1696 return nvme_trans_completion(hdr,
1697 SAM_STAT_CHECK_CONDITION,
1698 ILLEGAL_REQUEST,
1699 SCSI_ASC_INVALID_CDB,
1700 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1701 }
1702 }
1703
1704 switch (opcode) {
1705 case WRITE_6:
1706 case READ_6:
1707 cdb_info.lba = get_unaligned_be24(&cmd[1]);
1708 cdb_info.xfer_len = cmd[4];
1709 if (cdb_info.xfer_len == 0)
1710 cdb_info.xfer_len = 256;
1711 break;
1712 case WRITE_10:
1713 case READ_10:
1714 cdb_info.lba = get_unaligned_be32(&cmd[2]);
1715 cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
1716 break;
1717 case WRITE_12:
1718 case READ_12:
1719 cdb_info.lba = get_unaligned_be32(&cmd[2]);
1720 cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
1721 break;
1722 case WRITE_16:
1723 case READ_16:
1724 cdb_info.lba = get_unaligned_be64(&cmd[2]);
1725 cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
1726 break;
1727 default:
1728 /* Will never really reach here */
1729 res = -EIO;
1730 goto out;
1731 }
1732
1733 /* Calculate total length of transfer (in bytes) */
1734 if (hdr->iovec_count > 0) {
1735 for (i = 0; i < hdr->iovec_count; i++) {
1736 not_copied = copy_from_user(&sgl, hdr->dxferp +
1737 i * sizeof(struct sg_iovec),
1738 sizeof(struct sg_iovec));
1739 if (not_copied)
1740 return -EFAULT;
1741 sum_iov_len += sgl.iov_len;
1742 /* IO vector sizes should be multiples of block size */
1743 if (sgl.iov_len % (1 << ns->lba_shift) != 0) {
1744 res = nvme_trans_completion(hdr,
1745 SAM_STAT_CHECK_CONDITION,
1746 ILLEGAL_REQUEST,
1747 SCSI_ASC_INVALID_PARAMETER,
1748 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1749 goto out;
1750 }
1751 }
1752 } else {
1753 sum_iov_len = hdr->dxfer_len;
1754 }
1755
1756 /* As Per sg ioctl howto, if the lengths differ, use the lower one */
1757 xfer_bytes = min(((u64)hdr->dxfer_len), sum_iov_len);
1758
1759 /* If block count and actual data buffer size don't match, error out */
1760 if (xfer_bytes != (cdb_info.xfer_len << ns->lba_shift)) {
1761 res = -EINVAL;
1762 goto out;
1763 }
1764
1765 /* Check for 0 length transfer - it is not illegal */
1766 if (cdb_info.xfer_len == 0)
1767 goto out;
1768
1769 /* Send NVMe IO Command(s) */
1770 res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
1771 if (res)
1772 goto out;
1773
1774 out:
1775 return res;
1776}
1777
1778static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1779 u8 *cmd)
1780{
1781 int res = 0;
1782 u8 evpd;
1783 u8 page_code;
1784 int alloc_len;
1785 u8 *inq_response;
1786
1787 evpd = cmd[1] & 0x01;
1788 page_code = cmd[2];
1789 alloc_len = get_unaligned_be16(&cmd[3]);
1790
1791 inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
1792 GFP_KERNEL);
1793 if (inq_response == NULL) {
1794 res = -ENOMEM;
1795 goto out_mem;
1796 }
1797
1798 if (evpd == 0) {
1799 if (page_code == INQ_STANDARD_INQUIRY_PAGE) {
1800 res = nvme_trans_standard_inquiry_page(ns, hdr,
1801 inq_response, alloc_len);
1802 } else {
1803 res = nvme_trans_completion(hdr,
1804 SAM_STAT_CHECK_CONDITION,
1805 ILLEGAL_REQUEST,
1806 SCSI_ASC_INVALID_CDB,
1807 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1808 }
1809 } else {
1810 switch (page_code) {
1811 case VPD_SUPPORTED_PAGES:
1812 res = nvme_trans_supported_vpd_pages(ns, hdr,
1813 inq_response, alloc_len);
1814 break;
1815 case VPD_SERIAL_NUMBER:
1816 res = nvme_trans_unit_serial_page(ns, hdr, inq_response,
1817 alloc_len);
1818 break;
1819 case VPD_DEVICE_IDENTIFIERS:
1820 res = nvme_trans_device_id_page(ns, hdr, inq_response,
1821 alloc_len);
1822 break;
1823 case VPD_EXTENDED_INQUIRY:
1824 res = nvme_trans_ext_inq_page(ns, hdr, alloc_len);
1825 break;
1826 case VPD_BLOCK_LIMITS:
1827 res = nvme_trans_bdev_limits_page(ns, hdr, inq_response,
1828 alloc_len);
1829 break;
1830 case VPD_BLOCK_DEV_CHARACTERISTICS:
1831 res = nvme_trans_bdev_char_page(ns, hdr, alloc_len);
1832 break;
1833 default:
1834 res = nvme_trans_completion(hdr,
1835 SAM_STAT_CHECK_CONDITION,
1836 ILLEGAL_REQUEST,
1837 SCSI_ASC_INVALID_CDB,
1838 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1839 break;
1840 }
1841 }
1842 kfree(inq_response);
1843 out_mem:
1844 return res;
1845}
1846
1847static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1848 u8 *cmd)
1849{
1850 int res;
1851 u16 alloc_len;
1852 u8 pc;
1853 u8 page_code;
1854
1855 if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
1856 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1857 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1858 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1859 goto out;
1860 }
1861
1862 page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
1863 pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
1864 if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
1865 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1866 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1867 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1868 goto out;
1869 }
1870 alloc_len = get_unaligned_be16(&cmd[7]);
1871 switch (page_code) {
1872 case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
1873 res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
1874 break;
1875 case LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE:
1876 res = nvme_trans_log_info_exceptions(ns, hdr, alloc_len);
1877 break;
1878 case LOG_PAGE_TEMPERATURE_PAGE:
1879 res = nvme_trans_log_temperature(ns, hdr, alloc_len);
1880 break;
1881 default:
1882 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1883 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1884 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1885 break;
1886 }
1887
1888 out:
1889 return res;
1890}
1891
1892static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1893 u8 *cmd)
1894{
1895 u8 cdb10 = 0;
1896 u16 parm_list_len;
1897 u8 page_format;
1898 u8 save_pages;
1899
1900 page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
1901 save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
1902
1903 if (cmd[0] == MODE_SELECT) {
1904 parm_list_len = cmd[4];
1905 } else {
1906 parm_list_len = cmd[7];
1907 cdb10 = 1;
1908 }
1909
1910 if (parm_list_len != 0) {
1911 /*
1912		 * According to SPC-4 r24, a parameter list length field of 0
1913 * shall not be considered an error
1914 */
1915 return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
1916 page_format, save_pages, cdb10);
1917 }
1918
1919 return 0;
1920}
1921
1922static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1923 u8 *cmd)
1924{
1925 int res = 0;
1926 u16 alloc_len;
1927 u8 cdb10 = 0;
1928
1929 if (cmd[0] == MODE_SENSE) {
1930 alloc_len = cmd[4];
1931 } else {
1932 alloc_len = get_unaligned_be16(&cmd[7]);
1933 cdb10 = 1;
1934 }
1935
1936 if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
1937 MODE_SENSE_PC_CURRENT_VALUES) {
1938 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1939 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1940 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1941 goto out;
1942 }
1943
1944 switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
1945 case MODE_PAGE_CACHING:
1946 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1947 cdb10,
1948 &nvme_trans_fill_caching_page,
1949 MODE_PAGE_CACHING_LEN);
1950 break;
1951 case MODE_PAGE_CONTROL:
1952 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1953 cdb10,
1954 &nvme_trans_fill_control_page,
1955 MODE_PAGE_CONTROL_LEN);
1956 break;
1957 case MODE_PAGE_POWER_CONDITION:
1958 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1959 cdb10,
1960 &nvme_trans_fill_pow_cnd_page,
1961 MODE_PAGE_POW_CND_LEN);
1962 break;
1963 case MODE_PAGE_INFO_EXCEP:
1964 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1965 cdb10,
1966 &nvme_trans_fill_inf_exc_page,
1967 MODE_PAGE_INF_EXC_LEN);
1968 break;
1969 case MODE_PAGE_RETURN_ALL:
1970 res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
1971 cdb10,
1972 &nvme_trans_fill_all_pages,
1973 MODE_PAGE_ALL_LEN);
1974 break;
1975 default:
1976 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
1977 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
1978 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
1979 break;
1980 }
1981
1982 out:
1983 return res;
1984}
1985
1986static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1987 u8 *cmd, u8 cdb16)
1988{
1989 int res;
1990 int nvme_sc;
1991 u32 alloc_len;
1992 u32 resp_size;
1993 u32 xfer_len;
1994 struct nvme_id_ns *id_ns;
1995 u8 *response;
1996
1997 if (cdb16) {
1998 alloc_len = get_unaligned_be32(&cmd[10]);
1999 resp_size = READ_CAP_16_RESP_SIZE;
2000 } else {
2001 alloc_len = READ_CAP_10_RESP_SIZE;
2002 resp_size = READ_CAP_10_RESP_SIZE;
2003 }
2004
2005 nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
2006 res = nvme_trans_status_code(hdr, nvme_sc);
2007 if (res)
2008 return res;
2009
2010 response = kzalloc(resp_size, GFP_KERNEL);
2011 if (response == NULL) {
2012 res = -ENOMEM;
2013 goto out_free_id;
2014 }
2015 nvme_trans_fill_read_cap(response, id_ns, cdb16);
2016
2017 xfer_len = min(alloc_len, resp_size);
2018 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
2019
2020 kfree(response);
2021 out_free_id:
2022 kfree(id_ns);
2023 return res;
2024}
2025
2026static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2027 u8 *cmd)
2028{
2029 int res;
2030 int nvme_sc;
2031 u32 alloc_len, xfer_len, resp_size;
2032 u8 *response;
2033 struct nvme_id_ctrl *id_ctrl;
2034 u32 ll_length, lun_id;
2035 u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
2036 __be32 tmp_len;
2037
2038 switch (cmd[2]) {
2039 default:
2040 return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2041 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2042 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2043 case ALL_LUNS_RETURNED:
2044 case ALL_WELL_KNOWN_LUNS_RETURNED:
2045 case RESTRICTED_LUNS_RETURNED:
2046 nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
2047 res = nvme_trans_status_code(hdr, nvme_sc);
2048 if (res)
2049 return res;
2050
2051 ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
2052 resp_size = ll_length + LUN_DATA_HEADER_SIZE;
2053
2054 alloc_len = get_unaligned_be32(&cmd[6]);
2055 if (alloc_len < resp_size) {
2056 res = nvme_trans_completion(hdr,
2057 SAM_STAT_CHECK_CONDITION,
2058 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2059 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2060 goto out_free_id;
2061 }
2062
2063 response = kzalloc(resp_size, GFP_KERNEL);
2064 if (response == NULL) {
2065 res = -ENOMEM;
2066 goto out_free_id;
2067 }
2068
2069 /* The first LUN ID will always be 0 per the SAM spec */
2070 for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
2071 /*
2072 * Set the LUN Id and then increment to the next LUN
2073 * location in the parameter data.
2074 */
2075 __be64 tmp_id = cpu_to_be64(lun_id);
2076 memcpy(&response[lun_id_offset], &tmp_id, sizeof(u64));
2077 lun_id_offset += LUN_ENTRY_SIZE;
2078 }
2079 tmp_len = cpu_to_be32(ll_length);
2080 memcpy(response, &tmp_len, sizeof(u32));
2081 }
2082
2083 xfer_len = min(alloc_len, resp_size);
2084 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
2085
2086 kfree(response);
2087 out_free_id:
2088 kfree(id_ctrl);
2089 return res;
2090}
2091
2092static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2093 u8 *cmd)
2094{
2095 int res;
2096 u8 alloc_len, xfer_len, resp_size;
2097 u8 desc_format;
2098 u8 *response;
2099
2100 desc_format = cmd[1] & 0x01;
2101 alloc_len = cmd[4];
2102
2103 resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
2104 (FIXED_FMT_SENSE_DATA_SIZE));
2105 response = kzalloc(resp_size, GFP_KERNEL);
2106 if (response == NULL) {
2107 res = -ENOMEM;
2108 goto out;
2109 }
2110
2111 if (desc_format) {
2112 /* Descriptor Format Sense Data */
2113 response[0] = DESC_FORMAT_SENSE_DATA;
2114 response[1] = NO_SENSE;
2115 /* TODO How is LOW POWER CONDITION ON handled? (byte 2) */
2116 response[2] = SCSI_ASC_NO_SENSE;
2117 response[3] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
2118 /* SDAT_OVFL = 0 | Additional Sense Length = 0 */
2119 } else {
2120 /* Fixed Format Sense Data */
2121 response[0] = FIXED_SENSE_DATA;
2122 /* Byte 1 = Obsolete */
2123 response[2] = NO_SENSE; /* FM, EOM, ILI, SDAT_OVFL = 0 */
2124 /* Bytes 3-6 - Information - set to zero */
2125 response[7] = FIXED_SENSE_DATA_ADD_LENGTH;
2126 /* Bytes 8-11 - Cmd Specific Information - set to zero */
2127 response[12] = SCSI_ASC_NO_SENSE;
2128 response[13] = SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
2129 /* Byte 14 = Field Replaceable Unit Code = 0 */
2130 /* Bytes 15-17 - SKSV=0; Sense Key Specific = 0 */
2131 }
2132
2133 xfer_len = min(alloc_len, resp_size);
2134 res = nvme_trans_copy_to_user(hdr, response, xfer_len);
2135
2136 kfree(response);
2137 out:
2138 return res;
2139}
2140
2141static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
2142 struct sg_io_hdr *hdr)
2143{
2144 int nvme_sc;
2145 struct nvme_command c;
2146
2147 memset(&c, 0, sizeof(c));
2148 c.common.opcode = nvme_cmd_flush;
2149 c.common.nsid = cpu_to_le32(ns->ns_id);
2150
2151 nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
2152 return nvme_trans_status_code(hdr, nvme_sc);
2153}
2154
2155static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2156 u8 *cmd)
2157{
2158 int res;
2159 u8 parm_hdr_len = 0;
2160 u8 nvme_pf_code = 0;
2161 u8 format_prot_info, long_list, format_data;
2162
2163 format_prot_info = (cmd[1] & 0xc0) >> 6;
2164 long_list = cmd[1] & 0x20;
2165 format_data = cmd[1] & 0x10;
2166
2167 if (format_data != 0) {
2168 if (format_prot_info != 0) {
2169 if (long_list == 0)
2170 parm_hdr_len = FORMAT_UNIT_SHORT_PARM_LIST_LEN;
2171 else
2172 parm_hdr_len = FORMAT_UNIT_LONG_PARM_LIST_LEN;
2173 }
2174 } else if (format_data == 0 && format_prot_info != 0) {
2175 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2176 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2177 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2178 goto out;
2179 }
2180
2181 /* Get parm header from data-in/out buffer */
2182 /*
2183 * According to the translation spec, the only fields in the parameter
2184 * list we are concerned with are in the header. So allocate only that.
2185 */
2186 if (parm_hdr_len > 0) {
2187 res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
2188 format_prot_info, &nvme_pf_code);
2189 if (res)
2190 goto out;
2191 }
2192
2193 /* Attempt to activate any previously downloaded firmware image */
2194 res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
2195
2196 /* Determine Block size and count and send format command */
2197 res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
2198 if (res)
2199 goto out;
2200
2201 res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
2202
2203 out:
2204 return res;
2205}
2206
2207static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
2208 struct sg_io_hdr *hdr,
2209 u8 *cmd)
2210{
2211	if (!nvme_ctrl_ready(ns->ctrl))
2212 return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2213 NOT_READY, SCSI_ASC_LUN_NOT_READY,
2214 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2215 else
2216 return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
2217}
2218
2219static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2220 u8 *cmd)
2221{
2222 int res = 0;
2223 u32 buffer_offset, parm_list_length;
2224 u8 buffer_id, mode;
2225
2226 parm_list_length = get_unaligned_be24(&cmd[6]);
2227 if (parm_list_length % BYTES_TO_DWORDS != 0) {
2228 /* NVMe expects Firmware file to be a whole number of DWORDS */
2229 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2230 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2231 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2232 goto out;
2233 }
2234 buffer_id = cmd[2];
2235 if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
2236 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2237 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2238 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2239 goto out;
2240 }
2241 mode = cmd[1] & 0x1f;
2242 buffer_offset = get_unaligned_be24(&cmd[3]);
2243
2244 switch (mode) {
2245 case DOWNLOAD_SAVE_ACTIVATE:
2246 res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
2247 parm_list_length, buffer_offset,
2248 buffer_id);
2249 if (res)
2250 goto out;
2251 res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
2252 break;
2253 case DOWNLOAD_SAVE_DEFER_ACTIVATE:
2254 res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
2255 parm_list_length, buffer_offset,
2256 buffer_id);
2257 break;
2258 case ACTIVATE_DEFERRED_MICROCODE:
2259 res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
2260 break;
2261 default:
2262 res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2263 ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
2264 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2265 break;
2266 }
2267
2268 out:
2269 return res;
2270}
2271
2272struct scsi_unmap_blk_desc {
2273 __be64 slba;
2274 __be32 nlb;
2275 u32 resv;
2276};
2277
2278struct scsi_unmap_parm_list {
2279 __be16 unmap_data_len;
2280 __be16 unmap_blk_desc_data_len;
2281 u32 resv;
2282 struct scsi_unmap_blk_desc desc[0];
2283};
2284
2285static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2286 u8 *cmd)
2287{
2288 struct scsi_unmap_parm_list *plist;
2289 struct nvme_dsm_range *range;
2290 struct nvme_command c;
2291 int i, nvme_sc, res;
2292 u16 ndesc, list_len;
2293
2294 list_len = get_unaligned_be16(&cmd[7]);
2295 if (!list_len)
2296 return -EINVAL;
2297
2298 plist = kmalloc(list_len, GFP_KERNEL);
2299 if (!plist)
2300 return -ENOMEM;
2301
2302 res = nvme_trans_copy_from_user(hdr, plist, list_len);
2303 if (res)
2304 goto out;
2305
2306 ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
2307 if (!ndesc || ndesc > 256) {
2308 res = -EINVAL;
2309 goto out;
2310 }
2311
2312 range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
2313 if (!range) {
2314 res = -ENOMEM;
2315 goto out;
2316 }
2317
2318 for (i = 0; i < ndesc; i++) {
2319 range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
2320 range[i].slba = cpu_to_le64(be64_to_cpu(plist->desc[i].slba));
2321 range[i].cattr = 0;
2322 }
2323
2324 memset(&c, 0, sizeof(c));
2325 c.dsm.opcode = nvme_cmd_dsm;
2326 c.dsm.nsid = cpu_to_le32(ns->ns_id);
2327 c.dsm.nr = cpu_to_le32(ndesc - 1);
2328 c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
2329
2330 nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
2331 ndesc * sizeof(*range));
2332 res = nvme_trans_status_code(hdr, nvme_sc);
2333
2334 kfree(range);
2335 out:
2336 kfree(plist);
2337 return res;
2338}
2339
2340static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
2341{
2342 u8 cmd[16];
2343 int retcode;
2344 unsigned int opcode;
2345
2346 if (hdr->cmdp == NULL)
2347 return -EMSGSIZE;
2348 if (hdr->cmd_len > sizeof(cmd))
2349 return -EINVAL;
2350 if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
2351 return -EFAULT;
2352
2353 /*
2354 * Prime the hdr with good status for scsi commands that don't require
2355 * an nvme command for translation.
2356 */
2357 retcode = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
2358 if (retcode)
2359 return retcode;
2360
2361 opcode = cmd[0];
2362
2363 switch (opcode) {
2364 case READ_6:
2365 case READ_10:
2366 case READ_12:
2367 case READ_16:
2368 retcode = nvme_trans_io(ns, hdr, 0, cmd);
2369 break;
2370 case WRITE_6:
2371 case WRITE_10:
2372 case WRITE_12:
2373 case WRITE_16:
2374 retcode = nvme_trans_io(ns, hdr, 1, cmd);
2375 break;
2376 case INQUIRY:
2377 retcode = nvme_trans_inquiry(ns, hdr, cmd);
2378 break;
2379 case LOG_SENSE:
2380 retcode = nvme_trans_log_sense(ns, hdr, cmd);
2381 break;
2382 case MODE_SELECT:
2383 case MODE_SELECT_10:
2384 retcode = nvme_trans_mode_select(ns, hdr, cmd);
2385 break;
2386 case MODE_SENSE:
2387 case MODE_SENSE_10:
2388 retcode = nvme_trans_mode_sense(ns, hdr, cmd);
2389 break;
2390 case READ_CAPACITY:
2391 retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
2392 break;
2393 case SERVICE_ACTION_IN_16:
2394 switch (cmd[1]) {
2395 case SAI_READ_CAPACITY_16:
2396 retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
2397 break;
2398 default:
2399 goto out;
2400 }
2401 break;
2402 case REPORT_LUNS:
2403 retcode = nvme_trans_report_luns(ns, hdr, cmd);
2404 break;
2405 case REQUEST_SENSE:
2406 retcode = nvme_trans_request_sense(ns, hdr, cmd);
2407 break;
2408 case SYNCHRONIZE_CACHE:
2409 retcode = nvme_trans_synchronize_cache(ns, hdr);
2410 break;
2411 case FORMAT_UNIT:
2412 retcode = nvme_trans_format_unit(ns, hdr, cmd);
2413 break;
2414 case TEST_UNIT_READY:
2415 retcode = nvme_trans_test_unit_ready(ns, hdr, cmd);
2416 break;
2417 case WRITE_BUFFER:
2418 retcode = nvme_trans_write_buffer(ns, hdr, cmd);
2419 break;
2420 case UNMAP:
2421 retcode = nvme_trans_unmap(ns, hdr, cmd);
2422 break;
2423 default:
2424 out:
2425 retcode = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
2426 ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND,
2427 SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
2428 break;
2429 }
2430 return retcode;
2431}
2432
2433int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
2434{
2435 struct sg_io_hdr hdr;
2436 int retcode;
2437
2438 if (!capable(CAP_SYS_ADMIN))
2439 return -EACCES;
2440 if (copy_from_user(&hdr, u_hdr, sizeof(hdr)))
2441 return -EFAULT;
2442 if (hdr.interface_id != 'S')
2443 return -EINVAL;
2444
2445 /*
2446	 * A positive return code means an NVMe status, which has been
2447 * translated to sense data.
2448 */
2449 retcode = nvme_scsi_translate(ns, &hdr);
2450 if (retcode < 0)
2451 return retcode;
2452 if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
2453 return -EFAULT;
2454 return 0;
2455}
2456
2457int nvme_sg_get_version_num(int __user *ip)
2458{
2459 return put_user(sg_version_num, ip);
2460}
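
For orientation only, and not part of this diff: the translation entry points above are reached through the SG_IO ioctl on an NVMe block device, so nvme_sg_io() validates the header, nvme_scsi_translate() dispatches on the SCSI opcode, and the nvme_trans_*() helpers build the corresponding NVMe commands. A minimal userspace sketch follows; the device path /dev/nvme0n1, the 96-byte INQUIRY allocation length and the 30-second timeout are illustrative assumptions, not values taken from the patch, and the caller needs CAP_SYS_ADMIN because nvme_sg_io() checks for it.

/* Hedged illustration: a SCSI INQUIRY sent through SG_IO ends up in
 * nvme_sg_io() -> nvme_scsi_translate() -> nvme_trans_inquiry() above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(void)
{
	unsigned char cdb[6] = { 0x12, 0, 0, 0, 96, 0 };	/* INQUIRY */
	unsigned char data[96], sense[32];
	struct sg_io_hdr hdr;
	int fd = open("/dev/nvme0n1", O_RDONLY);		/* assumed device */

	if (fd < 0)
		return 1;

	memset(&hdr, 0, sizeof(hdr));
	hdr.interface_id = 'S';			/* checked by nvme_sg_io() */
	hdr.dxfer_direction = SG_DXFER_FROM_DEV;
	hdr.cmd_len = sizeof(cdb);
	hdr.cmdp = cdb;
	hdr.dxfer_len = sizeof(data);
	hdr.dxferp = data;
	hdr.mx_sb_len = sizeof(sense);
	hdr.sbp = sense;
	hdr.timeout = 30000;			/* milliseconds */

	if (ioctl(fd, SG_IO, &hdr) < 0)
		return 1;

	printf("vendor: %.8s model: %.16s\n",
	       (char *)data + 8, (char *)data + 16);
	return 0;
}
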
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index ff1f97006322..35f930db3c02 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -336,7 +336,7 @@ out:
336 336
337static void nvmet_execute_identify_nslist(struct nvmet_req *req) 337static void nvmet_execute_identify_nslist(struct nvmet_req *req)
338{ 338{
339 static const int buf_size = 4096; 339 static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
340 struct nvmet_ctrl *ctrl = req->sq->ctrl; 340 struct nvmet_ctrl *ctrl = req->sq->ctrl;
341 struct nvmet_ns *ns; 341 struct nvmet_ns *ns;
342 u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid); 342 u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
@@ -367,6 +367,64 @@ out:
367 nvmet_req_complete(req, status); 367 nvmet_req_complete(req, status);
368} 368}
369 369
370static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
371 void *id, off_t *off)
372{
373 struct nvme_ns_id_desc desc = {
374 .nidt = type,
375 .nidl = len,
376 };
377 u16 status;
378
379 status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
380 if (status)
381 return status;
382 *off += sizeof(desc);
383
384 status = nvmet_copy_to_sgl(req, *off, id, len);
385 if (status)
386 return status;
387 *off += len;
388
389 return 0;
390}
391
392static void nvmet_execute_identify_desclist(struct nvmet_req *req)
393{
394 struct nvmet_ns *ns;
395 u16 status = 0;
396 off_t off = 0;
397
398 ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
399 if (!ns) {
400 status = NVME_SC_INVALID_NS | NVME_SC_DNR;
401 goto out;
402 }
403
404 if (memchr_inv(&ns->uuid, 0, sizeof(ns->uuid))) {
405 status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
406 NVME_NIDT_UUID_LEN,
407 &ns->uuid, &off);
408 if (status)
409 goto out_put_ns;
410 }
411 if (memchr_inv(ns->nguid, 0, sizeof(ns->nguid))) {
412 status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
413 NVME_NIDT_NGUID_LEN,
414 &ns->nguid, &off);
415 if (status)
416 goto out_put_ns;
417 }
418
419 if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
420 off) != NVME_IDENTIFY_DATA_SIZE - off)
421 status = NVME_SC_INTERNAL | NVME_SC_DNR;
422out_put_ns:
423 nvmet_put_namespace(ns);
424out:
425 nvmet_req_complete(req, status);
426}
427
370/* 428/*
371 * A "minimum viable" abort implementation: the command is mandatory in the 429 * A "minimum viable" abort implementation: the command is mandatory in the
372 * spec, but we are not required to do any useful work. We couldn't really 430 * spec, but we are not required to do any useful work. We couldn't really
@@ -504,7 +562,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
504 } 562 }
505 break; 563 break;
506 case nvme_admin_identify: 564 case nvme_admin_identify:
507 req->data_len = 4096; 565 req->data_len = NVME_IDENTIFY_DATA_SIZE;
508 switch (cmd->identify.cns) { 566 switch (cmd->identify.cns) {
509 case NVME_ID_CNS_NS: 567 case NVME_ID_CNS_NS:
510 req->execute = nvmet_execute_identify_ns; 568 req->execute = nvmet_execute_identify_ns;
@@ -515,6 +573,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
515 case NVME_ID_CNS_NS_ACTIVE_LIST: 573 case NVME_ID_CNS_NS_ACTIVE_LIST:
516 req->execute = nvmet_execute_identify_nslist; 574 req->execute = nvmet_execute_identify_nslist;
517 return 0; 575 return 0;
576 case NVME_ID_CNS_NS_DESC_LIST:
577 req->execute = nvmet_execute_identify_desclist;
578 return 0;
518 } 579 }
519 break; 580 break;
520 case nvme_admin_abort_cmd: 581 case nvme_admin_abort_cmd:
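
The new nvmet_execute_identify_desclist() above implements Identify CNS 0x03 from NVMe 1.3: the 4096-byte buffer is filled with a sequence of descriptors, each a 4-byte header (NIDT, NIDL, two reserved bytes) followed by NIDL bytes of identifier, and the remainder is zeroed. A hedged host-side counterpart is sketched below; it is not code from this patch, and the type codes and lengths restate the 1.3 values behind NVME_NIDT_UUID/NVME_NIDT_NGUID used above.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NIDT_EUI64	0x01	/* 8-byte identifier  */
#define NIDT_NGUID	0x02	/* 16-byte identifier */
#define NIDT_UUID	0x03	/* 16-byte identifier */

struct ns_id_desc {		/* mirrors struct nvme_ns_id_desc */
	uint8_t nidt;
	uint8_t nidl;
	uint8_t rsvd[2];
	uint8_t nid[];		/* nidl bytes follow */
};

/* Walk a CNS 0x03 buffer; a zero NIDT marks the zero-filled tail. */
static void parse_desclist(const uint8_t *buf, size_t len)
{
	size_t off = 0;

	while (off + sizeof(struct ns_id_desc) <= len) {
		const struct ns_id_desc *d = (const void *)(buf + off);

		if (!d->nidt || off + sizeof(*d) + d->nidl > len)
			break;
		printf("descriptor type %u, %u bytes\n", d->nidt, d->nidl);
		off += sizeof(*d) + d->nidl;
	}
}
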
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index be8c800078e2..a358ecd93e11 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -305,11 +305,41 @@ out_unlock:
305 305
306CONFIGFS_ATTR(nvmet_ns_, device_path); 306CONFIGFS_ATTR(nvmet_ns_, device_path);
307 307
308static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page)
309{
310 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid);
311}
312
313static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
314 const char *page, size_t count)
315{
316 struct nvmet_ns *ns = to_nvmet_ns(item);
317 struct nvmet_subsys *subsys = ns->subsys;
318 int ret = 0;
319
320
321 mutex_lock(&subsys->lock);
322 if (ns->enabled) {
323 ret = -EBUSY;
324 goto out_unlock;
325 }
326
327
328 if (uuid_parse(page, &ns->uuid))
329 ret = -EINVAL;
330
331out_unlock:
332 mutex_unlock(&subsys->lock);
333 return ret ? ret : count;
334}
335
308static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page) 336static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page)
309{ 337{
310 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid); 338 return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid);
311} 339}
312 340
341CONFIGFS_ATTR(nvmet_ns_, device_uuid);
342
313static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, 343static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
314 const char *page, size_t count) 344 const char *page, size_t count)
315{ 345{
@@ -379,6 +409,7 @@ CONFIGFS_ATTR(nvmet_ns_, enable);
379static struct configfs_attribute *nvmet_ns_attrs[] = { 409static struct configfs_attribute *nvmet_ns_attrs[] = {
380 &nvmet_ns_attr_device_path, 410 &nvmet_ns_attr_device_path,
381 &nvmet_ns_attr_device_nguid, 411 &nvmet_ns_attr_device_nguid,
412 &nvmet_ns_attr_device_uuid,
382 &nvmet_ns_attr_enable, 413 &nvmet_ns_attr_enable,
383 NULL, 414 NULL,
384}; 415};
@@ -619,8 +650,45 @@ out_unlock:
619 650
620CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host); 651CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host);
621 652
653static ssize_t nvmet_subsys_version_show(struct config_item *item,
654 char *page)
655{
656 struct nvmet_subsys *subsys = to_subsys(item);
657
658 if (NVME_TERTIARY(subsys->ver))
659 return snprintf(page, PAGE_SIZE, "%d.%d.%d\n",
660 (int)NVME_MAJOR(subsys->ver),
661 (int)NVME_MINOR(subsys->ver),
662 (int)NVME_TERTIARY(subsys->ver));
663 else
664 return snprintf(page, PAGE_SIZE, "%d.%d\n",
665 (int)NVME_MAJOR(subsys->ver),
666 (int)NVME_MINOR(subsys->ver));
667}
668
669static ssize_t nvmet_subsys_version_store(struct config_item *item,
670 const char *page, size_t count)
671{
672 struct nvmet_subsys *subsys = to_subsys(item);
673 int major, minor, tertiary = 0;
674 int ret;
675
676
677 ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
678 if (ret != 2 && ret != 3)
679 return -EINVAL;
680
681 down_write(&nvmet_config_sem);
682 subsys->ver = NVME_VS(major, minor, tertiary);
683 up_write(&nvmet_config_sem);
684
685 return count;
686}
687CONFIGFS_ATTR(nvmet_subsys_, version);
688
622static struct configfs_attribute *nvmet_subsys_attrs[] = { 689static struct configfs_attribute *nvmet_subsys_attrs[] = {
623 &nvmet_subsys_attr_attr_allow_any_host, 690 &nvmet_subsys_attr_attr_allow_any_host,
691 &nvmet_subsys_attr_version,
624 NULL, 692 NULL,
625}; 693};
626 694
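
Both additions above are ordinary configfs attributes: device_uuid appears in each namespace directory and is parsed with uuid_parse(), while the subsystem-level version attribute accepts either "major.minor" or "major.minor.tertiary" via the sscanf() shown. A hedged sketch of setting it from a management tool follows; the configfs mount point, the subsystem name testnqn and the exact attribute file name are assumptions for illustration, not paths taken from the patch.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Hedged sketch: write one nvmet configfs attribute. */
static int write_attr(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* Accepted formats: "1.3" or "1.3.0" (see the sscanf() above). */
	return write_attr(
		"/sys/kernel/config/nvmet/subsystems/testnqn/attr_version",
		"1.3.0\n");
}
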
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index eb9399ac97cf..b5b4ac103748 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -380,6 +380,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
380 380
381 ns->nsid = nsid; 381 ns->nsid = nsid;
382 ns->subsys = subsys; 382 ns->subsys = subsys;
383 uuid_gen(&ns->uuid);
383 384
384 return ns; 385 return ns;
385} 386}
@@ -926,7 +927,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
926 if (!subsys) 927 if (!subsys)
927 return NULL; 928 return NULL;
928 929
929 subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */ 930 subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
930 931
931 switch (type) { 932 switch (type) {
932 case NVME_NQN_NVME: 933 case NVME_NQN_NVME:
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 1aaf597e81fc..8f3b57b4c97b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -53,7 +53,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
53 e->portid = port->disc_addr.portid; 53 e->portid = port->disc_addr.portid;
54 /* we support only dynamic controllers */ 54 /* we support only dynamic controllers */
55 e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); 55 e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
56 e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH); 56 e->asqsz = cpu_to_le16(NVME_AQ_DEPTH);
57 e->subtype = type; 57 e->subtype = type;
58 memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); 58 memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
59 memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); 59 memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
@@ -185,7 +185,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
185 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; 185 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
186 } 186 }
187 case nvme_admin_identify: 187 case nvme_admin_identify:
188 req->data_len = 4096; 188 req->data_len = NVME_IDENTIFY_DATA_SIZE;
189 switch (cmd->identify.cns) { 189 switch (cmd->identify.cns) {
190 case NVME_ID_CNS_CTRL: 190 case NVME_ID_CNS_CTRL:
191 req->execute = 191 req->execute =
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 2006fae61980..7692a96c9065 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2096,20 +2096,22 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
2096 /* clear any response payload */ 2096 /* clear any response payload */
2097 memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf)); 2097 memset(&fod->rspiubuf, 0, sizeof(fod->rspiubuf));
2098 2098
2099 fod->data_sg = NULL;
2100 fod->data_sg_cnt = 0;
2101
2099 ret = nvmet_req_init(&fod->req, 2102 ret = nvmet_req_init(&fod->req,
2100 &fod->queue->nvme_cq, 2103 &fod->queue->nvme_cq,
2101 &fod->queue->nvme_sq, 2104 &fod->queue->nvme_sq,
2102 &nvmet_fc_tgt_fcp_ops); 2105 &nvmet_fc_tgt_fcp_ops);
2103 if (!ret) { /* bad SQE content or invalid ctrl state */ 2106 if (!ret) {
2104 nvmet_fc_abort_op(tgtport, fod); 2107 /* bad SQE content or invalid ctrl state */
2108 /* nvmet layer has already called op done to send rsp. */
2105 return; 2109 return;
2106 } 2110 }
2107 2111
2108 /* keep a running counter of tail position */ 2112 /* keep a running counter of tail position */
2109 atomic_inc(&fod->queue->sqtail); 2113 atomic_inc(&fod->queue->sqtail);
2110 2114
2111 fod->data_sg = NULL;
2112 fod->data_sg_cnt = 0;
2113 if (fod->total_length) { 2115 if (fod->total_length) {
2114 ret = nvmet_fc_alloc_tgt_pgs(fod); 2116 ret = nvmet_fc_alloc_tgt_pgs(fod);
2115 if (ret) { 2117 if (ret) {
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 294a6611fb24..1bb9d5b311b1 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -569,7 +569,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
569 struct nvmefc_tgt_fcp_req *tgt_fcpreq) 569 struct nvmefc_tgt_fcp_req *tgt_fcpreq)
570{ 570{
571 struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); 571 struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
572 int active;
573 572
574 /* 573 /*
575 * mark aborted only in case there were 2 threads in transport 574 * mark aborted only in case there were 2 threads in transport
@@ -577,7 +576,6 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
577 * after the abort request 576 * after the abort request
578 */ 577 */
579 spin_lock(&tfcp_req->reqlock); 578 spin_lock(&tfcp_req->reqlock);
580 active = tfcp_req->active;
581 tfcp_req->aborted = true; 579 tfcp_req->aborted = true;
582 spin_unlock(&tfcp_req->reqlock); 580 spin_unlock(&tfcp_req->reqlock);
583 581
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index c77940d80fc8..40128793e613 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -21,7 +21,7 @@ static void nvmet_bio_done(struct bio *bio)
21 struct nvmet_req *req = bio->bi_private; 21 struct nvmet_req *req = bio->bi_private;
22 22
23 nvmet_req_complete(req, 23 nvmet_req_complete(req,
24 bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); 24 bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
25 25
26 if (bio != &req->inline_bio) 26 if (bio != &req->inline_bio)
27 bio_put(bio); 27 bio_put(bio);
@@ -145,7 +145,7 @@ static void nvmet_execute_discard(struct nvmet_req *req)
145 bio->bi_private = req; 145 bio->bi_private = req;
146 bio->bi_end_io = nvmet_bio_done; 146 bio->bi_end_io = nvmet_bio_done;
147 if (status) { 147 if (status) {
148 bio->bi_error = -EIO; 148 bio->bi_status = BLK_STS_IOERR;
149 bio_endio(bio); 149 bio_endio(bio);
150 } else { 150 } else {
151 submit_bio(bio); 151 submit_bio(bio);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e503cfff0337..5f55c683b338 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -21,8 +21,6 @@
21#include "../host/nvme.h" 21#include "../host/nvme.h"
22#include "../host/fabrics.h" 22#include "../host/fabrics.h"
23 23
24#define NVME_LOOP_AQ_DEPTH 256
25
26#define NVME_LOOP_MAX_SEGMENTS 256 24#define NVME_LOOP_MAX_SEGMENTS 256
27 25
28/* 26/*
@@ -31,7 +29,7 @@
31 */ 29 */
32#define NVME_LOOP_NR_AEN_COMMANDS 1 30#define NVME_LOOP_NR_AEN_COMMANDS 1
33#define NVME_LOOP_AQ_BLKMQ_DEPTH \ 31#define NVME_LOOP_AQ_BLKMQ_DEPTH \
34 (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS) 32 (NVME_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
35 33
36struct nvme_loop_iod { 34struct nvme_loop_iod {
37 struct nvme_request nvme_req; 35 struct nvme_request nvme_req;
@@ -45,7 +43,6 @@ struct nvme_loop_iod {
45}; 43};
46 44
47struct nvme_loop_ctrl { 45struct nvme_loop_ctrl {
48 spinlock_t lock;
49 struct nvme_loop_queue *queues; 46 struct nvme_loop_queue *queues;
50 u32 queue_count; 47 u32 queue_count;
51 48
@@ -59,7 +56,6 @@ struct nvme_loop_ctrl {
59 56
60 struct nvmet_ctrl *target_ctrl; 57 struct nvmet_ctrl *target_ctrl;
61 struct work_struct delete_work; 58 struct work_struct delete_work;
62 struct work_struct reset_work;
63}; 59};
64 60
65static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) 61static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -151,7 +147,7 @@ nvme_loop_timeout(struct request *rq, bool reserved)
151 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq); 147 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
152 148
153 /* queue error recovery */ 149 /* queue error recovery */
154 schedule_work(&iod->queue->ctrl->reset_work); 150 nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
155 151
156 /* fail with DNR on admin cmd timeout */ 152 /* fail with DNR on admin cmd timeout */
157 nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; 153 nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
@@ -159,17 +155,17 @@ nvme_loop_timeout(struct request *rq, bool reserved)
159 return BLK_EH_HANDLED; 155 return BLK_EH_HANDLED;
160} 156}
161 157
162static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, 158static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
163 const struct blk_mq_queue_data *bd) 159 const struct blk_mq_queue_data *bd)
164{ 160{
165 struct nvme_ns *ns = hctx->queue->queuedata; 161 struct nvme_ns *ns = hctx->queue->queuedata;
166 struct nvme_loop_queue *queue = hctx->driver_data; 162 struct nvme_loop_queue *queue = hctx->driver_data;
167 struct request *req = bd->rq; 163 struct request *req = bd->rq;
168 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); 164 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
169 int ret; 165 blk_status_t ret;
170 166
171 ret = nvme_setup_cmd(ns, req, &iod->cmd); 167 ret = nvme_setup_cmd(ns, req, &iod->cmd);
172 if (ret != BLK_MQ_RQ_QUEUE_OK) 168 if (ret)
173 return ret; 169 return ret;
174 170
175 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; 171 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
@@ -179,16 +175,15 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
179 nvme_cleanup_cmd(req); 175 nvme_cleanup_cmd(req);
180 blk_mq_start_request(req); 176 blk_mq_start_request(req);
181 nvme_loop_queue_response(&iod->req); 177 nvme_loop_queue_response(&iod->req);
182 return BLK_MQ_RQ_QUEUE_OK; 178 return BLK_STS_OK;
183 } 179 }
184 180
185 if (blk_rq_bytes(req)) { 181 if (blk_rq_bytes(req)) {
186 iod->sg_table.sgl = iod->first_sgl; 182 iod->sg_table.sgl = iod->first_sgl;
187 ret = sg_alloc_table_chained(&iod->sg_table, 183 if (sg_alloc_table_chained(&iod->sg_table,
188 blk_rq_nr_phys_segments(req), 184 blk_rq_nr_phys_segments(req),
189 iod->sg_table.sgl); 185 iod->sg_table.sgl))
190 if (ret) 186 return BLK_STS_RESOURCE;
191 return BLK_MQ_RQ_QUEUE_BUSY;
192 187
193 iod->req.sg = iod->sg_table.sgl; 188 iod->req.sg = iod->sg_table.sgl;
194 iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); 189 iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
@@ -197,7 +192,7 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
197 blk_mq_start_request(req); 192 blk_mq_start_request(req);
198 193
199 schedule_work(&iod->work); 194 schedule_work(&iod->work);
200 return BLK_MQ_RQ_QUEUE_OK; 195 return BLK_STS_OK;
201} 196}
202 197
203static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) 198static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
@@ -234,15 +229,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set,
234 struct request *req, unsigned int hctx_idx, 229 struct request *req, unsigned int hctx_idx,
235 unsigned int numa_node) 230 unsigned int numa_node)
236{ 231{
237 return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 232 struct nvme_loop_ctrl *ctrl = set->driver_data;
238 hctx_idx + 1);
239}
240 233
241static int nvme_loop_init_admin_request(struct blk_mq_tag_set *set, 234 return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req),
242 struct request *req, unsigned int hctx_idx, 235 (set == &ctrl->tag_set) ? hctx_idx + 1 : 0);
243 unsigned int numa_node)
244{
245 return nvme_loop_init_iod(set->driver_data, blk_mq_rq_to_pdu(req), 0);
246} 236}
247 237
248static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 238static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -280,7 +270,7 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
280static const struct blk_mq_ops nvme_loop_admin_mq_ops = { 270static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
281 .queue_rq = nvme_loop_queue_rq, 271 .queue_rq = nvme_loop_queue_rq,
282 .complete = nvme_loop_complete_rq, 272 .complete = nvme_loop_complete_rq,
283 .init_request = nvme_loop_init_admin_request, 273 .init_request = nvme_loop_init_request,
284 .init_hctx = nvme_loop_init_admin_hctx, 274 .init_hctx = nvme_loop_init_admin_hctx,
285 .timeout = nvme_loop_timeout, 275 .timeout = nvme_loop_timeout,
286}; 276};
@@ -467,7 +457,7 @@ static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
467 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 457 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
468 return -EBUSY; 458 return -EBUSY;
469 459
470 if (!schedule_work(&ctrl->delete_work)) 460 if (!queue_work(nvme_wq, &ctrl->delete_work))
471 return -EBUSY; 461 return -EBUSY;
472 462
473 return 0; 463 return 0;
@@ -501,8 +491,8 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
501 491
502static void nvme_loop_reset_ctrl_work(struct work_struct *work) 492static void nvme_loop_reset_ctrl_work(struct work_struct *work)
503{ 493{
504 struct nvme_loop_ctrl *ctrl = container_of(work, 494 struct nvme_loop_ctrl *ctrl =
505 struct nvme_loop_ctrl, reset_work); 495 container_of(work, struct nvme_loop_ctrl, ctrl.reset_work);
506 bool changed; 496 bool changed;
507 int ret; 497 int ret;
508 498
@@ -540,21 +530,6 @@ out_disable:
540 nvme_put_ctrl(&ctrl->ctrl); 530 nvme_put_ctrl(&ctrl->ctrl);
541} 531}
542 532
543static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
544{
545 struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
546
547 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
548 return -EBUSY;
549
550 if (!schedule_work(&ctrl->reset_work))
551 return -EBUSY;
552
553 flush_work(&ctrl->reset_work);
554
555 return 0;
556}
557
558static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { 533static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
559 .name = "loop", 534 .name = "loop",
560 .module = THIS_MODULE, 535 .module = THIS_MODULE,
@@ -562,11 +537,9 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
562 .reg_read32 = nvmf_reg_read32, 537 .reg_read32 = nvmf_reg_read32,
563 .reg_read64 = nvmf_reg_read64, 538 .reg_read64 = nvmf_reg_read64,
564 .reg_write32 = nvmf_reg_write32, 539 .reg_write32 = nvmf_reg_write32,
565 .reset_ctrl = nvme_loop_reset_ctrl,
566 .free_ctrl = nvme_loop_free_ctrl, 540 .free_ctrl = nvme_loop_free_ctrl,
567 .submit_async_event = nvme_loop_submit_async_event, 541 .submit_async_event = nvme_loop_submit_async_event,
568 .delete_ctrl = nvme_loop_del_ctrl, 542 .delete_ctrl = nvme_loop_del_ctrl,
569 .get_subsysnqn = nvmf_get_subsysnqn,
570}; 543};
571 544
572static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) 545static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -629,15 +602,13 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
629 INIT_LIST_HEAD(&ctrl->list); 602 INIT_LIST_HEAD(&ctrl->list);
630 603
631 INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work); 604 INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
632 INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work); 605 INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
633 606
634 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops, 607 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
635 0 /* no quirks, we're perfect! */); 608 0 /* no quirks, we're perfect! */);
636 if (ret) 609 if (ret)
637 goto out_put_ctrl; 610 goto out_put_ctrl;
638 611
639 spin_lock_init(&ctrl->lock);
640
641 ret = -ENOMEM; 612 ret = -ENOMEM;
642 613
643 ctrl->ctrl.sqsize = opts->queue_size - 1; 614 ctrl->ctrl.sqsize = opts->queue_size - 1;
@@ -766,7 +737,7 @@ static void __exit nvme_loop_cleanup_module(void)
766 __nvme_loop_del_ctrl(ctrl); 737 __nvme_loop_del_ctrl(ctrl);
767 mutex_unlock(&nvme_loop_ctrl_mutex); 738 mutex_unlock(&nvme_loop_ctrl_mutex);
768 739
769 flush_scheduled_work(); 740 flush_workqueue(nvme_wq);
770} 741}
771 742
772module_init(nvme_loop_init_module); 743module_init(nvme_loop_init_module);
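
The loop driver hunks above are one instance of the tree-wide change in this pull: ->queue_rq() now returns blk_status_t, and the old BLK_MQ_RQ_QUEUE_OK/BUSY codes become BLK_STS_OK/BLK_STS_RESOURCE. A hedged skeleton of the new calling convention follows; the demo_* names are placeholders and not code from this series.

#include <linux/blk-mq.h>

/* Placeholder helpers standing in for real driver submission logic. */
static bool demo_hw_ready(void *hw)		{ return hw != NULL; }
static int demo_submit(struct request *req)	{ return 0; }

static blk_status_t demo_queue_rq(struct blk_mq_hw_ctx *hctx,
				  const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;

	if (!demo_hw_ready(hctx->driver_data))
		return BLK_STS_RESOURCE;	/* core re-runs the queue later */

	blk_mq_start_request(req);

	if (demo_submit(req))
		return BLK_STS_IOERR;		/* core completes req with this status */

	return BLK_STS_OK;
}

static const struct blk_mq_ops demo_mq_ops = {
	.queue_rq	= demo_queue_rq,
};
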
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8ff6e430b30a..747bbdb4f9c6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -47,6 +47,7 @@ struct nvmet_ns {
47 u32 blksize_shift; 47 u32 blksize_shift;
48 loff_t size; 48 loff_t size;
49 u8 nguid[16]; 49 u8 nguid[16];
50 uuid_t uuid;
50 51
51 bool enabled; 52 bool enabled;
52 struct nvmet_subsys *subsys; 53 struct nvmet_subsys *subsys;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 9e45cde63376..56a4cba690b5 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1027,7 +1027,7 @@ nvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn,
1027 queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; 1027 queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
1028 queue->send_queue_size = le16_to_cpu(req->hrqsize); 1028 queue->send_queue_size = le16_to_cpu(req->hrqsize);
1029 1029
1030 if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH) 1030 if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH)
1031 return NVME_RDMA_CM_INVALID_HSQSIZE; 1031 return NVME_RDMA_CM_INVALID_HSQSIZE;
1032 1032
1033 /* XXX: Should we enforce some kind of max for IO queues? */ 1033 /* XXX: Should we enforce some kind of max for IO queues? */
@@ -1307,53 +1307,44 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
1307 1307
1308/** 1308/**
1309 * nvme_rdma_device_removal() - Handle RDMA device removal 1309 * nvme_rdma_device_removal() - Handle RDMA device removal
1310 * @cm_id: rdma_cm id, used for nvmet port
1310 * @queue: nvmet rdma queue (cm id qp_context) 1311 * @queue: nvmet rdma queue (cm id qp_context)
1311 * @addr: nvmet address (cm_id context)
1312 * 1312 *
1313 * DEVICE_REMOVAL event notifies us that the RDMA device is about 1313 * DEVICE_REMOVAL event notifies us that the RDMA device is about
1314 * to unplug so we should take care of destroying our RDMA resources. 1314 * to unplug. Note that this event can be generated on a normal
1315 * This event will be generated for each allocated cm_id. 1315 * queue cm_id and/or a device bound listener cm_id (where in this
1316 * case queue will be null).
1316 * 1317 *
1317 * Note that this event can be generated on a normal queue cm_id 1318 * We registered an ib_client to handle device removal for queues,
1318 * and/or a device bound listener cm_id (where in this case 1319 * so we only need to handle the listening port cm_ids. In this case
1319 * queue will be null).
1320 *
1321 * we claim ownership on destroying the cm_id. For queues we move
1322 * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
1323 * we nullify the priv to prevent double cm_id destruction and destroying 1320 * we nullify the priv to prevent double cm_id destruction and destroying
1324 * the cm_id implicitly by returning a non-zero rc to the callout. 1321 * the cm_id implicitly by returning a non-zero rc to the callout.
1325 */ 1322 */
1326static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, 1323static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
1327 struct nvmet_rdma_queue *queue) 1324 struct nvmet_rdma_queue *queue)
1328{ 1325{
1329 unsigned long flags; 1326 struct nvmet_port *port;
1330
1331 if (!queue) {
1332 struct nvmet_port *port = cm_id->context;
1333 1327
1328 if (queue) {
1334 /* 1329 /*
1335 * This is a listener cm_id. Make sure that 1330 * This is a queue cm_id. we have registered
1336 * future remove_port won't invoke a double 1331 * an ib_client to handle queue removal
1337 * cm_id destroy. use atomic xchg to make sure 1332 * so don't interfere and just return.
1338 * we don't compete with remove_port.
1339 */
1340 if (xchg(&port->priv, NULL) != cm_id)
1341 return 0;
1342 } else {
1343 /*
1344 * This is a queue cm_id. Make sure that
1345 * release queue will not destroy the cm_id
1346 * and schedule all ctrl queues removal (only
1347 * if the queue is not disconnecting already).
1348 */ 1333 */
1349 spin_lock_irqsave(&queue->state_lock, flags); 1334 return 0;
1350 if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
1351 queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
1352 spin_unlock_irqrestore(&queue->state_lock, flags);
1353 nvmet_rdma_queue_disconnect(queue);
1354 flush_scheduled_work();
1355 } 1335 }
1356 1336
1337 port = cm_id->context;
1338
1339 /*
1340 * This is a listener cm_id. Make sure that
1341 * future remove_port won't invoke a double
1342 * cm_id destroy. use atomic xchg to make sure
1343 * we don't compete with remove_port.
1344 */
1345 if (xchg(&port->priv, NULL) != cm_id)
1346 return 0;
1347
1357 /* 1348 /*
1358 * We need to return 1 so that the core will destroy 1349 * We need to return 1 so that the core will destroy
1359 * its own ID. What a great API design.. 1350 * its own ID. What a great API design..
@@ -1519,9 +1510,51 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = {
1519 .delete_ctrl = nvmet_rdma_delete_ctrl, 1510 .delete_ctrl = nvmet_rdma_delete_ctrl,
1520}; 1511};
1521 1512
1513static void nvmet_rdma_add_one(struct ib_device *ib_device)
1514{
1515}
1516
1517static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
1518{
1519 struct nvmet_rdma_queue *queue;
1520
1521 /* Device is being removed, delete all queues using this device */
1522 mutex_lock(&nvmet_rdma_queue_mutex);
1523 list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
1524 if (queue->dev->device != ib_device)
1525 continue;
1526
1527 pr_info("Removing queue %d\n", queue->idx);
1528 __nvmet_rdma_queue_disconnect(queue);
1529 }
1530 mutex_unlock(&nvmet_rdma_queue_mutex);
1531
1532 flush_scheduled_work();
1533}
1534
1535static struct ib_client nvmet_rdma_ib_client = {
1536 .name = "nvmet_rdma",
1537 .add = nvmet_rdma_add_one,
1538 .remove = nvmet_rdma_remove_one
1539};
1540
1522static int __init nvmet_rdma_init(void) 1541static int __init nvmet_rdma_init(void)
1523{ 1542{
1524 return nvmet_register_transport(&nvmet_rdma_ops); 1543 int ret;
1544
1545 ret = ib_register_client(&nvmet_rdma_ib_client);
1546 if (ret)
1547 return ret;
1548
1549 ret = nvmet_register_transport(&nvmet_rdma_ops);
1550 if (ret)
1551 goto err_ib_client;
1552
1553 return 0;
1554
1555err_ib_client:
1556 ib_unregister_client(&nvmet_rdma_ib_client);
1557 return ret;
1525} 1558}
1526 1559
1527static void __exit nvmet_rdma_exit(void) 1560static void __exit nvmet_rdma_exit(void)
@@ -1544,6 +1577,7 @@ static void __exit nvmet_rdma_exit(void)
1544 mutex_unlock(&nvmet_rdma_queue_mutex); 1577 mutex_unlock(&nvmet_rdma_queue_mutex);
1545 1578
1546 flush_scheduled_work(); 1579 flush_scheduled_work();
1580 ib_unregister_client(&nvmet_rdma_ib_client);
1547 ida_destroy(&nvmet_rdma_queue_ida); 1581 ida_destroy(&nvmet_rdma_queue_ida);
1548} 1582}
1549 1583
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 6fb3fd5efc11..b7cbd5d2cdea 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2672,7 +2672,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2672 */ 2672 */
2673 if (basedev->state < DASD_STATE_READY) { 2673 if (basedev->state < DASD_STATE_READY) {
2674 while ((req = blk_fetch_request(block->request_queue))) 2674 while ((req = blk_fetch_request(block->request_queue)))
2675 __blk_end_request_all(req, -EIO); 2675 __blk_end_request_all(req, BLK_STS_IOERR);
2676 return; 2676 return;
2677 } 2677 }
2678 2678
@@ -2692,7 +2692,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2692 "Rejecting write request %p", 2692 "Rejecting write request %p",
2693 req); 2693 req);
2694 blk_start_request(req); 2694 blk_start_request(req);
2695 __blk_end_request_all(req, -EIO); 2695 __blk_end_request_all(req, BLK_STS_IOERR);
2696 continue; 2696 continue;
2697 } 2697 }
2698 if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) && 2698 if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
@@ -2702,7 +2702,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2702 "Rejecting failfast request %p", 2702 "Rejecting failfast request %p",
2703 req); 2703 req);
2704 blk_start_request(req); 2704 blk_start_request(req);
2705 __blk_end_request_all(req, -ETIMEDOUT); 2705 __blk_end_request_all(req, BLK_STS_TIMEOUT);
2706 continue; 2706 continue;
2707 } 2707 }
2708 cqr = basedev->discipline->build_cp(basedev, block, req); 2708 cqr = basedev->discipline->build_cp(basedev, block, req);
@@ -2734,7 +2734,7 @@ static void __dasd_process_request_queue(struct dasd_block *block)
2734 "on request %p", 2734 "on request %p",
2735 PTR_ERR(cqr), req); 2735 PTR_ERR(cqr), req);
2736 blk_start_request(req); 2736 blk_start_request(req);
2737 __blk_end_request_all(req, -EIO); 2737 __blk_end_request_all(req, BLK_STS_IOERR);
2738 continue; 2738 continue;
2739 } 2739 }
2740 /* 2740 /*
@@ -2755,21 +2755,29 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
2755{ 2755{
2756 struct request *req; 2756 struct request *req;
2757 int status; 2757 int status;
2758 int error = 0; 2758 blk_status_t error = BLK_STS_OK;
2759 2759
2760 req = (struct request *) cqr->callback_data; 2760 req = (struct request *) cqr->callback_data;
2761 dasd_profile_end(cqr->block, cqr, req); 2761 dasd_profile_end(cqr->block, cqr, req);
2762
2762 status = cqr->block->base->discipline->free_cp(cqr, req); 2763 status = cqr->block->base->discipline->free_cp(cqr, req);
2763 if (status < 0) 2764 if (status < 0)
2764 error = status; 2765 error = errno_to_blk_status(status);
2765 else if (status == 0) { 2766 else if (status == 0) {
2766 if (cqr->intrc == -EPERM) 2767 switch (cqr->intrc) {
2767 error = -EBADE; 2768 case -EPERM:
2768 else if (cqr->intrc == -ENOLINK || 2769 error = BLK_STS_NEXUS;
2769 cqr->intrc == -ETIMEDOUT) 2770 break;
2770 error = cqr->intrc; 2771 case -ENOLINK:
2771 else 2772 error = BLK_STS_TRANSPORT;
2772 error = -EIO; 2773 break;
2774 case -ETIMEDOUT:
2775 error = BLK_STS_TIMEOUT;
2776 break;
2777 default:
2778 error = BLK_STS_IOERR;
2779 break;
2780 }
2773 } 2781 }
2774 __blk_end_request_all(req, error); 2782 __blk_end_request_all(req, error);
2775} 2783}
@@ -3190,7 +3198,7 @@ static void dasd_flush_request_queue(struct dasd_block *block)
3190 3198
3191 spin_lock_irq(&block->request_queue_lock); 3199 spin_lock_irq(&block->request_queue_lock);
3192 while ((req = blk_fetch_request(block->request_queue))) 3200 while ((req = blk_fetch_request(block->request_queue)))
3193 __blk_end_request_all(req, -EIO); 3201 __blk_end_request_all(req, BLK_STS_IOERR);
3194 spin_unlock_irq(&block->request_queue_lock); 3202 spin_unlock_irq(&block->request_queue_lock);
3195} 3203}
3196 3204
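
The __dasd_cleanup_cqr() hunk above shows the pattern legacy drivers follow in this series: internal errno values are translated to blk_status_t before completing the request, either generically via errno_to_blk_status() or, as here, with an explicit mapping for the codes the driver cares about. Restated as a standalone helper for clarity (a sketch of the mapping above, not new code in the patch):

#include <linux/blk_types.h>
#include <linux/errno.h>

/* Mirror of the dasd intrc handling: specific BLK_STS_* values for the
 * distinguished return codes, plain I/O error for everything else.
 */
static blk_status_t demo_intrc_to_blk_status(int intrc)
{
	switch (intrc) {
	case -EPERM:
		return BLK_STS_NEXUS;
	case -ENOLINK:
		return BLK_STS_TRANSPORT;
	case -ETIMEDOUT:
		return BLK_STS_TIMEOUT;
	default:
		return BLK_STS_IOERR;
	}
}
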
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 36e5280af3e4..06eb1de52d1c 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -845,7 +845,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
845 unsigned long source_addr; 845 unsigned long source_addr;
846 unsigned long bytes_done; 846 unsigned long bytes_done;
847 847
848 blk_queue_split(q, &bio, q->bio_split); 848 blk_queue_split(q, &bio);
849 849
850 bytes_done = 0; 850 bytes_done = 0;
851 dev_info = bio->bi_bdev->bd_disk->private_data; 851 dev_info = bio->bi_bdev->bd_disk->private_data;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 152de6817875..3c2c84b72877 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -231,7 +231,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
231 aob->request.data = (u64) aobrq; 231 aob->request.data = (u64) aobrq;
232 scmrq->bdev = bdev; 232 scmrq->bdev = bdev;
233 scmrq->retries = 4; 233 scmrq->retries = 4;
234 scmrq->error = 0; 234 scmrq->error = BLK_STS_OK;
235 /* We don't use all msbs - place aidaws at the end of the aob page. */ 235 /* We don't use all msbs - place aidaws at the end of the aob page. */
236 scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io]; 236 scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
237 scm_request_cluster_init(scmrq); 237 scm_request_cluster_init(scmrq);
@@ -364,7 +364,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
364{ 364{
365 struct aob *aob = scmrq->aob; 365 struct aob *aob = scmrq->aob;
366 366
367 if (scmrq->error == -ETIMEDOUT) 367 if (scmrq->error == BLK_STS_TIMEOUT)
368 SCM_LOG(1, "Request timeout"); 368 SCM_LOG(1, "Request timeout");
369 else { 369 else {
370 SCM_LOG(1, "Request error"); 370 SCM_LOG(1, "Request error");
@@ -377,7 +377,7 @@ static void __scmrq_log_error(struct scm_request *scmrq)
377 scmrq->error); 377 scmrq->error);
378} 378}
379 379
380void scm_blk_irq(struct scm_device *scmdev, void *data, int error) 380void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
381{ 381{
382 struct scm_request *scmrq = data; 382 struct scm_request *scmrq = data;
383 struct scm_blk_dev *bdev = scmrq->bdev; 383 struct scm_blk_dev *bdev = scmrq->bdev;
@@ -397,7 +397,7 @@ static void scm_blk_handle_error(struct scm_request *scmrq)
397 struct scm_blk_dev *bdev = scmrq->bdev; 397 struct scm_blk_dev *bdev = scmrq->bdev;
398 unsigned long flags; 398 unsigned long flags;
399 399
400 if (scmrq->error != -EIO) 400 if (scmrq->error != BLK_STS_IOERR)
401 goto restart; 401 goto restart;
402 402
403 /* For -EIO the response block is valid. */ 403 /* For -EIO the response block is valid. */
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 09218cdc5129..cd598d1a4eae 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -35,7 +35,7 @@ struct scm_request {
35 struct aob *aob; 35 struct aob *aob;
36 struct list_head list; 36 struct list_head list;
37 u8 retries; 37 u8 retries;
38 int error; 38 blk_status_t error;
39#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE 39#ifdef CONFIG_SCM_BLOCK_CLUSTER_WRITE
40 struct { 40 struct {
41 enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state; 41 enum {CLUSTER_NONE, CLUSTER_READ, CLUSTER_WRITE} state;
@@ -50,7 +50,7 @@ struct scm_request {
50int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *); 50int scm_blk_dev_setup(struct scm_blk_dev *, struct scm_device *);
51void scm_blk_dev_cleanup(struct scm_blk_dev *); 51void scm_blk_dev_cleanup(struct scm_blk_dev *);
52void scm_blk_set_available(struct scm_blk_dev *); 52void scm_blk_set_available(struct scm_blk_dev *);
53void scm_blk_irq(struct scm_device *, void *, int); 53void scm_blk_irq(struct scm_device *, void *, blk_status_t);
54 54
55void scm_request_finish(struct scm_request *); 55void scm_request_finish(struct scm_request *);
56void scm_request_requeue(struct scm_request *); 56void scm_request_requeue(struct scm_request *);
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index b9d7e755c8a3..a48f0d40c1d2 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -190,7 +190,7 @@ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio)
190 unsigned long page_addr; 190 unsigned long page_addr;
191 unsigned long bytes; 191 unsigned long bytes;
192 192
193 blk_queue_split(q, &bio, q->bio_split); 193 blk_queue_split(q, &bio);
194 194
195 if ((bio->bi_iter.bi_sector & 7) != 0 || 195 if ((bio->bi_iter.bi_sector & 7) != 0 ||
196 (bio->bi_iter.bi_size & 4095) != 0) 196 (bio->bi_iter.bi_size & 4095) != 0)
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index b3f44bc7f644..0f11f3bcac82 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -135,7 +135,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
135 struct eadm_private *private = get_eadm_private(sch); 135 struct eadm_private *private = get_eadm_private(sch);
136 struct eadm_scsw *scsw = &sch->schib.scsw.eadm; 136 struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
137 struct irb *irb = this_cpu_ptr(&cio_irb); 137 struct irb *irb = this_cpu_ptr(&cio_irb);
138 int error = 0; 138 blk_status_t error = BLK_STS_OK;
139 139
140 EADM_LOG(6, "irq"); 140 EADM_LOG(6, "irq");
141 EADM_LOG_HEX(6, irb, sizeof(*irb)); 141 EADM_LOG_HEX(6, irb, sizeof(*irb));
@@ -144,10 +144,10 @@ static void eadm_subchannel_irq(struct subchannel *sch)
144 144
145 if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) 145 if ((scsw->stctl & (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND))
146 && scsw->eswf == 1 && irb->esw.eadm.erw.r) 146 && scsw->eswf == 1 && irb->esw.eadm.erw.r)
147 error = -EIO; 147 error = BLK_STS_IOERR;
148 148
149 if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC) 149 if (scsw->fctl & SCSW_FCTL_CLEAR_FUNC)
150 error = -ETIMEDOUT; 150 error = BLK_STS_TIMEOUT;
151 151
152 eadm_subchannel_set_timeout(sch, 0); 152 eadm_subchannel_set_timeout(sch, 0);
153 153
diff --git a/drivers/s390/cio/scm.c b/drivers/s390/cio/scm.c
index 15268edc54ae..1fa53ecdc2aa 100644
--- a/drivers/s390/cio/scm.c
+++ b/drivers/s390/cio/scm.c
@@ -71,7 +71,7 @@ void scm_driver_unregister(struct scm_driver *scmdrv)
71} 71}
72EXPORT_SYMBOL_GPL(scm_driver_unregister); 72EXPORT_SYMBOL_GPL(scm_driver_unregister);
73 73
74void scm_irq_handler(struct aob *aob, int error) 74void scm_irq_handler(struct aob *aob, blk_status_t error)
75{ 75{
76 struct aob_rq_header *aobrq = (void *) aob->request.data; 76 struct aob_rq_header *aobrq = (void *) aob->request.data;
77 struct scm_device *scmdev = aobrq->scmdev; 77 struct scm_device *scmdev = aobrq->scmdev;
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
index 62fed9dc893e..14f377ac1280 100644
--- a/drivers/sbus/char/jsflash.c
+++ b/drivers/sbus/char/jsflash.c
@@ -214,7 +214,7 @@ static void jsfd_request(void)
214 struct jsfd_part *jdp = req->rq_disk->private_data; 214 struct jsfd_part *jdp = req->rq_disk->private_data;
215 unsigned long offset = blk_rq_pos(req) << 9; 215 unsigned long offset = blk_rq_pos(req) << 9;
216 size_t len = blk_rq_cur_bytes(req); 216 size_t len = blk_rq_cur_bytes(req);
217 int err = -EIO; 217 blk_status_t err = BLK_STS_IOERR;
218 218
219 if ((offset + len) > jdp->dsize) 219 if ((offset + len) > jdp->dsize)
220 goto end; 220 goto end;
@@ -230,7 +230,7 @@ static void jsfd_request(void)
230 } 230 }
231 231
232 jsfd_read(bio_data(req->bio), jdp->dbase + offset, len); 232 jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
233 err = 0; 233 err = BLK_STS_OK;
234 end: 234 end:
235 if (!__blk_end_request_cur(req, err)) 235 if (!__blk_end_request_cur(req, err))
236 req = jsfd_next_request(); 236 req = jsfd_next_request();
@@ -592,6 +592,7 @@ static int jsfd_init(void)
592 put_disk(disk); 592 put_disk(disk);
593 goto out; 593 goto out;
594 } 594 }
595 blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
595 jsfd_disk[i] = disk; 596 jsfd_disk[i] = disk;
596 } 597 }
597 598
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 8a1b94816419..a4f28b7e4c65 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -446,7 +446,7 @@ static void _put_request(struct request *rq)
446 * code paths. 446 * code paths.
447 */ 447 */
448 if (unlikely(rq->bio)) 448 if (unlikely(rq->bio))
449 blk_end_request(rq, -ENOMEM, blk_rq_bytes(rq)); 449 blk_end_request(rq, BLK_STS_IOERR, blk_rq_bytes(rq));
450 else 450 else
451 blk_put_request(rq); 451 blk_put_request(rq);
452} 452}
@@ -474,10 +474,10 @@ void osd_end_request(struct osd_request *or)
474EXPORT_SYMBOL(osd_end_request); 474EXPORT_SYMBOL(osd_end_request);
475 475
476static void _set_error_resid(struct osd_request *or, struct request *req, 476static void _set_error_resid(struct osd_request *or, struct request *req,
477 int error) 477 blk_status_t error)
478{ 478{
479 or->async_error = error; 479 or->async_error = error;
480 or->req_errors = scsi_req(req)->result ? : error; 480 or->req_errors = scsi_req(req)->result;
481 or->sense_len = scsi_req(req)->sense_len; 481 or->sense_len = scsi_req(req)->sense_len;
482 if (or->sense_len) 482 if (or->sense_len)
483 memcpy(or->sense, scsi_req(req)->sense, or->sense_len); 483 memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
@@ -489,17 +489,19 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
489 489
490int osd_execute_request(struct osd_request *or) 490int osd_execute_request(struct osd_request *or)
491{ 491{
492 int error;
493
494 blk_execute_rq(or->request->q, NULL, or->request, 0); 492 blk_execute_rq(or->request->q, NULL, or->request, 0);
495 error = scsi_req(or->request)->result ? -EIO : 0;
496 493
497 _set_error_resid(or, or->request, error); 494 if (scsi_req(or->request)->result) {
498 return error; 495 _set_error_resid(or, or->request, BLK_STS_IOERR);
496 return -EIO;
497 }
498
499 _set_error_resid(or, or->request, BLK_STS_OK);
500 return 0;
499} 501}
500EXPORT_SYMBOL(osd_execute_request); 502EXPORT_SYMBOL(osd_execute_request);
501 503
502static void osd_request_async_done(struct request *req, int error) 504static void osd_request_async_done(struct request *req, blk_status_t error)
503{ 505{
504 struct osd_request *or = req->end_io_data; 506 struct osd_request *or = req->end_io_data;
505 507
@@ -1572,13 +1574,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
1572 flags); 1574 flags);
1573 if (IS_ERR(req)) 1575 if (IS_ERR(req))
1574 return req; 1576 return req;
1575 scsi_req_init(req);
1576 1577
1577 for_each_bio(bio) { 1578 for_each_bio(bio) {
1578 struct bio *bounce_bio = bio; 1579 ret = blk_rq_append_bio(req, bio);
1579
1580 blk_queue_bounce(req->q, &bounce_bio);
1581 ret = blk_rq_append_bio(req, bounce_bio);
1582 if (ret) 1580 if (ret)
1583 return ERR_PTR(ret); 1581 return ERR_PTR(ret);
1584 } 1582 }
@@ -1617,7 +1615,6 @@ static int _init_blk_request(struct osd_request *or,
1617 ret = PTR_ERR(req); 1615 ret = PTR_ERR(req);
1618 goto out; 1616 goto out;
1619 } 1617 }
1620 scsi_req_init(req);
1621 or->in.req = or->request->next_rq = req; 1618 or->in.req = or->request->next_rq = req;
1622 } 1619 }
1623 } else if (has_in) 1620 } else if (has_in)
@@ -1914,7 +1911,7 @@ analyze:
1914 /* scsi sense is Empty, the request was never issued to target 1911 /* scsi sense is Empty, the request was never issued to target
1915 * linux return code might tell us what happened. 1912 * linux return code might tell us what happened.
1916 */ 1913 */
1917 if (or->async_error == -ENOMEM) 1914 if (or->async_error == BLK_STS_RESOURCE)
1918 osi->osd_err_pri = OSD_ERR_PRI_RESOURCE; 1915 osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
1919 else 1916 else
1920 osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE; 1917 osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 67cbed92f07d..929ee7e88120 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -320,7 +320,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
320 320
321 321
322/* Wakeup from interrupt */ 322/* Wakeup from interrupt */
323static void osst_end_async(struct request *req, int update) 323static void osst_end_async(struct request *req, blk_status_t status)
324{ 324{
325 struct scsi_request *rq = scsi_req(req); 325 struct scsi_request *rq = scsi_req(req);
326 struct osst_request *SRpnt = req->end_io_data; 326 struct osst_request *SRpnt = req->end_io_data;
@@ -373,7 +373,6 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
373 return DRIVER_ERROR << 24; 373 return DRIVER_ERROR << 24;
374 374
375 rq = scsi_req(req); 375 rq = scsi_req(req);
376 scsi_req_init(req);
377 req->rq_flags |= RQF_QUIET; 376 req->rq_flags |= RQF_QUIET;
378 377
379 SRpnt->bio = NULL; 378 SRpnt->bio = NULL;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07dab893d..304a7158540f 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1874,7 +1874,7 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
1874 } 1874 }
1875} 1875}
1876 1876
1877static void eh_lock_door_done(struct request *req, int uptodate) 1877static void eh_lock_door_done(struct request *req, blk_status_t status)
1878{ 1878{
1879 __blk_put_request(req->q, req); 1879 __blk_put_request(req->q, req);
1880} 1880}
@@ -1903,7 +1903,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
1903 if (IS_ERR(req)) 1903 if (IS_ERR(req))
1904 return; 1904 return;
1905 rq = scsi_req(req); 1905 rq = scsi_req(req);
1906 scsi_req_init(req);
1907 1906
1908 rq->cmd[0] = ALLOW_MEDIUM_REMOVAL; 1907 rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
1909 rq->cmd[1] = 0; 1908 rq->cmd[1] = 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 99e16ac479e3..550e29f903b7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -250,7 +250,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
250 if (IS_ERR(req)) 250 if (IS_ERR(req))
251 return ret; 251 return ret;
252 rq = scsi_req(req); 252 rq = scsi_req(req);
253 scsi_req_init(req);
254 253
255 if (bufflen && blk_rq_map_kern(sdev->request_queue, req, 254 if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
256 buffer, bufflen, __GFP_RECLAIM)) 255 buffer, bufflen, __GFP_RECLAIM))
@@ -635,7 +634,7 @@ static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
635 cmd->request->next_rq->special = NULL; 634 cmd->request->next_rq->special = NULL;
636} 635}
637 636
638static bool scsi_end_request(struct request *req, int error, 637static bool scsi_end_request(struct request *req, blk_status_t error,
639 unsigned int bytes, unsigned int bidi_bytes) 638 unsigned int bytes, unsigned int bidi_bytes)
640{ 639{
641 struct scsi_cmnd *cmd = req->special; 640 struct scsi_cmnd *cmd = req->special;
@@ -694,45 +693,28 @@ static bool scsi_end_request(struct request *req, int error,
694 * @cmd: SCSI command (unused) 693 * @cmd: SCSI command (unused)
695 * @result: scsi error code 694 * @result: scsi error code
696 * 695 *
697 * Translate SCSI error code into standard UNIX errno. 696 * Translate SCSI error code into block errors.
698 * Return values:
699 * -ENOLINK temporary transport failure
700 * -EREMOTEIO permanent target failure, do not retry
701 * -EBADE permanent nexus failure, retry on other path
702 * -ENOSPC No write space available
703 * -ENODATA Medium error
704 * -EIO unspecified I/O error
705 */ 697 */
706static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) 698static blk_status_t __scsi_error_from_host_byte(struct scsi_cmnd *cmd,
699 int result)
707{ 700{
708 int error = 0; 701 switch (host_byte(result)) {
709
710 switch(host_byte(result)) {
711 case DID_TRANSPORT_FAILFAST: 702 case DID_TRANSPORT_FAILFAST:
712 error = -ENOLINK; 703 return BLK_STS_TRANSPORT;
713 break;
714 case DID_TARGET_FAILURE: 704 case DID_TARGET_FAILURE:
715 set_host_byte(cmd, DID_OK); 705 set_host_byte(cmd, DID_OK);
716 error = -EREMOTEIO; 706 return BLK_STS_TARGET;
717 break;
718 case DID_NEXUS_FAILURE: 707 case DID_NEXUS_FAILURE:
719 set_host_byte(cmd, DID_OK); 708 return BLK_STS_NEXUS;
720 error = -EBADE;
721 break;
722 case DID_ALLOC_FAILURE: 709 case DID_ALLOC_FAILURE:
723 set_host_byte(cmd, DID_OK); 710 set_host_byte(cmd, DID_OK);
724 error = -ENOSPC; 711 return BLK_STS_NOSPC;
725 break;
726 case DID_MEDIUM_ERROR: 712 case DID_MEDIUM_ERROR:
727 set_host_byte(cmd, DID_OK); 713 set_host_byte(cmd, DID_OK);
728 error = -ENODATA; 714 return BLK_STS_MEDIUM;
729 break;
730 default: 715 default:
731 error = -EIO; 716 return BLK_STS_IOERR;
732 break;
733 } 717 }
734
735 return error;
736} 718}
737 719
738/* 720/*
@@ -769,7 +751,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
769 int result = cmd->result; 751 int result = cmd->result;
770 struct request_queue *q = cmd->device->request_queue; 752 struct request_queue *q = cmd->device->request_queue;
771 struct request *req = cmd->request; 753 struct request *req = cmd->request;
772 int error = 0; 754 blk_status_t error = BLK_STS_OK;
773 struct scsi_sense_hdr sshdr; 755 struct scsi_sense_hdr sshdr;
774 bool sense_valid = false; 756 bool sense_valid = false;
775 int sense_deferred = 0, level = 0; 757 int sense_deferred = 0, level = 0;
@@ -808,7 +790,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
808 * both sides at once. 790 * both sides at once.
809 */ 791 */
810 scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid; 792 scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
811 if (scsi_end_request(req, 0, blk_rq_bytes(req), 793 if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
812 blk_rq_bytes(req->next_rq))) 794 blk_rq_bytes(req->next_rq)))
813 BUG(); 795 BUG();
814 return; 796 return;
@@ -850,7 +832,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
850 scsi_print_sense(cmd); 832 scsi_print_sense(cmd);
851 result = 0; 833 result = 0;
852 /* for passthrough error may be set */ 834 /* for passthrough error may be set */
853 error = 0; 835 error = BLK_STS_OK;
854 } 836 }
855 837
856 /* 838 /*
@@ -922,18 +904,18 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
922 action = ACTION_REPREP; 904 action = ACTION_REPREP;
923 } else if (sshdr.asc == 0x10) /* DIX */ { 905 } else if (sshdr.asc == 0x10) /* DIX */ {
924 action = ACTION_FAIL; 906 action = ACTION_FAIL;
925 error = -EILSEQ; 907 error = BLK_STS_PROTECTION;
926 /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */ 908 /* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
927 } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) { 909 } else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
928 action = ACTION_FAIL; 910 action = ACTION_FAIL;
929 error = -EREMOTEIO; 911 error = BLK_STS_TARGET;
930 } else 912 } else
931 action = ACTION_FAIL; 913 action = ACTION_FAIL;
932 break; 914 break;
933 case ABORTED_COMMAND: 915 case ABORTED_COMMAND:
934 action = ACTION_FAIL; 916 action = ACTION_FAIL;
935 if (sshdr.asc == 0x10) /* DIF */ 917 if (sshdr.asc == 0x10) /* DIF */
936 error = -EILSEQ; 918 error = BLK_STS_PROTECTION;
937 break; 919 break;
938 case NOT_READY: 920 case NOT_READY:
939 /* If the device is in the process of becoming 921 /* If the device is in the process of becoming
@@ -1134,6 +1116,20 @@ err_exit:
1134} 1116}
1135EXPORT_SYMBOL(scsi_init_io); 1117EXPORT_SYMBOL(scsi_init_io);
1136 1118
1119/**
1120 * scsi_initialize_rq - initialize struct scsi_cmnd.req
1121 *
1122 * Called from inside blk_get_request().
1123 */
1124void scsi_initialize_rq(struct request *rq)
1125{
1126 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
1127
1128 scsi_req_init(&cmd->req);
1129}
1130EXPORT_SYMBOL(scsi_initialize_rq);
1131
1132/* Called after a request has been started. */
1137void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) 1133void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
1138{ 1134{
1139 void *buf = cmd->sense_buffer; 1135 void *buf = cmd->sense_buffer;
@@ -1829,15 +1825,15 @@ out_delay:
1829 blk_delay_queue(q, SCSI_QUEUE_DELAY); 1825 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1830} 1826}
1831 1827
1832static inline int prep_to_mq(int ret) 1828static inline blk_status_t prep_to_mq(int ret)
1833{ 1829{
1834 switch (ret) { 1830 switch (ret) {
1835 case BLKPREP_OK: 1831 case BLKPREP_OK:
1836 return BLK_MQ_RQ_QUEUE_OK; 1832 return BLK_STS_OK;
1837 case BLKPREP_DEFER: 1833 case BLKPREP_DEFER:
1838 return BLK_MQ_RQ_QUEUE_BUSY; 1834 return BLK_STS_RESOURCE;
1839 default: 1835 default:
1840 return BLK_MQ_RQ_QUEUE_ERROR; 1836 return BLK_STS_IOERR;
1841 } 1837 }
1842} 1838}
1843 1839
@@ -1909,7 +1905,7 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
1909 blk_mq_complete_request(cmd->request); 1905 blk_mq_complete_request(cmd->request);
1910} 1906}
1911 1907
1912static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, 1908static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1913 const struct blk_mq_queue_data *bd) 1909 const struct blk_mq_queue_data *bd)
1914{ 1910{
1915 struct request *req = bd->rq; 1911 struct request *req = bd->rq;
@@ -1917,14 +1913,14 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1917 struct scsi_device *sdev = q->queuedata; 1913 struct scsi_device *sdev = q->queuedata;
1918 struct Scsi_Host *shost = sdev->host; 1914 struct Scsi_Host *shost = sdev->host;
1919 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); 1915 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1920 int ret; 1916 blk_status_t ret;
1921 int reason; 1917 int reason;
1922 1918
1923 ret = prep_to_mq(scsi_prep_state_check(sdev, req)); 1919 ret = prep_to_mq(scsi_prep_state_check(sdev, req));
1924 if (ret != BLK_MQ_RQ_QUEUE_OK) 1920 if (ret != BLK_STS_OK)
1925 goto out; 1921 goto out;
1926 1922
1927 ret = BLK_MQ_RQ_QUEUE_BUSY; 1923 ret = BLK_STS_RESOURCE;
1928 if (!get_device(&sdev->sdev_gendev)) 1924 if (!get_device(&sdev->sdev_gendev))
1929 goto out; 1925 goto out;
1930 1926
@@ -1937,7 +1933,7 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1937 1933
1938 if (!(req->rq_flags & RQF_DONTPREP)) { 1934 if (!(req->rq_flags & RQF_DONTPREP)) {
1939 ret = prep_to_mq(scsi_mq_prep_fn(req)); 1935 ret = prep_to_mq(scsi_mq_prep_fn(req));
1940 if (ret != BLK_MQ_RQ_QUEUE_OK) 1936 if (ret != BLK_STS_OK)
1941 goto out_dec_host_busy; 1937 goto out_dec_host_busy;
1942 req->rq_flags |= RQF_DONTPREP; 1938 req->rq_flags |= RQF_DONTPREP;
1943 } else { 1939 } else {
@@ -1955,11 +1951,11 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1955 reason = scsi_dispatch_cmd(cmd); 1951 reason = scsi_dispatch_cmd(cmd);
1956 if (reason) { 1952 if (reason) {
1957 scsi_set_blocked(cmd, reason); 1953 scsi_set_blocked(cmd, reason);
1958 ret = BLK_MQ_RQ_QUEUE_BUSY; 1954 ret = BLK_STS_RESOURCE;
1959 goto out_dec_host_busy; 1955 goto out_dec_host_busy;
1960 } 1956 }
1961 1957
1962 return BLK_MQ_RQ_QUEUE_OK; 1958 return BLK_STS_OK;
1963 1959
1964out_dec_host_busy: 1960out_dec_host_busy:
1965 atomic_dec(&shost->host_busy); 1961 atomic_dec(&shost->host_busy);
@@ -1972,12 +1968,14 @@ out_put_device:
1972 put_device(&sdev->sdev_gendev); 1968 put_device(&sdev->sdev_gendev);
1973out: 1969out:
1974 switch (ret) { 1970 switch (ret) {
1975 case BLK_MQ_RQ_QUEUE_BUSY: 1971 case BLK_STS_OK:
1972 break;
1973 case BLK_STS_RESOURCE:
1976 if (atomic_read(&sdev->device_busy) == 0 && 1974 if (atomic_read(&sdev->device_busy) == 0 &&
1977 !scsi_device_blocked(sdev)) 1975 !scsi_device_blocked(sdev))
1978 blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY); 1976 blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
1979 break; 1977 break;
1980 case BLK_MQ_RQ_QUEUE_ERROR: 1978 default:
1981 /* 1979 /*
 1982 * Make sure to release all allocated ressources when 1980 * Make sure to release all allocated ressources when
 1983 * we hit an error, as we will never see this command 1981 * we hit an error, as we will never see this command
@@ -1986,8 +1984,6 @@ out:
1986 if (req->rq_flags & RQF_DONTPREP) 1984 if (req->rq_flags & RQF_DONTPREP)
1987 scsi_mq_uninit_cmd(cmd); 1985 scsi_mq_uninit_cmd(cmd);
1988 break; 1986 break;
1989 default:
1990 break;
1991 } 1987 }
1992 return ret; 1988 return ret;
1993} 1989}
@@ -2057,6 +2053,8 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
2057{ 2053{
2058 struct device *dev = shost->dma_dev; 2054 struct device *dev = shost->dma_dev;
2059 2055
2056 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
2057
2060 /* 2058 /*
2061 * this limit is imposed by hardware restrictions 2059 * this limit is imposed by hardware restrictions
2062 */ 2060 */
@@ -2139,6 +2137,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
2139 q->request_fn = scsi_request_fn; 2137 q->request_fn = scsi_request_fn;
2140 q->init_rq_fn = scsi_init_rq; 2138 q->init_rq_fn = scsi_init_rq;
2141 q->exit_rq_fn = scsi_exit_rq; 2139 q->exit_rq_fn = scsi_exit_rq;
2140 q->initialize_rq_fn = scsi_initialize_rq;
2142 2141
2143 if (blk_init_allocated_queue(q) < 0) { 2142 if (blk_init_allocated_queue(q) < 0) {
2144 blk_cleanup_queue(q); 2143 blk_cleanup_queue(q);
@@ -2163,6 +2162,7 @@ static const struct blk_mq_ops scsi_mq_ops = {
2163#endif 2162#endif
2164 .init_request = scsi_init_request, 2163 .init_request = scsi_init_request,
2165 .exit_request = scsi_exit_request, 2164 .exit_request = scsi_exit_request,
2165 .initialize_rq_fn = scsi_initialize_rq,
2166 .map_queues = scsi_map_queues, 2166 .map_queues = scsi_map_queues,
2167}; 2167};
2168 2168
@@ -2977,7 +2977,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
2977 if (wait) 2977 if (wait)
2978 blk_mq_quiesce_queue(q); 2978 blk_mq_quiesce_queue(q);
2979 else 2979 else
2980 blk_mq_stop_hw_queues(q); 2980 blk_mq_quiesce_queue_nowait(q);
2981 } else { 2981 } else {
2982 spin_lock_irqsave(q->queue_lock, flags); 2982 spin_lock_irqsave(q->queue_lock, flags);
2983 blk_stop_queue(q); 2983 blk_stop_queue(q);
@@ -3031,7 +3031,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
3031 return -EINVAL; 3031 return -EINVAL;
3032 3032
3033 if (q->mq_ops) { 3033 if (q->mq_ops) {
3034 blk_mq_start_stopped_hw_queues(q, false); 3034 blk_mq_unquiesce_queue(q);
3035 } else { 3035 } else {
3036 spin_lock_irqsave(q->queue_lock, flags); 3036 spin_lock_irqsave(q->queue_lock, flags);
3037 blk_start_queue(q); 3037 blk_start_queue(q);
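
The scsi_lib.c hunks above show the shape of the blk-mq side of the conversion: the prep path and ->queue_rq now return blk_status_t values (BLK_STS_OK, BLK_STS_RESOURCE, BLK_STS_IOERR) instead of the old BLK_MQ_RQ_QUEUE_* codes. A minimal sketch of the same pattern for a hypothetical driver follows; it is not code from this commit, and the mydrv_* names and struct mydrv are invented for illustration.

/* Hypothetical sketch, not part of this commit; mydrv, mydrv_can_queue and
 * mydrv_submit are assumed to be provided by the (invented) driver. */
static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
				   const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;
	struct mydrv *drv = hctx->queue->queuedata;

	if (!mydrv_can_queue(drv))
		return BLK_STS_RESOURCE;	/* temporary condition: blk-mq will retry */

	blk_mq_start_request(req);

	if (mydrv_submit(drv, req))
		return BLK_STS_IOERR;		/* hard error: the request is failed */

	return BLK_STS_OK;
}

Returning BLK_STS_RESOURCE rather than an errno is what lets the core requeue the request later, which is exactly the distinction scsi_queue_rq draws above when it maps BLKPREP_DEFER and a busy host to BLK_STS_RESOURCE.
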
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 0ebe2f1bb908..5006a656e16a 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
33#include <linux/bsg.h> 33#include <linux/bsg.h>
34 34
35#include <scsi/scsi.h> 35#include <scsi/scsi.h>
36#include <scsi/scsi_cmnd.h>
36#include <scsi/scsi_request.h> 37#include <scsi/scsi_request.h>
37#include <scsi/scsi_device.h> 38#include <scsi/scsi_device.h>
38#include <scsi/scsi_host.h> 39#include <scsi/scsi_host.h>
@@ -172,7 +173,7 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
172 struct sas_rphy *rphy) 173 struct sas_rphy *rphy)
173{ 174{
174 struct request *req; 175 struct request *req;
175 int ret; 176 blk_status_t ret;
176 int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *); 177 int (*handler)(struct Scsi_Host *, struct sas_rphy *, struct request *);
177 178
178 while ((req = blk_fetch_request(q)) != NULL) { 179 while ((req = blk_fetch_request(q)) != NULL) {
@@ -230,6 +231,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
230 q = blk_alloc_queue(GFP_KERNEL); 231 q = blk_alloc_queue(GFP_KERNEL);
231 if (!q) 232 if (!q)
232 return -ENOMEM; 233 return -ENOMEM;
234 q->initialize_rq_fn = scsi_initialize_rq;
233 q->cmd_size = sizeof(struct scsi_request); 235 q->cmd_size = sizeof(struct scsi_request);
234 236
235 if (rphy) { 237 if (rphy) {
@@ -249,6 +251,11 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
249 if (error) 251 if (error)
250 goto out_cleanup_queue; 252 goto out_cleanup_queue;
251 253
254 /*
255 * by default assume old behaviour and bounce for any highmem page
256 */
257 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
258
252 error = bsg_register_queue(q, dev, name, release); 259 error = bsg_register_queue(q, dev, name, release);
253 if (error) 260 if (error)
254 goto out_cleanup_queue; 261 goto out_cleanup_queue;
@@ -264,6 +271,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
264 q->queuedata = shost; 271 q->queuedata = shost;
265 272
266 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); 273 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
274 queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
267 return 0; 275 return 0;
268 276
269out_cleanup_queue: 277out_cleanup_queue:
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 82c33a6edbea..21225d62b0c1 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
177} Sg_device; 177} Sg_device;
178 178
179/* tasklet or soft irq callback */ 179/* tasklet or soft irq callback */
180static void sg_rq_end_io(struct request *rq, int uptodate); 180static void sg_rq_end_io(struct request *rq, blk_status_t status);
181static int sg_start_req(Sg_request *srp, unsigned char *cmd); 181static int sg_start_req(Sg_request *srp, unsigned char *cmd);
182static int sg_finish_rem_req(Sg_request * srp); 182static int sg_finish_rem_req(Sg_request * srp);
183static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); 183static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
@@ -808,7 +808,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
808 if (atomic_read(&sdp->detaching)) { 808 if (atomic_read(&sdp->detaching)) {
809 if (srp->bio) { 809 if (srp->bio) {
810 scsi_req_free_cmd(scsi_req(srp->rq)); 810 scsi_req_free_cmd(scsi_req(srp->rq));
811 blk_end_request_all(srp->rq, -EIO); 811 blk_end_request_all(srp->rq, BLK_STS_IOERR);
812 srp->rq = NULL; 812 srp->rq = NULL;
813 } 813 }
814 814
@@ -1300,7 +1300,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
1300 * level when a command is completed (or has failed). 1300 * level when a command is completed (or has failed).
1301 */ 1301 */
1302static void 1302static void
1303sg_rq_end_io(struct request *rq, int uptodate) 1303sg_rq_end_io(struct request *rq, blk_status_t status)
1304{ 1304{
1305 struct sg_request *srp = rq->end_io_data; 1305 struct sg_request *srp = rq->end_io_data;
1306 struct scsi_request *req = scsi_req(rq); 1306 struct scsi_request *req = scsi_req(rq);
@@ -1732,8 +1732,6 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
1732 } 1732 }
1733 req = scsi_req(rq); 1733 req = scsi_req(rq);
1734 1734
1735 scsi_req_init(rq);
1736
1737 if (hp->cmd_len > BLK_MAX_CDB) 1735 if (hp->cmd_len > BLK_MAX_CDB)
1738 req->cmd = long_cmdp; 1736 req->cmd = long_cmdp;
1739 memcpy(req->cmd, cmd, hp->cmd_len); 1737 memcpy(req->cmd, cmd, hp->cmd_len);
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1ea34d6f5437..8e5013d9cad4 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -511,7 +511,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
511 atomic64_dec(&STp->stats->in_flight); 511 atomic64_dec(&STp->stats->in_flight);
512} 512}
513 513
514static void st_scsi_execute_end(struct request *req, int uptodate) 514static void st_scsi_execute_end(struct request *req, blk_status_t status)
515{ 515{
516 struct st_request *SRpnt = req->end_io_data; 516 struct st_request *SRpnt = req->end_io_data;
517 struct scsi_request *rq = scsi_req(req); 517 struct scsi_request *rq = scsi_req(req);
@@ -549,7 +549,6 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
549 if (IS_ERR(req)) 549 if (IS_ERR(req))
550 return DRIVER_ERROR << 24; 550 return DRIVER_ERROR << 24;
551 rq = scsi_req(req); 551 rq = scsi_req(req);
552 scsi_req_init(req);
553 req->rq_flags |= RQF_QUIET; 552 req->rq_flags |= RQF_QUIET;
554 553
555 mdata->null_mapped = 1; 554 mdata->null_mapped = 1;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index bb069ebe4aa6..c05d38016556 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -93,7 +93,7 @@ static int iblock_configure_device(struct se_device *dev)
93 return -EINVAL; 93 return -EINVAL;
94 } 94 }
95 95
96 ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0); 96 ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
97 if (!ib_dev->ibd_bio_set) { 97 if (!ib_dev->ibd_bio_set) {
98 pr_err("IBLOCK: Unable to create bioset\n"); 98 pr_err("IBLOCK: Unable to create bioset\n");
99 goto out; 99 goto out;
@@ -296,8 +296,8 @@ static void iblock_bio_done(struct bio *bio)
296 struct se_cmd *cmd = bio->bi_private; 296 struct se_cmd *cmd = bio->bi_private;
297 struct iblock_req *ibr = cmd->priv; 297 struct iblock_req *ibr = cmd->priv;
298 298
299 if (bio->bi_error) { 299 if (bio->bi_status) {
300 pr_err("bio error: %p, err: %d\n", bio, bio->bi_error); 300 pr_err("bio error: %p, err: %d\n", bio, bio->bi_status);
301 /* 301 /*
302 * Bump the ib_bio_err_cnt and release bio. 302 * Bump the ib_bio_err_cnt and release bio.
303 */ 303 */
@@ -354,11 +354,11 @@ static void iblock_end_io_flush(struct bio *bio)
354{ 354{
355 struct se_cmd *cmd = bio->bi_private; 355 struct se_cmd *cmd = bio->bi_private;
356 356
357 if (bio->bi_error) 357 if (bio->bi_status)
358 pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error); 358 pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_status);
359 359
360 if (cmd) { 360 if (cmd) {
361 if (bio->bi_error) 361 if (bio->bi_status)
362 target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION); 362 target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION);
363 else 363 else
364 target_complete_cmd(cmd, SAM_STAT_GOOD); 364 target_complete_cmd(cmd, SAM_STAT_GOOD);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 3e4abb13f8ea..ceec0211e84e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -55,7 +55,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
55} 55}
56 56
57static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); 57static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
58static void pscsi_req_done(struct request *, int); 58static void pscsi_req_done(struct request *, blk_status_t);
59 59
60/* pscsi_attach_hba(): 60/* pscsi_attach_hba():
61 * 61 *
@@ -992,8 +992,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
992 goto fail; 992 goto fail;
993 } 993 }
994 994
995 scsi_req_init(req);
996
997 if (sgl) { 995 if (sgl) {
998 ret = pscsi_map_sg(cmd, sgl, sgl_nents, req); 996 ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
999 if (ret) 997 if (ret)
@@ -1045,7 +1043,7 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
1045 return 0; 1043 return 0;
1046} 1044}
1047 1045
1048static void pscsi_req_done(struct request *req, int uptodate) 1046static void pscsi_req_done(struct request *req, blk_status_t status)
1049{ 1047{
1050 struct se_cmd *cmd = req->end_io_data; 1048 struct se_cmd *cmd = req->end_io_data;
1051 struct pscsi_plugin_task *pt = cmd->priv; 1049 struct pscsi_plugin_task *pt = cmd->priv;
diff --git a/fs/aio.c b/fs/aio.c
index f52d925ee259..dcad3a66748c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1541,7 +1541,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1541 ssize_t ret; 1541 ssize_t ret;
1542 1542
1543 /* enforce forwards compatibility on users */ 1543 /* enforce forwards compatibility on users */
1544 if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) { 1544 if (unlikely(iocb->aio_reserved2)) {
1545 pr_debug("EINVAL: reserve field set\n"); 1545 pr_debug("EINVAL: reserve field set\n");
1546 return -EINVAL; 1546 return -EINVAL;
1547 } 1547 }
@@ -1568,6 +1568,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1568 req->common.ki_pos = iocb->aio_offset; 1568 req->common.ki_pos = iocb->aio_offset;
1569 req->common.ki_complete = aio_complete; 1569 req->common.ki_complete = aio_complete;
1570 req->common.ki_flags = iocb_flags(req->common.ki_filp); 1570 req->common.ki_flags = iocb_flags(req->common.ki_filp);
1571 req->common.ki_hint = file_write_hint(file);
1571 1572
1572 if (iocb->aio_flags & IOCB_FLAG_RESFD) { 1573 if (iocb->aio_flags & IOCB_FLAG_RESFD) {
1573 /* 1574 /*
@@ -1586,6 +1587,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1586 req->common.ki_flags |= IOCB_EVENTFD; 1587 req->common.ki_flags |= IOCB_EVENTFD;
1587 } 1588 }
1588 1589
1590 ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
1591 if (unlikely(ret)) {
1592 pr_debug("EINVAL: aio_rw_flags\n");
1593 goto out_put_req;
1594 }
1595
1596 if ((req->common.ki_flags & IOCB_NOWAIT) &&
1597 !(req->common.ki_flags & IOCB_DIRECT)) {
1598 ret = -EOPNOTSUPP;
1599 goto out_put_req;
1600 }
1601
1589 ret = put_user(KIOCB_KEY, &user_iocb->aio_key); 1602 ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
1590 if (unlikely(ret)) { 1603 if (unlikely(ret)) {
1591 pr_debug("EFAULT: aio_key\n"); 1604 pr_debug("EFAULT: aio_key\n");
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0a7404ef9335..a7df151f8aba 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -225,6 +225,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
225 bio_init(&bio, vecs, nr_pages); 225 bio_init(&bio, vecs, nr_pages);
226 bio.bi_bdev = bdev; 226 bio.bi_bdev = bdev;
227 bio.bi_iter.bi_sector = pos >> 9; 227 bio.bi_iter.bi_sector = pos >> 9;
228 bio.bi_write_hint = iocb->ki_hint;
228 bio.bi_private = current; 229 bio.bi_private = current;
229 bio.bi_end_io = blkdev_bio_end_io_simple; 230 bio.bi_end_io = blkdev_bio_end_io_simple;
230 231
@@ -262,8 +263,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
262 if (vecs != inline_vecs) 263 if (vecs != inline_vecs)
263 kfree(vecs); 264 kfree(vecs);
264 265
265 if (unlikely(bio.bi_error)) 266 if (unlikely(bio.bi_status))
266 ret = bio.bi_error; 267 ret = blk_status_to_errno(bio.bi_status);
267 268
268 bio_uninit(&bio); 269 bio_uninit(&bio);
269 270
@@ -291,16 +292,18 @@ static void blkdev_bio_end_io(struct bio *bio)
291 bool should_dirty = dio->should_dirty; 292 bool should_dirty = dio->should_dirty;
292 293
293 if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { 294 if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
294 if (bio->bi_error && !dio->bio.bi_error) 295 if (bio->bi_status && !dio->bio.bi_status)
295 dio->bio.bi_error = bio->bi_error; 296 dio->bio.bi_status = bio->bi_status;
296 } else { 297 } else {
297 if (!dio->is_sync) { 298 if (!dio->is_sync) {
298 struct kiocb *iocb = dio->iocb; 299 struct kiocb *iocb = dio->iocb;
299 ssize_t ret = dio->bio.bi_error; 300 ssize_t ret;
300 301
301 if (likely(!ret)) { 302 if (likely(!dio->bio.bi_status)) {
302 ret = dio->size; 303 ret = dio->size;
303 iocb->ki_pos += ret; 304 iocb->ki_pos += ret;
305 } else {
306 ret = blk_status_to_errno(dio->bio.bi_status);
304 } 307 }
305 308
306 dio->iocb->ki_complete(iocb, ret, 0); 309 dio->iocb->ki_complete(iocb, ret, 0);
@@ -337,7 +340,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
337 bool is_read = (iov_iter_rw(iter) == READ), is_sync; 340 bool is_read = (iov_iter_rw(iter) == READ), is_sync;
338 loff_t pos = iocb->ki_pos; 341 loff_t pos = iocb->ki_pos;
339 blk_qc_t qc = BLK_QC_T_NONE; 342 blk_qc_t qc = BLK_QC_T_NONE;
340 int ret; 343 int ret = 0;
341 344
342 if ((pos | iov_iter_alignment(iter)) & 345 if ((pos | iov_iter_alignment(iter)) &
343 (bdev_logical_block_size(bdev) - 1)) 346 (bdev_logical_block_size(bdev) - 1))
@@ -361,12 +364,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
361 for (;;) { 364 for (;;) {
362 bio->bi_bdev = bdev; 365 bio->bi_bdev = bdev;
363 bio->bi_iter.bi_sector = pos >> 9; 366 bio->bi_iter.bi_sector = pos >> 9;
367 bio->bi_write_hint = iocb->ki_hint;
364 bio->bi_private = dio; 368 bio->bi_private = dio;
365 bio->bi_end_io = blkdev_bio_end_io; 369 bio->bi_end_io = blkdev_bio_end_io;
366 370
367 ret = bio_iov_iter_get_pages(bio, iter); 371 ret = bio_iov_iter_get_pages(bio, iter);
368 if (unlikely(ret)) { 372 if (unlikely(ret)) {
369 bio->bi_error = ret; 373 bio->bi_status = BLK_STS_IOERR;
370 bio_endio(bio); 374 bio_endio(bio);
371 break; 375 break;
372 } 376 }
@@ -415,7 +419,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
415 } 419 }
416 __set_current_state(TASK_RUNNING); 420 __set_current_state(TASK_RUNNING);
417 421
418 ret = dio->bio.bi_error; 422 if (!ret)
423 ret = blk_status_to_errno(dio->bio.bi_status);
419 if (likely(!ret)) 424 if (likely(!ret))
420 ret = dio->size; 425 ret = dio->size;
421 426
@@ -439,7 +444,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
439 444
440static __init int blkdev_init(void) 445static __init int blkdev_init(void)
441{ 446{
442 blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio)); 447 blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
443 if (!blkdev_dio_pool) 448 if (!blkdev_dio_pool)
444 return -ENOMEM; 449 return -ENOMEM;
445 return 0; 450 return 0;
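
The fs/block_dev.c hunks above show the completion side of the same interface: end_io handlers read bio->bi_status (a blk_status_t) and only convert to an errno at the boundary where callers still expect one, via blk_status_to_errno(). A hedged sketch of that pattern in a made-up completion handler is below; struct mydrv_io and the mydrv_ prefix are invented and not taken from the commit.

/* Hypothetical sketch, not part of this commit. */
struct mydrv_io {
	struct completion done;
	int error;			/* errno handed back to the waiting caller */
};

static void mydrv_end_io(struct bio *bio)
{
	struct mydrv_io *io = bio->bi_private;

	if (bio->bi_status)		/* blk_status_t, not an errno */
		io->error = blk_status_to_errno(bio->bi_status);

	complete(&io->done);
	bio_put(bio);
}

This mirrors blkdev_bio_end_io above, which likewise keeps the status as blk_status_t and defers the blk_status_to_errno() conversion until the iocb is completed.
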
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b8622e4d1744..d87ac27a5f2b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -310,7 +310,8 @@ struct btrfs_dio_private {
310 * The original bio may be split to several sub-bios, this is 310 * The original bio may be split to several sub-bios, this is
311 * done during endio of sub-bios 311 * done during endio of sub-bios
312 */ 312 */
313 int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int); 313 blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
314 blk_status_t);
314}; 315};
315 316
316/* 317/*
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ab14c2e635ca..4ded1c3f92b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2129,7 +2129,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
2129 /* mutex is not held! This is not save if IO is not yet completed 2129 /* mutex is not held! This is not save if IO is not yet completed
2130 * on umount */ 2130 * on umount */
2131 iodone_w_error = 0; 2131 iodone_w_error = 0;
2132 if (bp->bi_error) 2132 if (bp->bi_status)
2133 iodone_w_error = 1; 2133 iodone_w_error = 1;
2134 2134
2135 BUG_ON(NULL == block); 2135 BUG_ON(NULL == block);
@@ -2143,7 +2143,7 @@ static void btrfsic_bio_end_io(struct bio *bp)
2143 if ((dev_state->state->print_mask & 2143 if ((dev_state->state->print_mask &
2144 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2144 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2145 pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2145 pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2146 bp->bi_error, 2146 bp->bi_status,
2147 btrfsic_get_block_type(dev_state->state, block), 2147 btrfsic_get_block_type(dev_state->state, block),
2148 block->logical_bytenr, dev_state->name, 2148 block->logical_bytenr, dev_state->name,
2149 block->dev_bytenr, block->mirror_num); 2149 block->dev_bytenr, block->mirror_num);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b282d09d..a2fad39f79ba 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -155,7 +155,7 @@ static void end_compressed_bio_read(struct bio *bio)
155 unsigned long index; 155 unsigned long index;
156 int ret; 156 int ret;
157 157
158 if (bio->bi_error) 158 if (bio->bi_status)
159 cb->errors = 1; 159 cb->errors = 1;
160 160
161 /* if there are more bios still pending for this compressed 161 /* if there are more bios still pending for this compressed
@@ -268,7 +268,7 @@ static void end_compressed_bio_write(struct bio *bio)
268 struct page *page; 268 struct page *page;
269 unsigned long index; 269 unsigned long index;
270 270
271 if (bio->bi_error) 271 if (bio->bi_status)
272 cb->errors = 1; 272 cb->errors = 1;
273 273
274 /* if there are more bios still pending for this compressed 274 /* if there are more bios still pending for this compressed
@@ -287,7 +287,7 @@ static void end_compressed_bio_write(struct bio *bio)
287 cb->start, 287 cb->start,
288 cb->start + cb->len - 1, 288 cb->start + cb->len - 1,
289 NULL, 289 NULL,
290 bio->bi_error ? 0 : 1); 290 bio->bi_status ? 0 : 1);
291 cb->compressed_pages[0]->mapping = NULL; 291 cb->compressed_pages[0]->mapping = NULL;
292 292
293 end_compressed_writeback(inode, cb); 293 end_compressed_writeback(inode, cb);
@@ -320,7 +320,7 @@ out:
320 * This also checksums the file bytes and gets things ready for 320 * This also checksums the file bytes and gets things ready for
321 * the end io hooks. 321 * the end io hooks.
322 */ 322 */
323int btrfs_submit_compressed_write(struct inode *inode, u64 start, 323blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
324 unsigned long len, u64 disk_start, 324 unsigned long len, u64 disk_start,
325 unsigned long compressed_len, 325 unsigned long compressed_len,
326 struct page **compressed_pages, 326 struct page **compressed_pages,
@@ -335,13 +335,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
335 struct page *page; 335 struct page *page;
336 u64 first_byte = disk_start; 336 u64 first_byte = disk_start;
337 struct block_device *bdev; 337 struct block_device *bdev;
338 int ret; 338 blk_status_t ret;
339 int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 339 int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
340 340
341 WARN_ON(start & ((u64)PAGE_SIZE - 1)); 341 WARN_ON(start & ((u64)PAGE_SIZE - 1));
342 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); 342 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
343 if (!cb) 343 if (!cb)
344 return -ENOMEM; 344 return BLK_STS_RESOURCE;
345 refcount_set(&cb->pending_bios, 0); 345 refcount_set(&cb->pending_bios, 0);
346 cb->errors = 0; 346 cb->errors = 0;
347 cb->inode = inode; 347 cb->inode = inode;
@@ -358,7 +358,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
358 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); 358 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
359 if (!bio) { 359 if (!bio) {
360 kfree(cb); 360 kfree(cb);
361 return -ENOMEM; 361 return BLK_STS_RESOURCE;
362 } 362 }
363 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 363 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
364 bio->bi_private = cb; 364 bio->bi_private = cb;
@@ -368,17 +368,17 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
368 /* create and submit bios for the compressed pages */ 368 /* create and submit bios for the compressed pages */
369 bytes_left = compressed_len; 369 bytes_left = compressed_len;
370 for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) { 370 for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
371 int submit = 0;
372
371 page = compressed_pages[pg_index]; 373 page = compressed_pages[pg_index];
372 page->mapping = inode->i_mapping; 374 page->mapping = inode->i_mapping;
373 if (bio->bi_iter.bi_size) 375 if (bio->bi_iter.bi_size)
374 ret = io_tree->ops->merge_bio_hook(page, 0, 376 submit = io_tree->ops->merge_bio_hook(page, 0,
375 PAGE_SIZE, 377 PAGE_SIZE,
376 bio, 0); 378 bio, 0);
377 else
378 ret = 0;
379 379
380 page->mapping = NULL; 380 page->mapping = NULL;
381 if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) < 381 if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
382 PAGE_SIZE) { 382 PAGE_SIZE) {
383 bio_get(bio); 383 bio_get(bio);
384 384
@@ -400,7 +400,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
400 400
401 ret = btrfs_map_bio(fs_info, bio, 0, 1); 401 ret = btrfs_map_bio(fs_info, bio, 0, 1);
402 if (ret) { 402 if (ret) {
403 bio->bi_error = ret; 403 bio->bi_status = ret;
404 bio_endio(bio); 404 bio_endio(bio);
405 } 405 }
406 406
@@ -434,7 +434,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
434 434
435 ret = btrfs_map_bio(fs_info, bio, 0, 1); 435 ret = btrfs_map_bio(fs_info, bio, 0, 1);
436 if (ret) { 436 if (ret) {
437 bio->bi_error = ret; 437 bio->bi_status = ret;
438 bio_endio(bio); 438 bio_endio(bio);
439 } 439 }
440 440
@@ -569,7 +569,7 @@ next:
569 * After the compressed pages are read, we copy the bytes into the 569 * After the compressed pages are read, we copy the bytes into the
570 * bio we were passed and then call the bio end_io calls 570 * bio we were passed and then call the bio end_io calls
571 */ 571 */
572int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 572blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
573 int mirror_num, unsigned long bio_flags) 573 int mirror_num, unsigned long bio_flags)
574{ 574{
575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -586,7 +586,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
586 u64 em_len; 586 u64 em_len;
587 u64 em_start; 587 u64 em_start;
588 struct extent_map *em; 588 struct extent_map *em;
589 int ret = -ENOMEM; 589 blk_status_t ret = BLK_STS_RESOURCE;
590 int faili = 0; 590 int faili = 0;
591 u32 *sums; 591 u32 *sums;
592 592
@@ -600,7 +600,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
600 PAGE_SIZE); 600 PAGE_SIZE);
601 read_unlock(&em_tree->lock); 601 read_unlock(&em_tree->lock);
602 if (!em) 602 if (!em)
603 return -EIO; 603 return BLK_STS_IOERR;
604 604
605 compressed_len = em->block_len; 605 compressed_len = em->block_len;
606 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS); 606 cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -638,7 +638,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
638 __GFP_HIGHMEM); 638 __GFP_HIGHMEM);
639 if (!cb->compressed_pages[pg_index]) { 639 if (!cb->compressed_pages[pg_index]) {
640 faili = pg_index - 1; 640 faili = pg_index - 1;
641 ret = -ENOMEM; 641 ret = BLK_STS_RESOURCE;
642 goto fail2; 642 goto fail2;
643 } 643 }
644 } 644 }
@@ -659,19 +659,19 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
659 refcount_set(&cb->pending_bios, 1); 659 refcount_set(&cb->pending_bios, 1);
660 660
661 for (pg_index = 0; pg_index < nr_pages; pg_index++) { 661 for (pg_index = 0; pg_index < nr_pages; pg_index++) {
662 int submit = 0;
663
662 page = cb->compressed_pages[pg_index]; 664 page = cb->compressed_pages[pg_index];
663 page->mapping = inode->i_mapping; 665 page->mapping = inode->i_mapping;
664 page->index = em_start >> PAGE_SHIFT; 666 page->index = em_start >> PAGE_SHIFT;
665 667
666 if (comp_bio->bi_iter.bi_size) 668 if (comp_bio->bi_iter.bi_size)
667 ret = tree->ops->merge_bio_hook(page, 0, 669 submit = tree->ops->merge_bio_hook(page, 0,
668 PAGE_SIZE, 670 PAGE_SIZE,
669 comp_bio, 0); 671 comp_bio, 0);
670 else
671 ret = 0;
672 672
673 page->mapping = NULL; 673 page->mapping = NULL;
674 if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < 674 if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
675 PAGE_SIZE) { 675 PAGE_SIZE) {
676 bio_get(comp_bio); 676 bio_get(comp_bio);
677 677
@@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
697 697
698 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); 698 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
699 if (ret) { 699 if (ret) {
700 comp_bio->bi_error = ret; 700 comp_bio->bi_status = ret;
701 bio_endio(comp_bio); 701 bio_endio(comp_bio);
702 } 702 }
703 703
@@ -726,7 +726,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
726 726
727 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0); 727 ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
728 if (ret) { 728 if (ret) {
729 comp_bio->bi_error = ret; 729 comp_bio->bi_status = ret;
730 bio_endio(comp_bio); 730 bio_endio(comp_bio);
731 } 731 }
732 732
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43ab8df1..680d4265d601 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,12 +48,12 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
48 unsigned long total_out, u64 disk_start, 48 unsigned long total_out, u64 disk_start,
49 struct bio *bio); 49 struct bio *bio);
50 50
51int btrfs_submit_compressed_write(struct inode *inode, u64 start, 51blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
52 unsigned long len, u64 disk_start, 52 unsigned long len, u64 disk_start,
53 unsigned long compressed_len, 53 unsigned long compressed_len,
54 struct page **compressed_pages, 54 struct page **compressed_pages,
55 unsigned long nr_pages); 55 unsigned long nr_pages);
56int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, 56blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
57 int mirror_num, unsigned long bio_flags); 57 int mirror_num, unsigned long bio_flags);
58 58
59enum btrfs_compression_type { 59enum btrfs_compression_type {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4f8f75d9e839..a0d0c79d95ed 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3078,8 +3078,8 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
3078struct btrfs_dio_private; 3078struct btrfs_dio_private;
3079int btrfs_del_csums(struct btrfs_trans_handle *trans, 3079int btrfs_del_csums(struct btrfs_trans_handle *trans,
3080 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); 3080 struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
3081int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst); 3081blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
3082int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, 3082blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
3083 u64 logical_offset); 3083 u64 logical_offset);
3084int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3084int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
3085 struct btrfs_root *root, 3085 struct btrfs_root *root,
@@ -3094,7 +3094,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
3094int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 3094int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
3095 struct btrfs_root *root, 3095 struct btrfs_root *root,
3096 struct btrfs_ordered_sum *sums); 3096 struct btrfs_ordered_sum *sums);
3097int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, 3097blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
3098 u64 file_start, int contig); 3098 u64 file_start, int contig);
3099int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 3099int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
3100 struct list_head *list, int search_commit); 3100 struct list_head *list, int search_commit);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f678dcb20e6..6036d15b47b8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -87,7 +87,7 @@ struct btrfs_end_io_wq {
87 bio_end_io_t *end_io; 87 bio_end_io_t *end_io;
88 void *private; 88 void *private;
89 struct btrfs_fs_info *info; 89 struct btrfs_fs_info *info;
90 int error; 90 blk_status_t status;
91 enum btrfs_wq_endio_type metadata; 91 enum btrfs_wq_endio_type metadata;
92 struct list_head list; 92 struct list_head list;
93 struct btrfs_work work; 93 struct btrfs_work work;
@@ -131,7 +131,7 @@ struct async_submit_bio {
131 */ 131 */
132 u64 bio_offset; 132 u64 bio_offset;
133 struct btrfs_work work; 133 struct btrfs_work work;
134 int error; 134 blk_status_t status;
135}; 135};
136 136
137/* 137/*
@@ -799,7 +799,7 @@ static void end_workqueue_bio(struct bio *bio)
799 btrfs_work_func_t func; 799 btrfs_work_func_t func;
800 800
801 fs_info = end_io_wq->info; 801 fs_info = end_io_wq->info;
802 end_io_wq->error = bio->bi_error; 802 end_io_wq->status = bio->bi_status;
803 803
804 if (bio_op(bio) == REQ_OP_WRITE) { 804 if (bio_op(bio) == REQ_OP_WRITE) {
805 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { 805 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@ -836,19 +836,19 @@ static void end_workqueue_bio(struct bio *bio)
836 btrfs_queue_work(wq, &end_io_wq->work); 836 btrfs_queue_work(wq, &end_io_wq->work);
837} 837}
838 838
839int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 839blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
840 enum btrfs_wq_endio_type metadata) 840 enum btrfs_wq_endio_type metadata)
841{ 841{
842 struct btrfs_end_io_wq *end_io_wq; 842 struct btrfs_end_io_wq *end_io_wq;
843 843
844 end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); 844 end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS);
845 if (!end_io_wq) 845 if (!end_io_wq)
846 return -ENOMEM; 846 return BLK_STS_RESOURCE;
847 847
848 end_io_wq->private = bio->bi_private; 848 end_io_wq->private = bio->bi_private;
849 end_io_wq->end_io = bio->bi_end_io; 849 end_io_wq->end_io = bio->bi_end_io;
850 end_io_wq->info = info; 850 end_io_wq->info = info;
851 end_io_wq->error = 0; 851 end_io_wq->status = 0;
852 end_io_wq->bio = bio; 852 end_io_wq->bio = bio;
853 end_io_wq->metadata = metadata; 853 end_io_wq->metadata = metadata;
854 854
@@ -868,14 +868,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
868static void run_one_async_start(struct btrfs_work *work) 868static void run_one_async_start(struct btrfs_work *work)
869{ 869{
870 struct async_submit_bio *async; 870 struct async_submit_bio *async;
871 int ret; 871 blk_status_t ret;
872 872
873 async = container_of(work, struct async_submit_bio, work); 873 async = container_of(work, struct async_submit_bio, work);
874 ret = async->submit_bio_start(async->inode, async->bio, 874 ret = async->submit_bio_start(async->inode, async->bio,
875 async->mirror_num, async->bio_flags, 875 async->mirror_num, async->bio_flags,
876 async->bio_offset); 876 async->bio_offset);
877 if (ret) 877 if (ret)
878 async->error = ret; 878 async->status = ret;
879} 879}
880 880
881static void run_one_async_done(struct btrfs_work *work) 881static void run_one_async_done(struct btrfs_work *work)
@@ -898,8 +898,8 @@ static void run_one_async_done(struct btrfs_work *work)
898 wake_up(&fs_info->async_submit_wait); 898 wake_up(&fs_info->async_submit_wait);
899 899
900 /* If an error occurred we just want to clean up the bio and move on */ 900 /* If an error occurred we just want to clean up the bio and move on */
901 if (async->error) { 901 if (async->status) {
902 async->bio->bi_error = async->error; 902 async->bio->bi_status = async->status;
903 bio_endio(async->bio); 903 bio_endio(async->bio);
904 return; 904 return;
905 } 905 }
@@ -916,18 +916,17 @@ static void run_one_async_free(struct btrfs_work *work)
916 kfree(async); 916 kfree(async);
917} 917}
918 918
919int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 919blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
920 struct bio *bio, int mirror_num, 920 struct inode *inode, struct bio *bio, int mirror_num,
921 unsigned long bio_flags, 921 unsigned long bio_flags, u64 bio_offset,
922 u64 bio_offset, 922 extent_submit_bio_hook_t *submit_bio_start,
923 extent_submit_bio_hook_t *submit_bio_start, 923 extent_submit_bio_hook_t *submit_bio_done)
924 extent_submit_bio_hook_t *submit_bio_done)
925{ 924{
926 struct async_submit_bio *async; 925 struct async_submit_bio *async;
927 926
928 async = kmalloc(sizeof(*async), GFP_NOFS); 927 async = kmalloc(sizeof(*async), GFP_NOFS);
929 if (!async) 928 if (!async)
930 return -ENOMEM; 929 return BLK_STS_RESOURCE;
931 930
932 async->inode = inode; 931 async->inode = inode;
933 async->bio = bio; 932 async->bio = bio;
@@ -941,7 +940,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
941 async->bio_flags = bio_flags; 940 async->bio_flags = bio_flags;
942 async->bio_offset = bio_offset; 941 async->bio_offset = bio_offset;
943 942
944 async->error = 0; 943 async->status = 0;
945 944
946 atomic_inc(&fs_info->nr_async_submits); 945 atomic_inc(&fs_info->nr_async_submits);
947 946
@@ -959,7 +958,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
959 return 0; 958 return 0;
960} 959}
961 960
962static int btree_csum_one_bio(struct bio *bio) 961static blk_status_t btree_csum_one_bio(struct bio *bio)
963{ 962{
964 struct bio_vec *bvec; 963 struct bio_vec *bvec;
965 struct btrfs_root *root; 964 struct btrfs_root *root;
@@ -972,12 +971,12 @@ static int btree_csum_one_bio(struct bio *bio)
972 break; 971 break;
973 } 972 }
974 973
975 return ret; 974 return errno_to_blk_status(ret);
976} 975}
977 976
978static int __btree_submit_bio_start(struct inode *inode, struct bio *bio, 977static blk_status_t __btree_submit_bio_start(struct inode *inode,
979 int mirror_num, unsigned long bio_flags, 978 struct bio *bio, int mirror_num, unsigned long bio_flags,
980 u64 bio_offset) 979 u64 bio_offset)
981{ 980{
982 /* 981 /*
983 * when we're called for a write, we're already in the async 982 * when we're called for a write, we're already in the async
@@ -986,11 +985,11 @@ static int __btree_submit_bio_start(struct inode *inode, struct bio *bio,
986 return btree_csum_one_bio(bio); 985 return btree_csum_one_bio(bio);
987} 986}
988 987
989static int __btree_submit_bio_done(struct inode *inode, struct bio *bio, 988static blk_status_t __btree_submit_bio_done(struct inode *inode,
990 int mirror_num, unsigned long bio_flags, 989 struct bio *bio, int mirror_num, unsigned long bio_flags,
991 u64 bio_offset) 990 u64 bio_offset)
992{ 991{
993 int ret; 992 blk_status_t ret;
994 993
995 /* 994 /*
996 * when we're called for a write, we're already in the async 995 * when we're called for a write, we're already in the async
@@ -998,7 +997,7 @@ static int __btree_submit_bio_done(struct inode *inode, struct bio *bio,
998 */ 997 */
999 ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1); 998 ret = btrfs_map_bio(btrfs_sb(inode->i_sb), bio, mirror_num, 1);
1000 if (ret) { 999 if (ret) {
1001 bio->bi_error = ret; 1000 bio->bi_status = ret;
1002 bio_endio(bio); 1001 bio_endio(bio);
1003 } 1002 }
1004 return ret; 1003 return ret;
@@ -1015,13 +1014,13 @@ static int check_async_write(unsigned long bio_flags)
1015 return 1; 1014 return 1;
1016} 1015}
1017 1016
1018static int btree_submit_bio_hook(struct inode *inode, struct bio *bio, 1017static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
1019 int mirror_num, unsigned long bio_flags, 1018 int mirror_num, unsigned long bio_flags,
1020 u64 bio_offset) 1019 u64 bio_offset)
1021{ 1020{
1022 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1021 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1023 int async = check_async_write(bio_flags); 1022 int async = check_async_write(bio_flags);
1024 int ret; 1023 blk_status_t ret;
1025 1024
1026 if (bio_op(bio) != REQ_OP_WRITE) { 1025 if (bio_op(bio) != REQ_OP_WRITE) {
1027 /* 1026 /*
@@ -1054,7 +1053,7 @@ static int btree_submit_bio_hook(struct inode *inode, struct bio *bio,
1054 return 0; 1053 return 0;
1055 1054
1056out_w_error: 1055out_w_error:
1057 bio->bi_error = ret; 1056 bio->bi_status = ret;
1058 bio_endio(bio); 1057 bio_endio(bio);
1059 return ret; 1058 return ret;
1060} 1059}
@@ -1820,7 +1819,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
1820 end_io_wq = container_of(work, struct btrfs_end_io_wq, work); 1819 end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
1821 bio = end_io_wq->bio; 1820 bio = end_io_wq->bio;
1822 1821
1823 bio->bi_error = end_io_wq->error; 1822 bio->bi_status = end_io_wq->status;
1824 bio->bi_private = end_io_wq->private; 1823 bio->bi_private = end_io_wq->private;
1825 bio->bi_end_io = end_io_wq->end_io; 1824 bio->bi_end_io = end_io_wq->end_io;
1826 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); 1825 kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
@@ -3497,11 +3496,11 @@ static void btrfs_end_empty_barrier(struct bio *bio)
3497 * any device where the flush fails with eopnotsupp are flagged as not-barrier 3496 * any device where the flush fails with eopnotsupp are flagged as not-barrier
3498 * capable 3497 * capable
3499 */ 3498 */
3500static int write_dev_flush(struct btrfs_device *device, int wait) 3499static blk_status_t write_dev_flush(struct btrfs_device *device, int wait)
3501{ 3500{
3502 struct request_queue *q = bdev_get_queue(device->bdev); 3501 struct request_queue *q = bdev_get_queue(device->bdev);
3503 struct bio *bio; 3502 struct bio *bio;
3504 int ret = 0; 3503 blk_status_t ret = 0;
3505 3504
3506 if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) 3505 if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
3507 return 0; 3506 return 0;
@@ -3513,8 +3512,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3513 3512
3514 wait_for_completion(&device->flush_wait); 3513 wait_for_completion(&device->flush_wait);
3515 3514
3516 if (bio->bi_error) { 3515 if (bio->bi_status) {
3517 ret = bio->bi_error; 3516 ret = bio->bi_status;
3518 btrfs_dev_stat_inc_and_print(device, 3517 btrfs_dev_stat_inc_and_print(device,
3519 BTRFS_DEV_STAT_FLUSH_ERRS); 3518 BTRFS_DEV_STAT_FLUSH_ERRS);
3520 } 3519 }
@@ -3533,7 +3532,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3533 device->flush_bio = NULL; 3532 device->flush_bio = NULL;
3534 bio = btrfs_io_bio_alloc(GFP_NOFS, 0); 3533 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
3535 if (!bio) 3534 if (!bio)
3536 return -ENOMEM; 3535 return BLK_STS_RESOURCE;
3537 3536
3538 bio->bi_end_io = btrfs_end_empty_barrier; 3537 bio->bi_end_io = btrfs_end_empty_barrier;
3539 bio->bi_bdev = device->bdev; 3538 bio->bi_bdev = device->bdev;
@@ -3558,7 +3557,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
3558 struct btrfs_device *dev; 3557 struct btrfs_device *dev;
3559 int errors_send = 0; 3558 int errors_send = 0;
3560 int errors_wait = 0; 3559 int errors_wait = 0;
3561 int ret; 3560 blk_status_t ret;
3562 3561
3563 /* send down all the barriers */ 3562 /* send down all the barriers */
3564 head = &info->fs_devices->devices; 3563 head = &info->fs_devices->devices;
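
The disk-io.c hunks above are the core of the conversion: submission paths stop returning negative errnos (-ENOMEM, -EIO) and hand back blk_status_t codes (BLK_STS_RESOURCE, BLK_STS_IOERR) instead, translating only at the boundaries via errno_to_blk_status() and blk_status_to_errno(). A minimal userspace sketch of that round-trip follows; the status values and table contents are simplified stand-ins, not the kernel's actual definitions.

/* Simplified model of the errno <-> blk_status_t translation used above.
 * The status values and the table are illustrative stand-ins only.
 */
#include <errno.h>
#include <stdio.h>

typedef unsigned char blk_status_t;

enum {
	BLK_STS_OK       = 0,
	BLK_STS_IOERR    = 1,	/* maps to -EIO */
	BLK_STS_RESOURCE = 2,	/* maps to -ENOMEM */
	BLK_STS_TARGET   = 3,	/* maps to -EREMOTEIO */
	BLK_STS_AGAIN    = 4,	/* maps to -EAGAIN */
};

static const int blk_errors[] = {
	[BLK_STS_OK]       = 0,
	[BLK_STS_IOERR]    = -EIO,
	[BLK_STS_RESOURCE] = -ENOMEM,
	[BLK_STS_TARGET]   = -EREMOTEIO,
	[BLK_STS_AGAIN]    = -EAGAIN,
};

static blk_status_t errno_to_blk_status(int errnum)
{
	for (unsigned i = 0; i < sizeof(blk_errors) / sizeof(blk_errors[0]); i++)
		if (blk_errors[i] == errnum)
			return (blk_status_t)i;
	return BLK_STS_IOERR;	/* anything unknown collapses to an I/O error */
}

static int blk_status_to_errno(blk_status_t status)
{
	return blk_errors[status];
}

int main(void)
{
	/* -ENOMEM from an allocation failure becomes BLK_STS_RESOURCE and back. */
	blk_status_t st = errno_to_blk_status(-ENOMEM);

	printf("status %u -> errno %d\n", (unsigned)st, blk_status_to_errno(st));
	return 0;
}
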
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 21f1ceb85b76..c581927555f3 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -118,13 +118,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
118int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 118int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
119u32 btrfs_csum_data(const char *data, u32 seed, size_t len); 119u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
120void btrfs_csum_final(u32 crc, u8 *result); 120void btrfs_csum_final(u32 crc, u8 *result);
121int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 121blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
122 enum btrfs_wq_endio_type metadata); 122 enum btrfs_wq_endio_type metadata);
123int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, 123blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info,
124 struct bio *bio, int mirror_num, 124 struct inode *inode, struct bio *bio, int mirror_num,
125 unsigned long bio_flags, u64 bio_offset, 125 unsigned long bio_flags, u64 bio_offset,
126 extent_submit_bio_hook_t *submit_bio_start, 126 extent_submit_bio_hook_t *submit_bio_start,
127 extent_submit_bio_hook_t *submit_bio_done); 127 extent_submit_bio_hook_t *submit_bio_done);
128unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); 128unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info);
129int btrfs_write_tree_block(struct extent_buffer *buf); 129int btrfs_write_tree_block(struct extent_buffer *buf);
130int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); 130int btrfs_wait_tree_block_writeback(struct extent_buffer *buf);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d3619e010005..d1cd60140817 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -174,7 +174,8 @@ int __init extent_io_init(void)
174 goto free_state_cache; 174 goto free_state_cache;
175 175
176 btrfs_bioset = bioset_create(BIO_POOL_SIZE, 176 btrfs_bioset = bioset_create(BIO_POOL_SIZE,
177 offsetof(struct btrfs_io_bio, bio)); 177 offsetof(struct btrfs_io_bio, bio),
178 BIOSET_NEED_BVECS);
178 if (!btrfs_bioset) 179 if (!btrfs_bioset)
179 goto free_buffer_cache; 180 goto free_buffer_cache;
180 181
@@ -2399,6 +2400,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2399 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2400 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2400 struct bio *bio; 2401 struct bio *bio;
2401 int read_mode = 0; 2402 int read_mode = 0;
2403 blk_status_t status;
2402 int ret; 2404 int ret;
2403 2405
2404 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE); 2406 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
@@ -2431,11 +2433,12 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2431 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d", 2433 "Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
2432 read_mode, failrec->this_mirror, failrec->in_validation); 2434 read_mode, failrec->this_mirror, failrec->in_validation);
2433 2435
2434 ret = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror, 2436 status = tree->ops->submit_bio_hook(inode, bio, failrec->this_mirror,
2435 failrec->bio_flags, 0); 2437 failrec->bio_flags, 0);
2436 if (ret) { 2438 if (status) {
2437 free_io_failure(BTRFS_I(inode), failrec); 2439 free_io_failure(BTRFS_I(inode), failrec);
2438 bio_put(bio); 2440 bio_put(bio);
2441 ret = blk_status_to_errno(status);
2439 } 2442 }
2440 2443
2441 return ret; 2444 return ret;
@@ -2474,6 +2477,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2474 */ 2477 */
2475static void end_bio_extent_writepage(struct bio *bio) 2478static void end_bio_extent_writepage(struct bio *bio)
2476{ 2479{
2480 int error = blk_status_to_errno(bio->bi_status);
2477 struct bio_vec *bvec; 2481 struct bio_vec *bvec;
2478 u64 start; 2482 u64 start;
2479 u64 end; 2483 u64 end;
@@ -2503,7 +2507,7 @@ static void end_bio_extent_writepage(struct bio *bio)
2503 start = page_offset(page); 2507 start = page_offset(page);
2504 end = start + bvec->bv_offset + bvec->bv_len - 1; 2508 end = start + bvec->bv_offset + bvec->bv_len - 1;
2505 2509
2506 end_extent_writepage(page, bio->bi_error, start, end); 2510 end_extent_writepage(page, error, start, end);
2507 end_page_writeback(page); 2511 end_page_writeback(page);
2508 } 2512 }
2509 2513
@@ -2536,7 +2540,7 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2536static void end_bio_extent_readpage(struct bio *bio) 2540static void end_bio_extent_readpage(struct bio *bio)
2537{ 2541{
2538 struct bio_vec *bvec; 2542 struct bio_vec *bvec;
2539 int uptodate = !bio->bi_error; 2543 int uptodate = !bio->bi_status;
2540 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 2544 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2541 struct extent_io_tree *tree; 2545 struct extent_io_tree *tree;
2542 u64 offset = 0; 2546 u64 offset = 0;
@@ -2556,7 +2560,7 @@ static void end_bio_extent_readpage(struct bio *bio)
2556 2560
2557 btrfs_debug(fs_info, 2561 btrfs_debug(fs_info,
2558 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u", 2562 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
2559 (u64)bio->bi_iter.bi_sector, bio->bi_error, 2563 (u64)bio->bi_iter.bi_sector, bio->bi_status,
2560 io_bio->mirror_num); 2564 io_bio->mirror_num);
2561 tree = &BTRFS_I(inode)->io_tree; 2565 tree = &BTRFS_I(inode)->io_tree;
2562 2566
@@ -2615,7 +2619,7 @@ static void end_bio_extent_readpage(struct bio *bio)
2615 ret = bio_readpage_error(bio, offset, page, 2619 ret = bio_readpage_error(bio, offset, page,
2616 start, end, mirror); 2620 start, end, mirror);
2617 if (ret == 0) { 2621 if (ret == 0) {
2618 uptodate = !bio->bi_error; 2622 uptodate = !bio->bi_status;
2619 offset += len; 2623 offset += len;
2620 continue; 2624 continue;
2621 } 2625 }
@@ -2673,7 +2677,7 @@ readpage_ok:
2673 endio_readpage_release_extent(tree, extent_start, extent_len, 2677 endio_readpage_release_extent(tree, extent_start, extent_len,
2674 uptodate); 2678 uptodate);
2675 if (io_bio->end_io) 2679 if (io_bio->end_io)
2676 io_bio->end_io(io_bio, bio->bi_error); 2680 io_bio->end_io(io_bio, blk_status_to_errno(bio->bi_status));
2677 bio_put(bio); 2681 bio_put(bio);
2678} 2682}
2679 2683
@@ -2743,7 +2747,7 @@ struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2743static int __must_check submit_one_bio(struct bio *bio, int mirror_num, 2747static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2744 unsigned long bio_flags) 2748 unsigned long bio_flags)
2745{ 2749{
2746 int ret = 0; 2750 blk_status_t ret = 0;
2747 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 2751 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
2748 struct page *page = bvec->bv_page; 2752 struct page *page = bvec->bv_page;
2749 struct extent_io_tree *tree = bio->bi_private; 2753 struct extent_io_tree *tree = bio->bi_private;
@@ -2761,7 +2765,7 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num,
2761 btrfsic_submit_bio(bio); 2765 btrfsic_submit_bio(bio);
2762 2766
2763 bio_put(bio); 2767 bio_put(bio);
2764 return ret; 2768 return blk_status_to_errno(ret);
2765} 2769}
2766 2770
2767static int merge_bio(struct extent_io_tree *tree, struct page *page, 2771static int merge_bio(struct extent_io_tree *tree, struct page *page,
@@ -2826,6 +2830,7 @@ static int submit_extent_page(int op, int op_flags, struct extent_io_tree *tree,
2826 bio_add_page(bio, page, page_size, offset); 2830 bio_add_page(bio, page, page_size, offset);
2827 bio->bi_end_io = end_io_func; 2831 bio->bi_end_io = end_io_func;
2828 bio->bi_private = tree; 2832 bio->bi_private = tree;
2833 bio->bi_write_hint = page->mapping->host->i_write_hint;
2829 bio_set_op_attrs(bio, op, op_flags); 2834 bio_set_op_attrs(bio, op, op_flags);
2830 if (wbc) { 2835 if (wbc) {
2831 wbc_init_bio(wbc, bio); 2836 wbc_init_bio(wbc, bio);
@@ -3707,7 +3712,7 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
3707 BUG_ON(!eb); 3712 BUG_ON(!eb);
3708 done = atomic_dec_and_test(&eb->io_pages); 3713 done = atomic_dec_and_test(&eb->io_pages);
3709 3714
3710 if (bio->bi_error || 3715 if (bio->bi_status ||
3711 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { 3716 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
3712 ClearPageUptodate(page); 3717 ClearPageUptodate(page);
3713 set_btree_ioerr(page); 3718 set_btree_ioerr(page);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1eafa2f0ede3..487ca0207cb6 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -92,9 +92,9 @@ struct btrfs_inode;
92struct btrfs_io_bio; 92struct btrfs_io_bio;
93struct io_failure_record; 93struct io_failure_record;
94 94
95typedef int (extent_submit_bio_hook_t)(struct inode *inode, struct bio *bio, 95typedef blk_status_t (extent_submit_bio_hook_t)(struct inode *inode,
96 int mirror_num, unsigned long bio_flags, 96 struct bio *bio, int mirror_num, unsigned long bio_flags,
97 u64 bio_offset); 97 u64 bio_offset);
98struct extent_io_ops { 98struct extent_io_ops {
99 /* 99 /*
100 * The following callbacks must be allways defined, the function 100 * The following callbacks must be allways defined, the function
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 64fcb31d7163..5b1c7090e546 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -160,7 +160,7 @@ static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
160 kfree(bio->csum_allocated); 160 kfree(bio->csum_allocated);
161} 161}
162 162
163static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, 163static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
164 u64 logical_offset, u32 *dst, int dio) 164 u64 logical_offset, u32 *dst, int dio)
165{ 165{
166 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 166 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
182 182
183 path = btrfs_alloc_path(); 183 path = btrfs_alloc_path();
184 if (!path) 184 if (!path)
185 return -ENOMEM; 185 return BLK_STS_RESOURCE;
186 186
187 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; 187 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
188 if (!dst) { 188 if (!dst) {
@@ -191,7 +191,7 @@ static int __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
191 csum_size, GFP_NOFS); 191 csum_size, GFP_NOFS);
192 if (!btrfs_bio->csum_allocated) { 192 if (!btrfs_bio->csum_allocated) {
193 btrfs_free_path(path); 193 btrfs_free_path(path);
194 return -ENOMEM; 194 return BLK_STS_RESOURCE;
195 } 195 }
196 btrfs_bio->csum = btrfs_bio->csum_allocated; 196 btrfs_bio->csum = btrfs_bio->csum_allocated;
197 btrfs_bio->end_io = btrfs_io_bio_endio_readpage; 197 btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
@@ -303,12 +303,12 @@ next:
303 return 0; 303 return 0;
304} 304}
305 305
306int btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst) 306blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
307{ 307{
308 return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); 308 return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
309} 309}
310 310
311int btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) 311blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
312{ 312{
313 return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1); 313 return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
314} 314}
@@ -433,7 +433,7 @@ fail:
433 return ret; 433 return ret;
434} 434}
435 435
436int btrfs_csum_one_bio(struct inode *inode, struct bio *bio, 436blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
437 u64 file_start, int contig) 437 u64 file_start, int contig)
438{ 438{
439 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 439 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -452,7 +452,7 @@ int btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
452 sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size), 452 sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
453 GFP_NOFS); 453 GFP_NOFS);
454 if (!sums) 454 if (!sums)
455 return -ENOMEM; 455 return BLK_STS_RESOURCE;
456 456
457 sums->len = bio->bi_iter.bi_size; 457 sums->len = bio->bi_iter.bi_size;
458 INIT_LIST_HEAD(&sums->list); 458 INIT_LIST_HEAD(&sums->list);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index da1096eb1a40..59e2dccdf75b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1875,12 +1875,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1875 ssize_t num_written = 0; 1875 ssize_t num_written = 0;
1876 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); 1876 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1877 ssize_t err; 1877 ssize_t err;
1878 loff_t pos; 1878 loff_t pos = iocb->ki_pos;
1879 size_t count; 1879 size_t count = iov_iter_count(from);
1880 loff_t oldsize; 1880 loff_t oldsize;
1881 int clean_page = 0; 1881 int clean_page = 0;
1882 1882
1883 inode_lock(inode); 1883 if ((iocb->ki_flags & IOCB_NOWAIT) &&
1884 (iocb->ki_flags & IOCB_DIRECT)) {
1885 /* Don't sleep on inode rwsem */
1886 if (!inode_trylock(inode))
1887 return -EAGAIN;
1888 /*
1889 * We will allocate space in case nodatacow is not set,
1890 * so bail
1891 */
1892 if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1893 BTRFS_INODE_PREALLOC)) ||
1894 check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
1895 inode_unlock(inode);
1896 return -EAGAIN;
1897 }
1898 } else
1899 inode_lock(inode);
1900
1884 err = generic_write_checks(iocb, from); 1901 err = generic_write_checks(iocb, from);
1885 if (err <= 0) { 1902 if (err <= 0) {
1886 inode_unlock(inode); 1903 inode_unlock(inode);
@@ -1914,8 +1931,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1914 */ 1931 */
1915 update_time_for_write(inode); 1932 update_time_for_write(inode);
1916 1933
1917 pos = iocb->ki_pos;
1918 count = iov_iter_count(from);
1919 start_pos = round_down(pos, fs_info->sectorsize); 1934 start_pos = round_down(pos, fs_info->sectorsize);
1920 oldsize = i_size_read(inode); 1935 oldsize = i_size_read(inode);
1921 if (start_pos > oldsize) { 1936 if (start_pos > oldsize) {
@@ -3071,13 +3086,19 @@ out:
3071 return offset; 3086 return offset;
3072} 3087}
3073 3088
3089static int btrfs_file_open(struct inode *inode, struct file *filp)
3090{
3091 filp->f_mode |= FMODE_AIO_NOWAIT;
3092 return generic_file_open(inode, filp);
3093}
3094
3074const struct file_operations btrfs_file_operations = { 3095const struct file_operations btrfs_file_operations = {
3075 .llseek = btrfs_file_llseek, 3096 .llseek = btrfs_file_llseek,
3076 .read_iter = generic_file_read_iter, 3097 .read_iter = generic_file_read_iter,
3077 .splice_read = generic_file_splice_read, 3098 .splice_read = generic_file_splice_read,
3078 .write_iter = btrfs_file_write_iter, 3099 .write_iter = btrfs_file_write_iter,
3079 .mmap = btrfs_file_mmap, 3100 .mmap = btrfs_file_mmap,
3080 .open = generic_file_open, 3101 .open = btrfs_file_open,
3081 .release = btrfs_release_file, 3102 .release = btrfs_release_file,
3082 .fsync = btrfs_sync_file, 3103 .fsync = btrfs_sync_file,
3083 .fallocate = btrfs_fallocate, 3104 .fallocate = btrfs_fallocate,
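
The file.c hunks teach btrfs_file_write_iter() about IOCB_NOWAIT: a direct write tries the inode lock rather than sleeping on it, and bails with -EAGAIN when the lock is contended or the write would have to allocate space; btrfs_file_open() advertises support via FMODE_AIO_NOWAIT. A small pthread-based model of the try-or-EAGAIN shape is below; every name in it is invented for the sketch.

/* Userspace model of the IOCB_NOWAIT write path above: try the lock,
 * return -EAGAIN instead of sleeping when nowait was requested.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define MY_IOCB_NOWAIT 0x1

struct my_inode {
	pthread_mutex_t lock;
};

static int my_write(struct my_inode *inode, int iocb_flags)
{
	if (iocb_flags & MY_IOCB_NOWAIT) {
		/* Don't sleep on the inode lock. */
		if (pthread_mutex_trylock(&inode->lock) != 0)
			return -EAGAIN;
	} else {
		pthread_mutex_lock(&inode->lock);
	}

	/* ... the actual write work would go here ... */

	pthread_mutex_unlock(&inode->lock);
	return 0;
}

int main(void)
{
	struct my_inode inode = { .lock = PTHREAD_MUTEX_INITIALIZER };

	pthread_mutex_lock(&inode.lock);	/* simulate a contended inode */
	printf("nowait write: %d (expect %d)\n",
	       my_write(&inode, MY_IOCB_NOWAIT), -EAGAIN);
	pthread_mutex_unlock(&inode.lock);

	printf("blocking write: %d\n", my_write(&inode, 0));
	return 0;
}
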
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ef3c98c527c1..556c93060606 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -842,13 +842,12 @@ retry:
842 NULL, EXTENT_LOCKED | EXTENT_DELALLOC, 842 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
843 PAGE_UNLOCK | PAGE_CLEAR_DIRTY | 843 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
844 PAGE_SET_WRITEBACK); 844 PAGE_SET_WRITEBACK);
845 ret = btrfs_submit_compressed_write(inode, 845 if (btrfs_submit_compressed_write(inode,
846 async_extent->start, 846 async_extent->start,
847 async_extent->ram_size, 847 async_extent->ram_size,
848 ins.objectid, 848 ins.objectid,
849 ins.offset, async_extent->pages, 849 ins.offset, async_extent->pages,
850 async_extent->nr_pages); 850 async_extent->nr_pages)) {
851 if (ret) {
852 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 851 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
853 struct page *p = async_extent->pages[0]; 852 struct page *p = async_extent->pages[0];
854 const u64 start = async_extent->start; 853 const u64 start = async_extent->start;
@@ -1901,11 +1900,11 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1901 * At IO completion time the cums attached on the ordered extent record 1900 * At IO completion time the cums attached on the ordered extent record
1902 * are inserted into the btree 1901 * are inserted into the btree
1903 */ 1902 */
1904static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio, 1903static blk_status_t __btrfs_submit_bio_start(struct inode *inode,
1905 int mirror_num, unsigned long bio_flags, 1904 struct bio *bio, int mirror_num, unsigned long bio_flags,
1906 u64 bio_offset) 1905 u64 bio_offset)
1907{ 1906{
1908 int ret = 0; 1907 blk_status_t ret = 0;
1909 1908
1910 ret = btrfs_csum_one_bio(inode, bio, 0, 0); 1909 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
1911 BUG_ON(ret); /* -ENOMEM */ 1910 BUG_ON(ret); /* -ENOMEM */
@@ -1920,16 +1919,16 @@ static int __btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
1920 * At IO completion time the cums attached on the ordered extent record 1919 * At IO completion time the cums attached on the ordered extent record
1921 * are inserted into the btree 1920 * are inserted into the btree
1922 */ 1921 */
1923static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio, 1922static blk_status_t __btrfs_submit_bio_done(struct inode *inode,
1924 int mirror_num, unsigned long bio_flags, 1923 struct bio *bio, int mirror_num, unsigned long bio_flags,
1925 u64 bio_offset) 1924 u64 bio_offset)
1926{ 1925{
1927 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1926 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1928 int ret; 1927 blk_status_t ret;
1929 1928
1930 ret = btrfs_map_bio(fs_info, bio, mirror_num, 1); 1929 ret = btrfs_map_bio(fs_info, bio, mirror_num, 1);
1931 if (ret) { 1930 if (ret) {
1932 bio->bi_error = ret; 1931 bio->bi_status = ret;
1933 bio_endio(bio); 1932 bio_endio(bio);
1934 } 1933 }
1935 return ret; 1934 return ret;
@@ -1939,14 +1938,14 @@ static int __btrfs_submit_bio_done(struct inode *inode, struct bio *bio,
1939 * extent_io.c submission hook. This does the right thing for csum calculation 1938 * extent_io.c submission hook. This does the right thing for csum calculation
1940 * on write, or reading the csums from the tree before a read 1939 * on write, or reading the csums from the tree before a read
1941 */ 1940 */
1942static int btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, 1941static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
1943 int mirror_num, unsigned long bio_flags, 1942 int mirror_num, unsigned long bio_flags,
1944 u64 bio_offset) 1943 u64 bio_offset)
1945{ 1944{
1946 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1945 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1947 struct btrfs_root *root = BTRFS_I(inode)->root; 1946 struct btrfs_root *root = BTRFS_I(inode)->root;
1948 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA; 1947 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
1949 int ret = 0; 1948 blk_status_t ret = 0;
1950 int skip_sum; 1949 int skip_sum;
1951 int async = !atomic_read(&BTRFS_I(inode)->sync_writers); 1950 int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
1952 1951
@@ -1991,8 +1990,8 @@ mapit:
1991 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0); 1990 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
1992 1991
1993out: 1992out:
1994 if (ret < 0) { 1993 if (ret) {
1995 bio->bi_error = ret; 1994 bio->bi_status = ret;
1996 bio_endio(bio); 1995 bio_endio(bio);
1997 } 1996 }
1998 return ret; 1997 return ret;
@@ -8037,7 +8036,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
8037 struct bio_vec *bvec; 8036 struct bio_vec *bvec;
8038 int i; 8037 int i;
8039 8038
8040 if (bio->bi_error) 8039 if (bio->bi_status)
8041 goto end; 8040 goto end;
8042 8041
8043 ASSERT(bio->bi_vcnt == 1); 8042 ASSERT(bio->bi_vcnt == 1);
@@ -8116,7 +8115,7 @@ static void btrfs_retry_endio(struct bio *bio)
8116 int ret; 8115 int ret;
8117 int i; 8116 int i;
8118 8117
8119 if (bio->bi_error) 8118 if (bio->bi_status)
8120 goto end; 8119 goto end;
8121 8120
8122 uptodate = 1; 8121 uptodate = 1;
@@ -8141,8 +8140,8 @@ end:
8141 bio_put(bio); 8140 bio_put(bio);
8142} 8141}
8143 8142
8144static int __btrfs_subio_endio_read(struct inode *inode, 8143static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
8145 struct btrfs_io_bio *io_bio, int err) 8144 struct btrfs_io_bio *io_bio, blk_status_t err)
8146{ 8145{
8147 struct btrfs_fs_info *fs_info; 8146 struct btrfs_fs_info *fs_info;
8148 struct bio_vec *bvec; 8147 struct bio_vec *bvec;
@@ -8184,7 +8183,7 @@ try_again:
8184 io_bio->mirror_num, 8183 io_bio->mirror_num,
8185 btrfs_retry_endio, &done); 8184 btrfs_retry_endio, &done);
8186 if (ret) { 8185 if (ret) {
8187 err = ret; 8186 err = errno_to_blk_status(ret);
8188 goto next; 8187 goto next;
8189 } 8188 }
8190 8189
@@ -8211,8 +8210,8 @@ next:
8211 return err; 8210 return err;
8212} 8211}
8213 8212
8214static int btrfs_subio_endio_read(struct inode *inode, 8213static blk_status_t btrfs_subio_endio_read(struct inode *inode,
8215 struct btrfs_io_bio *io_bio, int err) 8214 struct btrfs_io_bio *io_bio, blk_status_t err)
8216{ 8215{
8217 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 8216 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
8218 8217
@@ -8232,7 +8231,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
8232 struct inode *inode = dip->inode; 8231 struct inode *inode = dip->inode;
8233 struct bio *dio_bio; 8232 struct bio *dio_bio;
8234 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 8233 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8235 int err = bio->bi_error; 8234 blk_status_t err = bio->bi_status;
8236 8235
8237 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) 8236 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
8238 err = btrfs_subio_endio_read(inode, io_bio, err); 8237 err = btrfs_subio_endio_read(inode, io_bio, err);
@@ -8243,11 +8242,11 @@ static void btrfs_endio_direct_read(struct bio *bio)
8243 8242
8244 kfree(dip); 8243 kfree(dip);
8245 8244
8246 dio_bio->bi_error = bio->bi_error; 8245 dio_bio->bi_status = bio->bi_status;
8247 dio_end_io(dio_bio, bio->bi_error); 8246 dio_end_io(dio_bio);
8248 8247
8249 if (io_bio->end_io) 8248 if (io_bio->end_io)
8250 io_bio->end_io(io_bio, err); 8249 io_bio->end_io(io_bio, blk_status_to_errno(err));
8251 bio_put(bio); 8250 bio_put(bio);
8252} 8251}
8253 8252
@@ -8299,20 +8298,20 @@ static void btrfs_endio_direct_write(struct bio *bio)
8299 struct bio *dio_bio = dip->dio_bio; 8298 struct bio *dio_bio = dip->dio_bio;
8300 8299
8301 __endio_write_update_ordered(dip->inode, dip->logical_offset, 8300 __endio_write_update_ordered(dip->inode, dip->logical_offset,
8302 dip->bytes, !bio->bi_error); 8301 dip->bytes, !bio->bi_status);
8303 8302
8304 kfree(dip); 8303 kfree(dip);
8305 8304
8306 dio_bio->bi_error = bio->bi_error; 8305 dio_bio->bi_status = bio->bi_status;
8307 dio_end_io(dio_bio, bio->bi_error); 8306 dio_end_io(dio_bio);
8308 bio_put(bio); 8307 bio_put(bio);
8309} 8308}
8310 8309
8311static int __btrfs_submit_bio_start_direct_io(struct inode *inode, 8310static blk_status_t __btrfs_submit_bio_start_direct_io(struct inode *inode,
8312 struct bio *bio, int mirror_num, 8311 struct bio *bio, int mirror_num,
8313 unsigned long bio_flags, u64 offset) 8312 unsigned long bio_flags, u64 offset)
8314{ 8313{
8315 int ret; 8314 blk_status_t ret;
8316 ret = btrfs_csum_one_bio(inode, bio, offset, 1); 8315 ret = btrfs_csum_one_bio(inode, bio, offset, 1);
8317 BUG_ON(ret); /* -ENOMEM */ 8316 BUG_ON(ret); /* -ENOMEM */
8318 return 0; 8317 return 0;
@@ -8321,7 +8320,7 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode,
8321static void btrfs_end_dio_bio(struct bio *bio) 8320static void btrfs_end_dio_bio(struct bio *bio)
8322{ 8321{
8323 struct btrfs_dio_private *dip = bio->bi_private; 8322 struct btrfs_dio_private *dip = bio->bi_private;
8324 int err = bio->bi_error; 8323 blk_status_t err = bio->bi_status;
8325 8324
8326 if (err) 8325 if (err)
8327 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, 8326 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@ -8351,7 +8350,7 @@ static void btrfs_end_dio_bio(struct bio *bio)
8351 if (dip->errors) { 8350 if (dip->errors) {
8352 bio_io_error(dip->orig_bio); 8351 bio_io_error(dip->orig_bio);
8353 } else { 8352 } else {
8354 dip->dio_bio->bi_error = 0; 8353 dip->dio_bio->bi_status = 0;
8355 bio_endio(dip->orig_bio); 8354 bio_endio(dip->orig_bio);
8356 } 8355 }
8357out: 8356out:
@@ -8368,14 +8367,14 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
8368 return bio; 8367 return bio;
8369} 8368}
8370 8369
8371static inline int btrfs_lookup_and_bind_dio_csum(struct inode *inode, 8370static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
8372 struct btrfs_dio_private *dip, 8371 struct btrfs_dio_private *dip,
8373 struct bio *bio, 8372 struct bio *bio,
8374 u64 file_offset) 8373 u64 file_offset)
8375{ 8374{
8376 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 8375 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8377 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); 8376 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
8378 int ret; 8377 blk_status_t ret;
8379 8378
8380 /* 8379 /*
8381 * We load all the csum data we need when we submit 8380 * We load all the csum data we need when we submit
@@ -8406,7 +8405,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
8406 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 8405 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8407 struct btrfs_dio_private *dip = bio->bi_private; 8406 struct btrfs_dio_private *dip = bio->bi_private;
8408 bool write = bio_op(bio) == REQ_OP_WRITE; 8407 bool write = bio_op(bio) == REQ_OP_WRITE;
8409 int ret; 8408 blk_status_t ret;
8410 8409
8411 if (async_submit) 8410 if (async_submit)
8412 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); 8411 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8649,7 +8648,7 @@ free_ordered:
8649 * callbacks - they require an allocated dip and a clone of dio_bio. 8648 * callbacks - they require an allocated dip and a clone of dio_bio.
8650 */ 8649 */
8651 if (io_bio && dip) { 8650 if (io_bio && dip) {
8652 io_bio->bi_error = -EIO; 8651 io_bio->bi_status = BLK_STS_IOERR;
8653 bio_endio(io_bio); 8652 bio_endio(io_bio);
8654 /* 8653 /*
8655 * The end io callbacks free our dip, do the final put on io_bio 8654 * The end io callbacks free our dip, do the final put on io_bio
@@ -8668,12 +8667,12 @@ free_ordered:
8668 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, 8667 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
8669 file_offset + dio_bio->bi_iter.bi_size - 1); 8668 file_offset + dio_bio->bi_iter.bi_size - 1);
8670 8669
8671 dio_bio->bi_error = -EIO; 8670 dio_bio->bi_status = BLK_STS_IOERR;
8672 /* 8671 /*
8673 * Releases and cleans up our dio_bio, no need to bio_put() 8672 * Releases and cleans up our dio_bio, no need to bio_put()
8674 * nor bio_endio()/bio_io_error() against dio_bio. 8673 * nor bio_endio()/bio_io_error() against dio_bio.
8675 */ 8674 */
8676 dio_end_io(dio_bio, ret); 8675 dio_end_io(dio_bio);
8677 } 8676 }
8678 if (io_bio) 8677 if (io_bio)
8679 bio_put(io_bio); 8678 bio_put(io_bio);
@@ -8755,6 +8754,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
8755 dio_data.overwrite = 1; 8754 dio_data.overwrite = 1;
8756 inode_unlock(inode); 8755 inode_unlock(inode);
8757 relock = true; 8756 relock = true;
8757 } else if (iocb->ki_flags & IOCB_NOWAIT) {
8758 ret = -EAGAIN;
8759 goto out;
8758 } 8760 }
8759 ret = btrfs_delalloc_reserve_space(inode, offset, count); 8761 ret = btrfs_delalloc_reserve_space(inode, offset, count);
8760 if (ret) 8762 if (ret)
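
A shape that repeats through these inode.c hunks: when a lower layer rejects the bio, the submitter stores the blk_status_t in bio->bi_status and completes the bio itself with bio_endio(), so the end_io handler always sees a single authoritative status. A toy model of that error path, with invented types standing in for the real structures:

/* Toy model of the "store status, complete the bio" error path used in
 * __btrfs_submit_bio_done() and btrfs_submit_bio_hook() above.
 */
#include <stdio.h>

typedef unsigned char blk_status_t;
#define TOY_STS_OK    0
#define TOY_STS_IOERR 1

struct toy_bio {
	blk_status_t status;
	void (*end_io)(struct toy_bio *bio);
};

static void toy_end_io(struct toy_bio *bio)
{
	/* End handlers only ever look at bio->status. */
	printf("end_io: status=%u\n", (unsigned)bio->status);
}

/* Stand-in for btrfs_map_bio(); pretend the mapping failed. */
static blk_status_t toy_map_bio(struct toy_bio *bio)
{
	(void)bio;
	return TOY_STS_IOERR;
}

static blk_status_t toy_submit(struct toy_bio *bio)
{
	blk_status_t ret = toy_map_bio(bio);

	if (ret) {
		/* Failure: record the status and finish the bio ourselves. */
		bio->status = ret;
		bio->end_io(bio);
	}
	return ret;
}

int main(void)
{
	struct toy_bio bio = { .status = TOY_STS_OK, .end_io = toy_end_io };

	toy_submit(&bio);
	return 0;
}
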
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index d8ea0eb76325..f3d30d9ea8f9 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -871,7 +871,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio)
871 * this frees the rbio and runs through all the bios in the 871 * this frees the rbio and runs through all the bios in the
872 * bio_list and calls end_io on them 872 * bio_list and calls end_io on them
873 */ 873 */
874static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) 874static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
875{ 875{
876 struct bio *cur = bio_list_get(&rbio->bio_list); 876 struct bio *cur = bio_list_get(&rbio->bio_list);
877 struct bio *next; 877 struct bio *next;
@@ -884,7 +884,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
884 while (cur) { 884 while (cur) {
885 next = cur->bi_next; 885 next = cur->bi_next;
886 cur->bi_next = NULL; 886 cur->bi_next = NULL;
887 cur->bi_error = err; 887 cur->bi_status = err;
888 bio_endio(cur); 888 bio_endio(cur);
889 cur = next; 889 cur = next;
890 } 890 }
@@ -897,7 +897,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err)
897static void raid_write_end_io(struct bio *bio) 897static void raid_write_end_io(struct bio *bio)
898{ 898{
899 struct btrfs_raid_bio *rbio = bio->bi_private; 899 struct btrfs_raid_bio *rbio = bio->bi_private;
900 int err = bio->bi_error; 900 blk_status_t err = bio->bi_status;
901 int max_errors; 901 int max_errors;
902 902
903 if (err) 903 if (err)
@@ -914,7 +914,7 @@ static void raid_write_end_io(struct bio *bio)
914 max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? 914 max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
915 0 : rbio->bbio->max_errors; 915 0 : rbio->bbio->max_errors;
916 if (atomic_read(&rbio->error) > max_errors) 916 if (atomic_read(&rbio->error) > max_errors)
917 err = -EIO; 917 err = BLK_STS_IOERR;
918 918
919 rbio_orig_end_io(rbio, err); 919 rbio_orig_end_io(rbio, err);
920} 920}
@@ -1092,7 +1092,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1092 * devices or if they are not contiguous 1092 * devices or if they are not contiguous
1093 */ 1093 */
1094 if (last_end == disk_start && stripe->dev->bdev && 1094 if (last_end == disk_start && stripe->dev->bdev &&
1095 !last->bi_error && 1095 !last->bi_status &&
1096 last->bi_bdev == stripe->dev->bdev) { 1096 last->bi_bdev == stripe->dev->bdev) {
1097 ret = bio_add_page(last, page, PAGE_SIZE, 0); 1097 ret = bio_add_page(last, page, PAGE_SIZE, 0);
1098 if (ret == PAGE_SIZE) 1098 if (ret == PAGE_SIZE)
@@ -1448,7 +1448,7 @@ static void raid_rmw_end_io(struct bio *bio)
1448{ 1448{
1449 struct btrfs_raid_bio *rbio = bio->bi_private; 1449 struct btrfs_raid_bio *rbio = bio->bi_private;
1450 1450
1451 if (bio->bi_error) 1451 if (bio->bi_status)
1452 fail_bio_stripe(rbio, bio); 1452 fail_bio_stripe(rbio, bio);
1453 else 1453 else
1454 set_bio_pages_uptodate(bio); 1454 set_bio_pages_uptodate(bio);
@@ -1991,7 +1991,7 @@ static void raid_recover_end_io(struct bio *bio)
1991 * we only read stripe pages off the disk, set them 1991 * we only read stripe pages off the disk, set them
1992 * up to date if there were no errors 1992 * up to date if there were no errors
1993 */ 1993 */
1994 if (bio->bi_error) 1994 if (bio->bi_status)
1995 fail_bio_stripe(rbio, bio); 1995 fail_bio_stripe(rbio, bio);
1996 else 1996 else
1997 set_bio_pages_uptodate(bio); 1997 set_bio_pages_uptodate(bio);
@@ -2530,7 +2530,7 @@ static void raid56_parity_scrub_end_io(struct bio *bio)
2530{ 2530{
2531 struct btrfs_raid_bio *rbio = bio->bi_private; 2531 struct btrfs_raid_bio *rbio = bio->bi_private;
2532 2532
2533 if (bio->bi_error) 2533 if (bio->bi_status)
2534 fail_bio_stripe(rbio, bio); 2534 fail_bio_stripe(rbio, bio);
2535 else 2535 else
2536 set_bio_pages_uptodate(bio); 2536 set_bio_pages_uptodate(bio);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c7b45eb2403d..ba5595d19de1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -95,7 +95,7 @@ struct scrub_bio {
95 struct scrub_ctx *sctx; 95 struct scrub_ctx *sctx;
96 struct btrfs_device *dev; 96 struct btrfs_device *dev;
97 struct bio *bio; 97 struct bio *bio;
98 int err; 98 blk_status_t status;
99 u64 logical; 99 u64 logical;
100 u64 physical; 100 u64 physical;
101#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO 101#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
@@ -1668,14 +1668,14 @@ leave_nomem:
1668 1668
1669struct scrub_bio_ret { 1669struct scrub_bio_ret {
1670 struct completion event; 1670 struct completion event;
1671 int error; 1671 blk_status_t status;
1672}; 1672};
1673 1673
1674static void scrub_bio_wait_endio(struct bio *bio) 1674static void scrub_bio_wait_endio(struct bio *bio)
1675{ 1675{
1676 struct scrub_bio_ret *ret = bio->bi_private; 1676 struct scrub_bio_ret *ret = bio->bi_private;
1677 1677
1678 ret->error = bio->bi_error; 1678 ret->status = bio->bi_status;
1679 complete(&ret->event); 1679 complete(&ret->event);
1680} 1680}
1681 1681
@@ -1693,7 +1693,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1693 int ret; 1693 int ret;
1694 1694
1695 init_completion(&done.event); 1695 init_completion(&done.event);
1696 done.error = 0; 1696 done.status = 0;
1697 bio->bi_iter.bi_sector = page->logical >> 9; 1697 bio->bi_iter.bi_sector = page->logical >> 9;
1698 bio->bi_private = &done; 1698 bio->bi_private = &done;
1699 bio->bi_end_io = scrub_bio_wait_endio; 1699 bio->bi_end_io = scrub_bio_wait_endio;
@@ -1705,7 +1705,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1705 return ret; 1705 return ret;
1706 1706
1707 wait_for_completion(&done.event); 1707 wait_for_completion(&done.event);
1708 if (done.error) 1708 if (done.status)
1709 return -EIO; 1709 return -EIO;
1710 1710
1711 return 0; 1711 return 0;
@@ -1937,7 +1937,7 @@ again:
1937 bio->bi_bdev = sbio->dev->bdev; 1937 bio->bi_bdev = sbio->dev->bdev;
1938 bio->bi_iter.bi_sector = sbio->physical >> 9; 1938 bio->bi_iter.bi_sector = sbio->physical >> 9;
1939 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 1939 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1940 sbio->err = 0; 1940 sbio->status = 0;
1941 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1941 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1942 spage->physical_for_dev_replace || 1942 spage->physical_for_dev_replace ||
1943 sbio->logical + sbio->page_count * PAGE_SIZE != 1943 sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -1992,7 +1992,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
1992 struct scrub_bio *sbio = bio->bi_private; 1992 struct scrub_bio *sbio = bio->bi_private;
1993 struct btrfs_fs_info *fs_info = sbio->dev->fs_info; 1993 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
1994 1994
1995 sbio->err = bio->bi_error; 1995 sbio->status = bio->bi_status;
1996 sbio->bio = bio; 1996 sbio->bio = bio;
1997 1997
1998 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, 1998 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
@@ -2007,7 +2007,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
2007 int i; 2007 int i;
2008 2008
2009 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); 2009 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
2010 if (sbio->err) { 2010 if (sbio->status) {
2011 struct btrfs_dev_replace *dev_replace = 2011 struct btrfs_dev_replace *dev_replace =
2012 &sbio->sctx->fs_info->dev_replace; 2012 &sbio->sctx->fs_info->dev_replace;
2013 2013
@@ -2341,7 +2341,7 @@ again:
2341 bio->bi_bdev = sbio->dev->bdev; 2341 bio->bi_bdev = sbio->dev->bdev;
2342 bio->bi_iter.bi_sector = sbio->physical >> 9; 2342 bio->bi_iter.bi_sector = sbio->physical >> 9;
2343 bio_set_op_attrs(bio, REQ_OP_READ, 0); 2343 bio_set_op_attrs(bio, REQ_OP_READ, 0);
2344 sbio->err = 0; 2344 sbio->status = 0;
2345 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 2345 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2346 spage->physical || 2346 spage->physical ||
2347 sbio->logical + sbio->page_count * PAGE_SIZE != 2347 sbio->logical + sbio->page_count * PAGE_SIZE !=
@@ -2377,7 +2377,7 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
2377 struct scrub_block *sblock = bio->bi_private; 2377 struct scrub_block *sblock = bio->bi_private;
2378 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info; 2378 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
2379 2379
2380 if (bio->bi_error) 2380 if (bio->bi_status)
2381 sblock->no_io_error_seen = 0; 2381 sblock->no_io_error_seen = 0;
2382 2382
2383 bio_put(bio); 2383 bio_put(bio);
@@ -2588,7 +2588,7 @@ static void scrub_bio_end_io(struct bio *bio)
2588 struct scrub_bio *sbio = bio->bi_private; 2588 struct scrub_bio *sbio = bio->bi_private;
2589 struct btrfs_fs_info *fs_info = sbio->dev->fs_info; 2589 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2590 2590
2591 sbio->err = bio->bi_error; 2591 sbio->status = bio->bi_status;
2592 sbio->bio = bio; 2592 sbio->bio = bio;
2593 2593
2594 btrfs_queue_work(fs_info->scrub_workers, &sbio->work); 2594 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
@@ -2601,7 +2601,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
2601 int i; 2601 int i;
2602 2602
2603 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); 2603 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2604 if (sbio->err) { 2604 if (sbio->status) {
2605 for (i = 0; i < sbio->page_count; i++) { 2605 for (i = 0; i < sbio->page_count; i++) {
2606 struct scrub_page *spage = sbio->pagev[i]; 2606 struct scrub_page *spage = sbio->pagev[i];
2607 2607
@@ -3004,7 +3004,7 @@ static void scrub_parity_bio_endio(struct bio *bio)
3004 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; 3004 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
3005 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info; 3005 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
3006 3006
3007 if (bio->bi_error) 3007 if (bio->bi_status)
3008 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, 3008 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
3009 sparity->nsectors); 3009 sparity->nsectors);
3010 3010
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 017b67daa3bb..84a495967e0a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6042,9 +6042,10 @@ static void btrfs_end_bio(struct bio *bio)
6042 struct btrfs_bio *bbio = bio->bi_private; 6042 struct btrfs_bio *bbio = bio->bi_private;
6043 int is_orig_bio = 0; 6043 int is_orig_bio = 0;
6044 6044
6045 if (bio->bi_error) { 6045 if (bio->bi_status) {
6046 atomic_inc(&bbio->error); 6046 atomic_inc(&bbio->error);
6047 if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) { 6047 if (bio->bi_status == BLK_STS_IOERR ||
6048 bio->bi_status == BLK_STS_TARGET) {
6048 unsigned int stripe_index = 6049 unsigned int stripe_index =
6049 btrfs_io_bio(bio)->stripe_index; 6050 btrfs_io_bio(bio)->stripe_index;
6050 struct btrfs_device *dev; 6051 struct btrfs_device *dev;
@@ -6082,13 +6083,13 @@ static void btrfs_end_bio(struct bio *bio)
6082 * beyond the tolerance of the btrfs bio 6083 * beyond the tolerance of the btrfs bio
6083 */ 6084 */
6084 if (atomic_read(&bbio->error) > bbio->max_errors) { 6085 if (atomic_read(&bbio->error) > bbio->max_errors) {
6085 bio->bi_error = -EIO; 6086 bio->bi_status = BLK_STS_IOERR;
6086 } else { 6087 } else {
6087 /* 6088 /*
6088 * this bio is actually up to date, we didn't 6089 * this bio is actually up to date, we didn't
6089 * go over the max number of errors 6090 * go over the max number of errors
6090 */ 6091 */
6091 bio->bi_error = 0; 6092 bio->bi_status = 0;
6092 } 6093 }
6093 6094
6094 btrfs_end_bbio(bbio, bio); 6095 btrfs_end_bbio(bbio, bio);
@@ -6199,7 +6200,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6199 6200
6200 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 6201 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6201 bio->bi_iter.bi_sector = logical >> 9; 6202 bio->bi_iter.bi_sector = logical >> 9;
6202 bio->bi_error = -EIO; 6203 bio->bi_status = BLK_STS_IOERR;
6203 btrfs_end_bbio(bbio, bio); 6204 btrfs_end_bbio(bbio, bio);
6204 } 6205 }
6205} 6206}
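
btrfs_end_bio() now compares bi_status against BLK_STS_IOERR and BLK_STS_TARGET instead of -EIO and -EREMOTEIO, but the tolerance logic is unchanged: each failed stripe bumps a counter, and the parent bio only completes with an error once that count exceeds bbio->max_errors. A compressed, single-threaded model of that decision (the kernel uses an atomic counter; all names here are invented):

/* Model of the mirror-error tolerance decision in btrfs_end_bio() above. */
#include <stdio.h>

#define TOY_STS_OK    0
#define TOY_STS_IOERR 1

struct toy_bbio {
	int errors;	/* stripes that failed so far */
	int max_errors;	/* how many failures the profile tolerates */
};

/* Called once per completed stripe bio; returns the status the parent
 * bio should carry once the last stripe finishes. */
static int toy_stripe_done(struct toy_bbio *bbio, int stripe_status)
{
	if (stripe_status)
		bbio->errors++;

	return bbio->errors > bbio->max_errors ? TOY_STS_IOERR : TOY_STS_OK;
}

int main(void)
{
	/* e.g. a two-copy profile: one bad mirror does not fail the read */
	struct toy_bbio bbio = { .errors = 0, .max_errors = 1 };

	printf("after 1 bad stripe: %d\n", toy_stripe_done(&bbio, TOY_STS_IOERR));
	printf("after 2 bad stripes: %d\n", toy_stripe_done(&bbio, TOY_STS_IOERR));
	return 0;
}
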
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..5c2cba8d2387 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
49 49
50static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 50static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
51static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, 51static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
52 struct writeback_control *wbc); 52 enum rw_hint hint, struct writeback_control *wbc);
53 53
54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) 54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
55 55
@@ -1829,7 +1829,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
1829 do { 1829 do {
1830 struct buffer_head *next = bh->b_this_page; 1830 struct buffer_head *next = bh->b_this_page;
1831 if (buffer_async_write(bh)) { 1831 if (buffer_async_write(bh)) {
1832 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); 1832 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1833 inode->i_write_hint, wbc);
1833 nr_underway++; 1834 nr_underway++;
1834 } 1835 }
1835 bh = next; 1836 bh = next;
@@ -1883,7 +1884,8 @@ recover:
1883 struct buffer_head *next = bh->b_this_page; 1884 struct buffer_head *next = bh->b_this_page;
1884 if (buffer_async_write(bh)) { 1885 if (buffer_async_write(bh)) {
1885 clear_buffer_dirty(bh); 1886 clear_buffer_dirty(bh);
1886 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); 1887 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1888 inode->i_write_hint, wbc);
1887 nr_underway++; 1889 nr_underway++;
1888 } 1890 }
1889 bh = next; 1891 bh = next;
@@ -3038,7 +3040,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
3038 if (unlikely(bio_flagged(bio, BIO_QUIET))) 3040 if (unlikely(bio_flagged(bio, BIO_QUIET)))
3039 set_bit(BH_Quiet, &bh->b_state); 3041 set_bit(BH_Quiet, &bh->b_state);
3040 3042
3041 bh->b_end_io(bh, !bio->bi_error); 3043 bh->b_end_io(bh, !bio->bi_status);
3042 bio_put(bio); 3044 bio_put(bio);
3043} 3045}
3044 3046
@@ -3091,7 +3093,7 @@ void guard_bio_eod(int op, struct bio *bio)
3091} 3093}
3092 3094
3093static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, 3095static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3094 struct writeback_control *wbc) 3096 enum rw_hint write_hint, struct writeback_control *wbc)
3095{ 3097{
3096 struct bio *bio; 3098 struct bio *bio;
3097 3099
@@ -3120,6 +3122,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3120 3122
3121 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 3123 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3122 bio->bi_bdev = bh->b_bdev; 3124 bio->bi_bdev = bh->b_bdev;
3125 bio->bi_write_hint = write_hint;
3123 3126
3124 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); 3127 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
3125 BUG_ON(bio->bi_iter.bi_size != bh->b_size); 3128 BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3145,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3142 3145
3143int submit_bh(int op, int op_flags, struct buffer_head *bh) 3146int submit_bh(int op, int op_flags, struct buffer_head *bh)
3144{ 3147{
3145 return submit_bh_wbc(op, op_flags, bh, NULL); 3148 return submit_bh_wbc(op, op_flags, bh, 0, NULL);
3146} 3149}
3147EXPORT_SYMBOL(submit_bh); 3150EXPORT_SYMBOL(submit_bh);
3148 3151
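
In fs/buffer.c the inode's write hint is threaded down to the bio: submit_bh_wbc() gains an enum rw_hint argument and copies it into bio->bi_write_hint, while plain submit_bh() passes 0, meaning no hint. A stripped-down model of that plumbing; the hint values are placeholders rather than the kernel enum:

/* Model of the write-hint plumbing added to submit_bh_wbc() above:
 * the caller's hint rides along on the bio.
 */
#include <stdio.h>

enum toy_rw_hint {
	TOY_WRITE_LIFE_NOT_SET = 0,
	TOY_WRITE_LIFE_SHORT,
	TOY_WRITE_LIFE_LONG,
};

struct toy_bio {
	enum toy_rw_hint write_hint;
};

static int toy_submit_bh_wbc(enum toy_rw_hint hint)
{
	struct toy_bio bio = { .write_hint = hint };

	/* The hint ends up on the bio so the device can steer placement. */
	printf("submitting bio with write_hint=%d\n", bio.write_hint);
	return 0;
}

/* submit_bh() keeps its old behaviour: no hint. */
static int toy_submit_bh(void)
{
	return toy_submit_bh_wbc(TOY_WRITE_LIFE_NOT_SET);
}

int main(void)
{
	toy_submit_bh_wbc(TOY_WRITE_LIFE_SHORT);	/* hinted writeback path */
	toy_submit_bh();				/* legacy path, hint 0 */
	return 0;
}
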
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index a409a84f1bca..6181e9526860 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -129,7 +129,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
129 goto errout; 129 goto errout;
130 } 130 }
131 err = submit_bio_wait(bio); 131 err = submit_bio_wait(bio);
132 if ((err == 0) && bio->bi_error) 132 if (err == 0 && bio->bi_status)
133 err = -EIO; 133 err = -EIO;
134 bio_put(bio); 134 bio_put(bio);
135 if (err) 135 if (err)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a04ebea77de8..08cf27811e5a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -294,7 +294,7 @@ static void dio_aio_complete_work(struct work_struct *work)
294 dio_complete(dio, 0, true); 294 dio_complete(dio, 0, true);
295} 295}
296 296
297static int dio_bio_complete(struct dio *dio, struct bio *bio); 297static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
298 298
299/* 299/*
300 * Asynchronous IO callback. 300 * Asynchronous IO callback.
@@ -348,13 +348,12 @@ static void dio_bio_end_io(struct bio *bio)
348/** 348/**
349 * dio_end_io - handle the end io action for the given bio 349 * dio_end_io - handle the end io action for the given bio
350 * @bio: The direct io bio thats being completed 350 * @bio: The direct io bio thats being completed
351 * @error: Error if there was one
352 * 351 *
353 * This is meant to be called by any filesystem that uses their own dio_submit_t 352 * This is meant to be called by any filesystem that uses their own dio_submit_t
354 * so that the DIO specific endio actions are dealt with after the filesystem 353 * so that the DIO specific endio actions are dealt with after the filesystem
355 * has done it's completion work. 354 * has done it's completion work.
356 */ 355 */
357void dio_end_io(struct bio *bio, int error) 356void dio_end_io(struct bio *bio)
358{ 357{
359 struct dio *dio = bio->bi_private; 358 struct dio *dio = bio->bi_private;
360 359
@@ -386,6 +385,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
386 else 385 else
387 bio->bi_end_io = dio_bio_end_io; 386 bio->bi_end_io = dio_bio_end_io;
388 387
388 bio->bi_write_hint = dio->iocb->ki_hint;
389
389 sdio->bio = bio; 390 sdio->bio = bio;
390 sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; 391 sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
391} 392}
@@ -474,17 +475,20 @@ static struct bio *dio_await_one(struct dio *dio)
474/* 475/*
475 * Process one completed BIO. No locks are held. 476 * Process one completed BIO. No locks are held.
476 */ 477 */
477static int dio_bio_complete(struct dio *dio, struct bio *bio) 478static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
478{ 479{
479 struct bio_vec *bvec; 480 struct bio_vec *bvec;
480 unsigned i; 481 unsigned i;
481 int err; 482 blk_status_t err = bio->bi_status;
482 483
483 if (bio->bi_error) 484 if (err) {
484 dio->io_error = -EIO; 485 if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT))
486 dio->io_error = -EAGAIN;
487 else
488 dio->io_error = -EIO;
489 }
485 490
486 if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) { 491 if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
487 err = bio->bi_error;
488 bio_check_pages_dirty(bio); /* transfers ownership */ 492 bio_check_pages_dirty(bio); /* transfers ownership */
489 } else { 493 } else {
490 bio_for_each_segment_all(bvec, bio, i) { 494 bio_for_each_segment_all(bvec, bio, i) {
@@ -495,7 +499,6 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
495 set_page_dirty_lock(page); 499 set_page_dirty_lock(page);
496 put_page(page); 500 put_page(page);
497 } 501 }
498 err = bio->bi_error;
499 bio_put(bio); 502 bio_put(bio);
500 } 503 }
501 return err; 504 return err;
@@ -539,7 +542,7 @@ static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
539 bio = dio->bio_list; 542 bio = dio->bio_list;
540 dio->bio_list = bio->bi_private; 543 dio->bio_list = bio->bi_private;
541 spin_unlock_irqrestore(&dio->bio_lock, flags); 544 spin_unlock_irqrestore(&dio->bio_lock, flags);
542 ret2 = dio_bio_complete(dio, bio); 545 ret2 = blk_status_to_errno(dio_bio_complete(dio, bio));
543 if (ret == 0) 546 if (ret == 0)
544 ret = ret2; 547 ret = ret2;
545 } 548 }
@@ -1197,6 +1200,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1197 if (iov_iter_rw(iter) == WRITE) { 1200 if (iov_iter_rw(iter) == WRITE) {
1198 dio->op = REQ_OP_WRITE; 1201 dio->op = REQ_OP_WRITE;
1199 dio->op_flags = REQ_SYNC | REQ_IDLE; 1202 dio->op_flags = REQ_SYNC | REQ_IDLE;
1203 if (iocb->ki_flags & IOCB_NOWAIT)
1204 dio->op_flags |= REQ_NOWAIT;
1200 } else { 1205 } else {
1201 dio->op = REQ_OP_READ; 1206 dio->op = REQ_OP_READ;
1202 } 1207 }
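
Two related changes land in fs/direct-io.c: writes issued under IOCB_NOWAIT get REQ_NOWAIT set on the bio, and dio_bio_complete() maps a BLK_STS_AGAIN completion on such a bio to -EAGAIN instead of the blanket -EIO, so a non-blocking submitter can retry. A sketch of that mapping with stand-in constants and fields:

/* Model of the dio_bio_complete() error mapping above: BLK_STS_AGAIN on a
 * REQ_NOWAIT bio becomes -EAGAIN, everything else collapses to -EIO.
 */
#include <errno.h>
#include <stdio.h>

#define TOY_STS_OK    0
#define TOY_STS_IOERR 1
#define TOY_STS_AGAIN 2

#define TOY_REQ_NOWAIT 0x1

struct toy_bio {
	unsigned char status;
	unsigned int  opf;	/* request flags */
};

static int toy_dio_bio_complete(const struct toy_bio *bio)
{
	if (!bio->status)
		return 0;

	if (bio->status == TOY_STS_AGAIN && (bio->opf & TOY_REQ_NOWAIT))
		return -EAGAIN;	/* nowait submission bumped into contention */

	return -EIO;		/* any other failure is a plain I/O error */
}

int main(void)
{
	struct toy_bio nowait = { .status = TOY_STS_AGAIN, .opf = TOY_REQ_NOWAIT };
	struct toy_bio failed = { .status = TOY_STS_IOERR, .opf = 0 };

	printf("nowait bio -> %d (expect %d)\n",
	       toy_dio_bio_complete(&nowait), -EAGAIN);
	printf("failed bio -> %d (expect %d)\n",
	       toy_dio_bio_complete(&failed), -EIO);
	return 0;
}
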
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 02ce7e7bbdf5..58e2eeaa0bc4 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -37,7 +37,11 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
37 struct inode *inode = file_inode(iocb->ki_filp); 37 struct inode *inode = file_inode(iocb->ki_filp);
38 ssize_t ret; 38 ssize_t ret;
39 39
40 inode_lock_shared(inode); 40 if (!inode_trylock_shared(inode)) {
41 if (iocb->ki_flags & IOCB_NOWAIT)
42 return -EAGAIN;
43 inode_lock_shared(inode);
44 }
41 /* 45 /*
42 * Recheck under inode lock - at this point we are sure it cannot 46 * Recheck under inode lock - at this point we are sure it cannot
43 * change anymore 47 * change anymore
@@ -179,7 +183,11 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
179 struct inode *inode = file_inode(iocb->ki_filp); 183 struct inode *inode = file_inode(iocb->ki_filp);
180 ssize_t ret; 184 ssize_t ret;
181 185
182 inode_lock(inode); 186 if (!inode_trylock(inode)) {
187 if (iocb->ki_flags & IOCB_NOWAIT)
188 return -EAGAIN;
189 inode_lock(inode);
190 }
183 ret = ext4_write_checks(iocb, from); 191 ret = ext4_write_checks(iocb, from);
184 if (ret <= 0) 192 if (ret <= 0)
185 goto out; 193 goto out;
@@ -216,7 +224,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
216 return ext4_dax_write_iter(iocb, from); 224 return ext4_dax_write_iter(iocb, from);
217#endif 225#endif
218 226
219 inode_lock(inode); 227 if (!inode_trylock(inode)) {
228 if (iocb->ki_flags & IOCB_NOWAIT)
229 return -EAGAIN;
230 inode_lock(inode);
231 }
232
220 ret = ext4_write_checks(iocb, from); 233 ret = ext4_write_checks(iocb, from);
221 if (ret <= 0) 234 if (ret <= 0)
222 goto out; 235 goto out;
@@ -235,9 +248,15 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
235 248
236 iocb->private = &overwrite; 249 iocb->private = &overwrite;
237 /* Check whether we do a DIO overwrite or not */ 250 /* Check whether we do a DIO overwrite or not */
238 if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio && 251 if (o_direct && !unaligned_aio) {
239 ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) 252 if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
240 overwrite = 1; 253 if (ext4_should_dioread_nolock(inode))
254 overwrite = 1;
255 } else if (iocb->ki_flags & IOCB_NOWAIT) {
256 ret = -EAGAIN;
257 goto out;
258 }
259 }
241 260
242 ret = __generic_file_write_iter(iocb, from); 261 ret = __generic_file_write_iter(iocb, from);
243 inode_unlock(inode); 262 inode_unlock(inode);
@@ -435,6 +454,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
435 if (ret < 0) 454 if (ret < 0)
436 return ret; 455 return ret;
437 } 456 }
457
458 /* Set the flags to support nowait AIO */
459 filp->f_mode |= FMODE_AIO_NOWAIT;
460
438 return dquot_file_open(inode, filp); 461 return dquot_file_open(inode, filp);
439} 462}
440 463
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a82138ba739..c2fce4478cca 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -85,7 +85,7 @@ static void ext4_finish_bio(struct bio *bio)
85 } 85 }
86#endif 86#endif
87 87
88 if (bio->bi_error) { 88 if (bio->bi_status) {
89 SetPageError(page); 89 SetPageError(page);
90 mapping_set_error(page->mapping, -EIO); 90 mapping_set_error(page->mapping, -EIO);
91 } 91 }
@@ -104,7 +104,7 @@ static void ext4_finish_bio(struct bio *bio)
104 continue; 104 continue;
105 } 105 }
106 clear_buffer_async_write(bh); 106 clear_buffer_async_write(bh);
107 if (bio->bi_error) 107 if (bio->bi_status)
108 buffer_io_error(bh); 108 buffer_io_error(bh);
109 } while ((bh = bh->b_this_page) != head); 109 } while ((bh = bh->b_this_page) != head);
110 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); 110 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
@@ -303,24 +303,25 @@ static void ext4_end_bio(struct bio *bio)
303 bdevname(bio->bi_bdev, b), 303 bdevname(bio->bi_bdev, b),
304 (long long) bio->bi_iter.bi_sector, 304 (long long) bio->bi_iter.bi_sector,
305 (unsigned) bio_sectors(bio), 305 (unsigned) bio_sectors(bio),
306 bio->bi_error)) { 306 bio->bi_status)) {
307 ext4_finish_bio(bio); 307 ext4_finish_bio(bio);
308 bio_put(bio); 308 bio_put(bio);
309 return; 309 return;
310 } 310 }
311 bio->bi_end_io = NULL; 311 bio->bi_end_io = NULL;
312 312
313 if (bio->bi_error) { 313 if (bio->bi_status) {
314 struct inode *inode = io_end->inode; 314 struct inode *inode = io_end->inode;
315 315
316 ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " 316 ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
317 "(offset %llu size %ld starting block %llu)", 317 "(offset %llu size %ld starting block %llu)",
318 bio->bi_error, inode->i_ino, 318 bio->bi_status, inode->i_ino,
319 (unsigned long long) io_end->offset, 319 (unsigned long long) io_end->offset,
320 (long) io_end->size, 320 (long) io_end->size,
321 (unsigned long long) 321 (unsigned long long)
322 bi_sector >> (inode->i_blkbits - 9)); 322 bi_sector >> (inode->i_blkbits - 9));
323 mapping_set_error(inode->i_mapping, bio->bi_error); 323 mapping_set_error(inode->i_mapping,
324 blk_status_to_errno(bio->bi_status));
324 } 325 }
325 326
326 if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 327 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
@@ -349,6 +350,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
349 if (bio) { 350 if (bio) {
350 int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? 351 int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
351 REQ_SYNC : 0; 352 REQ_SYNC : 0;
353 io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
352 bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags); 354 bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
353 submit_bio(io->io_bio); 355 submit_bio(io->io_bio);
354 } 356 }
@@ -396,6 +398,7 @@ submit_and_retry:
396 ret = io_submit_init_bio(io, bh); 398 ret = io_submit_init_bio(io, bh);
397 if (ret) 399 if (ret)
398 return ret; 400 return ret;
401 io->io_bio->bi_write_hint = inode->i_write_hint;
399 } 402 }
400 ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); 403 ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
401 if (ret != bh->b_size) 404 if (ret != bh->b_size)
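Besides the bi_error to bi_status rename, the writeback path above now stamps each bio with the inode's write-lifetime hint before submission. Isolated, the pattern is just this illustrative fragment:

	bio->bi_write_hint = inode->i_write_hint;
	bio_set_op_attrs(bio, REQ_OP_WRITE,
			 wbc->sync_mode == WB_SYNC_ALL ? REQ_SYNC : 0);
	submit_bio(bio);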
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index a81b829d56de..40a5497b0f60 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -73,7 +73,7 @@ static void mpage_end_io(struct bio *bio)
73 int i; 73 int i;
74 74
75 if (ext4_bio_encrypted(bio)) { 75 if (ext4_bio_encrypted(bio)) {
76 if (bio->bi_error) { 76 if (bio->bi_status) {
77 fscrypt_release_ctx(bio->bi_private); 77 fscrypt_release_ctx(bio->bi_private);
78 } else { 78 } else {
79 fscrypt_decrypt_bio_pages(bio->bi_private, bio); 79 fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -83,7 +83,7 @@ static void mpage_end_io(struct bio *bio)
83 bio_for_each_segment_all(bv, bio, i) { 83 bio_for_each_segment_all(bv, bio, i) {
84 struct page *page = bv->bv_page; 84 struct page *page = bv->bv_page;
85 85
86 if (!bio->bi_error) { 86 if (!bio->bi_status) {
87 SetPageUptodate(page); 87 SetPageUptodate(page);
88 } else { 88 } else {
89 ClearPageUptodate(page); 89 ClearPageUptodate(page);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7c0f6bdf817d..36fe82012a33 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -58,12 +58,12 @@ static void f2fs_read_end_io(struct bio *bio)
58#ifdef CONFIG_F2FS_FAULT_INJECTION 58#ifdef CONFIG_F2FS_FAULT_INJECTION
59 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { 59 if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
60 f2fs_show_injection_info(FAULT_IO); 60 f2fs_show_injection_info(FAULT_IO);
61 bio->bi_error = -EIO; 61 bio->bi_status = BLK_STS_IOERR;
62 } 62 }
63#endif 63#endif
64 64
65 if (f2fs_bio_encrypted(bio)) { 65 if (f2fs_bio_encrypted(bio)) {
66 if (bio->bi_error) { 66 if (bio->bi_status) {
67 fscrypt_release_ctx(bio->bi_private); 67 fscrypt_release_ctx(bio->bi_private);
68 } else { 68 } else {
69 fscrypt_decrypt_bio_pages(bio->bi_private, bio); 69 fscrypt_decrypt_bio_pages(bio->bi_private, bio);
@@ -74,7 +74,7 @@ static void f2fs_read_end_io(struct bio *bio)
74 bio_for_each_segment_all(bvec, bio, i) { 74 bio_for_each_segment_all(bvec, bio, i) {
75 struct page *page = bvec->bv_page; 75 struct page *page = bvec->bv_page;
76 76
77 if (!bio->bi_error) { 77 if (!bio->bi_status) {
78 if (!PageUptodate(page)) 78 if (!PageUptodate(page))
79 SetPageUptodate(page); 79 SetPageUptodate(page);
80 } else { 80 } else {
@@ -102,14 +102,14 @@ static void f2fs_write_end_io(struct bio *bio)
102 unlock_page(page); 102 unlock_page(page);
103 mempool_free(page, sbi->write_io_dummy); 103 mempool_free(page, sbi->write_io_dummy);
104 104
105 if (unlikely(bio->bi_error)) 105 if (unlikely(bio->bi_status))
106 f2fs_stop_checkpoint(sbi, true); 106 f2fs_stop_checkpoint(sbi, true);
107 continue; 107 continue;
108 } 108 }
109 109
110 fscrypt_pullback_bio_page(&page, true); 110 fscrypt_pullback_bio_page(&page, true);
111 111
112 if (unlikely(bio->bi_error)) { 112 if (unlikely(bio->bi_status)) {
113 mapping_set_error(page->mapping, -EIO); 113 mapping_set_error(page->mapping, -EIO);
114 f2fs_stop_checkpoint(sbi, true); 114 f2fs_stop_checkpoint(sbi, true);
115 } 115 }
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 96845854e7ee..ea9f455d94ba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -749,7 +749,7 @@ static void f2fs_submit_discard_endio(struct bio *bio)
749{ 749{
750 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; 750 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
751 751
752 dc->error = bio->bi_error; 752 dc->error = blk_status_to_errno(bio->bi_status);
753 dc->state = D_DONE; 753 dc->state = D_DONE;
754 complete(&dc->wait); 754 complete(&dc->wait);
755 bio_put(bio); 755 bio_put(bio);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..ed051f825bad 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -243,6 +243,67 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
243} 243}
244#endif 244#endif
245 245
246static bool rw_hint_valid(enum rw_hint hint)
247{
248 switch (hint) {
249 case RWF_WRITE_LIFE_NOT_SET:
250 case RWH_WRITE_LIFE_NONE:
251 case RWH_WRITE_LIFE_SHORT:
252 case RWH_WRITE_LIFE_MEDIUM:
253 case RWH_WRITE_LIFE_LONG:
254 case RWH_WRITE_LIFE_EXTREME:
255 return true;
256 default:
257 return false;
258 }
259}
260
261static long fcntl_rw_hint(struct file *file, unsigned int cmd,
262 unsigned long arg)
263{
264 struct inode *inode = file_inode(file);
265 u64 *argp = (u64 __user *)arg;
266 enum rw_hint hint;
267 u64 h;
268
269 switch (cmd) {
270 case F_GET_FILE_RW_HINT:
271 h = file_write_hint(file);
272 if (copy_to_user(argp, &h, sizeof(*argp)))
273 return -EFAULT;
274 return 0;
275 case F_SET_FILE_RW_HINT:
276 if (copy_from_user(&h, argp, sizeof(h)))
277 return -EFAULT;
278 hint = (enum rw_hint) h;
279 if (!rw_hint_valid(hint))
280 return -EINVAL;
281
282 spin_lock(&file->f_lock);
283 file->f_write_hint = hint;
284 spin_unlock(&file->f_lock);
285 return 0;
286 case F_GET_RW_HINT:
287 h = inode->i_write_hint;
288 if (copy_to_user(argp, &h, sizeof(*argp)))
289 return -EFAULT;
290 return 0;
291 case F_SET_RW_HINT:
292 if (copy_from_user(&h, argp, sizeof(h)))
293 return -EFAULT;
294 hint = (enum rw_hint) h;
295 if (!rw_hint_valid(hint))
296 return -EINVAL;
297
298 inode_lock(inode);
299 inode->i_write_hint = hint;
300 inode_unlock(inode);
301 return 0;
302 default:
303 return -EINVAL;
304 }
305}
306
246static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, 307static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
247 struct file *filp) 308 struct file *filp)
248{ 309{
@@ -337,6 +398,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
337 case F_GET_SEALS: 398 case F_GET_SEALS:
338 err = shmem_fcntl(filp, cmd, arg); 399 err = shmem_fcntl(filp, cmd, arg);
339 break; 400 break;
401 case F_GET_RW_HINT:
402 case F_SET_RW_HINT:
403 case F_GET_FILE_RW_HINT:
404 case F_SET_FILE_RW_HINT:
405 err = fcntl_rw_hint(filp, cmd, arg);
406 break;
340 default: 407 default:
341 break; 408 break;
342 } 409 }
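These four fcntl commands are the userspace face of write hints: F_GET_RW_HINT/F_SET_RW_HINT act on the inode, F_GET_FILE_RW_HINT/F_SET_FILE_RW_HINT on the open file description, and the third argument is a pointer to a u64. A minimal userspace sketch (assumes your libc exposes the F_*_RW_HINT and RWH_* constants; otherwise they come from <linux/fcntl.h>):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>

	int set_short_lived(int fd)
	{
		uint64_t hint = RWH_WRITE_LIFE_SHORT;	/* data expected to be overwritten soon */

		if (fcntl(fd, F_SET_RW_HINT, &hint) == -1) {
			perror("F_SET_RW_HINT");
			return -1;
		}
		if (fcntl(fd, F_GET_RW_HINT, &hint) == -1) {
			perror("F_GET_RW_HINT");
			return -1;
		}
		printf("inode write hint: %llu\n", (unsigned long long)hint);
		return 0;
	}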
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d13561..aa3d44527fa2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -815,7 +815,6 @@ struct gfs2_sbd {
815 atomic_t sd_log_in_flight; 815 atomic_t sd_log_in_flight;
816 struct bio *sd_log_bio; 816 struct bio *sd_log_bio;
817 wait_queue_head_t sd_log_flush_wait; 817 wait_queue_head_t sd_log_flush_wait;
818 int sd_log_error;
819 818
820 atomic_t sd_reserving_log; 819 atomic_t sd_reserving_log;
821 wait_queue_head_t sd_reserving_log_wait; 820 wait_queue_head_t sd_reserving_log_wait;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index b1f9144b42c7..885d36e7a29f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,7 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
170 */ 170 */
171 171
172static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, 172static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
173 int error) 173 blk_status_t error)
174{ 174{
175 struct buffer_head *bh, *next; 175 struct buffer_head *bh, *next;
176 struct page *page = bvec->bv_page; 176 struct page *page = bvec->bv_page;
@@ -209,15 +209,13 @@ static void gfs2_end_log_write(struct bio *bio)
209 struct page *page; 209 struct page *page;
210 int i; 210 int i;
211 211
212 if (bio->bi_error) { 212 if (bio->bi_status)
213 sdp->sd_log_error = bio->bi_error; 213 fs_err(sdp, "Error %d writing to log\n", bio->bi_status);
214 fs_err(sdp, "Error %d writing to log\n", bio->bi_error);
215 }
216 214
217 bio_for_each_segment_all(bvec, bio, i) { 215 bio_for_each_segment_all(bvec, bio, i) {
218 page = bvec->bv_page; 216 page = bvec->bv_page;
219 if (page_has_buffers(page)) 217 if (page_has_buffers(page))
220 gfs2_end_log_write_bh(sdp, bvec, bio->bi_error); 218 gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
221 else 219 else
222 mempool_free(page, gfs2_page_pool); 220 mempool_free(page, gfs2_page_pool);
223 } 221 }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 663ffc135ef3..fabe1614f879 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -201,7 +201,7 @@ static void gfs2_meta_read_endio(struct bio *bio)
201 do { 201 do {
202 struct buffer_head *next = bh->b_this_page; 202 struct buffer_head *next = bh->b_this_page;
203 len -= bh->b_size; 203 len -= bh->b_size;
204 bh->b_end_io(bh, !bio->bi_error); 204 bh->b_end_io(bh, !bio->bi_status);
205 bh = next; 205 bh = next;
206 } while (bh && len); 206 } while (bh && len);
207 } 207 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b92135c202c2..e76058d34b74 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -176,10 +176,10 @@ static void end_bio_io_page(struct bio *bio)
176{ 176{
177 struct page *page = bio->bi_private; 177 struct page *page = bio->bi_private;
178 178
179 if (!bio->bi_error) 179 if (!bio->bi_status)
180 SetPageUptodate(page); 180 SetPageUptodate(page);
181 else 181 else
182 pr_warn("error %d reading superblock\n", bio->bi_error); 182 pr_warn("error %d reading superblock\n", bio->bi_status);
183 unlock_page(page); 183 unlock_page(page);
184} 184}
185 185
diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..f0e5fc77e6a4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
146 i_gid_write(inode, 0); 146 i_gid_write(inode, 0);
147 atomic_set(&inode->i_writecount, 0); 147 atomic_set(&inode->i_writecount, 0);
148 inode->i_size = 0; 148 inode->i_size = 0;
149 inode->i_write_hint = WRITE_LIFE_NOT_SET;
149 inode->i_blocks = 0; 150 inode->i_blocks = 0;
150 inode->i_bytes = 0; 151 inode->i_bytes = 0;
151 inode->i_generation = 0; 152 inode->i_generation = 0;
diff --git a/fs/iomap.c b/fs/iomap.c
index 4b10892967a5..fa6cd5b3f578 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -672,8 +672,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
672 struct iomap_dio *dio = bio->bi_private; 672 struct iomap_dio *dio = bio->bi_private;
673 bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY); 673 bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
674 674
675 if (bio->bi_error) 675 if (bio->bi_status)
676 iomap_dio_set_error(dio, bio->bi_error); 676 iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
677 677
678 if (atomic_dec_and_test(&dio->ref)) { 678 if (atomic_dec_and_test(&dio->ref)) {
679 if (is_sync_kiocb(dio->iocb)) { 679 if (is_sync_kiocb(dio->iocb)) {
@@ -793,6 +793,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
793 bio->bi_bdev = iomap->bdev; 793 bio->bi_bdev = iomap->bdev;
794 bio->bi_iter.bi_sector = 794 bio->bi_iter.bi_sector =
795 iomap->blkno + ((pos - iomap->offset) >> 9); 795 iomap->blkno + ((pos - iomap->offset) >> 9);
796 bio->bi_write_hint = dio->iocb->ki_hint;
796 bio->bi_private = dio; 797 bio->bi_private = dio;
797 bio->bi_end_io = iomap_dio_bio_end_io; 798 bio->bi_end_io = iomap_dio_bio_end_io;
798 799
@@ -881,6 +882,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
881 flags |= IOMAP_WRITE; 882 flags |= IOMAP_WRITE;
882 } 883 }
883 884
885 if (iocb->ki_flags & IOCB_NOWAIT) {
886 if (filemap_range_has_page(mapping, start, end)) {
887 ret = -EAGAIN;
888 goto out_free_dio;
889 }
890 flags |= IOMAP_NOWAIT;
891 }
892
884 ret = filemap_write_and_wait_range(mapping, start, end); 893 ret = filemap_write_and_wait_range(mapping, start, end);
885 if (ret) 894 if (ret)
886 goto out_free_dio; 895 goto out_free_dio;
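For IOCB_NOWAIT direct I/O, iomap_dio_rw() now bails with -EAGAIN if filemap_range_has_page() finds cached pages in the range, and otherwise passes IOMAP_NOWAIT down to ->iomap_begin. The filesystem side of that contract reduces to a check like this (illustrative; needs_alloc stands in for the fs-specific condition):

	if ((flags & IOMAP_NOWAIT) && needs_alloc) {
		error = -EAGAIN;
		goto out_unlock;
	}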
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index bb1da1feafeb..a21f0e9eecd4 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2205,7 +2205,7 @@ static void lbmIODone(struct bio *bio)
2205 2205
2206 bp->l_flag |= lbmDONE; 2206 bp->l_flag |= lbmDONE;
2207 2207
2208 if (bio->bi_error) { 2208 if (bio->bi_status) {
2209 bp->l_flag |= lbmERROR; 2209 bp->l_flag |= lbmERROR;
2210 2210
2211 jfs_err("lbmIODone: I/O error in JFS log"); 2211 jfs_err("lbmIODone: I/O error in JFS log");
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 489aaa1403e5..ce93db3aef3c 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -280,7 +280,7 @@ static void metapage_read_end_io(struct bio *bio)
280{ 280{
281 struct page *page = bio->bi_private; 281 struct page *page = bio->bi_private;
282 282
283 if (bio->bi_error) { 283 if (bio->bi_status) {
284 printk(KERN_ERR "metapage_read_end_io: I/O error\n"); 284 printk(KERN_ERR "metapage_read_end_io: I/O error\n");
285 SetPageError(page); 285 SetPageError(page);
286 } 286 }
@@ -337,7 +337,7 @@ static void metapage_write_end_io(struct bio *bio)
337 337
338 BUG_ON(!PagePrivate(page)); 338 BUG_ON(!PagePrivate(page));
339 339
340 if (bio->bi_error) { 340 if (bio->bi_status) {
341 printk(KERN_ERR "metapage_write_end_io: I/O error\n"); 341 printk(KERN_ERR "metapage_write_end_io: I/O error\n");
342 SetPageError(page); 342 SetPageError(page);
343 } 343 }
diff --git a/fs/mpage.c b/fs/mpage.c
index baff8f820c29..d6d1486d6f99 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -50,7 +50,8 @@ static void mpage_end_io(struct bio *bio)
50 50
51 bio_for_each_segment_all(bv, bio, i) { 51 bio_for_each_segment_all(bv, bio, i) {
52 struct page *page = bv->bv_page; 52 struct page *page = bv->bv_page;
53 page_endio(page, op_is_write(bio_op(bio)), bio->bi_error); 53 page_endio(page, op_is_write(bio_op(bio)),
54 blk_status_to_errno(bio->bi_status));
54 } 55 }
55 56
56 bio_put(bio); 57 bio_put(bio);
@@ -614,6 +615,7 @@ alloc_new:
614 goto confused; 615 goto confused;
615 616
616 wbc_init_bio(wbc, bio); 617 wbc_init_bio(wbc, bio);
618 bio->bi_write_hint = inode->i_write_hint;
617 } 619 }
618 620
619 /* 621 /*
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0ca370d23ddb..d8863a804b15 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -188,7 +188,7 @@ static void bl_end_io_read(struct bio *bio)
188{ 188{
189 struct parallel_io *par = bio->bi_private; 189 struct parallel_io *par = bio->bi_private;
190 190
191 if (bio->bi_error) { 191 if (bio->bi_status) {
192 struct nfs_pgio_header *header = par->data; 192 struct nfs_pgio_header *header = par->data;
193 193
194 if (!header->pnfs_error) 194 if (!header->pnfs_error)
@@ -319,7 +319,7 @@ static void bl_end_io_write(struct bio *bio)
319 struct parallel_io *par = bio->bi_private; 319 struct parallel_io *par = bio->bi_private;
320 struct nfs_pgio_header *header = par->data; 320 struct nfs_pgio_header *header = par->data;
321 321
322 if (bio->bi_error) { 322 if (bio->bi_status) {
323 if (!header->pnfs_error) 323 if (!header->pnfs_error)
324 header->pnfs_error = -EIO; 324 header->pnfs_error = -EIO;
325 pnfs_set_lo_fail(header->lseg); 325 pnfs_set_lo_fail(header->lseg);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index fb5213afc854..c862c2489df0 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -219,6 +219,9 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
219 u8 *buf, *d, type, assoc; 219 u8 *buf, *d, type, assoc;
220 int error; 220 int error;
221 221
222 if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
223 return -EINVAL;
224
222 buf = kzalloc(bufflen, GFP_KERNEL); 225 buf = kzalloc(bufflen, GFP_KERNEL);
223 if (!buf) 226 if (!buf)
224 return -ENOMEM; 227 return -ENOMEM;
@@ -229,7 +232,6 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
229 goto out_free_buf; 232 goto out_free_buf;
230 } 233 }
231 req = scsi_req(rq); 234 req = scsi_req(rq);
232 scsi_req_init(rq);
233 235
234 error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); 236 error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
235 if (error) 237 if (error)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 6f87b2ac1aeb..e73c86d9855c 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -338,7 +338,7 @@ static void nilfs_end_bio_write(struct bio *bio)
338{ 338{
339 struct nilfs_segment_buffer *segbuf = bio->bi_private; 339 struct nilfs_segment_buffer *segbuf = bio->bi_private;
340 340
341 if (bio->bi_error) 341 if (bio->bi_status)
342 atomic_inc(&segbuf->sb_err); 342 atomic_inc(&segbuf->sb_err);
343 343
344 bio_put(bio); 344 bio_put(bio);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0da0332725aa..ffe003982d95 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -516,9 +516,9 @@ static void o2hb_bio_end_io(struct bio *bio)
516{ 516{
517 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 517 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
518 518
519 if (bio->bi_error) { 519 if (bio->bi_status) {
520 mlog(ML_ERROR, "IO Error %d\n", bio->bi_error); 520 mlog(ML_ERROR, "IO Error %d\n", bio->bi_status);
521 wc->wc_error = bio->bi_error; 521 wc->wc_error = blk_status_to_errno(bio->bi_status);
522 } 522 }
523 523
524 o2hb_bio_wait_dec(wc, 1); 524 o2hb_bio_wait_dec(wc, 1);
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..3fe0c4aa7d27 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@ static int do_dentry_open(struct file *f,
759 likely(f->f_op->write || f->f_op->write_iter)) 759 likely(f->f_op->write || f->f_op->write_iter))
760 f->f_mode |= FMODE_CAN_WRITE; 760 f->f_mode |= FMODE_CAN_WRITE;
761 761
762 f->f_write_hint = WRITE_LIFE_NOT_SET;
762 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 763 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
763 764
764 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 765 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/fs/read_write.c b/fs/read_write.c
index 19d4d88fa285..d591eeed061f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -678,16 +678,10 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
678 struct kiocb kiocb; 678 struct kiocb kiocb;
679 ssize_t ret; 679 ssize_t ret;
680 680
681 if (flags & ~(RWF_HIPRI | RWF_DSYNC | RWF_SYNC))
682 return -EOPNOTSUPP;
683
684 init_sync_kiocb(&kiocb, filp); 681 init_sync_kiocb(&kiocb, filp);
685 if (flags & RWF_HIPRI) 682 ret = kiocb_set_rw_flags(&kiocb, flags);
686 kiocb.ki_flags |= IOCB_HIPRI; 683 if (ret)
687 if (flags & RWF_DSYNC) 684 return ret;
688 kiocb.ki_flags |= IOCB_DSYNC;
689 if (flags & RWF_SYNC)
690 kiocb.ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
691 kiocb.ki_pos = *ppos; 685 kiocb.ki_pos = *ppos;
692 686
693 if (type == READ) 687 if (type == READ)
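The open-coded RWF_* to IOCB_* mapping in do_iter_readv_writev() is replaced by kiocb_set_rw_flags(), which centralizes per-call flag validation. From userspace these flags arrive via preadv2()/pwritev2(); a minimal example, assuming a libc that wraps pwritev2 (glibc 2.26 or later):

	#define _GNU_SOURCE
	#include <sys/uio.h>
	#include <stdio.h>

	ssize_t write_dsync(int fd, void *buf, size_t len, off_t off)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = len };

		/* RWF_DSYNC gives O_DSYNC semantics for this call only. */
		ssize_t n = pwritev2(fd, &iov, 1, off, RWF_DSYNC);
		if (n < 0)
			perror("pwritev2");
		return n;
	}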
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3b91faacc1ba..d20c29b9c95b 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -276,7 +276,7 @@ xfs_end_io(
276 struct xfs_inode *ip = XFS_I(ioend->io_inode); 276 struct xfs_inode *ip = XFS_I(ioend->io_inode);
277 xfs_off_t offset = ioend->io_offset; 277 xfs_off_t offset = ioend->io_offset;
278 size_t size = ioend->io_size; 278 size_t size = ioend->io_size;
279 int error = ioend->io_bio->bi_error; 279 int error;
280 280
281 /* 281 /*
282 * Just clean up the in-memory strutures if the fs has been shut down. 282 * Just clean up the in-memory strutures if the fs has been shut down.
@@ -289,6 +289,7 @@ xfs_end_io(
289 /* 289 /*
290 * Clean up any COW blocks on an I/O error. 290 * Clean up any COW blocks on an I/O error.
291 */ 291 */
292 error = blk_status_to_errno(ioend->io_bio->bi_status);
292 if (unlikely(error)) { 293 if (unlikely(error)) {
293 switch (ioend->io_type) { 294 switch (ioend->io_type) {
294 case XFS_IO_COW: 295 case XFS_IO_COW:
@@ -332,7 +333,7 @@ xfs_end_bio(
332 else if (ioend->io_append_trans) 333 else if (ioend->io_append_trans)
333 queue_work(mp->m_data_workqueue, &ioend->io_work); 334 queue_work(mp->m_data_workqueue, &ioend->io_work);
334 else 335 else
335 xfs_destroy_ioend(ioend, bio->bi_error); 336 xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
336} 337}
337 338
338STATIC int 339STATIC int
@@ -500,11 +501,12 @@ xfs_submit_ioend(
500 * time. 501 * time.
501 */ 502 */
502 if (status) { 503 if (status) {
503 ioend->io_bio->bi_error = status; 504 ioend->io_bio->bi_status = errno_to_blk_status(status);
504 bio_endio(ioend->io_bio); 505 bio_endio(ioend->io_bio);
505 return status; 506 return status;
506 } 507 }
507 508
509 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
508 submit_bio(ioend->io_bio); 510 submit_bio(ioend->io_bio);
509 return 0; 511 return 0;
510} 512}
@@ -564,6 +566,7 @@ xfs_chain_bio(
564 bio_chain(ioend->io_bio, new); 566 bio_chain(ioend->io_bio, new);
565 bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 567 bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
566 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 568 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
569 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
567 submit_bio(ioend->io_bio); 570 submit_bio(ioend->io_bio);
568 ioend->io_bio = new; 571 ioend->io_bio = new;
569} 572}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 16d6a578fc16..438505f395e7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1227,8 +1227,11 @@ xfs_buf_bio_end_io(
1227 * don't overwrite existing errors - otherwise we can lose errors on 1227 * don't overwrite existing errors - otherwise we can lose errors on
1228 * buffers that require multiple bios to complete. 1228 * buffers that require multiple bios to complete.
1229 */ 1229 */
1230 if (bio->bi_error) 1230 if (bio->bi_status) {
1231 cmpxchg(&bp->b_io_error, 0, bio->bi_error); 1231 int error = blk_status_to_errno(bio->bi_status);
1232
1233 cmpxchg(&bp->b_io_error, 0, error);
1234 }
1232 1235
1233 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) 1236 if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1234 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); 1237 invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5fb5a0958a14..17f27a2fb5e2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,7 +237,11 @@ xfs_file_dax_read(
237 if (!count) 237 if (!count)
238 return 0; /* skip atime */ 238 return 0; /* skip atime */
239 239
240 xfs_ilock(ip, XFS_IOLOCK_SHARED); 240 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
241 if (iocb->ki_flags & IOCB_NOWAIT)
242 return -EAGAIN;
243 xfs_ilock(ip, XFS_IOLOCK_SHARED);
244 }
241 ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); 245 ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
242 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 246 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
243 247
@@ -541,7 +545,11 @@ xfs_file_dio_aio_write(
541 iolock = XFS_IOLOCK_SHARED; 545 iolock = XFS_IOLOCK_SHARED;
542 } 546 }
543 547
544 xfs_ilock(ip, iolock); 548 if (!xfs_ilock_nowait(ip, iolock)) {
549 if (iocb->ki_flags & IOCB_NOWAIT)
550 return -EAGAIN;
551 xfs_ilock(ip, iolock);
552 }
545 553
546 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 554 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
547 if (ret) 555 if (ret)
@@ -553,9 +561,15 @@ xfs_file_dio_aio_write(
553 * otherwise demote the lock if we had to take the exclusive lock 561 * otherwise demote the lock if we had to take the exclusive lock
554 * for other reasons in xfs_file_aio_write_checks. 562 * for other reasons in xfs_file_aio_write_checks.
555 */ 563 */
556 if (unaligned_io) 564 if (unaligned_io) {
557 inode_dio_wait(inode); 565 /* If we are going to wait for other DIO to finish, bail */
558 else if (iolock == XFS_IOLOCK_EXCL) { 566 if (iocb->ki_flags & IOCB_NOWAIT) {
567 if (atomic_read(&inode->i_dio_count))
568 return -EAGAIN;
569 } else {
570 inode_dio_wait(inode);
571 }
572 } else if (iolock == XFS_IOLOCK_EXCL) {
559 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); 573 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
560 iolock = XFS_IOLOCK_SHARED; 574 iolock = XFS_IOLOCK_SHARED;
561 } 575 }
@@ -585,7 +599,12 @@ xfs_file_dax_write(
585 size_t count; 599 size_t count;
586 loff_t pos; 600 loff_t pos;
587 601
588 xfs_ilock(ip, iolock); 602 if (!xfs_ilock_nowait(ip, iolock)) {
603 if (iocb->ki_flags & IOCB_NOWAIT)
604 return -EAGAIN;
605 xfs_ilock(ip, iolock);
606 }
607
589 ret = xfs_file_aio_write_checks(iocb, from, &iolock); 608 ret = xfs_file_aio_write_checks(iocb, from, &iolock);
590 if (ret) 609 if (ret)
591 goto out; 610 goto out;
@@ -892,6 +911,7 @@ xfs_file_open(
892 return -EFBIG; 911 return -EFBIG;
893 if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) 912 if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
894 return -EIO; 913 return -EIO;
914 file->f_mode |= FMODE_AIO_NOWAIT;
895 return 0; 915 return 0;
896} 916}
897 917
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 94e5bdf7304c..05dc87e8c1f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -995,6 +995,11 @@ xfs_file_iomap_begin(
995 lockmode = xfs_ilock_data_map_shared(ip); 995 lockmode = xfs_ilock_data_map_shared(ip);
996 } 996 }
997 997
998 if ((flags & IOMAP_NOWAIT) && !(ip->i_df.if_flags & XFS_IFEXTENTS)) {
999 error = -EAGAIN;
1000 goto out_unlock;
1001 }
1002
998 ASSERT(offset <= mp->m_super->s_maxbytes); 1003 ASSERT(offset <= mp->m_super->s_maxbytes);
999 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) 1004 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
1000 length = mp->m_super->s_maxbytes - offset; 1005 length = mp->m_super->s_maxbytes - offset;
@@ -1016,6 +1021,15 @@ xfs_file_iomap_begin(
1016 1021
1017 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { 1022 if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
1018 if (flags & IOMAP_DIRECT) { 1023 if (flags & IOMAP_DIRECT) {
1024 /*
1025 * A reflinked inode will result in CoW alloc.
1026 * FIXME: It could still overwrite on unshared extents
1027 * and not need allocation.
1028 */
1029 if (flags & IOMAP_NOWAIT) {
1030 error = -EAGAIN;
1031 goto out_unlock;
1032 }
1019 /* may drop and re-acquire the ilock */ 1033 /* may drop and re-acquire the ilock */
1020 error = xfs_reflink_allocate_cow(ip, &imap, &shared, 1034 error = xfs_reflink_allocate_cow(ip, &imap, &shared,
1021 &lockmode); 1035 &lockmode);
@@ -1033,6 +1047,14 @@ xfs_file_iomap_begin(
1033 1047
1034 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { 1048 if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
1035 /* 1049 /*
1050 * If nowait is set bail since we are going to make
1051 * allocations.
1052 */
1053 if (flags & IOMAP_NOWAIT) {
1054 error = -EAGAIN;
1055 goto out_unlock;
1056 }
1057 /*
1036 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES 1058 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
1037 * pages to keep the chunks of work done where somewhat symmetric 1059 * pages to keep the chunks of work done where somewhat symmetric
1038 * with the work writeback does. This is a completely arbitrary 1060 * with the work writeback does. This is a completely arbitrary
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 455a575f101d..97df4db13b2e 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1766,7 +1766,8 @@ STATIC int __init
1766xfs_init_zones(void) 1766xfs_init_zones(void)
1767{ 1767{
1768 xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE, 1768 xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
1769 offsetof(struct xfs_ioend, io_inline_bio)); 1769 offsetof(struct xfs_ioend, io_inline_bio),
1770 BIOSET_NEED_BVECS);
1770 if (!xfs_ioend_bioset) 1771 if (!xfs_ioend_bioset)
1771 goto out; 1772 goto out;
1772 1773
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a7e29fa0981f..664a27da276d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -118,7 +118,6 @@ static inline void *bio_data(struct bio *bio)
118/* 118/*
119 * will die 119 * will die
120 */ 120 */
121#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
122#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) 121#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
123 122
124/* 123/*
@@ -373,8 +372,11 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
373 return bio_split(bio, sectors, gfp, bs); 372 return bio_split(bio, sectors, gfp, bs);
374} 373}
375 374
376extern struct bio_set *bioset_create(unsigned int, unsigned int); 375extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
377extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); 376enum {
377 BIOSET_NEED_BVECS = BIT(0),
378 BIOSET_NEED_RESCUER = BIT(1),
379};
378extern void bioset_free(struct bio_set *); 380extern void bioset_free(struct bio_set *);
379extern mempool_t *biovec_create_pool(int pool_entries); 381extern mempool_t *biovec_create_pool(int pool_entries);
380 382
@@ -392,11 +394,6 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
392 return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); 394 return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
393} 395}
394 396
395static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
396{
397 return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
398}
399
400static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) 397static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
401{ 398{
402 return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); 399 return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
@@ -414,7 +411,13 @@ extern void bio_endio(struct bio *);
414 411
415static inline void bio_io_error(struct bio *bio) 412static inline void bio_io_error(struct bio *bio)
416{ 413{
417 bio->bi_error = -EIO; 414 bio->bi_status = BLK_STS_IOERR;
415 bio_endio(bio);
416}
417
418static inline void bio_wouldblock_error(struct bio *bio)
419{
420 bio->bi_status = BLK_STS_AGAIN;
418 bio_endio(bio); 421 bio_endio(bio);
419} 422}
420 423
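bioset_create() absorbs bioset_create_nobvec() by taking a flags argument (BIOSET_NEED_BVECS, BIOSET_NEED_RESCUER), and bio_wouldblock_error() is the new helper for failing REQ_NOWAIT bios. A driver-flavoured sketch under those assumptions (the my_* names and the queue-full check are hypothetical):

	#include <linux/bio.h>

	static struct bio_set *my_bioset;

	static int my_driver_init(void)
	{
		/* Pool of 4 bios with an integrated bvec pool; pass 0 instead of
		 * BIOSET_NEED_BVECS for the old bioset_create_nobvec() behaviour. */
		my_bioset = bioset_create(4, 0, BIOSET_NEED_BVECS);
		return my_bioset ? 0 : -ENOMEM;
	}

	static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
	{
		if ((bio->bi_opf & REQ_NOWAIT) && my_queue_full(q)) {
			bio_wouldblock_error(bio);	/* ends the bio with BLK_STS_AGAIN */
			return BLK_QC_T_NONE;
		}
		return my_submit_bio(q, bio);		/* hypothetical rest of the driver */
	}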
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fcd641032f8d..23d32ff0b462 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -39,8 +39,6 @@ struct blk_mq_hw_ctx {
39 struct blk_mq_tags *tags; 39 struct blk_mq_tags *tags;
40 struct blk_mq_tags *sched_tags; 40 struct blk_mq_tags *sched_tags;
41 41
42 struct srcu_struct queue_rq_srcu;
43
44 unsigned long queued; 42 unsigned long queued;
45 unsigned long run; 43 unsigned long run;
46#define BLK_MQ_MAX_DISPATCH_ORDER 7 44#define BLK_MQ_MAX_DISPATCH_ORDER 7
@@ -62,6 +60,9 @@ struct blk_mq_hw_ctx {
62 struct dentry *debugfs_dir; 60 struct dentry *debugfs_dir;
63 struct dentry *sched_debugfs_dir; 61 struct dentry *sched_debugfs_dir;
64#endif 62#endif
63
64 /* Must be the last member - see also blk_mq_hw_ctx_size(). */
65 struct srcu_struct queue_rq_srcu[0];
65}; 66};
66 67
67struct blk_mq_tag_set { 68struct blk_mq_tag_set {
@@ -87,7 +88,8 @@ struct blk_mq_queue_data {
87 bool last; 88 bool last;
88}; 89};
89 90
90typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); 91typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
92 const struct blk_mq_queue_data *);
91typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); 93typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
92typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); 94typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
93typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); 95typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -142,6 +144,8 @@ struct blk_mq_ops {
142 init_request_fn *init_request; 144 init_request_fn *init_request;
143 exit_request_fn *exit_request; 145 exit_request_fn *exit_request;
144 reinit_request_fn *reinit_request; 146 reinit_request_fn *reinit_request;
147 /* Called from inside blk_get_request() */
148 void (*initialize_rq_fn)(struct request *rq);
145 149
146 map_queues_fn *map_queues; 150 map_queues_fn *map_queues;
147 151
@@ -155,10 +159,6 @@ struct blk_mq_ops {
155}; 159};
156 160
157enum { 161enum {
158 BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */
159 BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */
160 BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */
161
162 BLK_MQ_F_SHOULD_MERGE = 1 << 0, 162 BLK_MQ_F_SHOULD_MERGE = 1 << 0,
163 BLK_MQ_F_TAG_SHARED = 1 << 1, 163 BLK_MQ_F_TAG_SHARED = 1 << 1,
164 BLK_MQ_F_SG_MERGE = 1 << 2, 164 BLK_MQ_F_SG_MERGE = 1 << 2,
@@ -204,10 +204,10 @@ enum {
204 BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ 204 BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
205}; 205};
206 206
207struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 207struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
208 unsigned int flags); 208 unsigned int flags);
209struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int op, 209struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
210 unsigned int flags, unsigned int hctx_idx); 210 unsigned int op, unsigned int flags, unsigned int hctx_idx);
211struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); 211struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
212 212
213enum { 213enum {
@@ -230,8 +230,8 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
230 230
231int blk_mq_request_started(struct request *rq); 231int blk_mq_request_started(struct request *rq);
232void blk_mq_start_request(struct request *rq); 232void blk_mq_start_request(struct request *rq);
233void blk_mq_end_request(struct request *rq, int error); 233void blk_mq_end_request(struct request *rq, blk_status_t error);
234void __blk_mq_end_request(struct request *rq, int error); 234void __blk_mq_end_request(struct request *rq, blk_status_t error);
235 235
236void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); 236void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
237void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, 237void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
@@ -247,6 +247,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
247void blk_mq_start_hw_queues(struct request_queue *q); 247void blk_mq_start_hw_queues(struct request_queue *q);
248void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 248void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
249void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); 249void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
250void blk_mq_quiesce_queue(struct request_queue *q);
251void blk_mq_unquiesce_queue(struct request_queue *q);
250void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); 252void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
251void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 253void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
252void blk_mq_run_hw_queues(struct request_queue *q, bool async); 254void blk_mq_run_hw_queues(struct request_queue *q, bool async);
@@ -264,6 +266,8 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set);
264int blk_mq_map_queues(struct blk_mq_tag_set *set); 266int blk_mq_map_queues(struct blk_mq_tag_set *set);
265void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); 267void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
266 268
269void blk_mq_quiesce_queue_nowait(struct request_queue *q);
270
267/* 271/*
268 * Driver command data is immediately after the request. So subtract request 272 * Driver command data is immediately after the request. So subtract request
269 * size to get back to the original request, add request size to get the PDU. 273 * size to get back to the original request, add request size to get the PDU.
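With queue_rq_fn returning blk_status_t and the BLK_MQ_RQ_QUEUE_* codes gone, a driver's ->queue_rq() conversion typically looks like the sketch below (the mydrv_* helpers are hypothetical placeholders):

	#include <linux/blk-mq.h>

	static blk_status_t mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
					   const struct blk_mq_queue_data *bd)
	{
		struct request *rq = bd->rq;

		blk_mq_start_request(rq);

		if (!mydrv_hw_has_room(hctx))
			return BLK_STS_RESOURCE;	/* was BLK_MQ_RQ_QUEUE_BUSY */

		if (mydrv_issue(rq))
			return BLK_STS_IOERR;		/* was BLK_MQ_RQ_QUEUE_ERROR */

		return BLK_STS_OK;			/* was BLK_MQ_RQ_QUEUE_OK */
	}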
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 61339bc44400..d2eb87c84d82 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -17,6 +17,27 @@ struct io_context;
17struct cgroup_subsys_state; 17struct cgroup_subsys_state;
18typedef void (bio_end_io_t) (struct bio *); 18typedef void (bio_end_io_t) (struct bio *);
19 19
20/*
21 * Block error status values. See block/blk-core:blk_errors for the details.
22 */
23typedef u8 __bitwise blk_status_t;
24#define BLK_STS_OK 0
25#define BLK_STS_NOTSUPP ((__force blk_status_t)1)
26#define BLK_STS_TIMEOUT ((__force blk_status_t)2)
27#define BLK_STS_NOSPC ((__force blk_status_t)3)
28#define BLK_STS_TRANSPORT ((__force blk_status_t)4)
29#define BLK_STS_TARGET ((__force blk_status_t)5)
30#define BLK_STS_NEXUS ((__force blk_status_t)6)
31#define BLK_STS_MEDIUM ((__force blk_status_t)7)
32#define BLK_STS_PROTECTION ((__force blk_status_t)8)
33#define BLK_STS_RESOURCE ((__force blk_status_t)9)
34#define BLK_STS_IOERR ((__force blk_status_t)10)
35
36/* hack for device mapper, don't use elsewhere: */
37#define BLK_STS_DM_REQUEUE ((__force blk_status_t)11)
38
39#define BLK_STS_AGAIN ((__force blk_status_t)12)
40
20struct blk_issue_stat { 41struct blk_issue_stat {
21 u64 stat; 42 u64 stat;
22}; 43};
@@ -28,13 +49,14 @@ struct blk_issue_stat {
28struct bio { 49struct bio {
29 struct bio *bi_next; /* request queue link */ 50 struct bio *bi_next; /* request queue link */
30 struct block_device *bi_bdev; 51 struct block_device *bi_bdev;
31 int bi_error; 52 blk_status_t bi_status;
32 unsigned int bi_opf; /* bottom bits req flags, 53 unsigned int bi_opf; /* bottom bits req flags,
33 * top bits REQ_OP. Use 54 * top bits REQ_OP. Use
34 * accessors. 55 * accessors.
35 */ 56 */
36 unsigned short bi_flags; /* status, etc and bvec pool number */ 57 unsigned short bi_flags; /* status, etc and bvec pool number */
37 unsigned short bi_ioprio; 58 unsigned short bi_ioprio;
59 unsigned short bi_write_hint;
38 60
39 struct bvec_iter bi_iter; 61 struct bvec_iter bi_iter;
40 62
@@ -205,6 +227,7 @@ enum req_flag_bits {
205 /* command specific flags for REQ_OP_WRITE_ZEROES: */ 227 /* command specific flags for REQ_OP_WRITE_ZEROES: */
206 __REQ_NOUNMAP, /* do not free blocks when zeroing */ 228 __REQ_NOUNMAP, /* do not free blocks when zeroing */
207 229
230 __REQ_NOWAIT, /* Don't wait if request will block */
208 __REQ_NR_BITS, /* stops here */ 231 __REQ_NR_BITS, /* stops here */
209}; 232};
210 233
@@ -223,6 +246,7 @@ enum req_flag_bits {
223#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) 246#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
224 247
225#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) 248#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
249#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
226 250
227#define REQ_FAILFAST_MASK \ 251#define REQ_FAILFAST_MASK \
228 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) 252 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
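On the completion side, bio->bi_error (a negative errno) becomes bio->bi_status (a blk_status_t), so endio handlers test against the BLK_STS_* values and convert only where an errno is genuinely needed. A minimal sketch of a private completion handler under that model (struct my_ctx is hypothetical):

	#include <linux/bio.h>
	#include <linux/completion.h>

	struct my_ctx {
		struct completion done;
		int error;
	};

	static void my_end_io(struct bio *bio)
	{
		struct my_ctx *ctx = bio->bi_private;

		/* Convert to an errno only at the boundary that needs one. */
		if (bio->bi_status)
			ctx->error = blk_status_to_errno(bio->bi_status);

		complete(&ctx->done);
		bio_put(bio);
	}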
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ddd36bd2173..25f6a0cb27d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -55,7 +55,7 @@ struct blk_stat_callback;
55 */ 55 */
56#define BLKCG_MAX_POLS 3 56#define BLKCG_MAX_POLS 3
57 57
58typedef void (rq_end_io_fn)(struct request *, int); 58typedef void (rq_end_io_fn)(struct request *, blk_status_t);
59 59
60#define BLK_RL_SYNCFULL (1U << 0) 60#define BLK_RL_SYNCFULL (1U << 0)
61#define BLK_RL_ASYNCFULL (1U << 1) 61#define BLK_RL_ASYNCFULL (1U << 1)
@@ -225,6 +225,8 @@ struct request {
225 225
226 unsigned int extra_len; /* length of alignment and padding */ 226 unsigned int extra_len; /* length of alignment and padding */
227 227
228 unsigned short write_hint;
229
228 unsigned long deadline; 230 unsigned long deadline;
229 struct list_head timeout_list; 231 struct list_head timeout_list;
230 232
@@ -412,8 +414,12 @@ struct request_queue {
412 rq_timed_out_fn *rq_timed_out_fn; 414 rq_timed_out_fn *rq_timed_out_fn;
413 dma_drain_needed_fn *dma_drain_needed; 415 dma_drain_needed_fn *dma_drain_needed;
414 lld_busy_fn *lld_busy_fn; 416 lld_busy_fn *lld_busy_fn;
417 /* Called just after a request is allocated */
415 init_rq_fn *init_rq_fn; 418 init_rq_fn *init_rq_fn;
419 /* Called just before a request is freed */
416 exit_rq_fn *exit_rq_fn; 420 exit_rq_fn *exit_rq_fn;
421 /* Called from inside blk_get_request() */
422 void (*initialize_rq_fn)(struct request *rq);
417 423
418 const struct blk_mq_ops *mq_ops; 424 const struct blk_mq_ops *mq_ops;
419 425
@@ -590,6 +596,9 @@ struct request_queue {
590 void *rq_alloc_data; 596 void *rq_alloc_data;
591 597
592 struct work_struct release_work; 598 struct work_struct release_work;
599
600#define BLK_MAX_WRITE_HINTS 5
601 u64 write_hints[BLK_MAX_WRITE_HINTS];
593}; 602};
594 603
595#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 604#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -622,6 +631,8 @@ struct request_queue {
622#define QUEUE_FLAG_STATS 27 /* track rq completion times */ 631#define QUEUE_FLAG_STATS 27 /* track rq completion times */
623#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */ 632#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */
624#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */ 633#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */
634#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */
635#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */
625 636
626#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 637#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
627 (1 << QUEUE_FLAG_STACKABLE) | \ 638 (1 << QUEUE_FLAG_STACKABLE) | \
@@ -633,6 +644,13 @@ struct request_queue {
633 (1 << QUEUE_FLAG_SAME_COMP) | \ 644 (1 << QUEUE_FLAG_SAME_COMP) | \
634 (1 << QUEUE_FLAG_POLL)) 645 (1 << QUEUE_FLAG_POLL))
635 646
647/*
648 * @q->queue_lock is set while a queue is being initialized. Since we know
649 * that no other threads access the queue object before @q->queue_lock has
650 * been set, it is safe to manipulate queue flags without holding the
651 * queue_lock if @q->queue_lock == NULL. See also blk_alloc_queue_node() and
652 * blk_init_allocated_queue().
653 */
636static inline void queue_lockdep_assert_held(struct request_queue *q) 654static inline void queue_lockdep_assert_held(struct request_queue *q)
637{ 655{
638 if (q->queue_lock) 656 if (q->queue_lock)
@@ -712,10 +730,13 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
712#define blk_queue_secure_erase(q) \ 730#define blk_queue_secure_erase(q) \
713 (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) 731 (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
714#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) 732#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
733#define blk_queue_scsi_passthrough(q) \
734 test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
715 735
716#define blk_noretry_request(rq) \ 736#define blk_noretry_request(rq) \
717 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ 737 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
718 REQ_FAILFAST_DRIVER)) 738 REQ_FAILFAST_DRIVER))
739#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
719 740
720static inline bool blk_account_rq(struct request *rq) 741static inline bool blk_account_rq(struct request *rq)
721{ 742{
@@ -814,7 +835,8 @@ static inline bool rq_mergeable(struct request *rq)
814 835
815static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) 836static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
816{ 837{
817 if (bio_data(a) == bio_data(b)) 838 if (bio_page(a) == bio_page(b) &&
839 bio_offset(a) == bio_offset(b))
818 return true; 840 return true;
819 841
820 return false; 842 return false;
@@ -862,19 +884,6 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn;
862#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) 884#define BLK_DEFAULT_SG_TIMEOUT (60 * HZ)
863#define BLK_MIN_SG_TIMEOUT (7 * HZ) 885#define BLK_MIN_SG_TIMEOUT (7 * HZ)
864 886
865#ifdef CONFIG_BOUNCE
866extern int init_emergency_isa_pool(void);
867extern void blk_queue_bounce(struct request_queue *q, struct bio **bio);
868#else
869static inline int init_emergency_isa_pool(void)
870{
871 return 0;
872}
873static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
874{
875}
876#endif /* CONFIG_MMU */
877
878struct rq_map_data { 887struct rq_map_data {
879 struct page **pages; 888 struct page **pages;
880 int page_order; 889 int page_order;
@@ -933,7 +942,8 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
933extern void blk_init_request_from_bio(struct request *req, struct bio *bio); 942extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
934extern void blk_put_request(struct request *); 943extern void blk_put_request(struct request *);
935extern void __blk_put_request(struct request_queue *, struct request *); 944extern void __blk_put_request(struct request_queue *, struct request *);
936extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 945extern struct request *blk_get_request(struct request_queue *, unsigned int op,
946 gfp_t gfp_mask);
937extern void blk_requeue_request(struct request_queue *, struct request *); 947extern void blk_requeue_request(struct request_queue *, struct request *);
938extern int blk_lld_busy(struct request_queue *q); 948extern int blk_lld_busy(struct request_queue *q);
939extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 949extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -941,12 +951,11 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
941 int (*bio_ctr)(struct bio *, struct bio *, void *), 951 int (*bio_ctr)(struct bio *, struct bio *, void *),
942 void *data); 952 void *data);
943extern void blk_rq_unprep_clone(struct request *rq); 953extern void blk_rq_unprep_clone(struct request *rq);
944extern int blk_insert_cloned_request(struct request_queue *q, 954extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
945 struct request *rq); 955 struct request *rq);
946extern int blk_rq_append_bio(struct request *rq, struct bio *bio); 956extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
947extern void blk_delay_queue(struct request_queue *, unsigned long); 957extern void blk_delay_queue(struct request_queue *, unsigned long);
948extern void blk_queue_split(struct request_queue *, struct bio **, 958extern void blk_queue_split(struct request_queue *, struct bio **);
949 struct bio_set *);
950extern void blk_recount_segments(struct request_queue *, struct bio *); 959extern void blk_recount_segments(struct request_queue *, struct bio *);
951extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); 960extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int);
952extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, 961extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t,
@@ -967,7 +976,6 @@ extern void __blk_run_queue(struct request_queue *q);
967extern void __blk_run_queue_uncond(struct request_queue *q); 976extern void __blk_run_queue_uncond(struct request_queue *q);
968extern void blk_run_queue(struct request_queue *); 977extern void blk_run_queue(struct request_queue *);
969extern void blk_run_queue_async(struct request_queue *q); 978extern void blk_run_queue_async(struct request_queue *q);
970extern void blk_mq_quiesce_queue(struct request_queue *q);
971extern int blk_rq_map_user(struct request_queue *, struct request *, 979extern int blk_rq_map_user(struct request_queue *, struct request *,
972 struct rq_map_data *, void __user *, unsigned long, 980 struct rq_map_data *, void __user *, unsigned long,
973 gfp_t); 981 gfp_t);
@@ -981,6 +989,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *,
981extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 989extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
982 struct request *, int, rq_end_io_fn *); 990 struct request *, int, rq_end_io_fn *);
983 991
992int blk_status_to_errno(blk_status_t status);
993blk_status_t errno_to_blk_status(int errno);
994
984bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); 995bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie);
985 996
986static inline struct request_queue *bdev_get_queue(struct block_device *bdev) 997static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
@@ -1113,16 +1124,16 @@ extern struct request *blk_fetch_request(struct request_queue *q);
1113 * blk_end_request() for parts of the original function. 1124 * blk_end_request() for parts of the original function.
1114 * This prevents code duplication in drivers. 1125 * This prevents code duplication in drivers.
1115 */ 1126 */
1116extern bool blk_update_request(struct request *rq, int error, 1127extern bool blk_update_request(struct request *rq, blk_status_t error,
1117 unsigned int nr_bytes); 1128 unsigned int nr_bytes);
1118extern void blk_finish_request(struct request *rq, int error); 1129extern void blk_finish_request(struct request *rq, blk_status_t error);
1119extern bool blk_end_request(struct request *rq, int error, 1130extern bool blk_end_request(struct request *rq, blk_status_t error,
1120 unsigned int nr_bytes); 1131 unsigned int nr_bytes);
1121extern void blk_end_request_all(struct request *rq, int error); 1132extern void blk_end_request_all(struct request *rq, blk_status_t error);
1122extern bool __blk_end_request(struct request *rq, int error, 1133extern bool __blk_end_request(struct request *rq, blk_status_t error,
1123 unsigned int nr_bytes); 1134 unsigned int nr_bytes);
1124extern void __blk_end_request_all(struct request *rq, int error); 1135extern void __blk_end_request_all(struct request *rq, blk_status_t error);
1125extern bool __blk_end_request_cur(struct request *rq, int error); 1136extern bool __blk_end_request_cur(struct request *rq, blk_status_t error);
1126 1137
1127extern void blk_complete_request(struct request *); 1138extern void blk_complete_request(struct request *);
1128extern void __blk_complete_request(struct request *); 1139extern void __blk_complete_request(struct request *);
@@ -1374,11 +1385,6 @@ enum blk_default_limits {
1374 1385
1375#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) 1386#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
1376 1387
1377static inline unsigned long queue_bounce_pfn(struct request_queue *q)
1378{
1379 return q->limits.bounce_pfn;
1380}
1381
1382static inline unsigned long queue_segment_boundary(struct request_queue *q) 1388static inline unsigned long queue_segment_boundary(struct request_queue *q)
1383{ 1389{
1384 return q->limits.seg_boundary_mask; 1390 return q->limits.seg_boundary_mask;
@@ -1780,7 +1786,7 @@ struct blk_integrity_iter {
1780 const char *disk_name; 1786 const char *disk_name;
1781}; 1787};
1782 1788
1783typedef int (integrity_processing_fn) (struct blk_integrity_iter *); 1789typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
1784 1790
1785struct blk_integrity_profile { 1791struct blk_integrity_profile {
1786 integrity_processing_fn *generate_fn; 1792 integrity_processing_fn *generate_fn;
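The legacy completion helpers (blk_end_request*(), rq_end_io_fn, blk_update_request()) switch to blk_status_t as well; errno_to_blk_status() and blk_status_to_errno() are the sanctioned bridges for code that still carries errnos. Two illustrative fragments (err and status are placeholders):

	/* Completing a legacy request from errno-based driver code: */
	__blk_end_request_all(rq, errno_to_blk_status(err));

	/* Handing a completion status back to a caller that expects an errno: */
	int error = blk_status_to_errno(status);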
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index f4c639c0c362..456da5017b32 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -72,9 +72,9 @@ typedef void (*dm_release_clone_request_fn) (struct request *clone);
72 * 2 : The target wants to push back the io 72 * 2 : The target wants to push back the io
73 */ 73 */
74typedef int (*dm_endio_fn) (struct dm_target *ti, 74typedef int (*dm_endio_fn) (struct dm_target *ti,
75 struct bio *bio, int error); 75 struct bio *bio, blk_status_t *error);
76typedef int (*dm_request_endio_fn) (struct dm_target *ti, 76typedef int (*dm_request_endio_fn) (struct dm_target *ti,
77 struct request *clone, int error, 77 struct request *clone, blk_status_t error,
78 union map_info *map_context); 78 union map_info *map_context);
79 79
80typedef void (*dm_presuspend_fn) (struct dm_target *ti); 80typedef void (*dm_presuspend_fn) (struct dm_target *ti);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 0e306c5a86d6..5bc8f8682a3e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -104,8 +104,9 @@ struct elevator_mq_ops {
104 int (*request_merge)(struct request_queue *q, struct request **, struct bio *); 104 int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
105 void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); 105 void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
106 void (*requests_merged)(struct request_queue *, struct request *, struct request *); 106 void (*requests_merged)(struct request_queue *, struct request *, struct request *);
107 struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); 107 void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
108 void (*put_request)(struct request *); 108 void (*prepare_request)(struct request *, struct bio *bio);
109 void (*finish_request)(struct request *);
109 void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); 110 void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
110 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); 111 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
111 bool (*has_work)(struct blk_mq_hw_ctx *); 112 bool (*has_work)(struct blk_mq_hw_ctx *);
@@ -114,8 +115,6 @@ struct elevator_mq_ops {
114 void (*requeue_request)(struct request *); 115 void (*requeue_request)(struct request *);
115 struct request *(*former_request)(struct request_queue *, struct request *); 116 struct request *(*former_request)(struct request_queue *, struct request *);
116 struct request *(*next_request)(struct request_queue *, struct request *); 117 struct request *(*next_request)(struct request_queue *, struct request *);
117 int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
118 void (*put_rq_priv)(struct request_queue *, struct request *);
119 void (*init_icq)(struct io_cq *); 118 void (*init_icq)(struct io_cq *);
120 void (*exit_icq)(struct io_cq *); 119 void (*exit_icq)(struct io_cq *);
121}; 120};
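Note: the get_request/put_request and get_rq_priv/put_rq_priv pairs are gone; blk-mq schedulers now hook request allocation through limit_depth, prepare_request and finish_request. A skeleton ops table wired to the new hooks is sketched below; the callback names are hypothetical, and a real scheduler would also need init_sched, dispatch_request and friends.

#include <linux/elevator.h>
#include <linux/module.h>
#include "blk-mq.h"	/* built inside block/, where blk_mq_alloc_data lives */

static void example_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
	/* e.g. shrink the allocation depth for async writes */
}

static void example_prepare_request(struct request *rq, struct bio *bio)
{
	/* attach per-request scheduler state here */
}

static void example_finish_request(struct request *rq)
{
	/* release whatever prepare_request set up */
}

static struct elevator_type example_sched = {
	.ops.mq = {
		.limit_depth		= example_limit_depth,
		.prepare_request	= example_prepare_request,
		.finish_request		= example_finish_request,
	},
	.uses_mq	= true,
	.elevator_name	= "example",
	.elevator_owner	= THIS_MODULE,
};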
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3e68cabb8457..65adbddb3163 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -20,6 +20,7 @@
20#include <linux/rwsem.h> 20#include <linux/rwsem.h>
21#include <linux/capability.h> 21#include <linux/capability.h>
22#include <linux/semaphore.h> 22#include <linux/semaphore.h>
23#include <linux/fcntl.h>
23#include <linux/fiemap.h> 24#include <linux/fiemap.h>
24#include <linux/rculist_bl.h> 25#include <linux/rculist_bl.h>
25#include <linux/atomic.h> 26#include <linux/atomic.h>
@@ -143,6 +144,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
143/* File was opened by fanotify and shouldn't generate fanotify events */ 144/* File was opened by fanotify and shouldn't generate fanotify events */
144#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) 145#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)
145 146
147/* File is capable of returning -EAGAIN if AIO will block */
148#define FMODE_AIO_NOWAIT ((__force fmode_t)0x8000000)
149
146/* 150/*
147 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector 151 * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
148 * that indicates that they should check the contents of the iovec are 152 * that indicates that they should check the contents of the iovec are
@@ -262,6 +266,18 @@ struct page;
262struct address_space; 266struct address_space;
263struct writeback_control; 267struct writeback_control;
264 268
269/*
270 * Write life time hint values.
271 */
272enum rw_hint {
273 WRITE_LIFE_NOT_SET = 0,
274 WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
275 WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
276 WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
277 WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
278 WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
279};
280
265#define IOCB_EVENTFD (1 << 0) 281#define IOCB_EVENTFD (1 << 0)
266#define IOCB_APPEND (1 << 1) 282#define IOCB_APPEND (1 << 1)
267#define IOCB_DIRECT (1 << 2) 283#define IOCB_DIRECT (1 << 2)
@@ -269,6 +285,7 @@ struct writeback_control;
269#define IOCB_DSYNC (1 << 4) 285#define IOCB_DSYNC (1 << 4)
270#define IOCB_SYNC (1 << 5) 286#define IOCB_SYNC (1 << 5)
271#define IOCB_WRITE (1 << 6) 287#define IOCB_WRITE (1 << 6)
288#define IOCB_NOWAIT (1 << 7)
272 289
273struct kiocb { 290struct kiocb {
274 struct file *ki_filp; 291 struct file *ki_filp;
@@ -276,6 +293,7 @@ struct kiocb {
276 void (*ki_complete)(struct kiocb *iocb, long ret, long ret2); 293 void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
277 void *private; 294 void *private;
278 int ki_flags; 295 int ki_flags;
296 enum rw_hint ki_hint;
279}; 297};
280 298
281static inline bool is_sync_kiocb(struct kiocb *kiocb) 299static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -283,16 +301,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
283 return kiocb->ki_complete == NULL; 301 return kiocb->ki_complete == NULL;
284} 302}
285 303
286static inline int iocb_flags(struct file *file);
287
288static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
289{
290 *kiocb = (struct kiocb) {
291 .ki_filp = filp,
292 .ki_flags = iocb_flags(filp),
293 };
294}
295
296/* 304/*
297 * "descriptor" for what we're up to with a read. 305 * "descriptor" for what we're up to with a read.
298 * This allows us to use the same read code yet 306 * This allows us to use the same read code yet
@@ -593,6 +601,7 @@ struct inode {
593 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ 601 spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
594 unsigned short i_bytes; 602 unsigned short i_bytes;
595 unsigned int i_blkbits; 603 unsigned int i_blkbits;
604 enum rw_hint i_write_hint;
596 blkcnt_t i_blocks; 605 blkcnt_t i_blocks;
597 606
598#ifdef __NEED_I_SIZE_ORDERED 607#ifdef __NEED_I_SIZE_ORDERED
@@ -847,6 +856,7 @@ struct file {
847 * Must not be taken from IRQ context. 856 * Must not be taken from IRQ context.
848 */ 857 */
849 spinlock_t f_lock; 858 spinlock_t f_lock;
859 enum rw_hint f_write_hint;
850 atomic_long_t f_count; 860 atomic_long_t f_count;
851 unsigned int f_flags; 861 unsigned int f_flags;
852 fmode_t f_mode; 862 fmode_t f_mode;
@@ -1022,8 +1032,6 @@ struct file_lock_context {
1022#define OFFT_OFFSET_MAX INT_LIMIT(off_t) 1032#define OFFT_OFFSET_MAX INT_LIMIT(off_t)
1023#endif 1033#endif
1024 1034
1025#include <linux/fcntl.h>
1026
1027extern void send_sigio(struct fown_struct *fown, int fd, int band); 1035extern void send_sigio(struct fown_struct *fown, int fd, int band);
1028 1036
1029/* 1037/*
@@ -1874,6 +1882,25 @@ static inline bool HAS_UNMAPPED_ID(struct inode *inode)
1874 return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid); 1882 return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
1875} 1883}
1876 1884
1885static inline enum rw_hint file_write_hint(struct file *file)
1886{
1887 if (file->f_write_hint != WRITE_LIFE_NOT_SET)
1888 return file->f_write_hint;
1889
1890 return file_inode(file)->i_write_hint;
1891}
1892
1893static inline int iocb_flags(struct file *file);
1894
1895static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
1896{
1897 *kiocb = (struct kiocb) {
1898 .ki_filp = filp,
1899 .ki_flags = iocb_flags(filp),
1900 .ki_hint = file_write_hint(filp),
1901 };
1902}
1903
1877/* 1904/*
1878 * Inode state bits. Protected by inode->i_lock 1905 * Inode state bits. Protected by inode->i_lock
1879 * 1906 *
@@ -2518,6 +2545,8 @@ extern int filemap_fdatawait(struct address_space *);
2518extern void filemap_fdatawait_keep_errors(struct address_space *); 2545extern void filemap_fdatawait_keep_errors(struct address_space *);
2519extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, 2546extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
2520 loff_t lend); 2547 loff_t lend);
2548extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
2549 loff_t lend);
2521extern int filemap_write_and_wait(struct address_space *mapping); 2550extern int filemap_write_and_wait(struct address_space *mapping);
2522extern int filemap_write_and_wait_range(struct address_space *mapping, 2551extern int filemap_write_and_wait_range(struct address_space *mapping,
2523 loff_t lstart, loff_t lend); 2552 loff_t lstart, loff_t lend);
@@ -2844,7 +2873,7 @@ enum {
2844 DIO_SKIP_DIO_COUNT = 0x08, 2873 DIO_SKIP_DIO_COUNT = 0x08,
2845}; 2874};
2846 2875
2847void dio_end_io(struct bio *bio, int error); 2876void dio_end_io(struct bio *bio);
2848 2877
2849ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, 2878ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
2850 struct block_device *bdev, struct iov_iter *iter, 2879 struct block_device *bdev, struct iov_iter *iter,
@@ -3057,6 +3086,25 @@ static inline int iocb_flags(struct file *file)
3057 return res; 3086 return res;
3058} 3087}
3059 3088
3089static inline int kiocb_set_rw_flags(struct kiocb *ki, int flags)
3090{
3091 if (unlikely(flags & ~RWF_SUPPORTED))
3092 return -EOPNOTSUPP;
3093
3094 if (flags & RWF_NOWAIT) {
3095 if (!(ki->ki_filp->f_mode & FMODE_AIO_NOWAIT))
3096 return -EOPNOTSUPP;
3097 ki->ki_flags |= IOCB_NOWAIT;
3098 }
3099 if (flags & RWF_HIPRI)
3100 ki->ki_flags |= IOCB_HIPRI;
3101 if (flags & RWF_DSYNC)
3102 ki->ki_flags |= IOCB_DSYNC;
3103 if (flags & RWF_SYNC)
3104 ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
3105 return 0;
3106}
3107
3060static inline ino_t parent_ino(struct dentry *dentry) 3108static inline ino_t parent_ino(struct dentry *dentry)
3061{ 3109{
3062 ino_t res; 3110 ino_t res;
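Note: IOCB_NOWAIT is how the RWF_NOWAIT intent reaches a filesystem, which honours it by swapping blocking locks for trylocks and returning -EAGAIN. A hedged sketch of that pattern in a read_iter method; foofs_* is hypothetical, and most of the real work stays in generic_file_read_iter, which checks the flag itself in the mm/filemap.c hunks further down.

#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t foofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* the caller asked not to block: never sleep on the lock */
		if (!inode_trylock_shared(inode))
			return -EAGAIN;
	} else {
		inode_lock_shared(inode);
	}

	ret = generic_file_read_iter(iocb, to);
	inode_unlock_shared(inode);
	return ret;
}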
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 6980ca322074..dc152e4b7f73 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -671,7 +671,7 @@ struct ide_port_ops {
671 void (*init_dev)(ide_drive_t *); 671 void (*init_dev)(ide_drive_t *);
672 void (*set_pio_mode)(struct hwif_s *, ide_drive_t *); 672 void (*set_pio_mode)(struct hwif_s *, ide_drive_t *);
673 void (*set_dma_mode)(struct hwif_s *, ide_drive_t *); 673 void (*set_dma_mode)(struct hwif_s *, ide_drive_t *);
674 int (*reset_poll)(ide_drive_t *); 674 blk_status_t (*reset_poll)(ide_drive_t *);
675 void (*pre_reset)(ide_drive_t *); 675 void (*pre_reset)(ide_drive_t *);
676 void (*resetproc)(ide_drive_t *); 676 void (*resetproc)(ide_drive_t *);
677 void (*maskproc)(ide_drive_t *, int); 677 void (*maskproc)(ide_drive_t *, int);
@@ -1092,7 +1092,7 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l
1092extern int ide_vlb_clk; 1092extern int ide_vlb_clk;
1093extern int ide_pci_clk; 1093extern int ide_pci_clk;
1094 1094
1095int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int); 1095int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
1096void ide_kill_rq(ide_drive_t *, struct request *); 1096void ide_kill_rq(ide_drive_t *, struct request *);
1097 1097
1098void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); 1098void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
@@ -1123,7 +1123,7 @@ extern int ide_devset_execute(ide_drive_t *drive,
1123 const struct ide_devset *setting, int arg); 1123 const struct ide_devset *setting, int arg);
1124 1124
1125void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8); 1125void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
1126int ide_complete_rq(ide_drive_t *, int, unsigned int); 1126int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
1127 1127
1128void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd); 1128void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
1129void ide_tf_dump(const char *, struct ide_cmd *); 1129void ide_tf_dump(const char *, struct ide_cmd *);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f753e788da31..69f4e9470084 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -52,6 +52,7 @@ struct iomap {
52#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ 52#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
53#define IOMAP_FAULT (1 << 3) /* mapping for page fault */ 53#define IOMAP_FAULT (1 << 3) /* mapping for page fault */
54#define IOMAP_DIRECT (1 << 4) /* direct I/O */ 54#define IOMAP_DIRECT (1 << 4) /* direct I/O */
55#define IOMAP_NOWAIT (1 << 5) /* Don't wait for writeback */
55 56
56struct iomap_ops { 57struct iomap_ops {
57 /* 58 /*
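Note: IOMAP_NOWAIT carries the non-blocking intent down into ->iomap_begin so the filesystem can avoid sleeping on locks or allocation. The usual translation point is the direct I/O setup path; a small hedged sketch with a hypothetical helper:

#include <linux/fs.h>
#include <linux/iomap.h>

/* Hypothetical helper: derive iomap flags for a direct I/O kiocb. */
static unsigned int example_dio_iomap_flags(const struct kiocb *iocb)
{
	unsigned int flags = IOMAP_DIRECT;

	if (iocb->ki_flags & IOCB_WRITE)
		flags |= IOMAP_WRITE;
	if (iocb->ki_flags & IOCB_NOWAIT)
		flags |= IOMAP_NOWAIT;	/* ->iomap_begin must not block */

	return flags;
}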
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index e400a69fa1d3..6b8ee9e628e1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -87,7 +87,7 @@ enum {
87 NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ 87 NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
88}; 88};
89 89
90#define NVMF_AQ_DEPTH 32 90#define NVME_AQ_DEPTH 32
91 91
92enum { 92enum {
93 NVME_REG_CAP = 0x0000, /* Controller Capabilities */ 93 NVME_REG_CAP = 0x0000, /* Controller Capabilities */
@@ -102,6 +102,7 @@ enum {
102 NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ 102 NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
103 NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ 103 NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
104 NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ 104 NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
105 NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
105}; 106};
106 107
107#define NVME_CAP_MQES(cap) ((cap) & 0xffff) 108#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
@@ -208,9 +209,15 @@ struct nvme_id_ctrl {
208 __u8 tnvmcap[16]; 209 __u8 tnvmcap[16];
209 __u8 unvmcap[16]; 210 __u8 unvmcap[16];
210 __le32 rpmbs; 211 __le32 rpmbs;
211 __u8 rsvd316[4]; 212 __le16 edstt;
213 __u8 dsto;
214 __u8 fwug;
212 __le16 kas; 215 __le16 kas;
213 __u8 rsvd322[190]; 216 __le16 hctma;
217 __le16 mntmt;
218 __le16 mxtmt;
219 __le32 sanicap;
220 __u8 rsvd332[180];
214 __u8 sqes; 221 __u8 sqes;
215 __u8 cqes; 222 __u8 cqes;
216 __le16 maxcmd; 223 __le16 maxcmd;
@@ -246,6 +253,7 @@ enum {
246 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, 253 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
247 NVME_CTRL_VWC_PRESENT = 1 << 0, 254 NVME_CTRL_VWC_PRESENT = 1 << 0,
248 NVME_CTRL_OACS_SEC_SUPP = 1 << 0, 255 NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
256 NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
249 NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, 257 NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7,
250}; 258};
251 259
@@ -275,7 +283,7 @@ struct nvme_id_ns {
275 __le16 nabsn; 283 __le16 nabsn;
276 __le16 nabo; 284 __le16 nabo;
277 __le16 nabspf; 285 __le16 nabspf;
278 __u16 rsvd46; 286 __le16 noiob;
279 __u8 nvmcap[16]; 287 __u8 nvmcap[16];
280 __u8 rsvd64[40]; 288 __u8 rsvd64[40];
281 __u8 nguid[16]; 289 __u8 nguid[16];
@@ -289,6 +297,7 @@ enum {
289 NVME_ID_CNS_NS = 0x00, 297 NVME_ID_CNS_NS = 0x00,
290 NVME_ID_CNS_CTRL = 0x01, 298 NVME_ID_CNS_CTRL = 0x01,
291 NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, 299 NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
300 NVME_ID_CNS_NS_DESC_LIST = 0x03,
292 NVME_ID_CNS_NS_PRESENT_LIST = 0x10, 301 NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
293 NVME_ID_CNS_NS_PRESENT = 0x11, 302 NVME_ID_CNS_NS_PRESENT = 0x11,
294 NVME_ID_CNS_CTRL_NS_LIST = 0x12, 303 NVME_ID_CNS_CTRL_NS_LIST = 0x12,
@@ -296,6 +305,19 @@ enum {
296}; 305};
297 306
298enum { 307enum {
308 NVME_DIR_IDENTIFY = 0x00,
309 NVME_DIR_STREAMS = 0x01,
310 NVME_DIR_SND_ID_OP_ENABLE = 0x01,
311 NVME_DIR_SND_ST_OP_REL_ID = 0x01,
312 NVME_DIR_SND_ST_OP_REL_RSC = 0x02,
313 NVME_DIR_RCV_ID_OP_PARAM = 0x01,
314 NVME_DIR_RCV_ST_OP_PARAM = 0x01,
315 NVME_DIR_RCV_ST_OP_STATUS = 0x02,
316 NVME_DIR_RCV_ST_OP_RESOURCE = 0x03,
317 NVME_DIR_ENDIR = 0x01,
318};
319
320enum {
299 NVME_NS_FEAT_THIN = 1 << 0, 321 NVME_NS_FEAT_THIN = 1 << 0,
300 NVME_NS_FLBAS_LBA_MASK = 0xf, 322 NVME_NS_FLBAS_LBA_MASK = 0xf,
301 NVME_NS_FLBAS_META_EXT = 0x10, 323 NVME_NS_FLBAS_META_EXT = 0x10,
@@ -315,6 +337,22 @@ enum {
315 NVME_NS_DPS_PI_TYPE3 = 3, 337 NVME_NS_DPS_PI_TYPE3 = 3,
316}; 338};
317 339
340struct nvme_ns_id_desc {
341 __u8 nidt;
342 __u8 nidl;
343 __le16 reserved;
344};
345
346#define NVME_NIDT_EUI64_LEN 8
347#define NVME_NIDT_NGUID_LEN 16
348#define NVME_NIDT_UUID_LEN 16
349
350enum {
351 NVME_NIDT_EUI64 = 0x01,
352 NVME_NIDT_NGUID = 0x02,
353 NVME_NIDT_UUID = 0x03,
354};
355
318struct nvme_smart_log { 356struct nvme_smart_log {
319 __u8 critical_warning; 357 __u8 critical_warning;
320 __u8 temperature[2]; 358 __u8 temperature[2];
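Note: nvme_ns_id_desc is the per-entry header in the Identify Namespace Identification Descriptor list returned for the new NVME_ID_CNS_NS_DESC_LIST. A hedged sketch of walking the 4KB buffer to pull out a UUID descriptor; bounds checking and error handling are trimmed, and the helper name is hypothetical.

#include <linux/nvme.h>
#include <linux/uuid.h>
#include <linux/errno.h>

/* Scan an Identify(CNS=0x03) data buffer for an NVME_NIDT_UUID descriptor. */
static int example_find_ns_uuid(void *data, uuid_t *uuid)
{
	int pos;

	for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; ) {
		struct nvme_ns_id_desc *cur = data + pos;

		if (cur->nidl == 0)	/* a zero-length descriptor ends the list */
			break;

		if (cur->nidt == NVME_NIDT_UUID &&
		    cur->nidl == NVME_NIDT_UUID_LEN) {
			uuid_copy(uuid, data + pos + sizeof(*cur));
			return 0;
		}

		pos += sizeof(*cur) + cur->nidl;
	}

	return -ENODEV;
}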
@@ -536,6 +574,7 @@ enum {
536 NVME_RW_PRINFO_PRCHK_APP = 1 << 11, 574 NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
537 NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, 575 NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
538 NVME_RW_PRINFO_PRACT = 1 << 13, 576 NVME_RW_PRINFO_PRACT = 1 << 13,
577 NVME_RW_DTYPE_STREAMS = 1 << 4,
539}; 578};
540 579
541struct nvme_dsm_cmd { 580struct nvme_dsm_cmd {
@@ -587,6 +626,11 @@ struct nvme_feat_auto_pst {
587 __le64 entries[32]; 626 __le64 entries[32];
588}; 627};
589 628
629enum {
630 NVME_HOST_MEM_ENABLE = (1 << 0),
631 NVME_HOST_MEM_RETURN = (1 << 1),
632};
633
590/* Admin commands */ 634/* Admin commands */
591 635
592enum nvme_admin_opcode { 636enum nvme_admin_opcode {
@@ -605,6 +649,8 @@ enum nvme_admin_opcode {
605 nvme_admin_download_fw = 0x11, 649 nvme_admin_download_fw = 0x11,
606 nvme_admin_ns_attach = 0x15, 650 nvme_admin_ns_attach = 0x15,
607 nvme_admin_keep_alive = 0x18, 651 nvme_admin_keep_alive = 0x18,
652 nvme_admin_directive_send = 0x19,
653 nvme_admin_directive_recv = 0x1a,
608 nvme_admin_dbbuf = 0x7C, 654 nvme_admin_dbbuf = 0x7C,
609 nvme_admin_format_nvm = 0x80, 655 nvme_admin_format_nvm = 0x80,
610 nvme_admin_security_send = 0x81, 656 nvme_admin_security_send = 0x81,
@@ -659,6 +705,8 @@ struct nvme_identify {
659 __u32 rsvd11[5]; 705 __u32 rsvd11[5];
660}; 706};
661 707
708#define NVME_IDENTIFY_DATA_SIZE 4096
709
662struct nvme_features { 710struct nvme_features {
663 __u8 opcode; 711 __u8 opcode;
664 __u8 flags; 712 __u8 flags;
@@ -668,7 +716,16 @@ struct nvme_features {
668 union nvme_data_ptr dptr; 716 union nvme_data_ptr dptr;
669 __le32 fid; 717 __le32 fid;
670 __le32 dword11; 718 __le32 dword11;
671 __u32 rsvd12[4]; 719 __le32 dword12;
720 __le32 dword13;
721 __le32 dword14;
722 __le32 dword15;
723};
724
725struct nvme_host_mem_buf_desc {
726 __le64 addr;
727 __le32 size;
728 __u32 rsvd;
672}; 729};
673 730
674struct nvme_create_cq { 731struct nvme_create_cq {
@@ -757,6 +814,24 @@ struct nvme_get_log_page_command {
757 __u32 rsvd14[2]; 814 __u32 rsvd14[2];
758}; 815};
759 816
817struct nvme_directive_cmd {
818 __u8 opcode;
819 __u8 flags;
820 __u16 command_id;
821 __le32 nsid;
822 __u64 rsvd2[2];
823 union nvme_data_ptr dptr;
824 __le32 numd;
825 __u8 doper;
826 __u8 dtype;
827 __le16 dspec;
828 __u8 endir;
829 __u8 tdtype;
830 __u16 rsvd15;
831
832 __u32 rsvd16[3];
833};
834
760/* 835/*
761 * Fabrics subcommands. 836 * Fabrics subcommands.
762 */ 837 */
@@ -887,6 +962,18 @@ struct nvme_dbbuf {
887 __u32 rsvd12[6]; 962 __u32 rsvd12[6];
888}; 963};
889 964
965struct streams_directive_params {
966 __u16 msl;
967 __u16 nssa;
968 __u16 nsso;
969 __u8 rsvd[10];
970 __u32 sws;
971 __u16 sgs;
972 __u16 nsa;
973 __u16 nso;
974 __u8 rsvd2[6];
975};
976
890struct nvme_command { 977struct nvme_command {
891 union { 978 union {
892 struct nvme_common_command common; 979 struct nvme_common_command common;
@@ -907,6 +994,7 @@ struct nvme_command {
907 struct nvmf_property_set_command prop_set; 994 struct nvmf_property_set_command prop_set;
908 struct nvmf_property_get_command prop_get; 995 struct nvmf_property_get_command prop_get;
909 struct nvme_dbbuf dbbuf; 996 struct nvme_dbbuf dbbuf;
997 struct nvme_directive_cmd directive;
910 }; 998 };
911}; 999};
912 1000
@@ -1051,4 +1139,8 @@ struct nvme_completion {
1051#define NVME_VS(major, minor, tertiary) \ 1139#define NVME_VS(major, minor, tertiary) \
1052 (((major) << 16) | ((minor) << 8) | (tertiary)) 1140 (((major) << 16) | ((minor) << 8) | (tertiary))
1053 1141
1142#define NVME_MAJOR(ver) ((ver) >> 16)
1143#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
1144#define NVME_TERTIARY(ver) ((ver) & 0xff)
1145
1054#endif /* _LINUX_NVME_H */ 1146#endif /* _LINUX_NVME_H */
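Note: NVME_MAJOR/NVME_MINOR/NVME_TERTIARY simply invert NVME_VS(); for example NVME_MAJOR(NVME_VS(1, 3, 0)) is 1, NVME_MINOR() is 3 and NVME_TERTIARY() is 0. A trivial sketch for logging a controller's version register:

#include <linux/nvme.h>
#include <linux/printk.h>

/* Pretty-print a controller's VS register value (NVME_REG_VS). */
static void example_log_nvme_version(u32 vs)
{
	pr_info("controller implements NVMe %u.%u.%u\n",
		NVME_MAJOR(vs), NVME_MINOR(vs), NVME_TERTIARY(vs));
}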
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index cb3c8fe6acd7..4b3286ac60c8 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -278,6 +278,8 @@ size_t sg_pcopy_from_buffer(struct scatterlist *sgl, unsigned int nents,
278 const void *buf, size_t buflen, off_t skip); 278 const void *buf, size_t buflen, off_t skip);
279size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, 279size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
280 void *buf, size_t buflen, off_t skip); 280 void *buf, size_t buflen, off_t skip);
281size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
282 size_t buflen, off_t skip);
281 283
282/* 284/*
283 * Maximum number of entries that will be allocated in one piece, if 285 * Maximum number of entries that will be allocated in one piece, if
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a09cca829082..a29d3086eb56 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -157,7 +157,7 @@ struct osd_request {
157 157
158 osd_req_done_fn *async_done; 158 osd_req_done_fn *async_done;
159 void *async_private; 159 void *async_private;
160 int async_error; 160 blk_status_t async_error;
161 int req_errors; 161 int req_errors;
162}; 162};
163 163
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index b379f93a2c48..da9bf2bcdf1a 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -166,6 +166,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
166extern void scsi_kunmap_atomic_sg(void *virt); 166extern void scsi_kunmap_atomic_sg(void *virt);
167 167
168extern int scsi_init_io(struct scsi_cmnd *cmd); 168extern int scsi_init_io(struct scsi_cmnd *cmd);
169extern void scsi_initialize_rq(struct request *rq);
169 170
170extern int scsi_dma_map(struct scsi_cmnd *cmd); 171extern int scsi_dma_map(struct scsi_cmnd *cmd);
171extern void scsi_dma_unmap(struct scsi_cmnd *cmd); 172extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index f0c76f9dc285..e0afa445ee4e 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -27,6 +27,6 @@ static inline void scsi_req_free_cmd(struct scsi_request *req)
27 kfree(req->cmd); 27 kfree(req->cmd);
28} 28}
29 29
30void scsi_req_init(struct request *); 30void scsi_req_init(struct scsi_request *req);
31 31
32#endif /* _SCSI_SCSI_REQUEST_H */ 32#endif /* _SCSI_SCSI_REQUEST_H */
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f7fbd1..a2d4a8ac94ca 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -79,7 +79,7 @@ struct io_event {
79struct iocb { 79struct iocb {
80 /* these are internal to the kernel/libc. */ 80 /* these are internal to the kernel/libc. */
81 __u64 aio_data; /* data to be returned in event's data */ 81 __u64 aio_data; /* data to be returned in event's data */
82 __u32 PADDED(aio_key, aio_reserved1); 82 __u32 PADDED(aio_key, aio_rw_flags);
83 /* the kernel sets aio_key to the req # */ 83 /* the kernel sets aio_key to the req # */
84 84
85 /* common fields */ 85 /* common fields */
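Note: the reserved field becomes aio_rw_flags, so per-I/O RWF_* flags (including the new RWF_NOWAIT) can ride along with an AIO request. A hedged userspace sketch using the raw syscalls; error checking is trimmed, and in this release RWF_NOWAIT only applies to direct I/O.

#define _GNU_SOURCE
#include <linux/aio_abi.h>
#include <linux/fs.h>		/* RWF_NOWAIT */
#include <sys/syscall.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>

int main(void)
{
	char buf[4096] __attribute__((aligned(4096)));
	aio_context_t ctx = 0;
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };
	int fd = open("testfile", O_WRONLY | O_CREAT | O_DIRECT, 0644);

	memset(buf, 'x', sizeof(buf));
	syscall(SYS_io_setup, 1, &ctx);

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_lio_opcode = IOCB_CMD_PWRITE;
	cb.aio_buf = (unsigned long)buf;
	cb.aio_nbytes = sizeof(buf);
	cb.aio_offset = 0;
	cb.aio_rw_flags = RWF_NOWAIT;	/* complete with -EAGAIN instead of blocking */

	syscall(SYS_io_submit, ctx, 1, cbs);
	syscall(SYS_io_destroy, ctx);
	return 0;
}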
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 4bf9f1eabffc..2f6c77aebe1a 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
267#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) 267#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
268 268
269#define DM_VERSION_MAJOR 4 269#define DM_VERSION_MAJOR 4
270#define DM_VERSION_MINOR 35 270#define DM_VERSION_MINOR 36
271#define DM_VERSION_PATCHLEVEL 0 271#define DM_VERSION_PATCHLEVEL 0
272#define DM_VERSION_EXTRA "-ioctl (2016-06-23)" 272#define DM_VERSION_EXTRA "-ioctl (2017-06-09)"
273 273
274/* Status bits */ 274/* Status bits */
275#define DM_READONLY_FLAG (1 << 0) /* In/Out */ 275#define DM_READONLY_FLAG (1 << 0) /* In/Out */
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 813afd6eee71..ec69d55bcec7 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -43,6 +43,27 @@
43/* (1U << 31) is reserved for signed error codes */ 43/* (1U << 31) is reserved for signed error codes */
44 44
45/* 45/*
46 * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
47 * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
48 * the specific file.
49 */
50#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
51#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
52#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
53#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
54
55/*
56 * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
57 * used to clear any hints previously set.
58 */
59#define RWF_WRITE_LIFE_NOT_SET 0
60#define RWH_WRITE_LIFE_NONE 1
61#define RWH_WRITE_LIFE_SHORT 2
62#define RWH_WRITE_LIFE_MEDIUM 3
63#define RWH_WRITE_LIFE_LONG 4
64#define RWH_WRITE_LIFE_EXTREME 5
65
66/*
46 * Types of directory notifications that may be requested. 67 * Types of directory notifications that may be requested.
47 */ 68 */
48#define DN_ACCESS 0x00000001 /* File accessed */ 69#define DN_ACCESS 0x00000001 /* File accessed */
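Note: these are the userspace-visible knobs for write lifetime hints; {GET,SET}_RW_HINT act on the inode, the FILE variants on one open file description, and the argument is a pointer to a 64-bit hint value. A small hedged example follows; the fallback defines mirror the values above, with F_LINUX_SPECIFIC_BASE being 1024. The enum rw_hint plumbing earlier in this diff is what carries the value from here down to the block layer.

#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>

#ifndef F_SET_RW_HINT			/* libc headers may predate these */
#define F_GET_RW_HINT		(1024 + 11)
#define F_SET_RW_HINT		(1024 + 12)
#define RWH_WRITE_LIFE_SHORT	2
#endif

int main(void)
{
	uint64_t hint = RWH_WRITE_LIFE_SHORT;	/* data expected to be short-lived */
	int fd = open("logfile", O_WRONLY | O_CREAT | O_APPEND, 0644);

	/* tag the inode so all writers share the same expected lifetime */
	if (fcntl(fd, F_SET_RW_HINT, &hint) == -1)
		perror("F_SET_RW_HINT");

	if (fcntl(fd, F_GET_RW_HINT, &hint) == 0)
		printf("inode write hint: %llu\n", (unsigned long long)hint);

	return 0;
}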
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 24e61a54feaa..27d8c36c04af 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -360,5 +360,9 @@ struct fscrypt_key {
360#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */ 360#define RWF_HIPRI 0x00000001 /* high priority request, poll if possible */
361#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */ 361#define RWF_DSYNC 0x00000002 /* per-IO O_DSYNC */
362#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */ 362#define RWF_SYNC 0x00000004 /* per-IO O_SYNC */
363#define RWF_NOWAIT 0x00000008 /* per-IO, return -EAGAIN if operation would block */
364
365#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC |\
366 RWF_NOWAIT)
363 367
364#endif /* _UAPI_LINUX_FS_H */ 368#endif /* _UAPI_LINUX_FS_H */
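Note: RWF_NOWAIT is the per-call flag behind the "return failure if we will block" series, and RWF_SUPPORTED is what kiocb_set_rw_flags() validates incoming flags against. From userspace it is passed to preadv2()/pwritev2() (or via aio_rw_flags as sketched earlier); in this release only direct I/O writes honour it. A hedged example, assuming a glibc with the pwritev2 wrapper:

#define _GNU_SOURCE
#include <sys/uio.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef RWF_NOWAIT
#define RWF_NOWAIT 0x00000008	/* value from the uapi header above */
#endif

int main(void)
{
	struct iovec iov;
	void *buf;
	ssize_t ret;
	int fd = open("datafile", O_WRONLY | O_CREAT | O_DIRECT, 0644);

	posix_memalign(&buf, 4096, 4096);	/* O_DIRECT wants aligned buffers */
	memset(buf, 'x', 4096);
	iov.iov_base = buf;
	iov.iov_len = 4096;

	ret = pwritev2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (ret < 0 && errno == EAGAIN)
		fprintf(stderr, "write would have blocked; try again later\n");
	else if (ret < 0)
		perror("pwritev2");

	free(buf);
	return 0;
}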
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index c8125ec1f4f2..a3960f98679c 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,6 +22,7 @@ enum {
22 LO_FLAGS_AUTOCLEAR = 4, 22 LO_FLAGS_AUTOCLEAR = 4,
23 LO_FLAGS_PARTSCAN = 8, 23 LO_FLAGS_PARTSCAN = 8,
24 LO_FLAGS_DIRECT_IO = 16, 24 LO_FLAGS_DIRECT_IO = 16,
25 LO_FLAGS_BLOCKSIZE = 32,
25}; 26};
26 27
27#include <asm/posix_types.h> /* for __kernel_old_dev_t */ 28#include <asm/posix_types.h> /* for __kernel_old_dev_t */
@@ -59,6 +60,8 @@ struct loop_info64 {
59 __u64 lo_init[2]; 60 __u64 lo_init[2];
60}; 61};
61 62
63#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
64
62/* 65/*
63 * Loop filter types 66 * Loop filter types
64 */ 67 */
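Note: LO_FLAGS_BLOCKSIZE plus the LO_INFO_BLOCKSIZE accessor are the uapi side of the loop block-size patch mentioned in the pull: lo_init[0] carries the requested logical block size through LOOP_SET_STATUS64. A hedged userspace sketch; treat the exact plumbing as illustrative of the patch rather than a reference.

#include <linux/loop.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	struct loop_info64 info;
	int loopfd = open("/dev/loop0", O_RDWR);

	if (loopfd < 0 || ioctl(loopfd, LOOP_GET_STATUS64, &info) < 0) {
		perror("loop");
		return 1;
	}

	info.lo_flags |= LO_FLAGS_BLOCKSIZE;	/* ask for a non-512-byte block size */
	LO_INFO_BLOCKSIZE(&info) = 4096;	/* i.e. info.lo_init[0] = 4096 */

	if (ioctl(loopfd, LOOP_SET_STATUS64, &info) < 0)
		perror("LOOP_SET_STATUS64");

	return 0;
}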
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 155e33f81913..a50527ebf671 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -41,10 +41,14 @@ enum {
41#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ 41#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */
42#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ 42#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */
43#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ 43#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */
44#define NBD_FLAG_SEND_FUA (1 << 3) /* send FUA (forced unit access) */
44/* there is a gap here to match userspace */ 45/* there is a gap here to match userspace */
45#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ 46#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */
46#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ 47#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */
47 48
49/* values for cmd flags in the upper 16 bits of request type */
50#define NBD_CMD_FLAG_FUA (1 << 16) /* FUA (forced unit access) op */
51
48/* These are client behavior specific flags. */ 52/* These are client behavior specific flags. */
49#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on 53#define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on
50 disconnect. */ 54 disconnect. */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f80fd33639e0..57d22571f306 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -225,14 +225,14 @@ static struct block_device *hib_resume_bdev;
225struct hib_bio_batch { 225struct hib_bio_batch {
226 atomic_t count; 226 atomic_t count;
227 wait_queue_head_t wait; 227 wait_queue_head_t wait;
228 int error; 228 blk_status_t error;
229}; 229};
230 230
231static void hib_init_batch(struct hib_bio_batch *hb) 231static void hib_init_batch(struct hib_bio_batch *hb)
232{ 232{
233 atomic_set(&hb->count, 0); 233 atomic_set(&hb->count, 0);
234 init_waitqueue_head(&hb->wait); 234 init_waitqueue_head(&hb->wait);
235 hb->error = 0; 235 hb->error = BLK_STS_OK;
236} 236}
237 237
238static void hib_end_io(struct bio *bio) 238static void hib_end_io(struct bio *bio)
@@ -240,7 +240,7 @@ static void hib_end_io(struct bio *bio)
240 struct hib_bio_batch *hb = bio->bi_private; 240 struct hib_bio_batch *hb = bio->bi_private;
241 struct page *page = bio->bi_io_vec[0].bv_page; 241 struct page *page = bio->bi_io_vec[0].bv_page;
242 242
243 if (bio->bi_error) { 243 if (bio->bi_status) {
244 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", 244 printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
245 imajor(bio->bi_bdev->bd_inode), 245 imajor(bio->bi_bdev->bd_inode),
246 iminor(bio->bi_bdev->bd_inode), 246 iminor(bio->bi_bdev->bd_inode),
@@ -253,8 +253,8 @@ static void hib_end_io(struct bio *bio)
253 flush_icache_range((unsigned long)page_address(page), 253 flush_icache_range((unsigned long)page_address(page),
254 (unsigned long)page_address(page) + PAGE_SIZE); 254 (unsigned long)page_address(page) + PAGE_SIZE);
255 255
256 if (bio->bi_error && !hb->error) 256 if (bio->bi_status && !hb->error)
257 hb->error = bio->bi_error; 257 hb->error = bio->bi_status;
258 if (atomic_dec_and_test(&hb->count)) 258 if (atomic_dec_and_test(&hb->count))
259 wake_up(&hb->wait); 259 wake_up(&hb->wait);
260 260
@@ -293,10 +293,10 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr,
293 return error; 293 return error;
294} 294}
295 295
296static int hib_wait_io(struct hib_bio_batch *hb) 296static blk_status_t hib_wait_io(struct hib_bio_batch *hb)
297{ 297{
298 wait_event(hb->wait, atomic_read(&hb->count) == 0); 298 wait_event(hb->wait, atomic_read(&hb->count) == 0);
299 return hb->error; 299 return blk_status_to_errno(hb->error);
300} 300}
301 301
302/* 302/*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 193c5f5e3f79..bc364f86100a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -867,7 +867,7 @@ static void blk_add_trace_split(void *ignore,
867 867
868 __blk_add_trace(bt, bio->bi_iter.bi_sector, 868 __blk_add_trace(bt, bio->bi_iter.bi_sector,
869 bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, 869 bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
870 BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu), 870 BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
871 &rpdu); 871 &rpdu);
872 } 872 }
873} 873}
@@ -900,7 +900,7 @@ static void blk_add_trace_bio_remap(void *ignore,
900 r.sector_from = cpu_to_be64(from); 900 r.sector_from = cpu_to_be64(from);
901 901
902 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, 902 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
903 bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error, 903 bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
904 sizeof(r), &r); 904 sizeof(r), &r);
905} 905}
906 906
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c6cf82242d65..be7b4dd6b68d 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
751 return sg_copy_buffer(sgl, nents, buf, buflen, skip, true); 751 return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
752} 752}
753EXPORT_SYMBOL(sg_pcopy_to_buffer); 753EXPORT_SYMBOL(sg_pcopy_to_buffer);
754
755/**
756 * sg_zero_buffer - Zero-out a part of a SG list
757 * @sgl: The SG list
758 * @nents: Number of SG entries
759 * @buflen: The number of bytes to zero out
760 * @skip: Number of bytes to skip before zeroing
761 *
762 * Returns the number of bytes zeroed.
763 **/
764size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
765 size_t buflen, off_t skip)
766{
767 unsigned int offset = 0;
768 struct sg_mapping_iter miter;
769 unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
770
771 sg_miter_start(&miter, sgl, nents, sg_flags);
772
773 if (!sg_miter_skip(&miter, skip))
774 return false;
775
776 while (offset < buflen && sg_miter_next(&miter)) {
777 unsigned int len;
778
779 len = min(miter.length, buflen - offset);
780 memset(miter.addr, 0, len);
781
782 offset += len;
783 }
784
785 sg_miter_stop(&miter);
786 return offset;
787}
788EXPORT_SYMBOL(sg_zero_buffer);
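Note: sg_zero_buffer() rounds out the sg_pcopy_* helpers: skip @skip bytes into the SG list, zero the next @buflen bytes, and return how much was actually cleared. A short hedged usage sketch with a hypothetical helper, e.g. for blanking the unused tail of a mapped buffer after a short transfer:

#include <linux/scatterlist.h>

/* Zero everything in the SG list beyond @valid_bytes (hypothetical helper). */
static void example_zero_sg_tail(struct scatterlist *sgl, unsigned int nents,
				 size_t total_len, size_t valid_bytes)
{
	if (valid_bytes >= total_len)
		return;

	sg_zero_buffer(sgl, nents, total_len - valid_bytes, valid_bytes);
}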
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f1be573a5e6..742034e56100 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -376,6 +376,38 @@ int filemap_flush(struct address_space *mapping)
376} 376}
377EXPORT_SYMBOL(filemap_flush); 377EXPORT_SYMBOL(filemap_flush);
378 378
379/**
380 * filemap_range_has_page - check if a page exists in range.
381 * @mapping: address space within which to check
382 * @start_byte: offset in bytes where the range starts
383 * @end_byte: offset in bytes where the range ends (inclusive)
384 *
385 * Find at least one page in the range supplied, usually used to check if
386 * direct writing in this range will trigger a writeback.
387 */
388bool filemap_range_has_page(struct address_space *mapping,
389 loff_t start_byte, loff_t end_byte)
390{
391 pgoff_t index = start_byte >> PAGE_SHIFT;
392 pgoff_t end = end_byte >> PAGE_SHIFT;
393 struct pagevec pvec;
394 bool ret;
395
396 if (end_byte < start_byte)
397 return false;
398
399 if (mapping->nrpages == 0)
400 return false;
401
402 pagevec_init(&pvec, 0);
403 if (!pagevec_lookup(&pvec, mapping, index, 1))
404 return false;
405 ret = (pvec.pages[0]->index <= end);
406 pagevec_release(&pvec);
407 return ret;
408}
409EXPORT_SYMBOL(filemap_range_has_page);
410
379static int __filemap_fdatawait_range(struct address_space *mapping, 411static int __filemap_fdatawait_range(struct address_space *mapping,
380 loff_t start_byte, loff_t end_byte) 412 loff_t start_byte, loff_t end_byte)
381{ 413{
@@ -2038,10 +2070,17 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
2038 loff_t size; 2070 loff_t size;
2039 2071
2040 size = i_size_read(inode); 2072 size = i_size_read(inode);
2041 retval = filemap_write_and_wait_range(mapping, iocb->ki_pos, 2073 if (iocb->ki_flags & IOCB_NOWAIT) {
2042 iocb->ki_pos + count - 1); 2074 if (filemap_range_has_page(mapping, iocb->ki_pos,
2043 if (retval < 0) 2075 iocb->ki_pos + count - 1))
2044 goto out; 2076 return -EAGAIN;
2077 } else {
2078 retval = filemap_write_and_wait_range(mapping,
2079 iocb->ki_pos,
2080 iocb->ki_pos + count - 1);
2081 if (retval < 0)
2082 goto out;
2083 }
2045 2084
2046 file_accessed(file); 2085 file_accessed(file);
2047 2086
@@ -2642,6 +2681,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
2642 2681
2643 pos = iocb->ki_pos; 2682 pos = iocb->ki_pos;
2644 2683
2684 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
2685 return -EINVAL;
2686
2645 if (limit != RLIM_INFINITY) { 2687 if (limit != RLIM_INFINITY) {
2646 if (iocb->ki_pos >= limit) { 2688 if (iocb->ki_pos >= limit) {
2647 send_sig(SIGXFSZ, current, 0); 2689 send_sig(SIGXFSZ, current, 0);
@@ -2710,9 +2752,17 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
2710 write_len = iov_iter_count(from); 2752 write_len = iov_iter_count(from);
2711 end = (pos + write_len - 1) >> PAGE_SHIFT; 2753 end = (pos + write_len - 1) >> PAGE_SHIFT;
2712 2754
2713 written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1); 2755 if (iocb->ki_flags & IOCB_NOWAIT) {
2714 if (written) 2756 /* If there are pages to writeback, return */
2715 goto out; 2757 if (filemap_range_has_page(inode->i_mapping, pos,
2758 pos + iov_iter_count(from)))
2759 return -EAGAIN;
2760 } else {
2761 written = filemap_write_and_wait_range(mapping, pos,
2762 pos + write_len - 1);
2763 if (written)
2764 goto out;
2765 }
2716 2766
2717 /* 2767 /*
2718 * After a write we want buffered reads to be sure to go to disk to get 2768 * After a write we want buffered reads to be sure to go to disk to get
diff --git a/mm/page_io.c b/mm/page_io.c
index 23f6d0d3470f..2da71e627812 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -45,7 +45,7 @@ void end_swap_bio_write(struct bio *bio)
45{ 45{
46 struct page *page = bio->bi_io_vec[0].bv_page; 46 struct page *page = bio->bi_io_vec[0].bv_page;
47 47
48 if (bio->bi_error) { 48 if (bio->bi_status) {
49 SetPageError(page); 49 SetPageError(page);
50 /* 50 /*
51 * We failed to write the page out to swap-space. 51 * We failed to write the page out to swap-space.
@@ -118,7 +118,7 @@ static void end_swap_bio_read(struct bio *bio)
118{ 118{
119 struct page *page = bio->bi_io_vec[0].bv_page; 119 struct page *page = bio->bi_io_vec[0].bv_page;
120 120
121 if (bio->bi_error) { 121 if (bio->bi_status) {
122 SetPageError(page); 122 SetPageError(page);
123 ClearPageUptodate(page); 123 ClearPageUptodate(page);
124 pr_alert("Read-error on swap-device (%u:%u:%llu)\n", 124 pr_alert("Read-error on swap-device (%u:%u:%llu)\n",