author     Linus Torvalds <torvalds@linux-foundation.org>  2017-02-21 13:57:33 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-02-21 13:57:33 -0500
commit     772c8f6f3bbd3ceb94a89373473083e3e1113554
tree       d2b34e8f1841a169d59adf53074de217a9e0f977
parent     fd4a61e08aa79f2b7835b25c6f94f27bd2d65990
parent     818551e2b2c662a1b26de6b4f7d6b8411a838d18
Merge tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:

 - blk-mq scheduling framework from me and Omar, with a port of the deadline
   scheduler for this framework. A port of BFQ from Paolo is in the works,
   and should be ready for 4.12.

 - Various fixups and improvements to the above scheduling framework from
   Omar, Paolo, Bart, me, and others.

 - Cleanup of the exported sysfs blk-mq data into debugfs, from Omar. This
   allows us to export more information that helps debug hangs or
   performance issues, without cluttering or abusing the sysfs API.

 - Fixes for the sbitmap code, the scalable bitmap code that was migrated
   from blk-mq, from Omar.

 - Removal of the BLOCK_PC support in struct request, and refactoring of
   carrying SCSI payloads in the block layer. This cleans up the code
   nicely, and enables us to kill the SCSI specific parts of struct request,
   shrinking it down nicely. From Christoph mainly, with help from Hannes.

 - Support for ranged discard requests and discard merging, also from
   Christoph.

 - Support for OPAL in the block layer, and for NVMe as well. Mainly from
   Scott Bauer, with fixes/updates from various other folks.

 - Error code fixup for gdrom from Christophe.

 - cciss pci irq allocation cleanup from Christoph.

 - Making the cdrom device operations read only, from Kees Cook.

 - Fixes for duplicate bdi registrations and bdi/queue life time problems
   from Jan and Dan.

 - Set of fixes and updates for lightnvm, from Matias and Javier.

 - A few fixes for nbd from Josef, using idr to name devices and a workqueue
   deadlock fix on receive. Also marks Josef as the current maintainer of
   nbd.

 - Fix from Josef, overwriting queue settings when the number of hardware
   queues is updated for a blk-mq device.

 - NVMe fix from Keith, ensuring that we don't repeatedly mark an IO aborted
   if we didn't end up aborting it.

 - SG gap merging fix from Ming Lei for block.

 - Loop fix also from Ming, fixing a race and crash between setting loop
   status and IO.

 - Two block race fixes from Tahsin, fixing request list iteration and
   fixing a race between device registration and udev device add
   notifications.

 - Double free fix in cgroup writeback, from Tejun.

 - Another double free fix in blkcg, from Hou Tao.

 - Partition overflow fix for EFI from Alden Tondettar.

* tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block: (156 commits)
  nvme: Check for Security send/recv support before issuing commands.
  block/sed-opal: allocate struct opal_dev dynamically
  block/sed-opal: tone down not supported warnings
  block: don't defer flushes on blk-mq + scheduling
  blk-mq-sched: ask scheduler for work, if we failed dispatching leftovers
  blk-mq: don't special case flush inserts for blk-mq-sched
  blk-mq-sched: don't add flushes to the head of requeue queue
  blk-mq: have blk_mq_dispatch_rq_list() return if we queued IO or not
  block: do not allow updates through sysfs until registration completes
  lightnvm: set default lun range when no luns are specified
  lightnvm: fix off-by-one error on target initialization
  Maintainers: Modify SED list from nvme to block
  Move stack parameters for sed_ioctl to prevent oversized stack with CONFIG_KASAN
  uapi: sed-opal fix IOW for activate lsp to use correct struct
  cdrom: Make device operations read-only
  elevator: fix loading wrong elevator type for blk-mq devices
  cciss: switch to pci_irq_alloc_vectors
  block/loop: fix race between I/O and set_status
  blk-mq-sched: don't hold queue_lock when calling exit_icq
  block: set make_request_fn manually in blk_mq_update_nr_hw_queues
  ...
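As an illustration of the BLOCK_PC removal called out above: SCSI passthrough submitters now keep the CDB, sense data and residual count in a separate scsi_request rather than in struct request itself. The following is only a rough sketch of that pattern, assuming the scsi_req()/scsi_req_init() helpers and field layout of the new include/scsi/scsi_request.h, and assuming @q belongs to a driver that embeds a struct scsi_request via q->cmd_size (as the converted scsi/ide drivers in this series do); the command values are made up for illustration:

	#include <linux/blkdev.h>
	#include <scsi/scsi_request.h>

	static int issue_test_unit_ready(struct request_queue *q)
	{
		struct request *rq;
		struct scsi_request *sreq;
		int err;

		rq = blk_get_request(q, READ, GFP_KERNEL);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		scsi_req_init(rq);		/* replaces blk_rq_set_block_pc() */
		sreq = scsi_req(rq);		/* lives in the q->cmd_size payload */
		sreq->cmd[0] = 0x00;		/* TEST UNIT READY */
		sreq->cmd_len = 6;

		/* blk_execute_rq() no longer supplies a fallback sense buffer */
		err = blk_execute_rq(q, NULL, rq, 0);

		blk_put_request(rq);
		return err;
	}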
-rw-r--r--  Documentation/cdrom/cdrom-standard.tex | 9
-rw-r--r--  MAINTAINERS | 15
-rw-r--r--  block/Kconfig | 24
-rw-r--r--  block/Kconfig.iosched | 50
-rw-r--r--  block/Makefile | 10
-rw-r--r--  block/bio.c | 16
-rw-r--r--  block/blk-cgroup.c | 32
-rw-r--r--  block/blk-core.c | 355
-rw-r--r--  block/blk-exec.c | 22
-rw-r--r--  block/blk-flush.c | 26
-rw-r--r--  block/blk-integrity.c | 4
-rw-r--r--  block/blk-ioc.c | 34
-rw-r--r--  block/blk-map.c | 13
-rw-r--r--  block/blk-merge.c | 62
-rw-r--r--  block/blk-mq-debugfs.c | 772
-rw-r--r--  block/blk-mq-sched.c | 515
-rw-r--r--  block/blk-mq-sched.h | 143
-rw-r--r--  block/blk-mq-sysfs.c | 235
-rw-r--r--  block/blk-mq-tag.c | 190
-rw-r--r--  block/blk-mq-tag.h | 10
-rw-r--r--  block/blk-mq.c | 590
-rw-r--r--  block/blk-mq.h | 72
-rw-r--r--  block/blk-settings.c | 22
-rw-r--r--  block/blk-sysfs.c | 68
-rw-r--r--  block/blk-tag.c | 1
-rw-r--r--  block/blk-throttle.c | 6
-rw-r--r--  block/blk-wbt.c | 8
-rw-r--r--  block/blk.h | 47
-rw-r--r--  block/bsg-lib.c | 49
-rw-r--r--  block/bsg.c | 64
-rw-r--r--  block/cfq-iosched.c | 14
-rw-r--r--  block/compat_ioctl.c | 7
-rw-r--r--  block/deadline-iosched.c | 14
-rw-r--r--  block/elevator.c | 267
-rw-r--r--  block/genhd.c | 25
-rw-r--r--  block/ioctl.c | 7
-rw-r--r--  block/mq-deadline.c | 556
-rw-r--r--  block/noop-iosched.c | 2
-rw-r--r--  block/opal_proto.h | 452
-rw-r--r--  block/partitions/efi.c | 17
-rw-r--r--  block/scsi_ioctl.c | 83
-rw-r--r--  block/sed-opal.c | 2488
-rw-r--r--  drivers/ata/libata-scsi.c | 4
-rw-r--r--  drivers/block/Kconfig | 13
-rw-r--r--  drivers/block/aoe/aoeblk.c | 4
-rw-r--r--  drivers/block/cciss.c | 131
-rw-r--r--  drivers/block/cciss.h | 6
-rw-r--r--  drivers/block/drbd/drbd_main.c | 6
-rw-r--r--  drivers/block/drbd/drbd_nl.c | 12
-rw-r--r--  drivers/block/drbd/drbd_proc.c | 2
-rw-r--r--  drivers/block/drbd/drbd_req.c | 2
-rw-r--r--  drivers/block/floppy.c | 6
-rw-r--r--  drivers/block/hd.c | 45
-rw-r--r--  drivers/block/loop.c | 17
-rw-r--r--  drivers/block/mg_disk.c | 31
-rw-r--r--  drivers/block/nbd.c | 258
-rw-r--r--  drivers/block/null_blk.c | 10
-rw-r--r--  drivers/block/osdblk.c | 6
-rw-r--r--  drivers/block/paride/Kconfig | 1
-rw-r--r--  drivers/block/paride/pcd.c | 2
-rw-r--r--  drivers/block/paride/pd.c | 15
-rw-r--r--  drivers/block/pktcdvd.c | 12
-rw-r--r--  drivers/block/ps3disk.c | 15
-rw-r--r--  drivers/block/rbd.c | 24
-rw-r--r--  drivers/block/skd_main.c | 15
-rw-r--r--  drivers/block/sx8.c | 4
-rw-r--r--  drivers/block/virtio_blk.c | 205
-rw-r--r--  drivers/block/xen-blkfront.c | 2
-rw-r--r--  drivers/block/xsysace.c | 2
-rw-r--r--  drivers/block/zram/zram_drv.c | 2
-rw-r--r--  drivers/cdrom/cdrom.c | 92
-rw-r--r--  drivers/cdrom/gdrom.c | 41
-rw-r--r--  drivers/ide/Kconfig | 1
-rw-r--r--  drivers/ide/ide-atapi.c | 78
-rw-r--r--  drivers/ide/ide-cd.c | 192
-rw-r--r--  drivers/ide/ide-cd_ioctl.c | 5
-rw-r--r--  drivers/ide/ide-cd_verbose.c | 6
-rw-r--r--  drivers/ide/ide-devsets.c | 13
-rw-r--r--  drivers/ide/ide-disk.c | 12
-rw-r--r--  drivers/ide/ide-eh.c | 8
-rw-r--r--  drivers/ide/ide-floppy.c | 37
-rw-r--r--  drivers/ide/ide-io.c | 13
-rw-r--r--  drivers/ide/ide-ioctls.c | 14
-rw-r--r--  drivers/ide/ide-park.c | 20
-rw-r--r--  drivers/ide/ide-pm.c | 20
-rw-r--r--  drivers/ide/ide-probe.c | 36
-rw-r--r--  drivers/ide/ide-tape.c | 41
-rw-r--r--  drivers/ide/ide-taskfile.c | 8
-rw-r--r--  drivers/ide/sis5513.c | 2
-rw-r--r--  drivers/lightnvm/Kconfig | 9
-rw-r--r--  drivers/lightnvm/Makefile | 3
-rw-r--r--  drivers/lightnvm/core.c | 1027
-rw-r--r--  drivers/lightnvm/gennvm.c | 657
-rw-r--r--  drivers/lightnvm/gennvm.h | 62
-rw-r--r--  drivers/lightnvm/rrpc.c | 7
-rw-r--r--  drivers/lightnvm/rrpc.h | 3
-rw-r--r--  drivers/lightnvm/sysblk.c | 733
-rw-r--r--  drivers/md/bcache/request.c | 12
-rw-r--r--  drivers/md/bcache/super.c | 8
-rw-r--r--  drivers/md/dm-cache-target.c | 15
-rw-r--r--  drivers/md/dm-core.h | 1
-rw-r--r--  drivers/md/dm-era-target.c | 2
-rw-r--r--  drivers/md/dm-mpath.c | 132
-rw-r--r--  drivers/md/dm-rq.c | 268
-rw-r--r--  drivers/md/dm-rq.h | 2
-rw-r--r--  drivers/md/dm-table.c | 2
-rw-r--r--  drivers/md/dm-target.c | 7
-rw-r--r--  drivers/md/dm-thin.c | 15
-rw-r--r--  drivers/md/dm.c | 49
-rw-r--r--  drivers/md/dm.h | 3
-rw-r--r--  drivers/md/linear.c | 2
-rw-r--r--  drivers/md/md.c | 6
-rw-r--r--  drivers/md/multipath.c | 2
-rw-r--r--  drivers/md/raid0.c | 6
-rw-r--r--  drivers/md/raid1.c | 11
-rw-r--r--  drivers/md/raid10.c | 10
-rw-r--r--  drivers/md/raid5.c | 12
-rw-r--r--  drivers/memstick/core/ms_block.c | 11
-rw-r--r--  drivers/memstick/core/mspro_block.c | 13
-rw-r--r--  drivers/message/fusion/mptsas.c | 8
-rw-r--r--  drivers/mmc/core/queue.c | 9
-rw-r--r--  drivers/mtd/mtd_blkdevs.c | 13
-rw-r--r--  drivers/mtd/ubi/block.c | 15
-rw-r--r--  drivers/nvme/host/core.c | 86
-rw-r--r--  drivers/nvme/host/fc.c | 2
-rw-r--r--  drivers/nvme/host/lightnvm.c | 315
-rw-r--r--  drivers/nvme/host/nvme.h | 13
-rw-r--r--  drivers/nvme/host/pci.c | 19
-rw-r--r--  drivers/nvme/host/rdma.c | 6
-rw-r--r--  drivers/nvme/host/scsi.c | 7
-rw-r--r--  drivers/nvme/target/loop.c | 2
-rw-r--r--  drivers/s390/block/scm_blk.c | 7
-rw-r--r--  drivers/scsi/Kconfig | 1
-rw-r--r--  drivers/scsi/device_handler/scsi_dh_emc.c | 247
-rw-r--r--  drivers/scsi/device_handler/scsi_dh_hp_sw.c | 222
-rw-r--r--  drivers/scsi/device_handler/scsi_dh_rdac.c | 174
-rw-r--r--  drivers/scsi/hosts.c | 24
-rw-r--r--  drivers/scsi/hpsa.c | 4
-rw-r--r--  drivers/scsi/libfc/fc_lport.c | 2
-rw-r--r--  drivers/scsi/libsas/sas_expander.c | 8
-rw-r--r--  drivers/scsi/libsas/sas_host_smp.c | 38
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 2
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_transport.c | 8
-rw-r--r--  drivers/scsi/osd/osd_initiator.c | 22
-rw-r--r--  drivers/scsi/osst.c | 18
-rw-r--r--  drivers/scsi/qla2xxx/qla_bsg.c | 2
-rw-r--r--  drivers/scsi/qla2xxx/qla_isr.c | 6
-rw-r--r--  drivers/scsi/qla2xxx/qla_mr.c | 2
-rw-r--r--  drivers/scsi/scsi.c | 354
-rw-r--r--  drivers/scsi/scsi_error.c | 43
-rw-r--r--  drivers/scsi/scsi_lib.c | 264
-rw-r--r--  drivers/scsi/scsi_priv.h | 5
-rw-r--r--  drivers/scsi/scsi_transport_fc.c | 34
-rw-r--r--  drivers/scsi/scsi_transport_iscsi.c | 14
-rw-r--r--  drivers/scsi/scsi_transport_sas.c | 5
-rw-r--r--  drivers/scsi/sd.c | 48
-rw-r--r--  drivers/scsi/sg.c | 33
-rw-r--r--  drivers/scsi/smartpqi/smartpqi_init.c | 2
-rw-r--r--  drivers/scsi/sr.c | 11
-rw-r--r--  drivers/scsi/st.c | 28
-rw-r--r--  drivers/scsi/sun3_scsi.c | 2
-rw-r--r--  drivers/target/Kconfig | 1
-rw-r--r--  drivers/target/target_core_pscsi.c | 14
-rw-r--r--  fs/block_dev.c | 22
-rw-r--r--  fs/btrfs/disk-io.c | 2
-rw-r--r--  fs/btrfs/volumes.c | 2
-rw-r--r--  fs/debugfs/inode.c | 36
-rw-r--r--  fs/gfs2/ops_fstype.c | 2
-rw-r--r--  fs/nfsd/Kconfig | 1
-rw-r--r--  fs/nfsd/blocklayout.c | 19
-rw-r--r--  fs/nilfs2/super.c | 2
-rw-r--r--  fs/super.c | 2
-rw-r--r--  fs/xfs/xfs_buf.c | 3
-rw-r--r--  fs/xfs/xfs_buf.h | 1
-rw-r--r--  include/linux/backing-dev-defs.h | 2
-rw-r--r--  include/linux/backing-dev.h | 12
-rw-r--r--  include/linux/blk-mq.h | 9
-rw-r--r--  include/linux/blk_types.h | 38
-rw-r--r--  include/linux/blkdev.h | 124
-rw-r--r--  include/linux/blktrace_api.h | 18
-rw-r--r--  include/linux/bsg-lib.h | 5
-rw-r--r--  include/linux/cdrom.h | 5
-rw-r--r--  include/linux/debugfs.h | 8
-rw-r--r--  include/linux/device-mapper.h | 3
-rw-r--r--  include/linux/elevator.h | 63
-rw-r--r--  include/linux/fs.h | 2
-rw-r--r--  include/linux/genhd.h | 8
-rw-r--r--  include/linux/ide.h | 58
-rw-r--r--  include/linux/lightnvm.h | 138
-rw-r--r--  include/linux/nvme.h | 3
-rw-r--r--  include/linux/sbitmap.h | 30
-rw-r--r--  include/linux/sed-opal.h | 70
-rw-r--r--  include/scsi/scsi_cmnd.h | 4
-rw-r--r--  include/scsi/scsi_host.h | 5
-rw-r--r--  include/scsi/scsi_request.h | 30
-rw-r--r--  include/scsi/scsi_transport.h | 2
-rw-r--r--  include/trace/events/block.h | 27
-rw-r--r--  include/uapi/linux/lightnvm.h | 50
-rw-r--r--  include/uapi/linux/sed-opal.h | 119
-rw-r--r--  kernel/trace/blktrace.c | 78
-rw-r--r--  lib/sbitmap.c | 139
-rw-r--r--  mm/backing-dev.c | 43
-rw-r--r--  mm/page-writeback.c | 4
203 files changed, 9729 insertions, 5577 deletions
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
index c06233fe52ac..8f85b0e41046 100644
--- a/Documentation/cdrom/cdrom-standard.tex
+++ b/Documentation/cdrom/cdrom-standard.tex
@@ -249,7 +249,6 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr
249 unsigned\ long);\cr 249 unsigned\ long);\cr
250\noalign{\medskip} 250\noalign{\medskip}
251 &const\ int& capability;& capability flags \cr 251 &const\ int& capability;& capability flags \cr
252 &int& n_minors;& number of active minor devices \cr
253\};\cr 252\};\cr
254} 253}
255$$ 254$$
@@ -258,13 +257,7 @@ it should add a function pointer to this $struct$. When a particular
258function is not implemented, however, this $struct$ should contain a 257function is not implemented, however, this $struct$ should contain a
259NULL instead. The $capability$ flags specify the capabilities of the 258NULL instead. The $capability$ flags specify the capabilities of the
260\cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive 259\cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive
261is registered with the \UCD. The value $n_minors$ should be a positive 260is registered with the \UCD.
262value indicating the number of minor devices that are supported by
263the low-level device driver, normally~1. Although these two variables
264are `informative' rather than `operational,' they are included in
265$cdrom_device_ops$ because they describe the capability of the {\em
266driver\/} rather than the {\em drive}. Nomenclature has always been
267difficult in computer programming.
268 261
269Note that most functions have fewer parameters than their 262Note that most functions have fewer parameters than their
270$blkdev_fops$ counterparts. This is because very little of the 263$blkdev_fops$ counterparts. This is because very little of the
diff --git a/MAINTAINERS b/MAINTAINERS
index 80f1a5894411..a0fb98c6399a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8620,10 +8620,10 @@ S: Maintained
8620F: drivers/net/ethernet/netronome/ 8620F: drivers/net/ethernet/netronome/
8621 8621
8622NETWORK BLOCK DEVICE (NBD) 8622NETWORK BLOCK DEVICE (NBD)
8623M: Markus Pargmann <mpa@pengutronix.de> 8623M: Josef Bacik <jbacik@fb.com>
8624S: Maintained 8624S: Maintained
8625L: linux-block@vger.kernel.org
8625L: nbd-general@lists.sourceforge.net 8626L: nbd-general@lists.sourceforge.net
8626T: git git://git.pengutronix.de/git/mpa/linux-nbd.git
8627F: Documentation/blockdev/nbd.txt 8627F: Documentation/blockdev/nbd.txt
8628F: drivers/block/nbd.c 8628F: drivers/block/nbd.c
8629F: include/uapi/linux/nbd.h 8629F: include/uapi/linux/nbd.h
@@ -11097,6 +11097,17 @@ L: linux-mmc@vger.kernel.org
11097S: Maintained 11097S: Maintained
11098F: drivers/mmc/host/sdhci-spear.c 11098F: drivers/mmc/host/sdhci-spear.c
11099 11099
11100SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
11101M: Scott Bauer <scott.bauer@intel.com>
11102M: Jonathan Derrick <jonathan.derrick@intel.com>
11103M: Rafael Antognolli <rafael.antognolli@intel.com>
11104L: linux-block@vger.kernel.org
11105S: Supported
11106F: block/sed*
11107F: block/opal_proto.h
11108F: include/linux/sed*
11109F: include/uapi/linux/sed*
11110
11100SECURITY SUBSYSTEM 11111SECURITY SUBSYSTEM
11101M: James Morris <james.l.morris@oracle.com> 11112M: James Morris <james.l.morris@oracle.com>
11102M: "Serge E. Hallyn" <serge@hallyn.com> 11113M: "Serge E. Hallyn" <serge@hallyn.com>
diff --git a/block/Kconfig b/block/Kconfig
index 8bf114a3858a..a2a92e57a87d 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -49,9 +49,13 @@ config LBDAF
49 49
50 If unsure, say Y. 50 If unsure, say Y.
51 51
52config BLK_SCSI_REQUEST
53 bool
54
52config BLK_DEV_BSG 55config BLK_DEV_BSG
53 bool "Block layer SG support v4" 56 bool "Block layer SG support v4"
54 default y 57 default y
58 select BLK_SCSI_REQUEST
55 help 59 help
56 Saying Y here will enable generic SG (SCSI generic) v4 support 60 Saying Y here will enable generic SG (SCSI generic) v4 support
57 for any block device. 61 for any block device.
@@ -71,6 +75,7 @@ config BLK_DEV_BSGLIB
71 bool "Block layer SG support v4 helper lib" 75 bool "Block layer SG support v4 helper lib"
72 default n 76 default n
73 select BLK_DEV_BSG 77 select BLK_DEV_BSG
78 select BLK_SCSI_REQUEST
74 help 79 help
75 Subsystems will normally enable this if needed. Users will not 80 Subsystems will normally enable this if needed. Users will not
76 normally need to manually enable this. 81 normally need to manually enable this.
@@ -147,6 +152,25 @@ config BLK_WBT_MQ
147 Multiqueue currently doesn't have support for IO scheduling, 152 Multiqueue currently doesn't have support for IO scheduling,
148 enabling this option is recommended. 153 enabling this option is recommended.
149 154
155config BLK_DEBUG_FS
156 bool "Block layer debugging information in debugfs"
157 default y
158 depends on DEBUG_FS
159 ---help---
160 Include block layer debugging information in debugfs. This information
161 is mostly useful for kernel developers, but it doesn't incur any cost
162 at runtime.
163
164 Unless you are building a kernel for a tiny system, you should
165 say Y here.
166
167config BLK_SED_OPAL
168 bool "Logic for interfacing with Opal enabled SEDs"
169 ---help---
170 Builds Logic for interfacing with Opal enabled controllers.
171 Enabling this option enables users to setup/unlock/lock
172 Locking ranges for SED devices using the Opal protocol.
173
150menu "Partition Types" 174menu "Partition Types"
151 175
152source "block/partitions/Kconfig" 176source "block/partitions/Kconfig"
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 421bef9c4c48..0715ce93daef 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -63,6 +63,56 @@ config DEFAULT_IOSCHED
63 default "cfq" if DEFAULT_CFQ 63 default "cfq" if DEFAULT_CFQ
64 default "noop" if DEFAULT_NOOP 64 default "noop" if DEFAULT_NOOP
65 65
66config MQ_IOSCHED_DEADLINE
67 tristate "MQ deadline I/O scheduler"
68 default y
69 ---help---
70 MQ version of the deadline IO scheduler.
71
72config MQ_IOSCHED_NONE
73 bool
74 default y
75
76choice
77 prompt "Default single-queue blk-mq I/O scheduler"
78 default DEFAULT_SQ_NONE
79 help
80 Select the I/O scheduler which will be used by default for blk-mq
81 managed block devices with a single queue.
82
83 config DEFAULT_SQ_DEADLINE
84 bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
85
86 config DEFAULT_SQ_NONE
87 bool "None"
88
89endchoice
90
91config DEFAULT_SQ_IOSCHED
92 string
93 default "mq-deadline" if DEFAULT_SQ_DEADLINE
94 default "none" if DEFAULT_SQ_NONE
95
96choice
97 prompt "Default multi-queue blk-mq I/O scheduler"
98 default DEFAULT_MQ_NONE
99 help
100 Select the I/O scheduler which will be used by default for blk-mq
101 managed block devices with multiple queues.
102
103 config DEFAULT_MQ_DEADLINE
104 bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
105
106 config DEFAULT_MQ_NONE
107 bool "None"
108
109endchoice
110
111config DEFAULT_MQ_IOSCHED
112 string
113 default "mq-deadline" if DEFAULT_MQ_DEADLINE
114 default "none" if DEFAULT_MQ_NONE
115
66endmenu 116endmenu
67 117
68endif 118endif
diff --git a/block/Makefile b/block/Makefile
index a827f988c4e6..2ad7c304e3f5 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -6,11 +6,12 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ 7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
8 blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ 8 blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
9 blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ 9 blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
10 genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ 10 genhd.o partition-generic.o ioprio.o \
11 badblocks.o partitions/ 11 badblocks.o partitions/
12 12
13obj-$(CONFIG_BOUNCE) += bounce.o 13obj-$(CONFIG_BOUNCE) += bounce.o
14obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
14obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 15obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
15obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o 16obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
16obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o 17obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
@@ -18,6 +19,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
18obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 19obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
19obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o 20obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
20obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o 21obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
22obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
21 23
22obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 24obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
23obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o 25obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
@@ -25,3 +27,5 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
25obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o 27obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
26obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o 28obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
27obj-$(CONFIG_BLK_WBT) += blk-wbt.o 29obj-$(CONFIG_BLK_WBT) += blk-wbt.o
30obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
31obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
diff --git a/block/bio.c b/block/bio.c
index 2b375020fc49..4b564d0c3e29 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1227,9 +1227,6 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
1227 if (!bio) 1227 if (!bio)
1228 goto out_bmd; 1228 goto out_bmd;
1229 1229
1230 if (iter->type & WRITE)
1231 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1232
1233 ret = 0; 1230 ret = 0;
1234 1231
1235 if (map_data) { 1232 if (map_data) {
@@ -1394,16 +1391,10 @@ struct bio *bio_map_user_iov(struct request_queue *q,
1394 1391
1395 kfree(pages); 1392 kfree(pages);
1396 1393
1397 /*
1398 * set data direction, and check if mapped pages need bouncing
1399 */
1400 if (iter->type & WRITE)
1401 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1402
1403 bio_set_flag(bio, BIO_USER_MAPPED); 1394 bio_set_flag(bio, BIO_USER_MAPPED);
1404 1395
1405 /* 1396 /*
1406 * subtle -- if __bio_map_user() ended up bouncing a bio, 1397 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
1407 * it would normally disappear when its bi_end_io is run. 1398 * it would normally disappear when its bi_end_io is run.
1408 * however, we need it for the unmap, so grab an extra 1399 * however, we need it for the unmap, so grab an extra
1409 * reference to it 1400 * reference to it
@@ -1445,8 +1436,8 @@ static void __bio_unmap_user(struct bio *bio)
1445 * bio_unmap_user - unmap a bio 1436 * bio_unmap_user - unmap a bio
1446 * @bio: the bio being unmapped 1437 * @bio: the bio being unmapped
1447 * 1438 *
1448 * Unmap a bio previously mapped by bio_map_user(). Must be called with 1439 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
1449 * a process context. 1440 * process context.
1450 * 1441 *
1451 * bio_unmap_user() may sleep. 1442 * bio_unmap_user() may sleep.
1452 */ 1443 */
@@ -1590,7 +1581,6 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1590 bio->bi_private = data; 1581 bio->bi_private = data;
1591 } else { 1582 } else {
1592 bio->bi_end_io = bio_copy_kern_endio; 1583 bio->bi_end_io = bio_copy_kern_endio;
1593 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1594 } 1584 }
1595 1585
1596 return bio; 1586 return bio;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8ba0af780e88..295e98c2c8cc 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -184,7 +184,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
184 goto err_free_blkg; 184 goto err_free_blkg;
185 } 185 }
186 186
187 wb_congested = wb_congested_get_create(&q->backing_dev_info, 187 wb_congested = wb_congested_get_create(q->backing_dev_info,
188 blkcg->css.id, 188 blkcg->css.id,
189 GFP_NOWAIT | __GFP_NOWARN); 189 GFP_NOWAIT | __GFP_NOWARN);
190 if (!wb_congested) { 190 if (!wb_congested) {
@@ -469,8 +469,8 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
469const char *blkg_dev_name(struct blkcg_gq *blkg) 469const char *blkg_dev_name(struct blkcg_gq *blkg)
470{ 470{
471 /* some drivers (floppy) instantiate a queue w/o disk registered */ 471 /* some drivers (floppy) instantiate a queue w/o disk registered */
472 if (blkg->q->backing_dev_info.dev) 472 if (blkg->q->backing_dev_info->dev)
473 return dev_name(blkg->q->backing_dev_info.dev); 473 return dev_name(blkg->q->backing_dev_info->dev);
474 return NULL; 474 return NULL;
475} 475}
476EXPORT_SYMBOL_GPL(blkg_dev_name); 476EXPORT_SYMBOL_GPL(blkg_dev_name);
@@ -1079,10 +1079,8 @@ int blkcg_init_queue(struct request_queue *q)
1079 if (preloaded) 1079 if (preloaded)
1080 radix_tree_preload_end(); 1080 radix_tree_preload_end();
1081 1081
1082 if (IS_ERR(blkg)) { 1082 if (IS_ERR(blkg))
1083 blkg_free(new_blkg);
1084 return PTR_ERR(blkg); 1083 return PTR_ERR(blkg);
1085 }
1086 1084
1087 q->root_blkg = blkg; 1085 q->root_blkg = blkg;
1088 q->root_rl.blkg = blkg; 1086 q->root_rl.blkg = blkg;
@@ -1223,7 +1221,10 @@ int blkcg_activate_policy(struct request_queue *q,
1223 if (blkcg_policy_enabled(q, pol)) 1221 if (blkcg_policy_enabled(q, pol))
1224 return 0; 1222 return 0;
1225 1223
1226 blk_queue_bypass_start(q); 1224 if (q->mq_ops)
1225 blk_mq_freeze_queue(q);
1226 else
1227 blk_queue_bypass_start(q);
1227pd_prealloc: 1228pd_prealloc:
1228 if (!pd_prealloc) { 1229 if (!pd_prealloc) {
1229 pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); 1230 pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
@@ -1261,7 +1262,10 @@ pd_prealloc:
1261 1262
1262 spin_unlock_irq(q->queue_lock); 1263 spin_unlock_irq(q->queue_lock);
1263out_bypass_end: 1264out_bypass_end:
1264 blk_queue_bypass_end(q); 1265 if (q->mq_ops)
1266 blk_mq_unfreeze_queue(q);
1267 else
1268 blk_queue_bypass_end(q);
1265 if (pd_prealloc) 1269 if (pd_prealloc)
1266 pol->pd_free_fn(pd_prealloc); 1270 pol->pd_free_fn(pd_prealloc);
1267 return ret; 1271 return ret;
@@ -1284,7 +1288,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
1284 if (!blkcg_policy_enabled(q, pol)) 1288 if (!blkcg_policy_enabled(q, pol))
1285 return; 1289 return;
1286 1290
1287 blk_queue_bypass_start(q); 1291 if (q->mq_ops)
1292 blk_mq_freeze_queue(q);
1293 else
1294 blk_queue_bypass_start(q);
1295
1288 spin_lock_irq(q->queue_lock); 1296 spin_lock_irq(q->queue_lock);
1289 1297
1290 __clear_bit(pol->plid, q->blkcg_pols); 1298 __clear_bit(pol->plid, q->blkcg_pols);
@@ -1304,7 +1312,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
1304 } 1312 }
1305 1313
1306 spin_unlock_irq(q->queue_lock); 1314 spin_unlock_irq(q->queue_lock);
1307 blk_queue_bypass_end(q); 1315
1316 if (q->mq_ops)
1317 blk_mq_unfreeze_queue(q);
1318 else
1319 blk_queue_bypass_end(q);
1308} 1320}
1309EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); 1321EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
1310 1322
diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c58b64..b9e857f4afe8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,14 +33,20 @@
33#include <linux/ratelimit.h> 33#include <linux/ratelimit.h>
34#include <linux/pm_runtime.h> 34#include <linux/pm_runtime.h>
35#include <linux/blk-cgroup.h> 35#include <linux/blk-cgroup.h>
36#include <linux/debugfs.h>
36 37
37#define CREATE_TRACE_POINTS 38#define CREATE_TRACE_POINTS
38#include <trace/events/block.h> 39#include <trace/events/block.h>
39 40
40#include "blk.h" 41#include "blk.h"
41#include "blk-mq.h" 42#include "blk-mq.h"
43#include "blk-mq-sched.h"
42#include "blk-wbt.h" 44#include "blk-wbt.h"
43 45
46#ifdef CONFIG_DEBUG_FS
47struct dentry *blk_debugfs_root;
48#endif
49
44EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); 50EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
45EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); 51EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
46EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); 52EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
@@ -74,7 +80,7 @@ static void blk_clear_congested(struct request_list *rl, int sync)
74 * flip its congestion state for events on other blkcgs. 80 * flip its congestion state for events on other blkcgs.
75 */ 81 */
76 if (rl == &rl->q->root_rl) 82 if (rl == &rl->q->root_rl)
77 clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync); 83 clear_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
78#endif 84#endif
79} 85}
80 86
@@ -85,7 +91,7 @@ static void blk_set_congested(struct request_list *rl, int sync)
85#else 91#else
86 /* see blk_clear_congested() */ 92 /* see blk_clear_congested() */
87 if (rl == &rl->q->root_rl) 93 if (rl == &rl->q->root_rl)
88 set_wb_congested(rl->q->backing_dev_info.wb.congested, sync); 94 set_wb_congested(rl->q->backing_dev_info->wb.congested, sync);
89#endif 95#endif
90} 96}
91 97
@@ -104,22 +110,6 @@ void blk_queue_congestion_threshold(struct request_queue *q)
104 q->nr_congestion_off = nr; 110 q->nr_congestion_off = nr;
105} 111}
106 112
107/**
108 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
109 * @bdev: device
110 *
111 * Locates the passed device's request queue and returns the address of its
112 * backing_dev_info. This function can only be called if @bdev is opened
113 * and the return value is never NULL.
114 */
115struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
116{
117 struct request_queue *q = bdev_get_queue(bdev);
118
119 return &q->backing_dev_info;
120}
121EXPORT_SYMBOL(blk_get_backing_dev_info);
122
123void blk_rq_init(struct request_queue *q, struct request *rq) 113void blk_rq_init(struct request_queue *q, struct request *rq)
124{ 114{
125 memset(rq, 0, sizeof(*rq)); 115 memset(rq, 0, sizeof(*rq));
@@ -131,9 +121,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
131 rq->__sector = (sector_t) -1; 121 rq->__sector = (sector_t) -1;
132 INIT_HLIST_NODE(&rq->hash); 122 INIT_HLIST_NODE(&rq->hash);
133 RB_CLEAR_NODE(&rq->rb_node); 123 RB_CLEAR_NODE(&rq->rb_node);
134 rq->cmd = rq->__cmd;
135 rq->cmd_len = BLK_MAX_CDB;
136 rq->tag = -1; 124 rq->tag = -1;
125 rq->internal_tag = -1;
137 rq->start_time = jiffies; 126 rq->start_time = jiffies;
138 set_start_time_ns(rq); 127 set_start_time_ns(rq);
139 rq->part = NULL; 128 rq->part = NULL;
@@ -158,10 +147,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
158 147
159void blk_dump_rq_flags(struct request *rq, char *msg) 148void blk_dump_rq_flags(struct request *rq, char *msg)
160{ 149{
161 int bit; 150 printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg,
162 151 rq->rq_disk ? rq->rq_disk->disk_name : "?",
163 printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg,
164 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
165 (unsigned long long) rq->cmd_flags); 152 (unsigned long long) rq->cmd_flags);
166 153
167 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", 154 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
@@ -169,13 +156,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
169 blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); 156 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
170 printk(KERN_INFO " bio %p, biotail %p, len %u\n", 157 printk(KERN_INFO " bio %p, biotail %p, len %u\n",
171 rq->bio, rq->biotail, blk_rq_bytes(rq)); 158 rq->bio, rq->biotail, blk_rq_bytes(rq));
172
173 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
174 printk(KERN_INFO " cdb: ");
175 for (bit = 0; bit < BLK_MAX_CDB; bit++)
176 printk("%02x ", rq->cmd[bit]);
177 printk("\n");
178 }
179} 159}
180EXPORT_SYMBOL(blk_dump_rq_flags); 160EXPORT_SYMBOL(blk_dump_rq_flags);
181 161
@@ -525,12 +505,14 @@ void blk_set_queue_dying(struct request_queue *q)
525 else { 505 else {
526 struct request_list *rl; 506 struct request_list *rl;
527 507
508 spin_lock_irq(q->queue_lock);
528 blk_queue_for_each_rl(rl, q) { 509 blk_queue_for_each_rl(rl, q) {
529 if (rl->rq_pool) { 510 if (rl->rq_pool) {
530 wake_up(&rl->wait[BLK_RW_SYNC]); 511 wake_up(&rl->wait[BLK_RW_SYNC]);
531 wake_up(&rl->wait[BLK_RW_ASYNC]); 512 wake_up(&rl->wait[BLK_RW_ASYNC]);
532 } 513 }
533 } 514 }
515 spin_unlock_irq(q->queue_lock);
534 } 516 }
535} 517}
536EXPORT_SYMBOL_GPL(blk_set_queue_dying); 518EXPORT_SYMBOL_GPL(blk_set_queue_dying);
@@ -584,7 +566,7 @@ void blk_cleanup_queue(struct request_queue *q)
584 blk_flush_integrity(); 566 blk_flush_integrity();
585 567
586 /* @q won't process any more request, flush async actions */ 568 /* @q won't process any more request, flush async actions */
587 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); 569 del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
588 blk_sync_queue(q); 570 blk_sync_queue(q);
589 571
590 if (q->mq_ops) 572 if (q->mq_ops)
@@ -596,7 +578,8 @@ void blk_cleanup_queue(struct request_queue *q)
596 q->queue_lock = &q->__queue_lock; 578 q->queue_lock = &q->__queue_lock;
597 spin_unlock_irq(lock); 579 spin_unlock_irq(lock);
598 580
599 bdi_unregister(&q->backing_dev_info); 581 bdi_unregister(q->backing_dev_info);
582 put_disk_devt(q->disk_devt);
600 583
601 /* @q is and will stay empty, shutdown and put */ 584 /* @q is and will stay empty, shutdown and put */
602 blk_put_queue(q); 585 blk_put_queue(q);
@@ -604,17 +587,41 @@ void blk_cleanup_queue(struct request_queue *q)
604EXPORT_SYMBOL(blk_cleanup_queue); 587EXPORT_SYMBOL(blk_cleanup_queue);
605 588
606/* Allocate memory local to the request queue */ 589/* Allocate memory local to the request queue */
607static void *alloc_request_struct(gfp_t gfp_mask, void *data) 590static void *alloc_request_simple(gfp_t gfp_mask, void *data)
608{ 591{
609 int nid = (int)(long)data; 592 struct request_queue *q = data;
610 return kmem_cache_alloc_node(request_cachep, gfp_mask, nid); 593
594 return kmem_cache_alloc_node(request_cachep, gfp_mask, q->node);
611} 595}
612 596
613static void free_request_struct(void *element, void *unused) 597static void free_request_simple(void *element, void *data)
614{ 598{
615 kmem_cache_free(request_cachep, element); 599 kmem_cache_free(request_cachep, element);
616} 600}
617 601
602static void *alloc_request_size(gfp_t gfp_mask, void *data)
603{
604 struct request_queue *q = data;
605 struct request *rq;
606
607 rq = kmalloc_node(sizeof(struct request) + q->cmd_size, gfp_mask,
608 q->node);
609 if (rq && q->init_rq_fn && q->init_rq_fn(q, rq, gfp_mask) < 0) {
610 kfree(rq);
611 rq = NULL;
612 }
613 return rq;
614}
615
616static void free_request_size(void *element, void *data)
617{
618 struct request_queue *q = data;
619
620 if (q->exit_rq_fn)
621 q->exit_rq_fn(q, element);
622 kfree(element);
623}
624
618int blk_init_rl(struct request_list *rl, struct request_queue *q, 625int blk_init_rl(struct request_list *rl, struct request_queue *q,
619 gfp_t gfp_mask) 626 gfp_t gfp_mask)
620{ 627{
@@ -627,10 +634,15 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q,
627 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); 634 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
628 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); 635 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
629 636
630 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, alloc_request_struct, 637 if (q->cmd_size) {
631 free_request_struct, 638 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
632 (void *)(long)q->node, gfp_mask, 639 alloc_request_size, free_request_size,
633 q->node); 640 q, gfp_mask, q->node);
641 } else {
642 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ,
643 alloc_request_simple, free_request_simple,
644 q, gfp_mask, q->node);
645 }
634 if (!rl->rq_pool) 646 if (!rl->rq_pool)
635 return -ENOMEM; 647 return -ENOMEM;
636 648
@@ -693,7 +705,6 @@ static void blk_rq_timed_out_timer(unsigned long data)
693struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) 705struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
694{ 706{
695 struct request_queue *q; 707 struct request_queue *q;
696 int err;
697 708
698 q = kmem_cache_alloc_node(blk_requestq_cachep, 709 q = kmem_cache_alloc_node(blk_requestq_cachep,
699 gfp_mask | __GFP_ZERO, node_id); 710 gfp_mask | __GFP_ZERO, node_id);
@@ -708,17 +719,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
708 if (!q->bio_split) 719 if (!q->bio_split)
709 goto fail_id; 720 goto fail_id;
710 721
711 q->backing_dev_info.ra_pages = 722 q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
723 if (!q->backing_dev_info)
724 goto fail_split;
725
726 q->backing_dev_info->ra_pages =
712 (VM_MAX_READAHEAD * 1024) / PAGE_SIZE; 727 (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
713 q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK; 728 q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
714 q->backing_dev_info.name = "block"; 729 q->backing_dev_info->name = "block";
715 q->node = node_id; 730 q->node = node_id;
716 731
717 err = bdi_init(&q->backing_dev_info); 732 setup_timer(&q->backing_dev_info->laptop_mode_wb_timer,
718 if (err)
719 goto fail_split;
720
721 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
722 laptop_mode_timer_fn, (unsigned long) q); 733 laptop_mode_timer_fn, (unsigned long) q);
723 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); 734 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
724 INIT_LIST_HEAD(&q->queue_head); 735 INIT_LIST_HEAD(&q->queue_head);
@@ -768,7 +779,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
768fail_ref: 779fail_ref:
769 percpu_ref_exit(&q->q_usage_counter); 780 percpu_ref_exit(&q->q_usage_counter);
770fail_bdi: 781fail_bdi:
771 bdi_destroy(&q->backing_dev_info); 782 bdi_put(q->backing_dev_info);
772fail_split: 783fail_split:
773 bioset_free(q->bio_split); 784 bioset_free(q->bio_split);
774fail_id: 785fail_id:
@@ -821,15 +832,19 @@ EXPORT_SYMBOL(blk_init_queue);
821struct request_queue * 832struct request_queue *
822blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) 833blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
823{ 834{
824 struct request_queue *uninit_q, *q; 835 struct request_queue *q;
825 836
826 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id); 837 q = blk_alloc_queue_node(GFP_KERNEL, node_id);
827 if (!uninit_q) 838 if (!q)
828 return NULL; 839 return NULL;
829 840
830 q = blk_init_allocated_queue(uninit_q, rfn, lock); 841 q->request_fn = rfn;
831 if (!q) 842 if (lock)
832 blk_cleanup_queue(uninit_q); 843 q->queue_lock = lock;
844 if (blk_init_allocated_queue(q) < 0) {
845 blk_cleanup_queue(q);
846 return NULL;
847 }
833 848
834 return q; 849 return q;
835} 850}
@@ -837,30 +852,22 @@ EXPORT_SYMBOL(blk_init_queue_node);
837 852
838static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio); 853static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
839 854
840struct request_queue *
841blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
842 spinlock_t *lock)
843{
844 if (!q)
845 return NULL;
846 855
847 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0); 856int blk_init_allocated_queue(struct request_queue *q)
857{
858 q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
848 if (!q->fq) 859 if (!q->fq)
849 return NULL; 860 return -ENOMEM;
861
862 if (q->init_rq_fn && q->init_rq_fn(q, q->fq->flush_rq, GFP_KERNEL))
863 goto out_free_flush_queue;
850 864
851 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) 865 if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
852 goto fail; 866 goto out_exit_flush_rq;
853 867
854 INIT_WORK(&q->timeout_work, blk_timeout_work); 868 INIT_WORK(&q->timeout_work, blk_timeout_work);
855 q->request_fn = rfn;
856 q->prep_rq_fn = NULL;
857 q->unprep_rq_fn = NULL;
858 q->queue_flags |= QUEUE_FLAG_DEFAULT; 869 q->queue_flags |= QUEUE_FLAG_DEFAULT;
859 870
860 /* Override internal queue lock with supplied lock pointer */
861 if (lock)
862 q->queue_lock = lock;
863
864 /* 871 /*
865 * This also sets hw/phys segments, boundary and size 872 * This also sets hw/phys segments, boundary and size
866 */ 873 */
@@ -874,17 +881,19 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
874 /* init elevator */ 881 /* init elevator */
875 if (elevator_init(q, NULL)) { 882 if (elevator_init(q, NULL)) {
876 mutex_unlock(&q->sysfs_lock); 883 mutex_unlock(&q->sysfs_lock);
877 goto fail; 884 goto out_exit_flush_rq;
878 } 885 }
879 886
880 mutex_unlock(&q->sysfs_lock); 887 mutex_unlock(&q->sysfs_lock);
888 return 0;
881 889
882 return q; 890out_exit_flush_rq:
883 891 if (q->exit_rq_fn)
884fail: 892 q->exit_rq_fn(q, q->fq->flush_rq);
893out_free_flush_queue:
885 blk_free_flush_queue(q->fq); 894 blk_free_flush_queue(q->fq);
886 wbt_exit(q); 895 wbt_exit(q);
887 return NULL; 896 return -ENOMEM;
888} 897}
889EXPORT_SYMBOL(blk_init_allocated_queue); 898EXPORT_SYMBOL(blk_init_allocated_queue);
890 899
@@ -1020,41 +1029,6 @@ int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
1020 return 0; 1029 return 0;
1021} 1030}
1022 1031
1023/*
1024 * Determine if elevator data should be initialized when allocating the
1025 * request associated with @bio.
1026 */
1027static bool blk_rq_should_init_elevator(struct bio *bio)
1028{
1029 if (!bio)
1030 return true;
1031
1032 /*
1033 * Flush requests do not use the elevator so skip initialization.
1034 * This allows a request to share the flush and elevator data.
1035 */
1036 if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA))
1037 return false;
1038
1039 return true;
1040}
1041
1042/**
1043 * rq_ioc - determine io_context for request allocation
1044 * @bio: request being allocated is for this bio (can be %NULL)
1045 *
1046 * Determine io_context to use for request allocation for @bio. May return
1047 * %NULL if %current->io_context doesn't exist.
1048 */
1049static struct io_context *rq_ioc(struct bio *bio)
1050{
1051#ifdef CONFIG_BLK_CGROUP
1052 if (bio && bio->bi_ioc)
1053 return bio->bi_ioc;
1054#endif
1055 return current->io_context;
1056}
1057
1058/** 1032/**
1059 * __get_request - get a free request 1033 * __get_request - get a free request
1060 * @rl: request list to allocate from 1034 * @rl: request list to allocate from
@@ -1133,10 +1107,13 @@ static struct request *__get_request(struct request_list *rl, unsigned int op,
1133 * request is freed. This guarantees icq's won't be destroyed and 1107 * request is freed. This guarantees icq's won't be destroyed and
1134 * makes creating new ones safe. 1108 * makes creating new ones safe.
1135 * 1109 *
1110 * Flush requests do not use the elevator so skip initialization.
1111 * This allows a request to share the flush and elevator data.
1112 *
1136 * Also, lookup icq while holding queue_lock. If it doesn't exist, 1113 * Also, lookup icq while holding queue_lock. If it doesn't exist,
1137 * it will be created after releasing queue_lock. 1114 * it will be created after releasing queue_lock.
1138 */ 1115 */
1139 if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) { 1116 if (!op_is_flush(op) && !blk_queue_bypass(q)) {
1140 rq_flags |= RQF_ELVPRIV; 1117 rq_flags |= RQF_ELVPRIV;
1141 q->nr_rqs_elvpriv++; 1118 q->nr_rqs_elvpriv++;
1142 if (et->icq_cache && ioc) 1119 if (et->icq_cache && ioc)
@@ -1196,7 +1173,7 @@ fail_elvpriv:
1196 * disturb iosched and blkcg but weird is bettern than dead. 1173 * disturb iosched and blkcg but weird is bettern than dead.
1197 */ 1174 */
1198 printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n", 1175 printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
1199 __func__, dev_name(q->backing_dev_info.dev)); 1176 __func__, dev_name(q->backing_dev_info->dev));
1200 1177
1201 rq->rq_flags &= ~RQF_ELVPRIV; 1178 rq->rq_flags &= ~RQF_ELVPRIV;
1202 rq->elv.icq = NULL; 1179 rq->elv.icq = NULL;
@@ -1290,8 +1267,6 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1290{ 1267{
1291 struct request *rq; 1268 struct request *rq;
1292 1269
1293 BUG_ON(rw != READ && rw != WRITE);
1294
1295 /* create ioc upfront */ 1270 /* create ioc upfront */
1296 create_io_context(gfp_mask, q->node); 1271 create_io_context(gfp_mask, q->node);
1297 1272
@@ -1321,18 +1296,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
1321EXPORT_SYMBOL(blk_get_request); 1296EXPORT_SYMBOL(blk_get_request);
1322 1297
1323/** 1298/**
1324 * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
1325 * @rq: request to be initialized
1326 *
1327 */
1328void blk_rq_set_block_pc(struct request *rq)
1329{
1330 rq->cmd_type = REQ_TYPE_BLOCK_PC;
1331 memset(rq->__cmd, 0, sizeof(rq->__cmd));
1332}
1333EXPORT_SYMBOL(blk_rq_set_block_pc);
1334
1335/**
1336 * blk_requeue_request - put a request back on queue 1299 * blk_requeue_request - put a request back on queue
1337 * @q: request queue where request should be inserted 1300 * @q: request queue where request should be inserted
1338 * @rq: request to be inserted 1301 * @rq: request to be inserted
@@ -1522,6 +1485,30 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
1522 return true; 1485 return true;
1523} 1486}
1524 1487
1488bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
1489 struct bio *bio)
1490{
1491 unsigned short segments = blk_rq_nr_discard_segments(req);
1492
1493 if (segments >= queue_max_discard_segments(q))
1494 goto no_merge;
1495 if (blk_rq_sectors(req) + bio_sectors(bio) >
1496 blk_rq_get_max_sectors(req, blk_rq_pos(req)))
1497 goto no_merge;
1498
1499 req->biotail->bi_next = bio;
1500 req->biotail = bio;
1501 req->__data_len += bio->bi_iter.bi_size;
1502 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1503 req->nr_phys_segments = segments + 1;
1504
1505 blk_account_io_start(req, false);
1506 return true;
1507no_merge:
1508 req_set_nomerge(q, req);
1509 return false;
1510}
1511
1525/** 1512/**
1526 * blk_attempt_plug_merge - try to merge with %current's plugged list 1513 * blk_attempt_plug_merge - try to merge with %current's plugged list
1527 * @q: request_queue new bio is being queued at 1514 * @q: request_queue new bio is being queued at
@@ -1550,12 +1537,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1550{ 1537{
1551 struct blk_plug *plug; 1538 struct blk_plug *plug;
1552 struct request *rq; 1539 struct request *rq;
1553 bool ret = false;
1554 struct list_head *plug_list; 1540 struct list_head *plug_list;
1555 1541
1556 plug = current->plug; 1542 plug = current->plug;
1557 if (!plug) 1543 if (!plug)
1558 goto out; 1544 return false;
1559 *request_count = 0; 1545 *request_count = 0;
1560 1546
1561 if (q->mq_ops) 1547 if (q->mq_ops)
@@ -1564,7 +1550,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1564 plug_list = &plug->list; 1550 plug_list = &plug->list;
1565 1551
1566 list_for_each_entry_reverse(rq, plug_list, queuelist) { 1552 list_for_each_entry_reverse(rq, plug_list, queuelist) {
1567 int el_ret; 1553 bool merged = false;
1568 1554
1569 if (rq->q == q) { 1555 if (rq->q == q) {
1570 (*request_count)++; 1556 (*request_count)++;
@@ -1580,19 +1566,25 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1580 if (rq->q != q || !blk_rq_merge_ok(rq, bio)) 1566 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1581 continue; 1567 continue;
1582 1568
1583 el_ret = blk_try_merge(rq, bio); 1569 switch (blk_try_merge(rq, bio)) {
1584 if (el_ret == ELEVATOR_BACK_MERGE) { 1570 case ELEVATOR_BACK_MERGE:
1585 ret = bio_attempt_back_merge(q, rq, bio); 1571 merged = bio_attempt_back_merge(q, rq, bio);
1586 if (ret) 1572 break;
1587 break; 1573 case ELEVATOR_FRONT_MERGE:
1588 } else if (el_ret == ELEVATOR_FRONT_MERGE) { 1574 merged = bio_attempt_front_merge(q, rq, bio);
1589 ret = bio_attempt_front_merge(q, rq, bio); 1575 break;
1590 if (ret) 1576 case ELEVATOR_DISCARD_MERGE:
1591 break; 1577 merged = bio_attempt_discard_merge(q, rq, bio);
1578 break;
1579 default:
1580 break;
1592 } 1581 }
1582
1583 if (merged)
1584 return true;
1593 } 1585 }
1594out: 1586
1595 return ret; 1587 return false;
1596} 1588}
1597 1589
1598unsigned int blk_plug_queued_count(struct request_queue *q) 1590unsigned int blk_plug_queued_count(struct request_queue *q)
@@ -1621,7 +1613,6 @@ out:
1621 1613
1622void init_request_from_bio(struct request *req, struct bio *bio) 1614void init_request_from_bio(struct request *req, struct bio *bio)
1623{ 1615{
1624 req->cmd_type = REQ_TYPE_FS;
1625 if (bio->bi_opf & REQ_RAHEAD) 1616 if (bio->bi_opf & REQ_RAHEAD)
1626 req->cmd_flags |= REQ_FAILFAST_MASK; 1617 req->cmd_flags |= REQ_FAILFAST_MASK;
1627 1618
@@ -1635,8 +1626,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1635static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio) 1626static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1636{ 1627{
1637 struct blk_plug *plug; 1628 struct blk_plug *plug;
1638 int el_ret, where = ELEVATOR_INSERT_SORT; 1629 int where = ELEVATOR_INSERT_SORT;
1639 struct request *req; 1630 struct request *req, *free;
1640 unsigned int request_count = 0; 1631 unsigned int request_count = 0;
1641 unsigned int wb_acct; 1632 unsigned int wb_acct;
1642 1633
@@ -1655,7 +1646,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1655 return BLK_QC_T_NONE; 1646 return BLK_QC_T_NONE;
1656 } 1647 }
1657 1648
1658 if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) { 1649 if (op_is_flush(bio->bi_opf)) {
1659 spin_lock_irq(q->queue_lock); 1650 spin_lock_irq(q->queue_lock);
1660 where = ELEVATOR_INSERT_FLUSH; 1651 where = ELEVATOR_INSERT_FLUSH;
1661 goto get_rq; 1652 goto get_rq;
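op_is_flush() above is one of the new helpers this series leans on; judging purely by the open-coded tests it replaces in this file, it presumably reduces to the flag check below (a sketch, not copied from blk_types.h):

	static inline bool op_is_flush(unsigned int op)
	{
		return op & (REQ_FUA | REQ_PREFLUSH);
	}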
@@ -1673,21 +1664,29 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1673 1664
1674 spin_lock_irq(q->queue_lock); 1665 spin_lock_irq(q->queue_lock);
1675 1666
1676 el_ret = elv_merge(q, &req, bio); 1667 switch (elv_merge(q, &req, bio)) {
1677 if (el_ret == ELEVATOR_BACK_MERGE) { 1668 case ELEVATOR_BACK_MERGE:
1678 if (bio_attempt_back_merge(q, req, bio)) { 1669 if (!bio_attempt_back_merge(q, req, bio))
1679 elv_bio_merged(q, req, bio); 1670 break;
1680 if (!attempt_back_merge(q, req)) 1671 elv_bio_merged(q, req, bio);
1681 elv_merged_request(q, req, el_ret); 1672 free = attempt_back_merge(q, req);
1682 goto out_unlock; 1673 if (free)
1683 } 1674 __blk_put_request(q, free);
1684 } else if (el_ret == ELEVATOR_FRONT_MERGE) { 1675 else
1685 if (bio_attempt_front_merge(q, req, bio)) { 1676 elv_merged_request(q, req, ELEVATOR_BACK_MERGE);
1686 elv_bio_merged(q, req, bio); 1677 goto out_unlock;
1687 if (!attempt_front_merge(q, req)) 1678 case ELEVATOR_FRONT_MERGE:
1688 elv_merged_request(q, req, el_ret); 1679 if (!bio_attempt_front_merge(q, req, bio))
1689 goto out_unlock; 1680 break;
1690 } 1681 elv_bio_merged(q, req, bio);
1682 free = attempt_front_merge(q, req);
1683 if (free)
1684 __blk_put_request(q, free);
1685 else
1686 elv_merged_request(q, req, ELEVATOR_FRONT_MERGE);
1687 goto out_unlock;
1688 default:
1689 break;
1691 } 1690 }
1692 1691
1693get_rq: 1692get_rq:
@@ -1894,7 +1893,7 @@ generic_make_request_checks(struct bio *bio)
1894 * drivers without flush support don't have to worry 1893 * drivers without flush support don't have to worry
1895 * about them. 1894 * about them.
1896 */ 1895 */
1897 if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && 1896 if (op_is_flush(bio->bi_opf) &&
1898 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { 1897 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
1899 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); 1898 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
1900 if (!nr_sectors) { 1899 if (!nr_sectors) {
@@ -2143,7 +2142,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2143 if (q->mq_ops) { 2142 if (q->mq_ops) {
2144 if (blk_queue_io_stat(q)) 2143 if (blk_queue_io_stat(q))
2145 blk_account_io_start(rq, true); 2144 blk_account_io_start(rq, true);
2146 blk_mq_insert_request(rq, false, true, false); 2145 blk_mq_sched_insert_request(rq, false, true, false, false);
2147 return 0; 2146 return 0;
2148 } 2147 }
2149 2148
@@ -2159,7 +2158,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2159 */ 2158 */
2160 BUG_ON(blk_queued_rq(rq)); 2159 BUG_ON(blk_queued_rq(rq));
2161 2160
2162 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) 2161 if (op_is_flush(rq->cmd_flags))
2163 where = ELEVATOR_INSERT_FLUSH; 2162 where = ELEVATOR_INSERT_FLUSH;
2164 2163
2165 add_acct_request(q, rq, where); 2164 add_acct_request(q, rq, where);
@@ -2464,14 +2463,6 @@ void blk_start_request(struct request *req)
2464 wbt_issue(req->q->rq_wb, &req->issue_stat); 2463 wbt_issue(req->q->rq_wb, &req->issue_stat);
2465 } 2464 }
2466 2465
2467 /*
2468 * We are now handing the request to the hardware, initialize
2469 * resid_len to full count and add the timeout handler.
2470 */
2471 req->resid_len = blk_rq_bytes(req);
2472 if (unlikely(blk_bidi_rq(req)))
2473 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
2474
2475 BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); 2466 BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
2476 blk_add_timer(req); 2467 blk_add_timer(req);
2477} 2468}
@@ -2542,10 +2533,10 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2542 * TODO: tj: This is too subtle. It would be better to let 2533 * TODO: tj: This is too subtle. It would be better to let
2543 * low level drivers do what they see fit. 2534 * low level drivers do what they see fit.
2544 */ 2535 */
2545 if (req->cmd_type == REQ_TYPE_FS) 2536 if (!blk_rq_is_passthrough(req))
2546 req->errors = 0; 2537 req->errors = 0;
2547 2538
2548 if (error && req->cmd_type == REQ_TYPE_FS && 2539 if (error && !blk_rq_is_passthrough(req) &&
2549 !(req->rq_flags & RQF_QUIET)) { 2540 !(req->rq_flags & RQF_QUIET)) {
2550 char *error_type; 2541 char *error_type;
2551 2542
@@ -2617,7 +2608,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2617 req->__data_len -= total_bytes; 2608 req->__data_len -= total_bytes;
2618 2609
2619 /* update sector only for requests with clear definition of sector */ 2610 /* update sector only for requests with clear definition of sector */
2620 if (req->cmd_type == REQ_TYPE_FS) 2611 if (!blk_rq_is_passthrough(req))
2621 req->__sector += total_bytes >> 9; 2612 req->__sector += total_bytes >> 9;
2622 2613
2623 /* mixed attributes always follow the first bio */ 2614 /* mixed attributes always follow the first bio */
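blk_rq_is_passthrough() takes over from the old cmd_type == REQ_TYPE_FS checks seen above. The assumption is that a request is now classified by its operation code rather than by a separate type field, roughly along these lines (using the REQ_OP_SCSI_*/REQ_OP_DRV_* passthrough opcodes this series introduces):

	static inline bool blk_rq_is_scsi(struct request *rq)
	{
		return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
	}

	static inline bool blk_rq_is_private(struct request *rq)
	{
		return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
	}

	static inline bool blk_rq_is_passthrough(struct request *rq)
	{
		return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
	}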
@@ -2695,8 +2686,8 @@ void blk_finish_request(struct request *req, int error)
2695 2686
2696 BUG_ON(blk_queued_rq(req)); 2687 BUG_ON(blk_queued_rq(req));
2697 2688
2698 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) 2689 if (unlikely(laptop_mode) && !blk_rq_is_passthrough(req))
2699 laptop_io_completion(&req->q->backing_dev_info); 2690 laptop_io_completion(req->q->backing_dev_info);
2700 2691
2701 blk_delete_timer(req); 2692 blk_delete_timer(req);
2702 2693
@@ -3019,8 +3010,6 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
3019static void __blk_rq_prep_clone(struct request *dst, struct request *src) 3010static void __blk_rq_prep_clone(struct request *dst, struct request *src)
3020{ 3011{
3021 dst->cpu = src->cpu; 3012 dst->cpu = src->cpu;
3022 dst->cmd_flags = src->cmd_flags | REQ_NOMERGE;
3023 dst->cmd_type = src->cmd_type;
3024 dst->__sector = blk_rq_pos(src); 3013 dst->__sector = blk_rq_pos(src);
3025 dst->__data_len = blk_rq_bytes(src); 3014 dst->__data_len = blk_rq_bytes(src);
3026 dst->nr_phys_segments = src->nr_phys_segments; 3015 dst->nr_phys_segments = src->nr_phys_segments;
@@ -3270,7 +3259,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3270 /* 3259 /*
3271 * rq is already accounted, so use raw insert 3260 * rq is already accounted, so use raw insert
3272 */ 3261 */
3273 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) 3262 if (op_is_flush(rq->cmd_flags))
3274 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); 3263 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
3275 else 3264 else
3276 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); 3265 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
@@ -3496,5 +3485,9 @@ int __init blk_dev_init(void)
3496 blk_requestq_cachep = kmem_cache_create("request_queue", 3485 blk_requestq_cachep = kmem_cache_create("request_queue",
3497 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 3486 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
3498 3487
3488#ifdef CONFIG_DEBUG_FS
3489 blk_debugfs_root = debugfs_create_dir("block", NULL);
3490#endif
3491
3499 return 0; 3492 return 0;
3500} 3493}
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3ecb00a6cf45..8cd0e9bc8dc8 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -9,11 +9,7 @@
9#include <linux/sched/sysctl.h> 9#include <linux/sched/sysctl.h>
10 10
11#include "blk.h" 11#include "blk.h"
12 12#include "blk-mq-sched.h"
13/*
14 * for max sense size
15 */
16#include <scsi/scsi_cmnd.h>
17 13
18/** 14/**
19 * blk_end_sync_rq - executes a completion event on a request 15 * blk_end_sync_rq - executes a completion event on a request
@@ -55,7 +51,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
55 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; 51 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
56 52
57 WARN_ON(irqs_disabled()); 53 WARN_ON(irqs_disabled());
58 WARN_ON(rq->cmd_type == REQ_TYPE_FS); 54 WARN_ON(!blk_rq_is_passthrough(rq));
59 55
60 rq->rq_disk = bd_disk; 56 rq->rq_disk = bd_disk;
61 rq->end_io = done; 57 rq->end_io = done;
@@ -65,7 +61,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
65 * be reused after dying flag is set 61 * be reused after dying flag is set
66 */ 62 */
67 if (q->mq_ops) { 63 if (q->mq_ops) {
68 blk_mq_insert_request(rq, at_head, true, false); 64 blk_mq_sched_insert_request(rq, at_head, true, false, false);
69 return; 65 return;
70 } 66 }
71 67
@@ -100,16 +96,9 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
100 struct request *rq, int at_head) 96 struct request *rq, int at_head)
101{ 97{
102 DECLARE_COMPLETION_ONSTACK(wait); 98 DECLARE_COMPLETION_ONSTACK(wait);
103 char sense[SCSI_SENSE_BUFFERSIZE];
104 int err = 0; 99 int err = 0;
105 unsigned long hang_check; 100 unsigned long hang_check;
106 101
107 if (!rq->sense) {
108 memset(sense, 0, sizeof(sense));
109 rq->sense = sense;
110 rq->sense_len = 0;
111 }
112
113 rq->end_io_data = &wait; 102 rq->end_io_data = &wait;
114 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); 103 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
115 104
@@ -123,11 +112,6 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
123 if (rq->errors) 112 if (rq->errors)
124 err = -EIO; 113 err = -EIO;
125 114
126 if (rq->sense == sense) {
127 rq->sense = NULL;
128 rq->sense_len = 0;
129 }
130
131 return err; 115 return err;
132} 116}
133EXPORT_SYMBOL(blk_execute_rq); 117EXPORT_SYMBOL(blk_execute_rq);
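
With the stack sense buffer gone, blk_execute_rq() above is reduced to the classic "synchronous wrapper over an asynchronous submit" shape: install a completion as the end_io callback, submit, then sleep until the callback fires. A minimal userspace model of that shape, assuming invented demo_* names and an immediate fake completion rather than real hardware:

#include <pthread.h>
#include <stdio.h>

struct completion {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int done;
};

struct demo_rq {
        void (*end_io)(struct demo_rq *rq, int error);
        void *end_io_data;
        int error;
};

static void complete(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = 1;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        while (!c->done)
                pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

static void end_sync_rq(struct demo_rq *rq, int error)
{
        rq->error = error;
        complete(rq->end_io_data);
}

/* Stand-in for the queue/driver actually executing the request. */
static void submit_nowait(struct demo_rq *rq)
{
        rq->end_io(rq, 0);      /* pretend it completed immediately */
}

static int execute_rq_sync(struct demo_rq *rq)
{
        struct completion wait;

        pthread_mutex_init(&wait.lock, NULL);
        pthread_cond_init(&wait.cond, NULL);
        wait.done = 0;

        rq->end_io = end_sync_rq;
        rq->end_io_data = &wait;
        submit_nowait(rq);
        wait_for_completion(&wait);
        return rq->error;
}

int main(void)
{
        struct demo_rq rq = { 0 };

        printf("request finished with error=%d\n", execute_rq_sync(&rq));
        return 0;
}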
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 20b7c7a02f1c..0d5a9c1da1fc 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -74,6 +74,7 @@
74#include "blk.h" 74#include "blk.h"
75#include "blk-mq.h" 75#include "blk-mq.h"
76#include "blk-mq-tag.h" 76#include "blk-mq-tag.h"
77#include "blk-mq-sched.h"
77 78
78/* FLUSH/FUA sequences */ 79/* FLUSH/FUA sequences */
79enum { 80enum {
@@ -296,8 +297,14 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
296 if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending)) 297 if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
297 return false; 298 return false;
298 299
299 /* C2 and C3 */ 300 /* C2 and C3
301 *
302 * For blk-mq + scheduling, we can risk having all driver tags
303 * assigned to empty flushes, and we deadlock if we are expecting
304 * other requests to make progress. Don't defer for that case.
305 */
300 if (!list_empty(&fq->flush_data_in_flight) && 306 if (!list_empty(&fq->flush_data_in_flight) &&
307 !(q->mq_ops && q->elevator) &&
301 time_before(jiffies, 308 time_before(jiffies,
302 fq->flush_pending_since + FLUSH_PENDING_TIMEOUT)) 309 fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
303 return false; 310 return false;
@@ -326,7 +333,6 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
326 blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq); 333 blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
327 } 334 }
328 335
329 flush_rq->cmd_type = REQ_TYPE_FS;
330 flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; 336 flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
331 flush_rq->rq_flags |= RQF_FLUSH_SEQ; 337 flush_rq->rq_flags |= RQF_FLUSH_SEQ;
332 flush_rq->rq_disk = first_rq->rq_disk; 338 flush_rq->rq_disk = first_rq->rq_disk;
@@ -391,9 +397,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
391 * the comment in flush_end_io(). 397 * the comment in flush_end_io().
392 */ 398 */
393 spin_lock_irqsave(&fq->mq_flush_lock, flags); 399 spin_lock_irqsave(&fq->mq_flush_lock, flags);
394 if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) 400 blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
395 blk_mq_run_hw_queue(hctx, true);
396 spin_unlock_irqrestore(&fq->mq_flush_lock, flags); 401 spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
402
403 blk_mq_run_hw_queue(hctx, true);
397} 404}
398 405
399/** 406/**
@@ -453,9 +460,9 @@ void blk_insert_flush(struct request *rq)
453 */ 460 */
454 if ((policy & REQ_FSEQ_DATA) && 461 if ((policy & REQ_FSEQ_DATA) &&
455 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { 462 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
456 if (q->mq_ops) { 463 if (q->mq_ops)
457 blk_mq_insert_request(rq, false, true, false); 464 blk_mq_sched_insert_request(rq, false, true, false, false);
458 } else 465 else
459 list_add_tail(&rq->queuelist, &q->queue_head); 466 list_add_tail(&rq->queuelist, &q->queue_head);
460 return; 467 return;
461 } 468 }
@@ -545,11 +552,10 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
545 if (!fq) 552 if (!fq)
546 goto fail; 553 goto fail;
547 554
548 if (q->mq_ops) { 555 if (q->mq_ops)
549 spin_lock_init(&fq->mq_flush_lock); 556 spin_lock_init(&fq->mq_flush_lock);
550 rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
551 }
552 557
558 rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
553 fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node); 559 fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
554 if (!fq->flush_rq) 560 if (!fq->flush_rq)
555 goto fail_rq; 561 goto fail_rq;
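
The interesting part of the blk-flush.c change above is the extra !(q->mq_ops && q->elevator) term in blk_kick_flush(): on blk-mq with a scheduler, deferring a flush could leave every driver tag held by empty flushes while waiting for data that can never issue. As a compact predicate, under invented field names and a made-up time representation:

#include <stdbool.h>
#include <stdio.h>

struct flush_state {
        bool data_in_flight;            /* !list_empty(&fq->flush_data_in_flight) */
        bool mq_with_scheduler;         /* q->mq_ops && q->elevator */
        unsigned long now, pending_since, pending_timeout;
};

static bool should_defer_flush(const struct flush_state *s)
{
        /* never defer on blk-mq + scheduler, to avoid the tag deadlock
         * described in the comment above */
        return s->data_in_flight &&
               !s->mq_with_scheduler &&
               s->now < s->pending_since + s->pending_timeout;
}

int main(void)
{
        struct flush_state legacy   = { true, false, 100, 95, 20 };
        struct flush_state mq_sched = { true, true,  100, 95, 20 };

        printf("legacy defers: %d, blk-mq+sched defers: %d\n",
               should_defer_flush(&legacy), should_defer_flush(&mq_sched));
        return 0;
}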
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index d69c5c79f98e..9f0ff5ba4f84 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -443,10 +443,10 @@ void blk_integrity_revalidate(struct gendisk *disk)
443 return; 443 return;
444 444
445 if (bi->profile) 445 if (bi->profile)
446 disk->queue->backing_dev_info.capabilities |= 446 disk->queue->backing_dev_info->capabilities |=
447 BDI_CAP_STABLE_WRITES; 447 BDI_CAP_STABLE_WRITES;
448 else 448 else
449 disk->queue->backing_dev_info.capabilities &= 449 disk->queue->backing_dev_info->capabilities &=
450 ~BDI_CAP_STABLE_WRITES; 450 ~BDI_CAP_STABLE_WRITES;
451} 451}
452 452
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 381cb50a673c..b12f9c87b4c3 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -35,7 +35,10 @@ static void icq_free_icq_rcu(struct rcu_head *head)
35 kmem_cache_free(icq->__rcu_icq_cache, icq); 35 kmem_cache_free(icq->__rcu_icq_cache, icq);
36} 36}
37 37
38/* Exit an icq. Called with both ioc and q locked. */ 38/*
39 * Exit an icq. Called with both ioc and q locked for sq, only ioc locked for
40 * mq.
41 */
39static void ioc_exit_icq(struct io_cq *icq) 42static void ioc_exit_icq(struct io_cq *icq)
40{ 43{
41 struct elevator_type *et = icq->q->elevator->type; 44 struct elevator_type *et = icq->q->elevator->type;
@@ -43,8 +46,10 @@ static void ioc_exit_icq(struct io_cq *icq)
43 if (icq->flags & ICQ_EXITED) 46 if (icq->flags & ICQ_EXITED)
44 return; 47 return;
45 48
46 if (et->ops.elevator_exit_icq_fn) 49 if (et->uses_mq && et->ops.mq.exit_icq)
47 et->ops.elevator_exit_icq_fn(icq); 50 et->ops.mq.exit_icq(icq);
51 else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
52 et->ops.sq.elevator_exit_icq_fn(icq);
48 53
49 icq->flags |= ICQ_EXITED; 54 icq->flags |= ICQ_EXITED;
50} 55}
@@ -164,6 +169,7 @@ EXPORT_SYMBOL(put_io_context);
164 */ 169 */
165void put_io_context_active(struct io_context *ioc) 170void put_io_context_active(struct io_context *ioc)
166{ 171{
172 struct elevator_type *et;
167 unsigned long flags; 173 unsigned long flags;
168 struct io_cq *icq; 174 struct io_cq *icq;
169 175
@@ -182,13 +188,19 @@ retry:
182 hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) { 188 hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
183 if (icq->flags & ICQ_EXITED) 189 if (icq->flags & ICQ_EXITED)
184 continue; 190 continue;
185 if (spin_trylock(icq->q->queue_lock)) { 191
192 et = icq->q->elevator->type;
193 if (et->uses_mq) {
186 ioc_exit_icq(icq); 194 ioc_exit_icq(icq);
187 spin_unlock(icq->q->queue_lock);
188 } else { 195 } else {
189 spin_unlock_irqrestore(&ioc->lock, flags); 196 if (spin_trylock(icq->q->queue_lock)) {
190 cpu_relax(); 197 ioc_exit_icq(icq);
191 goto retry; 198 spin_unlock(icq->q->queue_lock);
199 } else {
200 spin_unlock_irqrestore(&ioc->lock, flags);
201 cpu_relax();
202 goto retry;
203 }
192 } 204 }
193 } 205 }
194 spin_unlock_irqrestore(&ioc->lock, flags); 206 spin_unlock_irqrestore(&ioc->lock, flags);
@@ -383,8 +395,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
383 if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { 395 if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
384 hlist_add_head(&icq->ioc_node, &ioc->icq_list); 396 hlist_add_head(&icq->ioc_node, &ioc->icq_list);
385 list_add(&icq->q_node, &q->icq_list); 397 list_add(&icq->q_node, &q->icq_list);
386 if (et->ops.elevator_init_icq_fn) 398 if (et->uses_mq && et->ops.mq.init_icq)
387 et->ops.elevator_init_icq_fn(icq); 399 et->ops.mq.init_icq(icq);
400 else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
401 et->ops.sq.elevator_init_icq_fn(icq);
388 } else { 402 } else {
389 kmem_cache_free(et->icq_cache, icq); 403 kmem_cache_free(et->icq_cache, icq);
390 icq = ioc_lookup_icq(ioc, q); 404 icq = ioc_lookup_icq(ioc, q);
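
The blk-ioc.c hunks above follow one pattern: an elevator type now exposes either a blk-mq ops table (ops.mq) or a legacy single-queue table (ops.sq), selected by uses_mq, and the icq init/exit paths dispatch accordingly. A self-contained imitation of that dual-table dispatch, with illustrative structures and callback names only:

#include <stdbool.h>
#include <stdio.h>

struct demo_icq { int id; };

struct demo_elevator_type {
        bool uses_mq;
        union {
                struct {
                        void (*exit_icq)(struct demo_icq *icq);
                } mq;
                struct {
                        void (*elevator_exit_icq_fn)(struct demo_icq *icq);
                } sq;
        } ops;
};

static void mq_exit(struct demo_icq *icq) { printf("mq exit icq %d\n", icq->id); }
static void sq_exit(struct demo_icq *icq) { printf("sq exit icq %d\n", icq->id); }

static void demo_exit_icq(struct demo_elevator_type *et, struct demo_icq *icq)
{
        /* pick the table that matches the elevator flavour */
        if (et->uses_mq && et->ops.mq.exit_icq)
                et->ops.mq.exit_icq(icq);
        else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
                et->ops.sq.elevator_exit_icq_fn(icq);
}

int main(void)
{
        struct demo_elevator_type mq_et = { .uses_mq = true };
        struct demo_elevator_type sq_et = { .uses_mq = false };
        struct demo_icq icq = { .id = 1 };

        mq_et.ops.mq.exit_icq = mq_exit;
        sq_et.ops.sq.elevator_exit_icq_fn = sq_exit;
        demo_exit_icq(&mq_et, &icq);
        demo_exit_icq(&sq_et, &icq);
        return 0;
}

The retry loop in put_io_context_active() above shows why the split matters: the mq branch no longer needs the queue_lock trylock dance before exiting the icq.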
diff --git a/block/blk-map.c b/block/blk-map.c
index 0acb6640ead7..2f18c2a0be1b 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -16,8 +16,6 @@
16int blk_rq_append_bio(struct request *rq, struct bio *bio) 16int blk_rq_append_bio(struct request *rq, struct bio *bio)
17{ 17{
18 if (!rq->bio) { 18 if (!rq->bio) {
19 rq->cmd_flags &= REQ_OP_MASK;
20 rq->cmd_flags |= (bio->bi_opf & REQ_OP_MASK);
21 blk_rq_bio_prep(rq->q, rq, bio); 19 blk_rq_bio_prep(rq->q, rq, bio);
22 } else { 20 } else {
23 if (!ll_back_merge_fn(rq->q, rq, bio)) 21 if (!ll_back_merge_fn(rq->q, rq, bio))
@@ -62,6 +60,9 @@ static int __blk_rq_map_user_iov(struct request *rq,
62 if (IS_ERR(bio)) 60 if (IS_ERR(bio))
63 return PTR_ERR(bio); 61 return PTR_ERR(bio);
64 62
63 bio->bi_opf &= ~REQ_OP_MASK;
64 bio->bi_opf |= req_op(rq);
65
65 if (map_data && map_data->null_mapped) 66 if (map_data && map_data->null_mapped)
66 bio_set_flag(bio, BIO_NULL_MAPPED); 67 bio_set_flag(bio, BIO_NULL_MAPPED);
67 68
@@ -90,7 +91,7 @@ static int __blk_rq_map_user_iov(struct request *rq,
90} 91}
91 92
92/** 93/**
93 * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage 94 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
94 * @q: request queue where request should be inserted 95 * @q: request queue where request should be inserted
95 * @rq: request to map data to 96 * @rq: request to map data to
96 * @map_data: pointer to the rq_map_data holding pages (if necessary) 97 * @map_data: pointer to the rq_map_data holding pages (if necessary)
@@ -199,7 +200,7 @@ int blk_rq_unmap_user(struct bio *bio)
199EXPORT_SYMBOL(blk_rq_unmap_user); 200EXPORT_SYMBOL(blk_rq_unmap_user);
200 201
201/** 202/**
202 * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage 203 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
203 * @q: request queue where request should be inserted 204 * @q: request queue where request should be inserted
204 * @rq: request to fill 205 * @rq: request to fill
205 * @kbuf: the kernel buffer 206 * @kbuf: the kernel buffer
@@ -234,8 +235,8 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
234 if (IS_ERR(bio)) 235 if (IS_ERR(bio))
235 return PTR_ERR(bio); 236 return PTR_ERR(bio);
236 237
237 if (!reading) 238 bio->bi_opf &= ~REQ_OP_MASK;
238 bio_set_op_attrs(bio, REQ_OP_WRITE, 0); 239 bio->bi_opf |= req_op(rq);
239 240
240 if (do_copy) 241 if (do_copy)
241 rq->rq_flags |= RQF_COPY_USER; 242 rq->rq_flags |= RQF_COPY_USER;
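
Both blk-map.c hunks above converge on the same two lines: clear the op bits in the mapped bio and copy in the request's op, so the bio always carries the same operation as its passthrough request. A tiny sketch of that bit manipulation, with made-up mask values:

#include <stdio.h>

#define DEMO_OP_BITS    8
#define DEMO_OP_MASK    ((1u << DEMO_OP_BITS) - 1)

static unsigned int inherit_op(unsigned int bio_flags, unsigned int rq_op)
{
        bio_flags &= ~DEMO_OP_MASK;             /* drop whatever op the bio had */
        bio_flags |= (rq_op & DEMO_OP_MASK);    /* take the request's op */
        return bio_flags;
}

int main(void)
{
        unsigned int flags = 0x0301;    /* some op plus a high flag bit */

        printf("0x%x -> 0x%x\n", flags, inherit_op(flags, 0x02));
        return 0;
}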
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 182398cb1524..2afa262425d1 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -482,13 +482,6 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
482} 482}
483EXPORT_SYMBOL(blk_rq_map_sg); 483EXPORT_SYMBOL(blk_rq_map_sg);
484 484
485static void req_set_nomerge(struct request_queue *q, struct request *req)
486{
487 req->cmd_flags |= REQ_NOMERGE;
488 if (req == q->last_merge)
489 q->last_merge = NULL;
490}
491
492static inline int ll_new_hw_segment(struct request_queue *q, 485static inline int ll_new_hw_segment(struct request_queue *q,
493 struct request *req, 486 struct request *req,
494 struct bio *bio) 487 struct bio *bio)
@@ -659,31 +652,32 @@ static void blk_account_io_merge(struct request *req)
659} 652}
660 653
661/* 654/*
662 * Has to be called with the request spinlock acquired 655 * For non-mq, this has to be called with the request spinlock acquired.
656 * For mq with scheduling, the appropriate queue wide lock should be held.
663 */ 657 */
664static int attempt_merge(struct request_queue *q, struct request *req, 658static struct request *attempt_merge(struct request_queue *q,
665 struct request *next) 659 struct request *req, struct request *next)
666{ 660{
667 if (!rq_mergeable(req) || !rq_mergeable(next)) 661 if (!rq_mergeable(req) || !rq_mergeable(next))
668 return 0; 662 return NULL;
669 663
670 if (req_op(req) != req_op(next)) 664 if (req_op(req) != req_op(next))
671 return 0; 665 return NULL;
672 666
673 /* 667 /*
674 * not contiguous 668 * not contiguous
675 */ 669 */
676 if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) 670 if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
677 return 0; 671 return NULL;
678 672
679 if (rq_data_dir(req) != rq_data_dir(next) 673 if (rq_data_dir(req) != rq_data_dir(next)
680 || req->rq_disk != next->rq_disk 674 || req->rq_disk != next->rq_disk
681 || req_no_special_merge(next)) 675 || req_no_special_merge(next))
682 return 0; 676 return NULL;
683 677
684 if (req_op(req) == REQ_OP_WRITE_SAME && 678 if (req_op(req) == REQ_OP_WRITE_SAME &&
685 !blk_write_same_mergeable(req->bio, next->bio)) 679 !blk_write_same_mergeable(req->bio, next->bio))
686 return 0; 680 return NULL;
687 681
688 /* 682 /*
689 * If we are allowed to merge, then append bio list 683 * If we are allowed to merge, then append bio list
@@ -692,7 +686,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
692 * counts here. 686 * counts here.
693 */ 687 */
694 if (!ll_merge_requests_fn(q, req, next)) 688 if (!ll_merge_requests_fn(q, req, next))
695 return 0; 689 return NULL;
696 690
697 /* 691 /*
698 * If failfast settings disagree or any of the two is already 692 * If failfast settings disagree or any of the two is already
@@ -732,42 +726,51 @@ static int attempt_merge(struct request_queue *q, struct request *req,
732 if (blk_rq_cpu_valid(next)) 726 if (blk_rq_cpu_valid(next))
733 req->cpu = next->cpu; 727 req->cpu = next->cpu;
734 728
735 /* owner-ship of bio passed from next to req */ 729 /*
730 * ownership of bio passed from next to req, return 'next' for
731 * the caller to free
732 */
736 next->bio = NULL; 733 next->bio = NULL;
737 __blk_put_request(q, next); 734 return next;
738 return 1;
739} 735}
740 736
741int attempt_back_merge(struct request_queue *q, struct request *rq) 737struct request *attempt_back_merge(struct request_queue *q, struct request *rq)
742{ 738{
743 struct request *next = elv_latter_request(q, rq); 739 struct request *next = elv_latter_request(q, rq);
744 740
745 if (next) 741 if (next)
746 return attempt_merge(q, rq, next); 742 return attempt_merge(q, rq, next);
747 743
748 return 0; 744 return NULL;
749} 745}
750 746
751int attempt_front_merge(struct request_queue *q, struct request *rq) 747struct request *attempt_front_merge(struct request_queue *q, struct request *rq)
752{ 748{
753 struct request *prev = elv_former_request(q, rq); 749 struct request *prev = elv_former_request(q, rq);
754 750
755 if (prev) 751 if (prev)
756 return attempt_merge(q, prev, rq); 752 return attempt_merge(q, prev, rq);
757 753
758 return 0; 754 return NULL;
759} 755}
760 756
761int blk_attempt_req_merge(struct request_queue *q, struct request *rq, 757int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
762 struct request *next) 758 struct request *next)
763{ 759{
764 struct elevator_queue *e = q->elevator; 760 struct elevator_queue *e = q->elevator;
761 struct request *free;
765 762
766 if (e->type->ops.elevator_allow_rq_merge_fn) 763 if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
767 if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) 764 if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
768 return 0; 765 return 0;
769 766
770 return attempt_merge(q, rq, next); 767 free = attempt_merge(q, rq, next);
768 if (free) {
769 __blk_put_request(q, free);
770 return 1;
771 }
772
773 return 0;
771} 774}
772 775
773bool blk_rq_merge_ok(struct request *rq, struct bio *bio) 776bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
@@ -798,9 +801,12 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
798 return true; 801 return true;
799} 802}
800 803
801int blk_try_merge(struct request *rq, struct bio *bio) 804enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
802{ 805{
803 if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) 806 if (req_op(rq) == REQ_OP_DISCARD &&
807 queue_max_discard_segments(rq->q) > 1)
808 return ELEVATOR_DISCARD_MERGE;
809 else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
804 return ELEVATOR_BACK_MERGE; 810 return ELEVATOR_BACK_MERGE;
805 else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) 811 else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
806 return ELEVATOR_FRONT_MERGE; 812 return ELEVATOR_FRONT_MERGE;
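
Two things change in blk-merge.c above: attempt_merge() now hands the absorbed request back to the caller to free, and blk_try_merge() gains a discard branch so queues that accept multiple discard segments can merge discards as ELEVATOR_DISCARD_MERGE. The decision half is easy to model in isolation; the enum and field names below are illustrative, not the kernel definitions:

#include <stdbool.h>
#include <stdio.h>

enum demo_merge { DEMO_NO_MERGE, DEMO_DISCARD_MERGE, DEMO_BACK_MERGE, DEMO_FRONT_MERGE };

struct demo_rq  { unsigned long pos, sectors; bool is_discard; };
struct demo_bio { unsigned long sector, sectors; };

static enum demo_merge demo_try_merge(const struct demo_rq *rq,
                                      const struct demo_bio *bio,
                                      unsigned int max_discard_segments)
{
        if (rq->is_discard && max_discard_segments > 1)
                return DEMO_DISCARD_MERGE;      /* ranged discard support */
        if (rq->pos + rq->sectors == bio->sector)
                return DEMO_BACK_MERGE;         /* bio starts right after rq */
        if (rq->pos - bio->sectors == bio->sector)
                return DEMO_FRONT_MERGE;        /* bio ends right before rq */
        return DEMO_NO_MERGE;
}

int main(void)
{
        struct demo_rq rq = { .pos = 100, .sectors = 8, .is_discard = false };
        struct demo_bio bio = { .sector = 108, .sectors = 8 };

        printf("merge decision: %d\n", demo_try_merge(&rq, &bio, 1));
        return 0;
}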
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
new file mode 100644
index 000000000000..f6d917977b33
--- /dev/null
+++ b/block/blk-mq-debugfs.c
@@ -0,0 +1,772 @@
1/*
2 * Copyright (C) 2017 Facebook
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <https://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/blkdev.h>
19#include <linux/debugfs.h>
20
21#include <linux/blk-mq.h>
22#include "blk.h"
23#include "blk-mq.h"
24#include "blk-mq-tag.h"
25
26struct blk_mq_debugfs_attr {
27 const char *name;
28 umode_t mode;
29 const struct file_operations *fops;
30};
31
32static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file,
33 const struct seq_operations *ops)
34{
35 struct seq_file *m;
36 int ret;
37
38 ret = seq_open(file, ops);
39 if (!ret) {
40 m = file->private_data;
41 m->private = inode->i_private;
42 }
43 return ret;
44}
45
46static int hctx_state_show(struct seq_file *m, void *v)
47{
48 struct blk_mq_hw_ctx *hctx = m->private;
49
50 seq_printf(m, "0x%lx\n", hctx->state);
51 return 0;
52}
53
54static int hctx_state_open(struct inode *inode, struct file *file)
55{
56 return single_open(file, hctx_state_show, inode->i_private);
57}
58
59static const struct file_operations hctx_state_fops = {
60 .open = hctx_state_open,
61 .read = seq_read,
62 .llseek = seq_lseek,
63 .release = single_release,
64};
65
66static int hctx_flags_show(struct seq_file *m, void *v)
67{
68 struct blk_mq_hw_ctx *hctx = m->private;
69
70 seq_printf(m, "0x%lx\n", hctx->flags);
71 return 0;
72}
73
74static int hctx_flags_open(struct inode *inode, struct file *file)
75{
76 return single_open(file, hctx_flags_show, inode->i_private);
77}
78
79static const struct file_operations hctx_flags_fops = {
80 .open = hctx_flags_open,
81 .read = seq_read,
82 .llseek = seq_lseek,
83 .release = single_release,
84};
85
86static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
87{
88 struct request *rq = list_entry_rq(v);
89
90 seq_printf(m, "%p {.cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n",
91 rq, rq->cmd_flags, (__force unsigned int)rq->rq_flags,
92 rq->tag, rq->internal_tag);
93 return 0;
94}
95
96static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
97 __acquires(&hctx->lock)
98{
99 struct blk_mq_hw_ctx *hctx = m->private;
100
101 spin_lock(&hctx->lock);
102 return seq_list_start(&hctx->dispatch, *pos);
103}
104
105static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
106{
107 struct blk_mq_hw_ctx *hctx = m->private;
108
109 return seq_list_next(v, &hctx->dispatch, pos);
110}
111
112static void hctx_dispatch_stop(struct seq_file *m, void *v)
113 __releases(&hctx->lock)
114{
115 struct blk_mq_hw_ctx *hctx = m->private;
116
117 spin_unlock(&hctx->lock);
118}
119
120static const struct seq_operations hctx_dispatch_seq_ops = {
121 .start = hctx_dispatch_start,
122 .next = hctx_dispatch_next,
123 .stop = hctx_dispatch_stop,
124 .show = blk_mq_debugfs_rq_show,
125};
126
127static int hctx_dispatch_open(struct inode *inode, struct file *file)
128{
129 return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops);
130}
131
132static const struct file_operations hctx_dispatch_fops = {
133 .open = hctx_dispatch_open,
134 .read = seq_read,
135 .llseek = seq_lseek,
136 .release = seq_release,
137};
138
139static int hctx_ctx_map_show(struct seq_file *m, void *v)
140{
141 struct blk_mq_hw_ctx *hctx = m->private;
142
143 sbitmap_bitmap_show(&hctx->ctx_map, m);
144 return 0;
145}
146
147static int hctx_ctx_map_open(struct inode *inode, struct file *file)
148{
149 return single_open(file, hctx_ctx_map_show, inode->i_private);
150}
151
152static const struct file_operations hctx_ctx_map_fops = {
153 .open = hctx_ctx_map_open,
154 .read = seq_read,
155 .llseek = seq_lseek,
156 .release = single_release,
157};
158
159static void blk_mq_debugfs_tags_show(struct seq_file *m,
160 struct blk_mq_tags *tags)
161{
162 seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
163 seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
164 seq_printf(m, "active_queues=%d\n",
165 atomic_read(&tags->active_queues));
166
167 seq_puts(m, "\nbitmap_tags:\n");
168 sbitmap_queue_show(&tags->bitmap_tags, m);
169
170 if (tags->nr_reserved_tags) {
171 seq_puts(m, "\nbreserved_tags:\n");
172 sbitmap_queue_show(&tags->breserved_tags, m);
173 }
174}
175
176static int hctx_tags_show(struct seq_file *m, void *v)
177{
178 struct blk_mq_hw_ctx *hctx = m->private;
179 struct request_queue *q = hctx->queue;
180 int res;
181
182 res = mutex_lock_interruptible(&q->sysfs_lock);
183 if (res)
184 goto out;
185 if (hctx->tags)
186 blk_mq_debugfs_tags_show(m, hctx->tags);
187 mutex_unlock(&q->sysfs_lock);
188
189out:
190 return res;
191}
192
193static int hctx_tags_open(struct inode *inode, struct file *file)
194{
195 return single_open(file, hctx_tags_show, inode->i_private);
196}
197
198static const struct file_operations hctx_tags_fops = {
199 .open = hctx_tags_open,
200 .read = seq_read,
201 .llseek = seq_lseek,
202 .release = single_release,
203};
204
205static int hctx_tags_bitmap_show(struct seq_file *m, void *v)
206{
207 struct blk_mq_hw_ctx *hctx = m->private;
208 struct request_queue *q = hctx->queue;
209 int res;
210
211 res = mutex_lock_interruptible(&q->sysfs_lock);
212 if (res)
213 goto out;
214 if (hctx->tags)
215 sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m);
216 mutex_unlock(&q->sysfs_lock);
217
218out:
219 return res;
220}
221
222static int hctx_tags_bitmap_open(struct inode *inode, struct file *file)
223{
224 return single_open(file, hctx_tags_bitmap_show, inode->i_private);
225}
226
227static const struct file_operations hctx_tags_bitmap_fops = {
228 .open = hctx_tags_bitmap_open,
229 .read = seq_read,
230 .llseek = seq_lseek,
231 .release = single_release,
232};
233
234static int hctx_sched_tags_show(struct seq_file *m, void *v)
235{
236 struct blk_mq_hw_ctx *hctx = m->private;
237 struct request_queue *q = hctx->queue;
238 int res;
239
240 res = mutex_lock_interruptible(&q->sysfs_lock);
241 if (res)
242 goto out;
243 if (hctx->sched_tags)
244 blk_mq_debugfs_tags_show(m, hctx->sched_tags);
245 mutex_unlock(&q->sysfs_lock);
246
247out:
248 return res;
249}
250
251static int hctx_sched_tags_open(struct inode *inode, struct file *file)
252{
253 return single_open(file, hctx_sched_tags_show, inode->i_private);
254}
255
256static const struct file_operations hctx_sched_tags_fops = {
257 .open = hctx_sched_tags_open,
258 .read = seq_read,
259 .llseek = seq_lseek,
260 .release = single_release,
261};
262
263static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v)
264{
265 struct blk_mq_hw_ctx *hctx = m->private;
266 struct request_queue *q = hctx->queue;
267 int res;
268
269 res = mutex_lock_interruptible(&q->sysfs_lock);
270 if (res)
271 goto out;
272 if (hctx->sched_tags)
273 sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m);
274 mutex_unlock(&q->sysfs_lock);
275
276out:
277 return res;
278}
279
280static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file)
281{
282 return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private);
283}
284
285static const struct file_operations hctx_sched_tags_bitmap_fops = {
286 .open = hctx_sched_tags_bitmap_open,
287 .read = seq_read,
288 .llseek = seq_lseek,
289 .release = single_release,
290};
291
292static int hctx_io_poll_show(struct seq_file *m, void *v)
293{
294 struct blk_mq_hw_ctx *hctx = m->private;
295
296 seq_printf(m, "considered=%lu\n", hctx->poll_considered);
297 seq_printf(m, "invoked=%lu\n", hctx->poll_invoked);
298 seq_printf(m, "success=%lu\n", hctx->poll_success);
299 return 0;
300}
301
302static int hctx_io_poll_open(struct inode *inode, struct file *file)
303{
304 return single_open(file, hctx_io_poll_show, inode->i_private);
305}
306
307static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf,
308 size_t count, loff_t *ppos)
309{
310 struct seq_file *m = file->private_data;
311 struct blk_mq_hw_ctx *hctx = m->private;
312
313 hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
314 return count;
315}
316
317static const struct file_operations hctx_io_poll_fops = {
318 .open = hctx_io_poll_open,
319 .read = seq_read,
320 .write = hctx_io_poll_write,
321 .llseek = seq_lseek,
322 .release = single_release,
323};
324
325static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
326{
327 seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
328 stat->nr_samples, stat->mean, stat->min, stat->max);
329}
330
331static int hctx_stats_show(struct seq_file *m, void *v)
332{
333 struct blk_mq_hw_ctx *hctx = m->private;
334 struct blk_rq_stat stat[2];
335
336 blk_stat_init(&stat[BLK_STAT_READ]);
337 blk_stat_init(&stat[BLK_STAT_WRITE]);
338
339 blk_hctx_stat_get(hctx, stat);
340
341 seq_puts(m, "read: ");
342 print_stat(m, &stat[BLK_STAT_READ]);
343 seq_puts(m, "\n");
344
345 seq_puts(m, "write: ");
346 print_stat(m, &stat[BLK_STAT_WRITE]);
347 seq_puts(m, "\n");
348 return 0;
349}
350
351static int hctx_stats_open(struct inode *inode, struct file *file)
352{
353 return single_open(file, hctx_stats_show, inode->i_private);
354}
355
356static ssize_t hctx_stats_write(struct file *file, const char __user *buf,
357 size_t count, loff_t *ppos)
358{
359 struct seq_file *m = file->private_data;
360 struct blk_mq_hw_ctx *hctx = m->private;
361 struct blk_mq_ctx *ctx;
362 int i;
363
364 hctx_for_each_ctx(hctx, ctx, i) {
365 blk_stat_init(&ctx->stat[BLK_STAT_READ]);
366 blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
367 }
368 return count;
369}
370
371static const struct file_operations hctx_stats_fops = {
372 .open = hctx_stats_open,
373 .read = seq_read,
374 .write = hctx_stats_write,
375 .llseek = seq_lseek,
376 .release = single_release,
377};
378
379static int hctx_dispatched_show(struct seq_file *m, void *v)
380{
381 struct blk_mq_hw_ctx *hctx = m->private;
382 int i;
383
384 seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
385
386 for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
387 unsigned int d = 1U << (i - 1);
388
389 seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]);
390 }
391
392 seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]);
393 return 0;
394}
395
396static int hctx_dispatched_open(struct inode *inode, struct file *file)
397{
398 return single_open(file, hctx_dispatched_show, inode->i_private);
399}
400
401static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf,
402 size_t count, loff_t *ppos)
403{
404 struct seq_file *m = file->private_data;
405 struct blk_mq_hw_ctx *hctx = m->private;
406 int i;
407
408 for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++)
409 hctx->dispatched[i] = 0;
410 return count;
411}
412
413static const struct file_operations hctx_dispatched_fops = {
414 .open = hctx_dispatched_open,
415 .read = seq_read,
416 .write = hctx_dispatched_write,
417 .llseek = seq_lseek,
418 .release = single_release,
419};
420
421static int hctx_queued_show(struct seq_file *m, void *v)
422{
423 struct blk_mq_hw_ctx *hctx = m->private;
424
425 seq_printf(m, "%lu\n", hctx->queued);
426 return 0;
427}
428
429static int hctx_queued_open(struct inode *inode, struct file *file)
430{
431 return single_open(file, hctx_queued_show, inode->i_private);
432}
433
434static ssize_t hctx_queued_write(struct file *file, const char __user *buf,
435 size_t count, loff_t *ppos)
436{
437 struct seq_file *m = file->private_data;
438 struct blk_mq_hw_ctx *hctx = m->private;
439
440 hctx->queued = 0;
441 return count;
442}
443
444static const struct file_operations hctx_queued_fops = {
445 .open = hctx_queued_open,
446 .read = seq_read,
447 .write = hctx_queued_write,
448 .llseek = seq_lseek,
449 .release = single_release,
450};
451
452static int hctx_run_show(struct seq_file *m, void *v)
453{
454 struct blk_mq_hw_ctx *hctx = m->private;
455
456 seq_printf(m, "%lu\n", hctx->run);
457 return 0;
458}
459
460static int hctx_run_open(struct inode *inode, struct file *file)
461{
462 return single_open(file, hctx_run_show, inode->i_private);
463}
464
465static ssize_t hctx_run_write(struct file *file, const char __user *buf,
466 size_t count, loff_t *ppos)
467{
468 struct seq_file *m = file->private_data;
469 struct blk_mq_hw_ctx *hctx = m->private;
470
471 hctx->run = 0;
472 return count;
473}
474
475static const struct file_operations hctx_run_fops = {
476 .open = hctx_run_open,
477 .read = seq_read,
478 .write = hctx_run_write,
479 .llseek = seq_lseek,
480 .release = single_release,
481};
482
483static int hctx_active_show(struct seq_file *m, void *v)
484{
485 struct blk_mq_hw_ctx *hctx = m->private;
486
487 seq_printf(m, "%d\n", atomic_read(&hctx->nr_active));
488 return 0;
489}
490
491static int hctx_active_open(struct inode *inode, struct file *file)
492{
493 return single_open(file, hctx_active_show, inode->i_private);
494}
495
496static const struct file_operations hctx_active_fops = {
497 .open = hctx_active_open,
498 .read = seq_read,
499 .llseek = seq_lseek,
500 .release = single_release,
501};
502
503static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
504 __acquires(&ctx->lock)
505{
506 struct blk_mq_ctx *ctx = m->private;
507
508 spin_lock(&ctx->lock);
509 return seq_list_start(&ctx->rq_list, *pos);
510}
511
512static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
513{
514 struct blk_mq_ctx *ctx = m->private;
515
516 return seq_list_next(v, &ctx->rq_list, pos);
517}
518
519static void ctx_rq_list_stop(struct seq_file *m, void *v)
520 __releases(&ctx->lock)
521{
522 struct blk_mq_ctx *ctx = m->private;
523
524 spin_unlock(&ctx->lock);
525}
526
527static const struct seq_operations ctx_rq_list_seq_ops = {
528 .start = ctx_rq_list_start,
529 .next = ctx_rq_list_next,
530 .stop = ctx_rq_list_stop,
531 .show = blk_mq_debugfs_rq_show,
532};
533
534static int ctx_rq_list_open(struct inode *inode, struct file *file)
535{
536 return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops);
537}
538
539static const struct file_operations ctx_rq_list_fops = {
540 .open = ctx_rq_list_open,
541 .read = seq_read,
542 .llseek = seq_lseek,
543 .release = seq_release,
544};
545
546static int ctx_dispatched_show(struct seq_file *m, void *v)
547{
548 struct blk_mq_ctx *ctx = m->private;
549
550 seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]);
551 return 0;
552}
553
554static int ctx_dispatched_open(struct inode *inode, struct file *file)
555{
556 return single_open(file, ctx_dispatched_show, inode->i_private);
557}
558
559static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf,
560 size_t count, loff_t *ppos)
561{
562 struct seq_file *m = file->private_data;
563 struct blk_mq_ctx *ctx = m->private;
564
565 ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0;
566 return count;
567}
568
569static const struct file_operations ctx_dispatched_fops = {
570 .open = ctx_dispatched_open,
571 .read = seq_read,
572 .write = ctx_dispatched_write,
573 .llseek = seq_lseek,
574 .release = single_release,
575};
576
577static int ctx_merged_show(struct seq_file *m, void *v)
578{
579 struct blk_mq_ctx *ctx = m->private;
580
581 seq_printf(m, "%lu\n", ctx->rq_merged);
582 return 0;
583}
584
585static int ctx_merged_open(struct inode *inode, struct file *file)
586{
587 return single_open(file, ctx_merged_show, inode->i_private);
588}
589
590static ssize_t ctx_merged_write(struct file *file, const char __user *buf,
591 size_t count, loff_t *ppos)
592{
593 struct seq_file *m = file->private_data;
594 struct blk_mq_ctx *ctx = m->private;
595
596 ctx->rq_merged = 0;
597 return count;
598}
599
600static const struct file_operations ctx_merged_fops = {
601 .open = ctx_merged_open,
602 .read = seq_read,
603 .write = ctx_merged_write,
604 .llseek = seq_lseek,
605 .release = single_release,
606};
607
608static int ctx_completed_show(struct seq_file *m, void *v)
609{
610 struct blk_mq_ctx *ctx = m->private;
611
612 seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]);
613 return 0;
614}
615
616static int ctx_completed_open(struct inode *inode, struct file *file)
617{
618 return single_open(file, ctx_completed_show, inode->i_private);
619}
620
621static ssize_t ctx_completed_write(struct file *file, const char __user *buf,
622 size_t count, loff_t *ppos)
623{
624 struct seq_file *m = file->private_data;
625 struct blk_mq_ctx *ctx = m->private;
626
627 ctx->rq_completed[0] = ctx->rq_completed[1] = 0;
628 return count;
629}
630
631static const struct file_operations ctx_completed_fops = {
632 .open = ctx_completed_open,
633 .read = seq_read,
634 .write = ctx_completed_write,
635 .llseek = seq_lseek,
636 .release = single_release,
637};
638
639static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
640 {"state", 0400, &hctx_state_fops},
641 {"flags", 0400, &hctx_flags_fops},
642 {"dispatch", 0400, &hctx_dispatch_fops},
643 {"ctx_map", 0400, &hctx_ctx_map_fops},
644 {"tags", 0400, &hctx_tags_fops},
645 {"tags_bitmap", 0400, &hctx_tags_bitmap_fops},
646 {"sched_tags", 0400, &hctx_sched_tags_fops},
647 {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops},
648 {"io_poll", 0600, &hctx_io_poll_fops},
649 {"stats", 0600, &hctx_stats_fops},
650 {"dispatched", 0600, &hctx_dispatched_fops},
651 {"queued", 0600, &hctx_queued_fops},
652 {"run", 0600, &hctx_run_fops},
653 {"active", 0400, &hctx_active_fops},
654 {},
655};
656
657static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
658 {"rq_list", 0400, &ctx_rq_list_fops},
659 {"dispatched", 0600, &ctx_dispatched_fops},
660 {"merged", 0600, &ctx_merged_fops},
661 {"completed", 0600, &ctx_completed_fops},
662 {},
663};
664
665int blk_mq_debugfs_register(struct request_queue *q, const char *name)
666{
667 if (!blk_debugfs_root)
668 return -ENOENT;
669
670 q->debugfs_dir = debugfs_create_dir(name, blk_debugfs_root);
671 if (!q->debugfs_dir)
672 goto err;
673
674 if (blk_mq_debugfs_register_hctxs(q))
675 goto err;
676
677 return 0;
678
679err:
680 blk_mq_debugfs_unregister(q);
681 return -ENOMEM;
682}
683
684void blk_mq_debugfs_unregister(struct request_queue *q)
685{
686 debugfs_remove_recursive(q->debugfs_dir);
687 q->mq_debugfs_dir = NULL;
688 q->debugfs_dir = NULL;
689}
690
691static bool debugfs_create_files(struct dentry *parent, void *data,
692 const struct blk_mq_debugfs_attr *attr)
693{
694 for (; attr->name; attr++) {
695 if (!debugfs_create_file(attr->name, attr->mode, parent,
696 data, attr->fops))
697 return false;
698 }
699 return true;
700}
701
702static int blk_mq_debugfs_register_ctx(struct request_queue *q,
703 struct blk_mq_ctx *ctx,
704 struct dentry *hctx_dir)
705{
706 struct dentry *ctx_dir;
707 char name[20];
708
709 snprintf(name, sizeof(name), "cpu%u", ctx->cpu);
710 ctx_dir = debugfs_create_dir(name, hctx_dir);
711 if (!ctx_dir)
712 return -ENOMEM;
713
714 if (!debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs))
715 return -ENOMEM;
716
717 return 0;
718}
719
720static int blk_mq_debugfs_register_hctx(struct request_queue *q,
721 struct blk_mq_hw_ctx *hctx)
722{
723 struct blk_mq_ctx *ctx;
724 struct dentry *hctx_dir;
725 char name[20];
726 int i;
727
728 snprintf(name, sizeof(name), "%u", hctx->queue_num);
729 hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir);
730 if (!hctx_dir)
731 return -ENOMEM;
732
733 if (!debugfs_create_files(hctx_dir, hctx, blk_mq_debugfs_hctx_attrs))
734 return -ENOMEM;
735
736 hctx_for_each_ctx(hctx, ctx, i) {
737 if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir))
738 return -ENOMEM;
739 }
740
741 return 0;
742}
743
744int blk_mq_debugfs_register_hctxs(struct request_queue *q)
745{
746 struct blk_mq_hw_ctx *hctx;
747 int i;
748
749 if (!q->debugfs_dir)
750 return -ENOENT;
751
752 q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir);
753 if (!q->mq_debugfs_dir)
754 goto err;
755
756 queue_for_each_hw_ctx(q, hctx, i) {
757 if (blk_mq_debugfs_register_hctx(q, hctx))
758 goto err;
759 }
760
761 return 0;
762
763err:
764 blk_mq_debugfs_unregister_hctxs(q);
765 return -ENOMEM;
766}
767
768void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
769{
770 debugfs_remove_recursive(q->mq_debugfs_dir);
771 q->mq_debugfs_dir = NULL;
772}
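
The new blk-mq-debugfs.c above registers its files by walking tables of {name, mode, fops} entries terminated by an empty record (see debugfs_create_files() and the *_attrs arrays). Below is a self-contained imitation of that table-walk pattern; there is no real debugfs here and every type and name is a stand-in:

#include <stdio.h>

struct demo_attr {
        const char *name;
        unsigned int mode;
        int (*show)(void *data);
};

static int show_state(void *data) { printf("  state of %s\n", (char *)data); return 0; }
static int show_flags(void *data) { printf("  flags of %s\n", (char *)data); return 0; }

static const struct demo_attr demo_hctx_attrs[] = {
        { "state", 0400, show_state },
        { "flags", 0400, show_flags },
        { 0 },  /* terminator, like the empty {} entry in the kernel tables */
};

static void demo_create_files(void *data, const struct demo_attr *attr)
{
        for (; attr->name; attr++) {
                /* stand-in for debugfs_create_file(name, mode, dir, data, fops);
                 * a real version would bail out and unwind on failure */
                printf("create %s (mode %o)\n", attr->name, attr->mode);
                attr->show(data);
        }
}

int main(void)
{
        demo_create_files("hctx0", demo_hctx_attrs);
        return 0;
}

Adding a new debugfs attribute then only means appending one table entry, which is why the sysfs counterparts removed later in this series could migrate here so mechanically.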
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
new file mode 100644
index 000000000000..9e8d6795a8c1
--- /dev/null
+++ b/block/blk-mq-sched.c
@@ -0,0 +1,515 @@
1/*
2 * blk-mq scheduling framework
3 *
4 * Copyright (C) 2016 Jens Axboe
5 */
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/blk-mq.h>
9
10#include <trace/events/block.h>
11
12#include "blk.h"
13#include "blk-mq.h"
14#include "blk-mq-sched.h"
15#include "blk-mq-tag.h"
16#include "blk-wbt.h"
17
18void blk_mq_sched_free_hctx_data(struct request_queue *q,
19 void (*exit)(struct blk_mq_hw_ctx *))
20{
21 struct blk_mq_hw_ctx *hctx;
22 int i;
23
24 queue_for_each_hw_ctx(q, hctx, i) {
25 if (exit && hctx->sched_data)
26 exit(hctx);
27 kfree(hctx->sched_data);
28 hctx->sched_data = NULL;
29 }
30}
31EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
32
33int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
34 int (*init)(struct blk_mq_hw_ctx *),
35 void (*exit)(struct blk_mq_hw_ctx *))
36{
37 struct blk_mq_hw_ctx *hctx;
38 int ret;
39 int i;
40
41 queue_for_each_hw_ctx(q, hctx, i) {
42 hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
43 if (!hctx->sched_data) {
44 ret = -ENOMEM;
45 goto error;
46 }
47
48 if (init) {
49 ret = init(hctx);
50 if (ret) {
51 /*
52 * We don't want to give exit() a partially
53 * initialized sched_data. init() must clean up
54 * if it fails.
55 */
56 kfree(hctx->sched_data);
57 hctx->sched_data = NULL;
58 goto error;
59 }
60 }
61 }
62
63 return 0;
64error:
65 blk_mq_sched_free_hctx_data(q, exit);
66 return ret;
67}
68EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
69
70static void __blk_mq_sched_assign_ioc(struct request_queue *q,
71 struct request *rq,
72 struct bio *bio,
73 struct io_context *ioc)
74{
75 struct io_cq *icq;
76
77 spin_lock_irq(q->queue_lock);
78 icq = ioc_lookup_icq(ioc, q);
79 spin_unlock_irq(q->queue_lock);
80
81 if (!icq) {
82 icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
83 if (!icq)
84 return;
85 }
86
87 rq->elv.icq = icq;
88 if (!blk_mq_sched_get_rq_priv(q, rq, bio)) {
89 rq->rq_flags |= RQF_ELVPRIV;
90 get_io_context(icq->ioc);
91 return;
92 }
93
94 rq->elv.icq = NULL;
95}
96
97static void blk_mq_sched_assign_ioc(struct request_queue *q,
98 struct request *rq, struct bio *bio)
99{
100 struct io_context *ioc;
101
102 ioc = rq_ioc(bio);
103 if (ioc)
104 __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
105}
106
107struct request *blk_mq_sched_get_request(struct request_queue *q,
108 struct bio *bio,
109 unsigned int op,
110 struct blk_mq_alloc_data *data)
111{
112 struct elevator_queue *e = q->elevator;
113 struct blk_mq_hw_ctx *hctx;
114 struct blk_mq_ctx *ctx;
115 struct request *rq;
116
117 blk_queue_enter_live(q);
118 ctx = blk_mq_get_ctx(q);
119 hctx = blk_mq_map_queue(q, ctx->cpu);
120
121 blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
122
123 if (e) {
124 data->flags |= BLK_MQ_REQ_INTERNAL;
125
126 /*
127 * Flush requests are special and go directly to the
128 * dispatch list.
129 */
130 if (!op_is_flush(op) && e->type->ops.mq.get_request) {
131 rq = e->type->ops.mq.get_request(q, op, data);
132 if (rq)
133 rq->rq_flags |= RQF_QUEUED;
134 } else
135 rq = __blk_mq_alloc_request(data, op);
136 } else {
137 rq = __blk_mq_alloc_request(data, op);
138 if (rq)
139 data->hctx->tags->rqs[rq->tag] = rq;
140 }
141
142 if (rq) {
143 if (!op_is_flush(op)) {
144 rq->elv.icq = NULL;
145 if (e && e->type->icq_cache)
146 blk_mq_sched_assign_ioc(q, rq, bio);
147 }
148 data->hctx->queued++;
149 return rq;
150 }
151
152 blk_queue_exit(q);
153 return NULL;
154}
155
156void blk_mq_sched_put_request(struct request *rq)
157{
158 struct request_queue *q = rq->q;
159 struct elevator_queue *e = q->elevator;
160
161 if (rq->rq_flags & RQF_ELVPRIV) {
162 blk_mq_sched_put_rq_priv(rq->q, rq);
163 if (rq->elv.icq) {
164 put_io_context(rq->elv.icq->ioc);
165 rq->elv.icq = NULL;
166 }
167 }
168
169 if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
170 e->type->ops.mq.put_request(rq);
171 else
172 blk_mq_finish_request(rq);
173}
174
175void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
176{
177 struct elevator_queue *e = hctx->queue->elevator;
178 const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
179 bool did_work = false;
180 LIST_HEAD(rq_list);
181
182 if (unlikely(blk_mq_hctx_stopped(hctx)))
183 return;
184
185 hctx->run++;
186
187 /*
188 * If we have previous entries on our dispatch list, grab them first for
189 * more fair dispatch.
190 */
191 if (!list_empty_careful(&hctx->dispatch)) {
192 spin_lock(&hctx->lock);
193 if (!list_empty(&hctx->dispatch))
194 list_splice_init(&hctx->dispatch, &rq_list);
195 spin_unlock(&hctx->lock);
196 }
197
198 /*
199 * Only ask the scheduler for requests, if we didn't have residual
200 * requests from the dispatch list. This is to avoid the case where
201 * we only ever dispatch a fraction of the requests available because
202 * of low device queue depth. Once we pull requests out of the IO
203 * scheduler, we can no longer merge or sort them. So it's best to
204 * leave them there for as long as we can. Mark the hw queue as
205 * needing a restart in that case.
206 */
207 if (!list_empty(&rq_list)) {
208 blk_mq_sched_mark_restart(hctx);
209 did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
210 } else if (!has_sched_dispatch) {
211 blk_mq_flush_busy_ctxs(hctx, &rq_list);
212 blk_mq_dispatch_rq_list(hctx, &rq_list);
213 }
214
215 /*
216 * We want to dispatch from the scheduler if we had no work left
217 * on the dispatch list, OR if we did have work but weren't able
218 * to make progress.
219 */
220 if (!did_work && has_sched_dispatch) {
221 do {
222 struct request *rq;
223
224 rq = e->type->ops.mq.dispatch_request(hctx);
225 if (!rq)
226 break;
227 list_add(&rq->queuelist, &rq_list);
228 } while (blk_mq_dispatch_rq_list(hctx, &rq_list));
229 }
230}
231
232void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
233 struct list_head *rq_list,
234 struct request *(*get_rq)(struct blk_mq_hw_ctx *))
235{
236 do {
237 struct request *rq;
238
239 rq = get_rq(hctx);
240 if (!rq)
241 break;
242
243 list_add_tail(&rq->queuelist, rq_list);
244 } while (1);
245}
246EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
247
248bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
249 struct request **merged_request)
250{
251 struct request *rq;
252
253 switch (elv_merge(q, &rq, bio)) {
254 case ELEVATOR_BACK_MERGE:
255 if (!blk_mq_sched_allow_merge(q, rq, bio))
256 return false;
257 if (!bio_attempt_back_merge(q, rq, bio))
258 return false;
259 *merged_request = attempt_back_merge(q, rq);
260 if (!*merged_request)
261 elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
262 return true;
263 case ELEVATOR_FRONT_MERGE:
264 if (!blk_mq_sched_allow_merge(q, rq, bio))
265 return false;
266 if (!bio_attempt_front_merge(q, rq, bio))
267 return false;
268 *merged_request = attempt_front_merge(q, rq);
269 if (!*merged_request)
270 elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
271 return true;
272 default:
273 return false;
274 }
275}
276EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
277
278bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
279{
280 struct elevator_queue *e = q->elevator;
281
282 if (e->type->ops.mq.bio_merge) {
283 struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
284 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
285
286 blk_mq_put_ctx(ctx);
287 return e->type->ops.mq.bio_merge(hctx, bio);
288 }
289
290 return false;
291}
292
293bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
294{
295 return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
296}
297EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
298
299void blk_mq_sched_request_inserted(struct request *rq)
300{
301 trace_block_rq_insert(rq->q, rq);
302}
303EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
304
305static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
306 struct request *rq)
307{
308 if (rq->tag == -1) {
309 rq->rq_flags |= RQF_SORTED;
310 return false;
311 }
312
313 /*
314 * If we already have a real request tag, send directly to
315 * the dispatch list.
316 */
317 spin_lock(&hctx->lock);
318 list_add(&rq->queuelist, &hctx->dispatch);
319 spin_unlock(&hctx->lock);
320 return true;
321}
322
323static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
324{
325 if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
326 clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
327 if (blk_mq_hctx_has_pending(hctx))
328 blk_mq_run_hw_queue(hctx, true);
329 }
330}
331
332void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
333{
334 unsigned int i;
335
336 if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
337 blk_mq_sched_restart_hctx(hctx);
338 else {
339 struct request_queue *q = hctx->queue;
340
341 if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
342 return;
343
344 clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
345
346 queue_for_each_hw_ctx(q, hctx, i)
347 blk_mq_sched_restart_hctx(hctx);
348 }
349}
350
351/*
352 * Add flush/fua to the queue. If we fail getting a driver tag, then
353 * punt to the requeue list. Requeue will re-invoke us from a context
354 * that's safe to block from.
355 */
356static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
357 struct request *rq, bool can_block)
358{
359 if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
360 blk_insert_flush(rq);
361 blk_mq_run_hw_queue(hctx, true);
362 } else
363 blk_mq_add_to_requeue_list(rq, false, true);
364}
365
366void blk_mq_sched_insert_request(struct request *rq, bool at_head,
367 bool run_queue, bool async, bool can_block)
368{
369 struct request_queue *q = rq->q;
370 struct elevator_queue *e = q->elevator;
371 struct blk_mq_ctx *ctx = rq->mq_ctx;
372 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
373
374 if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
375 blk_mq_sched_insert_flush(hctx, rq, can_block);
376 return;
377 }
378
379 if (e && blk_mq_sched_bypass_insert(hctx, rq))
380 goto run;
381
382 if (e && e->type->ops.mq.insert_requests) {
383 LIST_HEAD(list);
384
385 list_add(&rq->queuelist, &list);
386 e->type->ops.mq.insert_requests(hctx, &list, at_head);
387 } else {
388 spin_lock(&ctx->lock);
389 __blk_mq_insert_request(hctx, rq, at_head);
390 spin_unlock(&ctx->lock);
391 }
392
393run:
394 if (run_queue)
395 blk_mq_run_hw_queue(hctx, async);
396}
397
398void blk_mq_sched_insert_requests(struct request_queue *q,
399 struct blk_mq_ctx *ctx,
400 struct list_head *list, bool run_queue_async)
401{
402 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
403 struct elevator_queue *e = hctx->queue->elevator;
404
405 if (e) {
406 struct request *rq, *next;
407
408 /*
409 * We bypass requests that already have a driver tag assigned,
410 * which should only be flushes. Flushes are only ever inserted
411 * as single requests, so we shouldn't ever hit the
412 * WARN_ON_ONCE() below (but let's handle it just in case).
413 */
414 list_for_each_entry_safe(rq, next, list, queuelist) {
415 if (WARN_ON_ONCE(rq->tag != -1)) {
416 list_del_init(&rq->queuelist);
417 blk_mq_sched_bypass_insert(hctx, rq);
418 }
419 }
420 }
421
422 if (e && e->type->ops.mq.insert_requests)
423 e->type->ops.mq.insert_requests(hctx, list, false);
424 else
425 blk_mq_insert_requests(hctx, ctx, list);
426
427 blk_mq_run_hw_queue(hctx, run_queue_async);
428}
429
430static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
431 struct blk_mq_hw_ctx *hctx,
432 unsigned int hctx_idx)
433{
434 if (hctx->sched_tags) {
435 blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
436 blk_mq_free_rq_map(hctx->sched_tags);
437 hctx->sched_tags = NULL;
438 }
439}
440
441int blk_mq_sched_setup(struct request_queue *q)
442{
443 struct blk_mq_tag_set *set = q->tag_set;
444 struct blk_mq_hw_ctx *hctx;
445 int ret, i;
446
447 /*
448 * Default to 256, since we don't split into sync/async like the
449 * old code did. Additionally, this is a per-hw queue depth.
450 */
451 q->nr_requests = 2 * BLKDEV_MAX_RQ;
452
453 /*
454 * We're switching to using an IO scheduler, so setup the hctx
455 * scheduler tags and switch the request map from the regular
456 * tags to scheduler tags. First allocate what we need, so we
457 * can safely fail and fallback, if needed.
458 */
459 ret = 0;
460 queue_for_each_hw_ctx(q, hctx, i) {
461 hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
462 if (!hctx->sched_tags) {
463 ret = -ENOMEM;
464 break;
465 }
466 ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
467 if (ret)
468 break;
469 }
470
471 /*
472 * If we failed, free what we did allocate
473 */
474 if (ret) {
475 queue_for_each_hw_ctx(q, hctx, i) {
476 if (!hctx->sched_tags)
477 continue;
478 blk_mq_sched_free_tags(set, hctx, i);
479 }
480
481 return ret;
482 }
483
484 return 0;
485}
486
487void blk_mq_sched_teardown(struct request_queue *q)
488{
489 struct blk_mq_tag_set *set = q->tag_set;
490 struct blk_mq_hw_ctx *hctx;
491 int i;
492
493 queue_for_each_hw_ctx(q, hctx, i)
494 blk_mq_sched_free_tags(set, hctx, i);
495}
496
497int blk_mq_sched_init(struct request_queue *q)
498{
499 int ret;
500
501#if defined(CONFIG_DEFAULT_SQ_NONE)
502 if (q->nr_hw_queues == 1)
503 return 0;
504#endif
505#if defined(CONFIG_DEFAULT_MQ_NONE)
506 if (q->nr_hw_queues > 1)
507 return 0;
508#endif
509
510 mutex_lock(&q->sysfs_lock);
511 ret = elevator_init(q, NULL);
512 mutex_unlock(&q->sysfs_lock);
513
514 return ret;
515}
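
The dispatch loop in blk_mq_sched_dispatch_requests() above encodes a policy: requests stranded on the hctx dispatch list from earlier runs go out first, and the scheduler is only drained one request at a time so unsent work stays inside it where it can still be merged and sorted. The toy below models only that ordering with a fake per-run device budget; it simplifies the condition for asking the scheduler (the real code keys it off whether the leftover dispatch made progress) and ignores the restart marking, and every name is invented:

#include <stdio.h>

#define NR(a) ((int)(sizeof(a) / sizeof((a)[0])))

static int leftovers[]  = { 10, 11 };           /* stranded on hctx->dispatch */
static int sched_reqs[] = { 20, 21, 22 };       /* still inside the scheduler */

/* Pretend driver: accepts requests until its budget for this run is gone. */
static int demo_queue_rq(int req, int *budget)
{
        if (*budget == 0)
                return 0;
        printf("issued request %d\n", req);
        (*budget)--;
        return 1;
}

int main(void)
{
        int budget = 2;         /* device accepts two requests this run */
        int i;

        /* 1) previously unserviced requests first, for fairness */
        for (i = 0; i < NR(leftovers); i++)
                if (!demo_queue_rq(leftovers[i], &budget))
                        break;

        /* 2) only then pull from the scheduler, one request at a time,
         *    stopping as soon as the device stops accepting */
        if (i == NR(leftovers))
                for (i = 0; i < NR(sched_reqs); i++)
                        if (!demo_queue_rq(sched_reqs[i], &budget))
                                break;

        return 0;
}

Running this with a budget of two issues only the two leftovers; the scheduler's requests never leave it, which is exactly the behaviour the comment in the kernel function argues for.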
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
new file mode 100644
index 000000000000..7b5f3b95c78e
--- /dev/null
+++ b/block/blk-mq-sched.h
@@ -0,0 +1,143 @@
1#ifndef BLK_MQ_SCHED_H
2#define BLK_MQ_SCHED_H
3
4#include "blk-mq.h"
5#include "blk-mq-tag.h"
6
7int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
8 int (*init)(struct blk_mq_hw_ctx *),
9 void (*exit)(struct blk_mq_hw_ctx *));
10
11void blk_mq_sched_free_hctx_data(struct request_queue *q,
12 void (*exit)(struct blk_mq_hw_ctx *));
13
14struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
15void blk_mq_sched_put_request(struct request *rq);
16
17void blk_mq_sched_request_inserted(struct request *rq);
18bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
19 struct request **merged_request);
20bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
21bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
22void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
23
24void blk_mq_sched_insert_request(struct request *rq, bool at_head,
25 bool run_queue, bool async, bool can_block);
26void blk_mq_sched_insert_requests(struct request_queue *q,
27 struct blk_mq_ctx *ctx,
28 struct list_head *list, bool run_queue_async);
29
30void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
31void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
32 struct list_head *rq_list,
33 struct request *(*get_rq)(struct blk_mq_hw_ctx *));
34
35int blk_mq_sched_setup(struct request_queue *q);
36void blk_mq_sched_teardown(struct request_queue *q);
37
38int blk_mq_sched_init(struct request_queue *q);
39
40static inline bool
41blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
42{
43 struct elevator_queue *e = q->elevator;
44
45 if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
46 return false;
47
48 return __blk_mq_sched_bio_merge(q, bio);
49}
50
51static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
52 struct request *rq,
53 struct bio *bio)
54{
55 struct elevator_queue *e = q->elevator;
56
57 if (e && e->type->ops.mq.get_rq_priv)
58 return e->type->ops.mq.get_rq_priv(q, rq, bio);
59
60 return 0;
61}
62
63static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
64 struct request *rq)
65{
66 struct elevator_queue *e = q->elevator;
67
68 if (e && e->type->ops.mq.put_rq_priv)
69 e->type->ops.mq.put_rq_priv(q, rq);
70}
71
72static inline bool
73blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
74 struct bio *bio)
75{
76 struct elevator_queue *e = q->elevator;
77
78 if (e && e->type->ops.mq.allow_merge)
79 return e->type->ops.mq.allow_merge(q, rq, bio);
80
81 return true;
82}
83
84static inline void
85blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
86{
87 struct elevator_queue *e = hctx->queue->elevator;
88
89 if (e && e->type->ops.mq.completed_request)
90 e->type->ops.mq.completed_request(hctx, rq);
91
92 BUG_ON(rq->internal_tag == -1);
93
94 blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
95}
96
97static inline void blk_mq_sched_started_request(struct request *rq)
98{
99 struct request_queue *q = rq->q;
100 struct elevator_queue *e = q->elevator;
101
102 if (e && e->type->ops.mq.started_request)
103 e->type->ops.mq.started_request(rq);
104}
105
106static inline void blk_mq_sched_requeue_request(struct request *rq)
107{
108 struct request_queue *q = rq->q;
109 struct elevator_queue *e = q->elevator;
110
111 if (e && e->type->ops.mq.requeue_request)
112 e->type->ops.mq.requeue_request(rq);
113}
114
115static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
116{
117 struct elevator_queue *e = hctx->queue->elevator;
118
119 if (e && e->type->ops.mq.has_work)
120 return e->type->ops.mq.has_work(hctx);
121
122 return false;
123}
124
125static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
126{
127 if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
128 set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
129 if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
130 struct request_queue *q = hctx->queue;
131
132 if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
133 set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
134 }
135 }
136}
137
138static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
139{
140 return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
141}
142
143#endif
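
blk_mq_sched_mark_restart() and blk_mq_sched_needs_restart() above define a small handshake: the dispatch path sets a "needs restart" bit when it leaves work behind, and the completion path clears the bit and re-runs the queue so that stalled work gets another chance. A single-flag userspace model of that handshake, assuming invented names and collapsing the separate per-hctx and per-queue (shared-tag) bits into one atomic:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_hctx {
        atomic_bool needs_restart;
        bool has_pending;
};

static void demo_run_hw_queue(struct demo_hctx *hctx)
{
        printf("running hw queue (pending=%d)\n", hctx->has_pending);
}

static void demo_mark_restart(struct demo_hctx *hctx)
{
        /* dispatch path: device was busy, remember to come back */
        atomic_store(&hctx->needs_restart, true);
}

static void demo_restart_on_completion(struct demo_hctx *hctx)
{
        /* completion path: clear-and-run only if a restart was requested */
        if (atomic_exchange(&hctx->needs_restart, false) && hctx->has_pending)
                demo_run_hw_queue(hctx);
}

int main(void)
{
        struct demo_hctx hctx = { .has_pending = true };

        atomic_init(&hctx.needs_restart, false);
        demo_mark_restart(&hctx);               /* dispatch hit a busy device */
        demo_restart_on_completion(&hctx);      /* a completion kicks the queue */
        return 0;
}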
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index eacd3af72099..295e69670c39 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
122 return res; 122 return res;
123} 123}
124 124
125static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page) 125static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
126{
127 return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
128 ctx->rq_dispatched[0]);
129}
130
131static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
132{
133 return sprintf(page, "%lu\n", ctx->rq_merged);
134}
135
136static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
137{
138 return sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
139 ctx->rq_completed[0]);
140}
141
142static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
143{
144 struct request *rq;
145 int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg);
146
147 list_for_each_entry(rq, list, queuelist) {
148 const int rq_len = 2 * sizeof(rq) + 2;
149
150 /* if the output will be truncated */
151 if (PAGE_SIZE - 1 < len + rq_len) {
152 /* backspacing if it can't hold '\t...\n' */
153 if (PAGE_SIZE - 1 < len + 5)
154 len -= rq_len;
155 len += snprintf(page + len, PAGE_SIZE - 1 - len,
156 "\t...\n");
157 break;
158 }
159 len += snprintf(page + len, PAGE_SIZE - 1 - len,
160 "\t%p\n", rq);
161 }
162
163 return len;
164}
165
166static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
167{
168 ssize_t ret;
169
170 spin_lock(&ctx->lock);
171 ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
172 spin_unlock(&ctx->lock);
173
174 return ret;
175}
176
177static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
178{
179 return sprintf(page, "considered=%lu, invoked=%lu, success=%lu\n",
180 hctx->poll_considered, hctx->poll_invoked,
181 hctx->poll_success);
182}
183
184static ssize_t blk_mq_hw_sysfs_poll_store(struct blk_mq_hw_ctx *hctx,
185 const char *page, size_t size)
186{
187 hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
188
189 return size;
190}
191
192static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
193 char *page)
194{
195 return sprintf(page, "%lu\n", hctx->queued);
196}
197
198static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
199{
200 return sprintf(page, "%lu\n", hctx->run);
201}
202
203static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
204 char *page)
205{
206 char *start_page = page;
207 int i;
208
209 page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
210
211 for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
212 unsigned int d = 1U << (i - 1);
213
214 page += sprintf(page, "%8u\t%lu\n", d, hctx->dispatched[i]);
215 }
216
217 page += sprintf(page, "%8u+\t%lu\n", 1U << (i - 1),
218 hctx->dispatched[i]);
219 return page - start_page;
220}
221
222static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
223 char *page) 126 char *page)
224{ 127{
225 ssize_t ret; 128 return sprintf(page, "%u\n", hctx->tags->nr_tags);
226
227 spin_lock(&hctx->lock);
228 ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
229 spin_unlock(&hctx->lock);
230
231 return ret;
232} 129}
233 130
234static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) 131static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
132 char *page)
235{ 133{
236 return blk_mq_tag_sysfs_show(hctx->tags, page); 134 return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags);
237}
238
239static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
240{
241 return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
242} 135}
243 136
244static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) 137static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
@@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
259 return ret; 152 return ret;
260} 153}
261 154
262static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
263{
264 struct blk_mq_ctx *ctx;
265 unsigned int i;
266
267 hctx_for_each_ctx(hctx, ctx, i) {
268 blk_stat_init(&ctx->stat[BLK_STAT_READ]);
269 blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
270 }
271}
272
273static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
274 const char *page, size_t count)
275{
276 blk_mq_stat_clear(hctx);
277 return count;
278}
279
280static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
281{
282 return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
283 pre, (long long) stat->nr_samples,
284 (long long) stat->mean, (long long) stat->min,
285 (long long) stat->max);
286}
287
288static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
289{
290 struct blk_rq_stat stat[2];
291 ssize_t ret;
292
293 blk_stat_init(&stat[BLK_STAT_READ]);
294 blk_stat_init(&stat[BLK_STAT_WRITE]);
295
296 blk_hctx_stat_get(hctx, stat);
297
298 ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
299 ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
300 return ret;
301}
302
303static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
304 .attr = {.name = "dispatched", .mode = S_IRUGO },
305 .show = blk_mq_sysfs_dispatched_show,
306};
307static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
308 .attr = {.name = "merged", .mode = S_IRUGO },
309 .show = blk_mq_sysfs_merged_show,
310};
311static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
312 .attr = {.name = "completed", .mode = S_IRUGO },
313 .show = blk_mq_sysfs_completed_show,
314};
315static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
316 .attr = {.name = "rq_list", .mode = S_IRUGO },
317 .show = blk_mq_sysfs_rq_list_show,
318};
319
320static struct attribute *default_ctx_attrs[] = { 155static struct attribute *default_ctx_attrs[] = {
321 &blk_mq_sysfs_dispatched.attr,
322 &blk_mq_sysfs_merged.attr,
323 &blk_mq_sysfs_completed.attr,
324 &blk_mq_sysfs_rq_list.attr,
325 NULL, 156 NULL,
326}; 157};
327 158
328static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = { 159static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
329 .attr = {.name = "queued", .mode = S_IRUGO }, 160 .attr = {.name = "nr_tags", .mode = S_IRUGO },
330 .show = blk_mq_hw_sysfs_queued_show, 161 .show = blk_mq_hw_sysfs_nr_tags_show,
331}; 162};
332static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = { 163static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
333 .attr = {.name = "run", .mode = S_IRUGO }, 164 .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
334 .show = blk_mq_hw_sysfs_run_show, 165 .show = blk_mq_hw_sysfs_nr_reserved_tags_show,
335};
336static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
337 .attr = {.name = "dispatched", .mode = S_IRUGO },
338 .show = blk_mq_hw_sysfs_dispatched_show,
339};
340static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
341 .attr = {.name = "active", .mode = S_IRUGO },
342 .show = blk_mq_hw_sysfs_active_show,
343};
344static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
345 .attr = {.name = "pending", .mode = S_IRUGO },
346 .show = blk_mq_hw_sysfs_rq_list_show,
347};
348static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
349 .attr = {.name = "tags", .mode = S_IRUGO },
350 .show = blk_mq_hw_sysfs_tags_show,
351}; 166};
352static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { 167static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
353 .attr = {.name = "cpu_list", .mode = S_IRUGO }, 168 .attr = {.name = "cpu_list", .mode = S_IRUGO },
354 .show = blk_mq_hw_sysfs_cpus_show, 169 .show = blk_mq_hw_sysfs_cpus_show,
355}; 170};
356static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
357 .attr = {.name = "io_poll", .mode = S_IWUSR | S_IRUGO },
358 .show = blk_mq_hw_sysfs_poll_show,
359 .store = blk_mq_hw_sysfs_poll_store,
360};
361static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
362 .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
363 .show = blk_mq_hw_sysfs_stat_show,
364 .store = blk_mq_hw_sysfs_stat_store,
365};
366 171
367static struct attribute *default_hw_ctx_attrs[] = { 172static struct attribute *default_hw_ctx_attrs[] = {
368 &blk_mq_hw_sysfs_queued.attr, 173 &blk_mq_hw_sysfs_nr_tags.attr,
369 &blk_mq_hw_sysfs_run.attr, 174 &blk_mq_hw_sysfs_nr_reserved_tags.attr,
370 &blk_mq_hw_sysfs_dispatched.attr,
371 &blk_mq_hw_sysfs_pending.attr,
372 &blk_mq_hw_sysfs_tags.attr,
373 &blk_mq_hw_sysfs_cpus.attr, 175 &blk_mq_hw_sysfs_cpus.attr,
374 &blk_mq_hw_sysfs_active.attr,
375 &blk_mq_hw_sysfs_poll.attr,
376 &blk_mq_hw_sysfs_stat.attr,
377 NULL, 176 NULL,
378}; 177};
379 178
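With the statistics moved to debugfs, the sysfs side shrinks to a NULL-terminated table of attributes, each pairing a file name with a show callback. A rough userspace model of that table-of-callbacks layout is sketched below, with invented type names.

#include <stddef.h>
#include <stdio.h>

struct hw_queue {
    unsigned int nr_tags;
    unsigned int nr_reserved_tags;
};

/* One entry per exported file: a name plus a formatter, the way
 * blk_mq_hw_ctx_sysfs_entry pairs .attr.name with .show. */
struct attr_entry {
    const char *name;
    int (*show)(const struct hw_queue *hq, char *buf, size_t len);
};

static int show_nr_tags(const struct hw_queue *hq, char *buf, size_t len)
{
    return snprintf(buf, len, "%u\n", hq->nr_tags);
}

static int show_nr_reserved_tags(const struct hw_queue *hq, char *buf, size_t len)
{
    return snprintf(buf, len, "%u\n", hq->nr_reserved_tags);
}

/* NULL-terminated, mirroring default_hw_ctx_attrs[]. */
static const struct attr_entry hw_queue_attrs[] = {
    { "nr_tags", show_nr_tags },
    { "nr_reserved_tags", show_nr_reserved_tags },
    { NULL, NULL },
};

int main(void)
{
    struct hw_queue hq = { .nr_tags = 256, .nr_reserved_tags = 1 };
    char buf[64];

    for (const struct attr_entry *e = hw_queue_attrs; e->name; e++) {
        e->show(&hq, buf, sizeof(buf));
        printf("%s: %s", e->name, buf);
    }
    return 0;
}
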
@@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
455 kobject_put(&hctx->kobj); 254 kobject_put(&hctx->kobj);
456 } 255 }
457 256
257 blk_mq_debugfs_unregister_hctxs(q);
258
458 kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); 259 kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
459 kobject_del(&q->mq_kobj); 260 kobject_del(&q->mq_kobj);
460 kobject_put(&q->mq_kobj); 261 kobject_put(&q->mq_kobj);
@@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
504 305
505 kobject_uevent(&q->mq_kobj, KOBJ_ADD); 306 kobject_uevent(&q->mq_kobj, KOBJ_ADD);
506 307
308 blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
309
507 queue_for_each_hw_ctx(q, hctx, i) { 310 queue_for_each_hw_ctx(q, hctx, i) {
508 ret = blk_mq_register_hctx(hctx); 311 ret = blk_mq_register_hctx(hctx);
509 if (ret) 312 if (ret)
@@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
529 if (!q->mq_sysfs_init_done) 332 if (!q->mq_sysfs_init_done)
530 return; 333 return;
531 334
335 blk_mq_debugfs_unregister_hctxs(q);
336
532 queue_for_each_hw_ctx(q, hctx, i) 337 queue_for_each_hw_ctx(q, hctx, i)
533 blk_mq_unregister_hctx(hctx); 338 blk_mq_unregister_hctx(hctx);
534} 339}
@@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
541 if (!q->mq_sysfs_init_done) 346 if (!q->mq_sysfs_init_done)
542 return ret; 347 return ret;
543 348
349 blk_mq_debugfs_register_hctxs(q);
350
544 queue_for_each_hw_ctx(q, hctx, i) { 351 queue_for_each_hw_ctx(q, hctx, i) {
545 ret = blk_mq_register_hctx(hctx); 352 ret = blk_mq_register_hctx(hctx);
546 if (ret) 353 if (ret)
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index dcf5ce3ba4bf..54c84363c1b2 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
90 return atomic_read(&hctx->nr_active) < depth; 90 return atomic_read(&hctx->nr_active) < depth;
91} 91}
92 92
93static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) 93static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
94 struct sbitmap_queue *bt)
94{ 95{
95 if (!hctx_may_queue(hctx, bt)) 96 if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
97 !hctx_may_queue(data->hctx, bt))
96 return -1; 98 return -1;
97 return __sbitmap_queue_get(bt); 99 return __sbitmap_queue_get(bt);
98} 100}
99 101
100static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, 102unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
101 struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
102{ 103{
104 struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
105 struct sbitmap_queue *bt;
103 struct sbq_wait_state *ws; 106 struct sbq_wait_state *ws;
104 DEFINE_WAIT(wait); 107 DEFINE_WAIT(wait);
108 unsigned int tag_offset;
109 bool drop_ctx;
105 int tag; 110 int tag;
106 111
107 tag = __bt_get(hctx, bt); 112 if (data->flags & BLK_MQ_REQ_RESERVED) {
113 if (unlikely(!tags->nr_reserved_tags)) {
114 WARN_ON_ONCE(1);
115 return BLK_MQ_TAG_FAIL;
116 }
117 bt = &tags->breserved_tags;
118 tag_offset = 0;
119 } else {
120 bt = &tags->bitmap_tags;
121 tag_offset = tags->nr_reserved_tags;
122 }
123
124 tag = __blk_mq_get_tag(data, bt);
108 if (tag != -1) 125 if (tag != -1)
109 return tag; 126 goto found_tag;
110 127
111 if (data->flags & BLK_MQ_REQ_NOWAIT) 128 if (data->flags & BLK_MQ_REQ_NOWAIT)
112 return -1; 129 return BLK_MQ_TAG_FAIL;
113 130
114 ws = bt_wait_ptr(bt, hctx); 131 ws = bt_wait_ptr(bt, data->hctx);
132 drop_ctx = data->ctx == NULL;
115 do { 133 do {
116 prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); 134 prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
117 135
118 tag = __bt_get(hctx, bt); 136 tag = __blk_mq_get_tag(data, bt);
119 if (tag != -1) 137 if (tag != -1)
120 break; 138 break;
121 139
122 /* 140 /*
123 * We're out of tags on this hardware queue, kick any 141 * We're out of tags on this hardware queue, kick any
124 * pending IO submits before going to sleep waiting for 142 * pending IO submits before going to sleep waiting for
125 * some to complete. Note that hctx can be NULL here for 143 * some to complete.
126 * reserved tag allocation.
127 */ 144 */
128 if (hctx) 145 blk_mq_run_hw_queue(data->hctx, false);
129 blk_mq_run_hw_queue(hctx, false);
130 146
131 /* 147 /*
132 * Retry tag allocation after running the hardware queue, 148 * Retry tag allocation after running the hardware queue,
133 * as running the queue may also have found completions. 149 * as running the queue may also have found completions.
134 */ 150 */
135 tag = __bt_get(hctx, bt); 151 tag = __blk_mq_get_tag(data, bt);
136 if (tag != -1) 152 if (tag != -1)
137 break; 153 break;
138 154
139 blk_mq_put_ctx(data->ctx); 155 if (data->ctx)
156 blk_mq_put_ctx(data->ctx);
140 157
141 io_schedule(); 158 io_schedule();
142 159
143 data->ctx = blk_mq_get_ctx(data->q); 160 data->ctx = blk_mq_get_ctx(data->q);
144 data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu); 161 data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
145 if (data->flags & BLK_MQ_REQ_RESERVED) { 162 tags = blk_mq_tags_from_data(data);
146 bt = &data->hctx->tags->breserved_tags; 163 if (data->flags & BLK_MQ_REQ_RESERVED)
147 } else { 164 bt = &tags->breserved_tags;
148 hctx = data->hctx; 165 else
149 bt = &hctx->tags->bitmap_tags; 166 bt = &tags->bitmap_tags;
150 } 167
151 finish_wait(&ws->wait, &wait); 168 finish_wait(&ws->wait, &wait);
152 ws = bt_wait_ptr(bt, hctx); 169 ws = bt_wait_ptr(bt, data->hctx);
153 } while (1); 170 } while (1);
154 171
155 finish_wait(&ws->wait, &wait); 172 if (drop_ctx && data->ctx)
156 return tag; 173 blk_mq_put_ctx(data->ctx);
157}
158
159static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
160{
161 int tag;
162
163 tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
164 data->hctx->tags);
165 if (tag >= 0)
166 return tag + data->hctx->tags->nr_reserved_tags;
167
168 return BLK_MQ_TAG_FAIL;
169}
170
171static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
172{
173 int tag;
174
175 if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
176 WARN_ON_ONCE(1);
177 return BLK_MQ_TAG_FAIL;
178 }
179
180 tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
181 data->hctx->tags);
182 if (tag < 0)
183 return BLK_MQ_TAG_FAIL;
184 174
185 return tag; 175 finish_wait(&ws->wait, &wait);
186}
187 176
188unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) 177found_tag:
189{ 178 return tag + tag_offset;
190 if (data->flags & BLK_MQ_REQ_RESERVED)
191 return __blk_mq_get_reserved_tag(data);
192 return __blk_mq_get_tag(data);
193} 179}
194 180
195void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, 181void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
196 unsigned int tag) 182 struct blk_mq_ctx *ctx, unsigned int tag)
197{ 183{
198 struct blk_mq_tags *tags = hctx->tags;
199
200 if (tag >= tags->nr_reserved_tags) { 184 if (tag >= tags->nr_reserved_tags) {
201 const int real_tag = tag - tags->nr_reserved_tags; 185 const int real_tag = tag - tags->nr_reserved_tags;
202 186
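The reworked blk_mq_get_tag() chooses a bitmap up front (breserved_tags or bitmap_tags) and applies a single tag_offset at the end, so reserved tags occupy 0..nr_reserved_tags-1 and regular tags are numbered above them; blk_mq_put_tag() inverts the same arithmetic. A simplified single-threaded model of that numbering, with plain byte arrays standing in for sbitmap_queue:

#include <stdbool.h>
#include <stdio.h>

#define NR_RESERVED 2
#define NR_NORMAL   6

/* 0 = free, 1 = in use; stand-ins for breserved_tags / bitmap_tags. */
static unsigned char reserved_map[NR_RESERVED];
static unsigned char normal_map[NR_NORMAL];

static int bitmap_get(unsigned char *map, int len)
{
    for (int i = 0; i < len; i++) {
        if (!map[i]) {
            map[i] = 1;
            return i;
        }
    }
    return -1;  /* no free bit */
}

/* Mirrors the tag_offset logic: reserved tags come straight from the
 * reserved map, normal tags are offset past the reserved range. */
static int get_tag(bool reserved)
{
    int bit;

    if (reserved) {
        bit = bitmap_get(reserved_map, NR_RESERVED);
        return bit < 0 ? -1 : bit;
    }
    bit = bitmap_get(normal_map, NR_NORMAL);
    return bit < 0 ? -1 : bit + NR_RESERVED;
}

/* Mirrors blk_mq_put_tag(): tags >= NR_RESERVED go back to the normal map. */
static void put_tag(int tag)
{
    if (tag >= NR_RESERVED)
        normal_map[tag - NR_RESERVED] = 0;
    else
        reserved_map[tag] = 0;
}

int main(void)
{
    int r = get_tag(true);
    int a = get_tag(false);
    int b = get_tag(false);

    printf("reserved tag=%d, normal tags=%d,%d\n", r, a, b);
    put_tag(a);
    printf("after free, next normal tag=%d\n", get_tag(false));
    return 0;
}
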
@@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
312 struct blk_mq_tags *tags = set->tags[i]; 296 struct blk_mq_tags *tags = set->tags[i];
313 297
314 for (j = 0; j < tags->nr_tags; j++) { 298 for (j = 0; j < tags->nr_tags; j++) {
315 if (!tags->rqs[j]) 299 if (!tags->static_rqs[j])
316 continue; 300 continue;
317 301
318 ret = set->ops->reinit_request(set->driver_data, 302 ret = set->ops->reinit_request(set->driver_data,
319 tags->rqs[j]); 303 tags->static_rqs[j]);
320 if (ret) 304 if (ret)
321 goto out; 305 goto out;
322 } 306 }
@@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
351 335
352} 336}
353 337
354static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
355{
356 return bt->sb.depth - sbitmap_weight(&bt->sb);
357}
358
359static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, 338static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
360 bool round_robin, int node) 339 bool round_robin, int node)
361{ 340{
@@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
411 kfree(tags); 390 kfree(tags);
412} 391}
413 392
414int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) 393int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
394 struct blk_mq_tags **tagsptr, unsigned int tdepth,
395 bool can_grow)
415{ 396{
416 tdepth -= tags->nr_reserved_tags; 397 struct blk_mq_tags *tags = *tagsptr;
417 if (tdepth > tags->nr_tags) 398
399 if (tdepth <= tags->nr_reserved_tags)
418 return -EINVAL; 400 return -EINVAL;
419 401
402 tdepth -= tags->nr_reserved_tags;
403
420 /* 404 /*
421 * Don't need (or can't) update reserved tags here, they remain 405 * If we are allowed to grow beyond the original size, allocate
422 * static and should never need resizing. 406 * a new set of tags before freeing the old one.
423 */ 407 */
424 sbitmap_queue_resize(&tags->bitmap_tags, tdepth); 408 if (tdepth > tags->nr_tags) {
409 struct blk_mq_tag_set *set = hctx->queue->tag_set;
410 struct blk_mq_tags *new;
411 bool ret;
412
413 if (!can_grow)
414 return -EINVAL;
415
416 /*
 417 * We need some sort of upper limit; set it high enough that
 418 * no valid use case should require more.
419 */
420 if (tdepth > 16 * BLKDEV_MAX_RQ)
421 return -EINVAL;
422
423 new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0);
424 if (!new)
425 return -ENOMEM;
426 ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
427 if (ret) {
428 blk_mq_free_rq_map(new);
429 return -ENOMEM;
430 }
431
432 blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
433 blk_mq_free_rq_map(*tagsptr);
434 *tagsptr = new;
435 } else {
436 /*
437 * Don't need (or can't) update reserved tags here, they
438 * remain static and should never need resizing.
439 */
440 sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
441 }
425 442
426 blk_mq_tag_wakeup_all(tags, false);
427 return 0; 443 return 0;
428} 444}
429 445
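blk_mq_tag_update_depth() now distinguishes resizing within the original allocation (just shrink or grow the bitmap) from growing past it, which requires allocating a fresh tag map plus request set and swapping the pointer, and is only allowed when can_grow is set and the new depth stays under a hard cap. The toy model below shows the same decision; it uses a realloc-and-copy pool where the kernel swaps in a whole new tag set, and all names are invented for the sketch.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define POOL_HARD_LIMIT 4096    /* stand-in for 16 * BLKDEV_MAX_RQ */

struct tag_pool {
    unsigned int nr_alloc;  /* slots actually allocated */
    unsigned int depth;     /* slots currently usable */
    unsigned char *in_use;
};

/* Same split as blk_mq_tag_update_depth(): within the allocation we only
 * adjust 'depth'; beyond it we must build a bigger pool and swap it in,
 * and only if the caller allows growing. */
static int pool_update_depth(struct tag_pool *pool, unsigned int new_depth,
                             int can_grow)
{
    if (new_depth <= pool->nr_alloc) {
        pool->depth = new_depth;    /* cheap resize */
        return 0;
    }
    if (!can_grow || new_depth > POOL_HARD_LIMIT)
        return -EINVAL;

    unsigned char *bigger = calloc(new_depth, 1);
    if (!bigger)
        return -ENOMEM;
    memcpy(bigger, pool->in_use, pool->nr_alloc);
    free(pool->in_use);
    pool->in_use = bigger;
    pool->nr_alloc = new_depth;
    pool->depth = new_depth;
    return 0;
}

int main(void)
{
    struct tag_pool pool = { .nr_alloc = 64, .depth = 64,
                             .in_use = calloc(64, 1) };

    if (!pool.in_use)
        return 1;
    printf("shrink to 32: %d (depth=%u)\n",
           pool_update_depth(&pool, 32, 0), pool.depth);
    printf("grow to 128, can_grow=0: %d\n",
           pool_update_depth(&pool, 128, 0));
    printf("grow to 128, can_grow=1: %d (depth=%u)\n",
           pool_update_depth(&pool, 128, 1), pool.depth);
    free(pool.in_use);
    return 0;
}
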
@@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq)
454 (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); 470 (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
455} 471}
456EXPORT_SYMBOL(blk_mq_unique_tag); 472EXPORT_SYMBOL(blk_mq_unique_tag);
457
458ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
459{
460 char *orig_page = page;
461 unsigned int free, res;
462
463 if (!tags)
464 return 0;
465
466 page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
467 "bits_per_word=%u\n",
468 tags->nr_tags, tags->nr_reserved_tags,
469 1U << tags->bitmap_tags.sb.shift);
470
471 free = bt_unused_tags(&tags->bitmap_tags);
472 res = bt_unused_tags(&tags->breserved_tags);
473
474 page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
475 page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
476
477 return page - orig_page;
478}
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index d1662734dc53..63497423c5cd 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -16,6 +16,7 @@ struct blk_mq_tags {
16 struct sbitmap_queue breserved_tags; 16 struct sbitmap_queue breserved_tags;
17 17
18 struct request **rqs; 18 struct request **rqs;
19 struct request **static_rqs;
19 struct list_head page_list; 20 struct list_head page_list;
20}; 21};
21 22
@@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r
24extern void blk_mq_free_tags(struct blk_mq_tags *tags); 25extern void blk_mq_free_tags(struct blk_mq_tags *tags);
25 26
26extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); 27extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
27extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, 28extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
28 unsigned int tag); 29 struct blk_mq_ctx *ctx, unsigned int tag);
29extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); 30extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
30extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); 31extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
31extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); 32 struct blk_mq_tags **tags,
33 unsigned int depth, bool can_grow);
32extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); 34extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
33void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, 35void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
34 void *priv); 36 void *priv);
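The new static_rqs[] array keeps every preallocated request at the index it was created with, while rqs[] is refilled at run time so a driver (hardware) tag can be mapped back to whichever request currently owns it; blk_mq_get_driver_tag() further down does exactly that with tags->rqs[rq->tag] = rq. A small model of keeping those two lookups separate, with illustrative structure names:

#include <stdio.h>
#include <stdlib.h>

#define NR_TAGS 4

struct request {
    int static_tag;     /* index into static_rqs, fixed for life */
    int driver_tag;     /* -1 until a hardware tag is assigned */
};

struct tag_table {
    struct request *static_rqs[NR_TAGS];    /* owners, fixed at alloc time */
    struct request *rqs[NR_TAGS];           /* current holder of each driver tag */
};

static void assign_driver_tag(struct tag_table *t, struct request *rq,
                              int driver_tag)
{
    rq->driver_tag = driver_tag;
    t->rqs[driver_tag] = rq;    /* completion can now find rq by hw tag */
}

int main(void)
{
    struct tag_table t = { 0 };

    /* Preallocate one request per tag, much like blk_mq_alloc_rqs(). */
    for (int i = 0; i < NR_TAGS; i++) {
        struct request *rq = calloc(1, sizeof(*rq));
        rq->static_tag = i;
        rq->driver_tag = -1;
        t.static_rqs[i] = rq;
    }

    /* Request 3 is dispatched first and grabs driver tag 0. */
    assign_driver_tag(&t, t.static_rqs[3], 0);

    printf("driver tag 0 belongs to request with static tag %d\n",
           t.rqs[0]->static_tag);

    for (int i = 0; i < NR_TAGS; i++)
        free(t.static_rqs[i]);
    return 0;
}
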
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c3400b5444a7..b29e7dc7b309 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -32,6 +32,7 @@
32#include "blk-mq-tag.h" 32#include "blk-mq-tag.h"
33#include "blk-stat.h" 33#include "blk-stat.h"
34#include "blk-wbt.h" 34#include "blk-wbt.h"
35#include "blk-mq-sched.h"
35 36
36static DEFINE_MUTEX(all_q_mutex); 37static DEFINE_MUTEX(all_q_mutex);
37static LIST_HEAD(all_q_list); 38static LIST_HEAD(all_q_list);
@@ -39,9 +40,11 @@ static LIST_HEAD(all_q_list);
39/* 40/*
40 * Check if any of the ctx's have pending work in this hardware queue 41 * Check if any of the ctx's have pending work in this hardware queue
41 */ 42 */
42static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) 43bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
43{ 44{
44 return sbitmap_any_bit_set(&hctx->ctx_map); 45 return sbitmap_any_bit_set(&hctx->ctx_map) ||
46 !list_empty_careful(&hctx->dispatch) ||
47 blk_mq_sched_has_work(hctx);
45} 48}
46 49
47/* 50/*
@@ -167,8 +170,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
167} 170}
168EXPORT_SYMBOL(blk_mq_can_queue); 171EXPORT_SYMBOL(blk_mq_can_queue);
169 172
170static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, 173void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
171 struct request *rq, unsigned int op) 174 struct request *rq, unsigned int op)
172{ 175{
173 INIT_LIST_HEAD(&rq->queuelist); 176 INIT_LIST_HEAD(&rq->queuelist);
174 /* csd/requeue_work/fifo_time is initialized before use */ 177 /* csd/requeue_work/fifo_time is initialized before use */
@@ -196,13 +199,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
196 rq->special = NULL; 199 rq->special = NULL;
197 /* tag was already set */ 200 /* tag was already set */
198 rq->errors = 0; 201 rq->errors = 0;
199
200 rq->cmd = rq->__cmd;
201
202 rq->extra_len = 0; 202 rq->extra_len = 0;
203 rq->sense_len = 0;
204 rq->resid_len = 0;
205 rq->sense = NULL;
206 203
207 INIT_LIST_HEAD(&rq->timeout_list); 204 INIT_LIST_HEAD(&rq->timeout_list);
208 rq->timeout = 0; 205 rq->timeout = 0;
@@ -213,53 +210,58 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
213 210
214 ctx->rq_dispatched[op_is_sync(op)]++; 211 ctx->rq_dispatched[op_is_sync(op)]++;
215} 212}
213EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
216 214
217static struct request * 215struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
218__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) 216 unsigned int op)
219{ 217{
220 struct request *rq; 218 struct request *rq;
221 unsigned int tag; 219 unsigned int tag;
222 220
223 tag = blk_mq_get_tag(data); 221 tag = blk_mq_get_tag(data);
224 if (tag != BLK_MQ_TAG_FAIL) { 222 if (tag != BLK_MQ_TAG_FAIL) {
225 rq = data->hctx->tags->rqs[tag]; 223 struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
226 224
227 if (blk_mq_tag_busy(data->hctx)) { 225 rq = tags->static_rqs[tag];
228 rq->rq_flags = RQF_MQ_INFLIGHT; 226
229 atomic_inc(&data->hctx->nr_active); 227 if (data->flags & BLK_MQ_REQ_INTERNAL) {
228 rq->tag = -1;
229 rq->internal_tag = tag;
230 } else {
231 if (blk_mq_tag_busy(data->hctx)) {
232 rq->rq_flags = RQF_MQ_INFLIGHT;
233 atomic_inc(&data->hctx->nr_active);
234 }
235 rq->tag = tag;
236 rq->internal_tag = -1;
230 } 237 }
231 238
232 rq->tag = tag;
233 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); 239 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
234 return rq; 240 return rq;
235 } 241 }
236 242
237 return NULL; 243 return NULL;
238} 244}
245EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
239 246
240struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 247struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
241 unsigned int flags) 248 unsigned int flags)
242{ 249{
243 struct blk_mq_ctx *ctx; 250 struct blk_mq_alloc_data alloc_data = { .flags = flags };
244 struct blk_mq_hw_ctx *hctx;
245 struct request *rq; 251 struct request *rq;
246 struct blk_mq_alloc_data alloc_data;
247 int ret; 252 int ret;
248 253
249 ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); 254 ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
250 if (ret) 255 if (ret)
251 return ERR_PTR(ret); 256 return ERR_PTR(ret);
252 257
253 ctx = blk_mq_get_ctx(q); 258 rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
254 hctx = blk_mq_map_queue(q, ctx->cpu);
255 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
256 rq = __blk_mq_alloc_request(&alloc_data, rw);
257 blk_mq_put_ctx(ctx);
258 259
259 if (!rq) { 260 blk_mq_put_ctx(alloc_data.ctx);
260 blk_queue_exit(q); 261 blk_queue_exit(q);
262
263 if (!rq)
261 return ERR_PTR(-EWOULDBLOCK); 264 return ERR_PTR(-EWOULDBLOCK);
262 }
263 265
264 rq->__data_len = 0; 266 rq->__data_len = 0;
265 rq->__sector = (sector_t) -1; 267 rq->__sector = (sector_t) -1;
@@ -319,10 +321,10 @@ out_queue_exit:
319} 321}
320EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); 322EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
321 323
322static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, 324void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
323 struct blk_mq_ctx *ctx, struct request *rq) 325 struct request *rq)
324{ 326{
325 const int tag = rq->tag; 327 const int sched_tag = rq->internal_tag;
326 struct request_queue *q = rq->q; 328 struct request_queue *q = rq->q;
327 329
328 if (rq->rq_flags & RQF_MQ_INFLIGHT) 330 if (rq->rq_flags & RQF_MQ_INFLIGHT)
@@ -333,23 +335,31 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
333 335
334 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 336 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
335 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); 337 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
336 blk_mq_put_tag(hctx, ctx, tag); 338 if (rq->tag != -1)
339 blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
340 if (sched_tag != -1)
341 blk_mq_sched_completed_request(hctx, rq);
342 blk_mq_sched_restart_queues(hctx);
337 blk_queue_exit(q); 343 blk_queue_exit(q);
338} 344}
339 345
340void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq) 346static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
347 struct request *rq)
341{ 348{
342 struct blk_mq_ctx *ctx = rq->mq_ctx; 349 struct blk_mq_ctx *ctx = rq->mq_ctx;
343 350
344 ctx->rq_completed[rq_is_sync(rq)]++; 351 ctx->rq_completed[rq_is_sync(rq)]++;
345 __blk_mq_free_request(hctx, ctx, rq); 352 __blk_mq_finish_request(hctx, ctx, rq);
353}
346 354
355void blk_mq_finish_request(struct request *rq)
356{
357 blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
347} 358}
348EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
349 359
350void blk_mq_free_request(struct request *rq) 360void blk_mq_free_request(struct request *rq)
351{ 361{
352 blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq); 362 blk_mq_sched_put_request(rq);
353} 363}
354EXPORT_SYMBOL_GPL(blk_mq_free_request); 364EXPORT_SYMBOL_GPL(blk_mq_free_request);
355 365
@@ -467,11 +477,9 @@ void blk_mq_start_request(struct request *rq)
467{ 477{
468 struct request_queue *q = rq->q; 478 struct request_queue *q = rq->q;
469 479
470 trace_block_rq_issue(q, rq); 480 blk_mq_sched_started_request(rq);
471 481
472 rq->resid_len = blk_rq_bytes(rq); 482 trace_block_rq_issue(q, rq);
473 if (unlikely(blk_bidi_rq(rq)))
474 rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
475 483
476 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { 484 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
477 blk_stat_set_issue_time(&rq->issue_stat); 485 blk_stat_set_issue_time(&rq->issue_stat);
@@ -515,6 +523,7 @@ static void __blk_mq_requeue_request(struct request *rq)
515 523
516 trace_block_rq_requeue(q, rq); 524 trace_block_rq_requeue(q, rq);
517 wbt_requeue(q->rq_wb, &rq->issue_stat); 525 wbt_requeue(q->rq_wb, &rq->issue_stat);
526 blk_mq_sched_requeue_request(rq);
518 527
519 if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { 528 if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
520 if (q->dma_drain_size && blk_rq_bytes(rq)) 529 if (q->dma_drain_size && blk_rq_bytes(rq))
@@ -549,13 +558,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
549 558
550 rq->rq_flags &= ~RQF_SOFTBARRIER; 559 rq->rq_flags &= ~RQF_SOFTBARRIER;
551 list_del_init(&rq->queuelist); 560 list_del_init(&rq->queuelist);
552 blk_mq_insert_request(rq, true, false, false); 561 blk_mq_sched_insert_request(rq, true, false, false, true);
553 } 562 }
554 563
555 while (!list_empty(&rq_list)) { 564 while (!list_empty(&rq_list)) {
556 rq = list_entry(rq_list.next, struct request, queuelist); 565 rq = list_entry(rq_list.next, struct request, queuelist);
557 list_del_init(&rq->queuelist); 566 list_del_init(&rq->queuelist);
558 blk_mq_insert_request(rq, false, false, false); 567 blk_mq_sched_insert_request(rq, false, false, false, true);
559 } 568 }
560 569
561 blk_mq_run_hw_queues(q, false); 570 blk_mq_run_hw_queues(q, false);
@@ -639,7 +648,7 @@ struct blk_mq_timeout_data {
639 648
640void blk_mq_rq_timed_out(struct request *req, bool reserved) 649void blk_mq_rq_timed_out(struct request *req, bool reserved)
641{ 650{
642 struct blk_mq_ops *ops = req->q->mq_ops; 651 const struct blk_mq_ops *ops = req->q->mq_ops;
643 enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; 652 enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
644 653
645 /* 654 /*
@@ -754,7 +763,7 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
754 int checked = 8; 763 int checked = 8;
755 764
756 list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) { 765 list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
757 int el_ret; 766 bool merged = false;
758 767
759 if (!checked--) 768 if (!checked--)
760 break; 769 break;
@@ -762,20 +771,25 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
762 if (!blk_rq_merge_ok(rq, bio)) 771 if (!blk_rq_merge_ok(rq, bio))
763 continue; 772 continue;
764 773
765 el_ret = blk_try_merge(rq, bio); 774 switch (blk_try_merge(rq, bio)) {
766 if (el_ret == ELEVATOR_BACK_MERGE) { 775 case ELEVATOR_BACK_MERGE:
767 if (bio_attempt_back_merge(q, rq, bio)) { 776 if (blk_mq_sched_allow_merge(q, rq, bio))
768 ctx->rq_merged++; 777 merged = bio_attempt_back_merge(q, rq, bio);
769 return true;
770 }
771 break; 778 break;
772 } else if (el_ret == ELEVATOR_FRONT_MERGE) { 779 case ELEVATOR_FRONT_MERGE:
773 if (bio_attempt_front_merge(q, rq, bio)) { 780 if (blk_mq_sched_allow_merge(q, rq, bio))
774 ctx->rq_merged++; 781 merged = bio_attempt_front_merge(q, rq, bio);
775 return true; 782 break;
776 } 783 case ELEVATOR_DISCARD_MERGE:
784 merged = bio_attempt_discard_merge(q, rq, bio);
777 break; 785 break;
786 default:
787 continue;
778 } 788 }
789
790 if (merged)
791 ctx->rq_merged++;
792 return merged;
779 } 793 }
780 794
781 return false; 795 return false;
@@ -803,7 +817,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
803 * Process software queues that have been marked busy, splicing them 817 * Process software queues that have been marked busy, splicing them
804 * to the for-dispatch 818 * to the for-dispatch
805 */ 819 */
806static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) 820void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
807{ 821{
808 struct flush_busy_ctx_data data = { 822 struct flush_busy_ctx_data data = {
809 .hctx = hctx, 823 .hctx = hctx,
@@ -812,6 +826,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
812 826
813 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); 827 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
814} 828}
829EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
815 830
816static inline unsigned int queued_to_index(unsigned int queued) 831static inline unsigned int queued_to_index(unsigned int queued)
817{ 832{
@@ -821,6 +836,74 @@ static inline unsigned int queued_to_index(unsigned int queued)
821 return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); 836 return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
822} 837}
823 838
839bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
840 bool wait)
841{
842 struct blk_mq_alloc_data data = {
843 .q = rq->q,
844 .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
845 .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
846 };
847
848 if (rq->tag != -1) {
849done:
850 if (hctx)
851 *hctx = data.hctx;
852 return true;
853 }
854
855 rq->tag = blk_mq_get_tag(&data);
856 if (rq->tag >= 0) {
857 if (blk_mq_tag_busy(data.hctx)) {
858 rq->rq_flags |= RQF_MQ_INFLIGHT;
859 atomic_inc(&data.hctx->nr_active);
860 }
861 data.hctx->tags->rqs[rq->tag] = rq;
862 goto done;
863 }
864
865 return false;
866}
867
868static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
869 struct request *rq)
870{
871 if (rq->tag == -1 || rq->internal_tag == -1)
872 return;
873
874 blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
875 rq->tag = -1;
876
877 if (rq->rq_flags & RQF_MQ_INFLIGHT) {
878 rq->rq_flags &= ~RQF_MQ_INFLIGHT;
879 atomic_dec(&hctx->nr_active);
880 }
881}
882
883/*
884 * If we fail getting a driver tag because all the driver tags are already
885 * assigned and on the dispatch list, BUT the first entry does not have a
886 * tag, then we could deadlock. For that case, move entries with assigned
887 * driver tags to the front, leaving the set of tagged requests in the
888 * same order, and the untagged set in the same order.
889 */
890static bool reorder_tags_to_front(struct list_head *list)
891{
892 struct request *rq, *tmp, *first = NULL;
893
894 list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
895 if (rq == first)
896 break;
897 if (rq->tag != -1) {
898 list_move(&rq->queuelist, list);
899 if (!first)
900 first = rq;
901 }
902 }
903
904 return first != NULL;
905}
906
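reorder_tags_to_front() walks the list from the tail and moves every entry that already owns a driver tag to the head, which amounts to a stable partition: tagged requests end up in front, untagged ones behind, and each group keeps its original relative order. The same effect on a plain array, purely for illustration (this sketch handles a small fixed number of entries):

#include <stdbool.h>
#include <stdio.h>

struct req { int id; int tag; };    /* tag == -1 means no driver tag yet */

/* Stable partition: tagged entries first, both groups keep their order.
 * Returns true if at least one entry had a tag, like the kernel helper. */
static bool reorder_tagged_to_front(struct req *reqs, int n)
{
    struct req tmp[16];
    int out = 0;
    bool found = false;

    if (n > (int)(sizeof(tmp) / sizeof(tmp[0])))
        return false;   /* sketch-only limit */

    for (int i = 0; i < n; i++)
        if (reqs[i].tag != -1) {
            tmp[out++] = reqs[i];
            found = true;
        }
    for (int i = 0; i < n; i++)
        if (reqs[i].tag == -1)
            tmp[out++] = reqs[i];
    for (int i = 0; i < n; i++)
        reqs[i] = tmp[i];
    return found;
}

int main(void)
{
    struct req list[] = {
        { 1, -1 }, { 2, 7 }, { 3, -1 }, { 4, 9 },
    };
    int n = sizeof(list) / sizeof(list[0]);

    reorder_tagged_to_front(list, n);
    for (int i = 0; i < n; i++)
        printf("id=%d tag=%d\n", list[i].id, list[i].tag);
    return 0;
}
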
824bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) 907bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
825{ 908{
826 struct request_queue *q = hctx->queue; 909 struct request_queue *q = hctx->queue;
@@ -843,6 +926,20 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
843 struct blk_mq_queue_data bd; 926 struct blk_mq_queue_data bd;
844 927
845 rq = list_first_entry(list, struct request, queuelist); 928 rq = list_first_entry(list, struct request, queuelist);
929 if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
930 if (!queued && reorder_tags_to_front(list))
931 continue;
932
933 /*
 934 * We failed getting a driver tag. Mark the queue(s)
 935 * as needing a restart, then retry getting a tag in
 936 * case the needed IO completed right before we marked
 937 * the queue as needing a restart.
938 */
939 blk_mq_sched_mark_restart(hctx);
940 if (!blk_mq_get_driver_tag(rq, &hctx, false))
941 break;
942 }
846 list_del_init(&rq->queuelist); 943 list_del_init(&rq->queuelist);
847 944
848 bd.rq = rq; 945 bd.rq = rq;
@@ -855,6 +952,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
855 queued++; 952 queued++;
856 break; 953 break;
857 case BLK_MQ_RQ_QUEUE_BUSY: 954 case BLK_MQ_RQ_QUEUE_BUSY:
955 blk_mq_put_driver_tag(hctx, rq);
858 list_add(&rq->queuelist, list); 956 list_add(&rq->queuelist, list);
859 __blk_mq_requeue_request(rq); 957 __blk_mq_requeue_request(rq);
860 break; 958 break;
@@ -885,7 +983,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
885 */ 983 */
886 if (!list_empty(list)) { 984 if (!list_empty(list)) {
887 spin_lock(&hctx->lock); 985 spin_lock(&hctx->lock);
888 list_splice(list, &hctx->dispatch); 986 list_splice_init(list, &hctx->dispatch);
889 spin_unlock(&hctx->lock); 987 spin_unlock(&hctx->lock);
890 988
891 /* 989 /*
@@ -896,45 +994,15 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
896 * the requests in rq_list might get lost. 994 * the requests in rq_list might get lost.
897 * 995 *
898 * blk_mq_run_hw_queue() already checks the STOPPED bit 996 * blk_mq_run_hw_queue() already checks the STOPPED bit
899 **/ 997 *
900 blk_mq_run_hw_queue(hctx, true); 998 * If RESTART is set, then let completion restart the queue
901 } 999 * instead of potentially looping here.
902 1000 */
903 return ret != BLK_MQ_RQ_QUEUE_BUSY; 1001 if (!blk_mq_sched_needs_restart(hctx))
904} 1002 blk_mq_run_hw_queue(hctx, true);
905
906/*
907 * Run this hardware queue, pulling any software queues mapped to it in.
908 * Note that this function currently has various problems around ordering
909 * of IO. In particular, we'd like FIFO behaviour on handling existing
910 * items on the hctx->dispatch list. Ignore that for now.
911 */
912static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
913{
914 LIST_HEAD(rq_list);
915
916 if (unlikely(blk_mq_hctx_stopped(hctx)))
917 return;
918
919 hctx->run++;
920
921 /*
922 * Touch any software queue that has pending entries.
923 */
924 flush_busy_ctxs(hctx, &rq_list);
925
926 /*
927 * If we have previous entries on our dispatch list, grab them
928 * and stuff them at the front for more fair dispatch.
929 */
930 if (!list_empty_careful(&hctx->dispatch)) {
931 spin_lock(&hctx->lock);
932 if (!list_empty(&hctx->dispatch))
933 list_splice_init(&hctx->dispatch, &rq_list);
934 spin_unlock(&hctx->lock);
935 } 1003 }
936 1004
937 blk_mq_dispatch_rq_list(hctx, &rq_list); 1005 return queued != 0;
938} 1006}
939 1007
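Both of the hunks above rely on the same race-avoidance idiom: record that a restart is wanted (the SCHED_RESTART bit), then re-check the condition once more, so a tag freed between the failed attempt and setting the bit is not lost; conversely, when requests are left over, the queue is only rerun here if no restart is pending, because the completion path will do it. A condensed, single-threaded sketch of that ordering with a plain flag and counter:

#include <stdbool.h>
#include <stdio.h>

static int free_tags;
static bool need_restart;   /* stand-in for BLK_MQ_S_SCHED_RESTART */

static bool try_get_tag(void)
{
    if (free_tags > 0) {
        free_tags--;
        return true;
    }
    return false;
}

/* Dispatch side: mark the restart flag, then retry once, so a tag freed
 * just before the flag was set is still picked up here. */
static bool dispatch_one(void)
{
    if (try_get_tag())
        return true;
    need_restart = true;
    return try_get_tag();   /* re-check after marking */
}

/* Completion side: return the tag, and restart the dispatcher only if it
 * asked for it, mirroring the restart-on-completion idea. */
static void complete_one(void)
{
    free_tags++;
    if (need_restart) {
        need_restart = false;
        printf("completion: restarting dispatch\n");
    }
}

int main(void)
{
    free_tags = 1;
    printf("dispatch #1: %s\n", dispatch_one() ? "ok" : "blocked");
    printf("dispatch #2: %s\n", dispatch_one() ? "ok" : "blocked");
    complete_one();
    printf("dispatch #3: %s\n", dispatch_one() ? "ok" : "blocked");
    return 0;
}
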
940static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) 1008static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
@@ -946,11 +1014,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
946 1014
947 if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { 1015 if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
948 rcu_read_lock(); 1016 rcu_read_lock();
949 blk_mq_process_rq_list(hctx); 1017 blk_mq_sched_dispatch_requests(hctx);
950 rcu_read_unlock(); 1018 rcu_read_unlock();
951 } else { 1019 } else {
952 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); 1020 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
953 blk_mq_process_rq_list(hctx); 1021 blk_mq_sched_dispatch_requests(hctx);
954 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); 1022 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
955 } 1023 }
956} 1024}
@@ -1006,8 +1074,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
1006 int i; 1074 int i;
1007 1075
1008 queue_for_each_hw_ctx(q, hctx, i) { 1076 queue_for_each_hw_ctx(q, hctx, i) {
1009 if ((!blk_mq_hctx_has_pending(hctx) && 1077 if (!blk_mq_hctx_has_pending(hctx) ||
1010 list_empty_careful(&hctx->dispatch)) ||
1011 blk_mq_hctx_stopped(hctx)) 1078 blk_mq_hctx_stopped(hctx))
1012 continue; 1079 continue;
1013 1080
@@ -1116,6 +1183,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
1116 if (unlikely(!blk_mq_hw_queue_mapped(hctx))) 1183 if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
1117 return; 1184 return;
1118 1185
1186 blk_mq_stop_hw_queue(hctx);
1119 kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), 1187 kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
1120 &hctx->delay_work, msecs_to_jiffies(msecs)); 1188 &hctx->delay_work, msecs_to_jiffies(msecs));
1121} 1189}
@@ -1135,8 +1203,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
1135 list_add_tail(&rq->queuelist, &ctx->rq_list); 1203 list_add_tail(&rq->queuelist, &ctx->rq_list);
1136} 1204}
1137 1205
1138static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, 1206void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
1139 struct request *rq, bool at_head) 1207 bool at_head)
1140{ 1208{
1141 struct blk_mq_ctx *ctx = rq->mq_ctx; 1209 struct blk_mq_ctx *ctx = rq->mq_ctx;
1142 1210
@@ -1144,32 +1212,10 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
1144 blk_mq_hctx_mark_pending(hctx, ctx); 1212 blk_mq_hctx_mark_pending(hctx, ctx);
1145} 1213}
1146 1214
1147void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, 1215void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
1148 bool async) 1216 struct list_head *list)
1149{
1150 struct blk_mq_ctx *ctx = rq->mq_ctx;
1151 struct request_queue *q = rq->q;
1152 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
1153
1154 spin_lock(&ctx->lock);
1155 __blk_mq_insert_request(hctx, rq, at_head);
1156 spin_unlock(&ctx->lock);
1157
1158 if (run_queue)
1159 blk_mq_run_hw_queue(hctx, async);
1160}
1161
1162static void blk_mq_insert_requests(struct request_queue *q,
1163 struct blk_mq_ctx *ctx,
1164 struct list_head *list,
1165 int depth,
1166 bool from_schedule)
1167 1217
1168{ 1218{
1169 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
1170
1171 trace_block_unplug(q, depth, !from_schedule);
1172
1173 /* 1219 /*
1174 * preemption doesn't flush plug list, so it's possible ctx->cpu is 1220 * preemption doesn't flush plug list, so it's possible ctx->cpu is
1175 * offline now 1221 * offline now
@@ -1185,8 +1231,6 @@ static void blk_mq_insert_requests(struct request_queue *q,
1185 } 1231 }
1186 blk_mq_hctx_mark_pending(hctx, ctx); 1232 blk_mq_hctx_mark_pending(hctx, ctx);
1187 spin_unlock(&ctx->lock); 1233 spin_unlock(&ctx->lock);
1188
1189 blk_mq_run_hw_queue(hctx, from_schedule);
1190} 1234}
1191 1235
1192static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) 1236static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1222,9 +1266,10 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
1222 BUG_ON(!rq->q); 1266 BUG_ON(!rq->q);
1223 if (rq->mq_ctx != this_ctx) { 1267 if (rq->mq_ctx != this_ctx) {
1224 if (this_ctx) { 1268 if (this_ctx) {
1225 blk_mq_insert_requests(this_q, this_ctx, 1269 trace_block_unplug(this_q, depth, from_schedule);
1226 &ctx_list, depth, 1270 blk_mq_sched_insert_requests(this_q, this_ctx,
1227 from_schedule); 1271 &ctx_list,
1272 from_schedule);
1228 } 1273 }
1229 1274
1230 this_ctx = rq->mq_ctx; 1275 this_ctx = rq->mq_ctx;
@@ -1241,8 +1286,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
1241 * on 'ctx_list'. Do those. 1286 * on 'ctx_list'. Do those.
1242 */ 1287 */
1243 if (this_ctx) { 1288 if (this_ctx) {
1244 blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth, 1289 trace_block_unplug(this_q, depth, from_schedule);
1245 from_schedule); 1290 blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
1291 from_schedule);
1246 } 1292 }
1247} 1293}
1248 1294
@@ -1280,46 +1326,39 @@ insert_rq:
1280 } 1326 }
1281 1327
1282 spin_unlock(&ctx->lock); 1328 spin_unlock(&ctx->lock);
1283 __blk_mq_free_request(hctx, ctx, rq); 1329 __blk_mq_finish_request(hctx, ctx, rq);
1284 return true; 1330 return true;
1285 } 1331 }
1286} 1332}
1287 1333
1288static struct request *blk_mq_map_request(struct request_queue *q, 1334static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
1289 struct bio *bio,
1290 struct blk_mq_alloc_data *data)
1291{ 1335{
1292 struct blk_mq_hw_ctx *hctx; 1336 if (rq->tag != -1)
1293 struct blk_mq_ctx *ctx; 1337 return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false);
1294 struct request *rq;
1295 1338
1296 blk_queue_enter_live(q); 1339 return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
1297 ctx = blk_mq_get_ctx(q);
1298 hctx = blk_mq_map_queue(q, ctx->cpu);
1299
1300 trace_block_getrq(q, bio, bio->bi_opf);
1301 blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
1302 rq = __blk_mq_alloc_request(data, bio->bi_opf);
1303
1304 data->hctx->queued++;
1305 return rq;
1306} 1340}
1307 1341
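request_to_qc_t() folds the hardware queue number, a tag, and an internal-tag flag into the single polling cookie, preferring the driver tag when one is assigned and falling back to the scheduler tag otherwise. The real bit layout lives in blk_tag_to_qc_t(); the sketch below uses an invented layout purely to show the pack/unpack idea:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Invented layout: bit 31 = internal flag, bits 30..16 = hw queue,
 * bits 15..0 = tag. Not the kernel's actual encoding. */
#define COOKIE_INTERNAL (1u << 31)

static uint32_t make_cookie(unsigned int tag, unsigned int hwq, bool internal)
{
    return (internal ? COOKIE_INTERNAL : 0) | (hwq << 16) | (tag & 0xffff);
}

static void decode_cookie(uint32_t c)
{
    printf("hwq=%u tag=%u internal=%d\n",
           (c >> 16) & 0x7fff, c & 0xffff, !!(c & COOKIE_INTERNAL));
}

struct request { int tag; int internal_tag; };

/* Same decision as request_to_qc_t(): prefer the driver tag, fall back to
 * the scheduler's internal tag. */
static uint32_t request_cookie(const struct request *rq, unsigned int hwq)
{
    if (rq->tag != -1)
        return make_cookie(rq->tag, hwq, false);
    return make_cookie(rq->internal_tag, hwq, true);
}

int main(void)
{
    struct request with_driver_tag = { .tag = 42, .internal_tag = 7 };
    struct request sched_only = { .tag = -1, .internal_tag = 7 };

    decode_cookie(request_cookie(&with_driver_tag, 3));
    decode_cookie(request_cookie(&sched_only, 3));
    return 0;
}
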
1308static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) 1342static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
1309{ 1343{
1310 int ret;
1311 struct request_queue *q = rq->q; 1344 struct request_queue *q = rq->q;
1312 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
1313 struct blk_mq_queue_data bd = { 1345 struct blk_mq_queue_data bd = {
1314 .rq = rq, 1346 .rq = rq,
1315 .list = NULL, 1347 .list = NULL,
1316 .last = 1 1348 .last = 1
1317 }; 1349 };
1318 blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num); 1350 struct blk_mq_hw_ctx *hctx;
1351 blk_qc_t new_cookie;
1352 int ret;
1319 1353
1320 if (blk_mq_hctx_stopped(hctx)) 1354 if (q->elevator)
1321 goto insert; 1355 goto insert;
1322 1356
1357 if (!blk_mq_get_driver_tag(rq, &hctx, false))
1358 goto insert;
1359
1360 new_cookie = request_to_qc_t(hctx, rq);
1361
1323 /* 1362 /*
1324 * For OK queue, we are done. For error, kill it. Any other 1363 * For OK queue, we are done. For error, kill it. Any other
1325 * error (busy), just add it to our list as we previously 1364 * error (busy), just add it to our list as we previously
@@ -1341,7 +1380,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
1341 } 1380 }
1342 1381
1343insert: 1382insert:
1344 blk_mq_insert_request(rq, false, true, true); 1383 blk_mq_sched_insert_request(rq, false, true, true, false);
1345} 1384}
1346 1385
1347/* 1386/*
@@ -1352,8 +1391,8 @@ insert:
1352static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) 1391static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1353{ 1392{
1354 const int is_sync = op_is_sync(bio->bi_opf); 1393 const int is_sync = op_is_sync(bio->bi_opf);
1355 const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); 1394 const int is_flush_fua = op_is_flush(bio->bi_opf);
1356 struct blk_mq_alloc_data data; 1395 struct blk_mq_alloc_data data = { .flags = 0 };
1357 struct request *rq; 1396 struct request *rq;
1358 unsigned int request_count = 0, srcu_idx; 1397 unsigned int request_count = 0, srcu_idx;
1359 struct blk_plug *plug; 1398 struct blk_plug *plug;
@@ -1374,9 +1413,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1374 blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) 1413 blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
1375 return BLK_QC_T_NONE; 1414 return BLK_QC_T_NONE;
1376 1415
1416 if (blk_mq_sched_bio_merge(q, bio))
1417 return BLK_QC_T_NONE;
1418
1377 wb_acct = wbt_wait(q->rq_wb, bio, NULL); 1419 wb_acct = wbt_wait(q->rq_wb, bio, NULL);
1378 1420
1379 rq = blk_mq_map_request(q, bio, &data); 1421 trace_block_getrq(q, bio, bio->bi_opf);
1422
1423 rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
1380 if (unlikely(!rq)) { 1424 if (unlikely(!rq)) {
1381 __wbt_done(q->rq_wb, wb_acct); 1425 __wbt_done(q->rq_wb, wb_acct);
1382 return BLK_QC_T_NONE; 1426 return BLK_QC_T_NONE;
@@ -1384,9 +1428,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1384 1428
1385 wbt_track(&rq->issue_stat, wb_acct); 1429 wbt_track(&rq->issue_stat, wb_acct);
1386 1430
1387 cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); 1431 cookie = request_to_qc_t(data.hctx, rq);
1388 1432
1389 if (unlikely(is_flush_fua)) { 1433 if (unlikely(is_flush_fua)) {
1434 if (q->elevator)
1435 goto elv_insert;
1390 blk_mq_bio_to_request(rq, bio); 1436 blk_mq_bio_to_request(rq, bio);
1391 blk_insert_flush(rq); 1437 blk_insert_flush(rq);
1392 goto run_queue; 1438 goto run_queue;
@@ -1438,6 +1484,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1438 goto done; 1484 goto done;
1439 } 1485 }
1440 1486
1487 if (q->elevator) {
1488elv_insert:
1489 blk_mq_put_ctx(data.ctx);
1490 blk_mq_bio_to_request(rq, bio);
1491 blk_mq_sched_insert_request(rq, false, true,
1492 !is_sync || is_flush_fua, true);
1493 goto done;
1494 }
1441 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1495 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
1442 /* 1496 /*
1443 * For a SYNC request, send it to the hardware immediately. For 1497 * For a SYNC request, send it to the hardware immediately. For
@@ -1460,10 +1514,10 @@ done:
1460static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) 1514static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1461{ 1515{
1462 const int is_sync = op_is_sync(bio->bi_opf); 1516 const int is_sync = op_is_sync(bio->bi_opf);
1463 const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); 1517 const int is_flush_fua = op_is_flush(bio->bi_opf);
1464 struct blk_plug *plug; 1518 struct blk_plug *plug;
1465 unsigned int request_count = 0; 1519 unsigned int request_count = 0;
1466 struct blk_mq_alloc_data data; 1520 struct blk_mq_alloc_data data = { .flags = 0 };
1467 struct request *rq; 1521 struct request *rq;
1468 blk_qc_t cookie; 1522 blk_qc_t cookie;
1469 unsigned int wb_acct; 1523 unsigned int wb_acct;
@@ -1483,9 +1537,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1483 } else 1537 } else
1484 request_count = blk_plug_queued_count(q); 1538 request_count = blk_plug_queued_count(q);
1485 1539
1540 if (blk_mq_sched_bio_merge(q, bio))
1541 return BLK_QC_T_NONE;
1542
1486 wb_acct = wbt_wait(q->rq_wb, bio, NULL); 1543 wb_acct = wbt_wait(q->rq_wb, bio, NULL);
1487 1544
1488 rq = blk_mq_map_request(q, bio, &data); 1545 trace_block_getrq(q, bio, bio->bi_opf);
1546
1547 rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
1489 if (unlikely(!rq)) { 1548 if (unlikely(!rq)) {
1490 __wbt_done(q->rq_wb, wb_acct); 1549 __wbt_done(q->rq_wb, wb_acct);
1491 return BLK_QC_T_NONE; 1550 return BLK_QC_T_NONE;
@@ -1493,9 +1552,11 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1493 1552
1494 wbt_track(&rq->issue_stat, wb_acct); 1553 wbt_track(&rq->issue_stat, wb_acct);
1495 1554
1496 cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); 1555 cookie = request_to_qc_t(data.hctx, rq);
1497 1556
1498 if (unlikely(is_flush_fua)) { 1557 if (unlikely(is_flush_fua)) {
1558 if (q->elevator)
1559 goto elv_insert;
1499 blk_mq_bio_to_request(rq, bio); 1560 blk_mq_bio_to_request(rq, bio);
1500 blk_insert_flush(rq); 1561 blk_insert_flush(rq);
1501 goto run_queue; 1562 goto run_queue;
@@ -1535,6 +1596,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1535 return cookie; 1596 return cookie;
1536 } 1597 }
1537 1598
1599 if (q->elevator) {
1600elv_insert:
1601 blk_mq_put_ctx(data.ctx);
1602 blk_mq_bio_to_request(rq, bio);
1603 blk_mq_sched_insert_request(rq, false, true,
1604 !is_sync || is_flush_fua, true);
1605 goto done;
1606 }
1538 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1607 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
1539 /* 1608 /*
1540 * For a SYNC request, send it to the hardware immediately. For 1609 * For a SYNC request, send it to the hardware immediately. For
@@ -1547,11 +1616,12 @@ run_queue:
1547 } 1616 }
1548 1617
1549 blk_mq_put_ctx(data.ctx); 1618 blk_mq_put_ctx(data.ctx);
1619done:
1550 return cookie; 1620 return cookie;
1551} 1621}
1552 1622
1553static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, 1623void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
1554 struct blk_mq_tags *tags, unsigned int hctx_idx) 1624 unsigned int hctx_idx)
1555{ 1625{
1556 struct page *page; 1626 struct page *page;
1557 1627
@@ -1559,11 +1629,13 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
1559 int i; 1629 int i;
1560 1630
1561 for (i = 0; i < tags->nr_tags; i++) { 1631 for (i = 0; i < tags->nr_tags; i++) {
1562 if (!tags->rqs[i]) 1632 struct request *rq = tags->static_rqs[i];
1633
1634 if (!rq)
1563 continue; 1635 continue;
1564 set->ops->exit_request(set->driver_data, tags->rqs[i], 1636 set->ops->exit_request(set->driver_data, rq,
1565 hctx_idx, i); 1637 hctx_idx, i);
1566 tags->rqs[i] = NULL; 1638 tags->static_rqs[i] = NULL;
1567 } 1639 }
1568 } 1640 }
1569 1641
@@ -1577,33 +1649,32 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
1577 kmemleak_free(page_address(page)); 1649 kmemleak_free(page_address(page));
1578 __free_pages(page, page->private); 1650 __free_pages(page, page->private);
1579 } 1651 }
1652}
1580 1653
1654void blk_mq_free_rq_map(struct blk_mq_tags *tags)
1655{
1581 kfree(tags->rqs); 1656 kfree(tags->rqs);
1657 tags->rqs = NULL;
1658 kfree(tags->static_rqs);
1659 tags->static_rqs = NULL;
1582 1660
1583 blk_mq_free_tags(tags); 1661 blk_mq_free_tags(tags);
1584} 1662}
1585 1663
1586static size_t order_to_size(unsigned int order) 1664struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
1587{ 1665 unsigned int hctx_idx,
1588 return (size_t)PAGE_SIZE << order; 1666 unsigned int nr_tags,
1589} 1667 unsigned int reserved_tags)
1590
1591static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1592 unsigned int hctx_idx)
1593{ 1668{
1594 struct blk_mq_tags *tags; 1669 struct blk_mq_tags *tags;
1595 unsigned int i, j, entries_per_page, max_order = 4;
1596 size_t rq_size, left;
1597 1670
1598 tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, 1671 tags = blk_mq_init_tags(nr_tags, reserved_tags,
1599 set->numa_node, 1672 set->numa_node,
1600 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); 1673 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
1601 if (!tags) 1674 if (!tags)
1602 return NULL; 1675 return NULL;
1603 1676
1604 INIT_LIST_HEAD(&tags->page_list); 1677 tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
1605
1606 tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
1607 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, 1678 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
1608 set->numa_node); 1679 set->numa_node);
1609 if (!tags->rqs) { 1680 if (!tags->rqs) {
@@ -1611,15 +1682,40 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1611 return NULL; 1682 return NULL;
1612 } 1683 }
1613 1684
1685 tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
1686 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
1687 set->numa_node);
1688 if (!tags->static_rqs) {
1689 kfree(tags->rqs);
1690 blk_mq_free_tags(tags);
1691 return NULL;
1692 }
1693
1694 return tags;
1695}
1696
1697static size_t order_to_size(unsigned int order)
1698{
1699 return (size_t)PAGE_SIZE << order;
1700}
1701
1702int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
1703 unsigned int hctx_idx, unsigned int depth)
1704{
1705 unsigned int i, j, entries_per_page, max_order = 4;
1706 size_t rq_size, left;
1707
1708 INIT_LIST_HEAD(&tags->page_list);
1709
1614 /* 1710 /*
1615 * rq_size is the size of the request plus driver payload, rounded 1711 * rq_size is the size of the request plus driver payload, rounded
1616 * to the cacheline size 1712 * to the cacheline size
1617 */ 1713 */
1618 rq_size = round_up(sizeof(struct request) + set->cmd_size, 1714 rq_size = round_up(sizeof(struct request) + set->cmd_size,
1619 cache_line_size()); 1715 cache_line_size());
1620 left = rq_size * set->queue_depth; 1716 left = rq_size * depth;
1621 1717
1622 for (i = 0; i < set->queue_depth; ) { 1718 for (i = 0; i < depth; ) {
1623 int this_order = max_order; 1719 int this_order = max_order;
1624 struct page *page; 1720 struct page *page;
1625 int to_do; 1721 int to_do;
@@ -1653,15 +1749,17 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1653 */ 1749 */
1654 kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); 1750 kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
1655 entries_per_page = order_to_size(this_order) / rq_size; 1751 entries_per_page = order_to_size(this_order) / rq_size;
1656 to_do = min(entries_per_page, set->queue_depth - i); 1752 to_do = min(entries_per_page, depth - i);
1657 left -= to_do * rq_size; 1753 left -= to_do * rq_size;
1658 for (j = 0; j < to_do; j++) { 1754 for (j = 0; j < to_do; j++) {
1659 tags->rqs[i] = p; 1755 struct request *rq = p;
1756
1757 tags->static_rqs[i] = rq;
1660 if (set->ops->init_request) { 1758 if (set->ops->init_request) {
1661 if (set->ops->init_request(set->driver_data, 1759 if (set->ops->init_request(set->driver_data,
1662 tags->rqs[i], hctx_idx, i, 1760 rq, hctx_idx, i,
1663 set->numa_node)) { 1761 set->numa_node)) {
1664 tags->rqs[i] = NULL; 1762 tags->static_rqs[i] = NULL;
1665 goto fail; 1763 goto fail;
1666 } 1764 }
1667 } 1765 }
@@ -1670,11 +1768,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1670 i++; 1768 i++;
1671 } 1769 }
1672 } 1770 }
1673 return tags; 1771 return 0;
1674 1772
1675fail: 1773fail:
1676 blk_mq_free_rq_map(set, tags, hctx_idx); 1774 blk_mq_free_rqs(set, tags, hctx_idx);
1677 return NULL; 1775 return -ENOMEM;
1678} 1776}
1679 1777
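Two request-pointer arrays now hang off a tag set: static_rqs[] holds the requests preallocated by blk_mq_alloc_rqs() above, while rqs[] is only populated once a request has been assigned a driver tag, so tag-to-request lookup works the same whether the request came from the driver tags or from a scheduler's tags. A minimal sketch of that idea, assuming a publish step like the one blk_mq_get_driver_tag() (declared in blk-mq.h further down) performs; the bodies below are illustrative, not the patch's code:

/* Illustrative only: how static_rqs[] and rqs[] divide the work. */
static void example_publish_driver_tag(struct blk_mq_hw_ctx *hctx,
				       struct request *rq)
{
	/*
	 * rq lives in the static_rqs[] array of whichever tag set
	 * allocated it; once it owns a driver tag, expose it under
	 * that tag so completion paths can find it.
	 */
	hctx->tags->rqs[rq->tag] = rq;
}

static struct request *example_tag_to_rq(struct blk_mq_tags *tags,
					 unsigned int tag)
{
	/* Lookup by driver tag (e.g. on IRQ completion) uses rqs[]. */
	return tags->rqs[tag];
}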
1680/* 1778/*
@@ -1866,6 +1964,35 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
1866 } 1964 }
1867} 1965}
1868 1966
1967static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
1968{
1969 int ret = 0;
1970
1971 set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
1972 set->queue_depth, set->reserved_tags);
1973 if (!set->tags[hctx_idx])
1974 return false;
1975
1976 ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
1977 set->queue_depth);
1978 if (!ret)
1979 return true;
1980
1981 blk_mq_free_rq_map(set->tags[hctx_idx]);
1982 set->tags[hctx_idx] = NULL;
1983 return false;
1984}
1985
1986static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
1987 unsigned int hctx_idx)
1988{
1989 if (set->tags[hctx_idx]) {
1990 blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
1991 blk_mq_free_rq_map(set->tags[hctx_idx]);
1992 set->tags[hctx_idx] = NULL;
1993 }
1994}
1995
1869static void blk_mq_map_swqueue(struct request_queue *q, 1996static void blk_mq_map_swqueue(struct request_queue *q,
1870 const struct cpumask *online_mask) 1997 const struct cpumask *online_mask)
1871{ 1998{
@@ -1894,17 +2021,15 @@ static void blk_mq_map_swqueue(struct request_queue *q,
1894 2021
1895 hctx_idx = q->mq_map[i]; 2022 hctx_idx = q->mq_map[i];
1896 /* unmapped hw queue can be remapped after CPU topo changed */ 2023 /* unmapped hw queue can be remapped after CPU topo changed */
1897 if (!set->tags[hctx_idx]) { 2024 if (!set->tags[hctx_idx] &&
1898 set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx); 2025 !__blk_mq_alloc_rq_map(set, hctx_idx)) {
1899
1900 /* 2026 /*
1901 * If tags initialization fails for some hctx, 2027 * If tags initialization fails for some hctx,
1902 * that hctx won't be brought online. In this 2028 * that hctx won't be brought online. In this
1903 * case, remap the current ctx to hctx[0] which 2029 * case, remap the current ctx to hctx[0] which
1904 * is guaranteed to always have tags allocated 2030 * is guaranteed to always have tags allocated
1905 */ 2031 */
1906 if (!set->tags[hctx_idx]) 2032 q->mq_map[i] = 0;
1907 q->mq_map[i] = 0;
1908 } 2033 }
1909 2034
1910 ctx = per_cpu_ptr(q->queue_ctx, i); 2035 ctx = per_cpu_ptr(q->queue_ctx, i);
@@ -1927,10 +2052,9 @@ static void blk_mq_map_swqueue(struct request_queue *q,
1927 * fallback in case of a new remap fails 2052 * fallback in case of a new remap fails
1928 * allocation 2053 * allocation
1929 */ 2054 */
1930 if (i && set->tags[i]) { 2055 if (i && set->tags[i])
1931 blk_mq_free_rq_map(set, set->tags[i], i); 2056 blk_mq_free_map_and_requests(set, i);
1932 set->tags[i] = NULL; 2057
1933 }
1934 hctx->tags = NULL; 2058 hctx->tags = NULL;
1935 continue; 2059 continue;
1936 } 2060 }
@@ -2023,6 +2147,8 @@ void blk_mq_release(struct request_queue *q)
2023 struct blk_mq_hw_ctx *hctx; 2147 struct blk_mq_hw_ctx *hctx;
2024 unsigned int i; 2148 unsigned int i;
2025 2149
2150 blk_mq_sched_teardown(q);
2151
2026 /* hctx kobj stays in hctx */ 2152 /* hctx kobj stays in hctx */
2027 queue_for_each_hw_ctx(q, hctx, i) { 2153 queue_for_each_hw_ctx(q, hctx, i) {
2028 if (!hctx) 2154 if (!hctx)
@@ -2097,10 +2223,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
2097 struct blk_mq_hw_ctx *hctx = hctxs[j]; 2223 struct blk_mq_hw_ctx *hctx = hctxs[j];
2098 2224
2099 if (hctx) { 2225 if (hctx) {
2100 if (hctx->tags) { 2226 if (hctx->tags)
2101 blk_mq_free_rq_map(set, hctx->tags, j); 2227 blk_mq_free_map_and_requests(set, j);
2102 set->tags[j] = NULL;
2103 }
2104 blk_mq_exit_hctx(q, set, hctx, j); 2228 blk_mq_exit_hctx(q, set, hctx, j);
2105 free_cpumask_var(hctx->cpumask); 2229 free_cpumask_var(hctx->cpumask);
2106 kobject_put(&hctx->kobj); 2230 kobject_put(&hctx->kobj);
@@ -2181,6 +2305,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
2181 mutex_unlock(&all_q_mutex); 2305 mutex_unlock(&all_q_mutex);
2182 put_online_cpus(); 2306 put_online_cpus();
2183 2307
2308 if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
2309 int ret;
2310
2311 ret = blk_mq_sched_init(q);
2312 if (ret)
2313 return ERR_PTR(ret);
2314 }
2315
2184 return q; 2316 return q;
2185 2317
2186err_hctxs: 2318err_hctxs:
@@ -2279,10 +2411,10 @@ static int blk_mq_queue_reinit_dead(unsigned int cpu)
2279 * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list 2411 * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
2280 * and set bit0 in pending bitmap as ctx1->index_hw is still zero. 2412 * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
2281 * 2413 *
2282 * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in 2414 * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
2283 * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. 2415 * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
2284 * But hctx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list 2416 * But hctx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
2285 * is ignored. 2417 * ignored.
2286 */ 2418 */
2287static int blk_mq_queue_reinit_prepare(unsigned int cpu) 2419static int blk_mq_queue_reinit_prepare(unsigned int cpu)
2288{ 2420{
@@ -2296,17 +2428,15 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
2296{ 2428{
2297 int i; 2429 int i;
2298 2430
2299 for (i = 0; i < set->nr_hw_queues; i++) { 2431 for (i = 0; i < set->nr_hw_queues; i++)
2300 set->tags[i] = blk_mq_init_rq_map(set, i); 2432 if (!__blk_mq_alloc_rq_map(set, i))
2301 if (!set->tags[i])
2302 goto out_unwind; 2433 goto out_unwind;
2303 }
2304 2434
2305 return 0; 2435 return 0;
2306 2436
2307out_unwind: 2437out_unwind:
2308 while (--i >= 0) 2438 while (--i >= 0)
2309 blk_mq_free_rq_map(set, set->tags[i], i); 2439 blk_mq_free_rq_map(set->tags[i]);
2310 2440
2311 return -ENOMEM; 2441 return -ENOMEM;
2312} 2442}
@@ -2430,10 +2560,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
2430{ 2560{
2431 int i; 2561 int i;
2432 2562
2433 for (i = 0; i < nr_cpu_ids; i++) { 2563 for (i = 0; i < nr_cpu_ids; i++)
2434 if (set->tags[i]) 2564 blk_mq_free_map_and_requests(set, i);
2435 blk_mq_free_rq_map(set, set->tags[i], i);
2436 }
2437 2565
2438 kfree(set->mq_map); 2566 kfree(set->mq_map);
2439 set->mq_map = NULL; 2567 set->mq_map = NULL;
@@ -2449,14 +2577,28 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
2449 struct blk_mq_hw_ctx *hctx; 2577 struct blk_mq_hw_ctx *hctx;
2450 int i, ret; 2578 int i, ret;
2451 2579
2452 if (!set || nr > set->queue_depth) 2580 if (!set)
2453 return -EINVAL; 2581 return -EINVAL;
2454 2582
2583 blk_mq_freeze_queue(q);
2584 blk_mq_quiesce_queue(q);
2585
2455 ret = 0; 2586 ret = 0;
2456 queue_for_each_hw_ctx(q, hctx, i) { 2587 queue_for_each_hw_ctx(q, hctx, i) {
2457 if (!hctx->tags) 2588 if (!hctx->tags)
2458 continue; 2589 continue;
2459 ret = blk_mq_tag_update_depth(hctx->tags, nr); 2590 /*
2591 * If we're using an MQ scheduler, just update the scheduler
2592 * queue depth. This is similar to what the old code would do.
2593 */
2594 if (!hctx->sched_tags) {
2595 ret = blk_mq_tag_update_depth(hctx, &hctx->tags,
2596 min(nr, set->queue_depth),
2597 false);
2598 } else {
2599 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
2600 nr, true);
2601 }
2460 if (ret) 2602 if (ret)
2461 break; 2603 break;
2462 } 2604 }
@@ -2464,6 +2606,9 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
2464 if (!ret) 2606 if (!ret)
2465 q->nr_requests = nr; 2607 q->nr_requests = nr;
2466 2608
2609 blk_mq_unfreeze_queue(q);
2610 blk_mq_start_stopped_hw_queues(q, true);
2611
2467 return ret; 2612 return ret;
2468} 2613}
2469 2614
@@ -2483,10 +2628,14 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
2483 list_for_each_entry(q, &set->tag_list, tag_set_list) { 2628 list_for_each_entry(q, &set->tag_list, tag_set_list) {
2484 blk_mq_realloc_hw_ctxs(set, q); 2629 blk_mq_realloc_hw_ctxs(set, q);
2485 2630
2631 /*
2632 * Manually set the make_request_fn as blk_queue_make_request
2633 * resets a lot of the queue settings.
2634 */
2486 if (q->nr_hw_queues > 1) 2635 if (q->nr_hw_queues > 1)
2487 blk_queue_make_request(q, blk_mq_make_request); 2636 q->make_request_fn = blk_mq_make_request;
2488 else 2637 else
2489 blk_queue_make_request(q, blk_sq_make_request); 2638 q->make_request_fn = blk_sq_make_request;
2490 2639
2491 blk_mq_queue_reinit(q, cpu_online_mask); 2640 blk_mq_queue_reinit(q, cpu_online_mask);
2492 } 2641 }
@@ -2649,7 +2798,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
2649 blk_flush_plug_list(plug, false); 2798 blk_flush_plug_list(plug, false);
2650 2799
2651 hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; 2800 hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
2652 rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); 2801 if (!blk_qc_t_is_internal(cookie))
2802 rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
2803 else
2804 rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
2653 2805
2654 return __blk_mq_poll(hctx, rq); 2806 return __blk_mq_poll(hctx, rq);
2655} 2807}
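With a scheduler attached, the polling cookie can carry either a driver tag or a scheduler-internal tag, and blk_mq_poll() above picks the matching tag set before resolving it. A rough sketch of the kind of cookie helpers this relies on; the constant names, bit positions and the EX_ prefix are assumptions, only blk_qc_t_is_internal()/blk_qc_t_to_tag() themselves appear in the hunk:

/* Assumed cookie layout: hw queue index in the high bits, tag in the low
 * bits, plus one flag bit marking a scheduler-internal tag. */
#define EX_QC_T_SHIFT		16
#define EX_QC_T_INTERNAL	(1U << 31)

static inline bool example_qc_t_is_internal(unsigned int cookie)
{
	return (cookie & EX_QC_T_INTERNAL) != 0;
}

static inline unsigned int example_qc_t_to_tag(unsigned int cookie)
{
	return cookie & ((1U << EX_QC_T_SHIFT) - 1);
}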
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 63e9116cddbd..24b2256186f3 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,8 +32,32 @@ void blk_mq_free_queue(struct request_queue *q);
32int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); 32int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
33void blk_mq_wake_waiters(struct request_queue *q); 33void blk_mq_wake_waiters(struct request_queue *q);
34bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); 34bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
35void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
36bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
37bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
38 bool wait);
35 39
36/* 40/*
41 * Internal helpers for allocating/freeing the request map
42 */
43void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
44 unsigned int hctx_idx);
45void blk_mq_free_rq_map(struct blk_mq_tags *tags);
46struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
47 unsigned int hctx_idx,
48 unsigned int nr_tags,
49 unsigned int reserved_tags);
50int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
51 unsigned int hctx_idx, unsigned int depth);
52
53/*
54 * Internal helpers for request insertion into sw queues
55 */
56void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
57 bool at_head);
58void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
59 struct list_head *list);
60/*
37 * CPU hotplug helpers 61 * CPU hotplug helpers
38 */ 62 */
39void blk_mq_enable_hotplug(void); 63void blk_mq_enable_hotplug(void);
@@ -57,6 +81,35 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
57extern void blk_mq_sysfs_unregister(struct request_queue *q); 81extern void blk_mq_sysfs_unregister(struct request_queue *q);
58extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); 82extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
59 83
84/*
85 * debugfs helpers
86 */
87#ifdef CONFIG_BLK_DEBUG_FS
88int blk_mq_debugfs_register(struct request_queue *q, const char *name);
89void blk_mq_debugfs_unregister(struct request_queue *q);
90int blk_mq_debugfs_register_hctxs(struct request_queue *q);
91void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
92#else
93static inline int blk_mq_debugfs_register(struct request_queue *q,
94 const char *name)
95{
96 return 0;
97}
98
99static inline void blk_mq_debugfs_unregister(struct request_queue *q)
100{
101}
102
103static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
104{
105 return 0;
106}
107
108static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
109{
110}
111#endif
112
60extern void blk_mq_rq_timed_out(struct request *req, bool reserved); 113extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
61 114
62void blk_mq_release(struct request_queue *q); 115void blk_mq_release(struct request_queue *q);
@@ -103,6 +156,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
103 data->hctx = hctx; 156 data->hctx = hctx;
104} 157}
105 158
159static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
160{
161 if (data->flags & BLK_MQ_REQ_INTERNAL)
162 return data->hctx->sched_tags;
163
164 return data->hctx->tags;
165}
166
167/*
168 * Internal helpers for request allocation/init/free
169 */
170void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
171 struct request *rq, unsigned int op);
172void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
173 struct request *rq);
174void blk_mq_finish_request(struct request *rq);
175struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
176 unsigned int op);
177
106static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) 178static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
107{ 179{
108 return test_bit(BLK_MQ_S_STOPPED, &hctx->state); 180 return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
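Taken together, the helpers declared above are enough for an I/O scheduler to carve out its own per-hctx tag set alongside the driver's. A hedged sketch of that pairing; the function below and its sizing and error handling are illustrative assumptions, not code from this series:

/* Illustrative: give one hardware queue a scheduler-private tag set. */
static int example_sched_alloc_tags(struct request_queue *q,
				    struct blk_mq_hw_ctx *hctx,
				    unsigned int hctx_idx)
{
	struct blk_mq_tag_set *set = q->tag_set;
	int ret;

	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
					       set->reserved_tags);
	if (!hctx->sched_tags)
		return -ENOMEM;

	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
	if (ret) {
		blk_mq_free_rq_map(hctx->sched_tags);
		hctx->sched_tags = NULL;
	}
	return ret;
}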
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 529e55f52a03..1e7174ffc9d4 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -88,6 +88,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
88void blk_set_default_limits(struct queue_limits *lim) 88void blk_set_default_limits(struct queue_limits *lim)
89{ 89{
90 lim->max_segments = BLK_MAX_SEGMENTS; 90 lim->max_segments = BLK_MAX_SEGMENTS;
91 lim->max_discard_segments = 1;
91 lim->max_integrity_segments = 0; 92 lim->max_integrity_segments = 0;
92 lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; 93 lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
93 lim->virt_boundary_mask = 0; 94 lim->virt_boundary_mask = 0;
@@ -128,6 +129,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
128 /* Inherit limits from component devices */ 129 /* Inherit limits from component devices */
129 lim->discard_zeroes_data = 1; 130 lim->discard_zeroes_data = 1;
130 lim->max_segments = USHRT_MAX; 131 lim->max_segments = USHRT_MAX;
132 lim->max_discard_segments = 1;
131 lim->max_hw_sectors = UINT_MAX; 133 lim->max_hw_sectors = UINT_MAX;
132 lim->max_segment_size = UINT_MAX; 134 lim->max_segment_size = UINT_MAX;
133 lim->max_sectors = UINT_MAX; 135 lim->max_sectors = UINT_MAX;
@@ -253,7 +255,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
253 max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); 255 max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
254 max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS); 256 max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
255 limits->max_sectors = max_sectors; 257 limits->max_sectors = max_sectors;
256 q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9); 258 q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
257} 259}
258EXPORT_SYMBOL(blk_queue_max_hw_sectors); 260EXPORT_SYMBOL(blk_queue_max_hw_sectors);
259 261
@@ -337,6 +339,22 @@ void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments
337EXPORT_SYMBOL(blk_queue_max_segments); 339EXPORT_SYMBOL(blk_queue_max_segments);
338 340
339/** 341/**
342 * blk_queue_max_discard_segments - set max segments for discard requests
343 * @q: the request queue for the device
344 * @max_segments: max number of segments
345 *
346 * Description:
347 * Enables a low level driver to set an upper limit on the number of
348 * segments in a discard request.
349 **/
350void blk_queue_max_discard_segments(struct request_queue *q,
351 unsigned short max_segments)
352{
353 q->limits.max_discard_segments = max_segments;
354}
355EXPORT_SYMBOL_GPL(blk_queue_max_discard_segments);
356
357/**
340 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg 358 * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
341 * @q: the request queue for the device 359 * @q: the request queue for the device
342 * @max_size: max size of segment in bytes 360 * @max_size: max size of segment in bytes
@@ -553,6 +571,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
553 b->virt_boundary_mask); 571 b->virt_boundary_mask);
554 572
555 t->max_segments = min_not_zero(t->max_segments, b->max_segments); 573 t->max_segments = min_not_zero(t->max_segments, b->max_segments);
574 t->max_discard_segments = min_not_zero(t->max_discard_segments,
575 b->max_discard_segments);
556 t->max_integrity_segments = min_not_zero(t->max_integrity_segments, 576 t->max_integrity_segments = min_not_zero(t->max_integrity_segments,
557 b->max_integrity_segments); 577 b->max_integrity_segments);
558 578
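From a driver's point of view the new discard-segment limit is set exactly like the existing segment limits during queue setup. A small, hypothetical example; the numbers are made up and stand in for whatever the hardware reports:

/* Hypothetical setup: device accepts up to 32 ranges per discard command. */
static void example_setup_limits(struct request_queue *q)
{
	blk_queue_max_segments(q, 128);
	blk_queue_max_discard_segments(q, 32);
	blk_queue_max_segment_size(q, 65536);
}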
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1dbce057592d..002af836aa87 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -89,7 +89,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
89 89
90static ssize_t queue_ra_show(struct request_queue *q, char *page) 90static ssize_t queue_ra_show(struct request_queue *q, char *page)
91{ 91{
92 unsigned long ra_kb = q->backing_dev_info.ra_pages << 92 unsigned long ra_kb = q->backing_dev_info->ra_pages <<
93 (PAGE_SHIFT - 10); 93 (PAGE_SHIFT - 10);
94 94
95 return queue_var_show(ra_kb, (page)); 95 return queue_var_show(ra_kb, (page));
@@ -104,7 +104,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
104 if (ret < 0) 104 if (ret < 0)
105 return ret; 105 return ret;
106 106
107 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_SHIFT - 10); 107 q->backing_dev_info->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
108 108
109 return ret; 109 return ret;
110} 110}
@@ -121,6 +121,12 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
121 return queue_var_show(queue_max_segments(q), (page)); 121 return queue_var_show(queue_max_segments(q), (page));
122} 122}
123 123
124static ssize_t queue_max_discard_segments_show(struct request_queue *q,
125 char *page)
126{
127 return queue_var_show(queue_max_discard_segments(q), (page));
128}
129
124static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) 130static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
125{ 131{
126 return queue_var_show(q->limits.max_integrity_segments, (page)); 132 return queue_var_show(q->limits.max_integrity_segments, (page));
@@ -236,7 +242,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
236 242
237 spin_lock_irq(q->queue_lock); 243 spin_lock_irq(q->queue_lock);
238 q->limits.max_sectors = max_sectors_kb << 1; 244 q->limits.max_sectors = max_sectors_kb << 1;
239 q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10); 245 q->backing_dev_info->io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
240 spin_unlock_irq(q->queue_lock); 246 spin_unlock_irq(q->queue_lock);
241 247
242 return ret; 248 return ret;
@@ -545,6 +551,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = {
545 .show = queue_max_segments_show, 551 .show = queue_max_segments_show,
546}; 552};
547 553
554static struct queue_sysfs_entry queue_max_discard_segments_entry = {
555 .attr = {.name = "max_discard_segments", .mode = S_IRUGO },
556 .show = queue_max_discard_segments_show,
557};
558
548static struct queue_sysfs_entry queue_max_integrity_segments_entry = { 559static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
549 .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, 560 .attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
550 .show = queue_max_integrity_segments_show, 561 .show = queue_max_integrity_segments_show,
@@ -697,6 +708,7 @@ static struct attribute *default_attrs[] = {
697 &queue_max_hw_sectors_entry.attr, 708 &queue_max_hw_sectors_entry.attr,
698 &queue_max_sectors_entry.attr, 709 &queue_max_sectors_entry.attr,
699 &queue_max_segments_entry.attr, 710 &queue_max_segments_entry.attr,
711 &queue_max_discard_segments_entry.attr,
700 &queue_max_integrity_segments_entry.attr, 712 &queue_max_integrity_segments_entry.attr,
701 &queue_max_segment_size_entry.attr, 713 &queue_max_segment_size_entry.attr,
702 &queue_iosched_entry.attr, 714 &queue_iosched_entry.attr,
@@ -799,7 +811,7 @@ static void blk_release_queue(struct kobject *kobj)
799 container_of(kobj, struct request_queue, kobj); 811 container_of(kobj, struct request_queue, kobj);
800 812
801 wbt_exit(q); 813 wbt_exit(q);
802 bdi_exit(&q->backing_dev_info); 814 bdi_put(q->backing_dev_info);
803 blkcg_exit_queue(q); 815 blkcg_exit_queue(q);
804 816
805 if (q->elevator) { 817 if (q->elevator) {
@@ -814,13 +826,19 @@ static void blk_release_queue(struct kobject *kobj)
814 if (q->queue_tags) 826 if (q->queue_tags)
815 __blk_queue_free_tags(q); 827 __blk_queue_free_tags(q);
816 828
817 if (!q->mq_ops) 829 if (!q->mq_ops) {
830 if (q->exit_rq_fn)
831 q->exit_rq_fn(q, q->fq->flush_rq);
818 blk_free_flush_queue(q->fq); 832 blk_free_flush_queue(q->fq);
819 else 833 } else {
820 blk_mq_release(q); 834 blk_mq_release(q);
835 }
821 836
822 blk_trace_shutdown(q); 837 blk_trace_shutdown(q);
823 838
839 if (q->mq_ops)
840 blk_mq_debugfs_unregister(q);
841
824 if (q->bio_split) 842 if (q->bio_split)
825 bioset_free(q->bio_split); 843 bioset_free(q->bio_split);
826 844
@@ -884,32 +902,36 @@ int blk_register_queue(struct gendisk *disk)
884 if (ret) 902 if (ret)
885 return ret; 903 return ret;
886 904
905 if (q->mq_ops)
906 blk_mq_register_dev(dev, q);
907
908 /* Prevent changes through sysfs until registration is completed. */
909 mutex_lock(&q->sysfs_lock);
910
887 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); 911 ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
888 if (ret < 0) { 912 if (ret < 0) {
889 blk_trace_remove_sysfs(dev); 913 blk_trace_remove_sysfs(dev);
890 return ret; 914 goto unlock;
891 } 915 }
892 916
893 kobject_uevent(&q->kobj, KOBJ_ADD); 917 kobject_uevent(&q->kobj, KOBJ_ADD);
894 918
895 if (q->mq_ops)
896 blk_mq_register_dev(dev, q);
897
898 blk_wb_init(q); 919 blk_wb_init(q);
899 920
900 if (!q->request_fn) 921 if (q->request_fn || (q->mq_ops && q->elevator)) {
901 return 0; 922 ret = elv_register_queue(q);
902 923 if (ret) {
903 ret = elv_register_queue(q); 924 kobject_uevent(&q->kobj, KOBJ_REMOVE);
904 if (ret) { 925 kobject_del(&q->kobj);
905 kobject_uevent(&q->kobj, KOBJ_REMOVE); 926 blk_trace_remove_sysfs(dev);
906 kobject_del(&q->kobj); 927 kobject_put(&dev->kobj);
907 blk_trace_remove_sysfs(dev); 928 goto unlock;
908 kobject_put(&dev->kobj); 929 }
909 return ret;
910 } 930 }
911 931 ret = 0;
912 return 0; 932unlock:
933 mutex_unlock(&q->sysfs_lock);
934 return ret;
913} 935}
914 936
915void blk_unregister_queue(struct gendisk *disk) 937void blk_unregister_queue(struct gendisk *disk)
@@ -922,7 +944,7 @@ void blk_unregister_queue(struct gendisk *disk)
922 if (q->mq_ops) 944 if (q->mq_ops)
923 blk_mq_unregister_dev(disk_to_dev(disk), q); 945 blk_mq_unregister_dev(disk_to_dev(disk), q);
924 946
925 if (q->request_fn) 947 if (q->request_fn || (q->mq_ops && q->elevator))
926 elv_unregister_queue(q); 948 elv_unregister_queue(q);
927 949
928 kobject_uevent(&q->kobj, KOBJ_REMOVE); 950 kobject_uevent(&q->kobj, KOBJ_REMOVE);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index bae1decb6ec3..07cc329fa4b0 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
272 list_del_init(&rq->queuelist); 272 list_del_init(&rq->queuelist);
273 rq->rq_flags &= ~RQF_QUEUED; 273 rq->rq_flags &= ~RQF_QUEUED;
274 rq->tag = -1; 274 rq->tag = -1;
275 rq->internal_tag = -1;
275 276
276 if (unlikely(bqt->tag_index[tag] == NULL)) 277 if (unlikely(bqt->tag_index[tag] == NULL))
277 printk(KERN_ERR "%s: tag %d is missing\n", 278 printk(KERN_ERR "%s: tag %d is missing\n",
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a6bb4fe326c3..82fd0cc394eb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg)
866 unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; 866 unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
867 struct bio *bio; 867 struct bio *bio;
868 868
869 if ((bio = throtl_peek_queued(&sq->queued[READ]))) 869 bio = throtl_peek_queued(&sq->queued[READ]);
870 if (bio)
870 tg_may_dispatch(tg, bio, &read_wait); 871 tg_may_dispatch(tg, bio, &read_wait);
871 872
872 if ((bio = throtl_peek_queued(&sq->queued[WRITE]))) 873 bio = throtl_peek_queued(&sq->queued[WRITE]);
874 if (bio)
873 tg_may_dispatch(tg, bio, &write_wait); 875 tg_may_dispatch(tg, bio, &write_wait);
874 876
875 min_wait = min(read_wait, write_wait); 877 min_wait = min(read_wait, write_wait);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index f0a9c07b4c7a..1aedb1f7ee0c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -96,7 +96,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
96 */ 96 */
97static bool wb_recent_wait(struct rq_wb *rwb) 97static bool wb_recent_wait(struct rq_wb *rwb)
98{ 98{
99 struct bdi_writeback *wb = &rwb->queue->backing_dev_info.wb; 99 struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb;
100 100
101 return time_before(jiffies, wb->dirty_sleep + HZ); 101 return time_before(jiffies, wb->dirty_sleep + HZ);
102} 102}
@@ -279,7 +279,7 @@ enum {
279 279
280static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat) 280static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
281{ 281{
282 struct backing_dev_info *bdi = &rwb->queue->backing_dev_info; 282 struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
283 u64 thislat; 283 u64 thislat;
284 284
285 /* 285 /*
@@ -339,7 +339,7 @@ static int latency_exceeded(struct rq_wb *rwb)
339 339
340static void rwb_trace_step(struct rq_wb *rwb, const char *msg) 340static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
341{ 341{
342 struct backing_dev_info *bdi = &rwb->queue->backing_dev_info; 342 struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
343 343
344 trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec, 344 trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec,
345 rwb->wb_background, rwb->wb_normal, rwb->wb_max); 345 rwb->wb_background, rwb->wb_normal, rwb->wb_max);
@@ -423,7 +423,7 @@ static void wb_timer_fn(unsigned long data)
423 423
424 status = latency_exceeded(rwb); 424 status = latency_exceeded(rwb);
425 425
426 trace_wbt_timer(&rwb->queue->backing_dev_info, status, rwb->scale_step, 426 trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step,
427 inflight); 427 inflight);
428 428
429 /* 429 /*
diff --git a/block/blk.h b/block/blk.h
index 041185e5f129..d1ea4bd9b9a3 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -14,6 +14,10 @@
14/* Max future timer expiry for timeouts */ 14/* Max future timer expiry for timeouts */
15#define BLK_MAX_TIMEOUT (5 * HZ) 15#define BLK_MAX_TIMEOUT (5 * HZ)
16 16
17#ifdef CONFIG_DEBUG_FS
18extern struct dentry *blk_debugfs_root;
19#endif
20
17struct blk_flush_queue { 21struct blk_flush_queue {
18 unsigned int flush_queue_delayed:1; 22 unsigned int flush_queue_delayed:1;
19 unsigned int flush_pending_idx:1; 23 unsigned int flush_pending_idx:1;
@@ -96,6 +100,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
96 struct bio *bio); 100 struct bio *bio);
97bool bio_attempt_back_merge(struct request_queue *q, struct request *req, 101bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
98 struct bio *bio); 102 struct bio *bio);
103bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
104 struct bio *bio);
99bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, 105bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
100 unsigned int *request_count, 106 unsigned int *request_count,
101 struct request **same_queue_rq); 107 struct request **same_queue_rq);
@@ -167,7 +173,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
167 return NULL; 173 return NULL;
168 } 174 }
169 if (unlikely(blk_queue_bypass(q)) || 175 if (unlikely(blk_queue_bypass(q)) ||
170 !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) 176 !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
171 return NULL; 177 return NULL;
172 } 178 }
173} 179}
@@ -176,16 +182,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
176{ 182{
177 struct elevator_queue *e = q->elevator; 183 struct elevator_queue *e = q->elevator;
178 184
179 if (e->type->ops.elevator_activate_req_fn) 185 if (e->type->ops.sq.elevator_activate_req_fn)
180 e->type->ops.elevator_activate_req_fn(q, rq); 186 e->type->ops.sq.elevator_activate_req_fn(q, rq);
181} 187}
182 188
183static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) 189static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
184{ 190{
185 struct elevator_queue *e = q->elevator; 191 struct elevator_queue *e = q->elevator;
186 192
187 if (e->type->ops.elevator_deactivate_req_fn) 193 if (e->type->ops.sq.elevator_deactivate_req_fn)
188 e->type->ops.elevator_deactivate_req_fn(q, rq); 194 e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
189} 195}
190 196
191#ifdef CONFIG_FAIL_IO_TIMEOUT 197#ifdef CONFIG_FAIL_IO_TIMEOUT
@@ -204,14 +210,14 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
204 struct bio *bio); 210 struct bio *bio);
205int ll_front_merge_fn(struct request_queue *q, struct request *req, 211int ll_front_merge_fn(struct request_queue *q, struct request *req,
206 struct bio *bio); 212 struct bio *bio);
207int attempt_back_merge(struct request_queue *q, struct request *rq); 213struct request *attempt_back_merge(struct request_queue *q, struct request *rq);
208int attempt_front_merge(struct request_queue *q, struct request *rq); 214struct request *attempt_front_merge(struct request_queue *q, struct request *rq);
209int blk_attempt_req_merge(struct request_queue *q, struct request *rq, 215int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
210 struct request *next); 216 struct request *next);
211void blk_recalc_rq_segments(struct request *rq); 217void blk_recalc_rq_segments(struct request *rq);
212void blk_rq_set_mixed_merge(struct request *rq); 218void blk_rq_set_mixed_merge(struct request *rq);
213bool blk_rq_merge_ok(struct request *rq, struct bio *bio); 219bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
214int blk_try_merge(struct request *rq, struct bio *bio); 220enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
215 221
216void blk_queue_congestion_threshold(struct request_queue *q); 222void blk_queue_congestion_threshold(struct request_queue *q);
217 223
@@ -249,7 +255,14 @@ static inline int blk_do_io_stat(struct request *rq)
249{ 255{
250 return rq->rq_disk && 256 return rq->rq_disk &&
251 (rq->rq_flags & RQF_IO_STAT) && 257 (rq->rq_flags & RQF_IO_STAT) &&
252 (rq->cmd_type == REQ_TYPE_FS); 258 !blk_rq_is_passthrough(rq);
259}
260
261static inline void req_set_nomerge(struct request_queue *q, struct request *req)
262{
263 req->cmd_flags |= REQ_NOMERGE;
264 if (req == q->last_merge)
265 q->last_merge = NULL;
253} 266}
254 267
255/* 268/*
@@ -264,6 +277,22 @@ void ioc_clear_queue(struct request_queue *q);
264int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); 277int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
265 278
266/** 279/**
280 * rq_ioc - determine io_context for request allocation
281 * @bio: request being allocated is for this bio (can be %NULL)
282 *
283 * Determine io_context to use for request allocation for @bio. May return
284 * %NULL if %current->io_context doesn't exist.
285 */
286static inline struct io_context *rq_ioc(struct bio *bio)
287{
288#ifdef CONFIG_BLK_CGROUP
289 if (bio && bio->bi_ioc)
290 return bio->bi_ioc;
291#endif
292 return current->io_context;
293}
294
295/**
267 * create_io_context - try to create task->io_context 296 * create_io_context - try to create task->io_context
268 * @gfp_mask: allocation mask 297 * @gfp_mask: allocation mask
269 * @node: allocation node 298 * @node: allocation node
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 9d652a992316..cd15f9dbb147 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -71,22 +71,24 @@ void bsg_job_done(struct bsg_job *job, int result,
71{ 71{
72 struct request *req = job->req; 72 struct request *req = job->req;
73 struct request *rsp = req->next_rq; 73 struct request *rsp = req->next_rq;
74 struct scsi_request *rq = scsi_req(req);
74 int err; 75 int err;
75 76
76 err = job->req->errors = result; 77 err = job->req->errors = result;
77 if (err < 0) 78 if (err < 0)
78 /* we're only returning the result field in the reply */ 79 /* we're only returning the result field in the reply */
79 job->req->sense_len = sizeof(u32); 80 rq->sense_len = sizeof(u32);
80 else 81 else
81 job->req->sense_len = job->reply_len; 82 rq->sense_len = job->reply_len;
82 /* we assume all request payload was transferred, residual == 0 */ 83 /* we assume all request payload was transferred, residual == 0 */
83 req->resid_len = 0; 84 rq->resid_len = 0;
84 85
85 if (rsp) { 86 if (rsp) {
86 WARN_ON(reply_payload_rcv_len > rsp->resid_len); 87 WARN_ON(reply_payload_rcv_len > scsi_req(rsp)->resid_len);
87 88
88 /* set reply (bidi) residual */ 89 /* set reply (bidi) residual */
89 rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); 90 scsi_req(rsp)->resid_len -=
91 min(reply_payload_rcv_len, scsi_req(rsp)->resid_len);
90 } 92 }
91 blk_complete_request(req); 93 blk_complete_request(req);
92} 94}
@@ -113,6 +115,7 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
113 if (!buf->sg_list) 115 if (!buf->sg_list)
114 return -ENOMEM; 116 return -ENOMEM;
115 sg_init_table(buf->sg_list, req->nr_phys_segments); 117 sg_init_table(buf->sg_list, req->nr_phys_segments);
118 scsi_req(req)->resid_len = blk_rq_bytes(req);
116 buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); 119 buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
117 buf->payload_len = blk_rq_bytes(req); 120 buf->payload_len = blk_rq_bytes(req);
118 return 0; 121 return 0;
@@ -127,6 +130,7 @@ static int bsg_create_job(struct device *dev, struct request *req)
127{ 130{
128 struct request *rsp = req->next_rq; 131 struct request *rsp = req->next_rq;
129 struct request_queue *q = req->q; 132 struct request_queue *q = req->q;
133 struct scsi_request *rq = scsi_req(req);
130 struct bsg_job *job; 134 struct bsg_job *job;
131 int ret; 135 int ret;
132 136
@@ -140,9 +144,9 @@ static int bsg_create_job(struct device *dev, struct request *req)
140 job->req = req; 144 job->req = req;
141 if (q->bsg_job_size) 145 if (q->bsg_job_size)
142 job->dd_data = (void *)&job[1]; 146 job->dd_data = (void *)&job[1];
143 job->request = req->cmd; 147 job->request = rq->cmd;
144 job->request_len = req->cmd_len; 148 job->request_len = rq->cmd_len;
145 job->reply = req->sense; 149 job->reply = rq->sense;
146 job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer 150 job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
147 * allocated */ 151 * allocated */
148 if (req->bio) { 152 if (req->bio) {
@@ -177,7 +181,7 @@ failjob_rls_job:
177 * 181 *
178 * Drivers/subsys should pass this to the queue init function. 182 * Drivers/subsys should pass this to the queue init function.
179 */ 183 */
180void bsg_request_fn(struct request_queue *q) 184static void bsg_request_fn(struct request_queue *q)
181 __releases(q->queue_lock) 185 __releases(q->queue_lock)
182 __acquires(q->queue_lock) 186 __acquires(q->queue_lock)
183{ 187{
@@ -214,24 +218,30 @@ void bsg_request_fn(struct request_queue *q)
214 put_device(dev); 218 put_device(dev);
215 spin_lock_irq(q->queue_lock); 219 spin_lock_irq(q->queue_lock);
216} 220}
217EXPORT_SYMBOL_GPL(bsg_request_fn);
218 221
219/** 222/**
220 * bsg_setup_queue - Create and add the bsg hooks so we can receive requests 223 * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
221 * @dev: device to attach bsg device to 224 * @dev: device to attach bsg device to
222 * @q: request queue setup by caller
223 * @name: device to give bsg device 225 * @name: device to give bsg device
224 * @job_fn: bsg job handler 226 * @job_fn: bsg job handler
225 * @dd_job_size: size of LLD data needed for each job 227 * @dd_job_size: size of LLD data needed for each job
226 *
227 * The caller should have set up the request queue with bsg_request_fn
228 * as the request_fn.
229 */ 228 */
230int bsg_setup_queue(struct device *dev, struct request_queue *q, 229struct request_queue *bsg_setup_queue(struct device *dev, char *name,
231 char *name, bsg_job_fn *job_fn, int dd_job_size) 230 bsg_job_fn *job_fn, int dd_job_size)
232{ 231{
232 struct request_queue *q;
233 int ret; 233 int ret;
234 234
235 q = blk_alloc_queue(GFP_KERNEL);
236 if (!q)
237 return ERR_PTR(-ENOMEM);
238 q->cmd_size = sizeof(struct scsi_request);
239 q->request_fn = bsg_request_fn;
240
241 ret = blk_init_allocated_queue(q);
242 if (ret)
243 goto out_cleanup_queue;
244
235 q->queuedata = dev; 245 q->queuedata = dev;
236 q->bsg_job_size = dd_job_size; 246 q->bsg_job_size = dd_job_size;
237 q->bsg_job_fn = job_fn; 247 q->bsg_job_fn = job_fn;
@@ -243,9 +253,12 @@ int bsg_setup_queue(struct device *dev, struct request_queue *q,
243 if (ret) { 253 if (ret) {
244 printk(KERN_ERR "%s: bsg interface failed to " 254 printk(KERN_ERR "%s: bsg interface failed to "
245 "initialize - register queue\n", dev->kobj.name); 255 "initialize - register queue\n", dev->kobj.name);
246 return ret; 256 goto out_cleanup_queue;
247 } 257 }
248 258
249 return 0; 259 return q;
260out_cleanup_queue:
261 blk_cleanup_queue(q);
262 return ERR_PTR(ret);
250} 263}
251EXPORT_SYMBOL_GPL(bsg_setup_queue); 264EXPORT_SYMBOL_GPL(bsg_setup_queue);
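bsg_setup_queue() now allocates and initializes the request queue itself and hands it back, instead of taking a caller-prepared one. A hedged caller-side sketch; the example_ names, the job handler body and the job-data struct are placeholders:

struct example_job_data {
	int dummy;
};

static int example_bsg_job_fn(struct bsg_job *job)
{
	/* Placeholder handler: complete immediately with success. */
	bsg_job_done(job, 0, 0);
	return 0;
}

static int example_bsg_attach(struct device *dev)
{
	struct request_queue *q;
	char name[20];

	snprintf(name, sizeof(name), "%s_bsg", dev_name(dev));
	q = bsg_setup_queue(dev, name, example_bsg_job_fn,
			    sizeof(struct example_job_data));
	if (IS_ERR(q))
		return PTR_ERR(q);

	/* Tear down with blk_cleanup_queue(q) when detaching. */
	return 0;
}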
diff --git a/block/bsg.c b/block/bsg.c
index a57046de2f07..a9a8b8e0446f 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -85,7 +85,6 @@ struct bsg_command {
85 struct bio *bidi_bio; 85 struct bio *bidi_bio;
86 int err; 86 int err;
87 struct sg_io_v4 hdr; 87 struct sg_io_v4 hdr;
88 char sense[SCSI_SENSE_BUFFERSIZE];
89}; 88};
90 89
91static void bsg_free_command(struct bsg_command *bc) 90static void bsg_free_command(struct bsg_command *bc)
@@ -140,18 +139,20 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
140 struct sg_io_v4 *hdr, struct bsg_device *bd, 139 struct sg_io_v4 *hdr, struct bsg_device *bd,
141 fmode_t has_write_perm) 140 fmode_t has_write_perm)
142{ 141{
142 struct scsi_request *req = scsi_req(rq);
143
143 if (hdr->request_len > BLK_MAX_CDB) { 144 if (hdr->request_len > BLK_MAX_CDB) {
144 rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); 145 req->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
145 if (!rq->cmd) 146 if (!req->cmd)
146 return -ENOMEM; 147 return -ENOMEM;
147 } 148 }
148 149
149 if (copy_from_user(rq->cmd, (void __user *)(unsigned long)hdr->request, 150 if (copy_from_user(req->cmd, (void __user *)(unsigned long)hdr->request,
150 hdr->request_len)) 151 hdr->request_len))
151 return -EFAULT; 152 return -EFAULT;
152 153
153 if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { 154 if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
154 if (blk_verify_command(rq->cmd, has_write_perm)) 155 if (blk_verify_command(req->cmd, has_write_perm))
155 return -EPERM; 156 return -EPERM;
156 } else if (!capable(CAP_SYS_RAWIO)) 157 } else if (!capable(CAP_SYS_RAWIO))
157 return -EPERM; 158 return -EPERM;
@@ -159,7 +160,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
159 /* 160 /*
160 * fill in request structure 161 * fill in request structure
161 */ 162 */
162 rq->cmd_len = hdr->request_len; 163 req->cmd_len = hdr->request_len;
163 164
164 rq->timeout = msecs_to_jiffies(hdr->timeout); 165 rq->timeout = msecs_to_jiffies(hdr->timeout);
165 if (!rq->timeout) 166 if (!rq->timeout)
@@ -176,7 +177,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
176 * Check if sg_io_v4 from user is allowed and valid 177 * Check if sg_io_v4 from user is allowed and valid
177 */ 178 */
178static int 179static int
179bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw) 180bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op)
180{ 181{
181 int ret = 0; 182 int ret = 0;
182 183
@@ -197,7 +198,7 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw)
197 ret = -EINVAL; 198 ret = -EINVAL;
198 } 199 }
199 200
200 *rw = hdr->dout_xfer_len ? WRITE : READ; 201 *op = hdr->dout_xfer_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN;
201 return ret; 202 return ret;
202} 203}
203 204
@@ -205,13 +206,12 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *rw)
205 * map sg_io_v4 to a request. 206 * map sg_io_v4 to a request.
206 */ 207 */
207static struct request * 208static struct request *
208bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm, 209bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm)
209 u8 *sense)
210{ 210{
211 struct request_queue *q = bd->queue; 211 struct request_queue *q = bd->queue;
212 struct request *rq, *next_rq = NULL; 212 struct request *rq, *next_rq = NULL;
213 int ret, rw; 213 int ret;
214 unsigned int dxfer_len; 214 unsigned int op, dxfer_len;
215 void __user *dxferp = NULL; 215 void __user *dxferp = NULL;
216 struct bsg_class_device *bcd = &q->bsg_dev; 216 struct bsg_class_device *bcd = &q->bsg_dev;
217 217
@@ -226,36 +226,35 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
226 hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp, 226 hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
227 hdr->din_xfer_len); 227 hdr->din_xfer_len);
228 228
229 ret = bsg_validate_sgv4_hdr(hdr, &rw); 229 ret = bsg_validate_sgv4_hdr(hdr, &op);
230 if (ret) 230 if (ret)
231 return ERR_PTR(ret); 231 return ERR_PTR(ret);
232 232
233 /* 233 /*
234 * map scatter-gather elements separately and string them to request 234 * map scatter-gather elements separately and string them to request
235 */ 235 */
236 rq = blk_get_request(q, rw, GFP_KERNEL); 236 rq = blk_get_request(q, op, GFP_KERNEL);
237 if (IS_ERR(rq)) 237 if (IS_ERR(rq))
238 return rq; 238 return rq;
239 blk_rq_set_block_pc(rq); 239 scsi_req_init(rq);
240 240
241 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); 241 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
242 if (ret) 242 if (ret)
243 goto out; 243 goto out;
244 244
245 if (rw == WRITE && hdr->din_xfer_len) { 245 if (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len) {
246 if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) { 246 if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
247 ret = -EOPNOTSUPP; 247 ret = -EOPNOTSUPP;
248 goto out; 248 goto out;
249 } 249 }
250 250
251 next_rq = blk_get_request(q, READ, GFP_KERNEL); 251 next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
252 if (IS_ERR(next_rq)) { 252 if (IS_ERR(next_rq)) {
253 ret = PTR_ERR(next_rq); 253 ret = PTR_ERR(next_rq);
254 next_rq = NULL; 254 next_rq = NULL;
255 goto out; 255 goto out;
256 } 256 }
257 rq->next_rq = next_rq; 257 rq->next_rq = next_rq;
258 next_rq->cmd_type = rq->cmd_type;
259 258
260 dxferp = (void __user *)(unsigned long)hdr->din_xferp; 259 dxferp = (void __user *)(unsigned long)hdr->din_xferp;
261 ret = blk_rq_map_user(q, next_rq, NULL, dxferp, 260 ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
@@ -280,13 +279,9 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
280 goto out; 279 goto out;
281 } 280 }
282 281
283 rq->sense = sense;
284 rq->sense_len = 0;
285
286 return rq; 282 return rq;
287out: 283out:
288 if (rq->cmd != rq->__cmd) 284 scsi_req_free_cmd(scsi_req(rq));
289 kfree(rq->cmd);
290 blk_put_request(rq); 285 blk_put_request(rq);
291 if (next_rq) { 286 if (next_rq) {
292 blk_rq_unmap_user(next_rq->bio); 287 blk_rq_unmap_user(next_rq->bio);
@@ -393,6 +388,7 @@ static struct bsg_command *bsg_get_done_cmd(struct bsg_device *bd)
393static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, 388static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
394 struct bio *bio, struct bio *bidi_bio) 389 struct bio *bio, struct bio *bidi_bio)
395{ 390{
391 struct scsi_request *req = scsi_req(rq);
396 int ret = 0; 392 int ret = 0;
397 393
398 dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors); 394 dprintk("rq %p bio %p 0x%x\n", rq, bio, rq->errors);
@@ -407,12 +403,12 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
407 hdr->info |= SG_INFO_CHECK; 403 hdr->info |= SG_INFO_CHECK;
408 hdr->response_len = 0; 404 hdr->response_len = 0;
409 405
410 if (rq->sense_len && hdr->response) { 406 if (req->sense_len && hdr->response) {
411 int len = min_t(unsigned int, hdr->max_response_len, 407 int len = min_t(unsigned int, hdr->max_response_len,
412 rq->sense_len); 408 req->sense_len);
413 409
414 ret = copy_to_user((void __user *)(unsigned long)hdr->response, 410 ret = copy_to_user((void __user *)(unsigned long)hdr->response,
415 rq->sense, len); 411 req->sense, len);
416 if (!ret) 412 if (!ret)
417 hdr->response_len = len; 413 hdr->response_len = len;
418 else 414 else
@@ -420,14 +416,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
420 } 416 }
421 417
422 if (rq->next_rq) { 418 if (rq->next_rq) {
423 hdr->dout_resid = rq->resid_len; 419 hdr->dout_resid = req->resid_len;
424 hdr->din_resid = rq->next_rq->resid_len; 420 hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
425 blk_rq_unmap_user(bidi_bio); 421 blk_rq_unmap_user(bidi_bio);
426 blk_put_request(rq->next_rq); 422 blk_put_request(rq->next_rq);
427 } else if (rq_data_dir(rq) == READ) 423 } else if (rq_data_dir(rq) == READ)
428 hdr->din_resid = rq->resid_len; 424 hdr->din_resid = req->resid_len;
429 else 425 else
430 hdr->dout_resid = rq->resid_len; 426 hdr->dout_resid = req->resid_len;
431 427
432 /* 428 /*
433 * If the request generated a negative error number, return it 429 * If the request generated a negative error number, return it
@@ -439,8 +435,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
439 ret = rq->errors; 435 ret = rq->errors;
440 436
441 blk_rq_unmap_user(bio); 437 blk_rq_unmap_user(bio);
442 if (rq->cmd != rq->__cmd) 438 scsi_req_free_cmd(req);
443 kfree(rq->cmd);
444 blk_put_request(rq); 439 blk_put_request(rq);
445 440
446 return ret; 441 return ret;
@@ -625,7 +620,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf,
625 /* 620 /*
626 * get a request, fill in the blanks, and add to request queue 621 * get a request, fill in the blanks, and add to request queue
627 */ 622 */
628 rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm, bc->sense); 623 rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm);
629 if (IS_ERR(rq)) { 624 if (IS_ERR(rq)) {
630 ret = PTR_ERR(rq); 625 ret = PTR_ERR(rq);
631 rq = NULL; 626 rq = NULL;
@@ -911,12 +906,11 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
911 struct bio *bio, *bidi_bio = NULL; 906 struct bio *bio, *bidi_bio = NULL;
912 struct sg_io_v4 hdr; 907 struct sg_io_v4 hdr;
913 int at_head; 908 int at_head;
914 u8 sense[SCSI_SENSE_BUFFERSIZE];
915 909
916 if (copy_from_user(&hdr, uarg, sizeof(hdr))) 910 if (copy_from_user(&hdr, uarg, sizeof(hdr)))
917 return -EFAULT; 911 return -EFAULT;
918 912
919 rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE, sense); 913 rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE);
920 if (IS_ERR(rq)) 914 if (IS_ERR(rq))
921 return PTR_ERR(rq); 915 return PTR_ERR(rq);
922 916
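All of the cmd/sense/resid state that bsg used to keep in struct request is now reached through scsi_req(), which works because the queue is created with cmd_size = sizeof(struct scsi_request) (see the bsg_setup_queue() change above), so the SCSI fields live in the per-request payload. A sketch of the accessor idea; the real helper lives in include/scsi/scsi_request.h and this body is an assumption:

/* Assumed shape of the accessor: the SCSI payload is the request PDU,
 * allocated right behind struct request because cmd_size was set. */
static inline struct scsi_request *example_scsi_req(struct request *rq)
{
	return blk_mq_rq_to_pdu(rq);
}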
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 838f07e2b64a..137944777859 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2528,7 +2528,7 @@ static void cfq_remove_request(struct request *rq)
2528 } 2528 }
2529} 2529}
2530 2530
2531static int cfq_merge(struct request_queue *q, struct request **req, 2531static enum elv_merge cfq_merge(struct request_queue *q, struct request **req,
2532 struct bio *bio) 2532 struct bio *bio)
2533{ 2533{
2534 struct cfq_data *cfqd = q->elevator->elevator_data; 2534 struct cfq_data *cfqd = q->elevator->elevator_data;
@@ -2544,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req,
2544} 2544}
2545 2545
2546static void cfq_merged_request(struct request_queue *q, struct request *req, 2546static void cfq_merged_request(struct request_queue *q, struct request *req,
2547 int type) 2547 enum elv_merge type)
2548{ 2548{
2549 if (type == ELEVATOR_FRONT_MERGE) { 2549 if (type == ELEVATOR_FRONT_MERGE) {
2550 struct cfq_queue *cfqq = RQ_CFQQ(req); 2550 struct cfq_queue *cfqq = RQ_CFQQ(req);
@@ -2749,9 +2749,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
2749 if (!cfqg) 2749 if (!cfqg)
2750 return NULL; 2750 return NULL;
2751 2751
2752 for_each_cfqg_st(cfqg, i, j, st) 2752 for_each_cfqg_st(cfqg, i, j, st) {
2753 if ((cfqq = cfq_rb_first(st)) != NULL) 2753 cfqq = cfq_rb_first(st);
2754 if (cfqq)
2754 return cfqq; 2755 return cfqq;
2756 }
2755 return NULL; 2757 return NULL;
2756} 2758}
2757 2759
@@ -3860,6 +3862,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
3860 goto out; 3862 goto out;
3861 } 3863 }
3862 3864
3865 /* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
3866 cfqq->ioprio_class = IOPRIO_CLASS_NONE;
3863 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); 3867 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
3864 cfq_init_prio_data(cfqq, cic); 3868 cfq_init_prio_data(cfqq, cic);
3865 cfq_link_cfqq_cfqg(cfqq, cfqg); 3869 cfq_link_cfqq_cfqg(cfqq, cfqg);
@@ -4838,7 +4842,7 @@ static struct elv_fs_entry cfq_attrs[] = {
4838}; 4842};
4839 4843
4840static struct elevator_type iosched_cfq = { 4844static struct elevator_type iosched_cfq = {
4841 .ops = { 4845 .ops.sq = {
4842 .elevator_merge_fn = cfq_merge, 4846 .elevator_merge_fn = cfq_merge,
4843 .elevator_merged_fn = cfq_merged_request, 4847 .elevator_merged_fn = cfq_merged_request,
4844 .elevator_merge_req_fn = cfq_merged_requests, 4848 .elevator_merge_req_fn = cfq_merged_requests,
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 556826ac7cb4..570021a0dc1c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -661,7 +661,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
661 struct block_device *bdev = inode->i_bdev; 661 struct block_device *bdev = inode->i_bdev;
662 struct gendisk *disk = bdev->bd_disk; 662 struct gendisk *disk = bdev->bd_disk;
663 fmode_t mode = file->f_mode; 663 fmode_t mode = file->f_mode;
664 struct backing_dev_info *bdi;
665 loff_t size; 664 loff_t size;
666 unsigned int max_sectors; 665 unsigned int max_sectors;
667 666
@@ -708,9 +707,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
708 case BLKFRAGET: 707 case BLKFRAGET:
709 if (!arg) 708 if (!arg)
710 return -EINVAL; 709 return -EINVAL;
711 bdi = blk_get_backing_dev_info(bdev);
712 return compat_put_long(arg, 710 return compat_put_long(arg,
713 (bdi->ra_pages * PAGE_SIZE) / 512); 711 (bdev->bd_bdi->ra_pages * PAGE_SIZE) / 512);
714 case BLKROGET: /* compatible */ 712 case BLKROGET: /* compatible */
715 return compat_put_int(arg, bdev_read_only(bdev) != 0); 713 return compat_put_int(arg, bdev_read_only(bdev) != 0);
716 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 714 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
@@ -728,8 +726,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
728 case BLKFRASET: 726 case BLKFRASET:
729 if (!capable(CAP_SYS_ADMIN)) 727 if (!capable(CAP_SYS_ADMIN))
730 return -EACCES; 728 return -EACCES;
731 bdi = blk_get_backing_dev_info(bdev); 729 bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
732 bdi->ra_pages = (arg * 512) / PAGE_SIZE;
733 return 0; 730 return 0;
734 case BLKGETSIZE: 731 case BLKGETSIZE:
735 size = i_size_read(bdev->bd_inode); 732 size = i_size_read(bdev->bd_inode);
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 55e0bb6d7da7..c68f6bbc0dcd 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -120,12 +120,11 @@ static void deadline_remove_request(struct request_queue *q, struct request *rq)
120 deadline_del_rq_rb(dd, rq); 120 deadline_del_rq_rb(dd, rq);
121} 121}
122 122
123static int 123static enum elv_merge
124deadline_merge(struct request_queue *q, struct request **req, struct bio *bio) 124deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
125{ 125{
126 struct deadline_data *dd = q->elevator->elevator_data; 126 struct deadline_data *dd = q->elevator->elevator_data;
127 struct request *__rq; 127 struct request *__rq;
128 int ret;
129 128
130 /* 129 /*
131 * check for front merge 130 * check for front merge
@@ -138,20 +137,17 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
138 BUG_ON(sector != blk_rq_pos(__rq)); 137 BUG_ON(sector != blk_rq_pos(__rq));
139 138
140 if (elv_bio_merge_ok(__rq, bio)) { 139 if (elv_bio_merge_ok(__rq, bio)) {
141 ret = ELEVATOR_FRONT_MERGE; 140 *req = __rq;
142 goto out; 141 return ELEVATOR_FRONT_MERGE;
143 } 142 }
144 } 143 }
145 } 144 }
146 145
147 return ELEVATOR_NO_MERGE; 146 return ELEVATOR_NO_MERGE;
148out:
149 *req = __rq;
150 return ret;
151} 147}
152 148
153static void deadline_merged_request(struct request_queue *q, 149static void deadline_merged_request(struct request_queue *q,
154 struct request *req, int type) 150 struct request *req, enum elv_merge type)
155{ 151{
156 struct deadline_data *dd = q->elevator->elevator_data; 152 struct deadline_data *dd = q->elevator->elevator_data;
157 153
@@ -439,7 +435,7 @@ static struct elv_fs_entry deadline_attrs[] = {
439}; 435};
440 436
441static struct elevator_type iosched_deadline = { 437static struct elevator_type iosched_deadline = {
442 .ops = { 438 .ops.sq = {
443 .elevator_merge_fn = deadline_merge, 439 .elevator_merge_fn = deadline_merge,
444 .elevator_merged_fn = deadline_merged_request, 440 .elevator_merged_fn = deadline_merged_request,
445 .elevator_merge_req_fn = deadline_merged_requests, 441 .elevator_merge_req_fn = deadline_merged_requests,
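The elevator merge hooks now return a dedicated enum rather than a bare int, which also leaves room for the discard-merge case introduced alongside bio_attempt_discard_merge() in blk.h. The enumerator values below are an assumption; only the NO/FRONT/BACK names appear in the hunks above:

/* Assumed definition of the new return type. */
enum elv_merge {
	ELEVATOR_NO_MERGE	= 0,
	ELEVATOR_FRONT_MERGE	= 1,
	ELEVATOR_BACK_MERGE	= 2,
	ELEVATOR_DISCARD_MERGE	= 3,
};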
diff --git a/block/elevator.c b/block/elevator.c
index 40f0c04e5ad3..699d10f71a2c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -40,6 +40,7 @@
40#include <trace/events/block.h> 40#include <trace/events/block.h>
41 41
42#include "blk.h" 42#include "blk.h"
43#include "blk-mq-sched.h"
43 44
44static DEFINE_SPINLOCK(elv_list_lock); 45static DEFINE_SPINLOCK(elv_list_lock);
45static LIST_HEAD(elv_list); 46static LIST_HEAD(elv_list);
@@ -58,8 +59,10 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
58 struct request_queue *q = rq->q; 59 struct request_queue *q = rq->q;
59 struct elevator_queue *e = q->elevator; 60 struct elevator_queue *e = q->elevator;
60 61
61 if (e->type->ops.elevator_allow_bio_merge_fn) 62 if (e->uses_mq && e->type->ops.mq.allow_merge)
62 return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio); 63 return e->type->ops.mq.allow_merge(q, rq, bio);
64 else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
65 return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
63 66
64 return 1; 67 return 1;
65} 68}
@@ -163,6 +166,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
163 kobject_init(&eq->kobj, &elv_ktype); 166 kobject_init(&eq->kobj, &elv_ktype);
164 mutex_init(&eq->sysfs_lock); 167 mutex_init(&eq->sysfs_lock);
165 hash_init(eq->hash); 168 hash_init(eq->hash);
169 eq->uses_mq = e->uses_mq;
166 170
167 return eq; 171 return eq;
168} 172}
@@ -203,11 +207,12 @@ int elevator_init(struct request_queue *q, char *name)
203 } 207 }
204 208
205 /* 209 /*
206 * Use the default elevator specified by config boot param or 210 * Use the default elevator specified by config boot param for
207 * config option. Don't try to load modules as we could be running 211 * non-mq devices, or by config option. Don't try to load modules
208 * off async and request_module() isn't allowed from async. 212 * as we could be running off async and request_module() isn't
213 * allowed from async.
209 */ 214 */
210 if (!e && *chosen_elevator) { 215 if (!e && !q->mq_ops && *chosen_elevator) {
211 e = elevator_get(chosen_elevator, false); 216 e = elevator_get(chosen_elevator, false);
212 if (!e) 217 if (!e)
213 printk(KERN_ERR "I/O scheduler %s not found\n", 218 printk(KERN_ERR "I/O scheduler %s not found\n",
@@ -215,18 +220,32 @@ int elevator_init(struct request_queue *q, char *name)
215 } 220 }
216 221
217 if (!e) { 222 if (!e) {
218 e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); 223 if (q->mq_ops && q->nr_hw_queues == 1)
224 e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
225 else if (q->mq_ops)
226 e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
227 else
228 e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
229
219 if (!e) { 230 if (!e) {
220 printk(KERN_ERR 231 printk(KERN_ERR
221 "Default I/O scheduler not found. " \ 232 "Default I/O scheduler not found. " \
222 "Using noop.\n"); 233 "Using noop/none.\n");
223 e = elevator_get("noop", false); 234 e = elevator_get("noop", false);
224 } 235 }
225 } 236 }
226 237
227 err = e->ops.elevator_init_fn(q, e); 238 if (e->uses_mq) {
228 if (err) 239 err = blk_mq_sched_setup(q);
240 if (!err)
241 err = e->ops.mq.init_sched(q, e);
242 } else
243 err = e->ops.sq.elevator_init_fn(q, e);
244 if (err) {
245 if (e->uses_mq)
246 blk_mq_sched_teardown(q);
229 elevator_put(e); 247 elevator_put(e);
248 }
230 return err; 249 return err;
231} 250}
232EXPORT_SYMBOL(elevator_init); 251EXPORT_SYMBOL(elevator_init);
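
elevator_init() above now picks different Kconfig defaults depending on whether the queue is blk-mq and how many hardware queues it has. The decision itself is simple; here is a toy sketch of it, where the quoted scheduler names are illustrative placeholders for the CONFIG_DEFAULT_*_IOSCHED values, not authoritative defaults:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for CONFIG_DEFAULT_{SQ_IOSCHED,MQ_IOSCHED,IOSCHED}; example names only. */
static const char *default_sq_mq_sched = "mq-deadline";
static const char *default_mq_sched    = "none";
static const char *default_legacy      = "cfq";

static const char *pick_default(bool uses_blk_mq, unsigned int nr_hw_queues)
{
	if (uses_blk_mq && nr_hw_queues == 1)
		return default_sq_mq_sched;	/* single hw queue: scheduling helps most */
	if (uses_blk_mq)
		return default_mq_sched;	/* many hw queues */
	return default_legacy;			/* legacy request_fn path */
}

int main(void)
{
	printf("mq, 1 queue : %s\n", pick_default(true, 1));
	printf("mq, 8 queues: %s\n", pick_default(true, 8));
	printf("legacy      : %s\n", pick_default(false, 1));
	return 0;
}
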
@@ -234,8 +253,10 @@ EXPORT_SYMBOL(elevator_init);
234void elevator_exit(struct elevator_queue *e) 253void elevator_exit(struct elevator_queue *e)
235{ 254{
236 mutex_lock(&e->sysfs_lock); 255 mutex_lock(&e->sysfs_lock);
237 if (e->type->ops.elevator_exit_fn) 256 if (e->uses_mq && e->type->ops.mq.exit_sched)
238 e->type->ops.elevator_exit_fn(e); 257 e->type->ops.mq.exit_sched(e);
258 else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
259 e->type->ops.sq.elevator_exit_fn(e);
239 mutex_unlock(&e->sysfs_lock); 260 mutex_unlock(&e->sysfs_lock);
240 261
241 kobject_put(&e->kobj); 262 kobject_put(&e->kobj);
@@ -253,6 +274,7 @@ void elv_rqhash_del(struct request_queue *q, struct request *rq)
253 if (ELV_ON_HASH(rq)) 274 if (ELV_ON_HASH(rq))
254 __elv_rqhash_del(rq); 275 __elv_rqhash_del(rq);
255} 276}
277EXPORT_SYMBOL_GPL(elv_rqhash_del);
256 278
257void elv_rqhash_add(struct request_queue *q, struct request *rq) 279void elv_rqhash_add(struct request_queue *q, struct request *rq)
258{ 280{
@@ -262,6 +284,7 @@ void elv_rqhash_add(struct request_queue *q, struct request *rq)
262 hash_add(e->hash, &rq->hash, rq_hash_key(rq)); 284 hash_add(e->hash, &rq->hash, rq_hash_key(rq));
263 rq->rq_flags |= RQF_HASHED; 285 rq->rq_flags |= RQF_HASHED;
264} 286}
287EXPORT_SYMBOL_GPL(elv_rqhash_add);
265 288
266void elv_rqhash_reposition(struct request_queue *q, struct request *rq) 289void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
267{ 290{
@@ -405,11 +428,11 @@ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
405} 428}
406EXPORT_SYMBOL(elv_dispatch_add_tail); 429EXPORT_SYMBOL(elv_dispatch_add_tail);
407 430
408int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) 431enum elv_merge elv_merge(struct request_queue *q, struct request **req,
432 struct bio *bio)
409{ 433{
410 struct elevator_queue *e = q->elevator; 434 struct elevator_queue *e = q->elevator;
411 struct request *__rq; 435 struct request *__rq;
412 int ret;
413 436
414 /* 437 /*
415 * Levels of merges: 438 * Levels of merges:
@@ -424,7 +447,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
424 * First try one-hit cache. 447 * First try one-hit cache.
425 */ 448 */
426 if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) { 449 if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
427 ret = blk_try_merge(q->last_merge, bio); 450 enum elv_merge ret = blk_try_merge(q->last_merge, bio);
451
428 if (ret != ELEVATOR_NO_MERGE) { 452 if (ret != ELEVATOR_NO_MERGE) {
429 *req = q->last_merge; 453 *req = q->last_merge;
430 return ret; 454 return ret;
@@ -443,8 +467,10 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
443 return ELEVATOR_BACK_MERGE; 467 return ELEVATOR_BACK_MERGE;
444 } 468 }
445 469
446 if (e->type->ops.elevator_merge_fn) 470 if (e->uses_mq && e->type->ops.mq.request_merge)
447 return e->type->ops.elevator_merge_fn(q, req, bio); 471 return e->type->ops.mq.request_merge(q, req, bio);
472 else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
473 return e->type->ops.sq.elevator_merge_fn(q, req, bio);
448 474
449 return ELEVATOR_NO_MERGE; 475 return ELEVATOR_NO_MERGE;
450} 476}
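
elv_merge() above probes q->last_merge (the "one-hit cache") before falling back to the rq hash or the scheduler's own merge hook. A small, self-contained sketch of that caching idea, using a toy table lookup rather than real request merging:

#include <stdio.h>
#include <stddef.h>

struct item { int key; const char *val; };

static struct item table[] = {
	{ 1, "one" }, { 2, "two" }, { 3, "three" },
};
static struct item *last_hit;	/* one-entry cache, like q->last_merge */

static struct item *lookup(int key)
{
	size_t i;

	/* Try the cached entry first; repeated lookups for the same key hit here. */
	if (last_hit && last_hit->key == key)
		return last_hit;

	/* Slow path: full scan, then refresh the cache. */
	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		if (table[i].key == key) {
			last_hit = &table[i];
			return last_hit;
		}
	}
	return NULL;
}

int main(void)
{
	printf("%s\n", lookup(2)->val);	/* slow path, fills the cache */
	printf("%s\n", lookup(2)->val);	/* served from the one-hit cache */
	return 0;
}
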
@@ -456,8 +482,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
456 * 482 *
457 * Returns true if we merged, false otherwise 483 * Returns true if we merged, false otherwise
458 */ 484 */
459static bool elv_attempt_insert_merge(struct request_queue *q, 485bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
460 struct request *rq)
461{ 486{
462 struct request *__rq; 487 struct request *__rq;
463 bool ret; 488 bool ret;
@@ -491,12 +516,15 @@ static bool elv_attempt_insert_merge(struct request_queue *q,
491 return ret; 516 return ret;
492} 517}
493 518
494void elv_merged_request(struct request_queue *q, struct request *rq, int type) 519void elv_merged_request(struct request_queue *q, struct request *rq,
520 enum elv_merge type)
495{ 521{
496 struct elevator_queue *e = q->elevator; 522 struct elevator_queue *e = q->elevator;
497 523
498 if (e->type->ops.elevator_merged_fn) 524 if (e->uses_mq && e->type->ops.mq.request_merged)
499 e->type->ops.elevator_merged_fn(q, rq, type); 525 e->type->ops.mq.request_merged(q, rq, type);
526 else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
527 e->type->ops.sq.elevator_merged_fn(q, rq, type);
500 528
501 if (type == ELEVATOR_BACK_MERGE) 529 if (type == ELEVATOR_BACK_MERGE)
502 elv_rqhash_reposition(q, rq); 530 elv_rqhash_reposition(q, rq);
@@ -508,10 +536,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
508 struct request *next) 536 struct request *next)
509{ 537{
510 struct elevator_queue *e = q->elevator; 538 struct elevator_queue *e = q->elevator;
511 const int next_sorted = next->rq_flags & RQF_SORTED; 539 bool next_sorted = false;
512 540
513 if (next_sorted && e->type->ops.elevator_merge_req_fn) 541 if (e->uses_mq && e->type->ops.mq.requests_merged)
514 e->type->ops.elevator_merge_req_fn(q, rq, next); 542 e->type->ops.mq.requests_merged(q, rq, next);
543 else if (e->type->ops.sq.elevator_merge_req_fn) {
544 next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
545 if (next_sorted)
546 e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
547 }
515 548
516 elv_rqhash_reposition(q, rq); 549 elv_rqhash_reposition(q, rq);
517 550
@@ -528,8 +561,11 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
528{ 561{
529 struct elevator_queue *e = q->elevator; 562 struct elevator_queue *e = q->elevator;
530 563
531 if (e->type->ops.elevator_bio_merged_fn) 564 if (WARN_ON_ONCE(e->uses_mq))
532 e->type->ops.elevator_bio_merged_fn(q, rq, bio); 565 return;
566
567 if (e->type->ops.sq.elevator_bio_merged_fn)
568 e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
533} 569}
534 570
535#ifdef CONFIG_PM 571#ifdef CONFIG_PM
@@ -574,11 +610,15 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
574 610
575void elv_drain_elevator(struct request_queue *q) 611void elv_drain_elevator(struct request_queue *q)
576{ 612{
613 struct elevator_queue *e = q->elevator;
577 static int printed; 614 static int printed;
578 615
616 if (WARN_ON_ONCE(e->uses_mq))
617 return;
618
579 lockdep_assert_held(q->queue_lock); 619 lockdep_assert_held(q->queue_lock);
580 620
581 while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) 621 while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
582 ; 622 ;
583 if (q->nr_sorted && printed++ < 10) { 623 if (q->nr_sorted && printed++ < 10) {
584 printk(KERN_ERR "%s: forced dispatching is broken " 624 printk(KERN_ERR "%s: forced dispatching is broken "
@@ -597,7 +637,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
597 637
598 if (rq->rq_flags & RQF_SOFTBARRIER) { 638 if (rq->rq_flags & RQF_SOFTBARRIER) {
599 /* barriers are scheduling boundary, update end_sector */ 639 /* barriers are scheduling boundary, update end_sector */
600 if (rq->cmd_type == REQ_TYPE_FS) { 640 if (!blk_rq_is_passthrough(rq)) {
601 q->end_sector = rq_end_sector(rq); 641 q->end_sector = rq_end_sector(rq);
602 q->boundary_rq = rq; 642 q->boundary_rq = rq;
603 } 643 }
@@ -639,7 +679,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
639 if (elv_attempt_insert_merge(q, rq)) 679 if (elv_attempt_insert_merge(q, rq))
640 break; 680 break;
641 case ELEVATOR_INSERT_SORT: 681 case ELEVATOR_INSERT_SORT:
642 BUG_ON(rq->cmd_type != REQ_TYPE_FS); 682 BUG_ON(blk_rq_is_passthrough(rq));
643 rq->rq_flags |= RQF_SORTED; 683 rq->rq_flags |= RQF_SORTED;
644 q->nr_sorted++; 684 q->nr_sorted++;
645 if (rq_mergeable(rq)) { 685 if (rq_mergeable(rq)) {
@@ -653,7 +693,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
653 * rq cannot be accessed after calling 693 * rq cannot be accessed after calling
654 * elevator_add_req_fn. 694 * elevator_add_req_fn.
655 */ 695 */
656 q->elevator->type->ops.elevator_add_req_fn(q, rq); 696 q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
657 break; 697 break;
658 698
659 case ELEVATOR_INSERT_FLUSH: 699 case ELEVATOR_INSERT_FLUSH:
@@ -682,8 +722,11 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
682{ 722{
683 struct elevator_queue *e = q->elevator; 723 struct elevator_queue *e = q->elevator;
684 724
685 if (e->type->ops.elevator_latter_req_fn) 725 if (e->uses_mq && e->type->ops.mq.next_request)
686 return e->type->ops.elevator_latter_req_fn(q, rq); 726 return e->type->ops.mq.next_request(q, rq);
727 else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
728 return e->type->ops.sq.elevator_latter_req_fn(q, rq);
729
687 return NULL; 730 return NULL;
688} 731}
689 732
@@ -691,8 +734,10 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
691{ 734{
692 struct elevator_queue *e = q->elevator; 735 struct elevator_queue *e = q->elevator;
693 736
694 if (e->type->ops.elevator_former_req_fn) 737 if (e->uses_mq && e->type->ops.mq.former_request)
695 return e->type->ops.elevator_former_req_fn(q, rq); 738 return e->type->ops.mq.former_request(q, rq);
739 if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
740 return e->type->ops.sq.elevator_former_req_fn(q, rq);
696 return NULL; 741 return NULL;
697} 742}
698 743
@@ -701,8 +746,11 @@ int elv_set_request(struct request_queue *q, struct request *rq,
701{ 746{
702 struct elevator_queue *e = q->elevator; 747 struct elevator_queue *e = q->elevator;
703 748
704 if (e->type->ops.elevator_set_req_fn) 749 if (WARN_ON_ONCE(e->uses_mq))
705 return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask); 750 return 0;
751
752 if (e->type->ops.sq.elevator_set_req_fn)
753 return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
706 return 0; 754 return 0;
707} 755}
708 756
@@ -710,16 +758,22 @@ void elv_put_request(struct request_queue *q, struct request *rq)
710{ 758{
711 struct elevator_queue *e = q->elevator; 759 struct elevator_queue *e = q->elevator;
712 760
713 if (e->type->ops.elevator_put_req_fn) 761 if (WARN_ON_ONCE(e->uses_mq))
714 e->type->ops.elevator_put_req_fn(rq); 762 return;
763
764 if (e->type->ops.sq.elevator_put_req_fn)
765 e->type->ops.sq.elevator_put_req_fn(rq);
715} 766}
716 767
717int elv_may_queue(struct request_queue *q, unsigned int op) 768int elv_may_queue(struct request_queue *q, unsigned int op)
718{ 769{
719 struct elevator_queue *e = q->elevator; 770 struct elevator_queue *e = q->elevator;
720 771
721 if (e->type->ops.elevator_may_queue_fn) 772 if (WARN_ON_ONCE(e->uses_mq))
722 return e->type->ops.elevator_may_queue_fn(q, op); 773 return 0;
774
775 if (e->type->ops.sq.elevator_may_queue_fn)
776 return e->type->ops.sq.elevator_may_queue_fn(q, op);
723 777
724 return ELV_MQUEUE_MAY; 778 return ELV_MQUEUE_MAY;
725} 779}
@@ -728,14 +782,17 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
728{ 782{
729 struct elevator_queue *e = q->elevator; 783 struct elevator_queue *e = q->elevator;
730 784
785 if (WARN_ON_ONCE(e->uses_mq))
786 return;
787
731 /* 788 /*
732 * request is released from the driver, io must be done 789 * request is released from the driver, io must be done
733 */ 790 */
734 if (blk_account_rq(rq)) { 791 if (blk_account_rq(rq)) {
735 q->in_flight[rq_is_sync(rq)]--; 792 q->in_flight[rq_is_sync(rq)]--;
736 if ((rq->rq_flags & RQF_SORTED) && 793 if ((rq->rq_flags & RQF_SORTED) &&
737 e->type->ops.elevator_completed_req_fn) 794 e->type->ops.sq.elevator_completed_req_fn)
738 e->type->ops.elevator_completed_req_fn(q, rq); 795 e->type->ops.sq.elevator_completed_req_fn(q, rq);
739 } 796 }
740} 797}
741 798
@@ -803,8 +860,8 @@ int elv_register_queue(struct request_queue *q)
803 } 860 }
804 kobject_uevent(&e->kobj, KOBJ_ADD); 861 kobject_uevent(&e->kobj, KOBJ_ADD);
805 e->registered = 1; 862 e->registered = 1;
806 if (e->type->ops.elevator_registered_fn) 863 if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
807 e->type->ops.elevator_registered_fn(q); 864 e->type->ops.sq.elevator_registered_fn(q);
808 } 865 }
809 return error; 866 return error;
810} 867}
@@ -891,9 +948,14 @@ EXPORT_SYMBOL_GPL(elv_unregister);
891static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) 948static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
892{ 949{
893 struct elevator_queue *old = q->elevator; 950 struct elevator_queue *old = q->elevator;
894 bool registered = old->registered; 951 bool old_registered = false;
895 int err; 952 int err;
896 953
954 if (q->mq_ops) {
955 blk_mq_freeze_queue(q);
956 blk_mq_quiesce_queue(q);
957 }
958
897 /* 959 /*
898 * Turn on BYPASS and drain all requests w/ elevator private data. 960 * Turn on BYPASS and drain all requests w/ elevator private data.
899 * Block layer doesn't call into a quiesced elevator - all requests 961 * Block layer doesn't call into a quiesced elevator - all requests
@@ -901,42 +963,76 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
901 * using INSERT_BACK. All requests have SOFTBARRIER set and no 963 * using INSERT_BACK. All requests have SOFTBARRIER set and no
902 * merge happens either. 964 * merge happens either.
903 */ 965 */
904 blk_queue_bypass_start(q); 966 if (old) {
967 old_registered = old->registered;
968
969 if (old->uses_mq)
970 blk_mq_sched_teardown(q);
971
972 if (!q->mq_ops)
973 blk_queue_bypass_start(q);
905 974
906 /* unregister and clear all auxiliary data of the old elevator */ 975 /* unregister and clear all auxiliary data of the old elevator */
907 if (registered) 976 if (old_registered)
908 elv_unregister_queue(q); 977 elv_unregister_queue(q);
909 978
910 spin_lock_irq(q->queue_lock); 979 spin_lock_irq(q->queue_lock);
911 ioc_clear_queue(q); 980 ioc_clear_queue(q);
912 spin_unlock_irq(q->queue_lock); 981 spin_unlock_irq(q->queue_lock);
982 }
913 983
914 /* allocate, init and register new elevator */ 984 /* allocate, init and register new elevator */
915 err = new_e->ops.elevator_init_fn(q, new_e); 985 if (new_e) {
916 if (err) 986 if (new_e->uses_mq) {
917 goto fail_init; 987 err = blk_mq_sched_setup(q);
988 if (!err)
989 err = new_e->ops.mq.init_sched(q, new_e);
990 } else
991 err = new_e->ops.sq.elevator_init_fn(q, new_e);
992 if (err)
993 goto fail_init;
918 994
919 if (registered) {
920 err = elv_register_queue(q); 995 err = elv_register_queue(q);
921 if (err) 996 if (err)
922 goto fail_register; 997 goto fail_register;
923 } 998 } else
999 q->elevator = NULL;
924 1000
925 /* done, kill the old one and finish */ 1001 /* done, kill the old one and finish */
926 elevator_exit(old); 1002 if (old) {
927 blk_queue_bypass_end(q); 1003 elevator_exit(old);
1004 if (!q->mq_ops)
1005 blk_queue_bypass_end(q);
1006 }
928 1007
929 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); 1008 if (q->mq_ops) {
1009 blk_mq_unfreeze_queue(q);
1010 blk_mq_start_stopped_hw_queues(q, true);
1011 }
1012
1013 if (new_e)
1014 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
1015 else
1016 blk_add_trace_msg(q, "elv switch: none");
930 1017
931 return 0; 1018 return 0;
932 1019
933fail_register: 1020fail_register:
1021 if (q->mq_ops)
1022 blk_mq_sched_teardown(q);
934 elevator_exit(q->elevator); 1023 elevator_exit(q->elevator);
935fail_init: 1024fail_init:
936 /* switch failed, restore and re-register old elevator */ 1025 /* switch failed, restore and re-register old elevator */
937 q->elevator = old; 1026 if (old) {
938 elv_register_queue(q); 1027 q->elevator = old;
939 blk_queue_bypass_end(q); 1028 elv_register_queue(q);
1029 if (!q->mq_ops)
1030 blk_queue_bypass_end(q);
1031 }
1032 if (q->mq_ops) {
1033 blk_mq_unfreeze_queue(q);
1034 blk_mq_start_stopped_hw_queues(q, true);
1035 }
940 1036
941 return err; 1037 return err;
942} 1038}
@@ -949,8 +1045,11 @@ static int __elevator_change(struct request_queue *q, const char *name)
949 char elevator_name[ELV_NAME_MAX]; 1045 char elevator_name[ELV_NAME_MAX];
950 struct elevator_type *e; 1046 struct elevator_type *e;
951 1047
952 if (!q->elevator) 1048 /*
953 return -ENXIO; 1049 * Special case for mq, turn off scheduling
1050 */
1051 if (q->mq_ops && !strncmp(name, "none", 4))
1052 return elevator_switch(q, NULL);
954 1053
955 strlcpy(elevator_name, name, sizeof(elevator_name)); 1054 strlcpy(elevator_name, name, sizeof(elevator_name));
956 e = elevator_get(strstrip(elevator_name), true); 1055 e = elevator_get(strstrip(elevator_name), true);
@@ -959,11 +1058,21 @@ static int __elevator_change(struct request_queue *q, const char *name)
959 return -EINVAL; 1058 return -EINVAL;
960 } 1059 }
961 1060
962 if (!strcmp(elevator_name, q->elevator->type->elevator_name)) { 1061 if (q->elevator &&
1062 !strcmp(elevator_name, q->elevator->type->elevator_name)) {
963 elevator_put(e); 1063 elevator_put(e);
964 return 0; 1064 return 0;
965 } 1065 }
966 1066
1067 if (!e->uses_mq && q->mq_ops) {
1068 elevator_put(e);
1069 return -EINVAL;
1070 }
1071 if (e->uses_mq && !q->mq_ops) {
1072 elevator_put(e);
1073 return -EINVAL;
1074 }
1075
967 return elevator_switch(q, e); 1076 return elevator_switch(q, e);
968} 1077}
969 1078
@@ -985,7 +1094,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
985{ 1094{
986 int ret; 1095 int ret;
987 1096
988 if (!q->elevator) 1097 if (!(q->mq_ops || q->request_fn))
989 return count; 1098 return count;
990 1099
991 ret = __elevator_change(q, name); 1100 ret = __elevator_change(q, name);
@@ -999,24 +1108,34 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
999ssize_t elv_iosched_show(struct request_queue *q, char *name) 1108ssize_t elv_iosched_show(struct request_queue *q, char *name)
1000{ 1109{
1001 struct elevator_queue *e = q->elevator; 1110 struct elevator_queue *e = q->elevator;
1002 struct elevator_type *elv; 1111 struct elevator_type *elv = NULL;
1003 struct elevator_type *__e; 1112 struct elevator_type *__e;
1004 int len = 0; 1113 int len = 0;
1005 1114
1006 if (!q->elevator || !blk_queue_stackable(q)) 1115 if (!blk_queue_stackable(q))
1007 return sprintf(name, "none\n"); 1116 return sprintf(name, "none\n");
1008 1117
1009 elv = e->type; 1118 if (!q->elevator)
1119 len += sprintf(name+len, "[none] ");
1120 else
1121 elv = e->type;
1010 1122
1011 spin_lock(&elv_list_lock); 1123 spin_lock(&elv_list_lock);
1012 list_for_each_entry(__e, &elv_list, list) { 1124 list_for_each_entry(__e, &elv_list, list) {
1013 if (!strcmp(elv->elevator_name, __e->elevator_name)) 1125 if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
1014 len += sprintf(name+len, "[%s] ", elv->elevator_name); 1126 len += sprintf(name+len, "[%s] ", elv->elevator_name);
1015 else 1127 continue;
1128 }
1129 if (__e->uses_mq && q->mq_ops)
1130 len += sprintf(name+len, "%s ", __e->elevator_name);
1131 else if (!__e->uses_mq && !q->mq_ops)
1016 len += sprintf(name+len, "%s ", __e->elevator_name); 1132 len += sprintf(name+len, "%s ", __e->elevator_name);
1017 } 1133 }
1018 spin_unlock(&elv_list_lock); 1134 spin_unlock(&elv_list_lock);
1019 1135
1136 if (q->mq_ops && q->elevator)
1137 len += sprintf(name+len, "none");
1138
1020 len += sprintf(len+name, "\n"); 1139 len += sprintf(len+name, "\n");
1021 return len; 1140 return len;
1022} 1141}
diff --git a/block/genhd.c b/block/genhd.c
index fcd6d4fae657..3631cd480295 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -572,6 +572,20 @@ exit:
572 disk_part_iter_exit(&piter); 572 disk_part_iter_exit(&piter);
573} 573}
574 574
575void put_disk_devt(struct disk_devt *disk_devt)
576{
577 if (disk_devt && atomic_dec_and_test(&disk_devt->count))
578 disk_devt->release(disk_devt);
579}
580EXPORT_SYMBOL(put_disk_devt);
581
582void get_disk_devt(struct disk_devt *disk_devt)
583{
584 if (disk_devt)
585 atomic_inc(&disk_devt->count);
586}
587EXPORT_SYMBOL(get_disk_devt);
588
575/** 589/**
576 * device_add_disk - add partitioning information to kernel list 590 * device_add_disk - add partitioning information to kernel list
577 * @parent: parent device for the disk 591 * @parent: parent device for the disk
@@ -612,8 +626,15 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
612 626
613 disk_alloc_events(disk); 627 disk_alloc_events(disk);
614 628
629 /*
630 * Take a reference on the devt and assign it to queue since it
631 * must not be reallocated while the bdi is registered
632 */
633 disk->queue->disk_devt = disk->disk_devt;
634 get_disk_devt(disk->disk_devt);
635
615 /* Register BDI before referencing it from bdev */ 636 /* Register BDI before referencing it from bdev */
616 bdi = &disk->queue->backing_dev_info; 637 bdi = disk->queue->backing_dev_info;
617 bdi_register_owner(bdi, disk_to_dev(disk)); 638 bdi_register_owner(bdi, disk_to_dev(disk));
618 639
619 blk_register_region(disk_devt(disk), disk->minors, NULL, 640 blk_register_region(disk_devt(disk), disk->minors, NULL,
@@ -648,6 +669,8 @@ void del_gendisk(struct gendisk *disk)
648 disk_part_iter_init(&piter, disk, 669 disk_part_iter_init(&piter, disk,
649 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); 670 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
650 while ((part = disk_part_iter_next(&piter))) { 671 while ((part = disk_part_iter_next(&piter))) {
672 bdev_unhash_inode(MKDEV(disk->major,
673 disk->first_minor + part->partno));
651 invalidate_partition(disk, part->partno); 674 invalidate_partition(disk, part->partno);
652 delete_partition(disk, part->partno); 675 delete_partition(disk, part->partno);
653 } 676 }
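
get_disk_devt()/put_disk_devt() above pin the devt with an atomic count and invoke a release callback when the last reference is dropped, so the devt outlives the registered bdi. A standalone sketch of that get/put pattern using C11 atomics (toy struct, not the kernel's disk_devt):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct devt {
	atomic_int count;
	void (*release)(struct devt *);
};

static void devt_release(struct devt *d)
{
	printf("last reference dropped, freeing\n");
	free(d);
}

static void devt_get(struct devt *d)
{
	if (d)
		atomic_fetch_add(&d->count, 1);
}

static void devt_put(struct devt *d)
{
	/* fetch_sub returns the previous value; 1 means we held the last reference. */
	if (d && atomic_fetch_sub(&d->count, 1) == 1)
		d->release(d);
}

int main(void)
{
	struct devt *d = malloc(sizeof(*d));

	if (!d)
		return 1;
	atomic_init(&d->count, 1);	/* creator holds the first reference */
	d->release = devt_release;

	devt_get(d);	/* e.g. the request_queue takes a reference */
	devt_put(d);	/* queue drops it */
	devt_put(d);	/* creator drops the last one, release runs */
	return 0;
}
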
diff --git a/block/ioctl.c b/block/ioctl.c
index be7f4de3eb3c..7b88820b93d9 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -505,7 +505,6 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
505int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, 505int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
506 unsigned long arg) 506 unsigned long arg)
507{ 507{
508 struct backing_dev_info *bdi;
509 void __user *argp = (void __user *)arg; 508 void __user *argp = (void __user *)arg;
510 loff_t size; 509 loff_t size;
511 unsigned int max_sectors; 510 unsigned int max_sectors;
@@ -532,8 +531,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
532 case BLKFRAGET: 531 case BLKFRAGET:
533 if (!arg) 532 if (!arg)
534 return -EINVAL; 533 return -EINVAL;
535 bdi = blk_get_backing_dev_info(bdev); 534 return put_long(arg, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512);
536 return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512);
537 case BLKROGET: 535 case BLKROGET:
538 return put_int(arg, bdev_read_only(bdev) != 0); 536 return put_int(arg, bdev_read_only(bdev) != 0);
539 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 537 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
@@ -560,8 +558,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
560 case BLKFRASET: 558 case BLKFRASET:
561 if(!capable(CAP_SYS_ADMIN)) 559 if(!capable(CAP_SYS_ADMIN))
562 return -EACCES; 560 return -EACCES;
563 bdi = blk_get_backing_dev_info(bdev); 561 bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
564 bdi->ra_pages = (arg * 512) / PAGE_SIZE;
565 return 0; 562 return 0;
566 case BLKBSZSET: 563 case BLKBSZSET:
567 return blkdev_bszset(bdev, mode, argp); 564 return blkdev_bszset(bdev, mode, argp);
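
BLKFRAGET/BLKFRASET above convert between the 512-byte-sector units of the ioctl ABI and the page-sized ra_pages field, which is now taken straight from bdev->bd_bdi instead of a separate backing_dev_info lookup. The arithmetic is just a unit conversion; a tiny sketch, assuming a 4096-byte page purely as an example value:

#include <stdio.h>

#define SECTOR_SIZE 512UL
#define PAGE_SIZE_EX 4096UL	/* example; the real PAGE_SIZE is per-arch */

/* The ioctl ABI reports readahead in 512-byte sectors. */
static unsigned long ra_pages_to_sectors(unsigned long ra_pages)
{
	return (ra_pages * PAGE_SIZE_EX) / SECTOR_SIZE;
}

static unsigned long sectors_to_ra_pages(unsigned long sectors)
{
	return (sectors * SECTOR_SIZE) / PAGE_SIZE_EX;
}

int main(void)
{
	unsigned long ra_pages = 32;	/* 128 KiB of readahead with a 4 KiB page */
	unsigned long sectors = ra_pages_to_sectors(ra_pages);

	printf("%lu pages = %lu sectors\n", ra_pages, sectors);
	printf("%lu sectors = %lu pages\n", sectors, sectors_to_ra_pages(sectors));
	return 0;
}
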
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
new file mode 100644
index 000000000000..236121633ca0
--- /dev/null
+++ b/block/mq-deadline.c
@@ -0,0 +1,556 @@
1/*
2 * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
3 * for the blk-mq scheduling framework
4 *
5 * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
6 */
7#include <linux/kernel.h>
8#include <linux/fs.h>
9#include <linux/blkdev.h>
10#include <linux/blk-mq.h>
11#include <linux/elevator.h>
12#include <linux/bio.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/init.h>
16#include <linux/compiler.h>
17#include <linux/rbtree.h>
18#include <linux/sbitmap.h>
19
20#include "blk.h"
21#include "blk-mq.h"
22#include "blk-mq-tag.h"
23#include "blk-mq-sched.h"
24
25/*
26 * See Documentation/block/deadline-iosched.txt
27 */
28static const int read_expire = HZ / 2; /* max time before a read is submitted. */
29static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
30static const int writes_starved = 2; /* max times reads can starve a write */
31static const int fifo_batch = 16; /* # of sequential requests treated as one
32 by the above parameters. For throughput. */
33
34struct deadline_data {
35 /*
36 * run time data
37 */
38
39 /*
40 * requests (deadline_rq s) are present on both sort_list and fifo_list
41 */
42 struct rb_root sort_list[2];
43 struct list_head fifo_list[2];
44
45 /*
46 * next in sort order. read, write or both are NULL
47 */
48 struct request *next_rq[2];
49 unsigned int batching; /* number of sequential requests made */
50 unsigned int starved; /* times reads have starved writes */
51
52 /*
53 * settings that change how the i/o scheduler behaves
54 */
55 int fifo_expire[2];
56 int fifo_batch;
57 int writes_starved;
58 int front_merges;
59
60 spinlock_t lock;
61 struct list_head dispatch;
62};
63
64static inline struct rb_root *
65deadline_rb_root(struct deadline_data *dd, struct request *rq)
66{
67 return &dd->sort_list[rq_data_dir(rq)];
68}
69
70/*
71 * get the request after `rq' in sector-sorted order
72 */
73static inline struct request *
74deadline_latter_request(struct request *rq)
75{
76 struct rb_node *node = rb_next(&rq->rb_node);
77
78 if (node)
79 return rb_entry_rq(node);
80
81 return NULL;
82}
83
84static void
85deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
86{
87 struct rb_root *root = deadline_rb_root(dd, rq);
88
89 elv_rb_add(root, rq);
90}
91
92static inline void
93deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
94{
95 const int data_dir = rq_data_dir(rq);
96
97 if (dd->next_rq[data_dir] == rq)
98 dd->next_rq[data_dir] = deadline_latter_request(rq);
99
100 elv_rb_del(deadline_rb_root(dd, rq), rq);
101}
102
103/*
104 * remove rq from rbtree and fifo.
105 */
106static void deadline_remove_request(struct request_queue *q, struct request *rq)
107{
108 struct deadline_data *dd = q->elevator->elevator_data;
109
110 list_del_init(&rq->queuelist);
111
112 /*
113 * We might not be on the rbtree, if we are doing an insert merge
114 */
115 if (!RB_EMPTY_NODE(&rq->rb_node))
116 deadline_del_rq_rb(dd, rq);
117
118 elv_rqhash_del(q, rq);
119 if (q->last_merge == rq)
120 q->last_merge = NULL;
121}
122
123static void dd_request_merged(struct request_queue *q, struct request *req,
124 enum elv_merge type)
125{
126 struct deadline_data *dd = q->elevator->elevator_data;
127
128 /*
129 * if the merge was a front merge, we need to reposition request
130 */
131 if (type == ELEVATOR_FRONT_MERGE) {
132 elv_rb_del(deadline_rb_root(dd, req), req);
133 deadline_add_rq_rb(dd, req);
134 }
135}
136
137static void dd_merged_requests(struct request_queue *q, struct request *req,
138 struct request *next)
139{
140 /*
141 * if next expires before rq, assign its expire time to rq
142 * and move into next position (next will be deleted) in fifo
143 */
144 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
145 if (time_before((unsigned long)next->fifo_time,
146 (unsigned long)req->fifo_time)) {
147 list_move(&req->queuelist, &next->queuelist);
148 req->fifo_time = next->fifo_time;
149 }
150 }
151
152 /*
153 * kill knowledge of next, this one is a goner
154 */
155 deadline_remove_request(q, next);
156}
157
158/*
159 * move an entry to dispatch queue
160 */
161static void
162deadline_move_request(struct deadline_data *dd, struct request *rq)
163{
164 const int data_dir = rq_data_dir(rq);
165
166 dd->next_rq[READ] = NULL;
167 dd->next_rq[WRITE] = NULL;
168 dd->next_rq[data_dir] = deadline_latter_request(rq);
169
170 /*
171 * take it off the sort and fifo list
172 */
173 deadline_remove_request(rq->q, rq);
174}
175
176/*
177 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
178 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
179 */
180static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
181{
182 struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
183
184 /*
185 * rq is expired!
186 */
187 if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
188 return 1;
189
190 return 0;
191}
192
193/*
194 * deadline_dispatch_requests selects the best request according to
195 * read/write expire, fifo_batch, etc
196 */
197static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
198{
199 struct deadline_data *dd = hctx->queue->elevator->elevator_data;
200 struct request *rq;
201 bool reads, writes;
202 int data_dir;
203
204 if (!list_empty(&dd->dispatch)) {
205 rq = list_first_entry(&dd->dispatch, struct request, queuelist);
206 list_del_init(&rq->queuelist);
207 goto done;
208 }
209
210 reads = !list_empty(&dd->fifo_list[READ]);
211 writes = !list_empty(&dd->fifo_list[WRITE]);
212
213 /*
214 * batches are currently reads XOR writes
215 */
216 if (dd->next_rq[WRITE])
217 rq = dd->next_rq[WRITE];
218 else
219 rq = dd->next_rq[READ];
220
221 if (rq && dd->batching < dd->fifo_batch)
222		/* we have a next request and are still entitled to batch */
223 goto dispatch_request;
224
225 /*
226 * at this point we are not running a batch. select the appropriate
227 * data direction (read / write)
228 */
229
230 if (reads) {
231 BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
232
233 if (writes && (dd->starved++ >= dd->writes_starved))
234 goto dispatch_writes;
235
236 data_dir = READ;
237
238 goto dispatch_find_request;
239 }
240
241 /*
242 * there are either no reads or writes have been starved
243 */
244
245 if (writes) {
246dispatch_writes:
247 BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
248
249 dd->starved = 0;
250
251 data_dir = WRITE;
252
253 goto dispatch_find_request;
254 }
255
256 return NULL;
257
258dispatch_find_request:
259 /*
260 * we are not running a batch, find best request for selected data_dir
261 */
262 if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
263 /*
264 * A deadline has expired, the last request was in the other
265 * direction, or we have run out of higher-sectored requests.
266 * Start again from the request with the earliest expiry time.
267 */
268 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
269 } else {
270 /*
271 * The last req was the same dir and we have a next request in
272 * sort order. No expired requests so continue on from here.
273 */
274 rq = dd->next_rq[data_dir];
275 }
276
277 dd->batching = 0;
278
279dispatch_request:
280 /*
281 * rq is the selected appropriate request.
282 */
283 dd->batching++;
284 deadline_move_request(dd, rq);
285done:
286 rq->rq_flags |= RQF_STARTED;
287 return rq;
288}
289
290static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
291{
292 struct deadline_data *dd = hctx->queue->elevator->elevator_data;
293 struct request *rq;
294
295 spin_lock(&dd->lock);
296 rq = __dd_dispatch_request(hctx);
297 spin_unlock(&dd->lock);
298
299 return rq;
300}
301
302static void dd_exit_queue(struct elevator_queue *e)
303{
304 struct deadline_data *dd = e->elevator_data;
305
306 BUG_ON(!list_empty(&dd->fifo_list[READ]));
307 BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
308
309 kfree(dd);
310}
311
312/*
313 * initialize elevator private data (deadline_data).
314 */
315static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
316{
317 struct deadline_data *dd;
318 struct elevator_queue *eq;
319
320 eq = elevator_alloc(q, e);
321 if (!eq)
322 return -ENOMEM;
323
324 dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
325 if (!dd) {
326 kobject_put(&eq->kobj);
327 return -ENOMEM;
328 }
329 eq->elevator_data = dd;
330
331 INIT_LIST_HEAD(&dd->fifo_list[READ]);
332 INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
333 dd->sort_list[READ] = RB_ROOT;
334 dd->sort_list[WRITE] = RB_ROOT;
335 dd->fifo_expire[READ] = read_expire;
336 dd->fifo_expire[WRITE] = write_expire;
337 dd->writes_starved = writes_starved;
338 dd->front_merges = 1;
339 dd->fifo_batch = fifo_batch;
340 spin_lock_init(&dd->lock);
341 INIT_LIST_HEAD(&dd->dispatch);
342
343 q->elevator = eq;
344 return 0;
345}
346
347static int dd_request_merge(struct request_queue *q, struct request **rq,
348 struct bio *bio)
349{
350 struct deadline_data *dd = q->elevator->elevator_data;
351 sector_t sector = bio_end_sector(bio);
352 struct request *__rq;
353
354 if (!dd->front_merges)
355 return ELEVATOR_NO_MERGE;
356
357 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
358 if (__rq) {
359 BUG_ON(sector != blk_rq_pos(__rq));
360
361 if (elv_bio_merge_ok(__rq, bio)) {
362 *rq = __rq;
363 return ELEVATOR_FRONT_MERGE;
364 }
365 }
366
367 return ELEVATOR_NO_MERGE;
368}
369
370static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
371{
372 struct request_queue *q = hctx->queue;
373 struct deadline_data *dd = q->elevator->elevator_data;
374 struct request *free = NULL;
375 bool ret;
376
377 spin_lock(&dd->lock);
378 ret = blk_mq_sched_try_merge(q, bio, &free);
379 spin_unlock(&dd->lock);
380
381 if (free)
382 blk_mq_free_request(free);
383
384 return ret;
385}
386
387/*
388 * add rq to rbtree and fifo
389 */
390static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
391 bool at_head)
392{
393 struct request_queue *q = hctx->queue;
394 struct deadline_data *dd = q->elevator->elevator_data;
395 const int data_dir = rq_data_dir(rq);
396
397 if (blk_mq_sched_try_insert_merge(q, rq))
398 return;
399
400 blk_mq_sched_request_inserted(rq);
401
402 if (at_head || blk_rq_is_passthrough(rq)) {
403 if (at_head)
404 list_add(&rq->queuelist, &dd->dispatch);
405 else
406 list_add_tail(&rq->queuelist, &dd->dispatch);
407 } else {
408 deadline_add_rq_rb(dd, rq);
409
410 if (rq_mergeable(rq)) {
411 elv_rqhash_add(q, rq);
412 if (!q->last_merge)
413 q->last_merge = rq;
414 }
415
416 /*
417 * set expire time and add to fifo list
418 */
419 rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
420 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
421 }
422}
423
424static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
425 struct list_head *list, bool at_head)
426{
427 struct request_queue *q = hctx->queue;
428 struct deadline_data *dd = q->elevator->elevator_data;
429
430 spin_lock(&dd->lock);
431 while (!list_empty(list)) {
432 struct request *rq;
433
434 rq = list_first_entry(list, struct request, queuelist);
435 list_del_init(&rq->queuelist);
436 dd_insert_request(hctx, rq, at_head);
437 }
438 spin_unlock(&dd->lock);
439}
440
441static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
442{
443 struct deadline_data *dd = hctx->queue->elevator->elevator_data;
444
445 return !list_empty_careful(&dd->dispatch) ||
446 !list_empty_careful(&dd->fifo_list[0]) ||
447 !list_empty_careful(&dd->fifo_list[1]);
448}
449
450/*
451 * sysfs parts below
452 */
453static ssize_t
454deadline_var_show(int var, char *page)
455{
456 return sprintf(page, "%d\n", var);
457}
458
459static ssize_t
460deadline_var_store(int *var, const char *page, size_t count)
461{
462 char *p = (char *) page;
463
464 *var = simple_strtol(p, &p, 10);
465 return count;
466}
467
468#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
469static ssize_t __FUNC(struct elevator_queue *e, char *page) \
470{ \
471 struct deadline_data *dd = e->elevator_data; \
472 int __data = __VAR; \
473 if (__CONV) \
474 __data = jiffies_to_msecs(__data); \
475 return deadline_var_show(__data, (page)); \
476}
477SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
478SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
479SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
480SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
481SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
482#undef SHOW_FUNCTION
483
484#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
485static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
486{ \
487 struct deadline_data *dd = e->elevator_data; \
488 int __data; \
489 int ret = deadline_var_store(&__data, (page), count); \
490 if (__data < (MIN)) \
491 __data = (MIN); \
492 else if (__data > (MAX)) \
493 __data = (MAX); \
494 if (__CONV) \
495 *(__PTR) = msecs_to_jiffies(__data); \
496 else \
497 *(__PTR) = __data; \
498 return ret; \
499}
500STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
501STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
502STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
503STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
504STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
505#undef STORE_FUNCTION
506
507#define DD_ATTR(name) \
508 __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
509 deadline_##name##_store)
510
511static struct elv_fs_entry deadline_attrs[] = {
512 DD_ATTR(read_expire),
513 DD_ATTR(write_expire),
514 DD_ATTR(writes_starved),
515 DD_ATTR(front_merges),
516 DD_ATTR(fifo_batch),
517 __ATTR_NULL
518};
519
520static struct elevator_type mq_deadline = {
521 .ops.mq = {
522 .insert_requests = dd_insert_requests,
523 .dispatch_request = dd_dispatch_request,
524 .next_request = elv_rb_latter_request,
525 .former_request = elv_rb_former_request,
526 .bio_merge = dd_bio_merge,
527 .request_merge = dd_request_merge,
528 .requests_merged = dd_merged_requests,
529 .request_merged = dd_request_merged,
530 .has_work = dd_has_work,
531 .init_sched = dd_init_queue,
532 .exit_sched = dd_exit_queue,
533 },
534
535 .uses_mq = true,
536 .elevator_attrs = deadline_attrs,
537 .elevator_name = "mq-deadline",
538 .elevator_owner = THIS_MODULE,
539};
540
541static int __init deadline_init(void)
542{
543 return elv_register(&mq_deadline);
544}
545
546static void __exit deadline_exit(void)
547{
548 elv_unregister(&mq_deadline);
549}
550
551module_init(deadline_init);
552module_exit(deadline_exit);
553
554MODULE_AUTHOR("Jens Axboe");
555MODULE_LICENSE("GPL");
556MODULE_DESCRIPTION("MQ deadline IO scheduler");
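
mq-deadline above stamps each request with fifo_time = jiffies + fifo_expire[dir] on insert, and deadline_check_fifo() treats the FIFO head as expired once jiffies reaches that stamp. A userspace sketch of the same expiry test, using wall-clock seconds in place of jiffies:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct fake_rq {
	time_t fifo_time;	/* absolute deadline, like rq->fifo_time */
};

/* Mirror of the deadline_check_fifo() idea: expired once now >= deadline. */
static bool fifo_expired(const struct fake_rq *rq, time_t now)
{
	return now >= rq->fifo_time;
}

int main(void)
{
	const int read_expire = 1;	/* seconds; stands in for HZ / 2 jiffies */
	time_t now = time(NULL);
	struct fake_rq rq = { .fifo_time = now + read_expire };

	printf("expired at insert? %s\n", fifo_expired(&rq, now) ? "yes" : "no");
	printf("expired later?     %s\n",
	       fifo_expired(&rq, now + 2 * read_expire) ? "yes" : "no");
	return 0;
}

Dispatch then prefers the batch in progress, and only when the batch is exhausted or a deadline has passed does it restart from the request with the earliest fifo_time, as the comments in __dd_dispatch_request() above describe.
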
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index a163c487cf38..2d1b15d89b45 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e)
92} 92}
93 93
94static struct elevator_type elevator_noop = { 94static struct elevator_type elevator_noop = {
95 .ops = { 95 .ops.sq = {
96 .elevator_merge_req_fn = noop_merged_requests, 96 .elevator_merge_req_fn = noop_merged_requests,
97 .elevator_dispatch_fn = noop_dispatch, 97 .elevator_dispatch_fn = noop_dispatch,
98 .elevator_add_req_fn = noop_add_request, 98 .elevator_add_req_fn = noop_add_request,
diff --git a/block/opal_proto.h b/block/opal_proto.h
new file mode 100644
index 000000000000..f40c9acf8895
--- /dev/null
+++ b/block/opal_proto.h
@@ -0,0 +1,452 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Rafael Antognolli <rafael.antognolli@intel.com>
6 * Scott Bauer <scott.bauer@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17#include <linux/types.h>
18
19#ifndef _OPAL_PROTO_H
20#define _OPAL_PROTO_H
21
22/*
23 * These constant values come from:
24 * SPC-4 section
25 * 6.30 SECURITY PROTOCOL IN command / table 265.
26 */
27enum {
28 TCG_SECP_00 = 0,
29 TCG_SECP_01,
30};
31
32/*
33 * Token defs derived from:
34 * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
35 * 3.2.2 Data Stream Encoding
36 */
37enum opal_response_token {
38 OPAL_DTA_TOKENID_BYTESTRING = 0xe0,
39 OPAL_DTA_TOKENID_SINT = 0xe1,
40 OPAL_DTA_TOKENID_UINT = 0xe2,
41 OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */
42 OPAL_DTA_TOKENID_INVALID = 0X0
43};
44
45#define DTAERROR_NO_METHOD_STATUS 0x89
46#define GENERIC_HOST_SESSION_NUM 0x41
47
48#define TPER_SYNC_SUPPORTED 0x01
49
50#define TINY_ATOM_DATA_MASK 0x3F
51#define TINY_ATOM_SIGNED 0x40
52
53#define SHORT_ATOM_ID 0x80
54#define SHORT_ATOM_BYTESTRING 0x20
55#define SHORT_ATOM_SIGNED 0x10
56#define SHORT_ATOM_LEN_MASK 0xF
57
58#define MEDIUM_ATOM_ID 0xC0
59#define MEDIUM_ATOM_BYTESTRING 0x10
60#define MEDIUM_ATOM_SIGNED 0x8
61#define MEDIUM_ATOM_LEN_MASK 0x7
62
63#define LONG_ATOM_ID 0xe0
64#define LONG_ATOM_BYTESTRING 0x2
65#define LONG_ATOM_SIGNED 0x1
66
67/* Derived from TCG Core spec 2.01 Section:
68 * 3.2.2.1
69 * Data Type
70 */
71#define TINY_ATOM_BYTE 0x7F
72#define SHORT_ATOM_BYTE 0xBF
73#define MEDIUM_ATOM_BYTE 0xDF
74#define LONG_ATOM_BYTE 0xE3
75
76#define OPAL_INVAL_PARAM 12
77#define OPAL_MANUFACTURED_INACTIVE 0x08
78#define OPAL_DISCOVERY_COMID 0x0001
79
80#define LOCKING_RANGE_NON_GLOBAL 0x03
81/*
82 * User IDs used in the TCG storage SSCs
83 * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
84 * Section: 6.3 Assigned UIDs
85 */
86#define OPAL_UID_LENGTH 8
87#define OPAL_METHOD_LENGTH 8
88#define OPAL_MSID_KEYLEN 15
89#define OPAL_UID_LENGTH_HALF 4
90
91/* Enum to index OPALUID array */
92enum opal_uid {
93 /* users */
94 OPAL_SMUID_UID,
95 OPAL_THISSP_UID,
96 OPAL_ADMINSP_UID,
97 OPAL_LOCKINGSP_UID,
98 OPAL_ENTERPRISE_LOCKINGSP_UID,
99 OPAL_ANYBODY_UID,
100 OPAL_SID_UID,
101 OPAL_ADMIN1_UID,
102 OPAL_USER1_UID,
103 OPAL_USER2_UID,
104 OPAL_PSID_UID,
105 OPAL_ENTERPRISE_BANDMASTER0_UID,
106 OPAL_ENTERPRISE_ERASEMASTER_UID,
107 /* tables */
108 OPAL_LOCKINGRANGE_GLOBAL,
109 OPAL_LOCKINGRANGE_ACE_RDLOCKED,
110 OPAL_LOCKINGRANGE_ACE_WRLOCKED,
111 OPAL_MBRCONTROL,
112 OPAL_MBR,
113 OPAL_AUTHORITY_TABLE,
114 OPAL_C_PIN_TABLE,
115 OPAL_LOCKING_INFO_TABLE,
116 OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
117 /* C_PIN_TABLE object ID's */
118 OPAL_C_PIN_MSID,
119 OPAL_C_PIN_SID,
120 OPAL_C_PIN_ADMIN1,
121 /* half UID's (only first 4 bytes used) */
122 OPAL_HALF_UID_AUTHORITY_OBJ_REF,
123 OPAL_HALF_UID_BOOLEAN_ACE,
124 /* omitted optional parameter */
125 OPAL_UID_HEXFF,
126};
127
128#define OPAL_METHOD_LENGTH 8
129
130/* Enum for indexing the OPALMETHOD array */
131enum opal_method {
132 OPAL_PROPERTIES,
133 OPAL_STARTSESSION,
134 OPAL_REVERT,
135 OPAL_ACTIVATE,
136 OPAL_EGET,
137 OPAL_ESET,
138 OPAL_NEXT,
139 OPAL_EAUTHENTICATE,
140 OPAL_GETACL,
141 OPAL_GENKEY,
142 OPAL_REVERTSP,
143 OPAL_GET,
144 OPAL_SET,
145 OPAL_AUTHENTICATE,
146 OPAL_RANDOM,
147 OPAL_ERASE,
148};
149
150enum opal_token {
151 /* Boolean */
152 OPAL_TRUE = 0x01,
153 OPAL_FALSE = 0x00,
154 OPAL_BOOLEAN_EXPR = 0x03,
155 /* cellblocks */
156 OPAL_TABLE = 0x00,
157 OPAL_STARTROW = 0x01,
158 OPAL_ENDROW = 0x02,
159 OPAL_STARTCOLUMN = 0x03,
160 OPAL_ENDCOLUMN = 0x04,
161 OPAL_VALUES = 0x01,
162 /* authority table */
163 OPAL_PIN = 0x03,
164 /* locking tokens */
165 OPAL_RANGESTART = 0x03,
166 OPAL_RANGELENGTH = 0x04,
167 OPAL_READLOCKENABLED = 0x05,
168 OPAL_WRITELOCKENABLED = 0x06,
169 OPAL_READLOCKED = 0x07,
170 OPAL_WRITELOCKED = 0x08,
171 OPAL_ACTIVEKEY = 0x0A,
172 /* locking info table */
173 OPAL_MAXRANGES = 0x04,
174 /* mbr control */
175 OPAL_MBRENABLE = 0x01,
176 OPAL_MBRDONE = 0x02,
177 /* properties */
178 OPAL_HOSTPROPERTIES = 0x00,
179 /* atoms */
180 OPAL_STARTLIST = 0xf0,
181 OPAL_ENDLIST = 0xf1,
182 OPAL_STARTNAME = 0xf2,
183 OPAL_ENDNAME = 0xf3,
184 OPAL_CALL = 0xf8,
185 OPAL_ENDOFDATA = 0xf9,
186 OPAL_ENDOFSESSION = 0xfa,
187 OPAL_STARTTRANSACTON = 0xfb,
188 OPAL_ENDTRANSACTON = 0xfC,
189 OPAL_EMPTYATOM = 0xff,
190 OPAL_WHERE = 0x00,
191};
192
193/* Locking state for a locking range */
194enum opal_lockingstate {
195 OPAL_LOCKING_READWRITE = 0x01,
196 OPAL_LOCKING_READONLY = 0x02,
197 OPAL_LOCKING_LOCKED = 0x03,
198};
199
200/* Packets derived from:
201 * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
202 * Section: 3.2.3 ComPackets, Packets & Subpackets
203 */
204
205/* Comm Packet (header) for transmissions. */
206struct opal_compacket {
207 __be32 reserved0;
208 u8 extendedComID[4];
209 __be32 outstandingData;
210 __be32 minTransfer;
211 __be32 length;
212};
213
214/* Packet structure. */
215struct opal_packet {
216 __be32 tsn;
217 __be32 hsn;
218 __be32 seq_number;
219 __be16 reserved0;
220 __be16 ack_type;
221 __be32 acknowledgment;
222 __be32 length;
223};
224
225/* Data sub packet header */
226struct opal_data_subpacket {
227 u8 reserved0[6];
228 __be16 kind;
229 __be32 length;
230};
231
232/* header of a response */
233struct opal_header {
234 struct opal_compacket cp;
235 struct opal_packet pkt;
236 struct opal_data_subpacket subpkt;
237};
238
239#define FC_TPER 0x0001
240#define FC_LOCKING 0x0002
241#define FC_GEOMETRY 0x0003
242#define FC_ENTERPRISE 0x0100
243#define FC_DATASTORE 0x0202
244#define FC_SINGLEUSER 0x0201
245#define FC_OPALV100 0x0200
246#define FC_OPALV200 0x0203
247
248/*
249 * The Discovery 0 Header. As defined in
250 * Opal SSC Documentation
251 * Section: 3.3.5 Capability Discovery
252 */
253struct d0_header {
254 __be32 length; /* the length of the header 48 in 2.00.100 */
255 __be32 revision; /**< revision of the header 1 in 2.00.100 */
256 __be32 reserved01;
257 __be32 reserved02;
258 /*
259 * the remainder of the structure is vendor specific and will not be
260 * addressed now
261 */
262 u8 ignored[32];
263};
264
265/*
266 * TPer Feature Descriptor. Contains flags indicating support for the
267 * TPer features described in the OPAL specification. The names match the
268 * OPAL terminology
269 *
270 * code == 0x001 in 2.00.100
271 */
272struct d0_tper_features {
273 /*
274 * supported_features bits:
275 * bit 7: reserved
276 * bit 6: com ID management
277 * bit 5: reserved
278 * bit 4: streaming support
279 * bit 3: buffer management
280 * bit 2: ACK/NACK
281 * bit 1: async
282 * bit 0: sync
283 */
284 u8 supported_features;
285 /*
286 * bytes 5 through 15 are reserved, but we represent the first 3 as
287 * u8 to keep the other two 32bits integers aligned.
288 */
289 u8 reserved01[3];
290 __be32 reserved02;
291 __be32 reserved03;
292};
293
294/*
295 * Locking Feature Descriptor. Contains flags indicating support for the
296 * locking features described in the OPAL specification. The names match the
297 * OPAL terminology
298 *
299 * code == 0x0002 in 2.00.100
300 */
301struct d0_locking_features {
302 /*
303 * supported_features bits:
304 * bits 6-7: reserved
305 * bit 5: MBR done
306 * bit 4: MBR enabled
307 * bit 3: media encryption
308 * bit 2: locked
309 * bit 1: locking enabled
310 * bit 0: locking supported
311 */
312 u8 supported_features;
313 /*
314 * bytes 5 through 15 are reserved, but we represent the first 3 as
315 * u8 to keep the other two 32bits integers aligned.
316 */
317 u8 reserved01[3];
318 __be32 reserved02;
319 __be32 reserved03;
320};
321
322/*
323 * Geometry Feature Descriptor. Contains flags indicating support for the
324 * geometry features described in the OPAL specification. The names match the
325 * OPAL terminology
326 *
327 * code == 0x0003 in 2.00.100
328 */
329struct d0_geometry_features {
330 /*
331 * skip 32 bits from header, needed to align the struct to 64 bits.
332 */
333 u8 header[4];
334 /*
335 * reserved01:
336 * bits 1-6: reserved
337 * bit 0: align
338 */
339 u8 reserved01;
340 u8 reserved02[7];
341 __be32 logical_block_size;
342 __be64 alignment_granularity;
343 __be64 lowest_aligned_lba;
344};
345
346/*
347 * Enterprise SSC Feature
348 *
349 * code == 0x0100
350 */
351struct d0_enterprise_ssc {
352 __be16 baseComID;
353 __be16 numComIDs;
354 /* range_crossing:
355 * bits 1-6: reserved
356 * bit 0: range crossing
357 */
358 u8 range_crossing;
359 u8 reserved01;
360 __be16 reserved02;
361 __be32 reserved03;
362 __be32 reserved04;
363};
364
365/*
366 * Opal V1 feature
367 *
368 * code == 0x0200
369 */
370struct d0_opal_v100 {
371 __be16 baseComID;
372 __be16 numComIDs;
373};
374
375/*
376 * Single User Mode feature
377 *
378 * code == 0x0201
379 */
380struct d0_single_user_mode {
381 __be32 num_locking_objects;
382 /* reserved01:
383 * bit 0: any
384 * bit 1: all
385 * bit 2: policy
386 * bits 3-7: reserved
387 */
388 u8 reserved01;
389 u8 reserved02;
390 __be16 reserved03;
391 __be32 reserved04;
392};
393
394/*
395 * Additional Datastores feature
396 *
397 * code == 0x0202
398 */
399struct d0_datastore_table {
400 __be16 reserved01;
401 __be16 max_tables;
402 __be32 max_size_tables;
403 __be32 table_size_alignment;
404};
405
406/*
407 * OPAL 2.0 feature
408 *
409 * code == 0x0203
410 */
411struct d0_opal_v200 {
412 __be16 baseComID;
413 __be16 numComIDs;
414 /* range_crossing:
415 * bits 1-6: reserved
416 * bit 0: range crossing
417 */
418 u8 range_crossing;
419 /* num_locking_admin_auth:
420 * not aligned to 16 bits, so use two u8.
421 * stored in big endian:
422 * 0: MSB
423 * 1: LSB
424 */
425 u8 num_locking_admin_auth[2];
426 /* num_locking_user_auth:
427 * not aligned to 16 bits, so use two u8.
428 * stored in big endian:
429 * 0: MSB
430 * 1: LSB
431 */
432 u8 num_locking_user_auth[2];
433 u8 initialPIN;
434 u8 revertedPIN;
435 u8 reserved01;
436 __be32 reserved02;
437};
438
439/* Union of features used to parse the discovery 0 response */
440struct d0_features {
441 __be16 code;
442 /*
443 * r_version bits:
444 * bits 4-7: version
445 * bits 0-3: reserved
446 */
447 u8 r_version;
448 u8 length;
449 u8 features[];
450};
451
452#endif /* _OPAL_PROTO_H */
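
The SHORT_ATOM_* constants above come from the TCG data stream encoding: a short atom is a single header byte carrying the short-atom ID bits, a bytestring flag, a signed flag, and a 4-bit payload length, followed by the payload itself. A hedged sketch of building that header byte from the masks defined in the header (illustrative only; the real encoder lives in block/sed-opal.c and is not shown in this hunk):

#include <stdint.h>
#include <stdio.h>

#define SHORT_ATOM_ID         0x80
#define SHORT_ATOM_BYTESTRING 0x20
#define SHORT_ATOM_SIGNED     0x10
#define SHORT_ATOM_LEN_MASK   0xF

/* Build the one-byte short-atom header for a payload of `len` bytes. */
static uint8_t short_atom_header(int bytestring, int has_sign, uint8_t len)
{
	uint8_t hdr = SHORT_ATOM_ID;

	if (bytestring)
		hdr |= SHORT_ATOM_BYTESTRING;
	if (has_sign)
		hdr |= SHORT_ATOM_SIGNED;
	return hdr | (len & SHORT_ATOM_LEN_MASK);
}

int main(void)
{
	/* 8-byte unsigned bytestring (e.g. a UID): 0x80 | 0x20 | 8 = 0xa8 */
	printf("header = 0x%02x\n", short_atom_header(1, 0, 8));
	return 0;
}
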
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index bcd86e5cd546..39f70d968754 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
293 if (!gpt) 293 if (!gpt)
294 return NULL; 294 return NULL;
295 295
296 count = le32_to_cpu(gpt->num_partition_entries) * 296 count = (size_t)le32_to_cpu(gpt->num_partition_entries) *
297 le32_to_cpu(gpt->sizeof_partition_entry); 297 le32_to_cpu(gpt->sizeof_partition_entry);
298 if (!count) 298 if (!count)
299 return NULL; 299 return NULL;
@@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
352 gpt_header **gpt, gpt_entry **ptes) 352 gpt_header **gpt, gpt_entry **ptes)
353{ 353{
354 u32 crc, origcrc; 354 u32 crc, origcrc;
355 u64 lastlba; 355 u64 lastlba, pt_size;
356 356
357 if (!ptes) 357 if (!ptes)
358 return 0; 358 return 0;
@@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
434 goto fail; 434 goto fail;
435 } 435 }
436 436
437 /* Sanity check partition table size */
438 pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) *
439 le32_to_cpu((*gpt)->sizeof_partition_entry);
440 if (pt_size > KMALLOC_MAX_SIZE) {
441 pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n",
442 (unsigned long long)pt_size, KMALLOC_MAX_SIZE);
443 goto fail;
444 }
445
437 if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) 446 if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
438 goto fail; 447 goto fail;
439 448
440 /* Check the GUID Partition Entry Array CRC */ 449 /* Check the GUID Partition Entry Array CRC */
441 crc = efi_crc32((const unsigned char *) (*ptes), 450 crc = efi_crc32((const unsigned char *) (*ptes), pt_size);
442 le32_to_cpu((*gpt)->num_partition_entries) *
443 le32_to_cpu((*gpt)->sizeof_partition_entry));
444 451
445 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { 452 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
446 pr_debug("GUID Partition Entry Array CRC check failed.\n"); 453 pr_debug("GUID Partition Entry Array CRC check failed.\n");
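
The efi.c change above widens num_partition_entries * sizeof_partition_entry to 64 bits and rejects tables larger than KMALLOC_MAX_SIZE before allocating, so a corrupt GPT header can no longer overflow the 32-bit product or force an oversized allocation. A minimal sketch of the same guard; the 8 MiB cap here is only an example stand-in for KMALLOC_MAX_SIZE:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define ALLOC_MAX (8ULL * 1024 * 1024)	/* example cap, not the kernel's value */

/* Returns the byte count to allocate, or 0 if the table size is bogus. */
static uint64_t pt_bytes(uint32_t num_entries, uint32_t entry_size)
{
	/* Widen before multiplying so a crafted header can't wrap to a small value. */
	uint64_t size = (uint64_t)num_entries * entry_size;

	if (size == 0 || size > ALLOC_MAX)
		return 0;
	return size;
}

int main(void)
{
	printf("%" PRIu64 "\n", pt_bytes(128, 128));		/* 16384: accepted */
	printf("%" PRIu64 "\n", pt_bytes(UINT32_MAX, 512));	/* rejected as too large */
	return 0;
}

Checking the widened size once also lets the later CRC call reuse pt_size instead of recomputing the product, as the hunk above does.
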
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index c2b64923ab66..2a2fc768b27a 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -230,15 +230,17 @@ EXPORT_SYMBOL(blk_verify_command);
 static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 		struct sg_io_hdr *hdr, fmode_t mode)
 {
-	if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
+	struct scsi_request *req = scsi_req(rq);
+
+	if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len))
 		return -EFAULT;
-	if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
+	if (blk_verify_command(req->cmd, mode & FMODE_WRITE))
 		return -EPERM;
 
 	/*
 	 * fill in request structure
 	 */
-	rq->cmd_len = hdr->cmd_len;
+	req->cmd_len = hdr->cmd_len;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -254,6 +256,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 				 struct bio *bio)
 {
+	struct scsi_request *req = scsi_req(rq);
 	int r, ret = 0;
 
 	/*
@@ -267,13 +270,13 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = rq->resid_len;
+	hdr->resid = req->resid_len;
 	hdr->sb_len_wr = 0;
 
-	if (rq->sense_len && hdr->sbp) {
-		int len = min((unsigned int) hdr->mx_sb_len, rq->sense_len);
+	if (req->sense_len && hdr->sbp) {
+		int len = min((unsigned int) hdr->mx_sb_len, req->sense_len);
 
-		if (!copy_to_user(hdr->sbp, rq->sense, len))
+		if (!copy_to_user(hdr->sbp, req->sense, len))
 			hdr->sb_len_wr = len;
 		else
 			ret = -EFAULT;
@@ -294,7 +297,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	int writing = 0;
 	int at_head = 0;
 	struct request *rq;
-	char sense[SCSI_SENSE_BUFFERSIZE];
+	struct scsi_request *req;
 	struct bio *bio;
 
 	if (hdr->interface_id != 'S')
@@ -318,14 +321,16 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		at_head = 1;
 
 	ret = -ENOMEM;
-	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
+	rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
+			GFP_KERNEL);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	blk_rq_set_block_pc(rq);
+	req = scsi_req(rq);
+	scsi_req_init(rq);
 
 	if (hdr->cmd_len > BLK_MAX_CDB) {
-		rq->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
-		if (!rq->cmd)
+		req->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL);
+		if (!req->cmd)
 			goto out_put_request;
 	}
 
@@ -357,9 +362,6 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		goto out_free_cdb;
 
 	bio = rq->bio;
-	memset(sense, 0, sizeof(sense));
-	rq->sense = sense;
-	rq->sense_len = 0;
 	rq->retries = 0;
 
 	start_time = jiffies;
@@ -375,8 +377,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	ret = blk_complete_sghdr_rq(rq, hdr, bio);
 
 out_free_cdb:
-	if (rq->cmd != rq->__cmd)
-		kfree(rq->cmd);
+	scsi_req_free_cmd(req);
 out_put_request:
 	blk_put_request(rq);
 	return ret;
@@ -420,9 +421,10 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		struct scsi_ioctl_command __user *sic)
 {
 	struct request *rq;
+	struct scsi_request *req;
 	int err;
 	unsigned int in_len, out_len, bytes, opcode, cmdlen;
-	char *buffer = NULL, sense[SCSI_SENSE_BUFFERSIZE];
+	char *buffer = NULL;
 
 	if (!sic)
 		return -EINVAL;
@@ -447,12 +449,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 
 	}
 
-	rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_RECLAIM);
+	rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
+			__GFP_RECLAIM);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto error_free_buffer;
 	}
-	blk_rq_set_block_pc(rq);
+	req = scsi_req(rq);
+	scsi_req_init(rq);
 
 	cmdlen = COMMAND_SIZE(opcode);
 
@@ -460,14 +464,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	 * get command and data to send to device, if any
 	 */
 	err = -EFAULT;
-	rq->cmd_len = cmdlen;
-	if (copy_from_user(rq->cmd, sic->data, cmdlen))
+	req->cmd_len = cmdlen;
+	if (copy_from_user(req->cmd, sic->data, cmdlen))
 		goto error;
 
 	if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
 		goto error;
 
-	err = blk_verify_command(rq->cmd, mode & FMODE_WRITE);
+	err = blk_verify_command(req->cmd, mode & FMODE_WRITE);
 	if (err)
 		goto error;
 
@@ -503,18 +507,14 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 		goto error;
 	}
 
-	memset(sense, 0, sizeof(sense));
-	rq->sense = sense;
-	rq->sense_len = 0;
-
 	blk_execute_rq(q, disk, rq, 0);
 
 	err = rq->errors & 0xff;	/* only 8 bit SCSI status */
 	if (err) {
-		if (rq->sense_len && rq->sense) {
-			bytes = (OMAX_SB_LEN > rq->sense_len) ?
-				rq->sense_len : OMAX_SB_LEN;
-			if (copy_to_user(sic->data, rq->sense, bytes))
+		if (req->sense_len && req->sense) {
+			bytes = (OMAX_SB_LEN > req->sense_len) ?
+				req->sense_len : OMAX_SB_LEN;
+			if (copy_to_user(sic->data, req->sense, bytes))
 				err = -EFAULT;
 		}
 	} else {
@@ -539,14 +539,14 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	struct request *rq;
 	int err;
 
-	rq = blk_get_request(q, WRITE, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
-	blk_rq_set_block_pc(rq);
+	scsi_req_init(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
-	rq->cmd[0] = cmd;
-	rq->cmd[4] = data;
-	rq->cmd_len = 6;
+	scsi_req(rq)->cmd[0] = cmd;
+	scsi_req(rq)->cmd[4] = data;
+	scsi_req(rq)->cmd_len = 6;
 	err = blk_execute_rq(q, bd_disk, rq, 0);
 	blk_put_request(rq);
 
@@ -743,6 +743,17 @@ int scsi_cmd_blk_ioctl(struct block_device *bd, fmode_t mode,
 }
 EXPORT_SYMBOL(scsi_cmd_blk_ioctl);
 
+void scsi_req_init(struct request *rq)
+{
+	struct scsi_request *req = scsi_req(rq);
+
+	memset(req->__cmd, 0, sizeof(req->__cmd));
+	req->cmd = req->__cmd;
+	req->cmd_len = BLK_MAX_CDB;
+	req->sense_len = 0;
+}
+EXPORT_SYMBOL(scsi_req_init);
+
 static int __init blk_scsi_ioctl_init(void)
 {
 	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
diff --git a/block/sed-opal.c b/block/sed-opal.c
new file mode 100644
index 000000000000..d1c52ba4d62d
--- /dev/null
+++ b/block/sed-opal.c
@@ -0,0 +1,2488 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Scott Bauer <scott.bauer@intel.com>
6 * Rafael Antognolli <rafael.antognolli@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ":OPAL: " fmt
19
20#include <linux/delay.h>
21#include <linux/device.h>
22#include <linux/kernel.h>
23#include <linux/list.h>
24#include <linux/genhd.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <uapi/linux/sed-opal.h>
28#include <linux/sed-opal.h>
29#include <linux/string.h>
30#include <linux/kdev_t.h>
31
32#include "opal_proto.h"
33
34#define IO_BUFFER_LENGTH 2048
35#define MAX_TOKS 64
36
37typedef int (*opal_step)(struct opal_dev *dev);
38
39enum opal_atom_width {
40 OPAL_WIDTH_TINY,
41 OPAL_WIDTH_SHORT,
42 OPAL_WIDTH_MEDIUM,
43 OPAL_WIDTH_LONG,
44 OPAL_WIDTH_TOKEN
45};
46
47/*
48 * We don't copy the tokens out of the response buffer again when parsing
49 * the response. Instead, for each token, we just store a pointer to the
50 * position in the buffer where the token starts, and the size of the
51 * token in bytes.
52 */
53struct opal_resp_tok {
54 const u8 *pos;
55 size_t len;
56 enum opal_response_token type;
57 enum opal_atom_width width;
58 union {
59 u64 u;
60 s64 s;
61 } stored;
62};
63
64/*
65 * From the response header it's not possible to know how many tokens there are
66 * on the payload. So we hardcode that the maximum will be MAX_TOKS, and later
67 * if we start dealing with messages that have more than that, we can increase
68 * this number. This is done to avoid having to make two passes through the
69 * response, the first one counting how many tokens we have and the second one
70 * actually storing the positions.
71 */
72struct parsed_resp {
73 int num;
74 struct opal_resp_tok toks[MAX_TOKS];
75};
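/*
 * [Editor's sketch -- illustration only, not part of the original commit.]
 * With the layout above, a caller that has run response_parse() can read
 * decoded values straight from the token table instead of re-walking the
 * raw buffer, e.g. (use() and use_bytes() are hypothetical placeholders):
 *
 *	const struct opal_resp_tok *tok = &resp->toks[4];
 *
 *	if (tok->type == OPAL_DTA_TOKENID_UINT)
 *		use(tok->stored.u);
 *	else if (tok->type == OPAL_DTA_TOKENID_BYTESTRING)
 *		use_bytes(tok->pos + 1, tok->len - 1);
 *
 * response_get_u64() and response_get_string() below wrap this pattern with
 * bounds and type checks.
 */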
76
77struct opal_dev {
78 bool supported;
79
80 void *data;
81 sec_send_recv *send_recv;
82
83 const opal_step *funcs;
84 void **func_data;
85 int state;
86 struct mutex dev_lock;
87 u16 comid;
88 u32 hsn;
89 u32 tsn;
90 u64 align;
91 u64 lowest_lba;
92
93 size_t pos;
94 u8 cmd[IO_BUFFER_LENGTH];
95 u8 resp[IO_BUFFER_LENGTH];
96
97 struct parsed_resp parsed;
98 size_t prev_d_len;
99 void *prev_data;
100
101 struct list_head unlk_lst;
102};
103
104
105static const u8 opaluid[][OPAL_UID_LENGTH] = {
106 /* users */
107 [OPAL_SMUID_UID] =
108 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff },
109 [OPAL_THISSP_UID] =
110 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 },
111 [OPAL_ADMINSP_UID] =
112 { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x01 },
113 [OPAL_LOCKINGSP_UID] =
114 { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x02 },
115 [OPAL_ENTERPRISE_LOCKINGSP_UID] =
116 { 0x00, 0x00, 0x02, 0x05, 0x00, 0x01, 0x00, 0x01 },
117 [OPAL_ANYBODY_UID] =
118 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01 },
119 [OPAL_SID_UID] =
120 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06 },
121 [OPAL_ADMIN1_UID] =
122 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x01 },
123 [OPAL_USER1_UID] =
124 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x01 },
125 [OPAL_USER2_UID] =
126 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x02 },
127 [OPAL_PSID_UID] =
128 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0xff, 0x01 },
129 [OPAL_ENTERPRISE_BANDMASTER0_UID] =
130 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x80, 0x01 },
131 [OPAL_ENTERPRISE_ERASEMASTER_UID] =
132 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 },
133
134 /* tables */
135
136 [OPAL_LOCKINGRANGE_GLOBAL] =
137 { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
138 [OPAL_LOCKINGRANGE_ACE_RDLOCKED] =
139 { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE0, 0x01 },
140 [OPAL_LOCKINGRANGE_ACE_WRLOCKED] =
141 { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE8, 0x01 },
142 [OPAL_MBRCONTROL] =
143 { 0x00, 0x00, 0x08, 0x03, 0x00, 0x00, 0x00, 0x01 },
144 [OPAL_MBR] =
145 { 0x00, 0x00, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00 },
146 [OPAL_AUTHORITY_TABLE] =
147 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00},
148 [OPAL_C_PIN_TABLE] =
149 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00},
150 [OPAL_LOCKING_INFO_TABLE] =
151 { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 },
152 [OPAL_ENTERPRISE_LOCKING_INFO_TABLE] =
153 { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },
154
155 /* C_PIN_TABLE object ID's */
156
157 [OPAL_C_PIN_MSID] =
158 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x84, 0x02},
159 [OPAL_C_PIN_SID] =
160 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01},
161 [OPAL_C_PIN_ADMIN1] =
162 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x01, 0x00, 0x01},
163
164 /* half UID's (only first 4 bytes used) */
165
166 [OPAL_HALF_UID_AUTHORITY_OBJ_REF] =
167 { 0x00, 0x00, 0x0C, 0x05, 0xff, 0xff, 0xff, 0xff },
168 [OPAL_HALF_UID_BOOLEAN_ACE] =
169 { 0x00, 0x00, 0x04, 0x0E, 0xff, 0xff, 0xff, 0xff },
170
171 /* special value for omitted optional parameter */
172 [OPAL_UID_HEXFF] =
173 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
174};
175
176/*
177 * TCG Storage SSC Methods.
178 * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
179 * Section: 6.3 Assigned UIDs
180 */
181static const u8 opalmethod[][OPAL_UID_LENGTH] = {
182 [OPAL_PROPERTIES] =
183 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01 },
184 [OPAL_STARTSESSION] =
185 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x02 },
186 [OPAL_REVERT] =
187 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x02 },
188 [OPAL_ACTIVATE] =
189 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x03 },
190 [OPAL_EGET] =
191 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06 },
192 [OPAL_ESET] =
193 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x07 },
194 [OPAL_NEXT] =
195 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08 },
196 [OPAL_EAUTHENTICATE] =
197 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c },
198 [OPAL_GETACL] =
199 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0d },
200 [OPAL_GENKEY] =
201 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x10 },
202 [OPAL_REVERTSP] =
203 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11 },
204 [OPAL_GET] =
205 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16 },
206 [OPAL_SET] =
207 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17 },
208 [OPAL_AUTHENTICATE] =
209 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1c },
210 [OPAL_RANDOM] =
211 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x06, 0x01 },
212 [OPAL_ERASE] =
213 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 },
214};
215
216typedef int (cont_fn)(struct opal_dev *dev);
217
218static int end_opal_session_error(struct opal_dev *dev);
219
220struct opal_suspend_data {
221 struct opal_lock_unlock unlk;
222 u8 lr;
223 struct list_head node;
224};
225
226/*
227 * Derived from:
228 * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
229 * Section: 5.1.5 Method Status Codes
230 */
231static const char * const opal_errors[] = {
232 "Success",
233 "Not Authorized",
234 "Unknown Error",
235 "SP Busy",
236 "SP Failed",
237 "SP Disabled",
238 "SP Frozen",
239 "No Sessions Available",
240 "Uniqueness Conflict",
241 "Insufficient Space",
242 "Insufficient Rows",
243 "Invalid Function",
244 "Invalid Parameter",
245 "Invalid Reference",
246 "Unknown Error",
247 "TPER Malfunction",
248 "Transaction Failure",
249 "Response Overflow",
250 "Authority Locked Out",
251};
252
253static const char *opal_error_to_human(int error)
254{
255 if (error == 0x3f)
256 return "Failed";
257
258 if (error >= ARRAY_SIZE(opal_errors) || error < 0)
259 return "Unknown Error";
260
261 return opal_errors[error];
262}
263
264static void print_buffer(const u8 *ptr, u32 length)
265{
266#ifdef DEBUG
267 print_hex_dump_bytes("OPAL: ", DUMP_PREFIX_OFFSET, ptr, length);
268 pr_debug("\n");
269#endif
270}
271
272static bool check_tper(const void *data)
273{
274 const struct d0_tper_features *tper = data;
275 u8 flags = tper->supported_features;
276
277 if (!(flags & TPER_SYNC_SUPPORTED)) {
278 pr_err("TPer sync not supported. flags = %d\n",
279 tper->supported_features);
280 return false;
281 }
282
283 return true;
284}
285
286static bool check_sum(const void *data)
287{
288 const struct d0_single_user_mode *sum = data;
289 u32 nlo = be32_to_cpu(sum->num_locking_objects);
290
291 if (nlo == 0) {
292 pr_err("Need at least one locking object.\n");
293 return false;
294 }
295
296 pr_debug("Number of locking objects: %d\n", nlo);
297
298 return true;
299}
300
301static u16 get_comid_v100(const void *data)
302{
303 const struct d0_opal_v100 *v100 = data;
304
305 return be16_to_cpu(v100->baseComID);
306}
307
308static u16 get_comid_v200(const void *data)
309{
310 const struct d0_opal_v200 *v200 = data;
311
312 return be16_to_cpu(v200->baseComID);
313}
314
315static int opal_send_cmd(struct opal_dev *dev)
316{
317 return dev->send_recv(dev->data, dev->comid, TCG_SECP_01,
318 dev->cmd, IO_BUFFER_LENGTH,
319 true);
320}
321
322static int opal_recv_cmd(struct opal_dev *dev)
323{
324 return dev->send_recv(dev->data, dev->comid, TCG_SECP_01,
325 dev->resp, IO_BUFFER_LENGTH,
326 false);
327}
328
329static int opal_recv_check(struct opal_dev *dev)
330{
331 size_t buflen = IO_BUFFER_LENGTH;
332 void *buffer = dev->resp;
333 struct opal_header *hdr = buffer;
334 int ret;
335
336 do {
337 pr_debug("Sent OPAL command: outstanding=%d, minTransfer=%d\n",
338 hdr->cp.outstandingData,
339 hdr->cp.minTransfer);
340
341 if (hdr->cp.outstandingData == 0 ||
342 hdr->cp.minTransfer != 0)
343 return 0;
344
345 memset(buffer, 0, buflen);
346 ret = opal_recv_cmd(dev);
347 } while (!ret);
348
349 return ret;
350}
351
352static int opal_send_recv(struct opal_dev *dev, cont_fn *cont)
353{
354 int ret;
355
356 ret = opal_send_cmd(dev);
357 if (ret)
358 return ret;
359 ret = opal_recv_cmd(dev);
360 if (ret)
361 return ret;
362 ret = opal_recv_check(dev);
363 if (ret)
364 return ret;
365 return cont(dev);
366}
367
368static void check_geometry(struct opal_dev *dev, const void *data)
369{
370 const struct d0_geometry_features *geo = data;
371
372 dev->align = geo->alignment_granularity;
373 dev->lowest_lba = geo->lowest_aligned_lba;
374}
375
376static int next(struct opal_dev *dev)
377{
378 opal_step func;
379 int error = 0;
380
381 do {
382 func = dev->funcs[dev->state];
383 if (!func)
384 break;
385
386 error = func(dev);
387 if (error) {
388 pr_err("Error on step function: %d with error %d: %s\n",
389 dev->state, error,
390 opal_error_to_human(error));
391
392 /* For each OPAL command we do a discovery0 then we
393 * start some sort of session.
394 * If we haven't passed state 1 then there was an error
395 * on discovery0 or during the attempt to start a
396 * session. Therefore we shouldn't attempt to terminate
397 * a session, as one has not yet been created.
398 */
399 if (dev->state > 1)
400 return end_opal_session_error(dev);
401 }
402 dev->state++;
403 } while (!error);
404
405 return error;
406}
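/*
 * [Editor's sketch -- illustration only, not part of the original commit.]
 * next() is driven by a NULL-terminated table of opal_step functions in
 * dev->funcs, with per-step arguments in dev->func_data. A hypothetical
 * sequence for a key operation could look like:
 *
 *	static const opal_step example_steps[] = {
 *		opal_discovery0,
 *		start_auth_opal_session,
 *		get_active_key,
 *		gen_key,
 *		end_opal_session,
 *		NULL,
 *	};
 *
 *	dev->funcs = example_steps;
 *	dev->state = 0;
 *	error = next(dev);
 *
 * Since step 0 is discovery0 and step 1 starts a session, an error at
 * state > 1 additionally triggers end_opal_session_error(), as the comment
 * above explains.
 */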
407
408static int opal_discovery0_end(struct opal_dev *dev)
409{
410 bool found_com_id = false, supported = true, single_user = false;
411 const struct d0_header *hdr = (struct d0_header *)dev->resp;
412 const u8 *epos = dev->resp, *cpos = dev->resp;
413 u16 comid = 0;
414
415 print_buffer(dev->resp, be32_to_cpu(hdr->length));
416
417 epos += be32_to_cpu(hdr->length); /* end of buffer */
418 cpos += sizeof(*hdr); /* current position on buffer */
419
420 while (cpos < epos && supported) {
421 const struct d0_features *body =
422 (const struct d0_features *)cpos;
423
424 switch (be16_to_cpu(body->code)) {
425 case FC_TPER:
426 supported = check_tper(body->features);
427 break;
428 case FC_SINGLEUSER:
429 single_user = check_sum(body->features);
430 break;
431 case FC_GEOMETRY:
432 check_geometry(dev, body);
433 break;
434 case FC_LOCKING:
435 case FC_ENTERPRISE:
436 case FC_DATASTORE:
437 /* some ignored properties */
438 pr_debug("Found OPAL feature description: %d\n",
439 be16_to_cpu(body->code));
440 break;
441 case FC_OPALV100:
442 comid = get_comid_v100(body->features);
443 found_com_id = true;
444 break;
445 case FC_OPALV200:
446 comid = get_comid_v200(body->features);
447 found_com_id = true;
448 break;
449 case 0xbfff ... 0xffff:
450 /* vendor specific, just ignore */
451 break;
452 default:
453 pr_debug("OPAL Unknown feature: %d\n",
454 be16_to_cpu(body->code));
455
456 }
457 cpos += body->length + 4;
458 }
459
460 if (!supported) {
461 pr_debug("This device is not Opal enabled. Not Supported!\n");
462 return -EOPNOTSUPP;
463 }
464
465 if (!single_user)
466 pr_debug("Device doesn't support single user mode\n");
467
468
469 if (!found_com_id) {
470 pr_debug("Could not find OPAL comid for device. Returning early\n");
471 return -EOPNOTSUPP;
472 }
473
474 dev->comid = comid;
475
476 return 0;
477}
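/*
 * [Editor's note -- illustration only, not part of the original commit.]
 * Each Level 0 discovery descriptor is a 4-byte header (2-byte feature
 * code, version byte, length byte) followed by 'length' bytes of payload,
 * which is why the loop above advances with cpos += body->length + 4.
 * For example, a descriptor starting 0x02 0x03 0x10 0x10 would report
 * feature code 0x0203 (FC_OPALV200) with a 16-byte payload, so the next
 * descriptor begins 20 bytes later.
 */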
478
479static int opal_discovery0(struct opal_dev *dev)
480{
481 int ret;
482
483 memset(dev->resp, 0, IO_BUFFER_LENGTH);
484 dev->comid = OPAL_DISCOVERY_COMID;
485 ret = opal_recv_cmd(dev);
486 if (ret)
487 return ret;
488 return opal_discovery0_end(dev);
489}
490
491static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok)
492{
493 if (*err)
494 return;
495 if (cmd->pos >= IO_BUFFER_LENGTH - 1) {
496 pr_err("Error adding u8: end of buffer.\n");
497 *err = -ERANGE;
498 return;
499 }
500 cmd->cmd[cmd->pos++] = tok;
501}
502
503static void add_short_atom_header(struct opal_dev *cmd, bool bytestring,
504 bool has_sign, int len)
505{
506 u8 atom;
507 int err = 0;
508
509 atom = SHORT_ATOM_ID;
510 atom |= bytestring ? SHORT_ATOM_BYTESTRING : 0;
511 atom |= has_sign ? SHORT_ATOM_SIGNED : 0;
512 atom |= len & SHORT_ATOM_LEN_MASK;
513
514 add_token_u8(&err, cmd, atom);
515}
516
517static void add_medium_atom_header(struct opal_dev *cmd, bool bytestring,
518 bool has_sign, int len)
519{
520 u8 header0;
521
522 header0 = MEDIUM_ATOM_ID;
523 header0 |= bytestring ? MEDIUM_ATOM_BYTESTRING : 0;
524 header0 |= has_sign ? MEDIUM_ATOM_SIGNED : 0;
525 header0 |= (len >> 8) & MEDIUM_ATOM_LEN_MASK;
526 cmd->cmd[cmd->pos++] = header0;
527 cmd->cmd[cmd->pos++] = len;
528}
529
530static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
531{
532
533 size_t len;
534 int msb;
535 u8 n;
536
537 if (!(number & ~TINY_ATOM_DATA_MASK)) {
538 add_token_u8(err, cmd, number);
539 return;
540 }
541
542 msb = fls(number);
543 len = DIV_ROUND_UP(msb, 4);
544
545 if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) {
546 pr_err("Error adding u64: end of buffer.\n");
547 *err = -ERANGE;
548 return;
549 }
550 add_short_atom_header(cmd, false, false, len);
551 while (len--) {
552 n = number >> (len * 8);
553 add_token_u8(err, cmd, n);
554 }
555}
556
557static void add_token_bytestring(int *err, struct opal_dev *cmd,
558 const u8 *bytestring, size_t len)
559{
560 size_t header_len = 1;
561 bool is_short_atom = true;
562
563 if (*err)
564 return;
565
566 if (len & ~SHORT_ATOM_LEN_MASK) {
567 header_len = 2;
568 is_short_atom = false;
569 }
570
571 if (len >= IO_BUFFER_LENGTH - cmd->pos - header_len) {
572 pr_err("Error adding bytestring: end of buffer.\n");
573 *err = -ERANGE;
574 return;
575 }
576
577 if (is_short_atom)
578 add_short_atom_header(cmd, true, false, len);
579 else
580 add_medium_atom_header(cmd, true, false, len);
581
582 memcpy(&cmd->cmd[cmd->pos], bytestring, len);
583 cmd->pos += len;
584
585}
586
587static int build_locking_range(u8 *buffer, size_t length, u8 lr)
588{
589 if (length > OPAL_UID_LENGTH) {
590 pr_err("Can't build locking range. Length OOB\n");
591 return -ERANGE;
592 }
593
594 memcpy(buffer, opaluid[OPAL_LOCKINGRANGE_GLOBAL], OPAL_UID_LENGTH);
595
596 if (lr == 0)
597 return 0;
598 buffer[5] = LOCKING_RANGE_NON_GLOBAL;
599 buffer[7] = lr;
600
601 return 0;
602}
603
604static int build_locking_user(u8 *buffer, size_t length, u8 lr)
605{
606 if (length > OPAL_UID_LENGTH) {
607 pr_err("Can't build locking range user, Length OOB\n");
608 return -ERANGE;
609 }
610
611 memcpy(buffer, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);
612
613 buffer[7] = lr + 1;
614
615 return 0;
616}
617
618static void set_comid(struct opal_dev *cmd, u16 comid)
619{
620 struct opal_header *hdr = (struct opal_header *)cmd->cmd;
621
622 hdr->cp.extendedComID[0] = comid >> 8;
623 hdr->cp.extendedComID[1] = comid;
624 hdr->cp.extendedComID[2] = 0;
625 hdr->cp.extendedComID[3] = 0;
626}
627
628static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn)
629{
630 struct opal_header *hdr;
631 int err = 0;
632
633 add_token_u8(&err, cmd, OPAL_ENDOFDATA);
634 add_token_u8(&err, cmd, OPAL_STARTLIST);
635 add_token_u8(&err, cmd, 0);
636 add_token_u8(&err, cmd, 0);
637 add_token_u8(&err, cmd, 0);
638 add_token_u8(&err, cmd, OPAL_ENDLIST);
639
640 if (err) {
641 pr_err("Error finalizing command.\n");
642 return -EFAULT;
643 }
644
645 hdr = (struct opal_header *) cmd->cmd;
646
647 hdr->pkt.tsn = cpu_to_be32(tsn);
648 hdr->pkt.hsn = cpu_to_be32(hsn);
649
650 hdr->subpkt.length = cpu_to_be32(cmd->pos - sizeof(*hdr));
651 while (cmd->pos % 4) {
652 if (cmd->pos >= IO_BUFFER_LENGTH) {
653 pr_err("Error: Buffer overrun\n");
654 return -ERANGE;
655 }
656 cmd->cmd[cmd->pos++] = 0;
657 }
658 hdr->pkt.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp) -
659 sizeof(hdr->pkt));
660 hdr->cp.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp));
661
662 return 0;
663}
664
665static enum opal_response_token token_type(const struct parsed_resp *resp,
666 int n)
667{
668 const struct opal_resp_tok *tok;
669
670 if (n >= resp->num) {
671 pr_err("Token number doesn't exist: %d, resp: %d\n",
672 n, resp->num);
673 return OPAL_DTA_TOKENID_INVALID;
674 }
675
676 tok = &resp->toks[n];
677 if (tok->len == 0) {
678 pr_err("Token length must be non-zero\n");
679 return OPAL_DTA_TOKENID_INVALID;
680 }
681
682 return tok->type;
683}
684
685/*
686 * This function returns 0 in case of invalid token. One should call
687 * token_type() first to find out if the token is valid or not.
688 */
689static enum opal_token response_get_token(const struct parsed_resp *resp,
690 int n)
691{
692 const struct opal_resp_tok *tok;
693
694 if (n >= resp->num) {
695 pr_err("Token number doesn't exist: %d, resp: %d\n",
696 n, resp->num);
697 return 0;
698 }
699
700 tok = &resp->toks[n];
701 if (tok->len == 0) {
702 pr_err("Token length must be non-zero\n");
703 return 0;
704 }
705
706 return tok->pos[0];
707}
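/*
 * [Editor's sketch -- illustration only, not part of the original commit.]
 * Because 0 is also a legitimate token value, callers check the type first,
 * e.g.:
 *
 *	if (token_type(resp, n) == OPAL_DTA_TOKENID_TOKEN &&
 *	    response_get_token(resp, n) == OPAL_ENDLIST)
 *		...
 *
 * response_status() below uses exactly this pattern to locate the method
 * status list.
 */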
708
709static size_t response_parse_tiny(struct opal_resp_tok *tok,
710 const u8 *pos)
711{
712 tok->pos = pos;
713 tok->len = 1;
714 tok->width = OPAL_WIDTH_TINY;
715
716 if (pos[0] & TINY_ATOM_SIGNED) {
717 tok->type = OPAL_DTA_TOKENID_SINT;
718 } else {
719 tok->type = OPAL_DTA_TOKENID_UINT;
720 tok->stored.u = pos[0] & 0x3f;
721 }
722
723 return tok->len;
724}
725
726static size_t response_parse_short(struct opal_resp_tok *tok,
727 const u8 *pos)
728{
729 tok->pos = pos;
730 tok->len = (pos[0] & SHORT_ATOM_LEN_MASK) + 1;
731 tok->width = OPAL_WIDTH_SHORT;
732
733 if (pos[0] & SHORT_ATOM_BYTESTRING) {
734 tok->type = OPAL_DTA_TOKENID_BYTESTRING;
735 } else if (pos[0] & SHORT_ATOM_SIGNED) {
736 tok->type = OPAL_DTA_TOKENID_SINT;
737 } else {
738 u64 u_integer = 0;
739 int i, b = 0;
740
741 tok->type = OPAL_DTA_TOKENID_UINT;
742 if (tok->len > 9) {
743 pr_warn("uint64 with more than 8 bytes\n");
744 return -EINVAL;
745 }
746 for (i = tok->len - 1; i > 0; i--) {
747 u_integer |= ((u64)pos[i] << (8 * b));
748 b++;
749 }
750 tok->stored.u = u_integer;
751 }
752
753 return tok->len;
754}
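/*
 * [Editor's worked example -- not part of the original commit.] A short
 * atom carries its length in the low bits of the header byte. Assuming the
 * usual TCG encoding (SHORT_ATOM_ID 0x80, a bytestring bit, a signed bit
 * and a 4-bit length), the sequence 0x82 0x01 0xf4 is an unsigned short
 * atom: header 0x82 gives tok->len = 2 + 1 = 3, and the two big-endian
 * data bytes decode to tok->stored.u = 0x01f4 = 500.
 */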
755
756static size_t response_parse_medium(struct opal_resp_tok *tok,
757 const u8 *pos)
758{
759 tok->pos = pos;
760 tok->len = (((pos[0] & MEDIUM_ATOM_LEN_MASK) << 8) | pos[1]) + 2;
761 tok->width = OPAL_WIDTH_MEDIUM;
762
763 if (pos[0] & MEDIUM_ATOM_BYTESTRING)
764 tok->type = OPAL_DTA_TOKENID_BYTESTRING;
765 else if (pos[0] & MEDIUM_ATOM_SIGNED)
766 tok->type = OPAL_DTA_TOKENID_SINT;
767 else
768 tok->type = OPAL_DTA_TOKENID_UINT;
769
770 return tok->len;
771}
772
773static size_t response_parse_long(struct opal_resp_tok *tok,
774 const u8 *pos)
775{
776 tok->pos = pos;
777 tok->len = ((pos[1] << 16) | (pos[2] << 8) | pos[3]) + 4;
778 tok->width = OPAL_WIDTH_LONG;
779
780 if (pos[0] & LONG_ATOM_BYTESTRING)
781 tok->type = OPAL_DTA_TOKENID_BYTESTRING;
782 else if (pos[0] & LONG_ATOM_SIGNED)
783 tok->type = OPAL_DTA_TOKENID_SINT;
784 else
785 tok->type = OPAL_DTA_TOKENID_UINT;
786
787 return tok->len;
788}
789
790static size_t response_parse_token(struct opal_resp_tok *tok,
791 const u8 *pos)
792{
793 tok->pos = pos;
794 tok->len = 1;
795 tok->type = OPAL_DTA_TOKENID_TOKEN;
796 tok->width = OPAL_WIDTH_TOKEN;
797
798 return tok->len;
799}
800
801static int response_parse(const u8 *buf, size_t length,
802 struct parsed_resp *resp)
803{
804 const struct opal_header *hdr;
805 struct opal_resp_tok *iter;
806 int num_entries = 0;
807 int total;
808 size_t token_length;
809 const u8 *pos;
810
811 if (!buf)
812 return -EFAULT;
813
814 if (!resp)
815 return -EFAULT;
816
817 hdr = (struct opal_header *)buf;
818 pos = buf;
819 pos += sizeof(*hdr);
820
821 pr_debug("Response size: cp: %d, pkt: %d, subpkt: %d\n",
822 be32_to_cpu(hdr->cp.length),
823 be32_to_cpu(hdr->pkt.length),
824 be32_to_cpu(hdr->subpkt.length));
825
826 if (hdr->cp.length == 0 || hdr->pkt.length == 0 ||
827 hdr->subpkt.length == 0) {
828 pr_err("Bad header length. cp: %d, pkt: %d, subpkt: %d\n",
829 be32_to_cpu(hdr->cp.length),
830 be32_to_cpu(hdr->pkt.length),
831 be32_to_cpu(hdr->subpkt.length));
832 print_buffer(pos, sizeof(*hdr));
833 return -EINVAL;
834 }
835
836 if (pos > buf + length)
837 return -EFAULT;
838
839 iter = resp->toks;
840 total = be32_to_cpu(hdr->subpkt.length);
841 print_buffer(pos, total);
842 while (total > 0) {
843 if (pos[0] <= TINY_ATOM_BYTE) /* tiny atom */
844 token_length = response_parse_tiny(iter, pos);
845 else if (pos[0] <= SHORT_ATOM_BYTE) /* short atom */
846 token_length = response_parse_short(iter, pos);
847 else if (pos[0] <= MEDIUM_ATOM_BYTE) /* medium atom */
848 token_length = response_parse_medium(iter, pos);
849 else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */
850 token_length = response_parse_long(iter, pos);
851 else /* TOKEN */
852 token_length = response_parse_token(iter, pos);
853
854 if (token_length == -EINVAL)
855 return -EINVAL;
856
857 pos += token_length;
858 total -= token_length;
859 iter++;
860 num_entries++;
861 }
862
863 if (num_entries == 0) {
864 pr_err("Couldn't parse response.\n");
865 return -EINVAL;
866 }
867 resp->num = num_entries;
868
869 return 0;
870}
871
872static size_t response_get_string(const struct parsed_resp *resp, int n,
873 const char **store)
874{
875 *store = NULL;
876 if (!resp) {
877 pr_err("Response is NULL\n");
878 return 0;
879 }
880
881 if (n > resp->num) {
882 pr_err("Response has %d tokens. Can't access %d\n",
883 resp->num, n);
884 return 0;
885 }
886
887 if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) {
888 pr_err("Token is not a byte string!\n");
889 return 0;
890 }
891
892 *store = resp->toks[n].pos + 1;
893 return resp->toks[n].len - 1;
894}
895
896static u64 response_get_u64(const struct parsed_resp *resp, int n)
897{
898 if (!resp) {
899 pr_err("Response is NULL\n");
900 return 0;
901 }
902
903 if (n > resp->num) {
904 pr_err("Response has %d tokens. Can't access %d\n",
905 resp->num, n);
906 return 0;
907 }
908
909 if (resp->toks[n].type != OPAL_DTA_TOKENID_UINT) {
910 pr_err("Token is not unsigned it: %d\n",
911 resp->toks[n].type);
912 return 0;
913 }
914
915 if (!(resp->toks[n].width == OPAL_WIDTH_TINY ||
916 resp->toks[n].width == OPAL_WIDTH_SHORT)) {
917 pr_err("Atom is not short or tiny: %d\n",
918 resp->toks[n].width);
919 return 0;
920 }
921
922 return resp->toks[n].stored.u;
923}
924
925static u8 response_status(const struct parsed_resp *resp)
926{
927 if (token_type(resp, 0) == OPAL_DTA_TOKENID_TOKEN &&
928 response_get_token(resp, 0) == OPAL_ENDOFSESSION) {
929 return 0;
930 }
931
932 if (resp->num < 5)
933 return DTAERROR_NO_METHOD_STATUS;
934
935 if (token_type(resp, resp->num - 1) != OPAL_DTA_TOKENID_TOKEN ||
936 token_type(resp, resp->num - 5) != OPAL_DTA_TOKENID_TOKEN ||
937 response_get_token(resp, resp->num - 1) != OPAL_ENDLIST ||
938 response_get_token(resp, resp->num - 5) != OPAL_STARTLIST)
939 return DTAERROR_NO_METHOD_STATUS;
940
941 return response_get_u64(resp, resp->num - 4);
942}
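/*
 * [Editor's note -- illustration only, not part of the original commit.]
 * A normal method response ends with the status list
 * [ STARTLIST, status, 0, 0, ENDLIST ], so with resp->num parsed tokens the
 * status code sits at index resp->num - 4; 0 means success and non-zero
 * values are translated by opal_error_to_human() above.
 */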
943
944/* Parses and checks for errors */
945static int parse_and_check_status(struct opal_dev *dev)
946{
947 int error;
948
949 print_buffer(dev->cmd, dev->pos);
950
951 error = response_parse(dev->resp, IO_BUFFER_LENGTH, &dev->parsed);
952 if (error) {
953 pr_err("Couldn't parse response.\n");
954 return error;
955 }
956
957 return response_status(&dev->parsed);
958}
959
960static void clear_opal_cmd(struct opal_dev *dev)
961{
962 dev->pos = sizeof(struct opal_header);
963 memset(dev->cmd, 0, IO_BUFFER_LENGTH);
964}
965
966static int start_opal_session_cont(struct opal_dev *dev)
967{
968 u32 hsn, tsn;
969 int error = 0;
970
971 error = parse_and_check_status(dev);
972 if (error)
973 return error;
974
975 hsn = response_get_u64(&dev->parsed, 4);
976 tsn = response_get_u64(&dev->parsed, 5);
977
978 if (hsn == 0 && tsn == 0) {
979 pr_err("Couldn't authenticate session\n");
980 return -EPERM;
981 }
982
983 dev->hsn = hsn;
984 dev->tsn = tsn;
985 return 0;
986}
987
988static void add_suspend_info(struct opal_dev *dev,
989 struct opal_suspend_data *sus)
990{
991 struct opal_suspend_data *iter;
992
993 list_for_each_entry(iter, &dev->unlk_lst, node) {
994 if (iter->lr == sus->lr) {
995 list_del(&iter->node);
996 kfree(iter);
997 break;
998 }
999 }
1000 list_add_tail(&sus->node, &dev->unlk_lst);
1001}
1002
1003static int end_session_cont(struct opal_dev *dev)
1004{
1005 dev->hsn = 0;
1006 dev->tsn = 0;
1007 return parse_and_check_status(dev);
1008}
1009
1010static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
1011{
1012 int ret;
1013
1014 ret = cmd_finalize(dev, dev->hsn, dev->tsn);
1015 if (ret) {
1016 pr_err("Error finalizing command buffer: %d\n", ret);
1017 return ret;
1018 }
1019
1020 print_buffer(dev->cmd, dev->pos);
1021
1022 return opal_send_recv(dev, cont);
1023}
1024
1025static int gen_key(struct opal_dev *dev)
1026{
1027 const u8 *method;
1028 u8 uid[OPAL_UID_LENGTH];
1029 int err = 0;
1030
1031 clear_opal_cmd(dev);
1032 set_comid(dev, dev->comid);
1033
1034 memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len));
1035 method = opalmethod[OPAL_GENKEY];
1036 kfree(dev->prev_data);
1037 dev->prev_data = NULL;
1038
1039 add_token_u8(&err, dev, OPAL_CALL);
1040 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1041 add_token_bytestring(&err, dev, opalmethod[OPAL_GENKEY],
1042 OPAL_UID_LENGTH);
1043 add_token_u8(&err, dev, OPAL_STARTLIST);
1044 add_token_u8(&err, dev, OPAL_ENDLIST);
1045
1046 if (err) {
1047 pr_err("Error building gen key command\n");
1048 return err;
1049
1050 }
1051 return finalize_and_send(dev, parse_and_check_status);
1052}
1053
1054static int get_active_key_cont(struct opal_dev *dev)
1055{
1056 const char *activekey;
1057 size_t keylen;
1058 int error = 0;
1059
1060 error = parse_and_check_status(dev);
1061 if (error)
1062 return error;
1063 keylen = response_get_string(&dev->parsed, 4, &activekey);
1064 if (!activekey) {
1065 pr_err("%s: Couldn't extract the Activekey from the response\n",
1066 __func__);
1067 return OPAL_INVAL_PARAM;
1068 }
1069 dev->prev_data = kmemdup(activekey, keylen, GFP_KERNEL);
1070
1071 if (!dev->prev_data)
1072 return -ENOMEM;
1073
1074 dev->prev_d_len = keylen;
1075
1076 return 0;
1077}
1078
1079static int get_active_key(struct opal_dev *dev)
1080{
1081 u8 uid[OPAL_UID_LENGTH];
1082 int err = 0;
1083 u8 *lr;
1084
1085 clear_opal_cmd(dev);
1086 set_comid(dev, dev->comid);
1087 lr = dev->func_data[dev->state];
1088
1089 err = build_locking_range(uid, sizeof(uid), *lr);
1090 if (err)
1091 return err;
1092
1093 err = 0;
1094 add_token_u8(&err, dev, OPAL_CALL);
1095 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1096 add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH);
1097 add_token_u8(&err, dev, OPAL_STARTLIST);
1098 add_token_u8(&err, dev, OPAL_STARTLIST);
1099 add_token_u8(&err, dev, OPAL_STARTNAME);
1100 add_token_u8(&err, dev, 3); /* startColumn */
1101 add_token_u8(&err, dev, 10); /* ActiveKey */
1102 add_token_u8(&err, dev, OPAL_ENDNAME);
1103 add_token_u8(&err, dev, OPAL_STARTNAME);
1104 add_token_u8(&err, dev, 4); /* endColumn */
1105 add_token_u8(&err, dev, 10); /* ActiveKey */
1106 add_token_u8(&err, dev, OPAL_ENDNAME);
1107 add_token_u8(&err, dev, OPAL_ENDLIST);
1108 add_token_u8(&err, dev, OPAL_ENDLIST);
1109 if (err) {
1110 pr_err("Error building get active key command\n");
1111 return err;
1112 }
1113
1114 return finalize_and_send(dev, get_active_key_cont);
1115}
1116
1117static int generic_lr_enable_disable(struct opal_dev *dev,
1118 u8 *uid, bool rle, bool wle,
1119 bool rl, bool wl)
1120{
1121 int err = 0;
1122
1123 add_token_u8(&err, dev, OPAL_CALL);
1124 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1125 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1126
1127 add_token_u8(&err, dev, OPAL_STARTLIST);
1128 add_token_u8(&err, dev, OPAL_STARTNAME);
1129 add_token_u8(&err, dev, OPAL_VALUES);
1130 add_token_u8(&err, dev, OPAL_STARTLIST);
1131
1132 add_token_u8(&err, dev, OPAL_STARTNAME);
1133 add_token_u8(&err, dev, 5); /* ReadLockEnabled */
1134 add_token_u8(&err, dev, rle);
1135 add_token_u8(&err, dev, OPAL_ENDNAME);
1136
1137 add_token_u8(&err, dev, OPAL_STARTNAME);
1138 add_token_u8(&err, dev, 6); /* WriteLockEnabled */
1139 add_token_u8(&err, dev, wle);
1140 add_token_u8(&err, dev, OPAL_ENDNAME);
1141
1142 add_token_u8(&err, dev, OPAL_STARTNAME);
1143 add_token_u8(&err, dev, OPAL_READLOCKED);
1144 add_token_u8(&err, dev, rl);
1145 add_token_u8(&err, dev, OPAL_ENDNAME);
1146
1147 add_token_u8(&err, dev, OPAL_STARTNAME);
1148 add_token_u8(&err, dev, OPAL_WRITELOCKED);
1149 add_token_u8(&err, dev, wl);
1150 add_token_u8(&err, dev, OPAL_ENDNAME);
1151
1152 add_token_u8(&err, dev, OPAL_ENDLIST);
1153 add_token_u8(&err, dev, OPAL_ENDNAME);
1154 add_token_u8(&err, dev, OPAL_ENDLIST);
1155 return err;
1156}
1157
1158static inline int enable_global_lr(struct opal_dev *dev, u8 *uid,
1159 struct opal_user_lr_setup *setup)
1160{
1161 int err;
1162
1163 err = generic_lr_enable_disable(dev, uid, !!setup->RLE, !!setup->WLE,
1164 0, 0);
1165 if (err)
1166 pr_err("Failed to create enable global lr command\n");
1167 return err;
1168}
1169
1170static int setup_locking_range(struct opal_dev *dev)
1171{
1172 u8 uid[OPAL_UID_LENGTH];
1173 struct opal_user_lr_setup *setup;
1174 u8 lr;
1175 int err = 0;
1176
1177 clear_opal_cmd(dev);
1178 set_comid(dev, dev->comid);
1179
1180 setup = dev->func_data[dev->state];
1181 lr = setup->session.opal_key.lr;
1182 err = build_locking_range(uid, sizeof(uid), lr);
1183 if (err)
1184 return err;
1185
1186 if (lr == 0)
1187 err = enable_global_lr(dev, uid, setup);
1188 else {
1189 add_token_u8(&err, dev, OPAL_CALL);
1190 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1191 add_token_bytestring(&err, dev, opalmethod[OPAL_SET],
1192 OPAL_UID_LENGTH);
1193
1194 add_token_u8(&err, dev, OPAL_STARTLIST);
1195 add_token_u8(&err, dev, OPAL_STARTNAME);
1196 add_token_u8(&err, dev, OPAL_VALUES);
1197 add_token_u8(&err, dev, OPAL_STARTLIST);
1198
1199 add_token_u8(&err, dev, OPAL_STARTNAME);
1200 add_token_u8(&err, dev, 3); /* Ranges Start */
1201 add_token_u64(&err, dev, setup->range_start);
1202 add_token_u8(&err, dev, OPAL_ENDNAME);
1203
1204 add_token_u8(&err, dev, OPAL_STARTNAME);
1205 add_token_u8(&err, dev, 4); /* Ranges length */
1206 add_token_u64(&err, dev, setup->range_length);
1207 add_token_u8(&err, dev, OPAL_ENDNAME);
1208
1209 add_token_u8(&err, dev, OPAL_STARTNAME);
1210 add_token_u8(&err, dev, 5); /* ReadLockEnabled */
1211 add_token_u64(&err, dev, !!setup->RLE);
1212 add_token_u8(&err, dev, OPAL_ENDNAME);
1213
1214 add_token_u8(&err, dev, OPAL_STARTNAME);
1215 add_token_u8(&err, dev, 6); /* WriteLockEnabled */
1216 add_token_u64(&err, dev, !!setup->WLE);
1217 add_token_u8(&err, dev, OPAL_ENDNAME);
1218
1219 add_token_u8(&err, dev, OPAL_ENDLIST);
1220 add_token_u8(&err, dev, OPAL_ENDNAME);
1221 add_token_u8(&err, dev, OPAL_ENDLIST);
1222
1223 }
1224 if (err) {
1225 pr_err("Error building Setup Locking range command.\n");
1226 return err;
1227
1228 }
1229
1230 return finalize_and_send(dev, parse_and_check_status);
1231}
1232
1233static int start_generic_opal_session(struct opal_dev *dev,
1234 enum opal_uid auth,
1235 enum opal_uid sp_type,
1236 const char *key,
1237 u8 key_len)
1238{
1239 u32 hsn;
1240 int err = 0;
1241
1242 if (key == NULL && auth != OPAL_ANYBODY_UID) {
1243 pr_err("%s: Attempted to open ADMIN_SP Session without a Host" \
1244 "Challenge, and not as the Anybody UID\n", __func__);
1245 return OPAL_INVAL_PARAM;
1246 }
1247
1248 clear_opal_cmd(dev);
1249
1250 set_comid(dev, dev->comid);
1251 hsn = GENERIC_HOST_SESSION_NUM;
1252
1253 add_token_u8(&err, dev, OPAL_CALL);
1254 add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID],
1255 OPAL_UID_LENGTH);
1256 add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION],
1257 OPAL_UID_LENGTH);
1258 add_token_u8(&err, dev, OPAL_STARTLIST);
1259 add_token_u64(&err, dev, hsn);
1260 add_token_bytestring(&err, dev, opaluid[sp_type], OPAL_UID_LENGTH);
1261 add_token_u8(&err, dev, 1);
1262
1263 switch (auth) {
1264 case OPAL_ANYBODY_UID:
1265 add_token_u8(&err, dev, OPAL_ENDLIST);
1266 break;
1267 case OPAL_ADMIN1_UID:
1268 case OPAL_SID_UID:
1269 add_token_u8(&err, dev, OPAL_STARTNAME);
1270 add_token_u8(&err, dev, 0); /* HostChallenge */
1271 add_token_bytestring(&err, dev, key, key_len);
1272 add_token_u8(&err, dev, OPAL_ENDNAME);
1273 add_token_u8(&err, dev, OPAL_STARTNAME);
1274 add_token_u8(&err, dev, 3); /* HostSignAuth */
1275 add_token_bytestring(&err, dev, opaluid[auth],
1276 OPAL_UID_LENGTH);
1277 add_token_u8(&err, dev, OPAL_ENDNAME);
1278 add_token_u8(&err, dev, OPAL_ENDLIST);
1279 break;
1280 default:
1281 pr_err("Cannot start Admin SP session with auth %d\n", auth);
1282 return OPAL_INVAL_PARAM;
1283 }
1284
1285 if (err) {
1286 pr_err("Error building start adminsp session command.\n");
1287 return err;
1288 }
1289
1290 return finalize_and_send(dev, start_opal_session_cont);
1291}
1292
1293static int start_anybodyASP_opal_session(struct opal_dev *dev)
1294{
1295 return start_generic_opal_session(dev, OPAL_ANYBODY_UID,
1296 OPAL_ADMINSP_UID, NULL, 0);
1297}
1298
1299static int start_SIDASP_opal_session(struct opal_dev *dev)
1300{
1301 int ret;
1302 const u8 *key = dev->prev_data;
1303 struct opal_key *okey;
1304
1305 if (!key) {
1306 okey = dev->func_data[dev->state];
1307 ret = start_generic_opal_session(dev, OPAL_SID_UID,
1308 OPAL_ADMINSP_UID,
1309 okey->key,
1310 okey->key_len);
1311 } else {
1312 ret = start_generic_opal_session(dev, OPAL_SID_UID,
1313 OPAL_ADMINSP_UID,
1314 key, dev->prev_d_len);
1315 kfree(key);
1316 dev->prev_data = NULL;
1317 }
1318 return ret;
1319}
1320
1321static inline int start_admin1LSP_opal_session(struct opal_dev *dev)
1322{
1323 struct opal_key *key = dev->func_data[dev->state];
1324
1325 return start_generic_opal_session(dev, OPAL_ADMIN1_UID,
1326 OPAL_LOCKINGSP_UID,
1327 key->key, key->key_len);
1328}
1329
1330static int start_auth_opal_session(struct opal_dev *dev)
1331{
1332 u8 lk_ul_user[OPAL_UID_LENGTH];
1333 int err = 0;
1334
1335 struct opal_session_info *session = dev->func_data[dev->state];
1336 size_t keylen = session->opal_key.key_len;
1337 u8 *key = session->opal_key.key;
1338 u32 hsn = GENERIC_HOST_SESSION_NUM;
1339
1340 clear_opal_cmd(dev);
1341 set_comid(dev, dev->comid);
1342
1343 if (session->sum) {
1344 err = build_locking_user(lk_ul_user, sizeof(lk_ul_user),
1345 session->opal_key.lr);
1346 if (err)
1347 return err;
1348
1349 } else if (session->who != OPAL_ADMIN1 && !session->sum) {
1350 err = build_locking_user(lk_ul_user, sizeof(lk_ul_user),
1351 session->who - 1);
1352 if (err)
1353 return err;
1354 } else
1355 memcpy(lk_ul_user, opaluid[OPAL_ADMIN1_UID], OPAL_UID_LENGTH);
1356
1357 add_token_u8(&err, dev, OPAL_CALL);
1358 add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID],
1359 OPAL_UID_LENGTH);
1360 add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION],
1361 OPAL_UID_LENGTH);
1362
1363 add_token_u8(&err, dev, OPAL_STARTLIST);
1364 add_token_u64(&err, dev, hsn);
1365 add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID],
1366 OPAL_UID_LENGTH);
1367 add_token_u8(&err, dev, 1);
1368 add_token_u8(&err, dev, OPAL_STARTNAME);
1369 add_token_u8(&err, dev, 0);
1370 add_token_bytestring(&err, dev, key, keylen);
1371 add_token_u8(&err, dev, OPAL_ENDNAME);
1372 add_token_u8(&err, dev, OPAL_STARTNAME);
1373 add_token_u8(&err, dev, 3);
1374 add_token_bytestring(&err, dev, lk_ul_user, OPAL_UID_LENGTH);
1375 add_token_u8(&err, dev, OPAL_ENDNAME);
1376 add_token_u8(&err, dev, OPAL_ENDLIST);
1377
1378 if (err) {
1379 pr_err("Error building STARTSESSION command.\n");
1380 return err;
1381 }
1382
1383 return finalize_and_send(dev, start_opal_session_cont);
1384}
1385
1386static int revert_tper(struct opal_dev *dev)
1387{
1388 int err = 0;
1389
1390 clear_opal_cmd(dev);
1391 set_comid(dev, dev->comid);
1392
1393 add_token_u8(&err, dev, OPAL_CALL);
1394 add_token_bytestring(&err, dev, opaluid[OPAL_ADMINSP_UID],
1395 OPAL_UID_LENGTH);
1396 add_token_bytestring(&err, dev, opalmethod[OPAL_REVERT],
1397 OPAL_UID_LENGTH);
1398 add_token_u8(&err, dev, OPAL_STARTLIST);
1399 add_token_u8(&err, dev, OPAL_ENDLIST);
1400 if (err) {
1401 pr_err("Error building REVERT TPER command.\n");
1402 return err;
1403 }
1404
1405 return finalize_and_send(dev, parse_and_check_status);
1406}
1407
1408static int internal_activate_user(struct opal_dev *dev)
1409{
1410 struct opal_session_info *session = dev->func_data[dev->state];
1411 u8 uid[OPAL_UID_LENGTH];
1412 int err = 0;
1413
1414 clear_opal_cmd(dev);
1415 set_comid(dev, dev->comid);
1416
1417 memcpy(uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);
1418 uid[7] = session->who;
1419
1420 add_token_u8(&err, dev, OPAL_CALL);
1421 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1422 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1423 add_token_u8(&err, dev, OPAL_STARTLIST);
1424 add_token_u8(&err, dev, OPAL_STARTNAME);
1425 add_token_u8(&err, dev, OPAL_VALUES);
1426 add_token_u8(&err, dev, OPAL_STARTLIST);
1427 add_token_u8(&err, dev, OPAL_STARTNAME);
1428 add_token_u8(&err, dev, 5); /* Enabled */
1429 add_token_u8(&err, dev, OPAL_TRUE);
1430 add_token_u8(&err, dev, OPAL_ENDNAME);
1431 add_token_u8(&err, dev, OPAL_ENDLIST);
1432 add_token_u8(&err, dev, OPAL_ENDNAME);
1433 add_token_u8(&err, dev, OPAL_ENDLIST);
1434
1435 if (err) {
1436 pr_err("Error building Activate UserN command.\n");
1437 return err;
1438 }
1439
1440 return finalize_and_send(dev, parse_and_check_status);
1441}
1442
1443static int erase_locking_range(struct opal_dev *dev)
1444{
1445 struct opal_session_info *session;
1446 u8 uid[OPAL_UID_LENGTH];
1447 int err = 0;
1448
1449 clear_opal_cmd(dev);
1450 set_comid(dev, dev->comid);
1451 session = dev->func_data[dev->state];
1452
1453 if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0)
1454 return -ERANGE;
1455
1456 add_token_u8(&err, dev, OPAL_CALL);
1457 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1458 add_token_bytestring(&err, dev, opalmethod[OPAL_ERASE],
1459 OPAL_UID_LENGTH);
1460 add_token_u8(&err, dev, OPAL_STARTLIST);
1461 add_token_u8(&err, dev, OPAL_ENDLIST);
1462
1463 if (err) {
1464 pr_err("Error building Erase Locking Range Command.\n");
1465 return err;
1466 }
1467 return finalize_and_send(dev, parse_and_check_status);
1468}
1469
1470static int set_mbr_done(struct opal_dev *dev)
1471{
1472 u8 mbr_done_tf = *(u8 *)dev->func_data[dev->state];
1473 int err = 0;
1474
1475 clear_opal_cmd(dev);
1476 set_comid(dev, dev->comid);
1477
1478 add_token_u8(&err, dev, OPAL_CALL);
1479 add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL],
1480 OPAL_UID_LENGTH);
1481 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1482 add_token_u8(&err, dev, OPAL_STARTLIST);
1483 add_token_u8(&err, dev, OPAL_STARTNAME);
1484 add_token_u8(&err, dev, OPAL_VALUES);
1485 add_token_u8(&err, dev, OPAL_STARTLIST);
1486 add_token_u8(&err, dev, OPAL_STARTNAME);
1487 add_token_u8(&err, dev, 2); /* Done */
1488 add_token_u8(&err, dev, mbr_done_tf); /* Done T or F */
1489 add_token_u8(&err, dev, OPAL_ENDNAME);
1490 add_token_u8(&err, dev, OPAL_ENDLIST);
1491 add_token_u8(&err, dev, OPAL_ENDNAME);
1492 add_token_u8(&err, dev, OPAL_ENDLIST);
1493
1494 if (err) {
1495 pr_err("Error Building set MBR Done command\n");
1496 return err;
1497 }
1498
1499 return finalize_and_send(dev, parse_and_check_status);
1500}
1501
1502static int set_mbr_enable_disable(struct opal_dev *dev)
1503{
1504 u8 mbr_en_dis = *(u8 *)dev->func_data[dev->state];
1505 int err = 0;
1506
1507 clear_opal_cmd(dev);
1508 set_comid(dev, dev->comid);
1509
1510 add_token_u8(&err, dev, OPAL_CALL);
1511 add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL],
1512 OPAL_UID_LENGTH);
1513 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1514 add_token_u8(&err, dev, OPAL_STARTLIST);
1515 add_token_u8(&err, dev, OPAL_STARTNAME);
1516 add_token_u8(&err, dev, OPAL_VALUES);
1517 add_token_u8(&err, dev, OPAL_STARTLIST);
1518 add_token_u8(&err, dev, OPAL_STARTNAME);
1519 add_token_u8(&err, dev, 1);
1520 add_token_u8(&err, dev, mbr_en_dis);
1521 add_token_u8(&err, dev, OPAL_ENDNAME);
1522 add_token_u8(&err, dev, OPAL_ENDLIST);
1523 add_token_u8(&err, dev, OPAL_ENDNAME);
1524 add_token_u8(&err, dev, OPAL_ENDLIST);
1525
1526 if (err) {
1527 pr_err("Error Building set MBR done command\n");
1528 return err;
1529 }
1530
1531 return finalize_and_send(dev, parse_and_check_status);
1532}
1533
1534static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid,
1535 struct opal_dev *dev)
1536{
1537 int err = 0;
1538
1539 clear_opal_cmd(dev);
1540 set_comid(dev, dev->comid);
1541
1542 add_token_u8(&err, dev, OPAL_CALL);
1543 add_token_bytestring(&err, dev, cpin_uid, OPAL_UID_LENGTH);
1544 add_token_bytestring(&err, dev, opalmethod[OPAL_SET],
1545 OPAL_UID_LENGTH);
1546 add_token_u8(&err, dev, OPAL_STARTLIST);
1547 add_token_u8(&err, dev, OPAL_STARTNAME);
1548 add_token_u8(&err, dev, OPAL_VALUES);
1549 add_token_u8(&err, dev, OPAL_STARTLIST);
1550 add_token_u8(&err, dev, OPAL_STARTNAME);
1551 add_token_u8(&err, dev, 3); /* PIN */
1552 add_token_bytestring(&err, dev, key, key_len);
1553 add_token_u8(&err, dev, OPAL_ENDNAME);
1554 add_token_u8(&err, dev, OPAL_ENDLIST);
1555 add_token_u8(&err, dev, OPAL_ENDNAME);
1556 add_token_u8(&err, dev, OPAL_ENDLIST);
1557
1558 return err;
1559}
1560
1561static int set_new_pw(struct opal_dev *dev)
1562{
1563 u8 cpin_uid[OPAL_UID_LENGTH];
1564 struct opal_session_info *usr = dev->func_data[dev->state];
1565
1566
1567 memcpy(cpin_uid, opaluid[OPAL_C_PIN_ADMIN1], OPAL_UID_LENGTH);
1568
1569 if (usr->who != OPAL_ADMIN1) {
1570 cpin_uid[5] = 0x03;
1571 if (usr->sum)
1572 cpin_uid[7] = usr->opal_key.lr + 1;
1573 else
1574 cpin_uid[7] = usr->who;
1575 }
1576
1577 if (generic_pw_cmd(usr->opal_key.key, usr->opal_key.key_len,
1578 cpin_uid, dev)) {
1579 pr_err("Error building set password command.\n");
1580 return -ERANGE;
1581 }
1582
1583 return finalize_and_send(dev, parse_and_check_status);
1584}
1585
1586static int set_sid_cpin_pin(struct opal_dev *dev)
1587{
1588 u8 cpin_uid[OPAL_UID_LENGTH];
1589 struct opal_key *key = dev->func_data[dev->state];
1590
1591 memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH);
1592
1593 if (generic_pw_cmd(key->key, key->key_len, cpin_uid, dev)) {
1594 pr_err("Error building Set SID cpin\n");
1595 return -ERANGE;
1596 }
1597 return finalize_and_send(dev, parse_and_check_status);
1598}
1599
1600static int add_user_to_lr(struct opal_dev *dev)
1601{
1602 u8 lr_buffer[OPAL_UID_LENGTH];
1603 u8 user_uid[OPAL_UID_LENGTH];
1604 struct opal_lock_unlock *lkul;
1605 int err = 0;
1606
1607 clear_opal_cmd(dev);
1608 set_comid(dev, dev->comid);
1609
1610 lkul = dev->func_data[dev->state];
1611
1612 memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED],
1613 OPAL_UID_LENGTH);
1614
1615 if (lkul->l_state == OPAL_RW)
1616 memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_WRLOCKED],
1617 OPAL_UID_LENGTH);
1618
1619 lr_buffer[7] = lkul->session.opal_key.lr;
1620
1621 memcpy(user_uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);
1622
1623 user_uid[7] = lkul->session.who;
1624
1625 add_token_u8(&err, dev, OPAL_CALL);
1626 add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH);
1627 add_token_bytestring(&err, dev, opalmethod[OPAL_SET],
1628 OPAL_UID_LENGTH);
1629
1630 add_token_u8(&err, dev, OPAL_STARTLIST);
1631 add_token_u8(&err, dev, OPAL_STARTNAME);
1632 add_token_u8(&err, dev, OPAL_VALUES);
1633
1634 add_token_u8(&err, dev, OPAL_STARTLIST);
1635 add_token_u8(&err, dev, OPAL_STARTNAME);
1636 add_token_u8(&err, dev, 3);
1637
1638 add_token_u8(&err, dev, OPAL_STARTLIST);
1639
1640
1641 add_token_u8(&err, dev, OPAL_STARTNAME);
1642 add_token_bytestring(&err, dev,
1643 opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
1644 OPAL_UID_LENGTH/2);
1645 add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
1646 add_token_u8(&err, dev, OPAL_ENDNAME);
1647
1648
1649 add_token_u8(&err, dev, OPAL_STARTNAME);
1650 add_token_bytestring(&err, dev,
1651 opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
1652 OPAL_UID_LENGTH/2);
1653 add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
1654 add_token_u8(&err, dev, OPAL_ENDNAME);
1655
1656
1657 add_token_u8(&err, dev, OPAL_STARTNAME);
1658 add_token_bytestring(&err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE],
1659 OPAL_UID_LENGTH/2);
1660 add_token_u8(&err, dev, 1);
1661 add_token_u8(&err, dev, OPAL_ENDNAME);
1662
1663
1664 add_token_u8(&err, dev, OPAL_ENDLIST);
1665 add_token_u8(&err, dev, OPAL_ENDNAME);
1666 add_token_u8(&err, dev, OPAL_ENDLIST);
1667 add_token_u8(&err, dev, OPAL_ENDNAME);
1668 add_token_u8(&err, dev, OPAL_ENDLIST);
1669
1670 if (err) {
1671 pr_err("Error building add user to locking range command.\n");
1672 return err;
1673 }
1674
1675 return finalize_and_send(dev, parse_and_check_status);
1676}
1677
1678static int lock_unlock_locking_range(struct opal_dev *dev)
1679{
1680 u8 lr_buffer[OPAL_UID_LENGTH];
1681 const u8 *method;
1682 struct opal_lock_unlock *lkul;
1683 u8 read_locked = 1, write_locked = 1;
1684 int err = 0;
1685
1686 clear_opal_cmd(dev);
1687 set_comid(dev, dev->comid);
1688
1689 method = opalmethod[OPAL_SET];
1690 lkul = dev->func_data[dev->state];
1691 if (build_locking_range(lr_buffer, sizeof(lr_buffer),
1692 lkul->session.opal_key.lr) < 0)
1693 return -ERANGE;
1694
1695 switch (lkul->l_state) {
1696 case OPAL_RO:
1697 read_locked = 0;
1698 write_locked = 1;
1699 break;
1700 case OPAL_RW:
1701 read_locked = 0;
1702 write_locked = 0;
1703 break;
1704 case OPAL_LK:
1705 /* vars are initialized to locked */
1706 break;
1707 default:
1708 pr_err("Tried to set an invalid locking state... returning to uland\n");
1709 return OPAL_INVAL_PARAM;
1710 }
1711
1712 add_token_u8(&err, dev, OPAL_CALL);
1713 add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH);
1714 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1715 add_token_u8(&err, dev, OPAL_STARTLIST);
1716 add_token_u8(&err, dev, OPAL_STARTNAME);
1717 add_token_u8(&err, dev, OPAL_VALUES);
1718 add_token_u8(&err, dev, OPAL_STARTLIST);
1719
1720 add_token_u8(&err, dev, OPAL_STARTNAME);
1721 add_token_u8(&err, dev, OPAL_READLOCKED);
1722 add_token_u8(&err, dev, read_locked);
1723 add_token_u8(&err, dev, OPAL_ENDNAME);
1724
1725 add_token_u8(&err, dev, OPAL_STARTNAME);
1726 add_token_u8(&err, dev, OPAL_WRITELOCKED);
1727 add_token_u8(&err, dev, write_locked);
1728 add_token_u8(&err, dev, OPAL_ENDNAME);
1729
1730 add_token_u8(&err, dev, OPAL_ENDLIST);
1731 add_token_u8(&err, dev, OPAL_ENDNAME);
1732 add_token_u8(&err, dev, OPAL_ENDLIST);
1733
1734 if (err) {
1735 pr_err("Error building SET command.\n");
1736 return err;
1737 }
1738 return finalize_and_send(dev, parse_and_check_status);
1739}
1740
1741
1742static int lock_unlock_locking_range_sum(struct opal_dev *dev)
1743{
1744 u8 lr_buffer[OPAL_UID_LENGTH];
1745 u8 read_locked = 1, write_locked = 1;
1746 const u8 *method;
1747 struct opal_lock_unlock *lkul;
1748 int ret;
1749
1750 clear_opal_cmd(dev);
1751 set_comid(dev, dev->comid);
1752
1753 method = opalmethod[OPAL_SET];
1754 lkul = dev->func_data[dev->state];
1755 if (build_locking_range(lr_buffer, sizeof(lr_buffer),
1756 lkul->session.opal_key.lr) < 0)
1757 return -ERANGE;
1758
1759 switch (lkul->l_state) {
1760 case OPAL_RO:
1761 read_locked = 0;
1762 write_locked = 1;
1763 break;
1764 case OPAL_RW:
1765 read_locked = 0;
1766 write_locked = 0;
1767 break;
1768 case OPAL_LK:
1769 /* vars are initialized to locked */
1770 break;
1771 default:
1772 pr_err("Tried to set an invalid locking state.\n");
1773 return OPAL_INVAL_PARAM;
1774 }
1775 ret = generic_lr_enable_disable(dev, lr_buffer, 1, 1,
1776 read_locked, write_locked);
1777
1778 if (ret < 0) {
1779 pr_err("Error building SET command.\n");
1780 return ret;
1781 }
1782 return finalize_and_send(dev, parse_and_check_status);
1783}
1784
1785static int activate_lsp(struct opal_dev *dev)
1786{
1787 struct opal_lr_act *opal_act;
1788 u8 user_lr[OPAL_UID_LENGTH];
1789 u8 uint_3 = 0x83;
1790 int err = 0, i;
1791
1792 clear_opal_cmd(dev);
1793 set_comid(dev, dev->comid);
1794
1795 opal_act = dev->func_data[dev->state];
1796
1797 add_token_u8(&err, dev, OPAL_CALL);
1798 add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID],
1799 OPAL_UID_LENGTH);
1800 add_token_bytestring(&err, dev, opalmethod[OPAL_ACTIVATE],
1801 OPAL_UID_LENGTH);
1802
1803
1804 if (opal_act->sum) {
1805 err = build_locking_range(user_lr, sizeof(user_lr),
1806 opal_act->lr[0]);
1807 if (err)
1808 return err;
1809
1810 add_token_u8(&err, dev, OPAL_STARTLIST);
1811 add_token_u8(&err, dev, OPAL_STARTNAME);
1812 add_token_u8(&err, dev, uint_3);
1813 add_token_u8(&err, dev, 6);
1814 add_token_u8(&err, dev, 0);
1815 add_token_u8(&err, dev, 0);
1816
1817 add_token_u8(&err, dev, OPAL_STARTLIST);
1818 add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
1819 for (i = 1; i < opal_act->num_lrs; i++) {
1820 user_lr[7] = opal_act->lr[i];
1821 add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
1822 }
1823 add_token_u8(&err, dev, OPAL_ENDLIST);
1824 add_token_u8(&err, dev, OPAL_ENDNAME);
1825 add_token_u8(&err, dev, OPAL_ENDLIST);
1826
1827 } else {
1828 add_token_u8(&err, dev, OPAL_STARTLIST);
1829 add_token_u8(&err, dev, OPAL_ENDLIST);
1830 }
1831
1832 if (err) {
1833 pr_err("Error building Activate LockingSP command.\n");
1834 return err;
1835 }
1836
1837 return finalize_and_send(dev, parse_and_check_status);
1838}
1839
1840static int get_lsp_lifecycle_cont(struct opal_dev *dev)
1841{
1842 u8 lc_status;
1843 int error = 0;
1844
1845 error = parse_and_check_status(dev);
1846 if (error)
1847 return error;
1848
1849 lc_status = response_get_u64(&dev->parsed, 4);
1850 /* 0x08 is Manufactured Inactive */
1851 /* 0x09 is Manufactured */
1852 if (lc_status != OPAL_MANUFACTURED_INACTIVE) {
1853 pr_err("Couldn't determine the status of the Lifecycle state\n");
1854 return -ENODEV;
1855 }
1856
1857 return 0;
1858}
1859
1860/* Determine if we're in the Manufactured Inactive or Active state */
1861static int get_lsp_lifecycle(struct opal_dev *dev)
1862{
1863 int err = 0;
1864
1865 clear_opal_cmd(dev);
1866 set_comid(dev, dev->comid);
1867
1868 add_token_u8(&err, dev, OPAL_CALL);
1869 add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID],
1870 OPAL_UID_LENGTH);
1871 add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH);
1872
1873 add_token_u8(&err, dev, OPAL_STARTLIST);
1874 add_token_u8(&err, dev, OPAL_STARTLIST);
1875
1876 add_token_u8(&err, dev, OPAL_STARTNAME);
1877 add_token_u8(&err, dev, 3); /* Start Column */
1878 add_token_u8(&err, dev, 6); /* Lifecycle Column */
1879 add_token_u8(&err, dev, OPAL_ENDNAME);
1880
1881 add_token_u8(&err, dev, OPAL_STARTNAME);
1882 add_token_u8(&err, dev, 4); /* End Column */
1883 add_token_u8(&err, dev, 6); /* Lifecycle Column */
1884 add_token_u8(&err, dev, OPAL_ENDNAME);
1885
1886 add_token_u8(&err, dev, OPAL_ENDLIST);
1887 add_token_u8(&err, dev, OPAL_ENDLIST);
1888
1889 if (err) {
1890 pr_err("Error Building GET Lifecycle Status command\n");
1891 return err;
1892 }
1893
1894 return finalize_and_send(dev, get_lsp_lifecycle_cont);
1895}
1896
1897static int get_msid_cpin_pin_cont(struct opal_dev *dev)
1898{
1899 const char *msid_pin;
1900 size_t strlen;
1901 int error = 0;
1902
1903 error = parse_and_check_status(dev);
1904 if (error)
1905 return error;
1906
1907 strlen = response_get_string(&dev->parsed, 4, &msid_pin);
1908 if (!msid_pin) {
1909 pr_err("%s: Couldn't extract PIN from response\n", __func__);
1910 return OPAL_INVAL_PARAM;
1911 }
1912
1913 dev->prev_data = kmemdup(msid_pin, strlen, GFP_KERNEL);
1914 if (!dev->prev_data)
1915 return -ENOMEM;
1916
1917 dev->prev_d_len = strlen;
1918
1919 return 0;
1920}
1921
1922static int get_msid_cpin_pin(struct opal_dev *dev)
1923{
1924 int err = 0;
1925
1926 clear_opal_cmd(dev);
1927 set_comid(dev, dev->comid);
1928
1929
1930 add_token_u8(&err, dev, OPAL_CALL);
1931 add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID],
1932 OPAL_UID_LENGTH);
1933 add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH);
1934
1935 add_token_u8(&err, dev, OPAL_STARTLIST);
1936 add_token_u8(&err, dev, OPAL_STARTLIST);
1937
1938 add_token_u8(&err, dev, OPAL_STARTNAME);
1939 add_token_u8(&err, dev, 3); /* Start Column */
1940 add_token_u8(&err, dev, 3); /* PIN */
1941 add_token_u8(&err, dev, OPAL_ENDNAME);
1942
1943 add_token_u8(&err, dev, OPAL_STARTNAME);
1944 add_token_u8(&err, dev, 4); /* End Column */
1945 add_token_u8(&err, dev, 3); /* PIN */
1946 add_token_u8(&err, dev, OPAL_ENDNAME);
1947
1948 add_token_u8(&err, dev, OPAL_ENDLIST);
1949 add_token_u8(&err, dev, OPAL_ENDLIST);
1950
1951 if (err) {
1952 pr_err("Error building Get MSID CPIN PIN command.\n");
1953 return err;
1954 }
1955
1956 return finalize_and_send(dev, get_msid_cpin_pin_cont);
1957}
1958
1959static int build_end_opal_session(struct opal_dev *dev)
1960{
1961 int err = 0;
1962
1963 clear_opal_cmd(dev);
1964
1965 set_comid(dev, dev->comid);
1966 add_token_u8(&err, dev, OPAL_ENDOFSESSION);
1967 return err;
1968}
1969
1970static int end_opal_session(struct opal_dev *dev)
1971{
1972 int ret = build_end_opal_session(dev);
1973
1974 if (ret < 0)
1975 return ret;
1976 return finalize_and_send(dev, end_session_cont);
1977}
1978
1979static int end_opal_session_error(struct opal_dev *dev)
1980{
1981 const opal_step error_end_session[] = {
1982 end_opal_session,
1983 NULL,
1984 };
1985 dev->funcs = error_end_session;
1986 dev->state = 0;
1987 return next(dev);
1988}
1989
1990static inline void setup_opal_dev(struct opal_dev *dev,
1991 const opal_step *funcs)
1992{
1993 dev->state = 0;
1994 dev->funcs = funcs;
1995 dev->tsn = 0;
1996 dev->hsn = 0;
1997 dev->func_data = NULL;
1998 dev->prev_data = NULL;
1999}
2000
2001static int check_opal_support(struct opal_dev *dev)
2002{
2003 static const opal_step funcs[] = {
2004 opal_discovery0,
2005 NULL
2006 };
2007 int ret;
2008
2009 mutex_lock(&dev->dev_lock);
2010 setup_opal_dev(dev, funcs);
2011 ret = next(dev);
2012 dev->supported = !ret;
2013 mutex_unlock(&dev->dev_lock);
2014 return ret;
2015}
2016
2017struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv)
2018{
2019 struct opal_dev *dev;
2020
2021 dev = kmalloc(sizeof(*dev), GFP_KERNEL);
2022 if (!dev)
2023 return NULL;
2024
2025 INIT_LIST_HEAD(&dev->unlk_lst);
2026 mutex_init(&dev->dev_lock);
2027 dev->data = data;
2028 dev->send_recv = send_recv;
2029 if (check_opal_support(dev) != 0) {
2030 pr_debug("Opal is not supported on this device\n");
2031 kfree(dev);
2032 return NULL;
2033 }
2034 return dev;
2035}
2036EXPORT_SYMBOL(init_opal_dev);
2037
2038static int opal_secure_erase_locking_range(struct opal_dev *dev,
2039 struct opal_session_info *opal_session)
2040{
2041 void *data[3] = { NULL };
2042 static const opal_step erase_funcs[] = {
2043 opal_discovery0,
2044 start_auth_opal_session,
2045 get_active_key,
2046 gen_key,
2047 end_opal_session,
2048 NULL,
2049 };
2050 int ret;
2051
2052 mutex_lock(&dev->dev_lock);
2053 setup_opal_dev(dev, erase_funcs);
2054
2055 dev->func_data = data;
2056 dev->func_data[1] = opal_session;
2057 dev->func_data[2] = &opal_session->opal_key.lr;
2058
2059 ret = next(dev);
2060 mutex_unlock(&dev->dev_lock);
2061 return ret;
2062}
2063
2064static int opal_erase_locking_range(struct opal_dev *dev,
2065 struct opal_session_info *opal_session)
2066{
2067 void *data[3] = { NULL };
2068 static const opal_step erase_funcs[] = {
2069 opal_discovery0,
2070 start_auth_opal_session,
2071 erase_locking_range,
2072 end_opal_session,
2073 NULL,
2074 };
2075 int ret;
2076
2077 mutex_lock(&dev->dev_lock);
2078 setup_opal_dev(dev, erase_funcs);
2079
2080 dev->func_data = data;
2081 dev->func_data[1] = opal_session;
2082 dev->func_data[2] = opal_session;
2083
2084 ret = next(dev);
2085 mutex_unlock(&dev->dev_lock);
2086 return ret;
2087}
2088
2089static int opal_enable_disable_shadow_mbr(struct opal_dev *dev,
2090 struct opal_mbr_data *opal_mbr)
2091{
2092 void *func_data[6] = { NULL };
2093 static const opal_step mbr_funcs[] = {
2094 opal_discovery0,
2095 start_admin1LSP_opal_session,
2096 set_mbr_done,
2097 end_opal_session,
2098 start_admin1LSP_opal_session,
2099 set_mbr_enable_disable,
2100 end_opal_session,
2101 NULL,
2102 };
2103 int ret;
2104
2105 if (opal_mbr->enable_disable != OPAL_MBR_ENABLE &&
2106 opal_mbr->enable_disable != OPAL_MBR_DISABLE)
2107 return -EINVAL;
2108
2109 mutex_lock(&dev->dev_lock);
2110 setup_opal_dev(dev, mbr_funcs);
2111 dev->func_data = func_data;
2112 dev->func_data[1] = &opal_mbr->key;
2113 dev->func_data[2] = &opal_mbr->enable_disable;
2114 dev->func_data[4] = &opal_mbr->key;
2115 dev->func_data[5] = &opal_mbr->enable_disable;
2116 ret = next(dev);
2117 mutex_unlock(&dev->dev_lock);
2118 return ret;
2119}
2120
2121static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk)
2122{
2123 struct opal_suspend_data *suspend;
2124
2125 suspend = kzalloc(sizeof(*suspend), GFP_KERNEL);
2126 if (!suspend)
2127 return -ENOMEM;
2128
2129 suspend->unlk = *lk_unlk;
2130 suspend->lr = lk_unlk->session.opal_key.lr;
2131
2132 mutex_lock(&dev->dev_lock);
2133 setup_opal_dev(dev, NULL);
2134 add_suspend_info(dev, suspend);
2135 mutex_unlock(&dev->dev_lock);
2136 return 0;
2137}
2138
2139static int opal_add_user_to_lr(struct opal_dev *dev,
2140 struct opal_lock_unlock *lk_unlk)
2141{
2142 void *func_data[3] = { NULL };
2143 static const opal_step funcs[] = {
2144 opal_discovery0,
2145 start_admin1LSP_opal_session,
2146 add_user_to_lr,
2147 end_opal_session,
2148 NULL
2149 };
2150 int ret;
2151
2152 if (lk_unlk->l_state != OPAL_RO &&
2153 lk_unlk->l_state != OPAL_RW) {
2154 pr_err("Locking state was not RO or RW\n");
2155 return -EINVAL;
2156 }
2157 if (lk_unlk->session.who < OPAL_USER1 ||
2158 lk_unlk->session.who > OPAL_USER9) {
2159 pr_err("Authority was not within the range of users: %d\n",
2160 lk_unlk->session.who);
2161 return -EINVAL;
2162 }
2163 if (lk_unlk->session.sum) {
2164 pr_err("%s not supported in sum. Use setup locking range\n",
2165 __func__);
2166 return -EINVAL;
2167 }
2168
2169 mutex_lock(&dev->dev_lock);
2170 setup_opal_dev(dev, funcs);
2171 dev->func_data = func_data;
2172 dev->func_data[1] = &lk_unlk->session.opal_key;
2173 dev->func_data[2] = lk_unlk;
2174 ret = next(dev);
2175 mutex_unlock(&dev->dev_lock);
2176 return ret;
2177}
2178
2179static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal)
2180{
2181 void *data[2] = { NULL };
2182 static const opal_step revert_funcs[] = {
2183 opal_discovery0,
2184 start_SIDASP_opal_session,
2185 revert_tper, /* controller will terminate session */
2186 NULL,
2187 };
2188 int ret;
2189
2190 mutex_lock(&dev->dev_lock);
2191 setup_opal_dev(dev, revert_funcs);
2192 dev->func_data = data;
2193 dev->func_data[1] = opal;
2194 ret = next(dev);
2195 mutex_unlock(&dev->dev_lock);
2196 return ret;
2197}
2198
2199static int __opal_lock_unlock_sum(struct opal_dev *dev)
2200{
2201 static const opal_step ulk_funcs_sum[] = {
2202 opal_discovery0,
2203 start_auth_opal_session,
2204 lock_unlock_locking_range_sum,
2205 end_opal_session,
2206 NULL
2207 };
2208
2209 dev->funcs = ulk_funcs_sum;
2210 return next(dev);
2211}
2212
2213static int __opal_lock_unlock(struct opal_dev *dev)
2214{
2215 static const opal_step _unlock_funcs[] = {
2216 opal_discovery0,
2217 start_auth_opal_session,
2218 lock_unlock_locking_range,
2219 end_opal_session,
2220 NULL
2221 };
2222
2223 dev->funcs = _unlock_funcs;
2224 return next(dev);
2225}
2226
2227static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk)
2228{
2229 void *func_data[3] = { NULL };
2230 int ret;
2231
2232 if (lk_unlk->session.who < OPAL_ADMIN1 ||
2233 lk_unlk->session.who > OPAL_USER9)
2234 return -EINVAL;
2235
2236 mutex_lock(&dev->dev_lock);
2237 setup_opal_dev(dev, NULL);
2238 dev->func_data = func_data;
2239 dev->func_data[1] = &lk_unlk->session;
2240 dev->func_data[2] = lk_unlk;
2241
2242 if (lk_unlk->session.sum)
2243 ret = __opal_lock_unlock_sum(dev);
2244 else
2245 ret = __opal_lock_unlock(dev);
2246
2247 mutex_unlock(&dev->dev_lock);
2248 return ret;
2249}
2250
2251static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal)
2252{
2253 static const opal_step owner_funcs[] = {
2254 opal_discovery0,
2255 start_anybodyASP_opal_session,
2256 get_msid_cpin_pin,
2257 end_opal_session,
2258 start_SIDASP_opal_session,
2259 set_sid_cpin_pin,
2260 end_opal_session,
2261 NULL
2262 };
2263 void *data[6] = { NULL };
2264 int ret;
2265
2266 if (!dev)
2267 return -ENODEV;
2268
2269 mutex_lock(&dev->dev_lock);
2270 setup_opal_dev(dev, owner_funcs);
2271 dev->func_data = data;
2272 dev->func_data[4] = opal;
2273 dev->func_data[5] = opal;
2274 ret = next(dev);
2275 mutex_unlock(&dev->dev_lock);
2276 return ret;
2277}
2278
2279static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act)
2280{
2281 void *data[4] = { NULL };
2282 static const opal_step active_funcs[] = {
2283 opal_discovery0,
2284 start_SIDASP_opal_session, /* Open session as SID auth */
2285 get_lsp_lifecycle,
2286 activate_lsp,
2287 end_opal_session,
2288 NULL
2289 };
2290 int ret;
2291
2292 if (!opal_lr_act->num_lrs || opal_lr_act->num_lrs > OPAL_MAX_LRS)
2293 return -EINVAL;
2294
2295 mutex_lock(&dev->dev_lock);
2296 setup_opal_dev(dev, active_funcs);
2297 dev->func_data = data;
2298 dev->func_data[1] = &opal_lr_act->key;
2299 dev->func_data[3] = opal_lr_act;
2300 ret = next(dev);
2301 mutex_unlock(&dev->dev_lock);
2302 return ret;
2303}
2304
2305static int opal_setup_locking_range(struct opal_dev *dev,
2306 struct opal_user_lr_setup *opal_lrs)
2307{
2308 void *data[3] = { NULL };
2309 static const opal_step lr_funcs[] = {
2310 opal_discovery0,
2311 start_auth_opal_session,
2312 setup_locking_range,
2313 end_opal_session,
2314 NULL,
2315 };
2316 int ret;
2317
2318 mutex_lock(&dev->dev_lock);
2319 setup_opal_dev(dev, lr_funcs);
2320 dev->func_data = data;
2321 dev->func_data[1] = &opal_lrs->session;
2322 dev->func_data[2] = opal_lrs;
2323 ret = next(dev);
2324 mutex_unlock(&dev->dev_lock);
2325 return ret;
2326}
2327
2328static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
2329{
2330 static const opal_step pw_funcs[] = {
2331 opal_discovery0,
2332 start_auth_opal_session,
2333 set_new_pw,
2334 end_opal_session,
2335 NULL
2336 };
2337 void *data[3] = { NULL };
2338 int ret;
2339
2340 if (opal_pw->session.who < OPAL_ADMIN1 ||
2341 opal_pw->session.who > OPAL_USER9 ||
2342 opal_pw->new_user_pw.who < OPAL_ADMIN1 ||
2343 opal_pw->new_user_pw.who > OPAL_USER9)
2344 return -EINVAL;
2345
2346 mutex_lock(&dev->dev_lock);
2347 setup_opal_dev(dev, pw_funcs);
2348 dev->func_data = data;
2349 dev->func_data[1] = (void *) &opal_pw->session;
2350 dev->func_data[2] = (void *) &opal_pw->new_user_pw;
2351
2352 ret = next(dev);
2353 mutex_unlock(&dev->dev_lock);
2354 return ret;
2355}
2356
2357static int opal_activate_user(struct opal_dev *dev,
2358 struct opal_session_info *opal_session)
2359{
2360 static const opal_step act_funcs[] = {
2361 opal_discovery0,
2362 start_admin1LSP_opal_session,
2363 internal_activate_user,
2364 end_opal_session,
2365 NULL
2366 };
2367 void *data[3] = { NULL };
2368 int ret;
2369
2370 /* We can't activate Admin1; it's active as manufactured */
2371 if (opal_session->who < OPAL_USER1 ||
2372 opal_session->who > OPAL_USER9) {
2373 pr_err("Who was not a valid user: %d\n", opal_session->who);
2374 return -EINVAL;
2375 }
2376
2377 mutex_lock(&dev->dev_lock);
2378 setup_opal_dev(dev, act_funcs);
2379 dev->func_data = data;
2380 dev->func_data[1] = &opal_session->opal_key;
2381 dev->func_data[2] = opal_session;
2382 ret = next(dev);
2383 mutex_unlock(&dev->dev_lock);
2384 return ret;
2385}
2386
2387bool opal_unlock_from_suspend(struct opal_dev *dev)
2388{
2389 struct opal_suspend_data *suspend;
2390 void *func_data[3] = { NULL };
2391 bool was_failure = false;
2392 int ret = 0;
2393
2394 if (!dev)
2395 return false;
2396 if (!dev->supported)
2397 return false;
2398
2399 mutex_lock(&dev->dev_lock);
2400 setup_opal_dev(dev, NULL);
2401 dev->func_data = func_data;
2402
2403 list_for_each_entry(suspend, &dev->unlk_lst, node) {
2404 dev->state = 0;
2405 dev->func_data[1] = &suspend->unlk.session;
2406 dev->func_data[2] = &suspend->unlk;
2407 dev->tsn = 0;
2408 dev->hsn = 0;
2409
2410 if (suspend->unlk.session.sum)
2411 ret = __opal_lock_unlock_sum(dev);
2412 else
2413 ret = __opal_lock_unlock(dev);
2414 if (ret) {
2415 pr_warn("Failed to unlock LR %hhu with sum %d\n",
2416 suspend->unlk.session.opal_key.lr,
2417 suspend->unlk.session.sum);
2418 was_failure = true;
2419 }
2420 }
2421 mutex_unlock(&dev->dev_lock);
2422 return was_failure;
2423}
2424EXPORT_SYMBOL(opal_unlock_from_suspend);
2425
2426int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
2427{
2428 void *p;
2429 int ret = -ENOTTY;
2430
2431 if (!capable(CAP_SYS_ADMIN))
2432 return -EACCES;
2433 if (!dev)
2434 return -ENOTSUPP;
2435 if (!dev->supported) {
2436 pr_err("Not supported\n");
2437 return -ENOTSUPP;
2438 }
2439
2440 p = memdup_user(arg, _IOC_SIZE(cmd));
2441 if (IS_ERR(p))
2442 return PTR_ERR(p);
2443
2444 switch (cmd) {
2445 case IOC_OPAL_SAVE:
2446 ret = opal_save(dev, p);
2447 break;
2448 case IOC_OPAL_LOCK_UNLOCK:
2449 ret = opal_lock_unlock(dev, p);
2450 break;
2451 case IOC_OPAL_TAKE_OWNERSHIP:
2452 ret = opal_take_ownership(dev, p);
2453 break;
2454 case IOC_OPAL_ACTIVATE_LSP:
2455 ret = opal_activate_lsp(dev, p);
2456 break;
2457 case IOC_OPAL_SET_PW:
2458 ret = opal_set_new_pw(dev, p);
2459 break;
2460 case IOC_OPAL_ACTIVATE_USR:
2461 ret = opal_activate_user(dev, p);
2462 break;
2463 case IOC_OPAL_REVERT_TPR:
2464 ret = opal_reverttper(dev, p);
2465 break;
2466 case IOC_OPAL_LR_SETUP:
2467 ret = opal_setup_locking_range(dev, p);
2468 break;
2469 case IOC_OPAL_ADD_USR_TO_LR:
2470 ret = opal_add_user_to_lr(dev, p);
2471 break;
2472 case IOC_OPAL_ENABLE_DISABLE_MBR:
2473 ret = opal_enable_disable_shadow_mbr(dev, p);
2474 break;
2475 case IOC_OPAL_ERASE_LR:
2476 ret = opal_erase_locking_range(dev, p);
2477 break;
2478 case IOC_OPAL_SECURE_ERASE_LR:
2479 ret = opal_secure_erase_locking_range(dev, p);
2480 break;
2481 default:
2482 pr_warn("No such Opal Ioctl %u\n", cmd);
2483 }
2484
2485 kfree(p);
2486 return ret;
2487}
2488EXPORT_SYMBOL_GPL(sed_ioctl);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1f863e757ee4..c771d4c341ea 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1265,13 +1265,13 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev)
1265 */ 1265 */
1266static int atapi_drain_needed(struct request *rq) 1266static int atapi_drain_needed(struct request *rq)
1267{ 1267{
1268 if (likely(rq->cmd_type != REQ_TYPE_BLOCK_PC)) 1268 if (likely(!blk_rq_is_passthrough(rq)))
1269 return 0; 1269 return 0;
1270 1270
1271 if (!blk_rq_bytes(rq) || op_is_write(req_op(rq))) 1271 if (!blk_rq_bytes(rq) || op_is_write(req_op(rq)))
1272 return 0; 1272 return 0;
1273 1273
1274 return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC; 1274 return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC;
1275} 1275}
1276 1276
1277static int ata_scsi_dev_config(struct scsi_device *sdev, 1277static int ata_scsi_dev_config(struct scsi_device *sdev,
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 223ff2fcae7e..f744de7a0f9b 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -69,6 +69,7 @@ config AMIGA_Z2RAM
69config GDROM 69config GDROM
70 tristate "SEGA Dreamcast GD-ROM drive" 70 tristate "SEGA Dreamcast GD-ROM drive"
71 depends on SH_DREAMCAST 71 depends on SH_DREAMCAST
72 select BLK_SCSI_REQUEST # only for the generic cdrom code
72 help 73 help
73 A standard SEGA Dreamcast comes with a modified CD ROM drive called a 74 A standard SEGA Dreamcast comes with a modified CD ROM drive called a
74 "GD-ROM" by SEGA to signify it is capable of reading special disks 75 "GD-ROM" by SEGA to signify it is capable of reading special disks
@@ -114,6 +115,7 @@ config BLK_CPQ_CISS_DA
114 tristate "Compaq Smart Array 5xxx support" 115 tristate "Compaq Smart Array 5xxx support"
115 depends on PCI 116 depends on PCI
116 select CHECK_SIGNATURE 117 select CHECK_SIGNATURE
118 select BLK_SCSI_REQUEST
117 help 119 help
118 This is the driver for Compaq Smart Array 5xxx controllers. 120 This is the driver for Compaq Smart Array 5xxx controllers.
119 Everyone using these boards should say Y here. 121 Everyone using these boards should say Y here.
@@ -386,6 +388,7 @@ config BLK_DEV_RAM_DAX
386config CDROM_PKTCDVD 388config CDROM_PKTCDVD
387 tristate "Packet writing on CD/DVD media (DEPRECATED)" 389 tristate "Packet writing on CD/DVD media (DEPRECATED)"
388 depends on !UML 390 depends on !UML
391 select BLK_SCSI_REQUEST
389 help 392 help
390 Note: This driver is deprecated and will be removed from the 393 Note: This driver is deprecated and will be removed from the
391 kernel in the near future! 394 kernel in the near future!
@@ -501,6 +504,16 @@ config VIRTIO_BLK
501 This is the virtual block driver for virtio. It can be used with 504 This is the virtual block driver for virtio. It can be used with
502 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. 505 lguest or QEMU based VMMs (like KVM or Xen). Say Y or M.
503 506
507config VIRTIO_BLK_SCSI
508 bool "SCSI passthrough request for the Virtio block driver"
509 depends on VIRTIO_BLK
510 select BLK_SCSI_REQUEST
511 ---help---
512 Enable support for SCSI passthrough (e.g. the SG_IO ioctl) on
513 virtio-blk devices. This is only supported for the legacy
514 virtio protocol and not enabled by default by any hypervisor.
515 You probably want to use virtio-scsi instead.
516
504config BLK_DEV_HD 517config BLK_DEV_HD
505 bool "Very old hard disk (MFM/RLL/IDE) driver" 518 bool "Very old hard disk (MFM/RLL/IDE) driver"
506 depends on HAVE_IDE 519 depends on HAVE_IDE
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ec9d8610b25f..027b876370bc 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -396,8 +396,8 @@ aoeblk_gdalloc(void *vp)
396 WARN_ON(d->gd); 396 WARN_ON(d->gd);
397 WARN_ON(d->flags & DEVFL_UP); 397 WARN_ON(d->flags & DEVFL_UP);
398 blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); 398 blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
399 q->backing_dev_info.name = "aoe"; 399 q->backing_dev_info->name = "aoe";
400 q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_SIZE; 400 q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE;
401 d->bufpool = mp; 401 d->bufpool = mp;
402 d->blkq = gd->queue = q; 402 d->blkq = gd->queue = q;
403 q->queuedata = d; 403 q->queuedata = d;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e5c5b8eb14a9..27d613795653 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -52,6 +52,7 @@
52#include <scsi/scsi.h> 52#include <scsi/scsi.h>
53#include <scsi/sg.h> 53#include <scsi/sg.h>
54#include <scsi/scsi_ioctl.h> 54#include <scsi/scsi_ioctl.h>
55#include <scsi/scsi_request.h>
55#include <linux/cdrom.h> 56#include <linux/cdrom.h>
56#include <linux/scatterlist.h> 57#include <linux/scatterlist.h>
57#include <linux/kthread.h> 58#include <linux/kthread.h>
@@ -1853,8 +1854,8 @@ static void cciss_softirq_done(struct request *rq)
1853 dev_dbg(&h->pdev->dev, "Done with %p\n", rq); 1854 dev_dbg(&h->pdev->dev, "Done with %p\n", rq);
1854 1855
1855 /* set the residual count for pc requests */ 1856 /* set the residual count for pc requests */
1856 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) 1857 if (blk_rq_is_passthrough(rq))
1857 rq->resid_len = c->err_info->ResidualCnt; 1858 scsi_req(rq)->resid_len = c->err_info->ResidualCnt;
1858 1859
1859 blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO); 1860 blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
1860 1861
@@ -1941,9 +1942,16 @@ static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
1941static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk, 1942static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
1942 int drv_index) 1943 int drv_index)
1943{ 1944{
1944 disk->queue = blk_init_queue(do_cciss_request, &h->lock); 1945 disk->queue = blk_alloc_queue(GFP_KERNEL);
1945 if (!disk->queue) 1946 if (!disk->queue)
1946 goto init_queue_failure; 1947 goto init_queue_failure;
1948
1949 disk->queue->cmd_size = sizeof(struct scsi_request);
1950 disk->queue->request_fn = do_cciss_request;
1951 disk->queue->queue_lock = &h->lock;
1952 if (blk_init_allocated_queue(disk->queue) < 0)
1953 goto cleanup_queue;
1954
1947 sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index); 1955 sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
1948 disk->major = h->major; 1956 disk->major = h->major;
1949 disk->first_minor = drv_index << NWD_SHIFT; 1957 disk->first_minor = drv_index << NWD_SHIFT;
@@ -3075,7 +3083,7 @@ static inline int evaluate_target_status(ctlr_info_t *h,
3075 driver_byte = DRIVER_OK; 3083 driver_byte = DRIVER_OK;
3076 msg_byte = cmd->err_info->CommandStatus; /* correct? seems too device specific */ 3084 msg_byte = cmd->err_info->CommandStatus; /* correct? seems too device specific */
3077 3085
3078 if (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) 3086 if (blk_rq_is_passthrough(cmd->rq))
3079 host_byte = DID_PASSTHROUGH; 3087 host_byte = DID_PASSTHROUGH;
3080 else 3088 else
3081 host_byte = DID_OK; 3089 host_byte = DID_OK;
@@ -3084,7 +3092,7 @@ static inline int evaluate_target_status(ctlr_info_t *h,
3084 host_byte, driver_byte); 3092 host_byte, driver_byte);
3085 3093
3086 if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) { 3094 if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
3087 if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) 3095 if (!blk_rq_is_passthrough(cmd->rq))
3088 dev_warn(&h->pdev->dev, "cmd %p " 3096 dev_warn(&h->pdev->dev, "cmd %p "
3089 "has SCSI Status 0x%x\n", 3097 "has SCSI Status 0x%x\n",
3090 cmd, cmd->err_info->ScsiStatus); 3098 cmd, cmd->err_info->ScsiStatus);
@@ -3095,31 +3103,23 @@ static inline int evaluate_target_status(ctlr_info_t *h,
3095 sense_key = 0xf & cmd->err_info->SenseInfo[2]; 3103 sense_key = 0xf & cmd->err_info->SenseInfo[2];
3096 /* no status or recovered error */ 3104 /* no status or recovered error */
3097 if (((sense_key == 0x0) || (sense_key == 0x1)) && 3105 if (((sense_key == 0x0) || (sense_key == 0x1)) &&
3098 (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC)) 3106 !blk_rq_is_passthrough(cmd->rq))
3099 error_value = 0; 3107 error_value = 0;
3100 3108
3101 if (check_for_unit_attention(h, cmd)) { 3109 if (check_for_unit_attention(h, cmd)) {
3102 *retry_cmd = !(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC); 3110 *retry_cmd = !blk_rq_is_passthrough(cmd->rq);
3103 return 0; 3111 return 0;
3104 } 3112 }
3105 3113
3106 /* Not SG_IO or similar? */ 3114 /* Not SG_IO or similar? */
3107 if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) { 3115 if (!blk_rq_is_passthrough(cmd->rq)) {
3108 if (error_value != 0) 3116 if (error_value != 0)
3109 dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION" 3117 dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION"
3110 " sense key = 0x%x\n", cmd, sense_key); 3118 " sense key = 0x%x\n", cmd, sense_key);
3111 return error_value; 3119 return error_value;
3112 } 3120 }
3113 3121
3114 /* SG_IO or similar, copy sense data back */ 3122 scsi_req(cmd->rq)->sense_len = cmd->err_info->SenseLen;
3115 if (cmd->rq->sense) {
3116 if (cmd->rq->sense_len > cmd->err_info->SenseLen)
3117 cmd->rq->sense_len = cmd->err_info->SenseLen;
3118 memcpy(cmd->rq->sense, cmd->err_info->SenseInfo,
3119 cmd->rq->sense_len);
3120 } else
3121 cmd->rq->sense_len = 0;
3122
3123 return error_value; 3123 return error_value;
3124} 3124}
3125 3125
@@ -3146,15 +3146,14 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3146 rq->errors = evaluate_target_status(h, cmd, &retry_cmd); 3146 rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
3147 break; 3147 break;
3148 case CMD_DATA_UNDERRUN: 3148 case CMD_DATA_UNDERRUN:
3149 if (cmd->rq->cmd_type == REQ_TYPE_FS) { 3149 if (!blk_rq_is_passthrough(cmd->rq)) {
3150 dev_warn(&h->pdev->dev, "cmd %p has" 3150 dev_warn(&h->pdev->dev, "cmd %p has"
3151 " completed with data underrun " 3151 " completed with data underrun "
3152 "reported\n", cmd); 3152 "reported\n", cmd);
3153 cmd->rq->resid_len = cmd->err_info->ResidualCnt;
3154 } 3153 }
3155 break; 3154 break;
3156 case CMD_DATA_OVERRUN: 3155 case CMD_DATA_OVERRUN:
3157 if (cmd->rq->cmd_type == REQ_TYPE_FS) 3156 if (!blk_rq_is_passthrough(cmd->rq))
3158 dev_warn(&h->pdev->dev, "cciss: cmd %p has" 3157 dev_warn(&h->pdev->dev, "cciss: cmd %p has"
3159 " completed with data overrun " 3158 " completed with data overrun "
3160 "reported\n", cmd); 3159 "reported\n", cmd);
@@ -3164,7 +3163,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3164 "reported invalid\n", cmd); 3163 "reported invalid\n", cmd);
3165 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3164 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3166 cmd->err_info->CommandStatus, DRIVER_OK, 3165 cmd->err_info->CommandStatus, DRIVER_OK,
3167 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3166 blk_rq_is_passthrough(cmd->rq) ?
3168 DID_PASSTHROUGH : DID_ERROR); 3167 DID_PASSTHROUGH : DID_ERROR);
3169 break; 3168 break;
3170 case CMD_PROTOCOL_ERR: 3169 case CMD_PROTOCOL_ERR:
@@ -3172,7 +3171,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3172 "protocol error\n", cmd); 3171 "protocol error\n", cmd);
3173 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3172 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3174 cmd->err_info->CommandStatus, DRIVER_OK, 3173 cmd->err_info->CommandStatus, DRIVER_OK,
3175 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3174 blk_rq_is_passthrough(cmd->rq) ?
3176 DID_PASSTHROUGH : DID_ERROR); 3175 DID_PASSTHROUGH : DID_ERROR);
3177 break; 3176 break;
3178 case CMD_HARDWARE_ERR: 3177 case CMD_HARDWARE_ERR:
@@ -3180,7 +3179,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3180 " hardware error\n", cmd); 3179 " hardware error\n", cmd);
3181 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3180 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3182 cmd->err_info->CommandStatus, DRIVER_OK, 3181 cmd->err_info->CommandStatus, DRIVER_OK,
3183 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3182 blk_rq_is_passthrough(cmd->rq) ?
3184 DID_PASSTHROUGH : DID_ERROR); 3183 DID_PASSTHROUGH : DID_ERROR);
3185 break; 3184 break;
3186 case CMD_CONNECTION_LOST: 3185 case CMD_CONNECTION_LOST:
@@ -3188,7 +3187,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3188 "connection lost\n", cmd); 3187 "connection lost\n", cmd);
3189 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3188 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3190 cmd->err_info->CommandStatus, DRIVER_OK, 3189 cmd->err_info->CommandStatus, DRIVER_OK,
3191 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3190 blk_rq_is_passthrough(cmd->rq) ?
3192 DID_PASSTHROUGH : DID_ERROR); 3191 DID_PASSTHROUGH : DID_ERROR);
3193 break; 3192 break;
3194 case CMD_ABORTED: 3193 case CMD_ABORTED:
@@ -3196,7 +3195,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3196 "aborted\n", cmd); 3195 "aborted\n", cmd);
3197 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3196 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3198 cmd->err_info->CommandStatus, DRIVER_OK, 3197 cmd->err_info->CommandStatus, DRIVER_OK,
3199 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3198 blk_rq_is_passthrough(cmd->rq) ?
3200 DID_PASSTHROUGH : DID_ABORT); 3199 DID_PASSTHROUGH : DID_ABORT);
3201 break; 3200 break;
3202 case CMD_ABORT_FAILED: 3201 case CMD_ABORT_FAILED:
@@ -3204,7 +3203,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3204 "abort failed\n", cmd); 3203 "abort failed\n", cmd);
3205 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3204 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3206 cmd->err_info->CommandStatus, DRIVER_OK, 3205 cmd->err_info->CommandStatus, DRIVER_OK,
3207 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3206 blk_rq_is_passthrough(cmd->rq) ?
3208 DID_PASSTHROUGH : DID_ERROR); 3207 DID_PASSTHROUGH : DID_ERROR);
3209 break; 3208 break;
3210 case CMD_UNSOLICITED_ABORT: 3209 case CMD_UNSOLICITED_ABORT:
@@ -3219,21 +3218,21 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3219 "%p retried too many times\n", cmd); 3218 "%p retried too many times\n", cmd);
3220 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3219 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3221 cmd->err_info->CommandStatus, DRIVER_OK, 3220 cmd->err_info->CommandStatus, DRIVER_OK,
3222 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3221 blk_rq_is_passthrough(cmd->rq) ?
3223 DID_PASSTHROUGH : DID_ABORT); 3222 DID_PASSTHROUGH : DID_ABORT);
3224 break; 3223 break;
3225 case CMD_TIMEOUT: 3224 case CMD_TIMEOUT:
3226 dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd); 3225 dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd);
3227 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3226 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3228 cmd->err_info->CommandStatus, DRIVER_OK, 3227 cmd->err_info->CommandStatus, DRIVER_OK,
3229 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3228 blk_rq_is_passthrough(cmd->rq) ?
3230 DID_PASSTHROUGH : DID_ERROR); 3229 DID_PASSTHROUGH : DID_ERROR);
3231 break; 3230 break;
3232 case CMD_UNABORTABLE: 3231 case CMD_UNABORTABLE:
3233 dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd); 3232 dev_warn(&h->pdev->dev, "cmd %p unabortable\n", cmd);
3234 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3233 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3235 cmd->err_info->CommandStatus, DRIVER_OK, 3234 cmd->err_info->CommandStatus, DRIVER_OK,
3236 cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC ? 3235 blk_rq_is_passthrough(cmd->rq) ?
3237 DID_PASSTHROUGH : DID_ERROR); 3236 DID_PASSTHROUGH : DID_ERROR);
3238 break; 3237 break;
3239 default: 3238 default:
@@ -3242,7 +3241,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
3242 cmd->err_info->CommandStatus); 3241 cmd->err_info->CommandStatus);
3243 rq->errors = make_status_bytes(SAM_STAT_GOOD, 3242 rq->errors = make_status_bytes(SAM_STAT_GOOD,
3244 cmd->err_info->CommandStatus, DRIVER_OK, 3243 cmd->err_info->CommandStatus, DRIVER_OK,
3245 (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 3244 blk_rq_is_passthrough(cmd->rq) ?
3246 DID_PASSTHROUGH : DID_ERROR); 3245 DID_PASSTHROUGH : DID_ERROR);
3247 } 3246 }
3248 3247
@@ -3395,7 +3394,9 @@ static void do_cciss_request(struct request_queue *q)
3395 c->Header.SGList = h->max_cmd_sgentries; 3394 c->Header.SGList = h->max_cmd_sgentries;
3396 set_performant_mode(h, c); 3395 set_performant_mode(h, c);
3397 3396
3398 if (likely(creq->cmd_type == REQ_TYPE_FS)) { 3397 switch (req_op(creq)) {
3398 case REQ_OP_READ:
3399 case REQ_OP_WRITE:
3399 if(h->cciss_read == CCISS_READ_10) { 3400 if(h->cciss_read == CCISS_READ_10) {
3400 c->Request.CDB[1] = 0; 3401 c->Request.CDB[1] = 0;
3401 c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */ 3402 c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */
@@ -3425,12 +3426,16 @@ static void do_cciss_request(struct request_queue *q)
3425 c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff; 3426 c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
3426 c->Request.CDB[14] = c->Request.CDB[15] = 0; 3427 c->Request.CDB[14] = c->Request.CDB[15] = 0;
3427 } 3428 }
3428 } else if (creq->cmd_type == REQ_TYPE_BLOCK_PC) { 3429 break;
3429 c->Request.CDBLen = creq->cmd_len; 3430 case REQ_OP_SCSI_IN:
3430 memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB); 3431 case REQ_OP_SCSI_OUT:
3431 } else { 3432 c->Request.CDBLen = scsi_req(creq)->cmd_len;
3433 memcpy(c->Request.CDB, scsi_req(creq)->cmd, BLK_MAX_CDB);
3434 scsi_req(creq)->sense = c->err_info->SenseInfo;
3435 break;
3436 default:
3432 dev_warn(&h->pdev->dev, "bad request type %d\n", 3437 dev_warn(&h->pdev->dev, "bad request type %d\n",
3433 creq->cmd_type); 3438 creq->cmd_flags);
3434 BUG(); 3439 BUG();
3435 } 3440 }
3436 3441
@@ -4074,41 +4079,27 @@ clean_up:
4074 4079
4075static void cciss_interrupt_mode(ctlr_info_t *h) 4080static void cciss_interrupt_mode(ctlr_info_t *h)
4076{ 4081{
4077#ifdef CONFIG_PCI_MSI 4082 int ret;
4078 int err;
4079 struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
4080 {0, 2}, {0, 3}
4081 };
4082 4083
4083 /* Some boards advertise MSI but don't really support it */ 4084 /* Some boards advertise MSI but don't really support it */
4084 if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) || 4085 if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
4085 (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11)) 4086 (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
4086 goto default_int_mode; 4087 goto default_int_mode;
4087 4088
4088 if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { 4089 ret = pci_alloc_irq_vectors(h->pdev, 4, 4, PCI_IRQ_MSIX);
4089 err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4); 4090 if (ret >= 0) {
4090 if (!err) { 4091 h->intr[0] = pci_irq_vector(h->pdev, 0);
4091 h->intr[0] = cciss_msix_entries[0].vector; 4092 h->intr[1] = pci_irq_vector(h->pdev, 1);
4092 h->intr[1] = cciss_msix_entries[1].vector; 4093 h->intr[2] = pci_irq_vector(h->pdev, 2);
4093 h->intr[2] = cciss_msix_entries[2].vector; 4094 h->intr[3] = pci_irq_vector(h->pdev, 3);
4094 h->intr[3] = cciss_msix_entries[3].vector; 4095 return;
4095 h->msix_vector = 1;
4096 return;
4097 } else {
4098 dev_warn(&h->pdev->dev,
4099 "MSI-X init failed %d\n", err);
4100 }
4101 }
4102 if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
4103 if (!pci_enable_msi(h->pdev))
4104 h->msi_vector = 1;
4105 else
4106 dev_warn(&h->pdev->dev, "MSI init failed\n");
4107 } 4096 }
4097
4098 ret = pci_alloc_irq_vectors(h->pdev, 1, 1, PCI_IRQ_MSI);
4099
4108default_int_mode: 4100default_int_mode:
4109#endif /* CONFIG_PCI_MSI */
4110 /* if we get here we're going to use the default interrupt mode */ 4101 /* if we get here we're going to use the default interrupt mode */
4111 h->intr[h->intr_mode] = h->pdev->irq; 4102 h->intr[h->intr_mode] = pci_irq_vector(h->pdev, 0);
4112 return; 4103 return;
4113} 4104}
4114 4105
@@ -4888,7 +4879,7 @@ static int cciss_request_irq(ctlr_info_t *h,
4888 irqreturn_t (*msixhandler)(int, void *), 4879 irqreturn_t (*msixhandler)(int, void *),
4889 irqreturn_t (*intxhandler)(int, void *)) 4880 irqreturn_t (*intxhandler)(int, void *))
4890{ 4881{
4891 if (h->msix_vector || h->msi_vector) { 4882 if (h->pdev->msi_enabled || h->pdev->msix_enabled) {
4892 if (!request_irq(h->intr[h->intr_mode], msixhandler, 4883 if (!request_irq(h->intr[h->intr_mode], msixhandler,
4893 0, h->devname, h)) 4884 0, h->devname, h))
4894 return 0; 4885 return 0;
@@ -4934,12 +4925,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
4934 int ctlr = h->ctlr; 4925 int ctlr = h->ctlr;
4935 4926
4936 free_irq(h->intr[h->intr_mode], h); 4927 free_irq(h->intr[h->intr_mode], h);
4937#ifdef CONFIG_PCI_MSI 4928 pci_free_irq_vectors(h->pdev);
4938 if (h->msix_vector)
4939 pci_disable_msix(h->pdev);
4940 else if (h->msi_vector)
4941 pci_disable_msi(h->pdev);
4942#endif /* CONFIG_PCI_MSI */
4943 cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); 4929 cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
4944 cciss_free_scatterlists(h); 4930 cciss_free_scatterlists(h);
4945 cciss_free_cmd_pool(h); 4931 cciss_free_cmd_pool(h);
@@ -5295,12 +5281,7 @@ static void cciss_remove_one(struct pci_dev *pdev)
5295 5281
5296 cciss_shutdown(pdev); 5282 cciss_shutdown(pdev);
5297 5283
5298#ifdef CONFIG_PCI_MSI 5284 pci_free_irq_vectors(h->pdev);
5299 if (h->msix_vector)
5300 pci_disable_msix(h->pdev);
5301 else if (h->msi_vector)
5302 pci_disable_msi(h->pdev);
5303#endif /* CONFIG_PCI_MSI */
5304 5285
5305 iounmap(h->transtable); 5286 iounmap(h->transtable);
5306 iounmap(h->cfgtable); 5287 iounmap(h->cfgtable);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 7fda30e4a241..4affa94ca17b 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -90,8 +90,6 @@ struct ctlr_info
90# define SIMPLE_MODE_INT 2 90# define SIMPLE_MODE_INT 2
91# define MEMQ_MODE_INT 3 91# define MEMQ_MODE_INT 3
92 unsigned int intr[4]; 92 unsigned int intr[4];
93 unsigned int msix_vector;
94 unsigned int msi_vector;
95 int intr_mode; 93 int intr_mode;
96 int cciss_max_sectors; 94 int cciss_max_sectors;
97 BYTE cciss_read; 95 BYTE cciss_read;
@@ -333,7 +331,7 @@ static unsigned long SA5_performant_completed(ctlr_info_t *h)
333 */ 331 */
334 register_value = readl(h->vaddr + SA5_OUTDB_STATUS); 332 register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
335 /* msi auto clears the interrupt pending bit. */ 333 /* msi auto clears the interrupt pending bit. */
336 if (!(h->msi_vector || h->msix_vector)) { 334 if (!(h->pdev->msi_enabled || h->pdev->msix_enabled)) {
337 writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR); 335 writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
338 /* Do a read in order to flush the write to the controller 336 /* Do a read in order to flush the write to the controller
339 * (as per spec.) 337 * (as per spec.)
@@ -393,7 +391,7 @@ static bool SA5_performant_intr_pending(ctlr_info_t *h)
393 if (!register_value) 391 if (!register_value)
394 return false; 392 return false;
395 393
396 if (h->msi_vector || h->msix_vector) 394 if (h->pdev->msi_enabled || h->pdev->msix_enabled)
397 return true; 395 return true;
398 396
399 /* Read outbound doorbell to flush */ 397 /* Read outbound doorbell to flush */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c3ff60c30dde..615e5b5178a0 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2462,7 +2462,7 @@ static int drbd_congested(void *congested_data, int bdi_bits)
2462 2462
2463 if (get_ldev(device)) { 2463 if (get_ldev(device)) {
2464 q = bdev_get_queue(device->ldev->backing_bdev); 2464 q = bdev_get_queue(device->ldev->backing_bdev);
2465 r = bdi_congested(&q->backing_dev_info, bdi_bits); 2465 r = bdi_congested(q->backing_dev_info, bdi_bits);
2466 put_ldev(device); 2466 put_ldev(device);
2467 if (r) 2467 if (r)
2468 reason = 'b'; 2468 reason = 'b';
@@ -2834,8 +2834,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2834 /* we have no partitions. we contain only ourselves. */ 2834 /* we have no partitions. we contain only ourselves. */
2835 device->this_bdev->bd_contains = device->this_bdev; 2835 device->this_bdev->bd_contains = device->this_bdev;
2836 2836
2837 q->backing_dev_info.congested_fn = drbd_congested; 2837 q->backing_dev_info->congested_fn = drbd_congested;
2838 q->backing_dev_info.congested_data = device; 2838 q->backing_dev_info->congested_data = device;
2839 2839
2840 blk_queue_make_request(q, drbd_make_request); 2840 blk_queue_make_request(q, drbd_make_request);
2841 blk_queue_write_cache(q, true, true); 2841 blk_queue_write_cache(q, true, true);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f35db29cac76..908c704e20aa 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1328,11 +1328,13 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
1328 if (b) { 1328 if (b) {
1329 blk_queue_stack_limits(q, b); 1329 blk_queue_stack_limits(q, b);
1330 1330
1331 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { 1331 if (q->backing_dev_info->ra_pages !=
1332 b->backing_dev_info->ra_pages) {
1332 drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", 1333 drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1333 q->backing_dev_info.ra_pages, 1334 q->backing_dev_info->ra_pages,
1334 b->backing_dev_info.ra_pages); 1335 b->backing_dev_info->ra_pages);
1335 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; 1336 q->backing_dev_info->ra_pages =
1337 b->backing_dev_info->ra_pages;
1336 } 1338 }
1337 } 1339 }
1338 fixup_discard_if_not_supported(q); 1340 fixup_discard_if_not_supported(q);
@@ -3345,7 +3347,7 @@ static void device_to_statistics(struct device_statistics *s,
3345 s->dev_disk_flags = md->flags; 3347 s->dev_disk_flags = md->flags;
3346 q = bdev_get_queue(device->ldev->backing_bdev); 3348 q = bdev_get_queue(device->ldev->backing_bdev);
3347 s->dev_lower_blocked = 3349 s->dev_lower_blocked =
3348 bdi_congested(&q->backing_dev_info, 3350 bdi_congested(q->backing_dev_info,
3349 (1 << WB_async_congested) | 3351 (1 << WB_async_congested) |
3350 (1 << WB_sync_congested)); 3352 (1 << WB_sync_congested));
3351 put_ldev(device); 3353 put_ldev(device);
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index be2b93fd2c11..8378142f7a55 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -288,7 +288,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
288 seq_printf(seq, "%2d: cs:Unconfigured\n", i); 288 seq_printf(seq, "%2d: cs:Unconfigured\n", i);
289 } else { 289 } else {
290 /* reset device->congestion_reason */ 290 /* reset device->congestion_reason */
291 bdi_rw_congested(&device->rq_queue->backing_dev_info); 291 bdi_rw_congested(device->rq_queue->backing_dev_info);
292 292
293 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 293 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
294 wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; 294 wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index b489ac2e9c44..652114ae1a8a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -927,7 +927,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
927 927
928 switch (rbm) { 928 switch (rbm) {
929 case RB_CONGESTED_REMOTE: 929 case RB_CONGESTED_REMOTE:
930 bdi = &device->ldev->backing_bdev->bd_disk->queue->backing_dev_info; 930 bdi = device->ldev->backing_bdev->bd_disk->queue->backing_dev_info;
931 return bdi_read_congested(bdi); 931 return bdi_read_congested(bdi);
932 case RB_LEAST_PENDING: 932 case RB_LEAST_PENDING:
933 return atomic_read(&device->local_cnt) > 933 return atomic_read(&device->local_cnt) >
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index a391a3cfb3fe..45b4384f650c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2900,8 +2900,8 @@ static void do_fd_request(struct request_queue *q)
2900 return; 2900 return;
2901 2901
2902 if (WARN(atomic_read(&usage_count) == 0, 2902 if (WARN(atomic_read(&usage_count) == 0,
2903 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n", 2903 "warning: usage count=0, current_req=%p sect=%ld flags=%llx\n",
2904 current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, 2904 current_req, (long)blk_rq_pos(current_req),
2905 (unsigned long long) current_req->cmd_flags)) 2905 (unsigned long long) current_req->cmd_flags))
2906 return; 2906 return;
2907 2907
@@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param,
3119 *rcmd = NULL; 3119 *rcmd = NULL;
3120 3120
3121loop: 3121loop:
3122 ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER); 3122 ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL);
3123 if (!ptr) 3123 if (!ptr)
3124 return -ENOMEM; 3124 return -ENOMEM;
3125 *rcmd = ptr; 3125 *rcmd = ptr;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index a9b48ed7a3cd..6043648da1e8 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -626,30 +626,29 @@ repeat:
626 req_data_dir(req) == READ ? "read" : "writ", 626 req_data_dir(req) == READ ? "read" : "writ",
627 cyl, head, sec, nsect, bio_data(req->bio)); 627 cyl, head, sec, nsect, bio_data(req->bio));
628#endif 628#endif
629 if (req->cmd_type == REQ_TYPE_FS) { 629
630 switch (rq_data_dir(req)) { 630 switch (req_op(req)) {
631 case READ: 631 case REQ_OP_READ:
632 hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, 632 hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ,
633 &read_intr); 633 &read_intr);
634 if (reset) 634 if (reset)
635 goto repeat; 635 goto repeat;
636 break; 636 break;
637 case WRITE: 637 case REQ_OP_WRITE:
638 hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, 638 hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE,
639 &write_intr); 639 &write_intr);
640 if (reset) 640 if (reset)
641 goto repeat; 641 goto repeat;
642 if (wait_DRQ()) { 642 if (wait_DRQ()) {
643 bad_rw_intr(); 643 bad_rw_intr();
644 goto repeat; 644 goto repeat;
645 }
646 outsw(HD_DATA, bio_data(req->bio), 256);
647 break;
648 default:
649 printk("unknown hd-command\n");
650 hd_end_request_cur(-EIO);
651 break;
652 } 645 }
646 outsw(HD_DATA, bio_data(req->bio), 256);
647 break;
648 default:
649 printk("unknown hd-command\n");
650 hd_end_request_cur(-EIO);
651 break;
653 } 652 }
654} 653}
655 654
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f347285c67ec..304377182c1a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1097,9 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1097 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) 1097 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
1098 return -EINVAL; 1098 return -EINVAL;
1099 1099
1100 /* I/O need to be drained during transfer transition */
1101 blk_mq_freeze_queue(lo->lo_queue);
1102
1100 err = loop_release_xfer(lo); 1103 err = loop_release_xfer(lo);
1101 if (err) 1104 if (err)
1102 return err; 1105 goto exit;
1103 1106
1104 if (info->lo_encrypt_type) { 1107 if (info->lo_encrypt_type) {
1105 unsigned int type = info->lo_encrypt_type; 1108 unsigned int type = info->lo_encrypt_type;
@@ -1114,12 +1117,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1114 1117
1115 err = loop_init_xfer(lo, xfer, info); 1118 err = loop_init_xfer(lo, xfer, info);
1116 if (err) 1119 if (err)
1117 return err; 1120 goto exit;
1118 1121
1119 if (lo->lo_offset != info->lo_offset || 1122 if (lo->lo_offset != info->lo_offset ||
1120 lo->lo_sizelimit != info->lo_sizelimit) 1123 lo->lo_sizelimit != info->lo_sizelimit)
1121 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) 1124 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
1122 return -EFBIG; 1125 err = -EFBIG;
1126 goto exit;
1127 }
1123 1128
1124 loop_config_discard(lo); 1129 loop_config_discard(lo);
1125 1130
@@ -1156,7 +1161,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1156 /* update dio if lo_offset or transfer is changed */ 1161 /* update dio if lo_offset or transfer is changed */
1157 __loop_update_dio(lo, lo->use_dio); 1162 __loop_update_dio(lo, lo->use_dio);
1158 1163
1159 return 0; 1164 exit:
1165 blk_mq_unfreeze_queue(lo->lo_queue);
1166 return err;
1160} 1167}
1161 1168
1162static int 1169static int
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index e937fcf71769..286f276f586e 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -670,15 +670,17 @@ static void mg_request_poll(struct request_queue *q)
670 break; 670 break;
671 } 671 }
672 672
673 if (unlikely(host->req->cmd_type != REQ_TYPE_FS)) { 673 switch (req_op(host->req)) {
674 mg_end_request_cur(host, -EIO); 674 case REQ_OP_READ:
675 continue;
676 }
677
678 if (rq_data_dir(host->req) == READ)
679 mg_read(host->req); 675 mg_read(host->req);
680 else 676 break;
677 case REQ_OP_WRITE:
681 mg_write(host->req); 678 mg_write(host->req);
679 break;
680 default:
681 mg_end_request_cur(host, -EIO);
682 break;
683 }
682 } 684 }
683} 685}
684 686
@@ -687,13 +689,15 @@ static unsigned int mg_issue_req(struct request *req,
687 unsigned int sect_num, 689 unsigned int sect_num,
688 unsigned int sect_cnt) 690 unsigned int sect_cnt)
689{ 691{
690 if (rq_data_dir(req) == READ) { 692 switch (req_op(host->req)) {
693 case REQ_OP_READ:
691 if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr) 694 if (mg_out(host, sect_num, sect_cnt, MG_CMD_RD, &mg_read_intr)
692 != MG_ERR_NONE) { 695 != MG_ERR_NONE) {
693 mg_bad_rw_intr(host); 696 mg_bad_rw_intr(host);
694 return host->error; 697 return host->error;
695 } 698 }
696 } else { 699 break;
700 case REQ_OP_WRITE:
697 /* TODO : handler */ 701 /* TODO : handler */
698 outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); 702 outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL);
699 if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) 703 if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr)
@@ -712,6 +716,10 @@ static unsigned int mg_issue_req(struct request *req,
712 mod_timer(&host->timer, jiffies + 3 * HZ); 716 mod_timer(&host->timer, jiffies + 3 * HZ);
713 outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + 717 outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base +
714 MG_REG_COMMAND); 718 MG_REG_COMMAND);
719 break;
720 default:
721 mg_end_request_cur(host, -EIO);
722 break;
715 } 723 }
716 return MG_ERR_NONE; 724 return MG_ERR_NONE;
717} 725}
@@ -753,11 +761,6 @@ static void mg_request(struct request_queue *q)
753 continue; 761 continue;
754 } 762 }
755 763
756 if (unlikely(req->cmd_type != REQ_TYPE_FS)) {
757 mg_end_request_cur(host, -EIO);
758 continue;
759 }
760
761 if (!mg_issue_req(req, host, sect_num, sect_cnt)) 764 if (!mg_issue_req(req, host, sect_num, sect_cnt))
762 return; 765 return;
763 } 766 }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9fd06eeb1a17..0be84a3cb6d7 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -41,6 +41,9 @@
41 41
42#include <linux/nbd.h> 42#include <linux/nbd.h>
43 43
44static DEFINE_IDR(nbd_index_idr);
45static DEFINE_MUTEX(nbd_index_mutex);
46
44struct nbd_sock { 47struct nbd_sock {
45 struct socket *sock; 48 struct socket *sock;
46 struct mutex tx_lock; 49 struct mutex tx_lock;
@@ -89,8 +92,9 @@ static struct dentry *nbd_dbg_dir;
89#define NBD_MAGIC 0x68797548 92#define NBD_MAGIC 0x68797548
90 93
91static unsigned int nbds_max = 16; 94static unsigned int nbds_max = 16;
92static struct nbd_device *nbd_dev;
93static int max_part; 95static int max_part;
96static struct workqueue_struct *recv_workqueue;
97static int part_shift;
94 98
95static inline struct device *nbd_to_dev(struct nbd_device *nbd) 99static inline struct device *nbd_to_dev(struct nbd_device *nbd)
96{ 100{
@@ -193,13 +197,6 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
193 set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); 197 set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
194 req->errors++; 198 req->errors++;
195 199
196 /*
197 * If our disconnect packet times out then we're already holding the
198 * config_lock and could deadlock here, so just set an error and return,
199 * we'll handle shutting everything down later.
200 */
201 if (req->cmd_type == REQ_TYPE_DRV_PRIV)
202 return BLK_EH_HANDLED;
203 mutex_lock(&nbd->config_lock); 200 mutex_lock(&nbd->config_lock);
204 sock_shutdown(nbd); 201 sock_shutdown(nbd);
205 mutex_unlock(&nbd->config_lock); 202 mutex_unlock(&nbd->config_lock);
@@ -278,14 +275,29 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
278 u32 type; 275 u32 type;
279 u32 tag = blk_mq_unique_tag(req); 276 u32 tag = blk_mq_unique_tag(req);
280 277
281 if (req_op(req) == REQ_OP_DISCARD) 278 switch (req_op(req)) {
279 case REQ_OP_DISCARD:
282 type = NBD_CMD_TRIM; 280 type = NBD_CMD_TRIM;
283 else if (req_op(req) == REQ_OP_FLUSH) 281 break;
282 case REQ_OP_FLUSH:
284 type = NBD_CMD_FLUSH; 283 type = NBD_CMD_FLUSH;
285 else if (rq_data_dir(req) == WRITE) 284 break;
285 case REQ_OP_WRITE:
286 type = NBD_CMD_WRITE; 286 type = NBD_CMD_WRITE;
287 else 287 break;
288 case REQ_OP_READ:
288 type = NBD_CMD_READ; 289 type = NBD_CMD_READ;
290 break;
291 default:
292 return -EIO;
293 }
294
295 if (rq_data_dir(req) == WRITE &&
296 (nbd->flags & NBD_FLAG_READ_ONLY)) {
297 dev_err_ratelimited(disk_to_dev(nbd->disk),
298 "Write on read-only\n");
299 return -EIO;
300 }
289 301
290 memset(&request, 0, sizeof(request)); 302 memset(&request, 0, sizeof(request));
291 request.magic = htonl(NBD_REQUEST_MAGIC); 303 request.magic = htonl(NBD_REQUEST_MAGIC);
@@ -510,18 +522,6 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
510 goto error_out; 522 goto error_out;
511 } 523 }
512 524
513 if (req->cmd_type != REQ_TYPE_FS &&
514 req->cmd_type != REQ_TYPE_DRV_PRIV)
515 goto error_out;
516
517 if (req->cmd_type == REQ_TYPE_FS &&
518 rq_data_dir(req) == WRITE &&
519 (nbd->flags & NBD_FLAG_READ_ONLY)) {
520 dev_err_ratelimited(disk_to_dev(nbd->disk),
521 "Write on read-only\n");
522 goto error_out;
523 }
524
525 req->errors = 0; 525 req->errors = 0;
526 526
527 nsock = nbd->socks[index]; 527 nsock = nbd->socks[index];
@@ -785,7 +785,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
785 INIT_WORK(&args[i].work, recv_work); 785 INIT_WORK(&args[i].work, recv_work);
786 args[i].nbd = nbd; 786 args[i].nbd = nbd;
787 args[i].index = i; 787 args[i].index = i;
788 queue_work(system_long_wq, &args[i].work); 788 queue_work(recv_workqueue, &args[i].work);
789 } 789 }
790 wait_event_interruptible(nbd->recv_wq, 790 wait_event_interruptible(nbd->recv_wq,
791 atomic_read(&nbd->recv_threads) == 0); 791 atomic_read(&nbd->recv_threads) == 0);
@@ -996,6 +996,103 @@ static struct blk_mq_ops nbd_mq_ops = {
996 .timeout = nbd_xmit_timeout, 996 .timeout = nbd_xmit_timeout,
997}; 997};
998 998
999static void nbd_dev_remove(struct nbd_device *nbd)
1000{
1001 struct gendisk *disk = nbd->disk;
1002 nbd->magic = 0;
1003 if (disk) {
1004 del_gendisk(disk);
1005 blk_cleanup_queue(disk->queue);
1006 blk_mq_free_tag_set(&nbd->tag_set);
1007 put_disk(disk);
1008 }
1009 kfree(nbd);
1010}
1011
1012static int nbd_dev_add(int index)
1013{
1014 struct nbd_device *nbd;
1015 struct gendisk *disk;
1016 struct request_queue *q;
1017 int err = -ENOMEM;
1018
1019 nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
1020 if (!nbd)
1021 goto out;
1022
1023 disk = alloc_disk(1 << part_shift);
1024 if (!disk)
1025 goto out_free_nbd;
1026
1027 if (index >= 0) {
1028 err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
1029 GFP_KERNEL);
1030 if (err == -ENOSPC)
1031 err = -EEXIST;
1032 } else {
1033 err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
1034 if (err >= 0)
1035 index = err;
1036 }
1037 if (err < 0)
1038 goto out_free_disk;
1039
1040 nbd->disk = disk;
1041 nbd->tag_set.ops = &nbd_mq_ops;
1042 nbd->tag_set.nr_hw_queues = 1;
1043 nbd->tag_set.queue_depth = 128;
1044 nbd->tag_set.numa_node = NUMA_NO_NODE;
1045 nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
1046 nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
1047 BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
1048 nbd->tag_set.driver_data = nbd;
1049
1050 err = blk_mq_alloc_tag_set(&nbd->tag_set);
1051 if (err)
1052 goto out_free_idr;
1053
1054 q = blk_mq_init_queue(&nbd->tag_set);
1055 if (IS_ERR(q)) {
1056 err = PTR_ERR(q);
1057 goto out_free_tags;
1058 }
1059 disk->queue = q;
1060
1061 /*
1062 * Tell the block layer that we are not a rotational device
1063 */
1064 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
1065 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
1066 disk->queue->limits.discard_granularity = 512;
1067 blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
1068 disk->queue->limits.discard_zeroes_data = 0;
1069 blk_queue_max_hw_sectors(disk->queue, 65536);
1070 disk->queue->limits.max_sectors = 256;
1071
1072 nbd->magic = NBD_MAGIC;
1073 mutex_init(&nbd->config_lock);
1074 disk->major = NBD_MAJOR;
1075 disk->first_minor = index << part_shift;
1076 disk->fops = &nbd_fops;
1077 disk->private_data = nbd;
1078 sprintf(disk->disk_name, "nbd%d", index);
1079 init_waitqueue_head(&nbd->recv_wq);
1080 nbd_reset(nbd);
1081 add_disk(disk);
1082 return index;
1083
1084out_free_tags:
1085 blk_mq_free_tag_set(&nbd->tag_set);
1086out_free_idr:
1087 idr_remove(&nbd_index_idr, index);
1088out_free_disk:
1089 put_disk(disk);
1090out_free_nbd:
1091 kfree(nbd);
1092out:
1093 return err;
1094}
1095
999/* 1096/*
1000 * And here should be modules and kernel interface 1097 * And here should be modules and kernel interface
1001 * (Just smiley confuses emacs :-) 1098 * (Just smiley confuses emacs :-)
@@ -1003,9 +1100,7 @@ static struct blk_mq_ops nbd_mq_ops = {
1003 1100
1004static int __init nbd_init(void) 1101static int __init nbd_init(void)
1005{ 1102{
1006 int err = -ENOMEM;
1007 int i; 1103 int i;
1008 int part_shift;
1009 1104
1010 BUILD_BUG_ON(sizeof(struct nbd_request) != 28); 1105 BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
1011 1106
@@ -1034,111 +1129,38 @@ static int __init nbd_init(void)
1034 1129
1035 if (nbds_max > 1UL << (MINORBITS - part_shift)) 1130 if (nbds_max > 1UL << (MINORBITS - part_shift))
1036 return -EINVAL; 1131 return -EINVAL;
1037 1132 recv_workqueue = alloc_workqueue("knbd-recv",
1038 nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); 1133 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
1039 if (!nbd_dev) 1134 if (!recv_workqueue)
1040 return -ENOMEM; 1135 return -ENOMEM;
1041 1136
1042 for (i = 0; i < nbds_max; i++) { 1137 if (register_blkdev(NBD_MAJOR, "nbd"))
1043 struct request_queue *q; 1138 return -EIO;
1044 struct gendisk *disk = alloc_disk(1 << part_shift);
1045 if (!disk)
1046 goto out;
1047 nbd_dev[i].disk = disk;
1048
1049 nbd_dev[i].tag_set.ops = &nbd_mq_ops;
1050 nbd_dev[i].tag_set.nr_hw_queues = 1;
1051 nbd_dev[i].tag_set.queue_depth = 128;
1052 nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
1053 nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
1054 nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
1055 BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
1056 nbd_dev[i].tag_set.driver_data = &nbd_dev[i];
1057
1058 err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
1059 if (err) {
1060 put_disk(disk);
1061 goto out;
1062 }
1063
1064 /*
1065 * The new linux 2.5 block layer implementation requires
1066 * every gendisk to have its very own request_queue struct.
1067 * These structs are big so we dynamically allocate them.
1068 */
1069 q = blk_mq_init_queue(&nbd_dev[i].tag_set);
1070 if (IS_ERR(q)) {
1071 blk_mq_free_tag_set(&nbd_dev[i].tag_set);
1072 put_disk(disk);
1073 goto out;
1074 }
1075 disk->queue = q;
1076
1077 /*
1078 * Tell the block layer that we are not a rotational device
1079 */
1080 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
1081 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
1082 disk->queue->limits.discard_granularity = 512;
1083 blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
1084 disk->queue->limits.discard_zeroes_data = 0;
1085 blk_queue_max_hw_sectors(disk->queue, 65536);
1086 disk->queue->limits.max_sectors = 256;
1087 }
1088
1089 if (register_blkdev(NBD_MAJOR, "nbd")) {
1090 err = -EIO;
1091 goto out;
1092 }
1093
1094 printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);
1095 1139
1096 nbd_dbg_init(); 1140 nbd_dbg_init();
1097 1141
1098 for (i = 0; i < nbds_max; i++) { 1142 mutex_lock(&nbd_index_mutex);
1099 struct gendisk *disk = nbd_dev[i].disk; 1143 for (i = 0; i < nbds_max; i++)
1100 nbd_dev[i].magic = NBD_MAGIC; 1144 nbd_dev_add(i);
1101 mutex_init(&nbd_dev[i].config_lock); 1145 mutex_unlock(&nbd_index_mutex);
1102 disk->major = NBD_MAJOR; 1146 return 0;
1103 disk->first_minor = i << part_shift; 1147}
1104 disk->fops = &nbd_fops;
1105 disk->private_data = &nbd_dev[i];
1106 sprintf(disk->disk_name, "nbd%d", i);
1107 init_waitqueue_head(&nbd_dev[i].recv_wq);
1108 nbd_reset(&nbd_dev[i]);
1109 add_disk(disk);
1110 }
1111 1148
1149static int nbd_exit_cb(int id, void *ptr, void *data)
1150{
1151 struct nbd_device *nbd = ptr;
1152 nbd_dev_remove(nbd);
1112 return 0; 1153 return 0;
1113out:
1114 while (i--) {
1115 blk_mq_free_tag_set(&nbd_dev[i].tag_set);
1116 blk_cleanup_queue(nbd_dev[i].disk->queue);
1117 put_disk(nbd_dev[i].disk);
1118 }
1119 kfree(nbd_dev);
1120 return err;
1121} 1154}
1122 1155
1123static void __exit nbd_cleanup(void) 1156static void __exit nbd_cleanup(void)
1124{ 1157{
1125 int i;
1126
1127 nbd_dbg_close(); 1158 nbd_dbg_close();
1128 1159
1129 for (i = 0; i < nbds_max; i++) { 1160 idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL);
1130 struct gendisk *disk = nbd_dev[i].disk; 1161 idr_destroy(&nbd_index_idr);
1131 nbd_dev[i].magic = 0; 1162 destroy_workqueue(recv_workqueue);
1132 if (disk) {
1133 del_gendisk(disk);
1134 blk_cleanup_queue(disk->queue);
1135 blk_mq_free_tag_set(&nbd_dev[i].tag_set);
1136 put_disk(disk);
1137 }
1138 }
1139 unregister_blkdev(NBD_MAJOR, "nbd"); 1163 unregister_blkdev(NBD_MAJOR, "nbd");
1140 kfree(nbd_dev);
1141 printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
1142} 1164}
1143 1165
1144module_init(nbd_init); 1166module_init(nbd_init);
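For readers less familiar with blk-mq, the new nbd_dev_add() above is a compact example of how a driver brings a multiqueue disk online: populate a blk_mq_tag_set, allocate it, build a request_queue from it, then attach a gendisk. The sketch below is an illustration of that sequence only, not part of the patch; the ex_* names are invented, error handling is minimal, and a real driver (nbd included) also sets cmd_size, BLK_MQ_F_BLOCKING and its own queue limits as the hunk above shows.

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Trivial queue_rq: a real driver would issue I/O here; this one fails it. */
static int ex_queue_rq(struct blk_mq_hw_ctx *hctx,
		       const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;

	blk_mq_start_request(req);
	blk_mq_end_request(req, -EIO);
	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops ex_mq_ops = {
	.queue_rq	= ex_queue_rq,
};

static const struct block_device_operations ex_fops = {
	.owner		= THIS_MODULE,
};

/* Hypothetical bring-up mirroring nbd_dev_add(); the caller owns 'set'. */
static int ex_add_disk(struct blk_mq_tag_set *set, int major, int minor)
{
	struct request_queue *q;
	struct gendisk *disk;
	int err;

	set->ops = &ex_mq_ops;
	set->nr_hw_queues = 1;
	set->queue_depth = 128;
	set->numa_node = NUMA_NO_NODE;
	set->flags = BLK_MQ_F_SHOULD_MERGE;	/* nbd also needs BLK_MQ_F_BLOCKING */

	err = blk_mq_alloc_tag_set(set);
	if (err)
		return err;

	q = blk_mq_init_queue(set);
	if (IS_ERR(q)) {
		blk_mq_free_tag_set(set);
		return PTR_ERR(q);
	}

	disk = alloc_disk(1);
	if (!disk) {
		blk_cleanup_queue(q);
		blk_mq_free_tag_set(set);
		return -ENOMEM;
	}

	disk->queue = q;
	disk->major = major;
	disk->first_minor = minor;
	disk->fops = &ex_fops;
	snprintf(disk->disk_name, sizeof(disk->disk_name), "ex%d", minor);
	add_disk(disk);
	return 0;
}

Teardown reverses the order, exactly as nbd_dev_remove() does above: del_gendisk(), blk_cleanup_queue(), blk_mq_free_tag_set(), put_disk().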
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index c0e14e54909b..6f2e565bccc5 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -420,7 +420,8 @@ static void null_lnvm_end_io(struct request *rq, int error)
420{ 420{
421 struct nvm_rq *rqd = rq->end_io_data; 421 struct nvm_rq *rqd = rq->end_io_data;
422 422
423 nvm_end_io(rqd, error); 423 rqd->error = error;
424 nvm_end_io(rqd);
424 425
425 blk_put_request(rq); 426 blk_put_request(rq);
426} 427}
@@ -431,11 +432,11 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
431 struct request *rq; 432 struct request *rq;
432 struct bio *bio = rqd->bio; 433 struct bio *bio = rqd->bio;
433 434
434 rq = blk_mq_alloc_request(q, bio_data_dir(bio), 0); 435 rq = blk_mq_alloc_request(q,
436 op_is_write(bio_op(bio)) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
435 if (IS_ERR(rq)) 437 if (IS_ERR(rq))
436 return -ENOMEM; 438 return -ENOMEM;
437 439
438 rq->cmd_type = REQ_TYPE_DRV_PRIV;
439 rq->__sector = bio->bi_iter.bi_sector; 440 rq->__sector = bio->bi_iter.bi_sector;
440 rq->ioprio = bio_prio(bio); 441 rq->ioprio = bio_prio(bio);
441 442
@@ -460,7 +461,6 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
460 461
461 id->ver_id = 0x1; 462 id->ver_id = 0x1;
462 id->vmnt = 0; 463 id->vmnt = 0;
463 id->cgrps = 1;
464 id->cap = 0x2; 464 id->cap = 0x2;
465 id->dom = 0x1; 465 id->dom = 0x1;
466 466
@@ -479,7 +479,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
479 479
480 sector_div(size, bs); /* convert size to pages */ 480 sector_div(size, bs); /* convert size to pages */
481 size >>= 8; /* concert size to pgs pr blk */ 481 size >>= 8; /* concert size to pgs pr blk */
482 grp = &id->groups[0]; 482 grp = &id->grp;
483 grp->mtype = 0; 483 grp->mtype = 0;
484 grp->fmtype = 0; 484 grp->fmtype = 0;
485 grp->num_ch = 1; 485 grp->num_ch = 1;
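The null_blk lightnvm hunk above shows the other half of the REQ_TYPE_DRV_PRIV removal: the data direction of a driver-private request is now encoded in the opcode handed to the allocator instead of being patched into rq->cmd_type afterwards. A one-function sketch with a made-up ex_ name, assuming the caller already has a bio whose direction should be mirrored:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Pick REQ_OP_DRV_OUT for writes, REQ_OP_DRV_IN otherwise. */
static struct request *ex_alloc_private_rq(struct request_queue *q,
					   struct bio *bio)
{
	unsigned int op = op_is_write(bio_op(bio)) ?
				REQ_OP_DRV_OUT : REQ_OP_DRV_IN;

	return blk_mq_alloc_request(q, op, 0);
}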
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 92900f5f0b47..8127b8201a01 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -308,12 +308,6 @@ static void osdblk_rq_fn(struct request_queue *q)
308 if (!rq) 308 if (!rq)
309 break; 309 break;
310 310
311 /* filter out block requests we don't understand */
312 if (rq->cmd_type != REQ_TYPE_FS) {
313 blk_end_request_all(rq, 0);
314 continue;
315 }
316
317 /* deduce our operation (read, write, flush) */ 311 /* deduce our operation (read, write, flush) */
318 /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] 312 /* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
319 * into a clearly defined set of RPC commands: 313 * into a clearly defined set of RPC commands:
diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig
index efefb5ac3004..3a15247942e4 100644
--- a/drivers/block/paride/Kconfig
+++ b/drivers/block/paride/Kconfig
@@ -25,6 +25,7 @@ config PARIDE_PD
25config PARIDE_PCD 25config PARIDE_PCD
26 tristate "Parallel port ATAPI CD-ROMs" 26 tristate "Parallel port ATAPI CD-ROMs"
27 depends on PARIDE 27 depends on PARIDE
28 select BLK_SCSI_REQUEST # only for the generic cdrom code
28 ---help--- 29 ---help---
29 This option enables the high-level driver for ATAPI CD-ROM devices 30 This option enables the high-level driver for ATAPI CD-ROM devices
30 connected through a parallel port. If you chose to build PARIDE 31 connected through a parallel port. If you chose to build PARIDE
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 5fd2d0e25567..10aed84244f5 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -273,7 +273,7 @@ static const struct block_device_operations pcd_bdops = {
273 .check_events = pcd_block_check_events, 273 .check_events = pcd_block_check_events,
274}; 274};
275 275
276static struct cdrom_device_ops pcd_dops = { 276static const struct cdrom_device_ops pcd_dops = {
277 .open = pcd_open, 277 .open = pcd_open,
278 .release = pcd_release, 278 .release = pcd_release,
279 .drive_status = pcd_drive_status, 279 .drive_status = pcd_drive_status,
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index c3ed2fc72daa..644ba0888bd4 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -439,18 +439,16 @@ static int pd_retries = 0; /* i/o error retry count */
439static int pd_block; /* address of next requested block */ 439static int pd_block; /* address of next requested block */
440static int pd_count; /* number of blocks still to do */ 440static int pd_count; /* number of blocks still to do */
441static int pd_run; /* sectors in current cluster */ 441static int pd_run; /* sectors in current cluster */
442static int pd_cmd; /* current command READ/WRITE */
443static char *pd_buf; /* buffer for request in progress */ 442static char *pd_buf; /* buffer for request in progress */
444 443
445static enum action do_pd_io_start(void) 444static enum action do_pd_io_start(void)
446{ 445{
447 if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) { 446 switch (req_op(pd_req)) {
447 case REQ_OP_DRV_IN:
448 phase = pd_special; 448 phase = pd_special;
449 return pd_special(); 449 return pd_special();
450 } 450 case REQ_OP_READ:
451 451 case REQ_OP_WRITE:
452 pd_cmd = rq_data_dir(pd_req);
453 if (pd_cmd == READ || pd_cmd == WRITE) {
454 pd_block = blk_rq_pos(pd_req); 452 pd_block = blk_rq_pos(pd_req);
455 pd_count = blk_rq_cur_sectors(pd_req); 453 pd_count = blk_rq_cur_sectors(pd_req);
456 if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) 454 if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
@@ -458,7 +456,7 @@ static enum action do_pd_io_start(void)
458 pd_run = blk_rq_sectors(pd_req); 456 pd_run = blk_rq_sectors(pd_req);
459 pd_buf = bio_data(pd_req->bio); 457 pd_buf = bio_data(pd_req->bio);
460 pd_retries = 0; 458 pd_retries = 0;
461 if (pd_cmd == READ) 459 if (req_op(pd_req) == REQ_OP_READ)
462 return do_pd_read_start(); 460 return do_pd_read_start();
463 else 461 else
464 return do_pd_write_start(); 462 return do_pd_write_start();
@@ -723,11 +721,10 @@ static int pd_special_command(struct pd_unit *disk,
723 struct request *rq; 721 struct request *rq;
724 int err = 0; 722 int err = 0;
725 723
726 rq = blk_get_request(disk->gd->queue, READ, __GFP_RECLAIM); 724 rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
727 if (IS_ERR(rq)) 725 if (IS_ERR(rq))
728 return PTR_ERR(rq); 726 return PTR_ERR(rq);
729 727
730 rq->cmd_type = REQ_TYPE_DRV_PRIV;
731 rq->special = func; 728 rq->special = func;
732 729
733 err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); 730 err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
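pd_special_command() above is the typical before/after for synchronous driver-private commands: blk_get_request() is now told REQ_OP_DRV_IN up front, and the separate REQ_TYPE_DRV_PRIV assignment disappears. A hedged sketch of the same flow follows; ex_run_private() and the use of rq->special to carry an opaque payload are illustrative only, not an API this series introduces.

#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/blkdev.h>

static int ex_run_private(struct gendisk *disk, void *payload)
{
	struct request *rq;
	int err;

	rq = blk_get_request(disk->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	rq->special = payload;	/* consumed by the driver's own request handler */
	err = blk_execute_rq(disk->queue, disk, rq, 0);

	blk_put_request(rq);
	return err;
}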
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 1b94c1ca5c5f..66d846ba85a9 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,10 +704,10 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
704 int ret = 0; 704 int ret = 0;
705 705
706 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? 706 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
707 WRITE : READ, __GFP_RECLAIM); 707 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
708 if (IS_ERR(rq)) 708 if (IS_ERR(rq))
709 return PTR_ERR(rq); 709 return PTR_ERR(rq);
710 blk_rq_set_block_pc(rq); 710 scsi_req_init(rq);
711 711
712 if (cgc->buflen) { 712 if (cgc->buflen) {
713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, 713 ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -716,8 +716,8 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
716 goto out; 716 goto out;
717 } 717 }
718 718
719 rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]); 719 scsi_req(rq)->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
720 memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE); 720 memcpy(scsi_req(rq)->cmd, cgc->cmd, CDROM_PACKET_SIZE);
721 721
722 rq->timeout = 60*HZ; 722 rq->timeout = 60*HZ;
723 if (cgc->quiet) 723 if (cgc->quiet)
@@ -1243,7 +1243,7 @@ try_next_bio:
1243 && pd->bio_queue_size <= pd->write_congestion_off); 1243 && pd->bio_queue_size <= pd->write_congestion_off);
1244 spin_unlock(&pd->lock); 1244 spin_unlock(&pd->lock);
1245 if (wakeup) { 1245 if (wakeup) {
1246 clear_bdi_congested(&pd->disk->queue->backing_dev_info, 1246 clear_bdi_congested(pd->disk->queue->backing_dev_info,
1247 BLK_RW_ASYNC); 1247 BLK_RW_ASYNC);
1248 } 1248 }
1249 1249
@@ -2370,7 +2370,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
2370 spin_lock(&pd->lock); 2370 spin_lock(&pd->lock);
2371 if (pd->write_congestion_on > 0 2371 if (pd->write_congestion_on > 0
2372 && pd->bio_queue_size >= pd->write_congestion_on) { 2372 && pd->bio_queue_size >= pd->write_congestion_on) {
2373 set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC); 2373 set_bdi_congested(q->backing_dev_info, BLK_RW_ASYNC);
2374 do { 2374 do {
2375 spin_unlock(&pd->lock); 2375 spin_unlock(&pd->lock);
2376 congestion_wait(BLK_RW_ASYNC, HZ); 2376 congestion_wait(BLK_RW_ASYNC, HZ);
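pkt_generic_packet() above (and cdrom_read_cdda_bpc() further down) illustrate the new SCSI passthrough plumbing: blk_rq_set_block_pc() and the CDB fields embedded in struct request are gone, and the command block now lives behind the scsi_req() accessor after scsi_req_init(). The sketch below sends a 6-byte TEST UNIT READY as an example; the ex_ name is invented, and the queue must belong to a driver that supports SCSI passthrough (one that selects BLK_SCSI_REQUEST, as the cdrom and IDE drivers in this series do).

#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/blkdev.h>
#include <scsi/scsi_request.h>

static int ex_test_unit_ready(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq;
	struct scsi_request *req;
	int ret;

	rq = blk_get_request(q, REQ_OP_SCSI_IN, __GFP_RECLAIM);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
	scsi_req_init(rq);
	req = scsi_req(rq);

	req->cmd[0] = 0x00;	/* TEST UNIT READY, no data transfer */
	req->cmd_len = 6;
	rq->timeout = 60 * HZ;

	ret = blk_execute_rq(q, disk, rq, 0);

	blk_put_request(rq);
	return ret;
}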
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 76f33c84ce3d..a809e3e9feb8 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -196,16 +196,19 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
196 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); 196 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
197 197
198 while ((req = blk_fetch_request(q))) { 198 while ((req = blk_fetch_request(q))) {
199 if (req_op(req) == REQ_OP_FLUSH) { 199 switch (req_op(req)) {
200 case REQ_OP_FLUSH:
200 if (ps3disk_submit_flush_request(dev, req)) 201 if (ps3disk_submit_flush_request(dev, req))
201 break; 202 return;
202 } else if (req->cmd_type == REQ_TYPE_FS) { 203 break;
204 case REQ_OP_READ:
205 case REQ_OP_WRITE:
203 if (ps3disk_submit_request_sg(dev, req)) 206 if (ps3disk_submit_request_sg(dev, req))
204 break; 207 return;
205 } else { 208 break;
209 default:
206 blk_dump_rq_flags(req, DEVICE_NAME " bad request"); 210 blk_dump_rq_flags(req, DEVICE_NAME " bad request");
207 __blk_end_request_all(req, -EIO); 211 __blk_end_request_all(req, -EIO);
208 continue;
209 } 212 }
210 } 213 }
211} 214}
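The ps3disk conversion above is the pattern applied to most single-queue drivers in this pull: rather than checking req->cmd_type == REQ_TYPE_FS and then rq_data_dir(), the dispatcher switches on req_op() and fails anything it does not recognise. A generic sketch of that shape (the ex_ helpers are placeholders, and as in a request_fn the queue lock is assumed to be held for the __blk_end_* call):

#include <linux/blkdev.h>

static void ex_end_request(struct request *req, int error)
{
	__blk_end_request_all(req, error);	/* queue lock held */
}

static void ex_handle_request(struct request *req)
{
	switch (req_op(req)) {
	case REQ_OP_FLUSH:
		/* a real driver would issue a cache flush here */
		ex_end_request(req, 0);
		break;
	case REQ_OP_READ:
	case REQ_OP_WRITE:
		/* ... transfer blk_rq_cur_sectors() starting at blk_rq_pos() ... */
		ex_end_request(req, 0);
		break;
	default:
		blk_dump_rq_flags(req, "ex: unsupported request");
		ex_end_request(req, -EIO);
		break;
	}
}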
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 436baa66f701..362cecc77130 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4099,19 +4099,21 @@ static void rbd_queue_workfn(struct work_struct *work)
4099 bool must_be_locked; 4099 bool must_be_locked;
4100 int result; 4100 int result;
4101 4101
4102 if (rq->cmd_type != REQ_TYPE_FS) { 4102 switch (req_op(rq)) {
4103 dout("%s: non-fs request type %d\n", __func__, 4103 case REQ_OP_DISCARD:
4104 (int) rq->cmd_type);
4105 result = -EIO;
4106 goto err;
4107 }
4108
4109 if (req_op(rq) == REQ_OP_DISCARD)
4110 op_type = OBJ_OP_DISCARD; 4104 op_type = OBJ_OP_DISCARD;
4111 else if (req_op(rq) == REQ_OP_WRITE) 4105 break;
4106 case REQ_OP_WRITE:
4112 op_type = OBJ_OP_WRITE; 4107 op_type = OBJ_OP_WRITE;
4113 else 4108 break;
4109 case REQ_OP_READ:
4114 op_type = OBJ_OP_READ; 4110 op_type = OBJ_OP_READ;
4111 break;
4112 default:
4113 dout("%s: non-fs request type %d\n", __func__, req_op(rq));
4114 result = -EIO;
4115 goto err;
4116 }
4115 4117
4116 /* Ignore/skip any zero-length requests */ 4118 /* Ignore/skip any zero-length requests */
4117 4119
@@ -4524,7 +4526,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
4524 q->limits.discard_zeroes_data = 1; 4526 q->limits.discard_zeroes_data = 1;
4525 4527
4526 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) 4528 if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
4527 q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; 4529 q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES;
4528 4530
4529 disk->queue = q; 4531 disk->queue = q;
4530 4532
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index abf805e332e2..27833e4dae2a 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -1204,10 +1204,11 @@ static void skd_complete_special(struct skd_device *skdev,
1204static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode, 1204static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode,
1205 uint cmd_in, ulong arg) 1205 uint cmd_in, ulong arg)
1206{ 1206{
1207 int rc = 0; 1207 static const int sg_version_num = 30527;
1208 int rc = 0, timeout;
1208 struct gendisk *disk = bdev->bd_disk; 1209 struct gendisk *disk = bdev->bd_disk;
1209 struct skd_device *skdev = disk->private_data; 1210 struct skd_device *skdev = disk->private_data;
1210 void __user *p = (void *)arg; 1211 int __user *p = (int __user *)arg;
1211 1212
1212 pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n", 1213 pr_debug("%s:%s:%d %s: CMD[%s] ioctl mode 0x%x, cmd 0x%x arg %0lx\n",
1213 skdev->name, __func__, __LINE__, 1214 skdev->name, __func__, __LINE__,
@@ -1218,12 +1219,18 @@ static int skd_bdev_ioctl(struct block_device *bdev, fmode_t mode,
1218 1219
1219 switch (cmd_in) { 1220 switch (cmd_in) {
1220 case SG_SET_TIMEOUT: 1221 case SG_SET_TIMEOUT:
1222 rc = get_user(timeout, p);
1223 if (!rc)
1224 disk->queue->sg_timeout = clock_t_to_jiffies(timeout);
1225 break;
1221 case SG_GET_TIMEOUT: 1226 case SG_GET_TIMEOUT:
1227 rc = jiffies_to_clock_t(disk->queue->sg_timeout);
1228 break;
1222 case SG_GET_VERSION_NUM: 1229 case SG_GET_VERSION_NUM:
1223 rc = scsi_cmd_ioctl(disk->queue, disk, mode, cmd_in, p); 1230 rc = put_user(sg_version_num, p);
1224 break; 1231 break;
1225 case SG_IO: 1232 case SG_IO:
1226 rc = skd_ioctl_sg_io(skdev, mode, p); 1233 rc = skd_ioctl_sg_io(skdev, mode, (void __user *)arg);
1227 break; 1234 break;
1228 1235
1229 default: 1236 default:
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 0e93ad7b8511..c8e072caf56f 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -567,7 +567,7 @@ static struct carm_request *carm_get_special(struct carm_host *host)
567 if (!crq) 567 if (!crq)
568 return NULL; 568 return NULL;
569 569
570 rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL); 570 rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL);
571 if (IS_ERR(rq)) { 571 if (IS_ERR(rq)) {
572 spin_lock_irqsave(&host->lock, flags); 572 spin_lock_irqsave(&host->lock, flags);
573 carm_put_request(host, crq); 573 carm_put_request(host, crq);
@@ -620,7 +620,6 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
620 spin_unlock_irq(&host->lock); 620 spin_unlock_irq(&host->lock);
621 621
622 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); 622 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
623 crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
624 crq->rq->special = crq; 623 crq->rq->special = crq;
625 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); 624 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
626 625
@@ -661,7 +660,6 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
661 crq->msg_bucket = (u32) rc; 660 crq->msg_bucket = (u32) rc;
662 661
663 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); 662 DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
664 crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
665 crq->rq->special = crq; 663 crq->rq->special = crq;
666 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); 664 blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
667 665
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 264c5eac12b0..024b473524c0 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -52,11 +52,13 @@ struct virtio_blk {
52}; 52};
53 53
54struct virtblk_req { 54struct virtblk_req {
55 struct request *req; 55#ifdef CONFIG_VIRTIO_BLK_SCSI
56 struct virtio_blk_outhdr out_hdr; 56 struct scsi_request sreq; /* for SCSI passthrough, must be first */
57 u8 sense[SCSI_SENSE_BUFFERSIZE];
57 struct virtio_scsi_inhdr in_hdr; 58 struct virtio_scsi_inhdr in_hdr;
59#endif
60 struct virtio_blk_outhdr out_hdr;
58 u8 status; 61 u8 status;
59 u8 sense[SCSI_SENSE_BUFFERSIZE];
60 struct scatterlist sg[]; 62 struct scatterlist sg[];
61}; 63};
62 64
@@ -72,28 +74,88 @@ static inline int virtblk_result(struct virtblk_req *vbr)
72 } 74 }
73} 75}
74 76
75static int __virtblk_add_req(struct virtqueue *vq, 77/*
76 struct virtblk_req *vbr, 78 * If this is a packet command we need a couple of additional headers. Behind
77 struct scatterlist *data_sg, 79 * the normal outhdr we put a segment with the scsi command block, and before
78 bool have_data) 80 * the normal inhdr we put the sense data and the inhdr with additional status
81 * information.
82 */
83#ifdef CONFIG_VIRTIO_BLK_SCSI
84static int virtblk_add_req_scsi(struct virtqueue *vq, struct virtblk_req *vbr,
85 struct scatterlist *data_sg, bool have_data)
79{ 86{
80 struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6]; 87 struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
81 unsigned int num_out = 0, num_in = 0; 88 unsigned int num_out = 0, num_in = 0;
82 __virtio32 type = vbr->out_hdr.type & ~cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT);
83 89
84 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 90 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
85 sgs[num_out++] = &hdr; 91 sgs[num_out++] = &hdr;
92 sg_init_one(&cmd, vbr->sreq.cmd, vbr->sreq.cmd_len);
93 sgs[num_out++] = &cmd;
94
95 if (have_data) {
96 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
97 sgs[num_out++] = data_sg;
98 else
99 sgs[num_out + num_in++] = data_sg;
100 }
101
102 sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE);
103 sgs[num_out + num_in++] = &sense;
104 sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
105 sgs[num_out + num_in++] = &inhdr;
106 sg_init_one(&status, &vbr->status, sizeof(vbr->status));
107 sgs[num_out + num_in++] = &status;
108
109 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
110}
111
112static inline void virtblk_scsi_reques_done(struct request *req)
113{
114 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
115 struct virtio_blk *vblk = req->q->queuedata;
116 struct scsi_request *sreq = &vbr->sreq;
117
118 sreq->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
119 sreq->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
120 req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
121}
122
123static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
124 unsigned int cmd, unsigned long data)
125{
126 struct gendisk *disk = bdev->bd_disk;
127 struct virtio_blk *vblk = disk->private_data;
86 128
87 /* 129 /*
88 * If this is a packet command we need a couple of additional headers. 130 * Only allow the generic SCSI ioctls if the host can support it.
89 * Behind the normal outhdr we put a segment with the scsi command
90 * block, and before the normal inhdr we put the sense data and the
91 * inhdr with additional status information.
92 */ 131 */
93 if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) { 132 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
94 sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len); 133 return -ENOTTY;
95 sgs[num_out++] = &cmd; 134
96 } 135 return scsi_cmd_blk_ioctl(bdev, mode, cmd,
136 (void __user *)data);
137}
138#else
139static inline int virtblk_add_req_scsi(struct virtqueue *vq,
140 struct virtblk_req *vbr, struct scatterlist *data_sg,
141 bool have_data)
142{
143 return -EIO;
144}
145static inline void virtblk_scsi_reques_done(struct request *req)
146{
147}
148#define virtblk_ioctl NULL
149#endif /* CONFIG_VIRTIO_BLK_SCSI */
150
151static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
152 struct scatterlist *data_sg, bool have_data)
153{
154 struct scatterlist hdr, status, *sgs[3];
155 unsigned int num_out = 0, num_in = 0;
156
157 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
158 sgs[num_out++] = &hdr;
97 159
98 if (have_data) { 160 if (have_data) {
99 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 161 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
@@ -102,14 +164,6 @@ static int __virtblk_add_req(struct virtqueue *vq,
102 sgs[num_out + num_in++] = data_sg; 164 sgs[num_out + num_in++] = data_sg;
103 } 165 }
104 166
105 if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
106 memcpy(vbr->sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
107 sg_init_one(&sense, vbr->sense, SCSI_SENSE_BUFFERSIZE);
108 sgs[num_out + num_in++] = &sense;
109 sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
110 sgs[num_out + num_in++] = &inhdr;
111 }
112
113 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); 167 sg_init_one(&status, &vbr->status, sizeof(vbr->status));
114 sgs[num_out + num_in++] = &status; 168 sgs[num_out + num_in++] = &status;
115 169
@@ -119,15 +173,16 @@ static int __virtblk_add_req(struct virtqueue *vq,
119static inline void virtblk_request_done(struct request *req) 173static inline void virtblk_request_done(struct request *req)
120{ 174{
121 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 175 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
122 struct virtio_blk *vblk = req->q->queuedata;
123 int error = virtblk_result(vbr); 176 int error = virtblk_result(vbr);
124 177
125 if (req->cmd_type == REQ_TYPE_BLOCK_PC) { 178 switch (req_op(req)) {
126 req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); 179 case REQ_OP_SCSI_IN:
127 req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); 180 case REQ_OP_SCSI_OUT:
128 req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); 181 virtblk_scsi_reques_done(req);
129 } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) { 182 break;
183 case REQ_OP_DRV_IN:
130 req->errors = (error != 0); 184 req->errors = (error != 0);
185 break;
131 } 186 }
132 187
133 blk_mq_end_request(req, error); 188 blk_mq_end_request(req, error);
@@ -146,7 +201,9 @@ static void virtblk_done(struct virtqueue *vq)
146 do { 201 do {
147 virtqueue_disable_cb(vq); 202 virtqueue_disable_cb(vq);
148 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { 203 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
149 blk_mq_complete_request(vbr->req, vbr->req->errors); 204 struct request *req = blk_mq_rq_from_pdu(vbr);
205
206 blk_mq_complete_request(req, req->errors);
150 req_done = true; 207 req_done = true;
151 } 208 }
152 if (unlikely(virtqueue_is_broken(vq))) 209 if (unlikely(virtqueue_is_broken(vq)))
@@ -170,49 +227,50 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
170 int qid = hctx->queue_num; 227 int qid = hctx->queue_num;
171 int err; 228 int err;
172 bool notify = false; 229 bool notify = false;
230 u32 type;
173 231
174 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 232 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
175 233
176 vbr->req = req; 234 switch (req_op(req)) {
177 if (req_op(req) == REQ_OP_FLUSH) { 235 case REQ_OP_READ:
178 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH); 236 case REQ_OP_WRITE:
179 vbr->out_hdr.sector = 0; 237 type = 0;
180 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); 238 break;
181 } else { 239 case REQ_OP_FLUSH:
182 switch (req->cmd_type) { 240 type = VIRTIO_BLK_T_FLUSH;
183 case REQ_TYPE_FS: 241 break;
184 vbr->out_hdr.type = 0; 242 case REQ_OP_SCSI_IN:
185 vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req)); 243 case REQ_OP_SCSI_OUT:
186 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); 244 type = VIRTIO_BLK_T_SCSI_CMD;
187 break; 245 break;
188 case REQ_TYPE_BLOCK_PC: 246 case REQ_OP_DRV_IN:
189 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD); 247 type = VIRTIO_BLK_T_GET_ID;
190 vbr->out_hdr.sector = 0; 248 break;
191 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); 249 default:
192 break; 250 WARN_ON_ONCE(1);
193 case REQ_TYPE_DRV_PRIV: 251 return BLK_MQ_RQ_QUEUE_ERROR;
194 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
195 vbr->out_hdr.sector = 0;
196 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
197 break;
198 default:
199 /* We don't put anything else in the queue. */
200 BUG();
201 }
202 } 252 }
203 253
254 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
255 vbr->out_hdr.sector = type ?
256 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
257 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));
258
204 blk_mq_start_request(req); 259 blk_mq_start_request(req);
205 260
206 num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg); 261 num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
207 if (num) { 262 if (num) {
208 if (rq_data_dir(vbr->req) == WRITE) 263 if (rq_data_dir(req) == WRITE)
209 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); 264 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
210 else 265 else
211 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); 266 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
212 } 267 }
213 268
214 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 269 spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
215 err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); 270 if (req_op(req) == REQ_OP_SCSI_IN || req_op(req) == REQ_OP_SCSI_OUT)
271 err = virtblk_add_req_scsi(vblk->vqs[qid].vq, vbr, vbr->sg, num);
272 else
273 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
216 if (err) { 274 if (err) {
217 virtqueue_kick(vblk->vqs[qid].vq); 275 virtqueue_kick(vblk->vqs[qid].vq);
218 blk_mq_stop_hw_queue(hctx); 276 blk_mq_stop_hw_queue(hctx);
@@ -242,10 +300,9 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
242 struct request *req; 300 struct request *req;
243 int err; 301 int err;
244 302
245 req = blk_get_request(q, READ, GFP_KERNEL); 303 req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);
246 if (IS_ERR(req)) 304 if (IS_ERR(req))
247 return PTR_ERR(req); 305 return PTR_ERR(req);
248 req->cmd_type = REQ_TYPE_DRV_PRIV;
249 306
250 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); 307 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
251 if (err) 308 if (err)
@@ -257,22 +314,6 @@ out:
257 return err; 314 return err;
258} 315}
259 316
260static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
261 unsigned int cmd, unsigned long data)
262{
263 struct gendisk *disk = bdev->bd_disk;
264 struct virtio_blk *vblk = disk->private_data;
265
266 /*
267 * Only allow the generic SCSI ioctls if the host can support it.
268 */
269 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
270 return -ENOTTY;
271
272 return scsi_cmd_blk_ioctl(bdev, mode, cmd,
273 (void __user *)data);
274}
275
276/* We provide getgeo only to please some old bootloader/partitioning tools */ 317/* We provide getgeo only to please some old bootloader/partitioning tools */
277static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 318static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
278{ 319{
@@ -538,6 +579,9 @@ static int virtblk_init_request(void *data, struct request *rq,
538 struct virtio_blk *vblk = data; 579 struct virtio_blk *vblk = data;
539 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); 580 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
540 581
582#ifdef CONFIG_VIRTIO_BLK_SCSI
583 vbr->sreq.sense = vbr->sense;
584#endif
541 sg_init_table(vbr->sg, vblk->sg_elems); 585 sg_init_table(vbr->sg, vblk->sg_elems);
542 return 0; 586 return 0;
543} 587}
@@ -821,7 +865,10 @@ static const struct virtio_device_id id_table[] = {
821 865
822static unsigned int features_legacy[] = { 866static unsigned int features_legacy[] = {
823 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 867 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
824 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, 868 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
869#ifdef CONFIG_VIRTIO_BLK_SCSI
870 VIRTIO_BLK_F_SCSI,
871#endif
825 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 872 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
826 VIRTIO_BLK_F_MQ, 873 VIRTIO_BLK_F_MQ,
827} 874}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 265f1a7072e9..5067a0a952cb 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -865,7 +865,7 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo)
865static inline bool blkif_request_flush_invalid(struct request *req, 865static inline bool blkif_request_flush_invalid(struct request *req,
866 struct blkfront_info *info) 866 struct blkfront_info *info)
867{ 867{
868 return ((req->cmd_type != REQ_TYPE_FS) || 868 return (blk_rq_is_passthrough(req) ||
869 ((req_op(req) == REQ_OP_FLUSH) && 869 ((req_op(req) == REQ_OP_FLUSH) &&
870 !info->feature_flush) || 870 !info->feature_flush) ||
871 ((req->cmd_flags & REQ_FUA) && 871 ((req->cmd_flags & REQ_FUA) &&
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index c4328d9d9981..757dce2147e0 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -468,7 +468,7 @@ static struct request *ace_get_next_request(struct request_queue *q)
468 struct request *req; 468 struct request *req;
469 469
470 while ((req = blk_peek_request(q)) != NULL) { 470 while ((req = blk_peek_request(q)) != NULL) {
471 if (req->cmd_type == REQ_TYPE_FS) 471 if (!blk_rq_is_passthrough(req))
472 break; 472 break;
473 blk_start_request(req); 473 blk_start_request(req);
474 __blk_end_request_all(req, -EIO); 474 __blk_end_request_all(req, -EIO);
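xen-blkfront and xsysace above show the replacement for the old req->cmd_type != REQ_TYPE_FS test in legacy request_fn drivers: blk_rq_is_passthrough(). A small sketch of that filter loop; the ex_ name is invented and, as in a request_fn, the queue lock is assumed to be held.

#include <linux/blkdev.h>

/* Return the next filesystem request, failing any passthrough ones. */
static struct request *ex_next_fs_request(struct request_queue *q)
{
	struct request *req;

	while ((req = blk_peek_request(q)) != NULL) {
		if (!blk_rq_is_passthrough(req))
			return req;		/* normal READ/WRITE/FLUSH */
		blk_start_request(req);
		__blk_end_request_all(req, -EIO);
	}
	return NULL;
}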
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index e5ab7d9e8c45..3cd7856156b4 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -117,7 +117,7 @@ static void zram_revalidate_disk(struct zram *zram)
117{ 117{
118 revalidate_disk(zram->disk); 118 revalidate_disk(zram->disk);
119 /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */ 119 /* revalidate_disk reset the BDI_CAP_STABLE_WRITES so set again */
120 zram->disk->queue->backing_dev_info.capabilities |= 120 zram->disk->queue->backing_dev_info->capabilities |=
121 BDI_CAP_STABLE_WRITES; 121 BDI_CAP_STABLE_WRITES;
122} 122}
123 123
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 59cca72647a6..87739649eac2 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -281,8 +281,8 @@
281#include <linux/fcntl.h> 281#include <linux/fcntl.h>
282#include <linux/blkdev.h> 282#include <linux/blkdev.h>
283#include <linux/times.h> 283#include <linux/times.h>
284
285#include <linux/uaccess.h> 284#include <linux/uaccess.h>
285#include <scsi/scsi_request.h>
286 286
287/* used to tell the module to turn on full debugging messages */ 287/* used to tell the module to turn on full debugging messages */
288static bool debug; 288static bool debug;
@@ -342,8 +342,8 @@ static void cdrom_sysctl_register(void);
342 342
343static LIST_HEAD(cdrom_list); 343static LIST_HEAD(cdrom_list);
344 344
345static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, 345int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
346 struct packet_command *cgc) 346 struct packet_command *cgc)
347{ 347{
348 if (cgc->sense) { 348 if (cgc->sense) {
349 cgc->sense->sense_key = 0x05; 349 cgc->sense->sense_key = 0x05;
@@ -354,6 +354,7 @@ static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
354 cgc->stat = -EIO; 354 cgc->stat = -EIO;
355 return -EIO; 355 return -EIO;
356} 356}
357EXPORT_SYMBOL(cdrom_dummy_generic_packet);
357 358
358static int cdrom_flush_cache(struct cdrom_device_info *cdi) 359static int cdrom_flush_cache(struct cdrom_device_info *cdi)
359{ 360{
@@ -371,7 +372,7 @@ static int cdrom_flush_cache(struct cdrom_device_info *cdi)
371static int cdrom_get_disc_info(struct cdrom_device_info *cdi, 372static int cdrom_get_disc_info(struct cdrom_device_info *cdi,
372 disc_information *di) 373 disc_information *di)
373{ 374{
374 struct cdrom_device_ops *cdo = cdi->ops; 375 const struct cdrom_device_ops *cdo = cdi->ops;
375 struct packet_command cgc; 376 struct packet_command cgc;
376 int ret, buflen; 377 int ret, buflen;
377 378
@@ -586,7 +587,7 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
586int register_cdrom(struct cdrom_device_info *cdi) 587int register_cdrom(struct cdrom_device_info *cdi)
587{ 588{
588 static char banner_printed; 589 static char banner_printed;
589 struct cdrom_device_ops *cdo = cdi->ops; 590 const struct cdrom_device_ops *cdo = cdi->ops;
590 int *change_capability = (int *)&cdo->capability; /* hack */ 591 int *change_capability = (int *)&cdo->capability; /* hack */
591 592
592 cd_dbg(CD_OPEN, "entering register_cdrom\n"); 593 cd_dbg(CD_OPEN, "entering register_cdrom\n");
@@ -610,7 +611,6 @@ int register_cdrom(struct cdrom_device_info *cdi)
610 ENSURE(reset, CDC_RESET); 611 ENSURE(reset, CDC_RESET);
611 ENSURE(generic_packet, CDC_GENERIC_PACKET); 612 ENSURE(generic_packet, CDC_GENERIC_PACKET);
612 cdi->mc_flags = 0; 613 cdi->mc_flags = 0;
613 cdo->n_minors = 0;
614 cdi->options = CDO_USE_FFLAGS; 614 cdi->options = CDO_USE_FFLAGS;
615 615
616 if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY)) 616 if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY))
@@ -630,8 +630,7 @@ int register_cdrom(struct cdrom_device_info *cdi)
630 else 630 else
631 cdi->cdda_method = CDDA_OLD; 631 cdi->cdda_method = CDDA_OLD;
632 632
633 if (!cdo->generic_packet) 633 WARN_ON(!cdo->generic_packet);
634 cdo->generic_packet = cdrom_dummy_generic_packet;
635 634
636 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); 635 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name);
637 mutex_lock(&cdrom_mutex); 636 mutex_lock(&cdrom_mutex);
@@ -652,7 +651,6 @@ void unregister_cdrom(struct cdrom_device_info *cdi)
652 if (cdi->exit) 651 if (cdi->exit)
653 cdi->exit(cdi); 652 cdi->exit(cdi);
654 653
655 cdi->ops->n_minors--;
656 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); 654 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
657} 655}
658 656
@@ -1036,7 +1034,7 @@ static
1036int open_for_data(struct cdrom_device_info *cdi) 1034int open_for_data(struct cdrom_device_info *cdi)
1037{ 1035{
1038 int ret; 1036 int ret;
1039 struct cdrom_device_ops *cdo = cdi->ops; 1037 const struct cdrom_device_ops *cdo = cdi->ops;
1040 tracktype tracks; 1038 tracktype tracks;
1041 cd_dbg(CD_OPEN, "entering open_for_data\n"); 1039 cd_dbg(CD_OPEN, "entering open_for_data\n");
1042 /* Check if the driver can report drive status. If it can, we 1040 /* Check if the driver can report drive status. If it can, we
@@ -1198,8 +1196,8 @@ err:
1198/* This code is similar to that in open_for_data. The routine is called 1196/* This code is similar to that in open_for_data. The routine is called
1199 whenever an audio play operation is requested. 1197 whenever an audio play operation is requested.
1200*/ 1198*/
1201static int check_for_audio_disc(struct cdrom_device_info * cdi, 1199static int check_for_audio_disc(struct cdrom_device_info *cdi,
1202 struct cdrom_device_ops * cdo) 1200 const struct cdrom_device_ops *cdo)
1203{ 1201{
1204 int ret; 1202 int ret;
1205 tracktype tracks; 1203 tracktype tracks;
@@ -1254,7 +1252,7 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi,
1254 1252
1255void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) 1253void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
1256{ 1254{
1257 struct cdrom_device_ops *cdo = cdi->ops; 1255 const struct cdrom_device_ops *cdo = cdi->ops;
1258 int opened_for_data; 1256 int opened_for_data;
1259 1257
1260 cd_dbg(CD_CLOSE, "entering cdrom_release\n"); 1258 cd_dbg(CD_CLOSE, "entering cdrom_release\n");
@@ -1294,7 +1292,7 @@ static int cdrom_read_mech_status(struct cdrom_device_info *cdi,
1294 struct cdrom_changer_info *buf) 1292 struct cdrom_changer_info *buf)
1295{ 1293{
1296 struct packet_command cgc; 1294 struct packet_command cgc;
1297 struct cdrom_device_ops *cdo = cdi->ops; 1295 const struct cdrom_device_ops *cdo = cdi->ops;
1298 int length; 1296 int length;
1299 1297
1300 /* 1298 /*
@@ -1643,7 +1641,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1643 int ret; 1641 int ret;
1644 u_char buf[20]; 1642 u_char buf[20];
1645 struct packet_command cgc; 1643 struct packet_command cgc;
1646 struct cdrom_device_ops *cdo = cdi->ops; 1644 const struct cdrom_device_ops *cdo = cdi->ops;
1647 rpc_state_t rpc_state; 1645 rpc_state_t rpc_state;
1648 1646
1649 memset(buf, 0, sizeof(buf)); 1647 memset(buf, 0, sizeof(buf));
@@ -1791,7 +1789,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s,
1791{ 1789{
1792 unsigned char buf[21], *base; 1790 unsigned char buf[21], *base;
1793 struct dvd_layer *layer; 1791 struct dvd_layer *layer;
1794 struct cdrom_device_ops *cdo = cdi->ops; 1792 const struct cdrom_device_ops *cdo = cdi->ops;
1795 int ret, layer_num = s->physical.layer_num; 1793 int ret, layer_num = s->physical.layer_num;
1796 1794
1797 if (layer_num >= DVD_LAYERS) 1795 if (layer_num >= DVD_LAYERS)
@@ -1842,7 +1840,7 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s,
1842{ 1840{
1843 int ret; 1841 int ret;
1844 u_char buf[8]; 1842 u_char buf[8];
1845 struct cdrom_device_ops *cdo = cdi->ops; 1843 const struct cdrom_device_ops *cdo = cdi->ops;
1846 1844
1847 init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ); 1845 init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ);
1848 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1846 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
@@ -1866,7 +1864,7 @@ static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s,
1866{ 1864{
1867 int ret, size; 1865 int ret, size;
1868 u_char *buf; 1866 u_char *buf;
1869 struct cdrom_device_ops *cdo = cdi->ops; 1867 const struct cdrom_device_ops *cdo = cdi->ops;
1870 1868
1871 size = sizeof(s->disckey.value) + 4; 1869 size = sizeof(s->disckey.value) + 4;
1872 1870
@@ -1894,7 +1892,7 @@ static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s,
1894{ 1892{
1895 int ret, size = 4 + 188; 1893 int ret, size = 4 + 188;
1896 u_char *buf; 1894 u_char *buf;
1897 struct cdrom_device_ops *cdo = cdi->ops; 1895 const struct cdrom_device_ops *cdo = cdi->ops;
1898 1896
1899 buf = kmalloc(size, GFP_KERNEL); 1897 buf = kmalloc(size, GFP_KERNEL);
1900 if (!buf) 1898 if (!buf)
@@ -1928,7 +1926,7 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
1928{ 1926{
1929 int ret = 0, size; 1927 int ret = 0, size;
1930 u_char *buf; 1928 u_char *buf;
1931 struct cdrom_device_ops *cdo = cdi->ops; 1929 const struct cdrom_device_ops *cdo = cdi->ops;
1932 1930
1933 size = sizeof(s->manufact.value) + 4; 1931 size = sizeof(s->manufact.value) + 4;
1934 1932
@@ -1995,7 +1993,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
1995 struct packet_command *cgc, 1993 struct packet_command *cgc,
1996 int page_code, int page_control) 1994 int page_code, int page_control)
1997{ 1995{
1998 struct cdrom_device_ops *cdo = cdi->ops; 1996 const struct cdrom_device_ops *cdo = cdi->ops;
1999 1997
2000 memset(cgc->cmd, 0, sizeof(cgc->cmd)); 1998 memset(cgc->cmd, 0, sizeof(cgc->cmd));
2001 1999
@@ -2010,7 +2008,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
2010int cdrom_mode_select(struct cdrom_device_info *cdi, 2008int cdrom_mode_select(struct cdrom_device_info *cdi,
2011 struct packet_command *cgc) 2009 struct packet_command *cgc)
2012{ 2010{
2013 struct cdrom_device_ops *cdo = cdi->ops; 2011 const struct cdrom_device_ops *cdo = cdi->ops;
2014 2012
2015 memset(cgc->cmd, 0, sizeof(cgc->cmd)); 2013 memset(cgc->cmd, 0, sizeof(cgc->cmd));
2016 memset(cgc->buffer, 0, 2); 2014 memset(cgc->buffer, 0, 2);
@@ -2025,7 +2023,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi,
2025static int cdrom_read_subchannel(struct cdrom_device_info *cdi, 2023static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
2026 struct cdrom_subchnl *subchnl, int mcn) 2024 struct cdrom_subchnl *subchnl, int mcn)
2027{ 2025{
2028 struct cdrom_device_ops *cdo = cdi->ops; 2026 const struct cdrom_device_ops *cdo = cdi->ops;
2029 struct packet_command cgc; 2027 struct packet_command cgc;
2030 char buffer[32]; 2028 char buffer[32];
2031 int ret; 2029 int ret;
@@ -2073,7 +2071,7 @@ static int cdrom_read_cd(struct cdrom_device_info *cdi,
2073 struct packet_command *cgc, int lba, 2071 struct packet_command *cgc, int lba,
2074 int blocksize, int nblocks) 2072 int blocksize, int nblocks)
2075{ 2073{
2076 struct cdrom_device_ops *cdo = cdi->ops; 2074 const struct cdrom_device_ops *cdo = cdi->ops;
2077 2075
2078 memset(&cgc->cmd, 0, sizeof(cgc->cmd)); 2076 memset(&cgc->cmd, 0, sizeof(cgc->cmd));
2079 cgc->cmd[0] = GPCMD_READ_10; 2077 cgc->cmd[0] = GPCMD_READ_10;
@@ -2093,7 +2091,7 @@ static int cdrom_read_block(struct cdrom_device_info *cdi,
2093 struct packet_command *cgc, 2091 struct packet_command *cgc,
2094 int lba, int nblocks, int format, int blksize) 2092 int lba, int nblocks, int format, int blksize)
2095{ 2093{
2096 struct cdrom_device_ops *cdo = cdi->ops; 2094 const struct cdrom_device_ops *cdo = cdi->ops;
2097 2095
2098 memset(&cgc->cmd, 0, sizeof(cgc->cmd)); 2096 memset(&cgc->cmd, 0, sizeof(cgc->cmd));
2099 cgc->cmd[0] = GPCMD_READ_CD; 2097 cgc->cmd[0] = GPCMD_READ_CD;
@@ -2172,6 +2170,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2172{ 2170{
2173 struct request_queue *q = cdi->disk->queue; 2171 struct request_queue *q = cdi->disk->queue;
2174 struct request *rq; 2172 struct request *rq;
2173 struct scsi_request *req;
2175 struct bio *bio; 2174 struct bio *bio;
2176 unsigned int len; 2175 unsigned int len;
2177 int nr, ret = 0; 2176 int nr, ret = 0;
@@ -2190,12 +2189,13 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2190 2189
2191 len = nr * CD_FRAMESIZE_RAW; 2190 len = nr * CD_FRAMESIZE_RAW;
2192 2191
2193 rq = blk_get_request(q, READ, GFP_KERNEL); 2192 rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
2194 if (IS_ERR(rq)) { 2193 if (IS_ERR(rq)) {
2195 ret = PTR_ERR(rq); 2194 ret = PTR_ERR(rq);
2196 break; 2195 break;
2197 } 2196 }
2198 blk_rq_set_block_pc(rq); 2197 req = scsi_req(rq);
2198 scsi_req_init(rq);
2199 2199
2200 ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); 2200 ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
2201 if (ret) { 2201 if (ret) {
@@ -2203,23 +2203,23 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2203 break; 2203 break;
2204 } 2204 }
2205 2205
2206 rq->cmd[0] = GPCMD_READ_CD; 2206 req->cmd[0] = GPCMD_READ_CD;
2207 rq->cmd[1] = 1 << 2; 2207 req->cmd[1] = 1 << 2;
2208 rq->cmd[2] = (lba >> 24) & 0xff; 2208 req->cmd[2] = (lba >> 24) & 0xff;
2209 rq->cmd[3] = (lba >> 16) & 0xff; 2209 req->cmd[3] = (lba >> 16) & 0xff;
2210 rq->cmd[4] = (lba >> 8) & 0xff; 2210 req->cmd[4] = (lba >> 8) & 0xff;
2211 rq->cmd[5] = lba & 0xff; 2211 req->cmd[5] = lba & 0xff;
2212 rq->cmd[6] = (nr >> 16) & 0xff; 2212 req->cmd[6] = (nr >> 16) & 0xff;
2213 rq->cmd[7] = (nr >> 8) & 0xff; 2213 req->cmd[7] = (nr >> 8) & 0xff;
2214 rq->cmd[8] = nr & 0xff; 2214 req->cmd[8] = nr & 0xff;
2215 rq->cmd[9] = 0xf8; 2215 req->cmd[9] = 0xf8;
2216 2216
2217 rq->cmd_len = 12; 2217 req->cmd_len = 12;
2218 rq->timeout = 60 * HZ; 2218 rq->timeout = 60 * HZ;
2219 bio = rq->bio; 2219 bio = rq->bio;
2220 2220
2221 if (blk_execute_rq(q, cdi->disk, rq, 0)) { 2221 if (blk_execute_rq(q, cdi->disk, rq, 0)) {
2222 struct request_sense *s = rq->sense; 2222 struct request_sense *s = req->sense;
2223 ret = -EIO; 2223 ret = -EIO;
2224 cdi->last_sense = s->sense_key; 2224 cdi->last_sense = s->sense_key;
2225 } 2225 }
@@ -2764,7 +2764,7 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi,
2764 */ 2764 */
2765static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) 2765static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size)
2766{ 2766{
2767 struct cdrom_device_ops *cdo = cdi->ops; 2767 const struct cdrom_device_ops *cdo = cdi->ops;
2768 struct packet_command cgc; 2768 struct packet_command cgc;
2769 struct modesel_head mh; 2769 struct modesel_head mh;
2770 2770
@@ -2790,7 +2790,7 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size)
2790static int cdrom_get_track_info(struct cdrom_device_info *cdi, 2790static int cdrom_get_track_info(struct cdrom_device_info *cdi,
2791 __u16 track, __u8 type, track_information *ti) 2791 __u16 track, __u8 type, track_information *ti)
2792{ 2792{
2793 struct cdrom_device_ops *cdo = cdi->ops; 2793 const struct cdrom_device_ops *cdo = cdi->ops;
2794 struct packet_command cgc; 2794 struct packet_command cgc;
2795 int ret, buflen; 2795 int ret, buflen;
2796 2796
@@ -3049,7 +3049,7 @@ static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi,
3049 void __user *arg, 3049 void __user *arg,
3050 struct packet_command *cgc) 3050 struct packet_command *cgc)
3051{ 3051{
3052 struct cdrom_device_ops *cdo = cdi->ops; 3052 const struct cdrom_device_ops *cdo = cdi->ops;
3053 struct cdrom_msf msf; 3053 struct cdrom_msf msf;
3054 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); 3054 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
3055 if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf))) 3055 if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf)))
@@ -3069,7 +3069,7 @@ static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi,
3069 void __user *arg, 3069 void __user *arg,
3070 struct packet_command *cgc) 3070 struct packet_command *cgc)
3071{ 3071{
3072 struct cdrom_device_ops *cdo = cdi->ops; 3072 const struct cdrom_device_ops *cdo = cdi->ops;
3073 struct cdrom_blk blk; 3073 struct cdrom_blk blk;
3074 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); 3074 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
3075 if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk))) 3075 if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk)))
@@ -3164,7 +3164,7 @@ static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi,
3164 struct packet_command *cgc, 3164 struct packet_command *cgc,
3165 int cmd) 3165 int cmd)
3166{ 3166{
3167 struct cdrom_device_ops *cdo = cdi->ops; 3167 const struct cdrom_device_ops *cdo = cdi->ops;
3168 cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); 3168 cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n");
3169 cgc->cmd[0] = GPCMD_START_STOP_UNIT; 3169 cgc->cmd[0] = GPCMD_START_STOP_UNIT;
3170 cgc->cmd[1] = 1; 3170 cgc->cmd[1] = 1;
@@ -3177,7 +3177,7 @@ static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi,
3177 struct packet_command *cgc, 3177 struct packet_command *cgc,
3178 int cmd) 3178 int cmd)
3179{ 3179{
3180 struct cdrom_device_ops *cdo = cdi->ops; 3180 const struct cdrom_device_ops *cdo = cdi->ops;
3181 cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); 3181 cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n");
3182 cgc->cmd[0] = GPCMD_PAUSE_RESUME; 3182 cgc->cmd[0] = GPCMD_PAUSE_RESUME;
3183 cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; 3183 cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0;
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 584bc3126403..1372763a948f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -481,7 +481,7 @@ static int gdrom_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
481 return -EINVAL; 481 return -EINVAL;
482} 482}
483 483
484static struct cdrom_device_ops gdrom_ops = { 484static const struct cdrom_device_ops gdrom_ops = {
485 .open = gdrom_open, 485 .open = gdrom_open,
486 .release = gdrom_release, 486 .release = gdrom_release,
487 .drive_status = gdrom_drivestatus, 487 .drive_status = gdrom_drivestatus,
@@ -489,9 +489,9 @@ static struct cdrom_device_ops gdrom_ops = {
489 .get_last_session = gdrom_get_last_session, 489 .get_last_session = gdrom_get_last_session,
490 .reset = gdrom_hardreset, 490 .reset = gdrom_hardreset,
491 .audio_ioctl = gdrom_audio_ioctl, 491 .audio_ioctl = gdrom_audio_ioctl,
492 .generic_packet = cdrom_dummy_generic_packet,
492 .capability = CDC_MULTI_SESSION | CDC_MEDIA_CHANGED | 493 .capability = CDC_MULTI_SESSION | CDC_MEDIA_CHANGED |
493 CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R, 494 CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R,
494 .n_minors = 1,
495}; 495};
496 496
497static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) 497static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
@@ -659,23 +659,24 @@ static void gdrom_request(struct request_queue *rq)
659 struct request *req; 659 struct request *req;
660 660
661 while ((req = blk_fetch_request(rq)) != NULL) { 661 while ((req = blk_fetch_request(rq)) != NULL) {
662 if (req->cmd_type != REQ_TYPE_FS) { 662 switch (req_op(req)) {
663 printk(KERN_DEBUG "gdrom: Non-fs request ignored\n"); 663 case REQ_OP_READ:
664 __blk_end_request_all(req, -EIO); 664 /*
665 continue; 665 * Add to list of deferred work and then schedule
666 } 666 * workqueue.
667 if (rq_data_dir(req) != READ) { 667 */
668 list_add_tail(&req->queuelist, &gdrom_deferred);
669 schedule_work(&work);
670 break;
671 case REQ_OP_WRITE:
668 pr_notice("Read only device - write request ignored\n"); 672 pr_notice("Read only device - write request ignored\n");
669 __blk_end_request_all(req, -EIO); 673 __blk_end_request_all(req, -EIO);
670 continue; 674 break;
675 default:
676 printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
677 __blk_end_request_all(req, -EIO);
678 break;
671 } 679 }
672
673 /*
674 * Add to list of deferred work and then schedule
675 * workqueue.
676 */
677 list_add_tail(&req->queuelist, &gdrom_deferred);
678 schedule_work(&work);
679 } 680 }
680} 681}
681 682
@@ -807,16 +808,20 @@ static int probe_gdrom(struct platform_device *devptr)
807 if (err) 808 if (err)
808 goto probe_fail_cmdirq_register; 809 goto probe_fail_cmdirq_register;
809 gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock); 810 gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock);
810 if (!gd.gdrom_rq) 811 if (!gd.gdrom_rq) {
812 err = -ENOMEM;
811 goto probe_fail_requestq; 813 goto probe_fail_requestq;
814 }
812 815
813 err = probe_gdrom_setupqueue(); 816 err = probe_gdrom_setupqueue();
814 if (err) 817 if (err)
815 goto probe_fail_toc; 818 goto probe_fail_toc;
816 819
817 gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL); 820 gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL);
818 if (!gd.toc) 821 if (!gd.toc) {
822 err = -ENOMEM;
819 goto probe_fail_toc; 823 goto probe_fail_toc;
824 }
820 add_disk(gd.disk); 825 add_disk(gd.disk);
821 return 0; 826 return 0;
822 827
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 39ea67f9b066..c99a25c075bc 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -10,6 +10,7 @@ menuconfig IDE
10 tristate "ATA/ATAPI/MFM/RLL support (DEPRECATED)" 10 tristate "ATA/ATAPI/MFM/RLL support (DEPRECATED)"
11 depends on HAVE_IDE 11 depends on HAVE_IDE
12 depends on BLOCK 12 depends on BLOCK
13 select BLK_SCSI_REQUEST
13 ---help--- 14 ---help---
14 If you say Y here, your kernel will be able to manage ATA/(E)IDE and 15 If you say Y here, your kernel will be able to manage ATA/(E)IDE and
15 ATAPI units. The most common cases are IDE hard drives and ATAPI 16 ATAPI units. The most common cases are IDE hard drives and ATAPI
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index f90ea221f7f2..feb30061123b 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -92,8 +92,9 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
92 struct request *rq; 92 struct request *rq;
93 int error; 93 int error;
94 94
95 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 95 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
96 rq->cmd_type = REQ_TYPE_DRV_PRIV; 96 scsi_req_init(rq);
97 ide_req(rq)->type = ATA_PRIV_MISC;
97 rq->special = (char *)pc; 98 rq->special = (char *)pc;
98 99
99 if (buf && bufflen) { 100 if (buf && bufflen) {
@@ -103,9 +104,9 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
103 goto put_req; 104 goto put_req;
104 } 105 }
105 106
106 memcpy(rq->cmd, pc->c, 12); 107 memcpy(scsi_req(rq)->cmd, pc->c, 12);
107 if (drive->media == ide_tape) 108 if (drive->media == ide_tape)
108 rq->cmd[13] = REQ_IDETAPE_PC1; 109 scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1;
109 error = blk_execute_rq(drive->queue, disk, rq, 0); 110 error = blk_execute_rq(drive->queue, disk, rq, 0);
110put_req: 111put_req:
111 blk_put_request(rq); 112 blk_put_request(rq);
@@ -171,7 +172,8 @@ EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
171void ide_prep_sense(ide_drive_t *drive, struct request *rq) 172void ide_prep_sense(ide_drive_t *drive, struct request *rq)
172{ 173{
173 struct request_sense *sense = &drive->sense_data; 174 struct request_sense *sense = &drive->sense_data;
174 struct request *sense_rq = &drive->sense_rq; 175 struct request *sense_rq = drive->sense_rq;
176 struct scsi_request *req = scsi_req(sense_rq);
175 unsigned int cmd_len, sense_len; 177 unsigned int cmd_len, sense_len;
176 int err; 178 int err;
177 179
@@ -191,12 +193,13 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
191 193
192 BUG_ON(sense_len > sizeof(*sense)); 194 BUG_ON(sense_len > sizeof(*sense));
193 195
194 if (rq->cmd_type == REQ_TYPE_ATA_SENSE || drive->sense_rq_armed) 196 if (ata_sense_request(rq) || drive->sense_rq_armed)
195 return; 197 return;
196 198
197 memset(sense, 0, sizeof(*sense)); 199 memset(sense, 0, sizeof(*sense));
198 200
199 blk_rq_init(rq->q, sense_rq); 201 blk_rq_init(rq->q, sense_rq);
202 scsi_req_init(sense_rq);
200 203
201 err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, 204 err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
202 GFP_NOIO); 205 GFP_NOIO);
@@ -208,13 +211,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
208 } 211 }
209 212
210 sense_rq->rq_disk = rq->rq_disk; 213 sense_rq->rq_disk = rq->rq_disk;
211 sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; 214 sense_rq->cmd_flags = REQ_OP_DRV_IN;
212 sense_rq->cmd[4] = cmd_len; 215 ide_req(sense_rq)->type = ATA_PRIV_SENSE;
213 sense_rq->cmd_type = REQ_TYPE_ATA_SENSE;
214 sense_rq->rq_flags |= RQF_PREEMPT; 216 sense_rq->rq_flags |= RQF_PREEMPT;
215 217
218 req->cmd[0] = GPCMD_REQUEST_SENSE;
219 req->cmd[4] = cmd_len;
216 if (drive->media == ide_tape) 220 if (drive->media == ide_tape)
217 sense_rq->cmd[13] = REQ_IDETAPE_PC1; 221 req->cmd[13] = REQ_IDETAPE_PC1;
218 222
219 drive->sense_rq_armed = true; 223 drive->sense_rq_armed = true;
220} 224}
@@ -229,12 +233,12 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special)
229 return -ENOMEM; 233 return -ENOMEM;
230 } 234 }
231 235
232 drive->sense_rq.special = special; 236 drive->sense_rq->special = special;
233 drive->sense_rq_armed = false; 237 drive->sense_rq_armed = false;
234 238
235 drive->hwif->rq = NULL; 239 drive->hwif->rq = NULL;
236 240
237 elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT); 241 elv_add_request(drive->queue, drive->sense_rq, ELEVATOR_INSERT_FRONT);
238 return 0; 242 return 0;
239} 243}
240EXPORT_SYMBOL_GPL(ide_queue_sense_rq); 244EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
@@ -247,14 +251,14 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
247void ide_retry_pc(ide_drive_t *drive) 251void ide_retry_pc(ide_drive_t *drive)
248{ 252{
249 struct request *failed_rq = drive->hwif->rq; 253 struct request *failed_rq = drive->hwif->rq;
250 struct request *sense_rq = &drive->sense_rq; 254 struct request *sense_rq = drive->sense_rq;
251 struct ide_atapi_pc *pc = &drive->request_sense_pc; 255 struct ide_atapi_pc *pc = &drive->request_sense_pc;
252 256
253 (void)ide_read_error(drive); 257 (void)ide_read_error(drive);
254 258
255 /* init pc from sense_rq */ 259 /* init pc from sense_rq */
256 ide_init_pc(pc); 260 ide_init_pc(pc);
257 memcpy(pc->c, sense_rq->cmd, 12); 261 memcpy(pc->c, scsi_req(sense_rq)->cmd, 12);
258 262
259 if (drive->media == ide_tape) 263 if (drive->media == ide_tape)
260 drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC; 264 drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
@@ -286,7 +290,7 @@ int ide_cd_expiry(ide_drive_t *drive)
286 * commands/drives support that. Let ide_timer_expiry keep polling us 290 * commands/drives support that. Let ide_timer_expiry keep polling us
287 * for these. 291 * for these.
288 */ 292 */
289 switch (rq->cmd[0]) { 293 switch (scsi_req(rq)->cmd[0]) {
290 case GPCMD_BLANK: 294 case GPCMD_BLANK:
291 case GPCMD_FORMAT_UNIT: 295 case GPCMD_FORMAT_UNIT:
292 case GPCMD_RESERVE_RZONE_TRACK: 296 case GPCMD_RESERVE_RZONE_TRACK:
@@ -297,7 +301,7 @@ int ide_cd_expiry(ide_drive_t *drive)
297 default: 301 default:
298 if (!(rq->rq_flags & RQF_QUIET)) 302 if (!(rq->rq_flags & RQF_QUIET))
299 printk(KERN_INFO PFX "cmd 0x%x timed out\n", 303 printk(KERN_INFO PFX "cmd 0x%x timed out\n",
300 rq->cmd[0]); 304 scsi_req(rq)->cmd[0]);
301 wait = 0; 305 wait = 0;
302 break; 306 break;
303 } 307 }
@@ -307,15 +311,21 @@ EXPORT_SYMBOL_GPL(ide_cd_expiry);
307 311
308int ide_cd_get_xferlen(struct request *rq) 312int ide_cd_get_xferlen(struct request *rq)
309{ 313{
310 switch (rq->cmd_type) { 314 switch (req_op(rq)) {
311 case REQ_TYPE_FS: 315 default:
312 return 32768; 316 return 32768;
313 case REQ_TYPE_ATA_SENSE: 317 case REQ_OP_SCSI_IN:
314 case REQ_TYPE_BLOCK_PC: 318 case REQ_OP_SCSI_OUT:
315 case REQ_TYPE_ATA_PC:
316 return blk_rq_bytes(rq); 319 return blk_rq_bytes(rq);
317 default: 320 case REQ_OP_DRV_IN:
318 return 0; 321 case REQ_OP_DRV_OUT:
322 switch (ide_req(rq)->type) {
323 case ATA_PRIV_PC:
324 case ATA_PRIV_SENSE:
325 return blk_rq_bytes(rq);
326 default:
327 return 0;
328 }
319 } 329 }
320} 330}
321EXPORT_SYMBOL_GPL(ide_cd_get_xferlen); 331EXPORT_SYMBOL_GPL(ide_cd_get_xferlen);
@@ -374,7 +384,7 @@ int ide_check_ireason(ide_drive_t *drive, struct request *rq, int len,
374 drive->name, __func__, ireason); 384 drive->name, __func__, ireason);
375 } 385 }
376 386
377 if (dev_is_idecd(drive) && rq->cmd_type == REQ_TYPE_ATA_PC) 387 if (dev_is_idecd(drive) && ata_pc_request(rq))
378 rq->rq_flags |= RQF_FAILED; 388 rq->rq_flags |= RQF_FAILED;
379 389
380 return 1; 390 return 1;
@@ -420,7 +430,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
420 ? "write" : "read"); 430 ? "write" : "read");
421 pc->flags |= PC_FLAG_DMA_ERROR; 431 pc->flags |= PC_FLAG_DMA_ERROR;
422 } else 432 } else
423 rq->resid_len = 0; 433 scsi_req(rq)->resid_len = 0;
424 debug_log("%s: DMA finished\n", drive->name); 434 debug_log("%s: DMA finished\n", drive->name);
425 } 435 }
426 436
@@ -436,7 +446,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
436 local_irq_enable_in_hardirq(); 446 local_irq_enable_in_hardirq();
437 447
438 if (drive->media == ide_tape && 448 if (drive->media == ide_tape &&
439 (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE) 449 (stat & ATA_ERR) && scsi_req(rq)->cmd[0] == REQUEST_SENSE)
440 stat &= ~ATA_ERR; 450 stat &= ~ATA_ERR;
441 451
442 if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) { 452 if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) {
@@ -446,7 +456,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
446 if (drive->media != ide_tape) 456 if (drive->media != ide_tape)
447 pc->rq->errors++; 457 pc->rq->errors++;
448 458
449 if (rq->cmd[0] == REQUEST_SENSE) { 459 if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) {
450 printk(KERN_ERR PFX "%s: I/O error in request " 460 printk(KERN_ERR PFX "%s: I/O error in request "
451 "sense command\n", drive->name); 461 "sense command\n", drive->name);
452 return ide_do_reset(drive); 462 return ide_do_reset(drive);
@@ -477,12 +487,12 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
477 if (uptodate == 0) 487 if (uptodate == 0)
478 drive->failed_pc = NULL; 488 drive->failed_pc = NULL;
479 489
480 if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { 490 if (ata_misc_request(rq)) {
481 rq->errors = 0; 491 rq->errors = 0;
482 error = 0; 492 error = 0;
483 } else { 493 } else {
484 494
485 if (rq->cmd_type != REQ_TYPE_FS && uptodate <= 0) { 495 if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
486 if (rq->errors == 0) 496 if (rq->errors == 0)
487 rq->errors = -EIO; 497 rq->errors = -EIO;
488 } 498 }
@@ -512,7 +522,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
512 ide_pio_bytes(drive, cmd, write, done); 522 ide_pio_bytes(drive, cmd, write, done);
513 523
514 /* Update transferred byte count */ 524 /* Update transferred byte count */
515 rq->resid_len -= done; 525 scsi_req(rq)->resid_len -= done;
516 526
517 bcount -= done; 527 bcount -= done;
518 528
@@ -520,7 +530,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
520 ide_pad_transfer(drive, write, bcount); 530 ide_pad_transfer(drive, write, bcount);
521 531
522 debug_log("[cmd %x] transferred %d bytes, padded %d bytes, resid: %u\n", 532 debug_log("[cmd %x] transferred %d bytes, padded %d bytes, resid: %u\n",
523 rq->cmd[0], done, bcount, rq->resid_len); 533 rq->cmd[0], done, bcount, scsi_req(rq)->resid_len);
524 534
525 /* And set the interrupt handler again */ 535 /* And set the interrupt handler again */
526 ide_set_handler(drive, ide_pc_intr, timeout); 536 ide_set_handler(drive, ide_pc_intr, timeout);
@@ -603,7 +613,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
603 613
604 if (dev_is_idecd(drive)) { 614 if (dev_is_idecd(drive)) {
605 /* ATAPI commands get padded out to 12 bytes minimum */ 615 /* ATAPI commands get padded out to 12 bytes minimum */
606 cmd_len = COMMAND_SIZE(rq->cmd[0]); 616 cmd_len = COMMAND_SIZE(scsi_req(rq)->cmd[0]);
607 if (cmd_len < ATAPI_MIN_CDB_BYTES) 617 if (cmd_len < ATAPI_MIN_CDB_BYTES)
608 cmd_len = ATAPI_MIN_CDB_BYTES; 618 cmd_len = ATAPI_MIN_CDB_BYTES;
609 619
@@ -650,7 +660,7 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
650 660
651 /* Send the actual packet */ 661 /* Send the actual packet */
652 if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0) 662 if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0)
653 hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len); 663 hwif->tp_ops->output_data(drive, NULL, scsi_req(rq)->cmd, cmd_len);
654 664
655 /* Begin DMA, if necessary */ 665 /* Begin DMA, if necessary */
656 if (dev_is_idecd(drive)) { 666 if (dev_is_idecd(drive)) {
@@ -695,7 +705,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
695 bytes, 63 * 1024)); 705 bytes, 63 * 1024));
696 706
697 /* We haven't transferred any data yet */ 707 /* We haven't transferred any data yet */
698 rq->resid_len = bcount; 708 scsi_req(rq)->resid_len = bcount;
699 709
700 if (pc->flags & PC_FLAG_DMA_ERROR) { 710 if (pc->flags & PC_FLAG_DMA_ERROR) {
701 pc->flags &= ~PC_FLAG_DMA_ERROR; 711 pc->flags &= ~PC_FLAG_DMA_ERROR;
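
The ide-atapi.c changes above move the CDB, the sense buffer and the residual byte count out of struct request and into the scsi_request PDU, reached through scsi_req(). A hedged sketch of the resulting pattern for issuing a driver-private packet command synchronously, under the 4.11-era API visible in this diff; mydrv_queue_pc and the fixed 12-byte CDB length are illustrative and error handling is trimmed:

#include <linux/blkdev.h>
#include <linux/string.h>
#include <scsi/scsi_request.h>

static int mydrv_queue_pc(struct request_queue *q, struct gendisk *disk,
                          const unsigned char *cdb, unsigned int *resid)
{
        struct request *rq;
        int error;

        rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        scsi_req_init(rq);                      /* set up cmd/sense in the PDU */
        memcpy(scsi_req(rq)->cmd, cdb, 12);     /* CDB now lives in scsi_request */
        scsi_req(rq)->cmd_len = 12;

        error = blk_execute_rq(q, disk, rq, 0); /* wait for completion */
        *resid = scsi_req(rq)->resid_len;       /* residual count, also in the PDU */

        blk_put_request(rq);
        return error;
}

A real driver would additionally map a data buffer with blk_rq_map_kern() and inspect scsi_req(rq)->sense on failure, as ide_queue_pc_tail() and ide_cd_queue_pc() do in the hunks above.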
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 9cbd217bc0c9..aef00511ca86 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -121,7 +121,7 @@ static int cdrom_log_sense(ide_drive_t *drive, struct request *rq)
121 * don't log START_STOP unit with LoEj set, since we cannot 121 * don't log START_STOP unit with LoEj set, since we cannot
122 * reliably check if drive can auto-close 122 * reliably check if drive can auto-close
123 */ 123 */
124 if (rq->cmd[0] == GPCMD_START_STOP_UNIT && sense->asc == 0x24) 124 if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT && sense->asc == 0x24)
125 break; 125 break;
126 log = 1; 126 log = 1;
127 break; 127 break;
@@ -163,7 +163,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
163 * toc has not been recorded yet, it will fail with 05/24/00 (which is a 163 * toc has not been recorded yet, it will fail with 05/24/00 (which is a
164 * confusing error) 164 * confusing error)
165 */ 165 */
166 if (failed_command && failed_command->cmd[0] == GPCMD_READ_TOC_PMA_ATIP) 166 if (failed_command && scsi_req(failed_command)->cmd[0] == GPCMD_READ_TOC_PMA_ATIP)
167 if (sense->sense_key == 0x05 && sense->asc == 0x24) 167 if (sense->sense_key == 0x05 && sense->asc == 0x24)
168 return; 168 return;
169 169
@@ -176,7 +176,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
176 if (!sense->valid) 176 if (!sense->valid)
177 break; 177 break;
178 if (failed_command == NULL || 178 if (failed_command == NULL ||
179 failed_command->cmd_type != REQ_TYPE_FS) 179 blk_rq_is_passthrough(failed_command))
180 break; 180 break;
181 sector = (sense->information[0] << 24) | 181 sector = (sense->information[0] << 24) |
182 (sense->information[1] << 16) | 182 (sense->information[1] << 16) |
@@ -210,7 +210,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
210static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) 210static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
211{ 211{
212 /* 212 /*
213 * For REQ_TYPE_ATA_SENSE, "rq->special" points to the original 213 * For ATA_PRIV_SENSE, "rq->special" points to the original
214 * failed request. Also, the sense data should be read 214 * failed request. Also, the sense data should be read
215 * directly from rq which might be different from the original 215 * directly from rq which might be different from the original
216 * sense buffer if it got copied during mapping. 216 * sense buffer if it got copied during mapping.
@@ -219,15 +219,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
219 void *sense = bio_data(rq->bio); 219 void *sense = bio_data(rq->bio);
220 220
221 if (failed) { 221 if (failed) {
222 if (failed->sense) { 222 /*
223 /* 223 * Sense is always read into drive->sense_data, copy back to the
224 * Sense is always read into drive->sense_data. 224 * original request.
225 * Copy back if the failed request has its 225 */
226 * sense pointer set. 226 memcpy(scsi_req(failed)->sense, sense, 18);
227 */ 227 scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
228 memcpy(failed->sense, sense, 18);
229 failed->sense_len = rq->sense_len;
230 }
231 cdrom_analyze_sense_data(drive, failed); 228 cdrom_analyze_sense_data(drive, failed);
232 229
233 if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed))) 230 if (ide_end_rq(drive, failed, -EIO, blk_rq_bytes(failed)))
@@ -285,7 +282,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
285 "stat 0x%x", 282 "stat 0x%x",
286 rq->cmd[0], rq->cmd_type, err, stat); 283 rq->cmd[0], rq->cmd_type, err, stat);
287 284
288 if (rq->cmd_type == REQ_TYPE_ATA_SENSE) { 285 if (ata_sense_request(rq)) {
289 /* 286 /*
290 * We got an error trying to get sense info from the drive 287 * We got an error trying to get sense info from the drive
291 * (probably while trying to recover from a former error). 288 * (probably while trying to recover from a former error).
@@ -296,7 +293,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
296 } 293 }
297 294
298 /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */ 295 /* if we have an error, pass CHECK_CONDITION as the SCSI status byte */
299 if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !rq->errors) 296 if (blk_rq_is_scsi(rq) && !rq->errors)
300 rq->errors = SAM_STAT_CHECK_CONDITION; 297 rq->errors = SAM_STAT_CHECK_CONDITION;
301 298
302 if (blk_noretry_request(rq)) 299 if (blk_noretry_request(rq))
@@ -304,13 +301,13 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
304 301
305 switch (sense_key) { 302 switch (sense_key) {
306 case NOT_READY: 303 case NOT_READY:
307 if (rq->cmd_type == REQ_TYPE_FS && rq_data_dir(rq) == WRITE) { 304 if (req_op(rq) == REQ_OP_WRITE) {
308 if (ide_cd_breathe(drive, rq)) 305 if (ide_cd_breathe(drive, rq))
309 return 1; 306 return 1;
310 } else { 307 } else {
311 cdrom_saw_media_change(drive); 308 cdrom_saw_media_change(drive);
312 309
313 if (rq->cmd_type == REQ_TYPE_FS && 310 if (!blk_rq_is_passthrough(rq) &&
314 !(rq->rq_flags & RQF_QUIET)) 311 !(rq->rq_flags & RQF_QUIET))
315 printk(KERN_ERR PFX "%s: tray open\n", 312 printk(KERN_ERR PFX "%s: tray open\n",
316 drive->name); 313 drive->name);
@@ -320,7 +317,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
320 case UNIT_ATTENTION: 317 case UNIT_ATTENTION:
321 cdrom_saw_media_change(drive); 318 cdrom_saw_media_change(drive);
322 319
323 if (rq->cmd_type != REQ_TYPE_FS) 320 if (blk_rq_is_passthrough(rq))
324 return 0; 321 return 0;
325 322
326 /* 323 /*
@@ -338,7 +335,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
338 * 335 *
339 * cdrom_log_sense() knows this! 336 * cdrom_log_sense() knows this!
340 */ 337 */
341 if (rq->cmd[0] == GPCMD_START_STOP_UNIT) 338 if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT)
342 break; 339 break;
343 /* fall-through */ 340 /* fall-through */
344 case DATA_PROTECT: 341 case DATA_PROTECT:
@@ -368,7 +365,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
368 do_end_request = 1; 365 do_end_request = 1;
369 break; 366 break;
370 default: 367 default:
371 if (rq->cmd_type != REQ_TYPE_FS) 368 if (blk_rq_is_passthrough(rq))
372 break; 369 break;
373 if (err & ~ATA_ABORTED) { 370 if (err & ~ATA_ABORTED) {
374 /* go to the default handler for other errors */ 371 /* go to the default handler for other errors */
@@ -379,7 +376,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
379 do_end_request = 1; 376 do_end_request = 1;
380 } 377 }
381 378
382 if (rq->cmd_type != REQ_TYPE_FS) { 379 if (blk_rq_is_passthrough(rq)) {
383 rq->rq_flags |= RQF_FAILED; 380 rq->rq_flags |= RQF_FAILED;
384 do_end_request = 1; 381 do_end_request = 1;
385 } 382 }
@@ -414,7 +411,7 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
414 * Some of the trailing request sense fields are optional, 411 * Some of the trailing request sense fields are optional,
415 * and some drives don't send them. Sigh. 412 * and some drives don't send them. Sigh.
416 */ 413 */
417 if (rq->cmd[0] == GPCMD_REQUEST_SENSE && 414 if (scsi_req(rq)->cmd[0] == GPCMD_REQUEST_SENSE &&
418 cmd->nleft > 0 && cmd->nleft <= 5) 415 cmd->nleft > 0 && cmd->nleft <= 5)
419 cmd->nleft = 0; 416 cmd->nleft = 0;
420} 417}
@@ -425,12 +422,8 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
425 req_flags_t rq_flags) 422 req_flags_t rq_flags)
426{ 423{
427 struct cdrom_info *info = drive->driver_data; 424 struct cdrom_info *info = drive->driver_data;
428 struct request_sense local_sense;
429 int retries = 10; 425 int retries = 10;
430 req_flags_t flags = 0; 426 bool failed;
431
432 if (!sense)
433 sense = &local_sense;
434 427
435 ide_debug_log(IDE_DBG_PC, "cmd[0]: 0x%x, write: 0x%x, timeout: %d, " 428 ide_debug_log(IDE_DBG_PC, "cmd[0]: 0x%x, write: 0x%x, timeout: %d, "
436 "rq_flags: 0x%x", 429 "rq_flags: 0x%x",
@@ -440,12 +433,13 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
440 do { 433 do {
441 struct request *rq; 434 struct request *rq;
442 int error; 435 int error;
436 bool delay = false;
443 437
444 rq = blk_get_request(drive->queue, write, __GFP_RECLAIM); 438 rq = blk_get_request(drive->queue,
445 439 write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
446 memcpy(rq->cmd, cmd, BLK_MAX_CDB); 440 scsi_req_init(rq);
447 rq->cmd_type = REQ_TYPE_ATA_PC; 441 memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
448 rq->sense = sense; 442 ide_req(rq)->type = ATA_PRIV_PC;
449 rq->rq_flags |= rq_flags; 443 rq->rq_flags |= rq_flags;
450 rq->timeout = timeout; 444 rq->timeout = timeout;
451 if (buffer) { 445 if (buffer) {
@@ -460,21 +454,21 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
460 error = blk_execute_rq(drive->queue, info->disk, rq, 0); 454 error = blk_execute_rq(drive->queue, info->disk, rq, 0);
461 455
462 if (buffer) 456 if (buffer)
463 *bufflen = rq->resid_len; 457 *bufflen = scsi_req(rq)->resid_len;
464 458 if (sense)
465 flags = rq->rq_flags; 459 memcpy(sense, scsi_req(rq)->sense, sizeof(*sense));
466 blk_put_request(rq);
467 460
468 /* 461 /*
469 * FIXME: we should probably abort/retry or something in case of 462 * FIXME: we should probably abort/retry or something in case of
470 * failure. 463 * failure.
471 */ 464 */
472 if (flags & RQF_FAILED) { 465 failed = (rq->rq_flags & RQF_FAILED) != 0;
466 if (failed) {
473 /* 467 /*
474 * The request failed. Retry if it was due to a unit 468 * The request failed. Retry if it was due to a unit
475 * attention status (usually means media was changed). 469 * attention status (usually means media was changed).
476 */ 470 */
477 struct request_sense *reqbuf = sense; 471 struct request_sense *reqbuf = scsi_req(rq)->sense;
478 472
479 if (reqbuf->sense_key == UNIT_ATTENTION) 473 if (reqbuf->sense_key == UNIT_ATTENTION)
480 cdrom_saw_media_change(drive); 474 cdrom_saw_media_change(drive);
@@ -485,19 +479,20 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
485 * a disk. Retry, but wait a little to give 479 * a disk. Retry, but wait a little to give
486 * the drive time to complete the load. 480 * the drive time to complete the load.
487 */ 481 */
488 ssleep(2); 482 delay = true;
489 } else { 483 } else {
490 /* otherwise, don't retry */ 484 /* otherwise, don't retry */
491 retries = 0; 485 retries = 0;
492 } 486 }
493 --retries; 487 --retries;
494 } 488 }
495 489 blk_put_request(rq);
496 /* end of retry loop */ 490 if (delay)
497 } while ((flags & RQF_FAILED) && retries >= 0); 491 ssleep(2);
492 } while (failed && retries >= 0);
498 493
499 /* return an error if the command failed */ 494 /* return an error if the command failed */
500 return (flags & RQF_FAILED) ? -EIO : 0; 495 return failed ? -EIO : 0;
501} 496}
502 497
503/* 498/*
@@ -526,7 +521,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
526 ide_expiry_t *expiry = NULL; 521 ide_expiry_t *expiry = NULL;
527 int dma_error = 0, dma, thislen, uptodate = 0; 522 int dma_error = 0, dma, thislen, uptodate = 0;
528 int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0; 523 int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
529 int sense = (rq->cmd_type == REQ_TYPE_ATA_SENSE); 524 int sense = ata_sense_request(rq);
530 unsigned int timeout; 525 unsigned int timeout;
531 u16 len; 526 u16 len;
532 u8 ireason, stat; 527 u8 ireason, stat;
@@ -569,7 +564,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
569 564
570 ide_read_bcount_and_ireason(drive, &len, &ireason); 565 ide_read_bcount_and_ireason(drive, &len, &ireason);
571 566
572 thislen = (rq->cmd_type == REQ_TYPE_FS) ? len : cmd->nleft; 567 thislen = !blk_rq_is_passthrough(rq) ? len : cmd->nleft;
573 if (thislen > len) 568 if (thislen > len)
574 thislen = len; 569 thislen = len;
575 570
@@ -578,7 +573,8 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
578 573
579 /* If DRQ is clear, the command has completed. */ 574 /* If DRQ is clear, the command has completed. */
580 if ((stat & ATA_DRQ) == 0) { 575 if ((stat & ATA_DRQ) == 0) {
581 if (rq->cmd_type == REQ_TYPE_FS) { 576 switch (req_op(rq)) {
577 default:
582 /* 578 /*
583 * If we're not done reading/writing, complain. 579 * If we're not done reading/writing, complain.
584 * Otherwise, complete the command normally. 580 * Otherwise, complete the command normally.
@@ -592,7 +588,9 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
592 rq->rq_flags |= RQF_FAILED; 588 rq->rq_flags |= RQF_FAILED;
593 uptodate = 0; 589 uptodate = 0;
594 } 590 }
595 } else if (rq->cmd_type != REQ_TYPE_BLOCK_PC) { 591 goto out_end;
592 case REQ_OP_DRV_IN:
593 case REQ_OP_DRV_OUT:
596 ide_cd_request_sense_fixup(drive, cmd); 594 ide_cd_request_sense_fixup(drive, cmd);
597 595
598 uptodate = cmd->nleft ? 0 : 1; 596 uptodate = cmd->nleft ? 0 : 1;
@@ -608,8 +606,11 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
608 606
609 if (!uptodate) 607 if (!uptodate)
610 rq->rq_flags |= RQF_FAILED; 608 rq->rq_flags |= RQF_FAILED;
609 goto out_end;
610 case REQ_OP_SCSI_IN:
611 case REQ_OP_SCSI_OUT:
612 goto out_end;
611 } 613 }
612 goto out_end;
613 } 614 }
614 615
615 rc = ide_check_ireason(drive, rq, len, ireason, write); 616 rc = ide_check_ireason(drive, rq, len, ireason, write);
@@ -636,12 +637,12 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
636 len -= blen; 637 len -= blen;
637 638
638 if (sense && write == 0) 639 if (sense && write == 0)
639 rq->sense_len += blen; 640 scsi_req(rq)->sense_len += blen;
640 } 641 }
641 642
642 /* pad, if necessary */ 643 /* pad, if necessary */
643 if (len > 0) { 644 if (len > 0) {
644 if (rq->cmd_type != REQ_TYPE_FS || write == 0) 645 if (blk_rq_is_passthrough(rq) || write == 0)
645 ide_pad_transfer(drive, write, len); 646 ide_pad_transfer(drive, write, len);
646 else { 647 else {
647 printk(KERN_ERR PFX "%s: confused, missing data\n", 648 printk(KERN_ERR PFX "%s: confused, missing data\n",
@@ -650,12 +651,18 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
650 } 651 }
651 } 652 }
652 653
653 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 654 switch (req_op(rq)) {
655 case REQ_OP_SCSI_IN:
656 case REQ_OP_SCSI_OUT:
654 timeout = rq->timeout; 657 timeout = rq->timeout;
655 } else { 658 break;
659 case REQ_OP_DRV_IN:
660 case REQ_OP_DRV_OUT:
661 expiry = ide_cd_expiry;
662 /*FALLTHRU*/
663 default:
656 timeout = ATAPI_WAIT_PC; 664 timeout = ATAPI_WAIT_PC;
657 if (rq->cmd_type != REQ_TYPE_FS) 665 break;
658 expiry = ide_cd_expiry;
659 } 666 }
660 667
661 hwif->expiry = expiry; 668 hwif->expiry = expiry;
@@ -663,15 +670,15 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
663 return ide_started; 670 return ide_started;
664 671
665out_end: 672out_end:
666 if (rq->cmd_type == REQ_TYPE_BLOCK_PC && rc == 0) { 673 if (blk_rq_is_scsi(rq) && rc == 0) {
667 rq->resid_len = 0; 674 scsi_req(rq)->resid_len = 0;
668 blk_end_request_all(rq, 0); 675 blk_end_request_all(rq, 0);
669 hwif->rq = NULL; 676 hwif->rq = NULL;
670 } else { 677 } else {
671 if (sense && uptodate) 678 if (sense && uptodate)
672 ide_cd_complete_failed_rq(drive, rq); 679 ide_cd_complete_failed_rq(drive, rq);
673 680
674 if (rq->cmd_type == REQ_TYPE_FS) { 681 if (!blk_rq_is_passthrough(rq)) {
675 if (cmd->nleft == 0) 682 if (cmd->nleft == 0)
676 uptodate = 1; 683 uptodate = 1;
677 } else { 684 } else {
@@ -684,10 +691,10 @@ out_end:
684 return ide_stopped; 691 return ide_stopped;
685 692
686 /* make sure it's fully ended */ 693 /* make sure it's fully ended */
687 if (rq->cmd_type != REQ_TYPE_FS) { 694 if (blk_rq_is_passthrough(rq)) {
688 rq->resid_len -= cmd->nbytes - cmd->nleft; 695 scsi_req(rq)->resid_len -= cmd->nbytes - cmd->nleft;
689 if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE)) 696 if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
690 rq->resid_len += cmd->last_xfer_len; 697 scsi_req(rq)->resid_len += cmd->last_xfer_len;
691 } 698 }
692 699
693 ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq)); 700 ide_complete_rq(drive, uptodate ? 0 : -EIO, blk_rq_bytes(rq));
@@ -744,7 +751,7 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
744 ide_debug_log(IDE_DBG_PC, "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x", 751 ide_debug_log(IDE_DBG_PC, "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x",
745 rq->cmd[0], rq->cmd_type); 752 rq->cmd[0], rq->cmd_type);
746 753
747 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) 754 if (blk_rq_is_scsi(rq))
748 rq->rq_flags |= RQF_QUIET; 755 rq->rq_flags |= RQF_QUIET;
749 else 756 else
750 rq->rq_flags &= ~RQF_FAILED; 757 rq->rq_flags &= ~RQF_FAILED;
@@ -786,25 +793,31 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
786 if (drive->debug_mask & IDE_DBG_RQ) 793 if (drive->debug_mask & IDE_DBG_RQ)
787 blk_dump_rq_flags(rq, "ide_cd_do_request"); 794 blk_dump_rq_flags(rq, "ide_cd_do_request");
788 795
789 switch (rq->cmd_type) { 796 switch (req_op(rq)) {
790 case REQ_TYPE_FS: 797 default:
791 if (cdrom_start_rw(drive, rq) == ide_stopped) 798 if (cdrom_start_rw(drive, rq) == ide_stopped)
792 goto out_end; 799 goto out_end;
793 break; 800 break;
794 case REQ_TYPE_ATA_SENSE: 801 case REQ_OP_SCSI_IN:
795 case REQ_TYPE_BLOCK_PC: 802 case REQ_OP_SCSI_OUT:
796 case REQ_TYPE_ATA_PC: 803 handle_pc:
797 if (!rq->timeout) 804 if (!rq->timeout)
798 rq->timeout = ATAPI_WAIT_PC; 805 rq->timeout = ATAPI_WAIT_PC;
799
800 cdrom_do_block_pc(drive, rq); 806 cdrom_do_block_pc(drive, rq);
801 break; 807 break;
802 case REQ_TYPE_DRV_PRIV: 808 case REQ_OP_DRV_IN:
803 /* right now this can only be a reset... */ 809 case REQ_OP_DRV_OUT:
804 uptodate = 1; 810 switch (ide_req(rq)->type) {
805 goto out_end; 811 case ATA_PRIV_MISC:
806 default: 812 /* right now this can only be a reset... */
807 BUG(); 813 uptodate = 1;
814 goto out_end;
815 case ATA_PRIV_SENSE:
816 case ATA_PRIV_PC:
817 goto handle_pc;
818 default:
819 BUG();
820 }
808 } 821 }
809 822
810 /* prepare sense request for this command */ 823 /* prepare sense request for this command */
@@ -817,7 +830,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
817 830
818 cmd.rq = rq; 831 cmd.rq = rq;
819 832
820 if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) { 833 if (!blk_rq_is_passthrough(rq) || blk_rq_bytes(rq)) {
821 ide_init_sg_cmd(&cmd, blk_rq_bytes(rq)); 834 ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
822 ide_map_sg(drive, &cmd); 835 ide_map_sg(drive, &cmd);
823 } 836 }
@@ -1166,7 +1179,7 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
1166 CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \ 1179 CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \
1167 CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM) 1180 CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM)
1168 1181
1169static struct cdrom_device_ops ide_cdrom_dops = { 1182static const struct cdrom_device_ops ide_cdrom_dops = {
1170 .open = ide_cdrom_open_real, 1183 .open = ide_cdrom_open_real,
1171 .release = ide_cdrom_release_real, 1184 .release = ide_cdrom_release_real,
1172 .drive_status = ide_cdrom_drive_status, 1185 .drive_status = ide_cdrom_drive_status,
@@ -1312,28 +1325,29 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
1312 int hard_sect = queue_logical_block_size(q); 1325 int hard_sect = queue_logical_block_size(q);
1313 long block = (long)blk_rq_pos(rq) / (hard_sect >> 9); 1326 long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
1314 unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9); 1327 unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
1328 struct scsi_request *req = scsi_req(rq);
1315 1329
1316 memset(rq->cmd, 0, BLK_MAX_CDB); 1330 memset(req->cmd, 0, BLK_MAX_CDB);
1317 1331
1318 if (rq_data_dir(rq) == READ) 1332 if (rq_data_dir(rq) == READ)
1319 rq->cmd[0] = GPCMD_READ_10; 1333 req->cmd[0] = GPCMD_READ_10;
1320 else 1334 else
1321 rq->cmd[0] = GPCMD_WRITE_10; 1335 req->cmd[0] = GPCMD_WRITE_10;
1322 1336
1323 /* 1337 /*
1324 * fill in lba 1338 * fill in lba
1325 */ 1339 */
1326 rq->cmd[2] = (block >> 24) & 0xff; 1340 req->cmd[2] = (block >> 24) & 0xff;
1327 rq->cmd[3] = (block >> 16) & 0xff; 1341 req->cmd[3] = (block >> 16) & 0xff;
1328 rq->cmd[4] = (block >> 8) & 0xff; 1342 req->cmd[4] = (block >> 8) & 0xff;
1329 rq->cmd[5] = block & 0xff; 1343 req->cmd[5] = block & 0xff;
1330 1344
1331 /* 1345 /*
1332 * and transfer length 1346 * and transfer length
1333 */ 1347 */
1334 rq->cmd[7] = (blocks >> 8) & 0xff; 1348 req->cmd[7] = (blocks >> 8) & 0xff;
1335 rq->cmd[8] = blocks & 0xff; 1349 req->cmd[8] = blocks & 0xff;
1336 rq->cmd_len = 10; 1350 req->cmd_len = 10;
1337 return BLKPREP_OK; 1351 return BLKPREP_OK;
1338} 1352}
1339 1353
@@ -1343,7 +1357,7 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
1343 */ 1357 */
1344static int ide_cdrom_prep_pc(struct request *rq) 1358static int ide_cdrom_prep_pc(struct request *rq)
1345{ 1359{
1346 u8 *c = rq->cmd; 1360 u8 *c = scsi_req(rq)->cmd;
1347 1361
1348 /* transform 6-byte read/write commands to the 10-byte version */ 1362 /* transform 6-byte read/write commands to the 10-byte version */
1349 if (c[0] == READ_6 || c[0] == WRITE_6) { 1363 if (c[0] == READ_6 || c[0] == WRITE_6) {
@@ -1354,7 +1368,7 @@ static int ide_cdrom_prep_pc(struct request *rq)
1354 c[2] = 0; 1368 c[2] = 0;
1355 c[1] &= 0xe0; 1369 c[1] &= 0xe0;
1356 c[0] += (READ_10 - READ_6); 1370 c[0] += (READ_10 - READ_6);
1357 rq->cmd_len = 10; 1371 scsi_req(rq)->cmd_len = 10;
1358 return BLKPREP_OK; 1372 return BLKPREP_OK;
1359 } 1373 }
1360 1374
@@ -1373,9 +1387,9 @@ static int ide_cdrom_prep_pc(struct request *rq)
1373 1387
1374static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq) 1388static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq)
1375{ 1389{
1376 if (rq->cmd_type == REQ_TYPE_FS) 1390 if (!blk_rq_is_passthrough(rq))
1377 return ide_cdrom_prep_fs(q, rq); 1391 return ide_cdrom_prep_fs(q, rq);
1378 else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) 1392 else if (blk_rq_is_scsi(rq))
1379 return ide_cdrom_prep_pc(rq); 1393 return ide_cdrom_prep_pc(rq);
1380 1394
1381 return 0; 1395 return 0;
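
ide_cdrom_prep_fs() and ide_cdrom_prep_pc() above now write the generated CDB into the scsi_request PDU instead of the old rq->cmd array, and the prep_fn dispatches with blk_rq_is_passthrough()/blk_rq_is_scsi() rather than cmd_type. A minimal sketch of such a prep_fn, assuming the same 4.11-era helpers; the LBA handling ignores the block-size scaling the real driver performs, so treat it as an illustration only:

#include <linux/blkdev.h>
#include <linux/cdrom.h>                /* GPCMD_READ_10 / GPCMD_WRITE_10 */
#include <linux/string.h>
#include <scsi/scsi_request.h>

static int mydrv_prep_fs(struct request_queue *q, struct request *rq)
{
        struct scsi_request *req = scsi_req(rq);
        u32 lba = blk_rq_pos(rq);       /* sketch: assumes 512-byte blocks */
        u16 nblocks = blk_rq_sectors(rq);

        memset(req->cmd, 0, BLK_MAX_CDB);
        req->cmd[0] = (rq_data_dir(rq) == READ) ? GPCMD_READ_10 : GPCMD_WRITE_10;

        /* Logical block address, big-endian. */
        req->cmd[2] = (lba >> 24) & 0xff;
        req->cmd[3] = (lba >> 16) & 0xff;
        req->cmd[4] = (lba >> 8) & 0xff;
        req->cmd[5] = lba & 0xff;

        /* Transfer length in blocks, big-endian. */
        req->cmd[7] = (nblocks >> 8) & 0xff;
        req->cmd[8] = nblocks & 0xff;
        req->cmd_len = 10;
        return BLKPREP_OK;
}

static int mydrv_prep_fn(struct request_queue *q, struct request *rq)
{
        if (!blk_rq_is_passthrough(rq)) /* filesystem read/write */
                return mydrv_prep_fs(q, rq);
        return BLKPREP_OK;              /* passthrough: CDB already provided */
}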
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index f085e3a2e1d6..9fcefbc8425e 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -303,8 +303,9 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi)
303 struct request *rq; 303 struct request *rq;
304 int ret; 304 int ret;
305 305
306 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 306 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
307 rq->cmd_type = REQ_TYPE_DRV_PRIV; 307 scsi_req_init(rq);
308 ide_req(rq)->type = ATA_PRIV_MISC;
308 rq->rq_flags = RQF_QUIET; 309 rq->rq_flags = RQF_QUIET;
309 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); 310 ret = blk_execute_rq(drive->queue, cd->disk, rq, 0);
310 blk_put_request(rq); 311 blk_put_request(rq);
diff --git a/drivers/ide/ide-cd_verbose.c b/drivers/ide/ide-cd_verbose.c
index f079ca2f260b..58a6feb74c02 100644
--- a/drivers/ide/ide-cd_verbose.c
+++ b/drivers/ide/ide-cd_verbose.c
@@ -315,12 +315,12 @@ void ide_cd_log_error(const char *name, struct request *failed_command,
315 while (hi > lo) { 315 while (hi > lo) {
316 mid = (lo + hi) / 2; 316 mid = (lo + hi) / 2;
317 if (packet_command_texts[mid].packet_command == 317 if (packet_command_texts[mid].packet_command ==
318 failed_command->cmd[0]) { 318 scsi_req(failed_command)->cmd[0]) {
319 s = packet_command_texts[mid].text; 319 s = packet_command_texts[mid].text;
320 break; 320 break;
321 } 321 }
322 if (packet_command_texts[mid].packet_command > 322 if (packet_command_texts[mid].packet_command >
323 failed_command->cmd[0]) 323 scsi_req(failed_command)->cmd[0])
324 hi = mid; 324 hi = mid;
325 else 325 else
326 lo = mid + 1; 326 lo = mid + 1;
@@ -329,7 +329,7 @@ void ide_cd_log_error(const char *name, struct request *failed_command,
329 printk(KERN_ERR " The failed \"%s\" packet command " 329 printk(KERN_ERR " The failed \"%s\" packet command "
330 "was: \n \"", s); 330 "was: \n \"", s);
331 for (i = 0; i < BLK_MAX_CDB; i++) 331 for (i = 0; i < BLK_MAX_CDB; i++)
332 printk(KERN_CONT "%02x ", failed_command->cmd[i]); 332 printk(KERN_CONT "%02x ", scsi_req(failed_command)->cmd[i]);
333 printk(KERN_CONT "\"\n"); 333 printk(KERN_CONT "\"\n");
334 } 334 }
335 335
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 0dd43b4fcec6..a45dda5386e4 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -165,11 +165,12 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
165 if (!(setting->flags & DS_SYNC)) 165 if (!(setting->flags & DS_SYNC))
166 return setting->set(drive, arg); 166 return setting->set(drive, arg);
167 167
168 rq = blk_get_request(q, READ, __GFP_RECLAIM); 168 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
169 rq->cmd_type = REQ_TYPE_DRV_PRIV; 169 scsi_req_init(rq);
170 rq->cmd_len = 5; 170 ide_req(rq)->type = ATA_PRIV_MISC;
171 rq->cmd[0] = REQ_DEVSET_EXEC; 171 scsi_req(rq)->cmd_len = 5;
172 *(int *)&rq->cmd[1] = arg; 172 scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
173 *(int *)&scsi_req(rq)->cmd[1] = arg;
173 rq->special = setting->set; 174 rq->special = setting->set;
174 175
175 if (blk_execute_rq(q, NULL, rq, 0)) 176 if (blk_execute_rq(q, NULL, rq, 0))
@@ -183,7 +184,7 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq)
183{ 184{
184 int err, (*setfunc)(ide_drive_t *, int) = rq->special; 185 int err, (*setfunc)(ide_drive_t *, int) = rq->special;
185 186
186 err = setfunc(drive, *(int *)&rq->cmd[1]); 187 err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]);
187 if (err) 188 if (err)
188 rq->errors = err; 189 rq->errors = err;
189 ide_complete_rq(drive, err, blk_rq_bytes(rq)); 190 ide_complete_rq(drive, err, blk_rq_bytes(rq));
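
ide-devsets.c above carries the integer setting value in bytes 1-4 of the private command buffer and reads it back in ide_do_devset(). A hedged sketch of that round trip using the new scsi_req() accessor; the patch uses a direct int cast on &cmd[1], while this sketch uses memcpy to avoid alignment assumptions, which is a deliberate deviation, and the opcode is hypothetical:

#include <linux/blkdev.h>
#include <linux/string.h>
#include <scsi/scsi_request.h>

#define MYDRV_CMD_SET   0x20    /* illustrative private opcode */

/* Submitter side: encode opcode plus argument into the per-request PDU. */
static void mydrv_encode_setting(struct request *rq, int arg)
{
        struct scsi_request *req = scsi_req(rq);

        req->cmd[0] = MYDRV_CMD_SET;
        memcpy(&req->cmd[1], &arg, sizeof(arg));
        req->cmd_len = 1 + sizeof(arg);
}

/* Request-handler side: decode the argument again. */
static int mydrv_decode_setting(struct request *rq)
{
        int arg;

        memcpy(&arg, &scsi_req(rq)->cmd[1], sizeof(arg));
        return arg;
}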
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 5ceace542b77..186159715b71 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -184,7 +184,7 @@ static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
184 ide_hwif_t *hwif = drive->hwif; 184 ide_hwif_t *hwif = drive->hwif;
185 185
186 BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED); 186 BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
187 BUG_ON(rq->cmd_type != REQ_TYPE_FS); 187 BUG_ON(blk_rq_is_passthrough(rq));
188 188
189 ledtrig_disk_activity(); 189 ledtrig_disk_activity();
190 190
@@ -452,8 +452,9 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
452 cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; 452 cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
453 cmd->tf_flags = IDE_TFLAG_DYN; 453 cmd->tf_flags = IDE_TFLAG_DYN;
454 cmd->protocol = ATA_PROT_NODATA; 454 cmd->protocol = ATA_PROT_NODATA;
455 455 rq->cmd_flags &= ~REQ_OP_MASK;
456 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 456 rq->cmd_flags |= REQ_OP_DRV_OUT;
457 ide_req(rq)->type = ATA_PRIV_TASKFILE;
457 rq->special = cmd; 458 rq->special = cmd;
458 cmd->rq = rq; 459 cmd->rq = rq;
459 460
@@ -477,8 +478,9 @@ static int set_multcount(ide_drive_t *drive, int arg)
477 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) 478 if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
478 return -EBUSY; 479 return -EBUSY;
479 480
480 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 481 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
481 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 482 scsi_req_init(rq);
483 ide_req(rq)->type = ATA_PRIV_TASKFILE;
482 484
483 drive->mult_req = arg; 485 drive->mult_req = arg;
484 drive->special_flags |= IDE_SFLAG_SET_MULTMODE; 486 drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index d6da011299f5..cf3af6840368 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c
@@ -123,8 +123,8 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
123 return ide_stopped; 123 return ide_stopped;
124 124
125 /* retry only "normal" I/O: */ 125 /* retry only "normal" I/O: */
126 if (rq->cmd_type != REQ_TYPE_FS) { 126 if (blk_rq_is_passthrough(rq)) {
127 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { 127 if (ata_taskfile_request(rq)) {
128 struct ide_cmd *cmd = rq->special; 128 struct ide_cmd *cmd = rq->special;
129 129
130 if (cmd) 130 if (cmd)
@@ -147,8 +147,8 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err)
147{ 147{
148 struct request *rq = drive->hwif->rq; 148 struct request *rq = drive->hwif->rq;
149 149
150 if (rq && rq->cmd_type == REQ_TYPE_DRV_PRIV && 150 if (rq && ata_misc_request(rq) &&
151 rq->cmd[0] == REQ_DRIVE_RESET) { 151 scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
152 if (err <= 0 && rq->errors == 0) 152 if (err <= 0 && rq->errors == 0)
153 rq->errors = -EIO; 153 rq->errors = -EIO;
154 ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq)); 154 ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index f079d8d1d856..a69e8013f1df 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -72,7 +72,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
72 drive->failed_pc = NULL; 72 drive->failed_pc = NULL;
73 73
74 if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 || 74 if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
75 rq->cmd_type == REQ_TYPE_BLOCK_PC) 75 (req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT))
76 uptodate = 1; /* FIXME */ 76 uptodate = 1; /* FIXME */
77 else if (pc->c[0] == GPCMD_REQUEST_SENSE) { 77 else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
78 78
@@ -97,7 +97,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
97 "Aborting request!\n"); 97 "Aborting request!\n");
98 } 98 }
99 99
100 if (rq->cmd_type == REQ_TYPE_DRV_PRIV) 100 if (ata_misc_request(rq))
101 rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; 101 rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
102 102
103 return uptodate; 103 return uptodate;
@@ -203,7 +203,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
203 put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]); 203 put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]);
204 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]); 204 put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]);
205 205
206 memcpy(rq->cmd, pc->c, 12); 206 memcpy(scsi_req(rq)->cmd, pc->c, 12);
207 207
208 pc->rq = rq; 208 pc->rq = rq;
209 if (cmd == WRITE) 209 if (cmd == WRITE)
@@ -216,7 +216,7 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
216 struct ide_atapi_pc *pc, struct request *rq) 216 struct ide_atapi_pc *pc, struct request *rq)
217{ 217{
218 ide_init_pc(pc); 218 ide_init_pc(pc);
219 memcpy(pc->c, rq->cmd, sizeof(pc->c)); 219 memcpy(pc->c, scsi_req(rq)->cmd, sizeof(pc->c));
220 pc->rq = rq; 220 pc->rq = rq;
221 if (blk_rq_bytes(rq)) { 221 if (blk_rq_bytes(rq)) {
222 pc->flags |= PC_FLAG_DMA_OK; 222 pc->flags |= PC_FLAG_DMA_OK;
@@ -246,7 +246,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
246 } else 246 } else
247 printk(KERN_ERR PFX "%s: I/O error\n", drive->name); 247 printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
248 248
249 if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { 249 if (ata_misc_request(rq)) {
250 rq->errors = 0; 250 rq->errors = 0;
251 ide_complete_rq(drive, 0, blk_rq_bytes(rq)); 251 ide_complete_rq(drive, 0, blk_rq_bytes(rq));
252 return ide_stopped; 252 return ide_stopped;
@@ -254,8 +254,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
254 goto out_end; 254 goto out_end;
255 } 255 }
256 256
257 switch (rq->cmd_type) { 257 switch (req_op(rq)) {
258 case REQ_TYPE_FS: 258 default:
259 if (((long)blk_rq_pos(rq) % floppy->bs_factor) || 259 if (((long)blk_rq_pos(rq) % floppy->bs_factor) ||
260 (blk_rq_sectors(rq) % floppy->bs_factor)) { 260 (blk_rq_sectors(rq) % floppy->bs_factor)) {
261 printk(KERN_ERR PFX "%s: unsupported r/w rq size\n", 261 printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
@@ -265,16 +265,21 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
265 pc = &floppy->queued_pc; 265 pc = &floppy->queued_pc;
266 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); 266 idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
267 break; 267 break;
268 case REQ_TYPE_DRV_PRIV: 268 case REQ_OP_SCSI_IN:
269 case REQ_TYPE_ATA_SENSE: 269 case REQ_OP_SCSI_OUT:
270 pc = (struct ide_atapi_pc *)rq->special;
271 break;
272 case REQ_TYPE_BLOCK_PC:
273 pc = &floppy->queued_pc; 270 pc = &floppy->queued_pc;
274 idefloppy_blockpc_cmd(floppy, pc, rq); 271 idefloppy_blockpc_cmd(floppy, pc, rq);
275 break; 272 break;
276 default: 273 case REQ_OP_DRV_IN:
277 BUG(); 274 case REQ_OP_DRV_OUT:
275 switch (ide_req(rq)->type) {
276 case ATA_PRIV_MISC:
277 case ATA_PRIV_SENSE:
278 pc = (struct ide_atapi_pc *)rq->special;
279 break;
280 default:
281 BUG();
282 }
278 } 283 }
279 284
280 ide_prep_sense(drive, rq); 285 ide_prep_sense(drive, rq);
@@ -286,7 +291,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
286 291
287 cmd.rq = rq; 292 cmd.rq = rq;
288 293
289 if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) { 294 if (!blk_rq_is_passthrough(rq) || blk_rq_bytes(rq)) {
290 ide_init_sg_cmd(&cmd, blk_rq_bytes(rq)); 295 ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
291 ide_map_sg(drive, &cmd); 296 ide_map_sg(drive, &cmd);
292 } 297 }
@@ -296,7 +301,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
296 return ide_floppy_issue_pc(drive, &cmd, pc); 301 return ide_floppy_issue_pc(drive, &cmd, pc);
297out_end: 302out_end:
298 drive->failed_pc = NULL; 303 drive->failed_pc = NULL;
299 if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0) 304 if (blk_rq_is_passthrough(rq) && rq->errors == 0)
300 rq->errors = -EIO; 305 rq->errors = -EIO;
301 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); 306 ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
302 return ide_stopped; 307 return ide_stopped;
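
The floppy request handler above now classifies requests in two steps: first by req_op(), then, for driver-private requests, by the ide-specific type stored in the request PDU. A compact sketch of that two-level dispatch, assuming the ide_req()/ATA_PRIV_* helpers this series adds to <linux/ide.h>; the helper and its return strings are purely illustrative:

#include <linux/blkdev.h>
#include <linux/ide.h>

static const char *mydrv_classify(struct request *rq)
{
        switch (req_op(rq)) {
        case REQ_OP_SCSI_IN:
        case REQ_OP_SCSI_OUT:
                return "SG_IO / block-pc passthrough";
        case REQ_OP_DRV_IN:
        case REQ_OP_DRV_OUT:
                switch (ide_req(rq)->type) {
                case ATA_PRIV_MISC:
                        return "driver-private misc command";
                case ATA_PRIV_SENSE:
                        return "autogenerated request sense";
                default:
                        return "other driver-private request";
                }
        default:
                return "filesystem read/write";
        }
}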
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 201e43fcbc94..043b1fb963cb 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -102,7 +102,7 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
102 drive->dev_flags |= IDE_DFLAG_PARKED; 102 drive->dev_flags |= IDE_DFLAG_PARKED;
103 } 103 }
104 104
105 if (rq && rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { 105 if (rq && ata_taskfile_request(rq)) {
106 struct ide_cmd *orig_cmd = rq->special; 106 struct ide_cmd *orig_cmd = rq->special;
107 107
108 if (cmd->tf_flags & IDE_TFLAG_DYN) 108 if (cmd->tf_flags & IDE_TFLAG_DYN)
@@ -135,7 +135,7 @@ EXPORT_SYMBOL(ide_complete_rq);
135 135
136void ide_kill_rq(ide_drive_t *drive, struct request *rq) 136void ide_kill_rq(ide_drive_t *drive, struct request *rq)
137{ 137{
138 u8 drv_req = (rq->cmd_type == REQ_TYPE_DRV_PRIV) && rq->rq_disk; 138 u8 drv_req = ata_misc_request(rq) && rq->rq_disk;
139 u8 media = drive->media; 139 u8 media = drive->media;
140 140
141 drive->failed_pc = NULL; 141 drive->failed_pc = NULL;
@@ -145,7 +145,7 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq)
145 } else { 145 } else {
146 if (media == ide_tape) 146 if (media == ide_tape)
147 rq->errors = IDE_DRV_ERROR_GENERAL; 147 rq->errors = IDE_DRV_ERROR_GENERAL;
148 else if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0) 148 else if (blk_rq_is_passthrough(rq) && rq->errors == 0)
149 rq->errors = -EIO; 149 rq->errors = -EIO;
150 } 150 }
151 151
@@ -279,7 +279,7 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
279 279
280static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq) 280static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq)
281{ 281{
282 u8 cmd = rq->cmd[0]; 282 u8 cmd = scsi_req(rq)->cmd[0];
283 283
284 switch (cmd) { 284 switch (cmd) {
285 case REQ_PARK_HEADS: 285 case REQ_PARK_HEADS:
@@ -340,7 +340,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
340 if (drive->current_speed == 0xff) 340 if (drive->current_speed == 0xff)
341 ide_config_drive_speed(drive, drive->desired_speed); 341 ide_config_drive_speed(drive, drive->desired_speed);
342 342
343 if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) 343 if (ata_taskfile_request(rq))
344 return execute_drive_cmd(drive, rq); 344 return execute_drive_cmd(drive, rq);
345 else if (ata_pm_request(rq)) { 345 else if (ata_pm_request(rq)) {
346 struct ide_pm_state *pm = rq->special; 346 struct ide_pm_state *pm = rq->special;
@@ -353,7 +353,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
353 pm->pm_step == IDE_PM_COMPLETED) 353 pm->pm_step == IDE_PM_COMPLETED)
354 ide_complete_pm_rq(drive, rq); 354 ide_complete_pm_rq(drive, rq);
355 return startstop; 355 return startstop;
356 } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_DRV_PRIV) 356 } else if (!rq->rq_disk && ata_misc_request(rq))
357 /* 357 /*
358 * TODO: Once all ULDs have been modified to 358 * TODO: Once all ULDs have been modified to
359 * check for specific op codes rather than 359 * check for specific op codes rather than
@@ -545,6 +545,7 @@ repeat:
545 goto plug_device; 545 goto plug_device;
546 } 546 }
547 547
548 scsi_req(rq)->resid_len = blk_rq_bytes(rq);
548 hwif->rq = rq; 549 hwif->rq = rq;
549 550
550 spin_unlock_irq(&hwif->lock); 551 spin_unlock_irq(&hwif->lock);
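
The ide-io.c hunk above seeds the residual byte count in the scsi_request PDU before a request is handed to the hardware; the interrupt paths in ide-atapi.c then decrement it as data is transferred. A minimal sketch of that bookkeeping, shown only to illustrate the accessor change (rq->resid_len becomes scsi_req(rq)->resid_len); the helper names are hypothetical:

#include <linux/blkdev.h>
#include <scsi/scsi_request.h>

/* Before starting the request: nothing has been transferred yet. */
static void mydrv_start_rq(struct request *rq)
{
        scsi_req(rq)->resid_len = blk_rq_bytes(rq);
}

/* After each chunk of PIO or DMA completes: account for @done bytes. */
static void mydrv_account(struct request *rq, unsigned int done)
{
        scsi_req(rq)->resid_len -= done;
}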
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index d05db2469209..248a3e0ceb46 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -125,8 +125,9 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg)
125 if (NULL == (void *) arg) { 125 if (NULL == (void *) arg) {
126 struct request *rq; 126 struct request *rq;
127 127
128 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 128 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
129 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 129 scsi_req_init(rq);
130 ide_req(rq)->type = ATA_PRIV_TASKFILE;
130 err = blk_execute_rq(drive->queue, NULL, rq, 0); 131 err = blk_execute_rq(drive->queue, NULL, rq, 0);
131 blk_put_request(rq); 132 blk_put_request(rq);
132 133
@@ -221,10 +222,11 @@ static int generic_drive_reset(ide_drive_t *drive)
221 struct request *rq; 222 struct request *rq;
222 int ret = 0; 223 int ret = 0;
223 224
224 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 225 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
225 rq->cmd_type = REQ_TYPE_DRV_PRIV; 226 scsi_req_init(rq);
226 rq->cmd_len = 1; 227 ide_req(rq)->type = ATA_PRIV_MISC;
227 rq->cmd[0] = REQ_DRIVE_RESET; 228 scsi_req(rq)->cmd_len = 1;
229 scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
228 if (blk_execute_rq(drive->queue, NULL, rq, 1)) 230 if (blk_execute_rq(drive->queue, NULL, rq, 1))
229 ret = rq->errors; 231 ret = rq->errors;
230 blk_put_request(rq); 232 blk_put_request(rq);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 2d7dca56dd24..101aed9a61ca 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -31,10 +31,11 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
31 } 31 }
32 spin_unlock_irq(&hwif->lock); 32 spin_unlock_irq(&hwif->lock);
33 33
34 rq = blk_get_request(q, READ, __GFP_RECLAIM); 34 rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
35 rq->cmd[0] = REQ_PARK_HEADS; 35 scsi_req_init(rq);
36 rq->cmd_len = 1; 36 scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
37 rq->cmd_type = REQ_TYPE_DRV_PRIV; 37 scsi_req(rq)->cmd_len = 1;
38 ide_req(rq)->type = ATA_PRIV_MISC;
38 rq->special = &timeout; 39 rq->special = &timeout;
39 rc = blk_execute_rq(q, NULL, rq, 1); 40 rc = blk_execute_rq(q, NULL, rq, 1);
40 blk_put_request(rq); 41 blk_put_request(rq);
@@ -45,13 +46,14 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
45 * Make sure that *some* command is sent to the drive after the 46 * Make sure that *some* command is sent to the drive after the
46 * timeout has expired, so power management will be reenabled. 47 * timeout has expired, so power management will be reenabled.
47 */ 48 */
48 rq = blk_get_request(q, READ, GFP_NOWAIT); 49 rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
50 scsi_req_init(rq);
49 if (IS_ERR(rq)) 51 if (IS_ERR(rq))
50 goto out; 52 goto out;
51 53
52 rq->cmd[0] = REQ_UNPARK_HEADS; 54 scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS;
53 rq->cmd_len = 1; 55 scsi_req(rq)->cmd_len = 1;
54 rq->cmd_type = REQ_TYPE_DRV_PRIV; 56 ide_req(rq)->type = ATA_PRIV_MISC;
55 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); 57 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT);
56 58
57out: 59out:
@@ -64,7 +66,7 @@ ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq)
64 struct ide_taskfile *tf = &cmd.tf; 66 struct ide_taskfile *tf = &cmd.tf;
65 67
66 memset(&cmd, 0, sizeof(cmd)); 68 memset(&cmd, 0, sizeof(cmd));
67 if (rq->cmd[0] == REQ_PARK_HEADS) { 69 if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) {
68 drive->sleep = *(unsigned long *)rq->special; 70 drive->sleep = *(unsigned long *)rq->special;
69 drive->dev_flags |= IDE_DFLAG_SLEEPING; 71 drive->dev_flags |= IDE_DFLAG_SLEEPING;
70 tf->command = ATA_CMD_IDLEIMMEDIATE; 72 tf->command = ATA_CMD_IDLEIMMEDIATE;
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index a015acdffb39..ec951be4b0c8 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -18,8 +18,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
18 } 18 }
19 19
20 memset(&rqpm, 0, sizeof(rqpm)); 20 memset(&rqpm, 0, sizeof(rqpm));
21 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 21 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
22 rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; 22 scsi_req_init(rq);
23 ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
23 rq->special = &rqpm; 24 rq->special = &rqpm;
24 rqpm.pm_step = IDE_PM_START_SUSPEND; 25 rqpm.pm_step = IDE_PM_START_SUSPEND;
25 if (mesg.event == PM_EVENT_PRETHAW) 26 if (mesg.event == PM_EVENT_PRETHAW)
@@ -88,8 +89,9 @@ int generic_ide_resume(struct device *dev)
88 } 89 }
89 90
90 memset(&rqpm, 0, sizeof(rqpm)); 91 memset(&rqpm, 0, sizeof(rqpm));
91 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 92 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
92 rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; 93 scsi_req_init(rq);
94 ide_req(rq)->type = ATA_PRIV_PM_RESUME;
93 rq->rq_flags |= RQF_PREEMPT; 95 rq->rq_flags |= RQF_PREEMPT;
94 rq->special = &rqpm; 96 rq->special = &rqpm;
95 rqpm.pm_step = IDE_PM_START_RESUME; 97 rqpm.pm_step = IDE_PM_START_RESUME;
@@ -221,10 +223,10 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
221 223
222#ifdef DEBUG_PM 224#ifdef DEBUG_PM
223 printk("%s: completing PM request, %s\n", drive->name, 225 printk("%s: completing PM request, %s\n", drive->name,
224 (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) ? "suspend" : "resume"); 226 (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) ? "suspend" : "resume");
225#endif 227#endif
226 spin_lock_irqsave(q->queue_lock, flags); 228 spin_lock_irqsave(q->queue_lock, flags);
227 if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) 229 if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND)
228 blk_stop_queue(q); 230 blk_stop_queue(q);
229 else 231 else
230 drive->dev_flags &= ~IDE_DFLAG_BLOCKED; 232 drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
@@ -240,11 +242,13 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
240{ 242{
241 struct ide_pm_state *pm = rq->special; 243 struct ide_pm_state *pm = rq->special;
242 244
243 if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND && 245 if (blk_rq_is_private(rq) &&
246 ide_req(rq)->type == ATA_PRIV_PM_SUSPEND &&
244 pm->pm_step == IDE_PM_START_SUSPEND) 247 pm->pm_step == IDE_PM_START_SUSPEND)
245 /* Mark drive blocked when starting the suspend sequence. */ 248 /* Mark drive blocked when starting the suspend sequence. */
246 drive->dev_flags |= IDE_DFLAG_BLOCKED; 249 drive->dev_flags |= IDE_DFLAG_BLOCKED;
247 else if (rq->cmd_type == REQ_TYPE_ATA_PM_RESUME && 250 else if (blk_rq_is_private(rq) &&
251 ide_req(rq)->type == ATA_PRIV_PM_RESUME &&
248 pm->pm_step == IDE_PM_START_RESUME) { 252 pm->pm_step == IDE_PM_START_RESUME) {
249 /* 253 /*
250 * The first thing we do on wakeup is to wait for BSY bit to 254 * The first thing we do on wakeup is to wait for BSY bit to
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 330e319419e6..a74ae8df4bb8 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -741,6 +741,14 @@ static void ide_port_tune_devices(ide_hwif_t *hwif)
741 } 741 }
742} 742}
743 743
744static int ide_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
745{
746 struct ide_request *req = blk_mq_rq_to_pdu(rq);
747
748 req->sreq.sense = req->sense;
749 return 0;
750}
751
744/* 752/*
745 * init request queue 753 * init request queue
746 */ 754 */
@@ -758,11 +766,18 @@ static int ide_init_queue(ide_drive_t *drive)
758 * limits and LBA48 we could raise it but as yet 766 * limits and LBA48 we could raise it but as yet
759 * do not. 767 * do not.
760 */ 768 */
761 769 q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif));
762 q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif));
763 if (!q) 770 if (!q)
764 return 1; 771 return 1;
765 772
773 q->request_fn = do_ide_request;
774 q->init_rq_fn = ide_init_rq;
775 q->cmd_size = sizeof(struct ide_request);
776 if (blk_init_allocated_queue(q) < 0) {
777 blk_cleanup_queue(q);
778 return 1;
779 }
780
766 q->queuedata = drive; 781 q->queuedata = drive;
767 blk_queue_segment_boundary(q, 0xffff); 782 blk_queue_segment_boundary(q, 0xffff);
768 783
@@ -1131,10 +1146,12 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif)
1131 ide_port_for_each_dev(i, drive, hwif) { 1146 ide_port_for_each_dev(i, drive, hwif) {
1132 u8 j = (hwif->index * MAX_DRIVES) + i; 1147 u8 j = (hwif->index * MAX_DRIVES) + i;
1133 u16 *saved_id = drive->id; 1148 u16 *saved_id = drive->id;
1149 struct request *saved_sense_rq = drive->sense_rq;
1134 1150
1135 memset(drive, 0, sizeof(*drive)); 1151 memset(drive, 0, sizeof(*drive));
1136 memset(saved_id, 0, SECTOR_SIZE); 1152 memset(saved_id, 0, SECTOR_SIZE);
1137 drive->id = saved_id; 1153 drive->id = saved_id;
1154 drive->sense_rq = saved_sense_rq;
1138 1155
1139 drive->media = ide_disk; 1156 drive->media = ide_disk;
1140 drive->select = (i << 4) | ATA_DEVICE_OBS; 1157 drive->select = (i << 4) | ATA_DEVICE_OBS;
@@ -1241,6 +1258,7 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
1241 int i; 1258 int i;
1242 1259
1243 ide_port_for_each_dev(i, drive, hwif) { 1260 ide_port_for_each_dev(i, drive, hwif) {
1261 kfree(drive->sense_rq);
1244 kfree(drive->id); 1262 kfree(drive->id);
1245 kfree(drive); 1263 kfree(drive);
1246 } 1264 }
@@ -1248,11 +1266,10 @@ static void ide_port_free_devices(ide_hwif_t *hwif)
1248 1266
1249static int ide_port_alloc_devices(ide_hwif_t *hwif, int node) 1267static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
1250{ 1268{
1269 ide_drive_t *drive;
1251 int i; 1270 int i;
1252 1271
1253 for (i = 0; i < MAX_DRIVES; i++) { 1272 for (i = 0; i < MAX_DRIVES; i++) {
1254 ide_drive_t *drive;
1255
1256 drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node); 1273 drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node);
1257 if (drive == NULL) 1274 if (drive == NULL)
1258 goto out_nomem; 1275 goto out_nomem;
@@ -1267,12 +1284,21 @@ static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
1267 */ 1284 */
1268 drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node); 1285 drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node);
1269 if (drive->id == NULL) 1286 if (drive->id == NULL)
1270 goto out_nomem; 1287 goto out_free_drive;
1288
1289 drive->sense_rq = kmalloc(sizeof(struct request) +
1290 sizeof(struct ide_request), GFP_KERNEL);
1291 if (!drive->sense_rq)
1292 goto out_free_id;
1271 1293
1272 hwif->devices[i] = drive; 1294 hwif->devices[i] = drive;
1273 } 1295 }
1274 return 0; 1296 return 0;
1275 1297
1298out_free_id:
1299 kfree(drive->id);
1300out_free_drive:
1301 kfree(drive);
1276out_nomem: 1302out_nomem:
1277 ide_port_free_devices(hwif); 1303 ide_port_free_devices(hwif);
1278 return -ENOMEM; 1304 return -ENOMEM;
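
ide_init_queue() above replaces blk_init_queue_node() with a two-step setup: allocate the queue, point request_fn/init_rq_fn/cmd_size at the driver so every request is allocated with a struct ide_request payload (whose sense pointer ide_init_rq() wires up), then finish with blk_init_allocated_queue(). A condensed sketch of that shape, using only the calls visible in the hunk; the wrapper function and its NULL-on-failure convention are illustrative:

/* Condensed sketch of the queue setup above; not standalone-buildable. */
static struct request_queue *example_ide_alloc_queue(ide_drive_t *drive,
						     ide_hwif_t *hwif)
{
	struct request_queue *q;

	q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif));
	if (!q)
		return NULL;

	q->request_fn = do_ide_request;			/* legacy request_fn path */
	q->init_rq_fn = ide_init_rq;			/* runs for every allocated request */
	q->cmd_size = sizeof(struct ide_request);	/* per-request PDU size */

	if (blk_init_allocated_queue(q) < 0) {
		blk_cleanup_queue(q);
		return NULL;
	}

	q->queuedata = drive;
	return q;
}
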
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 9ecf4e35adcd..3c1b7974d66d 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -282,7 +282,7 @@ static void idetape_analyze_error(ide_drive_t *drive)
282 282
283 /* correct remaining bytes to transfer */ 283 /* correct remaining bytes to transfer */
284 if (pc->flags & PC_FLAG_DMA_ERROR) 284 if (pc->flags & PC_FLAG_DMA_ERROR)
285 rq->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]); 285 scsi_req(rq)->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]);
286 286
287 /* 287 /*
288 * If error was the result of a zero-length read or write command, 288 * If error was the result of a zero-length read or write command,
@@ -316,7 +316,7 @@ static void idetape_analyze_error(ide_drive_t *drive)
316 pc->flags |= PC_FLAG_ABORT; 316 pc->flags |= PC_FLAG_ABORT;
317 } 317 }
318 if (!(pc->flags & PC_FLAG_ABORT) && 318 if (!(pc->flags & PC_FLAG_ABORT) &&
319 (blk_rq_bytes(rq) - rq->resid_len)) 319 (blk_rq_bytes(rq) - scsi_req(rq)->resid_len))
320 pc->retries = IDETAPE_MAX_PC_RETRIES + 1; 320 pc->retries = IDETAPE_MAX_PC_RETRIES + 1;
321 } 321 }
322} 322}
@@ -348,7 +348,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
348 "itself - Aborting request!\n"); 348 "itself - Aborting request!\n");
349 } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) { 349 } else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
350 unsigned int blocks = 350 unsigned int blocks =
351 (blk_rq_bytes(rq) - rq->resid_len) / tape->blk_size; 351 (blk_rq_bytes(rq) - scsi_req(rq)->resid_len) / tape->blk_size;
352 352
353 tape->avg_size += blocks * tape->blk_size; 353 tape->avg_size += blocks * tape->blk_size;
354 354
@@ -560,7 +560,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
560 pc->flags |= PC_FLAG_WRITING; 560 pc->flags |= PC_FLAG_WRITING;
561 } 561 }
562 562
563 memcpy(rq->cmd, pc->c, 12); 563 memcpy(scsi_req(rq)->cmd, pc->c, 12);
564} 564}
565 565
566static ide_startstop_t idetape_do_request(ide_drive_t *drive, 566static ide_startstop_t idetape_do_request(ide_drive_t *drive,
@@ -570,14 +570,16 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
570 idetape_tape_t *tape = drive->driver_data; 570 idetape_tape_t *tape = drive->driver_data;
571 struct ide_atapi_pc *pc = NULL; 571 struct ide_atapi_pc *pc = NULL;
572 struct ide_cmd cmd; 572 struct ide_cmd cmd;
573 struct scsi_request *req = scsi_req(rq);
573 u8 stat; 574 u8 stat;
574 575
575 ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, sector: %llu, nr_sectors: %u", 576 ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, sector: %llu, nr_sectors: %u",
576 rq->cmd[0], (unsigned long long)blk_rq_pos(rq), 577 req->cmd[0], (unsigned long long)blk_rq_pos(rq),
577 blk_rq_sectors(rq)); 578 blk_rq_sectors(rq));
578 579
579 BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV || 580 BUG_ON(!blk_rq_is_private(rq));
580 rq->cmd_type == REQ_TYPE_ATA_SENSE)); 581 BUG_ON(ide_req(rq)->type != ATA_PRIV_MISC &&
582 ide_req(rq)->type != ATA_PRIV_SENSE);
581 583
582 /* Retry a failed packet command */ 584 /* Retry a failed packet command */
583 if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) { 585 if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) {
@@ -592,7 +594,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
592 stat = hwif->tp_ops->read_status(hwif); 594 stat = hwif->tp_ops->read_status(hwif);
593 595
594 if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 && 596 if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 &&
595 (rq->cmd[13] & REQ_IDETAPE_PC2) == 0) 597 (req->cmd[13] & REQ_IDETAPE_PC2) == 0)
596 drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC; 598 drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
597 599
598 if (drive->dev_flags & IDE_DFLAG_POST_RESET) { 600 if (drive->dev_flags & IDE_DFLAG_POST_RESET) {
@@ -609,7 +611,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
609 } else if (time_after(jiffies, tape->dsc_timeout)) { 611 } else if (time_after(jiffies, tape->dsc_timeout)) {
610 printk(KERN_ERR "ide-tape: %s: DSC timeout\n", 612 printk(KERN_ERR "ide-tape: %s: DSC timeout\n",
611 tape->name); 613 tape->name);
612 if (rq->cmd[13] & REQ_IDETAPE_PC2) { 614 if (req->cmd[13] & REQ_IDETAPE_PC2) {
613 idetape_media_access_finished(drive); 615 idetape_media_access_finished(drive);
614 return ide_stopped; 616 return ide_stopped;
615 } else { 617 } else {
@@ -626,23 +628,23 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
626 tape->postponed_rq = false; 628 tape->postponed_rq = false;
627 } 629 }
628 630
629 if (rq->cmd[13] & REQ_IDETAPE_READ) { 631 if (req->cmd[13] & REQ_IDETAPE_READ) {
630 pc = &tape->queued_pc; 632 pc = &tape->queued_pc;
631 ide_tape_create_rw_cmd(tape, pc, rq, READ_6); 633 ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
632 goto out; 634 goto out;
633 } 635 }
634 if (rq->cmd[13] & REQ_IDETAPE_WRITE) { 636 if (req->cmd[13] & REQ_IDETAPE_WRITE) {
635 pc = &tape->queued_pc; 637 pc = &tape->queued_pc;
636 ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6); 638 ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6);
637 goto out; 639 goto out;
638 } 640 }
639 if (rq->cmd[13] & REQ_IDETAPE_PC1) { 641 if (req->cmd[13] & REQ_IDETAPE_PC1) {
640 pc = (struct ide_atapi_pc *)rq->special; 642 pc = (struct ide_atapi_pc *)rq->special;
641 rq->cmd[13] &= ~(REQ_IDETAPE_PC1); 643 req->cmd[13] &= ~(REQ_IDETAPE_PC1);
642 rq->cmd[13] |= REQ_IDETAPE_PC2; 644 req->cmd[13] |= REQ_IDETAPE_PC2;
643 goto out; 645 goto out;
644 } 646 }
645 if (rq->cmd[13] & REQ_IDETAPE_PC2) { 647 if (req->cmd[13] & REQ_IDETAPE_PC2) {
646 idetape_media_access_finished(drive); 648 idetape_media_access_finished(drive);
647 return ide_stopped; 649 return ide_stopped;
648 } 650 }
@@ -852,9 +854,10 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
852 BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); 854 BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
853 BUG_ON(size < 0 || size % tape->blk_size); 855 BUG_ON(size < 0 || size % tape->blk_size);
854 856
855 rq = blk_get_request(drive->queue, READ, __GFP_RECLAIM); 857 rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
856 rq->cmd_type = REQ_TYPE_DRV_PRIV; 858 scsi_req_init(rq);
857 rq->cmd[13] = cmd; 859 ide_req(rq)->type = ATA_PRIV_MISC;
860 scsi_req(rq)->cmd[13] = cmd;
858 rq->rq_disk = tape->disk; 861 rq->rq_disk = tape->disk;
859 rq->__sector = tape->first_frame; 862 rq->__sector = tape->first_frame;
860 863
@@ -868,7 +871,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
868 blk_execute_rq(drive->queue, tape->disk, rq, 0); 871 blk_execute_rq(drive->queue, tape->disk, rq, 0);
869 872
870 /* calculate the number of transferred bytes and update buffer state */ 873 /* calculate the number of transferred bytes and update buffer state */
871 size -= rq->resid_len; 874 size -= scsi_req(rq)->resid_len;
872 tape->cur = tape->buf; 875 tape->cur = tape->buf;
873 if (cmd == REQ_IDETAPE_READ) 876 if (cmd == REQ_IDETAPE_READ)
874 tape->valid = size; 877 tape->valid = size;
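
In the ide-tape.c changes above, the command bytes and the residual count are no longer read from struct request directly; they come from the scsi_request embedded in the request payload, via the scsi_req() accessor. A short hedged fragment, assuming the request was prepared with scsi_req_init() as in the hunks; the helper name is made up:

/* Fragment: read the CDB opcode and the bytes actually transferred. */
static unsigned int example_tape_bytes_done(struct request *rq)
{
	struct scsi_request *req = scsi_req(rq);	/* payload accessor */

	if (req->cmd[0] == READ_6 || req->cmd[0] == WRITE_6)	/* was rq->cmd[0] */
		return blk_rq_bytes(rq) - req->resid_len;	/* was rq->resid_len */
	return 0;
}
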
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index a716693417a3..247b9faccce1 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -428,10 +428,12 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
428{ 428{
429 struct request *rq; 429 struct request *rq;
430 int error; 430 int error;
431 int rw = !(cmd->tf_flags & IDE_TFLAG_WRITE) ? READ : WRITE;
432 431
433 rq = blk_get_request(drive->queue, rw, __GFP_RECLAIM); 432 rq = blk_get_request(drive->queue,
434 rq->cmd_type = REQ_TYPE_ATA_TASKFILE; 433 (cmd->tf_flags & IDE_TFLAG_WRITE) ?
434 REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
435 scsi_req_init(rq);
436 ide_req(rq)->type = ATA_PRIV_TASKFILE;
435 437
436 /* 438 /*
437 * (ks) We transfer currently only whole sectors. 439 * (ks) We transfer currently only whole sectors.
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index 247853ea1368..c3062b53056f 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -54,7 +54,7 @@
54#define DRV_NAME "sis5513" 54#define DRV_NAME "sis5513"
55 55
56/* registers layout and init values are chipset family dependent */ 56/* registers layout and init values are chipset family dependent */
57 57#undef ATA_16
58#define ATA_16 0x01 58#define ATA_16 0x01
59#define ATA_33 0x02 59#define ATA_33 0x02
60#define ATA_66 0x03 60#define ATA_66 0x03
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 2f5d5f4a4c75..052714106b7b 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -26,15 +26,6 @@ config NVM_DEBUG
26 26
27 It is required to create/remove targets without IOCTLs. 27 It is required to create/remove targets without IOCTLs.
28 28
29config NVM_GENNVM
30 tristate "General Non-Volatile Memory Manager for Open-Channel SSDs"
31 ---help---
32 Non-volatile memory media manager for Open-Channel SSDs that implements
33 physical media metadata management and block provisioning API.
34
35 This is the standard media manager for using Open-Channel SSDs, and
36 required for targets to be instantiated.
37
38config NVM_RRPC 29config NVM_RRPC
39 tristate "Round-robin Hybrid Open-Channel SSD target" 30 tristate "Round-robin Hybrid Open-Channel SSD target"
40 ---help--- 31 ---help---
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index a7a0a22cf1a5..b2a39e2d2895 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -2,6 +2,5 @@
2# Makefile for Open-Channel SSDs. 2# Makefile for Open-Channel SSDs.
3# 3#
4 4
5obj-$(CONFIG_NVM) := core.o sysblk.o 5obj-$(CONFIG_NVM) := core.o
6obj-$(CONFIG_NVM_GENNVM) += gennvm.o
7obj-$(CONFIG_NVM_RRPC) += rrpc.o 6obj-$(CONFIG_NVM_RRPC) += rrpc.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 02240a0b39c9..5262ba66a7a7 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -29,10 +29,483 @@
29 29
30static LIST_HEAD(nvm_tgt_types); 30static LIST_HEAD(nvm_tgt_types);
31static DECLARE_RWSEM(nvm_tgtt_lock); 31static DECLARE_RWSEM(nvm_tgtt_lock);
32static LIST_HEAD(nvm_mgrs);
33static LIST_HEAD(nvm_devices); 32static LIST_HEAD(nvm_devices);
34static DECLARE_RWSEM(nvm_lock); 33static DECLARE_RWSEM(nvm_lock);
35 34
35/* Map between virtual and physical channel and lun */
36struct nvm_ch_map {
37 int ch_off;
38 int nr_luns;
39 int *lun_offs;
40};
41
42struct nvm_dev_map {
43 struct nvm_ch_map *chnls;
44 int nr_chnls;
45};
46
47struct nvm_area {
48 struct list_head list;
49 sector_t begin;
50 sector_t end; /* end is excluded */
51};
52
53static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
54{
55 struct nvm_target *tgt;
56
57 list_for_each_entry(tgt, &dev->targets, list)
58 if (!strcmp(name, tgt->disk->disk_name))
59 return tgt;
60
61 return NULL;
62}
63
64static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end)
65{
66 int i;
67
68 for (i = lun_begin; i <= lun_end; i++) {
69 if (test_and_set_bit(i, dev->lun_map)) {
70 pr_err("nvm: lun %d already allocated\n", i);
71 goto err;
72 }
73 }
74
75 return 0;
76err:
77 while (--i > lun_begin)
78 clear_bit(i, dev->lun_map);
79
80 return -EBUSY;
81}
82
83static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin,
84 int lun_end)
85{
86 int i;
87
88 for (i = lun_begin; i <= lun_end; i++)
89 WARN_ON(!test_and_clear_bit(i, dev->lun_map));
90}
91
92static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
93{
94 struct nvm_dev *dev = tgt_dev->parent;
95 struct nvm_dev_map *dev_map = tgt_dev->map;
96 int i, j;
97
98 for (i = 0; i < dev_map->nr_chnls; i++) {
99 struct nvm_ch_map *ch_map = &dev_map->chnls[i];
100 int *lun_offs = ch_map->lun_offs;
101 int ch = i + ch_map->ch_off;
102
103 for (j = 0; j < ch_map->nr_luns; j++) {
104 int lun = j + lun_offs[j];
105 int lunid = (ch * dev->geo.luns_per_chnl) + lun;
106
107 WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
108 }
109
110 kfree(ch_map->lun_offs);
111 }
112
113 kfree(dev_map->chnls);
114 kfree(dev_map);
115
116 kfree(tgt_dev->luns);
117 kfree(tgt_dev);
118}
119
120static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
121 int lun_begin, int lun_end)
122{
123 struct nvm_tgt_dev *tgt_dev = NULL;
124 struct nvm_dev_map *dev_rmap = dev->rmap;
125 struct nvm_dev_map *dev_map;
126 struct ppa_addr *luns;
127 int nr_luns = lun_end - lun_begin + 1;
128 int luns_left = nr_luns;
129 int nr_chnls = nr_luns / dev->geo.luns_per_chnl;
130 int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl;
131 int bch = lun_begin / dev->geo.luns_per_chnl;
132 int blun = lun_begin % dev->geo.luns_per_chnl;
133 int lunid = 0;
134 int lun_balanced = 1;
135 int prev_nr_luns;
136 int i, j;
137
138 nr_chnls = nr_luns / dev->geo.luns_per_chnl;
139 nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
140
141 dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
142 if (!dev_map)
143 goto err_dev;
144
145 dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map),
146 GFP_KERNEL);
147 if (!dev_map->chnls)
148 goto err_chnls;
149
150 luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL);
151 if (!luns)
152 goto err_luns;
153
154 prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ?
155 dev->geo.luns_per_chnl : luns_left;
156 for (i = 0; i < nr_chnls; i++) {
157 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
158 int *lun_roffs = ch_rmap->lun_offs;
159 struct nvm_ch_map *ch_map = &dev_map->chnls[i];
160 int *lun_offs;
161 int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ?
162 dev->geo.luns_per_chnl : luns_left;
163
164 if (lun_balanced && prev_nr_luns != luns_in_chnl)
165 lun_balanced = 0;
166
167 ch_map->ch_off = ch_rmap->ch_off = bch;
168 ch_map->nr_luns = luns_in_chnl;
169
170 lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
171 if (!lun_offs)
172 goto err_ch;
173
174 for (j = 0; j < luns_in_chnl; j++) {
175 luns[lunid].ppa = 0;
176 luns[lunid].g.ch = i;
177 luns[lunid++].g.lun = j;
178
179 lun_offs[j] = blun;
180 lun_roffs[j + blun] = blun;
181 }
182
183 ch_map->lun_offs = lun_offs;
184
185 /* when starting a new channel, lun offset is reset */
186 blun = 0;
187 luns_left -= luns_in_chnl;
188 }
189
190 dev_map->nr_chnls = nr_chnls;
191
192 tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
193 if (!tgt_dev)
194 goto err_ch;
195
196 memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
197 /* Target device only owns a portion of the physical device */
198 tgt_dev->geo.nr_chnls = nr_chnls;
199 tgt_dev->geo.nr_luns = nr_luns;
200 tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1;
201 tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
202 tgt_dev->q = dev->q;
203 tgt_dev->map = dev_map;
204 tgt_dev->luns = luns;
205 memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
206
207 tgt_dev->parent = dev;
208
209 return tgt_dev;
210err_ch:
211 while (--i > 0)
212 kfree(dev_map->chnls[i].lun_offs);
213 kfree(luns);
214err_luns:
215 kfree(dev_map->chnls);
216err_chnls:
217 kfree(dev_map);
218err_dev:
219 return tgt_dev;
220}
221
222static const struct block_device_operations nvm_fops = {
223 .owner = THIS_MODULE,
224};
225
226static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
227{
228 struct nvm_ioctl_create_simple *s = &create->conf.s;
229 struct request_queue *tqueue;
230 struct gendisk *tdisk;
231 struct nvm_tgt_type *tt;
232 struct nvm_target *t;
233 struct nvm_tgt_dev *tgt_dev;
234 void *targetdata;
235
236 tt = nvm_find_target_type(create->tgttype, 1);
237 if (!tt) {
238 pr_err("nvm: target type %s not found\n", create->tgttype);
239 return -EINVAL;
240 }
241
242 mutex_lock(&dev->mlock);
243 t = nvm_find_target(dev, create->tgtname);
244 if (t) {
245 pr_err("nvm: target name already exists.\n");
246 mutex_unlock(&dev->mlock);
247 return -EINVAL;
248 }
249 mutex_unlock(&dev->mlock);
250
251 if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
252 return -ENOMEM;
253
254 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
255 if (!t)
256 goto err_reserve;
257
258 tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end);
259 if (!tgt_dev) {
260 pr_err("nvm: could not create target device\n");
261 goto err_t;
262 }
263
264 tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
265 if (!tqueue)
266 goto err_dev;
267 blk_queue_make_request(tqueue, tt->make_rq);
268
269 tdisk = alloc_disk(0);
270 if (!tdisk)
271 goto err_queue;
272
273 sprintf(tdisk->disk_name, "%s", create->tgtname);
274 tdisk->flags = GENHD_FL_EXT_DEVT;
275 tdisk->major = 0;
276 tdisk->first_minor = 0;
277 tdisk->fops = &nvm_fops;
278 tdisk->queue = tqueue;
279
280 targetdata = tt->init(tgt_dev, tdisk);
281 if (IS_ERR(targetdata))
282 goto err_init;
283
284 tdisk->private_data = targetdata;
285 tqueue->queuedata = targetdata;
286
287 blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
288
289 set_capacity(tdisk, tt->capacity(targetdata));
290 add_disk(tdisk);
291
292 if (tt->sysfs_init && tt->sysfs_init(tdisk))
293 goto err_sysfs;
294
295 t->type = tt;
296 t->disk = tdisk;
297 t->dev = tgt_dev;
298
299 mutex_lock(&dev->mlock);
300 list_add_tail(&t->list, &dev->targets);
301 mutex_unlock(&dev->mlock);
302
303 return 0;
304err_sysfs:
305 if (tt->exit)
306 tt->exit(targetdata);
307err_init:
308 put_disk(tdisk);
309err_queue:
310 blk_cleanup_queue(tqueue);
311err_dev:
312 nvm_remove_tgt_dev(tgt_dev);
313err_t:
314 kfree(t);
315err_reserve:
316 nvm_release_luns_err(dev, s->lun_begin, s->lun_end);
317 return -ENOMEM;
318}
319
320static void __nvm_remove_target(struct nvm_target *t)
321{
322 struct nvm_tgt_type *tt = t->type;
323 struct gendisk *tdisk = t->disk;
324 struct request_queue *q = tdisk->queue;
325
326 del_gendisk(tdisk);
327 blk_cleanup_queue(q);
328
329 if (tt->sysfs_exit)
330 tt->sysfs_exit(tdisk);
331
332 if (tt->exit)
333 tt->exit(tdisk->private_data);
334
335 nvm_remove_tgt_dev(t->dev);
336 put_disk(tdisk);
337
338 list_del(&t->list);
339 kfree(t);
340}
341
342/**
343 * nvm_remove_tgt - Removes a target from the media manager
344 * @dev: device
345 * @remove: ioctl structure with target name to remove.
346 *
347 * Returns:
348 * 0: on success
349 * 1: on not found
350 * <0: on error
351 */
352static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
353{
354 struct nvm_target *t;
355
356 mutex_lock(&dev->mlock);
357 t = nvm_find_target(dev, remove->tgtname);
358 if (!t) {
359 mutex_unlock(&dev->mlock);
360 return 1;
361 }
362 __nvm_remove_target(t);
363 mutex_unlock(&dev->mlock);
364
365 return 0;
366}
367
368static int nvm_register_map(struct nvm_dev *dev)
369{
370 struct nvm_dev_map *rmap;
371 int i, j;
372
373 rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
374 if (!rmap)
375 goto err_rmap;
376
377 rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map),
378 GFP_KERNEL);
379 if (!rmap->chnls)
380 goto err_chnls;
381
382 for (i = 0; i < dev->geo.nr_chnls; i++) {
383 struct nvm_ch_map *ch_rmap;
384 int *lun_roffs;
385 int luns_in_chnl = dev->geo.luns_per_chnl;
386
387 ch_rmap = &rmap->chnls[i];
388
389 ch_rmap->ch_off = -1;
390 ch_rmap->nr_luns = luns_in_chnl;
391
392 lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
393 if (!lun_roffs)
394 goto err_ch;
395
396 for (j = 0; j < luns_in_chnl; j++)
397 lun_roffs[j] = -1;
398
399 ch_rmap->lun_offs = lun_roffs;
400 }
401
402 dev->rmap = rmap;
403
404 return 0;
405err_ch:
406 while (--i >= 0)
407 kfree(rmap->chnls[i].lun_offs);
408err_chnls:
409 kfree(rmap);
410err_rmap:
411 return -ENOMEM;
412}
413
414static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
415{
416 struct nvm_dev_map *dev_map = tgt_dev->map;
417 struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch];
418 int lun_off = ch_map->lun_offs[p->g.lun];
419
420 p->g.ch += ch_map->ch_off;
421 p->g.lun += lun_off;
422}
423
424static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
425{
426 struct nvm_dev *dev = tgt_dev->parent;
427 struct nvm_dev_map *dev_rmap = dev->rmap;
428 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch];
429 int lun_roff = ch_rmap->lun_offs[p->g.lun];
430
431 p->g.ch -= ch_rmap->ch_off;
432 p->g.lun -= lun_roff;
433}
434
435static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
436 struct ppa_addr *ppa_list, int nr_ppas)
437{
438 int i;
439
440 for (i = 0; i < nr_ppas; i++) {
441 nvm_map_to_dev(tgt_dev, &ppa_list[i]);
442 ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]);
443 }
444}
445
446static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
447 struct ppa_addr *ppa_list, int nr_ppas)
448{
449 int i;
450
451 for (i = 0; i < nr_ppas; i++) {
452 ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]);
453 nvm_map_to_tgt(tgt_dev, &ppa_list[i]);
454 }
455}
456
457static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
458{
459 if (rqd->nr_ppas == 1) {
460 nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1);
461 return;
462 }
463
464 nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
465}
466
467static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
468{
469 if (rqd->nr_ppas == 1) {
470 nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1);
471 return;
472 }
473
474 nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
475}
476
477void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
478 int len)
479{
480 struct nvm_geo *geo = &dev->geo;
481 struct nvm_dev_map *dev_rmap = dev->rmap;
482 u64 i;
483
484 for (i = 0; i < len; i++) {
485 struct nvm_ch_map *ch_rmap;
486 int *lun_roffs;
487 struct ppa_addr gaddr;
488 u64 pba = le64_to_cpu(entries[i]);
489 int off;
490 u64 diff;
491
492 if (!pba)
493 continue;
494
495 gaddr = linear_to_generic_addr(geo, pba);
496 ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
497 lun_roffs = ch_rmap->lun_offs;
498
499 off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
500
501 diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
502 (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
503
504 entries[i] -= cpu_to_le64(diff);
505 }
506}
507EXPORT_SYMBOL(nvm_part_to_tgt);
508
36struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) 509struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock)
37{ 510{
38 struct nvm_tgt_type *tmp, *tt = NULL; 511 struct nvm_tgt_type *tmp, *tt = NULL;
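
The core.c additions above absorb the old gennvm media manager: each target carries an nvm_dev_map, and nvm_map_to_dev()/nvm_map_to_tgt() add or subtract the per-channel and per-LUN offsets recorded at target creation. A small standalone model of that translation, simplified to a single LUN offset per channel and using made-up geometry, just to show that the mapping is a reversible offset shift:

#include <assert.h>
#include <stdio.h>

/* Toy model of the offset tables kept in struct nvm_dev_map (simplified). */
struct ch_map_model { int ch_off; int lun_off; };

static void map_to_dev(const struct ch_map_model *m, int *ch, int *lun)
{
	*ch += m->ch_off;	/* target channel -> device channel */
	*lun += m->lun_off;	/* target lun -> device lun */
}

static void map_to_tgt(const struct ch_map_model *m, int *ch, int *lun)
{
	*ch -= m->ch_off;
	*lun -= m->lun_off;
}

int main(void)
{
	/* e.g. a target whose LUN range starts at device channel 2, lun 4 */
	struct ch_map_model m = { .ch_off = 2, .lun_off = 4 };
	int ch = 0, lun = 1;	/* target-relative address */

	map_to_dev(&m, &ch, &lun);
	printf("device address: ch=%d lun=%d\n", ch, lun);	/* ch=2 lun=5 */

	map_to_tgt(&m, &ch, &lun);
	assert(ch == 0 && lun == 1);	/* round trip restores the target address */
	return 0;
}
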
@@ -92,78 +565,6 @@ void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
92} 565}
93EXPORT_SYMBOL(nvm_dev_dma_free); 566EXPORT_SYMBOL(nvm_dev_dma_free);
94 567
95static struct nvmm_type *nvm_find_mgr_type(const char *name)
96{
97 struct nvmm_type *mt;
98
99 list_for_each_entry(mt, &nvm_mgrs, list)
100 if (!strcmp(name, mt->name))
101 return mt;
102
103 return NULL;
104}
105
106static struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev)
107{
108 struct nvmm_type *mt;
109 int ret;
110
111 lockdep_assert_held(&nvm_lock);
112
113 list_for_each_entry(mt, &nvm_mgrs, list) {
114 if (strncmp(dev->sb.mmtype, mt->name, NVM_MMTYPE_LEN))
115 continue;
116
117 ret = mt->register_mgr(dev);
118 if (ret < 0) {
119 pr_err("nvm: media mgr failed to init (%d) on dev %s\n",
120 ret, dev->name);
121 return NULL; /* initialization failed */
122 } else if (ret > 0)
123 return mt;
124 }
125
126 return NULL;
127}
128
129int nvm_register_mgr(struct nvmm_type *mt)
130{
131 struct nvm_dev *dev;
132 int ret = 0;
133
134 down_write(&nvm_lock);
135 if (nvm_find_mgr_type(mt->name)) {
136 ret = -EEXIST;
137 goto finish;
138 } else {
139 list_add(&mt->list, &nvm_mgrs);
140 }
141
142 /* try to register media mgr if any device have none configured */
143 list_for_each_entry(dev, &nvm_devices, devices) {
144 if (dev->mt)
145 continue;
146
147 dev->mt = nvm_init_mgr(dev);
148 }
149finish:
150 up_write(&nvm_lock);
151
152 return ret;
153}
154EXPORT_SYMBOL(nvm_register_mgr);
155
156void nvm_unregister_mgr(struct nvmm_type *mt)
157{
158 if (!mt)
159 return;
160
161 down_write(&nvm_lock);
162 list_del(&mt->list);
163 up_write(&nvm_lock);
164}
165EXPORT_SYMBOL(nvm_unregister_mgr);
166
167static struct nvm_dev *nvm_find_nvm_dev(const char *name) 568static struct nvm_dev *nvm_find_nvm_dev(const char *name)
168{ 569{
169 struct nvm_dev *dev; 570 struct nvm_dev *dev;
@@ -175,53 +576,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
175 return NULL; 576 return NULL;
176} 577}
177 578
178static void nvm_tgt_generic_to_addr_mode(struct nvm_tgt_dev *tgt_dev,
179 struct nvm_rq *rqd)
180{
181 struct nvm_dev *dev = tgt_dev->parent;
182 int i;
183
184 if (rqd->nr_ppas > 1) {
185 for (i = 0; i < rqd->nr_ppas; i++) {
186 rqd->ppa_list[i] = dev->mt->trans_ppa(tgt_dev,
187 rqd->ppa_list[i], TRANS_TGT_TO_DEV);
188 rqd->ppa_list[i] = generic_to_dev_addr(dev,
189 rqd->ppa_list[i]);
190 }
191 } else {
192 rqd->ppa_addr = dev->mt->trans_ppa(tgt_dev, rqd->ppa_addr,
193 TRANS_TGT_TO_DEV);
194 rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
195 }
196}
197
198int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
199 int type)
200{
201 struct nvm_rq rqd;
202 int ret;
203
204 if (nr_ppas > dev->ops->max_phys_sect) {
205 pr_err("nvm: unable to update all sysblocks atomically\n");
206 return -EINVAL;
207 }
208
209 memset(&rqd, 0, sizeof(struct nvm_rq));
210
211 nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
212 nvm_generic_to_addr_mode(dev, &rqd);
213
214 ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
215 nvm_free_rqd_ppalist(dev, &rqd);
216 if (ret) {
217 pr_err("nvm: sysblk failed bb mark\n");
218 return -EINVAL;
219 }
220
221 return 0;
222}
223EXPORT_SYMBOL(nvm_set_bb_tbl);
224
225int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, 579int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
226 int nr_ppas, int type) 580 int nr_ppas, int type)
227{ 581{
@@ -237,12 +591,12 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
237 memset(&rqd, 0, sizeof(struct nvm_rq)); 591 memset(&rqd, 0, sizeof(struct nvm_rq));
238 592
239 nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); 593 nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
240 nvm_tgt_generic_to_addr_mode(tgt_dev, &rqd); 594 nvm_rq_tgt_to_dev(tgt_dev, &rqd);
241 595
242 ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); 596 ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
243 nvm_free_rqd_ppalist(dev, &rqd); 597 nvm_free_rqd_ppalist(dev, &rqd);
244 if (ret) { 598 if (ret) {
245 pr_err("nvm: sysblk failed bb mark\n"); 599 pr_err("nvm: failed bb mark\n");
246 return -EINVAL; 600 return -EINVAL;
247 } 601 }
248 602
@@ -262,15 +616,42 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
262{ 616{
263 struct nvm_dev *dev = tgt_dev->parent; 617 struct nvm_dev *dev = tgt_dev->parent;
264 618
265 return dev->mt->submit_io(tgt_dev, rqd); 619 if (!dev->ops->submit_io)
620 return -ENODEV;
621
622 nvm_rq_tgt_to_dev(tgt_dev, rqd);
623
624 rqd->dev = tgt_dev;
625 return dev->ops->submit_io(dev, rqd);
266} 626}
267EXPORT_SYMBOL(nvm_submit_io); 627EXPORT_SYMBOL(nvm_submit_io);
268 628
269int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, int flags) 629int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags)
270{ 630{
271 struct nvm_dev *dev = tgt_dev->parent; 631 struct nvm_dev *dev = tgt_dev->parent;
632 struct nvm_rq rqd;
633 int ret;
634
635 if (!dev->ops->erase_block)
636 return 0;
637
638 nvm_map_to_dev(tgt_dev, ppas);
639
640 memset(&rqd, 0, sizeof(struct nvm_rq));
641
642 ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1);
643 if (ret)
644 return ret;
645
646 nvm_rq_tgt_to_dev(tgt_dev, &rqd);
647
648 rqd.flags = flags;
649
650 ret = dev->ops->erase_block(dev, &rqd);
272 651
273 return dev->mt->erase_blk(tgt_dev, p, flags); 652 nvm_free_rqd_ppalist(dev, &rqd);
653
654 return ret;
274} 655}
275EXPORT_SYMBOL(nvm_erase_blk); 656EXPORT_SYMBOL(nvm_erase_blk);
276 657
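
With the media-manager hooks removed, nvm_submit_io() and nvm_erase_blk() above translate the target's PPAs themselves and call straight into dev->ops. From a target's point of view the entry points keep the signatures shown in the hunk; a hedged sketch of a caller, where the wrapper names and the zero flags value are assumptions:

/* Fragment: target-side callers of the reworked entry points. */
static int example_target_submit(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
	/* core maps tgt->dev PPAs and returns -ENODEV if the driver
	 * has no ->submit_io hook */
	return nvm_submit_io(tgt_dev, rqd);
}

static int example_target_erase(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa)
{
	/* core builds a one-entry PPA list, maps it and calls ->erase_block;
	 * it simply returns 0 when the driver has no erase hook */
	return nvm_erase_blk(tgt_dev, &ppa, 0);
}
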
@@ -289,46 +670,67 @@ EXPORT_SYMBOL(nvm_get_l2p_tbl);
289int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len) 670int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len)
290{ 671{
291 struct nvm_dev *dev = tgt_dev->parent; 672 struct nvm_dev *dev = tgt_dev->parent;
673 struct nvm_geo *geo = &dev->geo;
674 struct nvm_area *area, *prev, *next;
675 sector_t begin = 0;
676 sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9;
292 677
293 return dev->mt->get_area(dev, lba, len); 678 if (len > max_sectors)
294} 679 return -EINVAL;
295EXPORT_SYMBOL(nvm_get_area);
296 680
297void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t lba) 681 area = kmalloc(sizeof(struct nvm_area), GFP_KERNEL);
298{ 682 if (!area)
299 struct nvm_dev *dev = tgt_dev->parent; 683 return -ENOMEM;
300 684
301 dev->mt->put_area(dev, lba); 685 prev = NULL;
302}
303EXPORT_SYMBOL(nvm_put_area);
304 686
305void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd) 687 spin_lock(&dev->lock);
306{ 688 list_for_each_entry(next, &dev->area_list, list) {
307 int i; 689 if (begin + len > next->begin) {
690 begin = next->end;
691 prev = next;
692 continue;
693 }
694 break;
695 }
308 696
309 if (rqd->nr_ppas > 1) { 697 if ((begin + len) > max_sectors) {
310 for (i = 0; i < rqd->nr_ppas; i++) 698 spin_unlock(&dev->lock);
311 rqd->ppa_list[i] = dev_to_generic_addr(dev, 699 kfree(area);
312 rqd->ppa_list[i]); 700 return -EINVAL;
313 } else {
314 rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
315 } 701 }
702
703 area->begin = *lba = begin;
704 area->end = begin + len;
705
706 if (prev) /* insert into sorted order */
707 list_add(&area->list, &prev->list);
708 else
709 list_add(&area->list, &dev->area_list);
710 spin_unlock(&dev->lock);
711
712 return 0;
316} 713}
317EXPORT_SYMBOL(nvm_addr_to_generic_mode); 714EXPORT_SYMBOL(nvm_get_area);
318 715
319void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) 716void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin)
320{ 717{
321 int i; 718 struct nvm_dev *dev = tgt_dev->parent;
719 struct nvm_area *area;
322 720
323 if (rqd->nr_ppas > 1) { 721 spin_lock(&dev->lock);
324 for (i = 0; i < rqd->nr_ppas; i++) 722 list_for_each_entry(area, &dev->area_list, list) {
325 rqd->ppa_list[i] = generic_to_dev_addr(dev, 723 if (area->begin != begin)
326 rqd->ppa_list[i]); 724 continue;
327 } else { 725
328 rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); 726 list_del(&area->list);
727 spin_unlock(&dev->lock);
728 kfree(area);
729 return;
329 } 730 }
731 spin_unlock(&dev->lock);
330} 732}
331EXPORT_SYMBOL(nvm_generic_to_addr_mode); 733EXPORT_SYMBOL(nvm_put_area);
332 734
333int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, 735int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
334 const struct ppa_addr *ppas, int nr_ppas, int vblk) 736 const struct ppa_addr *ppas, int nr_ppas, int vblk)
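
nvm_get_area()/nvm_put_area() above move the sector-range reservations from gennvm into core: reservations live on dev->area_list sorted by start sector, and a new request takes the first gap that is large enough and still below the device capacity. A standalone model of that first-fit scan, with invented reservations and capacity:

#include <stdio.h>

/* Toy model of the sorted area list: find the first gap of 'len' sectors. */
struct area_model { long begin, end; };	/* end is excluded, as in struct nvm_area */

static long first_fit(const struct area_model *areas, int n, long len, long max)
{
	long begin = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (begin + len > areas[i].begin) {
			begin = areas[i].end;	/* skip past this reservation */
			continue;
		}
		break;				/* the gap before areas[i] fits */
	}
	return (begin + len > max) ? -1 : begin;
}

int main(void)
{
	/* two existing reservations, kept sorted by begin */
	struct area_model areas[] = { { 0, 100 }, { 100, 250 } };
	long begin = first_fit(areas, 2, 50, 1000);

	printf("new area starts at sector %ld\n", begin);	/* 250 */
	return 0;
}
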
@@ -380,149 +782,19 @@ void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
380} 782}
381EXPORT_SYMBOL(nvm_free_rqd_ppalist); 783EXPORT_SYMBOL(nvm_free_rqd_ppalist);
382 784
383int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, 785void nvm_end_io(struct nvm_rq *rqd)
384 int flags)
385{ 786{
386 struct nvm_rq rqd; 787 struct nvm_tgt_dev *tgt_dev = rqd->dev;
387 int ret;
388 788
389 if (!dev->ops->erase_block) 789 /* Convert address space */
390 return 0; 790 if (tgt_dev)
791 nvm_rq_dev_to_tgt(tgt_dev, rqd);
391 792
392 memset(&rqd, 0, sizeof(struct nvm_rq)); 793 if (rqd->end_io)
393 794 rqd->end_io(rqd);
394 ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
395 if (ret)
396 return ret;
397
398 nvm_generic_to_addr_mode(dev, &rqd);
399
400 rqd.flags = flags;
401
402 ret = dev->ops->erase_block(dev, &rqd);
403
404 nvm_free_rqd_ppalist(dev, &rqd);
405
406 return ret;
407}
408EXPORT_SYMBOL(nvm_erase_ppa);
409
410void nvm_end_io(struct nvm_rq *rqd, int error)
411{
412 rqd->error = error;
413 rqd->end_io(rqd);
414} 795}
415EXPORT_SYMBOL(nvm_end_io); 796EXPORT_SYMBOL(nvm_end_io);
416 797
417static void nvm_end_io_sync(struct nvm_rq *rqd)
418{
419 struct completion *waiting = rqd->wait;
420
421 rqd->wait = NULL;
422
423 complete(waiting);
424}
425
426static int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode,
427 int flags, void *buf, int len)
428{
429 DECLARE_COMPLETION_ONSTACK(wait);
430 struct bio *bio;
431 int ret;
432 unsigned long hang_check;
433
434 bio = bio_map_kern(dev->q, buf, len, GFP_KERNEL);
435 if (IS_ERR_OR_NULL(bio))
436 return -ENOMEM;
437
438 nvm_generic_to_addr_mode(dev, rqd);
439
440 rqd->dev = NULL;
441 rqd->opcode = opcode;
442 rqd->flags = flags;
443 rqd->bio = bio;
444 rqd->wait = &wait;
445 rqd->end_io = nvm_end_io_sync;
446
447 ret = dev->ops->submit_io(dev, rqd);
448 if (ret) {
449 bio_put(bio);
450 return ret;
451 }
452
453 /* Prevent hang_check timer from firing at us during very long I/O */
454 hang_check = sysctl_hung_task_timeout_secs;
455 if (hang_check)
456 while (!wait_for_completion_io_timeout(&wait,
457 hang_check * (HZ/2)))
458 ;
459 else
460 wait_for_completion_io(&wait);
461
462 return rqd->error;
463}
464
465/**
466 * nvm_submit_ppa_list - submit user-defined ppa list to device. The user must
467 * take to free ppa list if necessary.
468 * @dev: device
469 * @ppa_list: user created ppa_list
470 * @nr_ppas: length of ppa_list
471 * @opcode: device opcode
472 * @flags: device flags
473 * @buf: data buffer
474 * @len: data buffer length
475 */
476int nvm_submit_ppa_list(struct nvm_dev *dev, struct ppa_addr *ppa_list,
477 int nr_ppas, int opcode, int flags, void *buf, int len)
478{
479 struct nvm_rq rqd;
480
481 if (dev->ops->max_phys_sect < nr_ppas)
482 return -EINVAL;
483
484 memset(&rqd, 0, sizeof(struct nvm_rq));
485
486 rqd.nr_ppas = nr_ppas;
487 if (nr_ppas > 1)
488 rqd.ppa_list = ppa_list;
489 else
490 rqd.ppa_addr = ppa_list[0];
491
492 return __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len);
493}
494EXPORT_SYMBOL(nvm_submit_ppa_list);
495
496/**
497 * nvm_submit_ppa - submit PPAs to device. PPAs will automatically be unfolded
498 * as single, dual, quad plane PPAs depending on device type.
499 * @dev: device
500 * @ppa: user created ppa_list
501 * @nr_ppas: length of ppa_list
502 * @opcode: device opcode
503 * @flags: device flags
504 * @buf: data buffer
505 * @len: data buffer length
506 */
507int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas,
508 int opcode, int flags, void *buf, int len)
509{
510 struct nvm_rq rqd;
511 int ret;
512
513 memset(&rqd, 0, sizeof(struct nvm_rq));
514 ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas, 1);
515 if (ret)
516 return ret;
517
518 ret = __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len);
519
520 nvm_free_rqd_ppalist(dev, &rqd);
521
522 return ret;
523}
524EXPORT_SYMBOL(nvm_submit_ppa);
525
526/* 798/*
527 * folds a bad block list from its plane representation to its virtual 799 * folds a bad block list from its plane representation to its virtual
528 * block representation. The fold is done in place and reduced size is 800 * block representation. The fold is done in place and reduced size is
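
nvm_end_io() above drops its error argument: on completion, core only translates the PPAs back into the target's address space and invokes rqd->end_io(). Under the assumption that the submitting driver now records the outcome in rqd->error itself (the old wrapper used to do that), a completion path would look roughly like this; the function name and status parameter are illustrative:

/* Fragment: driver-side completion after the nvm_end_io() signature change. */
static void example_driver_complete(struct nvm_rq *rqd, int status)
{
	rqd->error = status;	/* previously passed as nvm_end_io(rqd, error) */
	nvm_end_io(rqd);	/* maps PPAs back to the target, then calls
				 * rqd->end_io(rqd) if the target set one */
}
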
@@ -559,21 +831,14 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
559} 831}
560EXPORT_SYMBOL(nvm_bb_tbl_fold); 832EXPORT_SYMBOL(nvm_bb_tbl_fold);
561 833
562int nvm_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa, u8 *blks)
563{
564 ppa = generic_to_dev_addr(dev, ppa);
565
566 return dev->ops->get_bb_tbl(dev, ppa, blks);
567}
568EXPORT_SYMBOL(nvm_get_bb_tbl);
569
570int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, 834int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
571 u8 *blks) 835 u8 *blks)
572{ 836{
573 struct nvm_dev *dev = tgt_dev->parent; 837 struct nvm_dev *dev = tgt_dev->parent;
574 838
575 ppa = dev->mt->trans_ppa(tgt_dev, ppa, TRANS_TGT_TO_DEV); 839 nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
576 return nvm_get_bb_tbl(dev, ppa, blks); 840
841 return dev->ops->get_bb_tbl(dev, ppa, blks);
577} 842}
578EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); 843EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
579 844
@@ -627,7 +892,7 @@ static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
627static int nvm_core_init(struct nvm_dev *dev) 892static int nvm_core_init(struct nvm_dev *dev)
628{ 893{
629 struct nvm_id *id = &dev->identity; 894 struct nvm_id *id = &dev->identity;
630 struct nvm_id_group *grp = &id->groups[0]; 895 struct nvm_id_group *grp = &id->grp;
631 struct nvm_geo *geo = &dev->geo; 896 struct nvm_geo *geo = &dev->geo;
632 int ret; 897 int ret;
633 898
@@ -691,36 +956,31 @@ static int nvm_core_init(struct nvm_dev *dev)
691 goto err_fmtype; 956 goto err_fmtype;
692 } 957 }
693 958
959 INIT_LIST_HEAD(&dev->area_list);
960 INIT_LIST_HEAD(&dev->targets);
694 mutex_init(&dev->mlock); 961 mutex_init(&dev->mlock);
695 spin_lock_init(&dev->lock); 962 spin_lock_init(&dev->lock);
696 963
697 blk_queue_logical_block_size(dev->q, geo->sec_size); 964 ret = nvm_register_map(dev);
965 if (ret)
966 goto err_fmtype;
698 967
968 blk_queue_logical_block_size(dev->q, geo->sec_size);
699 return 0; 969 return 0;
700err_fmtype: 970err_fmtype:
701 kfree(dev->lun_map); 971 kfree(dev->lun_map);
702 return ret; 972 return ret;
703} 973}
704 974
705static void nvm_free_mgr(struct nvm_dev *dev)
706{
707 if (!dev->mt)
708 return;
709
710 dev->mt->unregister_mgr(dev);
711 dev->mt = NULL;
712}
713
714void nvm_free(struct nvm_dev *dev) 975void nvm_free(struct nvm_dev *dev)
715{ 976{
716 if (!dev) 977 if (!dev)
717 return; 978 return;
718 979
719 nvm_free_mgr(dev);
720
721 if (dev->dma_pool) 980 if (dev->dma_pool)
722 dev->ops->destroy_dma_pool(dev->dma_pool); 981 dev->ops->destroy_dma_pool(dev->dma_pool);
723 982
983 kfree(dev->rmap);
724 kfree(dev->lptbl); 984 kfree(dev->lptbl);
725 kfree(dev->lun_map); 985 kfree(dev->lun_map);
726 kfree(dev); 986 kfree(dev);
@@ -731,28 +991,19 @@ static int nvm_init(struct nvm_dev *dev)
731 struct nvm_geo *geo = &dev->geo; 991 struct nvm_geo *geo = &dev->geo;
732 int ret = -EINVAL; 992 int ret = -EINVAL;
733 993
734 if (!dev->q || !dev->ops)
735 return ret;
736
737 if (dev->ops->identity(dev, &dev->identity)) { 994 if (dev->ops->identity(dev, &dev->identity)) {
738 pr_err("nvm: device could not be identified\n"); 995 pr_err("nvm: device could not be identified\n");
739 goto err; 996 goto err;
740 } 997 }
741 998
742 pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n", 999 pr_debug("nvm: ver:%x nvm_vendor:%x\n",
743 dev->identity.ver_id, dev->identity.vmnt, 1000 dev->identity.ver_id, dev->identity.vmnt);
744 dev->identity.cgrps);
745 1001
746 if (dev->identity.ver_id != 1) { 1002 if (dev->identity.ver_id != 1) {
747 pr_err("nvm: device not supported by kernel."); 1003 pr_err("nvm: device not supported by kernel.");
748 goto err; 1004 goto err;
749 } 1005 }
750 1006
751 if (dev->identity.cgrps != 1) {
752 pr_err("nvm: only one group configuration supported.");
753 goto err;
754 }
755
756 ret = nvm_core_init(dev); 1007 ret = nvm_core_init(dev);
757 if (ret) { 1008 if (ret) {
758 pr_err("nvm: could not initialize core structures.\n"); 1009 pr_err("nvm: could not initialize core structures.\n");
@@ -779,49 +1030,50 @@ int nvm_register(struct nvm_dev *dev)
779{ 1030{
780 int ret; 1031 int ret;
781 1032
782 ret = nvm_init(dev); 1033 if (!dev->q || !dev->ops)
783 if (ret) 1034 return -EINVAL;
784 goto err_init;
785 1035
786 if (dev->ops->max_phys_sect > 256) { 1036 if (dev->ops->max_phys_sect > 256) {
787 pr_info("nvm: max sectors supported is 256.\n"); 1037 pr_info("nvm: max sectors supported is 256.\n");
788 ret = -EINVAL; 1038 return -EINVAL;
789 goto err_init;
790 } 1039 }
791 1040
792 if (dev->ops->max_phys_sect > 1) { 1041 if (dev->ops->max_phys_sect > 1) {
793 dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); 1042 dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
794 if (!dev->dma_pool) { 1043 if (!dev->dma_pool) {
795 pr_err("nvm: could not create dma pool\n"); 1044 pr_err("nvm: could not create dma pool\n");
796 ret = -ENOMEM; 1045 return -ENOMEM;
797 goto err_init;
798 } 1046 }
799 } 1047 }
800 1048
801 if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { 1049 ret = nvm_init(dev);
802 ret = nvm_get_sysblock(dev, &dev->sb); 1050 if (ret)
803 if (!ret) 1051 goto err_init;
804 pr_err("nvm: device not initialized.\n");
805 else if (ret < 0)
806 pr_err("nvm: err (%d) on device initialization\n", ret);
807 }
808 1052
809 /* register device with a supported media manager */ 1053 /* register device with a supported media manager */
810 down_write(&nvm_lock); 1054 down_write(&nvm_lock);
811 if (ret > 0)
812 dev->mt = nvm_init_mgr(dev);
813 list_add(&dev->devices, &nvm_devices); 1055 list_add(&dev->devices, &nvm_devices);
814 up_write(&nvm_lock); 1056 up_write(&nvm_lock);
815 1057
816 return 0; 1058 return 0;
817err_init: 1059err_init:
818 kfree(dev->lun_map); 1060 dev->ops->destroy_dma_pool(dev->dma_pool);
819 return ret; 1061 return ret;
820} 1062}
821EXPORT_SYMBOL(nvm_register); 1063EXPORT_SYMBOL(nvm_register);
822 1064
823void nvm_unregister(struct nvm_dev *dev) 1065void nvm_unregister(struct nvm_dev *dev)
824{ 1066{
1067 struct nvm_target *t, *tmp;
1068
1069 mutex_lock(&dev->mlock);
1070 list_for_each_entry_safe(t, tmp, &dev->targets, list) {
1071 if (t->dev->parent != dev)
1072 continue;
1073 __nvm_remove_target(t);
1074 }
1075 mutex_unlock(&dev->mlock);
1076
825 down_write(&nvm_lock); 1077 down_write(&nvm_lock);
826 list_del(&dev->devices); 1078 list_del(&dev->devices);
827 up_write(&nvm_lock); 1079 up_write(&nvm_lock);
@@ -844,24 +1096,24 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
844 return -EINVAL; 1096 return -EINVAL;
845 } 1097 }
846 1098
847 if (!dev->mt) {
848 pr_info("nvm: device has no media manager registered.\n");
849 return -ENODEV;
850 }
851
852 if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { 1099 if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) {
853 pr_err("nvm: config type not valid\n"); 1100 pr_err("nvm: config type not valid\n");
854 return -EINVAL; 1101 return -EINVAL;
855 } 1102 }
856 s = &create->conf.s; 1103 s = &create->conf.s;
857 1104
858 if (s->lun_begin > s->lun_end || s->lun_end > dev->geo.nr_luns) { 1105 if (s->lun_begin == -1 && s->lun_end == -1) {
1106 s->lun_begin = 0;
1107 s->lun_end = dev->geo.nr_luns - 1;
1108 }
1109
1110 if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.nr_luns) {
859 pr_err("nvm: lun out of bound (%u:%u > %u)\n", 1111 pr_err("nvm: lun out of bound (%u:%u > %u)\n",
860 s->lun_begin, s->lun_end, dev->geo.nr_luns); 1112 s->lun_begin, s->lun_end, dev->geo.nr_luns - 1);
861 return -EINVAL; 1113 return -EINVAL;
862 } 1114 }
863 1115
864 return dev->mt->create_tgt(dev, create); 1116 return nvm_create_tgt(dev, create);
865} 1117}
866 1118
867static long nvm_ioctl_info(struct file *file, void __user *arg) 1119static long nvm_ioctl_info(struct file *file, void __user *arg)
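
__nvm_configure_create() above now treats lun_begin == -1 && lun_end == -1 as "use every LUN" and tightens the bounds check to lun_end >= nr_luns. A tiny standalone check of that defaulting and validation logic, with an invented eight-LUN geometry:

#include <stdio.h>

/* Toy version of the lun-range defaulting and bounds check added above. */
static int check_lun_range(int *lun_begin, int *lun_end, int nr_luns)
{
	if (*lun_begin == -1 && *lun_end == -1) {	/* no LUNs specified */
		*lun_begin = 0;
		*lun_end = nr_luns - 1;
	}
	if (*lun_begin > *lun_end || *lun_end >= nr_luns)
		return -1;				/* out of bounds */
	return 0;
}

int main(void)
{
	int b = -1, e = -1;

	if (!check_lun_range(&b, &e, 8))
		printf("target gets luns %d..%d\n", b, e);	/* 0..7 */
	return 0;
}
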
@@ -923,16 +1175,14 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
923 struct nvm_ioctl_device_info *info = &devices->info[i]; 1175 struct nvm_ioctl_device_info *info = &devices->info[i];
924 1176
925 sprintf(info->devname, "%s", dev->name); 1177 sprintf(info->devname, "%s", dev->name);
926 if (dev->mt) {
927 info->bmversion[0] = dev->mt->version[0];
928 info->bmversion[1] = dev->mt->version[1];
929 info->bmversion[2] = dev->mt->version[2];
930 sprintf(info->bmname, "%s", dev->mt->name);
931 } else {
932 sprintf(info->bmname, "none");
933 }
934 1178
1179 /* kept for compatibility */
1180 info->bmversion[0] = 1;
1181 info->bmversion[1] = 0;
1182 info->bmversion[2] = 0;
1183 sprintf(info->bmname, "%s", "gennvm");
935 i++; 1184 i++;
1185
936 if (i > 31) { 1186 if (i > 31) {
937 pr_err("nvm: max 31 devices can be reported.\n"); 1187 pr_err("nvm: max 31 devices can be reported.\n");
938 break; 1188 break;
@@ -994,7 +1244,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
994 } 1244 }
995 1245
996 list_for_each_entry(dev, &nvm_devices, devices) { 1246 list_for_each_entry(dev, &nvm_devices, devices) {
997 ret = dev->mt->remove_tgt(dev, &remove); 1247 ret = nvm_remove_tgt(dev, &remove);
998 if (!ret) 1248 if (!ret)
999 break; 1249 break;
1000 } 1250 }
@@ -1002,47 +1252,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
1002 return ret; 1252 return ret;
1003} 1253}
1004 1254
1005static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info) 1255/* kept for compatibility reasons */
1006{
1007 info->seqnr = 1;
1008 info->erase_cnt = 0;
1009 info->version = 1;
1010}
1011
1012static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init)
1013{
1014 struct nvm_dev *dev;
1015 struct nvm_sb_info info;
1016 int ret;
1017
1018 down_write(&nvm_lock);
1019 dev = nvm_find_nvm_dev(init->dev);
1020 up_write(&nvm_lock);
1021 if (!dev) {
1022 pr_err("nvm: device not found\n");
1023 return -EINVAL;
1024 }
1025
1026 nvm_setup_nvm_sb_info(&info);
1027
1028 strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
1029 info.fs_ppa.ppa = -1;
1030
1031 if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
1032 ret = nvm_init_sysblock(dev, &info);
1033 if (ret)
1034 return ret;
1035 }
1036
1037 memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info));
1038
1039 down_write(&nvm_lock);
1040 dev->mt = nvm_init_mgr(dev);
1041 up_write(&nvm_lock);
1042
1043 return 0;
1044}
1045
1046static long nvm_ioctl_dev_init(struct file *file, void __user *arg) 1256static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
1047{ 1257{
1048 struct nvm_ioctl_dev_init init; 1258 struct nvm_ioctl_dev_init init;
@@ -1058,15 +1268,13 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
1058 return -EINVAL; 1268 return -EINVAL;
1059 } 1269 }
1060 1270
1061 init.dev[DISK_NAME_LEN - 1] = '\0'; 1271 return 0;
1062
1063 return __nvm_ioctl_dev_init(&init);
1064} 1272}
1065 1273
1274/* Kept for compatibility reasons */
1066static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) 1275static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
1067{ 1276{
1068 struct nvm_ioctl_dev_factory fact; 1277 struct nvm_ioctl_dev_factory fact;
1069 struct nvm_dev *dev;
1070 1278
1071 if (!capable(CAP_SYS_ADMIN)) 1279 if (!capable(CAP_SYS_ADMIN))
1072 return -EPERM; 1280 return -EPERM;
@@ -1079,19 +1287,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
1079 if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1)) 1287 if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1))
1080 return -EINVAL; 1288 return -EINVAL;
1081 1289
1082 down_write(&nvm_lock);
1083 dev = nvm_find_nvm_dev(fact.dev);
1084 up_write(&nvm_lock);
1085 if (!dev) {
1086 pr_err("nvm: device not found\n");
1087 return -EINVAL;
1088 }
1089
1090 nvm_free_mgr(dev);
1091
1092 if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT)
1093 return nvm_dev_factory(dev, fact.flags);
1094
1095 return 0; 1290 return 0;
1096} 1291}
1097 1292
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
deleted file mode 100644
index ca7880082d80..000000000000
--- a/drivers/lightnvm/gennvm.c
+++ /dev/null
@@ -1,657 +0,0 @@
1/*
2 * Copyright (C) 2015 Matias Bjorling <m@bjorling.me>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License version
6 * 2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
16 * USA.
17 *
18 * Implementation of a general nvm manager for Open-Channel SSDs.
19 */
20
21#include "gennvm.h"
22
23static struct nvm_target *gen_find_target(struct gen_dev *gn, const char *name)
24{
25 struct nvm_target *tgt;
26
27 list_for_each_entry(tgt, &gn->targets, list)
28 if (!strcmp(name, tgt->disk->disk_name))
29 return tgt;
30
31 return NULL;
32}
33
34static const struct block_device_operations gen_fops = {
35 .owner = THIS_MODULE,
36};
37
38static int gen_reserve_luns(struct nvm_dev *dev, struct nvm_target *t,
39 int lun_begin, int lun_end)
40{
41 int i;
42
43 for (i = lun_begin; i <= lun_end; i++) {
44 if (test_and_set_bit(i, dev->lun_map)) {
45 pr_err("nvm: lun %d already allocated\n", i);
46 goto err;
47 }
48 }
49
50 return 0;
51
52err:
53 while (--i > lun_begin)
54 clear_bit(i, dev->lun_map);
55
56 return -EBUSY;
57}
58
59static void gen_release_luns_err(struct nvm_dev *dev, int lun_begin,
60 int lun_end)
61{
62 int i;
63
64 for (i = lun_begin; i <= lun_end; i++)
65 WARN_ON(!test_and_clear_bit(i, dev->lun_map));
66}
67
68static void gen_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
69{
70 struct nvm_dev *dev = tgt_dev->parent;
71 struct gen_dev_map *dev_map = tgt_dev->map;
72 int i, j;
73
74 for (i = 0; i < dev_map->nr_chnls; i++) {
75 struct gen_ch_map *ch_map = &dev_map->chnls[i];
76 int *lun_offs = ch_map->lun_offs;
77 int ch = i + ch_map->ch_off;
78
79 for (j = 0; j < ch_map->nr_luns; j++) {
80 int lun = j + lun_offs[j];
81 int lunid = (ch * dev->geo.luns_per_chnl) + lun;
82
83 WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
84 }
85
86 kfree(ch_map->lun_offs);
87 }
88
89 kfree(dev_map->chnls);
90 kfree(dev_map);
91 kfree(tgt_dev->luns);
92 kfree(tgt_dev);
93}
94
95static struct nvm_tgt_dev *gen_create_tgt_dev(struct nvm_dev *dev,
96 int lun_begin, int lun_end)
97{
98 struct nvm_tgt_dev *tgt_dev = NULL;
99 struct gen_dev_map *dev_rmap = dev->rmap;
100 struct gen_dev_map *dev_map;
101 struct ppa_addr *luns;
102 int nr_luns = lun_end - lun_begin + 1;
103 int luns_left = nr_luns;
104 int nr_chnls = nr_luns / dev->geo.luns_per_chnl;
105 int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl;
106 int bch = lun_begin / dev->geo.luns_per_chnl;
107 int blun = lun_begin % dev->geo.luns_per_chnl;
108 int lunid = 0;
109 int lun_balanced = 1;
110 int prev_nr_luns;
111 int i, j;
112
113 nr_chnls = nr_luns / dev->geo.luns_per_chnl;
114 nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
115
116 dev_map = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL);
117 if (!dev_map)
118 goto err_dev;
119
120 dev_map->chnls = kcalloc(nr_chnls, sizeof(struct gen_ch_map),
121 GFP_KERNEL);
122 if (!dev_map->chnls)
123 goto err_chnls;
124
125 luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL);
126 if (!luns)
127 goto err_luns;
128
129 prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ?
130 dev->geo.luns_per_chnl : luns_left;
131 for (i = 0; i < nr_chnls; i++) {
132 struct gen_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
133 int *lun_roffs = ch_rmap->lun_offs;
134 struct gen_ch_map *ch_map = &dev_map->chnls[i];
135 int *lun_offs;
136 int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ?
137 dev->geo.luns_per_chnl : luns_left;
138
139 if (lun_balanced && prev_nr_luns != luns_in_chnl)
140 lun_balanced = 0;
141
142 ch_map->ch_off = ch_rmap->ch_off = bch;
143 ch_map->nr_luns = luns_in_chnl;
144
145 lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
146 if (!lun_offs)
147 goto err_ch;
148
149 for (j = 0; j < luns_in_chnl; j++) {
150 luns[lunid].ppa = 0;
151 luns[lunid].g.ch = i;
152 luns[lunid++].g.lun = j;
153
154 lun_offs[j] = blun;
155 lun_roffs[j + blun] = blun;
156 }
157
158 ch_map->lun_offs = lun_offs;
159
160 /* when starting a new channel, lun offset is reset */
161 blun = 0;
162 luns_left -= luns_in_chnl;
163 }
164
165 dev_map->nr_chnls = nr_chnls;
166
167 tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
168 if (!tgt_dev)
169 goto err_ch;
170
171 memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
172 /* Target device only owns a portion of the physical device */
173 tgt_dev->geo.nr_chnls = nr_chnls;
174 tgt_dev->geo.nr_luns = nr_luns;
175 tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1;
176 tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
177 tgt_dev->q = dev->q;
178 tgt_dev->map = dev_map;
179 tgt_dev->luns = luns;
180 memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
181
182 tgt_dev->parent = dev;
183
184 return tgt_dev;
185err_ch:
186 while (--i > 0)
187 kfree(dev_map->chnls[i].lun_offs);
188 kfree(luns);
189err_luns:
190 kfree(dev_map->chnls);
191err_chnls:
192 kfree(dev_map);
193err_dev:
194 return tgt_dev;
195}
196
197static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
198{
199 struct gen_dev *gn = dev->mp;
200 struct nvm_ioctl_create_simple *s = &create->conf.s;
201 struct request_queue *tqueue;
202 struct gendisk *tdisk;
203 struct nvm_tgt_type *tt;
204 struct nvm_target *t;
205 struct nvm_tgt_dev *tgt_dev;
206 void *targetdata;
207
208 tt = nvm_find_target_type(create->tgttype, 1);
209 if (!tt) {
210 pr_err("nvm: target type %s not found\n", create->tgttype);
211 return -EINVAL;
212 }
213
214 mutex_lock(&gn->lock);
215 t = gen_find_target(gn, create->tgtname);
216 if (t) {
217 pr_err("nvm: target name already exists.\n");
218 mutex_unlock(&gn->lock);
219 return -EINVAL;
220 }
221 mutex_unlock(&gn->lock);
222
223 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
224 if (!t)
225 return -ENOMEM;
226
227 if (gen_reserve_luns(dev, t, s->lun_begin, s->lun_end))
228 goto err_t;
229
230 tgt_dev = gen_create_tgt_dev(dev, s->lun_begin, s->lun_end);
231 if (!tgt_dev) {
232 pr_err("nvm: could not create target device\n");
233 goto err_reserve;
234 }
235
236 tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
237 if (!tqueue)
238 goto err_dev;
239 blk_queue_make_request(tqueue, tt->make_rq);
240
241 tdisk = alloc_disk(0);
242 if (!tdisk)
243 goto err_queue;
244
245 sprintf(tdisk->disk_name, "%s", create->tgtname);
246 tdisk->flags = GENHD_FL_EXT_DEVT;
247 tdisk->major = 0;
248 tdisk->first_minor = 0;
249 tdisk->fops = &gen_fops;
250 tdisk->queue = tqueue;
251
252 targetdata = tt->init(tgt_dev, tdisk);
253 if (IS_ERR(targetdata))
254 goto err_init;
255
256 tdisk->private_data = targetdata;
257 tqueue->queuedata = targetdata;
258
259 blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
260
261 set_capacity(tdisk, tt->capacity(targetdata));
262 add_disk(tdisk);
263
264 t->type = tt;
265 t->disk = tdisk;
266 t->dev = tgt_dev;
267
268 mutex_lock(&gn->lock);
269 list_add_tail(&t->list, &gn->targets);
270 mutex_unlock(&gn->lock);
271
272 return 0;
273err_init:
274 put_disk(tdisk);
275err_queue:
276 blk_cleanup_queue(tqueue);
277err_dev:
278 kfree(tgt_dev);
279err_reserve:
280 gen_release_luns_err(dev, s->lun_begin, s->lun_end);
281err_t:
282 kfree(t);
283 return -ENOMEM;
284}
285
286static void __gen_remove_target(struct nvm_target *t)
287{
288 struct nvm_tgt_type *tt = t->type;
289 struct gendisk *tdisk = t->disk;
290 struct request_queue *q = tdisk->queue;
291
292 del_gendisk(tdisk);
293 blk_cleanup_queue(q);
294
295 if (tt->exit)
296 tt->exit(tdisk->private_data);
297
298 gen_remove_tgt_dev(t->dev);
299 put_disk(tdisk);
300
301 list_del(&t->list);
302 kfree(t);
303}
304
305/**
306 * gen_remove_tgt - Removes a target from the media manager
307 * @dev: device
308 * @remove: ioctl structure with target name to remove.
309 *
310 * Returns:
311 * 0: on success
312 * 1: if the target is not found
313 * <0: on error
314 */
315static int gen_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
316{
317 struct gen_dev *gn = dev->mp;
318 struct nvm_target *t;
319
320 if (!gn)
321 return 1;
322
323 mutex_lock(&gn->lock);
324 t = gen_find_target(gn, remove->tgtname);
325 if (!t) {
326 mutex_unlock(&gn->lock);
327 return 1;
328 }
329 __gen_remove_target(t);
330 mutex_unlock(&gn->lock);
331
332 return 0;
333}
334
335static int gen_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
336{
337 struct nvm_geo *geo = &dev->geo;
338 struct gen_dev *gn = dev->mp;
339 struct gen_area *area, *prev, *next;
340 sector_t begin = 0;
341 sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9;
342
343 if (len > max_sectors)
344 return -EINVAL;
345
346 area = kmalloc(sizeof(struct gen_area), GFP_KERNEL);
347 if (!area)
348 return -ENOMEM;
349
350 prev = NULL;
351
352 spin_lock(&dev->lock);
353 list_for_each_entry(next, &gn->area_list, list) {
354 if (begin + len > next->begin) {
355 begin = next->end;
356 prev = next;
357 continue;
358 }
359 break;
360 }
361
362 if ((begin + len) > max_sectors) {
363 spin_unlock(&dev->lock);
364 kfree(area);
365 return -EINVAL;
366 }
367
368 area->begin = *lba = begin;
369 area->end = begin + len;
370
371 if (prev) /* insert into sorted order */
372 list_add(&area->list, &prev->list);
373 else
374 list_add(&area->list, &gn->area_list);
375 spin_unlock(&dev->lock);
376
377 return 0;
378}
379
380static void gen_put_area(struct nvm_dev *dev, sector_t begin)
381{
382 struct gen_dev *gn = dev->mp;
383 struct gen_area *area;
384
385 spin_lock(&dev->lock);
386 list_for_each_entry(area, &gn->area_list, list) {
387 if (area->begin != begin)
388 continue;
389
390 list_del(&area->list);
391 spin_unlock(&dev->lock);
392 kfree(area);
393 return;
394 }
395 spin_unlock(&dev->lock);
396}
397
398static void gen_free(struct nvm_dev *dev)
399{
400 kfree(dev->mp);
401 kfree(dev->rmap);
402 dev->mp = NULL;
403}
404
405static int gen_register(struct nvm_dev *dev)
406{
407 struct gen_dev *gn;
408 struct gen_dev_map *dev_rmap;
409 int i, j;
410
411 if (!try_module_get(THIS_MODULE))
412 return -ENODEV;
413
414 gn = kzalloc(sizeof(struct gen_dev), GFP_KERNEL);
415 if (!gn)
416 goto err_gn;
417
418 dev_rmap = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL);
419 if (!dev_rmap)
420 goto err_rmap;
421
422 dev_rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct gen_ch_map),
423 GFP_KERNEL);
424 if (!dev_rmap->chnls)
425 goto err_chnls;
426
427 for (i = 0; i < dev->geo.nr_chnls; i++) {
428 struct gen_ch_map *ch_rmap;
429 int *lun_roffs;
430 int luns_in_chnl = dev->geo.luns_per_chnl;
431
432 ch_rmap = &dev_rmap->chnls[i];
433
434 ch_rmap->ch_off = -1;
435 ch_rmap->nr_luns = luns_in_chnl;
436
437 lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
438 if (!lun_roffs)
439 goto err_ch;
440
441 for (j = 0; j < luns_in_chnl; j++)
442 lun_roffs[j] = -1;
443
444 ch_rmap->lun_offs = lun_roffs;
445 }
446
447 gn->dev = dev;
448 gn->nr_luns = dev->geo.nr_luns;
449 INIT_LIST_HEAD(&gn->area_list);
450 mutex_init(&gn->lock);
451 INIT_LIST_HEAD(&gn->targets);
452 dev->mp = gn;
453 dev->rmap = dev_rmap;
454
455 return 1;
456err_ch:
457 while (--i >= 0)
458 kfree(dev_rmap->chnls[i].lun_offs);
459err_chnls:
460 kfree(dev_rmap);
461err_rmap:
462 gen_free(dev);
463err_gn:
464 module_put(THIS_MODULE);
465 return -ENOMEM;
466}
467
468static void gen_unregister(struct nvm_dev *dev)
469{
470 struct gen_dev *gn = dev->mp;
471 struct nvm_target *t, *tmp;
472
473 mutex_lock(&gn->lock);
474 list_for_each_entry_safe(t, tmp, &gn->targets, list) {
475 if (t->dev->parent != dev)
476 continue;
477 __gen_remove_target(t);
478 }
479 mutex_unlock(&gn->lock);
480
481 gen_free(dev);
482 module_put(THIS_MODULE);
483}
484
485static int gen_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
486{
487 struct gen_dev_map *dev_map = tgt_dev->map;
488 struct gen_ch_map *ch_map = &dev_map->chnls[p->g.ch];
489 int lun_off = ch_map->lun_offs[p->g.lun];
490 struct nvm_dev *dev = tgt_dev->parent;
491 struct gen_dev_map *dev_rmap = dev->rmap;
492 struct gen_ch_map *ch_rmap;
493 int lun_roff;
494
495 p->g.ch += ch_map->ch_off;
496 p->g.lun += lun_off;
497
498 ch_rmap = &dev_rmap->chnls[p->g.ch];
499 lun_roff = ch_rmap->lun_offs[p->g.lun];
500
501 if (unlikely(ch_rmap->ch_off < 0 || lun_roff < 0)) {
502 pr_err("nvm: corrupted device partition table\n");
503 return -EINVAL;
504 }
505
506 return 0;
507}
508
509static int gen_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
510{
511 struct nvm_dev *dev = tgt_dev->parent;
512 struct gen_dev_map *dev_rmap = dev->rmap;
513 struct gen_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch];
514 int lun_roff = ch_rmap->lun_offs[p->g.lun];
515
516 p->g.ch -= ch_rmap->ch_off;
517 p->g.lun -= lun_roff;
518
519 return 0;
520}
521
522static int gen_trans_rq(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
523 int flag)
524{
525 gen_trans_fn *f;
526 int i;
527 int ret = 0;
528
529 f = (flag == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt;
530
531 if (rqd->nr_ppas == 1)
532 return f(tgt_dev, &rqd->ppa_addr);
533
534 for (i = 0; i < rqd->nr_ppas; i++) {
535 ret = f(tgt_dev, &rqd->ppa_list[i]);
536 if (ret)
537 goto out;
538 }
539
540out:
541 return ret;
542}
543
544static void gen_end_io(struct nvm_rq *rqd)
545{
546 struct nvm_tgt_dev *tgt_dev = rqd->dev;
547 struct nvm_tgt_instance *ins = rqd->ins;
548
549 /* Convert address space */
550 if (tgt_dev)
551 gen_trans_rq(tgt_dev, rqd, TRANS_DEV_TO_TGT);
552
553 ins->tt->end_io(rqd);
554}
555
556static int gen_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
557{
558 struct nvm_dev *dev = tgt_dev->parent;
559
560 if (!dev->ops->submit_io)
561 return -ENODEV;
562
563 /* Convert address space */
564 gen_trans_rq(tgt_dev, rqd, TRANS_TGT_TO_DEV);
565 nvm_generic_to_addr_mode(dev, rqd);
566
567 rqd->dev = tgt_dev;
568 rqd->end_io = gen_end_io;
569 return dev->ops->submit_io(dev, rqd);
570}
571
572static int gen_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p,
573 int flags)
574{
575 /* Convert address space */
576 gen_map_to_dev(tgt_dev, p);
577
578 return nvm_erase_ppa(tgt_dev->parent, p, 1, flags);
579}
580
581static struct ppa_addr gen_trans_ppa(struct nvm_tgt_dev *tgt_dev,
582 struct ppa_addr p, int direction)
583{
584 gen_trans_fn *f;
585 struct ppa_addr ppa = p;
586
587 f = (direction == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt;
588 f(tgt_dev, &ppa);
589
590 return ppa;
591}
592
593static void gen_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
594 int len)
595{
596 struct nvm_geo *geo = &dev->geo;
597 struct gen_dev_map *dev_rmap = dev->rmap;
598 u64 i;
599
600 for (i = 0; i < len; i++) {
601 struct gen_ch_map *ch_rmap;
602 int *lun_roffs;
603 struct ppa_addr gaddr;
604 u64 pba = le64_to_cpu(entries[i]);
605 int off;
606 u64 diff;
607
608 if (!pba)
609 continue;
610
611 gaddr = linear_to_generic_addr(geo, pba);
612 ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
613 lun_roffs = ch_rmap->lun_offs;
614
615 off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
616
617 diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
618 (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
619
620 entries[i] -= cpu_to_le64(diff);
621 }
622}
623
624static struct nvmm_type gen = {
625 .name = "gennvm",
626 .version = {0, 1, 0},
627
628 .register_mgr = gen_register,
629 .unregister_mgr = gen_unregister,
630
631 .create_tgt = gen_create_tgt,
632 .remove_tgt = gen_remove_tgt,
633
634 .submit_io = gen_submit_io,
635 .erase_blk = gen_erase_blk,
636
637 .get_area = gen_get_area,
638 .put_area = gen_put_area,
639
640 .trans_ppa = gen_trans_ppa,
641 .part_to_tgt = gen_part_to_tgt,
642};
643
644static int __init gen_module_init(void)
645{
646 return nvm_register_mgr(&gen);
647}
648
649static void gen_module_exit(void)
650{
651 nvm_unregister_mgr(&gen);
652}
653
654module_init(gen_module_init);
655module_exit(gen_module_exit);
656MODULE_LICENSE("GPL v2");
657MODULE_DESCRIPTION("General media manager for Open-Channel SSDs");
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
deleted file mode 100644
index 6a4b3f368848..000000000000
--- a/drivers/lightnvm/gennvm.h
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Copyright: Matias Bjorling <mb@bjorling.me>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License version
6 * 2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 */
14
15#ifndef GENNVM_H_
16#define GENNVM_H_
17
18#include <linux/module.h>
19#include <linux/vmalloc.h>
20
21#include <linux/lightnvm.h>
22
23struct gen_dev {
24 struct nvm_dev *dev;
25
26 int nr_luns;
27 struct list_head area_list;
28
29 struct mutex lock;
30 struct list_head targets;
31};
32
33/* Map between virtual and physical channel and lun */
34struct gen_ch_map {
35 int ch_off;
36 int nr_luns;
37 int *lun_offs;
38};
39
40struct gen_dev_map {
41 struct gen_ch_map *chnls;
42 int nr_chnls;
43};
44
45struct gen_area {
46 struct list_head list;
47 sector_t begin;
48 sector_t end; /* end is excluded */
49};
50
51static inline void *ch_map_to_lun_offs(struct gen_ch_map *ch_map)
52{
53 return ch_map + 1;
54}
55
56typedef int (gen_trans_fn)(struct nvm_tgt_dev *, struct ppa_addr *);
57
58#define gen_for_each_lun(bm, lun, i) \
59 for ((i) = 0, lun = &(bm)->luns[0]; \
60 (i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])
61
62#endif /* GENNVM_H_ */
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 9fb7de395915..e00b1d7b976f 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -779,7 +779,7 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
779 779
780static void rrpc_end_io(struct nvm_rq *rqd) 780static void rrpc_end_io(struct nvm_rq *rqd)
781{ 781{
782 struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); 782 struct rrpc *rrpc = rqd->private;
783 struct nvm_tgt_dev *dev = rrpc->dev; 783 struct nvm_tgt_dev *dev = rrpc->dev;
784 struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); 784 struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
785 uint8_t npages = rqd->nr_ppas; 785 uint8_t npages = rqd->nr_ppas;
@@ -972,8 +972,9 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
972 972
973 bio_get(bio); 973 bio_get(bio);
974 rqd->bio = bio; 974 rqd->bio = bio;
975 rqd->ins = &rrpc->instance; 975 rqd->private = rrpc;
976 rqd->nr_ppas = nr_pages; 976 rqd->nr_ppas = nr_pages;
977 rqd->end_io = rrpc_end_io;
977 rrq->flags = flags; 978 rrq->flags = flags;
978 979
979 err = nvm_submit_io(dev, rqd); 980 err = nvm_submit_io(dev, rqd);
@@ -1532,7 +1533,6 @@ static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk)
1532 if (!rrpc) 1533 if (!rrpc)
1533 return ERR_PTR(-ENOMEM); 1534 return ERR_PTR(-ENOMEM);
1534 1535
1535 rrpc->instance.tt = &tt_rrpc;
1536 rrpc->dev = dev; 1536 rrpc->dev = dev;
1537 rrpc->disk = tdisk; 1537 rrpc->disk = tdisk;
1538 1538
@@ -1611,7 +1611,6 @@ static struct nvm_tgt_type tt_rrpc = {
1611 1611
1612 .make_rq = rrpc_make_rq, 1612 .make_rq = rrpc_make_rq,
1613 .capacity = rrpc_capacity, 1613 .capacity = rrpc_capacity,
1614 .end_io = rrpc_end_io,
1615 1614
1616 .init = rrpc_init, 1615 .init = rrpc_init,
1617 .exit = rrpc_exit, 1616 .exit = rrpc_exit,
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index 94e4d73116b2..fdb6ff902903 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -102,9 +102,6 @@ struct rrpc_lun {
102}; 102};
103 103
104struct rrpc { 104struct rrpc {
105 /* instance must be kept in top to resolve rrpc in unprep */
106 struct nvm_tgt_instance instance;
107
108 struct nvm_tgt_dev *dev; 105 struct nvm_tgt_dev *dev;
109 struct gendisk *disk; 106 struct gendisk *disk;
110 107
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c
deleted file mode 100644
index 12002bf4efc2..000000000000
--- a/drivers/lightnvm/sysblk.c
+++ /dev/null
@@ -1,733 +0,0 @@
1/*
2 * Copyright (C) 2015 Matias Bjorling. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License version
6 * 2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
16 * USA.
17 *
18 */
19
20#include <linux/lightnvm.h>
21
22#define MAX_SYSBLKS 3 /* remember to update mapping scheme on change */
23#define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases
24 * enables ~1.5M updates per sysblk unit
25 */
26
27struct sysblk_scan {
28 /* A row is a collection of flash blocks for a system block. */
29 int nr_rows;
30 int row;
31 int act_blk[MAX_SYSBLKS];
32
33 int nr_ppas;
34 struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */
35};
36
37static inline int scan_ppa_idx(int row, int blkid)
38{
39 return (row * MAX_BLKS_PR_SYSBLK) + blkid;
40}
41
42static void nvm_sysblk_to_cpu(struct nvm_sb_info *info,
43 struct nvm_system_block *sb)
44{
45 info->seqnr = be32_to_cpu(sb->seqnr);
46 info->erase_cnt = be32_to_cpu(sb->erase_cnt);
47 info->version = be16_to_cpu(sb->version);
48 strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN);
49 info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa);
50}
51
52static void nvm_cpu_to_sysblk(struct nvm_system_block *sb,
53 struct nvm_sb_info *info)
54{
55 sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC);
56 sb->seqnr = cpu_to_be32(info->seqnr);
57 sb->erase_cnt = cpu_to_be32(info->erase_cnt);
58 sb->version = cpu_to_be16(info->version);
59 strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN);
60 sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa);
61}
62
63static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
64{
65 struct nvm_geo *geo = &dev->geo;
66 int nr_rows = min_t(int, MAX_SYSBLKS, geo->nr_chnls);
67 int i;
68
69 for (i = 0; i < nr_rows; i++)
70 sysblk_ppas[i].ppa = 0;
71
72 /* if possible, place sysblk at first channel, middle channel and last
73 * channel of the device. If not, create only one or two sys blocks
74 */
75 switch (geo->nr_chnls) {
76 case 2:
77 sysblk_ppas[1].g.ch = 1;
78 /* fall-through */
79 case 1:
80 sysblk_ppas[0].g.ch = 0;
81 break;
82 default:
83 sysblk_ppas[0].g.ch = 0;
84 sysblk_ppas[1].g.ch = geo->nr_chnls / 2;
85 sysblk_ppas[2].g.ch = geo->nr_chnls - 1;
86 break;
87 }
88
89 return nr_rows;
90}
91
92static void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s,
93 struct ppa_addr *sysblk_ppas)
94{
95 memset(s, 0, sizeof(struct sysblk_scan));
96 s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas);
97}
98
99static int sysblk_get_free_blks(struct nvm_dev *dev, struct ppa_addr ppa,
100 u8 *blks, int nr_blks,
101 struct sysblk_scan *s)
102{
103 struct ppa_addr *sppa;
104 int i, blkid = 0;
105
106 nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
107 if (nr_blks < 0)
108 return nr_blks;
109
110 for (i = 0; i < nr_blks; i++) {
111 if (blks[i] == NVM_BLK_T_HOST)
112 return -EEXIST;
113
114 if (blks[i] != NVM_BLK_T_FREE)
115 continue;
116
117 sppa = &s->ppas[scan_ppa_idx(s->row, blkid)];
118 sppa->g.ch = ppa.g.ch;
119 sppa->g.lun = ppa.g.lun;
120 sppa->g.blk = i;
121 s->nr_ppas++;
122 blkid++;
123
124 pr_debug("nvm: use (%u %u %u) as sysblk\n",
125 sppa->g.ch, sppa->g.lun, sppa->g.blk);
126 if (blkid > MAX_BLKS_PR_SYSBLK - 1)
127 return 0;
128 }
129
130 pr_err("nvm: sysblk failed get sysblk\n");
131 return -EINVAL;
132}
133
134static int sysblk_get_host_blks(struct nvm_dev *dev, struct ppa_addr ppa,
135 u8 *blks, int nr_blks,
136 struct sysblk_scan *s)
137{
138 int i, nr_sysblk = 0;
139
140 nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
141 if (nr_blks < 0)
142 return nr_blks;
143
144 for (i = 0; i < nr_blks; i++) {
145 if (blks[i] != NVM_BLK_T_HOST)
146 continue;
147
148 if (s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) {
149 pr_err("nvm: too many host blks\n");
150 return -EINVAL;
151 }
152
153 ppa.g.blk = i;
154
155 s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa;
156 s->nr_ppas++;
157 nr_sysblk++;
158 }
159
160 return 0;
161}
162
163static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s,
164 struct ppa_addr *ppas, int get_free)
165{
166 struct nvm_geo *geo = &dev->geo;
167 int i, nr_blks, ret = 0;
168 u8 *blks;
169
170 s->nr_ppas = 0;
171 nr_blks = geo->blks_per_lun * geo->plane_mode;
172
173 blks = kmalloc(nr_blks, GFP_KERNEL);
174 if (!blks)
175 return -ENOMEM;
176
177 for (i = 0; i < s->nr_rows; i++) {
178 s->row = i;
179
180 ret = nvm_get_bb_tbl(dev, ppas[i], blks);
181 if (ret) {
182 pr_err("nvm: failed bb tbl for ppa (%u %u)\n",
183 ppas[i].g.ch,
184 ppas[i].g.blk);
185 goto err_get;
186 }
187
188 if (get_free)
189 ret = sysblk_get_free_blks(dev, ppas[i], blks, nr_blks,
190 s);
191 else
192 ret = sysblk_get_host_blks(dev, ppas[i], blks, nr_blks,
193 s);
194
195 if (ret)
196 goto err_get;
197 }
198
199err_get:
200 kfree(blks);
201 return ret;
202}
203
204/*
205 * scans a block for latest sysblk.
206 * Returns:
207 * 0 - newer sysblk not found. PPA is updated to latest page.
208 * 1 - newer sysblk found and stored in *sblk. PPA is updated to
209 * next valid page.
210 * <0 - error.
211 */
212static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
213 struct nvm_system_block *sblk)
214{
215 struct nvm_geo *geo = &dev->geo;
216 struct nvm_system_block *cur;
217 int pg, ret, found = 0;
218
219 /* the full buffer for a flash page is allocated. Only the first part of it
220 * contains the system block information
221 */
222 cur = kmalloc(geo->pfpg_size, GFP_KERNEL);
223 if (!cur)
224 return -ENOMEM;
225
226 /* perform linear scan through the block */
227 for (pg = 0; pg < dev->lps_per_blk; pg++) {
228 ppa->g.pg = ppa_to_slc(dev, pg);
229
230 ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE,
231 cur, geo->pfpg_size);
232 if (ret) {
233 if (ret == NVM_RSP_ERR_EMPTYPAGE) {
234 pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n",
235 ppa->g.ch,
236 ppa->g.lun,
237 ppa->g.blk,
238 ppa->g.pg);
239 break;
240 }
241 pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)",
242 ret,
243 ppa->g.ch,
244 ppa->g.lun,
245 ppa->g.blk,
246 ppa->g.pg);
247 break; /* if we can't read a page, continue to the
248 * next blk
249 */
250 }
251
252 if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) {
253 pr_debug("nvm: scan break for ppa (%u %u %u %u)\n",
254 ppa->g.ch,
255 ppa->g.lun,
256 ppa->g.blk,
257 ppa->g.pg);
258 break; /* last valid page already found */
259 }
260
261 if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr))
262 continue;
263
264 memcpy(sblk, cur, sizeof(struct nvm_system_block));
265 found = 1;
266 }
267
268 kfree(cur);
269
270 return found;
271}
272
273static int nvm_sysblk_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s,
274 int type)
275{
276 return nvm_set_bb_tbl(dev, s->ppas, s->nr_ppas, type);
277}
278
279static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
280 struct sysblk_scan *s)
281{
282 struct nvm_geo *geo = &dev->geo;
283 struct nvm_system_block nvmsb;
284 void *buf;
285 int i, sect, ret = 0;
286 struct ppa_addr *ppas;
287
288 nvm_cpu_to_sysblk(&nvmsb, info);
289
290 buf = kzalloc(geo->pfpg_size, GFP_KERNEL);
291 if (!buf)
292 return -ENOMEM;
293 memcpy(buf, &nvmsb, sizeof(struct nvm_system_block));
294
295 ppas = kcalloc(geo->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
296 if (!ppas) {
297 ret = -ENOMEM;
298 goto err;
299 }
300
301 /* Write and verify */
302 for (i = 0; i < s->nr_rows; i++) {
303 ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])];
304
305 pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n",
306 ppas[0].g.ch,
307 ppas[0].g.lun,
308 ppas[0].g.blk,
309 ppas[0].g.pg);
310
311 /* Expand to all sectors within a flash page */
312 if (geo->sec_per_pg > 1) {
313 for (sect = 1; sect < geo->sec_per_pg; sect++) {
314 ppas[sect].ppa = ppas[0].ppa;
315 ppas[sect].g.sec = sect;
316 }
317 }
318
319 ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PWRITE,
320 NVM_IO_SLC_MODE, buf, geo->pfpg_size);
321 if (ret) {
322 pr_err("nvm: sysblk failed program (%u %u %u)\n",
323 ppas[0].g.ch,
324 ppas[0].g.lun,
325 ppas[0].g.blk);
326 break;
327 }
328
329 ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PREAD,
330 NVM_IO_SLC_MODE, buf, geo->pfpg_size);
331 if (ret) {
332 pr_err("nvm: sysblk failed read (%u %u %u)\n",
333 ppas[0].g.ch,
334 ppas[0].g.lun,
335 ppas[0].g.blk);
336 break;
337 }
338
339 if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) {
340 pr_err("nvm: sysblk failed verify (%u %u %u)\n",
341 ppas[0].g.ch,
342 ppas[0].g.lun,
343 ppas[0].g.blk);
344 ret = -EINVAL;
345 break;
346 }
347 }
348
349 kfree(ppas);
350err:
351 kfree(buf);
352
353 return ret;
354}
355
356static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s)
357{
358 int i, ret;
359 unsigned long nxt_blk;
360 struct ppa_addr *ppa;
361
362 for (i = 0; i < s->nr_rows; i++) {
363 nxt_blk = (s->act_blk[i] + 1) % MAX_BLKS_PR_SYSBLK;
364 ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)];
365 ppa->g.pg = ppa_to_slc(dev, 0);
366
367 ret = nvm_erase_ppa(dev, ppa, 1, 0);
368 if (ret)
369 return ret;
370
371 s->act_blk[i] = nxt_blk;
372 }
373
374 return 0;
375}
376
377int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
378{
379 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
380 struct sysblk_scan s;
381 struct nvm_system_block *cur;
382 int i, j, found = 0;
383 int ret = -ENOMEM;
384
385 /*
386 * 1. setup sysblk locations
387 * 2. get bad block list
388 * 3. filter on host-specific (type 3)
389 * 4. iterate through all and find the highest seq nr.
390 * 5. return superblock information
391 */
392
393 if (!dev->ops->get_bb_tbl)
394 return -EINVAL;
395
396 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
397
398 mutex_lock(&dev->mlock);
399 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0);
400 if (ret)
401 goto err_sysblk;
402
403 /* no sysblocks initialized */
404 if (!s.nr_ppas)
405 goto err_sysblk;
406
407 cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
408 if (!cur)
409 goto err_sysblk;
410
411 /* find the latest block across all sysblocks */
412 for (i = 0; i < s.nr_rows; i++) {
413 for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
414 struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)];
415
416 ret = nvm_scan_block(dev, &ppa, cur);
417 if (ret > 0)
418 found = 1;
419 else if (ret < 0)
420 break;
421 }
422 }
423
424 nvm_sysblk_to_cpu(info, cur);
425
426 kfree(cur);
427err_sysblk:
428 mutex_unlock(&dev->mlock);
429
430 if (found)
431 return 1;
432 return ret;
433}
434
435int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new)
436{
437 /* 1. for each latest superblock
438 * 2. if room
439 * a. write new flash page entry with the updated information
440 * 3. if no room
441 * a. find next available block on lun (linear search)
442 * if none, continue to next lun
443 * if none at all, report error. also report that it wasn't
444 * possible to write to all superblocks.
445 *    b. write data to block.
446 */
447 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
448 struct sysblk_scan s;
449 struct nvm_system_block *cur;
450 int i, j, ppaidx, found = 0;
451 int ret = -ENOMEM;
452
453 if (!dev->ops->get_bb_tbl)
454 return -EINVAL;
455
456 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
457
458 mutex_lock(&dev->mlock);
459 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0);
460 if (ret)
461 goto err_sysblk;
462
463 cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
464 if (!cur)
465 goto err_sysblk;
466
467 /* Get the latest sysblk for each sysblk row */
468 for (i = 0; i < s.nr_rows; i++) {
469 found = 0;
470 for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
471 ppaidx = scan_ppa_idx(i, j);
472 ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur);
473 if (ret > 0) {
474 s.act_blk[i] = j;
475 found = 1;
476 } else if (ret < 0)
477 break;
478 }
479 }
480
481 if (!found) {
482 pr_err("nvm: no valid sysblks found to update\n");
483 ret = -EINVAL;
484 goto err_cur;
485 }
486
487 /*
488 * All sysblocks found. Check that they have the same page id in their flash
489 * blocks
490 */
491 for (i = 1; i < s.nr_rows; i++) {
492 struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])];
493 struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])];
494
495 if (l.g.pg != r.g.pg) {
496 pr_err("nvm: sysblks not on same page. Previous update failed.\n");
497 ret = -EINVAL;
498 goto err_cur;
499 }
500 }
501
502 /*
503 * Check that there hasn't been another update to the seqnr since we
504 * began
505 */
506 if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) {
507 pr_err("nvm: seq is not sequential\n");
508 ret = -EINVAL;
509 goto err_cur;
510 }
511
512 /*
513 * When all pages in a block have been written, a new block is selected
514 * and writing is performed on the new block.
515 */
516 if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg ==
517 dev->lps_per_blk - 1) {
518 ret = nvm_prepare_new_sysblks(dev, &s);
519 if (ret)
520 goto err_cur;
521 }
522
523 ret = nvm_write_and_verify(dev, new, &s);
524err_cur:
525 kfree(cur);
526err_sysblk:
527 mutex_unlock(&dev->mlock);
528
529 return ret;
530}
531
532int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
533{
534 struct nvm_geo *geo = &dev->geo;
535 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
536 struct sysblk_scan s;
537 int ret;
538
539 /*
540 * 1. select master blocks and select first available blks
541 * 2. get bad block list
542 * 3. mark MAX_SYSBLKS blocks as host-based device allocated.
543 * 4. write and verify data to block
544 */
545
546 if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl)
547 return -EINVAL;
548
549 if (!(geo->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) {
550 pr_err("nvm: memory does not support SLC access\n");
551 return -EINVAL;
552 }
553
554 /* Index all sysblocks and mark them as host-driven */
555 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
556
557 mutex_lock(&dev->mlock);
558 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 1);
559 if (ret)
560 goto err_mark;
561
562 ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
563 if (ret)
564 goto err_mark;
565
566 /* Write to the first block of each row */
567 ret = nvm_write_and_verify(dev, info, &s);
568err_mark:
569 mutex_unlock(&dev->mlock);
570 return ret;
571}
572
573static int factory_nblks(int nblks)
574{
575 /* Round up to nearest BITS_PER_LONG */
576 return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
577}
578
579static unsigned int factory_blk_offset(struct nvm_geo *geo, struct ppa_addr ppa)
580{
581 int nblks = factory_nblks(geo->blks_per_lun);
582
583 return ((ppa.g.ch * geo->luns_per_chnl * nblks) + (ppa.g.lun * nblks)) /
584 BITS_PER_LONG;
585}
586
587static int nvm_factory_blks(struct nvm_dev *dev, struct ppa_addr ppa,
588 u8 *blks, int nr_blks,
589 unsigned long *blk_bitmap, int flags)
590{
591 int i, lunoff;
592
593 nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
594 if (nr_blks < 0)
595 return nr_blks;
596
597 lunoff = factory_blk_offset(&dev->geo, ppa);
598
599 /* non-set bits correspond to blocks that must be erased */
600 for (i = 0; i < nr_blks; i++) {
601 switch (blks[i]) {
602 case NVM_BLK_T_FREE:
603 if (flags & NVM_FACTORY_ERASE_ONLY_USER)
604 set_bit(i, &blk_bitmap[lunoff]);
605 break;
606 case NVM_BLK_T_HOST:
607 if (!(flags & NVM_FACTORY_RESET_HOST_BLKS))
608 set_bit(i, &blk_bitmap[lunoff]);
609 break;
610 case NVM_BLK_T_GRWN_BAD:
611 if (!(flags & NVM_FACTORY_RESET_GRWN_BBLKS))
612 set_bit(i, &blk_bitmap[lunoff]);
613 break;
614 default:
615 set_bit(i, &blk_bitmap[lunoff]);
616 break;
617 }
618 }
619
620 return 0;
621}
622
623static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
624 int max_ppas, unsigned long *blk_bitmap)
625{
626 struct nvm_geo *geo = &dev->geo;
627 struct ppa_addr ppa;
628 int ch, lun, blkid, idx, done = 0, ppa_cnt = 0;
629 unsigned long *offset;
630
631 while (!done) {
632 done = 1;
633 nvm_for_each_lun_ppa(geo, ppa, ch, lun) {
634 idx = factory_blk_offset(geo, ppa);
635 offset = &blk_bitmap[idx];
636
637 blkid = find_first_zero_bit(offset, geo->blks_per_lun);
638 if (blkid >= geo->blks_per_lun)
639 continue;
640 set_bit(blkid, offset);
641
642 ppa.g.blk = blkid;
643 pr_debug("nvm: erase ppa (%u %u %u)\n",
644 ppa.g.ch,
645 ppa.g.lun,
646 ppa.g.blk);
647
648 erase_list[ppa_cnt] = ppa;
649 ppa_cnt++;
650 done = 0;
651
652 if (ppa_cnt == max_ppas)
653 return ppa_cnt;
654 }
655 }
656
657 return ppa_cnt;
658}
659
660static int nvm_fact_select_blks(struct nvm_dev *dev, unsigned long *blk_bitmap,
661 int flags)
662{
663 struct nvm_geo *geo = &dev->geo;
664 struct ppa_addr ppa;
665 int ch, lun, nr_blks, ret = 0;
666 u8 *blks;
667
668 nr_blks = geo->blks_per_lun * geo->plane_mode;
669 blks = kmalloc(nr_blks, GFP_KERNEL);
670 if (!blks)
671 return -ENOMEM;
672
673 nvm_for_each_lun_ppa(geo, ppa, ch, lun) {
674 ret = nvm_get_bb_tbl(dev, ppa, blks);
675 if (ret)
676 pr_err("nvm: failed bb tbl for ch%u lun%u\n",
677 ppa.g.ch, ppa.g.blk);
678
679 ret = nvm_factory_blks(dev, ppa, blks, nr_blks, blk_bitmap,
680 flags);
681 if (ret)
682 break;
683 }
684
685 kfree(blks);
686 return ret;
687}
688
689int nvm_dev_factory(struct nvm_dev *dev, int flags)
690{
691 struct nvm_geo *geo = &dev->geo;
692 struct ppa_addr *ppas;
693 int ppa_cnt, ret = -ENOMEM;
694 int max_ppas = dev->ops->max_phys_sect / geo->nr_planes;
695 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
696 struct sysblk_scan s;
697 unsigned long *blk_bitmap;
698
699 blk_bitmap = kzalloc(factory_nblks(geo->blks_per_lun) * geo->nr_luns,
700 GFP_KERNEL);
701 if (!blk_bitmap)
702 return ret;
703
704 ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL);
705 if (!ppas)
706 goto err_blks;
707
708 /* create list of blks to be erased */
709 ret = nvm_fact_select_blks(dev, blk_bitmap, flags);
710 if (ret)
711 goto err_ppas;
712
713 /* continue to erase until the list of blks is empty */
714 while ((ppa_cnt =
715 nvm_fact_get_blks(dev, ppas, max_ppas, blk_bitmap)) > 0)
716 nvm_erase_ppa(dev, ppas, ppa_cnt, 0);
717
718 /* mark host reserved blocks free */
719 if (flags & NVM_FACTORY_RESET_HOST_BLKS) {
720 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
721 mutex_lock(&dev->mlock);
722 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0);
723 if (!ret)
724 ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
725 mutex_unlock(&dev->mlock);
726 }
727err_ppas:
728 kfree(ppas);
729err_blks:
730 kfree(blk_bitmap);
731 return ret;
732}
733EXPORT_SYMBOL(nvm_dev_factory);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 76d20875503c..709c9cc34369 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio,
666 s->iop.write_prio = 0; 666 s->iop.write_prio = 0;
667 s->iop.error = 0; 667 s->iop.error = 0;
668 s->iop.flags = 0; 668 s->iop.flags = 0;
669 s->iop.flush_journal = (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0; 669 s->iop.flush_journal = op_is_flush(bio->bi_opf);
670 s->iop.wq = bcache_wq; 670 s->iop.wq = bcache_wq;
671 671
672 return s; 672 return s;
@@ -1009,7 +1009,7 @@ static int cached_dev_congested(void *data, int bits)
1009 struct request_queue *q = bdev_get_queue(dc->bdev); 1009 struct request_queue *q = bdev_get_queue(dc->bdev);
1010 int ret = 0; 1010 int ret = 0;
1011 1011
1012 if (bdi_congested(&q->backing_dev_info, bits)) 1012 if (bdi_congested(q->backing_dev_info, bits))
1013 return 1; 1013 return 1;
1014 1014
1015 if (cached_dev_get(dc)) { 1015 if (cached_dev_get(dc)) {
@@ -1018,7 +1018,7 @@ static int cached_dev_congested(void *data, int bits)
1018 1018
1019 for_each_cache(ca, d->c, i) { 1019 for_each_cache(ca, d->c, i) {
1020 q = bdev_get_queue(ca->bdev); 1020 q = bdev_get_queue(ca->bdev);
1021 ret |= bdi_congested(&q->backing_dev_info, bits); 1021 ret |= bdi_congested(q->backing_dev_info, bits);
1022 } 1022 }
1023 1023
1024 cached_dev_put(dc); 1024 cached_dev_put(dc);
@@ -1032,7 +1032,7 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
1032 struct gendisk *g = dc->disk.disk; 1032 struct gendisk *g = dc->disk.disk;
1033 1033
1034 g->queue->make_request_fn = cached_dev_make_request; 1034 g->queue->make_request_fn = cached_dev_make_request;
1035 g->queue->backing_dev_info.congested_fn = cached_dev_congested; 1035 g->queue->backing_dev_info->congested_fn = cached_dev_congested;
1036 dc->disk.cache_miss = cached_dev_cache_miss; 1036 dc->disk.cache_miss = cached_dev_cache_miss;
1037 dc->disk.ioctl = cached_dev_ioctl; 1037 dc->disk.ioctl = cached_dev_ioctl;
1038} 1038}
@@ -1125,7 +1125,7 @@ static int flash_dev_congested(void *data, int bits)
1125 1125
1126 for_each_cache(ca, d->c, i) { 1126 for_each_cache(ca, d->c, i) {
1127 q = bdev_get_queue(ca->bdev); 1127 q = bdev_get_queue(ca->bdev);
1128 ret |= bdi_congested(&q->backing_dev_info, bits); 1128 ret |= bdi_congested(q->backing_dev_info, bits);
1129 } 1129 }
1130 1130
1131 return ret; 1131 return ret;
@@ -1136,7 +1136,7 @@ void bch_flash_dev_request_init(struct bcache_device *d)
1136 struct gendisk *g = d->disk; 1136 struct gendisk *g = d->disk;
1137 1137
1138 g->queue->make_request_fn = flash_dev_make_request; 1138 g->queue->make_request_fn = flash_dev_make_request;
1139 g->queue->backing_dev_info.congested_fn = flash_dev_congested; 1139 g->queue->backing_dev_info->congested_fn = flash_dev_congested;
1140 d->cache_miss = flash_dev_cache_miss; 1140 d->cache_miss = flash_dev_cache_miss;
1141 d->ioctl = flash_dev_ioctl; 1141 d->ioctl = flash_dev_ioctl;
1142} 1142}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3a19cbc8b230..85e3f21c2514 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -807,7 +807,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
807 blk_queue_make_request(q, NULL); 807 blk_queue_make_request(q, NULL);
808 d->disk->queue = q; 808 d->disk->queue = q;
809 q->queuedata = d; 809 q->queuedata = d;
810 q->backing_dev_info.congested_data = d; 810 q->backing_dev_info->congested_data = d;
811 q->limits.max_hw_sectors = UINT_MAX; 811 q->limits.max_hw_sectors = UINT_MAX;
812 q->limits.max_sectors = UINT_MAX; 812 q->limits.max_sectors = UINT_MAX;
813 q->limits.max_segment_size = UINT_MAX; 813 q->limits.max_segment_size = UINT_MAX;
@@ -1132,9 +1132,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
1132 set_capacity(dc->disk.disk, 1132 set_capacity(dc->disk.disk,
1133 dc->bdev->bd_part->nr_sects - dc->sb.data_offset); 1133 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
1134 1134
1135 dc->disk.disk->queue->backing_dev_info.ra_pages = 1135 dc->disk.disk->queue->backing_dev_info->ra_pages =
1136 max(dc->disk.disk->queue->backing_dev_info.ra_pages, 1136 max(dc->disk.disk->queue->backing_dev_info->ra_pages,
1137 q->backing_dev_info.ra_pages); 1137 q->backing_dev_info->ra_pages);
1138 1138
1139 bch_cached_dev_request_init(dc); 1139 bch_cached_dev_request_init(dc);
1140 bch_cached_dev_writeback_init(dc); 1140 bch_cached_dev_writeback_init(dc);
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index e04c61e0839e..894bc14469c8 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
787 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 787 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
788 788
789 spin_lock_irqsave(&cache->lock, flags); 789 spin_lock_irqsave(&cache->lock, flags);
790 if (cache->need_tick_bio && 790 if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
791 !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) &&
792 bio_op(bio) != REQ_OP_DISCARD) { 791 bio_op(bio) != REQ_OP_DISCARD) {
793 pb->tick = true; 792 pb->tick = true;
794 cache->need_tick_bio = false; 793 cache->need_tick_bio = false;
@@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
828 return to_oblock(block_nr); 827 return to_oblock(block_nr);
829} 828}
830 829
831static int bio_triggers_commit(struct cache *cache, struct bio *bio)
832{
833 return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
834}
835
836/* 830/*
837 * You must increment the deferred set whilst the prison cell is held. To 831 * You must increment the deferred set whilst the prison cell is held. To
838 * encourage this, we ask for 'cell' to be passed in. 832 * encourage this, we ask for 'cell' to be passed in.
@@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio)
884{ 878{
885 unsigned long flags; 879 unsigned long flags;
886 880
887 if (!bio_triggers_commit(cache, bio)) { 881 if (!op_is_flush(bio->bi_opf)) {
888 accounted_request(cache, bio); 882 accounted_request(cache, bio);
889 return; 883 return;
890 } 884 }
@@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache)
1069 1063
1070static bool discard_or_flush(struct bio *bio) 1064static bool discard_or_flush(struct bio *bio)
1071{ 1065{
1072 return bio_op(bio) == REQ_OP_DISCARD || 1066 return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
1073 bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
1074} 1067}
1075 1068
1076static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) 1069static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
@@ -2291,7 +2284,7 @@ static void do_waker(struct work_struct *ws)
2291static int is_congested(struct dm_dev *dev, int bdi_bits) 2284static int is_congested(struct dm_dev *dev, int bdi_bits)
2292{ 2285{
2293 struct request_queue *q = bdev_get_queue(dev->bdev); 2286 struct request_queue *q = bdev_get_queue(dev->bdev);
2294 return bdi_congested(&q->backing_dev_info, bdi_bits); 2287 return bdi_congested(q->backing_dev_info, bdi_bits);
2295} 2288}
2296 2289
2297static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 2290static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 40ceba1fe8be..136fda3ff9e5 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -92,7 +92,6 @@ struct mapped_device {
92 * io objects are allocated from here. 92 * io objects are allocated from here.
93 */ 93 */
94 mempool_t *io_pool; 94 mempool_t *io_pool;
95 mempool_t *rq_pool;
96 95
97 struct bio_set *bs; 96 struct bio_set *bs;
98 97
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index bf2b2676cb8a..9fab33b113c4 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -1379,7 +1379,7 @@ static void stop_worker(struct era *era)
1379static int dev_is_congested(struct dm_dev *dev, int bdi_bits) 1379static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
1380{ 1380{
1381 struct request_queue *q = bdev_get_queue(dev->bdev); 1381 struct request_queue *q = bdev_get_queue(dev->bdev);
1382 return bdi_congested(&q->backing_dev_info, bdi_bits); 1382 return bdi_congested(q->backing_dev_info, bdi_bits);
1383} 1383}
1384 1384
1385static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1385static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3570bcb7a4a4..7f223dbed49f 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -92,12 +92,6 @@ struct multipath {
92 92
93 unsigned queue_mode; 93 unsigned queue_mode;
94 94
95 /*
96 * We must use a mempool of dm_mpath_io structs so that we
97 * can resubmit bios on error.
98 */
99 mempool_t *mpio_pool;
100
101 struct mutex work_mutex; 95 struct mutex work_mutex;
102 struct work_struct trigger_event; 96 struct work_struct trigger_event;
103 97
@@ -115,8 +109,6 @@ struct dm_mpath_io {
115 109
116typedef int (*action_fn) (struct pgpath *pgpath); 110typedef int (*action_fn) (struct pgpath *pgpath);
117 111
118static struct kmem_cache *_mpio_cache;
119
120static struct workqueue_struct *kmultipathd, *kmpath_handlerd; 112static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
121static void trigger_event(struct work_struct *work); 113static void trigger_event(struct work_struct *work);
122static void activate_path(struct work_struct *work); 114static void activate_path(struct work_struct *work);
@@ -209,7 +201,6 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
209 init_waitqueue_head(&m->pg_init_wait); 201 init_waitqueue_head(&m->pg_init_wait);
210 mutex_init(&m->work_mutex); 202 mutex_init(&m->work_mutex);
211 203
212 m->mpio_pool = NULL;
213 m->queue_mode = DM_TYPE_NONE; 204 m->queue_mode = DM_TYPE_NONE;
214 205
215 m->ti = ti; 206 m->ti = ti;
@@ -229,16 +220,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
229 m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; 220 m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
230 else 221 else
231 m->queue_mode = DM_TYPE_REQUEST_BASED; 222 m->queue_mode = DM_TYPE_REQUEST_BASED;
232 } 223 } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
233
234 if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
235 unsigned min_ios = dm_get_reserved_rq_based_ios();
236
237 m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
238 if (!m->mpio_pool)
239 return -ENOMEM;
240 }
241 else if (m->queue_mode == DM_TYPE_BIO_BASED) {
242 INIT_WORK(&m->process_queued_bios, process_queued_bios); 224 INIT_WORK(&m->process_queued_bios, process_queued_bios);
243 /* 225 /*
244 * bio-based doesn't support any direct scsi_dh management; 226 * bio-based doesn't support any direct scsi_dh management;
@@ -263,7 +245,6 @@ static void free_multipath(struct multipath *m)
263 245
264 kfree(m->hw_handler_name); 246 kfree(m->hw_handler_name);
265 kfree(m->hw_handler_params); 247 kfree(m->hw_handler_params);
266 mempool_destroy(m->mpio_pool);
267 kfree(m); 248 kfree(m);
268} 249}
269 250
@@ -272,38 +253,6 @@ static struct dm_mpath_io *get_mpio(union map_info *info)
272 return info->ptr; 253 return info->ptr;
273} 254}
274 255
275static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
276{
277 struct dm_mpath_io *mpio;
278
279 if (!m->mpio_pool) {
280 /* Use blk-mq pdu memory requested via per_io_data_size */
281 mpio = get_mpio(info);
282 memset(mpio, 0, sizeof(*mpio));
283 return mpio;
284 }
285
286 mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
287 if (!mpio)
288 return NULL;
289
290 memset(mpio, 0, sizeof(*mpio));
291 info->ptr = mpio;
292
293 return mpio;
294}
295
296static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
297{
298 /* Only needed for non blk-mq (.request_fn) multipath */
299 if (m->mpio_pool) {
300 struct dm_mpath_io *mpio = info->ptr;
301
302 info->ptr = NULL;
303 mempool_free(mpio, m->mpio_pool);
304 }
305}
306
307static size_t multipath_per_bio_data_size(void) 256static size_t multipath_per_bio_data_size(void)
308{ 257{
309 return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details); 258 return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
@@ -530,16 +479,17 @@ static bool must_push_back_bio(struct multipath *m)
530/* 479/*
531 * Map cloned requests (request-based multipath) 480 * Map cloned requests (request-based multipath)
532 */ 481 */
533static int __multipath_map(struct dm_target *ti, struct request *clone, 482static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
534 union map_info *map_context, 483 union map_info *map_context,
535 struct request *rq, struct request **__clone) 484 struct request **__clone)
536{ 485{
537 struct multipath *m = ti->private; 486 struct multipath *m = ti->private;
538 int r = DM_MAPIO_REQUEUE; 487 int r = DM_MAPIO_REQUEUE;
539 size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq); 488 size_t nr_bytes = blk_rq_bytes(rq);
540 struct pgpath *pgpath; 489 struct pgpath *pgpath;
541 struct block_device *bdev; 490 struct block_device *bdev;
542 struct dm_mpath_io *mpio; 491 struct dm_mpath_io *mpio = get_mpio(map_context);
492 struct request *clone;
543 493
544 /* Do we need to select a new pgpath? */ 494 /* Do we need to select a new pgpath? */
545 pgpath = lockless_dereference(m->current_pgpath); 495 pgpath = lockless_dereference(m->current_pgpath);
@@ -556,42 +506,23 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
556 return r; 506 return r;
557 } 507 }
558 508
559 mpio = set_mpio(m, map_context); 509 memset(mpio, 0, sizeof(*mpio));
560 if (!mpio)
561 /* ENOMEM, requeue */
562 return r;
563
564 mpio->pgpath = pgpath; 510 mpio->pgpath = pgpath;
565 mpio->nr_bytes = nr_bytes; 511 mpio->nr_bytes = nr_bytes;
566 512
567 bdev = pgpath->path.dev->bdev; 513 bdev = pgpath->path.dev->bdev;
568 514
569 if (clone) { 515 clone = blk_get_request(bdev_get_queue(bdev),
570 /* 516 rq->cmd_flags | REQ_NOMERGE,
571 * Old request-based interface: allocated clone is passed in. 517 GFP_ATOMIC);
572 * Used by: .request_fn stacked on .request_fn path(s). 518 if (IS_ERR(clone)) {
573 */ 519 /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
574 clone->q = bdev_get_queue(bdev); 520 return r;
575 clone->rq_disk = bdev->bd_disk;
576 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
577 } else {
578 /*
579 * blk-mq request-based interface; used by both:
580 * .request_fn stacked on blk-mq path(s) and
581 * blk-mq stacked on blk-mq path(s).
582 */
583 clone = blk_mq_alloc_request(bdev_get_queue(bdev),
584 rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
585 if (IS_ERR(clone)) {
586 /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
587 clear_request_fn_mpio(m, map_context);
588 return r;
589 }
590 clone->bio = clone->biotail = NULL;
591 clone->rq_disk = bdev->bd_disk;
592 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
593 *__clone = clone;
594 } 521 }
522 clone->bio = clone->biotail = NULL;
523 clone->rq_disk = bdev->bd_disk;
524 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
525 *__clone = clone;
595 526
596 if (pgpath->pg->ps.type->start_io) 527 if (pgpath->pg->ps.type->start_io)
597 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, 528 pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
@@ -600,22 +531,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
600 return DM_MAPIO_REMAPPED; 531 return DM_MAPIO_REMAPPED;
601} 532}
602 533
603static int multipath_map(struct dm_target *ti, struct request *clone,
604 union map_info *map_context)
605{
606 return __multipath_map(ti, clone, map_context, NULL, NULL);
607}
608
609static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
610 union map_info *map_context,
611 struct request **clone)
612{
613 return __multipath_map(ti, NULL, map_context, rq, clone);
614}
615
616static void multipath_release_clone(struct request *clone) 534static void multipath_release_clone(struct request *clone)
617{ 535{
618 blk_mq_free_request(clone); 536 blk_put_request(clone);
619} 537}
620 538
621/* 539/*
@@ -1187,7 +1105,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
1187 ti->num_write_same_bios = 1; 1105 ti->num_write_same_bios = 1;
1188 if (m->queue_mode == DM_TYPE_BIO_BASED) 1106 if (m->queue_mode == DM_TYPE_BIO_BASED)
1189 ti->per_io_data_size = multipath_per_bio_data_size(); 1107 ti->per_io_data_size = multipath_per_bio_data_size();
1190 else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) 1108 else
1191 ti->per_io_data_size = sizeof(struct dm_mpath_io); 1109 ti->per_io_data_size = sizeof(struct dm_mpath_io);
1192 1110
1193 return 0; 1111 return 0;
@@ -1610,7 +1528,6 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
1610 if (ps->type->end_io) 1528 if (ps->type->end_io)
1611 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); 1529 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1612 } 1530 }
1613 clear_request_fn_mpio(m, map_context);
1614 1531
1615 return r; 1532 return r;
1616} 1533}
@@ -2060,7 +1977,6 @@ static struct target_type multipath_target = {
2060 .module = THIS_MODULE, 1977 .module = THIS_MODULE,
2061 .ctr = multipath_ctr, 1978 .ctr = multipath_ctr,
2062 .dtr = multipath_dtr, 1979 .dtr = multipath_dtr,
2063 .map_rq = multipath_map,
2064 .clone_and_map_rq = multipath_clone_and_map, 1980 .clone_and_map_rq = multipath_clone_and_map,
2065 .release_clone_rq = multipath_release_clone, 1981 .release_clone_rq = multipath_release_clone,
2066 .rq_end_io = multipath_end_io, 1982 .rq_end_io = multipath_end_io,
@@ -2080,11 +1996,6 @@ static int __init dm_multipath_init(void)
2080{ 1996{
2081 int r; 1997 int r;
2082 1998
2083 /* allocate a slab for the dm_mpath_ios */
2084 _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
2085 if (!_mpio_cache)
2086 return -ENOMEM;
2087
2088 r = dm_register_target(&multipath_target); 1999 r = dm_register_target(&multipath_target);
2089 if (r < 0) { 2000 if (r < 0) {
2090 DMERR("request-based register failed %d", r); 2001 DMERR("request-based register failed %d", r);
@@ -2120,8 +2031,6 @@ bad_alloc_kmpath_handlerd:
2120bad_alloc_kmultipathd: 2031bad_alloc_kmultipathd:
2121 dm_unregister_target(&multipath_target); 2032 dm_unregister_target(&multipath_target);
2122bad_register_target: 2033bad_register_target:
2123 kmem_cache_destroy(_mpio_cache);
2124
2125 return r; 2034 return r;
2126} 2035}
2127 2036
@@ -2131,7 +2040,6 @@ static void __exit dm_multipath_exit(void)
2131 destroy_workqueue(kmultipathd); 2040 destroy_workqueue(kmultipathd);
2132 2041
2133 dm_unregister_target(&multipath_target); 2042 dm_unregister_target(&multipath_target);
2134 kmem_cache_destroy(_mpio_cache);
2135} 2043}
2136 2044
2137module_init(dm_multipath_init); 2045module_init(dm_multipath_init);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 6e702fc69a83..67d76f21fecd 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -109,28 +109,6 @@ void dm_stop_queue(struct request_queue *q)
109 dm_mq_stop_queue(q); 109 dm_mq_stop_queue(q);
110} 110}
111 111
112static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
113 gfp_t gfp_mask)
114{
115 return mempool_alloc(md->io_pool, gfp_mask);
116}
117
118static void free_old_rq_tio(struct dm_rq_target_io *tio)
119{
120 mempool_free(tio, tio->md->io_pool);
121}
122
123static struct request *alloc_old_clone_request(struct mapped_device *md,
124 gfp_t gfp_mask)
125{
126 return mempool_alloc(md->rq_pool, gfp_mask);
127}
128
129static void free_old_clone_request(struct mapped_device *md, struct request *rq)
130{
131 mempool_free(rq, md->rq_pool);
132}
133
134/* 112/*
135 * Partial completion handling for request-based dm 113 * Partial completion handling for request-based dm
136 */ 114 */
@@ -185,7 +163,7 @@ static void end_clone_bio(struct bio *clone)
185 163
186static struct dm_rq_target_io *tio_from_request(struct request *rq) 164static struct dm_rq_target_io *tio_from_request(struct request *rq)
187{ 165{
188 return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); 166 return blk_mq_rq_to_pdu(rq);
189} 167}
190 168
191static void rq_end_stats(struct mapped_device *md, struct request *orig) 169static void rq_end_stats(struct mapped_device *md, struct request *orig)
@@ -233,31 +211,6 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
233 dm_put(md); 211 dm_put(md);
234} 212}
235 213
236static void free_rq_clone(struct request *clone)
237{
238 struct dm_rq_target_io *tio = clone->end_io_data;
239 struct mapped_device *md = tio->md;
240
241 blk_rq_unprep_clone(clone);
242
243 /*
244 * It is possible for a clone_old_rq() allocated clone to
245 * get passed in -- it may not yet have a request_queue.
246 * This is known to occur if the error target replaces
247 * a multipath target that has a request_fn queue stacked
248 * on blk-mq queue(s).
249 */
250 if (clone->q && clone->q->mq_ops)
251 /* stacked on blk-mq queue(s) */
252 tio->ti->type->release_clone_rq(clone);
253 else if (!md->queue->mq_ops)
254 /* request_fn queue stacked on request_fn queue(s) */
255 free_old_clone_request(md, clone);
256
257 if (!md->queue->mq_ops)
258 free_old_rq_tio(tio);
259}
260
261/* 214/*
262 * Complete the clone and the original request. 215 * Complete the clone and the original request.
263 * Must be called without clone's queue lock held, 216 * Must be called without clone's queue lock held,
@@ -270,20 +223,9 @@ static void dm_end_request(struct request *clone, int error)
270 struct mapped_device *md = tio->md; 223 struct mapped_device *md = tio->md;
271 struct request *rq = tio->orig; 224 struct request *rq = tio->orig;
272 225
273 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 226 blk_rq_unprep_clone(clone);
274 rq->errors = clone->errors; 227 tio->ti->type->release_clone_rq(clone);
275 rq->resid_len = clone->resid_len;
276
277 if (rq->sense)
278 /*
279 * We are using the sense buffer of the original
280 * request.
281 * So setting the length of the sense data is enough.
282 */
283 rq->sense_len = clone->sense_len;
284 }
285 228
286 free_rq_clone(clone);
287 rq_end_stats(md, rq); 229 rq_end_stats(md, rq);
288 if (!rq->q->mq_ops) 230 if (!rq->q->mq_ops)
289 blk_end_request_all(rq, error); 231 blk_end_request_all(rq, error);
@@ -292,22 +234,6 @@ static void dm_end_request(struct request *clone, int error)
292 rq_completed(md, rw, true); 234 rq_completed(md, rw, true);
293} 235}
294 236
295static void dm_unprep_request(struct request *rq)
296{
297 struct dm_rq_target_io *tio = tio_from_request(rq);
298 struct request *clone = tio->clone;
299
300 if (!rq->q->mq_ops) {
301 rq->special = NULL;
302 rq->rq_flags &= ~RQF_DONTPREP;
303 }
304
305 if (clone)
306 free_rq_clone(clone);
307 else if (!tio->md->queue->mq_ops)
308 free_old_rq_tio(tio);
309}
310
311/* 237/*
312 * Requeue the original request of a clone. 238 * Requeue the original request of a clone.
313 */ 239 */
@@ -346,7 +272,10 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
346 int rw = rq_data_dir(rq); 272 int rw = rq_data_dir(rq);
347 273
348 rq_end_stats(md, rq); 274 rq_end_stats(md, rq);
349 dm_unprep_request(rq); 275 if (tio->clone) {
276 blk_rq_unprep_clone(tio->clone);
277 tio->ti->type->release_clone_rq(tio->clone);
278 }
350 279
351 if (!rq->q->mq_ops) 280 if (!rq->q->mq_ops)
352 dm_old_requeue_request(rq); 281 dm_old_requeue_request(rq);
@@ -401,14 +330,11 @@ static void dm_softirq_done(struct request *rq)
401 if (!clone) { 330 if (!clone) {
402 rq_end_stats(tio->md, rq); 331 rq_end_stats(tio->md, rq);
403 rw = rq_data_dir(rq); 332 rw = rq_data_dir(rq);
404 if (!rq->q->mq_ops) { 333 if (!rq->q->mq_ops)
405 blk_end_request_all(rq, tio->error); 334 blk_end_request_all(rq, tio->error);
406 rq_completed(tio->md, rw, false); 335 else
407 free_old_rq_tio(tio);
408 } else {
409 blk_mq_end_request(rq, tio->error); 336 blk_mq_end_request(rq, tio->error);
410 rq_completed(tio->md, rw, false); 337 rq_completed(tio->md, rw, false);
411 }
412 return; 338 return;
413 } 339 }
414 340
@@ -452,16 +378,6 @@ static void end_clone_request(struct request *clone, int error)
452{ 378{
453 struct dm_rq_target_io *tio = clone->end_io_data; 379 struct dm_rq_target_io *tio = clone->end_io_data;
454 380
455 if (!clone->q->mq_ops) {
456 /*
457 * For just cleaning up the information of the queue in which
458 * the clone was dispatched.
459 * The clone is *NOT* freed actually here because it is alloced
460 * from dm own mempool (RQF_ALLOCED isn't set).
461 */
462 __blk_put_request(clone->q, clone);
463 }
464
465 /* 381 /*
466 * Actual request completion is done in a softirq context which doesn't 382 * Actual request completion is done in a softirq context which doesn't
467 * hold the clone's queue lock. Otherwise, deadlock could occur because: 383 * hold the clone's queue lock. Otherwise, deadlock could occur because:
@@ -511,9 +427,6 @@ static int setup_clone(struct request *clone, struct request *rq,
511 if (r) 427 if (r)
512 return r; 428 return r;
513 429
514 clone->cmd = rq->cmd;
515 clone->cmd_len = rq->cmd_len;
516 clone->sense = rq->sense;
517 clone->end_io = end_clone_request; 430 clone->end_io = end_clone_request;
518 clone->end_io_data = tio; 431 clone->end_io_data = tio;
519 432
@@ -522,28 +435,6 @@ static int setup_clone(struct request *clone, struct request *rq,
522 return 0; 435 return 0;
523} 436}
524 437
525static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
526 struct dm_rq_target_io *tio, gfp_t gfp_mask)
527{
528 /*
529 * Create clone for use with .request_fn request_queue
530 */
531 struct request *clone;
532
533 clone = alloc_old_clone_request(md, gfp_mask);
534 if (!clone)
535 return NULL;
536
537 blk_rq_init(NULL, clone);
538 if (setup_clone(clone, rq, tio, gfp_mask)) {
539 /* -ENOMEM */
540 free_old_clone_request(md, clone);
541 return NULL;
542 }
543
544 return clone;
545}
546
547static void map_tio_request(struct kthread_work *work); 438static void map_tio_request(struct kthread_work *work);
548 439
549static void init_tio(struct dm_rq_target_io *tio, struct request *rq, 440static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
@@ -565,60 +456,6 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
565 kthread_init_work(&tio->work, map_tio_request); 456 kthread_init_work(&tio->work, map_tio_request);
566} 457}
567 458
568static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
569 struct mapped_device *md,
570 gfp_t gfp_mask)
571{
572 struct dm_rq_target_io *tio;
573 int srcu_idx;
574 struct dm_table *table;
575
576 tio = alloc_old_rq_tio(md, gfp_mask);
577 if (!tio)
578 return NULL;
579
580 init_tio(tio, rq, md);
581
582 table = dm_get_live_table(md, &srcu_idx);
583 /*
584 * Must clone a request if this .request_fn DM device
585 * is stacked on .request_fn device(s).
586 */
587 if (!dm_table_all_blk_mq_devices(table)) {
588 if (!clone_old_rq(rq, md, tio, gfp_mask)) {
589 dm_put_live_table(md, srcu_idx);
590 free_old_rq_tio(tio);
591 return NULL;
592 }
593 }
594 dm_put_live_table(md, srcu_idx);
595
596 return tio;
597}
598
599/*
600 * Called with the queue lock held.
601 */
602static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
603{
604 struct mapped_device *md = q->queuedata;
605 struct dm_rq_target_io *tio;
606
607 if (unlikely(rq->special)) {
608 DMWARN("Already has something in rq->special.");
609 return BLKPREP_KILL;
610 }
611
612 tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
613 if (!tio)
614 return BLKPREP_DEFER;
615
616 rq->special = tio;
617 rq->rq_flags |= RQF_DONTPREP;
618
619 return BLKPREP_OK;
620}
621
622/* 459/*
623 * Returns: 460 * Returns:
624 * DM_MAPIO_* : the request has been processed as indicated 461 * DM_MAPIO_* : the request has been processed as indicated
@@ -633,31 +470,18 @@ static int map_request(struct dm_rq_target_io *tio)
 	struct request *rq = tio->orig;
 	struct request *clone = NULL;
 
-	if (tio->clone) {
-		clone = tio->clone;
-		r = ti->type->map_rq(ti, clone, &tio->info);
-		if (r == DM_MAPIO_DELAY_REQUEUE)
-			return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
-	} else {
-		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
-		if (r < 0) {
-			/* The target wants to complete the I/O */
-			dm_kill_unmapped_request(rq, r);
-			return r;
-		}
-		if (r == DM_MAPIO_REMAPPED &&
-		    setup_clone(clone, rq, tio, GFP_ATOMIC)) {
-			/* -ENOMEM */
-			ti->type->release_clone_rq(clone);
-			return DM_MAPIO_REQUEUE;
-		}
-	}
-
+	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
 		break;
 	case DM_MAPIO_REMAPPED:
+		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+			/* -ENOMEM */
+			ti->type->release_clone_rq(clone);
+			return DM_MAPIO_REQUEUE;
+		}
+
 		/* The target has remapped the I/O so dispatch it */
 		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
 				     blk_rq_pos(rq));
@@ -716,6 +540,29 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
716 dm_get(md); 540 dm_get(md);
717} 541}
718 542
543static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
544{
545 struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
546
547 /*
548 * Must initialize md member of tio, otherwise it won't
549 * be available in dm_mq_queue_rq.
550 */
551 tio->md = md;
552
553 if (md->init_tio_pdu) {
554 /* target-specific per-io data is immediately after the tio */
555 tio->info.ptr = tio + 1;
556 }
557
558 return 0;
559}
560
561static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
562{
563 return __dm_rq_init_rq(q->rq_alloc_data, rq);
564}
565
719static void map_tio_request(struct kthread_work *work) 566static void map_tio_request(struct kthread_work *work)
720{ 567{
721 struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); 568 struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
@@ -814,6 +661,7 @@ static void dm_old_request_fn(struct request_queue *q)
814 dm_start_request(md, rq); 661 dm_start_request(md, rq);
815 662
816 tio = tio_from_request(rq); 663 tio = tio_from_request(rq);
664 init_tio(tio, rq, md);
817 /* Establish tio->ti before queuing work (map_tio_request) */ 665 /* Establish tio->ti before queuing work (map_tio_request) */
818 tio->ti = ti; 666 tio->ti = ti;
819 kthread_queue_work(&md->kworker, &tio->work); 667 kthread_queue_work(&md->kworker, &tio->work);
@@ -824,10 +672,23 @@ static void dm_old_request_fn(struct request_queue *q)
 /*
  * Fully initialize a .request_fn request-based queue.
  */
-int dm_old_init_request_queue(struct mapped_device *md)
+int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t)
 {
+	struct dm_target *immutable_tgt;
+
 	/* Fully initialize the queue */
-	if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
+	md->queue->cmd_size = sizeof(struct dm_rq_target_io);
+	md->queue->rq_alloc_data = md;
+	md->queue->request_fn = dm_old_request_fn;
+	md->queue->init_rq_fn = dm_rq_init_rq;
+
+	immutable_tgt = dm_table_get_immutable_target(t);
+	if (immutable_tgt && immutable_tgt->per_io_data_size) {
+		/* any target-specific per-io data is immediately after the tio */
+		md->queue->cmd_size += immutable_tgt->per_io_data_size;
+		md->init_tio_pdu = true;
+	}
+	if (blk_init_allocated_queue(md->queue) < 0)
 		return -EINVAL;
 
 	/* disable dm_old_request_fn's merge heuristic by default */
@@ -835,7 +696,6 @@ int dm_old_init_request_queue(struct mapped_device *md)
835 696
836 dm_init_normal_md_queue(md); 697 dm_init_normal_md_queue(md);
837 blk_queue_softirq_done(md->queue, dm_softirq_done); 698 blk_queue_softirq_done(md->queue, dm_softirq_done);
838 blk_queue_prep_rq(md->queue, dm_old_prep_fn);
839 699
840 /* Initialize the request-based DM worker thread */ 700 /* Initialize the request-based DM worker thread */
841 kthread_init_worker(&md->kworker); 701 kthread_init_worker(&md->kworker);
@@ -856,21 +716,7 @@ static int dm_mq_init_request(void *data, struct request *rq,
 				  unsigned int hctx_idx, unsigned int request_idx,
 				  unsigned int numa_node)
 {
-	struct mapped_device *md = data;
-	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
-
-	/*
-	 * Must initialize md member of tio, otherwise it won't
-	 * be available in dm_mq_queue_rq.
-	 */
-	tio->md = md;
-
-	if (md->init_tio_pdu) {
-		/* target-specific per-io data is immediately after the tio */
-		tio->info.ptr = tio + 1;
-	}
-
-	return 0;
+	return __dm_rq_init_rq(data, rq);
 }
 
 static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
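
[Editor's note, not part of the patch] With .map_rq gone from dm-rq.c, every request-based target now supplies only the .clone_and_map_rq/.release_clone_rq pair: the target obtains the clone itself and dm-rq runs setup_clone() and dispatch on DM_MAPIO_REMAPPED. The sketch below is a minimal illustration of that hook pair for a hypothetical single-device pass-through target; "struct passthru_ctx" and its ->lower_q member are invented for the example, and the blk_get_request() call roughly mirrors what the real multipath target does.

/* Illustrative sketch only -- not taken from this patch. */
#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/device-mapper.h>

struct passthru_ctx {
	struct request_queue *lower_q;	/* queue of the one lower device (hypothetical) */
};

static int passthru_clone_and_map_rq(struct dm_target *ti, struct request *rq,
				     union map_info *map_context,
				     struct request **clone)
{
	struct passthru_ctx *ctx = ti->private;
	struct request *c;

	/* The target, not dm-rq, allocates the clone on the lower queue. */
	c = blk_get_request(ctx->lower_q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
	if (IS_ERR(c))
		return DM_MAPIO_REQUEUE;	/* let dm-rq retry the original later */

	*clone = c;
	return DM_MAPIO_REMAPPED;		/* dm-rq calls setup_clone() and dispatches */
}

static void passthru_release_clone_rq(struct request *clone)
{
	/* Undo whatever clone_and_map_rq allocated. */
	blk_put_request(clone);
}
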
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index 4da06cae7bad..f0020d21b95f 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -48,7 +48,7 @@ struct dm_rq_clone_bio_info {
48bool dm_use_blk_mq_default(void); 48bool dm_use_blk_mq_default(void);
49bool dm_use_blk_mq(struct mapped_device *md); 49bool dm_use_blk_mq(struct mapped_device *md);
50 50
51int dm_old_init_request_queue(struct mapped_device *md); 51int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t);
52int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t); 52int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
53void dm_mq_cleanup_mapped_device(struct mapped_device *md); 53void dm_mq_cleanup_mapped_device(struct mapped_device *md);
54 54
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0a427de23ed2..3ad16d9c9d5a 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1750,7 +1750,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
1750 char b[BDEVNAME_SIZE]; 1750 char b[BDEVNAME_SIZE];
1751 1751
1752 if (likely(q)) 1752 if (likely(q))
1753 r |= bdi_congested(&q->backing_dev_info, bdi_bits); 1753 r |= bdi_congested(q->backing_dev_info, bdi_bits);
1754 else 1754 else
1755 DMWARN_LIMIT("%s: any_congested: nonexistent device %s", 1755 DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
1756 dm_device_name(t->md), 1756 dm_device_name(t->md),
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 710ae28fd618..43d3445b121d 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -131,12 +131,6 @@ static int io_err_map(struct dm_target *tt, struct bio *bio)
131 return -EIO; 131 return -EIO;
132} 132}
133 133
134static int io_err_map_rq(struct dm_target *ti, struct request *clone,
135 union map_info *map_context)
136{
137 return -EIO;
138}
139
140static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq, 134static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq,
141 union map_info *map_context, 135 union map_info *map_context,
142 struct request **clone) 136 struct request **clone)
@@ -161,7 +155,6 @@ static struct target_type error_target = {
161 .ctr = io_err_ctr, 155 .ctr = io_err_ctr,
162 .dtr = io_err_dtr, 156 .dtr = io_err_dtr,
163 .map = io_err_map, 157 .map = io_err_map,
164 .map_rq = io_err_map_rq,
165 .clone_and_map_rq = io_err_clone_and_map_rq, 158 .clone_and_map_rq = io_err_clone_and_map_rq,
166 .release_clone_rq = io_err_release_clone_rq, 159 .release_clone_rq = io_err_release_clone_rq,
167 .direct_access = io_err_direct_access, 160 .direct_access = io_err_direct_access,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index d1c05c12a9db..2b266a2b5035 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
699 699
700static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) 700static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
701{ 701{
-	return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
+	return op_is_flush(bio->bi_opf) &&
703 dm_thin_changed_this_transaction(tc->td); 703 dm_thin_changed_this_transaction(tc->td);
704} 704}
705 705
@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context,
870 struct bio *bio; 870 struct bio *bio;
871 871
872 while ((bio = bio_list_pop(&cell->bios))) { 872 while ((bio = bio_list_pop(&cell->bios))) {
-		if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-		    bio_op(bio) == REQ_OP_DISCARD)
+		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
875 bio_list_add(&info->defer_bios, bio); 874 bio_list_add(&info->defer_bios, bio);
876 else { 875 else {
877 inc_all_io_entry(info->tc->pool, bio); 876 inc_all_io_entry(info->tc->pool, bio);
@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context,
1716 struct bio *bio; 1715 struct bio *bio;
1717 1716
1718 while ((bio = bio_list_pop(&cell->bios))) { 1717 while ((bio = bio_list_pop(&cell->bios))) {
-		if ((bio_data_dir(bio) == WRITE) ||
-		    (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-		     bio_op(bio) == REQ_OP_DISCARD))
+		if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
+		    bio_op(bio) == REQ_OP_DISCARD)
1722 bio_list_add(&info->defer_bios, bio); 1720 bio_list_add(&info->defer_bios, bio);
1723 else { 1721 else {
1724 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));; 1722 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));;
@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
2635 return DM_MAPIO_SUBMITTED; 2633 return DM_MAPIO_SUBMITTED;
2636 } 2634 }
2637 2635
-	if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) ||
-	    bio_op(bio) == REQ_OP_DISCARD) {
+	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
2640 thin_defer_bio_with_throttle(tc, bio); 2637 thin_defer_bio_with_throttle(tc, bio);
2641 return DM_MAPIO_SUBMITTED; 2638 return DM_MAPIO_SUBMITTED;
2642 } 2639 }
@@ -2714,7 +2711,7 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
2714 return 1; 2711 return 1;
2715 2712
2716 q = bdev_get_queue(pt->data_dev->bdev); 2713 q = bdev_get_queue(pt->data_dev->bdev);
2717 return bdi_congested(&q->backing_dev_info, bdi_bits); 2714 return bdi_congested(q->backing_dev_info, bdi_bits);
2718} 2715}
2719 2716
2720static void requeue_bios(struct pool *pool) 2717static void requeue_bios(struct pool *pool)
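
[Editor's note, not part of the patch] The dm-thin conversions above replace the open-coded `bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)` tests with the new op_is_flush() helper. As a hedged restatement (the canonical definition lives in include/linux/blk_types.h), the helper boils down to the check below; the sketch uses a renamed copy so it does not pretend to be the kernel's own symbol.

/* Sketch of the check op_is_flush() centralizes. */
#include <linux/blk_types.h>

static inline bool my_op_is_flush(unsigned int op)
{
	/* true for any bio/request carrying preflush or FUA semantics */
	return op & (REQ_FUA | REQ_PREFLUSH);
}

/* Usage mirrors the converted dm-thin call sites: */
static bool needs_commit(struct bio *bio)
{
	return my_op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD;
}
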
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3086da5664f3..5bd9ab06a562 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -91,7 +91,6 @@ static int dm_numa_node = DM_NUMA_NODE;
91 */ 91 */
92struct dm_md_mempools { 92struct dm_md_mempools {
93 mempool_t *io_pool; 93 mempool_t *io_pool;
94 mempool_t *rq_pool;
95 struct bio_set *bs; 94 struct bio_set *bs;
96}; 95};
97 96
@@ -466,13 +465,16 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 
 	if (r > 0) {
 		/*
-		 * Target determined this ioctl is being issued against
-		 * a logical partition of the parent bdev; so extra
-		 * validation is needed.
+		 * Target determined this ioctl is being issued against a
+		 * subset of the parent bdev; require extra privileges.
 		 */
-		r = scsi_verify_blk_ioctl(NULL, cmd);
-		if (r)
+		if (!capable(CAP_SYS_RAWIO)) {
+			DMWARN_LIMIT(
+	"%s: sending ioctl %x to DM device without required privilege.",
+				current->comm, cmd);
+			r = -ENOIOCTLCMD;
 			goto out;
+		}
 	}
 
 	r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
@@ -1314,7 +1316,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
1314 * With request-based DM we only need to check the 1316 * With request-based DM we only need to check the
1315 * top-level queue for congestion. 1317 * top-level queue for congestion.
1316 */ 1318 */
1317 r = md->queue->backing_dev_info.wb.state & bdi_bits; 1319 r = md->queue->backing_dev_info->wb.state & bdi_bits;
1318 } else { 1320 } else {
1319 map = dm_get_live_table_fast(md); 1321 map = dm_get_live_table_fast(md);
1320 if (map) 1322 if (map)
@@ -1397,7 +1399,7 @@ void dm_init_md_queue(struct mapped_device *md)
1397 * - must do so here (in alloc_dev callchain) before queue is used 1399 * - must do so here (in alloc_dev callchain) before queue is used
1398 */ 1400 */
1399 md->queue->queuedata = md; 1401 md->queue->queuedata = md;
1400 md->queue->backing_dev_info.congested_data = md; 1402 md->queue->backing_dev_info->congested_data = md;
1401} 1403}
1402 1404
1403void dm_init_normal_md_queue(struct mapped_device *md) 1405void dm_init_normal_md_queue(struct mapped_device *md)
@@ -1408,7 +1410,7 @@ void dm_init_normal_md_queue(struct mapped_device *md)
1408 /* 1410 /*
1409 * Initialize aspects of queue that aren't relevant for blk-mq 1411 * Initialize aspects of queue that aren't relevant for blk-mq
1410 */ 1412 */
1411 md->queue->backing_dev_info.congested_fn = dm_any_congested; 1413 md->queue->backing_dev_info->congested_fn = dm_any_congested;
1412 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 1414 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
1413} 1415}
1414 1416
@@ -1419,7 +1421,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
1419 if (md->kworker_task) 1421 if (md->kworker_task)
1420 kthread_stop(md->kworker_task); 1422 kthread_stop(md->kworker_task);
1421 mempool_destroy(md->io_pool); 1423 mempool_destroy(md->io_pool);
1422 mempool_destroy(md->rq_pool);
1423 if (md->bs) 1424 if (md->bs)
1424 bioset_free(md->bs); 1425 bioset_free(md->bs);
1425 1426
@@ -1595,12 +1596,10 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
1595 goto out; 1596 goto out;
1596 } 1597 }
1597 1598
1598 BUG_ON(!p || md->io_pool || md->rq_pool || md->bs); 1599 BUG_ON(!p || md->io_pool || md->bs);
1599 1600
1600 md->io_pool = p->io_pool; 1601 md->io_pool = p->io_pool;
1601 p->io_pool = NULL; 1602 p->io_pool = NULL;
1602 md->rq_pool = p->rq_pool;
1603 p->rq_pool = NULL;
1604 md->bs = p->bs; 1603 md->bs = p->bs;
1605 p->bs = NULL; 1604 p->bs = NULL;
1606 1605
@@ -1777,7 +1776,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
1777 1776
1778 switch (type) { 1777 switch (type) {
1779 case DM_TYPE_REQUEST_BASED: 1778 case DM_TYPE_REQUEST_BASED:
1780 r = dm_old_init_request_queue(md); 1779 r = dm_old_init_request_queue(md, t);
1781 if (r) { 1780 if (r) {
1782 DMERR("Cannot initialize queue for request-based mapped device"); 1781 DMERR("Cannot initialize queue for request-based mapped device");
1783 return r; 1782 return r;
@@ -2493,7 +2492,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
2493 unsigned integrity, unsigned per_io_data_size) 2492 unsigned integrity, unsigned per_io_data_size)
2494{ 2493{
2495 struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id); 2494 struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
2496 struct kmem_cache *cachep = NULL;
2497 unsigned int pool_size = 0; 2495 unsigned int pool_size = 0;
2498 unsigned int front_pad; 2496 unsigned int front_pad;
2499 2497
@@ -2503,20 +2501,16 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 	switch (type) {
 	case DM_TYPE_BIO_BASED:
 	case DM_TYPE_DAX_BIO_BASED:
-		cachep = _io_cache;
 		pool_size = dm_get_reserved_bio_based_ios();
 		front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
+
+		pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+		if (!pools->io_pool)
+			goto out;
 		break;
 	case DM_TYPE_REQUEST_BASED:
-		cachep = _rq_tio_cache;
-		pool_size = dm_get_reserved_rq_based_ios();
-		pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
-		if (!pools->rq_pool)
-			goto out;
-		/* fall through to setup remaining rq-based pools */
 	case DM_TYPE_MQ_REQUEST_BASED:
-		if (!pool_size)
-			pool_size = dm_get_reserved_rq_based_ios();
+		pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
 		/* per_io_data_size is used for blk-mq pdu at queue allocation */
 		break;
@@ -2524,12 +2518,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
2524 BUG(); 2518 BUG();
2525 } 2519 }
2526 2520
2527 if (cachep) {
2528 pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
2529 if (!pools->io_pool)
2530 goto out;
2531 }
2532
2533 pools->bs = bioset_create_nobvec(pool_size, front_pad); 2521 pools->bs = bioset_create_nobvec(pool_size, front_pad);
2534 if (!pools->bs) 2522 if (!pools->bs)
2535 goto out; 2523 goto out;
@@ -2551,7 +2539,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
2551 return; 2539 return;
2552 2540
2553 mempool_destroy(pools->io_pool); 2541 mempool_destroy(pools->io_pool);
2554 mempool_destroy(pools->rq_pool);
2555 2542
2556 if (pools->bs) 2543 if (pools->bs)
2557 bioset_free(pools->bs); 2544 bioset_free(pools->bs);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index f0aad08b9654..f298b01f7ab3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -95,8 +95,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
95/* 95/*
96 * To check whether the target type is request-based or not (bio-based). 96 * To check whether the target type is request-based or not (bio-based).
97 */ 97 */
-#define dm_target_request_based(t) (((t)->type->map_rq != NULL) || \
-				     ((t)->type->clone_and_map_rq != NULL))
+#define dm_target_request_based(t) ((t)->type->clone_and_map_rq != NULL)
100 99
101/* 100/*
102 * To check whether the target type is a hybrid (capable of being 101 * To check whether the target type is a hybrid (capable of being
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 5975c9915684..f1c7bbac31a5 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -62,7 +62,7 @@ static int linear_congested(struct mddev *mddev, int bits)
62 62
63 for (i = 0; i < mddev->raid_disks && !ret ; i++) { 63 for (i = 0; i < mddev->raid_disks && !ret ; i++) {
64 struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev); 64 struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
65 ret |= bdi_congested(&q->backing_dev_info, bits); 65 ret |= bdi_congested(q->backing_dev_info, bits);
66 } 66 }
67 67
68 return ret; 68 return ret;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 01175dac0db6..ba485dcf1064 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5346,8 +5346,8 @@ int md_run(struct mddev *mddev)
5346 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue); 5346 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5347 else 5347 else
5348 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue); 5348 queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue);
5349 mddev->queue->backing_dev_info.congested_data = mddev; 5349 mddev->queue->backing_dev_info->congested_data = mddev;
5350 mddev->queue->backing_dev_info.congested_fn = md_congested; 5350 mddev->queue->backing_dev_info->congested_fn = md_congested;
5351 } 5351 }
5352 if (pers->sync_request) { 5352 if (pers->sync_request) {
5353 if (mddev->kobj.sd && 5353 if (mddev->kobj.sd &&
@@ -5704,7 +5704,7 @@ static int do_md_stop(struct mddev *mddev, int mode,
5704 5704
5705 __md_stop_writes(mddev); 5705 __md_stop_writes(mddev);
5706 __md_stop(mddev); 5706 __md_stop(mddev);
5707 mddev->queue->backing_dev_info.congested_fn = NULL; 5707 mddev->queue->backing_dev_info->congested_fn = NULL;
5708 5708
5709 /* tell userspace to handle 'inactive' */ 5709 /* tell userspace to handle 'inactive' */
5710 sysfs_notify_dirent_safe(mddev->sysfs_state); 5710 sysfs_notify_dirent_safe(mddev->sysfs_state);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index aa8c4e5c1ee2..d457afa672d5 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -169,7 +169,7 @@ static int multipath_congested(struct mddev *mddev, int bits)
169 if (rdev && !test_bit(Faulty, &rdev->flags)) { 169 if (rdev && !test_bit(Faulty, &rdev->flags)) {
170 struct request_queue *q = bdev_get_queue(rdev->bdev); 170 struct request_queue *q = bdev_get_queue(rdev->bdev);
171 171
172 ret |= bdi_congested(&q->backing_dev_info, bits); 172 ret |= bdi_congested(q->backing_dev_info, bits);
173 /* Just like multipath_map, we just check the 173 /* Just like multipath_map, we just check the
174 * first available device 174 * first available device
175 */ 175 */
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 848365d474f3..d6585239bff2 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -41,7 +41,7 @@ static int raid0_congested(struct mddev *mddev, int bits)
41 for (i = 0; i < raid_disks && !ret ; i++) { 41 for (i = 0; i < raid_disks && !ret ; i++) {
42 struct request_queue *q = bdev_get_queue(devlist[i]->bdev); 42 struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
43 43
44 ret |= bdi_congested(&q->backing_dev_info, bits); 44 ret |= bdi_congested(q->backing_dev_info, bits);
45 } 45 }
46 return ret; 46 return ret;
47} 47}
@@ -420,8 +420,8 @@ static int raid0_run(struct mddev *mddev)
420 */ 420 */
421 int stripe = mddev->raid_disks * 421 int stripe = mddev->raid_disks *
422 (mddev->chunk_sectors << 9) / PAGE_SIZE; 422 (mddev->chunk_sectors << 9) / PAGE_SIZE;
423 if (mddev->queue->backing_dev_info.ra_pages < 2* stripe) 423 if (mddev->queue->backing_dev_info->ra_pages < 2* stripe)
424 mddev->queue->backing_dev_info.ra_pages = 2* stripe; 424 mddev->queue->backing_dev_info->ra_pages = 2* stripe;
425 } 425 }
426 426
427 dump_zones(mddev); 427 dump_zones(mddev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7b0f647bcccb..830ff2b20346 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -744,9 +744,9 @@ static int raid1_congested(struct mddev *mddev, int bits)
744 * non-congested targets, it can be removed 744 * non-congested targets, it can be removed
745 */ 745 */
746 if ((bits & (1 << WB_async_congested)) || 1) 746 if ((bits & (1 << WB_async_congested)) || 1)
747 ret |= bdi_congested(&q->backing_dev_info, bits); 747 ret |= bdi_congested(q->backing_dev_info, bits);
748 else 748 else
749 ret &= bdi_congested(&q->backing_dev_info, bits); 749 ret &= bdi_congested(q->backing_dev_info, bits);
750 } 750 }
751 } 751 }
752 rcu_read_unlock(); 752 rcu_read_unlock();
@@ -1170,10 +1170,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1170 int i, disks; 1170 int i, disks;
1171 struct bitmap *bitmap = mddev->bitmap; 1171 struct bitmap *bitmap = mddev->bitmap;
1172 unsigned long flags; 1172 unsigned long flags;
1173 const int op = bio_op(bio);
1174 const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
1175 const unsigned long do_flush_fua = (bio->bi_opf &
1176 (REQ_PREFLUSH | REQ_FUA));
1177 struct md_rdev *blocked_rdev; 1173 struct md_rdev *blocked_rdev;
1178 struct blk_plug_cb *cb; 1174 struct blk_plug_cb *cb;
1179 struct raid1_plug_cb *plug = NULL; 1175 struct raid1_plug_cb *plug = NULL;
@@ -1389,7 +1385,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
1389 conf->mirrors[i].rdev->data_offset); 1385 conf->mirrors[i].rdev->data_offset);
1390 mbio->bi_bdev = conf->mirrors[i].rdev->bdev; 1386 mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
1391 mbio->bi_end_io = raid1_end_write_request; 1387 mbio->bi_end_io = raid1_end_write_request;
-		bio_set_op_attrs(mbio, op, do_flush_fua | do_sync);
+		mbio->bi_opf = bio_op(bio) |
+			(bio->bi_opf & (REQ_SYNC | REQ_PREFLUSH | REQ_FUA));
1393 if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) && 1390 if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) &&
1394 !test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) && 1391 !test_bit(WriteMostly, &conf->mirrors[i].rdev->flags) &&
1395 conf->raid_disks - mddev->degraded > 1) 1392 conf->raid_disks - mddev->degraded > 1)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1920756828df..6bc5c2a85160 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -860,7 +860,7 @@ static int raid10_congested(struct mddev *mddev, int bits)
860 if (rdev && !test_bit(Faulty, &rdev->flags)) { 860 if (rdev && !test_bit(Faulty, &rdev->flags)) {
861 struct request_queue *q = bdev_get_queue(rdev->bdev); 861 struct request_queue *q = bdev_get_queue(rdev->bdev);
862 862
863 ret |= bdi_congested(&q->backing_dev_info, bits); 863 ret |= bdi_congested(q->backing_dev_info, bits);
864 } 864 }
865 } 865 }
866 rcu_read_unlock(); 866 rcu_read_unlock();
@@ -3841,8 +3841,8 @@ static int raid10_run(struct mddev *mddev)
3841 * maybe... 3841 * maybe...
3842 */ 3842 */
3843 stripe /= conf->geo.near_copies; 3843 stripe /= conf->geo.near_copies;
3844 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 3844 if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
3845 mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 3845 mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
3846 } 3846 }
3847 3847
3848 if (md_integrity_register(mddev)) 3848 if (md_integrity_register(mddev))
@@ -4643,8 +4643,8 @@ static void end_reshape(struct r10conf *conf)
4643 int stripe = conf->geo.raid_disks * 4643 int stripe = conf->geo.raid_disks *
4644 ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE); 4644 ((conf->mddev->chunk_sectors << 9) / PAGE_SIZE);
4645 stripe /= conf->geo.near_copies; 4645 stripe /= conf->geo.near_copies;
4646 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 4646 if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
4647 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 4647 conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
4648 } 4648 }
4649 conf->fullsync = 0; 4649 conf->fullsync = 0;
4650} 4650}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3c7e106c12a2..6214e699342c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6331,10 +6331,10 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
6331 mddev_suspend(mddev); 6331 mddev_suspend(mddev);
6332 conf->skip_copy = new; 6332 conf->skip_copy = new;
6333 if (new) 6333 if (new)
6334 mddev->queue->backing_dev_info.capabilities |= 6334 mddev->queue->backing_dev_info->capabilities |=
6335 BDI_CAP_STABLE_WRITES; 6335 BDI_CAP_STABLE_WRITES;
6336 else 6336 else
6337 mddev->queue->backing_dev_info.capabilities &= 6337 mddev->queue->backing_dev_info->capabilities &=
6338 ~BDI_CAP_STABLE_WRITES; 6338 ~BDI_CAP_STABLE_WRITES;
6339 mddev_resume(mddev); 6339 mddev_resume(mddev);
6340 } 6340 }
@@ -7153,8 +7153,8 @@ static int raid5_run(struct mddev *mddev)
7153 int data_disks = conf->previous_raid_disks - conf->max_degraded; 7153 int data_disks = conf->previous_raid_disks - conf->max_degraded;
7154 int stripe = data_disks * 7154 int stripe = data_disks *
7155 ((mddev->chunk_sectors << 9) / PAGE_SIZE); 7155 ((mddev->chunk_sectors << 9) / PAGE_SIZE);
7156 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 7156 if (mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
7157 mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 7157 mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
7158 7158
7159 chunk_size = mddev->chunk_sectors << 9; 7159 chunk_size = mddev->chunk_sectors << 9;
7160 blk_queue_io_min(mddev->queue, chunk_size); 7160 blk_queue_io_min(mddev->queue, chunk_size);
@@ -7763,8 +7763,8 @@ static void end_reshape(struct r5conf *conf)
7763 int data_disks = conf->raid_disks - conf->max_degraded; 7763 int data_disks = conf->raid_disks - conf->max_degraded;
7764 int stripe = data_disks * ((conf->chunk_sectors << 9) 7764 int stripe = data_disks * ((conf->chunk_sectors << 9)
7765 / PAGE_SIZE); 7765 / PAGE_SIZE);
7766 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 7766 if (conf->mddev->queue->backing_dev_info->ra_pages < 2 * stripe)
7767 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 7767 conf->mddev->queue->backing_dev_info->ra_pages = 2 * stripe;
7768 } 7768 }
7769 } 7769 }
7770} 7770}
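
[Editor's note, not part of the patch] The md/raid and dm hunks above are mechanical fallout of request_queue->backing_dev_info becoming a pointer to a refcounted bdi rather than an embedded struct: every `&q->backing_dev_info` becomes `q->backing_dev_info`, and member accesses gain one extra indirection. A small before/after sketch of the access pattern, for illustration only:

/* Illustration of the API change driving these hunks. */
#include <linux/backing-dev.h>
#include <linux/blkdev.h>

static int queue_is_congested(struct request_queue *q, int bdi_bits)
{
	/* Pre-4.11: bdi embedded in the queue
	 *	return bdi_congested(&q->backing_dev_info, bdi_bits);
	 * 4.11: the queue only holds a pointer to the bdi
	 */
	return bdi_congested(q->backing_dev_info, bdi_bits);
}

static void bump_readahead(struct request_queue *q, unsigned long stripe_pages)
{
	if (q->backing_dev_info->ra_pages < 2 * stripe_pages)
		q->backing_dev_info->ra_pages = 2 * stripe_pages;
}
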
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index f3512404bc52..99e651c27fb7 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2000,16 +2000,6 @@ static int msb_bd_getgeo(struct block_device *bdev,
2000 return 0; 2000 return 0;
2001} 2001}
2002 2002
2003static int msb_prepare_req(struct request_queue *q, struct request *req)
2004{
2005 if (req->cmd_type != REQ_TYPE_FS) {
2006 blk_dump_rq_flags(req, "MS unsupported request");
2007 return BLKPREP_KILL;
2008 }
2009 req->rq_flags |= RQF_DONTPREP;
2010 return BLKPREP_OK;
2011}
2012
2013static void msb_submit_req(struct request_queue *q) 2003static void msb_submit_req(struct request_queue *q)
2014{ 2004{
2015 struct memstick_dev *card = q->queuedata; 2005 struct memstick_dev *card = q->queuedata;
@@ -2132,7 +2122,6 @@ static int msb_init_disk(struct memstick_dev *card)
2132 } 2122 }
2133 2123
2134 msb->queue->queuedata = card; 2124 msb->queue->queuedata = card;
2135 blk_queue_prep_rq(msb->queue, msb_prepare_req);
2136 2125
2137 blk_queue_bounce_limit(msb->queue, limit); 2126 blk_queue_bounce_limit(msb->queue, limit);
2138 blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES); 2127 blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index fa0746d182ff..c00d8a266878 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -827,18 +827,6 @@ static void mspro_block_start(struct memstick_dev *card)
827 spin_unlock_irqrestore(&msb->q_lock, flags); 827 spin_unlock_irqrestore(&msb->q_lock, flags);
828} 828}
829 829
830static int mspro_block_prepare_req(struct request_queue *q, struct request *req)
831{
832 if (req->cmd_type != REQ_TYPE_FS) {
833 blk_dump_rq_flags(req, "MSPro unsupported request");
834 return BLKPREP_KILL;
835 }
836
837 req->rq_flags |= RQF_DONTPREP;
838
839 return BLKPREP_OK;
840}
841
842static void mspro_block_submit_req(struct request_queue *q) 830static void mspro_block_submit_req(struct request_queue *q)
843{ 831{
844 struct memstick_dev *card = q->queuedata; 832 struct memstick_dev *card = q->queuedata;
@@ -1228,7 +1216,6 @@ static int mspro_block_init_disk(struct memstick_dev *card)
1228 } 1216 }
1229 1217
1230 msb->queue->queuedata = card; 1218 msb->queue->queuedata = card;
1231 blk_queue_prep_rq(msb->queue, mspro_block_prepare_req);
1232 1219
1233 blk_queue_bounce_limit(msb->queue, limit); 1220 blk_queue_bounce_limit(msb->queue, limit);
1234 blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES); 1221 blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 7ee1667acde4..b8c4b2ba7519 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2320,10 +2320,10 @@ static int mptsas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2320 SmpPassthroughReply_t *smprep; 2320 SmpPassthroughReply_t *smprep;
2321 2321
2322 smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply; 2322 smprep = (SmpPassthroughReply_t *)ioc->sas_mgmt.reply;
-		memcpy(req->sense, smprep, sizeof(*smprep));
-		req->sense_len = sizeof(*smprep);
-		req->resid_len = 0;
-		rsp->resid_len -= smprep->ResponseDataLength;
+		memcpy(scsi_req(req)->sense, smprep, sizeof(*smprep));
+		scsi_req(req)->sense_len = sizeof(*smprep);
+		scsi_req(req)->resid_len = 0;
+		scsi_req(rsp)->resid_len -= smprep->ResponseDataLength;
2327 } else { 2327 } else {
2328 printk(MYIOC_s_ERR_FMT 2328 printk(MYIOC_s_ERR_FMT
2329 "%s: smp passthru reply failed to be returned\n", 2329 "%s: smp passthru reply failed to be returned\n",
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index a6496d8027bc..033f641eb8b7 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -30,15 +30,6 @@ static int mmc_prep_request(struct request_queue *q, struct request *req)
30{ 30{
31 struct mmc_queue *mq = q->queuedata; 31 struct mmc_queue *mq = q->queuedata;
32 32
33 /*
34 * We only like normal block requests and discards.
35 */
36 if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD &&
37 req_op(req) != REQ_OP_SECURE_ERASE) {
38 blk_dump_rq_flags(req, "MMC bad request");
39 return BLKPREP_KILL;
40 }
41
42 if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq))) 33 if (mq && (mmc_card_removed(mq->card) || mmc_access_rpmb(mq)))
43 return BLKPREP_KILL; 34 return BLKPREP_KILL;
44 35
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index df8a5ef334c0..6b8d5cd7dbf6 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -84,9 +84,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
84 nsect = blk_rq_cur_bytes(req) >> tr->blkshift; 84 nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
85 buf = bio_data(req->bio); 85 buf = bio_data(req->bio);
86 86
87 if (req->cmd_type != REQ_TYPE_FS)
88 return -EIO;
89
90 if (req_op(req) == REQ_OP_FLUSH) 87 if (req_op(req) == REQ_OP_FLUSH)
91 return tr->flush(dev); 88 return tr->flush(dev);
92 89
@@ -94,16 +91,16 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 	    get_capacity(req->rq_disk))
 		return -EIO;
 
-	if (req_op(req) == REQ_OP_DISCARD)
+	switch (req_op(req)) {
+	case REQ_OP_DISCARD:
 		return tr->discard(dev, block, nsect);
-
-	if (rq_data_dir(req) == READ) {
+	case REQ_OP_READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
 			if (tr->readsect(dev, block, buf))
 				return -EIO;
 		rq_flush_dcache_pages(req);
 		return 0;
-	} else {
+	case REQ_OP_WRITE:
 		if (!tr->writesect)
 			return -EIO;
 
@@ -112,6 +109,8 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 		if (tr->writesect(dev, block, buf))
 			return -EIO;
 		return 0;
+	default:
+		return -EIO;
 	}
 }
117 116
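
[Editor's note, not part of the patch] With the REQ_TYPE_* command types gone, drivers such as mtd_blkdevs, ubi block, mmc and memstick above stop filtering on `req->cmd_type != REQ_TYPE_FS` and instead dispatch on req_op(), rejecting anything they do not recognize. The sketch below shows the generic shape of that conversion for a toy driver; "toy_read", "toy_write" and "toy_discard" are hypothetical stubs, not kernel functions.

/* Generic req_op() dispatch pattern these conversions adopt. */
#include <linux/blkdev.h>

static int toy_read(struct request *req)    { return 0; } /* stub */
static int toy_write(struct request *req)   { return 0; } /* stub */
static int toy_discard(struct request *req) { return 0; } /* stub */

static int toy_handle_request(struct request *req)
{
	switch (req_op(req)) {
	case REQ_OP_READ:
		return toy_read(req);
	case REQ_OP_WRITE:
		return toy_write(req);
	case REQ_OP_DISCARD:
		return toy_discard(req);
	case REQ_OP_FLUSH:
		return 0;	/* nothing cached in this toy driver */
	default:
		/* passthrough and unknown ops are simply refused */
		return -EIO;
	}
}
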
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index d1e6931c132f..c80869e60909 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -323,16 +323,15 @@ static int ubiblock_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct ubiblock *dev = hctx->queue->queuedata;
 	struct ubiblock_pdu *pdu = blk_mq_rq_to_pdu(req);
 
-	if (req->cmd_type != REQ_TYPE_FS)
+	switch (req_op(req)) {
+	case REQ_OP_READ:
+		ubi_sgl_init(&pdu->usgl);
+		queue_work(dev->wq, &pdu->work);
+		return BLK_MQ_RQ_QUEUE_OK;
+	default:
 		return BLK_MQ_RQ_QUEUE_ERROR;
+	}
 
-	if (rq_data_dir(req) != READ)
-		return BLK_MQ_RQ_QUEUE_ERROR; /* Write not implemented */
-
-	ubi_sgl_init(&pdu->usgl);
-	queue_work(dev->wq, &pdu->work);
-
-	return BLK_MQ_RQ_QUEUE_OK;
 }
337 336
338static int ubiblock_init_request(void *data, struct request *req, 337static int ubiblock_init_request(void *data, struct request *req,
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8a3c3e32a704..44a1a257e0b5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -208,18 +208,18 @@ EXPORT_SYMBOL_GPL(nvme_requeue_req);
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, unsigned int flags, int qid)
 {
+	unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
 	struct request *req;
 
 	if (qid == NVME_QID_ANY) {
-		req = blk_mq_alloc_request(q, nvme_is_write(cmd), flags);
+		req = blk_mq_alloc_request(q, op, flags);
 	} else {
-		req = blk_mq_alloc_request_hctx(q, nvme_is_write(cmd), flags,
+		req = blk_mq_alloc_request_hctx(q, op, flags,
 				qid ? qid - 1 : 0);
 	}
 	if (IS_ERR(req))
 		return req;
 
-	req->cmd_type = REQ_TYPE_DRV_PRIV;
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
 	nvme_req(req)->cmd = cmd;
 
@@ -238,26 +238,38 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
 static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmnd)
 {
+	unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
 	struct nvme_dsm_range *range;
-	unsigned int nr_bytes = blk_rq_bytes(req);
+	struct bio *bio;
 
-	range = kmalloc(sizeof(*range), GFP_ATOMIC);
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
 	if (!range)
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
-	range->cattr = cpu_to_le32(0);
-	range->nlb = cpu_to_le32(nr_bytes >> ns->lba_shift);
-	range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+	__rq_for_each_bio(bio, req) {
+		u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
+		u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+
+		range[n].cattr = cpu_to_le32(0);
+		range[n].nlb = cpu_to_le32(nlb);
+		range[n].slba = cpu_to_le64(slba);
+		n++;
+	}
+
+	if (WARN_ON_ONCE(n != segments)) {
+		kfree(range);
+		return BLK_MQ_RQ_QUEUE_ERROR;
+	}
 
 	memset(cmnd, 0, sizeof(*cmnd));
 	cmnd->dsm.opcode = nvme_cmd_dsm;
 	cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
-	cmnd->dsm.nr = 0;
+	cmnd->dsm.nr = segments - 1;
 	cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
 	req->special_vec.bv_page = virt_to_page(range);
 	req->special_vec.bv_offset = offset_in_page(range);
-	req->special_vec.bv_len = sizeof(*range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
 	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
 	return BLK_MQ_RQ_QUEUE_OK;
@@ -309,17 +321,27 @@ int nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 {
 	int ret = BLK_MQ_RQ_QUEUE_OK;
 
-	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+	switch (req_op(req)) {
+	case REQ_OP_DRV_IN:
+	case REQ_OP_DRV_OUT:
 		memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
-	else if (req_op(req) == REQ_OP_FLUSH)
+		break;
+	case REQ_OP_FLUSH:
 		nvme_setup_flush(ns, cmd);
-	else if (req_op(req) == REQ_OP_DISCARD)
+		break;
+	case REQ_OP_DISCARD:
 		ret = nvme_setup_discard(ns, req, cmd);
-	else
+		break;
+	case REQ_OP_READ:
+	case REQ_OP_WRITE:
 		nvme_setup_rw(ns, req, cmd);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return BLK_MQ_RQ_QUEUE_ERROR;
+	}
 
 	cmd->common.command_id = req->tag;
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_setup_cmd);
@@ -784,6 +806,13 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
784 return nvme_sg_io(ns, (void __user *)arg); 806 return nvme_sg_io(ns, (void __user *)arg);
785#endif 807#endif
786 default: 808 default:
809#ifdef CONFIG_NVM
810 if (ns->ndev)
811 return nvme_nvm_ioctl(ns, cmd, arg);
812#endif
813 if (is_sed_ioctl(cmd))
814 return sed_ioctl(ns->ctrl->opal_dev, cmd,
815 (void __user *) arg);
787 return -ENOTTY; 816 return -ENOTTY;
788 } 817 }
789} 818}
@@ -861,6 +890,9 @@ static void nvme_config_discard(struct nvme_ns *ns)
861 struct nvme_ctrl *ctrl = ns->ctrl; 890 struct nvme_ctrl *ctrl = ns->ctrl;
862 u32 logical_block_size = queue_logical_block_size(ns->queue); 891 u32 logical_block_size = queue_logical_block_size(ns->queue);
863 892
893 BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
894 NVME_DSM_MAX_RANGES);
895
864 if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES) 896 if (ctrl->quirks & NVME_QUIRK_DISCARD_ZEROES)
865 ns->queue->limits.discard_zeroes_data = 1; 897 ns->queue->limits.discard_zeroes_data = 1;
866 else 898 else
@@ -869,6 +901,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
869 ns->queue->limits.discard_alignment = logical_block_size; 901 ns->queue->limits.discard_alignment = logical_block_size;
870 ns->queue->limits.discard_granularity = logical_block_size; 902 ns->queue->limits.discard_granularity = logical_block_size;
871 blk_queue_max_discard_sectors(ns->queue, UINT_MAX); 903 blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
904 blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
872 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); 905 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
873} 906}
874 907
@@ -1051,6 +1084,28 @@ static const struct pr_ops nvme_pr_ops = {
1051 .pr_clear = nvme_pr_clear, 1084 .pr_clear = nvme_pr_clear,
1052}; 1085};
1053 1086
1087#ifdef CONFIG_BLK_SED_OPAL
1088int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
1089 bool send)
1090{
1091 struct nvme_ctrl *ctrl = data;
1092 struct nvme_command cmd;
1093
1094 memset(&cmd, 0, sizeof(cmd));
1095 if (send)
1096 cmd.common.opcode = nvme_admin_security_send;
1097 else
1098 cmd.common.opcode = nvme_admin_security_recv;
1099 cmd.common.nsid = 0;
1100 cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
1101 cmd.common.cdw10[1] = cpu_to_le32(len);
1102
1103 return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
1104 ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0);
1105}
1106EXPORT_SYMBOL_GPL(nvme_sec_submit);
1107#endif /* CONFIG_BLK_SED_OPAL */
1108
1054static const struct block_device_operations nvme_fops = { 1109static const struct block_device_operations nvme_fops = {
1055 .owner = THIS_MODULE, 1110 .owner = THIS_MODULE,
1056 .ioctl = nvme_ioctl, 1111 .ioctl = nvme_ioctl,
@@ -1230,6 +1285,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1230 return -EIO; 1285 return -EIO;
1231 } 1286 }
1232 1287
1288 ctrl->oacs = le16_to_cpu(id->oacs);
1233 ctrl->vid = le16_to_cpu(id->vid); 1289 ctrl->vid = le16_to_cpu(id->vid);
1234 ctrl->oncs = le16_to_cpup(&id->oncs); 1290 ctrl->oncs = le16_to_cpup(&id->oncs);
1235 atomic_set(&ctrl->abort_limit, id->acl + 1); 1291 atomic_set(&ctrl->abort_limit, id->acl + 1);
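
[Editor's note, not part of the patch] The nvme_setup_discard() rewrite above emits one DSM range per merged discard bio instead of a single range per request, and nvme_config_discard() advertises the per-request range budget with blk_queue_max_discard_segments(). The sketch below shows the two halves of that pattern for a hypothetical driver; TOY_MAX_DISCARD_RANGES and the toy_* helpers are invented for illustration.

/* Illustrative sketch of multi-range discard support. */
#include <linux/blkdev.h>
#include <linux/blk-mq.h>

#define TOY_MAX_DISCARD_RANGES 64	/* hypothetical device limit */

static void toy_setup_discard_limits(struct request_queue *q)
{
	blk_queue_max_discard_sectors(q, UINT_MAX);
	/* allow the block layer to merge up to this many discard bios per request */
	blk_queue_max_discard_segments(q, TOY_MAX_DISCARD_RANGES);
}

static unsigned toy_fill_ranges(struct request *req, u64 *slba, u32 *nlb,
				unsigned max_ranges)
{
	struct bio *bio;
	unsigned n = 0;

	/* one range per bio in the merged discard request */
	__rq_for_each_bio(bio, req) {
		if (n >= max_ranges)
			break;
		slba[n] = bio->bi_iter.bi_sector;	/* in 512-byte sectors here */
		nlb[n]  = bio->bi_iter.bi_size >> 9;
		n++;
	}
	return n;	/* should equal blk_rq_nr_discard_segments(req) */
}
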
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e65041c640cb..fb51a8de9b29 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1937,7 +1937,7 @@ nvme_fc_complete_rq(struct request *rq)
1937 return; 1937 return;
1938 } 1938 }
1939 1939
-	if (rq->cmd_type == REQ_TYPE_DRV_PRIV)
+	if (blk_rq_is_passthrough(rq))
1941 error = rq->errors; 1941 error = rq->errors;
1942 else 1942 else
1943 error = nvme_error_status(rq->errors); 1943 error = nvme_error_status(rq->errors);
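
[Editor's note, not part of the patch] The fc.c change above is typical of the cmd_type removal: instead of comparing rq->cmd_type against REQ_TYPE_DRV_PRIV or REQ_TYPE_BLOCK_PC, drivers ask blk_rq_is_passthrough(). A hedged sketch of the completion-path idiom; toy_map_completion_status() and its error translation are invented for the example.

#include <linux/blkdev.h>

static int toy_map_completion_status(struct request *rq, int driver_status)
{
	/* Passthrough (driver-private) requests report the raw driver status;
	 * filesystem requests get it translated to a normal errno. */
	if (blk_rq_is_passthrough(rq))
		return driver_status;

	return driver_status ? -EIO : 0;	/* toy translation */
}
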
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 588d4a34c083..21cac8523bd8 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -26,6 +26,8 @@
26#include <linux/bitops.h> 26#include <linux/bitops.h>
27#include <linux/lightnvm.h> 27#include <linux/lightnvm.h>
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/sched/sysctl.h>
30#include <uapi/linux/lightnvm.h>
29 31
30enum nvme_nvm_admin_opcode { 32enum nvme_nvm_admin_opcode {
31 nvme_nvm_admin_identity = 0xe2, 33 nvme_nvm_admin_identity = 0xe2,
@@ -248,50 +250,48 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
248{ 250{
249 struct nvme_nvm_id_group *src; 251 struct nvme_nvm_id_group *src;
250 struct nvm_id_group *dst; 252 struct nvm_id_group *dst;
251 int i, end;
252
253 end = min_t(u32, 4, nvm_id->cgrps);
254
255 for (i = 0; i < end; i++) {
256 src = &nvme_nvm_id->groups[i];
257 dst = &nvm_id->groups[i];
258
259 dst->mtype = src->mtype;
260 dst->fmtype = src->fmtype;
261 dst->num_ch = src->num_ch;
262 dst->num_lun = src->num_lun;
263 dst->num_pln = src->num_pln;
264
265 dst->num_pg = le16_to_cpu(src->num_pg);
266 dst->num_blk = le16_to_cpu(src->num_blk);
267 dst->fpg_sz = le16_to_cpu(src->fpg_sz);
268 dst->csecs = le16_to_cpu(src->csecs);
269 dst->sos = le16_to_cpu(src->sos);
270
271 dst->trdt = le32_to_cpu(src->trdt);
272 dst->trdm = le32_to_cpu(src->trdm);
273 dst->tprt = le32_to_cpu(src->tprt);
274 dst->tprm = le32_to_cpu(src->tprm);
275 dst->tbet = le32_to_cpu(src->tbet);
276 dst->tbem = le32_to_cpu(src->tbem);
277 dst->mpos = le32_to_cpu(src->mpos);
278 dst->mccap = le32_to_cpu(src->mccap);
279
280 dst->cpar = le16_to_cpu(src->cpar);
281
282 if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
283 memcpy(dst->lptbl.id, src->lptbl.id, 8);
284 dst->lptbl.mlc.num_pairs =
285 le16_to_cpu(src->lptbl.mlc.num_pairs);
286
287 if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
288 pr_err("nvm: number of MLC pairs not supported\n");
289 return -EINVAL;
290 }
291 253
292 memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, 254 if (nvme_nvm_id->cgrps != 1)
293 dst->lptbl.mlc.num_pairs); 255 return -EINVAL;
256
257 src = &nvme_nvm_id->groups[0];
258 dst = &nvm_id->grp;
259
260 dst->mtype = src->mtype;
261 dst->fmtype = src->fmtype;
262 dst->num_ch = src->num_ch;
263 dst->num_lun = src->num_lun;
264 dst->num_pln = src->num_pln;
265
266 dst->num_pg = le16_to_cpu(src->num_pg);
267 dst->num_blk = le16_to_cpu(src->num_blk);
268 dst->fpg_sz = le16_to_cpu(src->fpg_sz);
269 dst->csecs = le16_to_cpu(src->csecs);
270 dst->sos = le16_to_cpu(src->sos);
271
272 dst->trdt = le32_to_cpu(src->trdt);
273 dst->trdm = le32_to_cpu(src->trdm);
274 dst->tprt = le32_to_cpu(src->tprt);
275 dst->tprm = le32_to_cpu(src->tprm);
276 dst->tbet = le32_to_cpu(src->tbet);
277 dst->tbem = le32_to_cpu(src->tbem);
278 dst->mpos = le32_to_cpu(src->mpos);
279 dst->mccap = le32_to_cpu(src->mccap);
280
281 dst->cpar = le16_to_cpu(src->cpar);
282
283 if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
284 memcpy(dst->lptbl.id, src->lptbl.id, 8);
285 dst->lptbl.mlc.num_pairs =
286 le16_to_cpu(src->lptbl.mlc.num_pairs);
287
288 if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
289 pr_err("nvm: number of MLC pairs not supported\n");
290 return -EINVAL;
294 } 291 }
292
293 memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
294 dst->lptbl.mlc.num_pairs);
295 } 295 }
296 296
297 return 0; 297 return 0;
@@ -321,7 +321,6 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
321 321
322 nvm_id->ver_id = nvme_nvm_id->ver_id; 322 nvm_id->ver_id = nvme_nvm_id->ver_id;
323 nvm_id->vmnt = nvme_nvm_id->vmnt; 323 nvm_id->vmnt = nvme_nvm_id->vmnt;
324 nvm_id->cgrps = nvme_nvm_id->cgrps;
325 nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); 324 nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
326 nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); 325 nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
327 memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, 326 memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
@@ -372,7 +371,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
372 } 371 }
373 372
374 /* Transform physical address to target address space */ 373 /* Transform physical address to target address space */
375 nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb); 374 nvm_part_to_tgt(nvmdev, entries, cmd_nlb);
376 375
377 if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { 376 if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
378 ret = -EINTR; 377 ret = -EINTR;
@@ -485,7 +484,8 @@ static void nvme_nvm_end_io(struct request *rq, int error)
485 struct nvm_rq *rqd = rq->end_io_data; 484 struct nvm_rq *rqd = rq->end_io_data;
486 485
487 rqd->ppa_status = nvme_req(rq)->result.u64; 486 rqd->ppa_status = nvme_req(rq)->result.u64;
488 nvm_end_io(rqd, error); 487 rqd->error = error;
488 nvm_end_io(rqd);
489 489
490 kfree(nvme_req(rq)->cmd); 490 kfree(nvme_req(rq)->cmd);
491 blk_mq_free_request(rq); 491 blk_mq_free_request(rq);
@@ -586,6 +586,224 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
586 .max_phys_sect = 64, 586 .max_phys_sect = 64,
587}; 587};
588 588
589static void nvme_nvm_end_user_vio(struct request *rq, int error)
590{
591 struct completion *waiting = rq->end_io_data;
592
593 complete(waiting);
594}
595
596static int nvme_nvm_submit_user_cmd(struct request_queue *q,
597 struct nvme_ns *ns,
598 struct nvme_nvm_command *vcmd,
599 void __user *ubuf, unsigned int bufflen,
600 void __user *meta_buf, unsigned int meta_len,
601 void __user *ppa_buf, unsigned int ppa_len,
602 u32 *result, u64 *status, unsigned int timeout)
603{
604 bool write = nvme_is_write((struct nvme_command *)vcmd);
605 struct nvm_dev *dev = ns->ndev;
606 struct gendisk *disk = ns->disk;
607 struct request *rq;
608 struct bio *bio = NULL;
609 __le64 *ppa_list = NULL;
610 dma_addr_t ppa_dma;
611 __le64 *metadata = NULL;
612 dma_addr_t metadata_dma;
613 DECLARE_COMPLETION_ONSTACK(wait);
614 int ret;
615
616 rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
617 NVME_QID_ANY);
618 if (IS_ERR(rq)) {
619 ret = -ENOMEM;
620 goto err_cmd;
621 }
622
623 rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
624
625 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
626 rq->end_io_data = &wait;
627
628 if (ppa_buf && ppa_len) {
629 ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
630 if (!ppa_list) {
631 ret = -ENOMEM;
632 goto err_rq;
633 }
634 if (copy_from_user(ppa_list, (void __user *)ppa_buf,
635 sizeof(u64) * (ppa_len + 1))) {
636 ret = -EFAULT;
637 goto err_ppa;
638 }
639 vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
640 } else {
641 vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
642 }
643
644 if (ubuf && bufflen) {
645 ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
646 if (ret)
647 goto err_ppa;
648 bio = rq->bio;
649
650 if (meta_buf && meta_len) {
651 metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
652 &metadata_dma);
653 if (!metadata) {
654 ret = -ENOMEM;
655 goto err_map;
656 }
657
658 if (write) {
659 if (copy_from_user(metadata,
660 (void __user *)meta_buf,
661 meta_len)) {
662 ret = -EFAULT;
663 goto err_meta;
664 }
665 }
666 vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
667 }
668
669 if (!disk)
670 goto submit;
671
672 bio->bi_bdev = bdget_disk(disk, 0);
673 if (!bio->bi_bdev) {
674 ret = -ENODEV;
675 goto err_meta;
676 }
677 }
678
679submit:
680 blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
681
682 wait_for_completion_io(&wait);
683
684 ret = nvme_error_status(rq->errors);
685 if (result)
686 *result = rq->errors & 0x7ff;
687 if (status)
688 *status = le64_to_cpu(nvme_req(rq)->result.u64);
689
690 if (metadata && !ret && !write) {
691 if (copy_to_user(meta_buf, (void *)metadata, meta_len))
692 ret = -EFAULT;
693 }
694err_meta:
695 if (meta_buf && meta_len)
696 dma_pool_free(dev->dma_pool, metadata, metadata_dma);
697err_map:
698 if (bio) {
699 if (disk && bio->bi_bdev)
700 bdput(bio->bi_bdev);
701 blk_rq_unmap_user(bio);
702 }
703err_ppa:
704 if (ppa_buf && ppa_len)
705 dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
706err_rq:
707 blk_mq_free_request(rq);
708err_cmd:
709 return ret;
710}
711
712static int nvme_nvm_submit_vio(struct nvme_ns *ns,
713 struct nvm_user_vio __user *uvio)
714{
715 struct nvm_user_vio vio;
716 struct nvme_nvm_command c;
717 unsigned int length;
718 int ret;
719
720 if (copy_from_user(&vio, uvio, sizeof(vio)))
721 return -EFAULT;
722 if (vio.flags)
723 return -EINVAL;
724
725 memset(&c, 0, sizeof(c));
726 c.ph_rw.opcode = vio.opcode;
727 c.ph_rw.nsid = cpu_to_le32(ns->ns_id);
728 c.ph_rw.control = cpu_to_le16(vio.control);
729 c.ph_rw.length = cpu_to_le16(vio.nppas);
730
731 length = (vio.nppas + 1) << ns->lba_shift;
732
733 ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
734 (void __user *)(uintptr_t)vio.addr, length,
735 (void __user *)(uintptr_t)vio.metadata,
736 vio.metadata_len,
737 (void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
738 &vio.result, &vio.status, 0);
739
740 if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
741 return -EFAULT;
742
743 return ret;
744}
745
746static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
747 struct nvm_passthru_vio __user *uvcmd)
748{
749 struct nvm_passthru_vio vcmd;
750 struct nvme_nvm_command c;
751 struct request_queue *q;
752 unsigned int timeout = 0;
753 int ret;
754
755 if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
756 return -EFAULT;
757 if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
758 return -EACCES;
759 if (vcmd.flags)
760 return -EINVAL;
761
762 memset(&c, 0, sizeof(c));
763 c.common.opcode = vcmd.opcode;
764 c.common.nsid = cpu_to_le32(ns->ns_id);
765 c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
766 c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
767 /* cdw11-12 */
768 c.ph_rw.length = cpu_to_le16(vcmd.nppas);
769 c.ph_rw.control = cpu_to_le32(vcmd.control);
770 c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
771 c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
772 c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);
773
774 if (vcmd.timeout_ms)
775 timeout = msecs_to_jiffies(vcmd.timeout_ms);
776
777 q = admin ? ns->ctrl->admin_q : ns->queue;
778
779 ret = nvme_nvm_submit_user_cmd(q, ns,
780 (struct nvme_nvm_command *)&c,
781 (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
782 (void __user *)(uintptr_t)vcmd.metadata,
783 vcmd.metadata_len,
784 (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
785 &vcmd.result, &vcmd.status, timeout);
786
787 if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
788 return -EFAULT;
789
790 return ret;
791}
792
793int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
794{
795 switch (cmd) {
796 case NVME_NVM_IOCTL_ADMIN_VIO:
797 return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
798 case NVME_NVM_IOCTL_IO_VIO:
799 return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
800 case NVME_NVM_IOCTL_SUBMIT_VIO:
801 return nvme_nvm_submit_vio(ns, (void __user *)arg);
802 default:
803 return -ENOTTY;
804 }
805}
806
589int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) 807int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
590{ 808{
591 struct request_queue *q = ns->queue; 809 struct request_queue *q = ns->queue;
@@ -622,7 +840,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
622 return 0; 840 return 0;
623 841
624 id = &ndev->identity; 842 id = &ndev->identity;
625 grp = &id->groups[0]; 843 grp = &id->grp;
626 attr = &dattr->attr; 844 attr = &dattr->attr;
627 845
628 if (strcmp(attr->name, "version") == 0) { 846 if (strcmp(attr->name, "version") == 0) {
@@ -633,10 +851,9 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
633 return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); 851 return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
634 } else if (strcmp(attr->name, "device_mode") == 0) { 852 } else if (strcmp(attr->name, "device_mode") == 0) {
635 return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); 853 return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
854 /* kept for compatibility */
636 } else if (strcmp(attr->name, "media_manager") == 0) { 855 } else if (strcmp(attr->name, "media_manager") == 0) {
637 if (!ndev->mt) 856 return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
638 return scnprintf(page, PAGE_SIZE, "%s\n", "none");
639 return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name);
640 } else if (strcmp(attr->name, "ppa_format") == 0) { 857 } else if (strcmp(attr->name, "ppa_format") == 0) {
641 return scnprintf(page, PAGE_SIZE, 858 return scnprintf(page, PAGE_SIZE,
642 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", 859 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index aead6d08ed2c..14cfc6f7facb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -19,6 +19,7 @@
19#include <linux/kref.h> 19#include <linux/kref.h>
20#include <linux/blk-mq.h> 20#include <linux/blk-mq.h>
21#include <linux/lightnvm.h> 21#include <linux/lightnvm.h>
22#include <linux/sed-opal.h>
22 23
23enum { 24enum {
24 /* 25 /*
@@ -125,6 +126,8 @@ struct nvme_ctrl {
125 struct list_head node; 126 struct list_head node;
126 struct ida ns_ida; 127 struct ida ns_ida;
127 128
129 struct opal_dev *opal_dev;
130
128 char name[12]; 131 char name[12];
129 char serial[20]; 132 char serial[20];
130 char model[40]; 133 char model[40];
@@ -137,6 +140,7 @@ struct nvme_ctrl {
137 u32 max_hw_sectors; 140 u32 max_hw_sectors;
138 u16 oncs; 141 u16 oncs;
139 u16 vid; 142 u16 vid;
143 u16 oacs;
140 atomic_t abort_limit; 144 atomic_t abort_limit;
141 u8 event_limit; 145 u8 event_limit;
142 u8 vwc; 146 u8 vwc;
@@ -267,6 +271,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl);
267void nvme_queue_scan(struct nvme_ctrl *ctrl); 271void nvme_queue_scan(struct nvme_ctrl *ctrl);
268void nvme_remove_namespaces(struct nvme_ctrl *ctrl); 272void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
269 273
274int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
275 bool send);
276
270#define NVME_NR_AERS 1 277#define NVME_NR_AERS 1
271void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, 278void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
272 union nvme_result *res); 279 union nvme_result *res);
@@ -318,6 +325,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
318void nvme_nvm_unregister(struct nvme_ns *ns); 325void nvme_nvm_unregister(struct nvme_ns *ns);
319int nvme_nvm_register_sysfs(struct nvme_ns *ns); 326int nvme_nvm_register_sysfs(struct nvme_ns *ns);
320void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); 327void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
328int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
321#else 329#else
322static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, 330static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
323 int node) 331 int node)
@@ -335,6 +343,11 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i
335{ 343{
336 return 0; 344 return 0;
337} 345}
346static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
347 unsigned long arg)
348{
349 return -ENOTTY;
350}
338#endif /* CONFIG_NVM */ 351#endif /* CONFIG_NVM */
339 352
340static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) 353static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3faefabf339c..ddc51adb594d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -43,6 +43,7 @@
43#include <linux/types.h> 43#include <linux/types.h>
44#include <linux/io-64-nonatomic-lo-hi.h> 44#include <linux/io-64-nonatomic-lo-hi.h>
45#include <asm/unaligned.h> 45#include <asm/unaligned.h>
46#include <linux/sed-opal.h>
46 47
47#include "nvme.h" 48#include "nvme.h"
48 49
@@ -588,7 +589,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
588 */ 589 */
589 if (ns && ns->ms && !blk_integrity_rq(req)) { 590 if (ns && ns->ms && !blk_integrity_rq(req)) {
590 if (!(ns->pi_type && ns->ms == 8) && 591 if (!(ns->pi_type && ns->ms == 8) &&
591 req->cmd_type != REQ_TYPE_DRV_PRIV) { 592 !blk_rq_is_passthrough(req)) {
592 blk_mq_end_request(req, -EFAULT); 593 blk_mq_end_request(req, -EFAULT);
593 return BLK_MQ_RQ_QUEUE_OK; 594 return BLK_MQ_RQ_QUEUE_OK;
594 } 595 }
@@ -645,7 +646,7 @@ static void nvme_complete_rq(struct request *req)
645 return; 646 return;
646 } 647 }
647 648
648 if (req->cmd_type == REQ_TYPE_DRV_PRIV) 649 if (blk_rq_is_passthrough(req))
649 error = req->errors; 650 error = req->errors;
650 else 651 else
651 error = nvme_error_status(req->errors); 652 error = nvme_error_status(req->errors);
@@ -895,12 +896,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
895 return BLK_EH_HANDLED; 896 return BLK_EH_HANDLED;
896 } 897 }
897 898
898 iod->aborted = 1;
899
900 if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { 899 if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
901 atomic_inc(&dev->ctrl.abort_limit); 900 atomic_inc(&dev->ctrl.abort_limit);
902 return BLK_EH_RESET_TIMER; 901 return BLK_EH_RESET_TIMER;
903 } 902 }
903 iod->aborted = 1;
904 904
905 memset(&cmd, 0, sizeof(cmd)); 905 memset(&cmd, 0, sizeof(cmd));
906 cmd.abort.opcode = nvme_admin_abort_cmd; 906 cmd.abort.opcode = nvme_admin_abort_cmd;
@@ -1178,6 +1178,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
1178 dev->admin_tagset.timeout = ADMIN_TIMEOUT; 1178 dev->admin_tagset.timeout = ADMIN_TIMEOUT;
1179 dev->admin_tagset.numa_node = dev_to_node(dev->dev); 1179 dev->admin_tagset.numa_node = dev_to_node(dev->dev);
1180 dev->admin_tagset.cmd_size = nvme_cmd_size(dev); 1180 dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
1181 dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
1181 dev->admin_tagset.driver_data = dev; 1182 dev->admin_tagset.driver_data = dev;
1182 1183
1183 if (blk_mq_alloc_tag_set(&dev->admin_tagset)) 1184 if (blk_mq_alloc_tag_set(&dev->admin_tagset))
@@ -1738,6 +1739,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
1738 if (dev->ctrl.admin_q) 1739 if (dev->ctrl.admin_q)
1739 blk_put_queue(dev->ctrl.admin_q); 1740 blk_put_queue(dev->ctrl.admin_q);
1740 kfree(dev->queues); 1741 kfree(dev->queues);
1742 kfree(dev->ctrl.opal_dev);
1741 kfree(dev); 1743 kfree(dev);
1742} 1744}
1743 1745
@@ -1754,6 +1756,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
1754static void nvme_reset_work(struct work_struct *work) 1756static void nvme_reset_work(struct work_struct *work)
1755{ 1757{
1756 struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); 1758 struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
1759 bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
1757 int result = -ENODEV; 1760 int result = -ENODEV;
1758 1761
1759 if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) 1762 if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
@@ -1786,6 +1789,14 @@ static void nvme_reset_work(struct work_struct *work)
1786 if (result) 1789 if (result)
1787 goto out; 1790 goto out;
1788 1791
1792 if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
1793 dev->ctrl.opal_dev =
1794 init_opal_dev(&dev->ctrl, &nvme_sec_submit);
1795 }
1796
1797 if (was_suspend)
1798 opal_unlock_from_suspend(dev->ctrl.opal_dev);
1799
1789 result = nvme_setup_io_queues(dev); 1800 result = nvme_setup_io_queues(dev);
1790 if (result) 1801 if (result)
1791 goto out; 1802 goto out;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 557f29b1f1bb..a75e95d42b3f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1423,7 +1423,7 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
1423 if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { 1423 if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
1424 struct nvme_command *cmd = nvme_req(rq)->cmd; 1424 struct nvme_command *cmd = nvme_req(rq)->cmd;
1425 1425
1426 if (rq->cmd_type != REQ_TYPE_DRV_PRIV || 1426 if (!blk_rq_is_passthrough(rq) ||
1427 cmd->common.opcode != nvme_fabrics_command || 1427 cmd->common.opcode != nvme_fabrics_command ||
1428 cmd->fabrics.fctype != nvme_fabrics_type_connect) 1428 cmd->fabrics.fctype != nvme_fabrics_type_connect)
1429 return false; 1429 return false;
@@ -1471,7 +1471,7 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1471 ib_dma_sync_single_for_device(dev, sqe->dma, 1471 ib_dma_sync_single_for_device(dev, sqe->dma,
1472 sizeof(struct nvme_command), DMA_TO_DEVICE); 1472 sizeof(struct nvme_command), DMA_TO_DEVICE);
1473 1473
1474 if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH) 1474 if (req_op(rq) == REQ_OP_FLUSH)
1475 flush = true; 1475 flush = true;
1476 ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, 1476 ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
1477 req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); 1477 req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
@@ -1522,7 +1522,7 @@ static void nvme_rdma_complete_rq(struct request *rq)
1522 return; 1522 return;
1523 } 1523 }
1524 1524
1525 if (rq->cmd_type == REQ_TYPE_DRV_PRIV) 1525 if (blk_rq_is_passthrough(rq))
1526 error = rq->errors; 1526 error = rq->errors;
1527 else 1527 else
1528 error = nvme_error_status(rq->errors); 1528 error = nvme_error_status(rq->errors);
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c
index a5c09e703bd8..f49ae2758bb7 100644
--- a/drivers/nvme/host/scsi.c
+++ b/drivers/nvme/host/scsi.c
@@ -43,6 +43,7 @@
43#include <asm/unaligned.h> 43#include <asm/unaligned.h>
44#include <scsi/sg.h> 44#include <scsi/sg.h>
45#include <scsi/scsi.h> 45#include <scsi/scsi.h>
46#include <scsi/scsi_request.h>
46 47
47#include "nvme.h" 48#include "nvme.h"
48 49
@@ -2347,12 +2348,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2347 2348
2348static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) 2349static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
2349{ 2350{
2350 u8 cmd[BLK_MAX_CDB]; 2351 u8 cmd[16];
2351 int retcode; 2352 int retcode;
2352 unsigned int opcode; 2353 unsigned int opcode;
2353 2354
2354 if (hdr->cmdp == NULL) 2355 if (hdr->cmdp == NULL)
2355 return -EMSGSIZE; 2356 return -EMSGSIZE;
2357 if (hdr->cmd_len > sizeof(cmd))
2358 return -EINVAL;
2356 if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len)) 2359 if (copy_from_user(cmd, hdr->cmdp, hdr->cmd_len))
2357 return -EFAULT; 2360 return -EFAULT;
2358 2361
@@ -2451,8 +2454,6 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
2451 return -EFAULT; 2454 return -EFAULT;
2452 if (hdr.interface_id != 'S') 2455 if (hdr.interface_id != 'S')
2453 return -EINVAL; 2456 return -EINVAL;
2454 if (hdr.cmd_len > BLK_MAX_CDB)
2455 return -EINVAL;
2456 2457
2457 /* 2458 /*
2458 * A positive return code means a NVMe status, which has been 2459 * A positive return code means a NVMe status, which has been
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 9aaa70071ae5..f3862e38f574 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -104,7 +104,7 @@ static void nvme_loop_complete_rq(struct request *req)
104 return; 104 return;
105 } 105 }
106 106
107 if (req->cmd_type == REQ_TYPE_DRV_PRIV) 107 if (blk_rq_is_passthrough(req))
108 error = req->errors; 108 error = req->errors;
109 else 109 else
110 error = nvme_error_status(req->errors); 110 error = nvme_error_status(req->errors);
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 9f16ea6964ec..152de6817875 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -300,13 +300,6 @@ static void scm_blk_request(struct request_queue *rq)
300 struct request *req; 300 struct request *req;
301 301
302 while ((req = blk_peek_request(rq))) { 302 while ((req = blk_peek_request(rq))) {
303 if (req->cmd_type != REQ_TYPE_FS) {
304 blk_start_request(req);
305 blk_dump_rq_flags(req, KMSG_COMPONENT " bad request");
306 __blk_end_request_all(req, -EIO);
307 continue;
308 }
309
310 if (!scm_permit_request(bdev, req)) 303 if (!scm_permit_request(bdev, req))
311 goto out; 304 goto out;
312 305
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index a4f6b0d95515..d4023bf1e739 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -18,6 +18,7 @@ config SCSI
18 depends on BLOCK 18 depends on BLOCK
19 select SCSI_DMA if HAS_DMA 19 select SCSI_DMA if HAS_DMA
20 select SG_POOL 20 select SG_POOL
21 select BLK_SCSI_REQUEST
21 ---help--- 22 ---help---
22 If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or 23 If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
23 any other SCSI device under Linux, say Y and make sure that you know 24 any other SCSI device under Linux, say Y and make sure that you know
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index 5b80746980b8..4a7679f6c73d 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -88,12 +88,6 @@ struct clariion_dh_data {
88 */ 88 */
89 unsigned char buffer[CLARIION_BUFFER_SIZE]; 89 unsigned char buffer[CLARIION_BUFFER_SIZE];
90 /* 90 /*
91 * SCSI sense buffer for commands -- assumes serial issuance
92 * and completion sequence of all commands for same multipath.
93 */
94 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
95 unsigned int senselen;
96 /*
97 * LUN state 91 * LUN state
98 */ 92 */
99 int lun_state; 93 int lun_state;
@@ -116,44 +110,38 @@ struct clariion_dh_data {
116/* 110/*
117 * Parse MODE_SELECT cmd reply. 111 * Parse MODE_SELECT cmd reply.
118 */ 112 */
119static int trespass_endio(struct scsi_device *sdev, char *sense) 113static int trespass_endio(struct scsi_device *sdev,
114 struct scsi_sense_hdr *sshdr)
120{ 115{
121 int err = SCSI_DH_IO; 116 int err = SCSI_DH_IO;
122 struct scsi_sense_hdr sshdr;
123
124 if (!scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr)) {
125 sdev_printk(KERN_ERR, sdev, "%s: Found valid sense data 0x%2x, "
126 "0x%2x, 0x%2x while sending CLARiiON trespass "
127 "command.\n", CLARIION_NAME, sshdr.sense_key,
128 sshdr.asc, sshdr.ascq);
129 117
130 if ((sshdr.sense_key == 0x05) && (sshdr.asc == 0x04) && 118 sdev_printk(KERN_ERR, sdev, "%s: Found valid sense data 0x%2x, "
131 (sshdr.ascq == 0x00)) { 119 "0x%2x, 0x%2x while sending CLARiiON trespass "
132 /* 120 "command.\n", CLARIION_NAME, sshdr->sense_key,
133 * Array based copy in progress -- do not send 121 sshdr->asc, sshdr->ascq);
134 * mode_select or copy will be aborted mid-stream. 122
135 */ 123 if (sshdr->sense_key == 0x05 && sshdr->asc == 0x04 &&
136 sdev_printk(KERN_INFO, sdev, "%s: Array Based Copy in " 124 sshdr->ascq == 0x00) {
137 "progress while sending CLARiiON trespass " 125 /*
138 "command.\n", CLARIION_NAME); 126 * Array based copy in progress -- do not send
139 err = SCSI_DH_DEV_TEMP_BUSY; 127 * mode_select or copy will be aborted mid-stream.
140 } else if ((sshdr.sense_key == 0x02) && (sshdr.asc == 0x04) && 128 */
141 (sshdr.ascq == 0x03)) { 129 sdev_printk(KERN_INFO, sdev, "%s: Array Based Copy in "
142 /* 130 "progress while sending CLARiiON trespass "
143 * LUN Not Ready - Manual Intervention Required 131 "command.\n", CLARIION_NAME);
144 * indicates in-progress ucode upgrade (NDU). 132 err = SCSI_DH_DEV_TEMP_BUSY;
145 */ 133 } else if (sshdr->sense_key == 0x02 && sshdr->asc == 0x04 &&
146 sdev_printk(KERN_INFO, sdev, "%s: Detected in-progress " 134 sshdr->ascq == 0x03) {
147 "ucode upgrade NDU operation while sending " 135 /*
148 "CLARiiON trespass command.\n", CLARIION_NAME); 136 * LUN Not Ready - Manual Intervention Required
149 err = SCSI_DH_DEV_TEMP_BUSY; 137 * indicates in-progress ucode upgrade (NDU).
150 } else 138 */
151 err = SCSI_DH_DEV_FAILED; 139 sdev_printk(KERN_INFO, sdev, "%s: Detected in-progress "
152 } else { 140 "ucode upgrade NDU operation while sending "
153 sdev_printk(KERN_INFO, sdev, 141 "CLARiiON trespass command.\n", CLARIION_NAME);
154 "%s: failed to send MODE SELECT, no sense available\n", 142 err = SCSI_DH_DEV_TEMP_BUSY;
155 CLARIION_NAME); 143 } else
156 } 144 err = SCSI_DH_DEV_FAILED;
157 return err; 145 return err;
158} 146}
159 147
@@ -257,103 +245,15 @@ out:
257 return sp_model; 245 return sp_model;
258} 246}
259 247
260/*
261 * Get block request for REQ_BLOCK_PC command issued to path. Currently
262 * limited to MODE_SELECT (trespass) and INQUIRY (VPD page 0xC0) commands.
263 *
264 * Uses data and sense buffers in hardware handler context structure and
265 * assumes serial servicing of commands, both issuance and completion.
266 */
267static struct request *get_req(struct scsi_device *sdev, int cmd,
268 unsigned char *buffer)
269{
270 struct request *rq;
271 int len = 0;
272
273 rq = blk_get_request(sdev->request_queue,
274 (cmd != INQUIRY) ? WRITE : READ, GFP_NOIO);
275 if (IS_ERR(rq)) {
276 sdev_printk(KERN_INFO, sdev, "get_req: blk_get_request failed");
277 return NULL;
278 }
279
280 blk_rq_set_block_pc(rq);
281 rq->cmd_len = COMMAND_SIZE(cmd);
282 rq->cmd[0] = cmd;
283
284 switch (cmd) {
285 case MODE_SELECT:
286 len = sizeof(short_trespass);
287 rq->cmd[1] = 0x10;
288 rq->cmd[4] = len;
289 break;
290 case MODE_SELECT_10:
291 len = sizeof(long_trespass);
292 rq->cmd[1] = 0x10;
293 rq->cmd[8] = len;
294 break;
295 case INQUIRY:
296 len = CLARIION_BUFFER_SIZE;
297 rq->cmd[4] = len;
298 memset(buffer, 0, len);
299 break;
300 default:
301 BUG_ON(1);
302 break;
303 }
304
305 rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
306 REQ_FAILFAST_DRIVER;
307 rq->timeout = CLARIION_TIMEOUT;
308 rq->retries = CLARIION_RETRIES;
309
310 if (blk_rq_map_kern(rq->q, rq, buffer, len, GFP_NOIO)) {
311 blk_put_request(rq);
312 return NULL;
313 }
314
315 return rq;
316}
317
318static int send_inquiry_cmd(struct scsi_device *sdev, int page,
319 struct clariion_dh_data *csdev)
320{
321 struct request *rq = get_req(sdev, INQUIRY, csdev->buffer);
322 int err;
323
324 if (!rq)
325 return SCSI_DH_RES_TEMP_UNAVAIL;
326
327 rq->sense = csdev->sense;
328 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
329 rq->sense_len = csdev->senselen = 0;
330
331 rq->cmd[0] = INQUIRY;
332 if (page != 0) {
333 rq->cmd[1] = 1;
334 rq->cmd[2] = page;
335 }
336 err = blk_execute_rq(sdev->request_queue, NULL, rq, 1);
337 if (err == -EIO) {
338 sdev_printk(KERN_INFO, sdev,
339 "%s: failed to send %s INQUIRY: %x\n",
340 CLARIION_NAME, page?"EVPD":"standard",
341 rq->errors);
342 csdev->senselen = rq->sense_len;
343 err = SCSI_DH_IO;
344 }
345
346 blk_put_request(rq);
347
348 return err;
349}
350
351static int send_trespass_cmd(struct scsi_device *sdev, 248static int send_trespass_cmd(struct scsi_device *sdev,
352 struct clariion_dh_data *csdev) 249 struct clariion_dh_data *csdev)
353{ 250{
354 struct request *rq;
355 unsigned char *page22; 251 unsigned char *page22;
356 int err, len, cmd; 252 unsigned char cdb[COMMAND_SIZE(MODE_SELECT)];
253 int err, res = SCSI_DH_OK, len;
254 struct scsi_sense_hdr sshdr;
255 u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
256 REQ_FAILFAST_DRIVER;
357 257
358 if (csdev->flags & CLARIION_SHORT_TRESPASS) { 258 if (csdev->flags & CLARIION_SHORT_TRESPASS) {
359 page22 = short_trespass; 259 page22 = short_trespass;
@@ -361,40 +261,37 @@ static int send_trespass_cmd(struct scsi_device *sdev,
361 /* Set Honor Reservations bit */ 261 /* Set Honor Reservations bit */
362 page22[6] |= 0x80; 262 page22[6] |= 0x80;
363 len = sizeof(short_trespass); 263 len = sizeof(short_trespass);
364 cmd = MODE_SELECT; 264 cdb[0] = MODE_SELECT;
265 cdb[1] = 0x10;
266 cdb[4] = len;
365 } else { 267 } else {
366 page22 = long_trespass; 268 page22 = long_trespass;
367 if (!(csdev->flags & CLARIION_HONOR_RESERVATIONS)) 269 if (!(csdev->flags & CLARIION_HONOR_RESERVATIONS))
368 /* Set Honor Reservations bit */ 270 /* Set Honor Reservations bit */
369 page22[10] |= 0x80; 271 page22[10] |= 0x80;
370 len = sizeof(long_trespass); 272 len = sizeof(long_trespass);
371 cmd = MODE_SELECT_10; 273 cdb[0] = MODE_SELECT_10;
274 cdb[8] = len;
372 } 275 }
373 BUG_ON((len > CLARIION_BUFFER_SIZE)); 276 BUG_ON((len > CLARIION_BUFFER_SIZE));
374 memcpy(csdev->buffer, page22, len); 277 memcpy(csdev->buffer, page22, len);
375 278
376 rq = get_req(sdev, cmd, csdev->buffer); 279 err = scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
377 if (!rq) 280 csdev->buffer, len, &sshdr,
378 return SCSI_DH_RES_TEMP_UNAVAIL; 281 CLARIION_TIMEOUT * HZ, CLARIION_RETRIES,
379 282 NULL, req_flags, 0);
380 rq->sense = csdev->sense; 283 if (err) {
381 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); 284 if (scsi_sense_valid(&sshdr))
382 rq->sense_len = csdev->senselen = 0; 285 res = trespass_endio(sdev, &sshdr);
383 286 else {
384 err = blk_execute_rq(sdev->request_queue, NULL, rq, 1);
385 if (err == -EIO) {
386 if (rq->sense_len) {
387 err = trespass_endio(sdev, csdev->sense);
388 } else {
389 sdev_printk(KERN_INFO, sdev, 287 sdev_printk(KERN_INFO, sdev,
390 "%s: failed to send MODE SELECT: %x\n", 288 "%s: failed to send MODE SELECT: %x\n",
391 CLARIION_NAME, rq->errors); 289 CLARIION_NAME, err);
290 res = SCSI_DH_IO;
392 } 291 }
393 } 292 }
394 293
395 blk_put_request(rq); 294 return res;
396
397 return err;
398} 295}
399 296
400static int clariion_check_sense(struct scsi_device *sdev, 297static int clariion_check_sense(struct scsi_device *sdev,
@@ -464,21 +361,7 @@ static int clariion_std_inquiry(struct scsi_device *sdev,
464 int err; 361 int err;
465 char *sp_model; 362 char *sp_model;
466 363
467 err = send_inquiry_cmd(sdev, 0, csdev); 364 sp_model = parse_sp_model(sdev, sdev->inquiry);
468 if (err != SCSI_DH_OK && csdev->senselen) {
469 struct scsi_sense_hdr sshdr;
470
471 if (scsi_normalize_sense(csdev->sense, SCSI_SENSE_BUFFERSIZE,
472 &sshdr)) {
473 sdev_printk(KERN_ERR, sdev, "%s: INQUIRY sense code "
474 "%02x/%02x/%02x\n", CLARIION_NAME,
475 sshdr.sense_key, sshdr.asc, sshdr.ascq);
476 }
477 err = SCSI_DH_IO;
478 goto out;
479 }
480
481 sp_model = parse_sp_model(sdev, csdev->buffer);
482 if (!sp_model) { 365 if (!sp_model) {
483 err = SCSI_DH_DEV_UNSUPP; 366 err = SCSI_DH_DEV_UNSUPP;
484 goto out; 367 goto out;
@@ -500,30 +383,12 @@ out:
500static int clariion_send_inquiry(struct scsi_device *sdev, 383static int clariion_send_inquiry(struct scsi_device *sdev,
501 struct clariion_dh_data *csdev) 384 struct clariion_dh_data *csdev)
502{ 385{
503 int err, retry = CLARIION_RETRIES; 386 int err = SCSI_DH_IO;
504 387
505retry: 388 if (!scsi_get_vpd_page(sdev, 0xC0, csdev->buffer,
506 err = send_inquiry_cmd(sdev, 0xC0, csdev); 389 CLARIION_BUFFER_SIZE))
507 if (err != SCSI_DH_OK && csdev->senselen) {
508 struct scsi_sense_hdr sshdr;
509
510 err = scsi_normalize_sense(csdev->sense, SCSI_SENSE_BUFFERSIZE,
511 &sshdr);
512 if (!err)
513 return SCSI_DH_IO;
514
515 err = clariion_check_sense(sdev, &sshdr);
516 if (retry > 0 && err == ADD_TO_MLQUEUE) {
517 retry--;
518 goto retry;
519 }
520 sdev_printk(KERN_ERR, sdev, "%s: INQUIRY sense code "
521 "%02x/%02x/%02x\n", CLARIION_NAME,
522 sshdr.sense_key, sshdr.asc, sshdr.ascq);
523 err = SCSI_DH_IO;
524 } else {
525 err = parse_sp_info_reply(sdev, csdev); 390 err = parse_sp_info_reply(sdev, csdev);
526 } 391
527 return err; 392 return err;
528} 393}
529 394
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 308e87195dc1..be43c940636d 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -38,13 +38,10 @@
38#define HP_SW_PATH_PASSIVE 1 38#define HP_SW_PATH_PASSIVE 1
39 39
40struct hp_sw_dh_data { 40struct hp_sw_dh_data {
41 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
42 int path_state; 41 int path_state;
43 int retries; 42 int retries;
44 int retry_cnt; 43 int retry_cnt;
45 struct scsi_device *sdev; 44 struct scsi_device *sdev;
46 activate_complete callback_fn;
47 void *callback_data;
48}; 45};
49 46
50static int hp_sw_start_stop(struct hp_sw_dh_data *); 47static int hp_sw_start_stop(struct hp_sw_dh_data *);
@@ -56,43 +53,34 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *);
56 * 53 *
57 * Returns SCSI_DH_DEV_OFFLINED if the sdev is on the passive path 54 * Returns SCSI_DH_DEV_OFFLINED if the sdev is on the passive path
58 */ 55 */
59static int tur_done(struct scsi_device *sdev, unsigned char *sense) 56static int tur_done(struct scsi_device *sdev, struct hp_sw_dh_data *h,
57 struct scsi_sense_hdr *sshdr)
60{ 58{
61 struct scsi_sense_hdr sshdr; 59 int ret = SCSI_DH_IO;
62 int ret;
63 60
64 ret = scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr); 61 switch (sshdr->sense_key) {
65 if (!ret) {
66 sdev_printk(KERN_WARNING, sdev,
67 "%s: sending tur failed, no sense available\n",
68 HP_SW_NAME);
69 ret = SCSI_DH_IO;
70 goto done;
71 }
72 switch (sshdr.sense_key) {
73 case UNIT_ATTENTION: 62 case UNIT_ATTENTION:
74 ret = SCSI_DH_IMM_RETRY; 63 ret = SCSI_DH_IMM_RETRY;
75 break; 64 break;
76 case NOT_READY: 65 case NOT_READY:
77 if ((sshdr.asc == 0x04) && (sshdr.ascq == 2)) { 66 if (sshdr->asc == 0x04 && sshdr->ascq == 2) {
78 /* 67 /*
79 * LUN not ready - Initialization command required 68 * LUN not ready - Initialization command required
80 * 69 *
81 * This is the passive path 70 * This is the passive path
82 */ 71 */
83 ret = SCSI_DH_DEV_OFFLINED; 72 h->path_state = HP_SW_PATH_PASSIVE;
73 ret = SCSI_DH_OK;
84 break; 74 break;
85 } 75 }
86 /* Fallthrough */ 76 /* Fallthrough */
87 default: 77 default:
88 sdev_printk(KERN_WARNING, sdev, 78 sdev_printk(KERN_WARNING, sdev,
89 "%s: sending tur failed, sense %x/%x/%x\n", 79 "%s: sending tur failed, sense %x/%x/%x\n",
90 HP_SW_NAME, sshdr.sense_key, sshdr.asc, 80 HP_SW_NAME, sshdr->sense_key, sshdr->asc,
91 sshdr.ascq); 81 sshdr->ascq);
92 break; 82 break;
93 } 83 }
94
95done:
96 return ret; 84 return ret;
97} 85}
98 86
@@ -105,131 +93,36 @@ done:
105 */ 93 */
106static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) 94static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h)
107{ 95{
108 struct request *req; 96 unsigned char cmd[6] = { TEST_UNIT_READY };
109 int ret; 97 struct scsi_sense_hdr sshdr;
98 int ret = SCSI_DH_OK, res;
99 u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
100 REQ_FAILFAST_DRIVER;
110 101
111retry: 102retry:
112 req = blk_get_request(sdev->request_queue, WRITE, GFP_NOIO); 103 res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr,
113 if (IS_ERR(req)) 104 HP_SW_TIMEOUT, HP_SW_RETRIES,
114 return SCSI_DH_RES_TEMP_UNAVAIL; 105 NULL, req_flags, 0);
115 106 if (res) {
116 blk_rq_set_block_pc(req); 107 if (scsi_sense_valid(&sshdr))
117 req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 108 ret = tur_done(sdev, h, &sshdr);
118 REQ_FAILFAST_DRIVER; 109 else {
119 req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
120 req->cmd[0] = TEST_UNIT_READY;
121 req->timeout = HP_SW_TIMEOUT;
122 req->sense = h->sense;
123 memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
124 req->sense_len = 0;
125
126 ret = blk_execute_rq(req->q, NULL, req, 1);
127 if (ret == -EIO) {
128 if (req->sense_len > 0) {
129 ret = tur_done(sdev, h->sense);
130 } else {
131 sdev_printk(KERN_WARNING, sdev, 110 sdev_printk(KERN_WARNING, sdev,
132 "%s: sending tur failed with %x\n", 111 "%s: sending tur failed with %x\n",
133 HP_SW_NAME, req->errors); 112 HP_SW_NAME, res);
134 ret = SCSI_DH_IO; 113 ret = SCSI_DH_IO;
135 } 114 }
136 } else { 115 } else {
137 h->path_state = HP_SW_PATH_ACTIVE; 116 h->path_state = HP_SW_PATH_ACTIVE;
138 ret = SCSI_DH_OK; 117 ret = SCSI_DH_OK;
139 } 118 }
140 if (ret == SCSI_DH_IMM_RETRY) { 119 if (ret == SCSI_DH_IMM_RETRY)
141 blk_put_request(req);
142 goto retry; 120 goto retry;
143 }
144 if (ret == SCSI_DH_DEV_OFFLINED) {
145 h->path_state = HP_SW_PATH_PASSIVE;
146 ret = SCSI_DH_OK;
147 }
148
149 blk_put_request(req);
150 121
151 return ret; 122 return ret;
152} 123}
153 124
154/* 125/*
155 * start_done - Handle START STOP UNIT return status
156 * @sdev: sdev the command has been sent to
157 * @errors: blk error code
158 */
159static int start_done(struct scsi_device *sdev, unsigned char *sense)
160{
161 struct scsi_sense_hdr sshdr;
162 int rc;
163
164 rc = scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr);
165 if (!rc) {
166 sdev_printk(KERN_WARNING, sdev,
167 "%s: sending start_stop_unit failed, "
168 "no sense available\n",
169 HP_SW_NAME);
170 return SCSI_DH_IO;
171 }
172 switch (sshdr.sense_key) {
173 case NOT_READY:
174 if ((sshdr.asc == 0x04) && (sshdr.ascq == 3)) {
175 /*
176 * LUN not ready - manual intervention required
177 *
178 * Switch-over in progress, retry.
179 */
180 rc = SCSI_DH_RETRY;
181 break;
182 }
183 /* fall through */
184 default:
185 sdev_printk(KERN_WARNING, sdev,
186 "%s: sending start_stop_unit failed, sense %x/%x/%x\n",
187 HP_SW_NAME, sshdr.sense_key, sshdr.asc,
188 sshdr.ascq);
189 rc = SCSI_DH_IO;
190 }
191
192 return rc;
193}
194
195static void start_stop_endio(struct request *req, int error)
196{
197 struct hp_sw_dh_data *h = req->end_io_data;
198 unsigned err = SCSI_DH_OK;
199
200 if (error || host_byte(req->errors) != DID_OK ||
201 msg_byte(req->errors) != COMMAND_COMPLETE) {
202 sdev_printk(KERN_WARNING, h->sdev,
203 "%s: sending start_stop_unit failed with %x\n",
204 HP_SW_NAME, req->errors);
205 err = SCSI_DH_IO;
206 goto done;
207 }
208
209 if (req->sense_len > 0) {
210 err = start_done(h->sdev, h->sense);
211 if (err == SCSI_DH_RETRY) {
212 err = SCSI_DH_IO;
213 if (--h->retry_cnt) {
214 blk_put_request(req);
215 err = hp_sw_start_stop(h);
216 if (err == SCSI_DH_OK)
217 return;
218 }
219 }
220 }
221done:
222 req->end_io_data = NULL;
223 __blk_put_request(req->q, req);
224 if (h->callback_fn) {
225 h->callback_fn(h->callback_data, err);
226 h->callback_fn = h->callback_data = NULL;
227 }
228 return;
229
230}
231
232/*
233 * hp_sw_start_stop - Send START STOP UNIT command 126 * hp_sw_start_stop - Send START STOP UNIT command
234 * @sdev: sdev command should be sent to 127 * @sdev: sdev command should be sent to
235 * 128 *
@@ -237,26 +130,48 @@ done:
237 */ 130 */
238static int hp_sw_start_stop(struct hp_sw_dh_data *h) 131static int hp_sw_start_stop(struct hp_sw_dh_data *h)
239{ 132{
240 struct request *req; 133 unsigned char cmd[6] = { START_STOP, 0, 0, 0, 1, 0 };
241 134 struct scsi_sense_hdr sshdr;
242 req = blk_get_request(h->sdev->request_queue, WRITE, GFP_ATOMIC); 135 struct scsi_device *sdev = h->sdev;
243 if (IS_ERR(req)) 136 int res, rc = SCSI_DH_OK;
244 return SCSI_DH_RES_TEMP_UNAVAIL; 137 int retry_cnt = HP_SW_RETRIES;
245 138 u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
246 blk_rq_set_block_pc(req); 139 REQ_FAILFAST_DRIVER;
247 req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
248 REQ_FAILFAST_DRIVER;
249 req->cmd_len = COMMAND_SIZE(START_STOP);
250 req->cmd[0] = START_STOP;
251 req->cmd[4] = 1; /* Start spin cycle */
252 req->timeout = HP_SW_TIMEOUT;
253 req->sense = h->sense;
254 memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
255 req->sense_len = 0;
256 req->end_io_data = h;
257 140
258 blk_execute_rq_nowait(req->q, NULL, req, 1, start_stop_endio); 141retry:
259 return SCSI_DH_OK; 142 res = scsi_execute_req_flags(sdev, cmd, DMA_NONE, NULL, 0, &sshdr,
143 HP_SW_TIMEOUT, HP_SW_RETRIES,
144 NULL, req_flags, 0);
145 if (res) {
146 if (!scsi_sense_valid(&sshdr)) {
147 sdev_printk(KERN_WARNING, sdev,
148 "%s: sending start_stop_unit failed, "
149 "no sense available\n", HP_SW_NAME);
150 return SCSI_DH_IO;
151 }
152 switch (sshdr.sense_key) {
153 case NOT_READY:
154 if (sshdr.asc == 0x04 && sshdr.ascq == 3) {
155 /*
156 * LUN not ready - manual intervention required
157 *
158 * Switch-over in progress, retry.
159 */
160 if (--retry_cnt)
161 goto retry;
162 rc = SCSI_DH_RETRY;
163 break;
164 }
165 /* fall through */
166 default:
167 sdev_printk(KERN_WARNING, sdev,
168 "%s: sending start_stop_unit failed, "
169 "sense %x/%x/%x\n", HP_SW_NAME,
170 sshdr.sense_key, sshdr.asc, sshdr.ascq);
171 rc = SCSI_DH_IO;
172 }
173 }
174 return rc;
260} 175}
261 176
262static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req) 177static int hp_sw_prep_fn(struct scsi_device *sdev, struct request *req)
@@ -290,15 +205,8 @@ static int hp_sw_activate(struct scsi_device *sdev,
290 205
291 ret = hp_sw_tur(sdev, h); 206 ret = hp_sw_tur(sdev, h);
292 207
293 if (ret == SCSI_DH_OK && h->path_state == HP_SW_PATH_PASSIVE) { 208 if (ret == SCSI_DH_OK && h->path_state == HP_SW_PATH_PASSIVE)
294 h->retry_cnt = h->retries;
295 h->callback_fn = fn;
296 h->callback_data = data;
297 ret = hp_sw_start_stop(h); 209 ret = hp_sw_start_stop(h);
298 if (ret == SCSI_DH_OK)
299 return 0;
300 h->callback_fn = h->callback_data = NULL;
301 }
302 210
303 if (fn) 211 if (fn)
304 fn(data, ret); 212 fn(data, ret);
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 00d9c326158e..b64eaae8533d 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -205,7 +205,6 @@ struct rdac_dh_data {
205#define RDAC_NON_PREFERRED 1 205#define RDAC_NON_PREFERRED 1
206 char preferred; 206 char preferred;
207 207
208 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
209 union { 208 union {
210 struct c2_inquiry c2; 209 struct c2_inquiry c2;
211 struct c4_inquiry c4; 210 struct c4_inquiry c4;
@@ -262,40 +261,12 @@ do { \
262 sdev_printk(KERN_INFO, sdev, RDAC_NAME ": " f "\n", ## arg); \ 261 sdev_printk(KERN_INFO, sdev, RDAC_NAME ": " f "\n", ## arg); \
263} while (0); 262} while (0);
264 263
265static struct request *get_rdac_req(struct scsi_device *sdev, 264static unsigned int rdac_failover_get(struct rdac_controller *ctlr,
266 void *buffer, unsigned buflen, int rw) 265 struct list_head *list,
266 unsigned char *cdb)
267{ 267{
268 struct request *rq; 268 struct scsi_device *sdev = ctlr->ms_sdev;
269 struct request_queue *q = sdev->request_queue; 269 struct rdac_dh_data *h = sdev->handler_data;
270
271 rq = blk_get_request(q, rw, GFP_NOIO);
272
273 if (IS_ERR(rq)) {
274 sdev_printk(KERN_INFO, sdev,
275 "get_rdac_req: blk_get_request failed.\n");
276 return NULL;
277 }
278 blk_rq_set_block_pc(rq);
279
280 if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
281 blk_put_request(rq);
282 sdev_printk(KERN_INFO, sdev,
283 "get_rdac_req: blk_rq_map_kern failed.\n");
284 return NULL;
285 }
286
287 rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
288 REQ_FAILFAST_DRIVER;
289 rq->retries = RDAC_RETRIES;
290 rq->timeout = RDAC_TIMEOUT;
291
292 return rq;
293}
294
295static struct request *rdac_failover_get(struct scsi_device *sdev,
296 struct rdac_dh_data *h, struct list_head *list)
297{
298 struct request *rq;
299 struct rdac_mode_common *common; 270 struct rdac_mode_common *common;
300 unsigned data_size; 271 unsigned data_size;
301 struct rdac_queue_data *qdata; 272 struct rdac_queue_data *qdata;
@@ -332,27 +303,17 @@ static struct request *rdac_failover_get(struct scsi_device *sdev,
332 lun_table[qdata->h->lun] = 0x81; 303 lun_table[qdata->h->lun] = 0x81;
333 } 304 }
334 305
335 /* get request for block layer packet command */
336 rq = get_rdac_req(sdev, &h->ctlr->mode_select, data_size, WRITE);
337 if (!rq)
338 return NULL;
339
340 /* Prepare the command. */ 306 /* Prepare the command. */
341 if (h->ctlr->use_ms10) { 307 if (h->ctlr->use_ms10) {
342 rq->cmd[0] = MODE_SELECT_10; 308 cdb[0] = MODE_SELECT_10;
343 rq->cmd[7] = data_size >> 8; 309 cdb[7] = data_size >> 8;
344 rq->cmd[8] = data_size & 0xff; 310 cdb[8] = data_size & 0xff;
345 } else { 311 } else {
346 rq->cmd[0] = MODE_SELECT; 312 cdb[0] = MODE_SELECT;
347 rq->cmd[4] = data_size; 313 cdb[4] = data_size;
348 } 314 }
349 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
350
351 rq->sense = h->sense;
352 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
353 rq->sense_len = 0;
354 315
355 return rq; 316 return data_size;
356} 317}
357 318
358static void release_controller(struct kref *kref) 319static void release_controller(struct kref *kref)
@@ -400,46 +361,14 @@ static struct rdac_controller *get_controller(int index, char *array_name,
400 return ctlr; 361 return ctlr;
401} 362}
402 363
403static int submit_inquiry(struct scsi_device *sdev, int page_code,
404 unsigned int len, struct rdac_dh_data *h)
405{
406 struct request *rq;
407 struct request_queue *q = sdev->request_queue;
408 int err = SCSI_DH_RES_TEMP_UNAVAIL;
409
410 rq = get_rdac_req(sdev, &h->inq, len, READ);
411 if (!rq)
412 goto done;
413
414 /* Prepare the command. */
415 rq->cmd[0] = INQUIRY;
416 rq->cmd[1] = 1;
417 rq->cmd[2] = page_code;
418 rq->cmd[4] = len;
419 rq->cmd_len = COMMAND_SIZE(INQUIRY);
420
421 rq->sense = h->sense;
422 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
423 rq->sense_len = 0;
424
425 err = blk_execute_rq(q, NULL, rq, 1);
426 if (err == -EIO)
427 err = SCSI_DH_IO;
428
429 blk_put_request(rq);
430done:
431 return err;
432}
433
434static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h, 364static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h,
435 char *array_name, u8 *array_id) 365 char *array_name, u8 *array_id)
436{ 366{
437 int err, i; 367 int err = SCSI_DH_IO, i;
438 struct c8_inquiry *inqp; 368 struct c8_inquiry *inqp = &h->inq.c8;
439 369
440 err = submit_inquiry(sdev, 0xC8, sizeof(struct c8_inquiry), h); 370 if (!scsi_get_vpd_page(sdev, 0xC8, (unsigned char *)inqp,
441 if (err == SCSI_DH_OK) { 371 sizeof(struct c8_inquiry))) {
442 inqp = &h->inq.c8;
443 if (inqp->page_code != 0xc8) 372 if (inqp->page_code != 0xc8)
444 return SCSI_DH_NOSYS; 373 return SCSI_DH_NOSYS;
445 if (inqp->page_id[0] != 'e' || inqp->page_id[1] != 'd' || 374 if (inqp->page_id[0] != 'e' || inqp->page_id[1] != 'd' ||
@@ -453,20 +382,20 @@ static int get_lun_info(struct scsi_device *sdev, struct rdac_dh_data *h,
453 *(array_name+ARRAY_LABEL_LEN-1) = '\0'; 382 *(array_name+ARRAY_LABEL_LEN-1) = '\0';
454 memset(array_id, 0, UNIQUE_ID_LEN); 383 memset(array_id, 0, UNIQUE_ID_LEN);
455 memcpy(array_id, inqp->array_unique_id, inqp->array_uniq_id_len); 384 memcpy(array_id, inqp->array_unique_id, inqp->array_uniq_id_len);
385 err = SCSI_DH_OK;
456 } 386 }
457 return err; 387 return err;
458} 388}
459 389
460static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h) 390static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
461{ 391{
462 int err, access_state; 392 int err = SCSI_DH_IO, access_state;
463 struct rdac_dh_data *tmp; 393 struct rdac_dh_data *tmp;
464 struct c9_inquiry *inqp; 394 struct c9_inquiry *inqp = &h->inq.c9;
465 395
466 h->state = RDAC_STATE_ACTIVE; 396 h->state = RDAC_STATE_ACTIVE;
467 err = submit_inquiry(sdev, 0xC9, sizeof(struct c9_inquiry), h); 397 if (!scsi_get_vpd_page(sdev, 0xC9, (unsigned char *)inqp,
468 if (err == SCSI_DH_OK) { 398 sizeof(struct c9_inquiry))) {
469 inqp = &h->inq.c9;
470 /* detect the operating mode */ 399 /* detect the operating mode */
471 if ((inqp->avte_cvp >> 5) & 0x1) 400 if ((inqp->avte_cvp >> 5) & 0x1)
472 h->mode = RDAC_MODE_IOSHIP; /* LUN in IOSHIP mode */ 401 h->mode = RDAC_MODE_IOSHIP; /* LUN in IOSHIP mode */
@@ -501,6 +430,7 @@ static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
501 tmp->sdev->access_state = access_state; 430 tmp->sdev->access_state = access_state;
502 } 431 }
503 rcu_read_unlock(); 432 rcu_read_unlock();
433 err = SCSI_DH_OK;
504 } 434 }
505 435
506 return err; 436 return err;
@@ -509,12 +439,11 @@ static int check_ownership(struct scsi_device *sdev, struct rdac_dh_data *h)
509static int initialize_controller(struct scsi_device *sdev, 439static int initialize_controller(struct scsi_device *sdev,
510 struct rdac_dh_data *h, char *array_name, u8 *array_id) 440 struct rdac_dh_data *h, char *array_name, u8 *array_id)
511{ 441{
512 int err, index; 442 int err = SCSI_DH_IO, index;
513 struct c4_inquiry *inqp; 443 struct c4_inquiry *inqp = &h->inq.c4;
514 444
515 err = submit_inquiry(sdev, 0xC4, sizeof(struct c4_inquiry), h); 445 if (!scsi_get_vpd_page(sdev, 0xC4, (unsigned char *)inqp,
516 if (err == SCSI_DH_OK) { 446 sizeof(struct c4_inquiry))) {
517 inqp = &h->inq.c4;
518 /* get the controller index */ 447 /* get the controller index */
519 if (inqp->slot_id[1] == 0x31) 448 if (inqp->slot_id[1] == 0x31)
520 index = 0; 449 index = 0;
@@ -530,18 +459,18 @@ static int initialize_controller(struct scsi_device *sdev,
530 h->sdev = sdev; 459 h->sdev = sdev;
531 } 460 }
532 spin_unlock(&list_lock); 461 spin_unlock(&list_lock);
462 err = SCSI_DH_OK;
533 } 463 }
534 return err; 464 return err;
535} 465}
536 466
537static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h) 467static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h)
538{ 468{
539 int err; 469 int err = SCSI_DH_IO;
540 struct c2_inquiry *inqp; 470 struct c2_inquiry *inqp = &h->inq.c2;
541 471
542 err = submit_inquiry(sdev, 0xC2, sizeof(struct c2_inquiry), h); 472 if (!scsi_get_vpd_page(sdev, 0xC2, (unsigned char *)inqp,
543 if (err == SCSI_DH_OK) { 473 sizeof(struct c2_inquiry))) {
544 inqp = &h->inq.c2;
545 /* 474 /*
546 * If more than MODE6_MAX_LUN luns are supported, use 475 * If more than MODE6_MAX_LUN luns are supported, use
547 * mode select 10 476 * mode select 10
@@ -550,36 +479,35 @@ static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h)
550 h->ctlr->use_ms10 = 1; 479 h->ctlr->use_ms10 = 1;
551 else 480 else
552 h->ctlr->use_ms10 = 0; 481 h->ctlr->use_ms10 = 0;
482 err = SCSI_DH_OK;
553 } 483 }
554 return err; 484 return err;
555} 485}
556 486
557static int mode_select_handle_sense(struct scsi_device *sdev, 487static int mode_select_handle_sense(struct scsi_device *sdev,
558 unsigned char *sensebuf) 488 struct scsi_sense_hdr *sense_hdr)
559{ 489{
560 struct scsi_sense_hdr sense_hdr; 490 int err = SCSI_DH_IO;
561 int err = SCSI_DH_IO, ret;
562 struct rdac_dh_data *h = sdev->handler_data; 491 struct rdac_dh_data *h = sdev->handler_data;
563 492
564 ret = scsi_normalize_sense(sensebuf, SCSI_SENSE_BUFFERSIZE, &sense_hdr); 493 if (!scsi_sense_valid(sense_hdr))
565 if (!ret)
566 goto done; 494 goto done;
567 495
568 switch (sense_hdr.sense_key) { 496 switch (sense_hdr->sense_key) {
569 case NO_SENSE: 497 case NO_SENSE:
570 case ABORTED_COMMAND: 498 case ABORTED_COMMAND:
571 case UNIT_ATTENTION: 499 case UNIT_ATTENTION:
572 err = SCSI_DH_RETRY; 500 err = SCSI_DH_RETRY;
573 break; 501 break;
574 case NOT_READY: 502 case NOT_READY:
575 if (sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x01) 503 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x01)
576 /* LUN Not Ready and is in the Process of Becoming 504 /* LUN Not Ready and is in the Process of Becoming
577 * Ready 505 * Ready
578 */ 506 */
579 err = SCSI_DH_RETRY; 507 err = SCSI_DH_RETRY;
580 break; 508 break;
581 case ILLEGAL_REQUEST: 509 case ILLEGAL_REQUEST:
582 if (sense_hdr.asc == 0x91 && sense_hdr.ascq == 0x36) 510 if (sense_hdr->asc == 0x91 && sense_hdr->ascq == 0x36)
583 /* 511 /*
584 * Command Lock contention 512 * Command Lock contention
585 */ 513 */
@@ -592,7 +520,7 @@ static int mode_select_handle_sense(struct scsi_device *sdev,
592 RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, " 520 RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
593 "MODE_SELECT returned with sense %02x/%02x/%02x", 521 "MODE_SELECT returned with sense %02x/%02x/%02x",
594 (char *) h->ctlr->array_name, h->ctlr->index, 522 (char *) h->ctlr->array_name, h->ctlr->index,
595 sense_hdr.sense_key, sense_hdr.asc, sense_hdr.ascq); 523 sense_hdr->sense_key, sense_hdr->asc, sense_hdr->ascq);
596 524
597done: 525done:
598 return err; 526 return err;
@@ -602,13 +530,16 @@ static void send_mode_select(struct work_struct *work)
602{ 530{
603 struct rdac_controller *ctlr = 531 struct rdac_controller *ctlr =
604 container_of(work, struct rdac_controller, ms_work); 532 container_of(work, struct rdac_controller, ms_work);
605 struct request *rq;
606 struct scsi_device *sdev = ctlr->ms_sdev; 533 struct scsi_device *sdev = ctlr->ms_sdev;
607 struct rdac_dh_data *h = sdev->handler_data; 534 struct rdac_dh_data *h = sdev->handler_data;
608 struct request_queue *q = sdev->request_queue; 535 int err = SCSI_DH_OK, retry_cnt = RDAC_RETRY_COUNT;
609 int err, retry_cnt = RDAC_RETRY_COUNT;
610 struct rdac_queue_data *tmp, *qdata; 536 struct rdac_queue_data *tmp, *qdata;
611 LIST_HEAD(list); 537 LIST_HEAD(list);
538 unsigned char cdb[COMMAND_SIZE(MODE_SELECT_10)];
539 struct scsi_sense_hdr sshdr;
540 unsigned int data_size;
541 u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
542 REQ_FAILFAST_DRIVER;
612 543
613 spin_lock(&ctlr->ms_lock); 544 spin_lock(&ctlr->ms_lock);
614 list_splice_init(&ctlr->ms_head, &list); 545 list_splice_init(&ctlr->ms_head, &list);
@@ -616,21 +547,19 @@ static void send_mode_select(struct work_struct *work)
616 ctlr->ms_sdev = NULL; 547 ctlr->ms_sdev = NULL;
617 spin_unlock(&ctlr->ms_lock); 548 spin_unlock(&ctlr->ms_lock);
618 549
619retry: 550 retry:
620 err = SCSI_DH_RES_TEMP_UNAVAIL; 551 data_size = rdac_failover_get(ctlr, &list, cdb);
621 rq = rdac_failover_get(sdev, h, &list);
622 if (!rq)
623 goto done;
624 552
625 RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, " 553 RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
626 "%s MODE_SELECT command", 554 "%s MODE_SELECT command",
627 (char *) h->ctlr->array_name, h->ctlr->index, 555 (char *) h->ctlr->array_name, h->ctlr->index,
628 (retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying"); 556 (retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying");
629 557
630 err = blk_execute_rq(q, NULL, rq, 1); 558 if (scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE,
631 blk_put_request(rq); 559 &h->ctlr->mode_select, data_size, &sshdr,
632 if (err != SCSI_DH_OK) { 560 RDAC_TIMEOUT * HZ,
633 err = mode_select_handle_sense(sdev, h->sense); 561 RDAC_RETRIES, NULL, req_flags, 0)) {
562 err = mode_select_handle_sense(sdev, &sshdr);
634 if (err == SCSI_DH_RETRY && retry_cnt--) 563 if (err == SCSI_DH_RETRY && retry_cnt--)
635 goto retry; 564 goto retry;
636 if (err == SCSI_DH_IMM_RETRY) 565 if (err == SCSI_DH_IMM_RETRY)
@@ -643,7 +572,6 @@ retry:
643 (char *) h->ctlr->array_name, h->ctlr->index); 572 (char *) h->ctlr->array_name, h->ctlr->index);
644 } 573 }
645 574
646done:
647 list_for_each_entry_safe(qdata, tmp, &list, entry) { 575 list_for_each_entry_safe(qdata, tmp, &list, entry) {
648 list_del(&qdata->entry); 576 list_del(&qdata->entry);
649 if (err == SCSI_DH_OK) 577 if (err == SCSI_DH_OK)
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 258a3f9a2519..831a1c8b9f89 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -213,6 +213,10 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
213 goto fail; 213 goto fail;
214 } 214 }
215 215
216 error = scsi_init_sense_cache(shost);
217 if (error)
218 goto fail;
219
216 if (shost_use_blk_mq(shost)) { 220 if (shost_use_blk_mq(shost)) {
217 error = scsi_mq_setup_tags(shost); 221 error = scsi_mq_setup_tags(shost);
218 if (error) 222 if (error)
@@ -226,19 +230,6 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
226 } 230 }
227 } 231 }
228 232
229 /*
230 * Note that we allocate the freelist even for the MQ case for now,
231 * as we need a command set aside for scsi_reset_provider. Having
232 * the full host freelist and one command available for that is a
233 * little heavy-handed, but avoids introducing a special allocator
234 * just for this. Eventually the structure of scsi_reset_provider
235 * will need a major overhaul.
236 */
237 error = scsi_setup_command_freelist(shost);
238 if (error)
239 goto out_destroy_tags;
240
241
242 if (!shost->shost_gendev.parent) 233 if (!shost->shost_gendev.parent)
243 shost->shost_gendev.parent = dev ? dev : &platform_bus; 234 shost->shost_gendev.parent = dev ? dev : &platform_bus;
244 if (!dma_dev) 235 if (!dma_dev)
@@ -258,7 +249,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
258 249
259 error = device_add(&shost->shost_gendev); 250 error = device_add(&shost->shost_gendev);
260 if (error) 251 if (error)
261 goto out_destroy_freelist; 252 goto out_disable_runtime_pm;
262 253
263 scsi_host_set_state(shost, SHOST_RUNNING); 254 scsi_host_set_state(shost, SHOST_RUNNING);
264 get_device(shost->shost_gendev.parent); 255 get_device(shost->shost_gendev.parent);
@@ -308,13 +299,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
308 device_del(&shost->shost_dev); 299 device_del(&shost->shost_dev);
309 out_del_gendev: 300 out_del_gendev:
310 device_del(&shost->shost_gendev); 301 device_del(&shost->shost_gendev);
311 out_destroy_freelist: 302 out_disable_runtime_pm:
312 device_disable_async_suspend(&shost->shost_gendev); 303 device_disable_async_suspend(&shost->shost_gendev);
313 pm_runtime_disable(&shost->shost_gendev); 304 pm_runtime_disable(&shost->shost_gendev);
314 pm_runtime_set_suspended(&shost->shost_gendev); 305 pm_runtime_set_suspended(&shost->shost_gendev);
315 pm_runtime_put_noidle(&shost->shost_gendev); 306 pm_runtime_put_noidle(&shost->shost_gendev);
316 scsi_destroy_command_freelist(shost);
317 out_destroy_tags:
318 if (shost_use_blk_mq(shost)) 307 if (shost_use_blk_mq(shost))
319 scsi_mq_destroy_tags(shost); 308 scsi_mq_destroy_tags(shost);
320 fail: 309 fail:
@@ -355,7 +344,6 @@ static void scsi_host_dev_release(struct device *dev)
355 kfree(dev_name(&shost->shost_dev)); 344 kfree(dev_name(&shost->shost_dev));
356 } 345 }
357 346
358 scsi_destroy_command_freelist(shost);
359 if (shost_use_blk_mq(shost)) { 347 if (shost_use_blk_mq(shost)) {
360 if (shost->tag_set.tags) 348 if (shost->tag_set.tags)
361 scsi_mq_destroy_tags(shost); 349 scsi_mq_destroy_tags(shost);
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index cbc0c5fe5a60..c611412a8de9 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -5539,8 +5539,8 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
5539 * Retries always go down the normal I/O path. 5539 * Retries always go down the normal I/O path.
5540 */ 5540 */
5541 if (likely(cmd->retries == 0 && 5541 if (likely(cmd->retries == 0 &&
5542 cmd->request->cmd_type == REQ_TYPE_FS && 5542 !blk_rq_is_passthrough(cmd->request) &&
5543 h->acciopath_status)) { 5543 h->acciopath_status)) {
5544 rc = hpsa_ioaccel_submit(h, c, cmd, scsi3addr); 5544 rc = hpsa_ioaccel_submit(h, c, cmd, scsi3addr);
5545 if (rc == 0) 5545 if (rc == 0)
5546 return 0; 5546 return 0;
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 919736a74ffa..aa76f36abe03 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -2095,7 +2095,7 @@ int fc_lport_bsg_request(struct bsg_job *job)
2095 2095
2096 bsg_reply->reply_payload_rcv_len = 0; 2096 bsg_reply->reply_payload_rcv_len = 0;
2097 if (rsp) 2097 if (rsp)
2098 rsp->resid_len = job->reply_payload.payload_len; 2098 scsi_req(rsp)->resid_len = job->reply_payload.payload_len;
2099 2099
2100 mutex_lock(&lport->lp_mutex); 2100 mutex_lock(&lport->lp_mutex);
2101 2101
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 022bb6e10d98..570b2cb2da43 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2174,12 +2174,12 @@ int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2174 bio_data(rsp->bio), blk_rq_bytes(rsp)); 2174 bio_data(rsp->bio), blk_rq_bytes(rsp));
2175 if (ret > 0) { 2175 if (ret > 0) {
2176 /* positive number is the untransferred residual */ 2176 /* positive number is the untransferred residual */
2177 rsp->resid_len = ret; 2177 scsi_req(rsp)->resid_len = ret;
2178 req->resid_len = 0; 2178 scsi_req(req)->resid_len = 0;
2179 ret = 0; 2179 ret = 0;
2180 } else if (ret == 0) { 2180 } else if (ret == 0) {
2181 rsp->resid_len = 0; 2181 scsi_req(rsp)->resid_len = 0;
2182 req->resid_len = 0; 2182 scsi_req(req)->resid_len = 0;
2183 } 2183 }
2184 2184
2185 return ret; 2185 return ret;
diff --git a/drivers/scsi/libsas/sas_host_smp.c b/drivers/scsi/libsas/sas_host_smp.c
index d24792575169..45cbbc44f4d7 100644
--- a/drivers/scsi/libsas/sas_host_smp.c
+++ b/drivers/scsi/libsas/sas_host_smp.c
@@ -274,15 +274,15 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
274 274
275 switch (req_data[1]) { 275 switch (req_data[1]) {
276 case SMP_REPORT_GENERAL: 276 case SMP_REPORT_GENERAL:
277 req->resid_len -= 8; 277 scsi_req(req)->resid_len -= 8;
278 rsp->resid_len -= 32; 278 scsi_req(rsp)->resid_len -= 32;
279 resp_data[2] = SMP_RESP_FUNC_ACC; 279 resp_data[2] = SMP_RESP_FUNC_ACC;
280 resp_data[9] = sas_ha->num_phys; 280 resp_data[9] = sas_ha->num_phys;
281 break; 281 break;
282 282
283 case SMP_REPORT_MANUF_INFO: 283 case SMP_REPORT_MANUF_INFO:
284 req->resid_len -= 8; 284 scsi_req(req)->resid_len -= 8;
285 rsp->resid_len -= 64; 285 scsi_req(rsp)->resid_len -= 64;
286 resp_data[2] = SMP_RESP_FUNC_ACC; 286 resp_data[2] = SMP_RESP_FUNC_ACC;
287 memcpy(resp_data + 12, shost->hostt->name, 287 memcpy(resp_data + 12, shost->hostt->name,
288 SAS_EXPANDER_VENDOR_ID_LEN); 288 SAS_EXPANDER_VENDOR_ID_LEN);
@@ -295,13 +295,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
295 break; 295 break;
296 296
297 case SMP_DISCOVER: 297 case SMP_DISCOVER:
298 req->resid_len -= 16; 298 scsi_req(req)->resid_len -= 16;
299 if ((int)req->resid_len < 0) { 299 if ((int)scsi_req(req)->resid_len < 0) {
300 req->resid_len = 0; 300 scsi_req(req)->resid_len = 0;
301 error = -EINVAL; 301 error = -EINVAL;
302 goto out; 302 goto out;
303 } 303 }
304 rsp->resid_len -= 56; 304 scsi_req(rsp)->resid_len -= 56;
305 sas_host_smp_discover(sas_ha, resp_data, req_data[9]); 305 sas_host_smp_discover(sas_ha, resp_data, req_data[9]);
306 break; 306 break;
307 307
@@ -311,13 +311,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
311 break; 311 break;
312 312
313 case SMP_REPORT_PHY_SATA: 313 case SMP_REPORT_PHY_SATA:
314 req->resid_len -= 16; 314 scsi_req(req)->resid_len -= 16;
315 if ((int)req->resid_len < 0) { 315 if ((int)scsi_req(req)->resid_len < 0) {
316 req->resid_len = 0; 316 scsi_req(req)->resid_len = 0;
317 error = -EINVAL; 317 error = -EINVAL;
318 goto out; 318 goto out;
319 } 319 }
320 rsp->resid_len -= 60; 320 scsi_req(rsp)->resid_len -= 60;
321 sas_report_phy_sata(sas_ha, resp_data, req_data[9]); 321 sas_report_phy_sata(sas_ha, resp_data, req_data[9]);
322 break; 322 break;
323 323
@@ -331,15 +331,15 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
331 int to_write = req_data[4]; 331 int to_write = req_data[4];
332 332
333 if (blk_rq_bytes(req) < base_frame_size + to_write * 4 || 333 if (blk_rq_bytes(req) < base_frame_size + to_write * 4 ||
334 req->resid_len < base_frame_size + to_write * 4) { 334 scsi_req(req)->resid_len < base_frame_size + to_write * 4) {
335 resp_data[2] = SMP_RESP_INV_FRM_LEN; 335 resp_data[2] = SMP_RESP_INV_FRM_LEN;
336 break; 336 break;
337 } 337 }
338 338
339 to_write = sas_host_smp_write_gpio(sas_ha, resp_data, req_data[2], 339 to_write = sas_host_smp_write_gpio(sas_ha, resp_data, req_data[2],
340 req_data[3], to_write, &req_data[8]); 340 req_data[3], to_write, &req_data[8]);
341 req->resid_len -= base_frame_size + to_write * 4; 341 scsi_req(req)->resid_len -= base_frame_size + to_write * 4;
342 rsp->resid_len -= 8; 342 scsi_req(rsp)->resid_len -= 8;
343 break; 343 break;
344 } 344 }
345 345
@@ -348,13 +348,13 @@ int sas_smp_host_handler(struct Scsi_Host *shost, struct request *req,
348 break; 348 break;
349 349
350 case SMP_PHY_CONTROL: 350 case SMP_PHY_CONTROL:
351 req->resid_len -= 44; 351 scsi_req(req)->resid_len -= 44;
352 if ((int)req->resid_len < 0) { 352 if ((int)scsi_req(req)->resid_len < 0) {
353 req->resid_len = 0; 353 scsi_req(req)->resid_len = 0;
354 error = -EINVAL; 354 error = -EINVAL;
355 goto out; 355 goto out;
356 } 356 }
357 rsp->resid_len -= 8; 357 scsi_req(rsp)->resid_len -= 8;
358 sas_phy_control(sas_ha, req_data[9], req_data[10], 358 sas_phy_control(sas_ha, req_data[9], req_data[10],
359 req_data[32] >> 4, req_data[33] >> 4, 359 req_data[32] >> 4, req_data[33] >> 4,
360 resp_data); 360 resp_data);
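
In the libsas hunks above the residual byte count is no longer a field of struct request; it lives in the struct scsi_request that now fronts the per-request payload of SCSI passthrough requests and is reached through scsi_req(). A small sketch of the accessor pattern, assuming req is a passthrough request and that the consume_residual() helper is hypothetical:

        #include <scsi/scsi_request.h>

        static void consume_residual(struct request *req, unsigned int bytes)
        {
                struct scsi_request *sreq = scsi_req(req);

                /* cmd[], cmd_len, sense, sense_len and resid_len all moved
                 * from struct request into struct scsi_request */
                if (sreq->resid_len > bytes)
                        sreq->resid_len -= bytes;
                else
                        sreq->resid_len = 0;
        }
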
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 0b5b423b1db0..c6d550551504 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4723,7 +4723,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
4723 * then scsi-ml does not need to handle this misbehavior. 4723 * then scsi-ml does not need to handle this misbehavior.
4724 */ 4724 */
4725 sector_sz = scmd->device->sector_size; 4725 sector_sz = scmd->device->sector_size;
4726 if (unlikely(scmd->request->cmd_type == REQ_TYPE_FS && sector_sz && 4726 if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz &&
4727 xfer_cnt % sector_sz)) { 4727 xfer_cnt % sector_sz)) {
4728 sdev_printk(KERN_INFO, scmd->device, 4728 sdev_printk(KERN_INFO, scmd->device,
4729 "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", 4729 "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n",
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index 7f1d5785bc30..e7a7a704a315 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -2057,10 +2057,10 @@ _transport_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
2057 ioc->name, __func__, 2057 ioc->name, __func__,
2058 le16_to_cpu(mpi_reply->ResponseDataLength))); 2058 le16_to_cpu(mpi_reply->ResponseDataLength)));
2059 2059
2060 memcpy(req->sense, mpi_reply, sizeof(*mpi_reply)); 2060 memcpy(scsi_req(req)->sense, mpi_reply, sizeof(*mpi_reply));
2061 req->sense_len = sizeof(*mpi_reply); 2061 scsi_req(req)->sense_len = sizeof(*mpi_reply);
2062 req->resid_len = 0; 2062 scsi_req(req)->resid_len = 0;
2063 rsp->resid_len -= 2063 scsi_req(rsp)->resid_len -=
2064 le16_to_cpu(mpi_reply->ResponseDataLength); 2064 le16_to_cpu(mpi_reply->ResponseDataLength);
2065 2065
2066 /* check if the resp needs to be copied from the allocated 2066 /* check if the resp needs to be copied from the allocated
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index ef99f62831fb..30b905080c61 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -48,6 +48,7 @@
48#include <scsi/osd_sense.h> 48#include <scsi/osd_sense.h>
49 49
50#include <scsi/scsi_device.h> 50#include <scsi/scsi_device.h>
51#include <scsi/scsi_request.h>
51 52
52#include "osd_debug.h" 53#include "osd_debug.h"
53 54
@@ -477,11 +478,13 @@ static void _set_error_resid(struct osd_request *or, struct request *req,
477{ 478{
478 or->async_error = error; 479 or->async_error = error;
479 or->req_errors = req->errors ? : error; 480 or->req_errors = req->errors ? : error;
480 or->sense_len = req->sense_len; 481 or->sense_len = scsi_req(req)->sense_len;
482 if (or->sense_len)
483 memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
481 if (or->out.req) 484 if (or->out.req)
482 or->out.residual = or->out.req->resid_len; 485 or->out.residual = scsi_req(or->out.req)->resid_len;
483 if (or->in.req) 486 if (or->in.req)
484 or->in.residual = or->in.req->resid_len; 487 or->in.residual = scsi_req(or->in.req)->resid_len;
485} 488}
486 489
487int osd_execute_request(struct osd_request *or) 490int osd_execute_request(struct osd_request *or)
@@ -1562,10 +1565,11 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
1562 struct bio *bio = oii->bio; 1565 struct bio *bio = oii->bio;
1563 int ret; 1566 int ret;
1564 1567
1565 req = blk_get_request(q, has_write ? WRITE : READ, flags); 1568 req = blk_get_request(q, has_write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
1569 flags);
1566 if (IS_ERR(req)) 1570 if (IS_ERR(req))
1567 return req; 1571 return req;
1568 blk_rq_set_block_pc(req); 1572 scsi_req_init(req);
1569 1573
1570 for_each_bio(bio) { 1574 for_each_bio(bio) {
1571 struct bio *bounce_bio = bio; 1575 struct bio *bounce_bio = bio;
@@ -1599,8 +1603,6 @@ static int _init_blk_request(struct osd_request *or,
1599 1603
1600 req->timeout = or->timeout; 1604 req->timeout = or->timeout;
1601 req->retries = or->retries; 1605 req->retries = or->retries;
1602 req->sense = or->sense;
1603 req->sense_len = 0;
1604 1606
1605 if (has_out) { 1607 if (has_out) {
1606 or->out.req = req; 1608 or->out.req = req;
@@ -1612,7 +1614,7 @@ static int _init_blk_request(struct osd_request *or,
1612 ret = PTR_ERR(req); 1614 ret = PTR_ERR(req);
1613 goto out; 1615 goto out;
1614 } 1616 }
1615 blk_rq_set_block_pc(req); 1617 scsi_req_init(req);
1616 or->in.req = or->request->next_rq = req; 1618 or->in.req = or->request->next_rq = req;
1617 } 1619 }
1618 } else if (has_in) 1620 } else if (has_in)
@@ -1699,8 +1701,8 @@ int osd_finalize_request(struct osd_request *or,
1699 1701
1700 osd_sec_sign_cdb(&or->cdb, cap_key); 1702 osd_sec_sign_cdb(&or->cdb, cap_key);
1701 1703
1702 or->request->cmd = or->cdb.buff; 1704 scsi_req(or->request)->cmd = or->cdb.buff;
1703 or->request->cmd_len = _osd_req_cdb_len(or); 1705 scsi_req(or->request)->cmd_len = _osd_req_cdb_len(or);
1704 1706
1705 return 0; 1707 return 0;
1706} 1708}
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index e8196c55b633..451de6c5e3c9 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -322,6 +322,7 @@ static int osst_chk_result(struct osst_tape * STp, struct osst_request * SRpnt)
322/* Wakeup from interrupt */ 322/* Wakeup from interrupt */
323static void osst_end_async(struct request *req, int update) 323static void osst_end_async(struct request *req, int update)
324{ 324{
325 struct scsi_request *rq = scsi_req(req);
325 struct osst_request *SRpnt = req->end_io_data; 326 struct osst_request *SRpnt = req->end_io_data;
326 struct osst_tape *STp = SRpnt->stp; 327 struct osst_tape *STp = SRpnt->stp;
327 struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; 328 struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
@@ -330,6 +331,8 @@ static void osst_end_async(struct request *req, int update)
330#if DEBUG 331#if DEBUG
331 STp->write_pending = 0; 332 STp->write_pending = 0;
332#endif 333#endif
334 if (rq->sense_len)
335 memcpy(SRpnt->sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
333 if (SRpnt->waiting) 336 if (SRpnt->waiting)
334 complete(SRpnt->waiting); 337 complete(SRpnt->waiting);
335 338
@@ -357,17 +360,20 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
357 int use_sg, int timeout, int retries) 360 int use_sg, int timeout, int retries)
358{ 361{
359 struct request *req; 362 struct request *req;
363 struct scsi_request *rq;
360 struct page **pages = NULL; 364 struct page **pages = NULL;
361 struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; 365 struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
362 366
363 int err = 0; 367 int err = 0;
364 int write = (data_direction == DMA_TO_DEVICE); 368 int write = (data_direction == DMA_TO_DEVICE);
365 369
366 req = blk_get_request(SRpnt->stp->device->request_queue, write, GFP_KERNEL); 370 req = blk_get_request(SRpnt->stp->device->request_queue,
371 write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
367 if (IS_ERR(req)) 372 if (IS_ERR(req))
368 return DRIVER_ERROR << 24; 373 return DRIVER_ERROR << 24;
369 374
370 blk_rq_set_block_pc(req); 375 rq = scsi_req(req);
376 scsi_req_init(req);
371 req->rq_flags |= RQF_QUIET; 377 req->rq_flags |= RQF_QUIET;
372 378
373 SRpnt->bio = NULL; 379 SRpnt->bio = NULL;
@@ -404,11 +410,9 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
404 goto free_req; 410 goto free_req;
405 } 411 }
406 412
407 req->cmd_len = cmd_len; 413 rq->cmd_len = cmd_len;
408 memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ 414 memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
409 memcpy(req->cmd, cmd, req->cmd_len); 415 memcpy(rq->cmd, cmd, rq->cmd_len);
410 req->sense = SRpnt->sense;
411 req->sense_len = 0;
412 req->timeout = timeout; 416 req->timeout = timeout;
413 req->retries = retries; 417 req->retries = retries;
414 req->end_io_data = SRpnt; 418 req->end_io_data = SRpnt;
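
osst now follows the same submission pattern as the other converted drivers: the data direction is encoded in the request opcode at allocation time, the CDB moves into the scsi_request, and sense data is copied out of the scsi_request on completion instead of pointing the request at a caller-supplied buffer up front. A condensed sketch of the synchronous form of that pattern (error handling trimmed; sdev, to_device, cdb, sense_out and the timeout/retry values are assumptions or illustrative):

        struct request *req;
        struct scsi_request *rq;

        req = blk_get_request(sdev->request_queue,
                              to_device ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
                              GFP_KERNEL);
        if (IS_ERR(req))
                return PTR_ERR(req);
        rq = scsi_req(req);
        scsi_req_init(req);

        rq->cmd_len = COMMAND_SIZE(cdb[0]);
        memcpy(rq->cmd, cdb, rq->cmd_len);
        req->timeout = 60 * HZ;
        req->retries = 3;

        blk_execute_rq(req->q, NULL, req, 1);  /* run it and wait */

        if (rq->sense_len)
                memcpy(sense_out, rq->sense, SCSI_SENSE_BUFFERSIZE);
        blk_put_request(req);
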
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 1bf8061ff803..40ca75bbcb9d 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -921,7 +921,7 @@ qla2x00_process_loopback(struct bsg_job *bsg_job)
921 921
922 bsg_job->reply_len = sizeof(struct fc_bsg_reply) + 922 bsg_job->reply_len = sizeof(struct fc_bsg_reply) +
923 sizeof(response) + sizeof(uint8_t); 923 sizeof(response) + sizeof(uint8_t);
924 fw_sts_ptr = ((uint8_t *)bsg_job->req->sense) + 924 fw_sts_ptr = ((uint8_t *)scsi_req(bsg_job->req)->sense) +
925 sizeof(struct fc_bsg_reply); 925 sizeof(struct fc_bsg_reply);
926 memcpy(fw_sts_ptr, response, sizeof(response)); 926 memcpy(fw_sts_ptr, response, sizeof(response));
927 fw_sts_ptr += sizeof(response); 927 fw_sts_ptr += sizeof(response);
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index a94b0b6bd030..9281bf47cbed 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1468,7 +1468,8 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
1468 type, sp->handle, comp_status, fw_status[1], fw_status[2], 1468 type, sp->handle, comp_status, fw_status[1], fw_status[2],
1469 le16_to_cpu(((struct els_sts_entry_24xx *) 1469 le16_to_cpu(((struct els_sts_entry_24xx *)
1470 pkt)->total_byte_count)); 1470 pkt)->total_byte_count));
1471 fw_sts_ptr = ((uint8_t*)bsg_job->req->sense) + sizeof(struct fc_bsg_reply); 1471 fw_sts_ptr = ((uint8_t*)scsi_req(bsg_job->req)->sense) +
1472 sizeof(struct fc_bsg_reply);
1472 memcpy( fw_sts_ptr, fw_status, sizeof(fw_status)); 1473 memcpy( fw_sts_ptr, fw_status, sizeof(fw_status));
1473 } 1474 }
1474 else { 1475 else {
@@ -1482,7 +1483,8 @@ qla24xx_els_ct_entry(scsi_qla_host_t *vha, struct req_que *req,
1482 pkt)->error_subcode_2)); 1483 pkt)->error_subcode_2));
1483 res = DID_ERROR << 16; 1484 res = DID_ERROR << 16;
1484 bsg_reply->reply_payload_rcv_len = 0; 1485 bsg_reply->reply_payload_rcv_len = 0;
1485 fw_sts_ptr = ((uint8_t*)bsg_job->req->sense) + sizeof(struct fc_bsg_reply); 1486 fw_sts_ptr = ((uint8_t*)scsi_req(bsg_job->req)->sense) +
1487 sizeof(struct fc_bsg_reply);
1486 memcpy( fw_sts_ptr, fw_status, sizeof(fw_status)); 1488 memcpy( fw_sts_ptr, fw_status, sizeof(fw_status));
1487 } 1489 }
1488 ql_dump_buffer(ql_dbg_user + ql_dbg_buffer, vha, 0x5056, 1490 ql_dump_buffer(ql_dbg_user + ql_dbg_buffer, vha, 0x5056,
diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index 02f1de18bc2b..96c33e292eba 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -2244,7 +2244,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req,
2244 memcpy(fstatus.reserved_3, 2244 memcpy(fstatus.reserved_3,
2245 pkt->reserved_2, 20 * sizeof(uint8_t)); 2245 pkt->reserved_2, 20 * sizeof(uint8_t));
2246 2246
2247 fw_sts_ptr = ((uint8_t *)bsg_job->req->sense) + 2247 fw_sts_ptr = ((uint8_t *)scsi_req(bsg_job->req)->sense) +
2248 sizeof(struct fc_bsg_reply); 2248 sizeof(struct fc_bsg_reply);
2249 2249
2250 memcpy(fw_sts_ptr, (uint8_t *)&fstatus, 2250 memcpy(fw_sts_ptr, (uint8_t *)&fstatus,
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 75455d4dab68..7bfbcfa7af40 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -98,176 +98,6 @@ EXPORT_SYMBOL(scsi_sd_probe_domain);
98ASYNC_DOMAIN_EXCLUSIVE(scsi_sd_pm_domain); 98ASYNC_DOMAIN_EXCLUSIVE(scsi_sd_pm_domain);
99EXPORT_SYMBOL(scsi_sd_pm_domain); 99EXPORT_SYMBOL(scsi_sd_pm_domain);
100 100
101struct scsi_host_cmd_pool {
102 struct kmem_cache *cmd_slab;
103 struct kmem_cache *sense_slab;
104 unsigned int users;
105 char *cmd_name;
106 char *sense_name;
107 unsigned int slab_flags;
108 gfp_t gfp_mask;
109};
110
111static struct scsi_host_cmd_pool scsi_cmd_pool = {
112 .cmd_name = "scsi_cmd_cache",
113 .sense_name = "scsi_sense_cache",
114 .slab_flags = SLAB_HWCACHE_ALIGN,
115};
116
117static struct scsi_host_cmd_pool scsi_cmd_dma_pool = {
118 .cmd_name = "scsi_cmd_cache(DMA)",
119 .sense_name = "scsi_sense_cache(DMA)",
120 .slab_flags = SLAB_HWCACHE_ALIGN|SLAB_CACHE_DMA,
121 .gfp_mask = __GFP_DMA,
122};
123
124static DEFINE_MUTEX(host_cmd_pool_mutex);
125
126/**
127 * scsi_host_free_command - internal function to release a command
128 * @shost: host to free the command for
129 * @cmd: command to release
130 *
131 * the command must previously have been allocated by
132 * scsi_host_alloc_command.
133 */
134static void
135scsi_host_free_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
136{
137 struct scsi_host_cmd_pool *pool = shost->cmd_pool;
138
139 if (cmd->prot_sdb)
140 kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
141 kmem_cache_free(pool->sense_slab, cmd->sense_buffer);
142 kmem_cache_free(pool->cmd_slab, cmd);
143}
144
145/**
146 * scsi_host_alloc_command - internal function to allocate command
147 * @shost: SCSI host whose pool to allocate from
148 * @gfp_mask: mask for the allocation
149 *
150 * Returns a fully allocated command with sense buffer and protection
151 * data buffer (where applicable) or NULL on failure
152 */
153static struct scsi_cmnd *
154scsi_host_alloc_command(struct Scsi_Host *shost, gfp_t gfp_mask)
155{
156 struct scsi_host_cmd_pool *pool = shost->cmd_pool;
157 struct scsi_cmnd *cmd;
158
159 cmd = kmem_cache_zalloc(pool->cmd_slab, gfp_mask | pool->gfp_mask);
160 if (!cmd)
161 goto fail;
162
163 cmd->sense_buffer = kmem_cache_alloc(pool->sense_slab,
164 gfp_mask | pool->gfp_mask);
165 if (!cmd->sense_buffer)
166 goto fail_free_cmd;
167
168 if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
169 cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp_mask);
170 if (!cmd->prot_sdb)
171 goto fail_free_sense;
172 }
173
174 return cmd;
175
176fail_free_sense:
177 kmem_cache_free(pool->sense_slab, cmd->sense_buffer);
178fail_free_cmd:
179 kmem_cache_free(pool->cmd_slab, cmd);
180fail:
181 return NULL;
182}
183
184/**
185 * __scsi_get_command - Allocate a struct scsi_cmnd
186 * @shost: host to transmit command
187 * @gfp_mask: allocation mask
188 *
189 * Description: allocate a struct scsi_cmd from host's slab, recycling from the
190 * host's free_list if necessary.
191 */
192static struct scsi_cmnd *
193__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
194{
195 struct scsi_cmnd *cmd = scsi_host_alloc_command(shost, gfp_mask);
196
197 if (unlikely(!cmd)) {
198 unsigned long flags;
199
200 spin_lock_irqsave(&shost->free_list_lock, flags);
201 if (likely(!list_empty(&shost->free_list))) {
202 cmd = list_entry(shost->free_list.next,
203 struct scsi_cmnd, list);
204 list_del_init(&cmd->list);
205 }
206 spin_unlock_irqrestore(&shost->free_list_lock, flags);
207
208 if (cmd) {
209 void *buf, *prot;
210
211 buf = cmd->sense_buffer;
212 prot = cmd->prot_sdb;
213
214 memset(cmd, 0, sizeof(*cmd));
215
216 cmd->sense_buffer = buf;
217 cmd->prot_sdb = prot;
218 }
219 }
220
221 return cmd;
222}
223
224/**
225 * scsi_get_command - Allocate and setup a scsi command block
226 * @dev: parent scsi device
227 * @gfp_mask: allocator flags
228 *
229 * Returns: The allocated scsi command structure.
230 */
231struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)
232{
233 struct scsi_cmnd *cmd = __scsi_get_command(dev->host, gfp_mask);
234 unsigned long flags;
235
236 if (unlikely(cmd == NULL))
237 return NULL;
238
239 cmd->device = dev;
240 INIT_LIST_HEAD(&cmd->list);
241 INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
242 spin_lock_irqsave(&dev->list_lock, flags);
243 list_add_tail(&cmd->list, &dev->cmd_list);
244 spin_unlock_irqrestore(&dev->list_lock, flags);
245 cmd->jiffies_at_alloc = jiffies;
246 return cmd;
247}
248
249/**
250 * __scsi_put_command - Free a struct scsi_cmnd
251 * @shost: dev->host
252 * @cmd: Command to free
253 */
254static void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
255{
256 unsigned long flags;
257
258 if (unlikely(list_empty(&shost->free_list))) {
259 spin_lock_irqsave(&shost->free_list_lock, flags);
260 if (list_empty(&shost->free_list)) {
261 list_add(&cmd->list, &shost->free_list);
262 cmd = NULL;
263 }
264 spin_unlock_irqrestore(&shost->free_list_lock, flags);
265 }
266
267 if (likely(cmd != NULL))
268 scsi_host_free_command(shost, cmd);
269}
270
271/** 101/**
272 * scsi_put_command - Free a scsi command block 102 * scsi_put_command - Free a scsi command block
273 * @cmd: command block to free 103 * @cmd: command block to free
@@ -287,188 +117,6 @@ void scsi_put_command(struct scsi_cmnd *cmd)
287 spin_unlock_irqrestore(&cmd->device->list_lock, flags); 117 spin_unlock_irqrestore(&cmd->device->list_lock, flags);
288 118
289 BUG_ON(delayed_work_pending(&cmd->abort_work)); 119 BUG_ON(delayed_work_pending(&cmd->abort_work));
290
291 __scsi_put_command(cmd->device->host, cmd);
292}
293
294static struct scsi_host_cmd_pool *
295scsi_find_host_cmd_pool(struct Scsi_Host *shost)
296{
297 if (shost->hostt->cmd_size)
298 return shost->hostt->cmd_pool;
299 if (shost->unchecked_isa_dma)
300 return &scsi_cmd_dma_pool;
301 return &scsi_cmd_pool;
302}
303
304static void
305scsi_free_host_cmd_pool(struct scsi_host_cmd_pool *pool)
306{
307 kfree(pool->sense_name);
308 kfree(pool->cmd_name);
309 kfree(pool);
310}
311
312static struct scsi_host_cmd_pool *
313scsi_alloc_host_cmd_pool(struct Scsi_Host *shost)
314{
315 struct scsi_host_template *hostt = shost->hostt;
316 struct scsi_host_cmd_pool *pool;
317
318 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
319 if (!pool)
320 return NULL;
321
322 pool->cmd_name = kasprintf(GFP_KERNEL, "%s_cmd", hostt->proc_name);
323 pool->sense_name = kasprintf(GFP_KERNEL, "%s_sense", hostt->proc_name);
324 if (!pool->cmd_name || !pool->sense_name) {
325 scsi_free_host_cmd_pool(pool);
326 return NULL;
327 }
328
329 pool->slab_flags = SLAB_HWCACHE_ALIGN;
330 if (shost->unchecked_isa_dma) {
331 pool->slab_flags |= SLAB_CACHE_DMA;
332 pool->gfp_mask = __GFP_DMA;
333 }
334
335 if (hostt->cmd_size)
336 hostt->cmd_pool = pool;
337
338 return pool;
339}
340
341static struct scsi_host_cmd_pool *
342scsi_get_host_cmd_pool(struct Scsi_Host *shost)
343{
344 struct scsi_host_template *hostt = shost->hostt;
345 struct scsi_host_cmd_pool *retval = NULL, *pool;
346 size_t cmd_size = sizeof(struct scsi_cmnd) + hostt->cmd_size;
347
348 /*
349 * Select a command slab for this host and create it if not
350 * yet existent.
351 */
352 mutex_lock(&host_cmd_pool_mutex);
353 pool = scsi_find_host_cmd_pool(shost);
354 if (!pool) {
355 pool = scsi_alloc_host_cmd_pool(shost);
356 if (!pool)
357 goto out;
358 }
359
360 if (!pool->users) {
361 pool->cmd_slab = kmem_cache_create(pool->cmd_name, cmd_size, 0,
362 pool->slab_flags, NULL);
363 if (!pool->cmd_slab)
364 goto out_free_pool;
365
366 pool->sense_slab = kmem_cache_create(pool->sense_name,
367 SCSI_SENSE_BUFFERSIZE, 0,
368 pool->slab_flags, NULL);
369 if (!pool->sense_slab)
370 goto out_free_slab;
371 }
372
373 pool->users++;
374 retval = pool;
375out:
376 mutex_unlock(&host_cmd_pool_mutex);
377 return retval;
378
379out_free_slab:
380 kmem_cache_destroy(pool->cmd_slab);
381out_free_pool:
382 if (hostt->cmd_size) {
383 scsi_free_host_cmd_pool(pool);
384 hostt->cmd_pool = NULL;
385 }
386 goto out;
387}
388
389static void scsi_put_host_cmd_pool(struct Scsi_Host *shost)
390{
391 struct scsi_host_template *hostt = shost->hostt;
392 struct scsi_host_cmd_pool *pool;
393
394 mutex_lock(&host_cmd_pool_mutex);
395 pool = scsi_find_host_cmd_pool(shost);
396
397 /*
398 * This may happen if a driver has a mismatched get and put
399 * of the command pool; the driver should be implicated in
400 * the stack trace
401 */
402 BUG_ON(pool->users == 0);
403
404 if (!--pool->users) {
405 kmem_cache_destroy(pool->cmd_slab);
406 kmem_cache_destroy(pool->sense_slab);
407 if (hostt->cmd_size) {
408 scsi_free_host_cmd_pool(pool);
409 hostt->cmd_pool = NULL;
410 }
411 }
412 mutex_unlock(&host_cmd_pool_mutex);
413}
414
415/**
416 * scsi_setup_command_freelist - Setup the command freelist for a scsi host.
417 * @shost: host to allocate the freelist for.
418 *
419 * Description: The command freelist protects against system-wide out of memory
420 * deadlock by preallocating one SCSI command structure for each host, so the
421 * system can always write to a swap file on a device associated with that host.
422 *
423 * Returns: Nothing.
424 */
425int scsi_setup_command_freelist(struct Scsi_Host *shost)
426{
427 const gfp_t gfp_mask = shost->unchecked_isa_dma ? GFP_DMA : GFP_KERNEL;
428 struct scsi_cmnd *cmd;
429
430 spin_lock_init(&shost->free_list_lock);
431 INIT_LIST_HEAD(&shost->free_list);
432
433 shost->cmd_pool = scsi_get_host_cmd_pool(shost);
434 if (!shost->cmd_pool)
435 return -ENOMEM;
436
437 /*
438 * Get one backup command for this host.
439 */
440 cmd = scsi_host_alloc_command(shost, gfp_mask);
441 if (!cmd) {
442 scsi_put_host_cmd_pool(shost);
443 shost->cmd_pool = NULL;
444 return -ENOMEM;
445 }
446 list_add(&cmd->list, &shost->free_list);
447 return 0;
448}
449
450/**
451 * scsi_destroy_command_freelist - Release the command freelist for a scsi host.
452 * @shost: host whose freelist is going to be destroyed
453 */
454void scsi_destroy_command_freelist(struct Scsi_Host *shost)
455{
456 /*
457 * If cmd_pool is NULL the free list was not initialized, so
458 * do not attempt to release resources.
459 */
460 if (!shost->cmd_pool)
461 return;
462
463 while (!list_empty(&shost->free_list)) {
464 struct scsi_cmnd *cmd;
465
466 cmd = list_entry(shost->free_list.next, struct scsi_cmnd, list);
467 list_del_init(&cmd->list);
468 scsi_host_free_command(shost, cmd);
469 }
470 shost->cmd_pool = NULL;
471 scsi_put_host_cmd_pool(shost);
472} 120}
473 121
474#ifdef CONFIG_SCSI_LOGGING 122#ifdef CONFIG_SCSI_LOGGING
@@ -590,7 +238,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
590 "(result %x)\n", cmd->result)); 238 "(result %x)\n", cmd->result));
591 239
592 good_bytes = scsi_bufflen(cmd); 240 good_bytes = scsi_bufflen(cmd);
593 if (cmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { 241 if (!blk_rq_is_passthrough(cmd->request)) {
594 int old_good_bytes = good_bytes; 242 int old_good_bytes = good_bytes;
595 drv = scsi_cmd_to_driver(cmd); 243 drv = scsi_cmd_to_driver(cmd);
596 if (drv->done) 244 if (drv->done)
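
The large deletion from drivers/scsi/scsi.c removes the per-host command freelist and the scsi_cmnd/sense slab pools: once commands are carved out of the request allocation itself, a command is simply the payload of its request and no separate allocator is needed. A two-line sketch of how the core now hops between the two views (both helpers appear elsewhere in this series):

        struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);  /* request -> command */
        struct request *rq = cmd->request;              /* command -> request */
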
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 996e134d79fa..9e82fa5715bc 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1106,7 +1106,7 @@ static int scsi_request_sense(struct scsi_cmnd *scmd)
1106 1106
1107static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn) 1107static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
1108{ 1108{
1109 if (scmd->request->cmd_type != REQ_TYPE_BLOCK_PC) { 1109 if (!blk_rq_is_passthrough(scmd->request)) {
1110 struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); 1110 struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
1111 if (sdrv->eh_action) 1111 if (sdrv->eh_action)
1112 rtn = sdrv->eh_action(scmd, rtn); 1112 rtn = sdrv->eh_action(scmd, rtn);
@@ -1746,7 +1746,7 @@ check_type:
1746 * the check condition was retryable. 1746 * the check condition was retryable.
1747 */ 1747 */
1748 if (scmd->request->cmd_flags & REQ_FAILFAST_DEV || 1748 if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
1749 scmd->request->cmd_type == REQ_TYPE_BLOCK_PC) 1749 blk_rq_is_passthrough(scmd->request))
1750 return 1; 1750 return 1;
1751 else 1751 else
1752 return 0; 1752 return 0;
@@ -1968,25 +1968,25 @@ static void eh_lock_door_done(struct request *req, int uptodate)
1968static void scsi_eh_lock_door(struct scsi_device *sdev) 1968static void scsi_eh_lock_door(struct scsi_device *sdev)
1969{ 1969{
1970 struct request *req; 1970 struct request *req;
1971 struct scsi_request *rq;
1971 1972
1972 /* 1973 /*
1973 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a 1974 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a
1974 * request becomes available 1975 * request becomes available
1975 */ 1976 */
1976 req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL); 1977 req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, GFP_KERNEL);
1977 if (IS_ERR(req)) 1978 if (IS_ERR(req))
1978 return; 1979 return;
1980 rq = scsi_req(req);
1981 scsi_req_init(req);
1979 1982
1980 blk_rq_set_block_pc(req); 1983 rq->cmd[0] = ALLOW_MEDIUM_REMOVAL;
1981 1984 rq->cmd[1] = 0;
1982 req->cmd[0] = ALLOW_MEDIUM_REMOVAL; 1985 rq->cmd[2] = 0;
1983 req->cmd[1] = 0; 1986 rq->cmd[3] = 0;
1984 req->cmd[2] = 0; 1987 rq->cmd[4] = SCSI_REMOVAL_PREVENT;
1985 req->cmd[3] = 0; 1988 rq->cmd[5] = 0;
1986 req->cmd[4] = SCSI_REMOVAL_PREVENT; 1989 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
1987 req->cmd[5] = 0;
1988
1989 req->cmd_len = COMMAND_SIZE(req->cmd[0]);
1990 1990
1991 req->rq_flags |= RQF_QUIET; 1991 req->rq_flags |= RQF_QUIET;
1992 req->timeout = 10 * HZ; 1992 req->timeout = 10 * HZ;
@@ -2331,7 +2331,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
2331{ 2331{
2332 struct scsi_cmnd *scmd; 2332 struct scsi_cmnd *scmd;
2333 struct Scsi_Host *shost = dev->host; 2333 struct Scsi_Host *shost = dev->host;
2334 struct request req; 2334 struct request *rq;
2335 unsigned long flags; 2335 unsigned long flags;
2336 int error = 0, rtn, val; 2336 int error = 0, rtn, val;
2337 2337
@@ -2346,14 +2346,16 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
2346 return -EIO; 2346 return -EIO;
2347 2347
2348 error = -EIO; 2348 error = -EIO;
2349 scmd = scsi_get_command(dev, GFP_KERNEL); 2349 rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) +
2350 if (!scmd) 2350 shost->hostt->cmd_size, GFP_KERNEL);
2351 if (!rq)
2351 goto out_put_autopm_host; 2352 goto out_put_autopm_host;
2353 blk_rq_init(NULL, rq);
2352 2354
2353 blk_rq_init(NULL, &req); 2355 scmd = (struct scsi_cmnd *)(rq + 1);
2354 scmd->request = &req; 2356 scsi_init_command(dev, scmd);
2355 2357 scmd->request = rq;
2356 scmd->cmnd = req.cmd; 2358 scmd->cmnd = scsi_req(rq)->cmd;
2357 2359
2358 scmd->scsi_done = scsi_reset_provider_done_command; 2360 scmd->scsi_done = scsi_reset_provider_done_command;
2359 memset(&scmd->sdb, 0, sizeof(scmd->sdb)); 2361 memset(&scmd->sdb, 0, sizeof(scmd->sdb));
@@ -2413,6 +2415,7 @@ scsi_ioctl_reset(struct scsi_device *dev, int __user *arg)
2413 scsi_run_host_queues(shost); 2415 scsi_run_host_queues(shost);
2414 2416
2415 scsi_put_command(scmd); 2417 scsi_put_command(scmd);
2418 kfree(rq);
2416 2419
2417out_put_autopm_host: 2420out_put_autopm_host:
2418 scsi_autopm_put_host(shost); 2421 scsi_autopm_put_host(shost);
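
The scsi_ioctl_reset() rework follows from the new CDB ownership: the command bytes now live in scsi_req(rq)->cmd, so the reset path can no longer park a struct request on the stack and instead builds the request, the scsi_cmnd and the host's per-command data as one heap allocation. Roughly, as a sketch of the layout rather than additional code from the patch:

        /* one contiguous allocation:
         * [ struct request | struct scsi_cmnd | LLD per-command data ]
         */
        rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) +
                     shost->hostt->cmd_size, GFP_KERNEL);
        blk_rq_init(NULL, rq);
        scmd = (struct scsi_cmnd *)(rq + 1);
        scsi_init_command(dev, scmd);
        scmd->request = rq;
        scmd->cmnd = scsi_req(rq)->cmd;
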
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 78db07fd8055..912fbc3b4543 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -37,8 +37,59 @@
37#include "scsi_priv.h" 37#include "scsi_priv.h"
38#include "scsi_logging.h" 38#include "scsi_logging.h"
39 39
40static struct kmem_cache *scsi_sdb_cache;
41static struct kmem_cache *scsi_sense_cache;
42static struct kmem_cache *scsi_sense_isadma_cache;
43static DEFINE_MUTEX(scsi_sense_cache_mutex);
40 44
41struct kmem_cache *scsi_sdb_cache; 45static inline struct kmem_cache *
46scsi_select_sense_cache(struct Scsi_Host *shost)
47{
48 return shost->unchecked_isa_dma ?
49 scsi_sense_isadma_cache : scsi_sense_cache;
50}
51
52static void scsi_free_sense_buffer(struct Scsi_Host *shost,
53 unsigned char *sense_buffer)
54{
55 kmem_cache_free(scsi_select_sense_cache(shost), sense_buffer);
56}
57
58static unsigned char *scsi_alloc_sense_buffer(struct Scsi_Host *shost,
59 gfp_t gfp_mask, int numa_node)
60{
61 return kmem_cache_alloc_node(scsi_select_sense_cache(shost), gfp_mask,
62 numa_node);
63}
64
65int scsi_init_sense_cache(struct Scsi_Host *shost)
66{
67 struct kmem_cache *cache;
68 int ret = 0;
69
70 cache = scsi_select_sense_cache(shost);
71 if (cache)
72 return 0;
73
74 mutex_lock(&scsi_sense_cache_mutex);
75 if (shost->unchecked_isa_dma) {
76 scsi_sense_isadma_cache =
77 kmem_cache_create("scsi_sense_cache(DMA)",
78 SCSI_SENSE_BUFFERSIZE, 0,
79 SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL);
80 if (!scsi_sense_isadma_cache)
81 ret = -ENOMEM;
82 } else {
83 scsi_sense_cache =
84 kmem_cache_create("scsi_sense_cache",
85 SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
86 if (!scsi_sense_cache)
87 ret = -ENOMEM;
88 }
89
90 mutex_unlock(&scsi_sense_cache_mutex);
91 return ret;
92}
42 93
43/* 94/*
44 * When to reinvoke queueing after a resource shortage. It's 3 msecs to 95 * When to reinvoke queueing after a resource shortage. It's 3 msecs to
@@ -168,22 +219,23 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
168 req_flags_t rq_flags, int *resid) 219 req_flags_t rq_flags, int *resid)
169{ 220{
170 struct request *req; 221 struct request *req;
171 int write = (data_direction == DMA_TO_DEVICE); 222 struct scsi_request *rq;
172 int ret = DRIVER_ERROR << 24; 223 int ret = DRIVER_ERROR << 24;
173 224
174 req = blk_get_request(sdev->request_queue, write, __GFP_RECLAIM); 225 req = blk_get_request(sdev->request_queue,
226 data_direction == DMA_TO_DEVICE ?
227 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
175 if (IS_ERR(req)) 228 if (IS_ERR(req))
176 return ret; 229 return ret;
177 blk_rq_set_block_pc(req); 230 rq = scsi_req(req);
231 scsi_req_init(req);
178 232
179 if (bufflen && blk_rq_map_kern(sdev->request_queue, req, 233 if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
180 buffer, bufflen, __GFP_RECLAIM)) 234 buffer, bufflen, __GFP_RECLAIM))
181 goto out; 235 goto out;
182 236
183 req->cmd_len = COMMAND_SIZE(cmd[0]); 237 rq->cmd_len = COMMAND_SIZE(cmd[0]);
184 memcpy(req->cmd, cmd, req->cmd_len); 238 memcpy(rq->cmd, cmd, rq->cmd_len);
185 req->sense = sense;
186 req->sense_len = 0;
187 req->retries = retries; 239 req->retries = retries;
188 req->timeout = timeout; 240 req->timeout = timeout;
189 req->cmd_flags |= flags; 241 req->cmd_flags |= flags;
@@ -200,11 +252,13 @@ static int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
200 * is invalid. Prevent the garbage from being misinterpreted 252 * is invalid. Prevent the garbage from being misinterpreted
201 * and prevent security leaks by zeroing out the excess data. 253 * and prevent security leaks by zeroing out the excess data.
202 */ 254 */
203 if (unlikely(req->resid_len > 0 && req->resid_len <= bufflen)) 255 if (unlikely(rq->resid_len > 0 && rq->resid_len <= bufflen))
204 memset(buffer + (bufflen - req->resid_len), 0, req->resid_len); 256 memset(buffer + (bufflen - rq->resid_len), 0, rq->resid_len);
205 257
206 if (resid) 258 if (resid)
207 *resid = req->resid_len; 259 *resid = rq->resid_len;
260 if (sense && rq->sense_len)
261 memcpy(sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
208 ret = req->errors; 262 ret = req->errors;
209 out: 263 out:
210 blk_put_request(req); 264 blk_put_request(req);
@@ -529,7 +583,7 @@ void scsi_run_host_queues(struct Scsi_Host *shost)
529 583
530static void scsi_uninit_cmd(struct scsi_cmnd *cmd) 584static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
531{ 585{
532 if (cmd->request->cmd_type == REQ_TYPE_FS) { 586 if (!blk_rq_is_passthrough(cmd->request)) {
533 struct scsi_driver *drv = scsi_cmd_to_driver(cmd); 587 struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
534 588
535 if (drv->uninit_command) 589 if (drv->uninit_command)
@@ -645,14 +699,13 @@ static bool scsi_end_request(struct request *req, int error,
645 699
646 if (bidi_bytes) 700 if (bidi_bytes)
647 scsi_release_bidi_buffers(cmd); 701 scsi_release_bidi_buffers(cmd);
702 scsi_release_buffers(cmd);
703 scsi_put_command(cmd);
648 704
649 spin_lock_irqsave(q->queue_lock, flags); 705 spin_lock_irqsave(q->queue_lock, flags);
650 blk_finish_request(req, error); 706 blk_finish_request(req, error);
651 spin_unlock_irqrestore(q->queue_lock, flags); 707 spin_unlock_irqrestore(q->queue_lock, flags);
652 708
653 scsi_release_buffers(cmd);
654
655 scsi_put_command(cmd);
656 scsi_run_queue(q); 709 scsi_run_queue(q);
657 } 710 }
658 711
@@ -754,18 +807,15 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
754 sense_deferred = scsi_sense_is_deferred(&sshdr); 807 sense_deferred = scsi_sense_is_deferred(&sshdr);
755 } 808 }
756 809
757 if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */ 810 if (blk_rq_is_passthrough(req)) {
758 if (result) { 811 if (result) {
759 if (sense_valid && req->sense) { 812 if (sense_valid) {
760 /* 813 /*
761 * SG_IO wants current and deferred errors 814 * SG_IO wants current and deferred errors
762 */ 815 */
763 int len = 8 + cmd->sense_buffer[7]; 816 scsi_req(req)->sense_len =
764 817 min(8 + cmd->sense_buffer[7],
765 if (len > SCSI_SENSE_BUFFERSIZE) 818 SCSI_SENSE_BUFFERSIZE);
766 len = SCSI_SENSE_BUFFERSIZE;
767 memcpy(req->sense, cmd->sense_buffer, len);
768 req->sense_len = len;
769 } 819 }
770 if (!sense_deferred) 820 if (!sense_deferred)
771 error = __scsi_error_from_host_byte(cmd, result); 821 error = __scsi_error_from_host_byte(cmd, result);
@@ -775,14 +825,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
775 */ 825 */
776 req->errors = cmd->result; 826 req->errors = cmd->result;
777 827
778 req->resid_len = scsi_get_resid(cmd); 828 scsi_req(req)->resid_len = scsi_get_resid(cmd);
779 829
780 if (scsi_bidi_cmnd(cmd)) { 830 if (scsi_bidi_cmnd(cmd)) {
781 /* 831 /*
782 * Bidi commands Must be complete as a whole, 832 * Bidi commands Must be complete as a whole,
783 * both sides at once. 833 * both sides at once.
784 */ 834 */
785 req->next_rq->resid_len = scsi_in(cmd)->resid; 835 scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
786 if (scsi_end_request(req, 0, blk_rq_bytes(req), 836 if (scsi_end_request(req, 0, blk_rq_bytes(req),
787 blk_rq_bytes(req->next_rq))) 837 blk_rq_bytes(req->next_rq)))
788 BUG(); 838 BUG();
@@ -790,15 +840,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
790 } 840 }
791 } else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) { 841 } else if (blk_rq_bytes(req) == 0 && result && !sense_deferred) {
792 /* 842 /*
 793 * Certain non BLOCK_PC requests are commands that don't 843 * Flush commands do not transfer any data, and thus cannot use
794 * actually transfer anything (FLUSH), so cannot use
795 * good_bytes != blk_rq_bytes(req) as the signal for an error. 844 * good_bytes != blk_rq_bytes(req) as the signal for an error.
796 * This sets the error explicitly for the problem case. 845 * This sets the error explicitly for the problem case.
797 */ 846 */
798 error = __scsi_error_from_host_byte(cmd, result); 847 error = __scsi_error_from_host_byte(cmd, result);
799 } 848 }
800 849
801 /* no bidi support for !REQ_TYPE_BLOCK_PC yet */ 850 /* no bidi support for !blk_rq_is_passthrough yet */
802 BUG_ON(blk_bidi_rq(req)); 851 BUG_ON(blk_bidi_rq(req));
803 852
804 /* 853 /*
@@ -810,8 +859,8 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
810 blk_rq_sectors(req), good_bytes)); 859 blk_rq_sectors(req), good_bytes));
811 860
812 /* 861 /*
813 * Recovered errors need reporting, but they're always treated 862 * Recovered errors need reporting, but they're always treated as
814 * as success, so fiddle the result code here. For BLOCK_PC 863 * success, so fiddle the result code here. For passthrough requests
815 * we already took a copy of the original into rq->errors which 864 * we already took a copy of the original into rq->errors which
816 * is what gets returned to the user 865 * is what gets returned to the user
817 */ 866 */
@@ -825,7 +874,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
825 else if (!(req->rq_flags & RQF_QUIET)) 874 else if (!(req->rq_flags & RQF_QUIET))
826 scsi_print_sense(cmd); 875 scsi_print_sense(cmd);
827 result = 0; 876 result = 0;
828 /* BLOCK_PC may have set error */ 877 /* for passthrough error may be set */
829 error = 0; 878 error = 0;
830 } 879 }
831 880
@@ -1110,42 +1159,33 @@ err_exit:
1110} 1159}
1111EXPORT_SYMBOL(scsi_init_io); 1160EXPORT_SYMBOL(scsi_init_io);
1112 1161
1113static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev, 1162void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
1114 struct request *req)
1115{ 1163{
1116 struct scsi_cmnd *cmd; 1164 void *buf = cmd->sense_buffer;
1117 1165 void *prot = cmd->prot_sdb;
1118 if (!req->special) { 1166 unsigned long flags;
1119 /* Bail if we can't get a reference to the device */
1120 if (!get_device(&sdev->sdev_gendev))
1121 return NULL;
1122
1123 cmd = scsi_get_command(sdev, GFP_ATOMIC);
1124 if (unlikely(!cmd)) {
1125 put_device(&sdev->sdev_gendev);
1126 return NULL;
1127 }
1128 req->special = cmd;
1129 } else {
1130 cmd = req->special;
1131 }
1132 1167
1133 /* pull a tag out of the request if we have one */ 1168 /* zero out the cmd, except for the embedded scsi_request */
1134 cmd->tag = req->tag; 1169 memset((char *)cmd + sizeof(cmd->req), 0,
1135 cmd->request = req; 1170 sizeof(*cmd) - sizeof(cmd->req));
1136 1171
1137 cmd->cmnd = req->cmd; 1172 cmd->device = dev;
1138 cmd->prot_op = SCSI_PROT_NORMAL; 1173 cmd->sense_buffer = buf;
1174 cmd->prot_sdb = prot;
1175 INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
1176 cmd->jiffies_at_alloc = jiffies;
1139 1177
1140 return cmd; 1178 spin_lock_irqsave(&dev->list_lock, flags);
1179 list_add_tail(&cmd->list, &dev->cmd_list);
1180 spin_unlock_irqrestore(&dev->list_lock, flags);
1141} 1181}
1142 1182
1143static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) 1183static int scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req)
1144{ 1184{
1145 struct scsi_cmnd *cmd = req->special; 1185 struct scsi_cmnd *cmd = req->special;
1146 1186
1147 /* 1187 /*
 1148 * BLOCK_PC requests may transfer data, in which case they must have 1188 * Passthrough requests may transfer data, in which case they must have
1149 * a bio attached to them. Or they might contain a SCSI command 1189 * a bio attached to them. Or they might contain a SCSI command
1150 * that does not transfer data, in which case they may optionally 1190 * that does not transfer data, in which case they may optionally
1151 * submit a request without an attached bio. 1191 * submit a request without an attached bio.
@@ -1160,14 +1200,15 @@ static int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
1160 memset(&cmd->sdb, 0, sizeof(cmd->sdb)); 1200 memset(&cmd->sdb, 0, sizeof(cmd->sdb));
1161 } 1201 }
1162 1202
1163 cmd->cmd_len = req->cmd_len; 1203 cmd->cmd_len = scsi_req(req)->cmd_len;
1204 cmd->cmnd = scsi_req(req)->cmd;
1164 cmd->transfersize = blk_rq_bytes(req); 1205 cmd->transfersize = blk_rq_bytes(req);
1165 cmd->allowed = req->retries; 1206 cmd->allowed = req->retries;
1166 return BLKPREP_OK; 1207 return BLKPREP_OK;
1167} 1208}
1168 1209
1169/* 1210/*
 1170 * Setup a REQ_TYPE_FS command. These are simple requests from filesystems 1211 * Setup a normal block command. These are simple requests from filesystems
1171 * that still need to be translated to SCSI CDBs from the ULD. 1212 * that still need to be translated to SCSI CDBs from the ULD.
1172 */ 1213 */
1173static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) 1214static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
@@ -1180,6 +1221,7 @@ static int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
1180 return ret; 1221 return ret;
1181 } 1222 }
1182 1223
1224 cmd->cmnd = scsi_req(req)->cmd = scsi_req(req)->__cmd;
1183 memset(cmd->cmnd, 0, BLK_MAX_CDB); 1225 memset(cmd->cmnd, 0, BLK_MAX_CDB);
1184 return scsi_cmd_to_driver(cmd)->init_command(cmd); 1226 return scsi_cmd_to_driver(cmd)->init_command(cmd);
1185} 1227}
@@ -1195,14 +1237,10 @@ static int scsi_setup_cmnd(struct scsi_device *sdev, struct request *req)
1195 else 1237 else
1196 cmd->sc_data_direction = DMA_FROM_DEVICE; 1238 cmd->sc_data_direction = DMA_FROM_DEVICE;
1197 1239
1198 switch (req->cmd_type) { 1240 if (blk_rq_is_scsi(req))
1199 case REQ_TYPE_FS: 1241 return scsi_setup_scsi_cmnd(sdev, req);
1242 else
1200 return scsi_setup_fs_cmnd(sdev, req); 1243 return scsi_setup_fs_cmnd(sdev, req);
1201 case REQ_TYPE_BLOCK_PC:
1202 return scsi_setup_blk_pc_cmnd(sdev, req);
1203 default:
1204 return BLKPREP_KILL;
1205 }
1206} 1244}
1207 1245
1208static int 1246static int
@@ -1298,19 +1336,28 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
1298static int scsi_prep_fn(struct request_queue *q, struct request *req) 1336static int scsi_prep_fn(struct request_queue *q, struct request *req)
1299{ 1337{
1300 struct scsi_device *sdev = q->queuedata; 1338 struct scsi_device *sdev = q->queuedata;
1301 struct scsi_cmnd *cmd; 1339 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
1302 int ret; 1340 int ret;
1303 1341
1304 ret = scsi_prep_state_check(sdev, req); 1342 ret = scsi_prep_state_check(sdev, req);
1305 if (ret != BLKPREP_OK) 1343 if (ret != BLKPREP_OK)
1306 goto out; 1344 goto out;
1307 1345
1308 cmd = scsi_get_cmd_from_req(sdev, req); 1346 if (!req->special) {
1309 if (unlikely(!cmd)) { 1347 /* Bail if we can't get a reference to the device */
1310 ret = BLKPREP_DEFER; 1348 if (unlikely(!get_device(&sdev->sdev_gendev))) {
1311 goto out; 1349 ret = BLKPREP_DEFER;
1350 goto out;
1351 }
1352
1353 scsi_init_command(sdev, cmd);
1354 req->special = cmd;
1312 } 1355 }
1313 1356
1357 cmd->tag = req->tag;
1358 cmd->request = req;
1359 cmd->prot_op = SCSI_PROT_NORMAL;
1360
1314 ret = scsi_setup_cmnd(sdev, req); 1361 ret = scsi_setup_cmnd(sdev, req);
1315out: 1362out:
1316 return scsi_prep_return(q, req, ret); 1363 return scsi_prep_return(q, req, ret);
@@ -1827,7 +1874,9 @@ static int scsi_mq_prep_fn(struct request *req)
1827 unsigned char *sense_buf = cmd->sense_buffer; 1874 unsigned char *sense_buf = cmd->sense_buffer;
1828 struct scatterlist *sg; 1875 struct scatterlist *sg;
1829 1876
1830 memset(cmd, 0, sizeof(struct scsi_cmnd)); 1877 /* zero out the cmd, except for the embedded scsi_request */
1878 memset((char *)cmd + sizeof(cmd->req), 0,
1879 sizeof(*cmd) - sizeof(cmd->req));
1831 1880
1832 req->special = cmd; 1881 req->special = cmd;
1833 1882
@@ -1837,7 +1886,6 @@ static int scsi_mq_prep_fn(struct request *req)
1837 1886
1838 cmd->tag = req->tag; 1887 cmd->tag = req->tag;
1839 1888
1840 cmd->cmnd = req->cmd;
1841 cmd->prot_op = SCSI_PROT_NORMAL; 1889 cmd->prot_op = SCSI_PROT_NORMAL;
1842 1890
1843 INIT_LIST_HEAD(&cmd->list); 1891 INIT_LIST_HEAD(&cmd->list);
@@ -1912,7 +1960,6 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1912 if (!scsi_host_queue_ready(q, shost, sdev)) 1960 if (!scsi_host_queue_ready(q, shost, sdev))
1913 goto out_dec_target_busy; 1961 goto out_dec_target_busy;
1914 1962
1915
1916 if (!(req->rq_flags & RQF_DONTPREP)) { 1963 if (!(req->rq_flags & RQF_DONTPREP)) {
1917 ret = prep_to_mq(scsi_mq_prep_fn(req)); 1964 ret = prep_to_mq(scsi_mq_prep_fn(req));
1918 if (ret != BLK_MQ_RQ_QUEUE_OK) 1965 if (ret != BLK_MQ_RQ_QUEUE_OK)
@@ -1982,21 +2029,24 @@ static int scsi_init_request(void *data, struct request *rq,
1982 unsigned int hctx_idx, unsigned int request_idx, 2029 unsigned int hctx_idx, unsigned int request_idx,
1983 unsigned int numa_node) 2030 unsigned int numa_node)
1984{ 2031{
2032 struct Scsi_Host *shost = data;
1985 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); 2033 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
1986 2034
1987 cmd->sense_buffer = kzalloc_node(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL, 2035 cmd->sense_buffer =
1988 numa_node); 2036 scsi_alloc_sense_buffer(shost, GFP_KERNEL, numa_node);
1989 if (!cmd->sense_buffer) 2037 if (!cmd->sense_buffer)
1990 return -ENOMEM; 2038 return -ENOMEM;
2039 cmd->req.sense = cmd->sense_buffer;
1991 return 0; 2040 return 0;
1992} 2041}
1993 2042
1994static void scsi_exit_request(void *data, struct request *rq, 2043static void scsi_exit_request(void *data, struct request *rq,
1995 unsigned int hctx_idx, unsigned int request_idx) 2044 unsigned int hctx_idx, unsigned int request_idx)
1996{ 2045{
2046 struct Scsi_Host *shost = data;
1997 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); 2047 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
1998 2048
1999 kfree(cmd->sense_buffer); 2049 scsi_free_sense_buffer(shost, cmd->sense_buffer);
2000} 2050}
2001 2051
2002static int scsi_map_queues(struct blk_mq_tag_set *set) 2052static int scsi_map_queues(struct blk_mq_tag_set *set)
@@ -2029,7 +2079,7 @@ static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
2029 return bounce_limit; 2079 return bounce_limit;
2030} 2080}
2031 2081
2032static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) 2082void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
2033{ 2083{
2034 struct device *dev = shost->dma_dev; 2084 struct device *dev = shost->dma_dev;
2035 2085
@@ -2064,28 +2114,64 @@ static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
2064 */ 2114 */
2065 blk_queue_dma_alignment(q, 0x03); 2115 blk_queue_dma_alignment(q, 0x03);
2066} 2116}
2117EXPORT_SYMBOL_GPL(__scsi_init_queue);
2067 2118
2068struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, 2119static int scsi_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
2069 request_fn_proc *request_fn)
2070{ 2120{
2071 struct request_queue *q; 2121 struct Scsi_Host *shost = q->rq_alloc_data;
2122 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
2072 2123
2073 q = blk_init_queue(request_fn, NULL); 2124 memset(cmd, 0, sizeof(*cmd));
2074 if (!q) 2125
2075 return NULL; 2126 cmd->sense_buffer = scsi_alloc_sense_buffer(shost, gfp, NUMA_NO_NODE);
2076 __scsi_init_queue(shost, q); 2127 if (!cmd->sense_buffer)
2077 return q; 2128 goto fail;
2129 cmd->req.sense = cmd->sense_buffer;
2130
2131 if (scsi_host_get_prot(shost) >= SHOST_DIX_TYPE0_PROTECTION) {
2132 cmd->prot_sdb = kmem_cache_zalloc(scsi_sdb_cache, gfp);
2133 if (!cmd->prot_sdb)
2134 goto fail_free_sense;
2135 }
2136
2137 return 0;
2138
2139fail_free_sense:
2140 scsi_free_sense_buffer(shost, cmd->sense_buffer);
2141fail:
2142 return -ENOMEM;
2143}
2144
2145static void scsi_exit_rq(struct request_queue *q, struct request *rq)
2146{
2147 struct Scsi_Host *shost = q->rq_alloc_data;
2148 struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
2149
2150 if (cmd->prot_sdb)
2151 kmem_cache_free(scsi_sdb_cache, cmd->prot_sdb);
2152 scsi_free_sense_buffer(shost, cmd->sense_buffer);
2078} 2153}
2079EXPORT_SYMBOL(__scsi_alloc_queue);
2080 2154
2081struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) 2155struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
2082{ 2156{
2157 struct Scsi_Host *shost = sdev->host;
2083 struct request_queue *q; 2158 struct request_queue *q;
2084 2159
2085 q = __scsi_alloc_queue(sdev->host, scsi_request_fn); 2160 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
2086 if (!q) 2161 if (!q)
2087 return NULL; 2162 return NULL;
2163 q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
2164 q->rq_alloc_data = shost;
2165 q->request_fn = scsi_request_fn;
2166 q->init_rq_fn = scsi_init_rq;
2167 q->exit_rq_fn = scsi_exit_rq;
2168
2169 if (blk_init_allocated_queue(q) < 0) {
2170 blk_cleanup_queue(q);
2171 return NULL;
2172 }
2088 2173
2174 __scsi_init_queue(shost, q);
2089 blk_queue_prep_rq(q, scsi_prep_fn); 2175 blk_queue_prep_rq(q, scsi_prep_fn);
2090 blk_queue_unprep_rq(q, scsi_unprep_fn); 2176 blk_queue_unprep_rq(q, scsi_unprep_fn);
2091 blk_queue_softirq_done(q, scsi_softirq_done); 2177 blk_queue_softirq_done(q, scsi_softirq_done);
@@ -2209,6 +2295,8 @@ int __init scsi_init_queue(void)
2209 2295
2210void scsi_exit_queue(void) 2296void scsi_exit_queue(void)
2211{ 2297{
2298 kmem_cache_destroy(scsi_sense_cache);
2299 kmem_cache_destroy(scsi_sense_isadma_cache);
2212 kmem_cache_destroy(scsi_sdb_cache); 2300 kmem_cache_destroy(scsi_sdb_cache);
2213} 2301}
2214 2302
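
scsi_alloc_queue() now sizes the legacy request queue so that every request is followed by a struct scsi_cmnd plus the LLD's cmd_size, and uses the new init_rq_fn/exit_rq_fn hooks to attach the per-request sense buffer, mirroring what scsi_init_request()/scsi_exit_request() do on the blk-mq side. As a generic sketch of those hooks for some driver-private payload (struct my_pdu, MY_BUF_SIZE, my_driver_data, my_request_fn and the my_*() callbacks are illustrative, not part of this patch; error handling trimmed):

        static int my_init_rq(struct request_queue *q, struct request *rq,
                              gfp_t gfp)
        {
                struct my_pdu *pdu = blk_mq_rq_to_pdu(rq);

                pdu->buf = kmalloc(MY_BUF_SIZE, gfp);
                return pdu->buf ? 0 : -ENOMEM;
        }

        static void my_exit_rq(struct request_queue *q, struct request *rq)
        {
                struct my_pdu *pdu = blk_mq_rq_to_pdu(rq);

                kfree(pdu->buf);
        }

        q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
        q->cmd_size = sizeof(struct my_pdu);
        q->rq_alloc_data = my_driver_data;
        q->request_fn = my_request_fn;
        q->init_rq_fn = my_init_rq;
        q->exit_rq_fn = my_exit_rq;
        if (blk_init_allocated_queue(q) < 0)
                blk_cleanup_queue(q);
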
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 193636a59adf..99bfc985e190 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -30,8 +30,8 @@ extern void scsi_exit_hosts(void);
30 30
31/* scsi.c */ 31/* scsi.c */
32extern bool scsi_use_blk_mq; 32extern bool scsi_use_blk_mq;
33extern int scsi_setup_command_freelist(struct Scsi_Host *shost); 33int scsi_init_sense_cache(struct Scsi_Host *shost);
34extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); 34void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd);
35#ifdef CONFIG_SCSI_LOGGING 35#ifdef CONFIG_SCSI_LOGGING
36void scsi_log_send(struct scsi_cmnd *cmd); 36void scsi_log_send(struct scsi_cmnd *cmd);
37void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); 37void scsi_log_completion(struct scsi_cmnd *cmd, int disposition);
@@ -96,7 +96,6 @@ extern void scsi_exit_queue(void);
96extern void scsi_evt_thread(struct work_struct *work); 96extern void scsi_evt_thread(struct work_struct *work);
97struct request_queue; 97struct request_queue;
98struct request; 98struct request;
99extern struct kmem_cache *scsi_sdb_cache;
100 99
101/* scsi_proc.c */ 100/* scsi_proc.c */
102#ifdef CONFIG_SCSI_PROC_FS 101#ifdef CONFIG_SCSI_PROC_FS
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 03577bde6ac5..13dcb9ba823c 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3765,7 +3765,6 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
3765 struct device *dev = &shost->shost_gendev; 3765 struct device *dev = &shost->shost_gendev;
3766 struct fc_internal *i = to_fc_internal(shost->transportt); 3766 struct fc_internal *i = to_fc_internal(shost->transportt);
3767 struct request_queue *q; 3767 struct request_queue *q;
3768 int err;
3769 char bsg_name[20]; 3768 char bsg_name[20];
3770 3769
3771 fc_host->rqst_q = NULL; 3770 fc_host->rqst_q = NULL;
@@ -3776,23 +3775,14 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct fc_host_attrs *fc_host)
3776 snprintf(bsg_name, sizeof(bsg_name), 3775 snprintf(bsg_name, sizeof(bsg_name),
3777 "fc_host%d", shost->host_no); 3776 "fc_host%d", shost->host_no);
3778 3777
3779 q = __scsi_alloc_queue(shost, bsg_request_fn); 3778 q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size);
3780 if (!q) { 3779 if (IS_ERR(q)) {
3781 dev_err(dev,
3782 "fc_host%d: bsg interface failed to initialize - no request queue\n",
3783 shost->host_no);
3784 return -ENOMEM;
3785 }
3786
3787 err = bsg_setup_queue(dev, q, bsg_name, fc_bsg_dispatch,
3788 i->f->dd_bsg_size);
3789 if (err) {
3790 dev_err(dev, 3780 dev_err(dev,
3791 "fc_host%d: bsg interface failed to initialize - setup queue\n", 3781 "fc_host%d: bsg interface failed to initialize - setup queue\n",
3792 shost->host_no); 3782 shost->host_no);
3793 blk_cleanup_queue(q); 3783 return PTR_ERR(q);
3794 return err;
3795 } 3784 }
3785 __scsi_init_queue(shost, q);
3796 blk_queue_rq_timed_out(q, fc_bsg_job_timeout); 3786 blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
3797 blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT); 3787 blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);
3798 fc_host->rqst_q = q; 3788 fc_host->rqst_q = q;
@@ -3824,26 +3814,18 @@ fc_bsg_rportadd(struct Scsi_Host *shost, struct fc_rport *rport)
3824 struct device *dev = &rport->dev; 3814 struct device *dev = &rport->dev;
3825 struct fc_internal *i = to_fc_internal(shost->transportt); 3815 struct fc_internal *i = to_fc_internal(shost->transportt);
3826 struct request_queue *q; 3816 struct request_queue *q;
3827 int err;
3828 3817
3829 rport->rqst_q = NULL; 3818 rport->rqst_q = NULL;
3830 3819
3831 if (!i->f->bsg_request) 3820 if (!i->f->bsg_request)
3832 return -ENOTSUPP; 3821 return -ENOTSUPP;
3833 3822
3834 q = __scsi_alloc_queue(shost, bsg_request_fn); 3823 q = bsg_setup_queue(dev, NULL, fc_bsg_dispatch, i->f->dd_bsg_size);
3835 if (!q) { 3824 if (IS_ERR(q)) {
3836 dev_err(dev, "bsg interface failed to initialize - no request queue\n");
3837 return -ENOMEM;
3838 }
3839
3840 err = bsg_setup_queue(dev, q, NULL, fc_bsg_dispatch, i->f->dd_bsg_size);
3841 if (err) {
3842 dev_err(dev, "failed to setup bsg queue\n"); 3825 dev_err(dev, "failed to setup bsg queue\n");
3843 blk_cleanup_queue(q); 3826 return PTR_ERR(q);
3844 return err;
3845 } 3827 }
3846 3828 __scsi_init_queue(shost, q);
3847 blk_queue_prep_rq(q, fc_bsg_rport_prep); 3829 blk_queue_prep_rq(q, fc_bsg_rport_prep);
3848 blk_queue_rq_timed_out(q, fc_bsg_job_timeout); 3830 blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
3849 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); 3831 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 42bca619f854..568c9f26a561 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1537,24 +1537,18 @@ iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost)
1537 struct iscsi_internal *i = to_iscsi_internal(shost->transportt); 1537 struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
1538 struct request_queue *q; 1538 struct request_queue *q;
1539 char bsg_name[20]; 1539 char bsg_name[20];
1540 int ret;
1541 1540
1542 if (!i->iscsi_transport->bsg_request) 1541 if (!i->iscsi_transport->bsg_request)
1543 return -ENOTSUPP; 1542 return -ENOTSUPP;
1544 1543
1545 snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); 1544 snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no);
1546 1545 q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0);
1547 q = __scsi_alloc_queue(shost, bsg_request_fn); 1546 if (IS_ERR(q)) {
1548 if (!q)
1549 return -ENOMEM;
1550
1551 ret = bsg_setup_queue(dev, q, bsg_name, iscsi_bsg_host_dispatch, 0);
1552 if (ret) {
1553 shost_printk(KERN_ERR, shost, "bsg interface failed to " 1547 shost_printk(KERN_ERR, shost, "bsg interface failed to "
1554 "initialize - no request queue\n"); 1548 "initialize - no request queue\n");
1555 blk_cleanup_queue(q); 1549 return PTR_ERR(q);
1556 return ret;
1557 } 1550 }
1551 __scsi_init_queue(shost, q);
1558 1552
1559 ihost->bsg_q = q; 1553 ihost->bsg_q = q;
1560 return 0; 1554 return 0;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 60b651bfaa01..126a5ee00987 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -33,6 +33,7 @@
33#include <linux/bsg.h> 33#include <linux/bsg.h>
34 34
35#include <scsi/scsi.h> 35#include <scsi/scsi.h>
36#include <scsi/scsi_request.h>
36#include <scsi/scsi_device.h> 37#include <scsi/scsi_device.h>
37#include <scsi/scsi_host.h> 38#include <scsi/scsi_host.h>
38#include <scsi/scsi_transport.h> 39#include <scsi/scsi_transport.h>
@@ -177,6 +178,10 @@ static void sas_smp_request(struct request_queue *q, struct Scsi_Host *shost,
177 while ((req = blk_fetch_request(q)) != NULL) { 178 while ((req = blk_fetch_request(q)) != NULL) {
178 spin_unlock_irq(q->queue_lock); 179 spin_unlock_irq(q->queue_lock);
179 180
181 scsi_req(req)->resid_len = blk_rq_bytes(req);
182 if (req->next_rq)
183 scsi_req(req->next_rq)->resid_len =
184 blk_rq_bytes(req->next_rq);
180 handler = to_sas_internal(shost->transportt)->f->smp_handler; 185 handler = to_sas_internal(shost->transportt)->f->smp_handler;
181 ret = handler(shost, rphy, req); 186 ret = handler(shost, rphy, req);
182 req->errors = ret; 187 req->errors = ret;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1f5d92a25a49..40b4038c019e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -781,7 +781,7 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
781 rq->special_vec.bv_len = len; 781 rq->special_vec.bv_len = len;
782 782
783 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; 783 rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
784 rq->resid_len = len; 784 scsi_req(rq)->resid_len = len;
785 785
786 ret = scsi_init_io(cmd); 786 ret = scsi_init_io(cmd);
787out: 787out:
@@ -1179,7 +1179,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
1179 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) 1179 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
1180 __free_page(rq->special_vec.bv_page); 1180 __free_page(rq->special_vec.bv_page);
1181 1181
1182 if (SCpnt->cmnd != rq->cmd) { 1182 if (SCpnt->cmnd != scsi_req(rq)->cmd) {
1183 mempool_free(SCpnt->cmnd, sd_cdb_pool); 1183 mempool_free(SCpnt->cmnd, sd_cdb_pool);
1184 SCpnt->cmnd = NULL; 1184 SCpnt->cmnd = NULL;
1185 SCpnt->cmd_len = 0; 1185 SCpnt->cmd_len = 0;
@@ -1750,9 +1750,6 @@ static unsigned int sd_completed_bytes(struct scsi_cmnd *scmd)
1750 unsigned int transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd); 1750 unsigned int transferred = scsi_bufflen(scmd) - scsi_get_resid(scmd);
1751 unsigned int good_bytes; 1751 unsigned int good_bytes;
1752 1752
1753 if (scmd->request->cmd_type != REQ_TYPE_FS)
1754 return 0;
1755
1756 info_valid = scsi_get_sense_info_fld(scmd->sense_buffer, 1753 info_valid = scsi_get_sense_info_fld(scmd->sense_buffer,
1757 SCSI_SENSE_BUFFERSIZE, 1754 SCSI_SENSE_BUFFERSIZE,
1758 &bad_lba); 1755 &bad_lba);
@@ -3082,6 +3079,23 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
3082 put_device(&sdkp->dev); 3079 put_device(&sdkp->dev);
3083} 3080}
3084 3081
3082struct sd_devt {
3083 int idx;
3084 struct disk_devt disk_devt;
3085};
3086
3087void sd_devt_release(struct disk_devt *disk_devt)
3088{
3089 struct sd_devt *sd_devt = container_of(disk_devt, struct sd_devt,
3090 disk_devt);
3091
3092 spin_lock(&sd_index_lock);
3093 ida_remove(&sd_index_ida, sd_devt->idx);
3094 spin_unlock(&sd_index_lock);
3095
3096 kfree(sd_devt);
3097}
3098
3085/** 3099/**
3086 * sd_probe - called during driver initialization and whenever a 3100 * sd_probe - called during driver initialization and whenever a
3087 * new scsi device is attached to the system. It is called once 3101 * new scsi device is attached to the system. It is called once
@@ -3103,6 +3117,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
3103static int sd_probe(struct device *dev) 3117static int sd_probe(struct device *dev)
3104{ 3118{
3105 struct scsi_device *sdp = to_scsi_device(dev); 3119 struct scsi_device *sdp = to_scsi_device(dev);
3120 struct sd_devt *sd_devt;
3106 struct scsi_disk *sdkp; 3121 struct scsi_disk *sdkp;
3107 struct gendisk *gd; 3122 struct gendisk *gd;
3108 int index; 3123 int index;
@@ -3128,9 +3143,13 @@ static int sd_probe(struct device *dev)
3128 if (!sdkp) 3143 if (!sdkp)
3129 goto out; 3144 goto out;
3130 3145
3146 sd_devt = kzalloc(sizeof(*sd_devt), GFP_KERNEL);
3147 if (!sd_devt)
3148 goto out_free;
3149
3131 gd = alloc_disk(SD_MINORS); 3150 gd = alloc_disk(SD_MINORS);
3132 if (!gd) 3151 if (!gd)
3133 goto out_free; 3152 goto out_free_devt;
3134 3153
3135 do { 3154 do {
3136 if (!ida_pre_get(&sd_index_ida, GFP_KERNEL)) 3155 if (!ida_pre_get(&sd_index_ida, GFP_KERNEL))
@@ -3146,6 +3165,11 @@ static int sd_probe(struct device *dev)
3146 goto out_put; 3165 goto out_put;
3147 } 3166 }
3148 3167
3168 atomic_set(&sd_devt->disk_devt.count, 1);
3169 sd_devt->disk_devt.release = sd_devt_release;
3170 sd_devt->idx = index;
3171 gd->disk_devt = &sd_devt->disk_devt;
3172
3149 error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); 3173 error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
3150 if (error) { 3174 if (error) {
3151 sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n"); 3175 sdev_printk(KERN_WARNING, sdp, "SCSI disk (sd) name length exceeded.\n");
@@ -3185,13 +3209,14 @@ static int sd_probe(struct device *dev)
3185 return 0; 3209 return 0;
3186 3210
3187 out_free_index: 3211 out_free_index:
3188 spin_lock(&sd_index_lock); 3212 put_disk_devt(&sd_devt->disk_devt);
3189 ida_remove(&sd_index_ida, index); 3213 sd_devt = NULL;
3190 spin_unlock(&sd_index_lock);
3191 out_put: 3214 out_put:
3192 put_disk(gd); 3215 put_disk(gd);
3193 out_free: 3216 out_free:
3194 kfree(sdkp); 3217 kfree(sdkp);
3218 out_free_devt:
3219 kfree(sd_devt);
3195 out: 3220 out:
3196 scsi_autopm_put_device(sdp); 3221 scsi_autopm_put_device(sdp);
3197 return error; 3222 return error;
@@ -3250,10 +3275,7 @@ static void scsi_disk_release(struct device *dev)
3250 struct scsi_disk *sdkp = to_scsi_disk(dev); 3275 struct scsi_disk *sdkp = to_scsi_disk(dev);
3251 struct gendisk *disk = sdkp->disk; 3276 struct gendisk *disk = sdkp->disk;
3252 3277
3253 spin_lock(&sd_index_lock); 3278 put_disk_devt(disk->disk_devt);
3254 ida_remove(&sd_index_ida, sdkp->index);
3255 spin_unlock(&sd_index_lock);
3256
3257 disk->private_data = NULL; 3279 disk->private_data = NULL;
3258 put_disk(disk); 3280 put_disk(disk);
3259 put_device(&sdkp->device->sdev_gendev); 3281 put_device(&sdkp->device->sdev_gendev);
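
sd_probe() now wraps the ida index in a refcounted sd_devt, so the index only goes back to sd_index_ida once the last disk_devt reference is dropped; scsi_disk_release() accordingly calls put_disk_devt() instead of ida_remove() directly. A rough sketch of that lifetime, assuming put_disk_devt() drops the embedded count and invokes ->release() when it reaches zero (that helper comes from the genhd side of the series, not this hunk):

        /* probe: hand ownership of the index to the refcounted devt */
        atomic_set(&sd_devt->disk_devt.count, 1);
        sd_devt->disk_devt.release = sd_devt_release;   /* ida_remove() + kfree() */
        sd_devt->idx = index;
        gd->disk_devt = &sd_devt->disk_devt;

        /* release: the last holder frees the index via sd_devt_release() */
        put_disk_devt(disk->disk_devt);
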
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 121de0aaa6ad..e831e01f9fa6 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -781,9 +781,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
781 } 781 }
782 if (atomic_read(&sdp->detaching)) { 782 if (atomic_read(&sdp->detaching)) {
783 if (srp->bio) { 783 if (srp->bio) {
784 if (srp->rq->cmd != srp->rq->__cmd) 784 scsi_req_free_cmd(scsi_req(srp->rq));
785 kfree(srp->rq->cmd);
786
787 blk_end_request_all(srp->rq, -EIO); 785 blk_end_request_all(srp->rq, -EIO);
788 srp->rq = NULL; 786 srp->rq = NULL;
789 } 787 }
@@ -1279,6 +1277,7 @@ static void
1279sg_rq_end_io(struct request *rq, int uptodate) 1277sg_rq_end_io(struct request *rq, int uptodate)
1280{ 1278{
1281 struct sg_request *srp = rq->end_io_data; 1279 struct sg_request *srp = rq->end_io_data;
1280 struct scsi_request *req = scsi_req(rq);
1282 Sg_device *sdp; 1281 Sg_device *sdp;
1283 Sg_fd *sfp; 1282 Sg_fd *sfp;
1284 unsigned long iflags; 1283 unsigned long iflags;
@@ -1297,9 +1296,9 @@ sg_rq_end_io(struct request *rq, int uptodate)
1297 if (unlikely(atomic_read(&sdp->detaching))) 1296 if (unlikely(atomic_read(&sdp->detaching)))
1298 pr_info("%s: device detaching\n", __func__); 1297 pr_info("%s: device detaching\n", __func__);
1299 1298
1300 sense = rq->sense; 1299 sense = req->sense;
1301 result = rq->errors; 1300 result = rq->errors;
1302 resid = rq->resid_len; 1301 resid = req->resid_len;
1303 1302
1304 SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, 1303 SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp,
1305 "sg_cmd_done: pack_id=%d, res=0x%x\n", 1304 "sg_cmd_done: pack_id=%d, res=0x%x\n",
@@ -1333,6 +1332,10 @@ sg_rq_end_io(struct request *rq, int uptodate)
1333 sdp->device->changed = 1; 1332 sdp->device->changed = 1;
1334 } 1333 }
1335 } 1334 }
1335
1336 if (req->sense_len)
1337 memcpy(srp->sense_b, req->sense, SCSI_SENSE_BUFFERSIZE);
1338
1336 /* Rely on write phase to clean out srp status values, so no "else" */ 1339 /* Rely on write phase to clean out srp status values, so no "else" */
1337 1340
1338 /* 1341 /*
@@ -1342,8 +1345,7 @@ sg_rq_end_io(struct request *rq, int uptodate)
1342 * blk_rq_unmap_user() can be called from user context. 1345 * blk_rq_unmap_user() can be called from user context.
1343 */ 1346 */
1344 srp->rq = NULL; 1347 srp->rq = NULL;
1345 if (rq->cmd != rq->__cmd) 1348 scsi_req_free_cmd(scsi_req(rq));
1346 kfree(rq->cmd);
1347 __blk_put_request(rq->q, rq); 1349 __blk_put_request(rq->q, rq);
1348 1350
1349 write_lock_irqsave(&sfp->rq_list_lock, iflags); 1351 write_lock_irqsave(&sfp->rq_list_lock, iflags);
@@ -1658,6 +1660,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
1658{ 1660{
1659 int res; 1661 int res;
1660 struct request *rq; 1662 struct request *rq;
1663 struct scsi_request *req;
1661 Sg_fd *sfp = srp->parentfp; 1664 Sg_fd *sfp = srp->parentfp;
1662 sg_io_hdr_t *hp = &srp->header; 1665 sg_io_hdr_t *hp = &srp->header;
1663 int dxfer_len = (int) hp->dxfer_len; 1666 int dxfer_len = (int) hp->dxfer_len;
@@ -1695,22 +1698,23 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
1695 * With scsi-mq disabled, blk_get_request() with GFP_KERNEL usually 1698 * With scsi-mq disabled, blk_get_request() with GFP_KERNEL usually
1696 * does not sleep except under memory pressure. 1699 * does not sleep except under memory pressure.
1697 */ 1700 */
1698 rq = blk_get_request(q, rw, GFP_KERNEL); 1701 rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ?
1702 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
1699 if (IS_ERR(rq)) { 1703 if (IS_ERR(rq)) {
1700 kfree(long_cmdp); 1704 kfree(long_cmdp);
1701 return PTR_ERR(rq); 1705 return PTR_ERR(rq);
1702 } 1706 }
1707 req = scsi_req(rq);
1703 1708
1704 blk_rq_set_block_pc(rq); 1709 scsi_req_init(rq);
1705 1710
1706 if (hp->cmd_len > BLK_MAX_CDB) 1711 if (hp->cmd_len > BLK_MAX_CDB)
1707 rq->cmd = long_cmdp; 1712 req->cmd = long_cmdp;
1708 memcpy(rq->cmd, cmd, hp->cmd_len); 1713 memcpy(req->cmd, cmd, hp->cmd_len);
1709 rq->cmd_len = hp->cmd_len; 1714 req->cmd_len = hp->cmd_len;
1710 1715
1711 srp->rq = rq; 1716 srp->rq = rq;
1712 rq->end_io_data = srp; 1717 rq->end_io_data = srp;
1713 rq->sense = srp->sense_b;
1714 rq->retries = SG_DEFAULT_RETRIES; 1718 rq->retries = SG_DEFAULT_RETRIES;
1715 1719
1716 if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) 1720 if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE))
@@ -1790,8 +1794,7 @@ sg_finish_rem_req(Sg_request *srp)
1790 ret = blk_rq_unmap_user(srp->bio); 1794 ret = blk_rq_unmap_user(srp->bio);
1791 1795
1792 if (srp->rq) { 1796 if (srp->rq) {
1793 if (srp->rq->cmd != srp->rq->__cmd) 1797 scsi_req_free_cmd(scsi_req(srp->rq));
1794 kfree(srp->rq->cmd);
1795 blk_put_request(srp->rq); 1798 blk_put_request(srp->rq);
1796 } 1799 }
1797 1800
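
With the CDB, sense buffer and residual moved from struct request into struct scsi_request, sg selects the data direction at allocation time via REQ_OP_SCSI_IN/OUT and fills the command through the scsi_req() accessor. The submit side, condensed from the hunk above (long-CDB handling and data mapping omitted):

        rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ?
                             REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
        if (IS_ERR(rq))
                return PTR_ERR(rq);
        req = scsi_req(rq);
        scsi_req_init(rq);                      /* set up the embedded scsi_request */

        memcpy(req->cmd, cmd, hp->cmd_len);     /* CDB now lives in the scsi_request */
        req->cmd_len = hp->cmd_len;
        rq->end_io_data = srp;                  /* no rq->sense pointer to set up */
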
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 8702d9cf8040..11c0dfb3dfa3 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -4499,7 +4499,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost,
4499 if (pqi_is_logical_device(device)) { 4499 if (pqi_is_logical_device(device)) {
4500 raid_bypassed = false; 4500 raid_bypassed = false;
4501 if (device->offload_enabled && 4501 if (device->offload_enabled &&
4502 scmd->request->cmd_type == REQ_TYPE_FS) { 4502 !blk_rq_is_passthrough(scmd->request)) {
4503 rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device, 4503 rc = pqi_raid_bypass_submit_scsi_cmd(ctrl_info, device,
4504 scmd, queue_group); 4504 scmd, queue_group);
4505 if (rc == 0 || 4505 if (rc == 0 ||
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 94352e4df831..0b29b9329b1c 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -117,7 +117,7 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi,
117 unsigned int clearing, int slot); 117 unsigned int clearing, int slot);
118static int sr_packet(struct cdrom_device_info *, struct packet_command *); 118static int sr_packet(struct cdrom_device_info *, struct packet_command *);
119 119
120static struct cdrom_device_ops sr_dops = { 120static const struct cdrom_device_ops sr_dops = {
121 .open = sr_open, 121 .open = sr_open,
122 .release = sr_release, 122 .release = sr_release,
123 .drive_status = sr_drive_status, 123 .drive_status = sr_drive_status,
@@ -437,14 +437,17 @@ static int sr_init_command(struct scsi_cmnd *SCpnt)
437 goto out; 437 goto out;
438 } 438 }
439 439
440 if (rq_data_dir(rq) == WRITE) { 440 switch (req_op(rq)) {
441 case REQ_OP_WRITE:
441 if (!cd->writeable) 442 if (!cd->writeable)
442 goto out; 443 goto out;
443 SCpnt->cmnd[0] = WRITE_10; 444 SCpnt->cmnd[0] = WRITE_10;
444 cd->cdi.media_written = 1; 445 cd->cdi.media_written = 1;
445 } else if (rq_data_dir(rq) == READ) { 446 break;
447 case REQ_OP_READ:
446 SCpnt->cmnd[0] = READ_10; 448 SCpnt->cmnd[0] = READ_10;
447 } else { 449 break;
450 default:
448 blk_dump_rq_flags(rq, "Unknown sr command"); 451 blk_dump_rq_flags(rq, "Unknown sr command");
449 goto out; 452 goto out;
450 } 453 }
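
sr_init_command() now dispatches on req_op() instead of rq_data_dir(), which matters once passthrough requests carry their own opcodes (REQ_OP_SCSI_* and REQ_OP_DRV_*, added in the blk_types.h hunk further down). A self-contained shape of that switch; the real function jumps to a shared error label instead of returning directly:

        switch (req_op(rq)) {
        case REQ_OP_WRITE:
                SCpnt->cmnd[0] = WRITE_10;
                break;
        case REQ_OP_READ:
                SCpnt->cmnd[0] = READ_10;
                break;
        default:
                /* passthrough and driver-private ops land here and are rejected */
                blk_dump_rq_flags(rq, "Unknown sr command");
                return BLKPREP_KILL;
        }
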
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 5f35b863e1a7..81212d4bd9bf 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -475,7 +475,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
475 ktime_t now; 475 ktime_t now;
476 476
477 now = ktime_get(); 477 now = ktime_get();
478 if (req->cmd[0] == WRITE_6) { 478 if (scsi_req(req)->cmd[0] == WRITE_6) {
479 now = ktime_sub(now, STp->stats->write_time); 479 now = ktime_sub(now, STp->stats->write_time);
480 atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time); 480 atomic64_add(ktime_to_ns(now), &STp->stats->tot_write_time);
481 atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); 481 atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
@@ -489,7 +489,7 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
489 } else 489 } else
490 atomic64_add(atomic_read(&STp->stats->last_write_size), 490 atomic64_add(atomic_read(&STp->stats->last_write_size),
491 &STp->stats->write_byte_cnt); 491 &STp->stats->write_byte_cnt);
492 } else if (req->cmd[0] == READ_6) { 492 } else if (scsi_req(req)->cmd[0] == READ_6) {
493 now = ktime_sub(now, STp->stats->read_time); 493 now = ktime_sub(now, STp->stats->read_time);
494 atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time); 494 atomic64_add(ktime_to_ns(now), &STp->stats->tot_read_time);
495 atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time); 495 atomic64_add(ktime_to_ns(now), &STp->stats->tot_io_time);
@@ -514,15 +514,18 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
514static void st_scsi_execute_end(struct request *req, int uptodate) 514static void st_scsi_execute_end(struct request *req, int uptodate)
515{ 515{
516 struct st_request *SRpnt = req->end_io_data; 516 struct st_request *SRpnt = req->end_io_data;
517 struct scsi_request *rq = scsi_req(req);
517 struct scsi_tape *STp = SRpnt->stp; 518 struct scsi_tape *STp = SRpnt->stp;
518 struct bio *tmp; 519 struct bio *tmp;
519 520
520 STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors; 521 STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
521 STp->buffer->cmdstat.residual = req->resid_len; 522 STp->buffer->cmdstat.residual = rq->resid_len;
522 523
523 st_do_stats(STp, req); 524 st_do_stats(STp, req);
524 525
525 tmp = SRpnt->bio; 526 tmp = SRpnt->bio;
527 if (rq->sense_len)
528 memcpy(SRpnt->sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
526 if (SRpnt->waiting) 529 if (SRpnt->waiting)
527 complete(SRpnt->waiting); 530 complete(SRpnt->waiting);
528 531
@@ -535,17 +538,18 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
535 int timeout, int retries) 538 int timeout, int retries)
536{ 539{
537 struct request *req; 540 struct request *req;
541 struct scsi_request *rq;
538 struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data; 542 struct rq_map_data *mdata = &SRpnt->stp->buffer->map_data;
539 int err = 0; 543 int err = 0;
540 int write = (data_direction == DMA_TO_DEVICE);
541 struct scsi_tape *STp = SRpnt->stp; 544 struct scsi_tape *STp = SRpnt->stp;
542 545
543 req = blk_get_request(SRpnt->stp->device->request_queue, write, 546 req = blk_get_request(SRpnt->stp->device->request_queue,
544 GFP_KERNEL); 547 data_direction == DMA_TO_DEVICE ?
548 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
545 if (IS_ERR(req)) 549 if (IS_ERR(req))
546 return DRIVER_ERROR << 24; 550 return DRIVER_ERROR << 24;
547 551 rq = scsi_req(req);
548 blk_rq_set_block_pc(req); 552 scsi_req_init(req);
549 req->rq_flags |= RQF_QUIET; 553 req->rq_flags |= RQF_QUIET;
550 554
551 mdata->null_mapped = 1; 555 mdata->null_mapped = 1;
@@ -571,11 +575,9 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
571 } 575 }
572 576
573 SRpnt->bio = req->bio; 577 SRpnt->bio = req->bio;
574 req->cmd_len = COMMAND_SIZE(cmd[0]); 578 rq->cmd_len = COMMAND_SIZE(cmd[0]);
575 memset(req->cmd, 0, BLK_MAX_CDB); 579 memset(rq->cmd, 0, BLK_MAX_CDB);
576 memcpy(req->cmd, cmd, req->cmd_len); 580 memcpy(rq->cmd, cmd, rq->cmd_len);
577 req->sense = SRpnt->sense;
578 req->sense_len = 0;
579 req->timeout = timeout; 581 req->timeout = timeout;
580 req->retries = retries; 582 req->retries = retries;
581 req->end_io_data = SRpnt; 583 req->end_io_data = SRpnt;
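
On the completion side the sense data and residual are read out of the scsi_request as well; st copies the sense bytes explicitly now that the request no longer carries a caller-supplied sense pointer. Condensed from the st_scsi_execute_end() hunk above:

        static void st_scsi_execute_end(struct request *req, int uptodate)
        {
                struct st_request *SRpnt = req->end_io_data;
                struct scsi_request *rq = scsi_req(req);
                struct scsi_tape *STp = SRpnt->stp;

                STp->buffer->cmdstat.midlevel_result = SRpnt->result = req->errors;
                STp->buffer->cmdstat.residual = rq->resid_len;  /* was req->resid_len */
                if (rq->sense_len)
                        memcpy(SRpnt->sense, rq->sense, SCSI_SENSE_BUFFERSIZE);
                /* stats, completion wakeup and bio/request teardown unchanged */
        }
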
diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index 88db6992420e..bcf7d05d1aab 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c
@@ -260,7 +260,7 @@ static int sun3scsi_dma_xfer_len(struct NCR5380_hostdata *hostdata,
260{ 260{
261 int wanted_len = cmd->SCp.this_residual; 261 int wanted_len = cmd->SCp.this_residual;
262 262
263 if (wanted_len < DMA_MIN_SIZE || cmd->request->cmd_type != REQ_TYPE_FS) 263 if (wanted_len < DMA_MIN_SIZE || blk_rq_is_passthrough(cmd->request))
264 return 0; 264 return 0;
265 265
266 return wanted_len; 266 return wanted_len;
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index 257361280510..e2bc99980f75 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -4,6 +4,7 @@ menuconfig TARGET_CORE
4 depends on SCSI && BLOCK 4 depends on SCSI && BLOCK
5 select CONFIGFS_FS 5 select CONFIGFS_FS
6 select CRC_T10DIF 6 select CRC_T10DIF
7 select BLK_SCSI_REQUEST # only for scsi_command_size_tbl..
7 default n 8 default n
8 help 9 help
9 Say Y or M here to enable the TCM Storage Engine and ConfigFS enabled 10 Say Y or M here to enable the TCM Storage Engine and ConfigFS enabled
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 04d7aa7390d0..a8f8e53f2f57 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1005,7 +1005,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
1005 scsi_command_size(cmd->t_task_cdb)); 1005 scsi_command_size(cmd->t_task_cdb));
1006 1006
1007 req = blk_get_request(pdv->pdv_sd->request_queue, 1007 req = blk_get_request(pdv->pdv_sd->request_queue,
1008 (cmd->data_direction == DMA_TO_DEVICE), 1008 cmd->data_direction == DMA_TO_DEVICE ?
1009 REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
1009 GFP_KERNEL); 1010 GFP_KERNEL);
1010 if (IS_ERR(req)) { 1011 if (IS_ERR(req)) {
1011 pr_err("PSCSI: blk_get_request() failed\n"); 1012 pr_err("PSCSI: blk_get_request() failed\n");
@@ -1013,7 +1014,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
1013 goto fail; 1014 goto fail;
1014 } 1015 }
1015 1016
1016 blk_rq_set_block_pc(req); 1017 scsi_req_init(req);
1017 1018
1018 if (sgl) { 1019 if (sgl) {
1019 ret = pscsi_map_sg(cmd, sgl, sgl_nents, req); 1020 ret = pscsi_map_sg(cmd, sgl, sgl_nents, req);
@@ -1023,10 +1024,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
1023 1024
1024 req->end_io = pscsi_req_done; 1025 req->end_io = pscsi_req_done;
1025 req->end_io_data = cmd; 1026 req->end_io_data = cmd;
1026 req->cmd_len = scsi_command_size(pt->pscsi_cdb); 1027 scsi_req(req)->cmd_len = scsi_command_size(pt->pscsi_cdb);
1027 req->cmd = &pt->pscsi_cdb[0]; 1028 scsi_req(req)->cmd = &pt->pscsi_cdb[0];
1028 req->sense = &pt->pscsi_sense[0];
1029 req->sense_len = 0;
1030 if (pdv->pdv_sd->type == TYPE_DISK) 1029 if (pdv->pdv_sd->type == TYPE_DISK)
1031 req->timeout = PS_TIMEOUT_DISK; 1030 req->timeout = PS_TIMEOUT_DISK;
1032 else 1031 else
@@ -1075,7 +1074,7 @@ static void pscsi_req_done(struct request *req, int uptodate)
1075 struct pscsi_plugin_task *pt = cmd->priv; 1074 struct pscsi_plugin_task *pt = cmd->priv;
1076 1075
1077 pt->pscsi_result = req->errors; 1076 pt->pscsi_result = req->errors;
1078 pt->pscsi_resid = req->resid_len; 1077 pt->pscsi_resid = scsi_req(req)->resid_len;
1079 1078
1080 cmd->scsi_status = status_byte(pt->pscsi_result) << 1; 1079 cmd->scsi_status = status_byte(pt->pscsi_result) << 1;
1081 if (cmd->scsi_status) { 1080 if (cmd->scsi_status) {
@@ -1096,6 +1095,7 @@ static void pscsi_req_done(struct request *req, int uptodate)
1096 break; 1095 break;
1097 } 1096 }
1098 1097
1098 memcpy(pt->pscsi_sense, scsi_req(req)->sense, TRANSPORT_SENSE_BUFFER);
1099 __blk_put_request(req->q, req); 1099 __blk_put_request(req->q, req);
1100 kfree(pt); 1100 kfree(pt);
1101} 1101}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3c47614a4b32..73031ec54a7b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -884,6 +884,8 @@ static void bdev_evict_inode(struct inode *inode)
884 spin_lock(&bdev_lock); 884 spin_lock(&bdev_lock);
885 list_del_init(&bdev->bd_list); 885 list_del_init(&bdev->bd_list);
886 spin_unlock(&bdev_lock); 886 spin_unlock(&bdev_lock);
887 if (bdev->bd_bdi != &noop_backing_dev_info)
888 bdi_put(bdev->bd_bdi);
887} 889}
888 890
889static const struct super_operations bdev_sops = { 891static const struct super_operations bdev_sops = {
@@ -954,6 +956,21 @@ static int bdev_set(struct inode *inode, void *data)
954 956
955static LIST_HEAD(all_bdevs); 957static LIST_HEAD(all_bdevs);
956 958
959/*
960 * If there is a bdev inode for this device, unhash it so that it gets evicted
961 * as soon as last inode reference is dropped.
962 */
963void bdev_unhash_inode(dev_t dev)
964{
965 struct inode *inode;
966
967 inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
968 if (inode) {
969 remove_inode_hash(inode);
970 iput(inode);
971 }
972}
973
957struct block_device *bdget(dev_t dev) 974struct block_device *bdget(dev_t dev)
958{ 975{
959 struct block_device *bdev; 976 struct block_device *bdev;
@@ -971,6 +988,7 @@ struct block_device *bdget(dev_t dev)
971 bdev->bd_contains = NULL; 988 bdev->bd_contains = NULL;
972 bdev->bd_super = NULL; 989 bdev->bd_super = NULL;
973 bdev->bd_inode = inode; 990 bdev->bd_inode = inode;
991 bdev->bd_bdi = &noop_backing_dev_info;
974 bdev->bd_block_size = (1 << inode->i_blkbits); 992 bdev->bd_block_size = (1 << inode->i_blkbits);
975 bdev->bd_part_count = 0; 993 bdev->bd_part_count = 0;
976 bdev->bd_invalidated = 0; 994 bdev->bd_invalidated = 0;
@@ -1527,6 +1545,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1527 bdev->bd_disk = disk; 1545 bdev->bd_disk = disk;
1528 bdev->bd_queue = disk->queue; 1546 bdev->bd_queue = disk->queue;
1529 bdev->bd_contains = bdev; 1547 bdev->bd_contains = bdev;
1548 if (bdev->bd_bdi == &noop_backing_dev_info)
1549 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
1530 1550
1531 if (!partno) { 1551 if (!partno) {
1532 ret = -ENXIO; 1552 ret = -ENXIO;
@@ -1622,6 +1642,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1622 bdev->bd_disk = NULL; 1642 bdev->bd_disk = NULL;
1623 bdev->bd_part = NULL; 1643 bdev->bd_part = NULL;
1624 bdev->bd_queue = NULL; 1644 bdev->bd_queue = NULL;
1645 bdi_put(bdev->bd_bdi);
1646 bdev->bd_bdi = &noop_backing_dev_info;
1625 if (bdev != bdev->bd_contains) 1647 if (bdev != bdev->bd_contains)
1626 __blkdev_put(bdev->bd_contains, mode, 1); 1648 __blkdev_put(bdev->bd_contains, mode, 1);
1627 bdev->bd_contains = NULL; 1649 bdev->bd_contains = NULL;
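
Instead of borrowing the queue's embedded backing_dev_info, a block_device now pins its own reference: __blkdev_get() takes one with bdi_get() on first open, and both the error path and bdev_evict_inode() drop it with bdi_put(), using noop_backing_dev_info as the "no reference held" marker. Reduced to just the reference-counting moves from the hunks above:

        /* first open of the device */
        if (bdev->bd_bdi == &noop_backing_dev_info)
                bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);

        /* open-failure unwind (and, without the reset, final eviction) */
        if (bdev->bd_bdi != &noop_backing_dev_info) {
                bdi_put(bdev->bd_bdi);
                bdev->bd_bdi = &noop_backing_dev_info;
        }
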
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 18004169552c..37a31b12bb0c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1800,7 +1800,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1800 list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) { 1800 list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) {
1801 if (!device->bdev) 1801 if (!device->bdev)
1802 continue; 1802 continue;
1803 bdi = blk_get_backing_dev_info(device->bdev); 1803 bdi = device->bdev->bd_bdi;
1804 if (bdi_congested(bdi, bdi_bits)) { 1804 if (bdi_congested(bdi, bdi_bits)) {
1805 ret = 1; 1805 ret = 1;
1806 break; 1806 break;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3c3c69c0eee4..b2e70073a10d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -366,7 +366,7 @@ static noinline void run_scheduled_bios(struct btrfs_device *device)
366 */ 366 */
367 blk_start_plug(&plug); 367 blk_start_plug(&plug);
368 368
369 bdi = blk_get_backing_dev_info(device->bdev); 369 bdi = device->bdev->bd_bdi;
370 limit = btrfs_async_submit_limit(fs_info); 370 limit = btrfs_async_submit_limit(fs_info);
371 limit = limit * 2 / 3; 371 limit = limit * 2 / 3;
372 372
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f17fcf89e18e..7fb1732a3630 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -248,6 +248,42 @@ static struct file_system_type debug_fs_type = {
248}; 248};
249MODULE_ALIAS_FS("debugfs"); 249MODULE_ALIAS_FS("debugfs");
250 250
251/**
252 * debugfs_lookup() - look up an existing debugfs file
253 * @name: a pointer to a string containing the name of the file to look up.
254 * @parent: a pointer to the parent dentry of the file.
255 *
256 * This function will return a pointer to a dentry if it succeeds. If the file
257 * doesn't exist or an error occurs, %NULL will be returned. The returned
258 * dentry must be passed to dput() when it is no longer needed.
259 *
260 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
261 * returned.
262 */
263struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
264{
265 struct dentry *dentry;
266
267 if (IS_ERR(parent))
268 return NULL;
269
270 if (!parent)
271 parent = debugfs_mount->mnt_root;
272
273 inode_lock(d_inode(parent));
274 dentry = lookup_one_len(name, parent, strlen(name));
275 inode_unlock(d_inode(parent));
276
277 if (IS_ERR(dentry))
278 return NULL;
279 if (!d_really_is_positive(dentry)) {
280 dput(dentry);
281 return NULL;
282 }
283 return dentry;
284}
285EXPORT_SYMBOL_GPL(debugfs_lookup);
286
251static struct dentry *start_creating(const char *name, struct dentry *parent) 287static struct dentry *start_creating(const char *name, struct dentry *parent)
252{ 288{
253 struct dentry *dentry; 289 struct dentry *dentry;
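
debugfs_lookup() is a plain lookup: it hands back a dentry reference for an existing entry, NULL when it does not exist, and the caller owes a dput(). A hypothetical caller ("my_file" is made up for the example):

        struct dentry *d;

        d = debugfs_lookup("my_file", NULL);    /* NULL parent = debugfs root */
        if (!IS_ERR_OR_NULL(d)) {               /* stub returns -ENODEV without debugfs */
                /* inspect or act on the existing entry here */
                dput(d);                        /* drop the reference from the lookup */
        }
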
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 58704799f0b9..b108e7ba81af 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1227,7 +1227,7 @@ static int set_gfs2_super(struct super_block *s, void *data)
1227 * We set the bdi here to the queue backing, file systems can 1227 * We set the bdi here to the queue backing, file systems can
1228 * overwrite this in ->fill_super() 1228 * overwrite this in ->fill_super()
1229 */ 1229 */
1230 s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; 1230 s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
1231 return 0; 1231 return 0;
1232} 1232}
1233 1233
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 47febcf99185..20b1c17320d5 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -104,6 +104,7 @@ config NFSD_SCSILAYOUT
104 depends on NFSD_V4 && BLOCK 104 depends on NFSD_V4 && BLOCK
105 select NFSD_PNFS 105 select NFSD_PNFS
106 select EXPORTFS_BLOCK_OPS 106 select EXPORTFS_BLOCK_OPS
107 select BLK_SCSI_REQUEST
107 help 108 help
108 This option enables support for the exporting pNFS SCSI layouts 109 This option enables support for the exporting pNFS SCSI layouts
109 in the kernel's NFS server. The pNFS SCSI layout enables NFS 110 in the kernel's NFS server. The pNFS SCSI layout enables NFS
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 0780ff864539..a06115e31612 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -10,6 +10,7 @@
10#include <linux/nfsd/debug.h> 10#include <linux/nfsd/debug.h>
11#include <scsi/scsi_proto.h> 11#include <scsi/scsi_proto.h>
12#include <scsi/scsi_common.h> 12#include <scsi/scsi_common.h>
13#include <scsi/scsi_request.h>
13 14
14#include "blocklayoutxdr.h" 15#include "blocklayoutxdr.h"
15#include "pnfs.h" 16#include "pnfs.h"
@@ -213,6 +214,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
213{ 214{
214 struct request_queue *q = bdev->bd_disk->queue; 215 struct request_queue *q = bdev->bd_disk->queue;
215 struct request *rq; 216 struct request *rq;
217 struct scsi_request *req;
216 size_t bufflen = 252, len, id_len; 218 size_t bufflen = 252, len, id_len;
217 u8 *buf, *d, type, assoc; 219 u8 *buf, *d, type, assoc;
218 int error; 220 int error;
@@ -221,23 +223,24 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev,
221 if (!buf) 223 if (!buf)
222 return -ENOMEM; 224 return -ENOMEM;
223 225
224 rq = blk_get_request(q, READ, GFP_KERNEL); 226 rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
225 if (IS_ERR(rq)) { 227 if (IS_ERR(rq)) {
226 error = -ENOMEM; 228 error = -ENOMEM;
227 goto out_free_buf; 229 goto out_free_buf;
228 } 230 }
229 blk_rq_set_block_pc(rq); 231 req = scsi_req(rq);
232 scsi_req_init(rq);
230 233
231 error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); 234 error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
232 if (error) 235 if (error)
233 goto out_put_request; 236 goto out_put_request;
234 237
235 rq->cmd[0] = INQUIRY; 238 req->cmd[0] = INQUIRY;
236 rq->cmd[1] = 1; 239 req->cmd[1] = 1;
237 rq->cmd[2] = 0x83; 240 req->cmd[2] = 0x83;
238 rq->cmd[3] = bufflen >> 8; 241 req->cmd[3] = bufflen >> 8;
239 rq->cmd[4] = bufflen & 0xff; 242 req->cmd[4] = bufflen & 0xff;
240 rq->cmd_len = COMMAND_SIZE(INQUIRY); 243 req->cmd_len = COMMAND_SIZE(INQUIRY);
241 244
242 error = blk_execute_rq(rq->q, NULL, rq, 1); 245 error = blk_execute_rq(rq->q, NULL, rq, 1);
243 if (error) { 246 if (error) {
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 12eeae62a2b1..e1872f36147f 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1068,7 +1068,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1068 sb->s_time_gran = 1; 1068 sb->s_time_gran = 1;
1069 sb->s_max_links = NILFS_LINK_MAX; 1069 sb->s_max_links = NILFS_LINK_MAX;
1070 1070
1071 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; 1071 sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info;
1072 1072
1073 err = load_nilfs(nilfs, sb); 1073 err = load_nilfs(nilfs, sb);
1074 if (err) 1074 if (err)
diff --git a/fs/super.c b/fs/super.c
index 1709ed029a2c..ea662b0e5e78 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1047,7 +1047,7 @@ static int set_bdev_super(struct super_block *s, void *data)
1047 * We set the bdi here to the queue backing, file systems can 1047 * We set the bdi here to the queue backing, file systems can
1048 * overwrite this in ->fill_super() 1048 * overwrite this in ->fill_super()
1049 */ 1049 */
1050 s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; 1050 s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
1051 return 0; 1051 return 0;
1052} 1052}
1053 1053
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ac3b4db519df..8c7d01b75922 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -758,7 +758,7 @@ xfs_buf_readahead_map(
758 int nmaps, 758 int nmaps,
759 const struct xfs_buf_ops *ops) 759 const struct xfs_buf_ops *ops)
760{ 760{
761 if (bdi_read_congested(target->bt_bdi)) 761 if (bdi_read_congested(target->bt_bdev->bd_bdi))
762 return; 762 return;
763 763
764 xfs_buf_read_map(target, map, nmaps, 764 xfs_buf_read_map(target, map, nmaps,
@@ -1791,7 +1791,6 @@ xfs_alloc_buftarg(
1791 btp->bt_mount = mp; 1791 btp->bt_mount = mp;
1792 btp->bt_dev = bdev->bd_dev; 1792 btp->bt_dev = bdev->bd_dev;
1793 btp->bt_bdev = bdev; 1793 btp->bt_bdev = bdev;
1794 btp->bt_bdi = blk_get_backing_dev_info(bdev);
1795 1794
1796 if (xfs_setsize_buftarg_early(btp, bdev)) 1795 if (xfs_setsize_buftarg_early(btp, bdev))
1797 goto error; 1796 goto error;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 8a9d3a9599f0..3c867e5a63e1 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -109,7 +109,6 @@ typedef unsigned int xfs_buf_flags_t;
109typedef struct xfs_buftarg { 109typedef struct xfs_buftarg {
110 dev_t bt_dev; 110 dev_t bt_dev;
111 struct block_device *bt_bdev; 111 struct block_device *bt_bdev;
112 struct backing_dev_info *bt_bdi;
113 struct xfs_mount *bt_mount; 112 struct xfs_mount *bt_mount;
114 unsigned int bt_meta_sectorsize; 113 unsigned int bt_meta_sectorsize;
115 size_t bt_meta_sectormask; 114 size_t bt_meta_sectormask;
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index e850e76acaaf..ad955817916d 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -10,6 +10,7 @@
10#include <linux/flex_proportions.h> 10#include <linux/flex_proportions.h>
11#include <linux/timer.h> 11#include <linux/timer.h>
12#include <linux/workqueue.h> 12#include <linux/workqueue.h>
13#include <linux/kref.h>
13 14
14struct page; 15struct page;
15struct device; 16struct device;
@@ -144,6 +145,7 @@ struct backing_dev_info {
144 145
145 char *name; 146 char *name;
146 147
148 struct kref refcnt; /* Reference counter for the structure */
147 unsigned int capabilities; /* Device capabilities */ 149 unsigned int capabilities; /* Device capabilities */
148 unsigned int min_ratio; 150 unsigned int min_ratio;
149 unsigned int max_ratio, max_prop_frac; 151 unsigned int max_ratio, max_prop_frac;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 43b93a947e61..c52a48cb9a66 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,7 +18,14 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19 19
20int __must_check bdi_init(struct backing_dev_info *bdi); 20int __must_check bdi_init(struct backing_dev_info *bdi);
21void bdi_exit(struct backing_dev_info *bdi); 21
22static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
23{
24 kref_get(&bdi->refcnt);
25 return bdi;
26}
27
28void bdi_put(struct backing_dev_info *bdi);
22 29
23__printf(3, 4) 30__printf(3, 4)
24int bdi_register(struct backing_dev_info *bdi, struct device *parent, 31int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -29,6 +36,7 @@ void bdi_unregister(struct backing_dev_info *bdi);
29 36
30int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); 37int __must_check bdi_setup_and_register(struct backing_dev_info *, char *);
31void bdi_destroy(struct backing_dev_info *bdi); 38void bdi_destroy(struct backing_dev_info *bdi);
39struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
32 40
33void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, 41void wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
34 bool range_cyclic, enum wb_reason reason); 42 bool range_cyclic, enum wb_reason reason);
@@ -183,7 +191,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
183 sb = inode->i_sb; 191 sb = inode->i_sb;
184#ifdef CONFIG_BLOCK 192#ifdef CONFIG_BLOCK
185 if (sb_is_blkdev_sb(sb)) 193 if (sb_is_blkdev_sb(sb))
186 return blk_get_backing_dev_info(I_BDEV(inode)); 194 return I_BDEV(inode)->bd_bdi;
187#endif 195#endif
188 return sb->s_bdi; 196 return sb->s_bdi;
189} 197}
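
backing_dev_info becomes a standalone, refcounted object: bdi_alloc_node() hands out a dynamically allocated instance, and bdi_get()/bdi_put() take and drop references against the new kref. A rough lifetime sketch, assuming bdi_alloc_node() returns an initialised bdi and bdi_put() frees it on the final drop (both implementations live outside this hunk):

        struct backing_dev_info *bdi, *holder;

        bdi = bdi_alloc_node(GFP_KERNEL, NUMA_NO_NODE);
        if (!bdi)
                return -ENOMEM;

        holder = bdi_get(bdi);          /* each additional holder takes its own ref */
        /* ... */
        bdi_put(holder);                /* holders drop theirs ... */
        bdi_put(bdi);                   /* ... and the allocation ref goes last */
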
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4a2ab5d99ff7..8e4df3d6c8cd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx {
22 22
23 unsigned long flags; /* BLK_MQ_F_* flags */ 23 unsigned long flags; /* BLK_MQ_F_* flags */
24 24
25 void *sched_data;
25 struct request_queue *queue; 26 struct request_queue *queue;
26 struct blk_flush_queue *fq; 27 struct blk_flush_queue *fq;
27 28
@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx {
35 atomic_t wait_index; 36 atomic_t wait_index;
36 37
37 struct blk_mq_tags *tags; 38 struct blk_mq_tags *tags;
39 struct blk_mq_tags *sched_tags;
38 40
39 struct srcu_struct queue_rq_srcu; 41 struct srcu_struct queue_rq_srcu;
40 42
@@ -60,7 +62,7 @@ struct blk_mq_hw_ctx {
60 62
61struct blk_mq_tag_set { 63struct blk_mq_tag_set {
62 unsigned int *mq_map; 64 unsigned int *mq_map;
63 struct blk_mq_ops *ops; 65 const struct blk_mq_ops *ops;
64 unsigned int nr_hw_queues; 66 unsigned int nr_hw_queues;
65 unsigned int queue_depth; /* max hw supported */ 67 unsigned int queue_depth; /* max hw supported */
66 unsigned int reserved_tags; 68 unsigned int reserved_tags;
@@ -151,11 +153,13 @@ enum {
151 BLK_MQ_F_SG_MERGE = 1 << 2, 153 BLK_MQ_F_SG_MERGE = 1 << 2,
152 BLK_MQ_F_DEFER_ISSUE = 1 << 4, 154 BLK_MQ_F_DEFER_ISSUE = 1 << 4,
153 BLK_MQ_F_BLOCKING = 1 << 5, 155 BLK_MQ_F_BLOCKING = 1 << 5,
156 BLK_MQ_F_NO_SCHED = 1 << 6,
154 BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, 157 BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
155 BLK_MQ_F_ALLOC_POLICY_BITS = 1, 158 BLK_MQ_F_ALLOC_POLICY_BITS = 1,
156 159
157 BLK_MQ_S_STOPPED = 0, 160 BLK_MQ_S_STOPPED = 0,
158 BLK_MQ_S_TAG_ACTIVE = 1, 161 BLK_MQ_S_TAG_ACTIVE = 1,
162 BLK_MQ_S_SCHED_RESTART = 2,
159 163
160 BLK_MQ_MAX_DEPTH = 10240, 164 BLK_MQ_MAX_DEPTH = 10240,
161 165
@@ -179,14 +183,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
179 183
180void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); 184void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
181 185
182void blk_mq_insert_request(struct request *, bool, bool, bool);
183void blk_mq_free_request(struct request *rq); 186void blk_mq_free_request(struct request *rq);
184void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
185bool blk_mq_can_queue(struct blk_mq_hw_ctx *); 187bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
186 188
187enum { 189enum {
188 BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ 190 BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
189 BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ 191 BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
192 BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
190}; 193};
191 194
192struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 195struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 519ea2c9df61..d703acb55d0f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -162,6 +162,13 @@ enum req_opf {
162 /* write the zero filled sector many times */ 162 /* write the zero filled sector many times */
163 REQ_OP_WRITE_ZEROES = 8, 163 REQ_OP_WRITE_ZEROES = 8,
164 164
165 /* SCSI passthrough using struct scsi_request */
166 REQ_OP_SCSI_IN = 32,
167 REQ_OP_SCSI_OUT = 33,
168 /* Driver private requests */
169 REQ_OP_DRV_IN = 34,
170 REQ_OP_DRV_OUT = 35,
171
165 REQ_OP_LAST, 172 REQ_OP_LAST,
166}; 173};
167 174
@@ -221,6 +228,15 @@ static inline bool op_is_write(unsigned int op)
221} 228}
222 229
223/* 230/*
231 * Check if the bio or request is one that needs special treatment in the
232 * flush state machine.
233 */
234static inline bool op_is_flush(unsigned int op)
235{
236 return op & (REQ_FUA | REQ_PREFLUSH);
237}
238
239/*
224 * Reads are always treated as synchronous, as are requests with the FUA or 240 * Reads are always treated as synchronous, as are requests with the FUA or
225 * PREFLUSH flag. Other operations may be marked as synchronous using the 241 * PREFLUSH flag. Other operations may be marked as synchronous using the
226 * REQ_SYNC flag. 242 * REQ_SYNC flag.
@@ -232,22 +248,29 @@ static inline bool op_is_sync(unsigned int op)
232} 248}
233 249
234typedef unsigned int blk_qc_t; 250typedef unsigned int blk_qc_t;
235#define BLK_QC_T_NONE -1U 251#define BLK_QC_T_NONE -1U
236#define BLK_QC_T_SHIFT 16 252#define BLK_QC_T_SHIFT 16
253#define BLK_QC_T_INTERNAL (1U << 31)
237 254
238static inline bool blk_qc_t_valid(blk_qc_t cookie) 255static inline bool blk_qc_t_valid(blk_qc_t cookie)
239{ 256{
240 return cookie != BLK_QC_T_NONE; 257 return cookie != BLK_QC_T_NONE;
241} 258}
242 259
243static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) 260static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
261 bool internal)
244{ 262{
245 return tag | (queue_num << BLK_QC_T_SHIFT); 263 blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);
264
265 if (internal)
266 ret |= BLK_QC_T_INTERNAL;
267
268 return ret;
246} 269}
247 270
248static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) 271static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
249{ 272{
250 return cookie >> BLK_QC_T_SHIFT; 273 return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
251} 274}
252 275
253static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) 276static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
@@ -255,6 +278,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
255 return cookie & ((1u << BLK_QC_T_SHIFT) - 1); 278 return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
256} 279}
257 280
281static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
282{
283 return (cookie & BLK_QC_T_INTERNAL) != 0;
284}
285
258struct blk_issue_stat { 286struct blk_issue_stat {
259 u64 time; 287 u64 time;
260}; 288};
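
blk_tag_to_qc_t() grows an "internal" flag so a poll cookie can record whether it carries a scheduler tag or a driver tag; blk_qc_t_to_queue_num() masks the new bit back out and blk_qc_t_is_internal() tests it. A round-trip of the helpers as defined above (the tag must fit below BLK_QC_T_SHIFT bits):

        unsigned int tag = 42, hctx_idx = 3;
        blk_qc_t cookie = blk_tag_to_qc_t(tag, hctx_idx, true);

        WARN_ON(!blk_qc_t_is_internal(cookie));
        WARN_ON(blk_qc_t_to_queue_num(cookie) != hctx_idx);
        WARN_ON(blk_qc_t_to_tag(cookie) != tag);
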
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ca8e8fd1078..aecca0e7d9ca 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -71,15 +71,6 @@ struct request_list {
71}; 71};
72 72
73/* 73/*
74 * request command types
75 */
76enum rq_cmd_type_bits {
77 REQ_TYPE_FS = 1, /* fs request */
78 REQ_TYPE_BLOCK_PC, /* scsi command */
79 REQ_TYPE_DRV_PRIV, /* driver defined types from here */
80};
81
82/*
83 * request flags */ 74 * request flags */
84typedef __u32 __bitwise req_flags_t; 75typedef __u32 __bitwise req_flags_t;
85 76
@@ -128,8 +119,6 @@ typedef __u32 __bitwise req_flags_t;
128#define RQF_NOMERGE_FLAGS \ 119#define RQF_NOMERGE_FLAGS \
129 (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) 120 (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
130 121
131#define BLK_MAX_CDB 16
132
133/* 122/*
134 * Try to put the fields that are referenced together in the same cacheline. 123 * Try to put the fields that are referenced together in the same cacheline.
135 * 124 *
@@ -147,13 +136,16 @@ struct request {
147 struct blk_mq_ctx *mq_ctx; 136 struct blk_mq_ctx *mq_ctx;
148 137
149 int cpu; 138 int cpu;
150 unsigned cmd_type;
151 unsigned int cmd_flags; /* op and common flags */ 139 unsigned int cmd_flags; /* op and common flags */
152 req_flags_t rq_flags; 140 req_flags_t rq_flags;
141
142 int internal_tag;
143
153 unsigned long atomic_flags; 144 unsigned long atomic_flags;
154 145
155 /* the following two fields are internal, NEVER access directly */ 146 /* the following two fields are internal, NEVER access directly */
156 unsigned int __data_len; /* total data len */ 147 unsigned int __data_len; /* total data len */
148 int tag;
157 sector_t __sector; /* sector cursor */ 149 sector_t __sector; /* sector cursor */
158 150
159 struct bio *bio; 151 struct bio *bio;
@@ -222,20 +214,9 @@ struct request {
222 214
223 void *special; /* opaque pointer available for LLD use */ 215 void *special; /* opaque pointer available for LLD use */
224 216
225 int tag;
226 int errors; 217 int errors;
227 218
228 /*
229 * when request is used as a packet command carrier
230 */
231 unsigned char __cmd[BLK_MAX_CDB];
232 unsigned char *cmd;
233 unsigned short cmd_len;
234
235 unsigned int extra_len; /* length of alignment and padding */ 219 unsigned int extra_len; /* length of alignment and padding */
236 unsigned int sense_len;
237 unsigned int resid_len; /* residual count */
238 void *sense;
239 220
240 unsigned long deadline; 221 unsigned long deadline;
241 struct list_head timeout_list; 222 struct list_head timeout_list;
@@ -252,6 +233,21 @@ struct request {
252 struct request *next_rq; 233 struct request *next_rq;
253}; 234};
254 235
236static inline bool blk_rq_is_scsi(struct request *rq)
237{
238 return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
239}
240
241static inline bool blk_rq_is_private(struct request *rq)
242{
243 return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
244}
245
246static inline bool blk_rq_is_passthrough(struct request *rq)
247{
248 return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
249}
250
255static inline unsigned short req_get_ioprio(struct request *req) 251static inline unsigned short req_get_ioprio(struct request *req)
256{ 252{
257 return req->ioprio; 253 return req->ioprio;
@@ -271,6 +267,8 @@ typedef void (softirq_done_fn)(struct request *);
271typedef int (dma_drain_needed_fn)(struct request *); 267typedef int (dma_drain_needed_fn)(struct request *);
272typedef int (lld_busy_fn) (struct request_queue *q); 268typedef int (lld_busy_fn) (struct request_queue *q);
273typedef int (bsg_job_fn) (struct bsg_job *); 269typedef int (bsg_job_fn) (struct bsg_job *);
270typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t);
271typedef void (exit_rq_fn)(struct request_queue *, struct request *);
274 272
275enum blk_eh_timer_return { 273enum blk_eh_timer_return {
276 BLK_EH_NOT_HANDLED, 274 BLK_EH_NOT_HANDLED,
@@ -333,6 +331,7 @@ struct queue_limits {
333 unsigned short logical_block_size; 331 unsigned short logical_block_size;
334 unsigned short max_segments; 332 unsigned short max_segments;
335 unsigned short max_integrity_segments; 333 unsigned short max_integrity_segments;
334 unsigned short max_discard_segments;
336 335
337 unsigned char misaligned; 336 unsigned char misaligned;
338 unsigned char discard_misaligned; 337 unsigned char discard_misaligned;
@@ -406,8 +405,10 @@ struct request_queue {
406 rq_timed_out_fn *rq_timed_out_fn; 405 rq_timed_out_fn *rq_timed_out_fn;
407 dma_drain_needed_fn *dma_drain_needed; 406 dma_drain_needed_fn *dma_drain_needed;
408 lld_busy_fn *lld_busy_fn; 407 lld_busy_fn *lld_busy_fn;
408 init_rq_fn *init_rq_fn;
409 exit_rq_fn *exit_rq_fn;
409 410
410 struct blk_mq_ops *mq_ops; 411 const struct blk_mq_ops *mq_ops;
411 412
412 unsigned int *mq_map; 413 unsigned int *mq_map;
413 414
@@ -432,7 +433,8 @@ struct request_queue {
432 */ 433 */
433 struct delayed_work delay_work; 434 struct delayed_work delay_work;
434 435
435 struct backing_dev_info backing_dev_info; 436 struct backing_dev_info *backing_dev_info;
437 struct disk_devt *disk_devt;
436 438
437 /* 439 /*
438 * The queue owner gets to use this for whatever they like. 440 * The queue owner gets to use this for whatever they like.
@@ -569,7 +571,15 @@ struct request_queue {
569 struct list_head tag_set_list; 571 struct list_head tag_set_list;
570 struct bio_set *bio_split; 572 struct bio_set *bio_split;
571 573
574#ifdef CONFIG_BLK_DEBUG_FS
575 struct dentry *debugfs_dir;
576 struct dentry *mq_debugfs_dir;
577#endif
578
572 bool mq_sysfs_init_done; 579 bool mq_sysfs_init_done;
580
581 size_t cmd_size;
582 void *rq_alloc_data;
573}; 583};
574 584
575#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ 585#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
@@ -600,6 +610,7 @@ struct request_queue {
600#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ 610#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
601#define QUEUE_FLAG_DAX 26 /* device supports DAX */ 611#define QUEUE_FLAG_DAX 26 /* device supports DAX */
602#define QUEUE_FLAG_STATS 27 /* track rq completion times */ 612#define QUEUE_FLAG_STATS 27 /* track rq completion times */
613#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */
603 614
604#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 615#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
605 (1 << QUEUE_FLAG_STACKABLE) | \ 616 (1 << QUEUE_FLAG_STACKABLE) | \
@@ -695,9 +706,10 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
695 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ 706 ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
696 REQ_FAILFAST_DRIVER)) 707 REQ_FAILFAST_DRIVER))
697 708
698#define blk_account_rq(rq) \ 709static inline bool blk_account_rq(struct request *rq)
699 (((rq)->rq_flags & RQF_STARTED) && \ 710{
700 ((rq)->cmd_type == REQ_TYPE_FS)) 711 return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
712}
701 713
702#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) 714#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
703#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 715#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
@@ -772,7 +784,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
772 784
773static inline bool rq_mergeable(struct request *rq) 785static inline bool rq_mergeable(struct request *rq)
774{ 786{
775 if (rq->cmd_type != REQ_TYPE_FS) 787 if (blk_rq_is_passthrough(rq))
776 return false; 788 return false;
777 789
778 if (req_op(rq) == REQ_OP_FLUSH) 790 if (req_op(rq) == REQ_OP_FLUSH)
@@ -910,7 +922,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
910extern void blk_put_request(struct request *); 922extern void blk_put_request(struct request *);
911extern void __blk_put_request(struct request_queue *, struct request *); 923extern void __blk_put_request(struct request_queue *, struct request *);
912extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 924extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
913extern void blk_rq_set_block_pc(struct request *);
914extern void blk_requeue_request(struct request_queue *, struct request *); 925extern void blk_requeue_request(struct request_queue *, struct request *);
915extern int blk_lld_busy(struct request_queue *q); 926extern int blk_lld_busy(struct request_queue *q);
916extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, 927extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -1047,7 +1058,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
1047{ 1058{
1048 struct request_queue *q = rq->q; 1059 struct request_queue *q = rq->q;
1049 1060
1050 if (unlikely(rq->cmd_type != REQ_TYPE_FS)) 1061 if (blk_rq_is_passthrough(rq))
1051 return q->limits.max_hw_sectors; 1062 return q->limits.max_hw_sectors;
1052 1063
1053 if (!q->limits.chunk_sectors || 1064 if (!q->limits.chunk_sectors ||
@@ -1129,14 +1140,15 @@ extern void blk_unprep_request(struct request *);
1129extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, 1140extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
1130 spinlock_t *lock, int node_id); 1141 spinlock_t *lock, int node_id);
1131extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); 1142extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
1132extern struct request_queue *blk_init_allocated_queue(struct request_queue *, 1143extern int blk_init_allocated_queue(struct request_queue *);
1133 request_fn_proc *, spinlock_t *);
1134extern void blk_cleanup_queue(struct request_queue *); 1144extern void blk_cleanup_queue(struct request_queue *);
1135extern void blk_queue_make_request(struct request_queue *, make_request_fn *); 1145extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
1136extern void blk_queue_bounce_limit(struct request_queue *, u64); 1146extern void blk_queue_bounce_limit(struct request_queue *, u64);
1137extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); 1147extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
1138extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); 1148extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
1139extern void blk_queue_max_segments(struct request_queue *, unsigned short); 1149extern void blk_queue_max_segments(struct request_queue *, unsigned short);
1150extern void blk_queue_max_discard_segments(struct request_queue *,
1151 unsigned short);
1140extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 1152extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
1141extern void blk_queue_max_discard_sectors(struct request_queue *q, 1153extern void blk_queue_max_discard_sectors(struct request_queue *q,
1142 unsigned int max_discard_sectors); 1154 unsigned int max_discard_sectors);
@@ -1179,8 +1191,16 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
1179extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 1191extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
1180extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); 1192extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
1181extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); 1193extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
1182extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
1183 1194
1195/*
1196 * Number of physical segments as sent to the device.
1197 *
1198 * Normally this is the number of discontiguous data segments sent by the
1199 * submitter. But for data-less command like discard we might have no
 1200 * actual data segments submitted, but the driver might have to add its
1201 * own special payload. In that case we still return 1 here so that this
1202 * special payload will be mapped.
1203 */
1184static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) 1204static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
1185{ 1205{
1186 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) 1206 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
@@ -1188,6 +1208,15 @@ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
1188 return rq->nr_phys_segments; 1208 return rq->nr_phys_segments;
1189} 1209}
1190 1210
1211/*
1212 * Number of discard segments (or ranges) the driver needs to fill in.
1213 * Each discard bio merged into a request is counted as one segment.
1214 */
1215static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
1216{
1217 return max_t(unsigned short, rq->nr_phys_segments, 1);
1218}
1219
1191extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 1220extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
1192extern void blk_dump_rq_flags(struct request *, char *); 1221extern void blk_dump_rq_flags(struct request *, char *);
1193extern long nr_blockdev_pages(void); 1222extern long nr_blockdev_pages(void);
@@ -1376,6 +1405,11 @@ static inline unsigned short queue_max_segments(struct request_queue *q)
1376 return q->limits.max_segments; 1405 return q->limits.max_segments;
1377} 1406}
1378 1407
1408static inline unsigned short queue_max_discard_segments(struct request_queue *q)
1409{
1410 return q->limits.max_discard_segments;
1411}
1412
1379static inline unsigned int queue_max_segment_size(struct request_queue *q) 1413static inline unsigned int queue_max_segment_size(struct request_queue *q)
1380{ 1414{
1381 return q->limits.max_segment_size; 1415 return q->limits.max_segment_size;
@@ -1620,6 +1654,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
1620 return __bvec_gap_to_prev(q, bprv, offset); 1654 return __bvec_gap_to_prev(q, bprv, offset);
1621} 1655}
1622 1656
1657/*
1658 * Check if the two bvecs from two bios can be merged to one segment.
1659 * If so, no need to check the gap between the two bios, since the last
1660 * segment of the 1st bio and the 1st bvec of the 2nd bio share one segment.
1661 */
1662static inline bool bios_segs_mergeable(struct request_queue *q,
1663 struct bio *prev, struct bio_vec *prev_last_bv,
1664 struct bio_vec *next_first_bv)
1665{
1666 if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
1667 return false;
1668 if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
1669 return false;
1670 if (prev->bi_seg_back_size + next_first_bv->bv_len >
1671 queue_max_segment_size(q))
1672 return false;
1673 return true;
1674}
1675
1623static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, 1676static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
1624 struct bio *next) 1677 struct bio *next)
1625{ 1678{
@@ -1629,7 +1682,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
1629 bio_get_last_bvec(prev, &pb); 1682 bio_get_last_bvec(prev, &pb);
1630 bio_get_first_bvec(next, &nb); 1683 bio_get_first_bvec(next, &nb);
1631 1684
1632 return __bvec_gap_to_prev(q, &pb, nb.bv_offset); 1685 if (!bios_segs_mergeable(q, prev, &pb, &nb))
1686 return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
1633 } 1687 }
1634 1688
1635 return false; 1689 return false;
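
A minimal sketch of how a driver might consume the new discard-segment accounting above: one range is emitted per merged bio, and blk_rq_nr_discard_segments() bounds the table. struct drv_discard_range, drv_setup_discard() and its fields are made-up names; __rq_for_each_bio() and bio_sectors() are existing block-layer helpers.

static int drv_setup_discard(struct request *rq,
			     struct drv_discard_range *range)
{
	unsigned short nr = blk_rq_nr_discard_segments(rq);
	unsigned short n = 0;
	struct bio *bio;

	/* each bio merged into the request becomes one discard range */
	__rq_for_each_bio(bio, rq) {
		range[n].lba = bio->bi_iter.bi_sector;
		range[n].nr_sectors = bio_sectors(bio);
		n++;
	}

	return n == nr ? 0 : -EIO;
}
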
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index e417f080219a..d2e908586e3d 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -30,9 +30,6 @@ struct blk_trace {
30 30
31extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 31extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
32extern void blk_trace_shutdown(struct request_queue *); 32extern void blk_trace_shutdown(struct request_queue *);
33extern int do_blk_trace_setup(struct request_queue *q, char *name,
34 dev_t dev, struct block_device *bdev,
35 struct blk_user_trace_setup *buts);
36extern __printf(2, 3) 33extern __printf(2, 3)
37void __trace_note_message(struct blk_trace *, const char *fmt, ...); 34void __trace_note_message(struct blk_trace *, const char *fmt, ...);
38 35
@@ -80,7 +77,6 @@ extern struct attribute_group blk_trace_attr_group;
80#else /* !CONFIG_BLK_DEV_IO_TRACE */ 77#else /* !CONFIG_BLK_DEV_IO_TRACE */
81# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 78# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY)
82# define blk_trace_shutdown(q) do { } while (0) 79# define blk_trace_shutdown(q) do { } while (0)
83# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY)
84# define blk_add_driver_data(q, rq, data, len) do {} while (0) 80# define blk_add_driver_data(q, rq, data, len) do {} while (0)
85# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) 81# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY)
86# define blk_trace_startstop(q, start) (-ENOTTY) 82# define blk_trace_startstop(q, start) (-ENOTTY)
@@ -110,16 +106,16 @@ struct compat_blk_user_trace_setup {
110 106
111#endif 107#endif
112 108
113#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) 109extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
114 110
115static inline int blk_cmd_buf_len(struct request *rq) 111static inline sector_t blk_rq_trace_sector(struct request *rq)
116{ 112{
117 return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1; 113 return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq);
118} 114}
119 115
120extern void blk_dump_cmd(char *buf, struct request *rq); 116static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq)
121extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); 117{
122 118 return blk_rq_is_passthrough(rq) ? 0 : blk_rq_sectors(rq);
123#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ 119}
124 120
125#endif 121#endif
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 657a718c27d2..e34dde2da0ef 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -66,9 +66,8 @@ struct bsg_job {
66 66
67void bsg_job_done(struct bsg_job *job, int result, 67void bsg_job_done(struct bsg_job *job, int result,
68 unsigned int reply_payload_rcv_len); 68 unsigned int reply_payload_rcv_len);
69int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, 69struct request_queue *bsg_setup_queue(struct device *dev, char *name,
70 bsg_job_fn *job_fn, int dd_job_size); 70 bsg_job_fn *job_fn, int dd_job_size);
71void bsg_request_fn(struct request_queue *q);
72void bsg_job_put(struct bsg_job *job); 71void bsg_job_put(struct bsg_job *job);
73int __must_check bsg_job_get(struct bsg_job *job); 72int __must_check bsg_job_get(struct bsg_job *job);
74 73
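
A hedged sketch of the new bsg_setup_queue() calling convention, where the queue is returned instead of being passed in; my_bsg_job_fn, dd_job_size and instance are placeholders, and the IS_ERR() check reflects how the converted transport classes appear to consume the return value.

	char bsg_name[32];
	struct request_queue *q;

	snprintf(bsg_name, sizeof(bsg_name), "my_bsg%d", instance);
	q = bsg_setup_queue(dev, bsg_name, my_bsg_job_fn, dd_job_size);
	if (IS_ERR(q))
		return PTR_ERR(q);
	/* no request_fn to supply any more; bsg-lib sets up the queue */
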
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 8609d577bb66..6e8f209a6dff 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -36,7 +36,7 @@ struct packet_command
36 36
37/* Uniform cdrom data structures for cdrom.c */ 37/* Uniform cdrom data structures for cdrom.c */
38struct cdrom_device_info { 38struct cdrom_device_info {
39 struct cdrom_device_ops *ops; /* link to device_ops */ 39 const struct cdrom_device_ops *ops; /* link to device_ops */
40 struct list_head list; /* linked list of all device_info */ 40 struct list_head list; /* linked list of all device_info */
41 struct gendisk *disk; /* matching block layer disk */ 41 struct gendisk *disk; /* matching block layer disk */
42 void *handle; /* driver-dependent data */ 42 void *handle; /* driver-dependent data */
@@ -87,7 +87,6 @@ struct cdrom_device_ops {
87 87
88/* driver specifications */ 88/* driver specifications */
89 const int capability; /* capability flags */ 89 const int capability; /* capability flags */
90 int n_minors; /* number of active minor devices */
91 /* handle uniform packets for scsi type devices (scsi,atapi) */ 90 /* handle uniform packets for scsi type devices (scsi,atapi) */
92 int (*generic_packet) (struct cdrom_device_info *, 91 int (*generic_packet) (struct cdrom_device_info *,
93 struct packet_command *); 92 struct packet_command *);
@@ -123,6 +122,8 @@ extern int cdrom_mode_sense(struct cdrom_device_info *cdi,
123 int page_code, int page_control); 122 int page_code, int page_control);
124extern void init_cdrom_command(struct packet_command *cgc, 123extern void init_cdrom_command(struct packet_command *cgc,
125 void *buffer, int len, int type); 124 void *buffer, int len, int type);
125extern int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
126 struct packet_command *cgc);
126 127
127/* The SCSI spec says there could be 256 slots. */ 128/* The SCSI spec says there could be 256 slots. */
128#define CDROM_MAX_SLOTS 256 129#define CDROM_MAX_SLOTS 256
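
With the ops pointer now const and n_minors gone, a driver's operations table can live in read-only memory; a sketch, where the my_cdrom_* handlers are placeholders and cdrom_dummy_generic_packet() is the helper exported above.

static const struct cdrom_device_ops my_cdrom_dops = {
	.open		= my_cdrom_open,
	.release	= my_cdrom_release,
	.drive_status	= my_cdrom_drive_status,
	.generic_packet	= cdrom_dummy_generic_packet,
	.capability	= CDC_OPEN_TRAY | CDC_DRIVE_STATUS,
};
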
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 014cc564d1c4..c0befcf41b58 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -80,6 +80,8 @@ static const struct file_operations __fops = { \
80 80
81#if defined(CONFIG_DEBUG_FS) 81#if defined(CONFIG_DEBUG_FS)
82 82
83struct dentry *debugfs_lookup(const char *name, struct dentry *parent);
84
83struct dentry *debugfs_create_file(const char *name, umode_t mode, 85struct dentry *debugfs_create_file(const char *name, umode_t mode,
84 struct dentry *parent, void *data, 86 struct dentry *parent, void *data,
85 const struct file_operations *fops); 87 const struct file_operations *fops);
@@ -181,6 +183,12 @@ ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
181 * want to duplicate the design decision mistakes of procfs and devfs again. 183 * want to duplicate the design decision mistakes of procfs and devfs again.
182 */ 184 */
183 185
186static inline struct dentry *debugfs_lookup(const char *name,
187 struct dentry *parent)
188{
189 return ERR_PTR(-ENODEV);
190}
191
184static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, 192static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
185 struct dentry *parent, void *data, 193 struct dentry *parent, void *data,
186 const struct file_operations *fops) 194 const struct file_operations *fops)
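
A hedged sketch of the lookup-before-create pattern the new debugfs_lookup() enables (the blktrace conversion later in this diff follows the same shape); "my_dir" and parent are placeholders, and the dentry returned by the lookup carries a reference that is eventually dropped with dput().

	struct dentry *dir;

	dir = debugfs_lookup("my_dir", parent);
	if (!dir)
		dir = debugfs_create_dir("my_dir", parent);
	if (!dir)
		return -ENOENT;
	/* ... create files under dir ... */
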
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ef7962e84444..a7e6903866fd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -55,8 +55,6 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
55 * = 2: The target wants to push back the io 55 * = 2: The target wants to push back the io
56 */ 56 */
57typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio); 57typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
58typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
59 union map_info *map_context);
60typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti, 58typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti,
61 struct request *rq, 59 struct request *rq,
62 union map_info *map_context, 60 union map_info *map_context,
@@ -163,7 +161,6 @@ struct target_type {
163 dm_ctr_fn ctr; 161 dm_ctr_fn ctr;
164 dm_dtr_fn dtr; 162 dm_dtr_fn dtr;
165 dm_map_fn map; 163 dm_map_fn map;
166 dm_map_request_fn map_rq;
167 dm_clone_and_map_request_fn clone_and_map_rq; 164 dm_clone_and_map_request_fn clone_and_map_rq;
168 dm_release_clone_request_fn release_clone_rq; 165 dm_release_clone_request_fn release_clone_rq;
169 dm_endio_fn end_io; 166 dm_endio_fn end_io;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index b276e9ef0e0b..aebecc4ed088 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -9,12 +9,22 @@
9struct io_cq; 9struct io_cq;
10struct elevator_type; 10struct elevator_type;
11 11
12typedef int (elevator_merge_fn) (struct request_queue *, struct request **, 12/*
13 * Return values from elevator merger
14 */
15enum elv_merge {
16 ELEVATOR_NO_MERGE = 0,
17 ELEVATOR_FRONT_MERGE = 1,
18 ELEVATOR_BACK_MERGE = 2,
19 ELEVATOR_DISCARD_MERGE = 3,
20};
21
22typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **,
13 struct bio *); 23 struct bio *);
14 24
15typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *); 25typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *);
16 26
17typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int); 27typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge);
18 28
19typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, 29typedef int (elevator_allow_bio_merge_fn) (struct request_queue *,
20 struct request *, struct bio *); 30 struct request *, struct bio *);
@@ -77,6 +87,34 @@ struct elevator_ops
77 elevator_registered_fn *elevator_registered_fn; 87 elevator_registered_fn *elevator_registered_fn;
78}; 88};
79 89
90struct blk_mq_alloc_data;
91struct blk_mq_hw_ctx;
92
93struct elevator_mq_ops {
94 int (*init_sched)(struct request_queue *, struct elevator_type *);
95 void (*exit_sched)(struct elevator_queue *);
96
97 bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
98 bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
99 int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
100 void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
101 void (*requests_merged)(struct request_queue *, struct request *, struct request *);
102 struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
103 void (*put_request)(struct request *);
104 void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
105 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
106 bool (*has_work)(struct blk_mq_hw_ctx *);
107 void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
108 void (*started_request)(struct request *);
109 void (*requeue_request)(struct request *);
110 struct request *(*former_request)(struct request_queue *, struct request *);
111 struct request *(*next_request)(struct request_queue *, struct request *);
112 int (*get_rq_priv)(struct request_queue *, struct request *, struct bio *);
113 void (*put_rq_priv)(struct request_queue *, struct request *);
114 void (*init_icq)(struct io_cq *);
115 void (*exit_icq)(struct io_cq *);
116};
117
80#define ELV_NAME_MAX (16) 118#define ELV_NAME_MAX (16)
81 119
82struct elv_fs_entry { 120struct elv_fs_entry {
@@ -94,12 +132,16 @@ struct elevator_type
94 struct kmem_cache *icq_cache; 132 struct kmem_cache *icq_cache;
95 133
96 /* fields provided by elevator implementation */ 134 /* fields provided by elevator implementation */
97 struct elevator_ops ops; 135 union {
136 struct elevator_ops sq;
137 struct elevator_mq_ops mq;
138 } ops;
98 size_t icq_size; /* see iocontext.h */ 139 size_t icq_size; /* see iocontext.h */
99 size_t icq_align; /* ditto */ 140 size_t icq_align; /* ditto */
100 struct elv_fs_entry *elevator_attrs; 141 struct elv_fs_entry *elevator_attrs;
101 char elevator_name[ELV_NAME_MAX]; 142 char elevator_name[ELV_NAME_MAX];
102 struct module *elevator_owner; 143 struct module *elevator_owner;
144 bool uses_mq;
103 145
104 /* managed by elevator core */ 146 /* managed by elevator core */
105 char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ 147 char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */
@@ -123,6 +165,7 @@ struct elevator_queue
123 struct kobject kobj; 165 struct kobject kobj;
124 struct mutex sysfs_lock; 166 struct mutex sysfs_lock;
125 unsigned int registered:1; 167 unsigned int registered:1;
168 unsigned int uses_mq:1;
126 DECLARE_HASHTABLE(hash, ELV_HASH_BITS); 169 DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
127}; 170};
128 171
@@ -133,12 +176,15 @@ extern void elv_dispatch_sort(struct request_queue *, struct request *);
133extern void elv_dispatch_add_tail(struct request_queue *, struct request *); 176extern void elv_dispatch_add_tail(struct request_queue *, struct request *);
134extern void elv_add_request(struct request_queue *, struct request *, int); 177extern void elv_add_request(struct request_queue *, struct request *, int);
135extern void __elv_add_request(struct request_queue *, struct request *, int); 178extern void __elv_add_request(struct request_queue *, struct request *, int);
136extern int elv_merge(struct request_queue *, struct request **, struct bio *); 179extern enum elv_merge elv_merge(struct request_queue *, struct request **,
180 struct bio *);
137extern void elv_merge_requests(struct request_queue *, struct request *, 181extern void elv_merge_requests(struct request_queue *, struct request *,
138 struct request *); 182 struct request *);
139extern void elv_merged_request(struct request_queue *, struct request *, int); 183extern void elv_merged_request(struct request_queue *, struct request *,
184 enum elv_merge);
140extern void elv_bio_merged(struct request_queue *q, struct request *, 185extern void elv_bio_merged(struct request_queue *q, struct request *,
141 struct bio *); 186 struct bio *);
187extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
142extern void elv_requeue_request(struct request_queue *, struct request *); 188extern void elv_requeue_request(struct request_queue *, struct request *);
143extern struct request *elv_former_request(struct request_queue *, struct request *); 189extern struct request *elv_former_request(struct request_queue *, struct request *);
144extern struct request *elv_latter_request(struct request_queue *, struct request *); 190extern struct request *elv_latter_request(struct request_queue *, struct request *);
@@ -185,13 +231,6 @@ extern void elv_rb_del(struct rb_root *, struct request *);
185extern struct request *elv_rb_find(struct rb_root *, sector_t); 231extern struct request *elv_rb_find(struct rb_root *, sector_t);
186 232
187/* 233/*
188 * Return values from elevator merger
189 */
190#define ELEVATOR_NO_MERGE 0
191#define ELEVATOR_FRONT_MERGE 1
192#define ELEVATOR_BACK_MERGE 2
193
194/*
195 * Insertion selection 234 * Insertion selection
196 */ 235 */
197#define ELEVATOR_INSERT_FRONT 1 236#define ELEVATOR_INSERT_FRONT 1
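
A skeleton of how a blk-mq scheduler might hook into the new elevator_mq_ops, loosely modelled on the deadline port in this series; the my_* callbacks are placeholders, only a minimal subset of the ops is shown, and elv_register() is the existing registration entry point.

static struct elevator_type my_mq_sched = {
	.ops.mq = {
		.init_sched		= my_init_sched,
		.exit_sched		= my_exit_sched,
		.insert_requests	= my_insert_requests,
		.dispatch_request	= my_dispatch_request,
		.has_work		= my_has_work,
	},
	.uses_mq	= true,
	.elevator_name	= "my-sched",
	.elevator_owner	= THIS_MODULE,
};

static int __init my_sched_init(void)
{
	return elv_register(&my_mq_sched);
}
module_init(my_sched_init);
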
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ba074328894..c930cbc19342 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -423,6 +423,7 @@ struct block_device {
423 int bd_invalidated; 423 int bd_invalidated;
424 struct gendisk * bd_disk; 424 struct gendisk * bd_disk;
425 struct request_queue * bd_queue; 425 struct request_queue * bd_queue;
426 struct backing_dev_info *bd_bdi;
426 struct list_head bd_list; 427 struct list_head bd_list;
427 /* 428 /*
428 * Private data. You must have bd_claim'ed the block_device 429 * Private data. You must have bd_claim'ed the block_device
@@ -2342,6 +2343,7 @@ extern struct kmem_cache *names_cachep;
2342#ifdef CONFIG_BLOCK 2343#ifdef CONFIG_BLOCK
2343extern int register_blkdev(unsigned int, const char *); 2344extern int register_blkdev(unsigned int, const char *);
2344extern void unregister_blkdev(unsigned int, const char *); 2345extern void unregister_blkdev(unsigned int, const char *);
2346extern void bdev_unhash_inode(dev_t dev);
2345extern struct block_device *bdget(dev_t); 2347extern struct block_device *bdget(dev_t);
2346extern struct block_device *bdgrab(struct block_device *bdev); 2348extern struct block_device *bdgrab(struct block_device *bdev);
2347extern void bd_set_size(struct block_device *, loff_t size); 2349extern void bd_set_size(struct block_device *, loff_t size);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 76f39754e7b0..a999d281a2f1 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -167,6 +167,13 @@ struct blk_integrity {
167}; 167};
168 168
169#endif /* CONFIG_BLK_DEV_INTEGRITY */ 169#endif /* CONFIG_BLK_DEV_INTEGRITY */
170struct disk_devt {
171 atomic_t count;
172 void (*release)(struct disk_devt *disk_devt);
173};
174
175void put_disk_devt(struct disk_devt *disk_devt);
176void get_disk_devt(struct disk_devt *disk_devt);
170 177
171struct gendisk { 178struct gendisk {
172 /* major, first_minor and minors are input parameters only, 179 /* major, first_minor and minors are input parameters only,
@@ -176,6 +183,7 @@ struct gendisk {
176 int first_minor; 183 int first_minor;
177 int minors; /* maximum number of minors, =1 for 184 int minors; /* maximum number of minors, =1 for
178 * disks that can't be partitioned. */ 185 * disks that can't be partitioned. */
186 struct disk_devt *disk_devt;
179 187
180 char disk_name[DISK_NAME_LEN]; /* name of major driver */ 188 char disk_name[DISK_NAME_LEN]; /* name of major driver */
181 char *(*devnode)(struct gendisk *gd, umode_t *mode); 189 char *(*devnode)(struct gendisk *gd, umode_t *mode);
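
A hedged sketch of embedding the new reference-counted disk_devt in driver state; struct my_dev and the helpers are made-up names, and the release callback frees whatever must outlive the dev_t users.

struct my_dev {
	struct disk_devt disk_devt;
	/* resources whose lifetime is tied to the dev_t */
};

static void my_disk_devt_release(struct disk_devt *disk_devt)
{
	kfree(container_of(disk_devt, struct my_dev, disk_devt));
}

static void my_attach_devt(struct my_dev *dev, struct gendisk *disk)
{
	atomic_set(&dev->disk_devt.count, 1);
	dev->disk_devt.release = my_disk_devt_release;
	disk->disk_devt = &dev->disk_devt;
	/* users then pair get_disk_devt()/put_disk_devt() around access */
}
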
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a633898f36ac..2f51c1724b5a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -20,6 +20,7 @@
20#include <linux/mutex.h> 20#include <linux/mutex.h>
21/* for request_sense */ 21/* for request_sense */
22#include <linux/cdrom.h> 22#include <linux/cdrom.h>
23#include <scsi/scsi_cmnd.h>
23#include <asm/byteorder.h> 24#include <asm/byteorder.h>
24#include <asm/io.h> 25#include <asm/io.h>
25 26
@@ -39,18 +40,53 @@
39 40
40struct device; 41struct device;
41 42
42/* IDE-specific values for req->cmd_type */ 43/* values for ide_request.type */
43enum ata_cmd_type_bits { 44enum ata_priv_type {
44 REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, 45 ATA_PRIV_MISC,
45 REQ_TYPE_ATA_PC, 46 ATA_PRIV_TASKFILE,
46 REQ_TYPE_ATA_SENSE, /* sense request */ 47 ATA_PRIV_PC,
47 REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ 48 ATA_PRIV_SENSE, /* sense request */
48 REQ_TYPE_ATA_PM_RESUME, /* resume request */ 49 ATA_PRIV_PM_SUSPEND, /* suspend request */
50 ATA_PRIV_PM_RESUME, /* resume request */
49}; 51};
50 52
51#define ata_pm_request(rq) \ 53struct ide_request {
52 ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ 54 struct scsi_request sreq;
53 (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) 55 u8 sense[SCSI_SENSE_BUFFERSIZE];
56 u8 type;
57};
58
59static inline struct ide_request *ide_req(struct request *rq)
60{
61 return blk_mq_rq_to_pdu(rq);
62}
63
64static inline bool ata_misc_request(struct request *rq)
65{
66 return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_MISC;
67}
68
69static inline bool ata_taskfile_request(struct request *rq)
70{
71 return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_TASKFILE;
72}
73
74static inline bool ata_pc_request(struct request *rq)
75{
76 return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PC;
77}
78
79static inline bool ata_sense_request(struct request *rq)
80{
81 return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_SENSE;
82}
83
84static inline bool ata_pm_request(struct request *rq)
85{
86 return blk_rq_is_private(rq) &&
87 (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND ||
88 ide_req(rq)->type == ATA_PRIV_PM_RESUME);
89}
54 90
55/* Error codes returned in rq->errors to the higher part of the driver. */ 91/* Error codes returned in rq->errors to the higher part of the driver. */
56enum { 92enum {
@@ -579,7 +615,7 @@ struct ide_drive_s {
579 615
580 /* current sense rq and buffer */ 616 /* current sense rq and buffer */
581 bool sense_rq_armed; 617 bool sense_rq_armed;
582 struct request sense_rq; 618 struct request *sense_rq;
583 struct request_sense sense_data; 619 struct request_sense sense_data;
584}; 620};
585 621
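
With cmd_type gone, IDE code classifies requests through the per-request ide_request data; a hedged dispatch sketch, where the my_handle_* helpers are placeholders.

static ide_startstop_t my_dispatch(ide_drive_t *drive, struct request *rq)
{
	if (ata_pm_request(rq))			/* ATA_PRIV_PM_SUSPEND/RESUME */
		return my_handle_pm(drive, rq);
	if (ata_sense_request(rq))		/* sense data in ide_req(rq)->sense */
		return my_handle_sense(drive, ide_req(rq)->sense);
	if (ata_taskfile_request(rq))
		return my_handle_taskfile(drive, rq);
	if (ata_misc_request(rq))
		return my_handle_misc(drive, rq);

	return my_handle_rw(drive, rq);		/* regular filesystem I/O */
}
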
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 7c273bbc5351..ca45e4a088a9 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -80,8 +80,6 @@ struct nvm_dev_ops {
80 unsigned int max_phys_sect; 80 unsigned int max_phys_sect;
81}; 81};
82 82
83
84
85#ifdef CONFIG_NVM 83#ifdef CONFIG_NVM
86 84
87#include <linux/blkdev.h> 85#include <linux/blkdev.h>
@@ -109,6 +107,7 @@ enum {
109 NVM_RSP_ERR_FAILWRITE = 0x40ff, 107 NVM_RSP_ERR_FAILWRITE = 0x40ff,
110 NVM_RSP_ERR_EMPTYPAGE = 0x42ff, 108 NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
111 NVM_RSP_ERR_FAILECC = 0x4281, 109 NVM_RSP_ERR_FAILECC = 0x4281,
110 NVM_RSP_ERR_FAILCRC = 0x4004,
112 NVM_RSP_WARN_HIGHECC = 0x4700, 111 NVM_RSP_WARN_HIGHECC = 0x4700,
113 112
114 /* Device opcodes */ 113 /* Device opcodes */
@@ -202,11 +201,10 @@ struct nvm_addr_format {
202struct nvm_id { 201struct nvm_id {
203 u8 ver_id; 202 u8 ver_id;
204 u8 vmnt; 203 u8 vmnt;
205 u8 cgrps;
206 u32 cap; 204 u32 cap;
207 u32 dom; 205 u32 dom;
208 struct nvm_addr_format ppaf; 206 struct nvm_addr_format ppaf;
209 struct nvm_id_group groups[4]; 207 struct nvm_id_group grp;
210} __packed; 208} __packed;
211 209
212struct nvm_target { 210struct nvm_target {
@@ -216,10 +214,6 @@ struct nvm_target {
216 struct gendisk *disk; 214 struct gendisk *disk;
217}; 215};
218 216
219struct nvm_tgt_instance {
220 struct nvm_tgt_type *tt;
221};
222
223#define ADDR_EMPTY (~0ULL) 217#define ADDR_EMPTY (~0ULL)
224 218
225#define NVM_VERSION_MAJOR 1 219#define NVM_VERSION_MAJOR 1
@@ -230,7 +224,6 @@ struct nvm_rq;
230typedef void (nvm_end_io_fn)(struct nvm_rq *); 224typedef void (nvm_end_io_fn)(struct nvm_rq *);
231 225
232struct nvm_rq { 226struct nvm_rq {
233 struct nvm_tgt_instance *ins;
234 struct nvm_tgt_dev *dev; 227 struct nvm_tgt_dev *dev;
235 228
236 struct bio *bio; 229 struct bio *bio;
@@ -254,6 +247,8 @@ struct nvm_rq {
254 247
255 u64 ppa_status; /* ppa media status */ 248 u64 ppa_status; /* ppa media status */
256 int error; 249 int error;
250
251 void *private;
257}; 252};
258 253
259static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) 254static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
@@ -272,15 +267,6 @@ enum {
272 NVM_BLK_ST_BAD = 0x8, /* Bad block */ 267 NVM_BLK_ST_BAD = 0x8, /* Bad block */
273}; 268};
274 269
275/* system block cpu representation */
276struct nvm_sb_info {
277 unsigned long seqnr;
278 unsigned long erase_cnt;
279 unsigned int version;
280 char mmtype[NVM_MMTYPE_LEN];
281 struct ppa_addr fs_ppa;
282};
283
284/* Device generic information */ 270/* Device generic information */
285struct nvm_geo { 271struct nvm_geo {
286 int nr_chnls; 272 int nr_chnls;
@@ -308,6 +294,7 @@ struct nvm_geo {
308 int sec_per_lun; 294 int sec_per_lun;
309}; 295};
310 296
297/* sub-device structure */
311struct nvm_tgt_dev { 298struct nvm_tgt_dev {
312 /* Device information */ 299 /* Device information */
313 struct nvm_geo geo; 300 struct nvm_geo geo;
@@ -329,17 +316,10 @@ struct nvm_dev {
329 316
330 struct list_head devices; 317 struct list_head devices;
331 318
332 /* Media manager */
333 struct nvmm_type *mt;
334 void *mp;
335
336 /* System blocks */
337 struct nvm_sb_info sb;
338
339 /* Device information */ 319 /* Device information */
340 struct nvm_geo geo; 320 struct nvm_geo geo;
341 321
342 /* lower page table */ 322 /* lower page table */
343 int lps_per_blk; 323 int lps_per_blk;
344 int *lptbl; 324 int *lptbl;
345 325
@@ -359,6 +339,10 @@ struct nvm_dev {
359 339
360 struct mutex mlock; 340 struct mutex mlock;
361 spinlock_t lock; 341 spinlock_t lock;
342
343 /* target management */
344 struct list_head area_list;
345 struct list_head targets;
362}; 346};
363 347
364static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, 348static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
@@ -391,10 +375,10 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
391 return l; 375 return l;
392} 376}
393 377
394static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, 378static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev,
395 struct ppa_addr r) 379 struct ppa_addr r)
396{ 380{
397 struct nvm_geo *geo = &dev->geo; 381 struct nvm_geo *geo = &tgt_dev->geo;
398 struct ppa_addr l; 382 struct ppa_addr l;
399 383
400 l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; 384 l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset;
@@ -407,10 +391,10 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
407 return l; 391 return l;
408} 392}
409 393
410static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, 394static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev,
411 struct ppa_addr r) 395 struct ppa_addr r)
412{ 396{
413 struct nvm_geo *geo = &dev->geo; 397 struct nvm_geo *geo = &tgt_dev->geo;
414 struct ppa_addr l; 398 struct ppa_addr l;
415 399
416 l.ppa = 0; 400 l.ppa = 0;
@@ -452,15 +436,12 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
452 (ppa1.g.blk == ppa2.g.blk)); 436 (ppa1.g.blk == ppa2.g.blk));
453} 437}
454 438
455static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
456{
457 return dev->lptbl[slc_pg];
458}
459
460typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); 439typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
461typedef sector_t (nvm_tgt_capacity_fn)(void *); 440typedef sector_t (nvm_tgt_capacity_fn)(void *);
462typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); 441typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
463typedef void (nvm_tgt_exit_fn)(void *); 442typedef void (nvm_tgt_exit_fn)(void *);
443typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
444typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
464 445
465struct nvm_tgt_type { 446struct nvm_tgt_type {
466 const char *name; 447 const char *name;
@@ -469,12 +450,15 @@ struct nvm_tgt_type {
469 /* target entry points */ 450 /* target entry points */
470 nvm_tgt_make_rq_fn *make_rq; 451 nvm_tgt_make_rq_fn *make_rq;
471 nvm_tgt_capacity_fn *capacity; 452 nvm_tgt_capacity_fn *capacity;
472 nvm_end_io_fn *end_io;
473 453
474 /* module-specific init/teardown */ 454 /* module-specific init/teardown */
475 nvm_tgt_init_fn *init; 455 nvm_tgt_init_fn *init;
476 nvm_tgt_exit_fn *exit; 456 nvm_tgt_exit_fn *exit;
477 457
458 /* sysfs */
459 nvm_tgt_sysfs_init_fn *sysfs_init;
460 nvm_tgt_sysfs_exit_fn *sysfs_exit;
461
478 /* For internal use */ 462 /* For internal use */
479 struct list_head list; 463 struct list_head list;
480}; 464};
@@ -487,103 +471,29 @@ extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
487extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); 471extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
488extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); 472extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
489 473
490typedef int (nvmm_register_fn)(struct nvm_dev *);
491typedef void (nvmm_unregister_fn)(struct nvm_dev *);
492
493typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
494typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
495typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *);
496typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int);
497typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
498typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
499typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *,
500 struct ppa_addr, int);
501typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int);
502
503enum {
504 TRANS_TGT_TO_DEV = 0x0,
505 TRANS_DEV_TO_TGT = 0x1,
506};
507
508struct nvmm_type {
509 const char *name;
510 unsigned int version[3];
511
512 nvmm_register_fn *register_mgr;
513 nvmm_unregister_fn *unregister_mgr;
514
515 nvmm_create_tgt_fn *create_tgt;
516 nvmm_remove_tgt_fn *remove_tgt;
517
518 nvmm_submit_io_fn *submit_io;
519 nvmm_erase_blk_fn *erase_blk;
520
521 nvmm_get_area_fn *get_area;
522 nvmm_put_area_fn *put_area;
523
524 nvmm_trans_ppa_fn *trans_ppa;
525 nvmm_part_to_tgt_fn *part_to_tgt;
526
527 struct list_head list;
528};
529
530extern int nvm_register_mgr(struct nvmm_type *);
531extern void nvm_unregister_mgr(struct nvmm_type *);
532
533extern struct nvm_dev *nvm_alloc_dev(int); 474extern struct nvm_dev *nvm_alloc_dev(int);
534extern int nvm_register(struct nvm_dev *); 475extern int nvm_register(struct nvm_dev *);
535extern void nvm_unregister(struct nvm_dev *); 476extern void nvm_unregister(struct nvm_dev *);
536 477
537extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int);
538extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, 478extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
539 int, int); 479 int, int);
540extern int nvm_max_phys_sects(struct nvm_tgt_dev *); 480extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
541extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); 481extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
542extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
543extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
544extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, 482extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
545 const struct ppa_addr *, int, int); 483 const struct ppa_addr *, int, int);
546extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); 484extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
547extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
548extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); 485extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
549extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, 486extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
550 void *); 487 void *);
551extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); 488extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
552extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); 489extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
553extern void nvm_end_io(struct nvm_rq *, int); 490extern void nvm_end_io(struct nvm_rq *);
554extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
555 void *, int);
556extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int,
557 int, void *, int);
558extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); 491extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
559extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
560extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); 492extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
561 493
562/* sysblk.c */
563#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
564
565/* system block on disk representation */
566struct nvm_system_block {
567 __be32 magic; /* magic signature */
568 __be32 seqnr; /* sequence number */
569 __be32 erase_cnt; /* erase count */
570 __be16 version; /* version number */
571 u8 mmtype[NVM_MMTYPE_LEN]; /* media manager name */
572 __be64 fs_ppa; /* PPA for media manager
573 * superblock */
574};
575
576extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *);
577extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *);
578extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
579
580extern int nvm_dev_factory(struct nvm_dev *, int flags); 494extern int nvm_dev_factory(struct nvm_dev *, int flags);
581 495
582#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid) \ 496extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int);
583 for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls; \
584 (chid)++, (ppa).g.ch = (chid)) \
585 for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl; \
586 (lunid)++, (ppa).g.lun = (lunid))
587 497
588#else /* CONFIG_NVM */ 498#else /* CONFIG_NVM */
589struct nvm_dev_ops; 499struct nvm_dev_ops;
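
Since the conversion helpers now take the target's nvm_tgt_dev, a target translates addresses against its own geometry; a small sketch, where tgt_dev stands for whatever handle the target received at init time.

	struct ppa_addr gen, dev_ppa;

	gen.ppa = 0;
	gen.g.ch = 1;
	gen.g.lun = 0;
	gen.g.blk = 42;

	dev_ppa = generic_to_dev_addr(tgt_dev, gen);	/* before submission */
	gen = dev_to_generic_addr(tgt_dev, dev_ppa);	/* after completion */
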
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 3d1c6f1b15c9..0b676a02cf3e 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -244,6 +244,7 @@ enum {
244 NVME_CTRL_ONCS_DSM = 1 << 2, 244 NVME_CTRL_ONCS_DSM = 1 << 2,
245 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, 245 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
246 NVME_CTRL_VWC_PRESENT = 1 << 0, 246 NVME_CTRL_VWC_PRESENT = 1 << 0,
247 NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
247}; 248};
248 249
249struct nvme_lbaf { 250struct nvme_lbaf {
@@ -553,6 +554,8 @@ enum {
553 NVME_DSMGMT_AD = 1 << 2, 554 NVME_DSMGMT_AD = 1 << 2,
554}; 555};
555 556
557#define NVME_DSM_MAX_RANGES 256
558
556struct nvme_dsm_range { 559struct nvme_dsm_range {
557 __le32 cattr; 560 __le32 cattr;
558 __le32 nlb; 561 __le32 nlb;
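
A hedged sketch of how the new OACS bit might gate OPAL setup, mirroring the "check for Security send/recv support" change in this pull; ctrl, its oacs/opal_dev members and my_sec_submit() stand in for driver state, while init_opal_dev() comes from the sed-opal header added earlier in this diff.

	if (ctrl->oacs & NVME_CTRL_OACS_SEC_SUPP) {
		if (!ctrl->opal_dev)
			ctrl->opal_dev = init_opal_dev(ctrl, my_sec_submit);
	} else {
		ctrl->opal_dev = NULL;
	}
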
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index f017fd6e69c4..d4e0a204c118 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -259,6 +259,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
259unsigned int sbitmap_weight(const struct sbitmap *sb); 259unsigned int sbitmap_weight(const struct sbitmap *sb);
260 260
261/** 261/**
262 * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
263 * @sb: Bitmap to show.
264 * @m: struct seq_file to write to.
265 *
266 * This is intended for debugging. The format may change at any time.
267 */
268void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
269
270/**
271 * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
272 * seq_file.
273 * @sb: Bitmap to show.
274 * @m: struct seq_file to write to.
275 *
276 * This is intended for debugging. The output isn't guaranteed to be internally
277 * consistent.
278 */
279void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);
280
281/**
262 * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific 282 * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
263 * memory node. 283 * memory node.
264 * @sbq: Bitmap queue to initialize. 284 * @sbq: Bitmap queue to initialize.
@@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
370 */ 390 */
371void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); 391void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
372 392
393/**
394 * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
395 * seq_file.
396 * @sbq: Bitmap queue to show.
397 * @m: struct seq_file to write to.
398 *
399 * This is intended for debugging. The format may change at any time.
400 */
401void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
402
373#endif /* __LINUX_SCALE_BITMAP_H */ 403#endif /* __LINUX_SCALE_BITMAP_H */
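
A minimal sketch of exposing an sbitmap through a seq_file-backed debugfs attribute, which is what the two show helpers are documented for; my_sb_debugfs_show is a placeholder and would typically be wired up via single_open().

static int my_sb_debugfs_show(struct seq_file *m, void *data)
{
	struct sbitmap *sb = m->private;

	sbitmap_show(sb, m);		/* depth/busy/bits_per_word/map_nr */
	sbitmap_bitmap_show(sb, m);	/* hex dump of the bitmap words */
	return 0;
}
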
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
new file mode 100644
index 000000000000..deee23d012e7
--- /dev/null
+++ b/include/linux/sed-opal.h
@@ -0,0 +1,70 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Rafael Antognolli <rafael.antognolli@intel.com>
6 * Scott Bauer <scott.bauer@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#ifndef LINUX_OPAL_H
19#define LINUX_OPAL_H
20
21#include <uapi/linux/sed-opal.h>
22#include <linux/kernel.h>
23
24struct opal_dev;
25
26typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer,
27 size_t len, bool send);
28
29#ifdef CONFIG_BLK_SED_OPAL
30bool opal_unlock_from_suspend(struct opal_dev *dev);
31struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
32int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
33
34static inline bool is_sed_ioctl(unsigned int cmd)
35{
36 switch (cmd) {
37 case IOC_OPAL_SAVE:
38 case IOC_OPAL_LOCK_UNLOCK:
39 case IOC_OPAL_TAKE_OWNERSHIP:
40 case IOC_OPAL_ACTIVATE_LSP:
41 case IOC_OPAL_SET_PW:
42 case IOC_OPAL_ACTIVATE_USR:
43 case IOC_OPAL_REVERT_TPR:
44 case IOC_OPAL_LR_SETUP:
45 case IOC_OPAL_ADD_USR_TO_LR:
46 case IOC_OPAL_ENABLE_DISABLE_MBR:
47 case IOC_OPAL_ERASE_LR:
48 case IOC_OPAL_SECURE_ERASE_LR:
49 return true;
50 }
51 return false;
52}
53#else
54static inline bool is_sed_ioctl(unsigned int cmd)
55{
56 return false;
57}
58
59static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd,
60 void __user *ioctl_ptr)
61{
62 return 0;
63}
64static inline bool opal_unlock_from_suspend(struct opal_dev *dev)
65{
66 return false;
67}
68#define init_opal_dev(data, send_recv) NULL
69#endif /* CONFIG_BLK_SED_OPAL */
70#endif /* LINUX_OPAL_H */
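
A short sketch of the intended use from a block driver; my_dev and its opal_dev member are placeholders, while is_sed_ioctl(), sed_ioctl() and opal_unlock_from_suspend() are the interfaces declared above.

	/* in the driver's ->ioctl() handler */
	if (is_sed_ioctl(cmd))
		return sed_ioctl(my_dev->opal_dev, cmd, (void __user *)arg);

	/* elsewhere, on resume from suspend */
	opal_unlock_from_suspend(my_dev->opal_dev);
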
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 9fc1aecfc813..b379f93a2c48 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -8,6 +8,7 @@
8#include <linux/timer.h> 8#include <linux/timer.h>
9#include <linux/scatterlist.h> 9#include <linux/scatterlist.h>
10#include <scsi/scsi_device.h> 10#include <scsi/scsi_device.h>
11#include <scsi/scsi_request.h>
11 12
12struct Scsi_Host; 13struct Scsi_Host;
13struct scsi_driver; 14struct scsi_driver;
@@ -57,6 +58,7 @@ struct scsi_pointer {
57#define SCMD_TAGGED (1 << 0) 58#define SCMD_TAGGED (1 << 0)
58 59
59struct scsi_cmnd { 60struct scsi_cmnd {
61 struct scsi_request req;
60 struct scsi_device *device; 62 struct scsi_device *device;
61 struct list_head list; /* scsi_cmnd participates in queue lists */ 63 struct list_head list; /* scsi_cmnd participates in queue lists */
62 struct list_head eh_entry; /* entry for the host eh_cmd_q */ 64 struct list_head eh_entry; /* entry for the host eh_cmd_q */
@@ -149,7 +151,7 @@ static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd)
149 return cmd + 1; 151 return cmd + 1;
150} 152}
151 153
152/* make sure not to use it with REQ_TYPE_BLOCK_PC commands */ 154/* make sure not to use it with passthrough commands */
153static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) 155static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
154{ 156{
155 return *(struct scsi_driver **)cmd->request->rq_disk->private_data; 157 return *(struct scsi_driver **)cmd->request->rq_disk->private_data;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 36680f13270d..3cd8c3bec638 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -551,9 +551,6 @@ struct Scsi_Host {
551 struct list_head __devices; 551 struct list_head __devices;
552 struct list_head __targets; 552 struct list_head __targets;
553 553
554 struct scsi_host_cmd_pool *cmd_pool;
555 spinlock_t free_list_lock;
556 struct list_head free_list; /* backup store of cmd structs */
557 struct list_head starved_list; 554 struct list_head starved_list;
558 555
559 spinlock_t default_lock; 556 spinlock_t default_lock;
@@ -826,8 +823,6 @@ extern void scsi_block_requests(struct Scsi_Host *);
826 823
827struct class_container; 824struct class_container;
828 825
829extern struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
830 void (*) (struct request_queue *));
831/* 826/*
832 * These two functions are used to allocate and free a pseudo device 827 * These two functions are used to allocate and free a pseudo device
833 * which will connect to the host adapter itself rather than any 828 * which will connect to the host adapter itself rather than any
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
new file mode 100644
index 000000000000..ba0aeb980f7e
--- /dev/null
+++ b/include/scsi/scsi_request.h
@@ -0,0 +1,30 @@
1#ifndef _SCSI_SCSI_REQUEST_H
2#define _SCSI_SCSI_REQUEST_H
3
4#include <linux/blk-mq.h>
5
6#define BLK_MAX_CDB 16
7
8struct scsi_request {
9 unsigned char __cmd[BLK_MAX_CDB];
10 unsigned char *cmd;
11 unsigned short cmd_len;
12 unsigned int sense_len;
13 unsigned int resid_len; /* residual count */
14 void *sense;
15};
16
17static inline struct scsi_request *scsi_req(struct request *rq)
18{
19 return blk_mq_rq_to_pdu(rq);
20}
21
22static inline void scsi_req_free_cmd(struct scsi_request *req)
23{
24 if (req->cmd != req->__cmd)
25 kfree(req->cmd);
26}
27
28void scsi_req_init(struct request *);
29
30#endif /* _SCSI_SCSI_REQUEST_H */
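
A hedged sketch of how code that used to fill rq->cmd/rq->cmd_len now goes through scsi_req(); my_prep_inquiry is a placeholder and assumes the request came from a queue whose per-request data embeds a scsi_request initialised with scsi_req_init().

static void my_prep_inquiry(struct request *rq, u8 alloc_len)
{
	struct scsi_request *req = scsi_req(rq);

	memset(req->cmd, 0, BLK_MAX_CDB);
	req->cmd[0] = INQUIRY;
	req->cmd[4] = alloc_len;
	req->cmd_len = 6;
}
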
diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h
index 81292392adbc..b6e07b56d013 100644
--- a/include/scsi/scsi_transport.h
+++ b/include/scsi/scsi_transport.h
@@ -119,4 +119,6 @@ scsi_transport_device_data(struct scsi_device *sdev)
119 + shost->transportt->device_private_offset; 119 + shost->transportt->device_private_offset;
120} 120}
121 121
122void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q);
123
122#endif /* SCSI_TRANSPORT_H */ 124#endif /* SCSI_TRANSPORT_H */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 3e02e3a25413..a88ed13446ff 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -73,19 +73,17 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
73 __field( unsigned int, nr_sector ) 73 __field( unsigned int, nr_sector )
74 __field( int, errors ) 74 __field( int, errors )
75 __array( char, rwbs, RWBS_LEN ) 75 __array( char, rwbs, RWBS_LEN )
76 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 76 __dynamic_array( char, cmd, 1 )
77 ), 77 ),
78 78
79 TP_fast_assign( 79 TP_fast_assign(
80 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; 80 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
81 __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 81 __entry->sector = blk_rq_trace_sector(rq);
82 0 : blk_rq_pos(rq); 82 __entry->nr_sector = blk_rq_trace_nr_sectors(rq);
83 __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
84 0 : blk_rq_sectors(rq);
85 __entry->errors = rq->errors; 83 __entry->errors = rq->errors;
86 84
87 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); 85 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
88 blk_dump_cmd(__get_str(cmd), rq); 86 __get_str(cmd)[0] = '\0';
89 ), 87 ),
90 88
91 TP_printk("%d,%d %s (%s) %llu + %u [%d]", 89 TP_printk("%d,%d %s (%s) %llu + %u [%d]",
@@ -153,7 +151,7 @@ TRACE_EVENT(block_rq_complete,
153 __field( unsigned int, nr_sector ) 151 __field( unsigned int, nr_sector )
154 __field( int, errors ) 152 __field( int, errors )
155 __array( char, rwbs, RWBS_LEN ) 153 __array( char, rwbs, RWBS_LEN )
156 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 154 __dynamic_array( char, cmd, 1 )
157 ), 155 ),
158 156
159 TP_fast_assign( 157 TP_fast_assign(
@@ -163,7 +161,7 @@ TRACE_EVENT(block_rq_complete,
163 __entry->errors = rq->errors; 161 __entry->errors = rq->errors;
164 162
165 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); 163 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
166 blk_dump_cmd(__get_str(cmd), rq); 164 __get_str(cmd)[0] = '\0';
167 ), 165 ),
168 166
169 TP_printk("%d,%d %s (%s) %llu + %u [%d]", 167 TP_printk("%d,%d %s (%s) %llu + %u [%d]",
@@ -186,20 +184,17 @@ DECLARE_EVENT_CLASS(block_rq,
186 __field( unsigned int, bytes ) 184 __field( unsigned int, bytes )
187 __array( char, rwbs, RWBS_LEN ) 185 __array( char, rwbs, RWBS_LEN )
188 __array( char, comm, TASK_COMM_LEN ) 186 __array( char, comm, TASK_COMM_LEN )
189 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 187 __dynamic_array( char, cmd, 1 )
190 ), 188 ),
191 189
192 TP_fast_assign( 190 TP_fast_assign(
193 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; 191 __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
194 __entry->sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 192 __entry->sector = blk_rq_trace_sector(rq);
195 0 : blk_rq_pos(rq); 193 __entry->nr_sector = blk_rq_trace_nr_sectors(rq);
196 __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? 194 __entry->bytes = blk_rq_bytes(rq);
197 0 : blk_rq_sectors(rq);
198 __entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
199 blk_rq_bytes(rq) : 0;
200 195
201 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq)); 196 blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
202 blk_dump_cmd(__get_str(cmd), rq); 197 __get_str(cmd)[0] = '\0';
203 memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 198 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
204 ), 199 ),
205 200
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
index 774a43128a7a..fd19f36b3129 100644
--- a/include/uapi/linux/lightnvm.h
+++ b/include/uapi/linux/lightnvm.h
@@ -122,6 +122,44 @@ struct nvm_ioctl_dev_factory {
122 __u32 flags; 122 __u32 flags;
123}; 123};
124 124
125struct nvm_user_vio {
126 __u8 opcode;
127 __u8 flags;
128 __u16 control;
129 __u16 nppas;
130 __u16 rsvd;
131 __u64 metadata;
132 __u64 addr;
133 __u64 ppa_list;
134 __u32 metadata_len;
135 __u32 data_len;
136 __u64 status;
137 __u32 result;
138 __u32 rsvd3[3];
139};
140
141struct nvm_passthru_vio {
142 __u8 opcode;
143 __u8 flags;
144 __u8 rsvd[2];
145 __u32 nsid;
146 __u32 cdw2;
147 __u32 cdw3;
148 __u64 metadata;
149 __u64 addr;
150 __u32 metadata_len;
151 __u32 data_len;
152 __u64 ppa_list;
153 __u16 nppas;
154 __u16 control;
155 __u32 cdw13;
156 __u32 cdw14;
157 __u32 cdw15;
158 __u64 status;
159 __u32 result;
160 __u32 timeout_ms;
161};
162
125/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ 163/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
126enum { 164enum {
127 /* top level cmds */ 165 /* top level cmds */
@@ -137,6 +175,11 @@ enum {
137 175
138 /* Factory reset device */ 176 /* Factory reset device */
139 NVM_DEV_FACTORY_CMD, 177 NVM_DEV_FACTORY_CMD,
178
179 /* Vector user I/O */
180 NVM_DEV_VIO_ADMIN_CMD = 0x41,
181 NVM_DEV_VIO_CMD = 0x42,
182 NVM_DEV_VIO_USER_CMD = 0x43,
140}; 183};
141 184
142#define NVM_IOCTL 'L' /* 0x4c */ 185#define NVM_IOCTL 'L' /* 0x4c */
@@ -154,6 +197,13 @@ enum {
154#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \ 197#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
155 struct nvm_ioctl_dev_factory) 198 struct nvm_ioctl_dev_factory)
156 199
200#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \
201 struct nvm_passthru_vio)
202#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\
203 struct nvm_passthru_vio)
204#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\
205 struct nvm_user_vio)
206
157#define NVM_VERSION_MAJOR 1 207#define NVM_VERSION_MAJOR 1
158#define NVM_VERSION_MINOR 0 208#define NVM_VERSION_MINOR 0
159#define NVM_VERSION_PATCHLEVEL 0 209#define NVM_VERSION_PATCHLEVEL 0
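
A userspace sketch of driving the new vector-I/O ioctl; the 4k sector size and the zero- versus one-based meaning of nppas are assumptions to verify against the driver, not guarantees, and submit_vio() is a made-up wrapper.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/lightnvm.h>

static int submit_vio(int fd, __u8 opcode, void *buf,
		      const __u64 *ppa_list, __u16 nppas)
{
	struct nvm_user_vio vio;

	memset(&vio, 0, sizeof(vio));
	vio.opcode = opcode;
	vio.nppas = nppas;	/* check whether the driver expects a 0-based count */
	vio.ppa_list = (__u64)(uintptr_t)ppa_list;
	vio.addr = (__u64)(uintptr_t)buf;
	vio.data_len = (__u32)nppas * 4096;	/* assumes 4k sectors */

	return ioctl(fd, NVME_NVM_IOCTL_SUBMIT_VIO, &vio);
}
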
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
new file mode 100644
index 000000000000..c72e0735532d
--- /dev/null
+++ b/include/uapi/linux/sed-opal.h
@@ -0,0 +1,119 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Rafael Antognolli <rafael.antognolli@intel.com>
6 * Scott Bauer <scott.bauer@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#ifndef _UAPI_SED_OPAL_H
19#define _UAPI_SED_OPAL_H
20
21#include <linux/types.h>
22
23#define OPAL_KEY_MAX 256
24#define OPAL_MAX_LRS 9
25
26enum opal_mbr {
27 OPAL_MBR_ENABLE = 0x0,
28 OPAL_MBR_DISABLE = 0x01,
29};
30
31enum opal_user {
32 OPAL_ADMIN1 = 0x0,
33 OPAL_USER1 = 0x01,
34 OPAL_USER2 = 0x02,
35 OPAL_USER3 = 0x03,
36 OPAL_USER4 = 0x04,
37 OPAL_USER5 = 0x05,
38 OPAL_USER6 = 0x06,
39 OPAL_USER7 = 0x07,
40 OPAL_USER8 = 0x08,
41 OPAL_USER9 = 0x09,
42};
43
44enum opal_lock_state {
45 OPAL_RO = 0x01, /* 0001 */
46 OPAL_RW = 0x02, /* 0010 */
47 OPAL_LK = 0x04, /* 0100 */
48};
49
50struct opal_key {
51 __u8 lr;
52 __u8 key_len;
53 __u8 __align[6];
54 __u8 key[OPAL_KEY_MAX];
55};
56
57struct opal_lr_act {
58 struct opal_key key;
59 __u32 sum;
60 __u8 num_lrs;
61 __u8 lr[OPAL_MAX_LRS];
62 __u8 align[2]; /* Align to 8 byte boundary */
63};
64
65struct opal_session_info {
66 __u32 sum;
67 __u32 who;
68 struct opal_key opal_key;
69};
70
71struct opal_user_lr_setup {
72 __u64 range_start;
73 __u64 range_length;
74 __u32 RLE; /* Read Lock enabled */
75 __u32 WLE; /* Write Lock Enabled */
76 struct opal_session_info session;
77};
78
79struct opal_lock_unlock {
80 struct opal_session_info session;
81 __u32 l_state;
82 __u8 __align[4];
83};
84
85struct opal_new_pw {
86 struct opal_session_info session;
87
 88	/* When we're not operating in sum (single-user mode) and we first set
 89	 * passwords, we need to set them via the ADMIN authority.
 90	 * After passwords are changed, we can set them via
 91	 * user authorities.
 92	 * Because of this restriction we need to know about
 93	 * two different users: one in 'session', which we will use
 94	 * to start the session, and new_user_pw as the user we're
 95	 * changing the password for.
96 */
97 struct opal_session_info new_user_pw;
98};
99
100struct opal_mbr_data {
101 struct opal_key key;
102 __u8 enable_disable;
103 __u8 __align[7];
104};
105
106#define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock)
107#define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock)
108#define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key)
109#define IOC_OPAL_ACTIVATE_LSP _IOW('p', 223, struct opal_lr_act)
110#define IOC_OPAL_SET_PW _IOW('p', 224, struct opal_new_pw)
111#define IOC_OPAL_ACTIVATE_USR _IOW('p', 225, struct opal_session_info)
112#define IOC_OPAL_REVERT_TPR _IOW('p', 226, struct opal_key)
113#define IOC_OPAL_LR_SETUP _IOW('p', 227, struct opal_user_lr_setup)
114#define IOC_OPAL_ADD_USR_TO_LR _IOW('p', 228, struct opal_lock_unlock)
115#define IOC_OPAL_ENABLE_DISABLE_MBR _IOW('p', 229, struct opal_mbr_data)
116#define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info)
117#define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info)
118
119#endif /* _UAPI_SED_OPAL_H */
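
A userspace sketch of unlocking locking range 0 for read/write as Admin1 through the new ioctl interface; error handling and secure password storage are elided, and fd is assumed to be the block device node whose driver forwards SED ioctls to sed_ioctl().

#include <string.h>
#include <sys/ioctl.h>
#include <linux/sed-opal.h>

static int opal_rw_unlock(int fd, const char *password)
{
	struct opal_lock_unlock lk;
	size_t len = strlen(password);

	if (len >= OPAL_KEY_MAX)
		return -1;

	memset(&lk, 0, sizeof(lk));
	lk.l_state = OPAL_RW;
	lk.session.who = OPAL_ADMIN1;
	lk.session.opal_key.lr = 0;		/* locking range 0 */
	lk.session.opal_key.key_len = len;
	memcpy(lk.session.opal_key.key, password, len);

	return ioctl(fd, IOC_OPAL_LOCK_UNLOCK, &lk);
}
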
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 95cecbf67f5c..b2058a7f94bd 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -28,6 +28,8 @@
28#include <linux/uaccess.h> 28#include <linux/uaccess.h>
29#include <linux/list.h> 29#include <linux/list.h>
30 30
31#include "../../block/blk.h"
32
31#include <trace/events/block.h> 33#include <trace/events/block.h>
32 34
33#include "trace_output.h" 35#include "trace_output.h"
@@ -292,9 +294,6 @@ record_it:
292 local_irq_restore(flags); 294 local_irq_restore(flags);
293} 295}
294 296
295static struct dentry *blk_tree_root;
296static DEFINE_MUTEX(blk_tree_mutex);
297
298static void blk_trace_free(struct blk_trace *bt) 297static void blk_trace_free(struct blk_trace *bt)
299{ 298{
300 debugfs_remove(bt->msg_file); 299 debugfs_remove(bt->msg_file);
@@ -433,9 +432,9 @@ static void blk_trace_setup_lba(struct blk_trace *bt,
433/* 432/*
434 * Setup everything required to start tracing 433 * Setup everything required to start tracing
435 */ 434 */
436int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 435static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
437 struct block_device *bdev, 436 struct block_device *bdev,
438 struct blk_user_trace_setup *buts) 437 struct blk_user_trace_setup *buts)
439{ 438{
440 struct blk_trace *bt = NULL; 439 struct blk_trace *bt = NULL;
441 struct dentry *dir = NULL; 440 struct dentry *dir = NULL;
@@ -468,22 +467,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
468 467
469 ret = -ENOENT; 468 ret = -ENOENT;
470 469
471 mutex_lock(&blk_tree_mutex); 470 if (!blk_debugfs_root)
472 if (!blk_tree_root) { 471 goto err;
473 blk_tree_root = debugfs_create_dir("block", NULL);
474 if (!blk_tree_root) {
475 mutex_unlock(&blk_tree_mutex);
476 goto err;
477 }
478 }
479 mutex_unlock(&blk_tree_mutex);
480
481 dir = debugfs_create_dir(buts->name, blk_tree_root);
482 472
473 dir = debugfs_lookup(buts->name, blk_debugfs_root);
474 if (!dir)
475 bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
483 if (!dir) 476 if (!dir)
484 goto err; 477 goto err;
485 478
486 bt->dir = dir;
487 bt->dev = dev; 479 bt->dev = dev;
488 atomic_set(&bt->dropped, 0); 480 atomic_set(&bt->dropped, 0);
489 INIT_LIST_HEAD(&bt->running_list); 481 INIT_LIST_HEAD(&bt->running_list);
@@ -525,9 +517,12 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
525 if (atomic_inc_return(&blk_probes_ref) == 1) 517 if (atomic_inc_return(&blk_probes_ref) == 1)
526 blk_register_tracepoints(); 518 blk_register_tracepoints();
527 519
528 return 0; 520 ret = 0;
529err: 521err:
530 blk_trace_free(bt); 522 if (dir && !bt->dir)
523 dput(dir);
524 if (ret)
525 blk_trace_free(bt);
531 return ret; 526 return ret;
532} 527}
533 528
@@ -712,15 +707,13 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
712 if (likely(!bt)) 707 if (likely(!bt))
713 return; 708 return;
714 709
715 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 710 if (blk_rq_is_passthrough(rq))
716 what |= BLK_TC_ACT(BLK_TC_PC); 711 what |= BLK_TC_ACT(BLK_TC_PC);
717 __blk_add_trace(bt, 0, nr_bytes, req_op(rq), rq->cmd_flags, 712 else
718 what, rq->errors, rq->cmd_len, rq->cmd);
719 } else {
720 what |= BLK_TC_ACT(BLK_TC_FS); 713 what |= BLK_TC_ACT(BLK_TC_FS);
721 __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, req_op(rq), 714
722 rq->cmd_flags, what, rq->errors, 0, NULL); 715 __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
723 } 716 rq->cmd_flags, what, rq->errors, 0, NULL);
724} 717}
725 718
726static void blk_add_trace_rq_abort(void *ignore, 719static void blk_add_trace_rq_abort(void *ignore,
@@ -972,11 +965,7 @@ void blk_add_driver_data(struct request_queue *q,
972 if (likely(!bt)) 965 if (likely(!bt))
973 return; 966 return;
974 967
975 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) 968 __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
976 __blk_add_trace(bt, 0, blk_rq_bytes(rq), 0, 0,
977 BLK_TA_DRV_DATA, rq->errors, len, data);
978 else
979 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 0, 0,
980 BLK_TA_DRV_DATA, rq->errors, len, data); 969 BLK_TA_DRV_DATA, rq->errors, len, data);
981} 970}
982EXPORT_SYMBOL_GPL(blk_add_driver_data); 971EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -1752,31 +1741,6 @@ void blk_trace_remove_sysfs(struct device *dev)
 
 #ifdef CONFIG_EVENT_TRACING
 
-void blk_dump_cmd(char *buf, struct request *rq)
-{
-	int i, end;
-	int len = rq->cmd_len;
-	unsigned char *cmd = rq->cmd;
-
-	if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
-		buf[0] = '\0';
-		return;
-	}
-
-	for (end = len - 1; end >= 0; end--)
-		if (cmd[end])
-			break;
-	end++;
-
-	for (i = 0; i < len; i++) {
-		buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
-		if (i == end && end != len - 1) {
-			sprintf(buf, " ..");
-			break;
-		}
-	}
-}
-
 void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes)
 {
 	int i = 0;
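The blktrace hunks above drop the BLOCK_PC special cases: what used to be an explicit "sector 0 for SCSI passthrough, blk_rq_pos() otherwise" split is now hidden behind blk_rq_is_passthrough() and blk_rq_trace_sector(). A standalone userspace model of that sector selection follows; it is not the kernel helpers themselves, and the struct and field names are stand-ins.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long sector_t;

struct fake_request {
        bool passthrough;       /* stands in for blk_rq_is_passthrough() */
        sector_t pos;           /* stands in for blk_rq_pos() */
};

/* Passthrough requests have no meaningful start sector, so trace 0. */
static sector_t trace_sector(const struct fake_request *rq)
{
        return rq->passthrough ? 0 : rq->pos;
}

int main(void)
{
        struct fake_request fs = { .passthrough = false, .pos = 2048 };
        struct fake_request pc = { .passthrough = true,  .pos = 2048 };

        printf("fs request traces sector %llu, passthrough traces %llu\n",
               trace_sector(&fs), trace_sector(&pc));
        return 0;
}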
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2cecf05c82fd..55e11c4b2f3b 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -17,6 +17,7 @@
 
 #include <linux/random.h>
 #include <linux/sbitmap.h>
+#include <linux/seq_file.h>
 
 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 		      gfp_t flags, int node)
@@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb)
 }
 EXPORT_SYMBOL_GPL(sbitmap_weight);
 
+void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+	seq_printf(m, "depth=%u\n", sb->depth);
+	seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
+	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
+	seq_printf(m, "map_nr=%u\n", sb->map_nr);
+}
+EXPORT_SYMBOL_GPL(sbitmap_show);
+
+static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
+{
+	if ((offset & 0xf) == 0) {
+		if (offset != 0)
+			seq_putc(m, '\n');
+		seq_printf(m, "%08x:", offset);
+	}
+	if ((offset & 0x1) == 0)
+		seq_putc(m, ' ');
+	seq_printf(m, "%02x", byte);
+}
+
+void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
+{
+	u8 byte = 0;
+	unsigned int byte_bits = 0;
+	unsigned int offset = 0;
+	int i;
+
+	for (i = 0; i < sb->map_nr; i++) {
+		unsigned long word = READ_ONCE(sb->map[i].word);
+		unsigned int word_bits = READ_ONCE(sb->map[i].depth);
+
+		while (word_bits > 0) {
+			unsigned int bits = min(8 - byte_bits, word_bits);
+
+			byte |= (word & (BIT(bits) - 1)) << byte_bits;
+			byte_bits += bits;
+			if (byte_bits == 8) {
+				emit_byte(m, offset, byte);
+				byte = 0;
+				byte_bits = 0;
+				offset++;
+			}
+			word >>= bits;
+			word_bits -= bits;
+		}
+	}
+	if (byte_bits) {
+		emit_byte(m, offset, byte);
+		offset++;
+	}
+	if (offset)
+		seq_putc(m, '\n');
+}
+EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
+
 static unsigned int sbq_calc_wake_batch(unsigned int depth)
 {
 	unsigned int wake_batch;
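For reference, the hexdump layout produced by emit_byte()/sbitmap_bitmap_show() above can be exercised in userspace. This sketch replaces struct sbitmap with a plain word/depth array and prints 16 bytes per row with a "%08x:" offset prefix and a space before every byte pair, exactly as the new helper does.

#include <stdio.h>

static void emit_byte(unsigned int offset, unsigned char byte)
{
        if ((offset & 0xf) == 0) {
                if (offset != 0)
                        putchar('\n');
                printf("%08x:", offset);
        }
        if ((offset & 0x1) == 0)
                putchar(' ');
        printf("%02x", byte);
}

int main(void)
{
        /* Two "sbitmap words": 32 used bits, then a 5-bit tail. */
        unsigned long words[] = { 0xdeadbeefUL, 0x1fUL };
        unsigned int depths[] = { 32, 5 };
        unsigned char byte = 0;
        unsigned int byte_bits = 0, offset = 0;

        for (unsigned int i = 0; i < 2; i++) {
                unsigned long word = words[i];
                unsigned int word_bits = depths[i];

                while (word_bits > 0) {
                        unsigned int avail = 8 - byte_bits;
                        unsigned int bits = word_bits < avail ? word_bits : avail;

                        byte |= (word & ((1UL << bits) - 1)) << byte_bits;
                        byte_bits += bits;
                        if (byte_bits == 8) {
                                emit_byte(offset++, byte);
                                byte = 0;
                                byte_bits = 0;
                        }
                        word >>= bits;
                        word_bits -= bits;
                }
        }
        if (byte_bits)
                emit_byte(offset++, byte);
        if (offset)
                putchar('\n');
        return 0;
}

The sketch prints "00000000: efbe adde 1f": least-significant bits come out first, which is also how the resulting debugfs file reads.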
@@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
 {
-	sbq->wake_batch = sbq_calc_wake_batch(depth);
+	unsigned int wake_batch = sbq_calc_wake_batch(depth);
+	int i;
+
+	if (sbq->wake_batch != wake_batch) {
+		WRITE_ONCE(sbq->wake_batch, wake_batch);
+		/*
+		 * Pairs with the memory barrier in sbq_wake_up() to ensure that
+		 * the batch size is updated before the wait counts.
+		 */
+		smp_mb__before_atomic();
+		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
+			atomic_set(&sbq->ws[i].wait_cnt, 1);
+	}
 	sbitmap_resize(&sbq->sb, depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 static void sbq_wake_up(struct sbitmap_queue *sbq)
 {
 	struct sbq_wait_state *ws;
+	unsigned int wake_batch;
 	int wait_cnt;
 
-	/* Ensure that the wait list checks occur after clear_bit(). */
-	smp_mb();
+	/*
+	 * Pairs with the memory barrier in set_current_state() to ensure the
+	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
+	 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
+	 * the comment on waitqueue_active(). This is __after_atomic because we
+	 * just did clear_bit() in the caller.
+	 */
+	smp_mb__after_atomic();
 
 	ws = sbq_wake_ptr(sbq);
 	if (!ws)
 		return;
 
 	wait_cnt = atomic_dec_return(&ws->wait_cnt);
-	if (unlikely(wait_cnt < 0))
-		wait_cnt = atomic_inc_return(&ws->wait_cnt);
-	if (wait_cnt == 0) {
-		atomic_add(sbq->wake_batch, &ws->wait_cnt);
+	if (wait_cnt <= 0) {
+		wake_batch = READ_ONCE(sbq->wake_batch);
+		/*
+		 * Pairs with the memory barrier in sbitmap_queue_resize() to
+		 * ensure that we see the batch size update before the wait
+		 * count is reset.
+		 */
+		smp_mb__before_atomic();
+		/*
+		 * If there are concurrent callers to sbq_wake_up(), the last
+		 * one to decrement the wait count below zero will bump it back
+		 * up. If there is a concurrent resize, the count reset will
+		 * either cause the cmpxchg to fail or overwrite after the
+		 * cmpxchg.
+		 */
+		atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
 		sbq_index_atomic_inc(&sbq->wake_index);
 		wake_up(&ws->wait);
 	}
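The reworked sbq_wake_up() wakes a batch of waiters each time the per-waitqueue count crosses zero and refills the count with a cmpxchg, so racing wakers (or a concurrent resize) cannot refill it twice. A rough C11 userspace model of just that counting scheme, not the kernel code:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int wait_cnt;
static atomic_uint wake_batch;

static int wake_one(void)
{
        int cnt = atomic_fetch_sub(&wait_cnt, 1) - 1;

        if (cnt <= 0) {
                int expected = cnt;
                unsigned int batch = atomic_load(&wake_batch);

                /* Only the caller that still sees 'cnt' wins the refill. */
                atomic_compare_exchange_strong(&wait_cnt, &expected,
                                               cnt + (int)batch);
                return 1;       /* a batch boundary: wake waiters here */
        }
        return 0;
}

int main(void)
{
        atomic_init(&wait_cnt, 4);
        atomic_init(&wake_batch, 4);

        for (int i = 1; i <= 10; i++)
                printf("completion %2d -> wake batch? %d\n", i, wake_one());
        return 0;
}

With a batch of 4, only every fourth call reports a batch boundary, mirroring how the real code defers wake_up() until wake_batch completions have accumulated.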
@@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 	int i, wake_index;
 
 	/*
-	 * Make sure all changes prior to this are visible from other CPUs.
+	 * Pairs with the memory barrier in set_current_state() like in
+	 * sbq_wake_up().
 	 */
 	smp_mb();
 	wake_index = atomic_read(&sbq->wake_index);
@@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
 	}
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
+
+void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
+{
+	bool first;
+	int i;
+
+	sbitmap_show(&sbq->sb, m);
+
+	seq_puts(m, "alloc_hint={");
+	first = true;
+	for_each_possible_cpu(i) {
+		if (!first)
+			seq_puts(m, ", ");
+		first = false;
+		seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
+	}
+	seq_puts(m, "}\n");
+
+	seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
+	seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
+
+	seq_puts(m, "ws={\n");
+	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
+		struct sbq_wait_state *ws = &sbq->ws[i];
+
+		seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
+			   atomic_read(&ws->wait_cnt),
+			   waitqueue_active(&ws->wait) ? "active" : "inactive");
+	}
+	seq_puts(m, "}\n");
+
+	seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_show);
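Because sbitmap_queue_show() takes a seq_file, it can be hung off a debugfs attribute directly, which is how the blk-mq debugfs code in this series consumes it. Below is a hypothetical, minimal wiring sketch: the my_tags_* names are invented, the parent dentry and the sbitmap_queue pointer are assumed to come from the caller, and the helper is assumed to be declared in <linux/sbitmap.h> like the rest of the new exports.

#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/sbitmap.h>
#include <linux/seq_file.h>

static int my_tags_show(struct seq_file *m, void *data)
{
        struct sbitmap_queue *sbq = m->private;

        sbitmap_queue_show(sbq, m);
        return 0;
}

static int my_tags_open(struct inode *inode, struct file *file)
{
        /* debugfs_create_file() stashed the sbitmap_queue in i_private. */
        return single_open(file, my_tags_show, inode->i_private);
}

static const struct file_operations my_tags_fops = {
        .owner   = THIS_MODULE,
        .open    = my_tags_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};

/*
 * debugfs_create_file("tags", 0400, parent, sbq, &my_tags_fops) then exposes
 * the depth/busy/wake_batch/ws dump produced above.
 */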
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 3bfed5ab2475..39ce616a9d71 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -237,6 +237,7 @@ static __init int bdi_class_init(void)
 
 	bdi_class->dev_groups = bdi_dev_groups;
 	bdi_debug_init();
+
 	return 0;
 }
 postcore_initcall(bdi_class_init);
@@ -758,15 +759,20 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 	if (!bdi->wb_congested)
 		return -ENOMEM;
 
+	atomic_set(&bdi->wb_congested->refcnt, 1);
+
 	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (err) {
-		kfree(bdi->wb_congested);
+		wb_congested_put(bdi->wb_congested);
 		return err;
 	}
 	return 0;
 }
 
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
+static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
+{
+	wb_congested_put(bdi->wb_congested);
+}
 
 #endif /* CONFIG_CGROUP_WRITEBACK */
 
@@ -776,6 +782,7 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	bdi->dev = NULL;
 
+	kref_init(&bdi->refcnt);
 	bdi->min_ratio = 0;
 	bdi->max_ratio = 100;
 	bdi->max_prop_frac = FPROP_FRAC_BASE;
@@ -791,6 +798,22 @@ int bdi_init(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_init);
 
+struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
+{
+	struct backing_dev_info *bdi;
+
+	bdi = kmalloc_node(sizeof(struct backing_dev_info),
+			   gfp_mask | __GFP_ZERO, node_id);
+	if (!bdi)
+		return NULL;
+
+	if (bdi_init(bdi)) {
+		kfree(bdi);
+		return NULL;
+	}
+	return bdi;
+}
+
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 		 const char *fmt, ...)
 {
@@ -871,12 +894,26 @@ void bdi_unregister(struct backing_dev_info *bdi)
 	}
 }
 
-void bdi_exit(struct backing_dev_info *bdi)
+static void bdi_exit(struct backing_dev_info *bdi)
 {
 	WARN_ON_ONCE(bdi->dev);
 	wb_exit(&bdi->wb);
 }
 
+static void release_bdi(struct kref *ref)
+{
+	struct backing_dev_info *bdi =
+			container_of(ref, struct backing_dev_info, refcnt);
+
+	bdi_exit(bdi);
+	kfree(bdi);
+}
+
+void bdi_put(struct backing_dev_info *bdi)
+{
+	kref_put(&bdi->refcnt, release_bdi);
+}
+
 void bdi_destroy(struct backing_dev_info *bdi)
 {
 	bdi_unregister(bdi);
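The backing-dev changes above move struct backing_dev_info to a kref-based lifetime: bdi_alloc_node() hands back an object with one reference held, and bdi_put() releases and frees it on the final drop. A userspace model of that pattern follows; fake_bdi and friends are illustrative names only, with C11 atomics standing in for kref.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_bdi {
        atomic_int refcnt;
        char name[16];
};

static struct fake_bdi *fake_bdi_alloc(const char *name)
{
        struct fake_bdi *bdi = calloc(1, sizeof(*bdi));

        if (!bdi)
                return NULL;
        atomic_init(&bdi->refcnt, 1);   /* caller starts with one reference */
        snprintf(bdi->name, sizeof(bdi->name), "%s", name);
        return bdi;
}

static void fake_bdi_put(struct fake_bdi *bdi)
{
        /* Free only when the last reference is dropped. */
        if (atomic_fetch_sub(&bdi->refcnt, 1) == 1) {
                printf("releasing %s\n", bdi->name);
                free(bdi);
        }
}

int main(void)
{
        struct fake_bdi *bdi = fake_bdi_alloc("example");

        if (!bdi)
                return 1;
        atomic_fetch_add(&bdi->refcnt, 1);      /* a second user takes a ref */
        fake_bdi_put(bdi);                      /* first put: still alive */
        fake_bdi_put(bdi);                      /* last put: freed */
        return 0;
}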
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 290e8b7d3181..216449825859 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1988,11 +1988,11 @@ void laptop_mode_timer_fn(unsigned long data)
 	 * We want to write everything out, not just down to the dirty
 	 * threshold
 	 */
-	if (!bdi_has_dirty_io(&q->backing_dev_info))
+	if (!bdi_has_dirty_io(q->backing_dev_info))
 		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(wb, &q->backing_dev_info.wb_list, bdi_node)
+	list_for_each_entry_rcu(wb, &q->backing_dev_info->wb_list, bdi_node)
 		if (wb_has_dirty_io(wb))
 			wb_start_writeback(wb, nr_pages, true,
 					   WB_REASON_LAPTOP_TIMER);