author     Jens Axboe <axboe@fb.com>   2017-02-17 16:06:45 -0500
committer  Jens Axboe <axboe@fb.com>   2017-02-17 16:06:45 -0500
commit     6010720da8aab51f33beee63b73cf88016e9b250 (patch)
tree       a4c5a7f645998e86a1f49cb05f8e0c4e51448294
parent     2fe1e8a7b2f4dcac3fcb07ff06b0ae7396201fd6 (diff)
parent     8a9ae523282f324989850fcf41312b42a2fb9296 (diff)
Merge branch 'for-4.11/block' into for-4.11/linus-merge
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--  Documentation/cdrom/cdrom-standard.tex  9
-rw-r--r--  MAINTAINERS  15
-rw-r--r--  block/Kconfig  19
-rw-r--r--  block/Kconfig.iosched  50
-rw-r--r--  block/Makefile  5
-rw-r--r--  block/bio.c  6
-rw-r--r--  block/blk-cgroup.c  22
-rw-r--r--  block/blk-core.c  32
-rw-r--r--  block/blk-exec.c  3
-rw-r--r--  block/blk-flush.c  12
-rw-r--r--  block/blk-ioc.c  12
-rw-r--r--  block/blk-merge.c  4
-rw-r--r--  block/blk-mq-debugfs.c  756
-rw-r--r--  block/blk-mq-sched.c  481
-rw-r--r--  block/blk-mq-sched.h  142
-rw-r--r--  block/blk-mq-sysfs.c  235
-rw-r--r--  block/blk-mq-tag.c  190
-rw-r--r--  block/blk-mq-tag.h  10
-rw-r--r--  block/blk-mq.c  555
-rw-r--r--  block/blk-mq.h  77
-rw-r--r--  block/blk-tag.c  1
-rw-r--r--  block/blk-throttle.c  6
-rw-r--r--  block/blk.h  26
-rw-r--r--  block/cfq-iosched.c  10
-rw-r--r--  block/deadline-iosched.c  2
-rw-r--r--  block/elevator.c  253
-rw-r--r--  block/mq-deadline.c  555
-rw-r--r--  block/noop-iosched.c  2
-rw-r--r--  block/opal_proto.h  452
-rw-r--r--  block/partitions/efi.c  17
-rw-r--r--  block/sed-opal.c  2488
-rw-r--r--  drivers/block/cciss.c  54
-rw-r--r--  drivers/block/cciss.h  6
-rw-r--r--  drivers/block/floppy.c  2
-rw-r--r--  drivers/block/loop.c  17
-rw-r--r--  drivers/block/null_blk.c  6
-rw-r--r--  drivers/block/paride/pcd.c  2
-rw-r--r--  drivers/cdrom/cdrom.c  58
-rw-r--r--  drivers/cdrom/gdrom.c  12
-rw-r--r--  drivers/ide/ide-cd.c  2
-rw-r--r--  drivers/lightnvm/Kconfig  9
-rw-r--r--  drivers/lightnvm/Makefile  3
-rw-r--r--  drivers/lightnvm/core.c  1027
-rw-r--r--  drivers/lightnvm/gennvm.c  657
-rw-r--r--  drivers/lightnvm/gennvm.h  62
-rw-r--r--  drivers/lightnvm/rrpc.c  7
-rw-r--r--  drivers/lightnvm/rrpc.h  3
-rw-r--r--  drivers/lightnvm/sysblk.c  733
-rw-r--r--  drivers/md/bcache/request.c  2
-rw-r--r--  drivers/md/dm-cache-target.c  13
-rw-r--r--  drivers/md/dm-thin.c  13
-rw-r--r--  drivers/nvme/host/core.c  30
-rw-r--r--  drivers/nvme/host/lightnvm.c  315
-rw-r--r--  drivers/nvme/host/nvme.h  13
-rw-r--r--  drivers/nvme/host/pci.c  15
-rw-r--r--  drivers/scsi/sr.c  2
-rw-r--r--  include/linux/blk-mq.h  9
-rw-r--r--  include/linux/blk_types.h  31
-rw-r--r--  include/linux/blkdev.h  34
-rw-r--r--  include/linux/cdrom.h  5
-rw-r--r--  include/linux/elevator.h  36
-rw-r--r--  include/linux/lightnvm.h  138
-rw-r--r--  include/linux/nvme.h  1
-rw-r--r--  include/linux/sbitmap.h  30
-rw-r--r--  include/uapi/linux/lightnvm.h  50
-rw-r--r--  include/uapi/linux/sed-opal.h  119
-rw-r--r--  lib/sbitmap.c  139
68 files changed, 7340 insertions, 2832 deletions
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
index c06233fe52ac..8f85b0e41046 100644
--- a/Documentation/cdrom/cdrom-standard.tex
+++ b/Documentation/cdrom/cdrom-standard.tex
@@ -249,7 +249,6 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr
249 unsigned\ long);\cr 249 unsigned\ long);\cr
250\noalign{\medskip} 250\noalign{\medskip}
251 &const\ int& capability;& capability flags \cr 251 &const\ int& capability;& capability flags \cr
252 &int& n_minors;& number of active minor devices \cr
253\};\cr 252\};\cr
254} 253}
255$$ 254$$
@@ -258,13 +257,7 @@ it should add a function pointer to this $struct$. When a particular
258function is not implemented, however, this $struct$ should contain a 257function is not implemented, however, this $struct$ should contain a
259NULL instead. The $capability$ flags specify the capabilities of the 258NULL instead. The $capability$ flags specify the capabilities of the
260\cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive 259\cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive
261is registered with the \UCD. The value $n_minors$ should be a positive 260is registered with the \UCD.
262value indicating the number of minor devices that are supported by
263the low-level device driver, normally~1. Although these two variables
264are `informative' rather than `operational,' they are included in
265$cdrom_device_ops$ because they describe the capability of the {\em
266driver\/} rather than the {\em drive}. Nomenclature has always been
267difficult in computer programming.
268 261
269Note that most functions have fewer parameters than their 262Note that most functions have fewer parameters than their
270$blkdev_fops$ counterparts. This is because very little of the 263$blkdev_fops$ counterparts. This is because very little of the
diff --git a/MAINTAINERS b/MAINTAINERS
index 527d13759ecc..864e1fd31f0c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8612,10 +8612,10 @@ S: Maintained
8612F: drivers/net/ethernet/netronome/ 8612F: drivers/net/ethernet/netronome/
8613 8613
8614NETWORK BLOCK DEVICE (NBD) 8614NETWORK BLOCK DEVICE (NBD)
8615M: Markus Pargmann <mpa@pengutronix.de> 8615M: Josef Bacik <jbacik@fb.com>
8616S: Maintained 8616S: Maintained
8617L: linux-block@vger.kernel.org
8617L: nbd-general@lists.sourceforge.net 8618L: nbd-general@lists.sourceforge.net
8618T: git git://git.pengutronix.de/git/mpa/linux-nbd.git
8619F: Documentation/blockdev/nbd.txt 8619F: Documentation/blockdev/nbd.txt
8620F: drivers/block/nbd.c 8620F: drivers/block/nbd.c
8621F: include/uapi/linux/nbd.h 8621F: include/uapi/linux/nbd.h
@@ -11089,6 +11089,17 @@ L: linux-mmc@vger.kernel.org
11089S: Maintained 11089S: Maintained
11090F: drivers/mmc/host/sdhci-spear.c 11090F: drivers/mmc/host/sdhci-spear.c
11091 11091
11092SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER
11093M: Scott Bauer <scott.bauer@intel.com>
11094M: Jonathan Derrick <jonathan.derrick@intel.com>
11095M: Rafael Antognolli <rafael.antognolli@intel.com>
11096L: linux-block@vger.kernel.org
11097S: Supported
11098F: block/sed*
11099F: block/opal_proto.h
11100F: include/linux/sed*
11101F: include/uapi/linux/sed*
11102
11092SECURITY SUBSYSTEM 11103SECURITY SUBSYSTEM
11093M: James Morris <james.l.morris@oracle.com> 11104M: James Morris <james.l.morris@oracle.com>
11094M: "Serge E. Hallyn" <serge@hallyn.com> 11105M: "Serge E. Hallyn" <serge@hallyn.com>
diff --git a/block/Kconfig b/block/Kconfig
index 8bf114a3858a..1aef809affae 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -147,6 +147,25 @@ config BLK_WBT_MQ
147 Multiqueue currently doesn't have support for IO scheduling, 147 Multiqueue currently doesn't have support for IO scheduling,
148 enabling this option is recommended. 148 enabling this option is recommended.
149 149
150config BLK_DEBUG_FS
151 bool "Block layer debugging information in debugfs"
152 default y
153 depends on DEBUG_FS
154 ---help---
155 Include block layer debugging information in debugfs. This information
156 is mostly useful for kernel developers, but it doesn't incur any cost
157 at runtime.
158
159 Unless you are building a kernel for a tiny system, you should
160 say Y here.
161
162config BLK_SED_OPAL
163 bool "Logic for interfacing with Opal enabled SEDs"
164 ---help---
165 Builds Logic for interfacing with Opal enabled controllers.
166 Enabling this option enables users to setup/unlock/lock
167 Locking ranges for SED devices using the Opal protocol.
168
150menu "Partition Types" 169menu "Partition Types"
151 170
152source "block/partitions/Kconfig" 171source "block/partitions/Kconfig"
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 421bef9c4c48..0715ce93daef 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -63,6 +63,56 @@ config DEFAULT_IOSCHED
63 default "cfq" if DEFAULT_CFQ 63 default "cfq" if DEFAULT_CFQ
64 default "noop" if DEFAULT_NOOP 64 default "noop" if DEFAULT_NOOP
65 65
66config MQ_IOSCHED_DEADLINE
67 tristate "MQ deadline I/O scheduler"
68 default y
69 ---help---
70 MQ version of the deadline IO scheduler.
71
72config MQ_IOSCHED_NONE
73 bool
74 default y
75
76choice
77 prompt "Default single-queue blk-mq I/O scheduler"
78 default DEFAULT_SQ_NONE
79 help
80 Select the I/O scheduler which will be used by default for blk-mq
81 managed block devices with a single queue.
82
83 config DEFAULT_SQ_DEADLINE
84 bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
85
86 config DEFAULT_SQ_NONE
87 bool "None"
88
89endchoice
90
91config DEFAULT_SQ_IOSCHED
92 string
93 default "mq-deadline" if DEFAULT_SQ_DEADLINE
94 default "none" if DEFAULT_SQ_NONE
95
96choice
97 prompt "Default multi-queue blk-mq I/O scheduler"
98 default DEFAULT_MQ_NONE
99 help
100 Select the I/O scheduler which will be used by default for blk-mq
101 managed block devices with multiple queues.
102
103 config DEFAULT_MQ_DEADLINE
104 bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
105
106 config DEFAULT_MQ_NONE
107 bool "None"
108
109endchoice
110
111config DEFAULT_MQ_IOSCHED
112 string
113 default "mq-deadline" if DEFAULT_MQ_DEADLINE
114 default "none" if DEFAULT_MQ_NONE
115
66endmenu 116endmenu
67 117
68endif 118endif
diff --git a/block/Makefile b/block/Makefile
index a827f988c4e6..6ba1b1bc9529 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ 7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
8 blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ 8 blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
9 blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ 9 blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
10 genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ 10 genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
11 badblocks.o partitions/ 11 badblocks.o partitions/
12 12
@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
18obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 18obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
19obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o 19obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
20obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o 20obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
21obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
21 22
22obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 23obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
23obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o 24obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
@@ -25,3 +26,5 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
25obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o 26obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
26obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o 27obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
27obj-$(CONFIG_BLK_WBT) += blk-wbt.o 28obj-$(CONFIG_BLK_WBT) += blk-wbt.o
29obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
30obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
diff --git a/block/bio.c b/block/bio.c
index 2b375020fc49..d3c26d1cb1da 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1403,7 +1403,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
1403 bio_set_flag(bio, BIO_USER_MAPPED); 1403 bio_set_flag(bio, BIO_USER_MAPPED);
1404 1404
1405 /* 1405 /*
1406 * subtle -- if __bio_map_user() ended up bouncing a bio, 1406 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
1407 * it would normally disappear when its bi_end_io is run. 1407 * it would normally disappear when its bi_end_io is run.
1408 * however, we need it for the unmap, so grab an extra 1408 * however, we need it for the unmap, so grab an extra
1409 * reference to it 1409 * reference to it
@@ -1445,8 +1445,8 @@ static void __bio_unmap_user(struct bio *bio)
1445 * bio_unmap_user - unmap a bio 1445 * bio_unmap_user - unmap a bio
1446 * @bio: the bio being unmapped 1446 * @bio: the bio being unmapped
1447 * 1447 *
1448 * Unmap a bio previously mapped by bio_map_user(). Must be called with 1448 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
1449 * a process context. 1449 * process context.
1450 * 1450 *
1451 * bio_unmap_user() may sleep. 1451 * bio_unmap_user() may sleep.
1452 */ 1452 */
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8ba0af780e88..fb59a3edc778 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1223,7 +1223,10 @@ int blkcg_activate_policy(struct request_queue *q,
1223 if (blkcg_policy_enabled(q, pol)) 1223 if (blkcg_policy_enabled(q, pol))
1224 return 0; 1224 return 0;
1225 1225
1226 blk_queue_bypass_start(q); 1226 if (q->mq_ops)
1227 blk_mq_freeze_queue(q);
1228 else
1229 blk_queue_bypass_start(q);
1227pd_prealloc: 1230pd_prealloc:
1228 if (!pd_prealloc) { 1231 if (!pd_prealloc) {
1229 pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); 1232 pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
@@ -1261,7 +1264,10 @@ pd_prealloc:
1261 1264
1262 spin_unlock_irq(q->queue_lock); 1265 spin_unlock_irq(q->queue_lock);
1263out_bypass_end: 1266out_bypass_end:
1264 blk_queue_bypass_end(q); 1267 if (q->mq_ops)
1268 blk_mq_unfreeze_queue(q);
1269 else
1270 blk_queue_bypass_end(q);
1265 if (pd_prealloc) 1271 if (pd_prealloc)
1266 pol->pd_free_fn(pd_prealloc); 1272 pol->pd_free_fn(pd_prealloc);
1267 return ret; 1273 return ret;
@@ -1284,7 +1290,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
1284 if (!blkcg_policy_enabled(q, pol)) 1290 if (!blkcg_policy_enabled(q, pol))
1285 return; 1291 return;
1286 1292
1287 blk_queue_bypass_start(q); 1293 if (q->mq_ops)
1294 blk_mq_freeze_queue(q);
1295 else
1296 blk_queue_bypass_start(q);
1297
1288 spin_lock_irq(q->queue_lock); 1298 spin_lock_irq(q->queue_lock);
1289 1299
1290 __clear_bit(pol->plid, q->blkcg_pols); 1300 __clear_bit(pol->plid, q->blkcg_pols);
@@ -1304,7 +1314,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
1304 } 1314 }
1305 1315
1306 spin_unlock_irq(q->queue_lock); 1316 spin_unlock_irq(q->queue_lock);
1307 blk_queue_bypass_end(q); 1317
1318 if (q->mq_ops)
1319 blk_mq_unfreeze_queue(q);
1320 else
1321 blk_queue_bypass_end(q);
1308} 1322}
1309EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); 1323EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
1310 1324
diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c58b64..b2df55a65250 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
39 39
40#include "blk.h" 40#include "blk.h"
41#include "blk-mq.h" 41#include "blk-mq.h"
42#include "blk-mq-sched.h"
42#include "blk-wbt.h" 43#include "blk-wbt.h"
43 44
44EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); 45EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
134 rq->cmd = rq->__cmd; 135 rq->cmd = rq->__cmd;
135 rq->cmd_len = BLK_MAX_CDB; 136 rq->cmd_len = BLK_MAX_CDB;
136 rq->tag = -1; 137 rq->tag = -1;
138 rq->internal_tag = -1;
137 rq->start_time = jiffies; 139 rq->start_time = jiffies;
138 set_start_time_ns(rq); 140 set_start_time_ns(rq);
139 rq->part = NULL; 141 rq->part = NULL;
@@ -525,12 +527,14 @@ void blk_set_queue_dying(struct request_queue *q)
525 else { 527 else {
526 struct request_list *rl; 528 struct request_list *rl;
527 529
530 spin_lock_irq(q->queue_lock);
528 blk_queue_for_each_rl(rl, q) { 531 blk_queue_for_each_rl(rl, q) {
529 if (rl->rq_pool) { 532 if (rl->rq_pool) {
530 wake_up(&rl->wait[BLK_RW_SYNC]); 533 wake_up(&rl->wait[BLK_RW_SYNC]);
531 wake_up(&rl->wait[BLK_RW_ASYNC]); 534 wake_up(&rl->wait[BLK_RW_ASYNC]);
532 } 535 }
533 } 536 }
537 spin_unlock_irq(q->queue_lock);
534 } 538 }
535} 539}
536EXPORT_SYMBOL_GPL(blk_set_queue_dying); 540EXPORT_SYMBOL_GPL(blk_set_queue_dying);
@@ -1033,29 +1037,13 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
1033 * Flush requests do not use the elevator so skip initialization. 1037 * Flush requests do not use the elevator so skip initialization.
1034 * This allows a request to share the flush and elevator data. 1038 * This allows a request to share the flush and elevator data.
1035 */ 1039 */
1036 if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) 1040 if (op_is_flush(bio->bi_opf))
1037 return false; 1041 return false;
1038 1042
1039 return true; 1043 return true;
1040} 1044}
1041 1045
1042/** 1046/**
1043 * rq_ioc - determine io_context for request allocation
1044 * @bio: request being allocated is for this bio (can be %NULL)
1045 *
1046 * Determine io_context to use for request allocation for @bio. May return
1047 * %NULL if %current->io_context doesn't exist.
1048 */
1049static struct io_context *rq_ioc(struct bio *bio)
1050{
1051#ifdef CONFIG_BLK_CGROUP
1052 if (bio && bio->bi_ioc)
1053 return bio->bi_ioc;
1054#endif
1055 return current->io_context;
1056}
1057
1058/**
1059 * __get_request - get a free request 1047 * __get_request - get a free request
1060 * @rl: request list to allocate from 1048 * @rl: request list to allocate from
1061 * @op: operation and flags 1049 * @op: operation and flags
@@ -1655,7 +1643,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1655 return BLK_QC_T_NONE; 1643 return BLK_QC_T_NONE;
1656 } 1644 }
1657 1645
1658 if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) { 1646 if (op_is_flush(bio->bi_opf)) {
1659 spin_lock_irq(q->queue_lock); 1647 spin_lock_irq(q->queue_lock);
1660 where = ELEVATOR_INSERT_FLUSH; 1648 where = ELEVATOR_INSERT_FLUSH;
1661 goto get_rq; 1649 goto get_rq;
@@ -1894,7 +1882,7 @@ generic_make_request_checks(struct bio *bio)
1894 * drivers without flush support don't have to worry 1882 * drivers without flush support don't have to worry
1895 * about them. 1883 * about them.
1896 */ 1884 */
1897 if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && 1885 if (op_is_flush(bio->bi_opf) &&
1898 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { 1886 !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
1899 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); 1887 bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
1900 if (!nr_sectors) { 1888 if (!nr_sectors) {
@@ -2143,7 +2131,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2143 if (q->mq_ops) { 2131 if (q->mq_ops) {
2144 if (blk_queue_io_stat(q)) 2132 if (blk_queue_io_stat(q))
2145 blk_account_io_start(rq, true); 2133 blk_account_io_start(rq, true);
2146 blk_mq_insert_request(rq, false, true, false); 2134 blk_mq_sched_insert_request(rq, false, true, false, false);
2147 return 0; 2135 return 0;
2148 } 2136 }
2149 2137
@@ -2159,7 +2147,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2159 */ 2147 */
2160 BUG_ON(blk_queued_rq(rq)); 2148 BUG_ON(blk_queued_rq(rq));
2161 2149
2162 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) 2150 if (op_is_flush(rq->cmd_flags))
2163 where = ELEVATOR_INSERT_FLUSH; 2151 where = ELEVATOR_INSERT_FLUSH;
2164 2152
2165 add_acct_request(q, rq, where); 2153 add_acct_request(q, rq, where);
@@ -3270,7 +3258,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3270 /* 3258 /*
3271 * rq is already accounted, so use raw insert 3259 * rq is already accounted, so use raw insert
3272 */ 3260 */
3273 if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) 3261 if (op_is_flush(rq->cmd_flags))
3274 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); 3262 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
3275 else 3263 else
3276 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); 3264 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
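
The blk-core.c hunks above replace the open-coded bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) test with the new op_is_flush() helper. The helper itself is added to include/linux/blk_types.h (listed in the diffstat but its hunk is not included in this excerpt); assuming it is simply a wrapper for the flag test it replaces, it would look roughly like this sketch:

#include <linux/blk_types.h>

/*
 * Assumed definition of op_is_flush(); the real helper lands in
 * include/linux/blk_types.h in this series (hunk not shown above) and
 * should be equivalent to the open-coded test it replaces.
 */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}
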
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3ecb00a6cf45..ed1f10165268 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -9,6 +9,7 @@
9#include <linux/sched/sysctl.h> 9#include <linux/sched/sysctl.h>
10 10
11#include "blk.h" 11#include "blk.h"
12#include "blk-mq-sched.h"
12 13
13/* 14/*
14 * for max sense size 15 * for max sense size
@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
65 * be reused after dying flag is set 66 * be reused after dying flag is set
66 */ 67 */
67 if (q->mq_ops) { 68 if (q->mq_ops) {
68 blk_mq_insert_request(rq, at_head, true, false); 69 blk_mq_sched_insert_request(rq, at_head, true, false, false);
69 return; 70 return;
70 } 71 }
71 72
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 20b7c7a02f1c..4427896641ac 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -74,6 +74,7 @@
74#include "blk.h" 74#include "blk.h"
75#include "blk-mq.h" 75#include "blk-mq.h"
76#include "blk-mq-tag.h" 76#include "blk-mq-tag.h"
77#include "blk-mq-sched.h"
77 78
78/* FLUSH/FUA sequences */ 79/* FLUSH/FUA sequences */
79enum { 80enum {
@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
391 * the comment in flush_end_io(). 392 * the comment in flush_end_io().
392 */ 393 */
393 spin_lock_irqsave(&fq->mq_flush_lock, flags); 394 spin_lock_irqsave(&fq->mq_flush_lock, flags);
394 if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) 395 blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
395 blk_mq_run_hw_queue(hctx, true);
396 spin_unlock_irqrestore(&fq->mq_flush_lock, flags); 396 spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
397
398 blk_mq_run_hw_queue(hctx, true);
397} 399}
398 400
399/** 401/**
@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
453 */ 455 */
454 if ((policy & REQ_FSEQ_DATA) && 456 if ((policy & REQ_FSEQ_DATA) &&
455 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { 457 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
456 if (q->mq_ops) { 458 if (q->mq_ops)
457 blk_mq_insert_request(rq, false, true, false); 459 blk_mq_sched_insert_request(rq, false, true, false, false);
458 } else 460 else
459 list_add_tail(&rq->queuelist, &q->queue_head); 461 list_add_tail(&rq->queuelist, &q->queue_head);
460 return; 462 return;
461 } 463 }
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 381cb50a673c..fe186a9eade9 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -43,8 +43,10 @@ static void ioc_exit_icq(struct io_cq *icq)
43 if (icq->flags & ICQ_EXITED) 43 if (icq->flags & ICQ_EXITED)
44 return; 44 return;
45 45
46 if (et->ops.elevator_exit_icq_fn) 46 if (et->uses_mq && et->ops.mq.exit_icq)
47 et->ops.elevator_exit_icq_fn(icq); 47 et->ops.mq.exit_icq(icq);
48 else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
49 et->ops.sq.elevator_exit_icq_fn(icq);
48 50
49 icq->flags |= ICQ_EXITED; 51 icq->flags |= ICQ_EXITED;
50} 52}
@@ -383,8 +385,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
383 if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { 385 if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
384 hlist_add_head(&icq->ioc_node, &ioc->icq_list); 386 hlist_add_head(&icq->ioc_node, &ioc->icq_list);
385 list_add(&icq->q_node, &q->icq_list); 387 list_add(&icq->q_node, &q->icq_list);
386 if (et->ops.elevator_init_icq_fn) 388 if (et->uses_mq && et->ops.mq.init_icq)
387 et->ops.elevator_init_icq_fn(icq); 389 et->ops.mq.init_icq(icq);
390 else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
391 et->ops.sq.elevator_init_icq_fn(icq);
388 } else { 392 } else {
389 kmem_cache_free(et->icq_cache, icq); 393 kmem_cache_free(et->icq_cache, icq);
390 icq = ioc_lookup_icq(ioc, q); 394 icq = ioc_lookup_icq(ioc, q);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 182398cb1524..6aa43dec5af4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
763{ 763{
764 struct elevator_queue *e = q->elevator; 764 struct elevator_queue *e = q->elevator;
765 765
766 if (e->type->ops.elevator_allow_rq_merge_fn) 766 if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
767 if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) 767 if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
768 return 0; 768 return 0;
769 769
770 return attempt_merge(q, rq, next); 770 return attempt_merge(q, rq, next);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
new file mode 100644
index 000000000000..5cd2b435a9f5
--- /dev/null
+++ b/block/blk-mq-debugfs.c
@@ -0,0 +1,756 @@
1/*
2 * Copyright (C) 2017 Facebook
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <https://www.gnu.org/licenses/>.
15 */
16
17#include <linux/kernel.h>
18#include <linux/blkdev.h>
19#include <linux/debugfs.h>
20
21#include <linux/blk-mq.h>
22#include "blk-mq.h"
23#include "blk-mq-tag.h"
24
25struct blk_mq_debugfs_attr {
26 const char *name;
27 umode_t mode;
28 const struct file_operations *fops;
29};
30
31static struct dentry *block_debugfs_root;
32
33static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file,
34 const struct seq_operations *ops)
35{
36 struct seq_file *m;
37 int ret;
38
39 ret = seq_open(file, ops);
40 if (!ret) {
41 m = file->private_data;
42 m->private = inode->i_private;
43 }
44 return ret;
45}
46
47static int hctx_state_show(struct seq_file *m, void *v)
48{
49 struct blk_mq_hw_ctx *hctx = m->private;
50
51 seq_printf(m, "0x%lx\n", hctx->state);
52 return 0;
53}
54
55static int hctx_state_open(struct inode *inode, struct file *file)
56{
57 return single_open(file, hctx_state_show, inode->i_private);
58}
59
60static const struct file_operations hctx_state_fops = {
61 .open = hctx_state_open,
62 .read = seq_read,
63 .llseek = seq_lseek,
64 .release = single_release,
65};
66
67static int hctx_flags_show(struct seq_file *m, void *v)
68{
69 struct blk_mq_hw_ctx *hctx = m->private;
70
71 seq_printf(m, "0x%lx\n", hctx->flags);
72 return 0;
73}
74
75static int hctx_flags_open(struct inode *inode, struct file *file)
76{
77 return single_open(file, hctx_flags_show, inode->i_private);
78}
79
80static const struct file_operations hctx_flags_fops = {
81 .open = hctx_flags_open,
82 .read = seq_read,
83 .llseek = seq_lseek,
84 .release = single_release,
85};
86
87static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
88{
89 struct request *rq = list_entry_rq(v);
90
91 seq_printf(m, "%p {.cmd_type=%u, .cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n",
92 rq, rq->cmd_type, rq->cmd_flags, (unsigned int)rq->rq_flags,
93 rq->tag, rq->internal_tag);
94 return 0;
95}
96
97static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
98{
99 struct blk_mq_hw_ctx *hctx = m->private;
100
101 spin_lock(&hctx->lock);
102 return seq_list_start(&hctx->dispatch, *pos);
103}
104
105static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos)
106{
107 struct blk_mq_hw_ctx *hctx = m->private;
108
109 return seq_list_next(v, &hctx->dispatch, pos);
110}
111
112static void hctx_dispatch_stop(struct seq_file *m, void *v)
113{
114 struct blk_mq_hw_ctx *hctx = m->private;
115
116 spin_unlock(&hctx->lock);
117}
118
119static const struct seq_operations hctx_dispatch_seq_ops = {
120 .start = hctx_dispatch_start,
121 .next = hctx_dispatch_next,
122 .stop = hctx_dispatch_stop,
123 .show = blk_mq_debugfs_rq_show,
124};
125
126static int hctx_dispatch_open(struct inode *inode, struct file *file)
127{
128 return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops);
129}
130
131static const struct file_operations hctx_dispatch_fops = {
132 .open = hctx_dispatch_open,
133 .read = seq_read,
134 .llseek = seq_lseek,
135 .release = seq_release,
136};
137
138static int hctx_ctx_map_show(struct seq_file *m, void *v)
139{
140 struct blk_mq_hw_ctx *hctx = m->private;
141
142 sbitmap_bitmap_show(&hctx->ctx_map, m);
143 return 0;
144}
145
146static int hctx_ctx_map_open(struct inode *inode, struct file *file)
147{
148 return single_open(file, hctx_ctx_map_show, inode->i_private);
149}
150
151static const struct file_operations hctx_ctx_map_fops = {
152 .open = hctx_ctx_map_open,
153 .read = seq_read,
154 .llseek = seq_lseek,
155 .release = single_release,
156};
157
158static void blk_mq_debugfs_tags_show(struct seq_file *m,
159 struct blk_mq_tags *tags)
160{
161 seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
162 seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
163 seq_printf(m, "active_queues=%d\n",
164 atomic_read(&tags->active_queues));
165
166 seq_puts(m, "\nbitmap_tags:\n");
167 sbitmap_queue_show(&tags->bitmap_tags, m);
168
169 if (tags->nr_reserved_tags) {
170 seq_puts(m, "\nbreserved_tags:\n");
171 sbitmap_queue_show(&tags->breserved_tags, m);
172 }
173}
174
175static int hctx_tags_show(struct seq_file *m, void *v)
176{
177 struct blk_mq_hw_ctx *hctx = m->private;
178 struct request_queue *q = hctx->queue;
179
180 mutex_lock(&q->sysfs_lock);
181 if (hctx->tags)
182 blk_mq_debugfs_tags_show(m, hctx->tags);
183 mutex_unlock(&q->sysfs_lock);
184
185 return 0;
186}
187
188static int hctx_tags_open(struct inode *inode, struct file *file)
189{
190 return single_open(file, hctx_tags_show, inode->i_private);
191}
192
193static const struct file_operations hctx_tags_fops = {
194 .open = hctx_tags_open,
195 .read = seq_read,
196 .llseek = seq_lseek,
197 .release = single_release,
198};
199
200static int hctx_tags_bitmap_show(struct seq_file *m, void *v)
201{
202 struct blk_mq_hw_ctx *hctx = m->private;
203 struct request_queue *q = hctx->queue;
204
205 mutex_lock(&q->sysfs_lock);
206 if (hctx->tags)
207 sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m);
208 mutex_unlock(&q->sysfs_lock);
209 return 0;
210}
211
212static int hctx_tags_bitmap_open(struct inode *inode, struct file *file)
213{
214 return single_open(file, hctx_tags_bitmap_show, inode->i_private);
215}
216
217static const struct file_operations hctx_tags_bitmap_fops = {
218 .open = hctx_tags_bitmap_open,
219 .read = seq_read,
220 .llseek = seq_lseek,
221 .release = single_release,
222};
223
224static int hctx_sched_tags_show(struct seq_file *m, void *v)
225{
226 struct blk_mq_hw_ctx *hctx = m->private;
227 struct request_queue *q = hctx->queue;
228
229 mutex_lock(&q->sysfs_lock);
230 if (hctx->sched_tags)
231 blk_mq_debugfs_tags_show(m, hctx->sched_tags);
232 mutex_unlock(&q->sysfs_lock);
233
234 return 0;
235}
236
237static int hctx_sched_tags_open(struct inode *inode, struct file *file)
238{
239 return single_open(file, hctx_sched_tags_show, inode->i_private);
240}
241
242static const struct file_operations hctx_sched_tags_fops = {
243 .open = hctx_sched_tags_open,
244 .read = seq_read,
245 .llseek = seq_lseek,
246 .release = single_release,
247};
248
249static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v)
250{
251 struct blk_mq_hw_ctx *hctx = m->private;
252 struct request_queue *q = hctx->queue;
253
254 mutex_lock(&q->sysfs_lock);
255 if (hctx->sched_tags)
256 sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m);
257 mutex_unlock(&q->sysfs_lock);
258 return 0;
259}
260
261static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file)
262{
263 return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private);
264}
265
266static const struct file_operations hctx_sched_tags_bitmap_fops = {
267 .open = hctx_sched_tags_bitmap_open,
268 .read = seq_read,
269 .llseek = seq_lseek,
270 .release = single_release,
271};
272
273static int hctx_io_poll_show(struct seq_file *m, void *v)
274{
275 struct blk_mq_hw_ctx *hctx = m->private;
276
277 seq_printf(m, "considered=%lu\n", hctx->poll_considered);
278 seq_printf(m, "invoked=%lu\n", hctx->poll_invoked);
279 seq_printf(m, "success=%lu\n", hctx->poll_success);
280 return 0;
281}
282
283static int hctx_io_poll_open(struct inode *inode, struct file *file)
284{
285 return single_open(file, hctx_io_poll_show, inode->i_private);
286}
287
288static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf,
289 size_t count, loff_t *ppos)
290{
291 struct seq_file *m = file->private_data;
292 struct blk_mq_hw_ctx *hctx = m->private;
293
294 hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
295 return count;
296}
297
298static const struct file_operations hctx_io_poll_fops = {
299 .open = hctx_io_poll_open,
300 .read = seq_read,
301 .write = hctx_io_poll_write,
302 .llseek = seq_lseek,
303 .release = single_release,
304};
305
306static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
307{
308 seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
309 stat->nr_samples, stat->mean, stat->min, stat->max);
310}
311
312static int hctx_stats_show(struct seq_file *m, void *v)
313{
314 struct blk_mq_hw_ctx *hctx = m->private;
315 struct blk_rq_stat stat[2];
316
317 blk_stat_init(&stat[BLK_STAT_READ]);
318 blk_stat_init(&stat[BLK_STAT_WRITE]);
319
320 blk_hctx_stat_get(hctx, stat);
321
322 seq_puts(m, "read: ");
323 print_stat(m, &stat[BLK_STAT_READ]);
324 seq_puts(m, "\n");
325
326 seq_puts(m, "write: ");
327 print_stat(m, &stat[BLK_STAT_WRITE]);
328 seq_puts(m, "\n");
329 return 0;
330}
331
332static int hctx_stats_open(struct inode *inode, struct file *file)
333{
334 return single_open(file, hctx_stats_show, inode->i_private);
335}
336
337static ssize_t hctx_stats_write(struct file *file, const char __user *buf,
338 size_t count, loff_t *ppos)
339{
340 struct seq_file *m = file->private_data;
341 struct blk_mq_hw_ctx *hctx = m->private;
342 struct blk_mq_ctx *ctx;
343 int i;
344
345 hctx_for_each_ctx(hctx, ctx, i) {
346 blk_stat_init(&ctx->stat[BLK_STAT_READ]);
347 blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
348 }
349 return count;
350}
351
352static const struct file_operations hctx_stats_fops = {
353 .open = hctx_stats_open,
354 .read = seq_read,
355 .write = hctx_stats_write,
356 .llseek = seq_lseek,
357 .release = single_release,
358};
359
360static int hctx_dispatched_show(struct seq_file *m, void *v)
361{
362 struct blk_mq_hw_ctx *hctx = m->private;
363 int i;
364
365 seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
366
367 for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
368 unsigned int d = 1U << (i - 1);
369
370 seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]);
371 }
372
373 seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]);
374 return 0;
375}
376
377static int hctx_dispatched_open(struct inode *inode, struct file *file)
378{
379 return single_open(file, hctx_dispatched_show, inode->i_private);
380}
381
382static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf,
383 size_t count, loff_t *ppos)
384{
385 struct seq_file *m = file->private_data;
386 struct blk_mq_hw_ctx *hctx = m->private;
387 int i;
388
389 for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++)
390 hctx->dispatched[i] = 0;
391 return count;
392}
393
394static const struct file_operations hctx_dispatched_fops = {
395 .open = hctx_dispatched_open,
396 .read = seq_read,
397 .write = hctx_dispatched_write,
398 .llseek = seq_lseek,
399 .release = single_release,
400};
401
402static int hctx_queued_show(struct seq_file *m, void *v)
403{
404 struct blk_mq_hw_ctx *hctx = m->private;
405
406 seq_printf(m, "%lu\n", hctx->queued);
407 return 0;
408}
409
410static int hctx_queued_open(struct inode *inode, struct file *file)
411{
412 return single_open(file, hctx_queued_show, inode->i_private);
413}
414
415static ssize_t hctx_queued_write(struct file *file, const char __user *buf,
416 size_t count, loff_t *ppos)
417{
418 struct seq_file *m = file->private_data;
419 struct blk_mq_hw_ctx *hctx = m->private;
420
421 hctx->queued = 0;
422 return count;
423}
424
425static const struct file_operations hctx_queued_fops = {
426 .open = hctx_queued_open,
427 .read = seq_read,
428 .write = hctx_queued_write,
429 .llseek = seq_lseek,
430 .release = single_release,
431};
432
433static int hctx_run_show(struct seq_file *m, void *v)
434{
435 struct blk_mq_hw_ctx *hctx = m->private;
436
437 seq_printf(m, "%lu\n", hctx->run);
438 return 0;
439}
440
441static int hctx_run_open(struct inode *inode, struct file *file)
442{
443 return single_open(file, hctx_run_show, inode->i_private);
444}
445
446static ssize_t hctx_run_write(struct file *file, const char __user *buf,
447 size_t count, loff_t *ppos)
448{
449 struct seq_file *m = file->private_data;
450 struct blk_mq_hw_ctx *hctx = m->private;
451
452 hctx->run = 0;
453 return count;
454}
455
456static const struct file_operations hctx_run_fops = {
457 .open = hctx_run_open,
458 .read = seq_read,
459 .write = hctx_run_write,
460 .llseek = seq_lseek,
461 .release = single_release,
462};
463
464static int hctx_active_show(struct seq_file *m, void *v)
465{
466 struct blk_mq_hw_ctx *hctx = m->private;
467
468 seq_printf(m, "%d\n", atomic_read(&hctx->nr_active));
469 return 0;
470}
471
472static int hctx_active_open(struct inode *inode, struct file *file)
473{
474 return single_open(file, hctx_active_show, inode->i_private);
475}
476
477static const struct file_operations hctx_active_fops = {
478 .open = hctx_active_open,
479 .read = seq_read,
480 .llseek = seq_lseek,
481 .release = single_release,
482};
483
484static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
485{
486 struct blk_mq_ctx *ctx = m->private;
487
488 spin_lock(&ctx->lock);
489 return seq_list_start(&ctx->rq_list, *pos);
490}
491
492static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
493{
494 struct blk_mq_ctx *ctx = m->private;
495
496 return seq_list_next(v, &ctx->rq_list, pos);
497}
498
499static void ctx_rq_list_stop(struct seq_file *m, void *v)
500{
501 struct blk_mq_ctx *ctx = m->private;
502
503 spin_unlock(&ctx->lock);
504}
505
506static const struct seq_operations ctx_rq_list_seq_ops = {
507 .start = ctx_rq_list_start,
508 .next = ctx_rq_list_next,
509 .stop = ctx_rq_list_stop,
510 .show = blk_mq_debugfs_rq_show,
511};
512
513static int ctx_rq_list_open(struct inode *inode, struct file *file)
514{
515 return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops);
516}
517
518static const struct file_operations ctx_rq_list_fops = {
519 .open = ctx_rq_list_open,
520 .read = seq_read,
521 .llseek = seq_lseek,
522 .release = seq_release,
523};
524
525static int ctx_dispatched_show(struct seq_file *m, void *v)
526{
527 struct blk_mq_ctx *ctx = m->private;
528
529 seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]);
530 return 0;
531}
532
533static int ctx_dispatched_open(struct inode *inode, struct file *file)
534{
535 return single_open(file, ctx_dispatched_show, inode->i_private);
536}
537
538static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf,
539 size_t count, loff_t *ppos)
540{
541 struct seq_file *m = file->private_data;
542 struct blk_mq_ctx *ctx = m->private;
543
544 ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0;
545 return count;
546}
547
548static const struct file_operations ctx_dispatched_fops = {
549 .open = ctx_dispatched_open,
550 .read = seq_read,
551 .write = ctx_dispatched_write,
552 .llseek = seq_lseek,
553 .release = single_release,
554};
555
556static int ctx_merged_show(struct seq_file *m, void *v)
557{
558 struct blk_mq_ctx *ctx = m->private;
559
560 seq_printf(m, "%lu\n", ctx->rq_merged);
561 return 0;
562}
563
564static int ctx_merged_open(struct inode *inode, struct file *file)
565{
566 return single_open(file, ctx_merged_show, inode->i_private);
567}
568
569static ssize_t ctx_merged_write(struct file *file, const char __user *buf,
570 size_t count, loff_t *ppos)
571{
572 struct seq_file *m = file->private_data;
573 struct blk_mq_ctx *ctx = m->private;
574
575 ctx->rq_merged = 0;
576 return count;
577}
578
579static const struct file_operations ctx_merged_fops = {
580 .open = ctx_merged_open,
581 .read = seq_read,
582 .write = ctx_merged_write,
583 .llseek = seq_lseek,
584 .release = single_release,
585};
586
587static int ctx_completed_show(struct seq_file *m, void *v)
588{
589 struct blk_mq_ctx *ctx = m->private;
590
591 seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]);
592 return 0;
593}
594
595static int ctx_completed_open(struct inode *inode, struct file *file)
596{
597 return single_open(file, ctx_completed_show, inode->i_private);
598}
599
600static ssize_t ctx_completed_write(struct file *file, const char __user *buf,
601 size_t count, loff_t *ppos)
602{
603 struct seq_file *m = file->private_data;
604 struct blk_mq_ctx *ctx = m->private;
605
606 ctx->rq_completed[0] = ctx->rq_completed[1] = 0;
607 return count;
608}
609
610static const struct file_operations ctx_completed_fops = {
611 .open = ctx_completed_open,
612 .read = seq_read,
613 .write = ctx_completed_write,
614 .llseek = seq_lseek,
615 .release = single_release,
616};
617
618static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
619 {"state", 0400, &hctx_state_fops},
620 {"flags", 0400, &hctx_flags_fops},
621 {"dispatch", 0400, &hctx_dispatch_fops},
622 {"ctx_map", 0400, &hctx_ctx_map_fops},
623 {"tags", 0400, &hctx_tags_fops},
624 {"tags_bitmap", 0400, &hctx_tags_bitmap_fops},
625 {"sched_tags", 0400, &hctx_sched_tags_fops},
626 {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops},
627 {"io_poll", 0600, &hctx_io_poll_fops},
628 {"stats", 0600, &hctx_stats_fops},
629 {"dispatched", 0600, &hctx_dispatched_fops},
630 {"queued", 0600, &hctx_queued_fops},
631 {"run", 0600, &hctx_run_fops},
632 {"active", 0400, &hctx_active_fops},
633};
634
635static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
636 {"rq_list", 0400, &ctx_rq_list_fops},
637 {"dispatched", 0600, &ctx_dispatched_fops},
638 {"merged", 0600, &ctx_merged_fops},
639 {"completed", 0600, &ctx_completed_fops},
640};
641
642int blk_mq_debugfs_register(struct request_queue *q, const char *name)
643{
644 if (!block_debugfs_root)
645 return -ENOENT;
646
647 q->debugfs_dir = debugfs_create_dir(name, block_debugfs_root);
648 if (!q->debugfs_dir)
649 goto err;
650
651 if (blk_mq_debugfs_register_hctxs(q))
652 goto err;
653
654 return 0;
655
656err:
657 blk_mq_debugfs_unregister(q);
658 return -ENOMEM;
659}
660
661void blk_mq_debugfs_unregister(struct request_queue *q)
662{
663 debugfs_remove_recursive(q->debugfs_dir);
664 q->mq_debugfs_dir = NULL;
665 q->debugfs_dir = NULL;
666}
667
668static int blk_mq_debugfs_register_ctx(struct request_queue *q,
669 struct blk_mq_ctx *ctx,
670 struct dentry *hctx_dir)
671{
672 struct dentry *ctx_dir;
673 char name[20];
674 int i;
675
676 snprintf(name, sizeof(name), "cpu%u", ctx->cpu);
677 ctx_dir = debugfs_create_dir(name, hctx_dir);
678 if (!ctx_dir)
679 return -ENOMEM;
680
681 for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_ctx_attrs); i++) {
682 const struct blk_mq_debugfs_attr *attr;
683
684 attr = &blk_mq_debugfs_ctx_attrs[i];
685 if (!debugfs_create_file(attr->name, attr->mode, ctx_dir, ctx,
686 attr->fops))
687 return -ENOMEM;
688 }
689
690 return 0;
691}
692
693static int blk_mq_debugfs_register_hctx(struct request_queue *q,
694 struct blk_mq_hw_ctx *hctx)
695{
696 struct blk_mq_ctx *ctx;
697 struct dentry *hctx_dir;
698 char name[20];
699 int i;
700
701 snprintf(name, sizeof(name), "%u", hctx->queue_num);
702 hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir);
703 if (!hctx_dir)
704 return -ENOMEM;
705
706 for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_hctx_attrs); i++) {
707 const struct blk_mq_debugfs_attr *attr;
708
709 attr = &blk_mq_debugfs_hctx_attrs[i];
710 if (!debugfs_create_file(attr->name, attr->mode, hctx_dir, hctx,
711 attr->fops))
712 return -ENOMEM;
713 }
714
715 hctx_for_each_ctx(hctx, ctx, i) {
716 if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir))
717 return -ENOMEM;
718 }
719
720 return 0;
721}
722
723int blk_mq_debugfs_register_hctxs(struct request_queue *q)
724{
725 struct blk_mq_hw_ctx *hctx;
726 int i;
727
728 if (!q->debugfs_dir)
729 return -ENOENT;
730
731 q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir);
732 if (!q->mq_debugfs_dir)
733 goto err;
734
735 queue_for_each_hw_ctx(q, hctx, i) {
736 if (blk_mq_debugfs_register_hctx(q, hctx))
737 goto err;
738 }
739
740 return 0;
741
742err:
743 blk_mq_debugfs_unregister_hctxs(q);
744 return -ENOMEM;
745}
746
747void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
748{
749 debugfs_remove_recursive(q->mq_debugfs_dir);
750 q->mq_debugfs_dir = NULL;
751}
752
753void blk_mq_debugfs_init(void)
754{
755 block_debugfs_root = debugfs_create_dir("block", NULL);
756}
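
Every attribute in blk-mq-debugfs.c above repeats the same debugfs plus seq_file boilerplate: a show() callback, a single_open() (or seq_open()) wrapper, and a const file_operations wired to seq_read/seq_lseek/single_release. A minimal standalone sketch of that pattern follows; the example_* names and the "example/counter" debugfs path are invented for illustration, only the kernel APIs used are the same ones the file above relies on:

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static struct dentry *example_dir;
static unsigned long example_counter;

/* show() emits the value; single_open() buffers the whole output */
static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%lu\n", example_counter);
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, inode->i_private);
}

static const struct file_operations example_fops = {
	.owner		= THIS_MODULE,
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init example_init(void)
{
	example_dir = debugfs_create_dir("example", NULL);
	if (!example_dir)
		return -ENOMEM;
	if (!debugfs_create_file("counter", 0400, example_dir, NULL,
				 &example_fops)) {
		debugfs_remove_recursive(example_dir);
		return -ENOMEM;
	}
	return 0;
}

static void __exit example_exit(void)
{
	debugfs_remove_recursive(example_dir);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
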
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
new file mode 100644
index 000000000000..114814ec3d49
--- /dev/null
+++ b/block/blk-mq-sched.c
@@ -0,0 +1,481 @@
1/*
2 * blk-mq scheduling framework
3 *
4 * Copyright (C) 2016 Jens Axboe
5 */
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/blk-mq.h>
9
10#include <trace/events/block.h>
11
12#include "blk.h"
13#include "blk-mq.h"
14#include "blk-mq-sched.h"
15#include "blk-mq-tag.h"
16#include "blk-wbt.h"
17
18void blk_mq_sched_free_hctx_data(struct request_queue *q,
19 void (*exit)(struct blk_mq_hw_ctx *))
20{
21 struct blk_mq_hw_ctx *hctx;
22 int i;
23
24 queue_for_each_hw_ctx(q, hctx, i) {
25 if (exit && hctx->sched_data)
26 exit(hctx);
27 kfree(hctx->sched_data);
28 hctx->sched_data = NULL;
29 }
30}
31EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
32
33int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
34 int (*init)(struct blk_mq_hw_ctx *),
35 void (*exit)(struct blk_mq_hw_ctx *))
36{
37 struct blk_mq_hw_ctx *hctx;
38 int ret;
39 int i;
40
41 queue_for_each_hw_ctx(q, hctx, i) {
42 hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
43 if (!hctx->sched_data) {
44 ret = -ENOMEM;
45 goto error;
46 }
47
48 if (init) {
49 ret = init(hctx);
50 if (ret) {
51 /*
52 * We don't want to give exit() a partially
53 * initialized sched_data. init() must clean up
54 * if it fails.
55 */
56 kfree(hctx->sched_data);
57 hctx->sched_data = NULL;
58 goto error;
59 }
60 }
61 }
62
63 return 0;
64error:
65 blk_mq_sched_free_hctx_data(q, exit);
66 return ret;
67}
68EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
69
70static void __blk_mq_sched_assign_ioc(struct request_queue *q,
71 struct request *rq, struct io_context *ioc)
72{
73 struct io_cq *icq;
74
75 spin_lock_irq(q->queue_lock);
76 icq = ioc_lookup_icq(ioc, q);
77 spin_unlock_irq(q->queue_lock);
78
79 if (!icq) {
80 icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
81 if (!icq)
82 return;
83 }
84
85 rq->elv.icq = icq;
86 if (!blk_mq_sched_get_rq_priv(q, rq)) {
87 rq->rq_flags |= RQF_ELVPRIV;
88 get_io_context(icq->ioc);
89 return;
90 }
91
92 rq->elv.icq = NULL;
93}
94
95static void blk_mq_sched_assign_ioc(struct request_queue *q,
96 struct request *rq, struct bio *bio)
97{
98 struct io_context *ioc;
99
100 ioc = rq_ioc(bio);
101 if (ioc)
102 __blk_mq_sched_assign_ioc(q, rq, ioc);
103}
104
105struct request *blk_mq_sched_get_request(struct request_queue *q,
106 struct bio *bio,
107 unsigned int op,
108 struct blk_mq_alloc_data *data)
109{
110 struct elevator_queue *e = q->elevator;
111 struct blk_mq_hw_ctx *hctx;
112 struct blk_mq_ctx *ctx;
113 struct request *rq;
114
115 blk_queue_enter_live(q);
116 ctx = blk_mq_get_ctx(q);
117 hctx = blk_mq_map_queue(q, ctx->cpu);
118
119 blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx);
120
121 if (e) {
122 data->flags |= BLK_MQ_REQ_INTERNAL;
123
124 /*
125 * Flush requests are special and go directly to the
126 * dispatch list.
127 */
128 if (!op_is_flush(op) && e->type->ops.mq.get_request) {
129 rq = e->type->ops.mq.get_request(q, op, data);
130 if (rq)
131 rq->rq_flags |= RQF_QUEUED;
132 } else
133 rq = __blk_mq_alloc_request(data, op);
134 } else {
135 rq = __blk_mq_alloc_request(data, op);
136 if (rq)
137 data->hctx->tags->rqs[rq->tag] = rq;
138 }
139
140 if (rq) {
141 if (!op_is_flush(op)) {
142 rq->elv.icq = NULL;
143 if (e && e->type->icq_cache)
144 blk_mq_sched_assign_ioc(q, rq, bio);
145 }
146 data->hctx->queued++;
147 return rq;
148 }
149
150 blk_queue_exit(q);
151 return NULL;
152}
153
154void blk_mq_sched_put_request(struct request *rq)
155{
156 struct request_queue *q = rq->q;
157 struct elevator_queue *e = q->elevator;
158
159 if (rq->rq_flags & RQF_ELVPRIV) {
160 blk_mq_sched_put_rq_priv(rq->q, rq);
161 if (rq->elv.icq) {
162 put_io_context(rq->elv.icq->ioc);
163 rq->elv.icq = NULL;
164 }
165 }
166
167 if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
168 e->type->ops.mq.put_request(rq);
169 else
170 blk_mq_finish_request(rq);
171}
172
173void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
174{
175 struct elevator_queue *e = hctx->queue->elevator;
176 LIST_HEAD(rq_list);
177
178 if (unlikely(blk_mq_hctx_stopped(hctx)))
179 return;
180
181 hctx->run++;
182
183 /*
184 * If we have previous entries on our dispatch list, grab them first for
185 * more fair dispatch.
186 */
187 if (!list_empty_careful(&hctx->dispatch)) {
188 spin_lock(&hctx->lock);
189 if (!list_empty(&hctx->dispatch))
190 list_splice_init(&hctx->dispatch, &rq_list);
191 spin_unlock(&hctx->lock);
192 }
193
194 /*
195 * Only ask the scheduler for requests, if we didn't have residual
196 * requests from the dispatch list. This is to avoid the case where
197 * we only ever dispatch a fraction of the requests available because
198 * of low device queue depth. Once we pull requests out of the IO
199 * scheduler, we can no longer merge or sort them. So it's best to
200 * leave them there for as long as we can. Mark the hw queue as
201 * needing a restart in that case.
202 */
203 if (!list_empty(&rq_list)) {
204 blk_mq_sched_mark_restart(hctx);
205 blk_mq_dispatch_rq_list(hctx, &rq_list);
206 } else if (!e || !e->type->ops.mq.dispatch_request) {
207 blk_mq_flush_busy_ctxs(hctx, &rq_list);
208 blk_mq_dispatch_rq_list(hctx, &rq_list);
209 } else {
210 do {
211 struct request *rq;
212
213 rq = e->type->ops.mq.dispatch_request(hctx);
214 if (!rq)
215 break;
216 list_add(&rq->queuelist, &rq_list);
217 } while (blk_mq_dispatch_rq_list(hctx, &rq_list));
218 }
219}
220
221void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
222 struct list_head *rq_list,
223 struct request *(*get_rq)(struct blk_mq_hw_ctx *))
224{
225 do {
226 struct request *rq;
227
228 rq = get_rq(hctx);
229 if (!rq)
230 break;
231
232 list_add_tail(&rq->queuelist, rq_list);
233 } while (1);
234}
235EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
236
237bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio)
238{
239 struct request *rq;
240 int ret;
241
242 ret = elv_merge(q, &rq, bio);
243 if (ret == ELEVATOR_BACK_MERGE) {
244 if (!blk_mq_sched_allow_merge(q, rq, bio))
245 return false;
246 if (bio_attempt_back_merge(q, rq, bio)) {
247 if (!attempt_back_merge(q, rq))
248 elv_merged_request(q, rq, ret);
249 return true;
250 }
251 } else if (ret == ELEVATOR_FRONT_MERGE) {
252 if (!blk_mq_sched_allow_merge(q, rq, bio))
253 return false;
254 if (bio_attempt_front_merge(q, rq, bio)) {
255 if (!attempt_front_merge(q, rq))
256 elv_merged_request(q, rq, ret);
257 return true;
258 }
259 }
260
261 return false;
262}
263EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
264
265bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
266{
267 struct elevator_queue *e = q->elevator;
268
269 if (e->type->ops.mq.bio_merge) {
270 struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
271 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
272
273 blk_mq_put_ctx(ctx);
274 return e->type->ops.mq.bio_merge(hctx, bio);
275 }
276
277 return false;
278}
279
280bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
281{
282 return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
283}
284EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
285
286void blk_mq_sched_request_inserted(struct request *rq)
287{
288 trace_block_rq_insert(rq->q, rq);
289}
290EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
291
292bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq)
293{
294 if (rq->tag == -1) {
295 rq->rq_flags |= RQF_SORTED;
296 return false;
297 }
298
299 /*
300 * If we already have a real request tag, send directly to
301 * the dispatch list.
302 */
303 spin_lock(&hctx->lock);
304 list_add(&rq->queuelist, &hctx->dispatch);
305 spin_unlock(&hctx->lock);
306 return true;
307}
308EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert);
309
310static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
311{
312 if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
313 clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
314 if (blk_mq_hctx_has_pending(hctx))
315 blk_mq_run_hw_queue(hctx, true);
316 }
317}
318
319void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
320{
321 unsigned int i;
322
323 if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
324 blk_mq_sched_restart_hctx(hctx);
325 else {
326 struct request_queue *q = hctx->queue;
327
328 if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
329 return;
330
331 clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
332
333 queue_for_each_hw_ctx(q, hctx, i)
334 blk_mq_sched_restart_hctx(hctx);
335 }
336}
337
338/*
339 * Add flush/fua to the queue. If we fail getting a driver tag, then
340 * punt to the requeue list. Requeue will re-invoke us from a context
341 * that's safe to block from.
342 */
343static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
344 struct request *rq, bool can_block)
345{
346 if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
347 blk_insert_flush(rq);
348 blk_mq_run_hw_queue(hctx, true);
349 } else
350 blk_mq_add_to_requeue_list(rq, true, true);
351}
352
353void blk_mq_sched_insert_request(struct request *rq, bool at_head,
354 bool run_queue, bool async, bool can_block)
355{
356 struct request_queue *q = rq->q;
357 struct elevator_queue *e = q->elevator;
358 struct blk_mq_ctx *ctx = rq->mq_ctx;
359 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
360
361 if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) {
362 blk_mq_sched_insert_flush(hctx, rq, can_block);
363 return;
364 }
365
366 if (e && e->type->ops.mq.insert_requests) {
367 LIST_HEAD(list);
368
369 list_add(&rq->queuelist, &list);
370 e->type->ops.mq.insert_requests(hctx, &list, at_head);
371 } else {
372 spin_lock(&ctx->lock);
373 __blk_mq_insert_request(hctx, rq, at_head);
374 spin_unlock(&ctx->lock);
375 }
376
377 if (run_queue)
378 blk_mq_run_hw_queue(hctx, async);
379}
380
381void blk_mq_sched_insert_requests(struct request_queue *q,
382 struct blk_mq_ctx *ctx,
383 struct list_head *list, bool run_queue_async)
384{
385 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
386 struct elevator_queue *e = hctx->queue->elevator;
387
388 if (e && e->type->ops.mq.insert_requests)
389 e->type->ops.mq.insert_requests(hctx, list, false);
390 else
391 blk_mq_insert_requests(hctx, ctx, list);
392
393 blk_mq_run_hw_queue(hctx, run_queue_async);
394}
395
396static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
397 struct blk_mq_hw_ctx *hctx,
398 unsigned int hctx_idx)
399{
400 if (hctx->sched_tags) {
401 blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
402 blk_mq_free_rq_map(hctx->sched_tags);
403 hctx->sched_tags = NULL;
404 }
405}
406
407int blk_mq_sched_setup(struct request_queue *q)
408{
409 struct blk_mq_tag_set *set = q->tag_set;
410 struct blk_mq_hw_ctx *hctx;
411 int ret, i;
412
413 /*
414 * Default to 256, since we don't split into sync/async like the
415 * old code did. Additionally, this is a per-hw queue depth.
416 */
417 q->nr_requests = 2 * BLKDEV_MAX_RQ;
418
419 /*
420 * We're switching to using an IO scheduler, so setup the hctx
421 * scheduler tags and switch the request map from the regular
422 * tags to scheduler tags. First allocate what we need, so we
423 * can safely fail and fallback, if needed.
424 */
425 ret = 0;
426 queue_for_each_hw_ctx(q, hctx, i) {
427 hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
428 if (!hctx->sched_tags) {
429 ret = -ENOMEM;
430 break;
431 }
432 ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
433 if (ret)
434 break;
435 }
436
437 /*
438 * If we failed, free what we did allocate
439 */
440 if (ret) {
441 queue_for_each_hw_ctx(q, hctx, i) {
442 if (!hctx->sched_tags)
443 continue;
444 blk_mq_sched_free_tags(set, hctx, i);
445 }
446
447 return ret;
448 }
449
450 return 0;
451}
452
453void blk_mq_sched_teardown(struct request_queue *q)
454{
455 struct blk_mq_tag_set *set = q->tag_set;
456 struct blk_mq_hw_ctx *hctx;
457 int i;
458
459 queue_for_each_hw_ctx(q, hctx, i)
460 blk_mq_sched_free_tags(set, hctx, i);
461}
462
463int blk_mq_sched_init(struct request_queue *q)
464{
465 int ret;
466
467#if defined(CONFIG_DEFAULT_SQ_NONE)
468 if (q->nr_hw_queues == 1)
469 return 0;
470#endif
471#if defined(CONFIG_DEFAULT_MQ_NONE)
472 if (q->nr_hw_queues > 1)
473 return 0;
474#endif
475
476 mutex_lock(&q->sysfs_lock);
477 ret = elevator_init(q, NULL);
478 mutex_unlock(&q->sysfs_lock);
479
480 return ret;
481}
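
blk_mq_sched_init_hctx_data() and blk_mq_sched_free_hctx_data() above are exported so an I/O scheduler can hang per-hardware-queue state off hctx->sched_data, with init/exit callbacks run around the allocation. A hedged sketch of how a scheduler might consume them; struct example_hctx_data and the example_* callbacks are made-up names, only the helper signatures come from the patch above:

#include <linux/blk-mq.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include "blk-mq-sched.h"

/* hypothetical per-hctx scheduler state */
struct example_hctx_data {
	spinlock_t lock;
	struct list_head rqs;
};

static int example_init_hctx(struct blk_mq_hw_ctx *hctx)
{
	/* sched_data was already allocated by blk_mq_sched_init_hctx_data() */
	struct example_hctx_data *ed = hctx->sched_data;

	spin_lock_init(&ed->lock);
	INIT_LIST_HEAD(&ed->rqs);
	return 0;
}

static void example_exit_hctx(struct blk_mq_hw_ctx *hctx)
{
	/* nothing beyond the kfree() done by blk_mq_sched_free_hctx_data() */
}

static int example_init_queue(struct request_queue *q)
{
	return blk_mq_sched_init_hctx_data(q, sizeof(struct example_hctx_data),
					   example_init_hctx,
					   example_exit_hctx);
}

static void example_exit_queue(struct request_queue *q)
{
	blk_mq_sched_free_hctx_data(q, example_exit_hctx);
}
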
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
new file mode 100644
index 000000000000..9478aaeb48c5
--- /dev/null
+++ b/block/blk-mq-sched.h
@@ -0,0 +1,142 @@
1#ifndef BLK_MQ_SCHED_H
2#define BLK_MQ_SCHED_H
3
4#include "blk-mq.h"
5#include "blk-mq-tag.h"
6
7int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
8 int (*init)(struct blk_mq_hw_ctx *),
9 void (*exit)(struct blk_mq_hw_ctx *));
10
11void blk_mq_sched_free_hctx_data(struct request_queue *q,
12 void (*exit)(struct blk_mq_hw_ctx *));
13
14struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
15void blk_mq_sched_put_request(struct request *rq);
16
17void blk_mq_sched_request_inserted(struct request *rq);
18bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq);
19bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio);
20bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
21bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
22void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
23
24void blk_mq_sched_insert_request(struct request *rq, bool at_head,
25 bool run_queue, bool async, bool can_block);
26void blk_mq_sched_insert_requests(struct request_queue *q,
27 struct blk_mq_ctx *ctx,
28 struct list_head *list, bool run_queue_async);
29
30void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
31void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
32 struct list_head *rq_list,
33 struct request *(*get_rq)(struct blk_mq_hw_ctx *));
34
35int blk_mq_sched_setup(struct request_queue *q);
36void blk_mq_sched_teardown(struct request_queue *q);
37
38int blk_mq_sched_init(struct request_queue *q);
39
40static inline bool
41blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
42{
43 struct elevator_queue *e = q->elevator;
44
45 if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
46 return false;
47
48 return __blk_mq_sched_bio_merge(q, bio);
49}
50
51static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
52 struct request *rq)
53{
54 struct elevator_queue *e = q->elevator;
55
56 if (e && e->type->ops.mq.get_rq_priv)
57 return e->type->ops.mq.get_rq_priv(q, rq);
58
59 return 0;
60}
61
62static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
63 struct request *rq)
64{
65 struct elevator_queue *e = q->elevator;
66
67 if (e && e->type->ops.mq.put_rq_priv)
68 e->type->ops.mq.put_rq_priv(q, rq);
69}
70
71static inline bool
72blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
73 struct bio *bio)
74{
75 struct elevator_queue *e = q->elevator;
76
77 if (e && e->type->ops.mq.allow_merge)
78 return e->type->ops.mq.allow_merge(q, rq, bio);
79
80 return true;
81}
82
83static inline void
84blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
85{
86 struct elevator_queue *e = hctx->queue->elevator;
87
88 if (e && e->type->ops.mq.completed_request)
89 e->type->ops.mq.completed_request(hctx, rq);
90
91 BUG_ON(rq->internal_tag == -1);
92
93 blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
94}
95
96static inline void blk_mq_sched_started_request(struct request *rq)
97{
98 struct request_queue *q = rq->q;
99 struct elevator_queue *e = q->elevator;
100
101 if (e && e->type->ops.mq.started_request)
102 e->type->ops.mq.started_request(rq);
103}
104
105static inline void blk_mq_sched_requeue_request(struct request *rq)
106{
107 struct request_queue *q = rq->q;
108 struct elevator_queue *e = q->elevator;
109
110 if (e && e->type->ops.mq.requeue_request)
111 e->type->ops.mq.requeue_request(rq);
112}
113
114static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
115{
116 struct elevator_queue *e = hctx->queue->elevator;
117
118 if (e && e->type->ops.mq.has_work)
119 return e->type->ops.mq.has_work(hctx);
120
121 return false;
122}
123
124static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
125{
126 if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
127 set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
128 if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
129 struct request_queue *q = hctx->queue;
130
131 if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
132 set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
133 }
134 }
135}
136
137static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
138{
139 return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
140}
141
142#endif
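
The BLK_MQ_S_SCHED_RESTART bit above encodes a small protocol: when dispatch cannot obtain a driver tag it marks the hctx for restart and then retries the tag once more, closing the window against a completion that freed a tag just before the mark, while the completion path (blk_mq_sched_restart_queues(), defined in blk-mq-sched.c) clears the bit and reruns the queue. The user-space model below reproduces that ordering with C11 atomics; none of these names are kernel symbols, and real tag allocation is replaced by a simple counter.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool sched_restart;	/* models BLK_MQ_S_SCHED_RESTART */
static atomic_int free_tags;		/* no free driver tags to start with */

static bool get_driver_tag(void)
{
	int old = atomic_load(&free_tags);

	while (old > 0) {
		if (atomic_compare_exchange_weak(&free_tags, &old, old - 1))
			return true;
	}
	return false;
}

/* Dispatch side: mark the restart first, then retry once before giving up. */
static bool dispatch_one(void)
{
	if (get_driver_tag())
		return true;

	atomic_store(&sched_restart, true);
	/* A completion may have freed a tag between the failure and the mark. */
	return get_driver_tag();
}

/* Completion side: return the tag, then restart the queue if it was marked. */
static void complete_one(void)
{
	atomic_fetch_add(&free_tags, 1);
	if (atomic_exchange(&sched_restart, false))
		printf("rerun hardware queue\n");	/* blk_mq_run_hw_queue() in the kernel */
}

int main(void)
{
	if (!dispatch_one())
		printf("queue starved, waiting for a completion\n");
	complete_one();
	return 0;
}

Built with a C11 compiler (for <stdatomic.h>), the program first reports starvation and then, once complete_one() returns a tag, reruns the "queue" exactly once.
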
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index eacd3af72099..308b3f4fc310 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
122 return res; 122 return res;
123} 123}
124 124
125static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page) 125static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
126{
127 return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1],
128 ctx->rq_dispatched[0]);
129}
130
131static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page)
132{
133 return sprintf(page, "%lu\n", ctx->rq_merged);
134}
135
136static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page)
137{
138 return sprintf(page, "%lu %lu\n", ctx->rq_completed[1],
139 ctx->rq_completed[0]);
140}
141
142static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg)
143{
144 struct request *rq;
145 int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg);
146
147 list_for_each_entry(rq, list, queuelist) {
148 const int rq_len = 2 * sizeof(rq) + 2;
149
150 /* if the output will be truncated */
151 if (PAGE_SIZE - 1 < len + rq_len) {
152 /* backspacing if it can't hold '\t...\n' */
153 if (PAGE_SIZE - 1 < len + 5)
154 len -= rq_len;
155 len += snprintf(page + len, PAGE_SIZE - 1 - len,
156 "\t...\n");
157 break;
158 }
159 len += snprintf(page + len, PAGE_SIZE - 1 - len,
160 "\t%p\n", rq);
161 }
162
163 return len;
164}
165
166static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page)
167{
168 ssize_t ret;
169
170 spin_lock(&ctx->lock);
171 ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending");
172 spin_unlock(&ctx->lock);
173
174 return ret;
175}
176
177static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page)
178{
179 return sprintf(page, "considered=%lu, invoked=%lu, success=%lu\n",
180 hctx->poll_considered, hctx->poll_invoked,
181 hctx->poll_success);
182}
183
184static ssize_t blk_mq_hw_sysfs_poll_store(struct blk_mq_hw_ctx *hctx,
185 const char *page, size_t size)
186{
187 hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
188
189 return size;
190}
191
192static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx,
193 char *page)
194{
195 return sprintf(page, "%lu\n", hctx->queued);
196}
197
198static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page)
199{
200 return sprintf(page, "%lu\n", hctx->run);
201}
202
203static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx,
204 char *page)
205{
206 char *start_page = page;
207 int i;
208
209 page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
210
211 for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
212 unsigned int d = 1U << (i - 1);
213
214 page += sprintf(page, "%8u\t%lu\n", d, hctx->dispatched[i]);
215 }
216
217 page += sprintf(page, "%8u+\t%lu\n", 1U << (i - 1),
218 hctx->dispatched[i]);
219 return page - start_page;
220}
221
222static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
223 char *page) 126 char *page)
224{ 127{
225 ssize_t ret; 128 return sprintf(page, "%u\n", hctx->tags->nr_tags);
226
227 spin_lock(&hctx->lock);
228 ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending");
229 spin_unlock(&hctx->lock);
230
231 return ret;
232} 129}
233 130
234static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) 131static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
132 char *page)
235{ 133{
236 return blk_mq_tag_sysfs_show(hctx->tags, page); 134 return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags);
237}
238
239static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
240{
241 return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
242} 135}
243 136
244static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) 137static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
@@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
259 return ret; 152 return ret;
260} 153}
261 154
262static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
263{
264 struct blk_mq_ctx *ctx;
265 unsigned int i;
266
267 hctx_for_each_ctx(hctx, ctx, i) {
268 blk_stat_init(&ctx->stat[BLK_STAT_READ]);
269 blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
270 }
271}
272
273static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
274 const char *page, size_t count)
275{
276 blk_mq_stat_clear(hctx);
277 return count;
278}
279
280static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
281{
282 return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
283 pre, (long long) stat->nr_samples,
284 (long long) stat->mean, (long long) stat->min,
285 (long long) stat->max);
286}
287
288static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
289{
290 struct blk_rq_stat stat[2];
291 ssize_t ret;
292
293 blk_stat_init(&stat[BLK_STAT_READ]);
294 blk_stat_init(&stat[BLK_STAT_WRITE]);
295
296 blk_hctx_stat_get(hctx, stat);
297
298 ret = print_stat(page, &stat[BLK_STAT_READ], "read :");
299 ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:");
300 return ret;
301}
302
303static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
304 .attr = {.name = "dispatched", .mode = S_IRUGO },
305 .show = blk_mq_sysfs_dispatched_show,
306};
307static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = {
308 .attr = {.name = "merged", .mode = S_IRUGO },
309 .show = blk_mq_sysfs_merged_show,
310};
311static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = {
312 .attr = {.name = "completed", .mode = S_IRUGO },
313 .show = blk_mq_sysfs_completed_show,
314};
315static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = {
316 .attr = {.name = "rq_list", .mode = S_IRUGO },
317 .show = blk_mq_sysfs_rq_list_show,
318};
319
320static struct attribute *default_ctx_attrs[] = { 155static struct attribute *default_ctx_attrs[] = {
321 &blk_mq_sysfs_dispatched.attr,
322 &blk_mq_sysfs_merged.attr,
323 &blk_mq_sysfs_completed.attr,
324 &blk_mq_sysfs_rq_list.attr,
325 NULL, 156 NULL,
326}; 157};
327 158
328static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = { 159static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
329 .attr = {.name = "queued", .mode = S_IRUGO }, 160 .attr = {.name = "nr_tags", .mode = S_IRUGO },
330 .show = blk_mq_hw_sysfs_queued_show, 161 .show = blk_mq_hw_sysfs_nr_tags_show,
331}; 162};
332static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = { 163static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
333 .attr = {.name = "run", .mode = S_IRUGO }, 164 .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
334 .show = blk_mq_hw_sysfs_run_show, 165 .show = blk_mq_hw_sysfs_nr_reserved_tags_show,
335};
336static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
337 .attr = {.name = "dispatched", .mode = S_IRUGO },
338 .show = blk_mq_hw_sysfs_dispatched_show,
339};
340static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
341 .attr = {.name = "active", .mode = S_IRUGO },
342 .show = blk_mq_hw_sysfs_active_show,
343};
344static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
345 .attr = {.name = "pending", .mode = S_IRUGO },
346 .show = blk_mq_hw_sysfs_rq_list_show,
347};
348static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
349 .attr = {.name = "tags", .mode = S_IRUGO },
350 .show = blk_mq_hw_sysfs_tags_show,
351}; 166};
352static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { 167static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
353 .attr = {.name = "cpu_list", .mode = S_IRUGO }, 168 .attr = {.name = "cpu_list", .mode = S_IRUGO },
354 .show = blk_mq_hw_sysfs_cpus_show, 169 .show = blk_mq_hw_sysfs_cpus_show,
355}; 170};
356static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
357 .attr = {.name = "io_poll", .mode = S_IWUSR | S_IRUGO },
358 .show = blk_mq_hw_sysfs_poll_show,
359 .store = blk_mq_hw_sysfs_poll_store,
360};
361static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
362 .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
363 .show = blk_mq_hw_sysfs_stat_show,
364 .store = blk_mq_hw_sysfs_stat_store,
365};
366 171
367static struct attribute *default_hw_ctx_attrs[] = { 172static struct attribute *default_hw_ctx_attrs[] = {
368 &blk_mq_hw_sysfs_queued.attr, 173 &blk_mq_hw_sysfs_nr_tags.attr,
369 &blk_mq_hw_sysfs_run.attr, 174 &blk_mq_hw_sysfs_nr_reserved_tags.attr,
370 &blk_mq_hw_sysfs_dispatched.attr,
371 &blk_mq_hw_sysfs_pending.attr,
372 &blk_mq_hw_sysfs_tags.attr,
373 &blk_mq_hw_sysfs_cpus.attr, 175 &blk_mq_hw_sysfs_cpus.attr,
374 &blk_mq_hw_sysfs_active.attr,
375 &blk_mq_hw_sysfs_poll.attr,
376 &blk_mq_hw_sysfs_stat.attr,
377 NULL, 176 NULL,
378}; 177};
379 178
@@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
455 kobject_put(&hctx->kobj); 254 kobject_put(&hctx->kobj);
456 } 255 }
457 256
257 blk_mq_debugfs_unregister(q);
258
458 kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); 259 kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
459 kobject_del(&q->mq_kobj); 260 kobject_del(&q->mq_kobj);
460 kobject_put(&q->mq_kobj); 261 kobject_put(&q->mq_kobj);
@@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q)
504 305
505 kobject_uevent(&q->mq_kobj, KOBJ_ADD); 306 kobject_uevent(&q->mq_kobj, KOBJ_ADD);
506 307
308 blk_mq_debugfs_register(q, kobject_name(&dev->kobj));
309
507 queue_for_each_hw_ctx(q, hctx, i) { 310 queue_for_each_hw_ctx(q, hctx, i) {
508 ret = blk_mq_register_hctx(hctx); 311 ret = blk_mq_register_hctx(hctx);
509 if (ret) 312 if (ret)
@@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
529 if (!q->mq_sysfs_init_done) 332 if (!q->mq_sysfs_init_done)
530 return; 333 return;
531 334
335 blk_mq_debugfs_unregister_hctxs(q);
336
532 queue_for_each_hw_ctx(q, hctx, i) 337 queue_for_each_hw_ctx(q, hctx, i)
533 blk_mq_unregister_hctx(hctx); 338 blk_mq_unregister_hctx(hctx);
534} 339}
@@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q)
541 if (!q->mq_sysfs_init_done) 346 if (!q->mq_sysfs_init_done)
542 return ret; 347 return ret;
543 348
349 blk_mq_debugfs_register_hctxs(q);
350
544 queue_for_each_hw_ctx(q, hctx, i) { 351 queue_for_each_hw_ctx(q, hctx, i) {
545 ret = blk_mq_register_hctx(hctx); 352 ret = blk_mq_register_hctx(hctx);
546 if (ret) 353 if (ret)
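
With the old debugging attributes moved to debugfs, each hardware-queue directory now exposes only nr_tags, nr_reserved_tags and cpu_list. The small reader below prints them from user space; the path layout /sys/block/<dev>/mq/<hctx>/ follows from how blk_mq_register_dev() names the kobjects, but treat the exact path and the default device name as assumptions to adjust for your system.

#include <stdio.h>

/* Print one per-hctx attribute, e.g. nr_tags or nr_reserved_tags. */
static void show_attr(const char *dev, int hctx, const char *attr)
{
	char path[256], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/mq/%d/%s", dev, hctx, attr);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	fclose(f);
}

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "nvme0n1";	/* example device name */

	show_attr(dev, 0, "nr_tags");
	show_attr(dev, 0, "nr_reserved_tags");
	show_attr(dev, 0, "cpu_list");
	return 0;
}
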
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index dcf5ce3ba4bf..54c84363c1b2 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
90 return atomic_read(&hctx->nr_active) < depth; 90 return atomic_read(&hctx->nr_active) < depth;
91} 91}
92 92
93static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) 93static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
94 struct sbitmap_queue *bt)
94{ 95{
95 if (!hctx_may_queue(hctx, bt)) 96 if (!(data->flags & BLK_MQ_REQ_INTERNAL) &&
97 !hctx_may_queue(data->hctx, bt))
96 return -1; 98 return -1;
97 return __sbitmap_queue_get(bt); 99 return __sbitmap_queue_get(bt);
98} 100}
99 101
100static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, 102unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
101 struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags)
102{ 103{
104 struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
105 struct sbitmap_queue *bt;
103 struct sbq_wait_state *ws; 106 struct sbq_wait_state *ws;
104 DEFINE_WAIT(wait); 107 DEFINE_WAIT(wait);
108 unsigned int tag_offset;
109 bool drop_ctx;
105 int tag; 110 int tag;
106 111
107 tag = __bt_get(hctx, bt); 112 if (data->flags & BLK_MQ_REQ_RESERVED) {
113 if (unlikely(!tags->nr_reserved_tags)) {
114 WARN_ON_ONCE(1);
115 return BLK_MQ_TAG_FAIL;
116 }
117 bt = &tags->breserved_tags;
118 tag_offset = 0;
119 } else {
120 bt = &tags->bitmap_tags;
121 tag_offset = tags->nr_reserved_tags;
122 }
123
124 tag = __blk_mq_get_tag(data, bt);
108 if (tag != -1) 125 if (tag != -1)
109 return tag; 126 goto found_tag;
110 127
111 if (data->flags & BLK_MQ_REQ_NOWAIT) 128 if (data->flags & BLK_MQ_REQ_NOWAIT)
112 return -1; 129 return BLK_MQ_TAG_FAIL;
113 130
114 ws = bt_wait_ptr(bt, hctx); 131 ws = bt_wait_ptr(bt, data->hctx);
132 drop_ctx = data->ctx == NULL;
115 do { 133 do {
116 prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); 134 prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE);
117 135
118 tag = __bt_get(hctx, bt); 136 tag = __blk_mq_get_tag(data, bt);
119 if (tag != -1) 137 if (tag != -1)
120 break; 138 break;
121 139
122 /* 140 /*
123 * We're out of tags on this hardware queue, kick any 141 * We're out of tags on this hardware queue, kick any
124 * pending IO submits before going to sleep waiting for 142 * pending IO submits before going to sleep waiting for
125 * some to complete. Note that hctx can be NULL here for 143 * some to complete.
126 * reserved tag allocation.
127 */ 144 */
128 if (hctx) 145 blk_mq_run_hw_queue(data->hctx, false);
129 blk_mq_run_hw_queue(hctx, false);
130 146
131 /* 147 /*
132 * Retry tag allocation after running the hardware queue, 148 * Retry tag allocation after running the hardware queue,
133 * as running the queue may also have found completions. 149 * as running the queue may also have found completions.
134 */ 150 */
135 tag = __bt_get(hctx, bt); 151 tag = __blk_mq_get_tag(data, bt);
136 if (tag != -1) 152 if (tag != -1)
137 break; 153 break;
138 154
139 blk_mq_put_ctx(data->ctx); 155 if (data->ctx)
156 blk_mq_put_ctx(data->ctx);
140 157
141 io_schedule(); 158 io_schedule();
142 159
143 data->ctx = blk_mq_get_ctx(data->q); 160 data->ctx = blk_mq_get_ctx(data->q);
144 data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu); 161 data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu);
145 if (data->flags & BLK_MQ_REQ_RESERVED) { 162 tags = blk_mq_tags_from_data(data);
146 bt = &data->hctx->tags->breserved_tags; 163 if (data->flags & BLK_MQ_REQ_RESERVED)
147 } else { 164 bt = &tags->breserved_tags;
148 hctx = data->hctx; 165 else
149 bt = &hctx->tags->bitmap_tags; 166 bt = &tags->bitmap_tags;
150 } 167
151 finish_wait(&ws->wait, &wait); 168 finish_wait(&ws->wait, &wait);
152 ws = bt_wait_ptr(bt, hctx); 169 ws = bt_wait_ptr(bt, data->hctx);
153 } while (1); 170 } while (1);
154 171
155 finish_wait(&ws->wait, &wait); 172 if (drop_ctx && data->ctx)
156 return tag; 173 blk_mq_put_ctx(data->ctx);
157}
158
159static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data)
160{
161 int tag;
162
163 tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx,
164 data->hctx->tags);
165 if (tag >= 0)
166 return tag + data->hctx->tags->nr_reserved_tags;
167
168 return BLK_MQ_TAG_FAIL;
169}
170
171static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
172{
173 int tag;
174
175 if (unlikely(!data->hctx->tags->nr_reserved_tags)) {
176 WARN_ON_ONCE(1);
177 return BLK_MQ_TAG_FAIL;
178 }
179
180 tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL,
181 data->hctx->tags);
182 if (tag < 0)
183 return BLK_MQ_TAG_FAIL;
184 174
185 return tag; 175 finish_wait(&ws->wait, &wait);
186}
187 176
188unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) 177found_tag:
189{ 178 return tag + tag_offset;
190 if (data->flags & BLK_MQ_REQ_RESERVED)
191 return __blk_mq_get_reserved_tag(data);
192 return __blk_mq_get_tag(data);
193} 179}
194 180
195void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, 181void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
196 unsigned int tag) 182 struct blk_mq_ctx *ctx, unsigned int tag)
197{ 183{
198 struct blk_mq_tags *tags = hctx->tags;
199
200 if (tag >= tags->nr_reserved_tags) { 184 if (tag >= tags->nr_reserved_tags) {
201 const int real_tag = tag - tags->nr_reserved_tags; 185 const int real_tag = tag - tags->nr_reserved_tags;
202 186
@@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
312 struct blk_mq_tags *tags = set->tags[i]; 296 struct blk_mq_tags *tags = set->tags[i];
313 297
314 for (j = 0; j < tags->nr_tags; j++) { 298 for (j = 0; j < tags->nr_tags; j++) {
315 if (!tags->rqs[j]) 299 if (!tags->static_rqs[j])
316 continue; 300 continue;
317 301
318 ret = set->ops->reinit_request(set->driver_data, 302 ret = set->ops->reinit_request(set->driver_data,
319 tags->rqs[j]); 303 tags->static_rqs[j]);
320 if (ret) 304 if (ret)
321 goto out; 305 goto out;
322 } 306 }
@@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
351 335
352} 336}
353 337
354static unsigned int bt_unused_tags(const struct sbitmap_queue *bt)
355{
356 return bt->sb.depth - sbitmap_weight(&bt->sb);
357}
358
359static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, 338static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
360 bool round_robin, int node) 339 bool round_robin, int node)
361{ 340{
@@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
411 kfree(tags); 390 kfree(tags);
412} 391}
413 392
414int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) 393int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
394 struct blk_mq_tags **tagsptr, unsigned int tdepth,
395 bool can_grow)
415{ 396{
416 tdepth -= tags->nr_reserved_tags; 397 struct blk_mq_tags *tags = *tagsptr;
417 if (tdepth > tags->nr_tags) 398
399 if (tdepth <= tags->nr_reserved_tags)
418 return -EINVAL; 400 return -EINVAL;
419 401
402 tdepth -= tags->nr_reserved_tags;
403
420 /* 404 /*
421 * Don't need (or can't) update reserved tags here, they remain 405 * If we are allowed to grow beyond the original size, allocate
422 * static and should never need resizing. 406 * a new set of tags before freeing the old one.
423 */ 407 */
424 sbitmap_queue_resize(&tags->bitmap_tags, tdepth); 408 if (tdepth > tags->nr_tags) {
409 struct blk_mq_tag_set *set = hctx->queue->tag_set;
410 struct blk_mq_tags *new;
411 bool ret;
412
413 if (!can_grow)
414 return -EINVAL;
415
416 /*
417 * We need some sort of upper limit; set it high enough that
418 * no valid use cases should require more.
419 */
420 if (tdepth > 16 * BLKDEV_MAX_RQ)
421 return -EINVAL;
422
423 new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0);
424 if (!new)
425 return -ENOMEM;
426 ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth);
427 if (ret) {
428 blk_mq_free_rq_map(new);
429 return -ENOMEM;
430 }
431
432 blk_mq_free_rqs(set, *tagsptr, hctx->queue_num);
433 blk_mq_free_rq_map(*tagsptr);
434 *tagsptr = new;
435 } else {
436 /*
437 * Don't need (or can't) update reserved tags here, they
438 * remain static and should never need resizing.
439 */
440 sbitmap_queue_resize(&tags->bitmap_tags, tdepth);
441 }
425 442
426 blk_mq_tag_wakeup_all(tags, false);
427 return 0; 443 return 0;
428} 444}
429 445
@@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq)
454 (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); 470 (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
455} 471}
456EXPORT_SYMBOL(blk_mq_unique_tag); 472EXPORT_SYMBOL(blk_mq_unique_tag);
457
458ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
459{
460 char *orig_page = page;
461 unsigned int free, res;
462
463 if (!tags)
464 return 0;
465
466 page += sprintf(page, "nr_tags=%u, reserved_tags=%u, "
467 "bits_per_word=%u\n",
468 tags->nr_tags, tags->nr_reserved_tags,
469 1U << tags->bitmap_tags.sb.shift);
470
471 free = bt_unused_tags(&tags->bitmap_tags);
472 res = bt_unused_tags(&tags->breserved_tags);
473
474 page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
475 page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
476
477 return page - orig_page;
478}
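
The rewritten blk_mq_get_tag() above picks one of two sbitmaps and then applies a fixed offset, so reserved tags land in [0, nr_reserved_tags) and normal tags in [nr_reserved_tags, nr_tags); blk_mq_put_tag() undoes the same offset on release. The user-space fragment below illustrates only that offset arithmetic, with the sbitmaps reduced to plain indices; nothing in it is a kernel API.

#include <stdio.h>
#include <stdbool.h>

#define NR_RESERVED_TAGS 2
#define NR_NORMAL_TAGS   6

/* Map an index in the chosen bitmap to the externally visible tag value. */
static unsigned int make_tag(unsigned int bitmap_index, bool reserved)
{
	unsigned int tag_offset = reserved ? 0 : NR_RESERVED_TAGS;

	return bitmap_index + tag_offset;	/* "return tag + tag_offset" in the patch */
}

/* Reverse mapping used on release, mirroring blk_mq_put_tag(). */
static void put_tag(unsigned int tag)
{
	if (tag >= NR_RESERVED_TAGS)
		printf("tag %u -> bitmap_tags index %u\n", tag, tag - NR_RESERVED_TAGS);
	else
		printf("tag %u -> breserved_tags index %u\n", tag, tag);
}

int main(void)
{
	printf("reserved alloc 0 -> tag %u\n", make_tag(0, true));
	printf("normal   alloc 0 -> tag %u\n", make_tag(0, false));
	printf("normal   alloc 3 -> tag %u\n", make_tag(3, false));

	put_tag(0);
	put_tag(2);
	put_tag(5);
	return 0;
}
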
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index d1662734dc53..63497423c5cd 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -16,6 +16,7 @@ struct blk_mq_tags {
16 struct sbitmap_queue breserved_tags; 16 struct sbitmap_queue breserved_tags;
17 17
18 struct request **rqs; 18 struct request **rqs;
19 struct request **static_rqs;
19 struct list_head page_list; 20 struct list_head page_list;
20}; 21};
21 22
@@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r
24extern void blk_mq_free_tags(struct blk_mq_tags *tags); 25extern void blk_mq_free_tags(struct blk_mq_tags *tags);
25 26
26extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); 27extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
27extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, 28extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
28 unsigned int tag); 29 struct blk_mq_ctx *ctx, unsigned int tag);
29extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); 30extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
30extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); 31extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
31extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); 32 struct blk_mq_tags **tags,
33 unsigned int depth, bool can_grow);
32extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); 34extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
33void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, 35void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
34 void *priv); 36 void *priv);
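
The header now carries two request arrays: static_rqs[] holds the preallocated requests, indexed by the tag they were allocated under (the scheduler tag when BLK_MQ_REQ_INTERNAL is set), while rqs[] stays indexed by the driver tag and is only filled in when blk_mq_get_driver_tag() assigns one at dispatch time (tags->rqs[rq->tag] = rq in the blk-mq.c hunk further down). A small user-space model of the two lookups follows; the struct and helper names are illustrative only.

#include <stdio.h>
#include <stdlib.h>

struct fake_rq {
	int internal_tag;	/* scheduler tag, set at allocation */
	int tag;		/* driver tag, -1 until dispatch */
};

#define NR_TAGS 4

static struct fake_rq *static_rqs[NR_TAGS];	/* indexed by allocation (scheduler) tag */
static struct fake_rq *rqs[NR_TAGS];		/* indexed by driver tag, for completion lookup */

static struct fake_rq *alloc_rq(int sched_tag)
{
	struct fake_rq *rq = static_rqs[sched_tag];

	rq->internal_tag = sched_tag;
	rq->tag = -1;
	return rq;
}

static void assign_driver_tag(struct fake_rq *rq, int driver_tag)
{
	rq->tag = driver_tag;
	rqs[driver_tag] = rq;	/* mirrors data.hctx->tags->rqs[rq->tag] = rq */
}

int main(void)
{
	for (int i = 0; i < NR_TAGS; i++)
		static_rqs[i] = calloc(1, sizeof(struct fake_rq));

	struct fake_rq *rq = alloc_rq(2);	/* scheduler tag 2 */
	assign_driver_tag(rq, 0);		/* driver tag 0 at dispatch */

	printf("completion on driver tag 0 finds sched tag %d\n", rqs[0]->internal_tag);

	for (int i = 0; i < NR_TAGS; i++)
		free(static_rqs[i]);
	return 0;
}
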
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c3400b5444a7..489076e7ae15 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -32,6 +32,7 @@
32#include "blk-mq-tag.h" 32#include "blk-mq-tag.h"
33#include "blk-stat.h" 33#include "blk-stat.h"
34#include "blk-wbt.h" 34#include "blk-wbt.h"
35#include "blk-mq-sched.h"
35 36
36static DEFINE_MUTEX(all_q_mutex); 37static DEFINE_MUTEX(all_q_mutex);
37static LIST_HEAD(all_q_list); 38static LIST_HEAD(all_q_list);
@@ -39,9 +40,11 @@ static LIST_HEAD(all_q_list);
39/* 40/*
40 * Check if any of the ctx's have pending work in this hardware queue 41 * Check if any of the ctx's have pending work in this hardware queue
41 */ 42 */
42static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) 43bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
43{ 44{
44 return sbitmap_any_bit_set(&hctx->ctx_map); 45 return sbitmap_any_bit_set(&hctx->ctx_map) ||
46 !list_empty_careful(&hctx->dispatch) ||
47 blk_mq_sched_has_work(hctx);
45} 48}
46 49
47/* 50/*
@@ -167,8 +170,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
167} 170}
168EXPORT_SYMBOL(blk_mq_can_queue); 171EXPORT_SYMBOL(blk_mq_can_queue);
169 172
170static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, 173void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
171 struct request *rq, unsigned int op) 174 struct request *rq, unsigned int op)
172{ 175{
173 INIT_LIST_HEAD(&rq->queuelist); 176 INIT_LIST_HEAD(&rq->queuelist);
174 /* csd/requeue_work/fifo_time is initialized before use */ 177 /* csd/requeue_work/fifo_time is initialized before use */
@@ -213,53 +216,58 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
213 216
214 ctx->rq_dispatched[op_is_sync(op)]++; 217 ctx->rq_dispatched[op_is_sync(op)]++;
215} 218}
219EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init);
216 220
217static struct request * 221struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
218__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) 222 unsigned int op)
219{ 223{
220 struct request *rq; 224 struct request *rq;
221 unsigned int tag; 225 unsigned int tag;
222 226
223 tag = blk_mq_get_tag(data); 227 tag = blk_mq_get_tag(data);
224 if (tag != BLK_MQ_TAG_FAIL) { 228 if (tag != BLK_MQ_TAG_FAIL) {
225 rq = data->hctx->tags->rqs[tag]; 229 struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
226 230
227 if (blk_mq_tag_busy(data->hctx)) { 231 rq = tags->static_rqs[tag];
228 rq->rq_flags = RQF_MQ_INFLIGHT; 232
229 atomic_inc(&data->hctx->nr_active); 233 if (data->flags & BLK_MQ_REQ_INTERNAL) {
234 rq->tag = -1;
235 rq->internal_tag = tag;
236 } else {
237 if (blk_mq_tag_busy(data->hctx)) {
238 rq->rq_flags = RQF_MQ_INFLIGHT;
239 atomic_inc(&data->hctx->nr_active);
240 }
241 rq->tag = tag;
242 rq->internal_tag = -1;
230 } 243 }
231 244
232 rq->tag = tag;
233 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); 245 blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
234 return rq; 246 return rq;
235 } 247 }
236 248
237 return NULL; 249 return NULL;
238} 250}
251EXPORT_SYMBOL_GPL(__blk_mq_alloc_request);
239 252
240struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 253struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
241 unsigned int flags) 254 unsigned int flags)
242{ 255{
243 struct blk_mq_ctx *ctx; 256 struct blk_mq_alloc_data alloc_data = { .flags = flags };
244 struct blk_mq_hw_ctx *hctx;
245 struct request *rq; 257 struct request *rq;
246 struct blk_mq_alloc_data alloc_data;
247 int ret; 258 int ret;
248 259
249 ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); 260 ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
250 if (ret) 261 if (ret)
251 return ERR_PTR(ret); 262 return ERR_PTR(ret);
252 263
253 ctx = blk_mq_get_ctx(q); 264 rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
254 hctx = blk_mq_map_queue(q, ctx->cpu);
255 blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
256 rq = __blk_mq_alloc_request(&alloc_data, rw);
257 blk_mq_put_ctx(ctx);
258 265
259 if (!rq) { 266 blk_mq_put_ctx(alloc_data.ctx);
260 blk_queue_exit(q); 267 blk_queue_exit(q);
268
269 if (!rq)
261 return ERR_PTR(-EWOULDBLOCK); 270 return ERR_PTR(-EWOULDBLOCK);
262 }
263 271
264 rq->__data_len = 0; 272 rq->__data_len = 0;
265 rq->__sector = (sector_t) -1; 273 rq->__sector = (sector_t) -1;
@@ -319,10 +327,10 @@ out_queue_exit:
319} 327}
320EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); 328EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
321 329
322static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, 330void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
323 struct blk_mq_ctx *ctx, struct request *rq) 331 struct request *rq)
324{ 332{
325 const int tag = rq->tag; 333 const int sched_tag = rq->internal_tag;
326 struct request_queue *q = rq->q; 334 struct request_queue *q = rq->q;
327 335
328 if (rq->rq_flags & RQF_MQ_INFLIGHT) 336 if (rq->rq_flags & RQF_MQ_INFLIGHT)
@@ -333,23 +341,31 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
333 341
334 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 342 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
335 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); 343 clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags);
336 blk_mq_put_tag(hctx, ctx, tag); 344 if (rq->tag != -1)
345 blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
346 if (sched_tag != -1)
347 blk_mq_sched_completed_request(hctx, rq);
348 blk_mq_sched_restart_queues(hctx);
337 blk_queue_exit(q); 349 blk_queue_exit(q);
338} 350}
339 351
340void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq) 352static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx,
353 struct request *rq)
341{ 354{
342 struct blk_mq_ctx *ctx = rq->mq_ctx; 355 struct blk_mq_ctx *ctx = rq->mq_ctx;
343 356
344 ctx->rq_completed[rq_is_sync(rq)]++; 357 ctx->rq_completed[rq_is_sync(rq)]++;
345 __blk_mq_free_request(hctx, ctx, rq); 358 __blk_mq_finish_request(hctx, ctx, rq);
359}
346 360
361void blk_mq_finish_request(struct request *rq)
362{
363 blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
347} 364}
348EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
349 365
350void blk_mq_free_request(struct request *rq) 366void blk_mq_free_request(struct request *rq)
351{ 367{
352 blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq); 368 blk_mq_sched_put_request(rq);
353} 369}
354EXPORT_SYMBOL_GPL(blk_mq_free_request); 370EXPORT_SYMBOL_GPL(blk_mq_free_request);
355 371
@@ -467,6 +483,8 @@ void blk_mq_start_request(struct request *rq)
467{ 483{
468 struct request_queue *q = rq->q; 484 struct request_queue *q = rq->q;
469 485
486 blk_mq_sched_started_request(rq);
487
470 trace_block_rq_issue(q, rq); 488 trace_block_rq_issue(q, rq);
471 489
472 rq->resid_len = blk_rq_bytes(rq); 490 rq->resid_len = blk_rq_bytes(rq);
@@ -515,6 +533,7 @@ static void __blk_mq_requeue_request(struct request *rq)
515 533
516 trace_block_rq_requeue(q, rq); 534 trace_block_rq_requeue(q, rq);
517 wbt_requeue(q->rq_wb, &rq->issue_stat); 535 wbt_requeue(q->rq_wb, &rq->issue_stat);
536 blk_mq_sched_requeue_request(rq);
518 537
519 if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { 538 if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
520 if (q->dma_drain_size && blk_rq_bytes(rq)) 539 if (q->dma_drain_size && blk_rq_bytes(rq))
@@ -549,13 +568,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
549 568
550 rq->rq_flags &= ~RQF_SOFTBARRIER; 569 rq->rq_flags &= ~RQF_SOFTBARRIER;
551 list_del_init(&rq->queuelist); 570 list_del_init(&rq->queuelist);
552 blk_mq_insert_request(rq, true, false, false); 571 blk_mq_sched_insert_request(rq, true, false, false, true);
553 } 572 }
554 573
555 while (!list_empty(&rq_list)) { 574 while (!list_empty(&rq_list)) {
556 rq = list_entry(rq_list.next, struct request, queuelist); 575 rq = list_entry(rq_list.next, struct request, queuelist);
557 list_del_init(&rq->queuelist); 576 list_del_init(&rq->queuelist);
558 blk_mq_insert_request(rq, false, false, false); 577 blk_mq_sched_insert_request(rq, false, false, false, true);
559 } 578 }
560 579
561 blk_mq_run_hw_queues(q, false); 580 blk_mq_run_hw_queues(q, false);
@@ -639,7 +658,7 @@ struct blk_mq_timeout_data {
639 658
640void blk_mq_rq_timed_out(struct request *req, bool reserved) 659void blk_mq_rq_timed_out(struct request *req, bool reserved)
641{ 660{
642 struct blk_mq_ops *ops = req->q->mq_ops; 661 const struct blk_mq_ops *ops = req->q->mq_ops;
643 enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; 662 enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
644 663
645 /* 664 /*
@@ -763,6 +782,12 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
763 continue; 782 continue;
764 783
765 el_ret = blk_try_merge(rq, bio); 784 el_ret = blk_try_merge(rq, bio);
785 if (el_ret == ELEVATOR_NO_MERGE)
786 continue;
787
788 if (!blk_mq_sched_allow_merge(q, rq, bio))
789 break;
790
766 if (el_ret == ELEVATOR_BACK_MERGE) { 791 if (el_ret == ELEVATOR_BACK_MERGE) {
767 if (bio_attempt_back_merge(q, rq, bio)) { 792 if (bio_attempt_back_merge(q, rq, bio)) {
768 ctx->rq_merged++; 793 ctx->rq_merged++;
@@ -803,7 +828,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
803 * Process software queues that have been marked busy, splicing them 828 * Process software queues that have been marked busy, splicing them
804 * to the for-dispatch 829 * to the for-dispatch
805 */ 830 */
806static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) 831void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
807{ 832{
808 struct flush_busy_ctx_data data = { 833 struct flush_busy_ctx_data data = {
809 .hctx = hctx, 834 .hctx = hctx,
@@ -812,6 +837,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
812 837
813 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); 838 sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data);
814} 839}
840EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs);
815 841
816static inline unsigned int queued_to_index(unsigned int queued) 842static inline unsigned int queued_to_index(unsigned int queued)
817{ 843{
@@ -821,6 +847,74 @@ static inline unsigned int queued_to_index(unsigned int queued)
821 return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); 847 return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
822} 848}
823 849
850bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
851 bool wait)
852{
853 struct blk_mq_alloc_data data = {
854 .q = rq->q,
855 .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu),
856 .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
857 };
858
859 if (rq->tag != -1) {
860done:
861 if (hctx)
862 *hctx = data.hctx;
863 return true;
864 }
865
866 rq->tag = blk_mq_get_tag(&data);
867 if (rq->tag >= 0) {
868 if (blk_mq_tag_busy(data.hctx)) {
869 rq->rq_flags |= RQF_MQ_INFLIGHT;
870 atomic_inc(&data.hctx->nr_active);
871 }
872 data.hctx->tags->rqs[rq->tag] = rq;
873 goto done;
874 }
875
876 return false;
877}
878
879static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
880 struct request *rq)
881{
882 if (rq->tag == -1 || rq->internal_tag == -1)
883 return;
884
885 blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
886 rq->tag = -1;
887
888 if (rq->rq_flags & RQF_MQ_INFLIGHT) {
889 rq->rq_flags &= ~RQF_MQ_INFLIGHT;
890 atomic_dec(&hctx->nr_active);
891 }
892}
893
894/*
895 * If we fail getting a driver tag because all the driver tags are already
896 * assigned and on the dispatch list, BUT the first entry does not have a
897 * tag, then we could deadlock. For that case, move entries with assigned
898 * driver tags to the front, leaving the set of tagged requests in the
899 * same order, and the untagged set in the same order.
900 */
901static bool reorder_tags_to_front(struct list_head *list)
902{
903 struct request *rq, *tmp, *first = NULL;
904
905 list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
906 if (rq == first)
907 break;
908 if (rq->tag != -1) {
909 list_move(&rq->queuelist, list);
910 if (!first)
911 first = rq;
912 }
913 }
914
915 return first != NULL;
916}
917
824bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) 918bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
825{ 919{
826 struct request_queue *q = hctx->queue; 920 struct request_queue *q = hctx->queue;
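
reorder_tags_to_front() above is a stable partition of the dispatch list: walking it in reverse and moving every request that already owns a driver tag to the head keeps both the tagged and the untagged requests in their original relative order, and guarantees the entry at the front can make progress. The user-space rendition below applies the same reverse walk to a minimal circular doubly linked list; it uses no kernel list API, and the node layout is illustrative only.

#include <stdio.h>

/* Minimal circular doubly linked list with a head sentinel, like the kernel's. */
struct node {
	struct node *prev, *next;
	int tag;	/* -1 means "no driver tag yet" */
};

static void list_init(struct node *head)
{
	head->prev = head->next = head;
}

static void list_del_node(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

static void list_add_head(struct node *head, struct node *n)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

/* Reverse walk, moving tagged entries to the front; stops at the first moved one. */
static int reorder_tags_to_front(struct node *head)
{
	struct node *n = head->prev, *first = NULL;

	while (n != head && n != first) {
		struct node *prev = n->prev;	/* saved before n may be moved */

		if (n->tag != -1) {
			list_del_node(n);
			list_add_head(head, n);
			if (!first)
				first = n;
		}
		n = prev;
	}
	return first != NULL;
}

int main(void)
{
	struct node head;
	struct node a = { .tag = -1 }, b = { .tag = 7 }, c = { .tag = -1 }, d = { .tag = 9 };
	struct node *nodes[] = { &a, &b, &c, &d };

	list_init(&head);
	for (int i = 3; i >= 0; i--)	/* build the list in order a, b, c, d */
		list_add_head(&head, nodes[i]);

	reorder_tags_to_front(&head);	/* expected order afterwards: b, d, a, c */
	for (struct node *n = head.next; n != &head; n = n->next)
		printf("tag %d\n", n->tag);
	return 0;
}

Run on the sample list a(-1), b(7), c(-1), d(9) it prints tags 7, 9, -1, -1: the tagged pair moves ahead while each subset keeps its original order.
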
@@ -843,6 +937,20 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
843 struct blk_mq_queue_data bd; 937 struct blk_mq_queue_data bd;
844 938
845 rq = list_first_entry(list, struct request, queuelist); 939 rq = list_first_entry(list, struct request, queuelist);
940 if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
941 if (!queued && reorder_tags_to_front(list))
942 continue;
943
944 /*
945 * We failed getting a driver tag. Mark the queue(s)
946 * as needing a restart. Retry getting a tag again,
947 * in case the needed IO completed right before we
948 * marked the queue as needing a restart.
949 */
950 blk_mq_sched_mark_restart(hctx);
951 if (!blk_mq_get_driver_tag(rq, &hctx, false))
952 break;
953 }
846 list_del_init(&rq->queuelist); 954 list_del_init(&rq->queuelist);
847 955
848 bd.rq = rq; 956 bd.rq = rq;
@@ -855,6 +963,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
855 queued++; 963 queued++;
856 break; 964 break;
857 case BLK_MQ_RQ_QUEUE_BUSY: 965 case BLK_MQ_RQ_QUEUE_BUSY:
966 blk_mq_put_driver_tag(hctx, rq);
858 list_add(&rq->queuelist, list); 967 list_add(&rq->queuelist, list);
859 __blk_mq_requeue_request(rq); 968 __blk_mq_requeue_request(rq);
860 break; 969 break;
@@ -885,7 +994,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
885 */ 994 */
886 if (!list_empty(list)) { 995 if (!list_empty(list)) {
887 spin_lock(&hctx->lock); 996 spin_lock(&hctx->lock);
888 list_splice(list, &hctx->dispatch); 997 list_splice_init(list, &hctx->dispatch);
889 spin_unlock(&hctx->lock); 998 spin_unlock(&hctx->lock);
890 999
891 /* 1000 /*
@@ -896,47 +1005,17 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
896 * the requests in rq_list might get lost. 1005 * the requests in rq_list might get lost.
897 * 1006 *
898 * blk_mq_run_hw_queue() already checks the STOPPED bit 1007 * blk_mq_run_hw_queue() already checks the STOPPED bit
899 **/ 1008 *
900 blk_mq_run_hw_queue(hctx, true); 1009 * If RESTART is set, then let completion restart the queue
1010 * instead of potentially looping here.
1011 */
1012 if (!blk_mq_sched_needs_restart(hctx))
1013 blk_mq_run_hw_queue(hctx, true);
901 } 1014 }
902 1015
903 return ret != BLK_MQ_RQ_QUEUE_BUSY; 1016 return ret != BLK_MQ_RQ_QUEUE_BUSY;
904} 1017}
905 1018
906/*
907 * Run this hardware queue, pulling any software queues mapped to it in.
908 * Note that this function currently has various problems around ordering
909 * of IO. In particular, we'd like FIFO behaviour on handling existing
910 * items on the hctx->dispatch list. Ignore that for now.
911 */
912static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
913{
914 LIST_HEAD(rq_list);
915
916 if (unlikely(blk_mq_hctx_stopped(hctx)))
917 return;
918
919 hctx->run++;
920
921 /*
922 * Touch any software queue that has pending entries.
923 */
924 flush_busy_ctxs(hctx, &rq_list);
925
926 /*
927 * If we have previous entries on our dispatch list, grab them
928 * and stuff them at the front for more fair dispatch.
929 */
930 if (!list_empty_careful(&hctx->dispatch)) {
931 spin_lock(&hctx->lock);
932 if (!list_empty(&hctx->dispatch))
933 list_splice_init(&hctx->dispatch, &rq_list);
934 spin_unlock(&hctx->lock);
935 }
936
937 blk_mq_dispatch_rq_list(hctx, &rq_list);
938}
939
940static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) 1019static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
941{ 1020{
942 int srcu_idx; 1021 int srcu_idx;
@@ -946,11 +1025,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
946 1025
947 if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { 1026 if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
948 rcu_read_lock(); 1027 rcu_read_lock();
949 blk_mq_process_rq_list(hctx); 1028 blk_mq_sched_dispatch_requests(hctx);
950 rcu_read_unlock(); 1029 rcu_read_unlock();
951 } else { 1030 } else {
952 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); 1031 srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
953 blk_mq_process_rq_list(hctx); 1032 blk_mq_sched_dispatch_requests(hctx);
954 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); 1033 srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
955 } 1034 }
956} 1035}
@@ -1006,8 +1085,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
1006 int i; 1085 int i;
1007 1086
1008 queue_for_each_hw_ctx(q, hctx, i) { 1087 queue_for_each_hw_ctx(q, hctx, i) {
1009 if ((!blk_mq_hctx_has_pending(hctx) && 1088 if (!blk_mq_hctx_has_pending(hctx) ||
1010 list_empty_careful(&hctx->dispatch)) ||
1011 blk_mq_hctx_stopped(hctx)) 1089 blk_mq_hctx_stopped(hctx))
1012 continue; 1090 continue;
1013 1091
@@ -1116,6 +1194,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
1116 if (unlikely(!blk_mq_hw_queue_mapped(hctx))) 1194 if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
1117 return; 1195 return;
1118 1196
1197 blk_mq_stop_hw_queue(hctx);
1119 kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), 1198 kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
1120 &hctx->delay_work, msecs_to_jiffies(msecs)); 1199 &hctx->delay_work, msecs_to_jiffies(msecs));
1121} 1200}
@@ -1135,8 +1214,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
1135 list_add_tail(&rq->queuelist, &ctx->rq_list); 1214 list_add_tail(&rq->queuelist, &ctx->rq_list);
1136} 1215}
1137 1216
1138static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, 1217void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
1139 struct request *rq, bool at_head) 1218 bool at_head)
1140{ 1219{
1141 struct blk_mq_ctx *ctx = rq->mq_ctx; 1220 struct blk_mq_ctx *ctx = rq->mq_ctx;
1142 1221
@@ -1144,32 +1223,10 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
1144 blk_mq_hctx_mark_pending(hctx, ctx); 1223 blk_mq_hctx_mark_pending(hctx, ctx);
1145} 1224}
1146 1225
1147void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, 1226void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
1148 bool async) 1227 struct list_head *list)
1149{
1150 struct blk_mq_ctx *ctx = rq->mq_ctx;
1151 struct request_queue *q = rq->q;
1152 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
1153
1154 spin_lock(&ctx->lock);
1155 __blk_mq_insert_request(hctx, rq, at_head);
1156 spin_unlock(&ctx->lock);
1157
1158 if (run_queue)
1159 blk_mq_run_hw_queue(hctx, async);
1160}
1161
1162static void blk_mq_insert_requests(struct request_queue *q,
1163 struct blk_mq_ctx *ctx,
1164 struct list_head *list,
1165 int depth,
1166 bool from_schedule)
1167 1228
1168{ 1229{
1169 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
1170
1171 trace_block_unplug(q, depth, !from_schedule);
1172
1173 /* 1230 /*
1174 * preemption doesn't flush plug list, so it's possible ctx->cpu is 1231 * preemption doesn't flush plug list, so it's possible ctx->cpu is
1175 * offline now 1232 * offline now
@@ -1185,8 +1242,6 @@ static void blk_mq_insert_requests(struct request_queue *q,
1185 } 1242 }
1186 blk_mq_hctx_mark_pending(hctx, ctx); 1243 blk_mq_hctx_mark_pending(hctx, ctx);
1187 spin_unlock(&ctx->lock); 1244 spin_unlock(&ctx->lock);
1188
1189 blk_mq_run_hw_queue(hctx, from_schedule);
1190} 1245}
1191 1246
1192static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) 1247static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1222,9 +1277,10 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
1222 BUG_ON(!rq->q); 1277 BUG_ON(!rq->q);
1223 if (rq->mq_ctx != this_ctx) { 1278 if (rq->mq_ctx != this_ctx) {
1224 if (this_ctx) { 1279 if (this_ctx) {
1225 blk_mq_insert_requests(this_q, this_ctx, 1280 trace_block_unplug(this_q, depth, from_schedule);
1226 &ctx_list, depth, 1281 blk_mq_sched_insert_requests(this_q, this_ctx,
1227 from_schedule); 1282 &ctx_list,
1283 from_schedule);
1228 } 1284 }
1229 1285
1230 this_ctx = rq->mq_ctx; 1286 this_ctx = rq->mq_ctx;
@@ -1241,8 +1297,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
1241 * on 'ctx_list'. Do those. 1297 * on 'ctx_list'. Do those.
1242 */ 1298 */
1243 if (this_ctx) { 1299 if (this_ctx) {
1244 blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth, 1300 trace_block_unplug(this_q, depth, from_schedule);
1245 from_schedule); 1301 blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list,
1302 from_schedule);
1246 } 1303 }
1247} 1304}
1248 1305
@@ -1280,46 +1337,39 @@ insert_rq:
1280 } 1337 }
1281 1338
1282 spin_unlock(&ctx->lock); 1339 spin_unlock(&ctx->lock);
1283 __blk_mq_free_request(hctx, ctx, rq); 1340 __blk_mq_finish_request(hctx, ctx, rq);
1284 return true; 1341 return true;
1285 } 1342 }
1286} 1343}
1287 1344
1288static struct request *blk_mq_map_request(struct request_queue *q, 1345static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
1289 struct bio *bio,
1290 struct blk_mq_alloc_data *data)
1291{ 1346{
1292 struct blk_mq_hw_ctx *hctx; 1347 if (rq->tag != -1)
1293 struct blk_mq_ctx *ctx; 1348 return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false);
1294 struct request *rq;
1295
1296 blk_queue_enter_live(q);
1297 ctx = blk_mq_get_ctx(q);
1298 hctx = blk_mq_map_queue(q, ctx->cpu);
1299 1349
1300 trace_block_getrq(q, bio, bio->bi_opf); 1350 return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
1301 blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
1302 rq = __blk_mq_alloc_request(data, bio->bi_opf);
1303
1304 data->hctx->queued++;
1305 return rq;
1306} 1351}
1307 1352
1308static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) 1353static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
1309{ 1354{
1310 int ret;
1311 struct request_queue *q = rq->q; 1355 struct request_queue *q = rq->q;
1312 struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
1313 struct blk_mq_queue_data bd = { 1356 struct blk_mq_queue_data bd = {
1314 .rq = rq, 1357 .rq = rq,
1315 .list = NULL, 1358 .list = NULL,
1316 .last = 1 1359 .last = 1
1317 }; 1360 };
1318 blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num); 1361 struct blk_mq_hw_ctx *hctx;
1362 blk_qc_t new_cookie;
1363 int ret;
1319 1364
1320 if (blk_mq_hctx_stopped(hctx)) 1365 if (q->elevator)
1321 goto insert; 1366 goto insert;
1322 1367
1368 if (!blk_mq_get_driver_tag(rq, &hctx, false))
1369 goto insert;
1370
1371 new_cookie = request_to_qc_t(hctx, rq);
1372
1323 /* 1373 /*
1324 * For OK queue, we are done. For error, kill it. Any other 1374 * For OK queue, we are done. For error, kill it. Any other
1325 * error (busy), just add it to our list as we previously 1375 * error (busy), just add it to our list as we previously
@@ -1341,7 +1391,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie)
1341 } 1391 }
1342 1392
1343insert: 1393insert:
1344 blk_mq_insert_request(rq, false, true, true); 1394 blk_mq_sched_insert_request(rq, false, true, true, false);
1345} 1395}
1346 1396
1347/* 1397/*
@@ -1352,8 +1402,8 @@ insert:
1352static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) 1402static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1353{ 1403{
1354 const int is_sync = op_is_sync(bio->bi_opf); 1404 const int is_sync = op_is_sync(bio->bi_opf);
1355 const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); 1405 const int is_flush_fua = op_is_flush(bio->bi_opf);
1356 struct blk_mq_alloc_data data; 1406 struct blk_mq_alloc_data data = { .flags = 0 };
1357 struct request *rq; 1407 struct request *rq;
1358 unsigned int request_count = 0, srcu_idx; 1408 unsigned int request_count = 0, srcu_idx;
1359 struct blk_plug *plug; 1409 struct blk_plug *plug;
@@ -1374,9 +1424,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1374 blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) 1424 blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
1375 return BLK_QC_T_NONE; 1425 return BLK_QC_T_NONE;
1376 1426
1427 if (blk_mq_sched_bio_merge(q, bio))
1428 return BLK_QC_T_NONE;
1429
1377 wb_acct = wbt_wait(q->rq_wb, bio, NULL); 1430 wb_acct = wbt_wait(q->rq_wb, bio, NULL);
1378 1431
1379 rq = blk_mq_map_request(q, bio, &data); 1432 trace_block_getrq(q, bio, bio->bi_opf);
1433
1434 rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
1380 if (unlikely(!rq)) { 1435 if (unlikely(!rq)) {
1381 __wbt_done(q->rq_wb, wb_acct); 1436 __wbt_done(q->rq_wb, wb_acct);
1382 return BLK_QC_T_NONE; 1437 return BLK_QC_T_NONE;
@@ -1384,12 +1439,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1384 1439
1385 wbt_track(&rq->issue_stat, wb_acct); 1440 wbt_track(&rq->issue_stat, wb_acct);
1386 1441
1387 cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); 1442 cookie = request_to_qc_t(data.hctx, rq);
1388 1443
1389 if (unlikely(is_flush_fua)) { 1444 if (unlikely(is_flush_fua)) {
1445 blk_mq_put_ctx(data.ctx);
1390 blk_mq_bio_to_request(rq, bio); 1446 blk_mq_bio_to_request(rq, bio);
1447 blk_mq_get_driver_tag(rq, NULL, true);
1391 blk_insert_flush(rq); 1448 blk_insert_flush(rq);
1392 goto run_queue; 1449 blk_mq_run_hw_queue(data.hctx, true);
1450 goto done;
1393 } 1451 }
1394 1452
1395 plug = current->plug; 1453 plug = current->plug;
@@ -1438,6 +1496,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1438 goto done; 1496 goto done;
1439 } 1497 }
1440 1498
1499 if (q->elevator) {
1500 blk_mq_put_ctx(data.ctx);
1501 blk_mq_bio_to_request(rq, bio);
1502 blk_mq_sched_insert_request(rq, false, true,
1503 !is_sync || is_flush_fua, true);
1504 goto done;
1505 }
1441 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1506 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
1442 /* 1507 /*
1443 * For a SYNC request, send it to the hardware immediately. For 1508 * For a SYNC request, send it to the hardware immediately. For
@@ -1445,7 +1510,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
1445 * latter allows for merging opportunities and more efficient 1510 * latter allows for merging opportunities and more efficient
1446 * dispatching. 1511 * dispatching.
1447 */ 1512 */
1448run_queue:
1449 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); 1513 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
1450 } 1514 }
1451 blk_mq_put_ctx(data.ctx); 1515 blk_mq_put_ctx(data.ctx);
@@ -1460,10 +1524,10 @@ done:
1460static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) 1524static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1461{ 1525{
1462 const int is_sync = op_is_sync(bio->bi_opf); 1526 const int is_sync = op_is_sync(bio->bi_opf);
1463 const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); 1527 const int is_flush_fua = op_is_flush(bio->bi_opf);
1464 struct blk_plug *plug; 1528 struct blk_plug *plug;
1465 unsigned int request_count = 0; 1529 unsigned int request_count = 0;
1466 struct blk_mq_alloc_data data; 1530 struct blk_mq_alloc_data data = { .flags = 0 };
1467 struct request *rq; 1531 struct request *rq;
1468 blk_qc_t cookie; 1532 blk_qc_t cookie;
1469 unsigned int wb_acct; 1533 unsigned int wb_acct;
@@ -1483,9 +1547,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1483 } else 1547 } else
1484 request_count = blk_plug_queued_count(q); 1548 request_count = blk_plug_queued_count(q);
1485 1549
1550 if (blk_mq_sched_bio_merge(q, bio))
1551 return BLK_QC_T_NONE;
1552
1486 wb_acct = wbt_wait(q->rq_wb, bio, NULL); 1553 wb_acct = wbt_wait(q->rq_wb, bio, NULL);
1487 1554
1488 rq = blk_mq_map_request(q, bio, &data); 1555 trace_block_getrq(q, bio, bio->bi_opf);
1556
1557 rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data);
1489 if (unlikely(!rq)) { 1558 if (unlikely(!rq)) {
1490 __wbt_done(q->rq_wb, wb_acct); 1559 __wbt_done(q->rq_wb, wb_acct);
1491 return BLK_QC_T_NONE; 1560 return BLK_QC_T_NONE;
@@ -1493,12 +1562,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1493 1562
1494 wbt_track(&rq->issue_stat, wb_acct); 1563 wbt_track(&rq->issue_stat, wb_acct);
1495 1564
1496 cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); 1565 cookie = request_to_qc_t(data.hctx, rq);
1497 1566
1498 if (unlikely(is_flush_fua)) { 1567 if (unlikely(is_flush_fua)) {
1568 blk_mq_put_ctx(data.ctx);
1499 blk_mq_bio_to_request(rq, bio); 1569 blk_mq_bio_to_request(rq, bio);
1570 blk_mq_get_driver_tag(rq, NULL, true);
1500 blk_insert_flush(rq); 1571 blk_insert_flush(rq);
1501 goto run_queue; 1572 blk_mq_run_hw_queue(data.hctx, true);
1573 goto done;
1502 } 1574 }
1503 1575
1504 /* 1576 /*
@@ -1535,6 +1607,13 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1535 return cookie; 1607 return cookie;
1536 } 1608 }
1537 1609
1610 if (q->elevator) {
1611 blk_mq_put_ctx(data.ctx);
1612 blk_mq_bio_to_request(rq, bio);
1613 blk_mq_sched_insert_request(rq, false, true,
1614 !is_sync || is_flush_fua, true);
1615 goto done;
1616 }
1538 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { 1617 if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
1539 /* 1618 /*
1540 * For a SYNC request, send it to the hardware immediately. For 1619 * For a SYNC request, send it to the hardware immediately. For
@@ -1542,16 +1621,16 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
1542 * latter allows for merging opportunities and more efficient 1621 * latter allows for merging opportunities and more efficient
1543 * dispatching. 1622 * dispatching.
1544 */ 1623 */
1545run_queue:
1546 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); 1624 blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
1547 } 1625 }
1548 1626
1549 blk_mq_put_ctx(data.ctx); 1627 blk_mq_put_ctx(data.ctx);
1628done:
1550 return cookie; 1629 return cookie;
1551} 1630}
1552 1631
1553static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, 1632void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
1554 struct blk_mq_tags *tags, unsigned int hctx_idx) 1633 unsigned int hctx_idx)
1555{ 1634{
1556 struct page *page; 1635 struct page *page;
1557 1636
@@ -1559,11 +1638,13 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
1559 int i; 1638 int i;
1560 1639
1561 for (i = 0; i < tags->nr_tags; i++) { 1640 for (i = 0; i < tags->nr_tags; i++) {
1562 if (!tags->rqs[i]) 1641 struct request *rq = tags->static_rqs[i];
1642
1643 if (!rq)
1563 continue; 1644 continue;
1564 set->ops->exit_request(set->driver_data, tags->rqs[i], 1645 set->ops->exit_request(set->driver_data, rq,
1565 hctx_idx, i); 1646 hctx_idx, i);
1566 tags->rqs[i] = NULL; 1647 tags->static_rqs[i] = NULL;
1567 } 1648 }
1568 } 1649 }
1569 1650
@@ -1577,33 +1658,32 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
1577 kmemleak_free(page_address(page)); 1658 kmemleak_free(page_address(page));
1578 __free_pages(page, page->private); 1659 __free_pages(page, page->private);
1579 } 1660 }
1661}
1580 1662
1663void blk_mq_free_rq_map(struct blk_mq_tags *tags)
1664{
1581 kfree(tags->rqs); 1665 kfree(tags->rqs);
1666 tags->rqs = NULL;
1667 kfree(tags->static_rqs);
1668 tags->static_rqs = NULL;
1582 1669
1583 blk_mq_free_tags(tags); 1670 blk_mq_free_tags(tags);
1584} 1671}
1585 1672
1586static size_t order_to_size(unsigned int order) 1673struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
1587{ 1674 unsigned int hctx_idx,
1588 return (size_t)PAGE_SIZE << order; 1675 unsigned int nr_tags,
1589} 1676 unsigned int reserved_tags)
1590
1591static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1592 unsigned int hctx_idx)
1593{ 1677{
1594 struct blk_mq_tags *tags; 1678 struct blk_mq_tags *tags;
1595 unsigned int i, j, entries_per_page, max_order = 4;
1596 size_t rq_size, left;
1597 1679
1598 tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, 1680 tags = blk_mq_init_tags(nr_tags, reserved_tags,
1599 set->numa_node, 1681 set->numa_node,
1600 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); 1682 BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
1601 if (!tags) 1683 if (!tags)
1602 return NULL; 1684 return NULL;
1603 1685
1604 INIT_LIST_HEAD(&tags->page_list); 1686 tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
1605
1606 tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *),
1607 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, 1687 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
1608 set->numa_node); 1688 set->numa_node);
1609 if (!tags->rqs) { 1689 if (!tags->rqs) {
@@ -1611,15 +1691,40 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1611 return NULL; 1691 return NULL;
1612 } 1692 }
1613 1693
1694 tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
1695 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
1696 set->numa_node);
1697 if (!tags->static_rqs) {
1698 kfree(tags->rqs);
1699 blk_mq_free_tags(tags);
1700 return NULL;
1701 }
1702
1703 return tags;
1704}
1705
1706static size_t order_to_size(unsigned int order)
1707{
1708 return (size_t)PAGE_SIZE << order;
1709}
1710
1711int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
1712 unsigned int hctx_idx, unsigned int depth)
1713{
1714 unsigned int i, j, entries_per_page, max_order = 4;
1715 size_t rq_size, left;
1716
1717 INIT_LIST_HEAD(&tags->page_list);
1718
1614 /* 1719 /*
1615 * rq_size is the size of the request plus driver payload, rounded 1720 * rq_size is the size of the request plus driver payload, rounded
1616 * to the cacheline size 1721 * to the cacheline size
1617 */ 1722 */
1618 rq_size = round_up(sizeof(struct request) + set->cmd_size, 1723 rq_size = round_up(sizeof(struct request) + set->cmd_size,
1619 cache_line_size()); 1724 cache_line_size());
1620 left = rq_size * set->queue_depth; 1725 left = rq_size * depth;
1621 1726
1622 for (i = 0; i < set->queue_depth; ) { 1727 for (i = 0; i < depth; ) {
1623 int this_order = max_order; 1728 int this_order = max_order;
1624 struct page *page; 1729 struct page *page;
1625 int to_do; 1730 int to_do;
@@ -1653,15 +1758,17 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1653 */ 1758 */
1654 kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); 1759 kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO);
1655 entries_per_page = order_to_size(this_order) / rq_size; 1760 entries_per_page = order_to_size(this_order) / rq_size;
1656 to_do = min(entries_per_page, set->queue_depth - i); 1761 to_do = min(entries_per_page, depth - i);
1657 left -= to_do * rq_size; 1762 left -= to_do * rq_size;
1658 for (j = 0; j < to_do; j++) { 1763 for (j = 0; j < to_do; j++) {
1659 tags->rqs[i] = p; 1764 struct request *rq = p;
1765
1766 tags->static_rqs[i] = rq;
1660 if (set->ops->init_request) { 1767 if (set->ops->init_request) {
1661 if (set->ops->init_request(set->driver_data, 1768 if (set->ops->init_request(set->driver_data,
1662 tags->rqs[i], hctx_idx, i, 1769 rq, hctx_idx, i,
1663 set->numa_node)) { 1770 set->numa_node)) {
1664 tags->rqs[i] = NULL; 1771 tags->static_rqs[i] = NULL;
1665 goto fail; 1772 goto fail;
1666 } 1773 }
1667 } 1774 }
@@ -1670,11 +1777,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
1670 i++; 1777 i++;
1671 } 1778 }
1672 } 1779 }
1673 return tags; 1780 return 0;
1674 1781
1675fail: 1782fail:
1676 blk_mq_free_rq_map(set, tags, hctx_idx); 1783 blk_mq_free_rqs(set, tags, hctx_idx);
1677 return NULL; 1784 return -ENOMEM;
1678} 1785}
1679 1786
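The old blk_mq_init_rq_map() is split in two above: blk_mq_alloc_rq_map() only allocates the tag structure plus the rqs[]/static_rqs[] pointer arrays, while blk_mq_alloc_rqs() then carves actual requests out of bulk page allocations and records them in static_rqs[]. The same two-phase shape, reduced to plain C with libc allocation and no kernel types (purely illustrative):

#include <stdlib.h>

struct rq_table {
        void **static_rqs;      /* backing objects, owned by phase 2 */
        unsigned int nr;
};

/* Phase 1: pointer array only (role of blk_mq_alloc_rq_map()). */
static struct rq_table *rq_table_alloc(unsigned int nr)
{
        struct rq_table *t = calloc(1, sizeof(*t));

        if (!t || !(t->static_rqs = calloc(nr, sizeof(void *)))) {
                free(t);
                return NULL;
        }
        t->nr = nr;
        return t;
}

/* Phase 2: populate the slots (role of blk_mq_alloc_rqs()); -1 on failure. */
static int rq_table_fill(struct rq_table *t, size_t rq_size)
{
        for (unsigned int i = 0; i < t->nr; i++) {
                t->static_rqs[i] = calloc(1, rq_size);
                if (!t->static_rqs[i]) {
                        while (i--)
                                free(t->static_rqs[i]);
                        return -1;      /* caller frees the table itself */
                }
        }
        return 0;
}

The blk-mq.h hunk further down exports both halves, which suggests the same helpers can back the per-hctx sched_tags map introduced elsewhere in this series, not just the driver tag map.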
1680/* 1787/*
@@ -1866,6 +1973,35 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
1866 } 1973 }
1867} 1974}
1868 1975
1976static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx)
1977{
1978 int ret = 0;
1979
1980 set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx,
1981 set->queue_depth, set->reserved_tags);
1982 if (!set->tags[hctx_idx])
1983 return false;
1984
1985 ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx,
1986 set->queue_depth);
1987 if (!ret)
1988 return true;
1989
1990 blk_mq_free_rq_map(set->tags[hctx_idx]);
1991 set->tags[hctx_idx] = NULL;
1992 return false;
1993}
1994
1995static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
1996 unsigned int hctx_idx)
1997{
1998 if (set->tags[hctx_idx]) {
1999 blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx);
2000 blk_mq_free_rq_map(set->tags[hctx_idx]);
2001 set->tags[hctx_idx] = NULL;
2002 }
2003}
2004
1869static void blk_mq_map_swqueue(struct request_queue *q, 2005static void blk_mq_map_swqueue(struct request_queue *q,
1870 const struct cpumask *online_mask) 2006 const struct cpumask *online_mask)
1871{ 2007{
@@ -1894,17 +2030,15 @@ static void blk_mq_map_swqueue(struct request_queue *q,
1894 2030
1895 hctx_idx = q->mq_map[i]; 2031 hctx_idx = q->mq_map[i];
1896 /* unmapped hw queue can be remapped after CPU topo changed */ 2032 /* unmapped hw queue can be remapped after CPU topo changed */
1897 if (!set->tags[hctx_idx]) { 2033 if (!set->tags[hctx_idx] &&
1898 set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx); 2034 !__blk_mq_alloc_rq_map(set, hctx_idx)) {
1899
1900 /* 2035 /*
1901 * If tags initialization fail for some hctx, 2036 * If tags initialization fail for some hctx,
1902 * that hctx won't be brought online. In this 2037 * that hctx won't be brought online. In this
1903 * case, remap the current ctx to hctx[0] which 2038 * case, remap the current ctx to hctx[0] which
1904 * is guaranteed to always have tags allocated 2039 * is guaranteed to always have tags allocated
1905 */ 2040 */
1906 if (!set->tags[hctx_idx]) 2041 q->mq_map[i] = 0;
1907 q->mq_map[i] = 0;
1908 } 2042 }
1909 2043
1910 ctx = per_cpu_ptr(q->queue_ctx, i); 2044 ctx = per_cpu_ptr(q->queue_ctx, i);
@@ -1927,10 +2061,9 @@ static void blk_mq_map_swqueue(struct request_queue *q,
1927 * fallback in case of a new remap fails 2061 * fallback in case of a new remap fails
1928 * allocation 2062 * allocation
1929 */ 2063 */
1930 if (i && set->tags[i]) { 2064 if (i && set->tags[i])
1931 blk_mq_free_rq_map(set, set->tags[i], i); 2065 blk_mq_free_map_and_requests(set, i);
1932 set->tags[i] = NULL; 2066
1933 }
1934 hctx->tags = NULL; 2067 hctx->tags = NULL;
1935 continue; 2068 continue;
1936 } 2069 }
@@ -2023,6 +2156,8 @@ void blk_mq_release(struct request_queue *q)
2023 struct blk_mq_hw_ctx *hctx; 2156 struct blk_mq_hw_ctx *hctx;
2024 unsigned int i; 2157 unsigned int i;
2025 2158
2159 blk_mq_sched_teardown(q);
2160
2026 /* hctx kobj stays in hctx */ 2161 /* hctx kobj stays in hctx */
2027 queue_for_each_hw_ctx(q, hctx, i) { 2162 queue_for_each_hw_ctx(q, hctx, i) {
2028 if (!hctx) 2163 if (!hctx)
@@ -2097,10 +2232,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
2097 struct blk_mq_hw_ctx *hctx = hctxs[j]; 2232 struct blk_mq_hw_ctx *hctx = hctxs[j];
2098 2233
2099 if (hctx) { 2234 if (hctx) {
2100 if (hctx->tags) { 2235 if (hctx->tags)
2101 blk_mq_free_rq_map(set, hctx->tags, j); 2236 blk_mq_free_map_and_requests(set, j);
2102 set->tags[j] = NULL;
2103 }
2104 blk_mq_exit_hctx(q, set, hctx, j); 2237 blk_mq_exit_hctx(q, set, hctx, j);
2105 free_cpumask_var(hctx->cpumask); 2238 free_cpumask_var(hctx->cpumask);
2106 kobject_put(&hctx->kobj); 2239 kobject_put(&hctx->kobj);
@@ -2181,6 +2314,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
2181 mutex_unlock(&all_q_mutex); 2314 mutex_unlock(&all_q_mutex);
2182 put_online_cpus(); 2315 put_online_cpus();
2183 2316
2317 if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
2318 int ret;
2319
2320 ret = blk_mq_sched_init(q);
2321 if (ret)
2322 return ERR_PTR(ret);
2323 }
2324
2184 return q; 2325 return q;
2185 2326
2186err_hctxs: 2327err_hctxs:
@@ -2279,10 +2420,10 @@ static int blk_mq_queue_reinit_dead(unsigned int cpu)
2279 * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list 2420 * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
2280 * and set bit0 in pending bitmap as ctx1->index_hw is still zero. 2421 * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
2281 * 2422 *
2282 * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in 2423 * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
2283 * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. 2424 * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
2284 * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list 2425 * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
2285 * is ignored. 2426 * ignored.
2286 */ 2427 */
2287static int blk_mq_queue_reinit_prepare(unsigned int cpu) 2428static int blk_mq_queue_reinit_prepare(unsigned int cpu)
2288{ 2429{
@@ -2296,17 +2437,15 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
2296{ 2437{
2297 int i; 2438 int i;
2298 2439
2299 for (i = 0; i < set->nr_hw_queues; i++) { 2440 for (i = 0; i < set->nr_hw_queues; i++)
2300 set->tags[i] = blk_mq_init_rq_map(set, i); 2441 if (!__blk_mq_alloc_rq_map(set, i))
2301 if (!set->tags[i])
2302 goto out_unwind; 2442 goto out_unwind;
2303 }
2304 2443
2305 return 0; 2444 return 0;
2306 2445
2307out_unwind: 2446out_unwind:
2308 while (--i >= 0) 2447 while (--i >= 0)
2309 blk_mq_free_rq_map(set, set->tags[i], i); 2448 blk_mq_free_rq_map(set->tags[i]);
2310 2449
2311 return -ENOMEM; 2450 return -ENOMEM;
2312} 2451}
@@ -2430,10 +2569,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
2430{ 2569{
2431 int i; 2570 int i;
2432 2571
2433 for (i = 0; i < nr_cpu_ids; i++) { 2572 for (i = 0; i < nr_cpu_ids; i++)
2434 if (set->tags[i]) 2573 blk_mq_free_map_and_requests(set, i);
2435 blk_mq_free_rq_map(set, set->tags[i], i);
2436 }
2437 2574
2438 kfree(set->mq_map); 2575 kfree(set->mq_map);
2439 set->mq_map = NULL; 2576 set->mq_map = NULL;
@@ -2449,14 +2586,28 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
2449 struct blk_mq_hw_ctx *hctx; 2586 struct blk_mq_hw_ctx *hctx;
2450 int i, ret; 2587 int i, ret;
2451 2588
2452 if (!set || nr > set->queue_depth) 2589 if (!set)
2453 return -EINVAL; 2590 return -EINVAL;
2454 2591
2592 blk_mq_freeze_queue(q);
2593 blk_mq_quiesce_queue(q);
2594
2455 ret = 0; 2595 ret = 0;
2456 queue_for_each_hw_ctx(q, hctx, i) { 2596 queue_for_each_hw_ctx(q, hctx, i) {
2457 if (!hctx->tags) 2597 if (!hctx->tags)
2458 continue; 2598 continue;
2459 ret = blk_mq_tag_update_depth(hctx->tags, nr); 2599 /*
2600 * If we're using an MQ scheduler, just update the scheduler
2601 * queue depth. This is similar to what the old code would do.
2602 */
2603 if (!hctx->sched_tags) {
2604 ret = blk_mq_tag_update_depth(hctx, &hctx->tags,
2605 min(nr, set->queue_depth),
2606 false);
2607 } else {
2608 ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
2609 nr, true);
2610 }
2460 if (ret) 2611 if (ret)
2461 break; 2612 break;
2462 } 2613 }
@@ -2464,6 +2615,9 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
2464 if (!ret) 2615 if (!ret)
2465 q->nr_requests = nr; 2616 q->nr_requests = nr;
2466 2617
2618 blk_mq_unfreeze_queue(q);
2619 blk_mq_start_stopped_hw_queues(q, true);
2620
2467 return ret; 2621 return ret;
2468} 2622}
2469 2623
@@ -2649,7 +2803,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
2649 blk_flush_plug_list(plug, false); 2803 blk_flush_plug_list(plug, false);
2650 2804
2651 hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; 2805 hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
2652 rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); 2806 if (!blk_qc_t_is_internal(cookie))
2807 rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
2808 else
2809 rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
2653 2810
2654 return __blk_mq_poll(hctx, rq); 2811 return __blk_mq_poll(hctx, rq);
2655} 2812}
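Because a request may now carry a scheduler-internal tag rather than a driver tag, blk_mq_poll() above has to inspect the cookie (blk_qc_t_is_internal()) to know whether to resolve it against hctx->tags or hctx->sched_tags. The idea of folding that distinction into the cookie can be sketched as below; the bit layout is invented for illustration and is not the kernel's actual blk_qc_t encoding:

#include <stdbool.h>
#include <stdint.h>

#define COOKIE_INTERNAL (1u << 31)      /* assumed flag bit, illustration only */

static uint32_t cookie_make(unsigned int hw_queue, unsigned int tag,
                            bool internal)
{
        return (internal ? COOKIE_INTERNAL : 0) | (hw_queue << 16) | (tag & 0xffff);
}

static bool cookie_is_internal(uint32_t cookie)
{
        return cookie & COOKIE_INTERNAL;
}

static unsigned int cookie_tag(uint32_t cookie)
{
        return cookie & 0xffff; /* looked up in driver tags or scheduler tags */
}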
@@ -2667,6 +2824,8 @@ void blk_mq_enable_hotplug(void)
2667 2824
2668static int __init blk_mq_init(void) 2825static int __init blk_mq_init(void)
2669{ 2826{
2827 blk_mq_debugfs_init();
2828
2670 cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, 2829 cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
2671 blk_mq_hctx_notify_dead); 2830 blk_mq_hctx_notify_dead);
2672 2831
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 63e9116cddbd..b52abd62b1b0 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,8 +32,32 @@ void blk_mq_free_queue(struct request_queue *q);
32int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); 32int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
33void blk_mq_wake_waiters(struct request_queue *q); 33void blk_mq_wake_waiters(struct request_queue *q);
34bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); 34bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
35void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
36bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
37bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
38 bool wait);
35 39
36/* 40/*
41 * Internal helpers for allocating/freeing the request map
42 */
43void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
44 unsigned int hctx_idx);
45void blk_mq_free_rq_map(struct blk_mq_tags *tags);
46struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
47 unsigned int hctx_idx,
48 unsigned int nr_tags,
49 unsigned int reserved_tags);
50int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
51 unsigned int hctx_idx, unsigned int depth);
52
53/*
54 * Internal helpers for request insertion into sw queues
55 */
56void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
57 bool at_head);
58void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
59 struct list_head *list);
60/*
37 * CPU hotplug helpers 61 * CPU hotplug helpers
38 */ 62 */
39void blk_mq_enable_hotplug(void); 63void blk_mq_enable_hotplug(void);
@@ -57,6 +81,40 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
57extern void blk_mq_sysfs_unregister(struct request_queue *q); 81extern void blk_mq_sysfs_unregister(struct request_queue *q);
58extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); 82extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
59 83
84/*
85 * debugfs helpers
86 */
87#ifdef CONFIG_BLK_DEBUG_FS
88void blk_mq_debugfs_init(void);
89int blk_mq_debugfs_register(struct request_queue *q, const char *name);
90void blk_mq_debugfs_unregister(struct request_queue *q);
91int blk_mq_debugfs_register_hctxs(struct request_queue *q);
92void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
93#else
94static inline void blk_mq_debugfs_init(void)
95{
96}
97
98static inline int blk_mq_debugfs_register(struct request_queue *q,
99 const char *name)
100{
101 return 0;
102}
103
104static inline void blk_mq_debugfs_unregister(struct request_queue *q)
105{
106}
107
108static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
109{
110 return 0;
111}
112
113static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
114{
115}
116#endif
117
60extern void blk_mq_rq_timed_out(struct request *req, bool reserved); 118extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
61 119
62void blk_mq_release(struct request_queue *q); 120void blk_mq_release(struct request_queue *q);
@@ -103,6 +161,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
103 data->hctx = hctx; 161 data->hctx = hctx;
104} 162}
105 163
164static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
165{
166 if (data->flags & BLK_MQ_REQ_INTERNAL)
167 return data->hctx->sched_tags;
168
169 return data->hctx->tags;
170}
171
172/*
173 * Internal helpers for request allocation/init/free
174 */
175void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
176 struct request *rq, unsigned int op);
177void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
178 struct request *rq);
179void blk_mq_finish_request(struct request *rq);
180struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
181 unsigned int op);
182
106static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) 183static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx)
107{ 184{
108 return test_bit(BLK_MQ_S_STOPPED, &hctx->state); 185 return test_bit(BLK_MQ_S_STOPPED, &hctx->state);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index bae1decb6ec3..07cc329fa4b0 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
272 list_del_init(&rq->queuelist); 272 list_del_init(&rq->queuelist);
273 rq->rq_flags &= ~RQF_QUEUED; 273 rq->rq_flags &= ~RQF_QUEUED;
274 rq->tag = -1; 274 rq->tag = -1;
275 rq->internal_tag = -1;
275 276
276 if (unlikely(bqt->tag_index[tag] == NULL)) 277 if (unlikely(bqt->tag_index[tag] == NULL))
277 printk(KERN_ERR "%s: tag %d is missing\n", 278 printk(KERN_ERR "%s: tag %d is missing\n",
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a6bb4fe326c3..82fd0cc394eb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg)
866 unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; 866 unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
867 struct bio *bio; 867 struct bio *bio;
868 868
869 if ((bio = throtl_peek_queued(&sq->queued[READ]))) 869 bio = throtl_peek_queued(&sq->queued[READ]);
870 if (bio)
870 tg_may_dispatch(tg, bio, &read_wait); 871 tg_may_dispatch(tg, bio, &read_wait);
871 872
872 if ((bio = throtl_peek_queued(&sq->queued[WRITE]))) 873 bio = throtl_peek_queued(&sq->queued[WRITE]);
874 if (bio)
873 tg_may_dispatch(tg, bio, &write_wait); 875 tg_may_dispatch(tg, bio, &write_wait);
874 876
875 min_wait = min(read_wait, write_wait); 877 min_wait = min(read_wait, write_wait);
diff --git a/block/blk.h b/block/blk.h
index 041185e5f129..9a716b5925a4 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
167 return NULL; 167 return NULL;
168 } 168 }
169 if (unlikely(blk_queue_bypass(q)) || 169 if (unlikely(blk_queue_bypass(q)) ||
170 !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) 170 !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0))
171 return NULL; 171 return NULL;
172 } 172 }
173} 173}
@@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq)
176{ 176{
177 struct elevator_queue *e = q->elevator; 177 struct elevator_queue *e = q->elevator;
178 178
179 if (e->type->ops.elevator_activate_req_fn) 179 if (e->type->ops.sq.elevator_activate_req_fn)
180 e->type->ops.elevator_activate_req_fn(q, rq); 180 e->type->ops.sq.elevator_activate_req_fn(q, rq);
181} 181}
182 182
183static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) 183static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq)
184{ 184{
185 struct elevator_queue *e = q->elevator; 185 struct elevator_queue *e = q->elevator;
186 186
187 if (e->type->ops.elevator_deactivate_req_fn) 187 if (e->type->ops.sq.elevator_deactivate_req_fn)
188 e->type->ops.elevator_deactivate_req_fn(q, rq); 188 e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
189} 189}
190 190
191#ifdef CONFIG_FAIL_IO_TIMEOUT 191#ifdef CONFIG_FAIL_IO_TIMEOUT
@@ -264,6 +264,22 @@ void ioc_clear_queue(struct request_queue *q);
264int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); 264int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
265 265
266/** 266/**
267 * rq_ioc - determine io_context for request allocation
268 * @bio: request being allocated is for this bio (can be %NULL)
269 *
270 * Determine io_context to use for request allocation for @bio. May return
271 * %NULL if %current->io_context doesn't exist.
272 */
273static inline struct io_context *rq_ioc(struct bio *bio)
274{
275#ifdef CONFIG_BLK_CGROUP
276 if (bio && bio->bi_ioc)
277 return bio->bi_ioc;
278#endif
279 return current->io_context;
280}
281
282/**
267 * create_io_context - try to create task->io_context 283 * create_io_context - try to create task->io_context
268 * @gfp_mask: allocation mask 284 * @gfp_mask: allocation mask
269 * @node: allocation node 285 * @node: allocation node
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c73a6fcaeb9d..f0f29ee731e1 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2749,9 +2749,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
2749 if (!cfqg) 2749 if (!cfqg)
2750 return NULL; 2750 return NULL;
2751 2751
2752 for_each_cfqg_st(cfqg, i, j, st) 2752 for_each_cfqg_st(cfqg, i, j, st) {
2753 if ((cfqq = cfq_rb_first(st)) != NULL) 2753 cfqq = cfq_rb_first(st);
2754 if (cfqq)
2754 return cfqq; 2755 return cfqq;
2756 }
2755 return NULL; 2757 return NULL;
2756} 2758}
2757 2759
@@ -3864,6 +3866,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
3864 goto out; 3866 goto out;
3865 } 3867 }
3866 3868
3869 /* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */
3870 cfqq->ioprio_class = IOPRIO_CLASS_NONE;
3867 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); 3871 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
3868 cfq_init_prio_data(cfqq, cic); 3872 cfq_init_prio_data(cfqq, cic);
3869 cfq_link_cfqq_cfqg(cfqq, cfqg); 3873 cfq_link_cfqq_cfqg(cfqq, cfqg);
@@ -4837,7 +4841,7 @@ static struct elv_fs_entry cfq_attrs[] = {
4837}; 4841};
4838 4842
4839static struct elevator_type iosched_cfq = { 4843static struct elevator_type iosched_cfq = {
4840 .ops = { 4844 .ops.sq = {
4841 .elevator_merge_fn = cfq_merge, 4845 .elevator_merge_fn = cfq_merge,
4842 .elevator_merged_fn = cfq_merged_request, 4846 .elevator_merged_fn = cfq_merged_request,
4843 .elevator_merge_req_fn = cfq_merged_requests, 4847 .elevator_merge_req_fn = cfq_merged_requests,
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 55e0bb6d7da7..05fc0ea25a98 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = {
439}; 439};
440 440
441static struct elevator_type iosched_deadline = { 441static struct elevator_type iosched_deadline = {
442 .ops = { 442 .ops.sq = {
443 .elevator_merge_fn = deadline_merge, 443 .elevator_merge_fn = deadline_merge,
444 .elevator_merged_fn = deadline_merged_request, 444 .elevator_merged_fn = deadline_merged_request,
445 .elevator_merge_req_fn = deadline_merged_requests, 445 .elevator_merge_req_fn = deadline_merged_requests,
diff --git a/block/elevator.c b/block/elevator.c
index 40f0c04e5ad3..b2a55167f0c2 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -40,6 +40,7 @@
40#include <trace/events/block.h> 40#include <trace/events/block.h>
41 41
42#include "blk.h" 42#include "blk.h"
43#include "blk-mq-sched.h"
43 44
44static DEFINE_SPINLOCK(elv_list_lock); 45static DEFINE_SPINLOCK(elv_list_lock);
45static LIST_HEAD(elv_list); 46static LIST_HEAD(elv_list);
@@ -58,8 +59,10 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
58 struct request_queue *q = rq->q; 59 struct request_queue *q = rq->q;
59 struct elevator_queue *e = q->elevator; 60 struct elevator_queue *e = q->elevator;
60 61
61 if (e->type->ops.elevator_allow_bio_merge_fn) 62 if (e->uses_mq && e->type->ops.mq.allow_merge)
62 return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio); 63 return e->type->ops.mq.allow_merge(q, rq, bio);
64 else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
65 return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
63 66
64 return 1; 67 return 1;
65} 68}
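This is the first of many hunks in elevator.c that follow the same shape: elevator_type now carries either legacy hooks (ops.sq) or blk-mq hooks (ops.mq), and every call site branches on e->uses_mq before picking one. Reduced to a toy model, with all types and hooks invented for illustration:

#include <stdbool.h>

struct sq_ops { int (*allow_merge)(int rq, int bio); };
struct mq_ops { int (*allow_merge)(int rq, int bio); };

struct sched_type {
        bool uses_mq;
        union {                 /* only one side is ever populated */
                struct sq_ops sq;
                struct mq_ops mq;
        } ops;
};

static int sched_allow_merge(const struct sched_type *e, int rq, int bio)
{
        if (e->uses_mq && e->ops.mq.allow_merge)
                return e->ops.mq.allow_merge(rq, bio);
        if (!e->uses_mq && e->ops.sq.allow_merge)
                return e->ops.sq.allow_merge(rq, bio);
        return 1;               /* no hook: default to "merging allowed" */
}

Hooks that have no mq counterpart (elv_set_request(), elv_may_queue(), elv_completed_request() and friends) instead WARN and return early when called on a uses_mq elevator, as the later hunks in this file show.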
@@ -163,6 +166,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
163 kobject_init(&eq->kobj, &elv_ktype); 166 kobject_init(&eq->kobj, &elv_ktype);
164 mutex_init(&eq->sysfs_lock); 167 mutex_init(&eq->sysfs_lock);
165 hash_init(eq->hash); 168 hash_init(eq->hash);
169 eq->uses_mq = e->uses_mq;
166 170
167 return eq; 171 return eq;
168} 172}
@@ -203,11 +207,12 @@ int elevator_init(struct request_queue *q, char *name)
203 } 207 }
204 208
205 /* 209 /*
206 * Use the default elevator specified by config boot param or 210 * Use the default elevator specified by config boot param for
207 * config option. Don't try to load modules as we could be running 211 * non-mq devices, or by config option. Don't try to load modules
208 * off async and request_module() isn't allowed from async. 212 * as we could be running off async and request_module() isn't
213 * allowed from async.
209 */ 214 */
210 if (!e && *chosen_elevator) { 215 if (!e && !q->mq_ops && *chosen_elevator) {
211 e = elevator_get(chosen_elevator, false); 216 e = elevator_get(chosen_elevator, false);
212 if (!e) 217 if (!e)
213 printk(KERN_ERR "I/O scheduler %s not found\n", 218 printk(KERN_ERR "I/O scheduler %s not found\n",
@@ -215,18 +220,32 @@ int elevator_init(struct request_queue *q, char *name)
215 } 220 }
216 221
217 if (!e) { 222 if (!e) {
218 e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); 223 if (q->mq_ops && q->nr_hw_queues == 1)
224 e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
225 else if (q->mq_ops)
226 e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
227 else
228 e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
229
219 if (!e) { 230 if (!e) {
220 printk(KERN_ERR 231 printk(KERN_ERR
221 "Default I/O scheduler not found. " \ 232 "Default I/O scheduler not found. " \
222 "Using noop.\n"); 233 "Using noop/none.\n");
223 e = elevator_get("noop", false); 234 e = elevator_get("noop", false);
224 } 235 }
225 } 236 }
226 237
227 err = e->ops.elevator_init_fn(q, e); 238 if (e->uses_mq) {
228 if (err) 239 err = blk_mq_sched_setup(q);
240 if (!err)
241 err = e->ops.mq.init_sched(q, e);
242 } else
243 err = e->ops.sq.elevator_init_fn(q, e);
244 if (err) {
245 if (e->uses_mq)
246 blk_mq_sched_teardown(q);
229 elevator_put(e); 247 elevator_put(e);
248 }
230 return err; 249 return err;
231} 250}
232EXPORT_SYMBOL(elevator_init); 251EXPORT_SYMBOL(elevator_init);
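The default-elevator selection above now depends on whether the queue is blk-mq and on how many hardware queues it has, and the boot-time "elevator=" parameter only applies to legacy queues. Roughly, as a sketch; the concrete scheduler names stand in for the usual Kconfig defaults (CONFIG_DEFAULT_SQ_IOSCHED, CONFIG_DEFAULT_MQ_IOSCHED, CONFIG_DEFAULT_IOSCHED) and may differ per configuration:

#include <stdbool.h>

/* Fallback order used by elevator_init(); names are illustrative. */
static const char *default_elevator(bool is_mq, unsigned int nr_hw_queues,
                                    const char *chosen_elevator)
{
        if (!is_mq && chosen_elevator && chosen_elevator[0])
                return chosen_elevator;         /* "elevator=" boot param, legacy only */
        if (is_mq)
                return nr_hw_queues == 1 ? "mq-deadline"        /* DEFAULT_SQ_IOSCHED */
                                         : "none";              /* DEFAULT_MQ_IOSCHED */
        return "cfq";                           /* DEFAULT_IOSCHED in typical configs */
}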
@@ -234,8 +253,10 @@ EXPORT_SYMBOL(elevator_init);
234void elevator_exit(struct elevator_queue *e) 253void elevator_exit(struct elevator_queue *e)
235{ 254{
236 mutex_lock(&e->sysfs_lock); 255 mutex_lock(&e->sysfs_lock);
237 if (e->type->ops.elevator_exit_fn) 256 if (e->uses_mq && e->type->ops.mq.exit_sched)
238 e->type->ops.elevator_exit_fn(e); 257 e->type->ops.mq.exit_sched(e);
258 else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
259 e->type->ops.sq.elevator_exit_fn(e);
239 mutex_unlock(&e->sysfs_lock); 260 mutex_unlock(&e->sysfs_lock);
240 261
241 kobject_put(&e->kobj); 262 kobject_put(&e->kobj);
@@ -253,6 +274,7 @@ void elv_rqhash_del(struct request_queue *q, struct request *rq)
253 if (ELV_ON_HASH(rq)) 274 if (ELV_ON_HASH(rq))
254 __elv_rqhash_del(rq); 275 __elv_rqhash_del(rq);
255} 276}
277EXPORT_SYMBOL_GPL(elv_rqhash_del);
256 278
257void elv_rqhash_add(struct request_queue *q, struct request *rq) 279void elv_rqhash_add(struct request_queue *q, struct request *rq)
258{ 280{
@@ -262,6 +284,7 @@ void elv_rqhash_add(struct request_queue *q, struct request *rq)
262 hash_add(e->hash, &rq->hash, rq_hash_key(rq)); 284 hash_add(e->hash, &rq->hash, rq_hash_key(rq));
263 rq->rq_flags |= RQF_HASHED; 285 rq->rq_flags |= RQF_HASHED;
264} 286}
287EXPORT_SYMBOL_GPL(elv_rqhash_add);
265 288
266void elv_rqhash_reposition(struct request_queue *q, struct request *rq) 289void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
267{ 290{
@@ -443,8 +466,10 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
443 return ELEVATOR_BACK_MERGE; 466 return ELEVATOR_BACK_MERGE;
444 } 467 }
445 468
446 if (e->type->ops.elevator_merge_fn) 469 if (e->uses_mq && e->type->ops.mq.request_merge)
447 return e->type->ops.elevator_merge_fn(q, req, bio); 470 return e->type->ops.mq.request_merge(q, req, bio);
471 else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
472 return e->type->ops.sq.elevator_merge_fn(q, req, bio);
448 473
449 return ELEVATOR_NO_MERGE; 474 return ELEVATOR_NO_MERGE;
450} 475}
@@ -456,8 +481,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
456 * 481 *
457 * Returns true if we merged, false otherwise 482 * Returns true if we merged, false otherwise
458 */ 483 */
459static bool elv_attempt_insert_merge(struct request_queue *q, 484bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
460 struct request *rq)
461{ 485{
462 struct request *__rq; 486 struct request *__rq;
463 bool ret; 487 bool ret;
@@ -495,8 +519,10 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
495{ 519{
496 struct elevator_queue *e = q->elevator; 520 struct elevator_queue *e = q->elevator;
497 521
498 if (e->type->ops.elevator_merged_fn) 522 if (e->uses_mq && e->type->ops.mq.request_merged)
499 e->type->ops.elevator_merged_fn(q, rq, type); 523 e->type->ops.mq.request_merged(q, rq, type);
524 else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
525 e->type->ops.sq.elevator_merged_fn(q, rq, type);
500 526
501 if (type == ELEVATOR_BACK_MERGE) 527 if (type == ELEVATOR_BACK_MERGE)
502 elv_rqhash_reposition(q, rq); 528 elv_rqhash_reposition(q, rq);
@@ -508,10 +534,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
508 struct request *next) 534 struct request *next)
509{ 535{
510 struct elevator_queue *e = q->elevator; 536 struct elevator_queue *e = q->elevator;
511 const int next_sorted = next->rq_flags & RQF_SORTED; 537 bool next_sorted = false;
512 538
513 if (next_sorted && e->type->ops.elevator_merge_req_fn) 539 if (e->uses_mq && e->type->ops.mq.requests_merged)
514 e->type->ops.elevator_merge_req_fn(q, rq, next); 540 e->type->ops.mq.requests_merged(q, rq, next);
541 else if (e->type->ops.sq.elevator_merge_req_fn) {
542 next_sorted = next->rq_flags & RQF_SORTED;
543 if (next_sorted)
544 e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
545 }
515 546
516 elv_rqhash_reposition(q, rq); 547 elv_rqhash_reposition(q, rq);
517 548
@@ -528,8 +559,11 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
528{ 559{
529 struct elevator_queue *e = q->elevator; 560 struct elevator_queue *e = q->elevator;
530 561
531 if (e->type->ops.elevator_bio_merged_fn) 562 if (WARN_ON_ONCE(e->uses_mq))
532 e->type->ops.elevator_bio_merged_fn(q, rq, bio); 563 return;
564
565 if (e->type->ops.sq.elevator_bio_merged_fn)
566 e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
533} 567}
534 568
535#ifdef CONFIG_PM 569#ifdef CONFIG_PM
@@ -574,11 +608,15 @@ void elv_requeue_request(struct request_queue *q, struct request *rq)
574 608
575void elv_drain_elevator(struct request_queue *q) 609void elv_drain_elevator(struct request_queue *q)
576{ 610{
611 struct elevator_queue *e = q->elevator;
577 static int printed; 612 static int printed;
578 613
614 if (WARN_ON_ONCE(e->uses_mq))
615 return;
616
579 lockdep_assert_held(q->queue_lock); 617 lockdep_assert_held(q->queue_lock);
580 618
581 while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) 619 while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
582 ; 620 ;
583 if (q->nr_sorted && printed++ < 10) { 621 if (q->nr_sorted && printed++ < 10) {
584 printk(KERN_ERR "%s: forced dispatching is broken " 622 printk(KERN_ERR "%s: forced dispatching is broken "
@@ -653,7 +691,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
653 * rq cannot be accessed after calling 691 * rq cannot be accessed after calling
654 * elevator_add_req_fn. 692 * elevator_add_req_fn.
655 */ 693 */
656 q->elevator->type->ops.elevator_add_req_fn(q, rq); 694 q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
657 break; 695 break;
658 696
659 case ELEVATOR_INSERT_FLUSH: 697 case ELEVATOR_INSERT_FLUSH:
@@ -682,8 +720,11 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
682{ 720{
683 struct elevator_queue *e = q->elevator; 721 struct elevator_queue *e = q->elevator;
684 722
685 if (e->type->ops.elevator_latter_req_fn) 723 if (e->uses_mq && e->type->ops.mq.next_request)
686 return e->type->ops.elevator_latter_req_fn(q, rq); 724 return e->type->ops.mq.next_request(q, rq);
725 else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
726 return e->type->ops.sq.elevator_latter_req_fn(q, rq);
727
687 return NULL; 728 return NULL;
688} 729}
689 730
@@ -691,8 +732,10 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
691{ 732{
692 struct elevator_queue *e = q->elevator; 733 struct elevator_queue *e = q->elevator;
693 734
694 if (e->type->ops.elevator_former_req_fn) 735 if (e->uses_mq && e->type->ops.mq.former_request)
695 return e->type->ops.elevator_former_req_fn(q, rq); 736 return e->type->ops.mq.former_request(q, rq);
737 if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
738 return e->type->ops.sq.elevator_former_req_fn(q, rq);
696 return NULL; 739 return NULL;
697} 740}
698 741
@@ -701,8 +744,11 @@ int elv_set_request(struct request_queue *q, struct request *rq,
701{ 744{
702 struct elevator_queue *e = q->elevator; 745 struct elevator_queue *e = q->elevator;
703 746
704 if (e->type->ops.elevator_set_req_fn) 747 if (WARN_ON_ONCE(e->uses_mq))
705 return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask); 748 return 0;
749
750 if (e->type->ops.sq.elevator_set_req_fn)
751 return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
706 return 0; 752 return 0;
707} 753}
708 754
@@ -710,16 +756,22 @@ void elv_put_request(struct request_queue *q, struct request *rq)
710{ 756{
711 struct elevator_queue *e = q->elevator; 757 struct elevator_queue *e = q->elevator;
712 758
713 if (e->type->ops.elevator_put_req_fn) 759 if (WARN_ON_ONCE(e->uses_mq))
714 e->type->ops.elevator_put_req_fn(rq); 760 return;
761
762 if (e->type->ops.sq.elevator_put_req_fn)
763 e->type->ops.sq.elevator_put_req_fn(rq);
715} 764}
716 765
717int elv_may_queue(struct request_queue *q, unsigned int op) 766int elv_may_queue(struct request_queue *q, unsigned int op)
718{ 767{
719 struct elevator_queue *e = q->elevator; 768 struct elevator_queue *e = q->elevator;
720 769
721 if (e->type->ops.elevator_may_queue_fn) 770 if (WARN_ON_ONCE(e->uses_mq))
722 return e->type->ops.elevator_may_queue_fn(q, op); 771 return 0;
772
773 if (e->type->ops.sq.elevator_may_queue_fn)
774 return e->type->ops.sq.elevator_may_queue_fn(q, op);
723 775
724 return ELV_MQUEUE_MAY; 776 return ELV_MQUEUE_MAY;
725} 777}
@@ -728,14 +780,17 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
728{ 780{
729 struct elevator_queue *e = q->elevator; 781 struct elevator_queue *e = q->elevator;
730 782
783 if (WARN_ON_ONCE(e->uses_mq))
784 return;
785
731 /* 786 /*
732 * request is released from the driver, io must be done 787 * request is released from the driver, io must be done
733 */ 788 */
734 if (blk_account_rq(rq)) { 789 if (blk_account_rq(rq)) {
735 q->in_flight[rq_is_sync(rq)]--; 790 q->in_flight[rq_is_sync(rq)]--;
736 if ((rq->rq_flags & RQF_SORTED) && 791 if ((rq->rq_flags & RQF_SORTED) &&
737 e->type->ops.elevator_completed_req_fn) 792 e->type->ops.sq.elevator_completed_req_fn)
738 e->type->ops.elevator_completed_req_fn(q, rq); 793 e->type->ops.sq.elevator_completed_req_fn(q, rq);
739 } 794 }
740} 795}
741 796
@@ -803,8 +858,8 @@ int elv_register_queue(struct request_queue *q)
803 } 858 }
804 kobject_uevent(&e->kobj, KOBJ_ADD); 859 kobject_uevent(&e->kobj, KOBJ_ADD);
805 e->registered = 1; 860 e->registered = 1;
806 if (e->type->ops.elevator_registered_fn) 861 if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
807 e->type->ops.elevator_registered_fn(q); 862 e->type->ops.sq.elevator_registered_fn(q);
808 } 863 }
809 return error; 864 return error;
810} 865}
@@ -891,9 +946,14 @@ EXPORT_SYMBOL_GPL(elv_unregister);
891static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) 946static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
892{ 947{
893 struct elevator_queue *old = q->elevator; 948 struct elevator_queue *old = q->elevator;
894 bool registered = old->registered; 949 bool old_registered = false;
895 int err; 950 int err;
896 951
952 if (q->mq_ops) {
953 blk_mq_freeze_queue(q);
954 blk_mq_quiesce_queue(q);
955 }
956
897 /* 957 /*
898 * Turn on BYPASS and drain all requests w/ elevator private data. 958 * Turn on BYPASS and drain all requests w/ elevator private data.
899 * Block layer doesn't call into a quiesced elevator - all requests 959 * Block layer doesn't call into a quiesced elevator - all requests
@@ -901,42 +961,76 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
901 * using INSERT_BACK. All requests have SOFTBARRIER set and no 961 * using INSERT_BACK. All requests have SOFTBARRIER set and no
902 * merge happens either. 962 * merge happens either.
903 */ 963 */
904 blk_queue_bypass_start(q); 964 if (old) {
965 old_registered = old->registered;
905 966
906 /* unregister and clear all auxiliary data of the old elevator */ 967 if (old->uses_mq)
907 if (registered) 968 blk_mq_sched_teardown(q);
908 elv_unregister_queue(q);
909 969
910 spin_lock_irq(q->queue_lock); 970 if (!q->mq_ops)
911 ioc_clear_queue(q); 971 blk_queue_bypass_start(q);
912 spin_unlock_irq(q->queue_lock); 972
973 /* unregister and clear all auxiliary data of the old elevator */
974 if (old_registered)
975 elv_unregister_queue(q);
976
977 spin_lock_irq(q->queue_lock);
978 ioc_clear_queue(q);
979 spin_unlock_irq(q->queue_lock);
980 }
913 981
914 /* allocate, init and register new elevator */ 982 /* allocate, init and register new elevator */
915 err = new_e->ops.elevator_init_fn(q, new_e); 983 if (new_e) {
916 if (err) 984 if (new_e->uses_mq) {
917 goto fail_init; 985 err = blk_mq_sched_setup(q);
986 if (!err)
987 err = new_e->ops.mq.init_sched(q, new_e);
988 } else
989 err = new_e->ops.sq.elevator_init_fn(q, new_e);
990 if (err)
991 goto fail_init;
918 992
919 if (registered) {
920 err = elv_register_queue(q); 993 err = elv_register_queue(q);
921 if (err) 994 if (err)
922 goto fail_register; 995 goto fail_register;
923 } 996 } else
997 q->elevator = NULL;
924 998
925 /* done, kill the old one and finish */ 999 /* done, kill the old one and finish */
926 elevator_exit(old); 1000 if (old) {
927 blk_queue_bypass_end(q); 1001 elevator_exit(old);
1002 if (!q->mq_ops)
1003 blk_queue_bypass_end(q);
1004 }
928 1005
929 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); 1006 if (q->mq_ops) {
1007 blk_mq_unfreeze_queue(q);
1008 blk_mq_start_stopped_hw_queues(q, true);
1009 }
1010
1011 if (new_e)
1012 blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
1013 else
1014 blk_add_trace_msg(q, "elv switch: none");
930 1015
931 return 0; 1016 return 0;
932 1017
933fail_register: 1018fail_register:
1019 if (q->mq_ops)
1020 blk_mq_sched_teardown(q);
934 elevator_exit(q->elevator); 1021 elevator_exit(q->elevator);
935fail_init: 1022fail_init:
936 /* switch failed, restore and re-register old elevator */ 1023 /* switch failed, restore and re-register old elevator */
937 q->elevator = old; 1024 if (old) {
938 elv_register_queue(q); 1025 q->elevator = old;
939 blk_queue_bypass_end(q); 1026 elv_register_queue(q);
1027 if (!q->mq_ops)
1028 blk_queue_bypass_end(q);
1029 }
1030 if (q->mq_ops) {
1031 blk_mq_unfreeze_queue(q);
1032 blk_mq_start_stopped_hw_queues(q, true);
1033 }
940 1034
941 return err; 1035 return err;
942} 1036}
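For blk-mq queues, elevator_switch() above brackets the whole exchange with freeze/quiesce and unfreeze/restart instead of the legacy bypass-and-drain dance, and a NULL new_e now means "run with no scheduler at all". The intended ordering written out as a trace; the step strings and helpers below are illustrative, not kernel functions:

#include <stdbool.h>
#include <stdio.h>

static void step(const char *what)
{
        puts(what);
}

/* Prints the sequence the mq branch of elevator_switch() goes through. */
static int switch_mq_sched(bool have_old, bool have_new, bool init_fails)
{
        step("blk_mq_freeze_queue + blk_mq_quiesce_queue");
        if (have_old)
                step("teardown old scheduler (blk_mq_sched_teardown)");
        if (have_new) {
                step("blk_mq_sched_setup + ops.mq.init_sched");
                if (init_fails) {
                        step("teardown again, restore old elevator");
                        step("blk_mq_unfreeze_queue + restart hw queues");
                        return -1;
                }
        } else {
                step("q->elevator = NULL (\"none\")");
        }
        step("blk_mq_unfreeze_queue + blk_mq_start_stopped_hw_queues");
        return 0;
}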
@@ -949,8 +1043,11 @@ static int __elevator_change(struct request_queue *q, const char *name)
949 char elevator_name[ELV_NAME_MAX]; 1043 char elevator_name[ELV_NAME_MAX];
950 struct elevator_type *e; 1044 struct elevator_type *e;
951 1045
952 if (!q->elevator) 1046 /*
953 return -ENXIO; 1047 * Special case for mq, turn off scheduling
1048 */
1049 if (q->mq_ops && !strncmp(name, "none", 4))
1050 return elevator_switch(q, NULL);
954 1051
955 strlcpy(elevator_name, name, sizeof(elevator_name)); 1052 strlcpy(elevator_name, name, sizeof(elevator_name));
956 e = elevator_get(strstrip(elevator_name), true); 1053 e = elevator_get(strstrip(elevator_name), true);
@@ -959,11 +1056,21 @@ static int __elevator_change(struct request_queue *q, const char *name)
959 return -EINVAL; 1056 return -EINVAL;
960 } 1057 }
961 1058
962 if (!strcmp(elevator_name, q->elevator->type->elevator_name)) { 1059 if (q->elevator &&
1060 !strcmp(elevator_name, q->elevator->type->elevator_name)) {
963 elevator_put(e); 1061 elevator_put(e);
964 return 0; 1062 return 0;
965 } 1063 }
966 1064
1065 if (!e->uses_mq && q->mq_ops) {
1066 elevator_put(e);
1067 return -EINVAL;
1068 }
1069 if (e->uses_mq && !q->mq_ops) {
1070 elevator_put(e);
1071 return -EINVAL;
1072 }
1073
967 return elevator_switch(q, e); 1074 return elevator_switch(q, e);
968} 1075}
969 1076
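__elevator_change() above adds two rules: on a blk-mq queue the name "none" detaches the scheduler entirely (elevator_switch(q, NULL)), and a scheduler is only accepted when its uses_mq flag matches the queue type. As a small pure function, for illustration:

#include <stdbool.h>
#include <string.h>

enum elv_change { ELV_SWITCH_TO_NONE, ELV_SWITCH_OK, ELV_CHANGE_EINVAL };

/* Mirrors the acceptance rules added to __elevator_change(). */
static enum elv_change classify_change(bool q_is_mq, bool e_uses_mq,
                                       const char *name)
{
        if (q_is_mq && !strncmp(name, "none", 4))
                return ELV_SWITCH_TO_NONE;      /* mq only: drop the scheduler */
        if (e_uses_mq != q_is_mq)
                return ELV_CHANGE_EINVAL;       /* sq elevator on mq queue, or vice versa */
        return ELV_SWITCH_OK;
}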
@@ -985,7 +1092,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
985{ 1092{
986 int ret; 1093 int ret;
987 1094
988 if (!q->elevator) 1095 if (!(q->mq_ops || q->request_fn))
989 return count; 1096 return count;
990 1097
991 ret = __elevator_change(q, name); 1098 ret = __elevator_change(q, name);
@@ -999,24 +1106,34 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
999ssize_t elv_iosched_show(struct request_queue *q, char *name) 1106ssize_t elv_iosched_show(struct request_queue *q, char *name)
1000{ 1107{
1001 struct elevator_queue *e = q->elevator; 1108 struct elevator_queue *e = q->elevator;
1002 struct elevator_type *elv; 1109 struct elevator_type *elv = NULL;
1003 struct elevator_type *__e; 1110 struct elevator_type *__e;
1004 int len = 0; 1111 int len = 0;
1005 1112
1006 if (!q->elevator || !blk_queue_stackable(q)) 1113 if (!blk_queue_stackable(q))
1007 return sprintf(name, "none\n"); 1114 return sprintf(name, "none\n");
1008 1115
1009 elv = e->type; 1116 if (!q->elevator)
1117 len += sprintf(name+len, "[none] ");
1118 else
1119 elv = e->type;
1010 1120
1011 spin_lock(&elv_list_lock); 1121 spin_lock(&elv_list_lock);
1012 list_for_each_entry(__e, &elv_list, list) { 1122 list_for_each_entry(__e, &elv_list, list) {
1013 if (!strcmp(elv->elevator_name, __e->elevator_name)) 1123 if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) {
1014 len += sprintf(name+len, "[%s] ", elv->elevator_name); 1124 len += sprintf(name+len, "[%s] ", elv->elevator_name);
1015 else 1125 continue;
1126 }
1127 if (__e->uses_mq && q->mq_ops)
1128 len += sprintf(name+len, "%s ", __e->elevator_name);
1129 else if (!__e->uses_mq && !q->mq_ops)
1016 len += sprintf(name+len, "%s ", __e->elevator_name); 1130 len += sprintf(name+len, "%s ", __e->elevator_name);
1017 } 1131 }
1018 spin_unlock(&elv_list_lock); 1132 spin_unlock(&elv_list_lock);
1019 1133
1134 if (q->mq_ops && q->elevator)
1135 len += sprintf(name+len, "none");
1136
1020 len += sprintf(len+name, "\n"); 1137 len += sprintf(len+name, "\n");
1021 return len; 1138 return len;
1022} 1139}
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
new file mode 100644
index 000000000000..d93ec713fa62
--- /dev/null
+++ b/block/mq-deadline.c
@@ -0,0 +1,555 @@
1/*
2 * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler,
3 * for the blk-mq scheduling framework
4 *
5 * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk>
6 */
7#include <linux/kernel.h>
8#include <linux/fs.h>
9#include <linux/blkdev.h>
10#include <linux/blk-mq.h>
11#include <linux/elevator.h>
12#include <linux/bio.h>
13#include <linux/module.h>
14#include <linux/slab.h>
15#include <linux/init.h>
16#include <linux/compiler.h>
17#include <linux/rbtree.h>
18#include <linux/sbitmap.h>
19
20#include "blk.h"
21#include "blk-mq.h"
22#include "blk-mq-tag.h"
23#include "blk-mq-sched.h"
24
25/*
26 * See Documentation/block/deadline-iosched.txt
27 */
28static const int read_expire = HZ / 2; /* max time before a read is submitted. */
29static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
30static const int writes_starved = 2; /* max times reads can starve a write */
31static const int fifo_batch = 16; /* # of sequential requests treated as one
32 by the above parameters. For throughput. */
33
34struct deadline_data {
35 /*
36 * run time data
37 */
38
39 /*
40 * requests (deadline_rq s) are present on both sort_list and fifo_list
41 */
42 struct rb_root sort_list[2];
43 struct list_head fifo_list[2];
44
45 /*
46 * next in sort order. read, write or both are NULL
47 */
48 struct request *next_rq[2];
49 unsigned int batching; /* number of sequential requests made */
50 unsigned int starved; /* times reads have starved writes */
51
52 /*
53 * settings that change how the i/o scheduler behaves
54 */
55 int fifo_expire[2];
56 int fifo_batch;
57 int writes_starved;
58 int front_merges;
59
60 spinlock_t lock;
61 struct list_head dispatch;
62};
63
64static inline struct rb_root *
65deadline_rb_root(struct deadline_data *dd, struct request *rq)
66{
67 return &dd->sort_list[rq_data_dir(rq)];
68}
69
70/*
71 * get the request after `rq' in sector-sorted order
72 */
73static inline struct request *
74deadline_latter_request(struct request *rq)
75{
76 struct rb_node *node = rb_next(&rq->rb_node);
77
78 if (node)
79 return rb_entry_rq(node);
80
81 return NULL;
82}
83
84static void
85deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
86{
87 struct rb_root *root = deadline_rb_root(dd, rq);
88
89 elv_rb_add(root, rq);
90}
91
92static inline void
93deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
94{
95 const int data_dir = rq_data_dir(rq);
96
97 if (dd->next_rq[data_dir] == rq)
98 dd->next_rq[data_dir] = deadline_latter_request(rq);
99
100 elv_rb_del(deadline_rb_root(dd, rq), rq);
101}
102
103/*
104 * remove rq from rbtree and fifo.
105 */
106static void deadline_remove_request(struct request_queue *q, struct request *rq)
107{
108 struct deadline_data *dd = q->elevator->elevator_data;
109
110 list_del_init(&rq->queuelist);
111
112 /*
113 * We might not be on the rbtree, if we are doing an insert merge
114 */
115 if (!RB_EMPTY_NODE(&rq->rb_node))
116 deadline_del_rq_rb(dd, rq);
117
118 elv_rqhash_del(q, rq);
119 if (q->last_merge == rq)
120 q->last_merge = NULL;
121}
122
123static void dd_request_merged(struct request_queue *q, struct request *req,
124 int type)
125{
126 struct deadline_data *dd = q->elevator->elevator_data;
127
128 /*
129 * if the merge was a front merge, we need to reposition request
130 */
131 if (type == ELEVATOR_FRONT_MERGE) {
132 elv_rb_del(deadline_rb_root(dd, req), req);
133 deadline_add_rq_rb(dd, req);
134 }
135}
136
137static void dd_merged_requests(struct request_queue *q, struct request *req,
138 struct request *next)
139{
140 /*
141 * if next expires before rq, assign its expire time to rq
142 * and move into next position (next will be deleted) in fifo
143 */
144 if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
145 if (time_before((unsigned long)next->fifo_time,
146 (unsigned long)req->fifo_time)) {
147 list_move(&req->queuelist, &next->queuelist);
148 req->fifo_time = next->fifo_time;
149 }
150 }
151
152 /*
153 * kill knowledge of next, this one is a goner
154 */
155 deadline_remove_request(q, next);
156}
157
158/*
159 * move an entry to dispatch queue
160 */
161static void
162deadline_move_request(struct deadline_data *dd, struct request *rq)
163{
164 const int data_dir = rq_data_dir(rq);
165
166 dd->next_rq[READ] = NULL;
167 dd->next_rq[WRITE] = NULL;
168 dd->next_rq[data_dir] = deadline_latter_request(rq);
169
170 /*
171 * take it off the sort and fifo list
172 */
173 deadline_remove_request(rq->q, rq);
174}
175
176/*
177 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
178 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
179 */
180static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
181{
182 struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
183
184 /*
185 * rq is expired!
186 */
187 if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
188 return 1;
189
190 return 0;
191}
192
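deadline_check_fifo() above relies on the kernel's time_after_eq() to compare jiffies values that may wrap around; the underlying trick is a signed subtraction. A plain C equivalent of that comparison (not the kernel macro itself):

#include <stdbool.h>

/* Wrap-safe "deadline has passed" check in the spirit of time_after_eq(). */
static bool fifo_expired(unsigned long now, unsigned long fifo_time)
{
        return (long)(now - fifo_time) >= 0;
}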
193/*
194 * deadline_dispatch_requests selects the best request according to
195 * read/write expire, fifo_batch, etc
196 */
197static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
198{
199 struct deadline_data *dd = hctx->queue->elevator->elevator_data;
200 struct request *rq;
201 bool reads, writes;
202 int data_dir;
203
204 if (!list_empty(&dd->dispatch)) {
205 rq = list_first_entry(&dd->dispatch, struct request, queuelist);
206 list_del_init(&rq->queuelist);
207 goto done;
208 }
209
210 reads = !list_empty(&dd->fifo_list[READ]);
211 writes = !list_empty(&dd->fifo_list[WRITE]);
212
213 /*
214 * batches are currently reads XOR writes
215 */
216 if (dd->next_rq[WRITE])
217 rq = dd->next_rq[WRITE];
218 else
219 rq = dd->next_rq[READ];
220
221 if (rq && dd->batching < dd->fifo_batch)
222		/* we have a next request and are still entitled to batch */
223 goto dispatch_request;
224
225 /*
226 * at this point we are not running a batch. select the appropriate
227 * data direction (read / write)
228 */
229
230 if (reads) {
231 BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
232
233 if (writes && (dd->starved++ >= dd->writes_starved))
234 goto dispatch_writes;
235
236 data_dir = READ;
237
238 goto dispatch_find_request;
239 }
240
241 /*
242 * there are either no reads or writes have been starved
243 */
244
245 if (writes) {
246dispatch_writes:
247 BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
248
249 dd->starved = 0;
250
251 data_dir = WRITE;
252
253 goto dispatch_find_request;
254 }
255
256 return NULL;
257
258dispatch_find_request:
259 /*
260 * we are not running a batch, find best request for selected data_dir
261 */
262 if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) {
263 /*
264 * A deadline has expired, the last request was in the other
265 * direction, or we have run out of higher-sectored requests.
266 * Start again from the request with the earliest expiry time.
267 */
268 rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
269 } else {
270 /*
271 * The last req was the same dir and we have a next request in
272 * sort order. No expired requests so continue on from here.
273 */
274 rq = dd->next_rq[data_dir];
275 }
276
277 dd->batching = 0;
278
279dispatch_request:
280 /*
281 * rq is the selected appropriate request.
282 */
283 dd->batching++;
284 deadline_move_request(dd, rq);
285done:
286 rq->rq_flags |= RQF_STARTED;
287 return rq;
288}
289
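__dd_dispatch_request() above picks work in a fixed order: the private dispatch list first, then the tail of the current batch, and only then a new direction, with writes_starved bounding how long pending writes can keep being deferred in favour of reads. The direction choice condensed into a pure function, without the gotos and with stand-in arguments instead of kernel state:

#include <stdbool.h>

enum dd_dir { DD_NONE, DD_READ, DD_WRITE };

/* Read preference bounded by the writes_starved counter, as above. */
static enum dd_dir pick_data_dir(bool have_reads, bool have_writes,
                                 unsigned int *starved,
                                 unsigned int writes_starved)
{
        if (have_reads && !(have_writes && (*starved)++ >= writes_starved))
                return DD_READ;
        if (have_writes) {
                *starved = 0;           /* dispatching a write resets the counter */
                return DD_WRITE;
        }
        return DD_NONE;
}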
290static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
291{
292 struct deadline_data *dd = hctx->queue->elevator->elevator_data;
293 struct request *rq;
294
295 spin_lock(&dd->lock);
296 rq = __dd_dispatch_request(hctx);
297 spin_unlock(&dd->lock);
298
299 return rq;
300}
301
302static void dd_exit_queue(struct elevator_queue *e)
303{
304 struct deadline_data *dd = e->elevator_data;
305
306 BUG_ON(!list_empty(&dd->fifo_list[READ]));
307 BUG_ON(!list_empty(&dd->fifo_list[WRITE]));
308
309 kfree(dd);
310}
311
312/*
313 * initialize elevator private data (deadline_data).
314 */
315static int dd_init_queue(struct request_queue *q, struct elevator_type *e)
316{
317 struct deadline_data *dd;
318 struct elevator_queue *eq;
319
320 eq = elevator_alloc(q, e);
321 if (!eq)
322 return -ENOMEM;
323
324 dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node);
325 if (!dd) {
326 kobject_put(&eq->kobj);
327 return -ENOMEM;
328 }
329 eq->elevator_data = dd;
330
331 INIT_LIST_HEAD(&dd->fifo_list[READ]);
332 INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
333 dd->sort_list[READ] = RB_ROOT;
334 dd->sort_list[WRITE] = RB_ROOT;
335 dd->fifo_expire[READ] = read_expire;
336 dd->fifo_expire[WRITE] = write_expire;
337 dd->writes_starved = writes_starved;
338 dd->front_merges = 1;
339 dd->fifo_batch = fifo_batch;
340 spin_lock_init(&dd->lock);
341 INIT_LIST_HEAD(&dd->dispatch);
342
343 q->elevator = eq;
344 return 0;
345}
346
347static int dd_request_merge(struct request_queue *q, struct request **rq,
348 struct bio *bio)
349{
350 struct deadline_data *dd = q->elevator->elevator_data;
351 sector_t sector = bio_end_sector(bio);
352 struct request *__rq;
353
354 if (!dd->front_merges)
355 return ELEVATOR_NO_MERGE;
356
357 __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
358 if (__rq) {
359 BUG_ON(sector != blk_rq_pos(__rq));
360
361 if (elv_bio_merge_ok(__rq, bio)) {
362 *rq = __rq;
363 return ELEVATOR_FRONT_MERGE;
364 }
365 }
366
367 return ELEVATOR_NO_MERGE;
368}
369
370static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
371{
372 struct request_queue *q = hctx->queue;
373 struct deadline_data *dd = q->elevator->elevator_data;
374 int ret;
375
376 spin_lock(&dd->lock);
377 ret = blk_mq_sched_try_merge(q, bio);
378 spin_unlock(&dd->lock);
379
380 return ret;
381}
382
383/*
384 * add rq to rbtree and fifo
385 */
386static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
387 bool at_head)
388{
389 struct request_queue *q = hctx->queue;
390 struct deadline_data *dd = q->elevator->elevator_data;
391 const int data_dir = rq_data_dir(rq);
392
393 if (blk_mq_sched_try_insert_merge(q, rq))
394 return;
395
396 blk_mq_sched_request_inserted(rq);
397
398 if (blk_mq_sched_bypass_insert(hctx, rq))
399 return;
400
401 if (at_head || rq->cmd_type != REQ_TYPE_FS) {
402 if (at_head)
403 list_add(&rq->queuelist, &dd->dispatch);
404 else
405 list_add_tail(&rq->queuelist, &dd->dispatch);
406 } else {
407 deadline_add_rq_rb(dd, rq);
408
409 if (rq_mergeable(rq)) {
410 elv_rqhash_add(q, rq);
411 if (!q->last_merge)
412 q->last_merge = rq;
413 }
414
415 /*
416 * set expire time and add to fifo list
417 */
418 rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
419 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
420 }
421}
422
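dd_insert_request() above routes each request to one of three places: it may disappear via an insert merge, it may bypass the scheduler entirely (blk_mq_sched_bypass_insert() for flush and similar requests), and otherwise it goes either straight onto the private dispatch list (at_head or non-REQ_TYPE_FS) or into the rbtree plus the per-direction FIFO with an expiry stamp. The routing rule as an illustrative sketch:

#include <stdbool.h>

enum dd_dest { DD_HANDLED_ELSEWHERE, DD_DISPATCH_LIST, DD_SORT_AND_FIFO };

/* Where a request ends up in dd_insert_request(); illustration only. */
static enum dd_dest route_insert(bool insert_merged, bool bypass_insert,
                                 bool at_head, bool is_fs_request)
{
        if (insert_merged || bypass_insert)
                return DD_HANDLED_ELSEWHERE;    /* merged away or queued at hctx level */
        if (at_head || !is_fs_request)
                return DD_DISPATCH_LIST;        /* skip sorting, dispatch in arrival order */
        return DD_SORT_AND_FIFO;                /* normal I/O: rbtree + fifo_expire stamp */
}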
423static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
424 struct list_head *list, bool at_head)
425{
426 struct request_queue *q = hctx->queue;
427 struct deadline_data *dd = q->elevator->elevator_data;
428
429 spin_lock(&dd->lock);
430 while (!list_empty(list)) {
431 struct request *rq;
432
433 rq = list_first_entry(list, struct request, queuelist);
434 list_del_init(&rq->queuelist);
435 dd_insert_request(hctx, rq, at_head);
436 }
437 spin_unlock(&dd->lock);
438}
439
440static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
441{
442 struct deadline_data *dd = hctx->queue->elevator->elevator_data;
443
444 return !list_empty_careful(&dd->dispatch) ||
445 !list_empty_careful(&dd->fifo_list[0]) ||
446 !list_empty_careful(&dd->fifo_list[1]);
447}
448
449/*
450 * sysfs parts below
451 */
452static ssize_t
453deadline_var_show(int var, char *page)
454{
455 return sprintf(page, "%d\n", var);
456}
457
458static ssize_t
459deadline_var_store(int *var, const char *page, size_t count)
460{
461 char *p = (char *) page;
462
463 *var = simple_strtol(p, &p, 10);
464 return count;
465}
466
467#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
468static ssize_t __FUNC(struct elevator_queue *e, char *page) \
469{ \
470 struct deadline_data *dd = e->elevator_data; \
471 int __data = __VAR; \
472 if (__CONV) \
473 __data = jiffies_to_msecs(__data); \
474 return deadline_var_show(__data, (page)); \
475}
476SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
477SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
478SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
479SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
480SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
481#undef SHOW_FUNCTION
482
483#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
484static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
485{ \
486 struct deadline_data *dd = e->elevator_data; \
487 int __data; \
488 int ret = deadline_var_store(&__data, (page), count); \
489 if (__data < (MIN)) \
490 __data = (MIN); \
491 else if (__data > (MAX)) \
492 __data = (MAX); \
493 if (__CONV) \
494 *(__PTR) = msecs_to_jiffies(__data); \
495 else \
496 *(__PTR) = __data; \
497 return ret; \
498}
499STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
500STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
501STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
502STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
503STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
504#undef STORE_FUNCTION
505
506#define DD_ATTR(name) \
507 __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
508 deadline_##name##_store)
509
510static struct elv_fs_entry deadline_attrs[] = {
511 DD_ATTR(read_expire),
512 DD_ATTR(write_expire),
513 DD_ATTR(writes_starved),
514 DD_ATTR(front_merges),
515 DD_ATTR(fifo_batch),
516 __ATTR_NULL
517};
518
519static struct elevator_type mq_deadline = {
520 .ops.mq = {
521 .insert_requests = dd_insert_requests,
522 .dispatch_request = dd_dispatch_request,
523 .next_request = elv_rb_latter_request,
524 .former_request = elv_rb_former_request,
525 .bio_merge = dd_bio_merge,
526 .request_merge = dd_request_merge,
527 .requests_merged = dd_merged_requests,
528 .request_merged = dd_request_merged,
529 .has_work = dd_has_work,
530 .init_sched = dd_init_queue,
531 .exit_sched = dd_exit_queue,
532 },
533
534 .uses_mq = true,
535 .elevator_attrs = deadline_attrs,
536 .elevator_name = "mq-deadline",
537 .elevator_owner = THIS_MODULE,
538};
539
540static int __init deadline_init(void)
541{
542 return elv_register(&mq_deadline);
543}
544
545static void __exit deadline_exit(void)
546{
547 elv_unregister(&mq_deadline);
548}
549
550module_init(deadline_init);
551module_exit(deadline_exit);
552
553MODULE_AUTHOR("Jens Axboe");
554MODULE_LICENSE("GPL");
555MODULE_DESCRIPTION("MQ deadline IO scheduler");
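
For reference, the SHOW_FUNCTION/STORE_FUNCTION macros above generate one small accessor pair per sysfs tunable. The following is roughly what the preprocessor produces for the read_expire pair (kernel context assumed; shown for illustration only, with the macro parameters substituted in):

static ssize_t deadline_read_expire_show(struct elevator_queue *e, char *page)
{
	struct deadline_data *dd = e->elevator_data;
	int __data = dd->fifo_expire[READ];

	__data = jiffies_to_msecs(__data);		/* __CONV == 1 */
	return deadline_var_show(__data, page);
}

static ssize_t deadline_read_expire_store(struct elevator_queue *e,
					  const char *page, size_t count)
{
	struct deadline_data *dd = e->elevator_data;
	int __data;
	int ret = deadline_var_store(&__data, page, count);

	if (__data < 0)					/* MIN */
		__data = 0;
	else if (__data > INT_MAX)			/* MAX */
		__data = INT_MAX;
	dd->fifo_expire[READ] = msecs_to_jiffies(__data);	/* __CONV == 1 */
	return ret;
}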
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index a163c487cf38..2d1b15d89b45 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e)
92} 92}
93 93
94static struct elevator_type elevator_noop = { 94static struct elevator_type elevator_noop = {
95 .ops = { 95 .ops.sq = {
96 .elevator_merge_req_fn = noop_merged_requests, 96 .elevator_merge_req_fn = noop_merged_requests,
97 .elevator_dispatch_fn = noop_dispatch, 97 .elevator_dispatch_fn = noop_dispatch,
98 .elevator_add_req_fn = noop_add_request, 98 .elevator_add_req_fn = noop_add_request,
diff --git a/block/opal_proto.h b/block/opal_proto.h
new file mode 100644
index 000000000000..f40c9acf8895
--- /dev/null
+++ b/block/opal_proto.h
@@ -0,0 +1,452 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Rafael Antognolli <rafael.antognolli@intel.com>
6 * Scott Bauer <scott.bauer@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17#include <linux/types.h>
18
19#ifndef _OPAL_PROTO_H
20#define _OPAL_PROTO_H
21
22/*
23 * These constant values come from:
24 * SPC-4 section
25 * 6.30 SECURITY PROTOCOL IN command / table 265.
26 */
27enum {
28 TCG_SECP_00 = 0,
29 TCG_SECP_01,
30};
31
32/*
33 * Token defs derived from:
34 * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
35 * 3.2.2 Data Stream Encoding
36 */
37enum opal_response_token {
38 OPAL_DTA_TOKENID_BYTESTRING = 0xe0,
39 OPAL_DTA_TOKENID_SINT = 0xe1,
40 OPAL_DTA_TOKENID_UINT = 0xe2,
41 OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */
42 OPAL_DTA_TOKENID_INVALID = 0X0
43};
44
45#define DTAERROR_NO_METHOD_STATUS 0x89
46#define GENERIC_HOST_SESSION_NUM 0x41
47
48#define TPER_SYNC_SUPPORTED 0x01
49
50#define TINY_ATOM_DATA_MASK 0x3F
51#define TINY_ATOM_SIGNED 0x40
52
53#define SHORT_ATOM_ID 0x80
54#define SHORT_ATOM_BYTESTRING 0x20
55#define SHORT_ATOM_SIGNED 0x10
56#define SHORT_ATOM_LEN_MASK 0xF
57
58#define MEDIUM_ATOM_ID 0xC0
59#define MEDIUM_ATOM_BYTESTRING 0x10
60#define MEDIUM_ATOM_SIGNED 0x8
61#define MEDIUM_ATOM_LEN_MASK 0x7
62
63#define LONG_ATOM_ID 0xe0
64#define LONG_ATOM_BYTESTRING 0x2
65#define LONG_ATOM_SIGNED 0x1
66
67/* Derived from TCG Core spec 2.01 Section:
68 * 3.2.2.1
69 * Data Type
70 */
71#define TINY_ATOM_BYTE 0x7F
72#define SHORT_ATOM_BYTE 0xBF
73#define MEDIUM_ATOM_BYTE 0xDF
74#define LONG_ATOM_BYTE 0xE3
75
76#define OPAL_INVAL_PARAM 12
77#define OPAL_MANUFACTURED_INACTIVE 0x08
78#define OPAL_DISCOVERY_COMID 0x0001
79
80#define LOCKING_RANGE_NON_GLOBAL 0x03
81/*
82 * User IDs used in the TCG storage SSCs
83 * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
84 * Section: 6.3 Assigned UIDs
85 */
86#define OPAL_UID_LENGTH 8
87#define OPAL_METHOD_LENGTH 8
88#define OPAL_MSID_KEYLEN 15
89#define OPAL_UID_LENGTH_HALF 4
90
91/* Enum to index OPALUID array */
92enum opal_uid {
93 /* users */
94 OPAL_SMUID_UID,
95 OPAL_THISSP_UID,
96 OPAL_ADMINSP_UID,
97 OPAL_LOCKINGSP_UID,
98 OPAL_ENTERPRISE_LOCKINGSP_UID,
99 OPAL_ANYBODY_UID,
100 OPAL_SID_UID,
101 OPAL_ADMIN1_UID,
102 OPAL_USER1_UID,
103 OPAL_USER2_UID,
104 OPAL_PSID_UID,
105 OPAL_ENTERPRISE_BANDMASTER0_UID,
106 OPAL_ENTERPRISE_ERASEMASTER_UID,
107 /* tables */
108 OPAL_LOCKINGRANGE_GLOBAL,
109 OPAL_LOCKINGRANGE_ACE_RDLOCKED,
110 OPAL_LOCKINGRANGE_ACE_WRLOCKED,
111 OPAL_MBRCONTROL,
112 OPAL_MBR,
113 OPAL_AUTHORITY_TABLE,
114 OPAL_C_PIN_TABLE,
115 OPAL_LOCKING_INFO_TABLE,
116 OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
117 /* C_PIN_TABLE object ID's */
118 OPAL_C_PIN_MSID,
119 OPAL_C_PIN_SID,
120 OPAL_C_PIN_ADMIN1,
121 /* half UID's (only first 4 bytes used) */
122 OPAL_HALF_UID_AUTHORITY_OBJ_REF,
123 OPAL_HALF_UID_BOOLEAN_ACE,
124 /* omitted optional parameter */
125 OPAL_UID_HEXFF,
126};
127
128#define OPAL_METHOD_LENGTH 8
129
130/* Enum for indexing the OPALMETHOD array */
131enum opal_method {
132 OPAL_PROPERTIES,
133 OPAL_STARTSESSION,
134 OPAL_REVERT,
135 OPAL_ACTIVATE,
136 OPAL_EGET,
137 OPAL_ESET,
138 OPAL_NEXT,
139 OPAL_EAUTHENTICATE,
140 OPAL_GETACL,
141 OPAL_GENKEY,
142 OPAL_REVERTSP,
143 OPAL_GET,
144 OPAL_SET,
145 OPAL_AUTHENTICATE,
146 OPAL_RANDOM,
147 OPAL_ERASE,
148};
149
150enum opal_token {
151 /* Boolean */
152 OPAL_TRUE = 0x01,
153 OPAL_FALSE = 0x00,
154 OPAL_BOOLEAN_EXPR = 0x03,
155 /* cellblocks */
156 OPAL_TABLE = 0x00,
157 OPAL_STARTROW = 0x01,
158 OPAL_ENDROW = 0x02,
159 OPAL_STARTCOLUMN = 0x03,
160 OPAL_ENDCOLUMN = 0x04,
161 OPAL_VALUES = 0x01,
162 /* authority table */
163 OPAL_PIN = 0x03,
164 /* locking tokens */
165 OPAL_RANGESTART = 0x03,
166 OPAL_RANGELENGTH = 0x04,
167 OPAL_READLOCKENABLED = 0x05,
168 OPAL_WRITELOCKENABLED = 0x06,
169 OPAL_READLOCKED = 0x07,
170 OPAL_WRITELOCKED = 0x08,
171 OPAL_ACTIVEKEY = 0x0A,
172 /* locking info table */
173 OPAL_MAXRANGES = 0x04,
174 /* mbr control */
175 OPAL_MBRENABLE = 0x01,
176 OPAL_MBRDONE = 0x02,
177 /* properties */
178 OPAL_HOSTPROPERTIES = 0x00,
179 /* atoms */
180 OPAL_STARTLIST = 0xf0,
181 OPAL_ENDLIST = 0xf1,
182 OPAL_STARTNAME = 0xf2,
183 OPAL_ENDNAME = 0xf3,
184 OPAL_CALL = 0xf8,
185 OPAL_ENDOFDATA = 0xf9,
186 OPAL_ENDOFSESSION = 0xfa,
187 OPAL_STARTTRANSACTON = 0xfb,
188 OPAL_ENDTRANSACTON = 0xfC,
189 OPAL_EMPTYATOM = 0xff,
190 OPAL_WHERE = 0x00,
191};
192
193/* Locking state for a locking range */
194enum opal_lockingstate {
195 OPAL_LOCKING_READWRITE = 0x01,
196 OPAL_LOCKING_READONLY = 0x02,
197 OPAL_LOCKING_LOCKED = 0x03,
198};
199
200/* Packets derived from:
201 * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
202 * Section: 3.2.3 ComPackets, Packets & Subpackets
203 */
204
205/* Comm Packet (header) for transmissions. */
206struct opal_compacket {
207 __be32 reserved0;
208 u8 extendedComID[4];
209 __be32 outstandingData;
210 __be32 minTransfer;
211 __be32 length;
212};
213
214/* Packet structure. */
215struct opal_packet {
216 __be32 tsn;
217 __be32 hsn;
218 __be32 seq_number;
219 __be16 reserved0;
220 __be16 ack_type;
221 __be32 acknowledgment;
222 __be32 length;
223};
224
225/* Data sub packet header */
226struct opal_data_subpacket {
227 u8 reserved0[6];
228 __be16 kind;
229 __be32 length;
230};
231
232/* header of a response */
233struct opal_header {
234 struct opal_compacket cp;
235 struct opal_packet pkt;
236 struct opal_data_subpacket subpkt;
237};
238
239#define FC_TPER 0x0001
240#define FC_LOCKING 0x0002
241#define FC_GEOMETRY 0x0003
242#define FC_ENTERPRISE 0x0100
243#define FC_DATASTORE 0x0202
244#define FC_SINGLEUSER 0x0201
245#define FC_OPALV100 0x0200
246#define FC_OPALV200 0x0203
247
248/*
249 * The Discovery 0 Header. As defined in
250 * Opal SSC Documentation
251 * Section: 3.3.5 Capability Discovery
252 */
253struct d0_header {
254 __be32 length; /* the length of the header, 48 in 2.00.100 */
255 __be32 revision; /* revision of the header, 1 in 2.00.100 */
256 __be32 reserved01;
257 __be32 reserved02;
258 /*
259 * the remainder of the structure is vendor specific and will not be
260 * addressed now
261 */
262 u8 ignored[32];
263};
264
265/*
266 * TPer Feature Descriptor. Contains flags indicating support for the
267 * TPer features described in the OPAL specification. The names match the
268 * OPAL terminology
269 *
270 * code == 0x001 in 2.00.100
271 */
272struct d0_tper_features {
273 /*
274 * supported_features bits:
275 * bit 7: reserved
276 * bit 6: com ID management
277 * bit 5: reserved
278 * bit 4: streaming support
279 * bit 3: buffer management
280 * bit 2: ACK/NACK
281 * bit 1: async
282 * bit 0: sync
283 */
284 u8 supported_features;
285 /*
286 * bytes 5 through 15 are reserved, but we represent the first 3 as
287 * u8 to keep the other two 32-bit integers aligned.
288 */
289 u8 reserved01[3];
290 __be32 reserved02;
291 __be32 reserved03;
292};
293
294/*
295 * Locking Feature Descriptor. Contains flags indicating support for the
296 * locking features described in the OPAL specification. The names match the
297 * OPAL terminology
298 *
299 * code == 0x0002 in 2.00.100
300 */
301struct d0_locking_features {
302 /*
303 * supported_features bits:
304 * bits 6-7: reserved
305 * bit 5: MBR done
306 * bit 4: MBR enabled
307 * bit 3: media encryption
308 * bit 2: locked
309 * bit 1: locking enabled
310 * bit 0: locking supported
311 */
312 u8 supported_features;
313 /*
314 * bytes 5 through 15 are reserved, but we represent the first 3 as
315 * u8 to keep the other two 32-bit integers aligned.
316 */
317 u8 reserved01[3];
318 __be32 reserved02;
319 __be32 reserved03;
320};
321
322/*
323 * Geometry Feature Descriptor. Contains flags indicating support for the
324 * geometry features described in the OPAL specification. The names match the
325 * OPAL terminology
326 *
327 * code == 0x0003 in 2.00.100
328 */
329struct d0_geometry_features {
330 /*
331 * skip 32 bits from header, needed to align the struct to 64 bits.
332 */
333 u8 header[4];
334 /*
335 * reserved01:
336 * bits 1-6: reserved
337 * bit 0: align
338 */
339 u8 reserved01;
340 u8 reserved02[7];
341 __be32 logical_block_size;
342 __be64 alignment_granularity;
343 __be64 lowest_aligned_lba;
344};
345
346/*
347 * Enterprise SSC Feature
348 *
349 * code == 0x0100
350 */
351struct d0_enterprise_ssc {
352 __be16 baseComID;
353 __be16 numComIDs;
354 /* range_crossing:
355 * bits 1-6: reserved
356 * bit 0: range crossing
357 */
358 u8 range_crossing;
359 u8 reserved01;
360 __be16 reserved02;
361 __be32 reserved03;
362 __be32 reserved04;
363};
364
365/*
366 * Opal V1 feature
367 *
368 * code == 0x0200
369 */
370struct d0_opal_v100 {
371 __be16 baseComID;
372 __be16 numComIDs;
373};
374
375/*
376 * Single User Mode feature
377 *
378 * code == 0x0201
379 */
380struct d0_single_user_mode {
381 __be32 num_locking_objects;
382 /* reserved01:
383 * bit 0: any
384 * bit 1: all
385 * bit 2: policy
386 * bits 3-7: reserved
387 */
388 u8 reserved01;
389 u8 reserved02;
390 __be16 reserved03;
391 __be32 reserved04;
392};
393
394/*
395 * Additional Datastores feature
396 *
397 * code == 0x0202
398 */
399struct d0_datastore_table {
400 __be16 reserved01;
401 __be16 max_tables;
402 __be32 max_size_tables;
403 __be32 table_size_alignment;
404};
405
406/*
407 * OPAL 2.0 feature
408 *
409 * code == 0x0203
410 */
411struct d0_opal_v200 {
412 __be16 baseComID;
413 __be16 numComIDs;
414 /* range_crossing:
415 * bits 1-6: reserved
416 * bit 0: range crossing
417 */
418 u8 range_crossing;
419 /* num_locking_admin_auth:
420 * not aligned to 16 bits, so use two u8.
421 * stored in big endian:
422 * 0: MSB
423 * 1: LSB
424 */
425 u8 num_locking_admin_auth[2];
426 /* num_locking_user_auth:
427 * not aligned to 16 bits, so use two u8.
428 * stored in big endian:
429 * 0: MSB
430 * 1: LSB
431 */
432 u8 num_locking_user_auth[2];
433 u8 initialPIN;
434 u8 revertedPIN;
435 u8 reserved01;
436 __be32 reserved02;
437};
438
439/* Union of features used to parse the discovery 0 response */
440struct d0_features {
441 __be16 code;
442 /*
443 * r_version bits:
444 * bits 4-7: version
445 * bits 0-3: reserved
446 */
447 u8 r_version;
448 u8 length;
449 u8 features[];
450};
451
452#endif /* _OPAL_PROTO_H */
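
A standalone illustration of the short-atom encoding defined above, mirroring what add_short_atom_header() in sed-opal.c builds: an 8-byte UID sent as a bytestring gets the header byte 0x80 | 0x20 | 8 = 0xa8, and the same masks recover the type and length on the receive side. Minimal userspace sketch, constants copied from this header:

#include <stdio.h>

#define SHORT_ATOM_ID		0x80
#define SHORT_ATOM_BYTESTRING	0x20
#define SHORT_ATOM_SIGNED	0x10
#define SHORT_ATOM_LEN_MASK	0xF

int main(void)
{
	/* header for an 8-byte unsigned bytestring (e.g. an OPAL UID) */
	unsigned char hdr = SHORT_ATOM_ID | SHORT_ATOM_BYTESTRING |
			    (8 & SHORT_ATOM_LEN_MASK);

	printf("header byte:   0x%02x\n", hdr);
	printf("is bytestring: %d\n", !!(hdr & SHORT_ATOM_BYTESTRING));
	printf("is signed:     %d\n", !!(hdr & SHORT_ATOM_SIGNED));
	printf("payload bytes: %d\n", hdr & SHORT_ATOM_LEN_MASK);
	return 0;
}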
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index bcd86e5cd546..39f70d968754 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
293 if (!gpt) 293 if (!gpt)
294 return NULL; 294 return NULL;
295 295
296 count = le32_to_cpu(gpt->num_partition_entries) * 296 count = (size_t)le32_to_cpu(gpt->num_partition_entries) *
297 le32_to_cpu(gpt->sizeof_partition_entry); 297 le32_to_cpu(gpt->sizeof_partition_entry);
298 if (!count) 298 if (!count)
299 return NULL; 299 return NULL;
@@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
352 gpt_header **gpt, gpt_entry **ptes) 352 gpt_header **gpt, gpt_entry **ptes)
353{ 353{
354 u32 crc, origcrc; 354 u32 crc, origcrc;
355 u64 lastlba; 355 u64 lastlba, pt_size;
356 356
357 if (!ptes) 357 if (!ptes)
358 return 0; 358 return 0;
@@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
434 goto fail; 434 goto fail;
435 } 435 }
436 436
437 /* Sanity check partition table size */
438 pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) *
439 le32_to_cpu((*gpt)->sizeof_partition_entry);
440 if (pt_size > KMALLOC_MAX_SIZE) {
441 pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n",
442 (unsigned long long)pt_size, KMALLOC_MAX_SIZE);
443 goto fail;
444 }
445
437 if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) 446 if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
438 goto fail; 447 goto fail;
439 448
440 /* Check the GUID Partition Entry Array CRC */ 449 /* Check the GUID Partition Entry Array CRC */
441 crc = efi_crc32((const unsigned char *) (*ptes), 450 crc = efi_crc32((const unsigned char *) (*ptes), pt_size);
442 le32_to_cpu((*gpt)->num_partition_entries) *
443 le32_to_cpu((*gpt)->sizeof_partition_entry));
444 451
445 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { 452 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
446 pr_debug("GUID Partition Entry Array CRC check failed.\n"); 453 pr_debug("GUID Partition Entry Array CRC check failed.\n");
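
The hunk above widens the two on-disk 32-bit fields to 64 bits before multiplying and bounds the product before the allocation, so a crafted GPT can neither overflow the size calculation nor request an oversized buffer. A minimal userspace sketch of the same check, using an illustrative 512 KiB bound in place of the architecture-dependent KMALLOC_MAX_SIZE:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define FAKE_KMALLOC_MAX_SIZE	(512u * 1024u)	/* stand-in value, illustrative only */

int main(void)
{
	uint32_t num_entries = 0x02000000;	/* hostile on-disk value */
	uint32_t entry_size = 128;
	/* widen before multiplying, as the patch does with the (u64) cast */
	uint64_t pt_size = (uint64_t)num_entries * entry_size;

	if (pt_size > FAKE_KMALLOC_MAX_SIZE) {
		printf("GPT too large: %" PRIu64 " bytes, rejecting\n", pt_size);
		return 1;
	}
	printf("GPT size OK: %" PRIu64 " bytes\n", pt_size);
	return 0;
}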
diff --git a/block/sed-opal.c b/block/sed-opal.c
new file mode 100644
index 000000000000..d1c52ba4d62d
--- /dev/null
+++ b/block/sed-opal.c
@@ -0,0 +1,2488 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Scott Bauer <scott.bauer@intel.com>
6 * Rafael Antognolli <rafael.antognolli@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#define pr_fmt(fmt) KBUILD_MODNAME ":OPAL: " fmt
19
20#include <linux/delay.h>
21#include <linux/device.h>
22#include <linux/kernel.h>
23#include <linux/list.h>
24#include <linux/genhd.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <uapi/linux/sed-opal.h>
28#include <linux/sed-opal.h>
29#include <linux/string.h>
30#include <linux/kdev_t.h>
31
32#include "opal_proto.h"
33
34#define IO_BUFFER_LENGTH 2048
35#define MAX_TOKS 64
36
37typedef int (*opal_step)(struct opal_dev *dev);
38
39enum opal_atom_width {
40 OPAL_WIDTH_TINY,
41 OPAL_WIDTH_SHORT,
42 OPAL_WIDTH_MEDIUM,
43 OPAL_WIDTH_LONG,
44 OPAL_WIDTH_TOKEN
45};
46
47/*
 48 * The parsed response does not copy tokens out of the response buffer
 49 * again. Instead, for each token we just store a pointer to the position
 50 * in the buffer where the token starts, and the size of the token in
 51 * bytes.
52 */
53struct opal_resp_tok {
54 const u8 *pos;
55 size_t len;
56 enum opal_response_token type;
57 enum opal_atom_width width;
58 union {
59 u64 u;
60 s64 s;
61 } stored;
62};
63
64/*
65 * From the response header it's not possible to know how many tokens there are
 66 * in the payload. So we hardcode that the maximum will be MAX_TOKS, and later
67 * if we start dealing with messages that have more than that, we can increase
68 * this number. This is done to avoid having to make two passes through the
69 * response, the first one counting how many tokens we have and the second one
70 * actually storing the positions.
71 */
72struct parsed_resp {
73 int num;
74 struct opal_resp_tok toks[MAX_TOKS];
75};
76
77struct opal_dev {
78 bool supported;
79
80 void *data;
81 sec_send_recv *send_recv;
82
83 const opal_step *funcs;
84 void **func_data;
85 int state;
86 struct mutex dev_lock;
87 u16 comid;
88 u32 hsn;
89 u32 tsn;
90 u64 align;
91 u64 lowest_lba;
92
93 size_t pos;
94 u8 cmd[IO_BUFFER_LENGTH];
95 u8 resp[IO_BUFFER_LENGTH];
96
97 struct parsed_resp parsed;
98 size_t prev_d_len;
99 void *prev_data;
100
101 struct list_head unlk_lst;
102};
103
104
105static const u8 opaluid[][OPAL_UID_LENGTH] = {
106 /* users */
107 [OPAL_SMUID_UID] =
108 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff },
109 [OPAL_THISSP_UID] =
110 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 },
111 [OPAL_ADMINSP_UID] =
112 { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x01 },
113 [OPAL_LOCKINGSP_UID] =
114 { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x02 },
115 [OPAL_ENTERPRISE_LOCKINGSP_UID] =
116 { 0x00, 0x00, 0x02, 0x05, 0x00, 0x01, 0x00, 0x01 },
117 [OPAL_ANYBODY_UID] =
118 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01 },
119 [OPAL_SID_UID] =
120 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06 },
121 [OPAL_ADMIN1_UID] =
122 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x01 },
123 [OPAL_USER1_UID] =
124 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x01 },
125 [OPAL_USER2_UID] =
126 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x02 },
127 [OPAL_PSID_UID] =
128 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0xff, 0x01 },
129 [OPAL_ENTERPRISE_BANDMASTER0_UID] =
130 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x80, 0x01 },
131 [OPAL_ENTERPRISE_ERASEMASTER_UID] =
132 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 },
133
134 /* tables */
135
136 [OPAL_LOCKINGRANGE_GLOBAL] =
137 { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
138 [OPAL_LOCKINGRANGE_ACE_RDLOCKED] =
139 { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE0, 0x01 },
140 [OPAL_LOCKINGRANGE_ACE_WRLOCKED] =
141 { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE8, 0x01 },
142 [OPAL_MBRCONTROL] =
143 { 0x00, 0x00, 0x08, 0x03, 0x00, 0x00, 0x00, 0x01 },
144 [OPAL_MBR] =
145 { 0x00, 0x00, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00 },
146 [OPAL_AUTHORITY_TABLE] =
147 { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00},
148 [OPAL_C_PIN_TABLE] =
149 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00},
150 [OPAL_LOCKING_INFO_TABLE] =
151 { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 },
152 [OPAL_ENTERPRISE_LOCKING_INFO_TABLE] =
153 { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },
154
155 /* C_PIN_TABLE object ID's */
156
157 [OPAL_C_PIN_MSID] =
158 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x84, 0x02},
159 [OPAL_C_PIN_SID] =
160 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01},
161 [OPAL_C_PIN_ADMIN1] =
162 { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x01, 0x00, 0x01},
163
164 /* half UID's (only first 4 bytes used) */
165
166 [OPAL_HALF_UID_AUTHORITY_OBJ_REF] =
167 { 0x00, 0x00, 0x0C, 0x05, 0xff, 0xff, 0xff, 0xff },
168 [OPAL_HALF_UID_BOOLEAN_ACE] =
169 { 0x00, 0x00, 0x04, 0x0E, 0xff, 0xff, 0xff, 0xff },
170
171 /* special value for omitted optional parameter */
172 [OPAL_UID_HEXFF] =
173 { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
174};
175
176/*
177 * TCG Storage SSC Methods.
178 * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
179 * Section: 6.3 Assigned UIDs
180 */
181static const u8 opalmethod[][OPAL_UID_LENGTH] = {
182 [OPAL_PROPERTIES] =
183 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01 },
184 [OPAL_STARTSESSION] =
185 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x02 },
186 [OPAL_REVERT] =
187 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x02 },
188 [OPAL_ACTIVATE] =
189 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x03 },
190 [OPAL_EGET] =
191 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06 },
192 [OPAL_ESET] =
193 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x07 },
194 [OPAL_NEXT] =
195 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08 },
196 [OPAL_EAUTHENTICATE] =
197 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c },
198 [OPAL_GETACL] =
199 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0d },
200 [OPAL_GENKEY] =
201 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x10 },
202 [OPAL_REVERTSP] =
203 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11 },
204 [OPAL_GET] =
205 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16 },
206 [OPAL_SET] =
207 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17 },
208 [OPAL_AUTHENTICATE] =
209 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1c },
210 [OPAL_RANDOM] =
211 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x06, 0x01 },
212 [OPAL_ERASE] =
213 { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 },
214};
215
216typedef int (cont_fn)(struct opal_dev *dev);
217
218static int end_opal_session_error(struct opal_dev *dev);
219
220struct opal_suspend_data {
221 struct opal_lock_unlock unlk;
222 u8 lr;
223 struct list_head node;
224};
225
226/*
227 * Derived from:
228 * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
229 * Section: 5.1.5 Method Status Codes
230 */
231static const char * const opal_errors[] = {
232 "Success",
233 "Not Authorized",
234 "Unknown Error",
235 "SP Busy",
236 "SP Failed",
237 "SP Disabled",
238 "SP Frozen",
239 "No Sessions Available",
240 "Uniqueness Conflict",
241 "Insufficient Space",
242 "Insufficient Rows",
243 "Invalid Function",
244 "Invalid Parameter",
245 "Invalid Reference",
246 "Unknown Error",
247 "TPER Malfunction",
248 "Transaction Failure",
249 "Response Overflow",
250 "Authority Locked Out",
251};
252
253static const char *opal_error_to_human(int error)
254{
255 if (error == 0x3f)
256 return "Failed";
257
258 if (error >= ARRAY_SIZE(opal_errors) || error < 0)
259 return "Unknown Error";
260
261 return opal_errors[error];
262}
263
264static void print_buffer(const u8 *ptr, u32 length)
265{
266#ifdef DEBUG
267 print_hex_dump_bytes("OPAL: ", DUMP_PREFIX_OFFSET, ptr, length);
268 pr_debug("\n");
269#endif
270}
271
272static bool check_tper(const void *data)
273{
274 const struct d0_tper_features *tper = data;
275 u8 flags = tper->supported_features;
276
277 if (!(flags & TPER_SYNC_SUPPORTED)) {
278 pr_err("TPer sync not supported. flags = %d\n",
279 tper->supported_features);
280 return false;
281 }
282
283 return true;
284}
285
286static bool check_sum(const void *data)
287{
288 const struct d0_single_user_mode *sum = data;
289 u32 nlo = be32_to_cpu(sum->num_locking_objects);
290
291 if (nlo == 0) {
292 pr_err("Need at least one locking object.\n");
293 return false;
294 }
295
296 pr_debug("Number of locking objects: %d\n", nlo);
297
298 return true;
299}
300
301static u16 get_comid_v100(const void *data)
302{
303 const struct d0_opal_v100 *v100 = data;
304
305 return be16_to_cpu(v100->baseComID);
306}
307
308static u16 get_comid_v200(const void *data)
309{
310 const struct d0_opal_v200 *v200 = data;
311
312 return be16_to_cpu(v200->baseComID);
313}
314
315static int opal_send_cmd(struct opal_dev *dev)
316{
317 return dev->send_recv(dev->data, dev->comid, TCG_SECP_01,
318 dev->cmd, IO_BUFFER_LENGTH,
319 true);
320}
321
322static int opal_recv_cmd(struct opal_dev *dev)
323{
324 return dev->send_recv(dev->data, dev->comid, TCG_SECP_01,
325 dev->resp, IO_BUFFER_LENGTH,
326 false);
327}
328
329static int opal_recv_check(struct opal_dev *dev)
330{
331 size_t buflen = IO_BUFFER_LENGTH;
332 void *buffer = dev->resp;
333 struct opal_header *hdr = buffer;
334 int ret;
335
336 do {
337 pr_debug("Sent OPAL command: outstanding=%d, minTransfer=%d\n",
338 hdr->cp.outstandingData,
339 hdr->cp.minTransfer);
340
341 if (hdr->cp.outstandingData == 0 ||
342 hdr->cp.minTransfer != 0)
343 return 0;
344
345 memset(buffer, 0, buflen);
346 ret = opal_recv_cmd(dev);
347 } while (!ret);
348
349 return ret;
350}
351
352static int opal_send_recv(struct opal_dev *dev, cont_fn *cont)
353{
354 int ret;
355
356 ret = opal_send_cmd(dev);
357 if (ret)
358 return ret;
359 ret = opal_recv_cmd(dev);
360 if (ret)
361 return ret;
362 ret = opal_recv_check(dev);
363 if (ret)
364 return ret;
365 return cont(dev);
366}
367
368static void check_geometry(struct opal_dev *dev, const void *data)
369{
370 const struct d0_geometry_features *geo = data;
371
372 dev->align = geo->alignment_granularity;
373 dev->lowest_lba = geo->lowest_aligned_lba;
374}
375
376static int next(struct opal_dev *dev)
377{
378 opal_step func;
379 int error = 0;
380
381 do {
382 func = dev->funcs[dev->state];
383 if (!func)
384 break;
385
386 error = func(dev);
387 if (error) {
388 pr_err("Error on step function: %d with error %d: %s\n",
389 dev->state, error,
390 opal_error_to_human(error));
391
392 /* For each OPAL command we do a discovery0 then we
393 * start some sort of session.
394 * If we haven't passed state 1 then there was an error
395 * on discovery0 or during the attempt to start a
396 * session. Therefore we shouldn't attempt to terminate
397 * a session, as one has not yet been created.
398 */
399 if (dev->state > 1)
400 return end_opal_session_error(dev);
401 }
402 dev->state++;
403 } while (!error);
404
405 return error;
406}
407
408static int opal_discovery0_end(struct opal_dev *dev)
409{
410 bool found_com_id = false, supported = true, single_user = false;
411 const struct d0_header *hdr = (struct d0_header *)dev->resp;
412 const u8 *epos = dev->resp, *cpos = dev->resp;
413 u16 comid = 0;
414
415 print_buffer(dev->resp, be32_to_cpu(hdr->length));
416
417 epos += be32_to_cpu(hdr->length); /* end of buffer */
418 cpos += sizeof(*hdr); /* current position on buffer */
419
420 while (cpos < epos && supported) {
421 const struct d0_features *body =
422 (const struct d0_features *)cpos;
423
424 switch (be16_to_cpu(body->code)) {
425 case FC_TPER:
426 supported = check_tper(body->features);
427 break;
428 case FC_SINGLEUSER:
429 single_user = check_sum(body->features);
430 break;
431 case FC_GEOMETRY:
432 check_geometry(dev, body);
433 break;
434 case FC_LOCKING:
435 case FC_ENTERPRISE:
436 case FC_DATASTORE:
437 /* some ignored properties */
438 pr_debug("Found OPAL feature description: %d\n",
439 be16_to_cpu(body->code));
440 break;
441 case FC_OPALV100:
442 comid = get_comid_v100(body->features);
443 found_com_id = true;
444 break;
445 case FC_OPALV200:
446 comid = get_comid_v200(body->features);
447 found_com_id = true;
448 break;
449 case 0xbfff ... 0xffff:
450 /* vendor specific, just ignore */
451 break;
452 default:
453 pr_debug("OPAL Unknown feature: %d\n",
454 be16_to_cpu(body->code));
455
456 }
457 cpos += body->length + 4;
458 }
459
460 if (!supported) {
461 pr_debug("This device is not Opal enabled. Not Supported!\n");
462 return -EOPNOTSUPP;
463 }
464
465 if (!single_user)
466 pr_debug("Device doesn't support single user mode\n");
467
468
469 if (!found_com_id) {
470 pr_debug("Could not find OPAL comid for device. Returning early\n");
471 return -EOPNOTSUPP;
472 }
473
474 dev->comid = comid;
475
476 return 0;
477}
478
479static int opal_discovery0(struct opal_dev *dev)
480{
481 int ret;
482
483 memset(dev->resp, 0, IO_BUFFER_LENGTH);
484 dev->comid = OPAL_DISCOVERY_COMID;
485 ret = opal_recv_cmd(dev);
486 if (ret)
487 return ret;
488 return opal_discovery0_end(dev);
489}
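
A standalone sketch of the walk in opal_discovery0_end() above: every feature descriptor begins with a big-endian 16-bit feature code, a version byte and a length byte (struct d0_features), followed by that many payload bytes, so the cursor always advances by length + 4. The buffer here is fabricated purely for illustration:

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	/* two fake descriptors: code 0x0001 (FC_TPER, 4 payload bytes) and
	 * code 0x0203 (FC_OPALV200, 12 payload bytes) */
	unsigned char buf[] = {
		0x00, 0x01, 0x10, 0x04,  0x11, 0x00, 0x00, 0x00,
		0x02, 0x03, 0x10, 0x0c,  0x07, 0xfe, 0x00, 0x01,
		0x00, 0x00, 0x00, 0x00,  0x00, 0x00, 0x00, 0x00,
	};
	size_t pos = 0;

	while (pos + 4 <= sizeof(buf)) {
		unsigned int code = (buf[pos] << 8) | buf[pos + 1];
		unsigned int len = buf[pos + 3];

		printf("feature code 0x%04x, %u payload bytes\n", code, len);
		pos += len + 4;		/* same stride as the kernel loop */
	}
	return 0;
}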
490
491static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok)
492{
493 if (*err)
494 return;
495 if (cmd->pos >= IO_BUFFER_LENGTH - 1) {
496 pr_err("Error adding u8: end of buffer.\n");
497 *err = -ERANGE;
498 return;
499 }
500 cmd->cmd[cmd->pos++] = tok;
501}
502
503static void add_short_atom_header(struct opal_dev *cmd, bool bytestring,
504 bool has_sign, int len)
505{
506 u8 atom;
507 int err = 0;
508
509 atom = SHORT_ATOM_ID;
510 atom |= bytestring ? SHORT_ATOM_BYTESTRING : 0;
511 atom |= has_sign ? SHORT_ATOM_SIGNED : 0;
512 atom |= len & SHORT_ATOM_LEN_MASK;
513
514 add_token_u8(&err, cmd, atom);
515}
516
517static void add_medium_atom_header(struct opal_dev *cmd, bool bytestring,
518 bool has_sign, int len)
519{
520 u8 header0;
521
522 header0 = MEDIUM_ATOM_ID;
523 header0 |= bytestring ? MEDIUM_ATOM_BYTESTRING : 0;
524 header0 |= has_sign ? MEDIUM_ATOM_SIGNED : 0;
525 header0 |= (len >> 8) & MEDIUM_ATOM_LEN_MASK;
526 cmd->cmd[cmd->pos++] = header0;
527 cmd->cmd[cmd->pos++] = len;
528}
529
530static void add_token_u64(int *err, struct opal_dev *cmd, u64 number)
531{
532
533 size_t len;
534 int msb;
535 u8 n;
536
537 if (!(number & ~TINY_ATOM_DATA_MASK)) {
538 add_token_u8(err, cmd, number);
539 return;
540 }
541
542 msb = fls(number);
543 len = DIV_ROUND_UP(msb, 4);
544
545 if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) {
546 pr_err("Error adding u64: end of buffer.\n");
547 *err = -ERANGE;
548 return;
549 }
550 add_short_atom_header(cmd, false, false, len);
551 while (len--) {
552 n = number >> (len * 8);
553 add_token_u8(err, cmd, n);
554 }
555}
556
557static void add_token_bytestring(int *err, struct opal_dev *cmd,
558 const u8 *bytestring, size_t len)
559{
560 size_t header_len = 1;
561 bool is_short_atom = true;
562
563 if (*err)
564 return;
565
566 if (len & ~SHORT_ATOM_LEN_MASK) {
567 header_len = 2;
568 is_short_atom = false;
569 }
570
571 if (len >= IO_BUFFER_LENGTH - cmd->pos - header_len) {
572 pr_err("Error adding bytestring: end of buffer.\n");
573 *err = -ERANGE;
574 return;
575 }
576
577 if (is_short_atom)
578 add_short_atom_header(cmd, true, false, len);
579 else
580 add_medium_atom_header(cmd, true, false, len);
581
582 memcpy(&cmd->cmd[cmd->pos], bytestring, len);
583 cmd->pos += len;
584
585}
586
587static int build_locking_range(u8 *buffer, size_t length, u8 lr)
588{
589 if (length > OPAL_UID_LENGTH) {
590 pr_err("Can't build locking range. Length OOB\n");
591 return -ERANGE;
592 }
593
594 memcpy(buffer, opaluid[OPAL_LOCKINGRANGE_GLOBAL], OPAL_UID_LENGTH);
595
596 if (lr == 0)
597 return 0;
598 buffer[5] = LOCKING_RANGE_NON_GLOBAL;
599 buffer[7] = lr;
600
601 return 0;
602}
603
604static int build_locking_user(u8 *buffer, size_t length, u8 lr)
605{
606 if (length > OPAL_UID_LENGTH) {
607 pr_err("Can't build locking range user. Length OOB\n");
608 return -ERANGE;
609 }
610
611 memcpy(buffer, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);
612
613 buffer[7] = lr + 1;
614
615 return 0;
616}
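
A standalone sketch of the UID fixups performed by build_locking_range() and build_locking_user() above: both start from a template UID in the opaluid table and patch the tail bytes for a specific locking range or user. Userspace illustration, byte values copied from the table in this file:

#include <stdio.h>

#define LOCKING_RANGE_NON_GLOBAL 0x03

int main(void)
{
	/* opaluid[OPAL_LOCKINGRANGE_GLOBAL] and opaluid[OPAL_USER1_UID] */
	unsigned char lr_uid[8]   = { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 };
	unsigned char user_uid[8] = { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x01 };
	unsigned char lr = 2;			/* locking range 2 */
	int i;

	lr_uid[5] = LOCKING_RANGE_NON_GLOBAL;	/* non-global range ... */
	lr_uid[7] = lr;				/* ... number lr */
	user_uid[7] = lr + 1;			/* uid[7] = lr + 1, as build_locking_user() does */

	printf("locking range UID:");
	for (i = 0; i < 8; i++)
		printf(" %02x", lr_uid[i]);
	printf("\nlocking user UID: ");
	for (i = 0; i < 8; i++)
		printf(" %02x", user_uid[i]);
	printf("\n");
	return 0;
}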
617
618static void set_comid(struct opal_dev *cmd, u16 comid)
619{
620 struct opal_header *hdr = (struct opal_header *)cmd->cmd;
621
622 hdr->cp.extendedComID[0] = comid >> 8;
623 hdr->cp.extendedComID[1] = comid;
624 hdr->cp.extendedComID[2] = 0;
625 hdr->cp.extendedComID[3] = 0;
626}
627
628static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn)
629{
630 struct opal_header *hdr;
631 int err = 0;
632
633 add_token_u8(&err, cmd, OPAL_ENDOFDATA);
634 add_token_u8(&err, cmd, OPAL_STARTLIST);
635 add_token_u8(&err, cmd, 0);
636 add_token_u8(&err, cmd, 0);
637 add_token_u8(&err, cmd, 0);
638 add_token_u8(&err, cmd, OPAL_ENDLIST);
639
640 if (err) {
641 pr_err("Error finalizing command.\n");
642 return -EFAULT;
643 }
644
645 hdr = (struct opal_header *) cmd->cmd;
646
647 hdr->pkt.tsn = cpu_to_be32(tsn);
648 hdr->pkt.hsn = cpu_to_be32(hsn);
649
650 hdr->subpkt.length = cpu_to_be32(cmd->pos - sizeof(*hdr));
651 while (cmd->pos % 4) {
652 if (cmd->pos >= IO_BUFFER_LENGTH) {
653 pr_err("Error: Buffer overrun\n");
654 return -ERANGE;
655 }
656 cmd->cmd[cmd->pos++] = 0;
657 }
658 hdr->pkt.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp) -
659 sizeof(hdr->pkt));
660 hdr->cp.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp));
661
662 return 0;
663}
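
A worked example of the nested length bookkeeping in cmd_finalize() above. Assuming the sizes implied by the opal_proto.h layouts (compacket 20 bytes, packet 24 bytes, data subpacket 12 bytes, 56 bytes of headers in total) and 37 bytes of payload, the three length fields come out as follows; note that subpkt.length is taken before the pad-to-4 loop while the outer two are taken after it:

#include <stdio.h>

int main(void)
{
	unsigned int compacket = 20, packet = 24, subpacket = 12;
	unsigned int hdr = compacket + packet + subpacket;	/* 56 */
	unsigned int pos = hdr + 37;		/* 37 payload bytes built so far */
	unsigned int subpkt_len = pos - hdr;	/* recorded before padding: 37 */

	while (pos % 4)				/* pad the payload to a 4-byte boundary */
		pos++;

	printf("subpkt.length = %u\n", subpkt_len);		  /* 37 */
	printf("pkt.length    = %u\n", pos - compacket - packet); /* 52 */
	printf("cp.length     = %u\n", pos - compacket);	  /* 76 */
	return 0;
}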
664
665static enum opal_response_token token_type(const struct parsed_resp *resp,
666 int n)
667{
668 const struct opal_resp_tok *tok;
669
670 if (n >= resp->num) {
671 pr_err("Token number doesn't exist: %d, resp: %d\n",
672 n, resp->num);
673 return OPAL_DTA_TOKENID_INVALID;
674 }
675
676 tok = &resp->toks[n];
677 if (tok->len == 0) {
678 pr_err("Token length must be non-zero\n");
679 return OPAL_DTA_TOKENID_INVALID;
680 }
681
682 return tok->type;
683}
684
685/*
686 * This function returns 0 in case of invalid token. One should call
687 * token_type() first to find out if the token is valid or not.
688 */
689static enum opal_token response_get_token(const struct parsed_resp *resp,
690 int n)
691{
692 const struct opal_resp_tok *tok;
693
694 if (n >= resp->num) {
695 pr_err("Token number doesn't exist: %d, resp: %d\n",
696 n, resp->num);
697 return 0;
698 }
699
700 tok = &resp->toks[n];
701 if (tok->len == 0) {
702 pr_err("Token length must be non-zero\n");
703 return 0;
704 }
705
706 return tok->pos[0];
707}
708
709static size_t response_parse_tiny(struct opal_resp_tok *tok,
710 const u8 *pos)
711{
712 tok->pos = pos;
713 tok->len = 1;
714 tok->width = OPAL_WIDTH_TINY;
715
716 if (pos[0] & TINY_ATOM_SIGNED) {
717 tok->type = OPAL_DTA_TOKENID_SINT;
718 } else {
719 tok->type = OPAL_DTA_TOKENID_UINT;
720 tok->stored.u = pos[0] & 0x3f;
721 }
722
723 return tok->len;
724}
725
726static size_t response_parse_short(struct opal_resp_tok *tok,
727 const u8 *pos)
728{
729 tok->pos = pos;
730 tok->len = (pos[0] & SHORT_ATOM_LEN_MASK) + 1;
731 tok->width = OPAL_WIDTH_SHORT;
732
733 if (pos[0] & SHORT_ATOM_BYTESTRING) {
734 tok->type = OPAL_DTA_TOKENID_BYTESTRING;
735 } else if (pos[0] & SHORT_ATOM_SIGNED) {
736 tok->type = OPAL_DTA_TOKENID_SINT;
737 } else {
738 u64 u_integer = 0;
739 int i, b = 0;
740
741 tok->type = OPAL_DTA_TOKENID_UINT;
742 if (tok->len > 9) {
743 pr_warn("uint64 with more than 8 bytes\n");
744 return -EINVAL;
745 }
746 for (i = tok->len - 1; i > 0; i--) {
747 u_integer |= ((u64)pos[i] << (8 * b));
748 b++;
749 }
750 tok->stored.u = u_integer;
751 }
752
753 return tok->len;
754}
755
756static size_t response_parse_medium(struct opal_resp_tok *tok,
757 const u8 *pos)
758{
759 tok->pos = pos;
760 tok->len = (((pos[0] & MEDIUM_ATOM_LEN_MASK) << 8) | pos[1]) + 2;
761 tok->width = OPAL_WIDTH_MEDIUM;
762
763 if (pos[0] & MEDIUM_ATOM_BYTESTRING)
764 tok->type = OPAL_DTA_TOKENID_BYTESTRING;
765 else if (pos[0] & MEDIUM_ATOM_SIGNED)
766 tok->type = OPAL_DTA_TOKENID_SINT;
767 else
768 tok->type = OPAL_DTA_TOKENID_UINT;
769
770 return tok->len;
771}
772
773static size_t response_parse_long(struct opal_resp_tok *tok,
774 const u8 *pos)
775{
776 tok->pos = pos;
777 tok->len = ((pos[1] << 16) | (pos[2] << 8) | pos[3]) + 4;
778 tok->width = OPAL_WIDTH_LONG;
779
780 if (pos[0] & LONG_ATOM_BYTESTRING)
781 tok->type = OPAL_DTA_TOKENID_BYTESTRING;
782 else if (pos[0] & LONG_ATOM_SIGNED)
783 tok->type = OPAL_DTA_TOKENID_SINT;
784 else
785 tok->type = OPAL_DTA_TOKENID_UINT;
786
787 return tok->len;
788}
789
790static size_t response_parse_token(struct opal_resp_tok *tok,
791 const u8 *pos)
792{
793 tok->pos = pos;
794 tok->len = 1;
795 tok->type = OPAL_DTA_TOKENID_TOKEN;
796 tok->width = OPAL_WIDTH_TOKEN;
797
798 return tok->len;
799}
800
801static int response_parse(const u8 *buf, size_t length,
802 struct parsed_resp *resp)
803{
804 const struct opal_header *hdr;
805 struct opal_resp_tok *iter;
806 int num_entries = 0;
807 int total;
808 size_t token_length;
809 const u8 *pos;
810
811 if (!buf)
812 return -EFAULT;
813
814 if (!resp)
815 return -EFAULT;
816
817 hdr = (struct opal_header *)buf;
818 pos = buf;
819 pos += sizeof(*hdr);
820
821 pr_debug("Response size: cp: %d, pkt: %d, subpkt: %d\n",
822 be32_to_cpu(hdr->cp.length),
823 be32_to_cpu(hdr->pkt.length),
824 be32_to_cpu(hdr->subpkt.length));
825
826 if (hdr->cp.length == 0 || hdr->pkt.length == 0 ||
827 hdr->subpkt.length == 0) {
828 pr_err("Bad header length. cp: %d, pkt: %d, subpkt: %d\n",
829 be32_to_cpu(hdr->cp.length),
830 be32_to_cpu(hdr->pkt.length),
831 be32_to_cpu(hdr->subpkt.length));
832 print_buffer(pos, sizeof(*hdr));
833 return -EINVAL;
834 }
835
836 if (pos > buf + length)
837 return -EFAULT;
838
839 iter = resp->toks;
840 total = be32_to_cpu(hdr->subpkt.length);
841 print_buffer(pos, total);
842 while (total > 0) {
843 if (pos[0] <= TINY_ATOM_BYTE) /* tiny atom */
844 token_length = response_parse_tiny(iter, pos);
845 else if (pos[0] <= SHORT_ATOM_BYTE) /* short atom */
846 token_length = response_parse_short(iter, pos);
847 else if (pos[0] <= MEDIUM_ATOM_BYTE) /* medium atom */
848 token_length = response_parse_medium(iter, pos);
849 else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */
850 token_length = response_parse_long(iter, pos);
851 else /* TOKEN */
852 token_length = response_parse_token(iter, pos);
853
854 if (token_length == -EINVAL)
855 return -EINVAL;
856
857 pos += token_length;
858 total -= token_length;
859 iter++;
860 num_entries++;
861 }
862
863 if (num_entries == 0) {
864 pr_err("Couldn't parse response.\n");
865 return -EINVAL;
866 }
867 resp->num = num_entries;
868
869 return 0;
870}
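
A standalone sketch of the leading-byte dispatch used by response_parse() above, with the TINY/SHORT/MEDIUM/LONG_ATOM_BYTE boundaries from opal_proto.h; lengths then follow the parse helpers (a short atom spans 1 + (b & 0xF) bytes, a medium atom 2 + (((b & 0x7) << 8) | next) bytes). Userspace illustration only:

#include <stdio.h>

static const char *classify(unsigned char b)
{
	if (b <= 0x7f)		/* TINY_ATOM_BYTE */
		return "tiny atom";
	if (b <= 0xbf)		/* SHORT_ATOM_BYTE */
		return "short atom";
	if (b <= 0xdf)		/* MEDIUM_ATOM_BYTE */
		return "medium atom";
	if (b <= 0xe3)		/* LONG_ATOM_BYTE */
		return "long atom";
	return "token";		/* e.g. 0xf0 STARTLIST, 0xf9 ENDOFDATA */
}

int main(void)
{
	unsigned char sample[] = { 0x01, 0xa8, 0xd0, 0xe2, 0xf0, 0xf9 };
	size_t i;

	for (i = 0; i < sizeof(sample); i++)
		printf("0x%02x -> %s\n", sample[i], classify(sample[i]));
	return 0;
}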
871
872static size_t response_get_string(const struct parsed_resp *resp, int n,
873 const char **store)
874{
875 *store = NULL;
876 if (!resp) {
877 pr_err("Response is NULL\n");
878 return 0;
879 }
880
881 if (n > resp->num) {
882 pr_err("Response has %d tokens. Can't access %d\n",
883 resp->num, n);
884 return 0;
885 }
886
887 if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) {
888 pr_err("Token is not a byte string!\n");
889 return 0;
890 }
891
892 *store = resp->toks[n].pos + 1;
893 return resp->toks[n].len - 1;
894}
895
896static u64 response_get_u64(const struct parsed_resp *resp, int n)
897{
898 if (!resp) {
899 pr_err("Response is NULL\n");
900 return 0;
901 }
902
903 if (n > resp->num) {
904 pr_err("Response has %d tokens. Can't access %d\n",
905 resp->num, n);
906 return 0;
907 }
908
909 if (resp->toks[n].type != OPAL_DTA_TOKENID_UINT) {
910 pr_err("Token is not an unsigned integer: %d\n",
911 resp->toks[n].type);
912 return 0;
913 }
914
915 if (!(resp->toks[n].width == OPAL_WIDTH_TINY ||
916 resp->toks[n].width == OPAL_WIDTH_SHORT)) {
917 pr_err("Atom is not short or tiny: %d\n",
918 resp->toks[n].width);
919 return 0;
920 }
921
922 return resp->toks[n].stored.u;
923}
924
925static u8 response_status(const struct parsed_resp *resp)
926{
927 if (token_type(resp, 0) == OPAL_DTA_TOKENID_TOKEN &&
928 response_get_token(resp, 0) == OPAL_ENDOFSESSION) {
929 return 0;
930 }
931
932 if (resp->num < 5)
933 return DTAERROR_NO_METHOD_STATUS;
934
935 if (token_type(resp, resp->num - 1) != OPAL_DTA_TOKENID_TOKEN ||
936 token_type(resp, resp->num - 5) != OPAL_DTA_TOKENID_TOKEN ||
937 response_get_token(resp, resp->num - 1) != OPAL_ENDLIST ||
938 response_get_token(resp, resp->num - 5) != OPAL_STARTLIST)
939 return DTAERROR_NO_METHOD_STATUS;
940
941 return response_get_u64(resp, resp->num - 4);
942}
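
The tail that response_status() checks for is the same method status list that cmd_finalize() appends to outgoing commands: OPAL_STARTLIST, the status code, two reserved zero bytes, OPAL_ENDLIST. The status code is therefore always the fourth token from the end. Minimal userspace sketch with token byte values taken from enum opal_token:

#include <stdio.h>

int main(void)
{
	/* ..., ENDOFDATA, STARTLIST, status, 0x00, 0x00, ENDLIST */
	unsigned char toks[] = { 0xf9, 0xf0, 0x01, 0x00, 0x00, 0xf1 };
	int num = (int)(sizeof(toks) / sizeof(toks[0]));

	/* 0x01 maps to "Not Authorized" in the opal_errors table above */
	printf("method status = 0x%02x\n", toks[num - 4]);
	return 0;
}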
943
944/* Parses and checks for errors */
945static int parse_and_check_status(struct opal_dev *dev)
946{
947 int error;
948
949 print_buffer(dev->cmd, dev->pos);
950
951 error = response_parse(dev->resp, IO_BUFFER_LENGTH, &dev->parsed);
952 if (error) {
953 pr_err("Couldn't parse response.\n");
954 return error;
955 }
956
957 return response_status(&dev->parsed);
958}
959
960static void clear_opal_cmd(struct opal_dev *dev)
961{
962 dev->pos = sizeof(struct opal_header);
963 memset(dev->cmd, 0, IO_BUFFER_LENGTH);
964}
965
966static int start_opal_session_cont(struct opal_dev *dev)
967{
968 u32 hsn, tsn;
969 int error = 0;
970
971 error = parse_and_check_status(dev);
972 if (error)
973 return error;
974
975 hsn = response_get_u64(&dev->parsed, 4);
976 tsn = response_get_u64(&dev->parsed, 5);
977
978 if (hsn == 0 && tsn == 0) {
979 pr_err("Couldn't authenticate session\n");
980 return -EPERM;
981 }
982
983 dev->hsn = hsn;
984 dev->tsn = tsn;
985 return 0;
986}
987
988static void add_suspend_info(struct opal_dev *dev,
989 struct opal_suspend_data *sus)
990{
991 struct opal_suspend_data *iter;
992
993 list_for_each_entry(iter, &dev->unlk_lst, node) {
994 if (iter->lr == sus->lr) {
995 list_del(&iter->node);
996 kfree(iter);
997 break;
998 }
999 }
1000 list_add_tail(&sus->node, &dev->unlk_lst);
1001}
1002
1003static int end_session_cont(struct opal_dev *dev)
1004{
1005 dev->hsn = 0;
1006 dev->tsn = 0;
1007 return parse_and_check_status(dev);
1008}
1009
1010static int finalize_and_send(struct opal_dev *dev, cont_fn cont)
1011{
1012 int ret;
1013
1014 ret = cmd_finalize(dev, dev->hsn, dev->tsn);
1015 if (ret) {
1016 pr_err("Error finalizing command buffer: %d\n", ret);
1017 return ret;
1018 }
1019
1020 print_buffer(dev->cmd, dev->pos);
1021
1022 return opal_send_recv(dev, cont);
1023}
1024
1025static int gen_key(struct opal_dev *dev)
1026{
1027 const u8 *method;
1028 u8 uid[OPAL_UID_LENGTH];
1029 int err = 0;
1030
1031 clear_opal_cmd(dev);
1032 set_comid(dev, dev->comid);
1033
1034 memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len));
1035 method = opalmethod[OPAL_GENKEY];
1036 kfree(dev->prev_data);
1037 dev->prev_data = NULL;
1038
1039 add_token_u8(&err, dev, OPAL_CALL);
1040 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1041 add_token_bytestring(&err, dev, opalmethod[OPAL_GENKEY],
1042 OPAL_UID_LENGTH);
1043 add_token_u8(&err, dev, OPAL_STARTLIST);
1044 add_token_u8(&err, dev, OPAL_ENDLIST);
1045
1046 if (err) {
1047 pr_err("Error building gen key command\n");
1048 return err;
1049
1050 }
1051 return finalize_and_send(dev, parse_and_check_status);
1052}
1053
1054static int get_active_key_cont(struct opal_dev *dev)
1055{
1056 const char *activekey;
1057 size_t keylen;
1058 int error = 0;
1059
1060 error = parse_and_check_status(dev);
1061 if (error)
1062 return error;
1063 keylen = response_get_string(&dev->parsed, 4, &activekey);
1064 if (!activekey) {
1065 pr_err("%s: Couldn't extract the Activekey from the response\n",
1066 __func__);
1067 return OPAL_INVAL_PARAM;
1068 }
1069 dev->prev_data = kmemdup(activekey, keylen, GFP_KERNEL);
1070
1071 if (!dev->prev_data)
1072 return -ENOMEM;
1073
1074 dev->prev_d_len = keylen;
1075
1076 return 0;
1077}
1078
1079static int get_active_key(struct opal_dev *dev)
1080{
1081 u8 uid[OPAL_UID_LENGTH];
1082 int err = 0;
1083 u8 *lr;
1084
1085 clear_opal_cmd(dev);
1086 set_comid(dev, dev->comid);
1087 lr = dev->func_data[dev->state];
1088
1089 err = build_locking_range(uid, sizeof(uid), *lr);
1090 if (err)
1091 return err;
1092
1093 err = 0;
1094 add_token_u8(&err, dev, OPAL_CALL);
1095 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1096 add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH);
1097 add_token_u8(&err, dev, OPAL_STARTLIST);
1098 add_token_u8(&err, dev, OPAL_STARTLIST);
1099 add_token_u8(&err, dev, OPAL_STARTNAME);
1100 add_token_u8(&err, dev, 3); /* startColumn */
1101 add_token_u8(&err, dev, 10); /* ActiveKey */
1102 add_token_u8(&err, dev, OPAL_ENDNAME);
1103 add_token_u8(&err, dev, OPAL_STARTNAME);
1104 add_token_u8(&err, dev, 4); /* endColumn */
1105 add_token_u8(&err, dev, 10); /* ActiveKey */
1106 add_token_u8(&err, dev, OPAL_ENDNAME);
1107 add_token_u8(&err, dev, OPAL_ENDLIST);
1108 add_token_u8(&err, dev, OPAL_ENDLIST);
1109 if (err) {
1110 pr_err("Error building get active key command\n");
1111 return err;
1112 }
1113
1114 return finalize_and_send(dev, get_active_key_cont);
1115}
1116
1117static int generic_lr_enable_disable(struct opal_dev *dev,
1118 u8 *uid, bool rle, bool wle,
1119 bool rl, bool wl)
1120{
1121 int err = 0;
1122
1123 add_token_u8(&err, dev, OPAL_CALL);
1124 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1125 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1126
1127 add_token_u8(&err, dev, OPAL_STARTLIST);
1128 add_token_u8(&err, dev, OPAL_STARTNAME);
1129 add_token_u8(&err, dev, OPAL_VALUES);
1130 add_token_u8(&err, dev, OPAL_STARTLIST);
1131
1132 add_token_u8(&err, dev, OPAL_STARTNAME);
1133 add_token_u8(&err, dev, 5); /* ReadLockEnabled */
1134 add_token_u8(&err, dev, rle);
1135 add_token_u8(&err, dev, OPAL_ENDNAME);
1136
1137 add_token_u8(&err, dev, OPAL_STARTNAME);
1138 add_token_u8(&err, dev, 6); /* WriteLockEnabled */
1139 add_token_u8(&err, dev, wle);
1140 add_token_u8(&err, dev, OPAL_ENDNAME);
1141
1142 add_token_u8(&err, dev, OPAL_STARTNAME);
1143 add_token_u8(&err, dev, OPAL_READLOCKED);
1144 add_token_u8(&err, dev, rl);
1145 add_token_u8(&err, dev, OPAL_ENDNAME);
1146
1147 add_token_u8(&err, dev, OPAL_STARTNAME);
1148 add_token_u8(&err, dev, OPAL_WRITELOCKED);
1149 add_token_u8(&err, dev, wl);
1150 add_token_u8(&err, dev, OPAL_ENDNAME);
1151
1152 add_token_u8(&err, dev, OPAL_ENDLIST);
1153 add_token_u8(&err, dev, OPAL_ENDNAME);
1154 add_token_u8(&err, dev, OPAL_ENDLIST);
1155 return err;
1156}
1157
1158static inline int enable_global_lr(struct opal_dev *dev, u8 *uid,
1159 struct opal_user_lr_setup *setup)
1160{
1161 int err;
1162
1163 err = generic_lr_enable_disable(dev, uid, !!setup->RLE, !!setup->WLE,
1164 0, 0);
1165 if (err)
1166 pr_err("Failed to create enable global lr command\n");
1167 return err;
1168}
1169
1170static int setup_locking_range(struct opal_dev *dev)
1171{
1172 u8 uid[OPAL_UID_LENGTH];
1173 struct opal_user_lr_setup *setup;
1174 u8 lr;
1175 int err = 0;
1176
1177 clear_opal_cmd(dev);
1178 set_comid(dev, dev->comid);
1179
1180 setup = dev->func_data[dev->state];
1181 lr = setup->session.opal_key.lr;
1182 err = build_locking_range(uid, sizeof(uid), lr);
1183 if (err)
1184 return err;
1185
1186 if (lr == 0)
1187 err = enable_global_lr(dev, uid, setup);
1188 else {
1189 add_token_u8(&err, dev, OPAL_CALL);
1190 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1191 add_token_bytestring(&err, dev, opalmethod[OPAL_SET],
1192 OPAL_UID_LENGTH);
1193
1194 add_token_u8(&err, dev, OPAL_STARTLIST);
1195 add_token_u8(&err, dev, OPAL_STARTNAME);
1196 add_token_u8(&err, dev, OPAL_VALUES);
1197 add_token_u8(&err, dev, OPAL_STARTLIST);
1198
1199 add_token_u8(&err, dev, OPAL_STARTNAME);
1200 add_token_u8(&err, dev, 3); /* Ranges Start */
1201 add_token_u64(&err, dev, setup->range_start);
1202 add_token_u8(&err, dev, OPAL_ENDNAME);
1203
1204 add_token_u8(&err, dev, OPAL_STARTNAME);
1205 add_token_u8(&err, dev, 4); /* Ranges length */
1206 add_token_u64(&err, dev, setup->range_length);
1207 add_token_u8(&err, dev, OPAL_ENDNAME);
1208
1209 add_token_u8(&err, dev, OPAL_STARTNAME);
1210 add_token_u8(&err, dev, 5); /* ReadLockEnabled */
1211 add_token_u64(&err, dev, !!setup->RLE);
1212 add_token_u8(&err, dev, OPAL_ENDNAME);
1213
1214 add_token_u8(&err, dev, OPAL_STARTNAME);
1215 add_token_u8(&err, dev, 6); /* WriteLockEnabled */
1216 add_token_u64(&err, dev, !!setup->WLE);
1217 add_token_u8(&err, dev, OPAL_ENDNAME);
1218
1219 add_token_u8(&err, dev, OPAL_ENDLIST);
1220 add_token_u8(&err, dev, OPAL_ENDNAME);
1221 add_token_u8(&err, dev, OPAL_ENDLIST);
1222
1223 }
1224 if (err) {
1225 pr_err("Error building Setup Locking range command.\n");
1226 return err;
1227
1228 }
1229
1230 return finalize_and_send(dev, parse_and_check_status);
1231}
1232
1233static int start_generic_opal_session(struct opal_dev *dev,
1234 enum opal_uid auth,
1235 enum opal_uid sp_type,
1236 const char *key,
1237 u8 key_len)
1238{
1239 u32 hsn;
1240 int err = 0;
1241
1242 if (key == NULL && auth != OPAL_ANYBODY_UID) {
1243 pr_err("%s: Attempted to open ADMIN_SP Session without a Host" \
1244 "Challenge, and not as the Anybody UID\n", __func__);
1245 return OPAL_INVAL_PARAM;
1246 }
1247
1248 clear_opal_cmd(dev);
1249
1250 set_comid(dev, dev->comid);
1251 hsn = GENERIC_HOST_SESSION_NUM;
1252
1253 add_token_u8(&err, dev, OPAL_CALL);
1254 add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID],
1255 OPAL_UID_LENGTH);
1256 add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION],
1257 OPAL_UID_LENGTH);
1258 add_token_u8(&err, dev, OPAL_STARTLIST);
1259 add_token_u64(&err, dev, hsn);
1260 add_token_bytestring(&err, dev, opaluid[sp_type], OPAL_UID_LENGTH);
1261 add_token_u8(&err, dev, 1);
1262
1263 switch (auth) {
1264 case OPAL_ANYBODY_UID:
1265 add_token_u8(&err, dev, OPAL_ENDLIST);
1266 break;
1267 case OPAL_ADMIN1_UID:
1268 case OPAL_SID_UID:
1269 add_token_u8(&err, dev, OPAL_STARTNAME);
1270 add_token_u8(&err, dev, 0); /* HostChallenge */
1271 add_token_bytestring(&err, dev, key, key_len);
1272 add_token_u8(&err, dev, OPAL_ENDNAME);
1273 add_token_u8(&err, dev, OPAL_STARTNAME);
1274 add_token_u8(&err, dev, 3); /* HostSignAuth */
1275 add_token_bytestring(&err, dev, opaluid[auth],
1276 OPAL_UID_LENGTH);
1277 add_token_u8(&err, dev, OPAL_ENDNAME);
1278 add_token_u8(&err, dev, OPAL_ENDLIST);
1279 break;
1280 default:
1281 pr_err("Cannot start Admin SP session with auth %d\n", auth);
1282 return OPAL_INVAL_PARAM;
1283 }
1284
1285 if (err) {
1286 pr_err("Error building start adminsp session command.\n");
1287 return err;
1288 }
1289
1290 return finalize_and_send(dev, start_opal_session_cont);
1291}
1292
1293static int start_anybodyASP_opal_session(struct opal_dev *dev)
1294{
1295 return start_generic_opal_session(dev, OPAL_ANYBODY_UID,
1296 OPAL_ADMINSP_UID, NULL, 0);
1297}
1298
1299static int start_SIDASP_opal_session(struct opal_dev *dev)
1300{
1301 int ret;
1302 const u8 *key = dev->prev_data;
1303 struct opal_key *okey;
1304
1305 if (!key) {
1306 okey = dev->func_data[dev->state];
1307 ret = start_generic_opal_session(dev, OPAL_SID_UID,
1308 OPAL_ADMINSP_UID,
1309 okey->key,
1310 okey->key_len);
1311 } else {
1312 ret = start_generic_opal_session(dev, OPAL_SID_UID,
1313 OPAL_ADMINSP_UID,
1314 key, dev->prev_d_len);
1315 kfree(key);
1316 dev->prev_data = NULL;
1317 }
1318 return ret;
1319}
1320
1321static inline int start_admin1LSP_opal_session(struct opal_dev *dev)
1322{
1323 struct opal_key *key = dev->func_data[dev->state];
1324
1325 return start_generic_opal_session(dev, OPAL_ADMIN1_UID,
1326 OPAL_LOCKINGSP_UID,
1327 key->key, key->key_len);
1328}
1329
1330static int start_auth_opal_session(struct opal_dev *dev)
1331{
1332 u8 lk_ul_user[OPAL_UID_LENGTH];
1333 int err = 0;
1334
1335 struct opal_session_info *session = dev->func_data[dev->state];
1336 size_t keylen = session->opal_key.key_len;
1337 u8 *key = session->opal_key.key;
1338 u32 hsn = GENERIC_HOST_SESSION_NUM;
1339
1340 clear_opal_cmd(dev);
1341 set_comid(dev, dev->comid);
1342
1343 if (session->sum) {
1344 err = build_locking_user(lk_ul_user, sizeof(lk_ul_user),
1345 session->opal_key.lr);
1346 if (err)
1347 return err;
1348
1349 } else if (session->who != OPAL_ADMIN1 && !session->sum) {
1350 err = build_locking_user(lk_ul_user, sizeof(lk_ul_user),
1351 session->who - 1);
1352 if (err)
1353 return err;
1354 } else
1355 memcpy(lk_ul_user, opaluid[OPAL_ADMIN1_UID], OPAL_UID_LENGTH);
1356
1357 add_token_u8(&err, dev, OPAL_CALL);
1358 add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID],
1359 OPAL_UID_LENGTH);
1360 add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION],
1361 OPAL_UID_LENGTH);
1362
1363 add_token_u8(&err, dev, OPAL_STARTLIST);
1364 add_token_u64(&err, dev, hsn);
1365 add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID],
1366 OPAL_UID_LENGTH);
1367 add_token_u8(&err, dev, 1);
1368 add_token_u8(&err, dev, OPAL_STARTNAME);
1369 add_token_u8(&err, dev, 0);
1370 add_token_bytestring(&err, dev, key, keylen);
1371 add_token_u8(&err, dev, OPAL_ENDNAME);
1372 add_token_u8(&err, dev, OPAL_STARTNAME);
1373 add_token_u8(&err, dev, 3);
1374 add_token_bytestring(&err, dev, lk_ul_user, OPAL_UID_LENGTH);
1375 add_token_u8(&err, dev, OPAL_ENDNAME);
1376 add_token_u8(&err, dev, OPAL_ENDLIST);
1377
1378 if (err) {
1379 pr_err("Error building STARTSESSION command.\n");
1380 return err;
1381 }
1382
1383 return finalize_and_send(dev, start_opal_session_cont);
1384}
1385
1386static int revert_tper(struct opal_dev *dev)
1387{
1388 int err = 0;
1389
1390 clear_opal_cmd(dev);
1391 set_comid(dev, dev->comid);
1392
1393 add_token_u8(&err, dev, OPAL_CALL);
1394 add_token_bytestring(&err, dev, opaluid[OPAL_ADMINSP_UID],
1395 OPAL_UID_LENGTH);
1396 add_token_bytestring(&err, dev, opalmethod[OPAL_REVERT],
1397 OPAL_UID_LENGTH);
1398 add_token_u8(&err, dev, OPAL_STARTLIST);
1399 add_token_u8(&err, dev, OPAL_ENDLIST);
1400 if (err) {
1401 pr_err("Error building REVERT TPER command.\n");
1402 return err;
1403 }
1404
1405 return finalize_and_send(dev, parse_and_check_status);
1406}
1407
1408static int internal_activate_user(struct opal_dev *dev)
1409{
1410 struct opal_session_info *session = dev->func_data[dev->state];
1411 u8 uid[OPAL_UID_LENGTH];
1412 int err = 0;
1413
1414 clear_opal_cmd(dev);
1415 set_comid(dev, dev->comid);
1416
1417 memcpy(uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);
1418 uid[7] = session->who;
1419
1420 add_token_u8(&err, dev, OPAL_CALL);
1421 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1422 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1423 add_token_u8(&err, dev, OPAL_STARTLIST);
1424 add_token_u8(&err, dev, OPAL_STARTNAME);
1425 add_token_u8(&err, dev, OPAL_VALUES);
1426 add_token_u8(&err, dev, OPAL_STARTLIST);
1427 add_token_u8(&err, dev, OPAL_STARTNAME);
1428 add_token_u8(&err, dev, 5); /* Enabled */
1429 add_token_u8(&err, dev, OPAL_TRUE);
1430 add_token_u8(&err, dev, OPAL_ENDNAME);
1431 add_token_u8(&err, dev, OPAL_ENDLIST);
1432 add_token_u8(&err, dev, OPAL_ENDNAME);
1433 add_token_u8(&err, dev, OPAL_ENDLIST);
1434
1435 if (err) {
1436 pr_err("Error building Activate UserN command.\n");
1437 return err;
1438 }
1439
1440 return finalize_and_send(dev, parse_and_check_status);
1441}
1442
1443static int erase_locking_range(struct opal_dev *dev)
1444{
1445 struct opal_session_info *session;
1446 u8 uid[OPAL_UID_LENGTH];
1447 int err = 0;
1448
1449 clear_opal_cmd(dev);
1450 set_comid(dev, dev->comid);
1451 session = dev->func_data[dev->state];
1452
1453 if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0)
1454 return -ERANGE;
1455
1456 add_token_u8(&err, dev, OPAL_CALL);
1457 add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH);
1458 add_token_bytestring(&err, dev, opalmethod[OPAL_ERASE],
1459 OPAL_UID_LENGTH);
1460 add_token_u8(&err, dev, OPAL_STARTLIST);
1461 add_token_u8(&err, dev, OPAL_ENDLIST);
1462
1463 if (err) {
1464 pr_err("Error building Erase Locking Range Command.\n");
1465 return err;
1466 }
1467 return finalize_and_send(dev, parse_and_check_status);
1468}
1469
1470static int set_mbr_done(struct opal_dev *dev)
1471{
1472 u8 mbr_done_tf = *(u8 *)dev->func_data[dev->state];
1473 int err = 0;
1474
1475 clear_opal_cmd(dev);
1476 set_comid(dev, dev->comid);
1477
1478 add_token_u8(&err, dev, OPAL_CALL);
1479 add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL],
1480 OPAL_UID_LENGTH);
1481 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1482 add_token_u8(&err, dev, OPAL_STARTLIST);
1483 add_token_u8(&err, dev, OPAL_STARTNAME);
1484 add_token_u8(&err, dev, OPAL_VALUES);
1485 add_token_u8(&err, dev, OPAL_STARTLIST);
1486 add_token_u8(&err, dev, OPAL_STARTNAME);
1487 add_token_u8(&err, dev, 2); /* Done */
1488 add_token_u8(&err, dev, mbr_done_tf); /* Done T or F */
1489 add_token_u8(&err, dev, OPAL_ENDNAME);
1490 add_token_u8(&err, dev, OPAL_ENDLIST);
1491 add_token_u8(&err, dev, OPAL_ENDNAME);
1492 add_token_u8(&err, dev, OPAL_ENDLIST);
1493
1494 if (err) {
1495 pr_err("Error Building set MBR Done command\n");
1496 return err;
1497 }
1498
1499 return finalize_and_send(dev, parse_and_check_status);
1500}
1501
1502static int set_mbr_enable_disable(struct opal_dev *dev)
1503{
1504 u8 mbr_en_dis = *(u8 *)dev->func_data[dev->state];
1505 int err = 0;
1506
1507 clear_opal_cmd(dev);
1508 set_comid(dev, dev->comid);
1509
1510 add_token_u8(&err, dev, OPAL_CALL);
1511 add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL],
1512 OPAL_UID_LENGTH);
1513 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1514 add_token_u8(&err, dev, OPAL_STARTLIST);
1515 add_token_u8(&err, dev, OPAL_STARTNAME);
1516 add_token_u8(&err, dev, OPAL_VALUES);
1517 add_token_u8(&err, dev, OPAL_STARTLIST);
1518 add_token_u8(&err, dev, OPAL_STARTNAME);
1519 add_token_u8(&err, dev, 1);
1520 add_token_u8(&err, dev, mbr_en_dis);
1521 add_token_u8(&err, dev, OPAL_ENDNAME);
1522 add_token_u8(&err, dev, OPAL_ENDLIST);
1523 add_token_u8(&err, dev, OPAL_ENDNAME);
1524 add_token_u8(&err, dev, OPAL_ENDLIST);
1525
1526 if (err) {
1527		pr_err("Error building set MBR enable/disable command\n");
1528 return err;
1529 }
1530
1531 return finalize_and_send(dev, parse_and_check_status);
1532}
1533
1534static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid,
1535 struct opal_dev *dev)
1536{
1537 int err = 0;
1538
1539 clear_opal_cmd(dev);
1540 set_comid(dev, dev->comid);
1541
1542 add_token_u8(&err, dev, OPAL_CALL);
1543 add_token_bytestring(&err, dev, cpin_uid, OPAL_UID_LENGTH);
1544 add_token_bytestring(&err, dev, opalmethod[OPAL_SET],
1545 OPAL_UID_LENGTH);
1546 add_token_u8(&err, dev, OPAL_STARTLIST);
1547 add_token_u8(&err, dev, OPAL_STARTNAME);
1548 add_token_u8(&err, dev, OPAL_VALUES);
1549 add_token_u8(&err, dev, OPAL_STARTLIST);
1550 add_token_u8(&err, dev, OPAL_STARTNAME);
1551 add_token_u8(&err, dev, 3); /* PIN */
1552 add_token_bytestring(&err, dev, key, key_len);
1553 add_token_u8(&err, dev, OPAL_ENDNAME);
1554 add_token_u8(&err, dev, OPAL_ENDLIST);
1555 add_token_u8(&err, dev, OPAL_ENDNAME);
1556 add_token_u8(&err, dev, OPAL_ENDLIST);
1557
1558 return err;
1559}
1560
1561static int set_new_pw(struct opal_dev *dev)
1562{
1563 u8 cpin_uid[OPAL_UID_LENGTH];
1564 struct opal_session_info *usr = dev->func_data[dev->state];
1565
1566
1567 memcpy(cpin_uid, opaluid[OPAL_C_PIN_ADMIN1], OPAL_UID_LENGTH);
1568
1569 if (usr->who != OPAL_ADMIN1) {
1570 cpin_uid[5] = 0x03;
1571 if (usr->sum)
1572 cpin_uid[7] = usr->opal_key.lr + 1;
1573 else
1574 cpin_uid[7] = usr->who;
1575 }
1576
1577 if (generic_pw_cmd(usr->opal_key.key, usr->opal_key.key_len,
1578 cpin_uid, dev)) {
1579 pr_err("Error building set password command.\n");
1580 return -ERANGE;
1581 }
1582
1583 return finalize_and_send(dev, parse_and_check_status);
1584}
1585
1586static int set_sid_cpin_pin(struct opal_dev *dev)
1587{
1588 u8 cpin_uid[OPAL_UID_LENGTH];
1589 struct opal_key *key = dev->func_data[dev->state];
1590
1591 memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH);
1592
1593 if (generic_pw_cmd(key->key, key->key_len, cpin_uid, dev)) {
1594 pr_err("Error building Set SID cpin\n");
1595 return -ERANGE;
1596 }
1597 return finalize_and_send(dev, parse_and_check_status);
1598}
1599
1600static int add_user_to_lr(struct opal_dev *dev)
1601{
1602 u8 lr_buffer[OPAL_UID_LENGTH];
1603 u8 user_uid[OPAL_UID_LENGTH];
1604 struct opal_lock_unlock *lkul;
1605 int err = 0;
1606
1607 clear_opal_cmd(dev);
1608 set_comid(dev, dev->comid);
1609
1610 lkul = dev->func_data[dev->state];
1611
1612 memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED],
1613 OPAL_UID_LENGTH);
1614
1615 if (lkul->l_state == OPAL_RW)
1616 memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_WRLOCKED],
1617 OPAL_UID_LENGTH);
1618
1619 lr_buffer[7] = lkul->session.opal_key.lr;
1620
1621 memcpy(user_uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH);
1622
1623 user_uid[7] = lkul->session.who;
1624
1625 add_token_u8(&err, dev, OPAL_CALL);
1626 add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH);
1627 add_token_bytestring(&err, dev, opalmethod[OPAL_SET],
1628 OPAL_UID_LENGTH);
1629
1630 add_token_u8(&err, dev, OPAL_STARTLIST);
1631 add_token_u8(&err, dev, OPAL_STARTNAME);
1632 add_token_u8(&err, dev, OPAL_VALUES);
1633
1634 add_token_u8(&err, dev, OPAL_STARTLIST);
1635 add_token_u8(&err, dev, OPAL_STARTNAME);
1636 add_token_u8(&err, dev, 3);
1637
1638 add_token_u8(&err, dev, OPAL_STARTLIST);
1639
1640
1641 add_token_u8(&err, dev, OPAL_STARTNAME);
1642 add_token_bytestring(&err, dev,
1643 opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
1644 OPAL_UID_LENGTH/2);
1645 add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
1646 add_token_u8(&err, dev, OPAL_ENDNAME);
1647
1648
1649 add_token_u8(&err, dev, OPAL_STARTNAME);
1650 add_token_bytestring(&err, dev,
1651 opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF],
1652 OPAL_UID_LENGTH/2);
1653 add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH);
1654 add_token_u8(&err, dev, OPAL_ENDNAME);
1655
1656
1657 add_token_u8(&err, dev, OPAL_STARTNAME);
1658 add_token_bytestring(&err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE],
1659 OPAL_UID_LENGTH/2);
1660 add_token_u8(&err, dev, 1);
1661 add_token_u8(&err, dev, OPAL_ENDNAME);
1662
1663
1664 add_token_u8(&err, dev, OPAL_ENDLIST);
1665 add_token_u8(&err, dev, OPAL_ENDNAME);
1666 add_token_u8(&err, dev, OPAL_ENDLIST);
1667 add_token_u8(&err, dev, OPAL_ENDNAME);
1668 add_token_u8(&err, dev, OPAL_ENDLIST);
1669
1670 if (err) {
1671 pr_err("Error building add user to locking range command.\n");
1672 return err;
1673 }
1674
1675 return finalize_and_send(dev, parse_and_check_status);
1676}
1677
1678static int lock_unlock_locking_range(struct opal_dev *dev)
1679{
1680 u8 lr_buffer[OPAL_UID_LENGTH];
1681 const u8 *method;
1682 struct opal_lock_unlock *lkul;
1683 u8 read_locked = 1, write_locked = 1;
1684 int err = 0;
1685
1686 clear_opal_cmd(dev);
1687 set_comid(dev, dev->comid);
1688
1689 method = opalmethod[OPAL_SET];
1690 lkul = dev->func_data[dev->state];
1691 if (build_locking_range(lr_buffer, sizeof(lr_buffer),
1692 lkul->session.opal_key.lr) < 0)
1693 return -ERANGE;
1694
1695 switch (lkul->l_state) {
1696 case OPAL_RO:
1697 read_locked = 0;
1698 write_locked = 1;
1699 break;
1700 case OPAL_RW:
1701 read_locked = 0;
1702 write_locked = 0;
1703 break;
1704 case OPAL_LK:
1705		/* vars are initialized to locked */
1706 break;
1707 default:
1708		pr_err("Tried to set an invalid locking state... returning to userland\n");
1709 return OPAL_INVAL_PARAM;
1710 }
1711
1712 add_token_u8(&err, dev, OPAL_CALL);
1713 add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH);
1714 add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH);
1715 add_token_u8(&err, dev, OPAL_STARTLIST);
1716 add_token_u8(&err, dev, OPAL_STARTNAME);
1717 add_token_u8(&err, dev, OPAL_VALUES);
1718 add_token_u8(&err, dev, OPAL_STARTLIST);
1719
1720 add_token_u8(&err, dev, OPAL_STARTNAME);
1721 add_token_u8(&err, dev, OPAL_READLOCKED);
1722 add_token_u8(&err, dev, read_locked);
1723 add_token_u8(&err, dev, OPAL_ENDNAME);
1724
1725 add_token_u8(&err, dev, OPAL_STARTNAME);
1726 add_token_u8(&err, dev, OPAL_WRITELOCKED);
1727 add_token_u8(&err, dev, write_locked);
1728 add_token_u8(&err, dev, OPAL_ENDNAME);
1729
1730 add_token_u8(&err, dev, OPAL_ENDLIST);
1731 add_token_u8(&err, dev, OPAL_ENDNAME);
1732 add_token_u8(&err, dev, OPAL_ENDLIST);
1733
1734 if (err) {
1735 pr_err("Error building SET command.\n");
1736 return err;
1737 }
1738 return finalize_and_send(dev, parse_and_check_status);
1739}
1740
1741
1742static int lock_unlock_locking_range_sum(struct opal_dev *dev)
1743{
1744 u8 lr_buffer[OPAL_UID_LENGTH];
1745 u8 read_locked = 1, write_locked = 1;
1746 const u8 *method;
1747 struct opal_lock_unlock *lkul;
1748 int ret;
1749
1750 clear_opal_cmd(dev);
1751 set_comid(dev, dev->comid);
1752
1753 method = opalmethod[OPAL_SET];
1754 lkul = dev->func_data[dev->state];
1755 if (build_locking_range(lr_buffer, sizeof(lr_buffer),
1756 lkul->session.opal_key.lr) < 0)
1757 return -ERANGE;
1758
1759 switch (lkul->l_state) {
1760 case OPAL_RO:
1761 read_locked = 0;
1762 write_locked = 1;
1763 break;
1764 case OPAL_RW:
1765 read_locked = 0;
1766 write_locked = 0;
1767 break;
1768 case OPAL_LK:
1769		/* vars are initialized to locked */
1770 break;
1771 default:
1772 pr_err("Tried to set an invalid locking state.\n");
1773 return OPAL_INVAL_PARAM;
1774 }
1775 ret = generic_lr_enable_disable(dev, lr_buffer, 1, 1,
1776 read_locked, write_locked);
1777
1778 if (ret < 0) {
1779 pr_err("Error building SET command.\n");
1780 return ret;
1781 }
1782 return finalize_and_send(dev, parse_and_check_status);
1783}
1784
1785static int activate_lsp(struct opal_dev *dev)
1786{
1787 struct opal_lr_act *opal_act;
1788 u8 user_lr[OPAL_UID_LENGTH];
1789 u8 uint_3 = 0x83;
1790 int err = 0, i;
1791
1792 clear_opal_cmd(dev);
1793 set_comid(dev, dev->comid);
1794
1795 opal_act = dev->func_data[dev->state];
1796
1797 add_token_u8(&err, dev, OPAL_CALL);
1798 add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID],
1799 OPAL_UID_LENGTH);
1800 add_token_bytestring(&err, dev, opalmethod[OPAL_ACTIVATE],
1801 OPAL_UID_LENGTH);
1802
1803
1804 if (opal_act->sum) {
1805 err = build_locking_range(user_lr, sizeof(user_lr),
1806 opal_act->lr[0]);
1807 if (err)
1808 return err;
1809
1810 add_token_u8(&err, dev, OPAL_STARTLIST);
1811 add_token_u8(&err, dev, OPAL_STARTNAME);
1812 add_token_u8(&err, dev, uint_3);
1813 add_token_u8(&err, dev, 6);
1814 add_token_u8(&err, dev, 0);
1815 add_token_u8(&err, dev, 0);
1816
1817 add_token_u8(&err, dev, OPAL_STARTLIST);
1818 add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
1819 for (i = 1; i < opal_act->num_lrs; i++) {
1820 user_lr[7] = opal_act->lr[i];
1821 add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
1822 }
1823 add_token_u8(&err, dev, OPAL_ENDLIST);
1824 add_token_u8(&err, dev, OPAL_ENDNAME);
1825 add_token_u8(&err, dev, OPAL_ENDLIST);
1826
1827 } else {
1828 add_token_u8(&err, dev, OPAL_STARTLIST);
1829 add_token_u8(&err, dev, OPAL_ENDLIST);
1830 }
1831
1832 if (err) {
1833 pr_err("Error building Activate LockingSP command.\n");
1834 return err;
1835 }
1836
1837 return finalize_and_send(dev, parse_and_check_status);
1838}
1839
1840static int get_lsp_lifecycle_cont(struct opal_dev *dev)
1841{
1842 u8 lc_status;
1843 int error = 0;
1844
1845 error = parse_and_check_status(dev);
1846 if (error)
1847 return error;
1848
1849 lc_status = response_get_u64(&dev->parsed, 4);
1850	/* 0x08 is Manufactured Inactive */
1851 /* 0x09 is Manufactured */
1852 if (lc_status != OPAL_MANUFACTURED_INACTIVE) {
1853		pr_err("Couldn't determine the status of the Lifecycle state\n");
1854 return -ENODEV;
1855 }
1856
1857 return 0;
1858}
1859
1860/* Determine if we're in the Manufactured Inactive or Active state */
1861static int get_lsp_lifecycle(struct opal_dev *dev)
1862{
1863 int err = 0;
1864
1865 clear_opal_cmd(dev);
1866 set_comid(dev, dev->comid);
1867
1868 add_token_u8(&err, dev, OPAL_CALL);
1869 add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID],
1870 OPAL_UID_LENGTH);
1871 add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH);
1872
1873 add_token_u8(&err, dev, OPAL_STARTLIST);
1874 add_token_u8(&err, dev, OPAL_STARTLIST);
1875
1876 add_token_u8(&err, dev, OPAL_STARTNAME);
1877 add_token_u8(&err, dev, 3); /* Start Column */
1878 add_token_u8(&err, dev, 6); /* Lifecycle Column */
1879 add_token_u8(&err, dev, OPAL_ENDNAME);
1880
1881 add_token_u8(&err, dev, OPAL_STARTNAME);
1882 add_token_u8(&err, dev, 4); /* End Column */
1883 add_token_u8(&err, dev, 6); /* Lifecycle Column */
1884 add_token_u8(&err, dev, OPAL_ENDNAME);
1885
1886 add_token_u8(&err, dev, OPAL_ENDLIST);
1887 add_token_u8(&err, dev, OPAL_ENDLIST);
1888
1889 if (err) {
1890 pr_err("Error Building GET Lifecycle Status command\n");
1891 return err;
1892 }
1893
1894 return finalize_and_send(dev, get_lsp_lifecycle_cont);
1895}
1896
1897static int get_msid_cpin_pin_cont(struct opal_dev *dev)
1898{
1899 const char *msid_pin;
1900 size_t strlen;
1901 int error = 0;
1902
1903 error = parse_and_check_status(dev);
1904 if (error)
1905 return error;
1906
1907 strlen = response_get_string(&dev->parsed, 4, &msid_pin);
1908 if (!msid_pin) {
1909 pr_err("%s: Couldn't extract PIN from response\n", __func__);
1910 return OPAL_INVAL_PARAM;
1911 }
1912
1913 dev->prev_data = kmemdup(msid_pin, strlen, GFP_KERNEL);
1914 if (!dev->prev_data)
1915 return -ENOMEM;
1916
1917 dev->prev_d_len = strlen;
1918
1919 return 0;
1920}
1921
1922static int get_msid_cpin_pin(struct opal_dev *dev)
1923{
1924 int err = 0;
1925
1926 clear_opal_cmd(dev);
1927 set_comid(dev, dev->comid);
1928
1929
1930 add_token_u8(&err, dev, OPAL_CALL);
1931 add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID],
1932 OPAL_UID_LENGTH);
1933 add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH);
1934
1935 add_token_u8(&err, dev, OPAL_STARTLIST);
1936 add_token_u8(&err, dev, OPAL_STARTLIST);
1937
1938 add_token_u8(&err, dev, OPAL_STARTNAME);
1939 add_token_u8(&err, dev, 3); /* Start Column */
1940 add_token_u8(&err, dev, 3); /* PIN */
1941 add_token_u8(&err, dev, OPAL_ENDNAME);
1942
1943 add_token_u8(&err, dev, OPAL_STARTNAME);
1944 add_token_u8(&err, dev, 4); /* End Column */
1945	add_token_u8(&err, dev, 3); /* PIN */
1946 add_token_u8(&err, dev, OPAL_ENDNAME);
1947
1948 add_token_u8(&err, dev, OPAL_ENDLIST);
1949 add_token_u8(&err, dev, OPAL_ENDLIST);
1950
1951 if (err) {
1952 pr_err("Error building Get MSID CPIN PIN command.\n");
1953 return err;
1954 }
1955
1956 return finalize_and_send(dev, get_msid_cpin_pin_cont);
1957}
1958
1959static int build_end_opal_session(struct opal_dev *dev)
1960{
1961 int err = 0;
1962
1963 clear_opal_cmd(dev);
1964
1965 set_comid(dev, dev->comid);
1966 add_token_u8(&err, dev, OPAL_ENDOFSESSION);
1967 return err;
1968}
1969
1970static int end_opal_session(struct opal_dev *dev)
1971{
1972 int ret = build_end_opal_session(dev);
1973
1974 if (ret < 0)
1975 return ret;
1976 return finalize_and_send(dev, end_session_cont);
1977}
1978
1979static int end_opal_session_error(struct opal_dev *dev)
1980{
1981 const opal_step error_end_session[] = {
1982 end_opal_session,
1983 NULL,
1984 };
1985 dev->funcs = error_end_session;
1986 dev->state = 0;
1987 return next(dev);
1988}
1989
1990static inline void setup_opal_dev(struct opal_dev *dev,
1991 const opal_step *funcs)
1992{
1993 dev->state = 0;
1994 dev->funcs = funcs;
1995 dev->tsn = 0;
1996 dev->hsn = 0;
1997 dev->func_data = NULL;
1998 dev->prev_data = NULL;
1999}
2000
2001static int check_opal_support(struct opal_dev *dev)
2002{
2003 static const opal_step funcs[] = {
2004 opal_discovery0,
2005 NULL
2006 };
2007 int ret;
2008
2009 mutex_lock(&dev->dev_lock);
2010 setup_opal_dev(dev, funcs);
2011 ret = next(dev);
2012 dev->supported = !ret;
2013 mutex_unlock(&dev->dev_lock);
2014 return ret;
2015}
2016
2017struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv)
2018{
2019 struct opal_dev *dev;
2020
2021 dev = kmalloc(sizeof(*dev), GFP_KERNEL);
2022 if (!dev)
2023 return NULL;
2024
2025 INIT_LIST_HEAD(&dev->unlk_lst);
2026 mutex_init(&dev->dev_lock);
2027 dev->data = data;
2028 dev->send_recv = send_recv;
2029 if (check_opal_support(dev) != 0) {
2030 pr_debug("Opal is not supported on this device\n");
2031 kfree(dev);
2032 return NULL;
2033 }
2034 return dev;
2035}
2036EXPORT_SYMBOL(init_opal_dev);
2037
2038static int opal_secure_erase_locking_range(struct opal_dev *dev,
2039 struct opal_session_info *opal_session)
2040{
2041 void *data[3] = { NULL };
2042 static const opal_step erase_funcs[] = {
2043 opal_discovery0,
2044 start_auth_opal_session,
2045 get_active_key,
2046 gen_key,
2047 end_opal_session,
2048 NULL,
2049 };
2050 int ret;
2051
2052 mutex_lock(&dev->dev_lock);
2053 setup_opal_dev(dev, erase_funcs);
2054
2055 dev->func_data = data;
2056 dev->func_data[1] = opal_session;
2057 dev->func_data[2] = &opal_session->opal_key.lr;
2058
2059 ret = next(dev);
2060 mutex_unlock(&dev->dev_lock);
2061 return ret;
2062}
2063
2064static int opal_erase_locking_range(struct opal_dev *dev,
2065 struct opal_session_info *opal_session)
2066{
2067 void *data[3] = { NULL };
2068 static const opal_step erase_funcs[] = {
2069 opal_discovery0,
2070 start_auth_opal_session,
2071 erase_locking_range,
2072 end_opal_session,
2073 NULL,
2074 };
2075 int ret;
2076
2077 mutex_lock(&dev->dev_lock);
2078 setup_opal_dev(dev, erase_funcs);
2079
2080 dev->func_data = data;
2081 dev->func_data[1] = opal_session;
2082 dev->func_data[2] = opal_session;
2083
2084 ret = next(dev);
2085 mutex_unlock(&dev->dev_lock);
2086 return ret;
2087}
2088
2089static int opal_enable_disable_shadow_mbr(struct opal_dev *dev,
2090 struct opal_mbr_data *opal_mbr)
2091{
2092 void *func_data[6] = { NULL };
2093 static const opal_step mbr_funcs[] = {
2094 opal_discovery0,
2095 start_admin1LSP_opal_session,
2096 set_mbr_done,
2097 end_opal_session,
2098 start_admin1LSP_opal_session,
2099 set_mbr_enable_disable,
2100 end_opal_session,
2101 NULL,
2102 };
2103 int ret;
2104
2105 if (opal_mbr->enable_disable != OPAL_MBR_ENABLE &&
2106 opal_mbr->enable_disable != OPAL_MBR_DISABLE)
2107 return -EINVAL;
2108
2109 mutex_lock(&dev->dev_lock);
2110 setup_opal_dev(dev, mbr_funcs);
2111 dev->func_data = func_data;
2112 dev->func_data[1] = &opal_mbr->key;
2113 dev->func_data[2] = &opal_mbr->enable_disable;
2114 dev->func_data[4] = &opal_mbr->key;
2115 dev->func_data[5] = &opal_mbr->enable_disable;
2116 ret = next(dev);
2117 mutex_unlock(&dev->dev_lock);
2118 return ret;
2119}
2120
2121static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk)
2122{
2123 struct opal_suspend_data *suspend;
2124
2125 suspend = kzalloc(sizeof(*suspend), GFP_KERNEL);
2126 if (!suspend)
2127 return -ENOMEM;
2128
2129 suspend->unlk = *lk_unlk;
2130 suspend->lr = lk_unlk->session.opal_key.lr;
2131
2132 mutex_lock(&dev->dev_lock);
2133 setup_opal_dev(dev, NULL);
2134 add_suspend_info(dev, suspend);
2135 mutex_unlock(&dev->dev_lock);
2136 return 0;
2137}
2138
2139static int opal_add_user_to_lr(struct opal_dev *dev,
2140 struct opal_lock_unlock *lk_unlk)
2141{
2142 void *func_data[3] = { NULL };
2143 static const opal_step funcs[] = {
2144 opal_discovery0,
2145 start_admin1LSP_opal_session,
2146 add_user_to_lr,
2147 end_opal_session,
2148 NULL
2149 };
2150 int ret;
2151
2152 if (lk_unlk->l_state != OPAL_RO &&
2153 lk_unlk->l_state != OPAL_RW) {
2154 pr_err("Locking state was not RO or RW\n");
2155 return -EINVAL;
2156 }
2157	if (lk_unlk->session.who < OPAL_USER1 ||
2158 lk_unlk->session.who > OPAL_USER9) {
2159 pr_err("Authority was not within the range of users: %d\n",
2160 lk_unlk->session.who);
2161 return -EINVAL;
2162 }
2163 if (lk_unlk->session.sum) {
2164 pr_err("%s not supported in sum. Use setup locking range\n",
2165 __func__);
2166 return -EINVAL;
2167 }
2168
2169 mutex_lock(&dev->dev_lock);
2170 setup_opal_dev(dev, funcs);
2171 dev->func_data = func_data;
2172 dev->func_data[1] = &lk_unlk->session.opal_key;
2173 dev->func_data[2] = lk_unlk;
2174 ret = next(dev);
2175 mutex_unlock(&dev->dev_lock);
2176 return ret;
2177}
2178
2179static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal)
2180{
2181 void *data[2] = { NULL };
2182 static const opal_step revert_funcs[] = {
2183 opal_discovery0,
2184 start_SIDASP_opal_session,
2185 revert_tper, /* controller will terminate session */
2186 NULL,
2187 };
2188 int ret;
2189
2190 mutex_lock(&dev->dev_lock);
2191 setup_opal_dev(dev, revert_funcs);
2192 dev->func_data = data;
2193 dev->func_data[1] = opal;
2194 ret = next(dev);
2195 mutex_unlock(&dev->dev_lock);
2196 return ret;
2197}
2198
2199static int __opal_lock_unlock_sum(struct opal_dev *dev)
2200{
2201 static const opal_step ulk_funcs_sum[] = {
2202 opal_discovery0,
2203 start_auth_opal_session,
2204 lock_unlock_locking_range_sum,
2205 end_opal_session,
2206 NULL
2207 };
2208
2209 dev->funcs = ulk_funcs_sum;
2210 return next(dev);
2211}
2212
2213static int __opal_lock_unlock(struct opal_dev *dev)
2214{
2215 static const opal_step _unlock_funcs[] = {
2216 opal_discovery0,
2217 start_auth_opal_session,
2218 lock_unlock_locking_range,
2219 end_opal_session,
2220 NULL
2221 };
2222
2223 dev->funcs = _unlock_funcs;
2224 return next(dev);
2225}
2226
2227static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk)
2228{
2229 void *func_data[3] = { NULL };
2230 int ret;
2231
2232 if (lk_unlk->session.who < OPAL_ADMIN1 ||
2233 lk_unlk->session.who > OPAL_USER9)
2234 return -EINVAL;
2235
2236 mutex_lock(&dev->dev_lock);
2237 setup_opal_dev(dev, NULL);
2238 dev->func_data = func_data;
2239 dev->func_data[1] = &lk_unlk->session;
2240 dev->func_data[2] = lk_unlk;
2241
2242 if (lk_unlk->session.sum)
2243 ret = __opal_lock_unlock_sum(dev);
2244 else
2245 ret = __opal_lock_unlock(dev);
2246
2247 mutex_unlock(&dev->dev_lock);
2248 return ret;
2249}
2250
2251static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal)
2252{
2253 static const opal_step owner_funcs[] = {
2254 opal_discovery0,
2255 start_anybodyASP_opal_session,
2256 get_msid_cpin_pin,
2257 end_opal_session,
2258 start_SIDASP_opal_session,
2259 set_sid_cpin_pin,
2260 end_opal_session,
2261 NULL
2262 };
2263 void *data[6] = { NULL };
2264 int ret;
2265
2266 if (!dev)
2267 return -ENODEV;
2268
2269 mutex_lock(&dev->dev_lock);
2270 setup_opal_dev(dev, owner_funcs);
2271 dev->func_data = data;
2272 dev->func_data[4] = opal;
2273 dev->func_data[5] = opal;
2274 ret = next(dev);
2275 mutex_unlock(&dev->dev_lock);
2276 return ret;
2277}
2278
2279static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act)
2280{
2281 void *data[4] = { NULL };
2282 static const opal_step active_funcs[] = {
2283 opal_discovery0,
2284 start_SIDASP_opal_session, /* Open session as SID auth */
2285 get_lsp_lifecycle,
2286 activate_lsp,
2287 end_opal_session,
2288 NULL
2289 };
2290 int ret;
2291
2292 if (!opal_lr_act->num_lrs || opal_lr_act->num_lrs > OPAL_MAX_LRS)
2293 return -EINVAL;
2294
2295 mutex_lock(&dev->dev_lock);
2296 setup_opal_dev(dev, active_funcs);
2297 dev->func_data = data;
2298 dev->func_data[1] = &opal_lr_act->key;
2299 dev->func_data[3] = opal_lr_act;
2300 ret = next(dev);
2301 mutex_unlock(&dev->dev_lock);
2302 return ret;
2303}
2304
2305static int opal_setup_locking_range(struct opal_dev *dev,
2306 struct opal_user_lr_setup *opal_lrs)
2307{
2308 void *data[3] = { NULL };
2309 static const opal_step lr_funcs[] = {
2310 opal_discovery0,
2311 start_auth_opal_session,
2312 setup_locking_range,
2313 end_opal_session,
2314 NULL,
2315 };
2316 int ret;
2317
2318 mutex_lock(&dev->dev_lock);
2319 setup_opal_dev(dev, lr_funcs);
2320 dev->func_data = data;
2321 dev->func_data[1] = &opal_lrs->session;
2322 dev->func_data[2] = opal_lrs;
2323 ret = next(dev);
2324 mutex_unlock(&dev->dev_lock);
2325 return ret;
2326}
2327
2328static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw)
2329{
2330 static const opal_step pw_funcs[] = {
2331 opal_discovery0,
2332 start_auth_opal_session,
2333 set_new_pw,
2334 end_opal_session,
2335 NULL
2336 };
2337 void *data[3] = { NULL };
2338 int ret;
2339
2340 if (opal_pw->session.who < OPAL_ADMIN1 ||
2341 opal_pw->session.who > OPAL_USER9 ||
2342 opal_pw->new_user_pw.who < OPAL_ADMIN1 ||
2343 opal_pw->new_user_pw.who > OPAL_USER9)
2344 return -EINVAL;
2345
2346 mutex_lock(&dev->dev_lock);
2347 setup_opal_dev(dev, pw_funcs);
2348 dev->func_data = data;
2349 dev->func_data[1] = (void *) &opal_pw->session;
2350 dev->func_data[2] = (void *) &opal_pw->new_user_pw;
2351
2352 ret = next(dev);
2353 mutex_unlock(&dev->dev_lock);
2354 return ret;
2355}
2356
2357static int opal_activate_user(struct opal_dev *dev,
2358 struct opal_session_info *opal_session)
2359{
2360 static const opal_step act_funcs[] = {
2361 opal_discovery0,
2362 start_admin1LSP_opal_session,
2363 internal_activate_user,
2364 end_opal_session,
2365 NULL
2366 };
2367 void *data[3] = { NULL };
2368 int ret;
2369
2370	/* We can't activate Admin1; it's active as manufactured */
2371	if (opal_session->who < OPAL_USER1 ||
2372 opal_session->who > OPAL_USER9) {
2373 pr_err("Who was not a valid user: %d\n", opal_session->who);
2374 return -EINVAL;
2375 }
2376
2377 mutex_lock(&dev->dev_lock);
2378 setup_opal_dev(dev, act_funcs);
2379 dev->func_data = data;
2380 dev->func_data[1] = &opal_session->opal_key;
2381 dev->func_data[2] = opal_session;
2382 ret = next(dev);
2383 mutex_unlock(&dev->dev_lock);
2384 return ret;
2385}
2386
2387bool opal_unlock_from_suspend(struct opal_dev *dev)
2388{
2389 struct opal_suspend_data *suspend;
2390 void *func_data[3] = { NULL };
2391 bool was_failure = false;
2392 int ret = 0;
2393
2394 if (!dev)
2395 return false;
2396 if (!dev->supported)
2397 return false;
2398
2399 mutex_lock(&dev->dev_lock);
2400 setup_opal_dev(dev, NULL);
2401 dev->func_data = func_data;
2402
2403 list_for_each_entry(suspend, &dev->unlk_lst, node) {
2404 dev->state = 0;
2405 dev->func_data[1] = &suspend->unlk.session;
2406 dev->func_data[2] = &suspend->unlk;
2407 dev->tsn = 0;
2408 dev->hsn = 0;
2409
2410 if (suspend->unlk.session.sum)
2411 ret = __opal_lock_unlock_sum(dev);
2412 else
2413 ret = __opal_lock_unlock(dev);
2414 if (ret) {
2415 pr_warn("Failed to unlock LR %hhu with sum %d\n",
2416 suspend->unlk.session.opal_key.lr,
2417 suspend->unlk.session.sum);
2418 was_failure = true;
2419 }
2420 }
2421 mutex_unlock(&dev->dev_lock);
2422 return was_failure;
2423}
2424EXPORT_SYMBOL(opal_unlock_from_suspend);
2425
2426int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
2427{
2428 void *p;
2429 int ret = -ENOTTY;
2430
2431 if (!capable(CAP_SYS_ADMIN))
2432 return -EACCES;
2433 if (!dev)
2434 return -ENOTSUPP;
2435 if (!dev->supported) {
2436 pr_err("Not supported\n");
2437 return -ENOTSUPP;
2438 }
2439
2440 p = memdup_user(arg, _IOC_SIZE(cmd));
2441 if (IS_ERR(p))
2442 return PTR_ERR(p);
2443
2444 switch (cmd) {
2445 case IOC_OPAL_SAVE:
2446 ret = opal_save(dev, p);
2447 break;
2448 case IOC_OPAL_LOCK_UNLOCK:
2449 ret = opal_lock_unlock(dev, p);
2450 break;
2451 case IOC_OPAL_TAKE_OWNERSHIP:
2452 ret = opal_take_ownership(dev, p);
2453 break;
2454 case IOC_OPAL_ACTIVATE_LSP:
2455 ret = opal_activate_lsp(dev, p);
2456 break;
2457 case IOC_OPAL_SET_PW:
2458 ret = opal_set_new_pw(dev, p);
2459 break;
2460 case IOC_OPAL_ACTIVATE_USR:
2461 ret = opal_activate_user(dev, p);
2462 break;
2463 case IOC_OPAL_REVERT_TPR:
2464 ret = opal_reverttper(dev, p);
2465 break;
2466 case IOC_OPAL_LR_SETUP:
2467 ret = opal_setup_locking_range(dev, p);
2468 break;
2469 case IOC_OPAL_ADD_USR_TO_LR:
2470 ret = opal_add_user_to_lr(dev, p);
2471 break;
2472 case IOC_OPAL_ENABLE_DISABLE_MBR:
2473 ret = opal_enable_disable_shadow_mbr(dev, p);
2474 break;
2475 case IOC_OPAL_ERASE_LR:
2476 ret = opal_erase_locking_range(dev, p);
2477 break;
2478 case IOC_OPAL_SECURE_ERASE_LR:
2479 ret = opal_secure_erase_locking_range(dev, p);
2480 break;
2481 default:
2482 pr_warn("No such Opal Ioctl %u\n", cmd);
2483 }
2484
2485 kfree(p);
2486 return ret;
2487}
2488EXPORT_SYMBOL_GPL(sed_ioctl);
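
For context, a minimal userspace sketch of driving the ioctl interface that sed_ioctl() above dispatches. The structure and field names (struct opal_lock_unlock, session.who, session.sum, session.opal_key.lr/key_len/key, l_state) mirror the ones used by the kernel code in this patch; the device path and passphrase are placeholders, and the authoritative uapi layout is the one exported in include/uapi/linux/sed-opal.h, not this sketch.

	/* Sketch: unlock locking range 0 read/write as Admin1 (assumptions noted above). */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/sed-opal.h>

	int main(void)
	{
		struct opal_lock_unlock lk = { 0 };
		const char *pw = "example-passphrase";	/* placeholder credential */
		int fd, ret;

		fd = open("/dev/nvme0n1", O_RDWR);	/* placeholder device node */
		if (fd < 0)
			return 1;

		lk.session.who = OPAL_ADMIN1;		/* authority, as in opal_lock_unlock() checks */
		lk.session.sum = 0;			/* not single-user mode */
		lk.session.opal_key.lr = 0;		/* locking range 0 */
		lk.session.opal_key.key_len = strlen(pw);
		memcpy(lk.session.opal_key.key, pw, lk.session.opal_key.key_len);
		lk.l_state = OPAL_RW;			/* see lock_unlock_locking_range() */

		ret = ioctl(fd, IOC_OPAL_LOCK_UNLOCK, &lk);
		if (ret)
			perror("IOC_OPAL_LOCK_UNLOCK");

		close(fd);
		return ret ? 1 : 0;
	}

The same pattern applies to the other IOC_OPAL_* commands handled in the switch above, each taking the corresponding structure from the uapi header.
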
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e5c5b8eb14a9..3a44438a1195 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -4074,41 +4074,27 @@ clean_up:
4074 4074
4075static void cciss_interrupt_mode(ctlr_info_t *h) 4075static void cciss_interrupt_mode(ctlr_info_t *h)
4076{ 4076{
4077#ifdef CONFIG_PCI_MSI 4077 int ret;
4078 int err;
4079 struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
4080 {0, 2}, {0, 3}
4081 };
4082 4078
4083 /* Some boards advertise MSI but don't really support it */ 4079 /* Some boards advertise MSI but don't really support it */
4084 if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) || 4080 if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
4085 (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11)) 4081 (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
4086 goto default_int_mode; 4082 goto default_int_mode;
4087 4083
4088 if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { 4084 ret = pci_alloc_irq_vectors(h->pdev, 4, 4, PCI_IRQ_MSIX);
4089 err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4); 4085 if (ret >= 0) {
4090 if (!err) { 4086 h->intr[0] = pci_irq_vector(h->pdev, 0);
4091 h->intr[0] = cciss_msix_entries[0].vector; 4087 h->intr[1] = pci_irq_vector(h->pdev, 1);
4092 h->intr[1] = cciss_msix_entries[1].vector; 4088 h->intr[2] = pci_irq_vector(h->pdev, 2);
4093 h->intr[2] = cciss_msix_entries[2].vector; 4089 h->intr[3] = pci_irq_vector(h->pdev, 3);
4094 h->intr[3] = cciss_msix_entries[3].vector; 4090 return;
4095 h->msix_vector = 1;
4096 return;
4097 } else {
4098 dev_warn(&h->pdev->dev,
4099 "MSI-X init failed %d\n", err);
4100 }
4101 }
4102 if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
4103 if (!pci_enable_msi(h->pdev))
4104 h->msi_vector = 1;
4105 else
4106 dev_warn(&h->pdev->dev, "MSI init failed\n");
4107 } 4091 }
4092
4093 ret = pci_alloc_irq_vectors(h->pdev, 1, 1, PCI_IRQ_MSI);
4094
4108default_int_mode: 4095default_int_mode:
4109#endif /* CONFIG_PCI_MSI */
4110 /* if we get here we're going to use the default interrupt mode */ 4096 /* if we get here we're going to use the default interrupt mode */
4111 h->intr[h->intr_mode] = h->pdev->irq; 4097 h->intr[h->intr_mode] = pci_irq_vector(h->pdev, 0);
4112 return; 4098 return;
4113} 4099}
4114 4100
@@ -4888,7 +4874,7 @@ static int cciss_request_irq(ctlr_info_t *h,
4888 irqreturn_t (*msixhandler)(int, void *), 4874 irqreturn_t (*msixhandler)(int, void *),
4889 irqreturn_t (*intxhandler)(int, void *)) 4875 irqreturn_t (*intxhandler)(int, void *))
4890{ 4876{
4891 if (h->msix_vector || h->msi_vector) { 4877 if (h->pdev->msi_enabled || h->pdev->msix_enabled) {
4892 if (!request_irq(h->intr[h->intr_mode], msixhandler, 4878 if (!request_irq(h->intr[h->intr_mode], msixhandler,
4893 0, h->devname, h)) 4879 0, h->devname, h))
4894 return 0; 4880 return 0;
@@ -4934,12 +4920,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
4934 int ctlr = h->ctlr; 4920 int ctlr = h->ctlr;
4935 4921
4936 free_irq(h->intr[h->intr_mode], h); 4922 free_irq(h->intr[h->intr_mode], h);
4937#ifdef CONFIG_PCI_MSI 4923 pci_free_irq_vectors(h->pdev);
4938 if (h->msix_vector)
4939 pci_disable_msix(h->pdev);
4940 else if (h->msi_vector)
4941 pci_disable_msi(h->pdev);
4942#endif /* CONFIG_PCI_MSI */
4943 cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); 4924 cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
4944 cciss_free_scatterlists(h); 4925 cciss_free_scatterlists(h);
4945 cciss_free_cmd_pool(h); 4926 cciss_free_cmd_pool(h);
@@ -5295,12 +5276,7 @@ static void cciss_remove_one(struct pci_dev *pdev)
5295 5276
5296 cciss_shutdown(pdev); 5277 cciss_shutdown(pdev);
5297 5278
5298#ifdef CONFIG_PCI_MSI 5279 pci_free_irq_vectors(h->pdev);
5299 if (h->msix_vector)
5300 pci_disable_msix(h->pdev);
5301 else if (h->msi_vector)
5302 pci_disable_msi(h->pdev);
5303#endif /* CONFIG_PCI_MSI */
5304 5280
5305 iounmap(h->transtable); 5281 iounmap(h->transtable);
5306 iounmap(h->cfgtable); 5282 iounmap(h->cfgtable);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 7fda30e4a241..4affa94ca17b 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -90,8 +90,6 @@ struct ctlr_info
90# define SIMPLE_MODE_INT 2 90# define SIMPLE_MODE_INT 2
91# define MEMQ_MODE_INT 3 91# define MEMQ_MODE_INT 3
92 unsigned int intr[4]; 92 unsigned int intr[4];
93 unsigned int msix_vector;
94 unsigned int msi_vector;
95 int intr_mode; 93 int intr_mode;
96 int cciss_max_sectors; 94 int cciss_max_sectors;
97 BYTE cciss_read; 95 BYTE cciss_read;
@@ -333,7 +331,7 @@ static unsigned long SA5_performant_completed(ctlr_info_t *h)
333 */ 331 */
334 register_value = readl(h->vaddr + SA5_OUTDB_STATUS); 332 register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
335 /* msi auto clears the interrupt pending bit. */ 333 /* msi auto clears the interrupt pending bit. */
336 if (!(h->msi_vector || h->msix_vector)) { 334 if (!(h->pdev->msi_enabled || h->pdev->msix_enabled)) {
337 writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR); 335 writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
338 /* Do a read in order to flush the write to the controller 336 /* Do a read in order to flush the write to the controller
339 * (as per spec.) 337 * (as per spec.)
@@ -393,7 +391,7 @@ static bool SA5_performant_intr_pending(ctlr_info_t *h)
393 if (!register_value) 391 if (!register_value)
394 return false; 392 return false;
395 393
396 if (h->msi_vector || h->msix_vector) 394 if (h->pdev->msi_enabled || h->pdev->msix_enabled)
397 return true; 395 return true;
398 396
399 /* Read outbound doorbell to flush */ 397 /* Read outbound doorbell to flush */
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index a391a3cfb3fe..184887af4b9f 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param,
3119 *rcmd = NULL; 3119 *rcmd = NULL;
3120 3120
3121loop: 3121loop:
3122 ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER); 3122 ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL);
3123 if (!ptr) 3123 if (!ptr)
3124 return -ENOMEM; 3124 return -ENOMEM;
3125 *rcmd = ptr; 3125 *rcmd = ptr;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f347285c67ec..304377182c1a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1097,9 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1097 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) 1097 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
1098 return -EINVAL; 1098 return -EINVAL;
1099 1099
1100	/* I/O needs to be drained during transfer transition */
1101 blk_mq_freeze_queue(lo->lo_queue);
1102
1100 err = loop_release_xfer(lo); 1103 err = loop_release_xfer(lo);
1101 if (err) 1104 if (err)
1102 return err; 1105 goto exit;
1103 1106
1104 if (info->lo_encrypt_type) { 1107 if (info->lo_encrypt_type) {
1105 unsigned int type = info->lo_encrypt_type; 1108 unsigned int type = info->lo_encrypt_type;
@@ -1114,12 +1117,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1114 1117
1115 err = loop_init_xfer(lo, xfer, info); 1118 err = loop_init_xfer(lo, xfer, info);
1116 if (err) 1119 if (err)
1117 return err; 1120 goto exit;
1118 1121
1119 if (lo->lo_offset != info->lo_offset || 1122 if (lo->lo_offset != info->lo_offset ||
1120 lo->lo_sizelimit != info->lo_sizelimit) 1123 lo->lo_sizelimit != info->lo_sizelimit)
1121 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) 1124 if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
1122 return -EFBIG; 1125 err = -EFBIG;
1126 goto exit;
1127 }
1123 1128
1124 loop_config_discard(lo); 1129 loop_config_discard(lo);
1125 1130
@@ -1156,7 +1161,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1156 /* update dio if lo_offset or transfer is changed */ 1161 /* update dio if lo_offset or transfer is changed */
1157 __loop_update_dio(lo, lo->use_dio); 1162 __loop_update_dio(lo, lo->use_dio);
1158 1163
1159 return 0; 1164 exit:
1165 blk_mq_unfreeze_queue(lo->lo_queue);
1166 return err;
1160} 1167}
1161 1168
1162static int 1169static int
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index c0e14e54909b..a67b7ea1e3bf 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -420,7 +420,8 @@ static void null_lnvm_end_io(struct request *rq, int error)
420{ 420{
421 struct nvm_rq *rqd = rq->end_io_data; 421 struct nvm_rq *rqd = rq->end_io_data;
422 422
423 nvm_end_io(rqd, error); 423 rqd->error = error;
424 nvm_end_io(rqd);
424 425
425 blk_put_request(rq); 426 blk_put_request(rq);
426} 427}
@@ -460,7 +461,6 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
460 461
461 id->ver_id = 0x1; 462 id->ver_id = 0x1;
462 id->vmnt = 0; 463 id->vmnt = 0;
463 id->cgrps = 1;
464 id->cap = 0x2; 464 id->cap = 0x2;
465 id->dom = 0x1; 465 id->dom = 0x1;
466 466
@@ -479,7 +479,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
479 479
480 sector_div(size, bs); /* convert size to pages */ 480 sector_div(size, bs); /* convert size to pages */
481	size >>= 8; /* convert size to pgs per blk */ 481	size >>= 8; /* convert size to pgs per blk */
482 grp = &id->groups[0]; 482 grp = &id->grp;
483 grp->mtype = 0; 483 grp->mtype = 0;
484 grp->fmtype = 0; 484 grp->fmtype = 0;
485 grp->num_ch = 1; 485 grp->num_ch = 1;
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 5fd2d0e25567..10aed84244f5 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -273,7 +273,7 @@ static const struct block_device_operations pcd_bdops = {
273 .check_events = pcd_block_check_events, 273 .check_events = pcd_block_check_events,
274}; 274};
275 275
276static struct cdrom_device_ops pcd_dops = { 276static const struct cdrom_device_ops pcd_dops = {
277 .open = pcd_open, 277 .open = pcd_open,
278 .release = pcd_release, 278 .release = pcd_release,
279 .drive_status = pcd_drive_status, 279 .drive_status = pcd_drive_status,
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 59cca72647a6..bbbd3caa927c 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -342,8 +342,8 @@ static void cdrom_sysctl_register(void);
342 342
343static LIST_HEAD(cdrom_list); 343static LIST_HEAD(cdrom_list);
344 344
345static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, 345int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
346 struct packet_command *cgc) 346 struct packet_command *cgc)
347{ 347{
348 if (cgc->sense) { 348 if (cgc->sense) {
349 cgc->sense->sense_key = 0x05; 349 cgc->sense->sense_key = 0x05;
@@ -354,6 +354,7 @@ static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
354 cgc->stat = -EIO; 354 cgc->stat = -EIO;
355 return -EIO; 355 return -EIO;
356} 356}
357EXPORT_SYMBOL(cdrom_dummy_generic_packet);
357 358
358static int cdrom_flush_cache(struct cdrom_device_info *cdi) 359static int cdrom_flush_cache(struct cdrom_device_info *cdi)
359{ 360{
@@ -371,7 +372,7 @@ static int cdrom_flush_cache(struct cdrom_device_info *cdi)
371static int cdrom_get_disc_info(struct cdrom_device_info *cdi, 372static int cdrom_get_disc_info(struct cdrom_device_info *cdi,
372 disc_information *di) 373 disc_information *di)
373{ 374{
374 struct cdrom_device_ops *cdo = cdi->ops; 375 const struct cdrom_device_ops *cdo = cdi->ops;
375 struct packet_command cgc; 376 struct packet_command cgc;
376 int ret, buflen; 377 int ret, buflen;
377 378
@@ -586,7 +587,7 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space)
586int register_cdrom(struct cdrom_device_info *cdi) 587int register_cdrom(struct cdrom_device_info *cdi)
587{ 588{
588 static char banner_printed; 589 static char banner_printed;
589 struct cdrom_device_ops *cdo = cdi->ops; 590 const struct cdrom_device_ops *cdo = cdi->ops;
590 int *change_capability = (int *)&cdo->capability; /* hack */ 591 int *change_capability = (int *)&cdo->capability; /* hack */
591 592
592 cd_dbg(CD_OPEN, "entering register_cdrom\n"); 593 cd_dbg(CD_OPEN, "entering register_cdrom\n");
@@ -610,7 +611,6 @@ int register_cdrom(struct cdrom_device_info *cdi)
610 ENSURE(reset, CDC_RESET); 611 ENSURE(reset, CDC_RESET);
611 ENSURE(generic_packet, CDC_GENERIC_PACKET); 612 ENSURE(generic_packet, CDC_GENERIC_PACKET);
612 cdi->mc_flags = 0; 613 cdi->mc_flags = 0;
613 cdo->n_minors = 0;
614 cdi->options = CDO_USE_FFLAGS; 614 cdi->options = CDO_USE_FFLAGS;
615 615
616 if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY)) 616 if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY))
@@ -630,8 +630,7 @@ int register_cdrom(struct cdrom_device_info *cdi)
630 else 630 else
631 cdi->cdda_method = CDDA_OLD; 631 cdi->cdda_method = CDDA_OLD;
632 632
633 if (!cdo->generic_packet) 633 WARN_ON(!cdo->generic_packet);
634 cdo->generic_packet = cdrom_dummy_generic_packet;
635 634
636 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); 635 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name);
637 mutex_lock(&cdrom_mutex); 636 mutex_lock(&cdrom_mutex);
@@ -652,7 +651,6 @@ void unregister_cdrom(struct cdrom_device_info *cdi)
652 if (cdi->exit) 651 if (cdi->exit)
653 cdi->exit(cdi); 652 cdi->exit(cdi);
654 653
655 cdi->ops->n_minors--;
656 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); 654 cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name);
657} 655}
658 656
@@ -1036,7 +1034,7 @@ static
1036int open_for_data(struct cdrom_device_info *cdi) 1034int open_for_data(struct cdrom_device_info *cdi)
1037{ 1035{
1038 int ret; 1036 int ret;
1039 struct cdrom_device_ops *cdo = cdi->ops; 1037 const struct cdrom_device_ops *cdo = cdi->ops;
1040 tracktype tracks; 1038 tracktype tracks;
1041 cd_dbg(CD_OPEN, "entering open_for_data\n"); 1039 cd_dbg(CD_OPEN, "entering open_for_data\n");
1042 /* Check if the driver can report drive status. If it can, we 1040 /* Check if the driver can report drive status. If it can, we
@@ -1198,8 +1196,8 @@ err:
1198/* This code is similar to that in open_for_data. The routine is called 1196/* This code is similar to that in open_for_data. The routine is called
1199 whenever an audio play operation is requested. 1197 whenever an audio play operation is requested.
1200*/ 1198*/
1201static int check_for_audio_disc(struct cdrom_device_info * cdi, 1199static int check_for_audio_disc(struct cdrom_device_info *cdi,
1202 struct cdrom_device_ops * cdo) 1200 const struct cdrom_device_ops *cdo)
1203{ 1201{
1204 int ret; 1202 int ret;
1205 tracktype tracks; 1203 tracktype tracks;
@@ -1254,7 +1252,7 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi,
1254 1252
1255void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) 1253void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode)
1256{ 1254{
1257 struct cdrom_device_ops *cdo = cdi->ops; 1255 const struct cdrom_device_ops *cdo = cdi->ops;
1258 int opened_for_data; 1256 int opened_for_data;
1259 1257
1260 cd_dbg(CD_CLOSE, "entering cdrom_release\n"); 1258 cd_dbg(CD_CLOSE, "entering cdrom_release\n");
@@ -1294,7 +1292,7 @@ static int cdrom_read_mech_status(struct cdrom_device_info *cdi,
1294 struct cdrom_changer_info *buf) 1292 struct cdrom_changer_info *buf)
1295{ 1293{
1296 struct packet_command cgc; 1294 struct packet_command cgc;
1297 struct cdrom_device_ops *cdo = cdi->ops; 1295 const struct cdrom_device_ops *cdo = cdi->ops;
1298 int length; 1296 int length;
1299 1297
1300 /* 1298 /*
@@ -1643,7 +1641,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1643 int ret; 1641 int ret;
1644 u_char buf[20]; 1642 u_char buf[20];
1645 struct packet_command cgc; 1643 struct packet_command cgc;
1646 struct cdrom_device_ops *cdo = cdi->ops; 1644 const struct cdrom_device_ops *cdo = cdi->ops;
1647 rpc_state_t rpc_state; 1645 rpc_state_t rpc_state;
1648 1646
1649 memset(buf, 0, sizeof(buf)); 1647 memset(buf, 0, sizeof(buf));
@@ -1791,7 +1789,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s,
1791{ 1789{
1792 unsigned char buf[21], *base; 1790 unsigned char buf[21], *base;
1793 struct dvd_layer *layer; 1791 struct dvd_layer *layer;
1794 struct cdrom_device_ops *cdo = cdi->ops; 1792 const struct cdrom_device_ops *cdo = cdi->ops;
1795 int ret, layer_num = s->physical.layer_num; 1793 int ret, layer_num = s->physical.layer_num;
1796 1794
1797 if (layer_num >= DVD_LAYERS) 1795 if (layer_num >= DVD_LAYERS)
@@ -1842,7 +1840,7 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s,
1842{ 1840{
1843 int ret; 1841 int ret;
1844 u_char buf[8]; 1842 u_char buf[8];
1845 struct cdrom_device_ops *cdo = cdi->ops; 1843 const struct cdrom_device_ops *cdo = cdi->ops;
1846 1844
1847 init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ); 1845 init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ);
1848 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1846 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
@@ -1866,7 +1864,7 @@ static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s,
1866{ 1864{
1867 int ret, size; 1865 int ret, size;
1868 u_char *buf; 1866 u_char *buf;
1869 struct cdrom_device_ops *cdo = cdi->ops; 1867 const struct cdrom_device_ops *cdo = cdi->ops;
1870 1868
1871 size = sizeof(s->disckey.value) + 4; 1869 size = sizeof(s->disckey.value) + 4;
1872 1870
@@ -1894,7 +1892,7 @@ static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s,
1894{ 1892{
1895 int ret, size = 4 + 188; 1893 int ret, size = 4 + 188;
1896 u_char *buf; 1894 u_char *buf;
1897 struct cdrom_device_ops *cdo = cdi->ops; 1895 const struct cdrom_device_ops *cdo = cdi->ops;
1898 1896
1899 buf = kmalloc(size, GFP_KERNEL); 1897 buf = kmalloc(size, GFP_KERNEL);
1900 if (!buf) 1898 if (!buf)
@@ -1928,7 +1926,7 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
1928{ 1926{
1929 int ret = 0, size; 1927 int ret = 0, size;
1930 u_char *buf; 1928 u_char *buf;
1931 struct cdrom_device_ops *cdo = cdi->ops; 1929 const struct cdrom_device_ops *cdo = cdi->ops;
1932 1930
1933 size = sizeof(s->manufact.value) + 4; 1931 size = sizeof(s->manufact.value) + 4;
1934 1932
@@ -1995,7 +1993,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
1995 struct packet_command *cgc, 1993 struct packet_command *cgc,
1996 int page_code, int page_control) 1994 int page_code, int page_control)
1997{ 1995{
1998 struct cdrom_device_ops *cdo = cdi->ops; 1996 const struct cdrom_device_ops *cdo = cdi->ops;
1999 1997
2000 memset(cgc->cmd, 0, sizeof(cgc->cmd)); 1998 memset(cgc->cmd, 0, sizeof(cgc->cmd));
2001 1999
@@ -2010,7 +2008,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi,
2010int cdrom_mode_select(struct cdrom_device_info *cdi, 2008int cdrom_mode_select(struct cdrom_device_info *cdi,
2011 struct packet_command *cgc) 2009 struct packet_command *cgc)
2012{ 2010{
2013 struct cdrom_device_ops *cdo = cdi->ops; 2011 const struct cdrom_device_ops *cdo = cdi->ops;
2014 2012
2015 memset(cgc->cmd, 0, sizeof(cgc->cmd)); 2013 memset(cgc->cmd, 0, sizeof(cgc->cmd));
2016 memset(cgc->buffer, 0, 2); 2014 memset(cgc->buffer, 0, 2);
@@ -2025,7 +2023,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi,
2025static int cdrom_read_subchannel(struct cdrom_device_info *cdi, 2023static int cdrom_read_subchannel(struct cdrom_device_info *cdi,
2026 struct cdrom_subchnl *subchnl, int mcn) 2024 struct cdrom_subchnl *subchnl, int mcn)
2027{ 2025{
2028 struct cdrom_device_ops *cdo = cdi->ops; 2026 const struct cdrom_device_ops *cdo = cdi->ops;
2029 struct packet_command cgc; 2027 struct packet_command cgc;
2030 char buffer[32]; 2028 char buffer[32];
2031 int ret; 2029 int ret;
@@ -2073,7 +2071,7 @@ static int cdrom_read_cd(struct cdrom_device_info *cdi,
2073 struct packet_command *cgc, int lba, 2071 struct packet_command *cgc, int lba,
2074 int blocksize, int nblocks) 2072 int blocksize, int nblocks)
2075{ 2073{
2076 struct cdrom_device_ops *cdo = cdi->ops; 2074 const struct cdrom_device_ops *cdo = cdi->ops;
2077 2075
2078 memset(&cgc->cmd, 0, sizeof(cgc->cmd)); 2076 memset(&cgc->cmd, 0, sizeof(cgc->cmd));
2079 cgc->cmd[0] = GPCMD_READ_10; 2077 cgc->cmd[0] = GPCMD_READ_10;
@@ -2093,7 +2091,7 @@ static int cdrom_read_block(struct cdrom_device_info *cdi,
2093 struct packet_command *cgc, 2091 struct packet_command *cgc,
2094 int lba, int nblocks, int format, int blksize) 2092 int lba, int nblocks, int format, int blksize)
2095{ 2093{
2096 struct cdrom_device_ops *cdo = cdi->ops; 2094 const struct cdrom_device_ops *cdo = cdi->ops;
2097 2095
2098 memset(&cgc->cmd, 0, sizeof(cgc->cmd)); 2096 memset(&cgc->cmd, 0, sizeof(cgc->cmd));
2099 cgc->cmd[0] = GPCMD_READ_CD; 2097 cgc->cmd[0] = GPCMD_READ_CD;
@@ -2764,7 +2762,7 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi,
2764 */ 2762 */
2765static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) 2763static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size)
2766{ 2764{
2767 struct cdrom_device_ops *cdo = cdi->ops; 2765 const struct cdrom_device_ops *cdo = cdi->ops;
2768 struct packet_command cgc; 2766 struct packet_command cgc;
2769 struct modesel_head mh; 2767 struct modesel_head mh;
2770 2768
@@ -2790,7 +2788,7 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size)
2790static int cdrom_get_track_info(struct cdrom_device_info *cdi, 2788static int cdrom_get_track_info(struct cdrom_device_info *cdi,
2791 __u16 track, __u8 type, track_information *ti) 2789 __u16 track, __u8 type, track_information *ti)
2792{ 2790{
2793 struct cdrom_device_ops *cdo = cdi->ops; 2791 const struct cdrom_device_ops *cdo = cdi->ops;
2794 struct packet_command cgc; 2792 struct packet_command cgc;
2795 int ret, buflen; 2793 int ret, buflen;
2796 2794
@@ -3049,7 +3047,7 @@ static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi,
3049 void __user *arg, 3047 void __user *arg,
3050 struct packet_command *cgc) 3048 struct packet_command *cgc)
3051{ 3049{
3052 struct cdrom_device_ops *cdo = cdi->ops; 3050 const struct cdrom_device_ops *cdo = cdi->ops;
3053 struct cdrom_msf msf; 3051 struct cdrom_msf msf;
3054 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); 3052 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
3055 if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf))) 3053 if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf)))
@@ -3069,7 +3067,7 @@ static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi,
3069 void __user *arg, 3067 void __user *arg,
3070 struct packet_command *cgc) 3068 struct packet_command *cgc)
3071{ 3069{
3072 struct cdrom_device_ops *cdo = cdi->ops; 3070 const struct cdrom_device_ops *cdo = cdi->ops;
3073 struct cdrom_blk blk; 3071 struct cdrom_blk blk;
3074 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); 3072 cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
3075 if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk))) 3073 if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk)))
@@ -3164,7 +3162,7 @@ static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi,
3164 struct packet_command *cgc, 3162 struct packet_command *cgc,
3165 int cmd) 3163 int cmd)
3166{ 3164{
3167 struct cdrom_device_ops *cdo = cdi->ops; 3165 const struct cdrom_device_ops *cdo = cdi->ops;
3168 cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); 3166 cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n");
3169 cgc->cmd[0] = GPCMD_START_STOP_UNIT; 3167 cgc->cmd[0] = GPCMD_START_STOP_UNIT;
3170 cgc->cmd[1] = 1; 3168 cgc->cmd[1] = 1;
@@ -3177,7 +3175,7 @@ static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi,
3177 struct packet_command *cgc, 3175 struct packet_command *cgc,
3178 int cmd) 3176 int cmd)
3179{ 3177{
3180 struct cdrom_device_ops *cdo = cdi->ops; 3178 const struct cdrom_device_ops *cdo = cdi->ops;
3181 cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); 3179 cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n");
3182 cgc->cmd[0] = GPCMD_PAUSE_RESUME; 3180 cgc->cmd[0] = GPCMD_PAUSE_RESUME;
3183 cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; 3181 cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0;
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 584bc3126403..1afab6558d0c 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -481,7 +481,7 @@ static int gdrom_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
481 return -EINVAL; 481 return -EINVAL;
482} 482}
483 483
484static struct cdrom_device_ops gdrom_ops = { 484static const struct cdrom_device_ops gdrom_ops = {
485 .open = gdrom_open, 485 .open = gdrom_open,
486 .release = gdrom_release, 486 .release = gdrom_release,
487 .drive_status = gdrom_drivestatus, 487 .drive_status = gdrom_drivestatus,
@@ -489,9 +489,9 @@ static struct cdrom_device_ops gdrom_ops = {
489 .get_last_session = gdrom_get_last_session, 489 .get_last_session = gdrom_get_last_session,
490 .reset = gdrom_hardreset, 490 .reset = gdrom_hardreset,
491 .audio_ioctl = gdrom_audio_ioctl, 491 .audio_ioctl = gdrom_audio_ioctl,
492 .generic_packet = cdrom_dummy_generic_packet,
492 .capability = CDC_MULTI_SESSION | CDC_MEDIA_CHANGED | 493 .capability = CDC_MULTI_SESSION | CDC_MEDIA_CHANGED |
493 CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R, 494 CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R,
494 .n_minors = 1,
495}; 495};
496 496
497static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) 497static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
@@ -807,16 +807,20 @@ static int probe_gdrom(struct platform_device *devptr)
807 if (err) 807 if (err)
808 goto probe_fail_cmdirq_register; 808 goto probe_fail_cmdirq_register;
809 gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock); 809 gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock);
810 if (!gd.gdrom_rq) 810 if (!gd.gdrom_rq) {
811 err = -ENOMEM;
811 goto probe_fail_requestq; 812 goto probe_fail_requestq;
813 }
812 814
813 err = probe_gdrom_setupqueue(); 815 err = probe_gdrom_setupqueue();
814 if (err) 816 if (err)
815 goto probe_fail_toc; 817 goto probe_fail_toc;
816 818
817 gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL); 819 gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL);
818 if (!gd.toc) 820 if (!gd.toc) {
821 err = -ENOMEM;
819 goto probe_fail_toc; 822 goto probe_fail_toc;
823 }
820 add_disk(gd.disk); 824 add_disk(gd.disk);
821 return 0; 825 return 0;
822 826
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 9cbd217bc0c9..ab9232e1e16f 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1166,7 +1166,7 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
1166 CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \ 1166 CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \
1167 CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM) 1167 CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM)
1168 1168
1169static struct cdrom_device_ops ide_cdrom_dops = { 1169static const struct cdrom_device_ops ide_cdrom_dops = {
1170 .open = ide_cdrom_open_real, 1170 .open = ide_cdrom_open_real,
1171 .release = ide_cdrom_release_real, 1171 .release = ide_cdrom_release_real,
1172 .drive_status = ide_cdrom_drive_status, 1172 .drive_status = ide_cdrom_drive_status,
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 2f5d5f4a4c75..052714106b7b 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -26,15 +26,6 @@ config NVM_DEBUG
26 26
27 It is required to create/remove targets without IOCTLs. 27 It is required to create/remove targets without IOCTLs.
28 28
29config NVM_GENNVM
30 tristate "General Non-Volatile Memory Manager for Open-Channel SSDs"
31 ---help---
32 Non-volatile memory media manager for Open-Channel SSDs that implements
33 physical media metadata management and block provisioning API.
34
35 This is the standard media manager for using Open-Channel SSDs, and
36 required for targets to be instantiated.
37
38config NVM_RRPC 29config NVM_RRPC
39 tristate "Round-robin Hybrid Open-Channel SSD target" 30 tristate "Round-robin Hybrid Open-Channel SSD target"
40 ---help--- 31 ---help---
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index a7a0a22cf1a5..b2a39e2d2895 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -2,6 +2,5 @@
2# Makefile for Open-Channel SSDs. 2# Makefile for Open-Channel SSDs.
3# 3#
4 4
5obj-$(CONFIG_NVM) := core.o sysblk.o 5obj-$(CONFIG_NVM) := core.o
6obj-$(CONFIG_NVM_GENNVM) += gennvm.o
7obj-$(CONFIG_NVM_RRPC) += rrpc.o 6obj-$(CONFIG_NVM_RRPC) += rrpc.o
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 02240a0b39c9..5262ba66a7a7 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -29,10 +29,483 @@
29 29
30static LIST_HEAD(nvm_tgt_types); 30static LIST_HEAD(nvm_tgt_types);
31static DECLARE_RWSEM(nvm_tgtt_lock); 31static DECLARE_RWSEM(nvm_tgtt_lock);
32static LIST_HEAD(nvm_mgrs);
33static LIST_HEAD(nvm_devices); 32static LIST_HEAD(nvm_devices);
34static DECLARE_RWSEM(nvm_lock); 33static DECLARE_RWSEM(nvm_lock);
35 34
35/* Map between virtual and physical channel and lun */
36struct nvm_ch_map {
37 int ch_off;
38 int nr_luns;
39 int *lun_offs;
40};
41
42struct nvm_dev_map {
43 struct nvm_ch_map *chnls;
44 int nr_chnls;
45};
46
47struct nvm_area {
48 struct list_head list;
49 sector_t begin;
50 sector_t end; /* end is excluded */
51};
52
53static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
54{
55 struct nvm_target *tgt;
56
57 list_for_each_entry(tgt, &dev->targets, list)
58 if (!strcmp(name, tgt->disk->disk_name))
59 return tgt;
60
61 return NULL;
62}
63
64static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end)
65{
66 int i;
67
68 for (i = lun_begin; i <= lun_end; i++) {
69 if (test_and_set_bit(i, dev->lun_map)) {
70 pr_err("nvm: lun %d already allocated\n", i);
71 goto err;
72 }
73 }
74
75 return 0;
76err:
77 while (--i > lun_begin)
78 clear_bit(i, dev->lun_map);
79
80 return -EBUSY;
81}
82
83static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin,
84 int lun_end)
85{
86 int i;
87
88 for (i = lun_begin; i <= lun_end; i++)
89 WARN_ON(!test_and_clear_bit(i, dev->lun_map));
90}
91
92static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
93{
94 struct nvm_dev *dev = tgt_dev->parent;
95 struct nvm_dev_map *dev_map = tgt_dev->map;
96 int i, j;
97
98 for (i = 0; i < dev_map->nr_chnls; i++) {
99 struct nvm_ch_map *ch_map = &dev_map->chnls[i];
100 int *lun_offs = ch_map->lun_offs;
101 int ch = i + ch_map->ch_off;
102
103 for (j = 0; j < ch_map->nr_luns; j++) {
104 int lun = j + lun_offs[j];
105 int lunid = (ch * dev->geo.luns_per_chnl) + lun;
106
107 WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
108 }
109
110 kfree(ch_map->lun_offs);
111 }
112
113 kfree(dev_map->chnls);
114 kfree(dev_map);
115
116 kfree(tgt_dev->luns);
117 kfree(tgt_dev);
118}
119
120static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev,
121 int lun_begin, int lun_end)
122{
123 struct nvm_tgt_dev *tgt_dev = NULL;
124 struct nvm_dev_map *dev_rmap = dev->rmap;
125 struct nvm_dev_map *dev_map;
126 struct ppa_addr *luns;
127 int nr_luns = lun_end - lun_begin + 1;
128 int luns_left = nr_luns;
129 int nr_chnls = nr_luns / dev->geo.luns_per_chnl;
130 int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl;
131 int bch = lun_begin / dev->geo.luns_per_chnl;
132 int blun = lun_begin % dev->geo.luns_per_chnl;
133 int lunid = 0;
134 int lun_balanced = 1;
135 int prev_nr_luns;
136 int i, j;
137
138 nr_chnls = nr_luns / dev->geo.luns_per_chnl;
139 nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
140
141 dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
142 if (!dev_map)
143 goto err_dev;
144
145 dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map),
146 GFP_KERNEL);
147 if (!dev_map->chnls)
148 goto err_chnls;
149
150 luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL);
151 if (!luns)
152 goto err_luns;
153
154 prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ?
155 dev->geo.luns_per_chnl : luns_left;
156 for (i = 0; i < nr_chnls; i++) {
157 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
158 int *lun_roffs = ch_rmap->lun_offs;
159 struct nvm_ch_map *ch_map = &dev_map->chnls[i];
160 int *lun_offs;
161 int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ?
162 dev->geo.luns_per_chnl : luns_left;
163
164 if (lun_balanced && prev_nr_luns != luns_in_chnl)
165 lun_balanced = 0;
166
167 ch_map->ch_off = ch_rmap->ch_off = bch;
168 ch_map->nr_luns = luns_in_chnl;
169
170 lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
171 if (!lun_offs)
172 goto err_ch;
173
174 for (j = 0; j < luns_in_chnl; j++) {
175 luns[lunid].ppa = 0;
176 luns[lunid].g.ch = i;
177 luns[lunid++].g.lun = j;
178
179 lun_offs[j] = blun;
180 lun_roffs[j + blun] = blun;
181 }
182
183 ch_map->lun_offs = lun_offs;
184
185 /* when starting a new channel, lun offset is reset */
186 blun = 0;
187 luns_left -= luns_in_chnl;
188 }
189
190 dev_map->nr_chnls = nr_chnls;
191
192 tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
193 if (!tgt_dev)
194 goto err_ch;
195
196 memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
197 /* Target device only owns a portion of the physical device */
198 tgt_dev->geo.nr_chnls = nr_chnls;
199 tgt_dev->geo.nr_luns = nr_luns;
200 tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1;
201 tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
202 tgt_dev->q = dev->q;
203 tgt_dev->map = dev_map;
204 tgt_dev->luns = luns;
205 memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
206
207 tgt_dev->parent = dev;
208
209 return tgt_dev;
210err_ch:
211 while (--i > 0)
212 kfree(dev_map->chnls[i].lun_offs);
213 kfree(luns);
214err_luns:
215 kfree(dev_map->chnls);
216err_chnls:
217 kfree(dev_map);
218err_dev:
219 return tgt_dev;
220}
221
222static const struct block_device_operations nvm_fops = {
223 .owner = THIS_MODULE,
224};
225
226static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
227{
228 struct nvm_ioctl_create_simple *s = &create->conf.s;
229 struct request_queue *tqueue;
230 struct gendisk *tdisk;
231 struct nvm_tgt_type *tt;
232 struct nvm_target *t;
233 struct nvm_tgt_dev *tgt_dev;
234 void *targetdata;
235
236 tt = nvm_find_target_type(create->tgttype, 1);
237 if (!tt) {
238 pr_err("nvm: target type %s not found\n", create->tgttype);
239 return -EINVAL;
240 }
241
242 mutex_lock(&dev->mlock);
243 t = nvm_find_target(dev, create->tgtname);
244 if (t) {
245 pr_err("nvm: target name already exists.\n");
246 mutex_unlock(&dev->mlock);
247 return -EINVAL;
248 }
249 mutex_unlock(&dev->mlock);
250
251 if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end))
252 return -ENOMEM;
253
254 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
255 if (!t)
256 goto err_reserve;
257
258 tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end);
259 if (!tgt_dev) {
260 pr_err("nvm: could not create target device\n");
261 goto err_t;
262 }
263
264 tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
265 if (!tqueue)
266 goto err_dev;
267 blk_queue_make_request(tqueue, tt->make_rq);
268
269 tdisk = alloc_disk(0);
270 if (!tdisk)
271 goto err_queue;
272
273 sprintf(tdisk->disk_name, "%s", create->tgtname);
274 tdisk->flags = GENHD_FL_EXT_DEVT;
275 tdisk->major = 0;
276 tdisk->first_minor = 0;
277 tdisk->fops = &nvm_fops;
278 tdisk->queue = tqueue;
279
280 targetdata = tt->init(tgt_dev, tdisk);
281 if (IS_ERR(targetdata))
282 goto err_init;
283
284 tdisk->private_data = targetdata;
285 tqueue->queuedata = targetdata;
286
287 blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
288
289 set_capacity(tdisk, tt->capacity(targetdata));
290 add_disk(tdisk);
291
292 if (tt->sysfs_init && tt->sysfs_init(tdisk))
293 goto err_sysfs;
294
295 t->type = tt;
296 t->disk = tdisk;
297 t->dev = tgt_dev;
298
299 mutex_lock(&dev->mlock);
300 list_add_tail(&t->list, &dev->targets);
301 mutex_unlock(&dev->mlock);
302
303 return 0;
304err_sysfs:
305 if (tt->exit)
306 tt->exit(targetdata);
307err_init:
308 put_disk(tdisk);
309err_queue:
310 blk_cleanup_queue(tqueue);
311err_dev:
312 nvm_remove_tgt_dev(tgt_dev);
313err_t:
314 kfree(t);
315err_reserve:
316 nvm_release_luns_err(dev, s->lun_begin, s->lun_end);
317 return -ENOMEM;
318}
319
320static void __nvm_remove_target(struct nvm_target *t)
321{
322 struct nvm_tgt_type *tt = t->type;
323 struct gendisk *tdisk = t->disk;
324 struct request_queue *q = tdisk->queue;
325
326 del_gendisk(tdisk);
327 blk_cleanup_queue(q);
328
329 if (tt->sysfs_exit)
330 tt->sysfs_exit(tdisk);
331
332 if (tt->exit)
333 tt->exit(tdisk->private_data);
334
335 nvm_remove_tgt_dev(t->dev);
336 put_disk(tdisk);
337
338 list_del(&t->list);
339 kfree(t);
340}
341
342/**
343 * nvm_remove_tgt - Removes a target from the media manager
344 * @dev: device
345 * @remove: ioctl structure with target name to remove.
346 *
347 * Returns:
348 * 0: on success
349 * 1: on not found
350 * <0: on error
351 */
352static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
353{
354 struct nvm_target *t;
355
356 mutex_lock(&dev->mlock);
357 t = nvm_find_target(dev, remove->tgtname);
358 if (!t) {
359 mutex_unlock(&dev->mlock);
360 return 1;
361 }
362 __nvm_remove_target(t);
363 mutex_unlock(&dev->mlock);
364
365 return 0;
366}
367
368static int nvm_register_map(struct nvm_dev *dev)
369{
370 struct nvm_dev_map *rmap;
371 int i, j;
372
373 rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL);
374 if (!rmap)
375 goto err_rmap;
376
377 rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map),
378 GFP_KERNEL);
379 if (!rmap->chnls)
380 goto err_chnls;
381
382 for (i = 0; i < dev->geo.nr_chnls; i++) {
383 struct nvm_ch_map *ch_rmap;
384 int *lun_roffs;
385 int luns_in_chnl = dev->geo.luns_per_chnl;
386
387 ch_rmap = &rmap->chnls[i];
388
389 ch_rmap->ch_off = -1;
390 ch_rmap->nr_luns = luns_in_chnl;
391
392 lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
393 if (!lun_roffs)
394 goto err_ch;
395
396 for (j = 0; j < luns_in_chnl; j++)
397 lun_roffs[j] = -1;
398
399 ch_rmap->lun_offs = lun_roffs;
400 }
401
402 dev->rmap = rmap;
403
404 return 0;
405err_ch:
406 while (--i >= 0)
407 kfree(rmap->chnls[i].lun_offs);
408err_chnls:
409 kfree(rmap);
410err_rmap:
411 return -ENOMEM;
412}
413
414static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
415{
416 struct nvm_dev_map *dev_map = tgt_dev->map;
417 struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch];
418 int lun_off = ch_map->lun_offs[p->g.lun];
419
420 p->g.ch += ch_map->ch_off;
421 p->g.lun += lun_off;
422}
423
424static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
425{
426 struct nvm_dev *dev = tgt_dev->parent;
427 struct nvm_dev_map *dev_rmap = dev->rmap;
428 struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch];
429 int lun_roff = ch_rmap->lun_offs[p->g.lun];
430
431 p->g.ch -= ch_rmap->ch_off;
432 p->g.lun -= lun_roff;
433}
434
435static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev,
436 struct ppa_addr *ppa_list, int nr_ppas)
437{
438 int i;
439
440 for (i = 0; i < nr_ppas; i++) {
441 nvm_map_to_dev(tgt_dev, &ppa_list[i]);
442 ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]);
443 }
444}
445
446static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
447 struct ppa_addr *ppa_list, int nr_ppas)
448{
449 int i;
450
451 for (i = 0; i < nr_ppas; i++) {
452 ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]);
453 nvm_map_to_tgt(tgt_dev, &ppa_list[i]);
454 }
455}
456
457static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
458{
459 if (rqd->nr_ppas == 1) {
460 nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1);
461 return;
462 }
463
464 nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
465}
466
467static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
468{
469 if (rqd->nr_ppas == 1) {
470 nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1);
471 return;
472 }
473
474 nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
475}
476
477void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
478 int len)
479{
480 struct nvm_geo *geo = &dev->geo;
481 struct nvm_dev_map *dev_rmap = dev->rmap;
482 u64 i;
483
484 for (i = 0; i < len; i++) {
485 struct nvm_ch_map *ch_rmap;
486 int *lun_roffs;
487 struct ppa_addr gaddr;
488 u64 pba = le64_to_cpu(entries[i]);
489 int off;
490 u64 diff;
491
492 if (!pba)
493 continue;
494
495 gaddr = linear_to_generic_addr(geo, pba);
496 ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
497 lun_roffs = ch_rmap->lun_offs;
498
499 off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
500
501 diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
502 (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
503
504 entries[i] -= cpu_to_le64(diff);
505 }
506}
507EXPORT_SYMBOL(nvm_part_to_tgt);
508
36struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) 509struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock)
37{ 510{
38 struct nvm_tgt_type *tmp, *tt = NULL; 511 struct nvm_tgt_type *tmp, *tt = NULL;
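
nvm_map_to_dev() and nvm_map_to_tgt() above translate between a target's private (channel, lun) coordinates and the device's absolute ones by adding or subtracting the offsets recorded when the target device is carved out in nvm_create_tgt_dev(). A runnable toy version of that arithmetic is below; the 4-LUNs-per-channel geometry, the single per-channel lun offset (the kernel keeps one offset per LUN), and every name in it are invented for the example.

#include <stdio.h>

/* Toy analogue of nvm_ch_map: each virtual channel records where it
 * starts on the real device and how its LUN numbers are shifted. */
struct ch_map {
	int ch_off;	/* physical channel = virtual channel + ch_off */
	int lun_off;	/* physical lun = virtual lun + lun_off        */
};

static void map_to_dev(const struct ch_map *map, int *ch, int *lun)
{
	int ch_off = map[*ch].ch_off;	/* map[] is indexed by the virtual channel */
	int lun_off = map[*ch].lun_off;

	*ch += ch_off;
	*lun += lun_off;
}

static void map_to_tgt(const struct ch_map *rmap, int *ch, int *lun)
{
	int ch_off = rmap[*ch].ch_off;	/* rmap[] is indexed by the physical channel,
					 * mirroring dev->rmap in the code above */
	int lun_off = rmap[*ch].lun_off;

	*ch -= ch_off;
	*lun -= lun_off;
}

int main(void)
{
	/* Target owns device LUNs 6..13 of a device with 4 LUNs per channel:
	 * virtual ch 0 starts at physical ch 1, lun 2; later channels at lun 0. */
	struct ch_map map[3]  = { {1, 2}, {1, 0}, {1, 0} };
	struct ch_map rmap[4] = { {0, 0}, {1, 2}, {1, 0}, {1, 0} };
	int ch = 0, lun = 1;

	map_to_dev(map, &ch, &lun);
	printf("virtual (0,1) -> device (%d,%d)\n", ch, lun);	/* (1,3) */

	map_to_tgt(rmap, &ch, &lun);
	printf("back to target (%d,%d)\n", ch, lun);		/* (0,1) */
	return 0;
}
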
@@ -92,78 +565,6 @@ void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler)
92} 565}
93EXPORT_SYMBOL(nvm_dev_dma_free); 566EXPORT_SYMBOL(nvm_dev_dma_free);
94 567
95static struct nvmm_type *nvm_find_mgr_type(const char *name)
96{
97 struct nvmm_type *mt;
98
99 list_for_each_entry(mt, &nvm_mgrs, list)
100 if (!strcmp(name, mt->name))
101 return mt;
102
103 return NULL;
104}
105
106static struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev)
107{
108 struct nvmm_type *mt;
109 int ret;
110
111 lockdep_assert_held(&nvm_lock);
112
113 list_for_each_entry(mt, &nvm_mgrs, list) {
114 if (strncmp(dev->sb.mmtype, mt->name, NVM_MMTYPE_LEN))
115 continue;
116
117 ret = mt->register_mgr(dev);
118 if (ret < 0) {
119 pr_err("nvm: media mgr failed to init (%d) on dev %s\n",
120 ret, dev->name);
121 return NULL; /* initialization failed */
122 } else if (ret > 0)
123 return mt;
124 }
125
126 return NULL;
127}
128
129int nvm_register_mgr(struct nvmm_type *mt)
130{
131 struct nvm_dev *dev;
132 int ret = 0;
133
134 down_write(&nvm_lock);
135 if (nvm_find_mgr_type(mt->name)) {
136 ret = -EEXIST;
137 goto finish;
138 } else {
139 list_add(&mt->list, &nvm_mgrs);
140 }
141
142 /* try to register media mgr if any device have none configured */
143 list_for_each_entry(dev, &nvm_devices, devices) {
144 if (dev->mt)
145 continue;
146
147 dev->mt = nvm_init_mgr(dev);
148 }
149finish:
150 up_write(&nvm_lock);
151
152 return ret;
153}
154EXPORT_SYMBOL(nvm_register_mgr);
155
156void nvm_unregister_mgr(struct nvmm_type *mt)
157{
158 if (!mt)
159 return;
160
161 down_write(&nvm_lock);
162 list_del(&mt->list);
163 up_write(&nvm_lock);
164}
165EXPORT_SYMBOL(nvm_unregister_mgr);
166
167static struct nvm_dev *nvm_find_nvm_dev(const char *name) 568static struct nvm_dev *nvm_find_nvm_dev(const char *name)
168{ 569{
169 struct nvm_dev *dev; 570 struct nvm_dev *dev;
@@ -175,53 +576,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name)
175 return NULL; 576 return NULL;
176} 577}
177 578
178static void nvm_tgt_generic_to_addr_mode(struct nvm_tgt_dev *tgt_dev,
179 struct nvm_rq *rqd)
180{
181 struct nvm_dev *dev = tgt_dev->parent;
182 int i;
183
184 if (rqd->nr_ppas > 1) {
185 for (i = 0; i < rqd->nr_ppas; i++) {
186 rqd->ppa_list[i] = dev->mt->trans_ppa(tgt_dev,
187 rqd->ppa_list[i], TRANS_TGT_TO_DEV);
188 rqd->ppa_list[i] = generic_to_dev_addr(dev,
189 rqd->ppa_list[i]);
190 }
191 } else {
192 rqd->ppa_addr = dev->mt->trans_ppa(tgt_dev, rqd->ppa_addr,
193 TRANS_TGT_TO_DEV);
194 rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
195 }
196}
197
198int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas,
199 int type)
200{
201 struct nvm_rq rqd;
202 int ret;
203
204 if (nr_ppas > dev->ops->max_phys_sect) {
205 pr_err("nvm: unable to update all sysblocks atomically\n");
206 return -EINVAL;
207 }
208
209 memset(&rqd, 0, sizeof(struct nvm_rq));
210
211 nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
212 nvm_generic_to_addr_mode(dev, &rqd);
213
214 ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
215 nvm_free_rqd_ppalist(dev, &rqd);
216 if (ret) {
217 pr_err("nvm: sysblk failed bb mark\n");
218 return -EINVAL;
219 }
220
221 return 0;
222}
223EXPORT_SYMBOL(nvm_set_bb_tbl);
224
225int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, 579int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
226 int nr_ppas, int type) 580 int nr_ppas, int type)
227{ 581{
@@ -237,12 +591,12 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
237 memset(&rqd, 0, sizeof(struct nvm_rq)); 591 memset(&rqd, 0, sizeof(struct nvm_rq));
238 592
239 nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); 593 nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
240 nvm_tgt_generic_to_addr_mode(tgt_dev, &rqd); 594 nvm_rq_tgt_to_dev(tgt_dev, &rqd);
241 595
242 ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); 596 ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
243 nvm_free_rqd_ppalist(dev, &rqd); 597 nvm_free_rqd_ppalist(dev, &rqd);
244 if (ret) { 598 if (ret) {
245 pr_err("nvm: sysblk failed bb mark\n"); 599 pr_err("nvm: failed bb mark\n");
246 return -EINVAL; 600 return -EINVAL;
247 } 601 }
248 602
@@ -262,15 +616,42 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
262{ 616{
263 struct nvm_dev *dev = tgt_dev->parent; 617 struct nvm_dev *dev = tgt_dev->parent;
264 618
265 return dev->mt->submit_io(tgt_dev, rqd); 619 if (!dev->ops->submit_io)
620 return -ENODEV;
621
622 nvm_rq_tgt_to_dev(tgt_dev, rqd);
623
624 rqd->dev = tgt_dev;
625 return dev->ops->submit_io(dev, rqd);
266} 626}
267EXPORT_SYMBOL(nvm_submit_io); 627EXPORT_SYMBOL(nvm_submit_io);
268 628
269int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, int flags) 629int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags)
270{ 630{
271 struct nvm_dev *dev = tgt_dev->parent; 631 struct nvm_dev *dev = tgt_dev->parent;
632 struct nvm_rq rqd;
633 int ret;
634
635 if (!dev->ops->erase_block)
636 return 0;
637
638 nvm_map_to_dev(tgt_dev, ppas);
639
640 memset(&rqd, 0, sizeof(struct nvm_rq));
641
642 ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1);
643 if (ret)
644 return ret;
645
646 nvm_rq_tgt_to_dev(tgt_dev, &rqd);
647
648 rqd.flags = flags;
649
650 ret = dev->ops->erase_block(dev, &rqd);
272 651
273 return dev->mt->erase_blk(tgt_dev, p, flags); 652 nvm_free_rqd_ppalist(dev, &rqd);
653
654 return ret;
274} 655}
275EXPORT_SYMBOL(nvm_erase_blk); 656EXPORT_SYMBOL(nvm_erase_blk);
276 657
@@ -289,46 +670,67 @@ EXPORT_SYMBOL(nvm_get_l2p_tbl);
289int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len) 670int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len)
290{ 671{
291 struct nvm_dev *dev = tgt_dev->parent; 672 struct nvm_dev *dev = tgt_dev->parent;
673 struct nvm_geo *geo = &dev->geo;
674 struct nvm_area *area, *prev, *next;
675 sector_t begin = 0;
676 sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9;
292 677
293 return dev->mt->get_area(dev, lba, len); 678 if (len > max_sectors)
294} 679 return -EINVAL;
295EXPORT_SYMBOL(nvm_get_area);
296 680
297void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t lba) 681 area = kmalloc(sizeof(struct nvm_area), GFP_KERNEL);
298{ 682 if (!area)
299 struct nvm_dev *dev = tgt_dev->parent; 683 return -ENOMEM;
300 684
301 dev->mt->put_area(dev, lba); 685 prev = NULL;
302}
303EXPORT_SYMBOL(nvm_put_area);
304 686
305void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd) 687 spin_lock(&dev->lock);
306{ 688 list_for_each_entry(next, &dev->area_list, list) {
307 int i; 689 if (begin + len > next->begin) {
690 begin = next->end;
691 prev = next;
692 continue;
693 }
694 break;
695 }
308 696
309 if (rqd->nr_ppas > 1) { 697 if ((begin + len) > max_sectors) {
310 for (i = 0; i < rqd->nr_ppas; i++) 698 spin_unlock(&dev->lock);
311 rqd->ppa_list[i] = dev_to_generic_addr(dev, 699 kfree(area);
312 rqd->ppa_list[i]); 700 return -EINVAL;
313 } else {
314 rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
315 } 701 }
702
703 area->begin = *lba = begin;
704 area->end = begin + len;
705
706 if (prev) /* insert into sorted order */
707 list_add(&area->list, &prev->list);
708 else
709 list_add(&area->list, &dev->area_list);
710 spin_unlock(&dev->lock);
711
712 return 0;
316} 713}
317EXPORT_SYMBOL(nvm_addr_to_generic_mode); 714EXPORT_SYMBOL(nvm_get_area);
318 715
319void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) 716void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin)
320{ 717{
321 int i; 718 struct nvm_dev *dev = tgt_dev->parent;
719 struct nvm_area *area;
322 720
323 if (rqd->nr_ppas > 1) { 721 spin_lock(&dev->lock);
324 for (i = 0; i < rqd->nr_ppas; i++) 722 list_for_each_entry(area, &dev->area_list, list) {
325 rqd->ppa_list[i] = generic_to_dev_addr(dev, 723 if (area->begin != begin)
326 rqd->ppa_list[i]); 724 continue;
327 } else { 725
328 rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); 726 list_del(&area->list);
727 spin_unlock(&dev->lock);
728 kfree(area);
729 return;
329 } 730 }
731 spin_unlock(&dev->lock);
330} 732}
331EXPORT_SYMBOL(nvm_generic_to_addr_mode); 733EXPORT_SYMBOL(nvm_put_area);
332 734
333int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, 735int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
334 const struct ppa_addr *ppas, int nr_ppas, int vblk) 736 const struct ppa_addr *ppas, int nr_ppas, int vblk)
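
nvm_get_area() above is a small first-fit allocator over the device's sector space: reserved ranges live on dev->area_list in ascending order, the candidate start is pushed past every range it would overlap, and the new reservation is spliced in at its sorted position; nvm_put_area() simply unlinks by begin address. Below is a self-contained userspace sketch of the same walk, minus the locking and the sec_size conversion; struct area and get_area() here are invented analogues, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

struct area {
	long begin, end;	/* end is excluded, as in struct nvm_area */
	struct area *next;
};

static struct area *reserved;	/* kept sorted, ascending by begin */

static int get_area(long len, long max_sectors, long *out)
{
	long begin = 0;
	struct area *cur, *prev = NULL, *a;

	for (cur = reserved; cur; prev = cur, cur = cur->next) {
		if (begin + len > cur->begin)
			begin = cur->end;	/* would overlap: skip past it  */
		else
			break;			/* gap found before this range  */
	}

	if (begin + len > max_sectors)
		return -1;			/* no room left on the device   */

	a = malloc(sizeof(*a));
	if (!a)
		return -1;
	a->begin = *out = begin;
	a->end = begin + len;

	/* insert after the last range we skipped, keeping the list sorted */
	a->next = prev ? prev->next : reserved;
	if (prev)
		prev->next = a;
	else
		reserved = a;
	return 0;
}

int main(void)
{
	long lba;

	get_area(100, 1000, &lba);	/* reserves [0, 100)   */
	printf("first  area at %ld\n", lba);
	get_area(200, 1000, &lba);	/* reserves [100, 300) */
	printf("second area at %ld\n", lba);
	return 0;
}
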
@@ -380,149 +782,19 @@ void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
380} 782}
381EXPORT_SYMBOL(nvm_free_rqd_ppalist); 783EXPORT_SYMBOL(nvm_free_rqd_ppalist);
382 784
383int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, 785void nvm_end_io(struct nvm_rq *rqd)
384 int flags)
385{ 786{
386 struct nvm_rq rqd; 787 struct nvm_tgt_dev *tgt_dev = rqd->dev;
387 int ret;
388 788
389 if (!dev->ops->erase_block) 789 /* Convert address space */
390 return 0; 790 if (tgt_dev)
791 nvm_rq_dev_to_tgt(tgt_dev, rqd);
391 792
392 memset(&rqd, 0, sizeof(struct nvm_rq)); 793 if (rqd->end_io)
393 794 rqd->end_io(rqd);
394 ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1);
395 if (ret)
396 return ret;
397
398 nvm_generic_to_addr_mode(dev, &rqd);
399
400 rqd.flags = flags;
401
402 ret = dev->ops->erase_block(dev, &rqd);
403
404 nvm_free_rqd_ppalist(dev, &rqd);
405
406 return ret;
407}
408EXPORT_SYMBOL(nvm_erase_ppa);
409
410void nvm_end_io(struct nvm_rq *rqd, int error)
411{
412 rqd->error = error;
413 rqd->end_io(rqd);
414} 795}
415EXPORT_SYMBOL(nvm_end_io); 796EXPORT_SYMBOL(nvm_end_io);
416 797
417static void nvm_end_io_sync(struct nvm_rq *rqd)
418{
419 struct completion *waiting = rqd->wait;
420
421 rqd->wait = NULL;
422
423 complete(waiting);
424}
425
426static int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode,
427 int flags, void *buf, int len)
428{
429 DECLARE_COMPLETION_ONSTACK(wait);
430 struct bio *bio;
431 int ret;
432 unsigned long hang_check;
433
434 bio = bio_map_kern(dev->q, buf, len, GFP_KERNEL);
435 if (IS_ERR_OR_NULL(bio))
436 return -ENOMEM;
437
438 nvm_generic_to_addr_mode(dev, rqd);
439
440 rqd->dev = NULL;
441 rqd->opcode = opcode;
442 rqd->flags = flags;
443 rqd->bio = bio;
444 rqd->wait = &wait;
445 rqd->end_io = nvm_end_io_sync;
446
447 ret = dev->ops->submit_io(dev, rqd);
448 if (ret) {
449 bio_put(bio);
450 return ret;
451 }
452
453 /* Prevent hang_check timer from firing at us during very long I/O */
454 hang_check = sysctl_hung_task_timeout_secs;
455 if (hang_check)
456 while (!wait_for_completion_io_timeout(&wait,
457 hang_check * (HZ/2)))
458 ;
459 else
460 wait_for_completion_io(&wait);
461
462 return rqd->error;
463}
464
465/**
466 * nvm_submit_ppa_list - submit user-defined ppa list to device. The user must
467 * take to free ppa list if necessary.
468 * @dev: device
469 * @ppa_list: user created ppa_list
470 * @nr_ppas: length of ppa_list
471 * @opcode: device opcode
472 * @flags: device flags
473 * @buf: data buffer
474 * @len: data buffer length
475 */
476int nvm_submit_ppa_list(struct nvm_dev *dev, struct ppa_addr *ppa_list,
477 int nr_ppas, int opcode, int flags, void *buf, int len)
478{
479 struct nvm_rq rqd;
480
481 if (dev->ops->max_phys_sect < nr_ppas)
482 return -EINVAL;
483
484 memset(&rqd, 0, sizeof(struct nvm_rq));
485
486 rqd.nr_ppas = nr_ppas;
487 if (nr_ppas > 1)
488 rqd.ppa_list = ppa_list;
489 else
490 rqd.ppa_addr = ppa_list[0];
491
492 return __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len);
493}
494EXPORT_SYMBOL(nvm_submit_ppa_list);
495
496/**
497 * nvm_submit_ppa - submit PPAs to device. PPAs will automatically be unfolded
498 * as single, dual, quad plane PPAs depending on device type.
499 * @dev: device
500 * @ppa: user created ppa_list
501 * @nr_ppas: length of ppa_list
502 * @opcode: device opcode
503 * @flags: device flags
504 * @buf: data buffer
505 * @len: data buffer length
506 */
507int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas,
508 int opcode, int flags, void *buf, int len)
509{
510 struct nvm_rq rqd;
511 int ret;
512
513 memset(&rqd, 0, sizeof(struct nvm_rq));
514 ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas, 1);
515 if (ret)
516 return ret;
517
518 ret = __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len);
519
520 nvm_free_rqd_ppalist(dev, &rqd);
521
522 return ret;
523}
524EXPORT_SYMBOL(nvm_submit_ppa);
525
526/* 798/*
527 * folds a bad block list from its plane representation to its virtual 799 * folds a bad block list from its plane representation to its virtual
528 * block representation. The fold is done in place and reduced size is 800 * block representation. The fold is done in place and reduced size is
@@ -559,21 +831,14 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
559} 831}
560EXPORT_SYMBOL(nvm_bb_tbl_fold); 832EXPORT_SYMBOL(nvm_bb_tbl_fold);
561 833
562int nvm_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa, u8 *blks)
563{
564 ppa = generic_to_dev_addr(dev, ppa);
565
566 return dev->ops->get_bb_tbl(dev, ppa, blks);
567}
568EXPORT_SYMBOL(nvm_get_bb_tbl);
569
570int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, 834int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
571 u8 *blks) 835 u8 *blks)
572{ 836{
573 struct nvm_dev *dev = tgt_dev->parent; 837 struct nvm_dev *dev = tgt_dev->parent;
574 838
575 ppa = dev->mt->trans_ppa(tgt_dev, ppa, TRANS_TGT_TO_DEV); 839 nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
576 return nvm_get_bb_tbl(dev, ppa, blks); 840
841 return dev->ops->get_bb_tbl(dev, ppa, blks);
577} 842}
578EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); 843EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
579 844
@@ -627,7 +892,7 @@ static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
627static int nvm_core_init(struct nvm_dev *dev) 892static int nvm_core_init(struct nvm_dev *dev)
628{ 893{
629 struct nvm_id *id = &dev->identity; 894 struct nvm_id *id = &dev->identity;
630 struct nvm_id_group *grp = &id->groups[0]; 895 struct nvm_id_group *grp = &id->grp;
631 struct nvm_geo *geo = &dev->geo; 896 struct nvm_geo *geo = &dev->geo;
632 int ret; 897 int ret;
633 898
@@ -691,36 +956,31 @@ static int nvm_core_init(struct nvm_dev *dev)
691 goto err_fmtype; 956 goto err_fmtype;
692 } 957 }
693 958
959 INIT_LIST_HEAD(&dev->area_list);
960 INIT_LIST_HEAD(&dev->targets);
694 mutex_init(&dev->mlock); 961 mutex_init(&dev->mlock);
695 spin_lock_init(&dev->lock); 962 spin_lock_init(&dev->lock);
696 963
697 blk_queue_logical_block_size(dev->q, geo->sec_size); 964 ret = nvm_register_map(dev);
965 if (ret)
966 goto err_fmtype;
698 967
968 blk_queue_logical_block_size(dev->q, geo->sec_size);
699 return 0; 969 return 0;
700err_fmtype: 970err_fmtype:
701 kfree(dev->lun_map); 971 kfree(dev->lun_map);
702 return ret; 972 return ret;
703} 973}
704 974
705static void nvm_free_mgr(struct nvm_dev *dev)
706{
707 if (!dev->mt)
708 return;
709
710 dev->mt->unregister_mgr(dev);
711 dev->mt = NULL;
712}
713
714void nvm_free(struct nvm_dev *dev) 975void nvm_free(struct nvm_dev *dev)
715{ 976{
716 if (!dev) 977 if (!dev)
717 return; 978 return;
718 979
719 nvm_free_mgr(dev);
720
721 if (dev->dma_pool) 980 if (dev->dma_pool)
722 dev->ops->destroy_dma_pool(dev->dma_pool); 981 dev->ops->destroy_dma_pool(dev->dma_pool);
723 982
983 kfree(dev->rmap);
724 kfree(dev->lptbl); 984 kfree(dev->lptbl);
725 kfree(dev->lun_map); 985 kfree(dev->lun_map);
726 kfree(dev); 986 kfree(dev);
@@ -731,28 +991,19 @@ static int nvm_init(struct nvm_dev *dev)
731 struct nvm_geo *geo = &dev->geo; 991 struct nvm_geo *geo = &dev->geo;
732 int ret = -EINVAL; 992 int ret = -EINVAL;
733 993
734 if (!dev->q || !dev->ops)
735 return ret;
736
737 if (dev->ops->identity(dev, &dev->identity)) { 994 if (dev->ops->identity(dev, &dev->identity)) {
738 pr_err("nvm: device could not be identified\n"); 995 pr_err("nvm: device could not be identified\n");
739 goto err; 996 goto err;
740 } 997 }
741 998
742 pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n", 999 pr_debug("nvm: ver:%x nvm_vendor:%x\n",
743 dev->identity.ver_id, dev->identity.vmnt, 1000 dev->identity.ver_id, dev->identity.vmnt);
744 dev->identity.cgrps);
745 1001
746 if (dev->identity.ver_id != 1) { 1002 if (dev->identity.ver_id != 1) {
747 pr_err("nvm: device not supported by kernel."); 1003 pr_err("nvm: device not supported by kernel.");
748 goto err; 1004 goto err;
749 } 1005 }
750 1006
751 if (dev->identity.cgrps != 1) {
752 pr_err("nvm: only one group configuration supported.");
753 goto err;
754 }
755
756 ret = nvm_core_init(dev); 1007 ret = nvm_core_init(dev);
757 if (ret) { 1008 if (ret) {
758 pr_err("nvm: could not initialize core structures.\n"); 1009 pr_err("nvm: could not initialize core structures.\n");
@@ -779,49 +1030,50 @@ int nvm_register(struct nvm_dev *dev)
779{ 1030{
780 int ret; 1031 int ret;
781 1032
782 ret = nvm_init(dev); 1033 if (!dev->q || !dev->ops)
783 if (ret) 1034 return -EINVAL;
784 goto err_init;
785 1035
786 if (dev->ops->max_phys_sect > 256) { 1036 if (dev->ops->max_phys_sect > 256) {
787 pr_info("nvm: max sectors supported is 256.\n"); 1037 pr_info("nvm: max sectors supported is 256.\n");
788 ret = -EINVAL; 1038 return -EINVAL;
789 goto err_init;
790 } 1039 }
791 1040
792 if (dev->ops->max_phys_sect > 1) { 1041 if (dev->ops->max_phys_sect > 1) {
793 dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); 1042 dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist");
794 if (!dev->dma_pool) { 1043 if (!dev->dma_pool) {
795 pr_err("nvm: could not create dma pool\n"); 1044 pr_err("nvm: could not create dma pool\n");
796 ret = -ENOMEM; 1045 return -ENOMEM;
797 goto err_init;
798 } 1046 }
799 } 1047 }
800 1048
801 if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { 1049 ret = nvm_init(dev);
802 ret = nvm_get_sysblock(dev, &dev->sb); 1050 if (ret)
803 if (!ret) 1051 goto err_init;
804 pr_err("nvm: device not initialized.\n");
805 else if (ret < 0)
806 pr_err("nvm: err (%d) on device initialization\n", ret);
807 }
808 1052
809 /* register device with a supported media manager */ 1053 /* register device with a supported media manager */
810 down_write(&nvm_lock); 1054 down_write(&nvm_lock);
811 if (ret > 0)
812 dev->mt = nvm_init_mgr(dev);
813 list_add(&dev->devices, &nvm_devices); 1055 list_add(&dev->devices, &nvm_devices);
814 up_write(&nvm_lock); 1056 up_write(&nvm_lock);
815 1057
816 return 0; 1058 return 0;
817err_init: 1059err_init:
818 kfree(dev->lun_map); 1060 dev->ops->destroy_dma_pool(dev->dma_pool);
819 return ret; 1061 return ret;
820} 1062}
821EXPORT_SYMBOL(nvm_register); 1063EXPORT_SYMBOL(nvm_register);
822 1064
823void nvm_unregister(struct nvm_dev *dev) 1065void nvm_unregister(struct nvm_dev *dev)
824{ 1066{
1067 struct nvm_target *t, *tmp;
1068
1069 mutex_lock(&dev->mlock);
1070 list_for_each_entry_safe(t, tmp, &dev->targets, list) {
1071 if (t->dev->parent != dev)
1072 continue;
1073 __nvm_remove_target(t);
1074 }
1075 mutex_unlock(&dev->mlock);
1076
825 down_write(&nvm_lock); 1077 down_write(&nvm_lock);
826 list_del(&dev->devices); 1078 list_del(&dev->devices);
827 up_write(&nvm_lock); 1079 up_write(&nvm_lock);
@@ -844,24 +1096,24 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
844 return -EINVAL; 1096 return -EINVAL;
845 } 1097 }
846 1098
847 if (!dev->mt) {
848 pr_info("nvm: device has no media manager registered.\n");
849 return -ENODEV;
850 }
851
852 if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { 1099 if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) {
853 pr_err("nvm: config type not valid\n"); 1100 pr_err("nvm: config type not valid\n");
854 return -EINVAL; 1101 return -EINVAL;
855 } 1102 }
856 s = &create->conf.s; 1103 s = &create->conf.s;
857 1104
858 if (s->lun_begin > s->lun_end || s->lun_end > dev->geo.nr_luns) { 1105 if (s->lun_begin == -1 && s->lun_end == -1) {
1106 s->lun_begin = 0;
1107 s->lun_end = dev->geo.nr_luns - 1;
1108 }
1109
1110 if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.nr_luns) {
859 pr_err("nvm: lun out of bound (%u:%u > %u)\n", 1111 pr_err("nvm: lun out of bound (%u:%u > %u)\n",
860 s->lun_begin, s->lun_end, dev->geo.nr_luns); 1112 s->lun_begin, s->lun_end, dev->geo.nr_luns - 1);
861 return -EINVAL; 1113 return -EINVAL;
862 } 1114 }
863 1115
864 return dev->mt->create_tgt(dev, create); 1116 return nvm_create_tgt(dev, create);
865} 1117}
866 1118
867static long nvm_ioctl_info(struct file *file, void __user *arg) 1119static long nvm_ioctl_info(struct file *file, void __user *arg)
@@ -923,16 +1175,14 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg)
923 struct nvm_ioctl_device_info *info = &devices->info[i]; 1175 struct nvm_ioctl_device_info *info = &devices->info[i];
924 1176
925 sprintf(info->devname, "%s", dev->name); 1177 sprintf(info->devname, "%s", dev->name);
926 if (dev->mt) {
927 info->bmversion[0] = dev->mt->version[0];
928 info->bmversion[1] = dev->mt->version[1];
929 info->bmversion[2] = dev->mt->version[2];
930 sprintf(info->bmname, "%s", dev->mt->name);
931 } else {
932 sprintf(info->bmname, "none");
933 }
934 1178
1179 /* kept for compatibility */
1180 info->bmversion[0] = 1;
1181 info->bmversion[1] = 0;
1182 info->bmversion[2] = 0;
1183 sprintf(info->bmname, "%s", "gennvm");
935 i++; 1184 i++;
1185
936 if (i > 31) { 1186 if (i > 31) {
937 pr_err("nvm: max 31 devices can be reported.\n"); 1187 pr_err("nvm: max 31 devices can be reported.\n");
938 break; 1188 break;
@@ -994,7 +1244,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
994 } 1244 }
995 1245
996 list_for_each_entry(dev, &nvm_devices, devices) { 1246 list_for_each_entry(dev, &nvm_devices, devices) {
997 ret = dev->mt->remove_tgt(dev, &remove); 1247 ret = nvm_remove_tgt(dev, &remove);
998 if (!ret) 1248 if (!ret)
999 break; 1249 break;
1000 } 1250 }
@@ -1002,47 +1252,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
1002 return ret; 1252 return ret;
1003} 1253}
1004 1254
1005static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info) 1255/* kept for compatibility reasons */
1006{
1007 info->seqnr = 1;
1008 info->erase_cnt = 0;
1009 info->version = 1;
1010}
1011
1012static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init)
1013{
1014 struct nvm_dev *dev;
1015 struct nvm_sb_info info;
1016 int ret;
1017
1018 down_write(&nvm_lock);
1019 dev = nvm_find_nvm_dev(init->dev);
1020 up_write(&nvm_lock);
1021 if (!dev) {
1022 pr_err("nvm: device not found\n");
1023 return -EINVAL;
1024 }
1025
1026 nvm_setup_nvm_sb_info(&info);
1027
1028 strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
1029 info.fs_ppa.ppa = -1;
1030
1031 if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) {
1032 ret = nvm_init_sysblock(dev, &info);
1033 if (ret)
1034 return ret;
1035 }
1036
1037 memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info));
1038
1039 down_write(&nvm_lock);
1040 dev->mt = nvm_init_mgr(dev);
1041 up_write(&nvm_lock);
1042
1043 return 0;
1044}
1045
1046static long nvm_ioctl_dev_init(struct file *file, void __user *arg) 1256static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
1047{ 1257{
1048 struct nvm_ioctl_dev_init init; 1258 struct nvm_ioctl_dev_init init;
@@ -1058,15 +1268,13 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
1058 return -EINVAL; 1268 return -EINVAL;
1059 } 1269 }
1060 1270
1061 init.dev[DISK_NAME_LEN - 1] = '\0'; 1271 return 0;
1062
1063 return __nvm_ioctl_dev_init(&init);
1064} 1272}
1065 1273
1274/* Kept for compatibility reasons */
1066static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) 1275static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
1067{ 1276{
1068 struct nvm_ioctl_dev_factory fact; 1277 struct nvm_ioctl_dev_factory fact;
1069 struct nvm_dev *dev;
1070 1278
1071 if (!capable(CAP_SYS_ADMIN)) 1279 if (!capable(CAP_SYS_ADMIN))
1072 return -EPERM; 1280 return -EPERM;
@@ -1079,19 +1287,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
1079 if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1)) 1287 if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1))
1080 return -EINVAL; 1288 return -EINVAL;
1081 1289
1082 down_write(&nvm_lock);
1083 dev = nvm_find_nvm_dev(fact.dev);
1084 up_write(&nvm_lock);
1085 if (!dev) {
1086 pr_err("nvm: device not found\n");
1087 return -EINVAL;
1088 }
1089
1090 nvm_free_mgr(dev);
1091
1092 if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT)
1093 return nvm_dev_factory(dev, fact.flags);
1094
1095 return 0; 1290 return 0;
1096} 1291}
1097 1292
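
The nvm_unregister() hunk above now tears down any remaining targets itself, walking dev->targets with list_for_each_entry_safe() because __nvm_remove_target() frees the entry it is handed. The userspace sketch below shows why the "safe" form matters: the successor must be cached before the current node is freed. Every name in it is invented.

#include <stdio.h>
#include <stdlib.h>

/* Invented analogue of a target list; only the removal walk matters here. */
struct target {
	int id;
	struct target *next;
};

static void remove_all(struct target **head)
{
	struct target *t = *head, *tmp;

	while (t) {
		tmp = t->next;		/* cache the successor first ...     */
		printf("removing target %d\n", t->id);
		free(t);		/* ... because t is gone after this  */
		t = tmp;
	}
	*head = NULL;
}

int main(void)
{
	struct target *head = NULL;
	int i;

	for (i = 3; i >= 1; i--) {
		struct target *t = malloc(sizeof(*t));

		if (!t)
			return 1;
		t->id = i;
		t->next = head;
		head = t;
	}
	remove_all(&head);	/* prints targets 1, 2, 3 */
	return 0;
}
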
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
deleted file mode 100644
index ca7880082d80..000000000000
--- a/drivers/lightnvm/gennvm.c
+++ /dev/null
@@ -1,657 +0,0 @@
1/*
2 * Copyright (C) 2015 Matias Bjorling <m@bjorling.me>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License version
6 * 2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
16 * USA.
17 *
18 * Implementation of a general nvm manager for Open-Channel SSDs.
19 */
20
21#include "gennvm.h"
22
23static struct nvm_target *gen_find_target(struct gen_dev *gn, const char *name)
24{
25 struct nvm_target *tgt;
26
27 list_for_each_entry(tgt, &gn->targets, list)
28 if (!strcmp(name, tgt->disk->disk_name))
29 return tgt;
30
31 return NULL;
32}
33
34static const struct block_device_operations gen_fops = {
35 .owner = THIS_MODULE,
36};
37
38static int gen_reserve_luns(struct nvm_dev *dev, struct nvm_target *t,
39 int lun_begin, int lun_end)
40{
41 int i;
42
43 for (i = lun_begin; i <= lun_end; i++) {
44 if (test_and_set_bit(i, dev->lun_map)) {
45 pr_err("nvm: lun %d already allocated\n", i);
46 goto err;
47 }
48 }
49
50 return 0;
51
52err:
53 while (--i > lun_begin)
54 clear_bit(i, dev->lun_map);
55
56 return -EBUSY;
57}
58
59static void gen_release_luns_err(struct nvm_dev *dev, int lun_begin,
60 int lun_end)
61{
62 int i;
63
64 for (i = lun_begin; i <= lun_end; i++)
65 WARN_ON(!test_and_clear_bit(i, dev->lun_map));
66}
67
68static void gen_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev)
69{
70 struct nvm_dev *dev = tgt_dev->parent;
71 struct gen_dev_map *dev_map = tgt_dev->map;
72 int i, j;
73
74 for (i = 0; i < dev_map->nr_chnls; i++) {
75 struct gen_ch_map *ch_map = &dev_map->chnls[i];
76 int *lun_offs = ch_map->lun_offs;
77 int ch = i + ch_map->ch_off;
78
79 for (j = 0; j < ch_map->nr_luns; j++) {
80 int lun = j + lun_offs[j];
81 int lunid = (ch * dev->geo.luns_per_chnl) + lun;
82
83 WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
84 }
85
86 kfree(ch_map->lun_offs);
87 }
88
89 kfree(dev_map->chnls);
90 kfree(dev_map);
91 kfree(tgt_dev->luns);
92 kfree(tgt_dev);
93}
94
95static struct nvm_tgt_dev *gen_create_tgt_dev(struct nvm_dev *dev,
96 int lun_begin, int lun_end)
97{
98 struct nvm_tgt_dev *tgt_dev = NULL;
99 struct gen_dev_map *dev_rmap = dev->rmap;
100 struct gen_dev_map *dev_map;
101 struct ppa_addr *luns;
102 int nr_luns = lun_end - lun_begin + 1;
103 int luns_left = nr_luns;
104 int nr_chnls = nr_luns / dev->geo.luns_per_chnl;
105 int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl;
106 int bch = lun_begin / dev->geo.luns_per_chnl;
107 int blun = lun_begin % dev->geo.luns_per_chnl;
108 int lunid = 0;
109 int lun_balanced = 1;
110 int prev_nr_luns;
111 int i, j;
112
113 nr_chnls = nr_luns / dev->geo.luns_per_chnl;
114 nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1;
115
116 dev_map = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL);
117 if (!dev_map)
118 goto err_dev;
119
120 dev_map->chnls = kcalloc(nr_chnls, sizeof(struct gen_ch_map),
121 GFP_KERNEL);
122 if (!dev_map->chnls)
123 goto err_chnls;
124
125 luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL);
126 if (!luns)
127 goto err_luns;
128
129 prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ?
130 dev->geo.luns_per_chnl : luns_left;
131 for (i = 0; i < nr_chnls; i++) {
132 struct gen_ch_map *ch_rmap = &dev_rmap->chnls[i + bch];
133 int *lun_roffs = ch_rmap->lun_offs;
134 struct gen_ch_map *ch_map = &dev_map->chnls[i];
135 int *lun_offs;
136 int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ?
137 dev->geo.luns_per_chnl : luns_left;
138
139 if (lun_balanced && prev_nr_luns != luns_in_chnl)
140 lun_balanced = 0;
141
142 ch_map->ch_off = ch_rmap->ch_off = bch;
143 ch_map->nr_luns = luns_in_chnl;
144
145 lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
146 if (!lun_offs)
147 goto err_ch;
148
149 for (j = 0; j < luns_in_chnl; j++) {
150 luns[lunid].ppa = 0;
151 luns[lunid].g.ch = i;
152 luns[lunid++].g.lun = j;
153
154 lun_offs[j] = blun;
155 lun_roffs[j + blun] = blun;
156 }
157
158 ch_map->lun_offs = lun_offs;
159
160 /* when starting a new channel, lun offset is reset */
161 blun = 0;
162 luns_left -= luns_in_chnl;
163 }
164
165 dev_map->nr_chnls = nr_chnls;
166
167 tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL);
168 if (!tgt_dev)
169 goto err_ch;
170
171 memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo));
172 /* Target device only owns a portion of the physical device */
173 tgt_dev->geo.nr_chnls = nr_chnls;
174 tgt_dev->geo.nr_luns = nr_luns;
175 tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1;
176 tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun;
177 tgt_dev->q = dev->q;
178 tgt_dev->map = dev_map;
179 tgt_dev->luns = luns;
180 memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id));
181
182 tgt_dev->parent = dev;
183
184 return tgt_dev;
185err_ch:
186 while (--i > 0)
187 kfree(dev_map->chnls[i].lun_offs);
188 kfree(luns);
189err_luns:
190 kfree(dev_map->chnls);
191err_chnls:
192 kfree(dev_map);
193err_dev:
194 return tgt_dev;
195}
196
197static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
198{
199 struct gen_dev *gn = dev->mp;
200 struct nvm_ioctl_create_simple *s = &create->conf.s;
201 struct request_queue *tqueue;
202 struct gendisk *tdisk;
203 struct nvm_tgt_type *tt;
204 struct nvm_target *t;
205 struct nvm_tgt_dev *tgt_dev;
206 void *targetdata;
207
208 tt = nvm_find_target_type(create->tgttype, 1);
209 if (!tt) {
210 pr_err("nvm: target type %s not found\n", create->tgttype);
211 return -EINVAL;
212 }
213
214 mutex_lock(&gn->lock);
215 t = gen_find_target(gn, create->tgtname);
216 if (t) {
217 pr_err("nvm: target name already exists.\n");
218 mutex_unlock(&gn->lock);
219 return -EINVAL;
220 }
221 mutex_unlock(&gn->lock);
222
223 t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL);
224 if (!t)
225 return -ENOMEM;
226
227 if (gen_reserve_luns(dev, t, s->lun_begin, s->lun_end))
228 goto err_t;
229
230 tgt_dev = gen_create_tgt_dev(dev, s->lun_begin, s->lun_end);
231 if (!tgt_dev) {
232 pr_err("nvm: could not create target device\n");
233 goto err_reserve;
234 }
235
236 tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node);
237 if (!tqueue)
238 goto err_dev;
239 blk_queue_make_request(tqueue, tt->make_rq);
240
241 tdisk = alloc_disk(0);
242 if (!tdisk)
243 goto err_queue;
244
245 sprintf(tdisk->disk_name, "%s", create->tgtname);
246 tdisk->flags = GENHD_FL_EXT_DEVT;
247 tdisk->major = 0;
248 tdisk->first_minor = 0;
249 tdisk->fops = &gen_fops;
250 tdisk->queue = tqueue;
251
252 targetdata = tt->init(tgt_dev, tdisk);
253 if (IS_ERR(targetdata))
254 goto err_init;
255
256 tdisk->private_data = targetdata;
257 tqueue->queuedata = targetdata;
258
259 blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect);
260
261 set_capacity(tdisk, tt->capacity(targetdata));
262 add_disk(tdisk);
263
264 t->type = tt;
265 t->disk = tdisk;
266 t->dev = tgt_dev;
267
268 mutex_lock(&gn->lock);
269 list_add_tail(&t->list, &gn->targets);
270 mutex_unlock(&gn->lock);
271
272 return 0;
273err_init:
274 put_disk(tdisk);
275err_queue:
276 blk_cleanup_queue(tqueue);
277err_dev:
278 kfree(tgt_dev);
279err_reserve:
280 gen_release_luns_err(dev, s->lun_begin, s->lun_end);
281err_t:
282 kfree(t);
283 return -ENOMEM;
284}
285
286static void __gen_remove_target(struct nvm_target *t)
287{
288 struct nvm_tgt_type *tt = t->type;
289 struct gendisk *tdisk = t->disk;
290 struct request_queue *q = tdisk->queue;
291
292 del_gendisk(tdisk);
293 blk_cleanup_queue(q);
294
295 if (tt->exit)
296 tt->exit(tdisk->private_data);
297
298 gen_remove_tgt_dev(t->dev);
299 put_disk(tdisk);
300
301 list_del(&t->list);
302 kfree(t);
303}
304
305/**
306 * gen_remove_tgt - Removes a target from the media manager
307 * @dev: device
308 * @remove: ioctl structure with target name to remove.
309 *
310 * Returns:
311 * 0: on success
312 * 1: on not found
313 * <0: on error
314 */
315static int gen_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove)
316{
317 struct gen_dev *gn = dev->mp;
318 struct nvm_target *t;
319
320 if (!gn)
321 return 1;
322
323 mutex_lock(&gn->lock);
324 t = gen_find_target(gn, remove->tgtname);
325 if (!t) {
326 mutex_unlock(&gn->lock);
327 return 1;
328 }
329 __gen_remove_target(t);
330 mutex_unlock(&gn->lock);
331
332 return 0;
333}
334
335static int gen_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
336{
337 struct nvm_geo *geo = &dev->geo;
338 struct gen_dev *gn = dev->mp;
339 struct gen_area *area, *prev, *next;
340 sector_t begin = 0;
341 sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9;
342
343 if (len > max_sectors)
344 return -EINVAL;
345
346 area = kmalloc(sizeof(struct gen_area), GFP_KERNEL);
347 if (!area)
348 return -ENOMEM;
349
350 prev = NULL;
351
352 spin_lock(&dev->lock);
353 list_for_each_entry(next, &gn->area_list, list) {
354 if (begin + len > next->begin) {
355 begin = next->end;
356 prev = next;
357 continue;
358 }
359 break;
360 }
361
362 if ((begin + len) > max_sectors) {
363 spin_unlock(&dev->lock);
364 kfree(area);
365 return -EINVAL;
366 }
367
368 area->begin = *lba = begin;
369 area->end = begin + len;
370
371 if (prev) /* insert into sorted order */
372 list_add(&area->list, &prev->list);
373 else
374 list_add(&area->list, &gn->area_list);
375 spin_unlock(&dev->lock);
376
377 return 0;
378}
379
380static void gen_put_area(struct nvm_dev *dev, sector_t begin)
381{
382 struct gen_dev *gn = dev->mp;
383 struct gen_area *area;
384
385 spin_lock(&dev->lock);
386 list_for_each_entry(area, &gn->area_list, list) {
387 if (area->begin != begin)
388 continue;
389
390 list_del(&area->list);
391 spin_unlock(&dev->lock);
392 kfree(area);
393 return;
394 }
395 spin_unlock(&dev->lock);
396}
397
398static void gen_free(struct nvm_dev *dev)
399{
400 kfree(dev->mp);
401 kfree(dev->rmap);
402 dev->mp = NULL;
403}
404
405static int gen_register(struct nvm_dev *dev)
406{
407 struct gen_dev *gn;
408 struct gen_dev_map *dev_rmap;
409 int i, j;
410
411 if (!try_module_get(THIS_MODULE))
412 return -ENODEV;
413
414 gn = kzalloc(sizeof(struct gen_dev), GFP_KERNEL);
415 if (!gn)
416 goto err_gn;
417
418 dev_rmap = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL);
419 if (!dev_rmap)
420 goto err_rmap;
421
422 dev_rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct gen_ch_map),
423 GFP_KERNEL);
424 if (!dev_rmap->chnls)
425 goto err_chnls;
426
427 for (i = 0; i < dev->geo.nr_chnls; i++) {
428 struct gen_ch_map *ch_rmap;
429 int *lun_roffs;
430 int luns_in_chnl = dev->geo.luns_per_chnl;
431
432 ch_rmap = &dev_rmap->chnls[i];
433
434 ch_rmap->ch_off = -1;
435 ch_rmap->nr_luns = luns_in_chnl;
436
437 lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL);
438 if (!lun_roffs)
439 goto err_ch;
440
441 for (j = 0; j < luns_in_chnl; j++)
442 lun_roffs[j] = -1;
443
444 ch_rmap->lun_offs = lun_roffs;
445 }
446
447 gn->dev = dev;
448 gn->nr_luns = dev->geo.nr_luns;
449 INIT_LIST_HEAD(&gn->area_list);
450 mutex_init(&gn->lock);
451 INIT_LIST_HEAD(&gn->targets);
452 dev->mp = gn;
453 dev->rmap = dev_rmap;
454
455 return 1;
456err_ch:
457 while (--i >= 0)
458 kfree(dev_rmap->chnls[i].lun_offs);
459err_chnls:
460 kfree(dev_rmap);
461err_rmap:
462 gen_free(dev);
463err_gn:
464 module_put(THIS_MODULE);
465 return -ENOMEM;
466}
467
468static void gen_unregister(struct nvm_dev *dev)
469{
470 struct gen_dev *gn = dev->mp;
471 struct nvm_target *t, *tmp;
472
473 mutex_lock(&gn->lock);
474 list_for_each_entry_safe(t, tmp, &gn->targets, list) {
475 if (t->dev->parent != dev)
476 continue;
477 __gen_remove_target(t);
478 }
479 mutex_unlock(&gn->lock);
480
481 gen_free(dev);
482 module_put(THIS_MODULE);
483}
484
485static int gen_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
486{
487 struct gen_dev_map *dev_map = tgt_dev->map;
488 struct gen_ch_map *ch_map = &dev_map->chnls[p->g.ch];
489 int lun_off = ch_map->lun_offs[p->g.lun];
490 struct nvm_dev *dev = tgt_dev->parent;
491 struct gen_dev_map *dev_rmap = dev->rmap;
492 struct gen_ch_map *ch_rmap;
493 int lun_roff;
494
495 p->g.ch += ch_map->ch_off;
496 p->g.lun += lun_off;
497
498 ch_rmap = &dev_rmap->chnls[p->g.ch];
499 lun_roff = ch_rmap->lun_offs[p->g.lun];
500
501 if (unlikely(ch_rmap->ch_off < 0 || lun_roff < 0)) {
502 pr_err("nvm: corrupted device partition table\n");
503 return -EINVAL;
504 }
505
506 return 0;
507}
508
509static int gen_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p)
510{
511 struct nvm_dev *dev = tgt_dev->parent;
512 struct gen_dev_map *dev_rmap = dev->rmap;
513 struct gen_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch];
514 int lun_roff = ch_rmap->lun_offs[p->g.lun];
515
516 p->g.ch -= ch_rmap->ch_off;
517 p->g.lun -= lun_roff;
518
519 return 0;
520}
521
522static int gen_trans_rq(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
523 int flag)
524{
525 gen_trans_fn *f;
526 int i;
527 int ret = 0;
528
529 f = (flag == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt;
530
531 if (rqd->nr_ppas == 1)
532 return f(tgt_dev, &rqd->ppa_addr);
533
534 for (i = 0; i < rqd->nr_ppas; i++) {
535 ret = f(tgt_dev, &rqd->ppa_list[i]);
536 if (ret)
537 goto out;
538 }
539
540out:
541 return ret;
542}
543
544static void gen_end_io(struct nvm_rq *rqd)
545{
546 struct nvm_tgt_dev *tgt_dev = rqd->dev;
547 struct nvm_tgt_instance *ins = rqd->ins;
548
549 /* Convert address space */
550 if (tgt_dev)
551 gen_trans_rq(tgt_dev, rqd, TRANS_DEV_TO_TGT);
552
553 ins->tt->end_io(rqd);
554}
555
556static int gen_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
557{
558 struct nvm_dev *dev = tgt_dev->parent;
559
560 if (!dev->ops->submit_io)
561 return -ENODEV;
562
563 /* Convert address space */
564 gen_trans_rq(tgt_dev, rqd, TRANS_TGT_TO_DEV);
565 nvm_generic_to_addr_mode(dev, rqd);
566
567 rqd->dev = tgt_dev;
568 rqd->end_io = gen_end_io;
569 return dev->ops->submit_io(dev, rqd);
570}
571
572static int gen_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p,
573 int flags)
574{
575 /* Convert address space */
576 gen_map_to_dev(tgt_dev, p);
577
578 return nvm_erase_ppa(tgt_dev->parent, p, 1, flags);
579}
580
581static struct ppa_addr gen_trans_ppa(struct nvm_tgt_dev *tgt_dev,
582 struct ppa_addr p, int direction)
583{
584 gen_trans_fn *f;
585 struct ppa_addr ppa = p;
586
587 f = (direction == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt;
588 f(tgt_dev, &ppa);
589
590 return ppa;
591}
592
593static void gen_part_to_tgt(struct nvm_dev *dev, sector_t *entries,
594 int len)
595{
596 struct nvm_geo *geo = &dev->geo;
597 struct gen_dev_map *dev_rmap = dev->rmap;
598 u64 i;
599
600 for (i = 0; i < len; i++) {
601 struct gen_ch_map *ch_rmap;
602 int *lun_roffs;
603 struct ppa_addr gaddr;
604 u64 pba = le64_to_cpu(entries[i]);
605 int off;
606 u64 diff;
607
608 if (!pba)
609 continue;
610
611 gaddr = linear_to_generic_addr(geo, pba);
612 ch_rmap = &dev_rmap->chnls[gaddr.g.ch];
613 lun_roffs = ch_rmap->lun_offs;
614
615 off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun;
616
617 diff = ((ch_rmap->ch_off * geo->luns_per_chnl) +
618 (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun;
619
620 entries[i] -= cpu_to_le64(diff);
621 }
622}
623
624static struct nvmm_type gen = {
625 .name = "gennvm",
626 .version = {0, 1, 0},
627
628 .register_mgr = gen_register,
629 .unregister_mgr = gen_unregister,
630
631 .create_tgt = gen_create_tgt,
632 .remove_tgt = gen_remove_tgt,
633
634 .submit_io = gen_submit_io,
635 .erase_blk = gen_erase_blk,
636
637 .get_area = gen_get_area,
638 .put_area = gen_put_area,
639
640 .trans_ppa = gen_trans_ppa,
641 .part_to_tgt = gen_part_to_tgt,
642};
643
644static int __init gen_module_init(void)
645{
646 return nvm_register_mgr(&gen);
647}
648
649static void gen_module_exit(void)
650{
651 nvm_unregister_mgr(&gen);
652}
653
654module_init(gen_module_init);
655module_exit(gen_module_exit);
656MODULE_LICENSE("GPL v2");
657MODULE_DESCRIPTION("General media manager for Open-Channel SSDs");
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
deleted file mode 100644
index 6a4b3f368848..000000000000
--- a/drivers/lightnvm/gennvm.h
+++ /dev/null
@@ -1,62 +0,0 @@
1/*
2 * Copyright: Matias Bjorling <mb@bjorling.me>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License version
6 * 2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 */
14
15#ifndef GENNVM_H_
16#define GENNVM_H_
17
18#include <linux/module.h>
19#include <linux/vmalloc.h>
20
21#include <linux/lightnvm.h>
22
23struct gen_dev {
24 struct nvm_dev *dev;
25
26 int nr_luns;
27 struct list_head area_list;
28
29 struct mutex lock;
30 struct list_head targets;
31};
32
33/* Map between virtual and physical channel and lun */
34struct gen_ch_map {
35 int ch_off;
36 int nr_luns;
37 int *lun_offs;
38};
39
40struct gen_dev_map {
41 struct gen_ch_map *chnls;
42 int nr_chnls;
43};
44
45struct gen_area {
46 struct list_head list;
47 sector_t begin;
48 sector_t end; /* end is excluded */
49};
50
51static inline void *ch_map_to_lun_offs(struct gen_ch_map *ch_map)
52{
53 return ch_map + 1;
54}
55
56typedef int (gen_trans_fn)(struct nvm_tgt_dev *, struct ppa_addr *);
57
58#define gen_for_each_lun(bm, lun, i) \
59 for ((i) = 0, lun = &(bm)->luns[0]; \
60 (i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])
61
62#endif /* GENNVM_H_ */
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 9fb7de395915..e00b1d7b976f 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -779,7 +779,7 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
779 779
780static void rrpc_end_io(struct nvm_rq *rqd) 780static void rrpc_end_io(struct nvm_rq *rqd)
781{ 781{
782 struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); 782 struct rrpc *rrpc = rqd->private;
783 struct nvm_tgt_dev *dev = rrpc->dev; 783 struct nvm_tgt_dev *dev = rrpc->dev;
784 struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); 784 struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
785 uint8_t npages = rqd->nr_ppas; 785 uint8_t npages = rqd->nr_ppas;
@@ -972,8 +972,9 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
972 972
973 bio_get(bio); 973 bio_get(bio);
974 rqd->bio = bio; 974 rqd->bio = bio;
975 rqd->ins = &rrpc->instance; 975 rqd->private = rrpc;
976 rqd->nr_ppas = nr_pages; 976 rqd->nr_ppas = nr_pages;
977 rqd->end_io = rrpc_end_io;
977 rrq->flags = flags; 978 rrq->flags = flags;
978 979
979 err = nvm_submit_io(dev, rqd); 980 err = nvm_submit_io(dev, rqd);
@@ -1532,7 +1533,6 @@ static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk)
1532 if (!rrpc) 1533 if (!rrpc)
1533 return ERR_PTR(-ENOMEM); 1534 return ERR_PTR(-ENOMEM);
1534 1535
1535 rrpc->instance.tt = &tt_rrpc;
1536 rrpc->dev = dev; 1536 rrpc->dev = dev;
1537 rrpc->disk = tdisk; 1537 rrpc->disk = tdisk;
1538 1538
@@ -1611,7 +1611,6 @@ static struct nvm_tgt_type tt_rrpc = {
1611 1611
1612 .make_rq = rrpc_make_rq, 1612 .make_rq = rrpc_make_rq,
1613 .capacity = rrpc_capacity, 1613 .capacity = rrpc_capacity,
1614 .end_io = rrpc_end_io,
1615 1614
1616 .init = rrpc_init, 1615 .init = rrpc_init,
1617 .exit = rrpc_exit, 1616 .exit = rrpc_exit,
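
The rrpc hunks above replace the per-target-type end_io hook with a callback and context pointer carried by each request (rqd->end_io, rqd->private), which is what lets the reworked nvm_end_io() in core.c complete a request without knowing which target issued it. A minimal userspace sketch of that pattern follows; struct rq, complete_rq() and the rest are invented stand-ins, not the lightnvm structures.

#include <stdio.h>

/* Invented request type: the completion routine and its context travel
 * with the request, loosely mirroring rqd->end_io and rqd->private. */
struct rq {
	int error;
	void *private;			/* submitter's context            */
	void (*end_io)(struct rq *rq);	/* submitter's completion routine */
};

struct demo_target {
	const char *name;
};

static void demo_end_io(struct rq *rq)
{
	struct demo_target *t = rq->private;

	printf("%s: request done, error=%d\n", t->name, rq->error);
}

/* What the completing layer does: no lookup table, no global hook --
 * it invokes whatever the submitter attached to the request. */
static void complete_rq(struct rq *rq, int error)
{
	rq->error = error;
	if (rq->end_io)
		rq->end_io(rq);
}

int main(void)
{
	struct demo_target tgt = { "demo-target" };
	struct rq rq = { 0, &tgt, demo_end_io };

	complete_rq(&rq, 0);
	return 0;
}
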
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index 94e4d73116b2..fdb6ff902903 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -102,9 +102,6 @@ struct rrpc_lun {
102}; 102};
103 103
104struct rrpc { 104struct rrpc {
105 /* instance must be kept in top to resolve rrpc in unprep */
106 struct nvm_tgt_instance instance;
107
108 struct nvm_tgt_dev *dev; 105 struct nvm_tgt_dev *dev;
109 struct gendisk *disk; 106 struct gendisk *disk;
110 107
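For context, the rrpc hunks above drop the embedded nvm_tgt_instance and its container_of() lookup in favour of a plain private pointer plus a per-request completion hook. A schematic comparison, abridged to the fields touched by this patch:

/* Old idiom: recover the owning rrpc from an embedded instance. */
struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);

/* New idiom: stash the owner directly and set the completion hook on the
 * request itself instead of in struct nvm_tgt_type. */
rqd->private = rrpc;
rqd->end_io = rrpc_end_io;	/* invoked when the core calls nvm_end_io() */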
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c
deleted file mode 100644
index 12002bf4efc2..000000000000
--- a/drivers/lightnvm/sysblk.c
+++ /dev/null
@@ -1,733 +0,0 @@
1/*
2 * Copyright (C) 2015 Matias Bjorling. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License version
6 * 2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; see the file COPYING. If not, write to
15 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
16 * USA.
17 *
18 */
19
20#include <linux/lightnvm.h>
21
22#define MAX_SYSBLKS 3 /* remember to update mapping scheme on change */
23#define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases
24 * enables ~1.5M updates per sysblk unit
25 */
26
27struct sysblk_scan {
28 /* A row is a collection of flash blocks for a system block. */
29 int nr_rows;
30 int row;
31 int act_blk[MAX_SYSBLKS];
32
33 int nr_ppas;
34 struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */
35};
36
37static inline int scan_ppa_idx(int row, int blkid)
38{
39 return (row * MAX_BLKS_PR_SYSBLK) + blkid;
40}
41
42static void nvm_sysblk_to_cpu(struct nvm_sb_info *info,
43 struct nvm_system_block *sb)
44{
45 info->seqnr = be32_to_cpu(sb->seqnr);
46 info->erase_cnt = be32_to_cpu(sb->erase_cnt);
47 info->version = be16_to_cpu(sb->version);
48 strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN);
49 info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa);
50}
51
52static void nvm_cpu_to_sysblk(struct nvm_system_block *sb,
53 struct nvm_sb_info *info)
54{
55 sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC);
56 sb->seqnr = cpu_to_be32(info->seqnr);
57 sb->erase_cnt = cpu_to_be32(info->erase_cnt);
58 sb->version = cpu_to_be16(info->version);
59 strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN);
60 sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa);
61}
62
63static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
64{
65 struct nvm_geo *geo = &dev->geo;
66 int nr_rows = min_t(int, MAX_SYSBLKS, geo->nr_chnls);
67 int i;
68
69 for (i = 0; i < nr_rows; i++)
70 sysblk_ppas[i].ppa = 0;
71
72 /* if possible, place sysblk at first channel, middle channel and last
73 * channel of the device. If not, create only one or two sys blocks
74 */
75 switch (geo->nr_chnls) {
76 case 2:
77 sysblk_ppas[1].g.ch = 1;
78 /* fall-through */
79 case 1:
80 sysblk_ppas[0].g.ch = 0;
81 break;
82 default:
83 sysblk_ppas[0].g.ch = 0;
84 sysblk_ppas[1].g.ch = geo->nr_chnls / 2;
85 sysblk_ppas[2].g.ch = geo->nr_chnls - 1;
86 break;
87 }
88
89 return nr_rows;
90}
91
92static void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s,
93 struct ppa_addr *sysblk_ppas)
94{
95 memset(s, 0, sizeof(struct sysblk_scan));
96 s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas);
97}
98
99static int sysblk_get_free_blks(struct nvm_dev *dev, struct ppa_addr ppa,
100 u8 *blks, int nr_blks,
101 struct sysblk_scan *s)
102{
103 struct ppa_addr *sppa;
104 int i, blkid = 0;
105
106 nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
107 if (nr_blks < 0)
108 return nr_blks;
109
110 for (i = 0; i < nr_blks; i++) {
111 if (blks[i] == NVM_BLK_T_HOST)
112 return -EEXIST;
113
114 if (blks[i] != NVM_BLK_T_FREE)
115 continue;
116
117 sppa = &s->ppas[scan_ppa_idx(s->row, blkid)];
118 sppa->g.ch = ppa.g.ch;
119 sppa->g.lun = ppa.g.lun;
120 sppa->g.blk = i;
121 s->nr_ppas++;
122 blkid++;
123
124 pr_debug("nvm: use (%u %u %u) as sysblk\n",
125 sppa->g.ch, sppa->g.lun, sppa->g.blk);
126 if (blkid > MAX_BLKS_PR_SYSBLK - 1)
127 return 0;
128 }
129
130 pr_err("nvm: sysblk failed get sysblk\n");
131 return -EINVAL;
132}
133
134static int sysblk_get_host_blks(struct nvm_dev *dev, struct ppa_addr ppa,
135 u8 *blks, int nr_blks,
136 struct sysblk_scan *s)
137{
138 int i, nr_sysblk = 0;
139
140 nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
141 if (nr_blks < 0)
142 return nr_blks;
143
144 for (i = 0; i < nr_blks; i++) {
145 if (blks[i] != NVM_BLK_T_HOST)
146 continue;
147
148 if (s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) {
149 pr_err("nvm: too many host blks\n");
150 return -EINVAL;
151 }
152
153 ppa.g.blk = i;
154
155 s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa;
156 s->nr_ppas++;
157 nr_sysblk++;
158 }
159
160 return 0;
161}
162
163static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s,
164 struct ppa_addr *ppas, int get_free)
165{
166 struct nvm_geo *geo = &dev->geo;
167 int i, nr_blks, ret = 0;
168 u8 *blks;
169
170 s->nr_ppas = 0;
171 nr_blks = geo->blks_per_lun * geo->plane_mode;
172
173 blks = kmalloc(nr_blks, GFP_KERNEL);
174 if (!blks)
175 return -ENOMEM;
176
177 for (i = 0; i < s->nr_rows; i++) {
178 s->row = i;
179
180 ret = nvm_get_bb_tbl(dev, ppas[i], blks);
181 if (ret) {
182 pr_err("nvm: failed bb tbl for ppa (%u %u)\n",
183 ppas[i].g.ch,
184 ppas[i].g.blk);
185 goto err_get;
186 }
187
188 if (get_free)
189 ret = sysblk_get_free_blks(dev, ppas[i], blks, nr_blks,
190 s);
191 else
192 ret = sysblk_get_host_blks(dev, ppas[i], blks, nr_blks,
193 s);
194
195 if (ret)
196 goto err_get;
197 }
198
199err_get:
200 kfree(blks);
201 return ret;
202}
203
204/*
205 * scans a block for latest sysblk.
206 * Returns:
207 * 0 - newer sysblk not found. PPA is updated to latest page.
208 * 1 - newer sysblk found and stored in *cur. PPA is updated to
209 * next valid page.
210 * <0- error.
211 */
212static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
213 struct nvm_system_block *sblk)
214{
215 struct nvm_geo *geo = &dev->geo;
216 struct nvm_system_block *cur;
217 int pg, ret, found = 0;
218
 219	/* the full buffer for a flash page is allocated. Only the first part of
 220	 * it contains the system block information
221 */
222 cur = kmalloc(geo->pfpg_size, GFP_KERNEL);
223 if (!cur)
224 return -ENOMEM;
225
226 /* perform linear scan through the block */
227 for (pg = 0; pg < dev->lps_per_blk; pg++) {
228 ppa->g.pg = ppa_to_slc(dev, pg);
229
230 ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE,
231 cur, geo->pfpg_size);
232 if (ret) {
233 if (ret == NVM_RSP_ERR_EMPTYPAGE) {
234 pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n",
235 ppa->g.ch,
236 ppa->g.lun,
237 ppa->g.blk,
238 ppa->g.pg);
239 break;
240 }
241 pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)",
242 ret,
243 ppa->g.ch,
244 ppa->g.lun,
245 ppa->g.blk,
246 ppa->g.pg);
247 break; /* if we can't read a page, continue to the
248 * next blk
249 */
250 }
251
252 if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) {
253 pr_debug("nvm: scan break for ppa (%u %u %u %u)\n",
254 ppa->g.ch,
255 ppa->g.lun,
256 ppa->g.blk,
257 ppa->g.pg);
258 break; /* last valid page already found */
259 }
260
261 if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr))
262 continue;
263
264 memcpy(sblk, cur, sizeof(struct nvm_system_block));
265 found = 1;
266 }
267
268 kfree(cur);
269
270 return found;
271}
272
273static int nvm_sysblk_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s,
274 int type)
275{
276 return nvm_set_bb_tbl(dev, s->ppas, s->nr_ppas, type);
277}
278
279static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
280 struct sysblk_scan *s)
281{
282 struct nvm_geo *geo = &dev->geo;
283 struct nvm_system_block nvmsb;
284 void *buf;
285 int i, sect, ret = 0;
286 struct ppa_addr *ppas;
287
288 nvm_cpu_to_sysblk(&nvmsb, info);
289
290 buf = kzalloc(geo->pfpg_size, GFP_KERNEL);
291 if (!buf)
292 return -ENOMEM;
293 memcpy(buf, &nvmsb, sizeof(struct nvm_system_block));
294
295 ppas = kcalloc(geo->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
296 if (!ppas) {
297 ret = -ENOMEM;
298 goto err;
299 }
300
301 /* Write and verify */
302 for (i = 0; i < s->nr_rows; i++) {
303 ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])];
304
305 pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n",
306 ppas[0].g.ch,
307 ppas[0].g.lun,
308 ppas[0].g.blk,
309 ppas[0].g.pg);
310
311 /* Expand to all sectors within a flash page */
312 if (geo->sec_per_pg > 1) {
313 for (sect = 1; sect < geo->sec_per_pg; sect++) {
314 ppas[sect].ppa = ppas[0].ppa;
315 ppas[sect].g.sec = sect;
316 }
317 }
318
319 ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PWRITE,
320 NVM_IO_SLC_MODE, buf, geo->pfpg_size);
321 if (ret) {
322 pr_err("nvm: sysblk failed program (%u %u %u)\n",
323 ppas[0].g.ch,
324 ppas[0].g.lun,
325 ppas[0].g.blk);
326 break;
327 }
328
329 ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PREAD,
330 NVM_IO_SLC_MODE, buf, geo->pfpg_size);
331 if (ret) {
332 pr_err("nvm: sysblk failed read (%u %u %u)\n",
333 ppas[0].g.ch,
334 ppas[0].g.lun,
335 ppas[0].g.blk);
336 break;
337 }
338
339 if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) {
340 pr_err("nvm: sysblk failed verify (%u %u %u)\n",
341 ppas[0].g.ch,
342 ppas[0].g.lun,
343 ppas[0].g.blk);
344 ret = -EINVAL;
345 break;
346 }
347 }
348
349 kfree(ppas);
350err:
351 kfree(buf);
352
353 return ret;
354}
355
356static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s)
357{
358 int i, ret;
359 unsigned long nxt_blk;
360 struct ppa_addr *ppa;
361
362 for (i = 0; i < s->nr_rows; i++) {
363 nxt_blk = (s->act_blk[i] + 1) % MAX_BLKS_PR_SYSBLK;
364 ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)];
365 ppa->g.pg = ppa_to_slc(dev, 0);
366
367 ret = nvm_erase_ppa(dev, ppa, 1, 0);
368 if (ret)
369 return ret;
370
371 s->act_blk[i] = nxt_blk;
372 }
373
374 return 0;
375}
376
377int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
378{
379 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
380 struct sysblk_scan s;
381 struct nvm_system_block *cur;
382 int i, j, found = 0;
383 int ret = -ENOMEM;
384
385 /*
386 * 1. setup sysblk locations
387 * 2. get bad block list
388 * 3. filter on host-specific (type 3)
389 * 4. iterate through all and find the highest seq nr.
390 * 5. return superblock information
391 */
392
393 if (!dev->ops->get_bb_tbl)
394 return -EINVAL;
395
396 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
397
398 mutex_lock(&dev->mlock);
399 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0);
400 if (ret)
401 goto err_sysblk;
402
403 /* no sysblocks initialized */
404 if (!s.nr_ppas)
405 goto err_sysblk;
406
407 cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
408 if (!cur)
409 goto err_sysblk;
410
411 /* find the latest block across all sysblocks */
412 for (i = 0; i < s.nr_rows; i++) {
413 for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
414 struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)];
415
416 ret = nvm_scan_block(dev, &ppa, cur);
417 if (ret > 0)
418 found = 1;
419 else if (ret < 0)
420 break;
421 }
422 }
423
424 nvm_sysblk_to_cpu(info, cur);
425
426 kfree(cur);
427err_sysblk:
428 mutex_unlock(&dev->mlock);
429
430 if (found)
431 return 1;
432 return ret;
433}
434
435int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new)
436{
437 /* 1. for each latest superblock
438 * 2. if room
439 * a. write new flash page entry with the updated information
440 * 3. if no room
441 * a. find next available block on lun (linear search)
442 * if none, continue to next lun
443 * if none at all, report error. also report that it wasn't
444 * possible to write to all superblocks.
445 * c. write data to block.
446 */
447 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
448 struct sysblk_scan s;
449 struct nvm_system_block *cur;
450 int i, j, ppaidx, found = 0;
451 int ret = -ENOMEM;
452
453 if (!dev->ops->get_bb_tbl)
454 return -EINVAL;
455
456 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
457
458 mutex_lock(&dev->mlock);
459 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0);
460 if (ret)
461 goto err_sysblk;
462
463 cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
464 if (!cur)
465 goto err_sysblk;
466
467 /* Get the latest sysblk for each sysblk row */
468 for (i = 0; i < s.nr_rows; i++) {
469 found = 0;
470 for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
471 ppaidx = scan_ppa_idx(i, j);
472 ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur);
473 if (ret > 0) {
474 s.act_blk[i] = j;
475 found = 1;
476 } else if (ret < 0)
477 break;
478 }
479 }
480
481 if (!found) {
482 pr_err("nvm: no valid sysblks found to update\n");
483 ret = -EINVAL;
484 goto err_cur;
485 }
486
487 /*
 488	 * All sysblocks found. Check that they have the same page id in their flash
489 * blocks
490 */
491 for (i = 1; i < s.nr_rows; i++) {
492 struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])];
493 struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])];
494
495 if (l.g.pg != r.g.pg) {
496 pr_err("nvm: sysblks not on same page. Previous update failed.\n");
497 ret = -EINVAL;
498 goto err_cur;
499 }
500 }
501
502 /*
 503	 * Check that there hasn't been another update to the seqnr since we
504 * began
505 */
506 if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) {
507 pr_err("nvm: seq is not sequential\n");
508 ret = -EINVAL;
509 goto err_cur;
510 }
511
512 /*
 513	 * When all pages in a block have been written, a new block is selected
514 * and writing is performed on the new block.
515 */
516 if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg ==
517 dev->lps_per_blk - 1) {
518 ret = nvm_prepare_new_sysblks(dev, &s);
519 if (ret)
520 goto err_cur;
521 }
522
523 ret = nvm_write_and_verify(dev, new, &s);
524err_cur:
525 kfree(cur);
526err_sysblk:
527 mutex_unlock(&dev->mlock);
528
529 return ret;
530}
531
532int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
533{
534 struct nvm_geo *geo = &dev->geo;
535 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
536 struct sysblk_scan s;
537 int ret;
538
539 /*
540 * 1. select master blocks and select first available blks
541 * 2. get bad block list
542 * 3. mark MAX_SYSBLKS block as host-based device allocated.
543 * 4. write and verify data to block
544 */
545
546 if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl)
547 return -EINVAL;
548
549 if (!(geo->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) {
550 pr_err("nvm: memory does not support SLC access\n");
551 return -EINVAL;
552 }
553
554 /* Index all sysblocks and mark them as host-driven */
555 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
556
557 mutex_lock(&dev->mlock);
558 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 1);
559 if (ret)
560 goto err_mark;
561
562 ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
563 if (ret)
564 goto err_mark;
565
566 /* Write to the first block of each row */
567 ret = nvm_write_and_verify(dev, info, &s);
568err_mark:
569 mutex_unlock(&dev->mlock);
570 return ret;
571}
572
573static int factory_nblks(int nblks)
574{
575 /* Round up to nearest BITS_PER_LONG */
576 return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
577}
578
579static unsigned int factory_blk_offset(struct nvm_geo *geo, struct ppa_addr ppa)
580{
581 int nblks = factory_nblks(geo->blks_per_lun);
582
583 return ((ppa.g.ch * geo->luns_per_chnl * nblks) + (ppa.g.lun * nblks)) /
584 BITS_PER_LONG;
585}
586
587static int nvm_factory_blks(struct nvm_dev *dev, struct ppa_addr ppa,
588 u8 *blks, int nr_blks,
589 unsigned long *blk_bitmap, int flags)
590{
591 int i, lunoff;
592
593 nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks);
594 if (nr_blks < 0)
595 return nr_blks;
596
597 lunoff = factory_blk_offset(&dev->geo, ppa);
598
 599	/* unset bits mark blocks that must be erased */
600 for (i = 0; i < nr_blks; i++) {
601 switch (blks[i]) {
602 case NVM_BLK_T_FREE:
603 if (flags & NVM_FACTORY_ERASE_ONLY_USER)
604 set_bit(i, &blk_bitmap[lunoff]);
605 break;
606 case NVM_BLK_T_HOST:
607 if (!(flags & NVM_FACTORY_RESET_HOST_BLKS))
608 set_bit(i, &blk_bitmap[lunoff]);
609 break;
610 case NVM_BLK_T_GRWN_BAD:
611 if (!(flags & NVM_FACTORY_RESET_GRWN_BBLKS))
612 set_bit(i, &blk_bitmap[lunoff]);
613 break;
614 default:
615 set_bit(i, &blk_bitmap[lunoff]);
616 break;
617 }
618 }
619
620 return 0;
621}
622
623static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
624 int max_ppas, unsigned long *blk_bitmap)
625{
626 struct nvm_geo *geo = &dev->geo;
627 struct ppa_addr ppa;
628 int ch, lun, blkid, idx, done = 0, ppa_cnt = 0;
629 unsigned long *offset;
630
631 while (!done) {
632 done = 1;
633 nvm_for_each_lun_ppa(geo, ppa, ch, lun) {
634 idx = factory_blk_offset(geo, ppa);
635 offset = &blk_bitmap[idx];
636
637 blkid = find_first_zero_bit(offset, geo->blks_per_lun);
638 if (blkid >= geo->blks_per_lun)
639 continue;
640 set_bit(blkid, offset);
641
642 ppa.g.blk = blkid;
643 pr_debug("nvm: erase ppa (%u %u %u)\n",
644 ppa.g.ch,
645 ppa.g.lun,
646 ppa.g.blk);
647
648 erase_list[ppa_cnt] = ppa;
649 ppa_cnt++;
650 done = 0;
651
652 if (ppa_cnt == max_ppas)
653 return ppa_cnt;
654 }
655 }
656
657 return ppa_cnt;
658}
659
660static int nvm_fact_select_blks(struct nvm_dev *dev, unsigned long *blk_bitmap,
661 int flags)
662{
663 struct nvm_geo *geo = &dev->geo;
664 struct ppa_addr ppa;
665 int ch, lun, nr_blks, ret = 0;
666 u8 *blks;
667
668 nr_blks = geo->blks_per_lun * geo->plane_mode;
669 blks = kmalloc(nr_blks, GFP_KERNEL);
670 if (!blks)
671 return -ENOMEM;
672
673 nvm_for_each_lun_ppa(geo, ppa, ch, lun) {
674 ret = nvm_get_bb_tbl(dev, ppa, blks);
675 if (ret)
676 pr_err("nvm: failed bb tbl for ch%u lun%u\n",
677 ppa.g.ch, ppa.g.blk);
678
679 ret = nvm_factory_blks(dev, ppa, blks, nr_blks, blk_bitmap,
680 flags);
681 if (ret)
682 break;
683 }
684
685 kfree(blks);
686 return ret;
687}
688
689int nvm_dev_factory(struct nvm_dev *dev, int flags)
690{
691 struct nvm_geo *geo = &dev->geo;
692 struct ppa_addr *ppas;
693 int ppa_cnt, ret = -ENOMEM;
694 int max_ppas = dev->ops->max_phys_sect / geo->nr_planes;
695 struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
696 struct sysblk_scan s;
697 unsigned long *blk_bitmap;
698
699 blk_bitmap = kzalloc(factory_nblks(geo->blks_per_lun) * geo->nr_luns,
700 GFP_KERNEL);
701 if (!blk_bitmap)
702 return ret;
703
704 ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL);
705 if (!ppas)
706 goto err_blks;
707
708 /* create list of blks to be erased */
709 ret = nvm_fact_select_blks(dev, blk_bitmap, flags);
710 if (ret)
711 goto err_ppas;
712
 713	/* keep erasing blocks until the list is empty */
714 while ((ppa_cnt =
715 nvm_fact_get_blks(dev, ppas, max_ppas, blk_bitmap)) > 0)
716 nvm_erase_ppa(dev, ppas, ppa_cnt, 0);
717
718 /* mark host reserved blocks free */
719 if (flags & NVM_FACTORY_RESET_HOST_BLKS) {
720 nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
721 mutex_lock(&dev->mlock);
722 ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0);
723 if (!ret)
724 ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
725 mutex_unlock(&dev->mlock);
726 }
727err_ppas:
728 kfree(ppas);
729err_blks:
730 kfree(blk_bitmap);
731 return ret;
732}
733EXPORT_SYMBOL(nvm_dev_factory);
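The removed scan code kept every candidate system block in one flat array, indexed row-major through scan_ppa_idx(). For reference, with MAX_BLKS_PR_SYSBLK == 2 that layout was:

/* scan_ppa_idx(row, blkid) == row * MAX_BLKS_PR_SYSBLK + blkid */
s->ppas[scan_ppa_idx(0, 0)];	/* index 0: row 0, block 0 */
s->ppas[scan_ppa_idx(0, 1)];	/* index 1: row 0, block 1 */
s->ppas[scan_ppa_idx(1, 0)];	/* index 2: row 1, block 0 */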
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 76d20875503c..01035e718c1c 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio,
666 s->iop.write_prio = 0; 666 s->iop.write_prio = 0;
667 s->iop.error = 0; 667 s->iop.error = 0;
668 s->iop.flags = 0; 668 s->iop.flags = 0;
669 s->iop.flush_journal = (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0; 669 s->iop.flush_journal = op_is_flush(bio->bi_opf);
670 s->iop.wq = bcache_wq; 670 s->iop.wq = bcache_wq;
671 671
672 return s; 672 return s;
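This hunk and the md hunks that follow fold the open-coded REQ_PREFLUSH/REQ_FUA test into the op_is_flush() helper introduced in the blk_types.h hunk further down; the two spellings are equivalent:

bool flush;

/* Open-coded test replaced throughout this series: */
flush = (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) != 0;

/* Equivalent form using the new helper from <linux/blk_types.h>: */
flush = op_is_flush(bio->bi_opf);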
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index e04c61e0839e..5b9cf56de8ef 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
787 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 787 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
788 788
789 spin_lock_irqsave(&cache->lock, flags); 789 spin_lock_irqsave(&cache->lock, flags);
790 if (cache->need_tick_bio && 790 if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
791 !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) &&
792 bio_op(bio) != REQ_OP_DISCARD) { 791 bio_op(bio) != REQ_OP_DISCARD) {
793 pb->tick = true; 792 pb->tick = true;
794 cache->need_tick_bio = false; 793 cache->need_tick_bio = false;
@@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
828 return to_oblock(block_nr); 827 return to_oblock(block_nr);
829} 828}
830 829
831static int bio_triggers_commit(struct cache *cache, struct bio *bio)
832{
833 return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
834}
835
836/* 830/*
837 * You must increment the deferred set whilst the prison cell is held. To 831 * You must increment the deferred set whilst the prison cell is held. To
838 * encourage this, we ask for 'cell' to be passed in. 832 * encourage this, we ask for 'cell' to be passed in.
@@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio)
884{ 878{
885 unsigned long flags; 879 unsigned long flags;
886 880
887 if (!bio_triggers_commit(cache, bio)) { 881 if (!op_is_flush(bio->bi_opf)) {
888 accounted_request(cache, bio); 882 accounted_request(cache, bio);
889 return; 883 return;
890 } 884 }
@@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache)
1069 1063
1070static bool discard_or_flush(struct bio *bio) 1064static bool discard_or_flush(struct bio *bio)
1071{ 1065{
1072 return bio_op(bio) == REQ_OP_DISCARD || 1066 return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
1073 bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
1074} 1067}
1075 1068
1076static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) 1069static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index d1c05c12a9db..110982db4b48 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
699 699
700static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) 700static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
701{ 701{
702 return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && 702 return op_is_flush(bio->bi_opf) &&
703 dm_thin_changed_this_transaction(tc->td); 703 dm_thin_changed_this_transaction(tc->td);
704} 704}
705 705
@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context,
870 struct bio *bio; 870 struct bio *bio;
871 871
872 while ((bio = bio_list_pop(&cell->bios))) { 872 while ((bio = bio_list_pop(&cell->bios))) {
873 if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || 873 if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
874 bio_op(bio) == REQ_OP_DISCARD)
875 bio_list_add(&info->defer_bios, bio); 874 bio_list_add(&info->defer_bios, bio);
876 else { 875 else {
877 inc_all_io_entry(info->tc->pool, bio); 876 inc_all_io_entry(info->tc->pool, bio);
@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context,
1716 struct bio *bio; 1715 struct bio *bio;
1717 1716
1718 while ((bio = bio_list_pop(&cell->bios))) { 1717 while ((bio = bio_list_pop(&cell->bios))) {
1719 if ((bio_data_dir(bio) == WRITE) || 1718 if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
1720 (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || 1719 bio_op(bio) == REQ_OP_DISCARD)
1721 bio_op(bio) == REQ_OP_DISCARD))
1722 bio_list_add(&info->defer_bios, bio); 1720 bio_list_add(&info->defer_bios, bio);
1723 else { 1721 else {
1724 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));; 1722 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));;
@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
2635 return DM_MAPIO_SUBMITTED; 2633 return DM_MAPIO_SUBMITTED;
2636 } 2634 }
2637 2635
2638 if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || 2636 if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
2639 bio_op(bio) == REQ_OP_DISCARD) {
2640 thin_defer_bio_with_throttle(tc, bio); 2637 thin_defer_bio_with_throttle(tc, bio);
2641 return DM_MAPIO_SUBMITTED; 2638 return DM_MAPIO_SUBMITTED;
2642 } 2639 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8a3c3e32a704..138c6fa00cd5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -784,6 +784,13 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
784 return nvme_sg_io(ns, (void __user *)arg); 784 return nvme_sg_io(ns, (void __user *)arg);
785#endif 785#endif
786 default: 786 default:
787#ifdef CONFIG_NVM
788 if (ns->ndev)
789 return nvme_nvm_ioctl(ns, cmd, arg);
790#endif
791 if (is_sed_ioctl(cmd))
792 return sed_ioctl(ns->ctrl->opal_dev, cmd,
793 (void __user *) arg);
787 return -ENOTTY; 794 return -ENOTTY;
788 } 795 }
789} 796}
@@ -1051,6 +1058,28 @@ static const struct pr_ops nvme_pr_ops = {
1051 .pr_clear = nvme_pr_clear, 1058 .pr_clear = nvme_pr_clear,
1052}; 1059};
1053 1060
1061#ifdef CONFIG_BLK_SED_OPAL
1062int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
1063 bool send)
1064{
1065 struct nvme_ctrl *ctrl = data;
1066 struct nvme_command cmd;
1067
1068 memset(&cmd, 0, sizeof(cmd));
1069 if (send)
1070 cmd.common.opcode = nvme_admin_security_send;
1071 else
1072 cmd.common.opcode = nvme_admin_security_recv;
1073 cmd.common.nsid = 0;
1074 cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
1075 cmd.common.cdw10[1] = cpu_to_le32(len);
1076
1077 return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
1078 ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0);
1079}
1080EXPORT_SYMBOL_GPL(nvme_sec_submit);
1081#endif /* CONFIG_BLK_SED_OPAL */
1082
1054static const struct block_device_operations nvme_fops = { 1083static const struct block_device_operations nvme_fops = {
1055 .owner = THIS_MODULE, 1084 .owner = THIS_MODULE,
1056 .ioctl = nvme_ioctl, 1085 .ioctl = nvme_ioctl,
@@ -1230,6 +1259,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
1230 return -EIO; 1259 return -EIO;
1231 } 1260 }
1232 1261
1262 ctrl->oacs = le16_to_cpu(id->oacs);
1233 ctrl->vid = le16_to_cpu(id->vid); 1263 ctrl->vid = le16_to_cpu(id->vid);
1234 ctrl->oncs = le16_to_cpup(&id->oncs); 1264 ctrl->oncs = le16_to_cpup(&id->oncs);
1235 atomic_set(&ctrl->abort_limit, id->acl + 1); 1265 atomic_set(&ctrl->abort_limit, id->acl + 1);
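nvme_sec_submit() above maps an OPAL payload onto an NVMe Security Send/Receive command: the security protocol (secp) lands in bits 31:24 of CDW10, the protocol-specific field (spsp) in bits 23:8, and the transfer length in the following dword. A small worked example of that packing, with values chosen purely for illustration:

u8  secp = 0x01;	/* illustrative security protocol id */
u16 spsp = 0x0001;	/* illustrative protocol-specific value */
u32 cdw10 = ((u32)secp << 24) | ((u32)spsp << 8);
/* secp = 0x01, spsp = 0x0001  =>  cdw10 = 0x01000100 */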
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 588d4a34c083..21cac8523bd8 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -26,6 +26,8 @@
26#include <linux/bitops.h> 26#include <linux/bitops.h>
27#include <linux/lightnvm.h> 27#include <linux/lightnvm.h>
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/sched/sysctl.h>
30#include <uapi/linux/lightnvm.h>
29 31
30enum nvme_nvm_admin_opcode { 32enum nvme_nvm_admin_opcode {
31 nvme_nvm_admin_identity = 0xe2, 33 nvme_nvm_admin_identity = 0xe2,
@@ -248,50 +250,48 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
248{ 250{
249 struct nvme_nvm_id_group *src; 251 struct nvme_nvm_id_group *src;
250 struct nvm_id_group *dst; 252 struct nvm_id_group *dst;
251 int i, end;
252
253 end = min_t(u32, 4, nvm_id->cgrps);
254
255 for (i = 0; i < end; i++) {
256 src = &nvme_nvm_id->groups[i];
257 dst = &nvm_id->groups[i];
258
259 dst->mtype = src->mtype;
260 dst->fmtype = src->fmtype;
261 dst->num_ch = src->num_ch;
262 dst->num_lun = src->num_lun;
263 dst->num_pln = src->num_pln;
264
265 dst->num_pg = le16_to_cpu(src->num_pg);
266 dst->num_blk = le16_to_cpu(src->num_blk);
267 dst->fpg_sz = le16_to_cpu(src->fpg_sz);
268 dst->csecs = le16_to_cpu(src->csecs);
269 dst->sos = le16_to_cpu(src->sos);
270
271 dst->trdt = le32_to_cpu(src->trdt);
272 dst->trdm = le32_to_cpu(src->trdm);
273 dst->tprt = le32_to_cpu(src->tprt);
274 dst->tprm = le32_to_cpu(src->tprm);
275 dst->tbet = le32_to_cpu(src->tbet);
276 dst->tbem = le32_to_cpu(src->tbem);
277 dst->mpos = le32_to_cpu(src->mpos);
278 dst->mccap = le32_to_cpu(src->mccap);
279
280 dst->cpar = le16_to_cpu(src->cpar);
281
282 if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
283 memcpy(dst->lptbl.id, src->lptbl.id, 8);
284 dst->lptbl.mlc.num_pairs =
285 le16_to_cpu(src->lptbl.mlc.num_pairs);
286
287 if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
288 pr_err("nvm: number of MLC pairs not supported\n");
289 return -EINVAL;
290 }
291 253
292 memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, 254 if (nvme_nvm_id->cgrps != 1)
293 dst->lptbl.mlc.num_pairs); 255 return -EINVAL;
256
257 src = &nvme_nvm_id->groups[0];
258 dst = &nvm_id->grp;
259
260 dst->mtype = src->mtype;
261 dst->fmtype = src->fmtype;
262 dst->num_ch = src->num_ch;
263 dst->num_lun = src->num_lun;
264 dst->num_pln = src->num_pln;
265
266 dst->num_pg = le16_to_cpu(src->num_pg);
267 dst->num_blk = le16_to_cpu(src->num_blk);
268 dst->fpg_sz = le16_to_cpu(src->fpg_sz);
269 dst->csecs = le16_to_cpu(src->csecs);
270 dst->sos = le16_to_cpu(src->sos);
271
272 dst->trdt = le32_to_cpu(src->trdt);
273 dst->trdm = le32_to_cpu(src->trdm);
274 dst->tprt = le32_to_cpu(src->tprt);
275 dst->tprm = le32_to_cpu(src->tprm);
276 dst->tbet = le32_to_cpu(src->tbet);
277 dst->tbem = le32_to_cpu(src->tbem);
278 dst->mpos = le32_to_cpu(src->mpos);
279 dst->mccap = le32_to_cpu(src->mccap);
280
281 dst->cpar = le16_to_cpu(src->cpar);
282
283 if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
284 memcpy(dst->lptbl.id, src->lptbl.id, 8);
285 dst->lptbl.mlc.num_pairs =
286 le16_to_cpu(src->lptbl.mlc.num_pairs);
287
288 if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
289 pr_err("nvm: number of MLC pairs not supported\n");
290 return -EINVAL;
294 } 291 }
292
293 memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
294 dst->lptbl.mlc.num_pairs);
295 } 295 }
296 296
297 return 0; 297 return 0;
@@ -321,7 +321,6 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
321 321
322 nvm_id->ver_id = nvme_nvm_id->ver_id; 322 nvm_id->ver_id = nvme_nvm_id->ver_id;
323 nvm_id->vmnt = nvme_nvm_id->vmnt; 323 nvm_id->vmnt = nvme_nvm_id->vmnt;
324 nvm_id->cgrps = nvme_nvm_id->cgrps;
325 nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); 324 nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
326 nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); 325 nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
327 memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, 326 memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
@@ -372,7 +371,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
372 } 371 }
373 372
374 /* Transform physical address to target address space */ 373 /* Transform physical address to target address space */
375 nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb); 374 nvm_part_to_tgt(nvmdev, entries, cmd_nlb);
376 375
377 if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { 376 if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
378 ret = -EINTR; 377 ret = -EINTR;
@@ -485,7 +484,8 @@ static void nvme_nvm_end_io(struct request *rq, int error)
485 struct nvm_rq *rqd = rq->end_io_data; 484 struct nvm_rq *rqd = rq->end_io_data;
486 485
487 rqd->ppa_status = nvme_req(rq)->result.u64; 486 rqd->ppa_status = nvme_req(rq)->result.u64;
488 nvm_end_io(rqd, error); 487 rqd->error = error;
488 nvm_end_io(rqd);
489 489
490 kfree(nvme_req(rq)->cmd); 490 kfree(nvme_req(rq)->cmd);
491 blk_mq_free_request(rq); 491 blk_mq_free_request(rq);
@@ -586,6 +586,224 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
586 .max_phys_sect = 64, 586 .max_phys_sect = 64,
587}; 587};
588 588
589static void nvme_nvm_end_user_vio(struct request *rq, int error)
590{
591 struct completion *waiting = rq->end_io_data;
592
593 complete(waiting);
594}
595
596static int nvme_nvm_submit_user_cmd(struct request_queue *q,
597 struct nvme_ns *ns,
598 struct nvme_nvm_command *vcmd,
599 void __user *ubuf, unsigned int bufflen,
600 void __user *meta_buf, unsigned int meta_len,
601 void __user *ppa_buf, unsigned int ppa_len,
602 u32 *result, u64 *status, unsigned int timeout)
603{
604 bool write = nvme_is_write((struct nvme_command *)vcmd);
605 struct nvm_dev *dev = ns->ndev;
606 struct gendisk *disk = ns->disk;
607 struct request *rq;
608 struct bio *bio = NULL;
609 __le64 *ppa_list = NULL;
610 dma_addr_t ppa_dma;
611 __le64 *metadata = NULL;
612 dma_addr_t metadata_dma;
613 DECLARE_COMPLETION_ONSTACK(wait);
614 int ret;
615
616 rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0,
617 NVME_QID_ANY);
618 if (IS_ERR(rq)) {
619 ret = -ENOMEM;
620 goto err_cmd;
621 }
622
623 rq->timeout = timeout ? timeout : ADMIN_TIMEOUT;
624
625 rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
626 rq->end_io_data = &wait;
627
628 if (ppa_buf && ppa_len) {
629 ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma);
630 if (!ppa_list) {
631 ret = -ENOMEM;
632 goto err_rq;
633 }
634 if (copy_from_user(ppa_list, (void __user *)ppa_buf,
635 sizeof(u64) * (ppa_len + 1))) {
636 ret = -EFAULT;
637 goto err_ppa;
638 }
639 vcmd->ph_rw.spba = cpu_to_le64(ppa_dma);
640 } else {
641 vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf);
642 }
643
644 if (ubuf && bufflen) {
645 ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL);
646 if (ret)
647 goto err_ppa;
648 bio = rq->bio;
649
650 if (meta_buf && meta_len) {
651 metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL,
652 &metadata_dma);
653 if (!metadata) {
654 ret = -ENOMEM;
655 goto err_map;
656 }
657
658 if (write) {
659 if (copy_from_user(metadata,
660 (void __user *)meta_buf,
661 meta_len)) {
662 ret = -EFAULT;
663 goto err_meta;
664 }
665 }
666 vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma);
667 }
668
669 if (!disk)
670 goto submit;
671
672 bio->bi_bdev = bdget_disk(disk, 0);
673 if (!bio->bi_bdev) {
674 ret = -ENODEV;
675 goto err_meta;
676 }
677 }
678
679submit:
680 blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio);
681
682 wait_for_completion_io(&wait);
683
684 ret = nvme_error_status(rq->errors);
685 if (result)
686 *result = rq->errors & 0x7ff;
687 if (status)
688 *status = le64_to_cpu(nvme_req(rq)->result.u64);
689
690 if (metadata && !ret && !write) {
691 if (copy_to_user(meta_buf, (void *)metadata, meta_len))
692 ret = -EFAULT;
693 }
694err_meta:
695 if (meta_buf && meta_len)
696 dma_pool_free(dev->dma_pool, metadata, metadata_dma);
697err_map:
698 if (bio) {
699 if (disk && bio->bi_bdev)
700 bdput(bio->bi_bdev);
701 blk_rq_unmap_user(bio);
702 }
703err_ppa:
704 if (ppa_buf && ppa_len)
705 dma_pool_free(dev->dma_pool, ppa_list, ppa_dma);
706err_rq:
707 blk_mq_free_request(rq);
708err_cmd:
709 return ret;
710}
711
712static int nvme_nvm_submit_vio(struct nvme_ns *ns,
713 struct nvm_user_vio __user *uvio)
714{
715 struct nvm_user_vio vio;
716 struct nvme_nvm_command c;
717 unsigned int length;
718 int ret;
719
720 if (copy_from_user(&vio, uvio, sizeof(vio)))
721 return -EFAULT;
722 if (vio.flags)
723 return -EINVAL;
724
725 memset(&c, 0, sizeof(c));
726 c.ph_rw.opcode = vio.opcode;
727 c.ph_rw.nsid = cpu_to_le32(ns->ns_id);
728 c.ph_rw.control = cpu_to_le16(vio.control);
729 c.ph_rw.length = cpu_to_le16(vio.nppas);
730
731 length = (vio.nppas + 1) << ns->lba_shift;
732
733 ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c,
734 (void __user *)(uintptr_t)vio.addr, length,
735 (void __user *)(uintptr_t)vio.metadata,
736 vio.metadata_len,
737 (void __user *)(uintptr_t)vio.ppa_list, vio.nppas,
738 &vio.result, &vio.status, 0);
739
740 if (ret && copy_to_user(uvio, &vio, sizeof(vio)))
741 return -EFAULT;
742
743 return ret;
744}
745
746static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin,
747 struct nvm_passthru_vio __user *uvcmd)
748{
749 struct nvm_passthru_vio vcmd;
750 struct nvme_nvm_command c;
751 struct request_queue *q;
752 unsigned int timeout = 0;
753 int ret;
754
755 if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd)))
756 return -EFAULT;
757 if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN)))
758 return -EACCES;
759 if (vcmd.flags)
760 return -EINVAL;
761
762 memset(&c, 0, sizeof(c));
763 c.common.opcode = vcmd.opcode;
764 c.common.nsid = cpu_to_le32(ns->ns_id);
765 c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2);
766 c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3);
767 /* cdw11-12 */
768 c.ph_rw.length = cpu_to_le16(vcmd.nppas);
769 c.ph_rw.control = cpu_to_le32(vcmd.control);
770 c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13);
771 c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14);
772 c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15);
773
774 if (vcmd.timeout_ms)
775 timeout = msecs_to_jiffies(vcmd.timeout_ms);
776
777 q = admin ? ns->ctrl->admin_q : ns->queue;
778
779 ret = nvme_nvm_submit_user_cmd(q, ns,
780 (struct nvme_nvm_command *)&c,
781 (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len,
782 (void __user *)(uintptr_t)vcmd.metadata,
783 vcmd.metadata_len,
784 (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas,
785 &vcmd.result, &vcmd.status, timeout);
786
787 if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd)))
788 return -EFAULT;
789
790 return ret;
791}
792
793int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg)
794{
795 switch (cmd) {
796 case NVME_NVM_IOCTL_ADMIN_VIO:
797 return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg);
798 case NVME_NVM_IOCTL_IO_VIO:
799 return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg);
800 case NVME_NVM_IOCTL_SUBMIT_VIO:
801 return nvme_nvm_submit_vio(ns, (void __user *)arg);
802 default:
803 return -ENOTTY;
804 }
805}
806
589int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) 807int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
590{ 808{
591 struct request_queue *q = ns->queue; 809 struct request_queue *q = ns->queue;
@@ -622,7 +840,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
622 return 0; 840 return 0;
623 841
624 id = &ndev->identity; 842 id = &ndev->identity;
625 grp = &id->groups[0]; 843 grp = &id->grp;
626 attr = &dattr->attr; 844 attr = &dattr->attr;
627 845
628 if (strcmp(attr->name, "version") == 0) { 846 if (strcmp(attr->name, "version") == 0) {
@@ -633,10 +851,9 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
633 return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); 851 return scnprintf(page, PAGE_SIZE, "%u\n", id->cap);
634 } else if (strcmp(attr->name, "device_mode") == 0) { 852 } else if (strcmp(attr->name, "device_mode") == 0) {
635 return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); 853 return scnprintf(page, PAGE_SIZE, "%u\n", id->dom);
854 /* kept for compatibility */
636 } else if (strcmp(attr->name, "media_manager") == 0) { 855 } else if (strcmp(attr->name, "media_manager") == 0) {
637 if (!ndev->mt) 856 return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm");
638 return scnprintf(page, PAGE_SIZE, "%s\n", "none");
639 return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name);
640 } else if (strcmp(attr->name, "ppa_format") == 0) { 857 } else if (strcmp(attr->name, "ppa_format") == 0) {
641 return scnprintf(page, PAGE_SIZE, 858 return scnprintf(page, PAGE_SIZE,
642 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", 859 "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index aead6d08ed2c..14cfc6f7facb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -19,6 +19,7 @@
19#include <linux/kref.h> 19#include <linux/kref.h>
20#include <linux/blk-mq.h> 20#include <linux/blk-mq.h>
21#include <linux/lightnvm.h> 21#include <linux/lightnvm.h>
22#include <linux/sed-opal.h>
22 23
23enum { 24enum {
24 /* 25 /*
@@ -125,6 +126,8 @@ struct nvme_ctrl {
125 struct list_head node; 126 struct list_head node;
126 struct ida ns_ida; 127 struct ida ns_ida;
127 128
129 struct opal_dev *opal_dev;
130
128 char name[12]; 131 char name[12];
129 char serial[20]; 132 char serial[20];
130 char model[40]; 133 char model[40];
@@ -137,6 +140,7 @@ struct nvme_ctrl {
137 u32 max_hw_sectors; 140 u32 max_hw_sectors;
138 u16 oncs; 141 u16 oncs;
139 u16 vid; 142 u16 vid;
143 u16 oacs;
140 atomic_t abort_limit; 144 atomic_t abort_limit;
141 u8 event_limit; 145 u8 event_limit;
142 u8 vwc; 146 u8 vwc;
@@ -267,6 +271,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl);
267void nvme_queue_scan(struct nvme_ctrl *ctrl); 271void nvme_queue_scan(struct nvme_ctrl *ctrl);
268void nvme_remove_namespaces(struct nvme_ctrl *ctrl); 272void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
269 273
274int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
275 bool send);
276
270#define NVME_NR_AERS 1 277#define NVME_NR_AERS 1
271void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, 278void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
272 union nvme_result *res); 279 union nvme_result *res);
@@ -318,6 +325,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
318void nvme_nvm_unregister(struct nvme_ns *ns); 325void nvme_nvm_unregister(struct nvme_ns *ns);
319int nvme_nvm_register_sysfs(struct nvme_ns *ns); 326int nvme_nvm_register_sysfs(struct nvme_ns *ns);
320void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); 327void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
328int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
321#else 329#else
322static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, 330static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
323 int node) 331 int node)
@@ -335,6 +343,11 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i
335{ 343{
336 return 0; 344 return 0;
337} 345}
346static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
347 unsigned long arg)
348{
349 return -ENOTTY;
350}
338#endif /* CONFIG_NVM */ 351#endif /* CONFIG_NVM */
339 352
340static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) 353static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3faefabf339c..d67d0d0a3bc0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -43,6 +43,7 @@
43#include <linux/types.h> 43#include <linux/types.h>
44#include <linux/io-64-nonatomic-lo-hi.h> 44#include <linux/io-64-nonatomic-lo-hi.h>
45#include <asm/unaligned.h> 45#include <asm/unaligned.h>
46#include <linux/sed-opal.h>
46 47
47#include "nvme.h" 48#include "nvme.h"
48 49
@@ -895,12 +896,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
895 return BLK_EH_HANDLED; 896 return BLK_EH_HANDLED;
896 } 897 }
897 898
898 iod->aborted = 1;
899
900 if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { 899 if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
901 atomic_inc(&dev->ctrl.abort_limit); 900 atomic_inc(&dev->ctrl.abort_limit);
902 return BLK_EH_RESET_TIMER; 901 return BLK_EH_RESET_TIMER;
903 } 902 }
903 iod->aborted = 1;
904 904
905 memset(&cmd, 0, sizeof(cmd)); 905 memset(&cmd, 0, sizeof(cmd));
906 cmd.abort.opcode = nvme_admin_abort_cmd; 906 cmd.abort.opcode = nvme_admin_abort_cmd;
@@ -1178,6 +1178,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
1178 dev->admin_tagset.timeout = ADMIN_TIMEOUT; 1178 dev->admin_tagset.timeout = ADMIN_TIMEOUT;
1179 dev->admin_tagset.numa_node = dev_to_node(dev->dev); 1179 dev->admin_tagset.numa_node = dev_to_node(dev->dev);
1180 dev->admin_tagset.cmd_size = nvme_cmd_size(dev); 1180 dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
1181 dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
1181 dev->admin_tagset.driver_data = dev; 1182 dev->admin_tagset.driver_data = dev;
1182 1183
1183 if (blk_mq_alloc_tag_set(&dev->admin_tagset)) 1184 if (blk_mq_alloc_tag_set(&dev->admin_tagset))
@@ -1738,6 +1739,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
1738 if (dev->ctrl.admin_q) 1739 if (dev->ctrl.admin_q)
1739 blk_put_queue(dev->ctrl.admin_q); 1740 blk_put_queue(dev->ctrl.admin_q);
1740 kfree(dev->queues); 1741 kfree(dev->queues);
1742 kfree(dev->ctrl.opal_dev);
1741 kfree(dev); 1743 kfree(dev);
1742} 1744}
1743 1745
@@ -1754,6 +1756,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
1754static void nvme_reset_work(struct work_struct *work) 1756static void nvme_reset_work(struct work_struct *work)
1755{ 1757{
1756 struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); 1758 struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
1759 bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
1757 int result = -ENODEV; 1760 int result = -ENODEV;
1758 1761
1759 if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) 1762 if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
@@ -1786,6 +1789,14 @@ static void nvme_reset_work(struct work_struct *work)
1786 if (result) 1789 if (result)
1787 goto out; 1790 goto out;
1788 1791
1792 if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
1793 dev->ctrl.opal_dev =
1794 init_opal_dev(&dev->ctrl, &nvme_sec_submit);
1795 }
1796
1797 if (was_suspend)
1798 opal_unlock_from_suspend(dev->ctrl.opal_dev);
1799
1789 result = nvme_setup_io_queues(dev); 1800 result = nvme_setup_io_queues(dev);
1790 if (result) 1801 if (result)
1791 goto out; 1802 goto out;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 94352e4df831..013bfe049a48 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -117,7 +117,7 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi,
117 unsigned int clearing, int slot); 117 unsigned int clearing, int slot);
118static int sr_packet(struct cdrom_device_info *, struct packet_command *); 118static int sr_packet(struct cdrom_device_info *, struct packet_command *);
119 119
120static struct cdrom_device_ops sr_dops = { 120static const struct cdrom_device_ops sr_dops = {
121 .open = sr_open, 121 .open = sr_open,
122 .release = sr_release, 122 .release = sr_release,
123 .drive_status = sr_drive_status, 123 .drive_status = sr_drive_status,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4a2ab5d99ff7..8e4df3d6c8cd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx {
22 22
23 unsigned long flags; /* BLK_MQ_F_* flags */ 23 unsigned long flags; /* BLK_MQ_F_* flags */
24 24
25 void *sched_data;
25 struct request_queue *queue; 26 struct request_queue *queue;
26 struct blk_flush_queue *fq; 27 struct blk_flush_queue *fq;
27 28
@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx {
35 atomic_t wait_index; 36 atomic_t wait_index;
36 37
37 struct blk_mq_tags *tags; 38 struct blk_mq_tags *tags;
39 struct blk_mq_tags *sched_tags;
38 40
39 struct srcu_struct queue_rq_srcu; 41 struct srcu_struct queue_rq_srcu;
40 42
@@ -60,7 +62,7 @@ struct blk_mq_hw_ctx {
60 62
61struct blk_mq_tag_set { 63struct blk_mq_tag_set {
62 unsigned int *mq_map; 64 unsigned int *mq_map;
63 struct blk_mq_ops *ops; 65 const struct blk_mq_ops *ops;
64 unsigned int nr_hw_queues; 66 unsigned int nr_hw_queues;
65 unsigned int queue_depth; /* max hw supported */ 67 unsigned int queue_depth; /* max hw supported */
66 unsigned int reserved_tags; 68 unsigned int reserved_tags;
@@ -151,11 +153,13 @@ enum {
151 BLK_MQ_F_SG_MERGE = 1 << 2, 153 BLK_MQ_F_SG_MERGE = 1 << 2,
152 BLK_MQ_F_DEFER_ISSUE = 1 << 4, 154 BLK_MQ_F_DEFER_ISSUE = 1 << 4,
153 BLK_MQ_F_BLOCKING = 1 << 5, 155 BLK_MQ_F_BLOCKING = 1 << 5,
156 BLK_MQ_F_NO_SCHED = 1 << 6,
154 BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, 157 BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
155 BLK_MQ_F_ALLOC_POLICY_BITS = 1, 158 BLK_MQ_F_ALLOC_POLICY_BITS = 1,
156 159
157 BLK_MQ_S_STOPPED = 0, 160 BLK_MQ_S_STOPPED = 0,
158 BLK_MQ_S_TAG_ACTIVE = 1, 161 BLK_MQ_S_TAG_ACTIVE = 1,
162 BLK_MQ_S_SCHED_RESTART = 2,
159 163
160 BLK_MQ_MAX_DEPTH = 10240, 164 BLK_MQ_MAX_DEPTH = 10240,
161 165
@@ -179,14 +183,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
179 183
180void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); 184void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
181 185
182void blk_mq_insert_request(struct request *, bool, bool, bool);
183void blk_mq_free_request(struct request *rq); 186void blk_mq_free_request(struct request *rq);
184void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
185bool blk_mq_can_queue(struct blk_mq_hw_ctx *); 187bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
186 188
187enum { 189enum {
188 BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ 190 BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
189 BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ 191 BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
192 BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
190}; 193};
191 194
192struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 195struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 519ea2c9df61..37c9a43c5e78 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -221,6 +221,15 @@ static inline bool op_is_write(unsigned int op)
221} 221}
222 222
223/* 223/*
224 * Check if the bio or request is one that needs special treatment in the
225 * flush state machine.
226 */
227static inline bool op_is_flush(unsigned int op)
228{
229 return op & (REQ_FUA | REQ_PREFLUSH);
230}
231
232/*
224 * Reads are always treated as synchronous, as are requests with the FUA or 233 * Reads are always treated as synchronous, as are requests with the FUA or
225 * PREFLUSH flag. Other operations may be marked as synchronous using the 234 * PREFLUSH flag. Other operations may be marked as synchronous using the
226 * REQ_SYNC flag. 235 * REQ_SYNC flag.
@@ -232,22 +241,29 @@ static inline bool op_is_sync(unsigned int op)
232} 241}
233 242
234typedef unsigned int blk_qc_t; 243typedef unsigned int blk_qc_t;
235#define BLK_QC_T_NONE -1U 244#define BLK_QC_T_NONE -1U
236#define BLK_QC_T_SHIFT 16 245#define BLK_QC_T_SHIFT 16
246#define BLK_QC_T_INTERNAL (1U << 31)
237 247
238static inline bool blk_qc_t_valid(blk_qc_t cookie) 248static inline bool blk_qc_t_valid(blk_qc_t cookie)
239{ 249{
240 return cookie != BLK_QC_T_NONE; 250 return cookie != BLK_QC_T_NONE;
241} 251}
242 252
243static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) 253static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
254 bool internal)
244{ 255{
245 return tag | (queue_num << BLK_QC_T_SHIFT); 256 blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);
257
258 if (internal)
259 ret |= BLK_QC_T_INTERNAL;
260
261 return ret;
246} 262}
247 263
248static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) 264static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
249{ 265{
250 return cookie >> BLK_QC_T_SHIFT; 266 return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
251} 267}
252 268
253static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) 269static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
@@ -255,6 +271,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
255 return cookie & ((1u << BLK_QC_T_SHIFT) - 1); 271 return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
256} 272}
257 273
274static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
275{
276 return (cookie & BLK_QC_T_INTERNAL) != 0;
277}
278
258struct blk_issue_stat { 279struct blk_issue_stat {
259 u64 time; 280 u64 time;
260}; 281};
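The cookie helpers above now encode a third piece of information: whether the tag came from the scheduler's internal tag set. A quick round-trip illustration (queue and tag numbers are arbitrary):

blk_qc_t cookie = blk_tag_to_qc_t(5, 3, true);
/* cookie == 5 | (3 << BLK_QC_T_SHIFT) | BLK_QC_T_INTERNAL == 0x80030005 */

unsigned int qnum = blk_qc_t_to_queue_num(cookie);	/* 3, internal bit masked off */
unsigned int tag  = blk_qc_t_to_tag(cookie);		/* 5 */
bool internal     = blk_qc_t_is_internal(cookie);	/* true */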
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ca8e8fd1078..05675b1dfd20 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -154,6 +154,7 @@ struct request {
154 154
155 /* the following two fields are internal, NEVER access directly */ 155 /* the following two fields are internal, NEVER access directly */
156 unsigned int __data_len; /* total data len */ 156 unsigned int __data_len; /* total data len */
157 int tag;
157 sector_t __sector; /* sector cursor */ 158 sector_t __sector; /* sector cursor */
158 159
159 struct bio *bio; 160 struct bio *bio;
@@ -220,9 +221,10 @@ struct request {
220 221
221 unsigned short ioprio; 222 unsigned short ioprio;
222 223
224 int internal_tag;
225
223 void *special; /* opaque pointer available for LLD use */ 226 void *special; /* opaque pointer available for LLD use */
224 227
225 int tag;
226 int errors; 228 int errors;
227 229
228 /* 230 /*
@@ -407,7 +409,7 @@ struct request_queue {
407 dma_drain_needed_fn *dma_drain_needed; 409 dma_drain_needed_fn *dma_drain_needed;
408 lld_busy_fn *lld_busy_fn; 410 lld_busy_fn *lld_busy_fn;
409 411
410 struct blk_mq_ops *mq_ops; 412 const struct blk_mq_ops *mq_ops;
411 413
412 unsigned int *mq_map; 414 unsigned int *mq_map;
413 415
@@ -569,6 +571,11 @@ struct request_queue {
569 struct list_head tag_set_list; 571 struct list_head tag_set_list;
570 struct bio_set *bio_split; 572 struct bio_set *bio_split;
571 573
574#ifdef CONFIG_DEBUG_FS
575 struct dentry *debugfs_dir;
576 struct dentry *mq_debugfs_dir;
577#endif
578
572 bool mq_sysfs_init_done; 579 bool mq_sysfs_init_done;
573}; 580};
574 581
@@ -600,6 +607,7 @@ struct request_queue {
600#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ 607#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
601#define QUEUE_FLAG_DAX 26 /* device supports DAX */ 608#define QUEUE_FLAG_DAX 26 /* device supports DAX */
602#define QUEUE_FLAG_STATS 27 /* track rq completion times */ 609#define QUEUE_FLAG_STATS 27 /* track rq completion times */
610#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */
603 611
604#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 612#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
605 (1 << QUEUE_FLAG_STACKABLE) | \ 613 (1 << QUEUE_FLAG_STACKABLE) | \
@@ -1620,6 +1628,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
1620 return __bvec_gap_to_prev(q, bprv, offset); 1628 return __bvec_gap_to_prev(q, bprv, offset);
1621} 1629}
1622 1630
1631/*
1632 * Check if the two bvecs from two bios can be merged to one segment.
1633 * If yes, no need to check gap between the two bios since the 1st bio
1634 * and the 1st bvec in the 2nd bio can be handled in one segment.
1635 */
1636static inline bool bios_segs_mergeable(struct request_queue *q,
1637 struct bio *prev, struct bio_vec *prev_last_bv,
1638 struct bio_vec *next_first_bv)
1639{
1640 if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
1641 return false;
1642 if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
1643 return false;
1644 if (prev->bi_seg_back_size + next_first_bv->bv_len >
1645 queue_max_segment_size(q))
1646 return false;
1647 return true;
1648}
1649
1623static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, 1650static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
1624 struct bio *next) 1651 struct bio *next)
1625{ 1652{
@@ -1629,7 +1656,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
1629 bio_get_last_bvec(prev, &pb); 1656 bio_get_last_bvec(prev, &pb);
1630 bio_get_first_bvec(next, &nb); 1657 bio_get_first_bvec(next, &nb);
1631 1658
1632 return __bvec_gap_to_prev(q, &pb, nb.bv_offset); 1659 if (!bios_segs_mergeable(q, prev, &pb, &nb))
1660 return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
1633 } 1661 }
1634 1662
1635 return false; 1663 return false;
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 8609d577bb66..6e8f209a6dff 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -36,7 +36,7 @@ struct packet_command
36 36
37/* Uniform cdrom data structures for cdrom.c */ 37/* Uniform cdrom data structures for cdrom.c */
38struct cdrom_device_info { 38struct cdrom_device_info {
39 struct cdrom_device_ops *ops; /* link to device_ops */ 39 const struct cdrom_device_ops *ops; /* link to device_ops */
40 struct list_head list; /* linked list of all device_info */ 40 struct list_head list; /* linked list of all device_info */
41 struct gendisk *disk; /* matching block layer disk */ 41 struct gendisk *disk; /* matching block layer disk */
42 void *handle; /* driver-dependent data */ 42 void *handle; /* driver-dependent data */
@@ -87,7 +87,6 @@ struct cdrom_device_ops {
87 87
88/* driver specifications */ 88/* driver specifications */
89 const int capability; /* capability flags */ 89 const int capability; /* capability flags */
90 int n_minors; /* number of active minor devices */
91 /* handle uniform packets for scsi type devices (scsi,atapi) */ 90 /* handle uniform packets for scsi type devices (scsi,atapi) */
92 int (*generic_packet) (struct cdrom_device_info *, 91 int (*generic_packet) (struct cdrom_device_info *,
93 struct packet_command *); 92 struct packet_command *);
@@ -123,6 +122,8 @@ extern int cdrom_mode_sense(struct cdrom_device_info *cdi,
123 int page_code, int page_control); 122 int page_code, int page_control);
124extern void init_cdrom_command(struct packet_command *cgc, 123extern void init_cdrom_command(struct packet_command *cgc,
125 void *buffer, int len, int type); 124 void *buffer, int len, int type);
125extern int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
126 struct packet_command *cgc);
126 127
127/* The SCSI spec says there could be 256 slots. */ 128/* The SCSI spec says there could be 256 slots. */
128#define CDROM_MAX_SLOTS 256 129#define CDROM_MAX_SLOTS 256
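
With n_minors gone and cdrom_device_info now holding a const ops pointer, a driver's cdrom_device_ops table can live in read-only data, and cdrom_dummy_generic_packet() is exported for drivers that have no real packet interface. The fragment below is a rough sketch of what a driver-side declaration might look like after this change; only fields visible in this diff are filled in and the capability flag is just an example value, so it is illustrative rather than a copy of any in-tree driver. The info structure would then be registered with the uniform CD-ROM layer as usual.

#include <linux/cdrom.h>

/* Ops table can sit in rodata now that cdrom_device_info::ops is const. */
static const struct cdrom_device_ops my_cd_dops = {
	.capability	= CDC_DRIVE_STATUS,		/* example capability flag */
	.generic_packet	= cdrom_dummy_generic_packet,	/* no real packet interface */
};

static struct cdrom_device_info my_cd_info = {
	.ops	= &my_cd_dops,				/* const pointer accepted now */
};
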
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index b276e9ef0e0b..b5825c4f06f7 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -77,6 +77,34 @@ struct elevator_ops
77 elevator_registered_fn *elevator_registered_fn; 77 elevator_registered_fn *elevator_registered_fn;
78}; 78};
79 79
80struct blk_mq_alloc_data;
81struct blk_mq_hw_ctx;
82
83struct elevator_mq_ops {
84 int (*init_sched)(struct request_queue *, struct elevator_type *);
85 void (*exit_sched)(struct elevator_queue *);
86
87 bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
88 bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
89 int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
90 void (*request_merged)(struct request_queue *, struct request *, int);
91 void (*requests_merged)(struct request_queue *, struct request *, struct request *);
92 struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
93 void (*put_request)(struct request *);
94 void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
95 struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
96 bool (*has_work)(struct blk_mq_hw_ctx *);
97 void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
98 void (*started_request)(struct request *);
99 void (*requeue_request)(struct request *);
100 struct request *(*former_request)(struct request_queue *, struct request *);
101 struct request *(*next_request)(struct request_queue *, struct request *);
102 int (*get_rq_priv)(struct request_queue *, struct request *);
103 void (*put_rq_priv)(struct request_queue *, struct request *);
104 void (*init_icq)(struct io_cq *);
105 void (*exit_icq)(struct io_cq *);
106};
107
80#define ELV_NAME_MAX (16) 108#define ELV_NAME_MAX (16)
81 109
82struct elv_fs_entry { 110struct elv_fs_entry {
@@ -94,12 +122,16 @@ struct elevator_type
94 struct kmem_cache *icq_cache; 122 struct kmem_cache *icq_cache;
95 123
96 /* fields provided by elevator implementation */ 124 /* fields provided by elevator implementation */
97 struct elevator_ops ops; 125 union {
126 struct elevator_ops sq;
127 struct elevator_mq_ops mq;
128 } ops;
98 size_t icq_size; /* see iocontext.h */ 129 size_t icq_size; /* see iocontext.h */
99 size_t icq_align; /* ditto */ 130 size_t icq_align; /* ditto */
100 struct elv_fs_entry *elevator_attrs; 131 struct elv_fs_entry *elevator_attrs;
101 char elevator_name[ELV_NAME_MAX]; 132 char elevator_name[ELV_NAME_MAX];
102 struct module *elevator_owner; 133 struct module *elevator_owner;
134 bool uses_mq;
103 135
104 /* managed by elevator core */ 136 /* managed by elevator core */
105 char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ 137 char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */
@@ -123,6 +155,7 @@ struct elevator_queue
123 struct kobject kobj; 155 struct kobject kobj;
124 struct mutex sysfs_lock; 156 struct mutex sysfs_lock;
125 unsigned int registered:1; 157 unsigned int registered:1;
158 unsigned int uses_mq:1;
126 DECLARE_HASHTABLE(hash, ELV_HASH_BITS); 159 DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
127}; 160};
128 161
@@ -139,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
139extern void elv_merged_request(struct request_queue *, struct request *, int); 172extern void elv_merged_request(struct request_queue *, struct request *, int);
140extern void elv_bio_merged(struct request_queue *q, struct request *, 173extern void elv_bio_merged(struct request_queue *q, struct request *,
141 struct bio *); 174 struct bio *);
175extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
142extern void elv_requeue_request(struct request_queue *, struct request *); 176extern void elv_requeue_request(struct request_queue *, struct request *);
143extern struct request *elv_former_request(struct request_queue *, struct request *); 177extern struct request *elv_former_request(struct request_queue *, struct request *);
144extern struct request *elv_latter_request(struct request_queue *, struct request *); 178extern struct request *elv_latter_request(struct request_queue *, struct request *);
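
The new elevator_mq_ops and the sq/mq union in elevator_type let one registration API cover both the legacy and the blk-mq scheduler entry points, with uses_mq telling the core which member of the union is valid. Below is a hedged sketch of how an mq scheduler might describe itself under this layout; the my_* callbacks are empty placeholders (a real scheduler must allocate its elevator_queue in init_sched() and actually queue and dispatch requests), and elv_register()/elv_unregister() are used on the assumption that registration itself is unchanged.

#include <linux/blk-mq.h>
#include <linux/elevator.h>
#include <linux/module.h>

/* Placeholder callbacks: these stubs only show where the logic plugs in. */
static int my_init_sched(struct request_queue *q, struct elevator_type *e)
{
	/* A real implementation allocates and attaches its elevator_queue here. */
	return 0;
}

static void my_exit_sched(struct elevator_queue *eq)
{
}

static void my_insert_requests(struct blk_mq_hw_ctx *hctx,
			       struct list_head *list, bool at_head)
{
	/* A real scheduler would move the requests onto its internal queues. */
}

static struct request *my_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
	return NULL;	/* nothing to dispatch in this stub */
}

static bool my_has_work(struct blk_mq_hw_ctx *hctx)
{
	return false;
}

static struct elevator_type my_mq_sched = {
	.ops.mq = {				/* fills the mq member of the new union */
		.init_sched		= my_init_sched,
		.exit_sched		= my_exit_sched,
		.insert_requests	= my_insert_requests,
		.dispatch_request	= my_dispatch_request,
		.has_work		= my_has_work,
	},
	.uses_mq	= true,			/* tells the core that ops.mq is valid */
	.elevator_name	= "my-mq-sched",
	.elevator_owner	= THIS_MODULE,
};

static int __init my_sched_init(void)
{
	return elv_register(&my_mq_sched);
}

static void __exit my_sched_exit(void)
{
	elv_unregister(&my_mq_sched);
}

module_init(my_sched_init);
module_exit(my_sched_exit);
MODULE_LICENSE("GPL");
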
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 7c273bbc5351..ca45e4a088a9 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -80,8 +80,6 @@ struct nvm_dev_ops {
80 unsigned int max_phys_sect; 80 unsigned int max_phys_sect;
81}; 81};
82 82
83
84
85#ifdef CONFIG_NVM 83#ifdef CONFIG_NVM
86 84
87#include <linux/blkdev.h> 85#include <linux/blkdev.h>
@@ -109,6 +107,7 @@ enum {
109 NVM_RSP_ERR_FAILWRITE = 0x40ff, 107 NVM_RSP_ERR_FAILWRITE = 0x40ff,
110 NVM_RSP_ERR_EMPTYPAGE = 0x42ff, 108 NVM_RSP_ERR_EMPTYPAGE = 0x42ff,
111 NVM_RSP_ERR_FAILECC = 0x4281, 109 NVM_RSP_ERR_FAILECC = 0x4281,
110 NVM_RSP_ERR_FAILCRC = 0x4004,
112 NVM_RSP_WARN_HIGHECC = 0x4700, 111 NVM_RSP_WARN_HIGHECC = 0x4700,
113 112
114 /* Device opcodes */ 113 /* Device opcodes */
@@ -202,11 +201,10 @@ struct nvm_addr_format {
202struct nvm_id { 201struct nvm_id {
203 u8 ver_id; 202 u8 ver_id;
204 u8 vmnt; 203 u8 vmnt;
205 u8 cgrps;
206 u32 cap; 204 u32 cap;
207 u32 dom; 205 u32 dom;
208 struct nvm_addr_format ppaf; 206 struct nvm_addr_format ppaf;
209 struct nvm_id_group groups[4]; 207 struct nvm_id_group grp;
210} __packed; 208} __packed;
211 209
212struct nvm_target { 210struct nvm_target {
@@ -216,10 +214,6 @@ struct nvm_target {
216 struct gendisk *disk; 214 struct gendisk *disk;
217}; 215};
218 216
219struct nvm_tgt_instance {
220 struct nvm_tgt_type *tt;
221};
222
223#define ADDR_EMPTY (~0ULL) 217#define ADDR_EMPTY (~0ULL)
224 218
225#define NVM_VERSION_MAJOR 1 219#define NVM_VERSION_MAJOR 1
@@ -230,7 +224,6 @@ struct nvm_rq;
230typedef void (nvm_end_io_fn)(struct nvm_rq *); 224typedef void (nvm_end_io_fn)(struct nvm_rq *);
231 225
232struct nvm_rq { 226struct nvm_rq {
233 struct nvm_tgt_instance *ins;
234 struct nvm_tgt_dev *dev; 227 struct nvm_tgt_dev *dev;
235 228
236 struct bio *bio; 229 struct bio *bio;
@@ -254,6 +247,8 @@ struct nvm_rq {
254 247
255 u64 ppa_status; /* ppa media status */ 248 u64 ppa_status; /* ppa media status */
256 int error; 249 int error;
250
251 void *private;
257}; 252};
258 253
259static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) 254static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
@@ -272,15 +267,6 @@ enum {
272 NVM_BLK_ST_BAD = 0x8, /* Bad block */ 267 NVM_BLK_ST_BAD = 0x8, /* Bad block */
273}; 268};
274 269
275/* system block cpu representation */
276struct nvm_sb_info {
277 unsigned long seqnr;
278 unsigned long erase_cnt;
279 unsigned int version;
280 char mmtype[NVM_MMTYPE_LEN];
281 struct ppa_addr fs_ppa;
282};
283
284/* Device generic information */ 270/* Device generic information */
285struct nvm_geo { 271struct nvm_geo {
286 int nr_chnls; 272 int nr_chnls;
@@ -308,6 +294,7 @@ struct nvm_geo {
308 int sec_per_lun; 294 int sec_per_lun;
309}; 295};
310 296
297/* sub-device structure */
311struct nvm_tgt_dev { 298struct nvm_tgt_dev {
312 /* Device information */ 299 /* Device information */
313 struct nvm_geo geo; 300 struct nvm_geo geo;
@@ -329,17 +316,10 @@ struct nvm_dev {
329 316
330 struct list_head devices; 317 struct list_head devices;
331 318
332 /* Media manager */
333 struct nvmm_type *mt;
334 void *mp;
335
336 /* System blocks */
337 struct nvm_sb_info sb;
338
339 /* Device information */ 319 /* Device information */
340 struct nvm_geo geo; 320 struct nvm_geo geo;
341 321
342 /* lower page table */ 322 /* lower page table */
343 int lps_per_blk; 323 int lps_per_blk;
344 int *lptbl; 324 int *lptbl;
345 325
@@ -359,6 +339,10 @@ struct nvm_dev {
359 339
360 struct mutex mlock; 340 struct mutex mlock;
361 spinlock_t lock; 341 spinlock_t lock;
342
343 /* target management */
344 struct list_head area_list;
345 struct list_head targets;
362}; 346};
363 347
364static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, 348static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
@@ -391,10 +375,10 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
391 return l; 375 return l;
392} 376}
393 377
394static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, 378static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev,
395 struct ppa_addr r) 379 struct ppa_addr r)
396{ 380{
397 struct nvm_geo *geo = &dev->geo; 381 struct nvm_geo *geo = &tgt_dev->geo;
398 struct ppa_addr l; 382 struct ppa_addr l;
399 383
400 l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; 384 l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset;
@@ -407,10 +391,10 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
407 return l; 391 return l;
408} 392}
409 393
410static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, 394static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev,
411 struct ppa_addr r) 395 struct ppa_addr r)
412{ 396{
413 struct nvm_geo *geo = &dev->geo; 397 struct nvm_geo *geo = &tgt_dev->geo;
414 struct ppa_addr l; 398 struct ppa_addr l;
415 399
416 l.ppa = 0; 400 l.ppa = 0;
@@ -452,15 +436,12 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
452 (ppa1.g.blk == ppa2.g.blk)); 436 (ppa1.g.blk == ppa2.g.blk));
453} 437}
454 438
455static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
456{
457 return dev->lptbl[slc_pg];
458}
459
460typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); 439typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
461typedef sector_t (nvm_tgt_capacity_fn)(void *); 440typedef sector_t (nvm_tgt_capacity_fn)(void *);
462typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); 441typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *);
463typedef void (nvm_tgt_exit_fn)(void *); 442typedef void (nvm_tgt_exit_fn)(void *);
443typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
444typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
464 445
465struct nvm_tgt_type { 446struct nvm_tgt_type {
466 const char *name; 447 const char *name;
@@ -469,12 +450,15 @@ struct nvm_tgt_type {
469 /* target entry points */ 450 /* target entry points */
470 nvm_tgt_make_rq_fn *make_rq; 451 nvm_tgt_make_rq_fn *make_rq;
471 nvm_tgt_capacity_fn *capacity; 452 nvm_tgt_capacity_fn *capacity;
472 nvm_end_io_fn *end_io;
473 453
474 /* module-specific init/teardown */ 454 /* module-specific init/teardown */
475 nvm_tgt_init_fn *init; 455 nvm_tgt_init_fn *init;
476 nvm_tgt_exit_fn *exit; 456 nvm_tgt_exit_fn *exit;
477 457
458 /* sysfs */
459 nvm_tgt_sysfs_init_fn *sysfs_init;
460 nvm_tgt_sysfs_exit_fn *sysfs_exit;
461
478 /* For internal use */ 462 /* For internal use */
479 struct list_head list; 463 struct list_head list;
480}; 464};
@@ -487,103 +471,29 @@ extern void nvm_unregister_tgt_type(struct nvm_tgt_type *);
487extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); 471extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *);
488extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); 472extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t);
489 473
490typedef int (nvmm_register_fn)(struct nvm_dev *);
491typedef void (nvmm_unregister_fn)(struct nvm_dev *);
492
493typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *);
494typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *);
495typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *);
496typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int);
497typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t);
498typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t);
499typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *,
500 struct ppa_addr, int);
501typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int);
502
503enum {
504 TRANS_TGT_TO_DEV = 0x0,
505 TRANS_DEV_TO_TGT = 0x1,
506};
507
508struct nvmm_type {
509 const char *name;
510 unsigned int version[3];
511
512 nvmm_register_fn *register_mgr;
513 nvmm_unregister_fn *unregister_mgr;
514
515 nvmm_create_tgt_fn *create_tgt;
516 nvmm_remove_tgt_fn *remove_tgt;
517
518 nvmm_submit_io_fn *submit_io;
519 nvmm_erase_blk_fn *erase_blk;
520
521 nvmm_get_area_fn *get_area;
522 nvmm_put_area_fn *put_area;
523
524 nvmm_trans_ppa_fn *trans_ppa;
525 nvmm_part_to_tgt_fn *part_to_tgt;
526
527 struct list_head list;
528};
529
530extern int nvm_register_mgr(struct nvmm_type *);
531extern void nvm_unregister_mgr(struct nvmm_type *);
532
533extern struct nvm_dev *nvm_alloc_dev(int); 474extern struct nvm_dev *nvm_alloc_dev(int);
534extern int nvm_register(struct nvm_dev *); 475extern int nvm_register(struct nvm_dev *);
535extern void nvm_unregister(struct nvm_dev *); 476extern void nvm_unregister(struct nvm_dev *);
536 477
537extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int);
538extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, 478extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
539 int, int); 479 int, int);
540extern int nvm_max_phys_sects(struct nvm_tgt_dev *); 480extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
541extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); 481extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
542extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
543extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
544extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, 482extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
545 const struct ppa_addr *, int, int); 483 const struct ppa_addr *, int, int);
546extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); 484extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
547extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int);
548extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); 485extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int);
549extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, 486extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
550 void *); 487 void *);
551extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); 488extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
552extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); 489extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
553extern void nvm_end_io(struct nvm_rq *, int); 490extern void nvm_end_io(struct nvm_rq *);
554extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
555 void *, int);
556extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int,
557 int, void *, int);
558extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); 491extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
559extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *);
560extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); 492extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
561 493
562/* sysblk.c */
563#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
564
565/* system block on disk representation */
566struct nvm_system_block {
567 __be32 magic; /* magic signature */
568 __be32 seqnr; /* sequence number */
569 __be32 erase_cnt; /* erase count */
570 __be16 version; /* version number */
571 u8 mmtype[NVM_MMTYPE_LEN]; /* media manager name */
572 __be64 fs_ppa; /* PPA for media manager
573 * superblock */
574};
575
576extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *);
577extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *);
578extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
579
580extern int nvm_dev_factory(struct nvm_dev *, int flags); 494extern int nvm_dev_factory(struct nvm_dev *, int flags);
581 495
582#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid) \ 496extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int);
583 for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls; \
584 (chid)++, (ppa).g.ch = (chid)) \
585 for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl; \
586 (lunid)++, (ppa).g.lun = (lunid))
587 497
588#else /* CONFIG_NVM */ 498#else /* CONFIG_NVM */
589struct nvm_dev_ops; 499struct nvm_dev_ops;
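
With the media-manager layer (nvmm_type, system blocks) folded into the core and the PPA translation helpers retargeted at struct nvm_tgt_dev, the address math itself is unchanged: a generic address is packed into the device format by shifting each field to the offset described in nvm_addr_format, and unpacked the same way in reverse. The small user-space model below mirrors that packing and unpacking with fixed example offsets; the field widths and offsets are illustrative, not taken from any real device geometry.

#include <stdint.h>
#include <stdio.h>

/* Illustrative address format: offsets chosen for the example only. */
struct addr_format {
	int ch_offset;
	int lun_offset;
	int blk_offset;
	int pg_offset;
};

struct generic_addr {
	uint64_t ch, lun, blk, pg;
};

/* Pack a generic address into device format (cf. generic_to_dev_addr()). */
static uint64_t to_dev(const struct addr_format *f, struct generic_addr a)
{
	return (a.ch  << f->ch_offset)  |
	       (a.lun << f->lun_offset) |
	       (a.blk << f->blk_offset) |
	       (a.pg  << f->pg_offset);
}

/* Unpack, assuming example field widths (cf. dev_to_generic_addr()). */
static struct generic_addr to_generic(const struct addr_format *f, uint64_t ppa)
{
	struct generic_addr a = {
		.ch  = (ppa >> f->ch_offset)  & 0xf,	/* 4-bit channel */
		.lun = (ppa >> f->lun_offset) & 0xf,	/* 4-bit LUN     */
		.blk = (ppa >> f->blk_offset) & 0xfff,	/* 12-bit block  */
		.pg  = (ppa >> f->pg_offset)  & 0xff,	/* 8-bit page    */
	};
	return a;
}

int main(void)
{
	struct addr_format f = { .ch_offset = 28, .lun_offset = 24,
				 .blk_offset = 12, .pg_offset = 4 };
	struct generic_addr g = { .ch = 3, .lun = 1, .blk = 100, .pg = 7 };
	uint64_t ppa = to_dev(&f, g);
	struct generic_addr back = to_generic(&f, ppa);

	printf("ppa=0x%llx ch=%llu lun=%llu blk=%llu pg=%llu\n",
	       (unsigned long long)ppa,
	       (unsigned long long)back.ch, (unsigned long long)back.lun,
	       (unsigned long long)back.blk, (unsigned long long)back.pg);
	return 0;
}
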
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 3d1c6f1b15c9..00eac863a9c7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -244,6 +244,7 @@ enum {
244 NVME_CTRL_ONCS_DSM = 1 << 2, 244 NVME_CTRL_ONCS_DSM = 1 << 2,
245 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, 245 NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
246 NVME_CTRL_VWC_PRESENT = 1 << 0, 246 NVME_CTRL_VWC_PRESENT = 1 << 0,
247 NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
247}; 248};
248 249
249struct nvme_lbaf { 250struct nvme_lbaf {
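
NVME_CTRL_OACS_SEC_SUPP is the Identify Controller OACS bit that advertises the Security Send/Receive command set, which is what the new SED-OPAL support is gated on. The fragment below is only a sketch of the kind of test a driver might perform; it assumes the usual struct nvme_id_ctrl identify data and nothing else from this patch.

#include <linux/nvme.h>

/* Sketch: true if the controller advertises Security Send/Receive support. */
static bool my_ctrl_has_security(struct nvme_id_ctrl *id)
{
	return le16_to_cpu(id->oacs) & NVME_CTRL_OACS_SEC_SUPP;
}
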
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index f017fd6e69c4..d4e0a204c118 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -259,6 +259,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
259unsigned int sbitmap_weight(const struct sbitmap *sb); 259unsigned int sbitmap_weight(const struct sbitmap *sb);
260 260
261/** 261/**
262 * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
263 * @sb: Bitmap to show.
264 * @m: struct seq_file to write to.
265 *
266 * This is intended for debugging. The format may change at any time.
267 */
268void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
269
270/**
271 * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
272 * seq_file.
273 * @sb: Bitmap to show.
274 * @m: struct seq_file to write to.
275 *
276 * This is intended for debugging. The output isn't guaranteed to be internally
277 * consistent.
278 */
279void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);
280
281/**
262 * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific 282 * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
263 * memory node. 283 * memory node.
264 * @sbq: Bitmap queue to initialize. 284 * @sbq: Bitmap queue to initialize.
@@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
370 */ 390 */
371void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); 391void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
372 392
393/**
394 * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
395 * seq_file.
396 * @sbq: Bitmap queue to show.
397 * @m: struct seq_file to write to.
398 *
399 * This is intended for debugging. The format may change at any time.
400 */
401void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
402
373#endif /* __LINUX_SCALE_BITMAP_H */ 403#endif /* __LINUX_SCALE_BITMAP_H */
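
The new sbitmap_show(), sbitmap_bitmap_show(), and sbitmap_queue_show() helpers are meant to be called from a seq_file show routine, which is how the blk-mq debugfs code added elsewhere in this merge consumes them. A minimal sketch of such a hook is below; the my_tags_* names and the use of inode->i_private to carry the sbitmap_queue pointer are assumptions for the example, not part of the patch. The file itself would then be created with something like debugfs_create_file("my_tags", 0400, parent, sbq, &my_tags_fops).

#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/sbitmap.h>
#include <linux/seq_file.h>

/* Show routine: dump one sbitmap_queue into the seq_file. */
static int my_tags_show(struct seq_file *m, void *data)
{
	struct sbitmap_queue *sbq = m->private;

	sbitmap_queue_show(sbq, m);
	return 0;
}

static int my_tags_open(struct inode *inode, struct file *file)
{
	/* i_private is assumed to hold the sbitmap_queue pointer. */
	return single_open(file, my_tags_show, inode->i_private);
}

static const struct file_operations my_tags_fops = {
	.owner		= THIS_MODULE,
	.open		= my_tags_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
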
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
new file mode 100644
index 000000000000..deee23d012e7
--- /dev/null
+++ b/include/linux/sed-opal.h
@@ -0,0 +1,70 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Rafael Antognolli <rafael.antognolli@intel.com>
6 * Scott Bauer <scott.bauer@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#ifndef LINUX_OPAL_H
19#define LINUX_OPAL_H
20
21#include <uapi/linux/sed-opal.h>
22#include <linux/kernel.h>
23
24struct opal_dev;
25
26typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer,
27 size_t len, bool send);
28
29#ifdef CONFIG_BLK_SED_OPAL
30bool opal_unlock_from_suspend(struct opal_dev *dev);
31struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv);
32int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr);
33
34static inline bool is_sed_ioctl(unsigned int cmd)
35{
36 switch (cmd) {
37 case IOC_OPAL_SAVE:
38 case IOC_OPAL_LOCK_UNLOCK:
39 case IOC_OPAL_TAKE_OWNERSHIP:
40 case IOC_OPAL_ACTIVATE_LSP:
41 case IOC_OPAL_SET_PW:
42 case IOC_OPAL_ACTIVATE_USR:
43 case IOC_OPAL_REVERT_TPR:
44 case IOC_OPAL_LR_SETUP:
45 case IOC_OPAL_ADD_USR_TO_LR:
46 case IOC_OPAL_ENABLE_DISABLE_MBR:
47 case IOC_OPAL_ERASE_LR:
48 case IOC_OPAL_SECURE_ERASE_LR:
49 return true;
50 }
51 return false;
52}
53#else
54static inline bool is_sed_ioctl(unsigned int cmd)
55{
56 return false;
57}
58
59static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd,
60 void __user *ioctl_ptr)
61{
62 return 0;
63}
64static inline bool opal_unlock_from_suspend(struct opal_dev *dev)
65{
66 return false;
67}
68#define init_opal_dev(data, send_recv) NULL
69#endif /* CONFIG_BLK_SED_OPAL */
70#endif /* LINUX_OPAL_H */
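
The kernel-facing half of the SED-OPAL support boils down to three calls: init_opal_dev() binds an opal_dev to the driver's Security Send/Receive transport, is_sed_ioctl()/sed_ioctl() route the new ioctls, and opal_unlock_from_suspend() replays saved unlocks on resume. The fragment below sketches how a block driver might wire these up; struct my_dev, my_sec_submit(), and the surrounding handlers are hypothetical, and only the sed-opal calls come from this header.

#include <linux/blkdev.h>
#include <linux/errno.h>
#include <linux/sed-opal.h>

struct my_dev {
	struct opal_dev *opal_dev;
	/* hypothetical driver state */
};

/* Transport callback: issue Security Send/Receive to the hardware. */
static int my_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
			 size_t len, bool send)
{
	/* A real driver would build and submit the security command here. */
	return -EOPNOTSUPP;
}

static void my_probe_opal(struct my_dev *dev)
{
	/* Returns NULL when CONFIG_BLK_SED_OPAL is disabled. */
	dev->opal_dev = init_opal_dev(dev, my_sec_submit);
}

static int my_ioctl(struct block_device *bdev, fmode_t mode,
		    unsigned int cmd, unsigned long arg)
{
	struct my_dev *dev = bdev->bd_disk->private_data;

	if (is_sed_ioctl(cmd))
		return sed_ioctl(dev->opal_dev, cmd, (void __user *)arg);

	return -ENOTTY;
}

static int my_resume(struct my_dev *dev)
{
	/* Replay any saved unlock keys after a suspend/resume cycle. */
	opal_unlock_from_suspend(dev->opal_dev);
	return 0;
}
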
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h
index 774a43128a7a..fd19f36b3129 100644
--- a/include/uapi/linux/lightnvm.h
+++ b/include/uapi/linux/lightnvm.h
@@ -122,6 +122,44 @@ struct nvm_ioctl_dev_factory {
122 __u32 flags; 122 __u32 flags;
123}; 123};
124 124
125struct nvm_user_vio {
126 __u8 opcode;
127 __u8 flags;
128 __u16 control;
129 __u16 nppas;
130 __u16 rsvd;
131 __u64 metadata;
132 __u64 addr;
133 __u64 ppa_list;
134 __u32 metadata_len;
135 __u32 data_len;
136 __u64 status;
137 __u32 result;
138 __u32 rsvd3[3];
139};
140
141struct nvm_passthru_vio {
142 __u8 opcode;
143 __u8 flags;
144 __u8 rsvd[2];
145 __u32 nsid;
146 __u32 cdw2;
147 __u32 cdw3;
148 __u64 metadata;
149 __u64 addr;
150 __u32 metadata_len;
151 __u32 data_len;
152 __u64 ppa_list;
153 __u16 nppas;
154 __u16 control;
155 __u32 cdw13;
156 __u32 cdw14;
157 __u32 cdw15;
158 __u64 status;
159 __u32 result;
160 __u32 timeout_ms;
161};
162
125/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ 163/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
126enum { 164enum {
127 /* top level cmds */ 165 /* top level cmds */
@@ -137,6 +175,11 @@ enum {
137 175
138 /* Factory reset device */ 176 /* Factory reset device */
139 NVM_DEV_FACTORY_CMD, 177 NVM_DEV_FACTORY_CMD,
178
179 /* Vector user I/O */
180 NVM_DEV_VIO_ADMIN_CMD = 0x41,
181 NVM_DEV_VIO_CMD = 0x42,
182 NVM_DEV_VIO_USER_CMD = 0x43,
140}; 183};
141 184
142#define NVM_IOCTL 'L' /* 0x4c */ 185#define NVM_IOCTL 'L' /* 0x4c */
@@ -154,6 +197,13 @@ enum {
154#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \ 197#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
155 struct nvm_ioctl_dev_factory) 198 struct nvm_ioctl_dev_factory)
156 199
200#define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \
201 struct nvm_passthru_vio)
202#define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\
203 struct nvm_passthru_vio)
204#define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\
205 struct nvm_user_vio)
206
157#define NVM_VERSION_MAJOR 1 207#define NVM_VERSION_MAJOR 1
158#define NVM_VERSION_MINOR 0 208#define NVM_VERSION_MINOR 0
159#define NVM_VERSION_PATCHLEVEL 0 209#define NVM_VERSION_PATCHLEVEL 0
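
The vector I/O ioctls give user space direct access to Open-Channel physical addressing through struct nvm_user_vio. The user-space sketch below fills one in for a single-PPA read and submits it; the device node, the 0x92 opcode (the Open-Channel 1.2 physical page read), and the zero-based meaning of nppas (0 standing for one PPA, with ppa_list carrying that PPA by value) are all assumptions to verify against the driver before relying on them.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/lightnvm.h>

int main(void)
{
	struct nvm_user_vio vio;
	unsigned char buf[4096];
	int fd, ret;

	fd = open("/dev/nvme0n1", O_RDWR);	/* assumed Open-Channel namespace */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&vio, 0, sizeof(vio));
	vio.opcode = 0x92;		/* physical page read (assumed 1.2 opcode) */
	vio.nppas = 0;			/* assumed zero-based: 0 means one PPA */
	vio.ppa_list = 0;		/* single device-format PPA, passed by value */
	vio.addr = (uintptr_t)buf;	/* data buffer */
	vio.data_len = sizeof(buf);

	ret = ioctl(fd, NVME_NVM_IOCTL_SUBMIT_VIO, &vio);
	printf("ret=%d status=0x%llx result=0x%x\n",
	       ret, (unsigned long long)vio.status, vio.result);

	close(fd);
	return 0;
}
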
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
new file mode 100644
index 000000000000..c72e0735532d
--- /dev/null
+++ b/include/uapi/linux/sed-opal.h
@@ -0,0 +1,119 @@
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Authors:
5 * Rafael Antognolli <rafael.antognolli@intel.com>
6 * Scott Bauer <scott.bauer@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 */
17
18#ifndef _UAPI_SED_OPAL_H
19#define _UAPI_SED_OPAL_H
20
21#include <linux/types.h>
22
23#define OPAL_KEY_MAX 256
24#define OPAL_MAX_LRS 9
25
26enum opal_mbr {
27 OPAL_MBR_ENABLE = 0x0,
28 OPAL_MBR_DISABLE = 0x01,
29};
30
31enum opal_user {
32 OPAL_ADMIN1 = 0x0,
33 OPAL_USER1 = 0x01,
34 OPAL_USER2 = 0x02,
35 OPAL_USER3 = 0x03,
36 OPAL_USER4 = 0x04,
37 OPAL_USER5 = 0x05,
38 OPAL_USER6 = 0x06,
39 OPAL_USER7 = 0x07,
40 OPAL_USER8 = 0x08,
41 OPAL_USER9 = 0x09,
42};
43
44enum opal_lock_state {
45 OPAL_RO = 0x01, /* 0001 */
46 OPAL_RW = 0x02, /* 0010 */
47 OPAL_LK = 0x04, /* 0100 */
48};
49
50struct opal_key {
51 __u8 lr;
52 __u8 key_len;
53 __u8 __align[6];
54 __u8 key[OPAL_KEY_MAX];
55};
56
57struct opal_lr_act {
58 struct opal_key key;
59 __u32 sum;
60 __u8 num_lrs;
61 __u8 lr[OPAL_MAX_LRS];
62 __u8 align[2]; /* Align to 8 byte boundary */
63};
64
65struct opal_session_info {
66 __u32 sum;
67 __u32 who;
68 struct opal_key opal_key;
69};
70
71struct opal_user_lr_setup {
72 __u64 range_start;
73 __u64 range_length;
74	__u32 RLE; /* Read Lock Enabled */
75	__u32 WLE; /* Write Lock Enabled */
74	__u32 RLE; /* Read Lock Enabled */
75	__u32 WLE; /* Write Lock Enabled */
76 struct opal_session_info session;
77};
78
79struct opal_lock_unlock {
80 struct opal_session_info session;
81 __u32 l_state;
82 __u8 __align[4];
83};
84
85struct opal_new_pw {
86 struct opal_session_info session;
87
88	/* When we're not operating in sum and we first set
89	 * passwords, we need to set them via the ADMIN authority.
90	 * After passwords are changed, we can set them via the
91	 * User authorities.
92	 * Because of this restriction we need to know about
93	 * two different users: one in 'session', which we use to
94	 * start the session, and new_user_pw as the user we're
95	 * changing the password for.
96 */
97 struct opal_session_info new_user_pw;
98};
99
100struct opal_mbr_data {
101 struct opal_key key;
102 __u8 enable_disable;
103 __u8 __align[7];
104};
105
106#define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock)
107#define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock)
108#define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key)
109#define IOC_OPAL_ACTIVATE_LSP _IOW('p', 223, struct opal_lr_act)
110#define IOC_OPAL_SET_PW _IOW('p', 224, struct opal_new_pw)
111#define IOC_OPAL_ACTIVATE_USR _IOW('p', 225, struct opal_session_info)
112#define IOC_OPAL_REVERT_TPR _IOW('p', 226, struct opal_key)
113#define IOC_OPAL_LR_SETUP _IOW('p', 227, struct opal_user_lr_setup)
114#define IOC_OPAL_ADD_USR_TO_LR _IOW('p', 228, struct opal_lock_unlock)
115#define IOC_OPAL_ENABLE_DISABLE_MBR _IOW('p', 229, struct opal_mbr_data)
116#define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info)
117#define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info)
118
119#endif /* _UAPI_SED_OPAL_H */
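
From user space, the same ioctls drive the locking machinery: IOC_OPAL_SAVE registers a key with the kernel so opal_unlock_from_suspend() can replay the unlock after resume, and IOC_OPAL_LOCK_UNLOCK applies a lock state immediately. The sketch below fills struct opal_lock_unlock for the Admin1 authority on locking range 0; the device node and passphrase are placeholders, and an OPAL-capable drive is assumed.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/sed-opal.h>

int main(void)
{
	struct opal_lock_unlock lk;
	const char *pw = "debug-password";	/* placeholder passphrase */
	int fd, ret;

	fd = open("/dev/nvme0n1", O_RDWR);	/* assumed OPAL-capable device */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&lk, 0, sizeof(lk));
	lk.session.who = OPAL_ADMIN1;		/* authority used for the session */
	lk.session.opal_key.lr = 0;		/* locking range 0 = global range */
	lk.session.opal_key.key_len = strlen(pw);
	memcpy(lk.session.opal_key.key, pw, lk.session.opal_key.key_len);
	lk.l_state = OPAL_RW;			/* unlock for read/write */

	/* Register the key so the kernel can replay the unlock after suspend. */
	ret = ioctl(fd, IOC_OPAL_SAVE, &lk);
	if (ret)
		perror("IOC_OPAL_SAVE");

	/* Unlock the range right now. */
	ret = ioctl(fd, IOC_OPAL_LOCK_UNLOCK, &lk);
	if (ret)
		perror("IOC_OPAL_LOCK_UNLOCK");

	close(fd);
	return 0;
}
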
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 2cecf05c82fd..55e11c4b2f3b 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -17,6 +17,7 @@
17 17
18#include <linux/random.h> 18#include <linux/random.h>
19#include <linux/sbitmap.h> 19#include <linux/sbitmap.h>
20#include <linux/seq_file.h>
20 21
21int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 22int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
22 gfp_t flags, int node) 23 gfp_t flags, int node)
@@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb)
180} 181}
181EXPORT_SYMBOL_GPL(sbitmap_weight); 182EXPORT_SYMBOL_GPL(sbitmap_weight);
182 183
184void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
185{
186 seq_printf(m, "depth=%u\n", sb->depth);
187 seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
188 seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
189 seq_printf(m, "map_nr=%u\n", sb->map_nr);
190}
191EXPORT_SYMBOL_GPL(sbitmap_show);
192
193static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte)
194{
195 if ((offset & 0xf) == 0) {
196 if (offset != 0)
197 seq_putc(m, '\n');
198 seq_printf(m, "%08x:", offset);
199 }
200 if ((offset & 0x1) == 0)
201 seq_putc(m, ' ');
202 seq_printf(m, "%02x", byte);
203}
204
205void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m)
206{
207 u8 byte = 0;
208 unsigned int byte_bits = 0;
209 unsigned int offset = 0;
210 int i;
211
212 for (i = 0; i < sb->map_nr; i++) {
213 unsigned long word = READ_ONCE(sb->map[i].word);
214 unsigned int word_bits = READ_ONCE(sb->map[i].depth);
215
216 while (word_bits > 0) {
217 unsigned int bits = min(8 - byte_bits, word_bits);
218
219 byte |= (word & (BIT(bits) - 1)) << byte_bits;
220 byte_bits += bits;
221 if (byte_bits == 8) {
222 emit_byte(m, offset, byte);
223 byte = 0;
224 byte_bits = 0;
225 offset++;
226 }
227 word >>= bits;
228 word_bits -= bits;
229 }
230 }
231 if (byte_bits) {
232 emit_byte(m, offset, byte);
233 offset++;
234 }
235 if (offset)
236 seq_putc(m, '\n');
237}
238EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
239
183static unsigned int sbq_calc_wake_batch(unsigned int depth) 240static unsigned int sbq_calc_wake_batch(unsigned int depth)
184{ 241{
185 unsigned int wake_batch; 242 unsigned int wake_batch;
@@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
239 296
240void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 297void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
241{ 298{
242 sbq->wake_batch = sbq_calc_wake_batch(depth); 299 unsigned int wake_batch = sbq_calc_wake_batch(depth);
300 int i;
301
302 if (sbq->wake_batch != wake_batch) {
303 WRITE_ONCE(sbq->wake_batch, wake_batch);
304 /*
305 * Pairs with the memory barrier in sbq_wake_up() to ensure that
306 * the batch size is updated before the wait counts.
307 */
308 smp_mb__before_atomic();
309 for (i = 0; i < SBQ_WAIT_QUEUES; i++)
310 atomic_set(&sbq->ws[i].wait_cnt, 1);
311 }
243 sbitmap_resize(&sbq->sb, depth); 312 sbitmap_resize(&sbq->sb, depth);
244} 313}
245EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 314EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
297static void sbq_wake_up(struct sbitmap_queue *sbq) 366static void sbq_wake_up(struct sbitmap_queue *sbq)
298{ 367{
299 struct sbq_wait_state *ws; 368 struct sbq_wait_state *ws;
369 unsigned int wake_batch;
300 int wait_cnt; 370 int wait_cnt;
301 371
302 /* Ensure that the wait list checks occur after clear_bit(). */ 372 /*
303 smp_mb(); 373 * Pairs with the memory barrier in set_current_state() to ensure the
374 * proper ordering of clear_bit()/waitqueue_active() in the waker and
375 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
376 * the comment on waitqueue_active(). This is __after_atomic because we
377 * just did clear_bit() in the caller.
378 */
379 smp_mb__after_atomic();
304 380
305 ws = sbq_wake_ptr(sbq); 381 ws = sbq_wake_ptr(sbq);
306 if (!ws) 382 if (!ws)
307 return; 383 return;
308 384
309 wait_cnt = atomic_dec_return(&ws->wait_cnt); 385 wait_cnt = atomic_dec_return(&ws->wait_cnt);
310 if (unlikely(wait_cnt < 0)) 386 if (wait_cnt <= 0) {
311 wait_cnt = atomic_inc_return(&ws->wait_cnt); 387 wake_batch = READ_ONCE(sbq->wake_batch);
312 if (wait_cnt == 0) { 388 /*
313 atomic_add(sbq->wake_batch, &ws->wait_cnt); 389 * Pairs with the memory barrier in sbitmap_queue_resize() to
390 * ensure that we see the batch size update before the wait
391 * count is reset.
392 */
393 smp_mb__before_atomic();
394 /*
395 * If there are concurrent callers to sbq_wake_up(), the last
396 * one to decrement the wait count below zero will bump it back
397 * up. If there is a concurrent resize, the count reset will
398 * either cause the cmpxchg to fail or overwrite after the
399 * cmpxchg.
400 */
401 atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
314 sbq_index_atomic_inc(&sbq->wake_index); 402 sbq_index_atomic_inc(&sbq->wake_index);
315 wake_up(&ws->wait); 403 wake_up(&ws->wait);
316 } 404 }
@@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
331 int i, wake_index; 419 int i, wake_index;
332 420
333 /* 421 /*
334 * Make sure all changes prior to this are visible from other CPUs. 422 * Pairs with the memory barrier in set_current_state() like in
423 * sbq_wake_up().
335 */ 424 */
336 smp_mb(); 425 smp_mb();
337 wake_index = atomic_read(&sbq->wake_index); 426 wake_index = atomic_read(&sbq->wake_index);
@@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
345 } 434 }
346} 435}
347EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 436EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
437
438void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
439{
440 bool first;
441 int i;
442
443 sbitmap_show(&sbq->sb, m);
444
445 seq_puts(m, "alloc_hint={");
446 first = true;
447 for_each_possible_cpu(i) {
448 if (!first)
449 seq_puts(m, ", ");
450 first = false;
451 seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
452 }
453 seq_puts(m, "}\n");
454
455 seq_printf(m, "wake_batch=%u\n", sbq->wake_batch);
456 seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index));
457
458 seq_puts(m, "ws={\n");
459 for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
460 struct sbq_wait_state *ws = &sbq->ws[i];
461
462 seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n",
463 atomic_read(&ws->wait_cnt),
464 waitqueue_active(&ws->wait) ? "active" : "inactive");
465 }
466 seq_puts(m, "}\n");
467
468 seq_printf(m, "round_robin=%d\n", sbq->round_robin);
469}
470EXPORT_SYMBOL_GPL(sbitmap_queue_show);
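
Put together, sbitmap_queue_show() emits one line per field in the formats visible above. For a small queue the output would look roughly like the following; every value is illustrative (a depth of 64 with the default shift, a four-CPU alloc_hint list, and the eight wait queues each initialized to the wake batch), and the exact format is explicitly allowed to change.

depth=64
busy=3
bits_per_word=64
map_nr=1
alloc_hint={12, 0, 57, 34}
wake_batch=8
wake_index=0
ws={
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
}
round_robin=0
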