author:    Jens Axboe <axboe@fb.com>  2017-02-17 16:06:45 -0500
committer: Jens Axboe <axboe@fb.com>  2017-02-17 16:06:45 -0500
commit:    6010720da8aab51f33beee63b73cf88016e9b250
tree:      a4c5a7f645998e86a1f49cb05f8e0c4e51448294
parent:    2fe1e8a7b2f4dcac3fcb07ff06b0ae7396201fd6
parent:    8a9ae523282f324989850fcf41312b42a2fb9296
Merge branch 'for-4.11/block' into for-4.11/linus-merge
Signed-off-by: Jens Axboe <axboe@fb.com>
68 files changed, 7340 insertions, 2832 deletions
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
index c06233fe52ac..8f85b0e41046 100644
--- a/Documentation/cdrom/cdrom-standard.tex
+++ b/Documentation/cdrom/cdrom-standard.tex
@@ -249,7 +249,6 @@ struct& cdrom_device_ops\ \{ \hidewidth\cr
249 | unsigned\ long);\cr | 249 | unsigned\ long);\cr |
250 | \noalign{\medskip} | 250 | \noalign{\medskip} |
251 | &const\ int& capability;& capability flags \cr | 251 | &const\ int& capability;& capability flags \cr |
252 | &int& n_minors;& number of active minor devices \cr | ||
253 | \};\cr | 252 | \};\cr |
254 | } | 253 | } |
255 | $$ | 254 | $$ |
@@ -258,13 +257,7 @@ it should add a function pointer to this $struct$. When a particular
258 | function is not implemented, however, this $struct$ should contain a | 257 | function is not implemented, however, this $struct$ should contain a |
259 | NULL instead. The $capability$ flags specify the capabilities of the | 258 | NULL instead. The $capability$ flags specify the capabilities of the |
260 | \cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive | 259 | \cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive |
261 | is registered with the \UCD. The value $n_minors$ should be a positive | 260 | is registered with the \UCD. |
262 | value indicating the number of minor devices that are supported by | ||
263 | the low-level device driver, normally~1. Although these two variables | ||
264 | are `informative' rather than `operational,' they are included in | ||
265 | $cdrom_device_ops$ because they describe the capability of the {\em | ||
266 | driver\/} rather than the {\em drive}. Nomenclature has always been | ||
267 | difficult in computer programming. | ||
268 | 261 | ||
269 | Note that most functions have fewer parameters than their | 262 | Note that most functions have fewer parameters than their |
270 | $blkdev_fops$ counterparts. This is because very little of the | 263 | $blkdev_fops$ counterparts. This is because very little of the |
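The hunk above drops n_minors, leaving capability as the only "informative" field in the documented operations structure. A heavily abridged sketch of the shape the text now describes (illustrative only, not the full struct cdrom_device_ops from include/linux/cdrom.h; most callbacks are elided):

struct cdrom_device_info;

struct cdrom_device_ops_sketch {
	int (*open)(struct cdrom_device_info *, int);
	void (*release)(struct cdrom_device_info *);
	int (*drive_status)(struct cdrom_device_info *, int);
	/* ... remaining callbacks elided ... */
	const int capability;	/* capability flags */
	/* int n_minors; -- removed by this series */
};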
diff --git a/MAINTAINERS b/MAINTAINERS
index 527d13759ecc..864e1fd31f0c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8612,10 +8612,10 @@ S: Maintained
8612 | F: drivers/net/ethernet/netronome/ | 8612 | F: drivers/net/ethernet/netronome/ |
8613 | 8613 | ||
8614 | NETWORK BLOCK DEVICE (NBD) | 8614 | NETWORK BLOCK DEVICE (NBD) |
8615 | M: Markus Pargmann <mpa@pengutronix.de> | 8615 | M: Josef Bacik <jbacik@fb.com> |
8616 | S: Maintained | 8616 | S: Maintained |
8617 | L: linux-block@vger.kernel.org | ||
8617 | L: nbd-general@lists.sourceforge.net | 8618 | L: nbd-general@lists.sourceforge.net |
8618 | T: git git://git.pengutronix.de/git/mpa/linux-nbd.git | ||
8619 | F: Documentation/blockdev/nbd.txt | 8619 | F: Documentation/blockdev/nbd.txt |
8620 | F: drivers/block/nbd.c | 8620 | F: drivers/block/nbd.c |
8621 | F: include/uapi/linux/nbd.h | 8621 | F: include/uapi/linux/nbd.h |
@@ -11089,6 +11089,17 @@ L: linux-mmc@vger.kernel.org
11089 | S: Maintained | 11089 | S: Maintained |
11090 | F: drivers/mmc/host/sdhci-spear.c | 11090 | F: drivers/mmc/host/sdhci-spear.c |
11091 | 11091 | ||
11092 | SECURE ENCRYPTING DEVICE (SED) OPAL DRIVER | ||
11093 | M: Scott Bauer <scott.bauer@intel.com> | ||
11094 | M: Jonathan Derrick <jonathan.derrick@intel.com> | ||
11095 | M: Rafael Antognolli <rafael.antognolli@intel.com> | ||
11096 | L: linux-block@vger.kernel.org | ||
11097 | S: Supported | ||
11098 | F: block/sed* | ||
11099 | F: block/opal_proto.h | ||
11100 | F: include/linux/sed* | ||
11101 | F: include/uapi/linux/sed* | ||
11102 | |||
11092 | SECURITY SUBSYSTEM | 11103 | SECURITY SUBSYSTEM |
11093 | M: James Morris <james.l.morris@oracle.com> | 11104 | M: James Morris <james.l.morris@oracle.com> |
11094 | M: "Serge E. Hallyn" <serge@hallyn.com> | 11105 | M: "Serge E. Hallyn" <serge@hallyn.com> |
diff --git a/block/Kconfig b/block/Kconfig
index 8bf114a3858a..1aef809affae 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -147,6 +147,25 @@ config BLK_WBT_MQ
147 | Multiqueue currently doesn't have support for IO scheduling, | 147 | Multiqueue currently doesn't have support for IO scheduling, |
148 | enabling this option is recommended. | 148 | enabling this option is recommended. |
149 | 149 | ||
150 | config BLK_DEBUG_FS | ||
151 | bool "Block layer debugging information in debugfs" | ||
152 | default y | ||
153 | depends on DEBUG_FS | ||
154 | ---help--- | ||
155 | Include block layer debugging information in debugfs. This information | ||
156 | is mostly useful for kernel developers, but it doesn't incur any cost | ||
157 | at runtime. | ||
158 | |||
159 | Unless you are building a kernel for a tiny system, you should | ||
160 | say Y here. | ||
161 | |||
162 | config BLK_SED_OPAL | ||
163 | bool "Logic for interfacing with Opal enabled SEDs" | ||
164 | ---help--- | ||
165 | Builds Logic for interfacing with Opal enabled controllers. | ||
166 | Enabling this option enables users to setup/unlock/lock | ||
167 | Locking ranges for SED devices using the Opal protocol. | ||
168 | |||
150 | menu "Partition Types" | 169 | menu "Partition Types" |
151 | 170 | ||
152 | source "block/partitions/Kconfig" | 171 | source "block/partitions/Kconfig" |
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 421bef9c4c48..0715ce93daef 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -63,6 +63,56 @@ config DEFAULT_IOSCHED
63 | default "cfq" if DEFAULT_CFQ | 63 | default "cfq" if DEFAULT_CFQ |
64 | default "noop" if DEFAULT_NOOP | 64 | default "noop" if DEFAULT_NOOP |
65 | 65 | ||
66 | config MQ_IOSCHED_DEADLINE | ||
67 | tristate "MQ deadline I/O scheduler" | ||
68 | default y | ||
69 | ---help--- | ||
70 | MQ version of the deadline IO scheduler. | ||
71 | |||
72 | config MQ_IOSCHED_NONE | ||
73 | bool | ||
74 | default y | ||
75 | |||
76 | choice | ||
77 | prompt "Default single-queue blk-mq I/O scheduler" | ||
78 | default DEFAULT_SQ_NONE | ||
79 | help | ||
80 | Select the I/O scheduler which will be used by default for blk-mq | ||
81 | managed block devices with a single queue. | ||
82 | |||
83 | config DEFAULT_SQ_DEADLINE | ||
84 | bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y | ||
85 | |||
86 | config DEFAULT_SQ_NONE | ||
87 | bool "None" | ||
88 | |||
89 | endchoice | ||
90 | |||
91 | config DEFAULT_SQ_IOSCHED | ||
92 | string | ||
93 | default "mq-deadline" if DEFAULT_SQ_DEADLINE | ||
94 | default "none" if DEFAULT_SQ_NONE | ||
95 | |||
96 | choice | ||
97 | prompt "Default multi-queue blk-mq I/O scheduler" | ||
98 | default DEFAULT_MQ_NONE | ||
99 | help | ||
100 | Select the I/O scheduler which will be used by default for blk-mq | ||
101 | managed block devices with multiple queues. | ||
102 | |||
103 | config DEFAULT_MQ_DEADLINE | ||
104 | bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y | ||
105 | |||
106 | config DEFAULT_MQ_NONE | ||
107 | bool "None" | ||
108 | |||
109 | endchoice | ||
110 | |||
111 | config DEFAULT_MQ_IOSCHED | ||
112 | string | ||
113 | default "mq-deadline" if DEFAULT_MQ_DEADLINE | ||
114 | default "none" if DEFAULT_MQ_NONE | ||
115 | |||
66 | endmenu | 116 | endmenu |
67 | 117 | ||
68 | endif | 118 | endif |
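The two choice blocks resolve to the strings CONFIG_DEFAULT_SQ_IOSCHED and CONFIG_DEFAULT_MQ_IOSCHED. A hedged sketch of how a default-scheduler string like this can be consumed when a blk-mq queue picks its elevator; the actual call site in this series lives in the elevator code and may differ in detail:

/* Sketch only: assumes an elevator_get()-style lookup by name, as used by
 * the legacy default-elevator handling; the real blk-mq default wiring in
 * this series may differ. */
static struct elevator_type *default_mq_elevator(struct request_queue *q)
{
	if (q->nr_hw_queues == 1)
		return elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
	return elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
}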
diff --git a/block/Makefile b/block/Makefile
index a827f988c4e6..6ba1b1bc9529 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ |
7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
8 | blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ | 8 | blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ |
9 | blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ | 9 | blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ |
10 | genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ | 10 | genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ |
11 | badblocks.o partitions/ | 11 | badblocks.o partitions/ |
12 | 12 | ||
@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
18 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o | 18 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o |
19 | obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o | 19 | obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o |
20 | obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o | 20 | obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o |
21 | obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o | ||
21 | 22 | ||
22 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o | 23 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o |
23 | obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o | 24 | obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o |
@@ -25,3 +26,5 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
25 | obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o | 26 | obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o |
26 | obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o | 27 | obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o |
27 | obj-$(CONFIG_BLK_WBT) += blk-wbt.o | 28 | obj-$(CONFIG_BLK_WBT) += blk-wbt.o |
29 | obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o | ||
30 | obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o | ||
diff --git a/block/bio.c b/block/bio.c
index 2b375020fc49..d3c26d1cb1da 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1403,7 +1403,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
1403 | bio_set_flag(bio, BIO_USER_MAPPED); | 1403 | bio_set_flag(bio, BIO_USER_MAPPED); |
1404 | 1404 | ||
1405 | /* | 1405 | /* |
1406 | * subtle -- if __bio_map_user() ended up bouncing a bio, | 1406 | * subtle -- if bio_map_user_iov() ended up bouncing a bio, |
1407 | * it would normally disappear when its bi_end_io is run. | 1407 | * it would normally disappear when its bi_end_io is run. |
1408 | * however, we need it for the unmap, so grab an extra | 1408 | * however, we need it for the unmap, so grab an extra |
1409 | * reference to it | 1409 | * reference to it |
@@ -1445,8 +1445,8 @@ static void __bio_unmap_user(struct bio *bio)
1445 | * bio_unmap_user - unmap a bio | 1445 | * bio_unmap_user - unmap a bio |
1446 | * @bio: the bio being unmapped | 1446 | * @bio: the bio being unmapped |
1447 | * | 1447 | * |
1448 | * Unmap a bio previously mapped by bio_map_user(). Must be called with | 1448 | * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from |
1449 | * a process context. | 1449 | * process context. |
1450 | * | 1450 | * |
1451 | * bio_unmap_user() may sleep. | 1451 | * bio_unmap_user() may sleep. |
1452 | */ | 1452 | */ |
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8ba0af780e88..fb59a3edc778 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1223,7 +1223,10 @@ int blkcg_activate_policy(struct request_queue *q,
1223 | if (blkcg_policy_enabled(q, pol)) | 1223 | if (blkcg_policy_enabled(q, pol)) |
1224 | return 0; | 1224 | return 0; |
1225 | 1225 | ||
1226 | blk_queue_bypass_start(q); | 1226 | if (q->mq_ops) |
1227 | blk_mq_freeze_queue(q); | ||
1228 | else | ||
1229 | blk_queue_bypass_start(q); | ||
1227 | pd_prealloc: | 1230 | pd_prealloc: |
1228 | if (!pd_prealloc) { | 1231 | if (!pd_prealloc) { |
1229 | pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); | 1232 | pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); |
@@ -1261,7 +1264,10 @@ pd_prealloc:
1261 | 1264 | ||
1262 | spin_unlock_irq(q->queue_lock); | 1265 | spin_unlock_irq(q->queue_lock); |
1263 | out_bypass_end: | 1266 | out_bypass_end: |
1264 | blk_queue_bypass_end(q); | 1267 | if (q->mq_ops) |
1268 | blk_mq_unfreeze_queue(q); | ||
1269 | else | ||
1270 | blk_queue_bypass_end(q); | ||
1265 | if (pd_prealloc) | 1271 | if (pd_prealloc) |
1266 | pol->pd_free_fn(pd_prealloc); | 1272 | pol->pd_free_fn(pd_prealloc); |
1267 | return ret; | 1273 | return ret; |
@@ -1284,7 +1290,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
1284 | if (!blkcg_policy_enabled(q, pol)) | 1290 | if (!blkcg_policy_enabled(q, pol)) |
1285 | return; | 1291 | return; |
1286 | 1292 | ||
1287 | blk_queue_bypass_start(q); | 1293 | if (q->mq_ops) |
1294 | blk_mq_freeze_queue(q); | ||
1295 | else | ||
1296 | blk_queue_bypass_start(q); | ||
1297 | |||
1288 | spin_lock_irq(q->queue_lock); | 1298 | spin_lock_irq(q->queue_lock); |
1289 | 1299 | ||
1290 | __clear_bit(pol->plid, q->blkcg_pols); | 1300 | __clear_bit(pol->plid, q->blkcg_pols); |
@@ -1304,7 +1314,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
1304 | } | 1314 | } |
1305 | 1315 | ||
1306 | spin_unlock_irq(q->queue_lock); | 1316 | spin_unlock_irq(q->queue_lock); |
1307 | blk_queue_bypass_end(q); | 1317 | |
1318 | if (q->mq_ops) | ||
1319 | blk_mq_unfreeze_queue(q); | ||
1320 | else | ||
1321 | blk_queue_bypass_end(q); | ||
1308 | } | 1322 | } |
1309 | EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); | 1323 | EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); |
1310 | 1324 | ||
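The pattern added in both blkcg_activate_policy() and blkcg_deactivate_policy() is the same: freeze the queue on the blk-mq path, enter bypass mode on the legacy path. Purely as an illustration, it could be captured in a pair of hypothetical helpers like these (not part of the patch; all four callees are existing block-layer functions):

/* Hypothetical helpers -- they only name the pattern used above. */
static void blkcg_quiesce_queue(struct request_queue *q)
{
	if (q->mq_ops)
		blk_mq_freeze_queue(q);		/* blk-mq: drain by freezing */
	else
		blk_queue_bypass_start(q);	/* legacy: enter bypass mode */
}

static void blkcg_unquiesce_queue(struct request_queue *q)
{
	if (q->mq_ops)
		blk_mq_unfreeze_queue(q);
	else
		blk_queue_bypass_end(q);
}

Either form quiesces in-flight requests before the per-policy data is swapped.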
diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c58b64..b2df55a65250 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -39,6 +39,7 @@
39 | 39 | ||
40 | #include "blk.h" | 40 | #include "blk.h" |
41 | #include "blk-mq.h" | 41 | #include "blk-mq.h" |
42 | #include "blk-mq-sched.h" | ||
42 | #include "blk-wbt.h" | 43 | #include "blk-wbt.h" |
43 | 44 | ||
44 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | 45 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
134 | rq->cmd = rq->__cmd; | 135 | rq->cmd = rq->__cmd; |
135 | rq->cmd_len = BLK_MAX_CDB; | 136 | rq->cmd_len = BLK_MAX_CDB; |
136 | rq->tag = -1; | 137 | rq->tag = -1; |
138 | rq->internal_tag = -1; | ||
137 | rq->start_time = jiffies; | 139 | rq->start_time = jiffies; |
138 | set_start_time_ns(rq); | 140 | set_start_time_ns(rq); |
139 | rq->part = NULL; | 141 | rq->part = NULL; |
@@ -525,12 +527,14 @@ void blk_set_queue_dying(struct request_queue *q)
525 | else { | 527 | else { |
526 | struct request_list *rl; | 528 | struct request_list *rl; |
527 | 529 | ||
530 | spin_lock_irq(q->queue_lock); | ||
528 | blk_queue_for_each_rl(rl, q) { | 531 | blk_queue_for_each_rl(rl, q) { |
529 | if (rl->rq_pool) { | 532 | if (rl->rq_pool) { |
530 | wake_up(&rl->wait[BLK_RW_SYNC]); | 533 | wake_up(&rl->wait[BLK_RW_SYNC]); |
531 | wake_up(&rl->wait[BLK_RW_ASYNC]); | 534 | wake_up(&rl->wait[BLK_RW_ASYNC]); |
532 | } | 535 | } |
533 | } | 536 | } |
537 | spin_unlock_irq(q->queue_lock); | ||
534 | } | 538 | } |
535 | } | 539 | } |
536 | EXPORT_SYMBOL_GPL(blk_set_queue_dying); | 540 | EXPORT_SYMBOL_GPL(blk_set_queue_dying); |
@@ -1033,29 +1037,13 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
1033 | * Flush requests do not use the elevator so skip initialization. | 1037 | * Flush requests do not use the elevator so skip initialization. |
1034 | * This allows a request to share the flush and elevator data. | 1038 | * This allows a request to share the flush and elevator data. |
1035 | */ | 1039 | */ |
1036 | if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) | 1040 | if (op_is_flush(bio->bi_opf)) |
1037 | return false; | 1041 | return false; |
1038 | 1042 | ||
1039 | return true; | 1043 | return true; |
1040 | } | 1044 | } |
1041 | 1045 | ||
1042 | /** | 1046 | /** |
1043 | * rq_ioc - determine io_context for request allocation | ||
1044 | * @bio: request being allocated is for this bio (can be %NULL) | ||
1045 | * | ||
1046 | * Determine io_context to use for request allocation for @bio. May return | ||
1047 | * %NULL if %current->io_context doesn't exist. | ||
1048 | */ | ||
1049 | static struct io_context *rq_ioc(struct bio *bio) | ||
1050 | { | ||
1051 | #ifdef CONFIG_BLK_CGROUP | ||
1052 | if (bio && bio->bi_ioc) | ||
1053 | return bio->bi_ioc; | ||
1054 | #endif | ||
1055 | return current->io_context; | ||
1056 | } | ||
1057 | |||
1058 | /** | ||
1059 | * __get_request - get a free request | 1047 | * __get_request - get a free request |
1060 | * @rl: request list to allocate from | 1048 | * @rl: request list to allocate from |
1061 | * @op: operation and flags | 1049 | * @op: operation and flags |
@@ -1655,7 +1643,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
1655 | return BLK_QC_T_NONE; | 1643 | return BLK_QC_T_NONE; |
1656 | } | 1644 | } |
1657 | 1645 | ||
1658 | if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) { | 1646 | if (op_is_flush(bio->bi_opf)) { |
1659 | spin_lock_irq(q->queue_lock); | 1647 | spin_lock_irq(q->queue_lock); |
1660 | where = ELEVATOR_INSERT_FLUSH; | 1648 | where = ELEVATOR_INSERT_FLUSH; |
1661 | goto get_rq; | 1649 | goto get_rq; |
@@ -1894,7 +1882,7 @@ generic_make_request_checks(struct bio *bio)
1894 | * drivers without flush support don't have to worry | 1882 | * drivers without flush support don't have to worry |
1895 | * about them. | 1883 | * about them. |
1896 | */ | 1884 | */ |
1897 | if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && | 1885 | if (op_is_flush(bio->bi_opf) && |
1898 | !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { | 1886 | !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { |
1899 | bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); | 1887 | bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); |
1900 | if (!nr_sectors) { | 1888 | if (!nr_sectors) { |
@@ -2143,7 +2131,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2143 | if (q->mq_ops) { | 2131 | if (q->mq_ops) { |
2144 | if (blk_queue_io_stat(q)) | 2132 | if (blk_queue_io_stat(q)) |
2145 | blk_account_io_start(rq, true); | 2133 | blk_account_io_start(rq, true); |
2146 | blk_mq_insert_request(rq, false, true, false); | 2134 | blk_mq_sched_insert_request(rq, false, true, false, false); |
2147 | return 0; | 2135 | return 0; |
2148 | } | 2136 | } |
2149 | 2137 | ||
@@ -2159,7 +2147,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
2159 | */ | 2147 | */ |
2160 | BUG_ON(blk_queued_rq(rq)); | 2148 | BUG_ON(blk_queued_rq(rq)); |
2161 | 2149 | ||
2162 | if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) | 2150 | if (op_is_flush(rq->cmd_flags)) |
2163 | where = ELEVATOR_INSERT_FLUSH; | 2151 | where = ELEVATOR_INSERT_FLUSH; |
2164 | 2152 | ||
2165 | add_acct_request(q, rq, where); | 2153 | add_acct_request(q, rq, where); |
@@ -3270,7 +3258,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
3270 | /* | 3258 | /* |
3271 | * rq is already accounted, so use raw insert | 3259 | * rq is already accounted, so use raw insert |
3272 | */ | 3260 | */ |
3273 | if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA)) | 3261 | if (op_is_flush(rq->cmd_flags)) |
3274 | __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); | 3262 | __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); |
3275 | else | 3263 | else |
3276 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); | 3264 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); |
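All of the open-coded (REQ_PREFLUSH | REQ_FUA) tests in the hunks above are replaced with op_is_flush(). The helper is added by another patch in this series to include/linux/blk_types.h and amounts to:

/* From include/linux/blk_types.h in this series. */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}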
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3ecb00a6cf45..ed1f10165268 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -9,6 +9,7 @@
9 | #include <linux/sched/sysctl.h> | 9 | #include <linux/sched/sysctl.h> |
10 | 10 | ||
11 | #include "blk.h" | 11 | #include "blk.h" |
12 | #include "blk-mq-sched.h" | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * for max sense size | 15 | * for max sense size |
@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
65 | * be reused after dying flag is set | 66 | * be reused after dying flag is set |
66 | */ | 67 | */ |
67 | if (q->mq_ops) { | 68 | if (q->mq_ops) { |
68 | blk_mq_insert_request(rq, at_head, true, false); | 69 | blk_mq_sched_insert_request(rq, at_head, true, false, false); |
69 | return; | 70 | return; |
70 | } | 71 | } |
71 | 72 | ||
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 20b7c7a02f1c..4427896641ac 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -74,6 +74,7 @@
74 | #include "blk.h" | 74 | #include "blk.h" |
75 | #include "blk-mq.h" | 75 | #include "blk-mq.h" |
76 | #include "blk-mq-tag.h" | 76 | #include "blk-mq-tag.h" |
77 | #include "blk-mq-sched.h" | ||
77 | 78 | ||
78 | /* FLUSH/FUA sequences */ | 79 | /* FLUSH/FUA sequences */ |
79 | enum { | 80 | enum { |
@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
391 | * the comment in flush_end_io(). | 392 | * the comment in flush_end_io(). |
392 | */ | 393 | */ |
393 | spin_lock_irqsave(&fq->mq_flush_lock, flags); | 394 | spin_lock_irqsave(&fq->mq_flush_lock, flags); |
394 | if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) | 395 | blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); |
395 | blk_mq_run_hw_queue(hctx, true); | ||
396 | spin_unlock_irqrestore(&fq->mq_flush_lock, flags); | 396 | spin_unlock_irqrestore(&fq->mq_flush_lock, flags); |
397 | |||
398 | blk_mq_run_hw_queue(hctx, true); | ||
397 | } | 399 | } |
398 | 400 | ||
399 | /** | 401 | /** |
@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
453 | */ | 455 | */ |
454 | if ((policy & REQ_FSEQ_DATA) && | 456 | if ((policy & REQ_FSEQ_DATA) && |
455 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { | 457 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { |
456 | if (q->mq_ops) { | 458 | if (q->mq_ops) |
457 | blk_mq_insert_request(rq, false, true, false); | 459 | blk_mq_sched_insert_request(rq, false, true, false, false); |
458 | } else | 460 | else |
459 | list_add_tail(&rq->queuelist, &q->queue_head); | 461 | list_add_tail(&rq->queuelist, &q->queue_head); |
460 | return; | 462 | return; |
461 | } | 463 | } |
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 381cb50a673c..fe186a9eade9 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -43,8 +43,10 @@ static void ioc_exit_icq(struct io_cq *icq)
43 | if (icq->flags & ICQ_EXITED) | 43 | if (icq->flags & ICQ_EXITED) |
44 | return; | 44 | return; |
45 | 45 | ||
46 | if (et->ops.elevator_exit_icq_fn) | 46 | if (et->uses_mq && et->ops.mq.exit_icq) |
47 | et->ops.elevator_exit_icq_fn(icq); | 47 | et->ops.mq.exit_icq(icq); |
48 | else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn) | ||
49 | et->ops.sq.elevator_exit_icq_fn(icq); | ||
48 | 50 | ||
49 | icq->flags |= ICQ_EXITED; | 51 | icq->flags |= ICQ_EXITED; |
50 | } | 52 | } |
@@ -383,8 +385,10 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
383 | if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { | 385 | if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { |
384 | hlist_add_head(&icq->ioc_node, &ioc->icq_list); | 386 | hlist_add_head(&icq->ioc_node, &ioc->icq_list); |
385 | list_add(&icq->q_node, &q->icq_list); | 387 | list_add(&icq->q_node, &q->icq_list); |
386 | if (et->ops.elevator_init_icq_fn) | 388 | if (et->uses_mq && et->ops.mq.init_icq) |
387 | et->ops.elevator_init_icq_fn(icq); | 389 | et->ops.mq.init_icq(icq); |
390 | else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn) | ||
391 | et->ops.sq.elevator_init_icq_fn(icq); | ||
388 | } else { | 392 | } else { |
389 | kmem_cache_free(et->icq_cache, icq); | 393 | kmem_cache_free(et->icq_cache, icq); |
390 | icq = ioc_lookup_icq(ioc, q); | 394 | icq = ioc_lookup_icq(ioc, q); |
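The ops.sq/ops.mq split used in these hunks comes from the elevator rework in this series: the callbacks live in a union selected by uses_mq, so a scheduler is either a legacy one or a blk-mq one, never both. A heavily abridged sketch of the resulting elevator_type layout (most members elided, struct renamed to make clear this is not the full definition):

/* Abridged sketch of struct elevator_type after this series. */
struct elevator_type_sketch {
	union {
		struct elevator_ops sq;		/* legacy request path */
		struct elevator_mq_ops mq;	/* blk-mq path */
	} ops;
	bool uses_mq;	/* selects which side of the union is valid */
	/* ... icq_cache, elevator_name, elevator_attrs, etc. elided ... */
};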
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 182398cb1524..6aa43dec5af4 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
763 | { | 763 | { |
764 | struct elevator_queue *e = q->elevator; | 764 | struct elevator_queue *e = q->elevator; |
765 | 765 | ||
766 | if (e->type->ops.elevator_allow_rq_merge_fn) | 766 | if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn) |
767 | if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) | 767 | if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) |
768 | return 0; | 768 | return 0; |
769 | 769 | ||
770 | return attempt_merge(q, rq, next); | 770 | return attempt_merge(q, rq, next); |
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
new file mode 100644
index 000000000000..5cd2b435a9f5
--- /dev/null
+++ b/block/blk-mq-debugfs.c
@@ -0,0 +1,756 @@
1 | /* | ||
2 | * Copyright (C) 2017 Facebook | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/blkdev.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | |||
21 | #include <linux/blk-mq.h> | ||
22 | #include "blk-mq.h" | ||
23 | #include "blk-mq-tag.h" | ||
24 | |||
25 | struct blk_mq_debugfs_attr { | ||
26 | const char *name; | ||
27 | umode_t mode; | ||
28 | const struct file_operations *fops; | ||
29 | }; | ||
30 | |||
31 | static struct dentry *block_debugfs_root; | ||
32 | |||
33 | static int blk_mq_debugfs_seq_open(struct inode *inode, struct file *file, | ||
34 | const struct seq_operations *ops) | ||
35 | { | ||
36 | struct seq_file *m; | ||
37 | int ret; | ||
38 | |||
39 | ret = seq_open(file, ops); | ||
40 | if (!ret) { | ||
41 | m = file->private_data; | ||
42 | m->private = inode->i_private; | ||
43 | } | ||
44 | return ret; | ||
45 | } | ||
46 | |||
47 | static int hctx_state_show(struct seq_file *m, void *v) | ||
48 | { | ||
49 | struct blk_mq_hw_ctx *hctx = m->private; | ||
50 | |||
51 | seq_printf(m, "0x%lx\n", hctx->state); | ||
52 | return 0; | ||
53 | } | ||
54 | |||
55 | static int hctx_state_open(struct inode *inode, struct file *file) | ||
56 | { | ||
57 | return single_open(file, hctx_state_show, inode->i_private); | ||
58 | } | ||
59 | |||
60 | static const struct file_operations hctx_state_fops = { | ||
61 | .open = hctx_state_open, | ||
62 | .read = seq_read, | ||
63 | .llseek = seq_lseek, | ||
64 | .release = single_release, | ||
65 | }; | ||
66 | |||
67 | static int hctx_flags_show(struct seq_file *m, void *v) | ||
68 | { | ||
69 | struct blk_mq_hw_ctx *hctx = m->private; | ||
70 | |||
71 | seq_printf(m, "0x%lx\n", hctx->flags); | ||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | static int hctx_flags_open(struct inode *inode, struct file *file) | ||
76 | { | ||
77 | return single_open(file, hctx_flags_show, inode->i_private); | ||
78 | } | ||
79 | |||
80 | static const struct file_operations hctx_flags_fops = { | ||
81 | .open = hctx_flags_open, | ||
82 | .read = seq_read, | ||
83 | .llseek = seq_lseek, | ||
84 | .release = single_release, | ||
85 | }; | ||
86 | |||
87 | static int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) | ||
88 | { | ||
89 | struct request *rq = list_entry_rq(v); | ||
90 | |||
91 | seq_printf(m, "%p {.cmd_type=%u, .cmd_flags=0x%x, .rq_flags=0x%x, .tag=%d, .internal_tag=%d}\n", | ||
92 | rq, rq->cmd_type, rq->cmd_flags, (unsigned int)rq->rq_flags, | ||
93 | rq->tag, rq->internal_tag); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) | ||
98 | { | ||
99 | struct blk_mq_hw_ctx *hctx = m->private; | ||
100 | |||
101 | spin_lock(&hctx->lock); | ||
102 | return seq_list_start(&hctx->dispatch, *pos); | ||
103 | } | ||
104 | |||
105 | static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos) | ||
106 | { | ||
107 | struct blk_mq_hw_ctx *hctx = m->private; | ||
108 | |||
109 | return seq_list_next(v, &hctx->dispatch, pos); | ||
110 | } | ||
111 | |||
112 | static void hctx_dispatch_stop(struct seq_file *m, void *v) | ||
113 | { | ||
114 | struct blk_mq_hw_ctx *hctx = m->private; | ||
115 | |||
116 | spin_unlock(&hctx->lock); | ||
117 | } | ||
118 | |||
119 | static const struct seq_operations hctx_dispatch_seq_ops = { | ||
120 | .start = hctx_dispatch_start, | ||
121 | .next = hctx_dispatch_next, | ||
122 | .stop = hctx_dispatch_stop, | ||
123 | .show = blk_mq_debugfs_rq_show, | ||
124 | }; | ||
125 | |||
126 | static int hctx_dispatch_open(struct inode *inode, struct file *file) | ||
127 | { | ||
128 | return blk_mq_debugfs_seq_open(inode, file, &hctx_dispatch_seq_ops); | ||
129 | } | ||
130 | |||
131 | static const struct file_operations hctx_dispatch_fops = { | ||
132 | .open = hctx_dispatch_open, | ||
133 | .read = seq_read, | ||
134 | .llseek = seq_lseek, | ||
135 | .release = seq_release, | ||
136 | }; | ||
137 | |||
138 | static int hctx_ctx_map_show(struct seq_file *m, void *v) | ||
139 | { | ||
140 | struct blk_mq_hw_ctx *hctx = m->private; | ||
141 | |||
142 | sbitmap_bitmap_show(&hctx->ctx_map, m); | ||
143 | return 0; | ||
144 | } | ||
145 | |||
146 | static int hctx_ctx_map_open(struct inode *inode, struct file *file) | ||
147 | { | ||
148 | return single_open(file, hctx_ctx_map_show, inode->i_private); | ||
149 | } | ||
150 | |||
151 | static const struct file_operations hctx_ctx_map_fops = { | ||
152 | .open = hctx_ctx_map_open, | ||
153 | .read = seq_read, | ||
154 | .llseek = seq_lseek, | ||
155 | .release = single_release, | ||
156 | }; | ||
157 | |||
158 | static void blk_mq_debugfs_tags_show(struct seq_file *m, | ||
159 | struct blk_mq_tags *tags) | ||
160 | { | ||
161 | seq_printf(m, "nr_tags=%u\n", tags->nr_tags); | ||
162 | seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags); | ||
163 | seq_printf(m, "active_queues=%d\n", | ||
164 | atomic_read(&tags->active_queues)); | ||
165 | |||
166 | seq_puts(m, "\nbitmap_tags:\n"); | ||
167 | sbitmap_queue_show(&tags->bitmap_tags, m); | ||
168 | |||
169 | if (tags->nr_reserved_tags) { | ||
170 | seq_puts(m, "\nbreserved_tags:\n"); | ||
171 | sbitmap_queue_show(&tags->breserved_tags, m); | ||
172 | } | ||
173 | } | ||
174 | |||
175 | static int hctx_tags_show(struct seq_file *m, void *v) | ||
176 | { | ||
177 | struct blk_mq_hw_ctx *hctx = m->private; | ||
178 | struct request_queue *q = hctx->queue; | ||
179 | |||
180 | mutex_lock(&q->sysfs_lock); | ||
181 | if (hctx->tags) | ||
182 | blk_mq_debugfs_tags_show(m, hctx->tags); | ||
183 | mutex_unlock(&q->sysfs_lock); | ||
184 | |||
185 | return 0; | ||
186 | } | ||
187 | |||
188 | static int hctx_tags_open(struct inode *inode, struct file *file) | ||
189 | { | ||
190 | return single_open(file, hctx_tags_show, inode->i_private); | ||
191 | } | ||
192 | |||
193 | static const struct file_operations hctx_tags_fops = { | ||
194 | .open = hctx_tags_open, | ||
195 | .read = seq_read, | ||
196 | .llseek = seq_lseek, | ||
197 | .release = single_release, | ||
198 | }; | ||
199 | |||
200 | static int hctx_tags_bitmap_show(struct seq_file *m, void *v) | ||
201 | { | ||
202 | struct blk_mq_hw_ctx *hctx = m->private; | ||
203 | struct request_queue *q = hctx->queue; | ||
204 | |||
205 | mutex_lock(&q->sysfs_lock); | ||
206 | if (hctx->tags) | ||
207 | sbitmap_bitmap_show(&hctx->tags->bitmap_tags.sb, m); | ||
208 | mutex_unlock(&q->sysfs_lock); | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | static int hctx_tags_bitmap_open(struct inode *inode, struct file *file) | ||
213 | { | ||
214 | return single_open(file, hctx_tags_bitmap_show, inode->i_private); | ||
215 | } | ||
216 | |||
217 | static const struct file_operations hctx_tags_bitmap_fops = { | ||
218 | .open = hctx_tags_bitmap_open, | ||
219 | .read = seq_read, | ||
220 | .llseek = seq_lseek, | ||
221 | .release = single_release, | ||
222 | }; | ||
223 | |||
224 | static int hctx_sched_tags_show(struct seq_file *m, void *v) | ||
225 | { | ||
226 | struct blk_mq_hw_ctx *hctx = m->private; | ||
227 | struct request_queue *q = hctx->queue; | ||
228 | |||
229 | mutex_lock(&q->sysfs_lock); | ||
230 | if (hctx->sched_tags) | ||
231 | blk_mq_debugfs_tags_show(m, hctx->sched_tags); | ||
232 | mutex_unlock(&q->sysfs_lock); | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | static int hctx_sched_tags_open(struct inode *inode, struct file *file) | ||
238 | { | ||
239 | return single_open(file, hctx_sched_tags_show, inode->i_private); | ||
240 | } | ||
241 | |||
242 | static const struct file_operations hctx_sched_tags_fops = { | ||
243 | .open = hctx_sched_tags_open, | ||
244 | .read = seq_read, | ||
245 | .llseek = seq_lseek, | ||
246 | .release = single_release, | ||
247 | }; | ||
248 | |||
249 | static int hctx_sched_tags_bitmap_show(struct seq_file *m, void *v) | ||
250 | { | ||
251 | struct blk_mq_hw_ctx *hctx = m->private; | ||
252 | struct request_queue *q = hctx->queue; | ||
253 | |||
254 | mutex_lock(&q->sysfs_lock); | ||
255 | if (hctx->sched_tags) | ||
256 | sbitmap_bitmap_show(&hctx->sched_tags->bitmap_tags.sb, m); | ||
257 | mutex_unlock(&q->sysfs_lock); | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static int hctx_sched_tags_bitmap_open(struct inode *inode, struct file *file) | ||
262 | { | ||
263 | return single_open(file, hctx_sched_tags_bitmap_show, inode->i_private); | ||
264 | } | ||
265 | |||
266 | static const struct file_operations hctx_sched_tags_bitmap_fops = { | ||
267 | .open = hctx_sched_tags_bitmap_open, | ||
268 | .read = seq_read, | ||
269 | .llseek = seq_lseek, | ||
270 | .release = single_release, | ||
271 | }; | ||
272 | |||
273 | static int hctx_io_poll_show(struct seq_file *m, void *v) | ||
274 | { | ||
275 | struct blk_mq_hw_ctx *hctx = m->private; | ||
276 | |||
277 | seq_printf(m, "considered=%lu\n", hctx->poll_considered); | ||
278 | seq_printf(m, "invoked=%lu\n", hctx->poll_invoked); | ||
279 | seq_printf(m, "success=%lu\n", hctx->poll_success); | ||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | static int hctx_io_poll_open(struct inode *inode, struct file *file) | ||
284 | { | ||
285 | return single_open(file, hctx_io_poll_show, inode->i_private); | ||
286 | } | ||
287 | |||
288 | static ssize_t hctx_io_poll_write(struct file *file, const char __user *buf, | ||
289 | size_t count, loff_t *ppos) | ||
290 | { | ||
291 | struct seq_file *m = file->private_data; | ||
292 | struct blk_mq_hw_ctx *hctx = m->private; | ||
293 | |||
294 | hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0; | ||
295 | return count; | ||
296 | } | ||
297 | |||
298 | static const struct file_operations hctx_io_poll_fops = { | ||
299 | .open = hctx_io_poll_open, | ||
300 | .read = seq_read, | ||
301 | .write = hctx_io_poll_write, | ||
302 | .llseek = seq_lseek, | ||
303 | .release = single_release, | ||
304 | }; | ||
305 | |||
306 | static void print_stat(struct seq_file *m, struct blk_rq_stat *stat) | ||
307 | { | ||
308 | seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu", | ||
309 | stat->nr_samples, stat->mean, stat->min, stat->max); | ||
310 | } | ||
311 | |||
312 | static int hctx_stats_show(struct seq_file *m, void *v) | ||
313 | { | ||
314 | struct blk_mq_hw_ctx *hctx = m->private; | ||
315 | struct blk_rq_stat stat[2]; | ||
316 | |||
317 | blk_stat_init(&stat[BLK_STAT_READ]); | ||
318 | blk_stat_init(&stat[BLK_STAT_WRITE]); | ||
319 | |||
320 | blk_hctx_stat_get(hctx, stat); | ||
321 | |||
322 | seq_puts(m, "read: "); | ||
323 | print_stat(m, &stat[BLK_STAT_READ]); | ||
324 | seq_puts(m, "\n"); | ||
325 | |||
326 | seq_puts(m, "write: "); | ||
327 | print_stat(m, &stat[BLK_STAT_WRITE]); | ||
328 | seq_puts(m, "\n"); | ||
329 | return 0; | ||
330 | } | ||
331 | |||
332 | static int hctx_stats_open(struct inode *inode, struct file *file) | ||
333 | { | ||
334 | return single_open(file, hctx_stats_show, inode->i_private); | ||
335 | } | ||
336 | |||
337 | static ssize_t hctx_stats_write(struct file *file, const char __user *buf, | ||
338 | size_t count, loff_t *ppos) | ||
339 | { | ||
340 | struct seq_file *m = file->private_data; | ||
341 | struct blk_mq_hw_ctx *hctx = m->private; | ||
342 | struct blk_mq_ctx *ctx; | ||
343 | int i; | ||
344 | |||
345 | hctx_for_each_ctx(hctx, ctx, i) { | ||
346 | blk_stat_init(&ctx->stat[BLK_STAT_READ]); | ||
347 | blk_stat_init(&ctx->stat[BLK_STAT_WRITE]); | ||
348 | } | ||
349 | return count; | ||
350 | } | ||
351 | |||
352 | static const struct file_operations hctx_stats_fops = { | ||
353 | .open = hctx_stats_open, | ||
354 | .read = seq_read, | ||
355 | .write = hctx_stats_write, | ||
356 | .llseek = seq_lseek, | ||
357 | .release = single_release, | ||
358 | }; | ||
359 | |||
360 | static int hctx_dispatched_show(struct seq_file *m, void *v) | ||
361 | { | ||
362 | struct blk_mq_hw_ctx *hctx = m->private; | ||
363 | int i; | ||
364 | |||
365 | seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]); | ||
366 | |||
367 | for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) { | ||
368 | unsigned int d = 1U << (i - 1); | ||
369 | |||
370 | seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]); | ||
371 | } | ||
372 | |||
373 | seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | static int hctx_dispatched_open(struct inode *inode, struct file *file) | ||
378 | { | ||
379 | return single_open(file, hctx_dispatched_show, inode->i_private); | ||
380 | } | ||
381 | |||
382 | static ssize_t hctx_dispatched_write(struct file *file, const char __user *buf, | ||
383 | size_t count, loff_t *ppos) | ||
384 | { | ||
385 | struct seq_file *m = file->private_data; | ||
386 | struct blk_mq_hw_ctx *hctx = m->private; | ||
387 | int i; | ||
388 | |||
389 | for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) | ||
390 | hctx->dispatched[i] = 0; | ||
391 | return count; | ||
392 | } | ||
393 | |||
394 | static const struct file_operations hctx_dispatched_fops = { | ||
395 | .open = hctx_dispatched_open, | ||
396 | .read = seq_read, | ||
397 | .write = hctx_dispatched_write, | ||
398 | .llseek = seq_lseek, | ||
399 | .release = single_release, | ||
400 | }; | ||
401 | |||
402 | static int hctx_queued_show(struct seq_file *m, void *v) | ||
403 | { | ||
404 | struct blk_mq_hw_ctx *hctx = m->private; | ||
405 | |||
406 | seq_printf(m, "%lu\n", hctx->queued); | ||
407 | return 0; | ||
408 | } | ||
409 | |||
410 | static int hctx_queued_open(struct inode *inode, struct file *file) | ||
411 | { | ||
412 | return single_open(file, hctx_queued_show, inode->i_private); | ||
413 | } | ||
414 | |||
415 | static ssize_t hctx_queued_write(struct file *file, const char __user *buf, | ||
416 | size_t count, loff_t *ppos) | ||
417 | { | ||
418 | struct seq_file *m = file->private_data; | ||
419 | struct blk_mq_hw_ctx *hctx = m->private; | ||
420 | |||
421 | hctx->queued = 0; | ||
422 | return count; | ||
423 | } | ||
424 | |||
425 | static const struct file_operations hctx_queued_fops = { | ||
426 | .open = hctx_queued_open, | ||
427 | .read = seq_read, | ||
428 | .write = hctx_queued_write, | ||
429 | .llseek = seq_lseek, | ||
430 | .release = single_release, | ||
431 | }; | ||
432 | |||
433 | static int hctx_run_show(struct seq_file *m, void *v) | ||
434 | { | ||
435 | struct blk_mq_hw_ctx *hctx = m->private; | ||
436 | |||
437 | seq_printf(m, "%lu\n", hctx->run); | ||
438 | return 0; | ||
439 | } | ||
440 | |||
441 | static int hctx_run_open(struct inode *inode, struct file *file) | ||
442 | { | ||
443 | return single_open(file, hctx_run_show, inode->i_private); | ||
444 | } | ||
445 | |||
446 | static ssize_t hctx_run_write(struct file *file, const char __user *buf, | ||
447 | size_t count, loff_t *ppos) | ||
448 | { | ||
449 | struct seq_file *m = file->private_data; | ||
450 | struct blk_mq_hw_ctx *hctx = m->private; | ||
451 | |||
452 | hctx->run = 0; | ||
453 | return count; | ||
454 | } | ||
455 | |||
456 | static const struct file_operations hctx_run_fops = { | ||
457 | .open = hctx_run_open, | ||
458 | .read = seq_read, | ||
459 | .write = hctx_run_write, | ||
460 | .llseek = seq_lseek, | ||
461 | .release = single_release, | ||
462 | }; | ||
463 | |||
464 | static int hctx_active_show(struct seq_file *m, void *v) | ||
465 | { | ||
466 | struct blk_mq_hw_ctx *hctx = m->private; | ||
467 | |||
468 | seq_printf(m, "%d\n", atomic_read(&hctx->nr_active)); | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | static int hctx_active_open(struct inode *inode, struct file *file) | ||
473 | { | ||
474 | return single_open(file, hctx_active_show, inode->i_private); | ||
475 | } | ||
476 | |||
477 | static const struct file_operations hctx_active_fops = { | ||
478 | .open = hctx_active_open, | ||
479 | .read = seq_read, | ||
480 | .llseek = seq_lseek, | ||
481 | .release = single_release, | ||
482 | }; | ||
483 | |||
484 | static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos) | ||
485 | { | ||
486 | struct blk_mq_ctx *ctx = m->private; | ||
487 | |||
488 | spin_lock(&ctx->lock); | ||
489 | return seq_list_start(&ctx->rq_list, *pos); | ||
490 | } | ||
491 | |||
492 | static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos) | ||
493 | { | ||
494 | struct blk_mq_ctx *ctx = m->private; | ||
495 | |||
496 | return seq_list_next(v, &ctx->rq_list, pos); | ||
497 | } | ||
498 | |||
499 | static void ctx_rq_list_stop(struct seq_file *m, void *v) | ||
500 | { | ||
501 | struct blk_mq_ctx *ctx = m->private; | ||
502 | |||
503 | spin_unlock(&ctx->lock); | ||
504 | } | ||
505 | |||
506 | static const struct seq_operations ctx_rq_list_seq_ops = { | ||
507 | .start = ctx_rq_list_start, | ||
508 | .next = ctx_rq_list_next, | ||
509 | .stop = ctx_rq_list_stop, | ||
510 | .show = blk_mq_debugfs_rq_show, | ||
511 | }; | ||
512 | |||
513 | static int ctx_rq_list_open(struct inode *inode, struct file *file) | ||
514 | { | ||
515 | return blk_mq_debugfs_seq_open(inode, file, &ctx_rq_list_seq_ops); | ||
516 | } | ||
517 | |||
518 | static const struct file_operations ctx_rq_list_fops = { | ||
519 | .open = ctx_rq_list_open, | ||
520 | .read = seq_read, | ||
521 | .llseek = seq_lseek, | ||
522 | .release = seq_release, | ||
523 | }; | ||
524 | |||
525 | static int ctx_dispatched_show(struct seq_file *m, void *v) | ||
526 | { | ||
527 | struct blk_mq_ctx *ctx = m->private; | ||
528 | |||
529 | seq_printf(m, "%lu %lu\n", ctx->rq_dispatched[1], ctx->rq_dispatched[0]); | ||
530 | return 0; | ||
531 | } | ||
532 | |||
533 | static int ctx_dispatched_open(struct inode *inode, struct file *file) | ||
534 | { | ||
535 | return single_open(file, ctx_dispatched_show, inode->i_private); | ||
536 | } | ||
537 | |||
538 | static ssize_t ctx_dispatched_write(struct file *file, const char __user *buf, | ||
539 | size_t count, loff_t *ppos) | ||
540 | { | ||
541 | struct seq_file *m = file->private_data; | ||
542 | struct blk_mq_ctx *ctx = m->private; | ||
543 | |||
544 | ctx->rq_dispatched[0] = ctx->rq_dispatched[1] = 0; | ||
545 | return count; | ||
546 | } | ||
547 | |||
548 | static const struct file_operations ctx_dispatched_fops = { | ||
549 | .open = ctx_dispatched_open, | ||
550 | .read = seq_read, | ||
551 | .write = ctx_dispatched_write, | ||
552 | .llseek = seq_lseek, | ||
553 | .release = single_release, | ||
554 | }; | ||
555 | |||
556 | static int ctx_merged_show(struct seq_file *m, void *v) | ||
557 | { | ||
558 | struct blk_mq_ctx *ctx = m->private; | ||
559 | |||
560 | seq_printf(m, "%lu\n", ctx->rq_merged); | ||
561 | return 0; | ||
562 | } | ||
563 | |||
564 | static int ctx_merged_open(struct inode *inode, struct file *file) | ||
565 | { | ||
566 | return single_open(file, ctx_merged_show, inode->i_private); | ||
567 | } | ||
568 | |||
569 | static ssize_t ctx_merged_write(struct file *file, const char __user *buf, | ||
570 | size_t count, loff_t *ppos) | ||
571 | { | ||
572 | struct seq_file *m = file->private_data; | ||
573 | struct blk_mq_ctx *ctx = m->private; | ||
574 | |||
575 | ctx->rq_merged = 0; | ||
576 | return count; | ||
577 | } | ||
578 | |||
579 | static const struct file_operations ctx_merged_fops = { | ||
580 | .open = ctx_merged_open, | ||
581 | .read = seq_read, | ||
582 | .write = ctx_merged_write, | ||
583 | .llseek = seq_lseek, | ||
584 | .release = single_release, | ||
585 | }; | ||
586 | |||
587 | static int ctx_completed_show(struct seq_file *m, void *v) | ||
588 | { | ||
589 | struct blk_mq_ctx *ctx = m->private; | ||
590 | |||
591 | seq_printf(m, "%lu %lu\n", ctx->rq_completed[1], ctx->rq_completed[0]); | ||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | static int ctx_completed_open(struct inode *inode, struct file *file) | ||
596 | { | ||
597 | return single_open(file, ctx_completed_show, inode->i_private); | ||
598 | } | ||
599 | |||
600 | static ssize_t ctx_completed_write(struct file *file, const char __user *buf, | ||
601 | size_t count, loff_t *ppos) | ||
602 | { | ||
603 | struct seq_file *m = file->private_data; | ||
604 | struct blk_mq_ctx *ctx = m->private; | ||
605 | |||
606 | ctx->rq_completed[0] = ctx->rq_completed[1] = 0; | ||
607 | return count; | ||
608 | } | ||
609 | |||
610 | static const struct file_operations ctx_completed_fops = { | ||
611 | .open = ctx_completed_open, | ||
612 | .read = seq_read, | ||
613 | .write = ctx_completed_write, | ||
614 | .llseek = seq_lseek, | ||
615 | .release = single_release, | ||
616 | }; | ||
617 | |||
618 | static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { | ||
619 | {"state", 0400, &hctx_state_fops}, | ||
620 | {"flags", 0400, &hctx_flags_fops}, | ||
621 | {"dispatch", 0400, &hctx_dispatch_fops}, | ||
622 | {"ctx_map", 0400, &hctx_ctx_map_fops}, | ||
623 | {"tags", 0400, &hctx_tags_fops}, | ||
624 | {"tags_bitmap", 0400, &hctx_tags_bitmap_fops}, | ||
625 | {"sched_tags", 0400, &hctx_sched_tags_fops}, | ||
626 | {"sched_tags_bitmap", 0400, &hctx_sched_tags_bitmap_fops}, | ||
627 | {"io_poll", 0600, &hctx_io_poll_fops}, | ||
628 | {"stats", 0600, &hctx_stats_fops}, | ||
629 | {"dispatched", 0600, &hctx_dispatched_fops}, | ||
630 | {"queued", 0600, &hctx_queued_fops}, | ||
631 | {"run", 0600, &hctx_run_fops}, | ||
632 | {"active", 0400, &hctx_active_fops}, | ||
633 | }; | ||
634 | |||
635 | static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { | ||
636 | {"rq_list", 0400, &ctx_rq_list_fops}, | ||
637 | {"dispatched", 0600, &ctx_dispatched_fops}, | ||
638 | {"merged", 0600, &ctx_merged_fops}, | ||
639 | {"completed", 0600, &ctx_completed_fops}, | ||
640 | }; | ||
641 | |||
642 | int blk_mq_debugfs_register(struct request_queue *q, const char *name) | ||
643 | { | ||
644 | if (!block_debugfs_root) | ||
645 | return -ENOENT; | ||
646 | |||
647 | q->debugfs_dir = debugfs_create_dir(name, block_debugfs_root); | ||
648 | if (!q->debugfs_dir) | ||
649 | goto err; | ||
650 | |||
651 | if (blk_mq_debugfs_register_hctxs(q)) | ||
652 | goto err; | ||
653 | |||
654 | return 0; | ||
655 | |||
656 | err: | ||
657 | blk_mq_debugfs_unregister(q); | ||
658 | return -ENOMEM; | ||
659 | } | ||
660 | |||
661 | void blk_mq_debugfs_unregister(struct request_queue *q) | ||
662 | { | ||
663 | debugfs_remove_recursive(q->debugfs_dir); | ||
664 | q->mq_debugfs_dir = NULL; | ||
665 | q->debugfs_dir = NULL; | ||
666 | } | ||
667 | |||
668 | static int blk_mq_debugfs_register_ctx(struct request_queue *q, | ||
669 | struct blk_mq_ctx *ctx, | ||
670 | struct dentry *hctx_dir) | ||
671 | { | ||
672 | struct dentry *ctx_dir; | ||
673 | char name[20]; | ||
674 | int i; | ||
675 | |||
676 | snprintf(name, sizeof(name), "cpu%u", ctx->cpu); | ||
677 | ctx_dir = debugfs_create_dir(name, hctx_dir); | ||
678 | if (!ctx_dir) | ||
679 | return -ENOMEM; | ||
680 | |||
681 | for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_ctx_attrs); i++) { | ||
682 | const struct blk_mq_debugfs_attr *attr; | ||
683 | |||
684 | attr = &blk_mq_debugfs_ctx_attrs[i]; | ||
685 | if (!debugfs_create_file(attr->name, attr->mode, ctx_dir, ctx, | ||
686 | attr->fops)) | ||
687 | return -ENOMEM; | ||
688 | } | ||
689 | |||
690 | return 0; | ||
691 | } | ||
692 | |||
693 | static int blk_mq_debugfs_register_hctx(struct request_queue *q, | ||
694 | struct blk_mq_hw_ctx *hctx) | ||
695 | { | ||
696 | struct blk_mq_ctx *ctx; | ||
697 | struct dentry *hctx_dir; | ||
698 | char name[20]; | ||
699 | int i; | ||
700 | |||
701 | snprintf(name, sizeof(name), "%u", hctx->queue_num); | ||
702 | hctx_dir = debugfs_create_dir(name, q->mq_debugfs_dir); | ||
703 | if (!hctx_dir) | ||
704 | return -ENOMEM; | ||
705 | |||
706 | for (i = 0; i < ARRAY_SIZE(blk_mq_debugfs_hctx_attrs); i++) { | ||
707 | const struct blk_mq_debugfs_attr *attr; | ||
708 | |||
709 | attr = &blk_mq_debugfs_hctx_attrs[i]; | ||
710 | if (!debugfs_create_file(attr->name, attr->mode, hctx_dir, hctx, | ||
711 | attr->fops)) | ||
712 | return -ENOMEM; | ||
713 | } | ||
714 | |||
715 | hctx_for_each_ctx(hctx, ctx, i) { | ||
716 | if (blk_mq_debugfs_register_ctx(q, ctx, hctx_dir)) | ||
717 | return -ENOMEM; | ||
718 | } | ||
719 | |||
720 | return 0; | ||
721 | } | ||
722 | |||
723 | int blk_mq_debugfs_register_hctxs(struct request_queue *q) | ||
724 | { | ||
725 | struct blk_mq_hw_ctx *hctx; | ||
726 | int i; | ||
727 | |||
728 | if (!q->debugfs_dir) | ||
729 | return -ENOENT; | ||
730 | |||
731 | q->mq_debugfs_dir = debugfs_create_dir("mq", q->debugfs_dir); | ||
732 | if (!q->mq_debugfs_dir) | ||
733 | goto err; | ||
734 | |||
735 | queue_for_each_hw_ctx(q, hctx, i) { | ||
736 | if (blk_mq_debugfs_register_hctx(q, hctx)) | ||
737 | goto err; | ||
738 | } | ||
739 | |||
740 | return 0; | ||
741 | |||
742 | err: | ||
743 | blk_mq_debugfs_unregister_hctxs(q); | ||
744 | return -ENOMEM; | ||
745 | } | ||
746 | |||
747 | void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) | ||
748 | { | ||
749 | debugfs_remove_recursive(q->mq_debugfs_dir); | ||
750 | q->mq_debugfs_dir = NULL; | ||
751 | } | ||
752 | |||
753 | void blk_mq_debugfs_init(void) | ||
754 | { | ||
755 | block_debugfs_root = debugfs_create_dir("block", NULL); | ||
756 | } | ||
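Each per-hctx file above repeats the same boilerplate: a show function, a single_open() wrapper, a file_operations instance, and an entry in blk_mq_debugfs_hctx_attrs[]. For illustration, a hypothetical read-only attribute exposing the existing hctx->queue_num field would follow the same pattern:

/* Hypothetical example attribute following the pattern in this file. */
static int hctx_queue_num_show(struct seq_file *m, void *v)
{
	struct blk_mq_hw_ctx *hctx = m->private;

	seq_printf(m, "%u\n", hctx->queue_num);
	return 0;
}

static int hctx_queue_num_open(struct inode *inode, struct file *file)
{
	return single_open(file, hctx_queue_num_show, inode->i_private);
}

static const struct file_operations hctx_queue_num_fops = {
	.open		= hctx_queue_num_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/* ...plus one more row in blk_mq_debugfs_hctx_attrs[]:
 *	{"queue_num", 0400, &hctx_queue_num_fops},
 */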
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
new file mode 100644
index 000000000000..114814ec3d49
--- /dev/null
+++ b/block/blk-mq-sched.c
@@ -0,0 +1,481 @@
1 | /* | ||
2 | * blk-mq scheduling framework | ||
3 | * | ||
4 | * Copyright (C) 2016 Jens Axboe | ||
5 | */ | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/module.h> | ||
8 | #include <linux/blk-mq.h> | ||
9 | |||
10 | #include <trace/events/block.h> | ||
11 | |||
12 | #include "blk.h" | ||
13 | #include "blk-mq.h" | ||
14 | #include "blk-mq-sched.h" | ||
15 | #include "blk-mq-tag.h" | ||
16 | #include "blk-wbt.h" | ||
17 | |||
18 | void blk_mq_sched_free_hctx_data(struct request_queue *q, | ||
19 | void (*exit)(struct blk_mq_hw_ctx *)) | ||
20 | { | ||
21 | struct blk_mq_hw_ctx *hctx; | ||
22 | int i; | ||
23 | |||
24 | queue_for_each_hw_ctx(q, hctx, i) { | ||
25 | if (exit && hctx->sched_data) | ||
26 | exit(hctx); | ||
27 | kfree(hctx->sched_data); | ||
28 | hctx->sched_data = NULL; | ||
29 | } | ||
30 | } | ||
31 | EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); | ||
32 | |||
33 | int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size, | ||
34 | int (*init)(struct blk_mq_hw_ctx *), | ||
35 | void (*exit)(struct blk_mq_hw_ctx *)) | ||
36 | { | ||
37 | struct blk_mq_hw_ctx *hctx; | ||
38 | int ret; | ||
39 | int i; | ||
40 | |||
41 | queue_for_each_hw_ctx(q, hctx, i) { | ||
42 | hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node); | ||
43 | if (!hctx->sched_data) { | ||
44 | ret = -ENOMEM; | ||
45 | goto error; | ||
46 | } | ||
47 | |||
48 | if (init) { | ||
49 | ret = init(hctx); | ||
50 | if (ret) { | ||
51 | /* | ||
52 | * We don't want to give exit() a partially | ||
53 | * initialized sched_data. init() must clean up | ||
54 | * if it fails. | ||
55 | */ | ||
56 | kfree(hctx->sched_data); | ||
57 | hctx->sched_data = NULL; | ||
58 | goto error; | ||
59 | } | ||
60 | } | ||
61 | } | ||
62 | |||
63 | return 0; | ||
64 | error: | ||
65 | blk_mq_sched_free_hctx_data(q, exit); | ||
66 | return ret; | ||
67 | } | ||
68 | EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data); | ||
69 | |||
70 | static void __blk_mq_sched_assign_ioc(struct request_queue *q, | ||
71 | struct request *rq, struct io_context *ioc) | ||
72 | { | ||
73 | struct io_cq *icq; | ||
74 | |||
75 | spin_lock_irq(q->queue_lock); | ||
76 | icq = ioc_lookup_icq(ioc, q); | ||
77 | spin_unlock_irq(q->queue_lock); | ||
78 | |||
79 | if (!icq) { | ||
80 | icq = ioc_create_icq(ioc, q, GFP_ATOMIC); | ||
81 | if (!icq) | ||
82 | return; | ||
83 | } | ||
84 | |||
85 | rq->elv.icq = icq; | ||
86 | if (!blk_mq_sched_get_rq_priv(q, rq)) { | ||
87 | rq->rq_flags |= RQF_ELVPRIV; | ||
88 | get_io_context(icq->ioc); | ||
89 | return; | ||
90 | } | ||
91 | |||
92 | rq->elv.icq = NULL; | ||
93 | } | ||
94 | |||
95 | static void blk_mq_sched_assign_ioc(struct request_queue *q, | ||
96 | struct request *rq, struct bio *bio) | ||
97 | { | ||
98 | struct io_context *ioc; | ||
99 | |||
100 | ioc = rq_ioc(bio); | ||
101 | if (ioc) | ||
102 | __blk_mq_sched_assign_ioc(q, rq, ioc); | ||
103 | } | ||
104 | |||
105 | struct request *blk_mq_sched_get_request(struct request_queue *q, | ||
106 | struct bio *bio, | ||
107 | unsigned int op, | ||
108 | struct blk_mq_alloc_data *data) | ||
109 | { | ||
110 | struct elevator_queue *e = q->elevator; | ||
111 | struct blk_mq_hw_ctx *hctx; | ||
112 | struct blk_mq_ctx *ctx; | ||
113 | struct request *rq; | ||
114 | |||
115 | blk_queue_enter_live(q); | ||
116 | ctx = blk_mq_get_ctx(q); | ||
117 | hctx = blk_mq_map_queue(q, ctx->cpu); | ||
118 | |||
119 | blk_mq_set_alloc_data(data, q, data->flags, ctx, hctx); | ||
120 | |||
121 | if (e) { | ||
122 | data->flags |= BLK_MQ_REQ_INTERNAL; | ||
123 | |||
124 | /* | ||
125 | * Flush requests are special and go directly to the | ||
126 | * dispatch list. | ||
127 | */ | ||
128 | if (!op_is_flush(op) && e->type->ops.mq.get_request) { | ||
129 | rq = e->type->ops.mq.get_request(q, op, data); | ||
130 | if (rq) | ||
131 | rq->rq_flags |= RQF_QUEUED; | ||
132 | } else | ||
133 | rq = __blk_mq_alloc_request(data, op); | ||
134 | } else { | ||
135 | rq = __blk_mq_alloc_request(data, op); | ||
136 | if (rq) | ||
137 | data->hctx->tags->rqs[rq->tag] = rq; | ||
138 | } | ||
139 | |||
140 | if (rq) { | ||
141 | if (!op_is_flush(op)) { | ||
142 | rq->elv.icq = NULL; | ||
143 | if (e && e->type->icq_cache) | ||
144 | blk_mq_sched_assign_ioc(q, rq, bio); | ||
145 | } | ||
146 | data->hctx->queued++; | ||
147 | return rq; | ||
148 | } | ||
149 | |||
150 | blk_queue_exit(q); | ||
151 | return NULL; | ||
152 | } | ||
153 | |||
154 | void blk_mq_sched_put_request(struct request *rq) | ||
155 | { | ||
156 | struct request_queue *q = rq->q; | ||
157 | struct elevator_queue *e = q->elevator; | ||
158 | |||
159 | if (rq->rq_flags & RQF_ELVPRIV) { | ||
160 | blk_mq_sched_put_rq_priv(rq->q, rq); | ||
161 | if (rq->elv.icq) { | ||
162 | put_io_context(rq->elv.icq->ioc); | ||
163 | rq->elv.icq = NULL; | ||
164 | } | ||
165 | } | ||
166 | |||
167 | if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request) | ||
168 | e->type->ops.mq.put_request(rq); | ||
169 | else | ||
170 | blk_mq_finish_request(rq); | ||
171 | } | ||
172 | |||
173 | void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) | ||
174 | { | ||
175 | struct elevator_queue *e = hctx->queue->elevator; | ||
176 | LIST_HEAD(rq_list); | ||
177 | |||
178 | if (unlikely(blk_mq_hctx_stopped(hctx))) | ||
179 | return; | ||
180 | |||
181 | hctx->run++; | ||
182 | |||
183 | /* | ||
184 | * If we have previous entries on our dispatch list, grab them first for | ||
185 | * more fair dispatch. | ||
186 | */ | ||
187 | if (!list_empty_careful(&hctx->dispatch)) { | ||
188 | spin_lock(&hctx->lock); | ||
189 | if (!list_empty(&hctx->dispatch)) | ||
190 | list_splice_init(&hctx->dispatch, &rq_list); | ||
191 | spin_unlock(&hctx->lock); | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * Only ask the scheduler for requests, if we didn't have residual | ||
196 | * requests from the dispatch list. This is to avoid the case where | ||
197 | * we only ever dispatch a fraction of the requests available because | ||
198 | * of low device queue depth. Once we pull requests out of the IO | ||
199 | * scheduler, we can no longer merge or sort them. So it's best to | ||
200 | * leave them there for as long as we can. Mark the hw queue as | ||
201 | * needing a restart in that case. | ||
202 | */ | ||
203 | if (!list_empty(&rq_list)) { | ||
204 | blk_mq_sched_mark_restart(hctx); | ||
205 | blk_mq_dispatch_rq_list(hctx, &rq_list); | ||
206 | } else if (!e || !e->type->ops.mq.dispatch_request) { | ||
207 | blk_mq_flush_busy_ctxs(hctx, &rq_list); | ||
208 | blk_mq_dispatch_rq_list(hctx, &rq_list); | ||
209 | } else { | ||
210 | do { | ||
211 | struct request *rq; | ||
212 | |||
213 | rq = e->type->ops.mq.dispatch_request(hctx); | ||
214 | if (!rq) | ||
215 | break; | ||
216 | list_add(&rq->queuelist, &rq_list); | ||
217 | } while (blk_mq_dispatch_rq_list(hctx, &rq_list)); | ||
218 | } | ||
219 | } | ||
220 | |||
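The dispatch loop above encodes one policy worth calling out: leftover requests on hctx->dispatch are always drained first, and the scheduler is only consulted when that list is empty, so requests stay inside the elevator (where they can still be merged and sorted) for as long as possible. The following is a minimal userspace sketch of that ordering, assuming toy demo_* types and a fake driver with a small queue depth; none of these names are kernel APIs.

#include <stdio.h>

/* Illustrative sketch only; these types and names are not kernel APIs.
 * It models "drain hctx->dispatch leftovers before asking the scheduler
 * for more", so requests stay mergeable/sortable as long as possible. */

struct demo_fifo {
    int rq[8];
    int head, tail;
};

static int fifo_len(const struct demo_fifo *f) { return f->tail - f->head; }

/* Pretend driver with a small queue depth: accepts at most 'budget' requests. */
static void drive(struct demo_fifo *f, int budget)
{
    while (budget-- && fifo_len(f))
        printf("issued request %d\n", f->rq[f->head++]);
}

static void demo_dispatch(struct demo_fifo *dispatch, struct demo_fifo *sched,
                          int budget, int *need_restart)
{
    if (fifo_len(dispatch)) {
        /* Leftovers from an earlier run go first; ask to be re-run later
         * instead of pulling more requests out of the scheduler now. */
        *need_restart = 1;
        drive(dispatch, budget);
    } else {
        drive(sched, budget);
    }
}

int main(void)
{
    struct demo_fifo dispatch = { .rq = {1, 2}, .head = 0, .tail = 2 };
    struct demo_fifo sched    = { .rq = {3, 4, 5}, .head = 0, .tail = 3 };
    int need_restart = 0;

    demo_dispatch(&dispatch, &sched, 1, &need_restart); /* issues 1, sets restart */
    demo_dispatch(&dispatch, &sched, 1, &need_restart); /* issues 2 */
    demo_dispatch(&dispatch, &sched, 1, &need_restart); /* only now pulls 3 from the scheduler */
    return 0;
}

With a budget of one, the two leftover requests are issued by the first two calls and the scheduler is only tapped on the third, mirroring the branches above.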
221 | void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, | ||
222 | struct list_head *rq_list, | ||
223 | struct request *(*get_rq)(struct blk_mq_hw_ctx *)) | ||
224 | { | ||
225 | do { | ||
226 | struct request *rq; | ||
227 | |||
228 | rq = get_rq(hctx); | ||
229 | if (!rq) | ||
230 | break; | ||
231 | |||
232 | list_add_tail(&rq->queuelist, rq_list); | ||
233 | } while (1); | ||
234 | } | ||
235 | EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch); | ||
236 | |||
237 | bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio) | ||
238 | { | ||
239 | struct request *rq; | ||
240 | int ret; | ||
241 | |||
242 | ret = elv_merge(q, &rq, bio); | ||
243 | if (ret == ELEVATOR_BACK_MERGE) { | ||
244 | if (!blk_mq_sched_allow_merge(q, rq, bio)) | ||
245 | return false; | ||
246 | if (bio_attempt_back_merge(q, rq, bio)) { | ||
247 | if (!attempt_back_merge(q, rq)) | ||
248 | elv_merged_request(q, rq, ret); | ||
249 | return true; | ||
250 | } | ||
251 | } else if (ret == ELEVATOR_FRONT_MERGE) { | ||
252 | if (!blk_mq_sched_allow_merge(q, rq, bio)) | ||
253 | return false; | ||
254 | if (bio_attempt_front_merge(q, rq, bio)) { | ||
255 | if (!attempt_front_merge(q, rq)) | ||
256 | elv_merged_request(q, rq, ret); | ||
257 | return true; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | return false; | ||
262 | } | ||
263 | EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); | ||
264 | |||
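blk_mq_sched_try_merge() leaves the positional test to elv_merge(); for readers new to the elevator terminology, a back merge appends a bio where a queued request ends and a front merge prepends it where the request starts. Below is a hedged, self-contained sketch of that decision on plain sector ranges; the demo_* names are invented and this is not the real elv_merge().

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins; not kernel structures. */
struct demo_rq  { unsigned long start, nr_sectors; };
struct demo_bio { unsigned long start, nr_sectors; };

enum demo_merge { DEMO_NO_MERGE, DEMO_BACK_MERGE, DEMO_FRONT_MERGE };

/* Back merge: bio starts exactly where the request ends.
 * Front merge: bio ends exactly where the request starts. */
static enum demo_merge demo_try_merge(const struct demo_rq *rq,
                                      const struct demo_bio *bio)
{
    if (rq->start + rq->nr_sectors == bio->start)
        return DEMO_BACK_MERGE;
    if (bio->start + bio->nr_sectors == rq->start)
        return DEMO_FRONT_MERGE;
    return DEMO_NO_MERGE;
}

int main(void)
{
    struct demo_rq rq = { .start = 100, .nr_sectors = 8 };
    struct demo_bio back  = { .start = 108, .nr_sectors = 8 };
    struct demo_bio front = { .start = 92,  .nr_sectors = 8 };

    printf("back candidate  -> %d\n", demo_try_merge(&rq, &back));  /* 1: back merge */
    printf("front candidate -> %d\n", demo_try_merge(&rq, &front)); /* 2: front merge */
    return 0;
}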
265 | bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) | ||
266 | { | ||
267 | struct elevator_queue *e = q->elevator; | ||
268 | |||
269 | if (e->type->ops.mq.bio_merge) { | ||
270 | struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); | ||
271 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); | ||
272 | |||
273 | blk_mq_put_ctx(ctx); | ||
274 | return e->type->ops.mq.bio_merge(hctx, bio); | ||
275 | } | ||
276 | |||
277 | return false; | ||
278 | } | ||
279 | |||
280 | bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) | ||
281 | { | ||
282 | return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq); | ||
283 | } | ||
284 | EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); | ||
285 | |||
286 | void blk_mq_sched_request_inserted(struct request *rq) | ||
287 | { | ||
288 | trace_block_rq_insert(rq->q, rq); | ||
289 | } | ||
290 | EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); | ||
291 | |||
292 | bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq) | ||
293 | { | ||
294 | if (rq->tag == -1) { | ||
295 | rq->rq_flags |= RQF_SORTED; | ||
296 | return false; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * If we already have a real request tag, send directly to | ||
301 | * the dispatch list. | ||
302 | */ | ||
303 | spin_lock(&hctx->lock); | ||
304 | list_add(&rq->queuelist, &hctx->dispatch); | ||
305 | spin_unlock(&hctx->lock); | ||
306 | return true; | ||
307 | } | ||
308 | EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert); | ||
309 | |||
310 | static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) | ||
311 | { | ||
312 | if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { | ||
313 | clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); | ||
314 | if (blk_mq_hctx_has_pending(hctx)) | ||
315 | blk_mq_run_hw_queue(hctx, true); | ||
316 | } | ||
317 | } | ||
318 | |||
319 | void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) | ||
320 | { | ||
321 | unsigned int i; | ||
322 | |||
323 | if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) | ||
324 | blk_mq_sched_restart_hctx(hctx); | ||
325 | else { | ||
326 | struct request_queue *q = hctx->queue; | ||
327 | |||
328 | if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) | ||
329 | return; | ||
330 | |||
331 | clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags); | ||
332 | |||
333 | queue_for_each_hw_ctx(q, hctx, i) | ||
334 | blk_mq_sched_restart_hctx(hctx); | ||
335 | } | ||
336 | } | ||
337 | |||
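blk_mq_sched_mark_restart() (defined in the header further below) and the restart helpers above form an edge-triggered handshake: dispatch sets a restart bit when it has to back off, and a later completion clears the bit and re-runs the queue exactly once; with shared tag sets the flag lives on the queue so every hardware context gets a chance. The following is a minimal sketch of the single-hctx handshake using C11 atomics instead of the kernel's set_bit()/test_bit() on hctx->state; the demo_* names are assumptions.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-in for one hardware queue; not a kernel structure. */
struct demo_hctx {
    atomic_bool need_restart;
};

/* Called from the dispatch path when it has to back off (e.g. out of tags). */
static void demo_mark_restart(struct demo_hctx *hctx)
{
    atomic_store(&hctx->need_restart, true);
}

/* Called from the completion path: re-run the queue only if dispatch asked
 * for it, and clear the flag so one completion triggers one re-run. */
static void demo_restart(struct demo_hctx *hctx)
{
    if (atomic_exchange(&hctx->need_restart, false))
        printf("re-running hardware queue\n");
}

int main(void)
{
    struct demo_hctx hctx = { .need_restart = false };

    demo_restart(&hctx);        /* nothing marked, nothing happens */
    demo_mark_restart(&hctx);   /* dispatch ran out of resources */
    demo_restart(&hctx);        /* completion re-runs the queue once */
    demo_restart(&hctx);        /* flag already consumed */
    return 0;
}

The atomic exchange keeps one completion from re-running the queue twice, the same reason the kernel helper tests and clears the bit before calling blk_mq_run_hw_queue().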
338 | /* | ||
339 | * Add flush/fua to the queue. If we fail to get a driver tag, | ||
340 | * punt to the requeue list. Requeue will re-invoke us from a context | ||
341 | * that's safe to block from. | ||
342 | */ | ||
343 | static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx, | ||
344 | struct request *rq, bool can_block) | ||
345 | { | ||
346 | if (blk_mq_get_driver_tag(rq, &hctx, can_block)) { | ||
347 | blk_insert_flush(rq); | ||
348 | blk_mq_run_hw_queue(hctx, true); | ||
349 | } else | ||
350 | blk_mq_add_to_requeue_list(rq, true, true); | ||
351 | } | ||
352 | |||
353 | void blk_mq_sched_insert_request(struct request *rq, bool at_head, | ||
354 | bool run_queue, bool async, bool can_block) | ||
355 | { | ||
356 | struct request_queue *q = rq->q; | ||
357 | struct elevator_queue *e = q->elevator; | ||
358 | struct blk_mq_ctx *ctx = rq->mq_ctx; | ||
359 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); | ||
360 | |||
361 | if (rq->tag == -1 && op_is_flush(rq->cmd_flags)) { | ||
362 | blk_mq_sched_insert_flush(hctx, rq, can_block); | ||
363 | return; | ||
364 | } | ||
365 | |||
366 | if (e && e->type->ops.mq.insert_requests) { | ||
367 | LIST_HEAD(list); | ||
368 | |||
369 | list_add(&rq->queuelist, &list); | ||
370 | e->type->ops.mq.insert_requests(hctx, &list, at_head); | ||
371 | } else { | ||
372 | spin_lock(&ctx->lock); | ||
373 | __blk_mq_insert_request(hctx, rq, at_head); | ||
374 | spin_unlock(&ctx->lock); | ||
375 | } | ||
376 | |||
377 | if (run_queue) | ||
378 | blk_mq_run_hw_queue(hctx, async); | ||
379 | } | ||
380 | |||
381 | void blk_mq_sched_insert_requests(struct request_queue *q, | ||
382 | struct blk_mq_ctx *ctx, | ||
383 | struct list_head *list, bool run_queue_async) | ||
384 | { | ||
385 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); | ||
386 | struct elevator_queue *e = hctx->queue->elevator; | ||
387 | |||
388 | if (e && e->type->ops.mq.insert_requests) | ||
389 | e->type->ops.mq.insert_requests(hctx, list, false); | ||
390 | else | ||
391 | blk_mq_insert_requests(hctx, ctx, list); | ||
392 | |||
393 | blk_mq_run_hw_queue(hctx, run_queue_async); | ||
394 | } | ||
395 | |||
396 | static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, | ||
397 | struct blk_mq_hw_ctx *hctx, | ||
398 | unsigned int hctx_idx) | ||
399 | { | ||
400 | if (hctx->sched_tags) { | ||
401 | blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); | ||
402 | blk_mq_free_rq_map(hctx->sched_tags); | ||
403 | hctx->sched_tags = NULL; | ||
404 | } | ||
405 | } | ||
406 | |||
407 | int blk_mq_sched_setup(struct request_queue *q) | ||
408 | { | ||
409 | struct blk_mq_tag_set *set = q->tag_set; | ||
410 | struct blk_mq_hw_ctx *hctx; | ||
411 | int ret, i; | ||
412 | |||
413 | /* | ||
414 | * Default to 256, since we don't split into sync/async like the | ||
415 | * old code did. Additionally, this is a per-hw queue depth. | ||
416 | */ | ||
417 | q->nr_requests = 2 * BLKDEV_MAX_RQ; | ||
418 | |||
419 | /* | ||
420 | * We're switching to using an IO scheduler, so set up the hctx | ||
421 | * scheduler tags and switch the request map from the regular | ||
422 | * tags to scheduler tags. First allocate what we need, so we | ||
423 | * can safely fail and fall back if needed. | ||
424 | */ | ||
425 | ret = 0; | ||
426 | queue_for_each_hw_ctx(q, hctx, i) { | ||
427 | hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0); | ||
428 | if (!hctx->sched_tags) { | ||
429 | ret = -ENOMEM; | ||
430 | break; | ||
431 | } | ||
432 | ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests); | ||
433 | if (ret) | ||
434 | break; | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * If we failed, free what we did allocate | ||
439 | */ | ||
440 | if (ret) { | ||
441 | queue_for_each_hw_ctx(q, hctx, i) { | ||
442 | if (!hctx->sched_tags) | ||
443 | continue; | ||
444 | blk_mq_sched_free_tags(set, hctx, i); | ||
445 | } | ||
446 | |||
447 | return ret; | ||
448 | } | ||
449 | |||
450 | return 0; | ||
451 | } | ||
452 | |||
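blk_mq_sched_setup() follows an allocate-everything-or-roll-back shape: allocate scheduler tags for each hardware queue, and on the first failure walk the queues again, free whatever did get allocated, and return the error, so the caller sees either a fully converted queue or an untouched one. A generic sketch of that pattern with plain malloc() and hypothetical names:

#include <stdlib.h>

/* Hypothetical helper mirroring the per-hctx sched_tags setup/rollback
 * shape above: allocate one resource per slot, or none at all. */
static int alloc_all_or_rollback(void **slots, int nr, size_t size)
{
    int i, allocated = 0;

    for (i = 0; i < nr; i++) {
        slots[i] = malloc(size);
        if (!slots[i])
            break;                  /* stands in for -ENOMEM */
        allocated++;
    }

    if (allocated < nr) {
        /* Failure: free what we did allocate, leave nothing half-done. */
        for (i = 0; i < allocated; i++) {
            free(slots[i]);
            slots[i] = NULL;
        }
        return -1;
    }
    return 0;
}

int main(void)
{
    void *tags[4] = { 0 };

    return alloc_all_or_rollback(tags, 4, 256) ? 1 : 0;
}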
453 | void blk_mq_sched_teardown(struct request_queue *q) | ||
454 | { | ||
455 | struct blk_mq_tag_set *set = q->tag_set; | ||
456 | struct blk_mq_hw_ctx *hctx; | ||
457 | int i; | ||
458 | |||
459 | queue_for_each_hw_ctx(q, hctx, i) | ||
460 | blk_mq_sched_free_tags(set, hctx, i); | ||
461 | } | ||
462 | |||
463 | int blk_mq_sched_init(struct request_queue *q) | ||
464 | { | ||
465 | int ret; | ||
466 | |||
467 | #if defined(CONFIG_DEFAULT_SQ_NONE) | ||
468 | if (q->nr_hw_queues == 1) | ||
469 | return 0; | ||
470 | #endif | ||
471 | #if defined(CONFIG_DEFAULT_MQ_NONE) | ||
472 | if (q->nr_hw_queues > 1) | ||
473 | return 0; | ||
474 | #endif | ||
475 | |||
476 | mutex_lock(&q->sysfs_lock); | ||
477 | ret = elevator_init(q, NULL); | ||
478 | mutex_unlock(&q->sysfs_lock); | ||
479 | |||
480 | return ret; | ||
481 | } | ||
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h new file mode 100644 index 000000000000..9478aaeb48c5 --- /dev/null +++ b/block/blk-mq-sched.h | |||
@@ -0,0 +1,142 @@ | |||
1 | #ifndef BLK_MQ_SCHED_H | ||
2 | #define BLK_MQ_SCHED_H | ||
3 | |||
4 | #include "blk-mq.h" | ||
5 | #include "blk-mq-tag.h" | ||
6 | |||
7 | int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size, | ||
8 | int (*init)(struct blk_mq_hw_ctx *), | ||
9 | void (*exit)(struct blk_mq_hw_ctx *)); | ||
10 | |||
11 | void blk_mq_sched_free_hctx_data(struct request_queue *q, | ||
12 | void (*exit)(struct blk_mq_hw_ctx *)); | ||
13 | |||
14 | struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data); | ||
15 | void blk_mq_sched_put_request(struct request *rq); | ||
16 | |||
17 | void blk_mq_sched_request_inserted(struct request *rq); | ||
18 | bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq); | ||
19 | bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio); | ||
20 | bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); | ||
21 | bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); | ||
22 | void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx); | ||
23 | |||
24 | void blk_mq_sched_insert_request(struct request *rq, bool at_head, | ||
25 | bool run_queue, bool async, bool can_block); | ||
26 | void blk_mq_sched_insert_requests(struct request_queue *q, | ||
27 | struct blk_mq_ctx *ctx, | ||
28 | struct list_head *list, bool run_queue_async); | ||
29 | |||
30 | void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); | ||
31 | void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, | ||
32 | struct list_head *rq_list, | ||
33 | struct request *(*get_rq)(struct blk_mq_hw_ctx *)); | ||
34 | |||
35 | int blk_mq_sched_setup(struct request_queue *q); | ||
36 | void blk_mq_sched_teardown(struct request_queue *q); | ||
37 | |||
38 | int blk_mq_sched_init(struct request_queue *q); | ||
39 | |||
40 | static inline bool | ||
41 | blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) | ||
42 | { | ||
43 | struct elevator_queue *e = q->elevator; | ||
44 | |||
45 | if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio)) | ||
46 | return false; | ||
47 | |||
48 | return __blk_mq_sched_bio_merge(q, bio); | ||
49 | } | ||
50 | |||
51 | static inline int blk_mq_sched_get_rq_priv(struct request_queue *q, | ||
52 | struct request *rq) | ||
53 | { | ||
54 | struct elevator_queue *e = q->elevator; | ||
55 | |||
56 | if (e && e->type->ops.mq.get_rq_priv) | ||
57 | return e->type->ops.mq.get_rq_priv(q, rq); | ||
58 | |||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | static inline void blk_mq_sched_put_rq_priv(struct request_queue *q, | ||
63 | struct request *rq) | ||
64 | { | ||
65 | struct elevator_queue *e = q->elevator; | ||
66 | |||
67 | if (e && e->type->ops.mq.put_rq_priv) | ||
68 | e->type->ops.mq.put_rq_priv(q, rq); | ||
69 | } | ||
70 | |||
71 | static inline bool | ||
72 | blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq, | ||
73 | struct bio *bio) | ||
74 | { | ||
75 | struct elevator_queue *e = q->elevator; | ||
76 | |||
77 | if (e && e->type->ops.mq.allow_merge) | ||
78 | return e->type->ops.mq.allow_merge(q, rq, bio); | ||
79 | |||
80 | return true; | ||
81 | } | ||
82 | |||
83 | static inline void | ||
84 | blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq) | ||
85 | { | ||
86 | struct elevator_queue *e = hctx->queue->elevator; | ||
87 | |||
88 | if (e && e->type->ops.mq.completed_request) | ||
89 | e->type->ops.mq.completed_request(hctx, rq); | ||
90 | |||
91 | BUG_ON(rq->internal_tag == -1); | ||
92 | |||
93 | blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag); | ||
94 | } | ||
95 | |||
96 | static inline void blk_mq_sched_started_request(struct request *rq) | ||
97 | { | ||
98 | struct request_queue *q = rq->q; | ||
99 | struct elevator_queue *e = q->elevator; | ||
100 | |||
101 | if (e && e->type->ops.mq.started_request) | ||
102 | e->type->ops.mq.started_request(rq); | ||
103 | } | ||
104 | |||
105 | static inline void blk_mq_sched_requeue_request(struct request *rq) | ||
106 | { | ||
107 | struct request_queue *q = rq->q; | ||
108 | struct elevator_queue *e = q->elevator; | ||
109 | |||
110 | if (e && e->type->ops.mq.requeue_request) | ||
111 | e->type->ops.mq.requeue_request(rq); | ||
112 | } | ||
113 | |||
114 | static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) | ||
115 | { | ||
116 | struct elevator_queue *e = hctx->queue->elevator; | ||
117 | |||
118 | if (e && e->type->ops.mq.has_work) | ||
119 | return e->type->ops.mq.has_work(hctx); | ||
120 | |||
121 | return false; | ||
122 | } | ||
123 | |||
124 | static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx) | ||
125 | { | ||
126 | if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { | ||
127 | set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); | ||
128 | if (hctx->flags & BLK_MQ_F_TAG_SHARED) { | ||
129 | struct request_queue *q = hctx->queue; | ||
130 | |||
131 | if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) | ||
132 | set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) | ||
138 | { | ||
139 | return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); | ||
140 | } | ||
141 | |||
142 | #endif | ||
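Nearly every inline helper in this header follows the same convention: look up an optional elevator callback, call it if the scheduler provides one, and otherwise fall back to a safe default (true for allow_merge, false for has_work, a no-op for started/requeued requests). A hedged sketch of that optional-callback shape with an invented demo_sched_ops table:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical ops table; a NULL hook means "scheduler does not care". */
struct demo_sched_ops {
    bool (*allow_merge)(int rq_id, int bio_id);
    bool (*has_work)(void);
};

static bool demo_allow_merge(const struct demo_sched_ops *ops, int rq_id, int bio_id)
{
    if (ops && ops->allow_merge)
        return ops->allow_merge(rq_id, bio_id);
    return true;                /* default: merging is allowed */
}

static bool demo_has_work(const struct demo_sched_ops *ops)
{
    if (ops && ops->has_work)
        return ops->has_work();
    return false;               /* default: no scheduler, no scheduler work */
}

static bool never_merge(int rq_id, int bio_id)
{
    (void)rq_id; (void)bio_id;
    return false;
}

int main(void)
{
    struct demo_sched_ops strict = { .allow_merge = never_merge };

    printf("no scheduler: merge=%d work=%d\n",
           demo_allow_merge(NULL, 1, 2), demo_has_work(NULL));
    printf("strict sched: merge=%d work=%d\n",
           demo_allow_merge(&strict, 1, 2), demo_has_work(&strict));
    return 0;
}

Keeping the defaults in the wrappers is what lets a scheduler implement only the hooks it cares about and leave the rest NULL.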
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index eacd3af72099..308b3f4fc310 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c | |||
@@ -122,123 +122,16 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj, | |||
122 | return res; | 122 | return res; |
123 | } | 123 | } |
124 | 124 | ||
125 | static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page) | 125 | static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx, |
126 | { | ||
127 | return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1], | ||
128 | ctx->rq_dispatched[0]); | ||
129 | } | ||
130 | |||
131 | static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page) | ||
132 | { | ||
133 | return sprintf(page, "%lu\n", ctx->rq_merged); | ||
134 | } | ||
135 | |||
136 | static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page) | ||
137 | { | ||
138 | return sprintf(page, "%lu %lu\n", ctx->rq_completed[1], | ||
139 | ctx->rq_completed[0]); | ||
140 | } | ||
141 | |||
142 | static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg) | ||
143 | { | ||
144 | struct request *rq; | ||
145 | int len = snprintf(page, PAGE_SIZE - 1, "%s:\n", msg); | ||
146 | |||
147 | list_for_each_entry(rq, list, queuelist) { | ||
148 | const int rq_len = 2 * sizeof(rq) + 2; | ||
149 | |||
150 | /* if the output will be truncated */ | ||
151 | if (PAGE_SIZE - 1 < len + rq_len) { | ||
152 | /* backspacing if it can't hold '\t...\n' */ | ||
153 | if (PAGE_SIZE - 1 < len + 5) | ||
154 | len -= rq_len; | ||
155 | len += snprintf(page + len, PAGE_SIZE - 1 - len, | ||
156 | "\t...\n"); | ||
157 | break; | ||
158 | } | ||
159 | len += snprintf(page + len, PAGE_SIZE - 1 - len, | ||
160 | "\t%p\n", rq); | ||
161 | } | ||
162 | |||
163 | return len; | ||
164 | } | ||
165 | |||
166 | static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) | ||
167 | { | ||
168 | ssize_t ret; | ||
169 | |||
170 | spin_lock(&ctx->lock); | ||
171 | ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending"); | ||
172 | spin_unlock(&ctx->lock); | ||
173 | |||
174 | return ret; | ||
175 | } | ||
176 | |||
177 | static ssize_t blk_mq_hw_sysfs_poll_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
178 | { | ||
179 | return sprintf(page, "considered=%lu, invoked=%lu, success=%lu\n", | ||
180 | hctx->poll_considered, hctx->poll_invoked, | ||
181 | hctx->poll_success); | ||
182 | } | ||
183 | |||
184 | static ssize_t blk_mq_hw_sysfs_poll_store(struct blk_mq_hw_ctx *hctx, | ||
185 | const char *page, size_t size) | ||
186 | { | ||
187 | hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0; | ||
188 | |||
189 | return size; | ||
190 | } | ||
191 | |||
192 | static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx, | ||
193 | char *page) | ||
194 | { | ||
195 | return sprintf(page, "%lu\n", hctx->queued); | ||
196 | } | ||
197 | |||
198 | static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
199 | { | ||
200 | return sprintf(page, "%lu\n", hctx->run); | ||
201 | } | ||
202 | |||
203 | static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx, | ||
204 | char *page) | ||
205 | { | ||
206 | char *start_page = page; | ||
207 | int i; | ||
208 | |||
209 | page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]); | ||
210 | |||
211 | for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) { | ||
212 | unsigned int d = 1U << (i - 1); | ||
213 | |||
214 | page += sprintf(page, "%8u\t%lu\n", d, hctx->dispatched[i]); | ||
215 | } | ||
216 | |||
217 | page += sprintf(page, "%8u+\t%lu\n", 1U << (i - 1), | ||
218 | hctx->dispatched[i]); | ||
219 | return page - start_page; | ||
220 | } | ||
221 | |||
222 | static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx, | ||
223 | char *page) | 126 | char *page) |
224 | { | 127 | { |
225 | ssize_t ret; | 128 | return sprintf(page, "%u\n", hctx->tags->nr_tags); |
226 | |||
227 | spin_lock(&hctx->lock); | ||
228 | ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending"); | ||
229 | spin_unlock(&hctx->lock); | ||
230 | |||
231 | return ret; | ||
232 | } | 129 | } |
233 | 130 | ||
234 | static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) | 131 | static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx, |
132 | char *page) | ||
235 | { | 133 | { |
236 | return blk_mq_tag_sysfs_show(hctx->tags, page); | 134 | return sprintf(page, "%u\n", hctx->tags->nr_reserved_tags); |
237 | } | ||
238 | |||
239 | static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
240 | { | ||
241 | return sprintf(page, "%u\n", atomic_read(&hctx->nr_active)); | ||
242 | } | 135 | } |
243 | 136 | ||
244 | static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) | 137 | static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) |
@@ -259,121 +152,27 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) | |||
259 | return ret; | 152 | return ret; |
260 | } | 153 | } |
261 | 154 | ||
262 | static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx) | ||
263 | { | ||
264 | struct blk_mq_ctx *ctx; | ||
265 | unsigned int i; | ||
266 | |||
267 | hctx_for_each_ctx(hctx, ctx, i) { | ||
268 | blk_stat_init(&ctx->stat[BLK_STAT_READ]); | ||
269 | blk_stat_init(&ctx->stat[BLK_STAT_WRITE]); | ||
270 | } | ||
271 | } | ||
272 | |||
273 | static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx, | ||
274 | const char *page, size_t count) | ||
275 | { | ||
276 | blk_mq_stat_clear(hctx); | ||
277 | return count; | ||
278 | } | ||
279 | |||
280 | static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre) | ||
281 | { | ||
282 | return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n", | ||
283 | pre, (long long) stat->nr_samples, | ||
284 | (long long) stat->mean, (long long) stat->min, | ||
285 | (long long) stat->max); | ||
286 | } | ||
287 | |||
288 | static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
289 | { | ||
290 | struct blk_rq_stat stat[2]; | ||
291 | ssize_t ret; | ||
292 | |||
293 | blk_stat_init(&stat[BLK_STAT_READ]); | ||
294 | blk_stat_init(&stat[BLK_STAT_WRITE]); | ||
295 | |||
296 | blk_hctx_stat_get(hctx, stat); | ||
297 | |||
298 | ret = print_stat(page, &stat[BLK_STAT_READ], "read :"); | ||
299 | ret += print_stat(page + ret, &stat[BLK_STAT_WRITE], "write:"); | ||
300 | return ret; | ||
301 | } | ||
302 | |||
303 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = { | ||
304 | .attr = {.name = "dispatched", .mode = S_IRUGO }, | ||
305 | .show = blk_mq_sysfs_dispatched_show, | ||
306 | }; | ||
307 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = { | ||
308 | .attr = {.name = "merged", .mode = S_IRUGO }, | ||
309 | .show = blk_mq_sysfs_merged_show, | ||
310 | }; | ||
311 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = { | ||
312 | .attr = {.name = "completed", .mode = S_IRUGO }, | ||
313 | .show = blk_mq_sysfs_completed_show, | ||
314 | }; | ||
315 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = { | ||
316 | .attr = {.name = "rq_list", .mode = S_IRUGO }, | ||
317 | .show = blk_mq_sysfs_rq_list_show, | ||
318 | }; | ||
319 | |||
320 | static struct attribute *default_ctx_attrs[] = { | 155 | static struct attribute *default_ctx_attrs[] = { |
321 | &blk_mq_sysfs_dispatched.attr, | ||
322 | &blk_mq_sysfs_merged.attr, | ||
323 | &blk_mq_sysfs_completed.attr, | ||
324 | &blk_mq_sysfs_rq_list.attr, | ||
325 | NULL, | 156 | NULL, |
326 | }; | 157 | }; |
327 | 158 | ||
328 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = { | 159 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = { |
329 | .attr = {.name = "queued", .mode = S_IRUGO }, | 160 | .attr = {.name = "nr_tags", .mode = S_IRUGO }, |
330 | .show = blk_mq_hw_sysfs_queued_show, | 161 | .show = blk_mq_hw_sysfs_nr_tags_show, |
331 | }; | 162 | }; |
332 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = { | 163 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = { |
333 | .attr = {.name = "run", .mode = S_IRUGO }, | 164 | .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO }, |
334 | .show = blk_mq_hw_sysfs_run_show, | 165 | .show = blk_mq_hw_sysfs_nr_reserved_tags_show, |
335 | }; | ||
336 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = { | ||
337 | .attr = {.name = "dispatched", .mode = S_IRUGO }, | ||
338 | .show = blk_mq_hw_sysfs_dispatched_show, | ||
339 | }; | ||
340 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = { | ||
341 | .attr = {.name = "active", .mode = S_IRUGO }, | ||
342 | .show = blk_mq_hw_sysfs_active_show, | ||
343 | }; | ||
344 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = { | ||
345 | .attr = {.name = "pending", .mode = S_IRUGO }, | ||
346 | .show = blk_mq_hw_sysfs_rq_list_show, | ||
347 | }; | ||
348 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { | ||
349 | .attr = {.name = "tags", .mode = S_IRUGO }, | ||
350 | .show = blk_mq_hw_sysfs_tags_show, | ||
351 | }; | 166 | }; |
352 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { | 167 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = { |
353 | .attr = {.name = "cpu_list", .mode = S_IRUGO }, | 168 | .attr = {.name = "cpu_list", .mode = S_IRUGO }, |
354 | .show = blk_mq_hw_sysfs_cpus_show, | 169 | .show = blk_mq_hw_sysfs_cpus_show, |
355 | }; | 170 | }; |
356 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = { | ||
357 | .attr = {.name = "io_poll", .mode = S_IWUSR | S_IRUGO }, | ||
358 | .show = blk_mq_hw_sysfs_poll_show, | ||
359 | .store = blk_mq_hw_sysfs_poll_store, | ||
360 | }; | ||
361 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = { | ||
362 | .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR }, | ||
363 | .show = blk_mq_hw_sysfs_stat_show, | ||
364 | .store = blk_mq_hw_sysfs_stat_store, | ||
365 | }; | ||
366 | 171 | ||
367 | static struct attribute *default_hw_ctx_attrs[] = { | 172 | static struct attribute *default_hw_ctx_attrs[] = { |
368 | &blk_mq_hw_sysfs_queued.attr, | 173 | &blk_mq_hw_sysfs_nr_tags.attr, |
369 | &blk_mq_hw_sysfs_run.attr, | 174 | &blk_mq_hw_sysfs_nr_reserved_tags.attr, |
370 | &blk_mq_hw_sysfs_dispatched.attr, | ||
371 | &blk_mq_hw_sysfs_pending.attr, | ||
372 | &blk_mq_hw_sysfs_tags.attr, | ||
373 | &blk_mq_hw_sysfs_cpus.attr, | 175 | &blk_mq_hw_sysfs_cpus.attr, |
374 | &blk_mq_hw_sysfs_active.attr, | ||
375 | &blk_mq_hw_sysfs_poll.attr, | ||
376 | &blk_mq_hw_sysfs_stat.attr, | ||
377 | NULL, | 176 | NULL, |
378 | }; | 177 | }; |
379 | 178 | ||
@@ -455,6 +254,8 @@ static void __blk_mq_unregister_dev(struct device *dev, struct request_queue *q) | |||
455 | kobject_put(&hctx->kobj); | 254 | kobject_put(&hctx->kobj); |
456 | } | 255 | } |
457 | 256 | ||
257 | blk_mq_debugfs_unregister(q); | ||
258 | |||
458 | kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); | 259 | kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); |
459 | kobject_del(&q->mq_kobj); | 260 | kobject_del(&q->mq_kobj); |
460 | kobject_put(&q->mq_kobj); | 261 | kobject_put(&q->mq_kobj); |
@@ -504,6 +305,8 @@ int blk_mq_register_dev(struct device *dev, struct request_queue *q) | |||
504 | 305 | ||
505 | kobject_uevent(&q->mq_kobj, KOBJ_ADD); | 306 | kobject_uevent(&q->mq_kobj, KOBJ_ADD); |
506 | 307 | ||
308 | blk_mq_debugfs_register(q, kobject_name(&dev->kobj)); | ||
309 | |||
507 | queue_for_each_hw_ctx(q, hctx, i) { | 310 | queue_for_each_hw_ctx(q, hctx, i) { |
508 | ret = blk_mq_register_hctx(hctx); | 311 | ret = blk_mq_register_hctx(hctx); |
509 | if (ret) | 312 | if (ret) |
@@ -529,6 +332,8 @@ void blk_mq_sysfs_unregister(struct request_queue *q) | |||
529 | if (!q->mq_sysfs_init_done) | 332 | if (!q->mq_sysfs_init_done) |
530 | return; | 333 | return; |
531 | 334 | ||
335 | blk_mq_debugfs_unregister_hctxs(q); | ||
336 | |||
532 | queue_for_each_hw_ctx(q, hctx, i) | 337 | queue_for_each_hw_ctx(q, hctx, i) |
533 | blk_mq_unregister_hctx(hctx); | 338 | blk_mq_unregister_hctx(hctx); |
534 | } | 339 | } |
@@ -541,6 +346,8 @@ int blk_mq_sysfs_register(struct request_queue *q) | |||
541 | if (!q->mq_sysfs_init_done) | 346 | if (!q->mq_sysfs_init_done) |
542 | return ret; | 347 | return ret; |
543 | 348 | ||
349 | blk_mq_debugfs_register_hctxs(q); | ||
350 | |||
544 | queue_for_each_hw_ctx(q, hctx, i) { | 351 | queue_for_each_hw_ctx(q, hctx, i) { |
545 | ret = blk_mq_register_hctx(hctx); | 352 | ret = blk_mq_register_hctx(hctx); |
546 | if (ret) | 353 | if (ret) |
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index dcf5ce3ba4bf..54c84363c1b2 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c | |||
@@ -90,113 +90,97 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, | |||
90 | return atomic_read(&hctx->nr_active) < depth; | 90 | return atomic_read(&hctx->nr_active) < depth; |
91 | } | 91 | } |
92 | 92 | ||
93 | static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) | 93 | static int __blk_mq_get_tag(struct blk_mq_alloc_data *data, |
94 | struct sbitmap_queue *bt) | ||
94 | { | 95 | { |
95 | if (!hctx_may_queue(hctx, bt)) | 96 | if (!(data->flags & BLK_MQ_REQ_INTERNAL) && |
97 | !hctx_may_queue(data->hctx, bt)) | ||
96 | return -1; | 98 | return -1; |
97 | return __sbitmap_queue_get(bt); | 99 | return __sbitmap_queue_get(bt); |
98 | } | 100 | } |
99 | 101 | ||
100 | static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, | 102 | unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) |
101 | struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags) | ||
102 | { | 103 | { |
104 | struct blk_mq_tags *tags = blk_mq_tags_from_data(data); | ||
105 | struct sbitmap_queue *bt; | ||
103 | struct sbq_wait_state *ws; | 106 | struct sbq_wait_state *ws; |
104 | DEFINE_WAIT(wait); | 107 | DEFINE_WAIT(wait); |
108 | unsigned int tag_offset; | ||
109 | bool drop_ctx; | ||
105 | int tag; | 110 | int tag; |
106 | 111 | ||
107 | tag = __bt_get(hctx, bt); | 112 | if (data->flags & BLK_MQ_REQ_RESERVED) { |
113 | if (unlikely(!tags->nr_reserved_tags)) { | ||
114 | WARN_ON_ONCE(1); | ||
115 | return BLK_MQ_TAG_FAIL; | ||
116 | } | ||
117 | bt = &tags->breserved_tags; | ||
118 | tag_offset = 0; | ||
119 | } else { | ||
120 | bt = &tags->bitmap_tags; | ||
121 | tag_offset = tags->nr_reserved_tags; | ||
122 | } | ||
123 | |||
124 | tag = __blk_mq_get_tag(data, bt); | ||
108 | if (tag != -1) | 125 | if (tag != -1) |
109 | return tag; | 126 | goto found_tag; |
110 | 127 | ||
111 | if (data->flags & BLK_MQ_REQ_NOWAIT) | 128 | if (data->flags & BLK_MQ_REQ_NOWAIT) |
112 | return -1; | 129 | return BLK_MQ_TAG_FAIL; |
113 | 130 | ||
114 | ws = bt_wait_ptr(bt, hctx); | 131 | ws = bt_wait_ptr(bt, data->hctx); |
132 | drop_ctx = data->ctx == NULL; | ||
115 | do { | 133 | do { |
116 | prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); | 134 | prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); |
117 | 135 | ||
118 | tag = __bt_get(hctx, bt); | 136 | tag = __blk_mq_get_tag(data, bt); |
119 | if (tag != -1) | 137 | if (tag != -1) |
120 | break; | 138 | break; |
121 | 139 | ||
122 | /* | 140 | /* |
123 | * We're out of tags on this hardware queue, kick any | 141 | * We're out of tags on this hardware queue, kick any |
124 | * pending IO submits before going to sleep waiting for | 142 | * pending IO submits before going to sleep waiting for |
125 | * some to complete. Note that hctx can be NULL here for | 143 | * some to complete. |
126 | * reserved tag allocation. | ||
127 | */ | 144 | */ |
128 | if (hctx) | 145 | blk_mq_run_hw_queue(data->hctx, false); |
129 | blk_mq_run_hw_queue(hctx, false); | ||
130 | 146 | ||
131 | /* | 147 | /* |
132 | * Retry tag allocation after running the hardware queue, | 148 | * Retry tag allocation after running the hardware queue, |
133 | * as running the queue may also have found completions. | 149 | * as running the queue may also have found completions. |
134 | */ | 150 | */ |
135 | tag = __bt_get(hctx, bt); | 151 | tag = __blk_mq_get_tag(data, bt); |
136 | if (tag != -1) | 152 | if (tag != -1) |
137 | break; | 153 | break; |
138 | 154 | ||
139 | blk_mq_put_ctx(data->ctx); | 155 | if (data->ctx) |
156 | blk_mq_put_ctx(data->ctx); | ||
140 | 157 | ||
141 | io_schedule(); | 158 | io_schedule(); |
142 | 159 | ||
143 | data->ctx = blk_mq_get_ctx(data->q); | 160 | data->ctx = blk_mq_get_ctx(data->q); |
144 | data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu); | 161 | data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu); |
145 | if (data->flags & BLK_MQ_REQ_RESERVED) { | 162 | tags = blk_mq_tags_from_data(data); |
146 | bt = &data->hctx->tags->breserved_tags; | 163 | if (data->flags & BLK_MQ_REQ_RESERVED) |
147 | } else { | 164 | bt = &tags->breserved_tags; |
148 | hctx = data->hctx; | 165 | else |
149 | bt = &hctx->tags->bitmap_tags; | 166 | bt = &tags->bitmap_tags; |
150 | } | 167 | |
151 | finish_wait(&ws->wait, &wait); | 168 | finish_wait(&ws->wait, &wait); |
152 | ws = bt_wait_ptr(bt, hctx); | 169 | ws = bt_wait_ptr(bt, data->hctx); |
153 | } while (1); | 170 | } while (1); |
154 | 171 | ||
155 | finish_wait(&ws->wait, &wait); | 172 | if (drop_ctx && data->ctx) |
156 | return tag; | 173 | blk_mq_put_ctx(data->ctx); |
157 | } | ||
158 | |||
159 | static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data) | ||
160 | { | ||
161 | int tag; | ||
162 | |||
163 | tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx, | ||
164 | data->hctx->tags); | ||
165 | if (tag >= 0) | ||
166 | return tag + data->hctx->tags->nr_reserved_tags; | ||
167 | |||
168 | return BLK_MQ_TAG_FAIL; | ||
169 | } | ||
170 | |||
171 | static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data) | ||
172 | { | ||
173 | int tag; | ||
174 | |||
175 | if (unlikely(!data->hctx->tags->nr_reserved_tags)) { | ||
176 | WARN_ON_ONCE(1); | ||
177 | return BLK_MQ_TAG_FAIL; | ||
178 | } | ||
179 | |||
180 | tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, | ||
181 | data->hctx->tags); | ||
182 | if (tag < 0) | ||
183 | return BLK_MQ_TAG_FAIL; | ||
184 | 174 | ||
185 | return tag; | 175 | finish_wait(&ws->wait, &wait); |
186 | } | ||
187 | 176 | ||
188 | unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) | 177 | found_tag: |
189 | { | 178 | return tag + tag_offset; |
190 | if (data->flags & BLK_MQ_REQ_RESERVED) | ||
191 | return __blk_mq_get_reserved_tag(data); | ||
192 | return __blk_mq_get_tag(data); | ||
193 | } | 179 | } |
194 | 180 | ||
195 | void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, | 181 | void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, |
196 | unsigned int tag) | 182 | struct blk_mq_ctx *ctx, unsigned int tag) |
197 | { | 183 | { |
198 | struct blk_mq_tags *tags = hctx->tags; | ||
199 | |||
200 | if (tag >= tags->nr_reserved_tags) { | 184 | if (tag >= tags->nr_reserved_tags) { |
201 | const int real_tag = tag - tags->nr_reserved_tags; | 185 | const int real_tag = tag - tags->nr_reserved_tags; |
202 | 186 | ||
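The found_tag return in the rewritten blk_mq_get_tag() relies on a small numbering convention: reserved tags come from their own bitmap and occupy values 0..nr_reserved_tags-1, while regular tags are bitmap indices shifted up by nr_reserved_tags, and blk_mq_put_tag() strips the offset again before clearing the bit. A hedged arithmetic-only sketch of that mapping (demo_* names are invented; no sbitmap involved):

#include <assert.h>

/* Toy tag space: values [0, nr_reserved) are reserved tags, values
 * [nr_reserved, nr_reserved + nr_regular) are regular tags. The bitmap
 * index is the value with the offset stripped again. */
struct demo_tagspace { unsigned int nr_reserved, nr_regular; };

static unsigned int demo_encode(const struct demo_tagspace *ts,
                                unsigned int bit, int reserved)
{
    return reserved ? bit : bit + ts->nr_reserved;
}

static unsigned int demo_decode(const struct demo_tagspace *ts,
                                unsigned int tag, int *reserved)
{
    *reserved = tag < ts->nr_reserved;
    return *reserved ? tag : tag - ts->nr_reserved;
}

int main(void)
{
    struct demo_tagspace ts = { .nr_reserved = 2, .nr_regular = 6 };
    int reserved;

    assert(demo_encode(&ts, 0, 1) == 0);    /* first reserved tag  */
    assert(demo_encode(&ts, 0, 0) == 2);    /* first regular tag   */
    assert(demo_decode(&ts, 5, &reserved) == 3 && !reserved);
    return 0;
}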
@@ -312,11 +296,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) | |||
312 | struct blk_mq_tags *tags = set->tags[i]; | 296 | struct blk_mq_tags *tags = set->tags[i]; |
313 | 297 | ||
314 | for (j = 0; j < tags->nr_tags; j++) { | 298 | for (j = 0; j < tags->nr_tags; j++) { |
315 | if (!tags->rqs[j]) | 299 | if (!tags->static_rqs[j]) |
316 | continue; | 300 | continue; |
317 | 301 | ||
318 | ret = set->ops->reinit_request(set->driver_data, | 302 | ret = set->ops->reinit_request(set->driver_data, |
319 | tags->rqs[j]); | 303 | tags->static_rqs[j]); |
320 | if (ret) | 304 | if (ret) |
321 | goto out; | 305 | goto out; |
322 | } | 306 | } |
@@ -351,11 +335,6 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, | |||
351 | 335 | ||
352 | } | 336 | } |
353 | 337 | ||
354 | static unsigned int bt_unused_tags(const struct sbitmap_queue *bt) | ||
355 | { | ||
356 | return bt->sb.depth - sbitmap_weight(&bt->sb); | ||
357 | } | ||
358 | |||
359 | static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, | 338 | static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, |
360 | bool round_robin, int node) | 339 | bool round_robin, int node) |
361 | { | 340 | { |
@@ -411,19 +390,56 @@ void blk_mq_free_tags(struct blk_mq_tags *tags) | |||
411 | kfree(tags); | 390 | kfree(tags); |
412 | } | 391 | } |
413 | 392 | ||
414 | int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) | 393 | int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, |
394 | struct blk_mq_tags **tagsptr, unsigned int tdepth, | ||
395 | bool can_grow) | ||
415 | { | 396 | { |
416 | tdepth -= tags->nr_reserved_tags; | 397 | struct blk_mq_tags *tags = *tagsptr; |
417 | if (tdepth > tags->nr_tags) | 398 | |
399 | if (tdepth <= tags->nr_reserved_tags) | ||
418 | return -EINVAL; | 400 | return -EINVAL; |
419 | 401 | ||
402 | tdepth -= tags->nr_reserved_tags; | ||
403 | |||
420 | /* | 404 | /* |
421 | * Don't need (or can't) update reserved tags here, they remain | 405 | * If we are allowed to grow beyond the original size, allocate |
422 | * static and should never need resizing. | 406 | * a new set of tags before freeing the old one. |
423 | */ | 407 | */ |
424 | sbitmap_queue_resize(&tags->bitmap_tags, tdepth); | 408 | if (tdepth > tags->nr_tags) { |
409 | struct blk_mq_tag_set *set = hctx->queue->tag_set; | ||
410 | struct blk_mq_tags *new; | ||
411 | bool ret; | ||
412 | |||
413 | if (!can_grow) | ||
414 | return -EINVAL; | ||
415 | |||
416 | /* | ||
417 | * We need some sort of upper limit; set it high enough that | ||
418 | * no valid use case should require more. | ||
419 | */ | ||
420 | if (tdepth > 16 * BLKDEV_MAX_RQ) | ||
421 | return -EINVAL; | ||
422 | |||
423 | new = blk_mq_alloc_rq_map(set, hctx->queue_num, tdepth, 0); | ||
424 | if (!new) | ||
425 | return -ENOMEM; | ||
426 | ret = blk_mq_alloc_rqs(set, new, hctx->queue_num, tdepth); | ||
427 | if (ret) { | ||
428 | blk_mq_free_rq_map(new); | ||
429 | return -ENOMEM; | ||
430 | } | ||
431 | |||
432 | blk_mq_free_rqs(set, *tagsptr, hctx->queue_num); | ||
433 | blk_mq_free_rq_map(*tagsptr); | ||
434 | *tagsptr = new; | ||
435 | } else { | ||
436 | /* | ||
437 | * Don't need (or can't) update reserved tags here, they | ||
438 | * remain static and should never need resizing. | ||
439 | */ | ||
440 | sbitmap_queue_resize(&tags->bitmap_tags, tdepth); | ||
441 | } | ||
425 | 442 | ||
426 | blk_mq_tag_wakeup_all(tags, false); | ||
427 | return 0; | 443 | return 0; |
428 | } | 444 | } |
429 | 445 | ||
@@ -454,25 +470,3 @@ u32 blk_mq_unique_tag(struct request *rq) | |||
454 | (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); | 470 | (rq->tag & BLK_MQ_UNIQUE_TAG_MASK); |
455 | } | 471 | } |
456 | EXPORT_SYMBOL(blk_mq_unique_tag); | 472 | EXPORT_SYMBOL(blk_mq_unique_tag); |
457 | |||
458 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) | ||
459 | { | ||
460 | char *orig_page = page; | ||
461 | unsigned int free, res; | ||
462 | |||
463 | if (!tags) | ||
464 | return 0; | ||
465 | |||
466 | page += sprintf(page, "nr_tags=%u, reserved_tags=%u, " | ||
467 | "bits_per_word=%u\n", | ||
468 | tags->nr_tags, tags->nr_reserved_tags, | ||
469 | 1U << tags->bitmap_tags.sb.shift); | ||
470 | |||
471 | free = bt_unused_tags(&tags->bitmap_tags); | ||
472 | res = bt_unused_tags(&tags->breserved_tags); | ||
473 | |||
474 | page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res); | ||
475 | page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues)); | ||
476 | |||
477 | return page - orig_page; | ||
478 | } | ||
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index d1662734dc53..63497423c5cd 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h | |||
@@ -16,6 +16,7 @@ struct blk_mq_tags { | |||
16 | struct sbitmap_queue breserved_tags; | 16 | struct sbitmap_queue breserved_tags; |
17 | 17 | ||
18 | struct request **rqs; | 18 | struct request **rqs; |
19 | struct request **static_rqs; | ||
19 | struct list_head page_list; | 20 | struct list_head page_list; |
20 | }; | 21 | }; |
21 | 22 | ||
@@ -24,11 +25,12 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r | |||
24 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); | 25 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); |
25 | 26 | ||
26 | extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); | 27 | extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); |
27 | extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, | 28 | extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, |
28 | unsigned int tag); | 29 | struct blk_mq_ctx *ctx, unsigned int tag); |
29 | extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); | 30 | extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); |
30 | extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); | 31 | extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx, |
31 | extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); | 32 | struct blk_mq_tags **tags, |
33 | unsigned int depth, bool can_grow); | ||
32 | extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); | 34 | extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); |
33 | void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, | 35 | void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, |
34 | void *priv); | 36 | void *priv); |
diff --git a/block/blk-mq.c b/block/blk-mq.c index c3400b5444a7..489076e7ae15 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include "blk-mq-tag.h" | 32 | #include "blk-mq-tag.h" |
33 | #include "blk-stat.h" | 33 | #include "blk-stat.h" |
34 | #include "blk-wbt.h" | 34 | #include "blk-wbt.h" |
35 | #include "blk-mq-sched.h" | ||
35 | 36 | ||
36 | static DEFINE_MUTEX(all_q_mutex); | 37 | static DEFINE_MUTEX(all_q_mutex); |
37 | static LIST_HEAD(all_q_list); | 38 | static LIST_HEAD(all_q_list); |
@@ -39,9 +40,11 @@ static LIST_HEAD(all_q_list); | |||
39 | /* | 40 | /* |
40 | * Check if any of the ctx's have pending work in this hardware queue | 41 | * Check if any of the ctx's have pending work in this hardware queue |
41 | */ | 42 | */ |
42 | static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) | 43 | bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) |
43 | { | 44 | { |
44 | return sbitmap_any_bit_set(&hctx->ctx_map); | 45 | return sbitmap_any_bit_set(&hctx->ctx_map) || |
46 | !list_empty_careful(&hctx->dispatch) || | ||
47 | blk_mq_sched_has_work(hctx); | ||
45 | } | 48 | } |
46 | 49 | ||
47 | /* | 50 | /* |
@@ -167,8 +170,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) | |||
167 | } | 170 | } |
168 | EXPORT_SYMBOL(blk_mq_can_queue); | 171 | EXPORT_SYMBOL(blk_mq_can_queue); |
169 | 172 | ||
170 | static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | 173 | void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, |
171 | struct request *rq, unsigned int op) | 174 | struct request *rq, unsigned int op) |
172 | { | 175 | { |
173 | INIT_LIST_HEAD(&rq->queuelist); | 176 | INIT_LIST_HEAD(&rq->queuelist); |
174 | /* csd/requeue_work/fifo_time is initialized before use */ | 177 | /* csd/requeue_work/fifo_time is initialized before use */ |
@@ -213,53 +216,58 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | |||
213 | 216 | ||
214 | ctx->rq_dispatched[op_is_sync(op)]++; | 217 | ctx->rq_dispatched[op_is_sync(op)]++; |
215 | } | 218 | } |
219 | EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init); | ||
216 | 220 | ||
217 | static struct request * | 221 | struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, |
218 | __blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) | 222 | unsigned int op) |
219 | { | 223 | { |
220 | struct request *rq; | 224 | struct request *rq; |
221 | unsigned int tag; | 225 | unsigned int tag; |
222 | 226 | ||
223 | tag = blk_mq_get_tag(data); | 227 | tag = blk_mq_get_tag(data); |
224 | if (tag != BLK_MQ_TAG_FAIL) { | 228 | if (tag != BLK_MQ_TAG_FAIL) { |
225 | rq = data->hctx->tags->rqs[tag]; | 229 | struct blk_mq_tags *tags = blk_mq_tags_from_data(data); |
226 | 230 | ||
227 | if (blk_mq_tag_busy(data->hctx)) { | 231 | rq = tags->static_rqs[tag]; |
228 | rq->rq_flags = RQF_MQ_INFLIGHT; | 232 | |
229 | atomic_inc(&data->hctx->nr_active); | 233 | if (data->flags & BLK_MQ_REQ_INTERNAL) { |
234 | rq->tag = -1; | ||
235 | rq->internal_tag = tag; | ||
236 | } else { | ||
237 | if (blk_mq_tag_busy(data->hctx)) { | ||
238 | rq->rq_flags = RQF_MQ_INFLIGHT; | ||
239 | atomic_inc(&data->hctx->nr_active); | ||
240 | } | ||
241 | rq->tag = tag; | ||
242 | rq->internal_tag = -1; | ||
230 | } | 243 | } |
231 | 244 | ||
232 | rq->tag = tag; | ||
233 | blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); | 245 | blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); |
234 | return rq; | 246 | return rq; |
235 | } | 247 | } |
236 | 248 | ||
237 | return NULL; | 249 | return NULL; |
238 | } | 250 | } |
251 | EXPORT_SYMBOL_GPL(__blk_mq_alloc_request); | ||
239 | 252 | ||
240 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, | 253 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, |
241 | unsigned int flags) | 254 | unsigned int flags) |
242 | { | 255 | { |
243 | struct blk_mq_ctx *ctx; | 256 | struct blk_mq_alloc_data alloc_data = { .flags = flags }; |
244 | struct blk_mq_hw_ctx *hctx; | ||
245 | struct request *rq; | 257 | struct request *rq; |
246 | struct blk_mq_alloc_data alloc_data; | ||
247 | int ret; | 258 | int ret; |
248 | 259 | ||
249 | ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); | 260 | ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); |
250 | if (ret) | 261 | if (ret) |
251 | return ERR_PTR(ret); | 262 | return ERR_PTR(ret); |
252 | 263 | ||
253 | ctx = blk_mq_get_ctx(q); | 264 | rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); |
254 | hctx = blk_mq_map_queue(q, ctx->cpu); | ||
255 | blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); | ||
256 | rq = __blk_mq_alloc_request(&alloc_data, rw); | ||
257 | blk_mq_put_ctx(ctx); | ||
258 | 265 | ||
259 | if (!rq) { | 266 | blk_mq_put_ctx(alloc_data.ctx); |
260 | blk_queue_exit(q); | 267 | blk_queue_exit(q); |
268 | |||
269 | if (!rq) | ||
261 | return ERR_PTR(-EWOULDBLOCK); | 270 | return ERR_PTR(-EWOULDBLOCK); |
262 | } | ||
263 | 271 | ||
264 | rq->__data_len = 0; | 272 | rq->__data_len = 0; |
265 | rq->__sector = (sector_t) -1; | 273 | rq->__sector = (sector_t) -1; |
@@ -319,10 +327,10 @@ out_queue_exit: | |||
319 | } | 327 | } |
320 | EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); | 328 | EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); |
321 | 329 | ||
322 | static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, | 330 | void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, |
323 | struct blk_mq_ctx *ctx, struct request *rq) | 331 | struct request *rq) |
324 | { | 332 | { |
325 | const int tag = rq->tag; | 333 | const int sched_tag = rq->internal_tag; |
326 | struct request_queue *q = rq->q; | 334 | struct request_queue *q = rq->q; |
327 | 335 | ||
328 | if (rq->rq_flags & RQF_MQ_INFLIGHT) | 336 | if (rq->rq_flags & RQF_MQ_INFLIGHT) |
@@ -333,23 +341,31 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, | |||
333 | 341 | ||
334 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | 342 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); |
335 | clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); | 343 | clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); |
336 | blk_mq_put_tag(hctx, ctx, tag); | 344 | if (rq->tag != -1) |
345 | blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); | ||
346 | if (sched_tag != -1) | ||
347 | blk_mq_sched_completed_request(hctx, rq); | ||
348 | blk_mq_sched_restart_queues(hctx); | ||
337 | blk_queue_exit(q); | 349 | blk_queue_exit(q); |
338 | } | 350 | } |
339 | 351 | ||
340 | void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq) | 352 | static void blk_mq_finish_hctx_request(struct blk_mq_hw_ctx *hctx, |
353 | struct request *rq) | ||
341 | { | 354 | { |
342 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 355 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
343 | 356 | ||
344 | ctx->rq_completed[rq_is_sync(rq)]++; | 357 | ctx->rq_completed[rq_is_sync(rq)]++; |
345 | __blk_mq_free_request(hctx, ctx, rq); | 358 | __blk_mq_finish_request(hctx, ctx, rq); |
359 | } | ||
346 | 360 | ||
361 | void blk_mq_finish_request(struct request *rq) | ||
362 | { | ||
363 | blk_mq_finish_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq); | ||
347 | } | 364 | } |
348 | EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request); | ||
349 | 365 | ||
350 | void blk_mq_free_request(struct request *rq) | 366 | void blk_mq_free_request(struct request *rq) |
351 | { | 367 | { |
352 | blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq); | 368 | blk_mq_sched_put_request(rq); |
353 | } | 369 | } |
354 | EXPORT_SYMBOL_GPL(blk_mq_free_request); | 370 | EXPORT_SYMBOL_GPL(blk_mq_free_request); |
355 | 371 | ||
@@ -467,6 +483,8 @@ void blk_mq_start_request(struct request *rq) | |||
467 | { | 483 | { |
468 | struct request_queue *q = rq->q; | 484 | struct request_queue *q = rq->q; |
469 | 485 | ||
486 | blk_mq_sched_started_request(rq); | ||
487 | |||
470 | trace_block_rq_issue(q, rq); | 488 | trace_block_rq_issue(q, rq); |
471 | 489 | ||
472 | rq->resid_len = blk_rq_bytes(rq); | 490 | rq->resid_len = blk_rq_bytes(rq); |
@@ -515,6 +533,7 @@ static void __blk_mq_requeue_request(struct request *rq) | |||
515 | 533 | ||
516 | trace_block_rq_requeue(q, rq); | 534 | trace_block_rq_requeue(q, rq); |
517 | wbt_requeue(q->rq_wb, &rq->issue_stat); | 535 | wbt_requeue(q->rq_wb, &rq->issue_stat); |
536 | blk_mq_sched_requeue_request(rq); | ||
518 | 537 | ||
519 | if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { | 538 | if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { |
520 | if (q->dma_drain_size && blk_rq_bytes(rq)) | 539 | if (q->dma_drain_size && blk_rq_bytes(rq)) |
@@ -549,13 +568,13 @@ static void blk_mq_requeue_work(struct work_struct *work) | |||
549 | 568 | ||
550 | rq->rq_flags &= ~RQF_SOFTBARRIER; | 569 | rq->rq_flags &= ~RQF_SOFTBARRIER; |
551 | list_del_init(&rq->queuelist); | 570 | list_del_init(&rq->queuelist); |
552 | blk_mq_insert_request(rq, true, false, false); | 571 | blk_mq_sched_insert_request(rq, true, false, false, true); |
553 | } | 572 | } |
554 | 573 | ||
555 | while (!list_empty(&rq_list)) { | 574 | while (!list_empty(&rq_list)) { |
556 | rq = list_entry(rq_list.next, struct request, queuelist); | 575 | rq = list_entry(rq_list.next, struct request, queuelist); |
557 | list_del_init(&rq->queuelist); | 576 | list_del_init(&rq->queuelist); |
558 | blk_mq_insert_request(rq, false, false, false); | 577 | blk_mq_sched_insert_request(rq, false, false, false, true); |
559 | } | 578 | } |
560 | 579 | ||
561 | blk_mq_run_hw_queues(q, false); | 580 | blk_mq_run_hw_queues(q, false); |
@@ -639,7 +658,7 @@ struct blk_mq_timeout_data { | |||
639 | 658 | ||
640 | void blk_mq_rq_timed_out(struct request *req, bool reserved) | 659 | void blk_mq_rq_timed_out(struct request *req, bool reserved) |
641 | { | 660 | { |
642 | struct blk_mq_ops *ops = req->q->mq_ops; | 661 | const struct blk_mq_ops *ops = req->q->mq_ops; |
643 | enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; | 662 | enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; |
644 | 663 | ||
645 | /* | 664 | /* |
@@ -763,6 +782,12 @@ static bool blk_mq_attempt_merge(struct request_queue *q, | |||
763 | continue; | 782 | continue; |
764 | 783 | ||
765 | el_ret = blk_try_merge(rq, bio); | 784 | el_ret = blk_try_merge(rq, bio); |
785 | if (el_ret == ELEVATOR_NO_MERGE) | ||
786 | continue; | ||
787 | |||
788 | if (!blk_mq_sched_allow_merge(q, rq, bio)) | ||
789 | break; | ||
790 | |||
766 | if (el_ret == ELEVATOR_BACK_MERGE) { | 791 | if (el_ret == ELEVATOR_BACK_MERGE) { |
767 | if (bio_attempt_back_merge(q, rq, bio)) { | 792 | if (bio_attempt_back_merge(q, rq, bio)) { |
768 | ctx->rq_merged++; | 793 | ctx->rq_merged++; |
@@ -803,7 +828,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) | |||
803 | * Process software queues that have been marked busy, splicing them | 828 | * Process software queues that have been marked busy, splicing them |
804 | * to the for-dispatch | 829 | * to the for-dispatch |
805 | */ | 830 | */ |
806 | static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) | 831 | void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) |
807 | { | 832 | { |
808 | struct flush_busy_ctx_data data = { | 833 | struct flush_busy_ctx_data data = { |
809 | .hctx = hctx, | 834 | .hctx = hctx, |
@@ -812,6 +837,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) | |||
812 | 837 | ||
813 | sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); | 838 | sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); |
814 | } | 839 | } |
840 | EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs); | ||
815 | 841 | ||
816 | static inline unsigned int queued_to_index(unsigned int queued) | 842 | static inline unsigned int queued_to_index(unsigned int queued) |
817 | { | 843 | { |
@@ -821,6 +847,74 @@ static inline unsigned int queued_to_index(unsigned int queued) | |||
821 | return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); | 847 | return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); |
822 | } | 848 | } |
823 | 849 | ||
850 | bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, | ||
851 | bool wait) | ||
852 | { | ||
853 | struct blk_mq_alloc_data data = { | ||
854 | .q = rq->q, | ||
855 | .hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), | ||
856 | .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT, | ||
857 | }; | ||
858 | |||
859 | if (rq->tag != -1) { | ||
860 | done: | ||
861 | if (hctx) | ||
862 | *hctx = data.hctx; | ||
863 | return true; | ||
864 | } | ||
865 | |||
866 | rq->tag = blk_mq_get_tag(&data); | ||
867 | if (rq->tag >= 0) { | ||
868 | if (blk_mq_tag_busy(data.hctx)) { | ||
869 | rq->rq_flags |= RQF_MQ_INFLIGHT; | ||
870 | atomic_inc(&data.hctx->nr_active); | ||
871 | } | ||
872 | data.hctx->tags->rqs[rq->tag] = rq; | ||
873 | goto done; | ||
874 | } | ||
875 | |||
876 | return false; | ||
877 | } | ||
878 | |||
879 | static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, | ||
880 | struct request *rq) | ||
881 | { | ||
882 | if (rq->tag == -1 || rq->internal_tag == -1) | ||
883 | return; | ||
884 | |||
885 | blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag); | ||
886 | rq->tag = -1; | ||
887 | |||
888 | if (rq->rq_flags & RQF_MQ_INFLIGHT) { | ||
889 | rq->rq_flags &= ~RQF_MQ_INFLIGHT; | ||
890 | atomic_dec(&hctx->nr_active); | ||
891 | } | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * If we fail to get a driver tag because all the driver tags are already | ||
896 | * assigned and on the dispatch list, BUT the first entry does not have a | ||
897 | * tag, then we could deadlock. For that case, move entries with assigned | ||
898 | * driver tags to the front, leaving the set of tagged requests in the | ||
899 | * same order, and the untagged set in the same order. | ||
900 | */ | ||
901 | static bool reorder_tags_to_front(struct list_head *list) | ||
902 | { | ||
903 | struct request *rq, *tmp, *first = NULL; | ||
904 | |||
905 | list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) { | ||
906 | if (rq == first) | ||
907 | break; | ||
908 | if (rq->tag != -1) { | ||
909 | list_move(&rq->queuelist, list); | ||
910 | if (!first) | ||
911 | first = rq; | ||
912 | } | ||
913 | } | ||
914 | |||
915 | return first != NULL; | ||
916 | } | ||
917 | |||
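The deadlock avoidance described above is essentially a stable partition: requests that already hold a driver tag move ahead of untagged ones while each group keeps its internal order, so the head of the dispatch list is always something the driver can actually accept. A hedged sketch of that partition on a plain array instead of the kernel's reverse list_move() walk (demo_* names are invented):

#include <stdio.h>

struct demo_rq { int id; int tag; };    /* tag == -1 means "no driver tag" */

/* Stable partition: tagged requests first, untagged after, relative order
 * preserved within each group. Returns 1 if any tagged request was found. */
static int demo_tags_to_front(struct demo_rq *rqs, int n)
{
    struct demo_rq tmp[n];
    int out = 0, found = 0;

    for (int i = 0; i < n; i++)
        if (rqs[i].tag != -1) { tmp[out++] = rqs[i]; found = 1; }
    for (int i = 0; i < n; i++)
        if (rqs[i].tag == -1)
            tmp[out++] = rqs[i];
    for (int i = 0; i < n; i++)
        rqs[i] = tmp[i];

    return found;
}

int main(void)
{
    struct demo_rq list[] = { {1, -1}, {2, 7}, {3, -1}, {4, 9} };

    demo_tags_to_front(list, 4);
    for (int i = 0; i < 4; i++)
        printf("rq %d (tag %d)\n", list[i].id, list[i].tag);
    /* prints 2, 4, 1, 3: tagged entries lead, both groups keep their order */
    return 0;
}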
824 | bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) | 918 | bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) |
825 | { | 919 | { |
826 | struct request_queue *q = hctx->queue; | 920 | struct request_queue *q = hctx->queue; |
@@ -843,6 +937,20 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) | |||
843 | struct blk_mq_queue_data bd; | 937 | struct blk_mq_queue_data bd; |
844 | 938 | ||
845 | rq = list_first_entry(list, struct request, queuelist); | 939 | rq = list_first_entry(list, struct request, queuelist); |
940 | if (!blk_mq_get_driver_tag(rq, &hctx, false)) { | ||
941 | if (!queued && reorder_tags_to_front(list)) | ||
942 | continue; | ||
943 | |||
944 | /* | ||
945 | * We failed to get a driver tag. Mark the queue(s) | ||
946 | * as needing a restart. Then retry the tag allocation, | ||
947 | * in case the needed IO completed right before we | ||
948 | * marked the queue as needing a restart. | ||
949 | */ | ||
950 | blk_mq_sched_mark_restart(hctx); | ||
951 | if (!blk_mq_get_driver_tag(rq, &hctx, false)) | ||
952 | break; | ||
953 | } | ||
846 | list_del_init(&rq->queuelist); | 954 | list_del_init(&rq->queuelist); |
847 | 955 | ||
848 | bd.rq = rq; | 956 | bd.rq = rq; |
@@ -855,6 +963,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) | |||
855 | queued++; | 963 | queued++; |
856 | break; | 964 | break; |
857 | case BLK_MQ_RQ_QUEUE_BUSY: | 965 | case BLK_MQ_RQ_QUEUE_BUSY: |
966 | blk_mq_put_driver_tag(hctx, rq); | ||
858 | list_add(&rq->queuelist, list); | 967 | list_add(&rq->queuelist, list); |
859 | __blk_mq_requeue_request(rq); | 968 | __blk_mq_requeue_request(rq); |
860 | break; | 969 | break; |
@@ -885,7 +994,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) | |||
885 | */ | 994 | */ |
886 | if (!list_empty(list)) { | 995 | if (!list_empty(list)) { |
887 | spin_lock(&hctx->lock); | 996 | spin_lock(&hctx->lock); |
888 | list_splice(list, &hctx->dispatch); | 997 | list_splice_init(list, &hctx->dispatch); |
889 | spin_unlock(&hctx->lock); | 998 | spin_unlock(&hctx->lock); |
890 | 999 | ||
891 | /* | 1000 | /* |
@@ -896,47 +1005,17 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) | |||
896 | * the requests in rq_list might get lost. | 1005 | * the requests in rq_list might get lost. |
897 | * | 1006 | * |
898 | * blk_mq_run_hw_queue() already checks the STOPPED bit | 1007 | * blk_mq_run_hw_queue() already checks the STOPPED bit |
899 | **/ | 1008 | * |
900 | blk_mq_run_hw_queue(hctx, true); | 1009 | * If RESTART is set, then let completion restart the queue |
1010 | * instead of potentially looping here. | ||
1011 | */ | ||
1012 | if (!blk_mq_sched_needs_restart(hctx)) | ||
1013 | blk_mq_run_hw_queue(hctx, true); | ||
901 | } | 1014 | } |
902 | 1015 | ||
903 | return ret != BLK_MQ_RQ_QUEUE_BUSY; | 1016 | return ret != BLK_MQ_RQ_QUEUE_BUSY; |
904 | } | 1017 | } |
905 | 1018 | ||
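Editor's note: the dispatch loop above closes a classic lost-wakeup race: it marks the queue as needing a restart before retrying the tag allocation, so a completion that freed a tag in the meantime either satisfies the retry or sees the restart mark and reruns the queue. A compressed userspace sketch of that ordering, with invented names (need_restart flag, free_tags counter) standing in for the scheduler restart bit and the tag space:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int free_tags = 0;	/* resource the dispatcher needs */
static atomic_bool need_restart = false;

static bool try_get_tag(void)
{
	int old = atomic_load(&free_tags);

	while (old > 0) {
		if (atomic_compare_exchange_weak(&free_tags, &old, old - 1))
			return true;
	}
	return false;
}

/* Dispatcher side: mirrors "mark restart, then retry once" from above. */
static bool dispatch_one(void)
{
	if (try_get_tag())
		return true;
	atomic_store(&need_restart, true);	/* mark BEFORE the retry */
	if (try_get_tag())			/* tag freed just before the mark? */
		return true;
	return false;				/* wait for a completion to restart us */
}

/* Completion side: frees a tag, then honours a pending restart mark. */
static void complete_one(void)
{
	atomic_fetch_add(&free_tags, 1);
	if (atomic_exchange(&need_restart, false))
		printf("restart: rerun the dispatch loop\n");
}

int main(void)
{
	printf("dispatch: %s\n", dispatch_one() ? "ok" : "blocked");
	complete_one();				/* would kick the queue again */
	printf("dispatch: %s\n", dispatch_one() ? "ok" : "blocked");
	return 0;
}

The same reasoning explains the tail of blk_mq_dispatch_rq_list(): once RESTART is set, rerunning the hardware queue is left to the completion path instead of looping locally.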
906 | /* | ||
907 | * Run this hardware queue, pulling any software queues mapped to it in. | ||
908 | * Note that this function currently has various problems around ordering | ||
909 | * of IO. In particular, we'd like FIFO behaviour on handling existing | ||
910 | * items on the hctx->dispatch list. Ignore that for now. | ||
911 | */ | ||
912 | static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx) | ||
913 | { | ||
914 | LIST_HEAD(rq_list); | ||
915 | |||
916 | if (unlikely(blk_mq_hctx_stopped(hctx))) | ||
917 | return; | ||
918 | |||
919 | hctx->run++; | ||
920 | |||
921 | /* | ||
922 | * Touch any software queue that has pending entries. | ||
923 | */ | ||
924 | flush_busy_ctxs(hctx, &rq_list); | ||
925 | |||
926 | /* | ||
927 | * If we have previous entries on our dispatch list, grab them | ||
928 | * and stuff them at the front for more fair dispatch. | ||
929 | */ | ||
930 | if (!list_empty_careful(&hctx->dispatch)) { | ||
931 | spin_lock(&hctx->lock); | ||
932 | if (!list_empty(&hctx->dispatch)) | ||
933 | list_splice_init(&hctx->dispatch, &rq_list); | ||
934 | spin_unlock(&hctx->lock); | ||
935 | } | ||
936 | |||
937 | blk_mq_dispatch_rq_list(hctx, &rq_list); | ||
938 | } | ||
939 | |||
940 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | 1019 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) |
941 | { | 1020 | { |
942 | int srcu_idx; | 1021 | int srcu_idx; |
@@ -946,11 +1025,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
946 | 1025 | ||
947 | if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { | 1026 | if (!(hctx->flags & BLK_MQ_F_BLOCKING)) { |
948 | rcu_read_lock(); | 1027 | rcu_read_lock(); |
949 | blk_mq_process_rq_list(hctx); | 1028 | blk_mq_sched_dispatch_requests(hctx); |
950 | rcu_read_unlock(); | 1029 | rcu_read_unlock(); |
951 | } else { | 1030 | } else { |
952 | srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); | 1031 | srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu); |
953 | blk_mq_process_rq_list(hctx); | 1032 | blk_mq_sched_dispatch_requests(hctx); |
954 | srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); | 1033 | srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx); |
955 | } | 1034 | } |
956 | } | 1035 | } |
@@ -1006,8 +1085,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async) | |||
1006 | int i; | 1085 | int i; |
1007 | 1086 | ||
1008 | queue_for_each_hw_ctx(q, hctx, i) { | 1087 | queue_for_each_hw_ctx(q, hctx, i) { |
1009 | if ((!blk_mq_hctx_has_pending(hctx) && | 1088 | if (!blk_mq_hctx_has_pending(hctx) || |
1010 | list_empty_careful(&hctx->dispatch)) || | ||
1011 | blk_mq_hctx_stopped(hctx)) | 1089 | blk_mq_hctx_stopped(hctx)) |
1012 | continue; | 1090 | continue; |
1013 | 1091 | ||
@@ -1116,6 +1194,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) | |||
1116 | if (unlikely(!blk_mq_hw_queue_mapped(hctx))) | 1194 | if (unlikely(!blk_mq_hw_queue_mapped(hctx))) |
1117 | return; | 1195 | return; |
1118 | 1196 | ||
1197 | blk_mq_stop_hw_queue(hctx); | ||
1119 | kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), | 1198 | kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), |
1120 | &hctx->delay_work, msecs_to_jiffies(msecs)); | 1199 | &hctx->delay_work, msecs_to_jiffies(msecs)); |
1121 | } | 1200 | } |
@@ -1135,8 +1214,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, | |||
1135 | list_add_tail(&rq->queuelist, &ctx->rq_list); | 1214 | list_add_tail(&rq->queuelist, &ctx->rq_list); |
1136 | } | 1215 | } |
1137 | 1216 | ||
1138 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | 1217 | void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, |
1139 | struct request *rq, bool at_head) | 1218 | bool at_head) |
1140 | { | 1219 | { |
1141 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 1220 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
1142 | 1221 | ||
@@ -1144,32 +1223,10 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | |||
1144 | blk_mq_hctx_mark_pending(hctx, ctx); | 1223 | blk_mq_hctx_mark_pending(hctx, ctx); |
1145 | } | 1224 | } |
1146 | 1225 | ||
1147 | void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, | 1226 | void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, |
1148 | bool async) | 1227 | struct list_head *list) |
1149 | { | ||
1150 | struct blk_mq_ctx *ctx = rq->mq_ctx; | ||
1151 | struct request_queue *q = rq->q; | ||
1152 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); | ||
1153 | |||
1154 | spin_lock(&ctx->lock); | ||
1155 | __blk_mq_insert_request(hctx, rq, at_head); | ||
1156 | spin_unlock(&ctx->lock); | ||
1157 | |||
1158 | if (run_queue) | ||
1159 | blk_mq_run_hw_queue(hctx, async); | ||
1160 | } | ||
1161 | |||
1162 | static void blk_mq_insert_requests(struct request_queue *q, | ||
1163 | struct blk_mq_ctx *ctx, | ||
1164 | struct list_head *list, | ||
1165 | int depth, | ||
1166 | bool from_schedule) | ||
1167 | 1228 | ||
1168 | { | 1229 | { |
1169 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); | ||
1170 | |||
1171 | trace_block_unplug(q, depth, !from_schedule); | ||
1172 | |||
1173 | /* | 1230 | /* |
1174 | * preemption doesn't flush plug list, so it's possible ctx->cpu is | 1231 | * preemption doesn't flush plug list, so it's possible ctx->cpu is |
1175 | * offline now | 1232 | * offline now |
@@ -1185,8 +1242,6 @@ static void blk_mq_insert_requests(struct request_queue *q, | |||
1185 | } | 1242 | } |
1186 | blk_mq_hctx_mark_pending(hctx, ctx); | 1243 | blk_mq_hctx_mark_pending(hctx, ctx); |
1187 | spin_unlock(&ctx->lock); | 1244 | spin_unlock(&ctx->lock); |
1188 | |||
1189 | blk_mq_run_hw_queue(hctx, from_schedule); | ||
1190 | } | 1245 | } |
1191 | 1246 | ||
1192 | static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) | 1247 | static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) |
@@ -1222,9 +1277,10 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |||
1222 | BUG_ON(!rq->q); | 1277 | BUG_ON(!rq->q); |
1223 | if (rq->mq_ctx != this_ctx) { | 1278 | if (rq->mq_ctx != this_ctx) { |
1224 | if (this_ctx) { | 1279 | if (this_ctx) { |
1225 | blk_mq_insert_requests(this_q, this_ctx, | 1280 | trace_block_unplug(this_q, depth, from_schedule); |
1226 | &ctx_list, depth, | 1281 | blk_mq_sched_insert_requests(this_q, this_ctx, |
1227 | from_schedule); | 1282 | &ctx_list, |
1283 | from_schedule); | ||
1228 | } | 1284 | } |
1229 | 1285 | ||
1230 | this_ctx = rq->mq_ctx; | 1286 | this_ctx = rq->mq_ctx; |
@@ -1241,8 +1297,9 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |||
1241 | * on 'ctx_list'. Do those. | 1297 | * on 'ctx_list'. Do those. |
1242 | */ | 1298 | */ |
1243 | if (this_ctx) { | 1299 | if (this_ctx) { |
1244 | blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth, | 1300 | trace_block_unplug(this_q, depth, from_schedule); |
1245 | from_schedule); | 1301 | blk_mq_sched_insert_requests(this_q, this_ctx, &ctx_list, |
1302 | from_schedule); | ||
1246 | } | 1303 | } |
1247 | } | 1304 | } |
1248 | 1305 | ||
@@ -1280,46 +1337,39 @@ insert_rq: | |||
1280 | } | 1337 | } |
1281 | 1338 | ||
1282 | spin_unlock(&ctx->lock); | 1339 | spin_unlock(&ctx->lock); |
1283 | __blk_mq_free_request(hctx, ctx, rq); | 1340 | __blk_mq_finish_request(hctx, ctx, rq); |
1284 | return true; | 1341 | return true; |
1285 | } | 1342 | } |
1286 | } | 1343 | } |
1287 | 1344 | ||
1288 | static struct request *blk_mq_map_request(struct request_queue *q, | 1345 | static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) |
1289 | struct bio *bio, | ||
1290 | struct blk_mq_alloc_data *data) | ||
1291 | { | 1346 | { |
1292 | struct blk_mq_hw_ctx *hctx; | 1347 | if (rq->tag != -1) |
1293 | struct blk_mq_ctx *ctx; | 1348 | return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false); |
1294 | struct request *rq; | ||
1295 | |||
1296 | blk_queue_enter_live(q); | ||
1297 | ctx = blk_mq_get_ctx(q); | ||
1298 | hctx = blk_mq_map_queue(q, ctx->cpu); | ||
1299 | 1349 | ||
1300 | trace_block_getrq(q, bio, bio->bi_opf); | 1350 | return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); |
1301 | blk_mq_set_alloc_data(data, q, 0, ctx, hctx); | ||
1302 | rq = __blk_mq_alloc_request(data, bio->bi_opf); | ||
1303 | |||
1304 | data->hctx->queued++; | ||
1305 | return rq; | ||
1306 | } | 1351 | } |
1307 | 1352 | ||
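Editor's note: request_to_qc_t() folds the hardware queue number, the tag, and an "internal" marker (scheduler tag vs. driver tag) into the single blk_qc_t cookie handed back to the polling code. A standalone sketch of one possible layout; the shift and flag values below are illustrative, not the kernel's exact blk_tag_to_qc_t() encoding:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define QC_T_SHIFT	16u		/* illustrative: queue number in the high bits */
#define QC_T_INTERNAL	(1u << 31)	/* illustrative: "this is a scheduler tag" */

static uint32_t qc_t_encode(unsigned int queue_num, unsigned int tag, bool internal)
{
	uint32_t cookie = (queue_num << QC_T_SHIFT) | (tag & 0xffffu);

	return internal ? (cookie | QC_T_INTERNAL) : cookie;
}

static void qc_t_decode(uint32_t cookie, unsigned int *queue_num,
			unsigned int *tag, bool *internal)
{
	*internal = cookie & QC_T_INTERNAL;
	*queue_num = (cookie & ~QC_T_INTERNAL) >> QC_T_SHIFT;
	*tag = cookie & 0xffffu;
}

int main(void)
{
	unsigned int q, tag;
	bool internal;
	uint32_t c = qc_t_encode(3, 42, true);

	qc_t_decode(c, &q, &tag, &internal);
	printf("queue=%u tag=%u internal=%d\n", q, tag, internal);
	return 0;
}

The decode side is what blk_mq_poll() relies on later in this patch: the internal bit tells it whether to look the request up in hctx->tags or hctx->sched_tags.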
1308 | static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) | 1353 | static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) |
1309 | { | 1354 | { |
1310 | int ret; | ||
1311 | struct request_queue *q = rq->q; | 1355 | struct request_queue *q = rq->q; |
1312 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu); | ||
1313 | struct blk_mq_queue_data bd = { | 1356 | struct blk_mq_queue_data bd = { |
1314 | .rq = rq, | 1357 | .rq = rq, |
1315 | .list = NULL, | 1358 | .list = NULL, |
1316 | .last = 1 | 1359 | .last = 1 |
1317 | }; | 1360 | }; |
1318 | blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num); | 1361 | struct blk_mq_hw_ctx *hctx; |
1362 | blk_qc_t new_cookie; | ||
1363 | int ret; | ||
1319 | 1364 | ||
1320 | if (blk_mq_hctx_stopped(hctx)) | 1365 | if (q->elevator) |
1321 | goto insert; | 1366 | goto insert; |
1322 | 1367 | ||
1368 | if (!blk_mq_get_driver_tag(rq, &hctx, false)) | ||
1369 | goto insert; | ||
1370 | |||
1371 | new_cookie = request_to_qc_t(hctx, rq); | ||
1372 | |||
1323 | /* | 1373 | /* |
1324 | * For OK queue, we are done. For error, kill it. Any other | 1374 | * For OK queue, we are done. For error, kill it. Any other |
1325 | * error (busy), just add it to our list as we previously | 1375 | * error (busy), just add it to our list as we previously |
@@ -1341,7 +1391,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) | |||
1341 | } | 1391 | } |
1342 | 1392 | ||
1343 | insert: | 1393 | insert: |
1344 | blk_mq_insert_request(rq, false, true, true); | 1394 | blk_mq_sched_insert_request(rq, false, true, true, false); |
1345 | } | 1395 | } |
1346 | 1396 | ||
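Editor's note: blk_mq_try_issue_directly() only bypasses the software queues when no elevator is attached and a driver tag is available; a busy return gives the request back, anything else worse than busy completes it with an error, and every precondition failure falls back to a normal scheduler insert. A small sketch of that try-then-fallback shape, with the return codes and helper names invented for illustration:

#include <stdbool.h>
#include <stdio.h>

enum queue_rc { RC_OK, RC_BUSY, RC_ERROR };

/* Stand-ins for the driver ->queue_rq() hook and the insert/complete paths. */
static enum queue_rc driver_queue_rq(int tag)	{ return tag == 0 ? RC_BUSY : RC_OK; }
static void sched_insert(int rq)		{ printf("rq %d -> scheduler insert\n", rq); }
static void end_with_error(int rq)		{ printf("rq %d -> failed\n", rq); }

static void try_issue_directly(int rq, bool has_elevator, int driver_tag)
{
	enum queue_rc rc;

	if (has_elevator || driver_tag < 0)	/* preconditions not met */
		goto insert;

	rc = driver_queue_rq(driver_tag);
	if (rc == RC_OK) {
		printf("rq %d issued directly\n", rq);
		return;
	}
	if (rc == RC_ERROR) {			/* hard failure: complete with error */
		end_with_error(rq);
		return;
	}
	/* RC_BUSY: give the request back to the normal insert path */
insert:
	sched_insert(rq);
}

int main(void)
{
	try_issue_directly(1, false, 5);	/* direct issue succeeds */
	try_issue_directly(2, false, 0);	/* driver busy -> fallback insert */
	try_issue_directly(3, true, 5);		/* elevator attached -> insert */
	return 0;
}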
1347 | /* | 1397 | /* |
@@ -1352,8 +1402,8 @@ insert: | |||
1352 | static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | 1402 | static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) |
1353 | { | 1403 | { |
1354 | const int is_sync = op_is_sync(bio->bi_opf); | 1404 | const int is_sync = op_is_sync(bio->bi_opf); |
1355 | const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); | 1405 | const int is_flush_fua = op_is_flush(bio->bi_opf); |
1356 | struct blk_mq_alloc_data data; | 1406 | struct blk_mq_alloc_data data = { .flags = 0 }; |
1357 | struct request *rq; | 1407 | struct request *rq; |
1358 | unsigned int request_count = 0, srcu_idx; | 1408 | unsigned int request_count = 0, srcu_idx; |
1359 | struct blk_plug *plug; | 1409 | struct blk_plug *plug; |
@@ -1374,9 +1424,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1374 | blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) | 1424 | blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq)) |
1375 | return BLK_QC_T_NONE; | 1425 | return BLK_QC_T_NONE; |
1376 | 1426 | ||
1427 | if (blk_mq_sched_bio_merge(q, bio)) | ||
1428 | return BLK_QC_T_NONE; | ||
1429 | |||
1377 | wb_acct = wbt_wait(q->rq_wb, bio, NULL); | 1430 | wb_acct = wbt_wait(q->rq_wb, bio, NULL); |
1378 | 1431 | ||
1379 | rq = blk_mq_map_request(q, bio, &data); | 1432 | trace_block_getrq(q, bio, bio->bi_opf); |
1433 | |||
1434 | rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data); | ||
1380 | if (unlikely(!rq)) { | 1435 | if (unlikely(!rq)) { |
1381 | __wbt_done(q->rq_wb, wb_acct); | 1436 | __wbt_done(q->rq_wb, wb_acct); |
1382 | return BLK_QC_T_NONE; | 1437 | return BLK_QC_T_NONE; |
@@ -1384,12 +1439,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1384 | 1439 | ||
1385 | wbt_track(&rq->issue_stat, wb_acct); | 1440 | wbt_track(&rq->issue_stat, wb_acct); |
1386 | 1441 | ||
1387 | cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); | 1442 | cookie = request_to_qc_t(data.hctx, rq); |
1388 | 1443 | ||
1389 | if (unlikely(is_flush_fua)) { | 1444 | if (unlikely(is_flush_fua)) { |
1445 | blk_mq_put_ctx(data.ctx); | ||
1390 | blk_mq_bio_to_request(rq, bio); | 1446 | blk_mq_bio_to_request(rq, bio); |
1447 | blk_mq_get_driver_tag(rq, NULL, true); | ||
1391 | blk_insert_flush(rq); | 1448 | blk_insert_flush(rq); |
1392 | goto run_queue; | 1449 | blk_mq_run_hw_queue(data.hctx, true); |
1450 | goto done; | ||
1393 | } | 1451 | } |
1394 | 1452 | ||
1395 | plug = current->plug; | 1453 | plug = current->plug; |
@@ -1438,6 +1496,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1438 | goto done; | 1496 | goto done; |
1439 | } | 1497 | } |
1440 | 1498 | ||
1499 | if (q->elevator) { | ||
1500 | blk_mq_put_ctx(data.ctx); | ||
1501 | blk_mq_bio_to_request(rq, bio); | ||
1502 | blk_mq_sched_insert_request(rq, false, true, | ||
1503 | !is_sync || is_flush_fua, true); | ||
1504 | goto done; | ||
1505 | } | ||
1441 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { | 1506 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { |
1442 | /* | 1507 | /* |
1443 | * For a SYNC request, send it to the hardware immediately. For | 1508 | * For a SYNC request, send it to the hardware immediately. For |
@@ -1445,7 +1510,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1445 | * latter allows for merging opportunities and more efficient | 1510 | * latter allows for merging opportunities and more efficient |
1446 | * dispatching. | 1511 | * dispatching. |
1447 | */ | 1512 | */ |
1448 | run_queue: | ||
1449 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); | 1513 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); |
1450 | } | 1514 | } |
1451 | blk_mq_put_ctx(data.ctx); | 1515 | blk_mq_put_ctx(data.ctx); |
@@ -1460,10 +1524,10 @@ done: | |||
1460 | static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) | 1524 | static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) |
1461 | { | 1525 | { |
1462 | const int is_sync = op_is_sync(bio->bi_opf); | 1526 | const int is_sync = op_is_sync(bio->bi_opf); |
1463 | const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); | 1527 | const int is_flush_fua = op_is_flush(bio->bi_opf); |
1464 | struct blk_plug *plug; | 1528 | struct blk_plug *plug; |
1465 | unsigned int request_count = 0; | 1529 | unsigned int request_count = 0; |
1466 | struct blk_mq_alloc_data data; | 1530 | struct blk_mq_alloc_data data = { .flags = 0 }; |
1467 | struct request *rq; | 1531 | struct request *rq; |
1468 | blk_qc_t cookie; | 1532 | blk_qc_t cookie; |
1469 | unsigned int wb_acct; | 1533 | unsigned int wb_acct; |
@@ -1483,9 +1547,14 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) | |||
1483 | } else | 1547 | } else |
1484 | request_count = blk_plug_queued_count(q); | 1548 | request_count = blk_plug_queued_count(q); |
1485 | 1549 | ||
1550 | if (blk_mq_sched_bio_merge(q, bio)) | ||
1551 | return BLK_QC_T_NONE; | ||
1552 | |||
1486 | wb_acct = wbt_wait(q->rq_wb, bio, NULL); | 1553 | wb_acct = wbt_wait(q->rq_wb, bio, NULL); |
1487 | 1554 | ||
1488 | rq = blk_mq_map_request(q, bio, &data); | 1555 | trace_block_getrq(q, bio, bio->bi_opf); |
1556 | |||
1557 | rq = blk_mq_sched_get_request(q, bio, bio->bi_opf, &data); | ||
1489 | if (unlikely(!rq)) { | 1558 | if (unlikely(!rq)) { |
1490 | __wbt_done(q->rq_wb, wb_acct); | 1559 | __wbt_done(q->rq_wb, wb_acct); |
1491 | return BLK_QC_T_NONE; | 1560 | return BLK_QC_T_NONE; |
@@ -1493,12 +1562,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) | |||
1493 | 1562 | ||
1494 | wbt_track(&rq->issue_stat, wb_acct); | 1563 | wbt_track(&rq->issue_stat, wb_acct); |
1495 | 1564 | ||
1496 | cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); | 1565 | cookie = request_to_qc_t(data.hctx, rq); |
1497 | 1566 | ||
1498 | if (unlikely(is_flush_fua)) { | 1567 | if (unlikely(is_flush_fua)) { |
1568 | blk_mq_put_ctx(data.ctx); | ||
1499 | blk_mq_bio_to_request(rq, bio); | 1569 | blk_mq_bio_to_request(rq, bio); |
1570 | blk_mq_get_driver_tag(rq, NULL, true); | ||
1500 | blk_insert_flush(rq); | 1571 | blk_insert_flush(rq); |
1501 | goto run_queue; | 1572 | blk_mq_run_hw_queue(data.hctx, true); |
1573 | goto done; | ||
1502 | } | 1574 | } |
1503 | 1575 | ||
1504 | /* | 1576 | /* |
@@ -1535,6 +1607,13 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) | |||
1535 | return cookie; | 1607 | return cookie; |
1536 | } | 1608 | } |
1537 | 1609 | ||
1610 | if (q->elevator) { | ||
1611 | blk_mq_put_ctx(data.ctx); | ||
1612 | blk_mq_bio_to_request(rq, bio); | ||
1613 | blk_mq_sched_insert_request(rq, false, true, | ||
1614 | !is_sync || is_flush_fua, true); | ||
1615 | goto done; | ||
1616 | } | ||
1538 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { | 1617 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { |
1539 | /* | 1618 | /* |
1540 | * For a SYNC request, send it to the hardware immediately. For | 1619 | * For a SYNC request, send it to the hardware immediately. For |
@@ -1542,16 +1621,16 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) | |||
1542 | * latter allows for merging opportunities and more efficient | 1621 | * latter allows for merging opportunities and more efficient |
1543 | * dispatching. | 1622 | * dispatching. |
1544 | */ | 1623 | */ |
1545 | run_queue: | ||
1546 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); | 1624 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); |
1547 | } | 1625 | } |
1548 | 1626 | ||
1549 | blk_mq_put_ctx(data.ctx); | 1627 | blk_mq_put_ctx(data.ctx); |
1628 | done: | ||
1550 | return cookie; | 1629 | return cookie; |
1551 | } | 1630 | } |
1552 | 1631 | ||
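Editor's note: both make_request paths above now route a bio through roughly the same ordered set of checks: flush/FUA requests grab a driver tag and go straight to the flush machinery, plugged submitters defer to the plug, an attached elevator sends the request through blk_mq_sched_insert_request(), and only the remaining cases merge into the software queue or run the hardware queue directly. A compact sketch of that decision order; the enum and flag names are illustrative, not kernel identifiers:

#include <stdbool.h>
#include <stdio.h>

enum submit_path { PATH_FLUSH, PATH_PLUG, PATH_SCHED_INSERT, PATH_DIRECT_OR_RUN };

struct bio_ctx {
	bool is_flush_fua;	/* REQ_PREFLUSH | REQ_FUA set on the bio */
	bool plugging;		/* caller has an active blk_plug */
	bool has_elevator;	/* q->elevator != NULL */
};

/* Mirrors the order of the checks in the make_request paths above. */
static enum submit_path choose_path(const struct bio_ctx *b)
{
	if (b->is_flush_fua)
		return PATH_FLUSH;		/* driver tag + blk_insert_flush() */
	if (b->plugging)
		return PATH_PLUG;		/* defer to the plug flush */
	if (b->has_elevator)
		return PATH_SCHED_INSERT;	/* blk_mq_sched_insert_request() */
	return PATH_DIRECT_OR_RUN;		/* merge into sw queue / run hctx */
}

int main(void)
{
	struct bio_ctx flush = { .is_flush_fua = true };
	struct bio_ctx sched = { .has_elevator = true };

	printf("flush bio -> path %d\n", choose_path(&flush));
	printf("elevator  -> path %d\n", choose_path(&sched));
	return 0;
}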
1553 | static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, | 1632 | void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, |
1554 | struct blk_mq_tags *tags, unsigned int hctx_idx) | 1633 | unsigned int hctx_idx) |
1555 | { | 1634 | { |
1556 | struct page *page; | 1635 | struct page *page; |
1557 | 1636 | ||
@@ -1559,11 +1638,13 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, | |||
1559 | int i; | 1638 | int i; |
1560 | 1639 | ||
1561 | for (i = 0; i < tags->nr_tags; i++) { | 1640 | for (i = 0; i < tags->nr_tags; i++) { |
1562 | if (!tags->rqs[i]) | 1641 | struct request *rq = tags->static_rqs[i]; |
1642 | |||
1643 | if (!rq) | ||
1563 | continue; | 1644 | continue; |
1564 | set->ops->exit_request(set->driver_data, tags->rqs[i], | 1645 | set->ops->exit_request(set->driver_data, rq, |
1565 | hctx_idx, i); | 1646 | hctx_idx, i); |
1566 | tags->rqs[i] = NULL; | 1647 | tags->static_rqs[i] = NULL; |
1567 | } | 1648 | } |
1568 | } | 1649 | } |
1569 | 1650 | ||
@@ -1577,33 +1658,32 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, | |||
1577 | kmemleak_free(page_address(page)); | 1658 | kmemleak_free(page_address(page)); |
1578 | __free_pages(page, page->private); | 1659 | __free_pages(page, page->private); |
1579 | } | 1660 | } |
1661 | } | ||
1580 | 1662 | ||
1663 | void blk_mq_free_rq_map(struct blk_mq_tags *tags) | ||
1664 | { | ||
1581 | kfree(tags->rqs); | 1665 | kfree(tags->rqs); |
1666 | tags->rqs = NULL; | ||
1667 | kfree(tags->static_rqs); | ||
1668 | tags->static_rqs = NULL; | ||
1582 | 1669 | ||
1583 | blk_mq_free_tags(tags); | 1670 | blk_mq_free_tags(tags); |
1584 | } | 1671 | } |
1585 | 1672 | ||
1586 | static size_t order_to_size(unsigned int order) | 1673 | struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, |
1587 | { | 1674 | unsigned int hctx_idx, |
1588 | return (size_t)PAGE_SIZE << order; | 1675 | unsigned int nr_tags, |
1589 | } | 1676 | unsigned int reserved_tags) |
1590 | |||
1591 | static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | ||
1592 | unsigned int hctx_idx) | ||
1593 | { | 1677 | { |
1594 | struct blk_mq_tags *tags; | 1678 | struct blk_mq_tags *tags; |
1595 | unsigned int i, j, entries_per_page, max_order = 4; | ||
1596 | size_t rq_size, left; | ||
1597 | 1679 | ||
1598 | tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, | 1680 | tags = blk_mq_init_tags(nr_tags, reserved_tags, |
1599 | set->numa_node, | 1681 | set->numa_node, |
1600 | BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); | 1682 | BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); |
1601 | if (!tags) | 1683 | if (!tags) |
1602 | return NULL; | 1684 | return NULL; |
1603 | 1685 | ||
1604 | INIT_LIST_HEAD(&tags->page_list); | 1686 | tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *), |
1605 | |||
1606 | tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), | ||
1607 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, | 1687 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, |
1608 | set->numa_node); | 1688 | set->numa_node); |
1609 | if (!tags->rqs) { | 1689 | if (!tags->rqs) { |
@@ -1611,15 +1691,40 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
1611 | return NULL; | 1691 | return NULL; |
1612 | } | 1692 | } |
1613 | 1693 | ||
1694 | tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *), | ||
1695 | GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, | ||
1696 | set->numa_node); | ||
1697 | if (!tags->static_rqs) { | ||
1698 | kfree(tags->rqs); | ||
1699 | blk_mq_free_tags(tags); | ||
1700 | return NULL; | ||
1701 | } | ||
1702 | |||
1703 | return tags; | ||
1704 | } | ||
1705 | |||
1706 | static size_t order_to_size(unsigned int order) | ||
1707 | { | ||
1708 | return (size_t)PAGE_SIZE << order; | ||
1709 | } | ||
1710 | |||
1711 | int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, | ||
1712 | unsigned int hctx_idx, unsigned int depth) | ||
1713 | { | ||
1714 | unsigned int i, j, entries_per_page, max_order = 4; | ||
1715 | size_t rq_size, left; | ||
1716 | |||
1717 | INIT_LIST_HEAD(&tags->page_list); | ||
1718 | |||
1614 | /* | 1719 | /* |
1615 | * rq_size is the size of the request plus driver payload, rounded | 1720 | * rq_size is the size of the request plus driver payload, rounded |
1616 | * to the cacheline size | 1721 | * to the cacheline size |
1617 | */ | 1722 | */ |
1618 | rq_size = round_up(sizeof(struct request) + set->cmd_size, | 1723 | rq_size = round_up(sizeof(struct request) + set->cmd_size, |
1619 | cache_line_size()); | 1724 | cache_line_size()); |
1620 | left = rq_size * set->queue_depth; | 1725 | left = rq_size * depth; |
1621 | 1726 | ||
1622 | for (i = 0; i < set->queue_depth; ) { | 1727 | for (i = 0; i < depth; ) { |
1623 | int this_order = max_order; | 1728 | int this_order = max_order; |
1624 | struct page *page; | 1729 | struct page *page; |
1625 | int to_do; | 1730 | int to_do; |
@@ -1653,15 +1758,17 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
1653 | */ | 1758 | */ |
1654 | kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); | 1759 | kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); |
1655 | entries_per_page = order_to_size(this_order) / rq_size; | 1760 | entries_per_page = order_to_size(this_order) / rq_size; |
1656 | to_do = min(entries_per_page, set->queue_depth - i); | 1761 | to_do = min(entries_per_page, depth - i); |
1657 | left -= to_do * rq_size; | 1762 | left -= to_do * rq_size; |
1658 | for (j = 0; j < to_do; j++) { | 1763 | for (j = 0; j < to_do; j++) { |
1659 | tags->rqs[i] = p; | 1764 | struct request *rq = p; |
1765 | |||
1766 | tags->static_rqs[i] = rq; | ||
1660 | if (set->ops->init_request) { | 1767 | if (set->ops->init_request) { |
1661 | if (set->ops->init_request(set->driver_data, | 1768 | if (set->ops->init_request(set->driver_data, |
1662 | tags->rqs[i], hctx_idx, i, | 1769 | rq, hctx_idx, i, |
1663 | set->numa_node)) { | 1770 | set->numa_node)) { |
1664 | tags->rqs[i] = NULL; | 1771 | tags->static_rqs[i] = NULL; |
1665 | goto fail; | 1772 | goto fail; |
1666 | } | 1773 | } |
1667 | } | 1774 | } |
@@ -1670,11 +1777,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, | |||
1670 | i++; | 1777 | i++; |
1671 | } | 1778 | } |
1672 | } | 1779 | } |
1673 | return tags; | 1780 | return 0; |
1674 | 1781 | ||
1675 | fail: | 1782 | fail: |
1676 | blk_mq_free_rq_map(set, tags, hctx_idx); | 1783 | blk_mq_free_rqs(set, tags, hctx_idx); |
1677 | return NULL; | 1784 | return -ENOMEM; |
1678 | } | 1785 | } |
1679 | 1786 | ||
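Editor's note: blk_mq_alloc_rqs() sizes each request to the cache-line-rounded sizeof(struct request) + cmd_size and then packs as many as fit into each power-of-two page allocation, falling back to smaller orders when the larger ones cannot be satisfied. The packing arithmetic is easy to get wrong, so here is a userspace sketch of the same loop; malloc stands in for alloc_pages and all sizes are illustrative:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096u

static size_t order_to_size(unsigned int order)
{
	return (size_t)PAGE_SIZE << order;
}

int main(void)
{
	size_t rq_size = 320;			/* pretend request + driver payload */
	unsigned int depth = 64;		/* pretend queue depth */
	unsigned int order = 2;			/* 4 pages per chunk for this demo */
	unsigned int allocated = 0;
	void *rqs[64];

	/* round rq_size up to a 64-byte "cache line", like the kernel does */
	rq_size = (rq_size + 63) & ~(size_t)63;

	while (allocated < depth) {
		size_t chunk = order_to_size(order);
		unsigned int per_chunk = chunk / rq_size;
		unsigned int todo = depth - allocated;
		char *p = malloc(chunk);	/* chunks deliberately leaked here */

		if (!p)
			return 1;
		if (per_chunk > todo)
			per_chunk = todo;
		for (unsigned int j = 0; j < per_chunk; j++)
			rqs[allocated + j] = p + (size_t)j * rq_size;
		printf("order %u chunk: %u requests of %zu bytes\n",
		       order, per_chunk, rq_size);
		allocated += per_chunk;
	}
	printf("allocated %u request slots, first at %p\n", allocated, rqs[0]);
	return 0;
}

With the scheduler split, these statically allocated requests live in tags->static_rqs, while tags->rqs only records which request currently owns which driver tag.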
1680 | /* | 1787 | /* |
@@ -1866,6 +1973,35 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, | |||
1866 | } | 1973 | } |
1867 | } | 1974 | } |
1868 | 1975 | ||
1976 | static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx) | ||
1977 | { | ||
1978 | int ret = 0; | ||
1979 | |||
1980 | set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx, | ||
1981 | set->queue_depth, set->reserved_tags); | ||
1982 | if (!set->tags[hctx_idx]) | ||
1983 | return false; | ||
1984 | |||
1985 | ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx, | ||
1986 | set->queue_depth); | ||
1987 | if (!ret) | ||
1988 | return true; | ||
1989 | |||
1990 | blk_mq_free_rq_map(set->tags[hctx_idx]); | ||
1991 | set->tags[hctx_idx] = NULL; | ||
1992 | return false; | ||
1993 | } | ||
1994 | |||
1995 | static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, | ||
1996 | unsigned int hctx_idx) | ||
1997 | { | ||
1998 | if (set->tags[hctx_idx]) { | ||
1999 | blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx); | ||
2000 | blk_mq_free_rq_map(set->tags[hctx_idx]); | ||
2001 | set->tags[hctx_idx] = NULL; | ||
2002 | } | ||
2003 | } | ||
2004 | |||
1869 | static void blk_mq_map_swqueue(struct request_queue *q, | 2005 | static void blk_mq_map_swqueue(struct request_queue *q, |
1870 | const struct cpumask *online_mask) | 2006 | const struct cpumask *online_mask) |
1871 | { | 2007 | { |
@@ -1894,17 +2030,15 @@ static void blk_mq_map_swqueue(struct request_queue *q, | |||
1894 | 2030 | ||
1895 | hctx_idx = q->mq_map[i]; | 2031 | hctx_idx = q->mq_map[i]; |
1896 | /* unmapped hw queue can be remapped after CPU topo changed */ | 2032 | /* unmapped hw queue can be remapped after CPU topo changed */ |
1897 | if (!set->tags[hctx_idx]) { | 2033 | if (!set->tags[hctx_idx] && |
1898 | set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx); | 2034 | !__blk_mq_alloc_rq_map(set, hctx_idx)) { |
1899 | |||
1900 | /* | 2035 | /* |
1901 | * If tags initialization fails for some hctx, | 2036 | * If tags initialization fails for some hctx,
1902 | * that hctx won't be brought online. In this | 2037 | * that hctx won't be brought online. In this |
1903 | * case, remap the current ctx to hctx[0] which | 2038 | * case, remap the current ctx to hctx[0] which |
1904 | * is guaranteed to always have tags allocated | 2039 | * is guaranteed to always have tags allocated |
1905 | */ | 2040 | */ |
1906 | if (!set->tags[hctx_idx]) | 2041 | q->mq_map[i] = 0; |
1907 | q->mq_map[i] = 0; | ||
1908 | } | 2042 | } |
1909 | 2043 | ||
1910 | ctx = per_cpu_ptr(q->queue_ctx, i); | 2044 | ctx = per_cpu_ptr(q->queue_ctx, i); |
@@ -1927,10 +2061,9 @@ static void blk_mq_map_swqueue(struct request_queue *q, | |||
1927 | * fallback in case of a new remap fails | 2061 | * fallback in case of a new remap fails |
1928 | * allocation | 2062 | * allocation |
1929 | */ | 2063 | */ |
1930 | if (i && set->tags[i]) { | 2064 | if (i && set->tags[i]) |
1931 | blk_mq_free_rq_map(set, set->tags[i], i); | 2065 | blk_mq_free_map_and_requests(set, i); |
1932 | set->tags[i] = NULL; | 2066 | |
1933 | } | ||
1934 | hctx->tags = NULL; | 2067 | hctx->tags = NULL; |
1935 | continue; | 2068 | continue; |
1936 | } | 2069 | } |
@@ -2023,6 +2156,8 @@ void blk_mq_release(struct request_queue *q) | |||
2023 | struct blk_mq_hw_ctx *hctx; | 2156 | struct blk_mq_hw_ctx *hctx; |
2024 | unsigned int i; | 2157 | unsigned int i; |
2025 | 2158 | ||
2159 | blk_mq_sched_teardown(q); | ||
2160 | |||
2026 | /* hctx kobj stays in hctx */ | 2161 | /* hctx kobj stays in hctx */ |
2027 | queue_for_each_hw_ctx(q, hctx, i) { | 2162 | queue_for_each_hw_ctx(q, hctx, i) { |
2028 | if (!hctx) | 2163 | if (!hctx) |
@@ -2097,10 +2232,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, | |||
2097 | struct blk_mq_hw_ctx *hctx = hctxs[j]; | 2232 | struct blk_mq_hw_ctx *hctx = hctxs[j]; |
2098 | 2233 | ||
2099 | if (hctx) { | 2234 | if (hctx) { |
2100 | if (hctx->tags) { | 2235 | if (hctx->tags) |
2101 | blk_mq_free_rq_map(set, hctx->tags, j); | 2236 | blk_mq_free_map_and_requests(set, j); |
2102 | set->tags[j] = NULL; | ||
2103 | } | ||
2104 | blk_mq_exit_hctx(q, set, hctx, j); | 2237 | blk_mq_exit_hctx(q, set, hctx, j); |
2105 | free_cpumask_var(hctx->cpumask); | 2238 | free_cpumask_var(hctx->cpumask); |
2106 | kobject_put(&hctx->kobj); | 2239 | kobject_put(&hctx->kobj); |
@@ -2181,6 +2314,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, | |||
2181 | mutex_unlock(&all_q_mutex); | 2314 | mutex_unlock(&all_q_mutex); |
2182 | put_online_cpus(); | 2315 | put_online_cpus(); |
2183 | 2316 | ||
2317 | if (!(set->flags & BLK_MQ_F_NO_SCHED)) { | ||
2318 | int ret; | ||
2319 | |||
2320 | ret = blk_mq_sched_init(q); | ||
2321 | if (ret) | ||
2322 | return ERR_PTR(ret); | ||
2323 | } | ||
2324 | |||
2184 | return q; | 2325 | return q; |
2185 | 2326 | ||
2186 | err_hctxs: | 2327 | err_hctxs: |
@@ -2279,10 +2420,10 @@ static int blk_mq_queue_reinit_dead(unsigned int cpu) | |||
2279 | * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list | 2420 | * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list |
2280 | * and set bit0 in pending bitmap as ctx1->index_hw is still zero. | 2421 | * and set bit0 in pending bitmap as ctx1->index_hw is still zero. |
2281 | * | 2422 | * |
2282 | * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in | 2423 | * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set |
2283 | * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. | 2424 | * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. |
2284 | * But hctx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list | 2425 | * But hctx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
2285 | * is ignored. | 2426 | * ignored. |
2286 | */ | 2427 | */ |
2287 | static int blk_mq_queue_reinit_prepare(unsigned int cpu) | 2428 | static int blk_mq_queue_reinit_prepare(unsigned int cpu) |
2288 | { | 2429 | { |
@@ -2296,17 +2437,15 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) | |||
2296 | { | 2437 | { |
2297 | int i; | 2438 | int i; |
2298 | 2439 | ||
2299 | for (i = 0; i < set->nr_hw_queues; i++) { | 2440 | for (i = 0; i < set->nr_hw_queues; i++) |
2300 | set->tags[i] = blk_mq_init_rq_map(set, i); | 2441 | if (!__blk_mq_alloc_rq_map(set, i)) |
2301 | if (!set->tags[i]) | ||
2302 | goto out_unwind; | 2442 | goto out_unwind; |
2303 | } | ||
2304 | 2443 | ||
2305 | return 0; | 2444 | return 0; |
2306 | 2445 | ||
2307 | out_unwind: | 2446 | out_unwind: |
2308 | while (--i >= 0) | 2447 | while (--i >= 0) |
2309 | blk_mq_free_rq_map(set, set->tags[i], i); | 2448 | blk_mq_free_rq_map(set->tags[i]); |
2310 | 2449 | ||
2311 | return -ENOMEM; | 2450 | return -ENOMEM; |
2312 | } | 2451 | } |
@@ -2430,10 +2569,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) | |||
2430 | { | 2569 | { |
2431 | int i; | 2570 | int i; |
2432 | 2571 | ||
2433 | for (i = 0; i < nr_cpu_ids; i++) { | 2572 | for (i = 0; i < nr_cpu_ids; i++) |
2434 | if (set->tags[i]) | 2573 | blk_mq_free_map_and_requests(set, i); |
2435 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
2436 | } | ||
2437 | 2574 | ||
2438 | kfree(set->mq_map); | 2575 | kfree(set->mq_map); |
2439 | set->mq_map = NULL; | 2576 | set->mq_map = NULL; |
@@ -2449,14 +2586,28 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) | |||
2449 | struct blk_mq_hw_ctx *hctx; | 2586 | struct blk_mq_hw_ctx *hctx; |
2450 | int i, ret; | 2587 | int i, ret; |
2451 | 2588 | ||
2452 | if (!set || nr > set->queue_depth) | 2589 | if (!set) |
2453 | return -EINVAL; | 2590 | return -EINVAL; |
2454 | 2591 | ||
2592 | blk_mq_freeze_queue(q); | ||
2593 | blk_mq_quiesce_queue(q); | ||
2594 | |||
2455 | ret = 0; | 2595 | ret = 0; |
2456 | queue_for_each_hw_ctx(q, hctx, i) { | 2596 | queue_for_each_hw_ctx(q, hctx, i) { |
2457 | if (!hctx->tags) | 2597 | if (!hctx->tags) |
2458 | continue; | 2598 | continue; |
2459 | ret = blk_mq_tag_update_depth(hctx->tags, nr); | 2599 | /* |
2600 | * If we're using an MQ scheduler, just update the scheduler | ||
2601 | * queue depth. This is similar to what the old code would do. | ||
2602 | */ | ||
2603 | if (!hctx->sched_tags) { | ||
2604 | ret = blk_mq_tag_update_depth(hctx, &hctx->tags, | ||
2605 | min(nr, set->queue_depth), | ||
2606 | false); | ||
2607 | } else { | ||
2608 | ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags, | ||
2609 | nr, true); | ||
2610 | } | ||
2460 | if (ret) | 2611 | if (ret) |
2461 | break; | 2612 | break; |
2462 | } | 2613 | } |
@@ -2464,6 +2615,9 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) | |||
2464 | if (!ret) | 2615 | if (!ret) |
2465 | q->nr_requests = nr; | 2616 | q->nr_requests = nr; |
2466 | 2617 | ||
2618 | blk_mq_unfreeze_queue(q); | ||
2619 | blk_mq_start_stopped_hw_queues(q, true); | ||
2620 | |||
2467 | return ret; | 2621 | return ret; |
2468 | } | 2622 | } |
2469 | 2623 | ||
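Editor's note: blk_mq_update_nr_requests() now freezes and quiesces the queue, resizes either the driver tag depth or, when a scheduler is attached, the per-hctx scheduler tag depth, and then unfreezes and restarts any stopped queues. The overall shape, quiesce around a resize and pick which depth to touch, looks roughly like the userspace sketch below; the names and the freeze mechanism are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

struct hw_queue {
	unsigned int tags_depth;	/* driver tags */
	unsigned int sched_depth;	/* scheduler tags, 0 if no elevator */
	bool frozen;
};

static void freeze(struct hw_queue *h)   { h->frozen = true; }
static void unfreeze(struct hw_queue *h) { h->frozen = false; }

static int update_nr_requests(struct hw_queue *h, unsigned int nr,
			      unsigned int hw_limit)
{
	freeze(h);			/* no new I/O while depths change */

	if (!h->sched_depth) {
		/* no elevator: resize the driver tag space itself,
		 * capped at what the hardware tag set supports */
		h->tags_depth = nr < hw_limit ? nr : hw_limit;
	} else {
		/* elevator attached: only the scheduler depth changes */
		h->sched_depth = nr;
	}

	unfreeze(h);
	return 0;
}

int main(void)
{
	struct hw_queue h = { .tags_depth = 64, .sched_depth = 256 };

	update_nr_requests(&h, 128, 64);
	printf("tags=%u sched=%u\n", h.tags_depth, h.sched_depth);
	return 0;
}

This is also why the old "nr > set->queue_depth" rejection disappears: with a scheduler, nr_requests may legitimately exceed the hardware tag depth.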
@@ -2649,7 +2803,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) | |||
2649 | blk_flush_plug_list(plug, false); | 2803 | blk_flush_plug_list(plug, false); |
2650 | 2804 | ||
2651 | hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; | 2805 | hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)]; |
2652 | rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); | 2806 | if (!blk_qc_t_is_internal(cookie)) |
2807 | rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie)); | ||
2808 | else | ||
2809 | rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie)); | ||
2653 | 2810 | ||
2654 | return __blk_mq_poll(hctx, rq); | 2811 | return __blk_mq_poll(hctx, rq); |
2655 | } | 2812 | } |
@@ -2667,6 +2824,8 @@ void blk_mq_enable_hotplug(void) | |||
2667 | 2824 | ||
2668 | static int __init blk_mq_init(void) | 2825 | static int __init blk_mq_init(void) |
2669 | { | 2826 | { |
2827 | blk_mq_debugfs_init(); | ||
2828 | |||
2670 | cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, | 2829 | cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, |
2671 | blk_mq_hctx_notify_dead); | 2830 | blk_mq_hctx_notify_dead); |
2672 | 2831 | ||
diff --git a/block/blk-mq.h b/block/blk-mq.h index 63e9116cddbd..b52abd62b1b0 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h | |||
@@ -32,8 +32,32 @@ void blk_mq_free_queue(struct request_queue *q); | |||
32 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); | 32 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); |
33 | void blk_mq_wake_waiters(struct request_queue *q); | 33 | void blk_mq_wake_waiters(struct request_queue *q); |
34 | bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); | 34 | bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); |
35 | void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); | ||
36 | bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); | ||
37 | bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, | ||
38 | bool wait); | ||
35 | 39 | ||
36 | /* | 40 | /* |
41 | * Internal helpers for allocating/freeing the request map | ||
42 | */ | ||
43 | void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, | ||
44 | unsigned int hctx_idx); | ||
45 | void blk_mq_free_rq_map(struct blk_mq_tags *tags); | ||
46 | struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, | ||
47 | unsigned int hctx_idx, | ||
48 | unsigned int nr_tags, | ||
49 | unsigned int reserved_tags); | ||
50 | int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, | ||
51 | unsigned int hctx_idx, unsigned int depth); | ||
52 | |||
53 | /* | ||
54 | * Internal helpers for request insertion into sw queues | ||
55 | */ | ||
56 | void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, | ||
57 | bool at_head); | ||
58 | void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, | ||
59 | struct list_head *list); | ||
60 | /* | ||
37 | * CPU hotplug helpers | 61 | * CPU hotplug helpers |
38 | */ | 62 | */ |
39 | void blk_mq_enable_hotplug(void); | 63 | void blk_mq_enable_hotplug(void); |
@@ -57,6 +81,40 @@ extern int blk_mq_sysfs_register(struct request_queue *q); | |||
57 | extern void blk_mq_sysfs_unregister(struct request_queue *q); | 81 | extern void blk_mq_sysfs_unregister(struct request_queue *q); |
58 | extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); | 82 | extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); |
59 | 83 | ||
84 | /* | ||
85 | * debugfs helpers | ||
86 | */ | ||
87 | #ifdef CONFIG_BLK_DEBUG_FS | ||
88 | void blk_mq_debugfs_init(void); | ||
89 | int blk_mq_debugfs_register(struct request_queue *q, const char *name); | ||
90 | void blk_mq_debugfs_unregister(struct request_queue *q); | ||
91 | int blk_mq_debugfs_register_hctxs(struct request_queue *q); | ||
92 | void blk_mq_debugfs_unregister_hctxs(struct request_queue *q); | ||
93 | #else | ||
94 | static inline void blk_mq_debugfs_init(void) | ||
95 | { | ||
96 | } | ||
97 | |||
98 | static inline int blk_mq_debugfs_register(struct request_queue *q, | ||
99 | const char *name) | ||
100 | { | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | static inline void blk_mq_debugfs_unregister(struct request_queue *q) | ||
105 | { | ||
106 | } | ||
107 | |||
108 | static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q) | ||
109 | { | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) | ||
114 | { | ||
115 | } | ||
116 | #endif | ||
117 | |||
60 | extern void blk_mq_rq_timed_out(struct request *req, bool reserved); | 118 | extern void blk_mq_rq_timed_out(struct request *req, bool reserved); |
61 | 119 | ||
62 | void blk_mq_release(struct request_queue *q); | 120 | void blk_mq_release(struct request_queue *q); |
@@ -103,6 +161,25 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, | |||
103 | data->hctx = hctx; | 161 | data->hctx = hctx; |
104 | } | 162 | } |
105 | 163 | ||
164 | static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) | ||
165 | { | ||
166 | if (data->flags & BLK_MQ_REQ_INTERNAL) | ||
167 | return data->hctx->sched_tags; | ||
168 | |||
169 | return data->hctx->tags; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Internal helpers for request allocation/init/free | ||
174 | */ | ||
175 | void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | ||
176 | struct request *rq, unsigned int op); | ||
177 | void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, | ||
178 | struct request *rq); | ||
179 | void blk_mq_finish_request(struct request *rq); | ||
180 | struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, | ||
181 | unsigned int op); | ||
182 | |||
106 | static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) | 183 | static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) |
107 | { | 184 | { |
108 | return test_bit(BLK_MQ_S_STOPPED, &hctx->state); | 185 | return test_bit(BLK_MQ_S_STOPPED, &hctx->state); |
diff --git a/block/blk-tag.c b/block/blk-tag.c index bae1decb6ec3..07cc329fa4b0 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c | |||
@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) | |||
272 | list_del_init(&rq->queuelist); | 272 | list_del_init(&rq->queuelist); |
273 | rq->rq_flags &= ~RQF_QUEUED; | 273 | rq->rq_flags &= ~RQF_QUEUED; |
274 | rq->tag = -1; | 274 | rq->tag = -1; |
275 | rq->internal_tag = -1; | ||
275 | 276 | ||
276 | if (unlikely(bqt->tag_index[tag] == NULL)) | 277 | if (unlikely(bqt->tag_index[tag] == NULL)) |
277 | printk(KERN_ERR "%s: tag %d is missing\n", | 278 | printk(KERN_ERR "%s: tag %d is missing\n", |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a6bb4fe326c3..82fd0cc394eb 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -866,10 +866,12 @@ static void tg_update_disptime(struct throtl_grp *tg) | |||
866 | unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; | 866 | unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; |
867 | struct bio *bio; | 867 | struct bio *bio; |
868 | 868 | ||
869 | if ((bio = throtl_peek_queued(&sq->queued[READ]))) | 869 | bio = throtl_peek_queued(&sq->queued[READ]); |
870 | if (bio) | ||
870 | tg_may_dispatch(tg, bio, &read_wait); | 871 | tg_may_dispatch(tg, bio, &read_wait); |
871 | 872 | ||
872 | if ((bio = throtl_peek_queued(&sq->queued[WRITE]))) | 873 | bio = throtl_peek_queued(&sq->queued[WRITE]); |
874 | if (bio) | ||
873 | tg_may_dispatch(tg, bio, &write_wait); | 875 | tg_may_dispatch(tg, bio, &write_wait); |
874 | 876 | ||
875 | min_wait = min(read_wait, write_wait); | 877 | min_wait = min(read_wait, write_wait); |
diff --git a/block/blk.h b/block/blk.h index 041185e5f129..9a716b5925a4 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) | |||
167 | return NULL; | 167 | return NULL; |
168 | } | 168 | } |
169 | if (unlikely(blk_queue_bypass(q)) || | 169 | if (unlikely(blk_queue_bypass(q)) || |
170 | !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) | 170 | !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0)) |
171 | return NULL; | 171 | return NULL; |
172 | } | 172 | } |
173 | } | 173 | } |
@@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq) | |||
176 | { | 176 | { |
177 | struct elevator_queue *e = q->elevator; | 177 | struct elevator_queue *e = q->elevator; |
178 | 178 | ||
179 | if (e->type->ops.elevator_activate_req_fn) | 179 | if (e->type->ops.sq.elevator_activate_req_fn) |
180 | e->type->ops.elevator_activate_req_fn(q, rq); | 180 | e->type->ops.sq.elevator_activate_req_fn(q, rq); |
181 | } | 181 | } |
182 | 182 | ||
183 | static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) | 183 | static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) |
184 | { | 184 | { |
185 | struct elevator_queue *e = q->elevator; | 185 | struct elevator_queue *e = q->elevator; |
186 | 186 | ||
187 | if (e->type->ops.elevator_deactivate_req_fn) | 187 | if (e->type->ops.sq.elevator_deactivate_req_fn) |
188 | e->type->ops.elevator_deactivate_req_fn(q, rq); | 188 | e->type->ops.sq.elevator_deactivate_req_fn(q, rq); |
189 | } | 189 | } |
190 | 190 | ||
191 | #ifdef CONFIG_FAIL_IO_TIMEOUT | 191 | #ifdef CONFIG_FAIL_IO_TIMEOUT |
@@ -264,6 +264,22 @@ void ioc_clear_queue(struct request_queue *q); | |||
264 | int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); | 264 | int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); |
265 | 265 | ||
266 | /** | 266 | /** |
267 | * rq_ioc - determine io_context for request allocation | ||
268 | * @bio: request being allocated is for this bio (can be %NULL) | ||
269 | * | ||
270 | * Determine io_context to use for request allocation for @bio. May return | ||
271 | * %NULL if %current->io_context doesn't exist. | ||
272 | */ | ||
273 | static inline struct io_context *rq_ioc(struct bio *bio) | ||
274 | { | ||
275 | #ifdef CONFIG_BLK_CGROUP | ||
276 | if (bio && bio->bi_ioc) | ||
277 | return bio->bi_ioc; | ||
278 | #endif | ||
279 | return current->io_context; | ||
280 | } | ||
281 | |||
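Editor's note: rq_ioc() simply prefers an io_context already attached to the bio (e.g. when blk-cgroup code has associated the bio with a task) and otherwise falls back to the submitter's context. The fallback can be exercised in isolation with a toy version; the types below are placeholders, not the kernel structures:

#include <stdio.h>

struct io_ctx { const char *owner; };

struct toy_bio { struct io_ctx *bi_ioc; };	/* placeholder for struct bio */

static struct io_ctx current_ioc = { "current task" };

static struct io_ctx *rq_ioc(struct toy_bio *bio)
{
	if (bio && bio->bi_ioc)			/* cgroup-attached context wins */
		return bio->bi_ioc;
	return &current_ioc;			/* else the submitter's context */
}

int main(void)
{
	struct io_ctx cg = { "cgroup writeback" };
	struct toy_bio with = { &cg }, without = { NULL };

	printf("%s\n", rq_ioc(&with)->owner);	/* cgroup writeback */
	printf("%s\n", rq_ioc(&without)->owner);	/* current task */
	return 0;
}

Unlike this toy, the real helper may return NULL when current->io_context has not been created yet, as the kerneldoc above notes.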
282 | /** | ||
267 | * create_io_context - try to create task->io_context | 283 | * create_io_context - try to create task->io_context |
268 | * @gfp_mask: allocation mask | 284 | * @gfp_mask: allocation mask |
269 | * @node: allocation node | 285 | * @node: allocation node |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c73a6fcaeb9d..f0f29ee731e1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -2749,9 +2749,11 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) | |||
2749 | if (!cfqg) | 2749 | if (!cfqg) |
2750 | return NULL; | 2750 | return NULL; |
2751 | 2751 | ||
2752 | for_each_cfqg_st(cfqg, i, j, st) | 2752 | for_each_cfqg_st(cfqg, i, j, st) { |
2753 | if ((cfqq = cfq_rb_first(st)) != NULL) | 2753 | cfqq = cfq_rb_first(st); |
2754 | if (cfqq) | ||
2754 | return cfqq; | 2755 | return cfqq; |
2756 | } | ||
2755 | return NULL; | 2757 | return NULL; |
2756 | } | 2758 | } |
2757 | 2759 | ||
@@ -3864,6 +3866,8 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, | |||
3864 | goto out; | 3866 | goto out; |
3865 | } | 3867 | } |
3866 | 3868 | ||
3869 | /* cfq_init_cfqq() assumes cfqq->ioprio_class is initialized. */ | ||
3870 | cfqq->ioprio_class = IOPRIO_CLASS_NONE; | ||
3867 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); | 3871 | cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync); |
3868 | cfq_init_prio_data(cfqq, cic); | 3872 | cfq_init_prio_data(cfqq, cic); |
3869 | cfq_link_cfqq_cfqg(cfqq, cfqg); | 3873 | cfq_link_cfqq_cfqg(cfqq, cfqg); |
@@ -4837,7 +4841,7 @@ static struct elv_fs_entry cfq_attrs[] = { | |||
4837 | }; | 4841 | }; |
4838 | 4842 | ||
4839 | static struct elevator_type iosched_cfq = { | 4843 | static struct elevator_type iosched_cfq = { |
4840 | .ops = { | 4844 | .ops.sq = { |
4841 | .elevator_merge_fn = cfq_merge, | 4845 | .elevator_merge_fn = cfq_merge, |
4842 | .elevator_merged_fn = cfq_merged_request, | 4846 | .elevator_merged_fn = cfq_merged_request, |
4843 | .elevator_merge_req_fn = cfq_merged_requests, | 4847 | .elevator_merge_req_fn = cfq_merged_requests, |
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 55e0bb6d7da7..05fc0ea25a98 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c | |||
@@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = { | |||
439 | }; | 439 | }; |
440 | 440 | ||
441 | static struct elevator_type iosched_deadline = { | 441 | static struct elevator_type iosched_deadline = { |
442 | .ops = { | 442 | .ops.sq = { |
443 | .elevator_merge_fn = deadline_merge, | 443 | .elevator_merge_fn = deadline_merge, |
444 | .elevator_merged_fn = deadline_merged_request, | 444 | .elevator_merged_fn = deadline_merged_request, |
445 | .elevator_merge_req_fn = deadline_merged_requests, | 445 | .elevator_merge_req_fn = deadline_merged_requests, |
diff --git a/block/elevator.c b/block/elevator.c index 40f0c04e5ad3..b2a55167f0c2 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <trace/events/block.h> | 40 | #include <trace/events/block.h> |
41 | 41 | ||
42 | #include "blk.h" | 42 | #include "blk.h" |
43 | #include "blk-mq-sched.h" | ||
43 | 44 | ||
44 | static DEFINE_SPINLOCK(elv_list_lock); | 45 | static DEFINE_SPINLOCK(elv_list_lock); |
45 | static LIST_HEAD(elv_list); | 46 | static LIST_HEAD(elv_list); |
@@ -58,8 +59,10 @@ static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) | |||
58 | struct request_queue *q = rq->q; | 59 | struct request_queue *q = rq->q; |
59 | struct elevator_queue *e = q->elevator; | 60 | struct elevator_queue *e = q->elevator; |
60 | 61 | ||
61 | if (e->type->ops.elevator_allow_bio_merge_fn) | 62 | if (e->uses_mq && e->type->ops.mq.allow_merge) |
62 | return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio); | 63 | return e->type->ops.mq.allow_merge(q, rq, bio); |
64 | else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn) | ||
65 | return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio); | ||
63 | 66 | ||
64 | return 1; | 67 | return 1; |
65 | } | 68 | } |
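Editor's note: the elevator core now carries two callback tables, ops.mq for blk-mq schedulers and ops.sq for the legacy path, and every wrapper picks one based on e->uses_mq. The dispatch pattern in isolation looks like the sketch below; the callback names and the union layout are illustrative, not the exact struct elevator_type definition:

#include <stdbool.h>
#include <stdio.h>

struct elevator_ops {
	union {
		struct { bool (*allow_merge)(int rq, int bio); } mq;
		struct { bool (*allow_bio_merge_fn)(int rq, int bio); } sq;
	};
};

struct elevator_queue {
	bool uses_mq;
	struct elevator_ops ops;
};

static bool mq_allow(int rq, int bio)  { (void)rq; (void)bio; return true; }
static bool sq_allow(int rq, int bio)  { (void)rq; (void)bio; return false; }

/* Wrapper: use the mq hook on mq queues, the legacy hook otherwise, default 1. */
static bool elv_allow_bio_merge(struct elevator_queue *e, int rq, int bio)
{
	if (e->uses_mq && e->ops.mq.allow_merge)
		return e->ops.mq.allow_merge(rq, bio);
	else if (!e->uses_mq && e->ops.sq.allow_bio_merge_fn)
		return e->ops.sq.allow_bio_merge_fn(rq, bio);
	return true;
}

int main(void)
{
	struct elevator_queue emq = { .uses_mq = true };
	struct elevator_queue esq = { .uses_mq = false };

	emq.ops.mq.allow_merge = mq_allow;
	esq.ops.sq.allow_bio_merge_fn = sq_allow;

	printf("mq: %d, sq: %d\n", elv_allow_bio_merge(&emq, 1, 2),
	       elv_allow_bio_merge(&esq, 1, 2));
	return 0;
}

Keeping both tables in one union keeps the hot-path check to a single flag test, which is why every elv_* wrapper in this file follows the same two-branch shape.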
@@ -163,6 +166,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q, | |||
163 | kobject_init(&eq->kobj, &elv_ktype); | 166 | kobject_init(&eq->kobj, &elv_ktype); |
164 | mutex_init(&eq->sysfs_lock); | 167 | mutex_init(&eq->sysfs_lock); |
165 | hash_init(eq->hash); | 168 | hash_init(eq->hash); |
169 | eq->uses_mq = e->uses_mq; | ||
166 | 170 | ||
167 | return eq; | 171 | return eq; |
168 | } | 172 | } |
@@ -203,11 +207,12 @@ int elevator_init(struct request_queue *q, char *name) | |||
203 | } | 207 | } |
204 | 208 | ||
205 | /* | 209 | /* |
206 | * Use the default elevator specified by config boot param or | 210 | * Use the default elevator specified by config boot param for |
207 | * config option. Don't try to load modules as we could be running | 211 | * non-mq devices, or by config option. Don't try to load modules |
208 | * off async and request_module() isn't allowed from async. | 212 | * as we could be running off async and request_module() isn't |
213 | * allowed from async. | ||
209 | */ | 214 | */ |
210 | if (!e && *chosen_elevator) { | 215 | if (!e && !q->mq_ops && *chosen_elevator) { |
211 | e = elevator_get(chosen_elevator, false); | 216 | e = elevator_get(chosen_elevator, false); |
212 | if (!e) | 217 | if (!e) |
213 | printk(KERN_ERR "I/O scheduler %s not found\n", | 218 | printk(KERN_ERR "I/O scheduler %s not found\n", |
@@ -215,18 +220,32 @@ int elevator_init(struct request_queue *q, char *name) | |||
215 | } | 220 | } |
216 | 221 | ||
217 | if (!e) { | 222 | if (!e) { |
218 | e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); | 223 | if (q->mq_ops && q->nr_hw_queues == 1) |
224 | e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false); | ||
225 | else if (q->mq_ops) | ||
226 | e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false); | ||
227 | else | ||
228 | e = elevator_get(CONFIG_DEFAULT_IOSCHED, false); | ||
229 | |||
219 | if (!e) { | 230 | if (!e) { |
220 | printk(KERN_ERR | 231 | printk(KERN_ERR |
221 | "Default I/O scheduler not found. " \ | 232 | "Default I/O scheduler not found. " \ |
222 | "Using noop.\n"); | 233 | "Using noop/none.\n"); |
223 | e = elevator_get("noop", false); | 234 | e = elevator_get("noop", false); |
224 | } | 235 | } |
225 | } | 236 | } |
226 | 237 | ||
227 | err = e->ops.elevator_init_fn(q, e); | 238 | if (e->uses_mq) { |
228 | if (err) | 239 | err = blk_mq_sched_setup(q); |
240 | if (!err) | ||
241 | err = e->ops.mq.init_sched(q, e); | ||
242 | } else | ||
243 | err = e->ops.sq.elevator_init_fn(q, e); | ||
244 | if (err) { | ||
245 | if (e->uses_mq) | ||
246 | blk_mq_sched_teardown(q); | ||
229 | elevator_put(e); | 247 | elevator_put(e); |
248 | } | ||
230 | return err; | 249 | return err; |
231 | } | 250 | } |
232 | EXPORT_SYMBOL(elevator_init); | 251 | EXPORT_SYMBOL(elevator_init); |
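Editor's note: elevator_init() now picks a different built-in default depending on whether the queue is blk-mq and how many hardware queues it has: the elevator= boot parameter only applies to legacy queues, single-hw-queue mq devices get CONFIG_DEFAULT_SQ_IOSCHED, other mq devices CONFIG_DEFAULT_MQ_IOSCHED, legacy queues CONFIG_DEFAULT_IOSCHED, with noop/none as the last resort. The selection logic alone, as a sketch with placeholder strings standing in for the Kconfig values:

#include <stdio.h>

/* Placeholders for the Kconfig-selected defaults; values are illustrative. */
#define DEFAULT_SQ_IOSCHED  "mq-deadline"
#define DEFAULT_MQ_IOSCHED  "none"
#define DEFAULT_IOSCHED     "cfq"

static const char *pick_default_elevator(int is_mq, int nr_hw_queues,
					  const char *chosen_elevator)
{
	/* the elevator= boot parameter only applies to legacy (non-mq) queues */
	if (!is_mq && chosen_elevator && *chosen_elevator)
		return chosen_elevator;

	if (is_mq && nr_hw_queues == 1)
		return DEFAULT_SQ_IOSCHED;
	if (is_mq)
		return DEFAULT_MQ_IOSCHED;
	return DEFAULT_IOSCHED;
}

int main(void)
{
	printf("legacy + elevator=deadline -> %s\n",
	       pick_default_elevator(0, 1, "deadline"));
	printf("mq, 1 hw queue             -> %s\n",
	       pick_default_elevator(1, 1, ""));
	printf("mq, 8 hw queues            -> %s\n",
	       pick_default_elevator(1, 8, ""));
	return 0;
}

The real function additionally falls back to "noop" (or none for mq) when the chosen scheduler module cannot be found, as the hunk below shows.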
@@ -234,8 +253,10 @@ EXPORT_SYMBOL(elevator_init); | |||
234 | void elevator_exit(struct elevator_queue *e) | 253 | void elevator_exit(struct elevator_queue *e) |
235 | { | 254 | { |
236 | mutex_lock(&e->sysfs_lock); | 255 | mutex_lock(&e->sysfs_lock); |
237 | if (e->type->ops.elevator_exit_fn) | 256 | if (e->uses_mq && e->type->ops.mq.exit_sched) |
238 | e->type->ops.elevator_exit_fn(e); | 257 | e->type->ops.mq.exit_sched(e); |
258 | else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) | ||
259 | e->type->ops.sq.elevator_exit_fn(e); | ||
239 | mutex_unlock(&e->sysfs_lock); | 260 | mutex_unlock(&e->sysfs_lock); |
240 | 261 | ||
241 | kobject_put(&e->kobj); | 262 | kobject_put(&e->kobj); |
@@ -253,6 +274,7 @@ void elv_rqhash_del(struct request_queue *q, struct request *rq) | |||
253 | if (ELV_ON_HASH(rq)) | 274 | if (ELV_ON_HASH(rq)) |
254 | __elv_rqhash_del(rq); | 275 | __elv_rqhash_del(rq); |
255 | } | 276 | } |
277 | EXPORT_SYMBOL_GPL(elv_rqhash_del); | ||
256 | 278 | ||
257 | void elv_rqhash_add(struct request_queue *q, struct request *rq) | 279 | void elv_rqhash_add(struct request_queue *q, struct request *rq) |
258 | { | 280 | { |
@@ -262,6 +284,7 @@ void elv_rqhash_add(struct request_queue *q, struct request *rq) | |||
262 | hash_add(e->hash, &rq->hash, rq_hash_key(rq)); | 284 | hash_add(e->hash, &rq->hash, rq_hash_key(rq)); |
263 | rq->rq_flags |= RQF_HASHED; | 285 | rq->rq_flags |= RQF_HASHED; |
264 | } | 286 | } |
287 | EXPORT_SYMBOL_GPL(elv_rqhash_add); | ||
265 | 288 | ||
266 | void elv_rqhash_reposition(struct request_queue *q, struct request *rq) | 289 | void elv_rqhash_reposition(struct request_queue *q, struct request *rq) |
267 | { | 290 | { |
@@ -443,8 +466,10 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | |||
443 | return ELEVATOR_BACK_MERGE; | 466 | return ELEVATOR_BACK_MERGE; |
444 | } | 467 | } |
445 | 468 | ||
446 | if (e->type->ops.elevator_merge_fn) | 469 | if (e->uses_mq && e->type->ops.mq.request_merge) |
447 | return e->type->ops.elevator_merge_fn(q, req, bio); | 470 | return e->type->ops.mq.request_merge(q, req, bio); |
471 | else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn) | ||
472 | return e->type->ops.sq.elevator_merge_fn(q, req, bio); | ||
448 | 473 | ||
449 | return ELEVATOR_NO_MERGE; | 474 | return ELEVATOR_NO_MERGE; |
450 | } | 475 | } |
@@ -456,8 +481,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | |||
456 | * | 481 | * |
457 | * Returns true if we merged, false otherwise | 482 | * Returns true if we merged, false otherwise |
458 | */ | 483 | */ |
459 | static bool elv_attempt_insert_merge(struct request_queue *q, | 484 | bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq) |
460 | struct request *rq) | ||
461 | { | 485 | { |
462 | struct request *__rq; | 486 | struct request *__rq; |
463 | bool ret; | 487 | bool ret; |
@@ -495,8 +519,10 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type) | |||
495 | { | 519 | { |
496 | struct elevator_queue *e = q->elevator; | 520 | struct elevator_queue *e = q->elevator; |
497 | 521 | ||
498 | if (e->type->ops.elevator_merged_fn) | 522 | if (e->uses_mq && e->type->ops.mq.request_merged) |
499 | e->type->ops.elevator_merged_fn(q, rq, type); | 523 | e->type->ops.mq.request_merged(q, rq, type); |
524 | else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn) | ||
525 | e->type->ops.sq.elevator_merged_fn(q, rq, type); | ||
500 | 526 | ||
501 | if (type == ELEVATOR_BACK_MERGE) | 527 | if (type == ELEVATOR_BACK_MERGE) |
502 | elv_rqhash_reposition(q, rq); | 528 | elv_rqhash_reposition(q, rq); |
@@ -508,10 +534,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, | |||
508 | struct request *next) | 534 | struct request *next) |
509 | { | 535 | { |
510 | struct elevator_queue *e = q->elevator; | 536 | struct elevator_queue *e = q->elevator; |
511 | const int next_sorted = next->rq_flags & RQF_SORTED; | 537 | bool next_sorted = false; |
512 | 538 | ||
513 | if (next_sorted && e->type->ops.elevator_merge_req_fn) | 539 | if (e->uses_mq && e->type->ops.mq.requests_merged) |
514 | e->type->ops.elevator_merge_req_fn(q, rq, next); | 540 | e->type->ops.mq.requests_merged(q, rq, next); |
541 | else if (e->type->ops.sq.elevator_merge_req_fn) { | ||
542 | next_sorted = next->rq_flags & RQF_SORTED; | ||
543 | if (next_sorted) | ||
544 | e->type->ops.sq.elevator_merge_req_fn(q, rq, next); | ||
545 | } | ||
515 | 546 | ||
516 | elv_rqhash_reposition(q, rq); | 547 | elv_rqhash_reposition(q, rq); |
517 | 548 | ||
@@ -528,8 +559,11 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, | |||
528 | { | 559 | { |
529 | struct elevator_queue *e = q->elevator; | 560 | struct elevator_queue *e = q->elevator; |
530 | 561 | ||
531 | if (e->type->ops.elevator_bio_merged_fn) | 562 | if (WARN_ON_ONCE(e->uses_mq)) |
532 | e->type->ops.elevator_bio_merged_fn(q, rq, bio); | 563 | return; |
564 | |||
565 | if (e->type->ops.sq.elevator_bio_merged_fn) | ||
566 | e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); | ||
533 | } | 567 | } |
534 | 568 | ||
535 | #ifdef CONFIG_PM | 569 | #ifdef CONFIG_PM |
@@ -574,11 +608,15 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) | |||
574 | 608 | ||
575 | void elv_drain_elevator(struct request_queue *q) | 609 | void elv_drain_elevator(struct request_queue *q) |
576 | { | 610 | { |
611 | struct elevator_queue *e = q->elevator; | ||
577 | static int printed; | 612 | static int printed; |
578 | 613 | ||
614 | if (WARN_ON_ONCE(e->uses_mq)) | ||
615 | return; | ||
616 | |||
579 | lockdep_assert_held(q->queue_lock); | 617 | lockdep_assert_held(q->queue_lock); |
580 | 618 | ||
581 | while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) | 619 | while (e->type->ops.sq.elevator_dispatch_fn(q, 1)) |
582 | ; | 620 | ; |
583 | if (q->nr_sorted && printed++ < 10) { | 621 | if (q->nr_sorted && printed++ < 10) { |
584 | printk(KERN_ERR "%s: forced dispatching is broken " | 622 | printk(KERN_ERR "%s: forced dispatching is broken " |
@@ -653,7 +691,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) | |||
653 | * rq cannot be accessed after calling | 691 | * rq cannot be accessed after calling |
654 | * elevator_add_req_fn. | 692 | * elevator_add_req_fn. |
655 | */ | 693 | */ |
656 | q->elevator->type->ops.elevator_add_req_fn(q, rq); | 694 | q->elevator->type->ops.sq.elevator_add_req_fn(q, rq); |
657 | break; | 695 | break; |
658 | 696 | ||
659 | case ELEVATOR_INSERT_FLUSH: | 697 | case ELEVATOR_INSERT_FLUSH: |
@@ -682,8 +720,11 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) | |||
682 | { | 720 | { |
683 | struct elevator_queue *e = q->elevator; | 721 | struct elevator_queue *e = q->elevator; |
684 | 722 | ||
685 | if (e->type->ops.elevator_latter_req_fn) | 723 | if (e->uses_mq && e->type->ops.mq.next_request) |
686 | return e->type->ops.elevator_latter_req_fn(q, rq); | 724 | return e->type->ops.mq.next_request(q, rq); |
725 | else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn) | ||
726 | return e->type->ops.sq.elevator_latter_req_fn(q, rq); | ||
727 | |||
687 | return NULL; | 728 | return NULL; |
688 | } | 729 | } |
689 | 730 | ||
@@ -691,8 +732,10 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) | |||
691 | { | 732 | { |
692 | struct elevator_queue *e = q->elevator; | 733 | struct elevator_queue *e = q->elevator; |
693 | 734 | ||
694 | if (e->type->ops.elevator_former_req_fn) | 735 | if (e->uses_mq && e->type->ops.mq.former_request) |
695 | return e->type->ops.elevator_former_req_fn(q, rq); | 736 | return e->type->ops.mq.former_request(q, rq); |
737 | if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn) | ||
738 | return e->type->ops.sq.elevator_former_req_fn(q, rq); | ||
696 | return NULL; | 739 | return NULL; |
697 | } | 740 | } |
698 | 741 | ||
@@ -701,8 +744,11 @@ int elv_set_request(struct request_queue *q, struct request *rq, | |||
701 | { | 744 | { |
702 | struct elevator_queue *e = q->elevator; | 745 | struct elevator_queue *e = q->elevator; |
703 | 746 | ||
704 | if (e->type->ops.elevator_set_req_fn) | 747 | if (WARN_ON_ONCE(e->uses_mq)) |
705 | return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask); | 748 | return 0; |
749 | |||
750 | if (e->type->ops.sq.elevator_set_req_fn) | ||
751 | return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask); | ||
706 | return 0; | 752 | return 0; |
707 | } | 753 | } |
708 | 754 | ||
@@ -710,16 +756,22 @@ void elv_put_request(struct request_queue *q, struct request *rq) | |||
710 | { | 756 | { |
711 | struct elevator_queue *e = q->elevator; | 757 | struct elevator_queue *e = q->elevator; |
712 | 758 | ||
713 | if (e->type->ops.elevator_put_req_fn) | 759 | if (WARN_ON_ONCE(e->uses_mq)) |
714 | e->type->ops.elevator_put_req_fn(rq); | 760 | return; |
761 | |||
762 | if (e->type->ops.sq.elevator_put_req_fn) | ||
763 | e->type->ops.sq.elevator_put_req_fn(rq); | ||
715 | } | 764 | } |
716 | 765 | ||
717 | int elv_may_queue(struct request_queue *q, unsigned int op) | 766 | int elv_may_queue(struct request_queue *q, unsigned int op) |
718 | { | 767 | { |
719 | struct elevator_queue *e = q->elevator; | 768 | struct elevator_queue *e = q->elevator; |
720 | 769 | ||
721 | if (e->type->ops.elevator_may_queue_fn) | 770 | if (WARN_ON_ONCE(e->uses_mq)) |
722 | return e->type->ops.elevator_may_queue_fn(q, op); | 771 | return 0; |
772 | |||
773 | if (e->type->ops.sq.elevator_may_queue_fn) | ||
774 | return e->type->ops.sq.elevator_may_queue_fn(q, op); | ||
723 | 775 | ||
724 | return ELV_MQUEUE_MAY; | 776 | return ELV_MQUEUE_MAY; |
725 | } | 777 | } |
@@ -728,14 +780,17 @@ void elv_completed_request(struct request_queue *q, struct request *rq) | |||
728 | { | 780 | { |
729 | struct elevator_queue *e = q->elevator; | 781 | struct elevator_queue *e = q->elevator; |
730 | 782 | ||
783 | if (WARN_ON_ONCE(e->uses_mq)) | ||
784 | return; | ||
785 | |||
731 | /* | 786 | /* |
732 | * request is released from the driver, io must be done | 787 | * request is released from the driver, io must be done |
733 | */ | 788 | */ |
734 | if (blk_account_rq(rq)) { | 789 | if (blk_account_rq(rq)) { |
735 | q->in_flight[rq_is_sync(rq)]--; | 790 | q->in_flight[rq_is_sync(rq)]--; |
736 | if ((rq->rq_flags & RQF_SORTED) && | 791 | if ((rq->rq_flags & RQF_SORTED) && |
737 | e->type->ops.elevator_completed_req_fn) | 792 | e->type->ops.sq.elevator_completed_req_fn) |
738 | e->type->ops.elevator_completed_req_fn(q, rq); | 793 | e->type->ops.sq.elevator_completed_req_fn(q, rq); |
739 | } | 794 | } |
740 | } | 795 | } |
741 | 796 | ||
@@ -803,8 +858,8 @@ int elv_register_queue(struct request_queue *q) | |||
803 | } | 858 | } |
804 | kobject_uevent(&e->kobj, KOBJ_ADD); | 859 | kobject_uevent(&e->kobj, KOBJ_ADD); |
805 | e->registered = 1; | 860 | e->registered = 1; |
806 | if (e->type->ops.elevator_registered_fn) | 861 | if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn) |
807 | e->type->ops.elevator_registered_fn(q); | 862 | e->type->ops.sq.elevator_registered_fn(q); |
808 | } | 863 | } |
809 | return error; | 864 | return error; |
810 | } | 865 | } |
@@ -891,9 +946,14 @@ EXPORT_SYMBOL_GPL(elv_unregister); | |||
891 | static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | 946 | static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) |
892 | { | 947 | { |
893 | struct elevator_queue *old = q->elevator; | 948 | struct elevator_queue *old = q->elevator; |
894 | bool registered = old->registered; | 949 | bool old_registered = false; |
895 | int err; | 950 | int err; |
896 | 951 | ||
952 | if (q->mq_ops) { | ||
953 | blk_mq_freeze_queue(q); | ||
954 | blk_mq_quiesce_queue(q); | ||
955 | } | ||
956 | |||
897 | /* | 957 | /* |
898 | * Turn on BYPASS and drain all requests w/ elevator private data. | 958 | * Turn on BYPASS and drain all requests w/ elevator private data. |
899 | * Block layer doesn't call into a quiesced elevator - all requests | 959 | * Block layer doesn't call into a quiesced elevator - all requests |
@@ -901,42 +961,76 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | |||
901 | * using INSERT_BACK. All requests have SOFTBARRIER set and no | 961 | * using INSERT_BACK. All requests have SOFTBARRIER set and no |
902 | * merge happens either. | 962 | * merge happens either. |
903 | */ | 963 | */ |
904 | blk_queue_bypass_start(q); | 964 | if (old) { |
965 | old_registered = old->registered; | ||
905 | 966 | ||
906 | /* unregister and clear all auxiliary data of the old elevator */ | 967 | if (old->uses_mq) |
907 | if (registered) | 968 | blk_mq_sched_teardown(q); |
908 | elv_unregister_queue(q); | ||
909 | 969 | ||
910 | spin_lock_irq(q->queue_lock); | 970 | if (!q->mq_ops) |
911 | ioc_clear_queue(q); | 971 | blk_queue_bypass_start(q); |
912 | spin_unlock_irq(q->queue_lock); | 972 | |
973 | /* unregister and clear all auxiliary data of the old elevator */ | ||
974 | if (old_registered) | ||
975 | elv_unregister_queue(q); | ||
976 | |||
977 | spin_lock_irq(q->queue_lock); | ||
978 | ioc_clear_queue(q); | ||
979 | spin_unlock_irq(q->queue_lock); | ||
980 | } | ||
913 | 981 | ||
914 | /* allocate, init and register new elevator */ | 982 | /* allocate, init and register new elevator */ |
915 | err = new_e->ops.elevator_init_fn(q, new_e); | 983 | if (new_e) { |
916 | if (err) | 984 | if (new_e->uses_mq) { |
917 | goto fail_init; | 985 | err = blk_mq_sched_setup(q); |
986 | if (!err) | ||
987 | err = new_e->ops.mq.init_sched(q, new_e); | ||
988 | } else | ||
989 | err = new_e->ops.sq.elevator_init_fn(q, new_e); | ||
990 | if (err) | ||
991 | goto fail_init; | ||
918 | 992 | ||
919 | if (registered) { | ||
920 | err = elv_register_queue(q); | 993 | err = elv_register_queue(q); |
921 | if (err) | 994 | if (err) |
922 | goto fail_register; | 995 | goto fail_register; |
923 | } | 996 | } else |
997 | q->elevator = NULL; | ||
924 | 998 | ||
925 | /* done, kill the old one and finish */ | 999 | /* done, kill the old one and finish */ |
926 | elevator_exit(old); | 1000 | if (old) { |
927 | blk_queue_bypass_end(q); | 1001 | elevator_exit(old); |
1002 | if (!q->mq_ops) | ||
1003 | blk_queue_bypass_end(q); | ||
1004 | } | ||
928 | 1005 | ||
929 | blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); | 1006 | if (q->mq_ops) { |
1007 | blk_mq_unfreeze_queue(q); | ||
1008 | blk_mq_start_stopped_hw_queues(q, true); | ||
1009 | } | ||
1010 | |||
1011 | if (new_e) | ||
1012 | blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); | ||
1013 | else | ||
1014 | blk_add_trace_msg(q, "elv switch: none"); | ||
930 | 1015 | ||
931 | return 0; | 1016 | return 0; |
932 | 1017 | ||
933 | fail_register: | 1018 | fail_register: |
1019 | if (q->mq_ops) | ||
1020 | blk_mq_sched_teardown(q); | ||
934 | elevator_exit(q->elevator); | 1021 | elevator_exit(q->elevator); |
935 | fail_init: | 1022 | fail_init: |
936 | /* switch failed, restore and re-register old elevator */ | 1023 | /* switch failed, restore and re-register old elevator */ |
937 | q->elevator = old; | 1024 | if (old) { |
938 | elv_register_queue(q); | 1025 | q->elevator = old; |
939 | blk_queue_bypass_end(q); | 1026 | elv_register_queue(q); |
1027 | if (!q->mq_ops) | ||
1028 | blk_queue_bypass_end(q); | ||
1029 | } | ||
1030 | if (q->mq_ops) { | ||
1031 | blk_mq_unfreeze_queue(q); | ||
1032 | blk_mq_start_stopped_hw_queues(q, true); | ||
1033 | } | ||
940 | 1034 | ||
941 | return err; | 1035 | return err; |
942 | } | 1036 | } |
@@ -949,8 +1043,11 @@ static int __elevator_change(struct request_queue *q, const char *name) | |||
949 | char elevator_name[ELV_NAME_MAX]; | 1043 | char elevator_name[ELV_NAME_MAX]; |
950 | struct elevator_type *e; | 1044 | struct elevator_type *e; |
951 | 1045 | ||
952 | if (!q->elevator) | 1046 | /* |
953 | return -ENXIO; | 1047 | * Special case for mq, turn off scheduling |
1048 | */ | ||
1049 | if (q->mq_ops && !strncmp(name, "none", 4)) | ||
1050 | return elevator_switch(q, NULL); | ||
954 | 1051 | ||
955 | strlcpy(elevator_name, name, sizeof(elevator_name)); | 1052 | strlcpy(elevator_name, name, sizeof(elevator_name)); |
956 | e = elevator_get(strstrip(elevator_name), true); | 1053 | e = elevator_get(strstrip(elevator_name), true); |
@@ -959,11 +1056,21 @@ static int __elevator_change(struct request_queue *q, const char *name) | |||
959 | return -EINVAL; | 1056 | return -EINVAL; |
960 | } | 1057 | } |
961 | 1058 | ||
962 | if (!strcmp(elevator_name, q->elevator->type->elevator_name)) { | 1059 | if (q->elevator && |
1060 | !strcmp(elevator_name, q->elevator->type->elevator_name)) { | ||
963 | elevator_put(e); | 1061 | elevator_put(e); |
964 | return 0; | 1062 | return 0; |
965 | } | 1063 | } |
966 | 1064 | ||
1065 | if (!e->uses_mq && q->mq_ops) { | ||
1066 | elevator_put(e); | ||
1067 | return -EINVAL; | ||
1068 | } | ||
1069 | if (e->uses_mq && !q->mq_ops) { | ||
1070 | elevator_put(e); | ||
1071 | return -EINVAL; | ||
1072 | } | ||
1073 | |||
967 | return elevator_switch(q, e); | 1074 | return elevator_switch(q, e); |
968 | } | 1075 | } |
969 | 1076 | ||
@@ -985,7 +1092,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, | |||
985 | { | 1092 | { |
986 | int ret; | 1093 | int ret; |
987 | 1094 | ||
988 | if (!q->elevator) | 1095 | if (!(q->mq_ops || q->request_fn)) |
989 | return count; | 1096 | return count; |
990 | 1097 | ||
991 | ret = __elevator_change(q, name); | 1098 | ret = __elevator_change(q, name); |
@@ -999,24 +1106,34 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, | |||
999 | ssize_t elv_iosched_show(struct request_queue *q, char *name) | 1106 | ssize_t elv_iosched_show(struct request_queue *q, char *name) |
1000 | { | 1107 | { |
1001 | struct elevator_queue *e = q->elevator; | 1108 | struct elevator_queue *e = q->elevator; |
1002 | struct elevator_type *elv; | 1109 | struct elevator_type *elv = NULL; |
1003 | struct elevator_type *__e; | 1110 | struct elevator_type *__e; |
1004 | int len = 0; | 1111 | int len = 0; |
1005 | 1112 | ||
1006 | if (!q->elevator || !blk_queue_stackable(q)) | 1113 | if (!blk_queue_stackable(q)) |
1007 | return sprintf(name, "none\n"); | 1114 | return sprintf(name, "none\n"); |
1008 | 1115 | ||
1009 | elv = e->type; | 1116 | if (!q->elevator) |
1117 | len += sprintf(name+len, "[none] "); | ||
1118 | else | ||
1119 | elv = e->type; | ||
1010 | 1120 | ||
1011 | spin_lock(&elv_list_lock); | 1121 | spin_lock(&elv_list_lock); |
1012 | list_for_each_entry(__e, &elv_list, list) { | 1122 | list_for_each_entry(__e, &elv_list, list) { |
1013 | if (!strcmp(elv->elevator_name, __e->elevator_name)) | 1123 | if (elv && !strcmp(elv->elevator_name, __e->elevator_name)) { |
1014 | len += sprintf(name+len, "[%s] ", elv->elevator_name); | 1124 | len += sprintf(name+len, "[%s] ", elv->elevator_name); |
1015 | else | 1125 | continue; |
1126 | } | ||
1127 | if (__e->uses_mq && q->mq_ops) | ||
1128 | len += sprintf(name+len, "%s ", __e->elevator_name); | ||
1129 | else if (!__e->uses_mq && !q->mq_ops) | ||
1016 | len += sprintf(name+len, "%s ", __e->elevator_name); | 1130 | len += sprintf(name+len, "%s ", __e->elevator_name); |
1017 | } | 1131 | } |
1018 | spin_unlock(&elv_list_lock); | 1132 | spin_unlock(&elv_list_lock); |
1019 | 1133 | ||
1134 | if (q->mq_ops && q->elevator) | ||
1135 | len += sprintf(name+len, "none"); | ||
1136 | |||
1020 | len += sprintf(len+name, "\n"); | 1137 | len += sprintf(len+name, "\n"); |
1021 | return len; | 1138 | return len; |
1022 | } | 1139 | } |
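Every call site touched in elevator.c above follows the same shape: test e->uses_mq, then invoke either the ops.mq or the ops.sq callback table. The standalone userspace model below only illustrates that union-plus-flag dispatch pattern in isolation; the struct and function names (elv_type_model, sq_ops, mq_ops, latter_request) are made up for the example, since the companion elevator.h hunk is not part of this excerpt.

/*
 * Minimal model of the dual-path dispatch used throughout elevator.c
 * above: one union holds either the legacy (sq) or the blk-mq (mq)
 * callback table, and a uses_mq flag selects the active member.
 */
#include <stdbool.h>
#include <stdio.h>

struct sq_ops { int (*latter_req_fn)(int rq); };
struct mq_ops { int (*next_request)(int rq); };

struct elv_type_model {
	union {
		struct sq_ops sq;
		struct mq_ops mq;
	} ops;
	bool uses_mq;
};

static int sq_latter(int rq) { return rq + 1; }
static int mq_next(int rq)   { return rq + 2; }

/* Mirrors the "if (uses_mq && ops.mq.x) ... else if (!uses_mq && ops.sq.y)" shape. */
static int latter_request(const struct elv_type_model *e, int rq)
{
	if (e->uses_mq && e->ops.mq.next_request)
		return e->ops.mq.next_request(rq);
	else if (!e->uses_mq && e->ops.sq.latter_req_fn)
		return e->ops.sq.latter_req_fn(rq);

	return -1;	/* neither path provides a callback */
}

int main(void)
{
	struct elv_type_model legacy = { .ops.sq = { sq_latter }, .uses_mq = false };
	struct elv_type_model mq     = { .ops.mq = { mq_next   }, .uses_mq = true  };

	printf("%d %d\n", latter_request(&legacy, 10), latter_request(&mq, 10));
	return 0;
}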
diff --git a/block/mq-deadline.c b/block/mq-deadline.c new file mode 100644 index 000000000000..d93ec713fa62 --- /dev/null +++ b/block/mq-deadline.c | |||
@@ -0,0 +1,555 @@ | |||
1 | /* | ||
2 | * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler, | ||
3 | * for the blk-mq scheduling framework | ||
4 | * | ||
5 | * Copyright (C) 2016 Jens Axboe <axboe@kernel.dk> | ||
6 | */ | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/fs.h> | ||
9 | #include <linux/blkdev.h> | ||
10 | #include <linux/blk-mq.h> | ||
11 | #include <linux/elevator.h> | ||
12 | #include <linux/bio.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/compiler.h> | ||
17 | #include <linux/rbtree.h> | ||
18 | #include <linux/sbitmap.h> | ||
19 | |||
20 | #include "blk.h" | ||
21 | #include "blk-mq.h" | ||
22 | #include "blk-mq-tag.h" | ||
23 | #include "blk-mq-sched.h" | ||
24 | |||
25 | /* | ||
26 | * See Documentation/block/deadline-iosched.txt | ||
27 | */ | ||
28 | static const int read_expire = HZ / 2; /* max time before a read is submitted. */ | ||
29 | static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ | ||
30 | static const int writes_starved = 2; /* max times reads can starve a write */ | ||
31 | static const int fifo_batch = 16; /* # of sequential requests treated as one | ||
32 | by the above parameters. For throughput. */ | ||
33 | |||
34 | struct deadline_data { | ||
35 | /* | ||
36 | * run time data | ||
37 | */ | ||
38 | |||
39 | /* | ||
40 | * requests are present on both sort_list and fifo_list | ||
41 | */ | ||
42 | struct rb_root sort_list[2]; | ||
43 | struct list_head fifo_list[2]; | ||
44 | |||
45 | /* | ||
46 | * next in sort order. read, write or both are NULL | ||
47 | */ | ||
48 | struct request *next_rq[2]; | ||
49 | unsigned int batching; /* number of sequential requests made */ | ||
50 | unsigned int starved; /* times reads have starved writes */ | ||
51 | |||
52 | /* | ||
53 | * settings that change how the i/o scheduler behaves | ||
54 | */ | ||
55 | int fifo_expire[2]; | ||
56 | int fifo_batch; | ||
57 | int writes_starved; | ||
58 | int front_merges; | ||
59 | |||
60 | spinlock_t lock; | ||
61 | struct list_head dispatch; | ||
62 | }; | ||
63 | |||
64 | static inline struct rb_root * | ||
65 | deadline_rb_root(struct deadline_data *dd, struct request *rq) | ||
66 | { | ||
67 | return &dd->sort_list[rq_data_dir(rq)]; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * get the request after `rq' in sector-sorted order | ||
72 | */ | ||
73 | static inline struct request * | ||
74 | deadline_latter_request(struct request *rq) | ||
75 | { | ||
76 | struct rb_node *node = rb_next(&rq->rb_node); | ||
77 | |||
78 | if (node) | ||
79 | return rb_entry_rq(node); | ||
80 | |||
81 | return NULL; | ||
82 | } | ||
83 | |||
84 | static void | ||
85 | deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) | ||
86 | { | ||
87 | struct rb_root *root = deadline_rb_root(dd, rq); | ||
88 | |||
89 | elv_rb_add(root, rq); | ||
90 | } | ||
91 | |||
92 | static inline void | ||
93 | deadline_del_rq_rb(struct deadline_data *dd, struct request *rq) | ||
94 | { | ||
95 | const int data_dir = rq_data_dir(rq); | ||
96 | |||
97 | if (dd->next_rq[data_dir] == rq) | ||
98 | dd->next_rq[data_dir] = deadline_latter_request(rq); | ||
99 | |||
100 | elv_rb_del(deadline_rb_root(dd, rq), rq); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * remove rq from rbtree and fifo. | ||
105 | */ | ||
106 | static void deadline_remove_request(struct request_queue *q, struct request *rq) | ||
107 | { | ||
108 | struct deadline_data *dd = q->elevator->elevator_data; | ||
109 | |||
110 | list_del_init(&rq->queuelist); | ||
111 | |||
112 | /* | ||
113 | * We might not be on the rbtree, if we are doing an insert merge | ||
114 | */ | ||
115 | if (!RB_EMPTY_NODE(&rq->rb_node)) | ||
116 | deadline_del_rq_rb(dd, rq); | ||
117 | |||
118 | elv_rqhash_del(q, rq); | ||
119 | if (q->last_merge == rq) | ||
120 | q->last_merge = NULL; | ||
121 | } | ||
122 | |||
123 | static void dd_request_merged(struct request_queue *q, struct request *req, | ||
124 | int type) | ||
125 | { | ||
126 | struct deadline_data *dd = q->elevator->elevator_data; | ||
127 | |||
128 | /* | ||
129 | * if the merge was a front merge, we need to reposition request | ||
130 | */ | ||
131 | if (type == ELEVATOR_FRONT_MERGE) { | ||
132 | elv_rb_del(deadline_rb_root(dd, req), req); | ||
133 | deadline_add_rq_rb(dd, req); | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static void dd_merged_requests(struct request_queue *q, struct request *req, | ||
138 | struct request *next) | ||
139 | { | ||
140 | /* | ||
141 | * if next expires before rq, assign its expire time to rq | ||
142 | * and move into next position (next will be deleted) in fifo | ||
143 | */ | ||
144 | if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { | ||
145 | if (time_before((unsigned long)next->fifo_time, | ||
146 | (unsigned long)req->fifo_time)) { | ||
147 | list_move(&req->queuelist, &next->queuelist); | ||
148 | req->fifo_time = next->fifo_time; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * kill knowledge of next, this one is a goner | ||
154 | */ | ||
155 | deadline_remove_request(q, next); | ||
156 | } | ||
157 | |||
158 | /* | ||
159 | * move an entry to dispatch queue | ||
160 | */ | ||
161 | static void | ||
162 | deadline_move_request(struct deadline_data *dd, struct request *rq) | ||
163 | { | ||
164 | const int data_dir = rq_data_dir(rq); | ||
165 | |||
166 | dd->next_rq[READ] = NULL; | ||
167 | dd->next_rq[WRITE] = NULL; | ||
168 | dd->next_rq[data_dir] = deadline_latter_request(rq); | ||
169 | |||
170 | /* | ||
171 | * take it off the sort and fifo list | ||
172 | */ | ||
173 | deadline_remove_request(rq->q, rq); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * deadline_check_fifo returns 0 if there are no expired requests on the fifo, | ||
178 | * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) | ||
179 | */ | ||
180 | static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) | ||
181 | { | ||
182 | struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next); | ||
183 | |||
184 | /* | ||
185 | * rq is expired! | ||
186 | */ | ||
187 | if (time_after_eq(jiffies, (unsigned long)rq->fifo_time)) | ||
188 | return 1; | ||
189 | |||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * deadline_dispatch_requests selects the best request according to | ||
195 | * read/write expire, fifo_batch, etc | ||
196 | */ | ||
197 | static struct request *__dd_dispatch_request(struct blk_mq_hw_ctx *hctx) | ||
198 | { | ||
199 | struct deadline_data *dd = hctx->queue->elevator->elevator_data; | ||
200 | struct request *rq; | ||
201 | bool reads, writes; | ||
202 | int data_dir; | ||
203 | |||
204 | if (!list_empty(&dd->dispatch)) { | ||
205 | rq = list_first_entry(&dd->dispatch, struct request, queuelist); | ||
206 | list_del_init(&rq->queuelist); | ||
207 | goto done; | ||
208 | } | ||
209 | |||
210 | reads = !list_empty(&dd->fifo_list[READ]); | ||
211 | writes = !list_empty(&dd->fifo_list[WRITE]); | ||
212 | |||
213 | /* | ||
214 | * batches are currently reads XOR writes | ||
215 | */ | ||
216 | if (dd->next_rq[WRITE]) | ||
217 | rq = dd->next_rq[WRITE]; | ||
218 | else | ||
219 | rq = dd->next_rq[READ]; | ||
220 | |||
221 | if (rq && dd->batching < dd->fifo_batch) | ||
222 | /* we have a next request and are still entitled to batch */ | ||
223 | goto dispatch_request; | ||
224 | |||
225 | /* | ||
226 | * at this point we are not running a batch. select the appropriate | ||
227 | * data direction (read / write) | ||
228 | */ | ||
229 | |||
230 | if (reads) { | ||
231 | BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ])); | ||
232 | |||
233 | if (writes && (dd->starved++ >= dd->writes_starved)) | ||
234 | goto dispatch_writes; | ||
235 | |||
236 | data_dir = READ; | ||
237 | |||
238 | goto dispatch_find_request; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * either there are no reads, or writes have been starved | ||
243 | */ | ||
244 | |||
245 | if (writes) { | ||
246 | dispatch_writes: | ||
247 | BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE])); | ||
248 | |||
249 | dd->starved = 0; | ||
250 | |||
251 | data_dir = WRITE; | ||
252 | |||
253 | goto dispatch_find_request; | ||
254 | } | ||
255 | |||
256 | return NULL; | ||
257 | |||
258 | dispatch_find_request: | ||
259 | /* | ||
260 | * we are not running a batch, find best request for selected data_dir | ||
261 | */ | ||
262 | if (deadline_check_fifo(dd, data_dir) || !dd->next_rq[data_dir]) { | ||
263 | /* | ||
264 | * A deadline has expired, the last request was in the other | ||
265 | * direction, or we have run out of higher-sectored requests. | ||
266 | * Start again from the request with the earliest expiry time. | ||
267 | */ | ||
268 | rq = rq_entry_fifo(dd->fifo_list[data_dir].next); | ||
269 | } else { | ||
270 | /* | ||
271 | * The last req was the same dir and we have a next request in | ||
272 | * sort order. No expired requests so continue on from here. | ||
273 | */ | ||
274 | rq = dd->next_rq[data_dir]; | ||
275 | } | ||
276 | |||
277 | dd->batching = 0; | ||
278 | |||
279 | dispatch_request: | ||
280 | /* | ||
281 | * rq is the selected appropriate request. | ||
282 | */ | ||
283 | dd->batching++; | ||
284 | deadline_move_request(dd, rq); | ||
285 | done: | ||
286 | rq->rq_flags |= RQF_STARTED; | ||
287 | return rq; | ||
288 | } | ||
289 | |||
290 | static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) | ||
291 | { | ||
292 | struct deadline_data *dd = hctx->queue->elevator->elevator_data; | ||
293 | struct request *rq; | ||
294 | |||
295 | spin_lock(&dd->lock); | ||
296 | rq = __dd_dispatch_request(hctx); | ||
297 | spin_unlock(&dd->lock); | ||
298 | |||
299 | return rq; | ||
300 | } | ||
301 | |||
302 | static void dd_exit_queue(struct elevator_queue *e) | ||
303 | { | ||
304 | struct deadline_data *dd = e->elevator_data; | ||
305 | |||
306 | BUG_ON(!list_empty(&dd->fifo_list[READ])); | ||
307 | BUG_ON(!list_empty(&dd->fifo_list[WRITE])); | ||
308 | |||
309 | kfree(dd); | ||
310 | } | ||
311 | |||
312 | /* | ||
313 | * initialize elevator private data (deadline_data). | ||
314 | */ | ||
315 | static int dd_init_queue(struct request_queue *q, struct elevator_type *e) | ||
316 | { | ||
317 | struct deadline_data *dd; | ||
318 | struct elevator_queue *eq; | ||
319 | |||
320 | eq = elevator_alloc(q, e); | ||
321 | if (!eq) | ||
322 | return -ENOMEM; | ||
323 | |||
324 | dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); | ||
325 | if (!dd) { | ||
326 | kobject_put(&eq->kobj); | ||
327 | return -ENOMEM; | ||
328 | } | ||
329 | eq->elevator_data = dd; | ||
330 | |||
331 | INIT_LIST_HEAD(&dd->fifo_list[READ]); | ||
332 | INIT_LIST_HEAD(&dd->fifo_list[WRITE]); | ||
333 | dd->sort_list[READ] = RB_ROOT; | ||
334 | dd->sort_list[WRITE] = RB_ROOT; | ||
335 | dd->fifo_expire[READ] = read_expire; | ||
336 | dd->fifo_expire[WRITE] = write_expire; | ||
337 | dd->writes_starved = writes_starved; | ||
338 | dd->front_merges = 1; | ||
339 | dd->fifo_batch = fifo_batch; | ||
340 | spin_lock_init(&dd->lock); | ||
341 | INIT_LIST_HEAD(&dd->dispatch); | ||
342 | |||
343 | q->elevator = eq; | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | static int dd_request_merge(struct request_queue *q, struct request **rq, | ||
348 | struct bio *bio) | ||
349 | { | ||
350 | struct deadline_data *dd = q->elevator->elevator_data; | ||
351 | sector_t sector = bio_end_sector(bio); | ||
352 | struct request *__rq; | ||
353 | |||
354 | if (!dd->front_merges) | ||
355 | return ELEVATOR_NO_MERGE; | ||
356 | |||
357 | __rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector); | ||
358 | if (__rq) { | ||
359 | BUG_ON(sector != blk_rq_pos(__rq)); | ||
360 | |||
361 | if (elv_bio_merge_ok(__rq, bio)) { | ||
362 | *rq = __rq; | ||
363 | return ELEVATOR_FRONT_MERGE; | ||
364 | } | ||
365 | } | ||
366 | |||
367 | return ELEVATOR_NO_MERGE; | ||
368 | } | ||
369 | |||
370 | static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio) | ||
371 | { | ||
372 | struct request_queue *q = hctx->queue; | ||
373 | struct deadline_data *dd = q->elevator->elevator_data; | ||
374 | int ret; | ||
375 | |||
376 | spin_lock(&dd->lock); | ||
377 | ret = blk_mq_sched_try_merge(q, bio); | ||
378 | spin_unlock(&dd->lock); | ||
379 | |||
380 | return ret; | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * add rq to rbtree and fifo | ||
385 | */ | ||
386 | static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, | ||
387 | bool at_head) | ||
388 | { | ||
389 | struct request_queue *q = hctx->queue; | ||
390 | struct deadline_data *dd = q->elevator->elevator_data; | ||
391 | const int data_dir = rq_data_dir(rq); | ||
392 | |||
393 | if (blk_mq_sched_try_insert_merge(q, rq)) | ||
394 | return; | ||
395 | |||
396 | blk_mq_sched_request_inserted(rq); | ||
397 | |||
398 | if (blk_mq_sched_bypass_insert(hctx, rq)) | ||
399 | return; | ||
400 | |||
401 | if (at_head || rq->cmd_type != REQ_TYPE_FS) { | ||
402 | if (at_head) | ||
403 | list_add(&rq->queuelist, &dd->dispatch); | ||
404 | else | ||
405 | list_add_tail(&rq->queuelist, &dd->dispatch); | ||
406 | } else { | ||
407 | deadline_add_rq_rb(dd, rq); | ||
408 | |||
409 | if (rq_mergeable(rq)) { | ||
410 | elv_rqhash_add(q, rq); | ||
411 | if (!q->last_merge) | ||
412 | q->last_merge = rq; | ||
413 | } | ||
414 | |||
415 | /* | ||
416 | * set expire time and add to fifo list | ||
417 | */ | ||
418 | rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; | ||
419 | list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); | ||
420 | } | ||
421 | } | ||
422 | |||
423 | static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, | ||
424 | struct list_head *list, bool at_head) | ||
425 | { | ||
426 | struct request_queue *q = hctx->queue; | ||
427 | struct deadline_data *dd = q->elevator->elevator_data; | ||
428 | |||
429 | spin_lock(&dd->lock); | ||
430 | while (!list_empty(list)) { | ||
431 | struct request *rq; | ||
432 | |||
433 | rq = list_first_entry(list, struct request, queuelist); | ||
434 | list_del_init(&rq->queuelist); | ||
435 | dd_insert_request(hctx, rq, at_head); | ||
436 | } | ||
437 | spin_unlock(&dd->lock); | ||
438 | } | ||
439 | |||
440 | static bool dd_has_work(struct blk_mq_hw_ctx *hctx) | ||
441 | { | ||
442 | struct deadline_data *dd = hctx->queue->elevator->elevator_data; | ||
443 | |||
444 | return !list_empty_careful(&dd->dispatch) || | ||
445 | !list_empty_careful(&dd->fifo_list[0]) || | ||
446 | !list_empty_careful(&dd->fifo_list[1]); | ||
447 | } | ||
448 | |||
449 | /* | ||
450 | * sysfs parts below | ||
451 | */ | ||
452 | static ssize_t | ||
453 | deadline_var_show(int var, char *page) | ||
454 | { | ||
455 | return sprintf(page, "%d\n", var); | ||
456 | } | ||
457 | |||
458 | static ssize_t | ||
459 | deadline_var_store(int *var, const char *page, size_t count) | ||
460 | { | ||
461 | char *p = (char *) page; | ||
462 | |||
463 | *var = simple_strtol(p, &p, 10); | ||
464 | return count; | ||
465 | } | ||
466 | |||
467 | #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \ | ||
468 | static ssize_t __FUNC(struct elevator_queue *e, char *page) \ | ||
469 | { \ | ||
470 | struct deadline_data *dd = e->elevator_data; \ | ||
471 | int __data = __VAR; \ | ||
472 | if (__CONV) \ | ||
473 | __data = jiffies_to_msecs(__data); \ | ||
474 | return deadline_var_show(__data, (page)); \ | ||
475 | } | ||
476 | SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1); | ||
477 | SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1); | ||
478 | SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0); | ||
479 | SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0); | ||
480 | SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0); | ||
481 | #undef SHOW_FUNCTION | ||
482 | |||
483 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | ||
484 | static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ | ||
485 | { \ | ||
486 | struct deadline_data *dd = e->elevator_data; \ | ||
487 | int __data; \ | ||
488 | int ret = deadline_var_store(&__data, (page), count); \ | ||
489 | if (__data < (MIN)) \ | ||
490 | __data = (MIN); \ | ||
491 | else if (__data > (MAX)) \ | ||
492 | __data = (MAX); \ | ||
493 | if (__CONV) \ | ||
494 | *(__PTR) = msecs_to_jiffies(__data); \ | ||
495 | else \ | ||
496 | *(__PTR) = __data; \ | ||
497 | return ret; \ | ||
498 | } | ||
499 | STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1); | ||
500 | STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1); | ||
501 | STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0); | ||
502 | STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0); | ||
503 | STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0); | ||
504 | #undef STORE_FUNCTION | ||
505 | |||
506 | #define DD_ATTR(name) \ | ||
507 | __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \ | ||
508 | deadline_##name##_store) | ||
509 | |||
510 | static struct elv_fs_entry deadline_attrs[] = { | ||
511 | DD_ATTR(read_expire), | ||
512 | DD_ATTR(write_expire), | ||
513 | DD_ATTR(writes_starved), | ||
514 | DD_ATTR(front_merges), | ||
515 | DD_ATTR(fifo_batch), | ||
516 | __ATTR_NULL | ||
517 | }; | ||
518 | |||
519 | static struct elevator_type mq_deadline = { | ||
520 | .ops.mq = { | ||
521 | .insert_requests = dd_insert_requests, | ||
522 | .dispatch_request = dd_dispatch_request, | ||
523 | .next_request = elv_rb_latter_request, | ||
524 | .former_request = elv_rb_former_request, | ||
525 | .bio_merge = dd_bio_merge, | ||
526 | .request_merge = dd_request_merge, | ||
527 | .requests_merged = dd_merged_requests, | ||
528 | .request_merged = dd_request_merged, | ||
529 | .has_work = dd_has_work, | ||
530 | .init_sched = dd_init_queue, | ||
531 | .exit_sched = dd_exit_queue, | ||
532 | }, | ||
533 | |||
534 | .uses_mq = true, | ||
535 | .elevator_attrs = deadline_attrs, | ||
536 | .elevator_name = "mq-deadline", | ||
537 | .elevator_owner = THIS_MODULE, | ||
538 | }; | ||
539 | |||
540 | static int __init deadline_init(void) | ||
541 | { | ||
542 | return elv_register(&mq_deadline); | ||
543 | } | ||
544 | |||
545 | static void __exit deadline_exit(void) | ||
546 | { | ||
547 | elv_unregister(&mq_deadline); | ||
548 | } | ||
549 | |||
550 | module_init(deadline_init); | ||
551 | module_exit(deadline_exit); | ||
552 | |||
553 | MODULE_AUTHOR("Jens Axboe"); | ||
554 | MODULE_LICENSE("GPL"); | ||
555 | MODULE_DESCRIPTION("MQ deadline IO scheduler"); | ||
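The scheduler above stamps each request with rq->fifo_time = jiffies + dd->fifo_expire[data_dir] at insert time, and deadline_check_fifo() treats the FIFO head as expired once time_after_eq(jiffies, rq->fifo_time) holds. The standalone sketch below models only that arithmetic; HZ = 250 and the starting jiffies value are assumptions for the example, while the expiry defaults mirror read_expire = HZ/2 and write_expire = 5*HZ from the file.

/*
 * Userspace model of the FIFO deadline bookkeeping: wraparound-safe
 * expiry comparison on an unsigned tick counter, as in deadline_check_fifo().
 */
#include <stdio.h>

#define HZ 250UL				/* assumed tick rate for the example */

static const unsigned long read_expire  = HZ / 2;	/* reads: half a second */
static const unsigned long write_expire = 5 * HZ;	/* writes: five seconds */

/* "a is at or after b", safe across counter wraparound (signed subtraction) */
static int time_after_eq(unsigned long a, unsigned long b)
{
	return (long)(a - b) >= 0;
}

int main(void)
{
	unsigned long jiffies = 1000;	/* pretend current tick count */
	unsigned long read_fifo_time  = jiffies + read_expire;
	unsigned long write_fifo_time = jiffies + write_expire;

	/* advance time by one second's worth of ticks */
	jiffies += HZ;

	printf("read expired:  %d\n", time_after_eq(jiffies, read_fifo_time));
	printf("write expired: %d\n", time_after_eq(jiffies, write_fifo_time));
	return 0;
}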
diff --git a/block/noop-iosched.c b/block/noop-iosched.c index a163c487cf38..2d1b15d89b45 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c | |||
@@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e) | |||
92 | } | 92 | } |
93 | 93 | ||
94 | static struct elevator_type elevator_noop = { | 94 | static struct elevator_type elevator_noop = { |
95 | .ops = { | 95 | .ops.sq = { |
96 | .elevator_merge_req_fn = noop_merged_requests, | 96 | .elevator_merge_req_fn = noop_merged_requests, |
97 | .elevator_dispatch_fn = noop_dispatch, | 97 | .elevator_dispatch_fn = noop_dispatch, |
98 | .elevator_add_req_fn = noop_add_request, | 98 | .elevator_add_req_fn = noop_add_request, |
diff --git a/block/opal_proto.h b/block/opal_proto.h new file mode 100644 index 000000000000..f40c9acf8895 --- /dev/null +++ b/block/opal_proto.h | |||
@@ -0,0 +1,452 @@ | |||
1 | /* | ||
2 | * Copyright © 2016 Intel Corporation | ||
3 | * | ||
4 | * Authors: | ||
5 | * Rafael Antognolli <rafael.antognolli@intel.com> | ||
6 | * Scott Bauer <scott.bauer@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | #include <linux/types.h> | ||
18 | |||
19 | #ifndef _OPAL_PROTO_H | ||
20 | #define _OPAL_PROTO_H | ||
21 | |||
22 | /* | ||
23 | * These constant values come from: | ||
24 | * SPC-4 section | ||
25 | * 6.30 SECURITY PROTOCOL IN command / table 265. | ||
26 | */ | ||
27 | enum { | ||
28 | TCG_SECP_00 = 0, | ||
29 | TCG_SECP_01, | ||
30 | }; | ||
31 | |||
32 | /* | ||
33 | * Token defs derived from: | ||
34 | * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 | ||
35 | * 3.2.2 Data Stream Encoding | ||
36 | */ | ||
37 | enum opal_response_token { | ||
38 | OPAL_DTA_TOKENID_BYTESTRING = 0xe0, | ||
39 | OPAL_DTA_TOKENID_SINT = 0xe1, | ||
40 | OPAL_DTA_TOKENID_UINT = 0xe2, | ||
41 | OPAL_DTA_TOKENID_TOKEN = 0xe3, /* actual token is returned */ | ||
42 | OPAL_DTA_TOKENID_INVALID = 0x0 | ||
43 | }; | ||
44 | |||
45 | #define DTAERROR_NO_METHOD_STATUS 0x89 | ||
46 | #define GENERIC_HOST_SESSION_NUM 0x41 | ||
47 | |||
48 | #define TPER_SYNC_SUPPORTED 0x01 | ||
49 | |||
50 | #define TINY_ATOM_DATA_MASK 0x3F | ||
51 | #define TINY_ATOM_SIGNED 0x40 | ||
52 | |||
53 | #define SHORT_ATOM_ID 0x80 | ||
54 | #define SHORT_ATOM_BYTESTRING 0x20 | ||
55 | #define SHORT_ATOM_SIGNED 0x10 | ||
56 | #define SHORT_ATOM_LEN_MASK 0xF | ||
57 | |||
58 | #define MEDIUM_ATOM_ID 0xC0 | ||
59 | #define MEDIUM_ATOM_BYTESTRING 0x10 | ||
60 | #define MEDIUM_ATOM_SIGNED 0x8 | ||
61 | #define MEDIUM_ATOM_LEN_MASK 0x7 | ||
62 | |||
63 | #define LONG_ATOM_ID 0xe0 | ||
64 | #define LONG_ATOM_BYTESTRING 0x2 | ||
65 | #define LONG_ATOM_SIGNED 0x1 | ||
66 | |||
67 | /* Derived from TCG Core spec 2.01 Section: | ||
68 | * 3.2.2.1 | ||
69 | * Data Type | ||
70 | */ | ||
71 | #define TINY_ATOM_BYTE 0x7F | ||
72 | #define SHORT_ATOM_BYTE 0xBF | ||
73 | #define MEDIUM_ATOM_BYTE 0xDF | ||
74 | #define LONG_ATOM_BYTE 0xE3 | ||
75 | |||
76 | #define OPAL_INVAL_PARAM 12 | ||
77 | #define OPAL_MANUFACTURED_INACTIVE 0x08 | ||
78 | #define OPAL_DISCOVERY_COMID 0x0001 | ||
79 | |||
80 | #define LOCKING_RANGE_NON_GLOBAL 0x03 | ||
81 | /* | ||
82 | * User IDs used in the TCG storage SSCs | ||
83 | * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 | ||
84 | * Section: 6.3 Assigned UIDs | ||
85 | */ | ||
86 | #define OPAL_UID_LENGTH 8 | ||
87 | #define OPAL_METHOD_LENGTH 8 | ||
88 | #define OPAL_MSID_KEYLEN 15 | ||
89 | #define OPAL_UID_LENGTH_HALF 4 | ||
90 | |||
91 | /* Enum to index OPALUID array */ | ||
92 | enum opal_uid { | ||
93 | /* users */ | ||
94 | OPAL_SMUID_UID, | ||
95 | OPAL_THISSP_UID, | ||
96 | OPAL_ADMINSP_UID, | ||
97 | OPAL_LOCKINGSP_UID, | ||
98 | OPAL_ENTERPRISE_LOCKINGSP_UID, | ||
99 | OPAL_ANYBODY_UID, | ||
100 | OPAL_SID_UID, | ||
101 | OPAL_ADMIN1_UID, | ||
102 | OPAL_USER1_UID, | ||
103 | OPAL_USER2_UID, | ||
104 | OPAL_PSID_UID, | ||
105 | OPAL_ENTERPRISE_BANDMASTER0_UID, | ||
106 | OPAL_ENTERPRISE_ERASEMASTER_UID, | ||
107 | /* tables */ | ||
108 | OPAL_LOCKINGRANGE_GLOBAL, | ||
109 | OPAL_LOCKINGRANGE_ACE_RDLOCKED, | ||
110 | OPAL_LOCKINGRANGE_ACE_WRLOCKED, | ||
111 | OPAL_MBRCONTROL, | ||
112 | OPAL_MBR, | ||
113 | OPAL_AUTHORITY_TABLE, | ||
114 | OPAL_C_PIN_TABLE, | ||
115 | OPAL_LOCKING_INFO_TABLE, | ||
116 | OPAL_ENTERPRISE_LOCKING_INFO_TABLE, | ||
117 | /* C_PIN_TABLE object ID's */ | ||
118 | OPAL_C_PIN_MSID, | ||
119 | OPAL_C_PIN_SID, | ||
120 | OPAL_C_PIN_ADMIN1, | ||
121 | /* half UID's (only first 4 bytes used) */ | ||
122 | OPAL_HALF_UID_AUTHORITY_OBJ_REF, | ||
123 | OPAL_HALF_UID_BOOLEAN_ACE, | ||
124 | /* omitted optional parameter */ | ||
125 | OPAL_UID_HEXFF, | ||
126 | }; | ||
127 | |||
128 | #define OPAL_METHOD_LENGTH 8 | ||
129 | |||
130 | /* Enum for indexing the OPALMETHOD array */ | ||
131 | enum opal_method { | ||
132 | OPAL_PROPERTIES, | ||
133 | OPAL_STARTSESSION, | ||
134 | OPAL_REVERT, | ||
135 | OPAL_ACTIVATE, | ||
136 | OPAL_EGET, | ||
137 | OPAL_ESET, | ||
138 | OPAL_NEXT, | ||
139 | OPAL_EAUTHENTICATE, | ||
140 | OPAL_GETACL, | ||
141 | OPAL_GENKEY, | ||
142 | OPAL_REVERTSP, | ||
143 | OPAL_GET, | ||
144 | OPAL_SET, | ||
145 | OPAL_AUTHENTICATE, | ||
146 | OPAL_RANDOM, | ||
147 | OPAL_ERASE, | ||
148 | }; | ||
149 | |||
150 | enum opal_token { | ||
151 | /* Boolean */ | ||
152 | OPAL_TRUE = 0x01, | ||
153 | OPAL_FALSE = 0x00, | ||
154 | OPAL_BOOLEAN_EXPR = 0x03, | ||
155 | /* cellblocks */ | ||
156 | OPAL_TABLE = 0x00, | ||
157 | OPAL_STARTROW = 0x01, | ||
158 | OPAL_ENDROW = 0x02, | ||
159 | OPAL_STARTCOLUMN = 0x03, | ||
160 | OPAL_ENDCOLUMN = 0x04, | ||
161 | OPAL_VALUES = 0x01, | ||
162 | /* authority table */ | ||
163 | OPAL_PIN = 0x03, | ||
164 | /* locking tokens */ | ||
165 | OPAL_RANGESTART = 0x03, | ||
166 | OPAL_RANGELENGTH = 0x04, | ||
167 | OPAL_READLOCKENABLED = 0x05, | ||
168 | OPAL_WRITELOCKENABLED = 0x06, | ||
169 | OPAL_READLOCKED = 0x07, | ||
170 | OPAL_WRITELOCKED = 0x08, | ||
171 | OPAL_ACTIVEKEY = 0x0A, | ||
172 | /* locking info table */ | ||
173 | OPAL_MAXRANGES = 0x04, | ||
174 | /* mbr control */ | ||
175 | OPAL_MBRENABLE = 0x01, | ||
176 | OPAL_MBRDONE = 0x02, | ||
177 | /* properties */ | ||
178 | OPAL_HOSTPROPERTIES = 0x00, | ||
179 | /* atoms */ | ||
180 | OPAL_STARTLIST = 0xf0, | ||
181 | OPAL_ENDLIST = 0xf1, | ||
182 | OPAL_STARTNAME = 0xf2, | ||
183 | OPAL_ENDNAME = 0xf3, | ||
184 | OPAL_CALL = 0xf8, | ||
185 | OPAL_ENDOFDATA = 0xf9, | ||
186 | OPAL_ENDOFSESSION = 0xfa, | ||
187 | OPAL_STARTTRANSACTON = 0xfb, | ||
188 | OPAL_ENDTRANSACTON = 0xfC, | ||
189 | OPAL_EMPTYATOM = 0xff, | ||
190 | OPAL_WHERE = 0x00, | ||
191 | }; | ||
192 | |||
193 | /* Locking state for a locking range */ | ||
194 | enum opal_lockingstate { | ||
195 | OPAL_LOCKING_READWRITE = 0x01, | ||
196 | OPAL_LOCKING_READONLY = 0x02, | ||
197 | OPAL_LOCKING_LOCKED = 0x03, | ||
198 | }; | ||
199 | |||
200 | /* Packets derived from: | ||
201 | * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 | ||
202 | * Section: 3.2.3 ComPackets, Packets & Subpackets | ||
203 | */ | ||
204 | |||
205 | /* Comm Packet (header) for transmissions. */ | ||
206 | struct opal_compacket { | ||
207 | __be32 reserved0; | ||
208 | u8 extendedComID[4]; | ||
209 | __be32 outstandingData; | ||
210 | __be32 minTransfer; | ||
211 | __be32 length; | ||
212 | }; | ||
213 | |||
214 | /* Packet structure. */ | ||
215 | struct opal_packet { | ||
216 | __be32 tsn; | ||
217 | __be32 hsn; | ||
218 | __be32 seq_number; | ||
219 | __be16 reserved0; | ||
220 | __be16 ack_type; | ||
221 | __be32 acknowledgment; | ||
222 | __be32 length; | ||
223 | }; | ||
224 | |||
225 | /* Data sub packet header */ | ||
226 | struct opal_data_subpacket { | ||
227 | u8 reserved0[6]; | ||
228 | __be16 kind; | ||
229 | __be32 length; | ||
230 | }; | ||
231 | |||
232 | /* header of a response */ | ||
233 | struct opal_header { | ||
234 | struct opal_compacket cp; | ||
235 | struct opal_packet pkt; | ||
236 | struct opal_data_subpacket subpkt; | ||
237 | }; | ||
238 | |||
239 | #define FC_TPER 0x0001 | ||
240 | #define FC_LOCKING 0x0002 | ||
241 | #define FC_GEOMETRY 0x0003 | ||
242 | #define FC_ENTERPRISE 0x0100 | ||
243 | #define FC_DATASTORE 0x0202 | ||
244 | #define FC_SINGLEUSER 0x0201 | ||
245 | #define FC_OPALV100 0x0200 | ||
246 | #define FC_OPALV200 0x0203 | ||
247 | |||
248 | /* | ||
249 | * The Discovery 0 Header. As defined in | ||
250 | * Opal SSC Documentation | ||
251 | * Section: 3.3.5 Capability Discovery | ||
252 | */ | ||
253 | struct d0_header { | ||
254 | __be32 length; /* length of the header: 48 in 2.00.100 */ | ||
255 | __be32 revision; /* revision of the header: 1 in 2.00.100 */ | ||
256 | __be32 reserved01; | ||
257 | __be32 reserved02; | ||
258 | /* | ||
259 | * the remainder of the structure is vendor specific and will not be | ||
260 | * addressed now | ||
261 | */ | ||
262 | u8 ignored[32]; | ||
263 | }; | ||
264 | |||
265 | /* | ||
266 | * TPer Feature Descriptor. Contains flags indicating support for the | ||
267 | * TPer features described in the OPAL specification. The names match the | ||
268 | * OPAL terminology | ||
269 | * | ||
270 | * code == 0x001 in 2.00.100 | ||
271 | */ | ||
272 | struct d0_tper_features { | ||
273 | /* | ||
274 | * supported_features bits: | ||
275 | * bit 7: reserved | ||
276 | * bit 6: com ID management | ||
277 | * bit 5: reserved | ||
278 | * bit 4: streaming support | ||
279 | * bit 3: buffer management | ||
280 | * bit 2: ACK/NACK | ||
281 | * bit 1: async | ||
282 | * bit 0: sync | ||
283 | */ | ||
284 | u8 supported_features; | ||
285 | /* | ||
286 | * bytes 5 through 15 are reserved, but we represent the first 3 as | ||
287 | * u8 to keep the other two 32-bit integers aligned. | ||
288 | */ | ||
289 | u8 reserved01[3]; | ||
290 | __be32 reserved02; | ||
291 | __be32 reserved03; | ||
292 | }; | ||
293 | |||
294 | /* | ||
295 | * Locking Feature Descriptor. Contains flags indicating support for the | ||
296 | * locking features described in the OPAL specification. The names match the | ||
297 | * OPAL terminology | ||
298 | * | ||
299 | * code == 0x0002 in 2.00.100 | ||
300 | */ | ||
301 | struct d0_locking_features { | ||
302 | /* | ||
303 | * supported_features bits: | ||
304 | * bits 6-7: reserved | ||
305 | * bit 5: MBR done | ||
306 | * bit 4: MBR enabled | ||
307 | * bit 3: media encryption | ||
308 | * bit 2: locked | ||
309 | * bit 1: locking enabled | ||
310 | * bit 0: locking supported | ||
311 | */ | ||
312 | u8 supported_features; | ||
313 | /* | ||
314 | * bytes 5 through 15 are reserved, but we represent the first 3 as | ||
315 | * u8 to keep the other two 32-bit integers aligned. | ||
316 | */ | ||
317 | u8 reserved01[3]; | ||
318 | __be32 reserved02; | ||
319 | __be32 reserved03; | ||
320 | }; | ||
321 | |||
322 | /* | ||
323 | * Geometry Feature Descriptor. Contains flags indicating support for the | ||
324 | * geometry features described in the OPAL specification. The names match the | ||
325 | * OPAL terminology | ||
326 | * | ||
327 | * code == 0x0003 in 2.00.100 | ||
328 | */ | ||
329 | struct d0_geometry_features { | ||
330 | /* | ||
331 | * skip 32 bits from header, needed to align the struct to 64 bits. | ||
332 | */ | ||
333 | u8 header[4]; | ||
334 | /* | ||
335 | * reserved01: | ||
336 | * bits 1-6: reserved | ||
337 | * bit 0: align | ||
338 | */ | ||
339 | u8 reserved01; | ||
340 | u8 reserved02[7]; | ||
341 | __be32 logical_block_size; | ||
342 | __be64 alignment_granularity; | ||
343 | __be64 lowest_aligned_lba; | ||
344 | }; | ||
345 | |||
346 | /* | ||
347 | * Enterprise SSC Feature | ||
348 | * | ||
349 | * code == 0x0100 | ||
350 | */ | ||
351 | struct d0_enterprise_ssc { | ||
352 | __be16 baseComID; | ||
353 | __be16 numComIDs; | ||
354 | /* range_crossing: | ||
355 | * bits 1-6: reserved | ||
356 | * bit 0: range crossing | ||
357 | */ | ||
358 | u8 range_crossing; | ||
359 | u8 reserved01; | ||
360 | __be16 reserved02; | ||
361 | __be32 reserved03; | ||
362 | __be32 reserved04; | ||
363 | }; | ||
364 | |||
365 | /* | ||
366 | * Opal V1 feature | ||
367 | * | ||
368 | * code == 0x0200 | ||
369 | */ | ||
370 | struct d0_opal_v100 { | ||
371 | __be16 baseComID; | ||
372 | __be16 numComIDs; | ||
373 | }; | ||
374 | |||
375 | /* | ||
376 | * Single User Mode feature | ||
377 | * | ||
378 | * code == 0x0201 | ||
379 | */ | ||
380 | struct d0_single_user_mode { | ||
381 | __be32 num_locking_objects; | ||
382 | /* reserved01: | ||
383 | * bit 0: any | ||
384 | * bit 1: all | ||
385 | * bit 2: policy | ||
386 | * bits 3-7: reserved | ||
387 | */ | ||
388 | u8 reserved01; | ||
389 | u8 reserved02; | ||
390 | __be16 reserved03; | ||
391 | __be32 reserved04; | ||
392 | }; | ||
393 | |||
394 | /* | ||
395 | * Additional Datastores feature | ||
396 | * | ||
397 | * code == 0x0202 | ||
398 | */ | ||
399 | struct d0_datastore_table { | ||
400 | __be16 reserved01; | ||
401 | __be16 max_tables; | ||
402 | __be32 max_size_tables; | ||
403 | __be32 table_size_alignment; | ||
404 | }; | ||
405 | |||
406 | /* | ||
407 | * OPAL 2.0 feature | ||
408 | * | ||
409 | * code == 0x0203 | ||
410 | */ | ||
411 | struct d0_opal_v200 { | ||
412 | __be16 baseComID; | ||
413 | __be16 numComIDs; | ||
414 | /* range_crossing: | ||
415 | * bits 1-6: reserved | ||
416 | * bit 0: range crossing | ||
417 | */ | ||
418 | u8 range_crossing; | ||
419 | /* num_locking_admin_auth: | ||
420 | * not aligned to 16 bits, so use two u8. | ||
421 | * stored in big endian: | ||
422 | * 0: MSB | ||
423 | * 1: LSB | ||
424 | */ | ||
425 | u8 num_locking_admin_auth[2]; | ||
426 | /* num_locking_user_auth: | ||
427 | * not aligned to 16 bits, so use two u8. | ||
428 | * stored in big endian: | ||
429 | * 0: MSB | ||
430 | * 1: LSB | ||
431 | */ | ||
432 | u8 num_locking_user_auth[2]; | ||
433 | u8 initialPIN; | ||
434 | u8 revertedPIN; | ||
435 | u8 reserved01; | ||
436 | __be32 reserved02; | ||
437 | }; | ||
438 | |||
439 | /* Union of features used to parse the discovery 0 response */ | ||
440 | struct d0_features { | ||
441 | __be16 code; | ||
442 | /* | ||
443 | * r_version bits: | ||
444 | * bits 4-7: version | ||
445 | * bits 0-3: reserved | ||
446 | */ | ||
447 | u8 r_version; | ||
448 | u8 length; | ||
449 | u8 features[]; | ||
450 | }; | ||
451 | |||
452 | #endif /* _OPAL_PROTO_H */ | ||
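The atom #defines above describe how the first byte of a TCG data-stream atom is classified and unpacked. The minimal userspace sketch below applies those constants to one arbitrarily chosen header byte (0xA8); the upper-bound classification and the bitmask extraction are only an illustration of the encoding documented in the header comments, not the parser used later in sed-opal.c.

/*
 * Classify one TCG atom header byte and, for a short atom, pull out its
 * bytestring/signed flags and payload length, using the constants from
 * opal_proto.h (repeated here so the example is self-contained).
 */
#include <stdio.h>

#define TINY_ATOM_BYTE        0x7F
#define SHORT_ATOM_BYTE       0xBF
#define MEDIUM_ATOM_BYTE      0xDF
#define LONG_ATOM_BYTE        0xE3

#define TINY_ATOM_DATA_MASK   0x3F
#define SHORT_ATOM_BYTESTRING 0x20
#define SHORT_ATOM_SIGNED     0x10
#define SHORT_ATOM_LEN_MASK   0xF

int main(void)
{
	unsigned char hdr = 0xA8;	/* example: short atom, bytestring, len 8 */

	if (hdr <= TINY_ATOM_BYTE)
		printf("tiny atom, data %d\n", hdr & TINY_ATOM_DATA_MASK);
	else if (hdr <= SHORT_ATOM_BYTE)
		printf("short atom: bytestring=%d signed=%d len=%u\n",
		       !!(hdr & SHORT_ATOM_BYTESTRING),
		       !!(hdr & SHORT_ATOM_SIGNED),
		       hdr & SHORT_ATOM_LEN_MASK);
	else if (hdr <= MEDIUM_ATOM_BYTE)
		printf("medium atom\n");
	else if (hdr <= LONG_ATOM_BYTE)
		printf("long atom\n");
	else
		printf("token byte 0x%02x\n", hdr);

	return 0;
}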
diff --git a/block/partitions/efi.c b/block/partitions/efi.c index bcd86e5cd546..39f70d968754 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c | |||
@@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, | |||
293 | if (!gpt) | 293 | if (!gpt) |
294 | return NULL; | 294 | return NULL; |
295 | 295 | ||
296 | count = le32_to_cpu(gpt->num_partition_entries) * | 296 | count = (size_t)le32_to_cpu(gpt->num_partition_entries) * |
297 | le32_to_cpu(gpt->sizeof_partition_entry); | 297 | le32_to_cpu(gpt->sizeof_partition_entry); |
298 | if (!count) | 298 | if (!count) |
299 | return NULL; | 299 | return NULL; |
@@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, | |||
352 | gpt_header **gpt, gpt_entry **ptes) | 352 | gpt_header **gpt, gpt_entry **ptes) |
353 | { | 353 | { |
354 | u32 crc, origcrc; | 354 | u32 crc, origcrc; |
355 | u64 lastlba; | 355 | u64 lastlba, pt_size; |
356 | 356 | ||
357 | if (!ptes) | 357 | if (!ptes) |
358 | return 0; | 358 | return 0; |
@@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, | |||
434 | goto fail; | 434 | goto fail; |
435 | } | 435 | } |
436 | 436 | ||
437 | /* Sanity check partition table size */ | ||
438 | pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) * | ||
439 | le32_to_cpu((*gpt)->sizeof_partition_entry); | ||
440 | if (pt_size > KMALLOC_MAX_SIZE) { | ||
441 | pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n", | ||
442 | (unsigned long long)pt_size, KMALLOC_MAX_SIZE); | ||
443 | goto fail; | ||
444 | } | ||
445 | |||
437 | if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) | 446 | if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) |
438 | goto fail; | 447 | goto fail; |
439 | 448 | ||
440 | /* Check the GUID Partition Entry Array CRC */ | 449 | /* Check the GUID Partition Entry Array CRC */ |
441 | crc = efi_crc32((const unsigned char *) (*ptes), | 450 | crc = efi_crc32((const unsigned char *) (*ptes), pt_size); |
442 | le32_to_cpu((*gpt)->num_partition_entries) * | ||
443 | le32_to_cpu((*gpt)->sizeof_partition_entry)); | ||
444 | 451 | ||
445 | if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { | 452 | if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { |
446 | pr_debug("GUID Partition Entry Array CRC check failed.\n"); | 453 | pr_debug("GUID Partition Entry Array CRC check failed.\n"); |
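The efi.c hunk above widens the num_partition_entries * sizeof_partition_entry product to 64 bits and rejects anything larger than KMALLOC_MAX_SIZE before the entry array is allocated or CRC-checked. The standalone sketch below shows why widening before the multiply matters for untrusted on-disk values; ALLOC_CAP and pt_size_ok() are made-up stand-ins for the kernel's KMALLOC_MAX_SIZE bound, not the actual code.

/*
 * Two untrusted 32-bit fields are multiplied in 64-bit arithmetic so a
 * hostile GPT header cannot wrap the size to a small value, then the
 * result is capped before any allocation would be attempted.
 */
#include <stdint.h>
#include <stdio.h>

#define ALLOC_CAP (4UL * 1024 * 1024)	/* stand-in for KMALLOC_MAX_SIZE */

static int pt_size_ok(uint32_t num_entries, uint32_t entry_size)
{
	/* widen first: uint64_t product cannot overflow for two 32-bit inputs */
	uint64_t pt_size = (uint64_t)num_entries * entry_size;

	if (pt_size > ALLOC_CAP) {
		printf("partition table too large: %llu bytes\n",
		       (unsigned long long)pt_size);
		return 0;
	}
	return 1;
}

int main(void)
{
	printf("%d\n", pt_size_ok(128, 128));		/* typical GPT: accepted */
	printf("%d\n", pt_size_ok(0xffffffff, 0x100));	/* hostile header: rejected */
	return 0;
}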
diff --git a/block/sed-opal.c b/block/sed-opal.c new file mode 100644 index 000000000000..d1c52ba4d62d --- /dev/null +++ b/block/sed-opal.c | |||
@@ -0,0 +1,2488 @@ | |||
1 | /* | ||
2 | * Copyright © 2016 Intel Corporation | ||
3 | * | ||
4 | * Authors: | ||
5 | * Scott Bauer <scott.bauer@intel.com> | ||
6 | * Rafael Antognolli <rafael.antognolli@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #define pr_fmt(fmt) KBUILD_MODNAME ":OPAL: " fmt | ||
19 | |||
20 | #include <linux/delay.h> | ||
21 | #include <linux/device.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/list.h> | ||
24 | #include <linux/genhd.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <uapi/linux/sed-opal.h> | ||
28 | #include <linux/sed-opal.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/kdev_t.h> | ||
31 | |||
32 | #include "opal_proto.h" | ||
33 | |||
34 | #define IO_BUFFER_LENGTH 2048 | ||
35 | #define MAX_TOKS 64 | ||
36 | |||
37 | typedef int (*opal_step)(struct opal_dev *dev); | ||
38 | |||
39 | enum opal_atom_width { | ||
40 | OPAL_WIDTH_TINY, | ||
41 | OPAL_WIDTH_SHORT, | ||
42 | OPAL_WIDTH_MEDIUM, | ||
43 | OPAL_WIDTH_LONG, | ||
44 | OPAL_WIDTH_TOKEN | ||
45 | }; | ||
46 | |||
47 | /* | ||
48 | * In the parsed response we don't store the tokens again; they are already | ||
49 | * stored in the response buffer. Instead, for each token, we just store a | ||
50 | * pointer to the position in the buffer where the token starts, and the size | ||
51 | * of the token in bytes. | ||
52 | */ | ||
53 | struct opal_resp_tok { | ||
54 | const u8 *pos; | ||
55 | size_t len; | ||
56 | enum opal_response_token type; | ||
57 | enum opal_atom_width width; | ||
58 | union { | ||
59 | u64 u; | ||
60 | s64 s; | ||
61 | } stored; | ||
62 | }; | ||
63 | |||
64 | /* | ||
65 | * From the response header it's not possible to know how many tokens there are | ||
66 | * in the payload. So we hardcode that the maximum will be MAX_TOKS, and later | ||
67 | * if we start dealing with messages that have more than that, we can increase | ||
68 | * this number. This is done to avoid having to make two passes through the | ||
69 | * response, the first one counting how many tokens we have and the second one | ||
70 | * actually storing the positions. | ||
71 | */ | ||
72 | struct parsed_resp { | ||
73 | int num; | ||
74 | struct opal_resp_tok toks[MAX_TOKS]; | ||
75 | }; | ||
76 | |||
77 | struct opal_dev { | ||
78 | bool supported; | ||
79 | |||
80 | void *data; | ||
81 | sec_send_recv *send_recv; | ||
82 | |||
83 | const opal_step *funcs; | ||
84 | void **func_data; | ||
85 | int state; | ||
86 | struct mutex dev_lock; | ||
87 | u16 comid; | ||
88 | u32 hsn; | ||
89 | u32 tsn; | ||
90 | u64 align; | ||
91 | u64 lowest_lba; | ||
92 | |||
93 | size_t pos; | ||
94 | u8 cmd[IO_BUFFER_LENGTH]; | ||
95 | u8 resp[IO_BUFFER_LENGTH]; | ||
96 | |||
97 | struct parsed_resp parsed; | ||
98 | size_t prev_d_len; | ||
99 | void *prev_data; | ||
100 | |||
101 | struct list_head unlk_lst; | ||
102 | }; | ||
103 | |||
104 | |||
105 | static const u8 opaluid[][OPAL_UID_LENGTH] = { | ||
106 | /* users */ | ||
107 | [OPAL_SMUID_UID] = | ||
108 | { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff }, | ||
109 | [OPAL_THISSP_UID] = | ||
110 | { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }, | ||
111 | [OPAL_ADMINSP_UID] = | ||
112 | { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x01 }, | ||
113 | [OPAL_LOCKINGSP_UID] = | ||
114 | { 0x00, 0x00, 0x02, 0x05, 0x00, 0x00, 0x00, 0x02 }, | ||
115 | [OPAL_ENTERPRISE_LOCKINGSP_UID] = | ||
116 | { 0x00, 0x00, 0x02, 0x05, 0x00, 0x01, 0x00, 0x01 }, | ||
117 | [OPAL_ANYBODY_UID] = | ||
118 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01 }, | ||
119 | [OPAL_SID_UID] = | ||
120 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06 }, | ||
121 | [OPAL_ADMIN1_UID] = | ||
122 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x00, 0x01 }, | ||
123 | [OPAL_USER1_UID] = | ||
124 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x01 }, | ||
125 | [OPAL_USER2_UID] = | ||
126 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x03, 0x00, 0x02 }, | ||
127 | [OPAL_PSID_UID] = | ||
128 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0xff, 0x01 }, | ||
129 | [OPAL_ENTERPRISE_BANDMASTER0_UID] = | ||
130 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x80, 0x01 }, | ||
131 | [OPAL_ENTERPRISE_ERASEMASTER_UID] = | ||
132 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 }, | ||
133 | |||
134 | /* tables */ | ||
135 | |||
136 | [OPAL_LOCKINGRANGE_GLOBAL] = | ||
137 | { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 }, | ||
138 | [OPAL_LOCKINGRANGE_ACE_RDLOCKED] = | ||
139 | { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE0, 0x01 }, | ||
140 | [OPAL_LOCKINGRANGE_ACE_WRLOCKED] = | ||
141 | { 0x00, 0x00, 0x00, 0x08, 0x00, 0x03, 0xE8, 0x01 }, | ||
142 | [OPAL_MBRCONTROL] = | ||
143 | { 0x00, 0x00, 0x08, 0x03, 0x00, 0x00, 0x00, 0x01 }, | ||
144 | [OPAL_MBR] = | ||
145 | { 0x00, 0x00, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00 }, | ||
146 | [OPAL_AUTHORITY_TABLE] = | ||
147 | { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00}, | ||
148 | [OPAL_C_PIN_TABLE] = | ||
149 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x00}, | ||
150 | [OPAL_LOCKING_INFO_TABLE] = | ||
151 | { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 }, | ||
152 | [OPAL_ENTERPRISE_LOCKING_INFO_TABLE] = | ||
153 | { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 }, | ||
154 | |||
155 | /* C_PIN_TABLE object ID's */ | ||
156 | |||
157 | [OPAL_C_PIN_MSID] = | ||
158 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x84, 0x02}, | ||
159 | [OPAL_C_PIN_SID] = | ||
160 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01}, | ||
161 | [OPAL_C_PIN_ADMIN1] = | ||
162 | { 0x00, 0x00, 0x00, 0x0B, 0x00, 0x01, 0x00, 0x01}, | ||
163 | |||
164 | /* half UID's (only first 4 bytes used) */ | ||
165 | |||
166 | [OPAL_HALF_UID_AUTHORITY_OBJ_REF] = | ||
167 | { 0x00, 0x00, 0x0C, 0x05, 0xff, 0xff, 0xff, 0xff }, | ||
168 | [OPAL_HALF_UID_BOOLEAN_ACE] = | ||
169 | { 0x00, 0x00, 0x04, 0x0E, 0xff, 0xff, 0xff, 0xff }, | ||
170 | |||
171 | /* special value for omitted optional parameter */ | ||
172 | [OPAL_UID_HEXFF] = | ||
173 | { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, | ||
174 | }; | ||
175 | |||
176 | /* | ||
177 | * TCG Storage SSC Methods. | ||
178 | * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 | ||
179 | * Section: 6.3 Assigned UIDs | ||
180 | */ | ||
181 | static const u8 opalmethod[][OPAL_UID_LENGTH] = { | ||
182 | [OPAL_PROPERTIES] = | ||
183 | { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01 }, | ||
184 | [OPAL_STARTSESSION] = | ||
185 | { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x02 }, | ||
186 | [OPAL_REVERT] = | ||
187 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x02 }, | ||
188 | [OPAL_ACTIVATE] = | ||
189 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x02, 0x03 }, | ||
190 | [OPAL_EGET] = | ||
191 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x06 }, | ||
192 | [OPAL_ESET] = | ||
193 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x07 }, | ||
194 | [OPAL_NEXT] = | ||
195 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08 }, | ||
196 | [OPAL_EAUTHENTICATE] = | ||
197 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c }, | ||
198 | [OPAL_GETACL] = | ||
199 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0d }, | ||
200 | [OPAL_GENKEY] = | ||
201 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x10 }, | ||
202 | [OPAL_REVERTSP] = | ||
203 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11 }, | ||
204 | [OPAL_GET] = | ||
205 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16 }, | ||
206 | [OPAL_SET] = | ||
207 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17 }, | ||
208 | [OPAL_AUTHENTICATE] = | ||
209 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1c }, | ||
210 | [OPAL_RANDOM] = | ||
211 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x06, 0x01 }, | ||
212 | [OPAL_ERASE] = | ||
213 | { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x08, 0x03 }, | ||
214 | }; | ||
215 | |||
216 | typedef int (cont_fn)(struct opal_dev *dev); | ||
217 | |||
218 | static int end_opal_session_error(struct opal_dev *dev); | ||
219 | |||
220 | struct opal_suspend_data { | ||
221 | struct opal_lock_unlock unlk; | ||
222 | u8 lr; | ||
223 | struct list_head node; | ||
224 | }; | ||
225 | |||
226 | /* | ||
227 | * Derived from: | ||
228 | * TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 | ||
229 | * Section: 5.1.5 Method Status Codes | ||
230 | */ | ||
231 | static const char * const opal_errors[] = { | ||
232 | "Success", | ||
233 | "Not Authorized", | ||
234 | "Unknown Error", | ||
235 | "SP Busy", | ||
236 | "SP Failed", | ||
237 | "SP Disabled", | ||
238 | "SP Frozen", | ||
239 | "No Sessions Available", | ||
240 | "Uniqueness Conflict", | ||
241 | "Insufficient Space", | ||
242 | "Insufficient Rows", | ||
243 | "Invalid Function", | ||
244 | "Invalid Parameter", | ||
245 | "Invalid Reference", | ||
246 | "Unknown Error", | ||
247 | "TPER Malfunction", | ||
248 | "Transaction Failure", | ||
249 | "Response Overflow", | ||
250 | "Authority Locked Out", | ||
251 | }; | ||
252 | |||
253 | static const char *opal_error_to_human(int error) | ||
254 | { | ||
255 | if (error == 0x3f) | ||
256 | return "Failed"; | ||
257 | |||
258 | if (error >= ARRAY_SIZE(opal_errors) || error < 0) | ||
259 | return "Unknown Error"; | ||
260 | |||
261 | return opal_errors[error]; | ||
262 | } | ||
263 | |||
264 | static void print_buffer(const u8 *ptr, u32 length) | ||
265 | { | ||
266 | #ifdef DEBUG | ||
267 | print_hex_dump_bytes("OPAL: ", DUMP_PREFIX_OFFSET, ptr, length); | ||
268 | pr_debug("\n"); | ||
269 | #endif | ||
270 | } | ||
271 | |||
272 | static bool check_tper(const void *data) | ||
273 | { | ||
274 | const struct d0_tper_features *tper = data; | ||
275 | u8 flags = tper->supported_features; | ||
276 | |||
277 | if (!(flags & TPER_SYNC_SUPPORTED)) { | ||
278 | pr_err("TPer sync not supported. flags = %d\n", | ||
279 | tper->supported_features); | ||
280 | return false; | ||
281 | } | ||
282 | |||
283 | return true; | ||
284 | } | ||
285 | |||
286 | static bool check_sum(const void *data) | ||
287 | { | ||
288 | const struct d0_single_user_mode *sum = data; | ||
289 | u32 nlo = be32_to_cpu(sum->num_locking_objects); | ||
290 | |||
291 | if (nlo == 0) { | ||
292 | pr_err("Need at least one locking object.\n"); | ||
293 | return false; | ||
294 | } | ||
295 | |||
296 | pr_debug("Number of locking objects: %d\n", nlo); | ||
297 | |||
298 | return true; | ||
299 | } | ||
300 | |||
301 | static u16 get_comid_v100(const void *data) | ||
302 | { | ||
303 | const struct d0_opal_v100 *v100 = data; | ||
304 | |||
305 | return be16_to_cpu(v100->baseComID); | ||
306 | } | ||
307 | |||
308 | static u16 get_comid_v200(const void *data) | ||
309 | { | ||
310 | const struct d0_opal_v200 *v200 = data; | ||
311 | |||
312 | return be16_to_cpu(v200->baseComID); | ||
313 | } | ||
314 | |||
315 | static int opal_send_cmd(struct opal_dev *dev) | ||
316 | { | ||
317 | return dev->send_recv(dev->data, dev->comid, TCG_SECP_01, | ||
318 | dev->cmd, IO_BUFFER_LENGTH, | ||
319 | true); | ||
320 | } | ||
321 | |||
322 | static int opal_recv_cmd(struct opal_dev *dev) | ||
323 | { | ||
324 | return dev->send_recv(dev->data, dev->comid, TCG_SECP_01, | ||
325 | dev->resp, IO_BUFFER_LENGTH, | ||
326 | false); | ||
327 | } | ||
328 | |||
329 | static int opal_recv_check(struct opal_dev *dev) | ||
330 | { | ||
331 | size_t buflen = IO_BUFFER_LENGTH; | ||
332 | void *buffer = dev->resp; | ||
333 | struct opal_header *hdr = buffer; | ||
334 | int ret; | ||
335 | |||
336 | do { | ||
337 | pr_debug("Sent OPAL command: outstanding=%d, minTransfer=%d\n", | ||
338 | hdr->cp.outstandingData, | ||
339 | hdr->cp.minTransfer); | ||
340 | |||
341 | if (hdr->cp.outstandingData == 0 || | ||
342 | hdr->cp.minTransfer != 0) | ||
343 | return 0; | ||
344 | |||
345 | memset(buffer, 0, buflen); | ||
346 | ret = opal_recv_cmd(dev); | ||
347 | } while (!ret); | ||
348 | |||
349 | return ret; | ||
350 | } | ||
351 | |||
352 | static int opal_send_recv(struct opal_dev *dev, cont_fn *cont) | ||
353 | { | ||
354 | int ret; | ||
355 | |||
356 | ret = opal_send_cmd(dev); | ||
357 | if (ret) | ||
358 | return ret; | ||
359 | ret = opal_recv_cmd(dev); | ||
360 | if (ret) | ||
361 | return ret; | ||
362 | ret = opal_recv_check(dev); | ||
363 | if (ret) | ||
364 | return ret; | ||
365 | return cont(dev); | ||
366 | } | ||
367 | |||
368 | static void check_geometry(struct opal_dev *dev, const void *data) | ||
369 | { | ||
370 | const struct d0_geometry_features *geo = data; | ||
371 | |||
372 | dev->align = geo->alignment_granularity; | ||
373 | dev->lowest_lba = geo->lowest_aligned_lba; | ||
374 | } | ||
375 | |||
376 | static int next(struct opal_dev *dev) | ||
377 | { | ||
378 | opal_step func; | ||
379 | int error = 0; | ||
380 | |||
381 | do { | ||
382 | func = dev->funcs[dev->state]; | ||
383 | if (!func) | ||
384 | break; | ||
385 | |||
386 | error = func(dev); | ||
387 | if (error) { | ||
388 | pr_err("Error on step function: %d with error %d: %s\n", | ||
389 | dev->state, error, | ||
390 | opal_error_to_human(error)); | ||
391 | |||
392 | /* For each OPAL command we do a discovery0 then we | ||
393 | * start some sort of session. | ||
394 | * If we haven't passed state 1 then there was an error | ||
395 | * on discovery0 or during the attempt to start a | ||
396 | * session. Therefore we shouldn't attempt to terminate | ||
397 | * a session, as one has not yet been created. | ||
398 | */ | ||
399 | if (dev->state > 1) | ||
400 | return end_opal_session_error(dev); | ||
401 | } | ||
402 | dev->state++; | ||
403 | } while (!error); | ||
404 | |||
405 | return error; | ||
406 | } | ||
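/*
 * A minimal sketch (not part of this patch) of the step table that next()
 * walks: a NULL-terminated array of opal_step pointers, with func_data[]
 * carrying whatever argument each step pulls out via dev->func_data[dev->state].
 * The array name and the ordering below are illustrative only; the forward
 * declarations are included just so the sketch stands on its own.
 */
static int opal_discovery0(struct opal_dev *dev);
static int start_auth_opal_session(struct opal_dev *dev);
static int lock_unlock_locking_range(struct opal_dev *dev);

static const opal_step example_unlock_steps[] = {
	opal_discovery0,		/* state 0: probe, find the comid */
	start_auth_opal_session,	/* state 1: open a session        */
	lock_unlock_locking_range,	/* state 2: set the LR state      */
	NULL,				/* terminator checked by next()   */
};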
407 | |||
408 | static int opal_discovery0_end(struct opal_dev *dev) | ||
409 | { | ||
410 | bool found_com_id = false, supported = true, single_user = false; | ||
411 | const struct d0_header *hdr = (struct d0_header *)dev->resp; | ||
412 | const u8 *epos = dev->resp, *cpos = dev->resp; | ||
413 | u16 comid = 0; | ||
414 | |||
415 | print_buffer(dev->resp, be32_to_cpu(hdr->length)); | ||
416 | |||
417 | epos += be32_to_cpu(hdr->length); /* end of buffer */ | ||
418 | cpos += sizeof(*hdr); /* current position on buffer */ | ||
419 | |||
420 | while (cpos < epos && supported) { | ||
421 | const struct d0_features *body = | ||
422 | (const struct d0_features *)cpos; | ||
423 | |||
424 | switch (be16_to_cpu(body->code)) { | ||
425 | case FC_TPER: | ||
426 | supported = check_tper(body->features); | ||
427 | break; | ||
428 | case FC_SINGLEUSER: | ||
429 | single_user = check_sum(body->features); | ||
430 | break; | ||
431 | case FC_GEOMETRY: | ||
432 | check_geometry(dev, body); | ||
433 | break; | ||
434 | case FC_LOCKING: | ||
435 | case FC_ENTERPRISE: | ||
436 | case FC_DATASTORE: | ||
437 | /* some ignored properties */ | ||
438 | pr_debug("Found OPAL feature description: %d\n", | ||
439 | be16_to_cpu(body->code)); | ||
440 | break; | ||
441 | case FC_OPALV100: | ||
442 | comid = get_comid_v100(body->features); | ||
443 | found_com_id = true; | ||
444 | break; | ||
445 | case FC_OPALV200: | ||
446 | comid = get_comid_v200(body->features); | ||
447 | found_com_id = true; | ||
448 | break; | ||
449 | case 0xbfff ... 0xffff: | ||
450 | /* vendor specific, just ignore */ | ||
451 | break; | ||
452 | default: | ||
453 | pr_debug("OPAL Unknown feature: %d\n", | ||
454 | be16_to_cpu(body->code)); | ||
455 | |||
456 | } | ||
457 | cpos += body->length + 4; | ||
458 | } | ||
459 | |||
460 | if (!supported) { | ||
461 | pr_debug("This device is not Opal enabled. Not Supported!\n"); | ||
462 | return -EOPNOTSUPP; | ||
463 | } | ||
464 | |||
465 | if (!single_user) | ||
466 | pr_debug("Device doesn't support single user mode\n"); | ||
467 | |||
468 | |||
469 | if (!found_com_id) { | ||
470 | pr_debug("Could not find OPAL comid for device. Returning early\n"); | ||
471 | return -EOPNOTSUPP; | ||
472 | } | ||
473 | |||
474 | dev->comid = comid; | ||
475 | |||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | static int opal_discovery0(struct opal_dev *dev) | ||
480 | { | ||
481 | int ret; | ||
482 | |||
483 | memset(dev->resp, 0, IO_BUFFER_LENGTH); | ||
484 | dev->comid = OPAL_DISCOVERY_COMID; | ||
485 | ret = opal_recv_cmd(dev); | ||
486 | if (ret) | ||
487 | return ret; | ||
488 | return opal_discovery0_end(dev); | ||
489 | } | ||
490 | |||
491 | static void add_token_u8(int *err, struct opal_dev *cmd, u8 tok) | ||
492 | { | ||
493 | if (*err) | ||
494 | return; | ||
495 | if (cmd->pos >= IO_BUFFER_LENGTH - 1) { | ||
496 | pr_err("Error adding u8: end of buffer.\n"); | ||
497 | *err = -ERANGE; | ||
498 | return; | ||
499 | } | ||
500 | cmd->cmd[cmd->pos++] = tok; | ||
501 | } | ||
502 | |||
503 | static void add_short_atom_header(struct opal_dev *cmd, bool bytestring, | ||
504 | bool has_sign, int len) | ||
505 | { | ||
506 | u8 atom; | ||
507 | int err = 0; | ||
508 | |||
509 | atom = SHORT_ATOM_ID; | ||
510 | atom |= bytestring ? SHORT_ATOM_BYTESTRING : 0; | ||
511 | atom |= has_sign ? SHORT_ATOM_SIGNED : 0; | ||
512 | atom |= len & SHORT_ATOM_LEN_MASK; | ||
513 | |||
514 | add_token_u8(&err, cmd, atom); | ||
515 | } | ||
516 | |||
517 | static void add_medium_atom_header(struct opal_dev *cmd, bool bytestring, | ||
518 | bool has_sign, int len) | ||
519 | { | ||
520 | u8 header0; | ||
521 | |||
522 | header0 = MEDIUM_ATOM_ID; | ||
523 | header0 |= bytestring ? MEDIUM_ATOM_BYTESTRING : 0; | ||
524 | header0 |= has_sign ? MEDIUM_ATOM_SIGNED : 0; | ||
525 | header0 |= (len >> 8) & MEDIUM_ATOM_LEN_MASK; | ||
526 | cmd->cmd[cmd->pos++] = header0; | ||
527 | cmd->cmd[cmd->pos++] = len; | ||
528 | } | ||
529 | |||
530 | static void add_token_u64(int *err, struct opal_dev *cmd, u64 number) | ||
531 | { | ||
532 | |||
533 | size_t len; | ||
534 | int msb; | ||
535 | u8 n; | ||
536 | |||
537 | if (!(number & ~TINY_ATOM_DATA_MASK)) { | ||
538 | add_token_u8(err, cmd, number); | ||
539 | return; | ||
540 | } | ||
541 | |||
542 | msb = fls(number); | ||
543 | len = DIV_ROUND_UP(msb, 4); | ||
544 | |||
545 | if (cmd->pos >= IO_BUFFER_LENGTH - len - 1) { | ||
546 | pr_err("Error adding u64: end of buffer.\n"); | ||
547 | *err = -ERANGE; | ||
548 | return; | ||
549 | } | ||
550 | add_short_atom_header(cmd, false, false, len); | ||
551 | while (len--) { | ||
552 | n = number >> (len * 8); | ||
553 | add_token_u8(err, cmd, n); | ||
554 | } | ||
555 | } | ||
556 | |||
557 | static void add_token_bytestring(int *err, struct opal_dev *cmd, | ||
558 | const u8 *bytestring, size_t len) | ||
559 | { | ||
560 | size_t header_len = 1; | ||
561 | bool is_short_atom = true; | ||
562 | |||
563 | if (*err) | ||
564 | return; | ||
565 | |||
566 | if (len & ~SHORT_ATOM_LEN_MASK) { | ||
567 | header_len = 2; | ||
568 | is_short_atom = false; | ||
569 | } | ||
570 | |||
571 | if (len >= IO_BUFFER_LENGTH - cmd->pos - header_len) { | ||
572 | pr_err("Error adding bytestring: end of buffer.\n"); | ||
573 | *err = -ERANGE; | ||
574 | return; | ||
575 | } | ||
576 | |||
577 | if (is_short_atom) | ||
578 | add_short_atom_header(cmd, true, false, len); | ||
579 | else | ||
580 | add_medium_atom_header(cmd, true, false, len); | ||
581 | |||
582 | memcpy(&cmd->cmd[cmd->pos], bytestring, len); | ||
583 | cmd->pos += len; | ||
584 | |||
585 | } | ||
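/*
 * A minimal sketch (not part of this patch) of the error-accumulation
 * idiom the add_token_* helpers are built around: each helper becomes a
 * no-op once *err is set, so a whole token stream can be emitted and the
 * error checked a single time at the end.  The invoking-UID/method pair
 * below is arbitrary and only meant to show the shape of a method call.
 */
static int example_build_simple_call(struct opal_dev *dev)
{
	int err = 0;

	add_token_u8(&err, dev, OPAL_CALL);
	add_token_bytestring(&err, dev, opaluid[OPAL_THISSP_UID],
			     OPAL_UID_LENGTH);
	add_token_bytestring(&err, dev, opalmethod[OPAL_PROPERTIES],
			     OPAL_UID_LENGTH);
	add_token_u8(&err, dev, OPAL_STARTLIST);
	add_token_u8(&err, dev, OPAL_ENDLIST);

	return err; /* non-zero only if one of the adds ran out of buffer */
}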
586 | |||
587 | static int build_locking_range(u8 *buffer, size_t length, u8 lr) | ||
588 | { | ||
589 | if (length > OPAL_UID_LENGTH) { | ||
590 | pr_err("Can't build locking range. Length OOB\n"); | ||
591 | return -ERANGE; | ||
592 | } | ||
593 | |||
594 | memcpy(buffer, opaluid[OPAL_LOCKINGRANGE_GLOBAL], OPAL_UID_LENGTH); | ||
595 | |||
596 | if (lr == 0) | ||
597 | return 0; | ||
598 | buffer[5] = LOCKING_RANGE_NON_GLOBAL; | ||
599 | buffer[7] = lr; | ||
600 | |||
601 | return 0; | ||
602 | } | ||
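/*
 * Worked example for build_locking_range(): with lr == 2 the buffer starts
 * out as the OPAL_LOCKINGRANGE_GLOBAL UID and then has byte 5 overwritten
 * with LOCKING_RANGE_NON_GLOBAL and byte 7 with 2, so it now addresses
 * locking range 2 instead of the global range.
 */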
603 | |||
604 | static int build_locking_user(u8 *buffer, size_t length, u8 lr) | ||
605 | { | ||
606 | if (length > OPAL_UID_LENGTH) { | ||
607 | pr_err("Can't build locking range user. Length OOB\n"); | ||
608 | return -ERANGE; | ||
609 | } | ||
610 | |||
611 | memcpy(buffer, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); | ||
612 | |||
613 | buffer[7] = lr + 1; | ||
614 | |||
615 | return 0; | ||
616 | } | ||
617 | |||
618 | static void set_comid(struct opal_dev *cmd, u16 comid) | ||
619 | { | ||
620 | struct opal_header *hdr = (struct opal_header *)cmd->cmd; | ||
621 | |||
622 | hdr->cp.extendedComID[0] = comid >> 8; | ||
623 | hdr->cp.extendedComID[1] = comid; | ||
624 | hdr->cp.extendedComID[2] = 0; | ||
625 | hdr->cp.extendedComID[3] = 0; | ||
626 | } | ||
627 | |||
628 | static int cmd_finalize(struct opal_dev *cmd, u32 hsn, u32 tsn) | ||
629 | { | ||
630 | struct opal_header *hdr; | ||
631 | int err = 0; | ||
632 | |||
633 | add_token_u8(&err, cmd, OPAL_ENDOFDATA); | ||
634 | add_token_u8(&err, cmd, OPAL_STARTLIST); | ||
635 | add_token_u8(&err, cmd, 0); | ||
636 | add_token_u8(&err, cmd, 0); | ||
637 | add_token_u8(&err, cmd, 0); | ||
638 | add_token_u8(&err, cmd, OPAL_ENDLIST); | ||
639 | |||
640 | if (err) { | ||
641 | pr_err("Error finalizing command.\n"); | ||
642 | return -EFAULT; | ||
643 | } | ||
644 | |||
645 | hdr = (struct opal_header *) cmd->cmd; | ||
646 | |||
647 | hdr->pkt.tsn = cpu_to_be32(tsn); | ||
648 | hdr->pkt.hsn = cpu_to_be32(hsn); | ||
649 | |||
650 | hdr->subpkt.length = cpu_to_be32(cmd->pos - sizeof(*hdr)); | ||
651 | while (cmd->pos % 4) { | ||
652 | if (cmd->pos >= IO_BUFFER_LENGTH) { | ||
653 | pr_err("Error: Buffer overrun\n"); | ||
654 | return -ERANGE; | ||
655 | } | ||
656 | cmd->cmd[cmd->pos++] = 0; | ||
657 | } | ||
658 | hdr->pkt.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp) - | ||
659 | sizeof(hdr->pkt)); | ||
660 | hdr->cp.length = cpu_to_be32(cmd->pos - sizeof(hdr->cp)); | ||
661 | |||
662 | return 0; | ||
663 | } | ||
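/*
 * Note on the three lengths cmd_finalize() fills in: the subpacket length
 * is taken before the pad bytes are appended, so it covers only the token
 * payload, while the packet and compacket lengths are taken after padding
 * and therefore cover everything that follows their respective headers.
 */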
664 | |||
665 | static enum opal_response_token token_type(const struct parsed_resp *resp, | ||
666 | int n) | ||
667 | { | ||
668 | const struct opal_resp_tok *tok; | ||
669 | |||
670 | if (n >= resp->num) { | ||
671 | pr_err("Token number doesn't exist: %d, resp: %d\n", | ||
672 | n, resp->num); | ||
673 | return OPAL_DTA_TOKENID_INVALID; | ||
674 | } | ||
675 | |||
676 | tok = &resp->toks[n]; | ||
677 | if (tok->len == 0) { | ||
678 | pr_err("Token length must be non-zero\n"); | ||
679 | return OPAL_DTA_TOKENID_INVALID; | ||
680 | } | ||
681 | |||
682 | return tok->type; | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * This function returns 0 in case of invalid token. One should call | ||
687 | * token_type() first to find out if the token is valid or not. | ||
688 | */ | ||
689 | static enum opal_token response_get_token(const struct parsed_resp *resp, | ||
690 | int n) | ||
691 | { | ||
692 | const struct opal_resp_tok *tok; | ||
693 | |||
694 | if (n >= resp->num) { | ||
695 | pr_err("Token number doesn't exist: %d, resp: %d\n", | ||
696 | n, resp->num); | ||
697 | return 0; | ||
698 | } | ||
699 | |||
700 | tok = &resp->toks[n]; | ||
701 | if (tok->len == 0) { | ||
702 | pr_err("Token length must be non-zero\n"); | ||
703 | return 0; | ||
704 | } | ||
705 | |||
706 | return tok->pos[0]; | ||
707 | } | ||
708 | |||
709 | static size_t response_parse_tiny(struct opal_resp_tok *tok, | ||
710 | const u8 *pos) | ||
711 | { | ||
712 | tok->pos = pos; | ||
713 | tok->len = 1; | ||
714 | tok->width = OPAL_WIDTH_TINY; | ||
715 | |||
716 | if (pos[0] & TINY_ATOM_SIGNED) { | ||
717 | tok->type = OPAL_DTA_TOKENID_SINT; | ||
718 | } else { | ||
719 | tok->type = OPAL_DTA_TOKENID_UINT; | ||
720 | tok->stored.u = pos[0] & 0x3f; | ||
721 | } | ||
722 | |||
723 | return tok->len; | ||
724 | } | ||
725 | |||
726 | static size_t response_parse_short(struct opal_resp_tok *tok, | ||
727 | const u8 *pos) | ||
728 | { | ||
729 | tok->pos = pos; | ||
730 | tok->len = (pos[0] & SHORT_ATOM_LEN_MASK) + 1; | ||
731 | tok->width = OPAL_WIDTH_SHORT; | ||
732 | |||
733 | if (pos[0] & SHORT_ATOM_BYTESTRING) { | ||
734 | tok->type = OPAL_DTA_TOKENID_BYTESTRING; | ||
735 | } else if (pos[0] & SHORT_ATOM_SIGNED) { | ||
736 | tok->type = OPAL_DTA_TOKENID_SINT; | ||
737 | } else { | ||
738 | u64 u_integer = 0; | ||
739 | int i, b = 0; | ||
740 | |||
741 | tok->type = OPAL_DTA_TOKENID_UINT; | ||
742 | if (tok->len > 9) { | ||
743 | pr_warn("uint64 with more than 8 bytes\n"); | ||
744 | return -EINVAL; | ||
745 | } | ||
746 | for (i = tok->len - 1; i > 0; i--) { | ||
747 | u_integer |= ((u64)pos[i] << (8 * b)); | ||
748 | b++; | ||
749 | } | ||
750 | tok->stored.u = u_integer; | ||
751 | } | ||
752 | |||
753 | return tok->len; | ||
754 | } | ||
755 | |||
756 | static size_t response_parse_medium(struct opal_resp_tok *tok, | ||
757 | const u8 *pos) | ||
758 | { | ||
759 | tok->pos = pos; | ||
760 | tok->len = (((pos[0] & MEDIUM_ATOM_LEN_MASK) << 8) | pos[1]) + 2; | ||
761 | tok->width = OPAL_WIDTH_MEDIUM; | ||
762 | |||
763 | if (pos[0] & MEDIUM_ATOM_BYTESTRING) | ||
764 | tok->type = OPAL_DTA_TOKENID_BYTESTRING; | ||
765 | else if (pos[0] & MEDIUM_ATOM_SIGNED) | ||
766 | tok->type = OPAL_DTA_TOKENID_SINT; | ||
767 | else | ||
768 | tok->type = OPAL_DTA_TOKENID_UINT; | ||
769 | |||
770 | return tok->len; | ||
771 | } | ||
772 | |||
773 | static size_t response_parse_long(struct opal_resp_tok *tok, | ||
774 | const u8 *pos) | ||
775 | { | ||
776 | tok->pos = pos; | ||
777 | tok->len = ((pos[1] << 16) | (pos[2] << 8) | pos[3]) + 4; | ||
778 | tok->width = OPAL_WIDTH_LONG; | ||
779 | |||
780 | if (pos[0] & LONG_ATOM_BYTESTRING) | ||
781 | tok->type = OPAL_DTA_TOKENID_BYTESTRING; | ||
782 | else if (pos[0] & LONG_ATOM_SIGNED) | ||
783 | tok->type = OPAL_DTA_TOKENID_SINT; | ||
784 | else | ||
785 | tok->type = OPAL_DTA_TOKENID_UINT; | ||
786 | |||
787 | return tok->len; | ||
788 | } | ||
789 | |||
790 | static size_t response_parse_token(struct opal_resp_tok *tok, | ||
791 | const u8 *pos) | ||
792 | { | ||
793 | tok->pos = pos; | ||
794 | tok->len = 1; | ||
795 | tok->type = OPAL_DTA_TOKENID_TOKEN; | ||
796 | tok->width = OPAL_WIDTH_TOKEN; | ||
797 | |||
798 | return tok->len; | ||
799 | } | ||
800 | |||
801 | static int response_parse(const u8 *buf, size_t length, | ||
802 | struct parsed_resp *resp) | ||
803 | { | ||
804 | const struct opal_header *hdr; | ||
805 | struct opal_resp_tok *iter; | ||
806 | int num_entries = 0; | ||
807 | int total; | ||
808 | size_t token_length; | ||
809 | const u8 *pos; | ||
810 | |||
811 | if (!buf) | ||
812 | return -EFAULT; | ||
813 | |||
814 | if (!resp) | ||
815 | return -EFAULT; | ||
816 | |||
817 | hdr = (struct opal_header *)buf; | ||
818 | pos = buf; | ||
819 | pos += sizeof(*hdr); | ||
820 | |||
821 | pr_debug("Response size: cp: %d, pkt: %d, subpkt: %d\n", | ||
822 | be32_to_cpu(hdr->cp.length), | ||
823 | be32_to_cpu(hdr->pkt.length), | ||
824 | be32_to_cpu(hdr->subpkt.length)); | ||
825 | |||
826 | if (hdr->cp.length == 0 || hdr->pkt.length == 0 || | ||
827 | hdr->subpkt.length == 0) { | ||
828 | pr_err("Bad header length. cp: %d, pkt: %d, subpkt: %d\n", | ||
829 | be32_to_cpu(hdr->cp.length), | ||
830 | be32_to_cpu(hdr->pkt.length), | ||
831 | be32_to_cpu(hdr->subpkt.length)); | ||
832 | print_buffer(pos, sizeof(*hdr)); | ||
833 | return -EINVAL; | ||
834 | } | ||
835 | |||
836 | if (pos > buf + length) | ||
837 | return -EFAULT; | ||
838 | |||
839 | iter = resp->toks; | ||
840 | total = be32_to_cpu(hdr->subpkt.length); | ||
841 | print_buffer(pos, total); | ||
842 | while (total > 0) { | ||
843 | if (pos[0] <= TINY_ATOM_BYTE) /* tiny atom */ | ||
844 | token_length = response_parse_tiny(iter, pos); | ||
845 | else if (pos[0] <= SHORT_ATOM_BYTE) /* short atom */ | ||
846 | token_length = response_parse_short(iter, pos); | ||
847 | else if (pos[0] <= MEDIUM_ATOM_BYTE) /* medium atom */ | ||
848 | token_length = response_parse_medium(iter, pos); | ||
849 | else if (pos[0] <= LONG_ATOM_BYTE) /* long atom */ | ||
850 | token_length = response_parse_long(iter, pos); | ||
851 | else /* TOKEN */ | ||
852 | token_length = response_parse_token(iter, pos); | ||
853 | |||
854 | if (token_length == -EINVAL) | ||
855 | return -EINVAL; | ||
856 | |||
857 | pos += token_length; | ||
858 | total -= token_length; | ||
859 | iter++; | ||
860 | num_entries++; | ||
861 | } | ||
862 | |||
863 | if (num_entries == 0) { | ||
864 | pr_err("Couldn't parse response.\n"); | ||
865 | return -EINVAL; | ||
866 | } | ||
867 | resp->num = num_entries; | ||
868 | |||
869 | return 0; | ||
870 | } | ||
871 | |||
872 | static size_t response_get_string(const struct parsed_resp *resp, int n, | ||
873 | const char **store) | ||
874 | { | ||
875 | *store = NULL; | ||
876 | if (!resp) { | ||
877 | pr_err("Response is NULL\n"); | ||
878 | return 0; | ||
879 | } | ||
880 | |||
881 | if (n >= resp->num) { | ||
882 | pr_err("Response has %d tokens. Can't access %d\n", | ||
883 | resp->num, n); | ||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | if (resp->toks[n].type != OPAL_DTA_TOKENID_BYTESTRING) { | ||
888 | pr_err("Token is not a byte string!\n"); | ||
889 | return 0; | ||
890 | } | ||
891 | |||
892 | *store = resp->toks[n].pos + 1; | ||
893 | return resp->toks[n].len - 1; | ||
894 | } | ||
895 | |||
896 | static u64 response_get_u64(const struct parsed_resp *resp, int n) | ||
897 | { | ||
898 | if (!resp) { | ||
899 | pr_err("Response is NULL\n"); | ||
900 | return 0; | ||
901 | } | ||
902 | |||
903 | if (n >= resp->num) { | ||
904 | pr_err("Response has %d tokens. Can't access %d\n", | ||
905 | resp->num, n); | ||
906 | return 0; | ||
907 | } | ||
908 | |||
909 | if (resp->toks[n].type != OPAL_DTA_TOKENID_UINT) { | ||
910 | pr_err("Token is not an unsigned integer: %d\n", | ||
911 | resp->toks[n].type); | ||
912 | return 0; | ||
913 | } | ||
914 | |||
915 | if (!(resp->toks[n].width == OPAL_WIDTH_TINY || | ||
916 | resp->toks[n].width == OPAL_WIDTH_SHORT)) { | ||
917 | pr_err("Atom is not short or tiny: %d\n", | ||
918 | resp->toks[n].width); | ||
919 | return 0; | ||
920 | } | ||
921 | |||
922 | return resp->toks[n].stored.u; | ||
923 | } | ||
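/*
 * A minimal sketch (not part of this patch) of how the pieces above fit
 * together on the receive side: pull the device's response into dev->resp,
 * tokenize it with response_parse() and then read typed values out with
 * the response_get_* helpers.  The token index 4 is arbitrary here.
 */
static int example_read_one_uint(struct opal_dev *dev, u64 *val)
{
	int ret;

	ret = opal_recv_cmd(dev);
	if (ret)
		return ret;

	ret = response_parse(dev->resp, IO_BUFFER_LENGTH, &dev->parsed);
	if (ret)
		return ret;

	*val = response_get_u64(&dev->parsed, 4);
	return 0;
}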
924 | |||
925 | static u8 response_status(const struct parsed_resp *resp) | ||
926 | { | ||
927 | if (token_type(resp, 0) == OPAL_DTA_TOKENID_TOKEN && | ||
928 | response_get_token(resp, 0) == OPAL_ENDOFSESSION) { | ||
929 | return 0; | ||
930 | } | ||
931 | |||
932 | if (resp->num < 5) | ||
933 | return DTAERROR_NO_METHOD_STATUS; | ||
934 | |||
935 | if (token_type(resp, resp->num - 1) != OPAL_DTA_TOKENID_TOKEN || | ||
936 | token_type(resp, resp->num - 5) != OPAL_DTA_TOKENID_TOKEN || | ||
937 | response_get_token(resp, resp->num - 1) != OPAL_ENDLIST || | ||
938 | response_get_token(resp, resp->num - 5) != OPAL_STARTLIST) | ||
939 | return DTAERROR_NO_METHOD_STATUS; | ||
940 | |||
941 | return response_get_u64(resp, resp->num - 4); | ||
942 | } | ||
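/*
 * For reference, the tail that response_status() is matching against is
 * the method-status list at the end of a regular response:
 *
 *	...  STARTLIST  <status>  0  0  ENDLIST
 *
 * i.e. toks[num - 5] is STARTLIST, toks[num - 1] is ENDLIST, and the
 * status code itself sits at toks[num - 4].
 */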
943 | |||
944 | /* Parses and checks for errors */ | ||
945 | static int parse_and_check_status(struct opal_dev *dev) | ||
946 | { | ||
947 | int error; | ||
948 | |||
949 | print_buffer(dev->cmd, dev->pos); | ||
950 | |||
951 | error = response_parse(dev->resp, IO_BUFFER_LENGTH, &dev->parsed); | ||
952 | if (error) { | ||
953 | pr_err("Couldn't parse response.\n"); | ||
954 | return error; | ||
955 | } | ||
956 | |||
957 | return response_status(&dev->parsed); | ||
958 | } | ||
959 | |||
960 | static void clear_opal_cmd(struct opal_dev *dev) | ||
961 | { | ||
962 | dev->pos = sizeof(struct opal_header); | ||
963 | memset(dev->cmd, 0, IO_BUFFER_LENGTH); | ||
964 | } | ||
965 | |||
966 | static int start_opal_session_cont(struct opal_dev *dev) | ||
967 | { | ||
968 | u32 hsn, tsn; | ||
969 | int error = 0; | ||
970 | |||
971 | error = parse_and_check_status(dev); | ||
972 | if (error) | ||
973 | return error; | ||
974 | |||
975 | hsn = response_get_u64(&dev->parsed, 4); | ||
976 | tsn = response_get_u64(&dev->parsed, 5); | ||
977 | |||
978 | if (hsn == 0 && tsn == 0) { | ||
979 | pr_err("Couldn't authenticate session\n"); | ||
980 | return -EPERM; | ||
981 | } | ||
982 | |||
983 | dev->hsn = hsn; | ||
984 | dev->tsn = tsn; | ||
985 | return 0; | ||
986 | } | ||
987 | |||
988 | static void add_suspend_info(struct opal_dev *dev, | ||
989 | struct opal_suspend_data *sus) | ||
990 | { | ||
991 | struct opal_suspend_data *iter; | ||
992 | |||
993 | list_for_each_entry(iter, &dev->unlk_lst, node) { | ||
994 | if (iter->lr == sus->lr) { | ||
995 | list_del(&iter->node); | ||
996 | kfree(iter); | ||
997 | break; | ||
998 | } | ||
999 | } | ||
1000 | list_add_tail(&sus->node, &dev->unlk_lst); | ||
1001 | } | ||
1002 | |||
1003 | static int end_session_cont(struct opal_dev *dev) | ||
1004 | { | ||
1005 | dev->hsn = 0; | ||
1006 | dev->tsn = 0; | ||
1007 | return parse_and_check_status(dev); | ||
1008 | } | ||
1009 | |||
1010 | static int finalize_and_send(struct opal_dev *dev, cont_fn cont) | ||
1011 | { | ||
1012 | int ret; | ||
1013 | |||
1014 | ret = cmd_finalize(dev, dev->hsn, dev->tsn); | ||
1015 | if (ret) { | ||
1016 | pr_err("Error finalizing command buffer: %d\n", ret); | ||
1017 | return ret; | ||
1018 | } | ||
1019 | |||
1020 | print_buffer(dev->cmd, dev->pos); | ||
1021 | |||
1022 | return opal_send_recv(dev, cont); | ||
1023 | } | ||
1024 | |||
1025 | static int gen_key(struct opal_dev *dev) | ||
1026 | { | ||
1027 | const u8 *method; | ||
1028 | u8 uid[OPAL_UID_LENGTH]; | ||
1029 | int err = 0; | ||
1030 | |||
1031 | clear_opal_cmd(dev); | ||
1032 | set_comid(dev, dev->comid); | ||
1033 | |||
1034 | memcpy(uid, dev->prev_data, min(sizeof(uid), dev->prev_d_len)); | ||
1035 | method = opalmethod[OPAL_GENKEY]; | ||
1036 | kfree(dev->prev_data); | ||
1037 | dev->prev_data = NULL; | ||
1038 | |||
1039 | add_token_u8(&err, dev, OPAL_CALL); | ||
1040 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1041 | add_token_bytestring(&err, dev, opalmethod[OPAL_GENKEY], | ||
1042 | OPAL_UID_LENGTH); | ||
1043 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1044 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1045 | |||
1046 | if (err) { | ||
1047 | pr_err("Error building gen key command\n"); | ||
1048 | return err; | ||
1049 | |||
1050 | } | ||
1051 | return finalize_and_send(dev, parse_and_check_status); | ||
1052 | } | ||
1053 | |||
1054 | static int get_active_key_cont(struct opal_dev *dev) | ||
1055 | { | ||
1056 | const char *activekey; | ||
1057 | size_t keylen; | ||
1058 | int error = 0; | ||
1059 | |||
1060 | error = parse_and_check_status(dev); | ||
1061 | if (error) | ||
1062 | return error; | ||
1063 | keylen = response_get_string(&dev->parsed, 4, &activekey); | ||
1064 | if (!activekey) { | ||
1065 | pr_err("%s: Couldn't extract the ActiveKey from the response\n", | ||
1066 | __func__); | ||
1067 | return OPAL_INVAL_PARAM; | ||
1068 | } | ||
1069 | dev->prev_data = kmemdup(activekey, keylen, GFP_KERNEL); | ||
1070 | |||
1071 | if (!dev->prev_data) | ||
1072 | return -ENOMEM; | ||
1073 | |||
1074 | dev->prev_d_len = keylen; | ||
1075 | |||
1076 | return 0; | ||
1077 | } | ||
1078 | |||
1079 | static int get_active_key(struct opal_dev *dev) | ||
1080 | { | ||
1081 | u8 uid[OPAL_UID_LENGTH]; | ||
1082 | int err = 0; | ||
1083 | u8 *lr; | ||
1084 | |||
1085 | clear_opal_cmd(dev); | ||
1086 | set_comid(dev, dev->comid); | ||
1087 | lr = dev->func_data[dev->state]; | ||
1088 | |||
1089 | err = build_locking_range(uid, sizeof(uid), *lr); | ||
1090 | if (err) | ||
1091 | return err; | ||
1092 | |||
1093 | err = 0; | ||
1094 | add_token_u8(&err, dev, OPAL_CALL); | ||
1095 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1096 | add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); | ||
1097 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1098 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1099 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1100 | add_token_u8(&err, dev, 3); /* startColumn */ | ||
1101 | add_token_u8(&err, dev, 10); /* ActiveKey */ | ||
1102 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1103 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1104 | add_token_u8(&err, dev, 4); /* endColumn */ | ||
1105 | add_token_u8(&err, dev, 10); /* ActiveKey */ | ||
1106 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1107 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1108 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1109 | if (err) { | ||
1110 | pr_err("Error building get active key command\n"); | ||
1111 | return err; | ||
1112 | } | ||
1113 | |||
1114 | return finalize_and_send(dev, get_active_key_cont); | ||
1115 | } | ||
1116 | |||
1117 | static int generic_lr_enable_disable(struct opal_dev *dev, | ||
1118 | u8 *uid, bool rle, bool wle, | ||
1119 | bool rl, bool wl) | ||
1120 | { | ||
1121 | int err = 0; | ||
1122 | |||
1123 | add_token_u8(&err, dev, OPAL_CALL); | ||
1124 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1125 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1126 | |||
1127 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1128 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1129 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1130 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1131 | |||
1132 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1133 | add_token_u8(&err, dev, 5); /* ReadLockEnabled */ | ||
1134 | add_token_u8(&err, dev, rle); | ||
1135 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1136 | |||
1137 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1138 | add_token_u8(&err, dev, 6); /* WriteLockEnabled */ | ||
1139 | add_token_u8(&err, dev, wle); | ||
1140 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1141 | |||
1142 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1143 | add_token_u8(&err, dev, OPAL_READLOCKED); | ||
1144 | add_token_u8(&err, dev, rl); | ||
1145 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1146 | |||
1147 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1148 | add_token_u8(&err, dev, OPAL_WRITELOCKED); | ||
1149 | add_token_u8(&err, dev, wl); | ||
1150 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1151 | |||
1152 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1153 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1154 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1155 | return err; | ||
1156 | } | ||
1157 | |||
1158 | static inline int enable_global_lr(struct opal_dev *dev, u8 *uid, | ||
1159 | struct opal_user_lr_setup *setup) | ||
1160 | { | ||
1161 | int err; | ||
1162 | |||
1163 | err = generic_lr_enable_disable(dev, uid, !!setup->RLE, !!setup->WLE, | ||
1164 | 0, 0); | ||
1165 | if (err) | ||
1166 | pr_err("Failed to create enable global lr command\n"); | ||
1167 | return err; | ||
1168 | } | ||
1169 | |||
1170 | static int setup_locking_range(struct opal_dev *dev) | ||
1171 | { | ||
1172 | u8 uid[OPAL_UID_LENGTH]; | ||
1173 | struct opal_user_lr_setup *setup; | ||
1174 | u8 lr; | ||
1175 | int err = 0; | ||
1176 | |||
1177 | clear_opal_cmd(dev); | ||
1178 | set_comid(dev, dev->comid); | ||
1179 | |||
1180 | setup = dev->func_data[dev->state]; | ||
1181 | lr = setup->session.opal_key.lr; | ||
1182 | err = build_locking_range(uid, sizeof(uid), lr); | ||
1183 | if (err) | ||
1184 | return err; | ||
1185 | |||
1186 | if (lr == 0) | ||
1187 | err = enable_global_lr(dev, uid, setup); | ||
1188 | else { | ||
1189 | add_token_u8(&err, dev, OPAL_CALL); | ||
1190 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1191 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], | ||
1192 | OPAL_UID_LENGTH); | ||
1193 | |||
1194 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1195 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1196 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1197 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1198 | |||
1199 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1200 | add_token_u8(&err, dev, 3); /* Ranges Start */ | ||
1201 | add_token_u64(&err, dev, setup->range_start); | ||
1202 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1203 | |||
1204 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1205 | add_token_u8(&err, dev, 4); /* Ranges length */ | ||
1206 | add_token_u64(&err, dev, setup->range_length); | ||
1207 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1208 | |||
1209 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1210 | add_token_u8(&err, dev, 5); /* ReadLockEnabled */ | ||
1211 | add_token_u64(&err, dev, !!setup->RLE); | ||
1212 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1213 | |||
1214 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1215 | add_token_u8(&err, dev, 6); /* WriteLockEnabled */ | ||
1216 | add_token_u64(&err, dev, !!setup->WLE); | ||
1217 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1218 | |||
1219 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1220 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1221 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1222 | |||
1223 | } | ||
1224 | if (err) { | ||
1225 | pr_err("Error building Setup Locking range command.\n"); | ||
1226 | return err; | ||
1227 | |||
1228 | } | ||
1229 | |||
1230 | return finalize_and_send(dev, parse_and_check_status); | ||
1231 | } | ||
1232 | |||
1233 | static int start_generic_opal_session(struct opal_dev *dev, | ||
1234 | enum opal_uid auth, | ||
1235 | enum opal_uid sp_type, | ||
1236 | const char *key, | ||
1237 | u8 key_len) | ||
1238 | { | ||
1239 | u32 hsn; | ||
1240 | int err = 0; | ||
1241 | |||
1242 | if (key == NULL && auth != OPAL_ANYBODY_UID) { | ||
1243 | pr_err("%s: Attempted to open ADMIN_SP Session without a Host" \ | ||
1244 | "Challenge, and not as the Anybody UID\n", __func__); | ||
1245 | return OPAL_INVAL_PARAM; | ||
1246 | } | ||
1247 | |||
1248 | clear_opal_cmd(dev); | ||
1249 | |||
1250 | set_comid(dev, dev->comid); | ||
1251 | hsn = GENERIC_HOST_SESSION_NUM; | ||
1252 | |||
1253 | add_token_u8(&err, dev, OPAL_CALL); | ||
1254 | add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID], | ||
1255 | OPAL_UID_LENGTH); | ||
1256 | add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION], | ||
1257 | OPAL_UID_LENGTH); | ||
1258 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1259 | add_token_u64(&err, dev, hsn); | ||
1260 | add_token_bytestring(&err, dev, opaluid[sp_type], OPAL_UID_LENGTH); | ||
1261 | add_token_u8(&err, dev, 1); /* Write (read-write session) */ | ||
1262 | |||
1263 | switch (auth) { | ||
1264 | case OPAL_ANYBODY_UID: | ||
1265 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1266 | break; | ||
1267 | case OPAL_ADMIN1_UID: | ||
1268 | case OPAL_SID_UID: | ||
1269 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1270 | add_token_u8(&err, dev, 0); /* HostChallenge */ | ||
1271 | add_token_bytestring(&err, dev, key, key_len); | ||
1272 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1273 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1274 | add_token_u8(&err, dev, 3); /* HostSignAuth */ | ||
1275 | add_token_bytestring(&err, dev, opaluid[auth], | ||
1276 | OPAL_UID_LENGTH); | ||
1277 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1278 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1279 | break; | ||
1280 | default: | ||
1281 | pr_err("Cannot start Admin SP session with auth %d\n", auth); | ||
1282 | return OPAL_INVAL_PARAM; | ||
1283 | } | ||
1284 | |||
1285 | if (err) { | ||
1286 | pr_err("Error building start adminsp session command.\n"); | ||
1287 | return err; | ||
1288 | } | ||
1289 | |||
1290 | return finalize_and_send(dev, start_opal_session_cont); | ||
1291 | } | ||
1292 | |||
1293 | static int start_anybodyASP_opal_session(struct opal_dev *dev) | ||
1294 | { | ||
1295 | return start_generic_opal_session(dev, OPAL_ANYBODY_UID, | ||
1296 | OPAL_ADMINSP_UID, NULL, 0); | ||
1297 | } | ||
1298 | |||
1299 | static int start_SIDASP_opal_session(struct opal_dev *dev) | ||
1300 | { | ||
1301 | int ret; | ||
1302 | const u8 *key = dev->prev_data; | ||
1303 | struct opal_key *okey; | ||
1304 | |||
1305 | if (!key) { | ||
1306 | okey = dev->func_data[dev->state]; | ||
1307 | ret = start_generic_opal_session(dev, OPAL_SID_UID, | ||
1308 | OPAL_ADMINSP_UID, | ||
1309 | okey->key, | ||
1310 | okey->key_len); | ||
1311 | } else { | ||
1312 | ret = start_generic_opal_session(dev, OPAL_SID_UID, | ||
1313 | OPAL_ADMINSP_UID, | ||
1314 | key, dev->prev_d_len); | ||
1315 | kfree(key); | ||
1316 | dev->prev_data = NULL; | ||
1317 | } | ||
1318 | return ret; | ||
1319 | } | ||
1320 | |||
1321 | static inline int start_admin1LSP_opal_session(struct opal_dev *dev) | ||
1322 | { | ||
1323 | struct opal_key *key = dev->func_data[dev->state]; | ||
1324 | |||
1325 | return start_generic_opal_session(dev, OPAL_ADMIN1_UID, | ||
1326 | OPAL_LOCKINGSP_UID, | ||
1327 | key->key, key->key_len); | ||
1328 | } | ||
1329 | |||
1330 | static int start_auth_opal_session(struct opal_dev *dev) | ||
1331 | { | ||
1332 | u8 lk_ul_user[OPAL_UID_LENGTH]; | ||
1333 | int err = 0; | ||
1334 | |||
1335 | struct opal_session_info *session = dev->func_data[dev->state]; | ||
1336 | size_t keylen = session->opal_key.key_len; | ||
1337 | u8 *key = session->opal_key.key; | ||
1338 | u32 hsn = GENERIC_HOST_SESSION_NUM; | ||
1339 | |||
1340 | clear_opal_cmd(dev); | ||
1341 | set_comid(dev, dev->comid); | ||
1342 | |||
1343 | if (session->sum) { | ||
1344 | err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), | ||
1345 | session->opal_key.lr); | ||
1346 | if (err) | ||
1347 | return err; | ||
1348 | |||
1349 | } else if (session->who != OPAL_ADMIN1 && !session->sum) { | ||
1350 | err = build_locking_user(lk_ul_user, sizeof(lk_ul_user), | ||
1351 | session->who - 1); | ||
1352 | if (err) | ||
1353 | return err; | ||
1354 | } else | ||
1355 | memcpy(lk_ul_user, opaluid[OPAL_ADMIN1_UID], OPAL_UID_LENGTH); | ||
1356 | |||
1357 | add_token_u8(&err, dev, OPAL_CALL); | ||
1358 | add_token_bytestring(&err, dev, opaluid[OPAL_SMUID_UID], | ||
1359 | OPAL_UID_LENGTH); | ||
1360 | add_token_bytestring(&err, dev, opalmethod[OPAL_STARTSESSION], | ||
1361 | OPAL_UID_LENGTH); | ||
1362 | |||
1363 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1364 | add_token_u64(&err, dev, hsn); | ||
1365 | add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], | ||
1366 | OPAL_UID_LENGTH); | ||
1367 | add_token_u8(&err, dev, 1); /* Write (read-write session) */ | ||
1368 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1369 | add_token_u8(&err, dev, 0); /* HostChallenge */ | ||
1370 | add_token_bytestring(&err, dev, key, keylen); | ||
1371 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1372 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1373 | add_token_u8(&err, dev, 3); /* HostSignAuth */ | ||
1374 | add_token_bytestring(&err, dev, lk_ul_user, OPAL_UID_LENGTH); | ||
1375 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1376 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1377 | |||
1378 | if (err) { | ||
1379 | pr_err("Error building STARTSESSION command.\n"); | ||
1380 | return err; | ||
1381 | } | ||
1382 | |||
1383 | return finalize_and_send(dev, start_opal_session_cont); | ||
1384 | } | ||
1385 | |||
1386 | static int revert_tper(struct opal_dev *dev) | ||
1387 | { | ||
1388 | int err = 0; | ||
1389 | |||
1390 | clear_opal_cmd(dev); | ||
1391 | set_comid(dev, dev->comid); | ||
1392 | |||
1393 | add_token_u8(&err, dev, OPAL_CALL); | ||
1394 | add_token_bytestring(&err, dev, opaluid[OPAL_ADMINSP_UID], | ||
1395 | OPAL_UID_LENGTH); | ||
1396 | add_token_bytestring(&err, dev, opalmethod[OPAL_REVERT], | ||
1397 | OPAL_UID_LENGTH); | ||
1398 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1399 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1400 | if (err) { | ||
1401 | pr_err("Error building REVERT TPER command.\n"); | ||
1402 | return err; | ||
1403 | } | ||
1404 | |||
1405 | return finalize_and_send(dev, parse_and_check_status); | ||
1406 | } | ||
1407 | |||
1408 | static int internal_activate_user(struct opal_dev *dev) | ||
1409 | { | ||
1410 | struct opal_session_info *session = dev->func_data[dev->state]; | ||
1411 | u8 uid[OPAL_UID_LENGTH]; | ||
1412 | int err = 0; | ||
1413 | |||
1414 | clear_opal_cmd(dev); | ||
1415 | set_comid(dev, dev->comid); | ||
1416 | |||
1417 | memcpy(uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); | ||
1418 | uid[7] = session->who; | ||
1419 | |||
1420 | add_token_u8(&err, dev, OPAL_CALL); | ||
1421 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1422 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1423 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1424 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1425 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1426 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1427 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1428 | add_token_u8(&err, dev, 5); /* Enabled */ | ||
1429 | add_token_u8(&err, dev, OPAL_TRUE); | ||
1430 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1431 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1432 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1433 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1434 | |||
1435 | if (err) { | ||
1436 | pr_err("Error building Activate UserN command.\n"); | ||
1437 | return err; | ||
1438 | } | ||
1439 | |||
1440 | return finalize_and_send(dev, parse_and_check_status); | ||
1441 | } | ||
1442 | |||
1443 | static int erase_locking_range(struct opal_dev *dev) | ||
1444 | { | ||
1445 | struct opal_session_info *session; | ||
1446 | u8 uid[OPAL_UID_LENGTH]; | ||
1447 | int err = 0; | ||
1448 | |||
1449 | clear_opal_cmd(dev); | ||
1450 | set_comid(dev, dev->comid); | ||
1451 | session = dev->func_data[dev->state]; | ||
1452 | |||
1453 | if (build_locking_range(uid, sizeof(uid), session->opal_key.lr) < 0) | ||
1454 | return -ERANGE; | ||
1455 | |||
1456 | add_token_u8(&err, dev, OPAL_CALL); | ||
1457 | add_token_bytestring(&err, dev, uid, OPAL_UID_LENGTH); | ||
1458 | add_token_bytestring(&err, dev, opalmethod[OPAL_ERASE], | ||
1459 | OPAL_UID_LENGTH); | ||
1460 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1461 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1462 | |||
1463 | if (err) { | ||
1464 | pr_err("Error building Erase Locking Range Command.\n"); | ||
1465 | return err; | ||
1466 | } | ||
1467 | return finalize_and_send(dev, parse_and_check_status); | ||
1468 | } | ||
1469 | |||
1470 | static int set_mbr_done(struct opal_dev *dev) | ||
1471 | { | ||
1472 | u8 mbr_done_tf = *(u8 *)dev->func_data[dev->state]; | ||
1473 | int err = 0; | ||
1474 | |||
1475 | clear_opal_cmd(dev); | ||
1476 | set_comid(dev, dev->comid); | ||
1477 | |||
1478 | add_token_u8(&err, dev, OPAL_CALL); | ||
1479 | add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL], | ||
1480 | OPAL_UID_LENGTH); | ||
1481 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1482 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1483 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1484 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1485 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1486 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1487 | add_token_u8(&err, dev, 2); /* Done */ | ||
1488 | add_token_u8(&err, dev, mbr_done_tf); /* Done T or F */ | ||
1489 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1490 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1491 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1492 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1493 | |||
1494 | if (err) { | ||
1495 | pr_err("Error Building set MBR Done command\n"); | ||
1496 | return err; | ||
1497 | } | ||
1498 | |||
1499 | return finalize_and_send(dev, parse_and_check_status); | ||
1500 | } | ||
1501 | |||
1502 | static int set_mbr_enable_disable(struct opal_dev *dev) | ||
1503 | { | ||
1504 | u8 mbr_en_dis = *(u8 *)dev->func_data[dev->state]; | ||
1505 | int err = 0; | ||
1506 | |||
1507 | clear_opal_cmd(dev); | ||
1508 | set_comid(dev, dev->comid); | ||
1509 | |||
1510 | add_token_u8(&err, dev, OPAL_CALL); | ||
1511 | add_token_bytestring(&err, dev, opaluid[OPAL_MBRCONTROL], | ||
1512 | OPAL_UID_LENGTH); | ||
1513 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1514 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1515 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1516 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1517 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1518 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1519 | add_token_u8(&err, dev, 1); /* Enable */ | ||
1520 | add_token_u8(&err, dev, mbr_en_dis); | ||
1521 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1522 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1523 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1524 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1525 | |||
1526 | if (err) { | ||
1527 | pr_err("Error Building set MBR enable/disable command\n"); | ||
1528 | return err; | ||
1529 | } | ||
1530 | |||
1531 | return finalize_and_send(dev, parse_and_check_status); | ||
1532 | } | ||
1533 | |||
1534 | static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, | ||
1535 | struct opal_dev *dev) | ||
1536 | { | ||
1537 | int err = 0; | ||
1538 | |||
1539 | clear_opal_cmd(dev); | ||
1540 | set_comid(dev, dev->comid); | ||
1541 | |||
1542 | add_token_u8(&err, dev, OPAL_CALL); | ||
1543 | add_token_bytestring(&err, dev, cpin_uid, OPAL_UID_LENGTH); | ||
1544 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], | ||
1545 | OPAL_UID_LENGTH); | ||
1546 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1547 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1548 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1549 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1550 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1551 | add_token_u8(&err, dev, 3); /* PIN */ | ||
1552 | add_token_bytestring(&err, dev, key, key_len); | ||
1553 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1554 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1555 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1556 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1557 | |||
1558 | return err; | ||
1559 | } | ||
1560 | |||
1561 | static int set_new_pw(struct opal_dev *dev) | ||
1562 | { | ||
1563 | u8 cpin_uid[OPAL_UID_LENGTH]; | ||
1564 | struct opal_session_info *usr = dev->func_data[dev->state]; | ||
1565 | |||
1566 | |||
1567 | memcpy(cpin_uid, opaluid[OPAL_C_PIN_ADMIN1], OPAL_UID_LENGTH); | ||
1568 | |||
1569 | if (usr->who != OPAL_ADMIN1) { | ||
1570 | cpin_uid[5] = 0x03; | ||
1571 | if (usr->sum) | ||
1572 | cpin_uid[7] = usr->opal_key.lr + 1; | ||
1573 | else | ||
1574 | cpin_uid[7] = usr->who; | ||
1575 | } | ||
1576 | |||
1577 | if (generic_pw_cmd(usr->opal_key.key, usr->opal_key.key_len, | ||
1578 | cpin_uid, dev)) { | ||
1579 | pr_err("Error building set password command.\n"); | ||
1580 | return -ERANGE; | ||
1581 | } | ||
1582 | |||
1583 | return finalize_and_send(dev, parse_and_check_status); | ||
1584 | } | ||
1585 | |||
1586 | static int set_sid_cpin_pin(struct opal_dev *dev) | ||
1587 | { | ||
1588 | u8 cpin_uid[OPAL_UID_LENGTH]; | ||
1589 | struct opal_key *key = dev->func_data[dev->state]; | ||
1590 | |||
1591 | memcpy(cpin_uid, opaluid[OPAL_C_PIN_SID], OPAL_UID_LENGTH); | ||
1592 | |||
1593 | if (generic_pw_cmd(key->key, key->key_len, cpin_uid, dev)) { | ||
1594 | pr_err("Error building Set SID cpin\n"); | ||
1595 | return -ERANGE; | ||
1596 | } | ||
1597 | return finalize_and_send(dev, parse_and_check_status); | ||
1598 | } | ||
1599 | |||
1600 | static int add_user_to_lr(struct opal_dev *dev) | ||
1601 | { | ||
1602 | u8 lr_buffer[OPAL_UID_LENGTH]; | ||
1603 | u8 user_uid[OPAL_UID_LENGTH]; | ||
1604 | struct opal_lock_unlock *lkul; | ||
1605 | int err = 0; | ||
1606 | |||
1607 | clear_opal_cmd(dev); | ||
1608 | set_comid(dev, dev->comid); | ||
1609 | |||
1610 | lkul = dev->func_data[dev->state]; | ||
1611 | |||
1612 | memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_RDLOCKED], | ||
1613 | OPAL_UID_LENGTH); | ||
1614 | |||
1615 | if (lkul->l_state == OPAL_RW) | ||
1616 | memcpy(lr_buffer, opaluid[OPAL_LOCKINGRANGE_ACE_WRLOCKED], | ||
1617 | OPAL_UID_LENGTH); | ||
1618 | |||
1619 | lr_buffer[7] = lkul->session.opal_key.lr; | ||
1620 | |||
1621 | memcpy(user_uid, opaluid[OPAL_USER1_UID], OPAL_UID_LENGTH); | ||
1622 | |||
1623 | user_uid[7] = lkul->session.who; | ||
1624 | |||
1625 | add_token_u8(&err, dev, OPAL_CALL); | ||
1626 | add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH); | ||
1627 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], | ||
1628 | OPAL_UID_LENGTH); | ||
1629 | |||
1630 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1631 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1632 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1633 | |||
1634 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1635 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1636 | add_token_u8(&err, dev, 3); | ||
1637 | |||
1638 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1639 | |||
1640 | |||
1641 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1642 | add_token_bytestring(&err, dev, | ||
1643 | opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF], | ||
1644 | OPAL_UID_LENGTH/2); | ||
1645 | add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH); | ||
1646 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1647 | |||
1648 | |||
1649 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1650 | add_token_bytestring(&err, dev, | ||
1651 | opaluid[OPAL_HALF_UID_AUTHORITY_OBJ_REF], | ||
1652 | OPAL_UID_LENGTH/2); | ||
1653 | add_token_bytestring(&err, dev, user_uid, OPAL_UID_LENGTH); | ||
1654 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1655 | |||
1656 | |||
1657 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1658 | add_token_bytestring(&err, dev, opaluid[OPAL_HALF_UID_BOOLEAN_ACE], | ||
1659 | OPAL_UID_LENGTH/2); | ||
1660 | add_token_u8(&err, dev, 1); | ||
1661 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1662 | |||
1663 | |||
1664 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1665 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1666 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1667 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1668 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1669 | |||
1670 | if (err) { | ||
1671 | pr_err("Error building add user to locking range command.\n"); | ||
1672 | return err; | ||
1673 | } | ||
1674 | |||
1675 | return finalize_and_send(dev, parse_and_check_status); | ||
1676 | } | ||
1677 | |||
1678 | static int lock_unlock_locking_range(struct opal_dev *dev) | ||
1679 | { | ||
1680 | u8 lr_buffer[OPAL_UID_LENGTH]; | ||
1681 | const u8 *method; | ||
1682 | struct opal_lock_unlock *lkul; | ||
1683 | u8 read_locked = 1, write_locked = 1; | ||
1684 | int err = 0; | ||
1685 | |||
1686 | clear_opal_cmd(dev); | ||
1687 | set_comid(dev, dev->comid); | ||
1688 | |||
1689 | method = opalmethod[OPAL_SET]; | ||
1690 | lkul = dev->func_data[dev->state]; | ||
1691 | if (build_locking_range(lr_buffer, sizeof(lr_buffer), | ||
1692 | lkul->session.opal_key.lr) < 0) | ||
1693 | return -ERANGE; | ||
1694 | |||
1695 | switch (lkul->l_state) { | ||
1696 | case OPAL_RO: | ||
1697 | read_locked = 0; | ||
1698 | write_locked = 1; | ||
1699 | break; | ||
1700 | case OPAL_RW: | ||
1701 | read_locked = 0; | ||
1702 | write_locked = 0; | ||
1703 | break; | ||
1704 | case OPAL_LK: | ||
1705 | /* vars are initialized to locked */ | ||
1706 | break; | ||
1707 | default: | ||
1708 | pr_err("Tried to set an invalid locking state... returning to userland\n"); | ||
1709 | return OPAL_INVAL_PARAM; | ||
1710 | } | ||
1711 | |||
1712 | add_token_u8(&err, dev, OPAL_CALL); | ||
1713 | add_token_bytestring(&err, dev, lr_buffer, OPAL_UID_LENGTH); | ||
1714 | add_token_bytestring(&err, dev, opalmethod[OPAL_SET], OPAL_UID_LENGTH); | ||
1715 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1716 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1717 | add_token_u8(&err, dev, OPAL_VALUES); | ||
1718 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1719 | |||
1720 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1721 | add_token_u8(&err, dev, OPAL_READLOCKED); | ||
1722 | add_token_u8(&err, dev, read_locked); | ||
1723 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1724 | |||
1725 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1726 | add_token_u8(&err, dev, OPAL_WRITELOCKED); | ||
1727 | add_token_u8(&err, dev, write_locked); | ||
1728 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1729 | |||
1730 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1731 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1732 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1733 | |||
1734 | if (err) { | ||
1735 | pr_err("Error building SET command.\n"); | ||
1736 | return err; | ||
1737 | } | ||
1738 | return finalize_and_send(dev, parse_and_check_status); | ||
1739 | } | ||
1740 | |||
1741 | |||
1742 | static int lock_unlock_locking_range_sum(struct opal_dev *dev) | ||
1743 | { | ||
1744 | u8 lr_buffer[OPAL_UID_LENGTH]; | ||
1745 | u8 read_locked = 1, write_locked = 1; | ||
1746 | const u8 *method; | ||
1747 | struct opal_lock_unlock *lkul; | ||
1748 | int ret; | ||
1749 | |||
1750 | clear_opal_cmd(dev); | ||
1751 | set_comid(dev, dev->comid); | ||
1752 | |||
1753 | method = opalmethod[OPAL_SET]; | ||
1754 | lkul = dev->func_data[dev->state]; | ||
1755 | if (build_locking_range(lr_buffer, sizeof(lr_buffer), | ||
1756 | lkul->session.opal_key.lr) < 0) | ||
1757 | return -ERANGE; | ||
1758 | |||
1759 | switch (lkul->l_state) { | ||
1760 | case OPAL_RO: | ||
1761 | read_locked = 0; | ||
1762 | write_locked = 1; | ||
1763 | break; | ||
1764 | case OPAL_RW: | ||
1765 | read_locked = 0; | ||
1766 | write_locked = 0; | ||
1767 | break; | ||
1768 | case OPAL_LK: | ||
1769 | /* vars are initialized to locked */ | ||
1770 | break; | ||
1771 | default: | ||
1772 | pr_err("Tried to set an invalid locking state.\n"); | ||
1773 | return OPAL_INVAL_PARAM; | ||
1774 | } | ||
1775 | ret = generic_lr_enable_disable(dev, lr_buffer, 1, 1, | ||
1776 | read_locked, write_locked); | ||
1777 | |||
1778 | if (ret < 0) { | ||
1779 | pr_err("Error building SET command.\n"); | ||
1780 | return ret; | ||
1781 | } | ||
1782 | return finalize_and_send(dev, parse_and_check_status); | ||
1783 | } | ||
1784 | |||
1785 | static int activate_lsp(struct opal_dev *dev) | ||
1786 | { | ||
1787 | struct opal_lr_act *opal_act; | ||
1788 | u8 user_lr[OPAL_UID_LENGTH]; | ||
1789 | u8 uint_3 = 0x83; /* short atom header: 3-byte uint follows */ | ||
1790 | int err = 0, i; | ||
1791 | |||
1792 | clear_opal_cmd(dev); | ||
1793 | set_comid(dev, dev->comid); | ||
1794 | |||
1795 | opal_act = dev->func_data[dev->state]; | ||
1796 | |||
1797 | add_token_u8(&err, dev, OPAL_CALL); | ||
1798 | add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], | ||
1799 | OPAL_UID_LENGTH); | ||
1800 | add_token_bytestring(&err, dev, opalmethod[OPAL_ACTIVATE], | ||
1801 | OPAL_UID_LENGTH); | ||
1802 | |||
1803 | |||
1804 | if (opal_act->sum) { | ||
1805 | err = build_locking_range(user_lr, sizeof(user_lr), | ||
1806 | opal_act->lr[0]); | ||
1807 | if (err) | ||
1808 | return err; | ||
1809 | |||
1810 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1811 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1812 | add_token_u8(&err, dev, uint_3); | ||
1813 | add_token_u8(&err, dev, 6); | ||
1814 | add_token_u8(&err, dev, 0); | ||
1815 | add_token_u8(&err, dev, 0); | ||
1816 | |||
1817 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1818 | add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH); | ||
1819 | for (i = 1; i < opal_act->num_lrs; i++) { | ||
1820 | user_lr[7] = opal_act->lr[i]; | ||
1821 | add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH); | ||
1822 | } | ||
1823 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1824 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1825 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1826 | |||
1827 | } else { | ||
1828 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1829 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1830 | } | ||
1831 | |||
1832 | if (err) { | ||
1833 | pr_err("Error building Activate LockingSP command.\n"); | ||
1834 | return err; | ||
1835 | } | ||
1836 | |||
1837 | return finalize_and_send(dev, parse_and_check_status); | ||
1838 | } | ||
1839 | |||
1840 | static int get_lsp_lifecycle_cont(struct opal_dev *dev) | ||
1841 | { | ||
1842 | u8 lc_status; | ||
1843 | int error = 0; | ||
1844 | |||
1845 | error = parse_and_check_status(dev); | ||
1846 | if (error) | ||
1847 | return error; | ||
1848 | |||
1849 | lc_status = response_get_u64(&dev->parsed, 4); | ||
1850 | /* 0x08 is Manufactured Inactive */ | ||
1851 | /* 0x09 is Manufactured */ | ||
1852 | if (lc_status != OPAL_MANUFACTURED_INACTIVE) { | ||
1853 | pr_err("Couldn't determine the status of the Lifcycle state\n"); | ||
1854 | return -ENODEV; | ||
1855 | } | ||
1856 | |||
1857 | return 0; | ||
1858 | } | ||
1859 | |||
1860 | /* Determine if we're in the Manufactured Inactive or Active state */ | ||
1861 | static int get_lsp_lifecycle(struct opal_dev *dev) | ||
1862 | { | ||
1863 | int err = 0; | ||
1864 | |||
1865 | clear_opal_cmd(dev); | ||
1866 | set_comid(dev, dev->comid); | ||
1867 | |||
1868 | add_token_u8(&err, dev, OPAL_CALL); | ||
1869 | add_token_bytestring(&err, dev, opaluid[OPAL_LOCKINGSP_UID], | ||
1870 | OPAL_UID_LENGTH); | ||
1871 | add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); | ||
1872 | |||
1873 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1874 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1875 | |||
1876 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1877 | add_token_u8(&err, dev, 3); /* Start Column */ | ||
1878 | add_token_u8(&err, dev, 6); /* Lifecycle Column */ | ||
1879 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1880 | |||
1881 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1882 | add_token_u8(&err, dev, 4); /* End Column */ | ||
1883 | add_token_u8(&err, dev, 6); /* Lifecycle Column */ | ||
1884 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1885 | |||
1886 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1887 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1888 | |||
1889 | if (err) { | ||
1890 | pr_err("Error Building GET Lifecycle Status command\n"); | ||
1891 | return err; | ||
1892 | } | ||
1893 | |||
1894 | return finalize_and_send(dev, get_lsp_lifecycle_cont); | ||
1895 | } | ||
1896 | |||
1897 | static int get_msid_cpin_pin_cont(struct opal_dev *dev) | ||
1898 | { | ||
1899 | const char *msid_pin; | ||
1900 | size_t strlen; | ||
1901 | int error = 0; | ||
1902 | |||
1903 | error = parse_and_check_status(dev); | ||
1904 | if (error) | ||
1905 | return error; | ||
1906 | |||
1907 | strlen = response_get_string(&dev->parsed, 4, &msid_pin); | ||
1908 | if (!msid_pin) { | ||
1909 | pr_err("%s: Couldn't extract PIN from response\n", __func__); | ||
1910 | return OPAL_INVAL_PARAM; | ||
1911 | } | ||
1912 | |||
1913 | dev->prev_data = kmemdup(msid_pin, strlen, GFP_KERNEL); | ||
1914 | if (!dev->prev_data) | ||
1915 | return -ENOMEM; | ||
1916 | |||
1917 | dev->prev_d_len = strlen; | ||
1918 | |||
1919 | return 0; | ||
1920 | } | ||
1921 | |||
1922 | static int get_msid_cpin_pin(struct opal_dev *dev) | ||
1923 | { | ||
1924 | int err = 0; | ||
1925 | |||
1926 | clear_opal_cmd(dev); | ||
1927 | set_comid(dev, dev->comid); | ||
1928 | |||
1929 | |||
1930 | add_token_u8(&err, dev, OPAL_CALL); | ||
1931 | add_token_bytestring(&err, dev, opaluid[OPAL_C_PIN_MSID], | ||
1932 | OPAL_UID_LENGTH); | ||
1933 | add_token_bytestring(&err, dev, opalmethod[OPAL_GET], OPAL_UID_LENGTH); | ||
1934 | |||
1935 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1936 | add_token_u8(&err, dev, OPAL_STARTLIST); | ||
1937 | |||
1938 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1939 | add_token_u8(&err, dev, 3); /* Start Column */ | ||
1940 | add_token_u8(&err, dev, 3); /* PIN */ | ||
1941 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1942 | |||
1943 | add_token_u8(&err, dev, OPAL_STARTNAME); | ||
1944 | add_token_u8(&err, dev, 4); /* End Column */ | ||
1945 | add_token_u8(&err, dev, 3); /* PIN */ | ||
1946 | add_token_u8(&err, dev, OPAL_ENDNAME); | ||
1947 | |||
1948 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1949 | add_token_u8(&err, dev, OPAL_ENDLIST); | ||
1950 | |||
1951 | if (err) { | ||
1952 | pr_err("Error building Get MSID CPIN PIN command.\n"); | ||
1953 | return err; | ||
1954 | } | ||
1955 | |||
1956 | return finalize_and_send(dev, get_msid_cpin_pin_cont); | ||
1957 | } | ||
1958 | |||
1959 | static int build_end_opal_session(struct opal_dev *dev) | ||
1960 | { | ||
1961 | int err = 0; | ||
1962 | |||
1963 | clear_opal_cmd(dev); | ||
1964 | |||
1965 | set_comid(dev, dev->comid); | ||
1966 | add_token_u8(&err, dev, OPAL_ENDOFSESSION); | ||
1967 | return err; | ||
1968 | } | ||
1969 | |||
1970 | static int end_opal_session(struct opal_dev *dev) | ||
1971 | { | ||
1972 | int ret = build_end_opal_session(dev); | ||
1973 | |||
1974 | if (ret < 0) | ||
1975 | return ret; | ||
1976 | return finalize_and_send(dev, end_session_cont); | ||
1977 | } | ||
1978 | |||
1979 | static int end_opal_session_error(struct opal_dev *dev) | ||
1980 | { | ||
1981 | const opal_step error_end_session[] = { | ||
1982 | end_opal_session, | ||
1983 | NULL, | ||
1984 | }; | ||
1985 | dev->funcs = error_end_session; | ||
1986 | dev->state = 0; | ||
1987 | return next(dev); | ||
1988 | } | ||
1989 | |||
1990 | static inline void setup_opal_dev(struct opal_dev *dev, | ||
1991 | const opal_step *funcs) | ||
1992 | { | ||
1993 | dev->state = 0; | ||
1994 | dev->funcs = funcs; | ||
1995 | dev->tsn = 0; | ||
1996 | dev->hsn = 0; | ||
1997 | dev->func_data = NULL; | ||
1998 | dev->prev_data = NULL; | ||
1999 | } | ||
2000 | |||
2001 | static int check_opal_support(struct opal_dev *dev) | ||
2002 | { | ||
2003 | static const opal_step funcs[] = { | ||
2004 | opal_discovery0, | ||
2005 | NULL | ||
2006 | }; | ||
2007 | int ret; | ||
2008 | |||
2009 | mutex_lock(&dev->dev_lock); | ||
2010 | setup_opal_dev(dev, funcs); | ||
2011 | ret = next(dev); | ||
2012 | dev->supported = !ret; | ||
2013 | mutex_unlock(&dev->dev_lock); | ||
2014 | return ret; | ||
2015 | } | ||
2016 | |||
2017 | struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv) | ||
2018 | { | ||
2019 | struct opal_dev *dev; | ||
2020 | |||
2021 | dev = kmalloc(sizeof(*dev), GFP_KERNEL); | ||
2022 | if (!dev) | ||
2023 | return NULL; | ||
2024 | |||
2025 | INIT_LIST_HEAD(&dev->unlk_lst); | ||
2026 | mutex_init(&dev->dev_lock); | ||
2027 | dev->data = data; | ||
2028 | dev->send_recv = send_recv; | ||
2029 | if (check_opal_support(dev) != 0) { | ||
2030 | pr_debug("Opal is not supported on this device\n"); | ||
2031 | kfree(dev); | ||
2032 | return NULL; | ||
2033 | } | ||
2034 | return dev; | ||
2035 | } | ||
2036 | EXPORT_SYMBOL(init_opal_dev); | ||
2037 | |||
2038 | static int opal_secure_erase_locking_range(struct opal_dev *dev, | ||
2039 | struct opal_session_info *opal_session) | ||
2040 | { | ||
2041 | void *data[3] = { NULL }; | ||
2042 | static const opal_step erase_funcs[] = { | ||
2043 | opal_discovery0, | ||
2044 | start_auth_opal_session, | ||
2045 | get_active_key, | ||
2046 | gen_key, | ||
2047 | end_opal_session, | ||
2048 | NULL, | ||
2049 | }; | ||
2050 | int ret; | ||
2051 | |||
2052 | mutex_lock(&dev->dev_lock); | ||
2053 | setup_opal_dev(dev, erase_funcs); | ||
2054 | |||
2055 | dev->func_data = data; | ||
2056 | dev->func_data[1] = opal_session; | ||
2057 | dev->func_data[2] = &opal_session->opal_key.lr; | ||
2058 | |||
2059 | ret = next(dev); | ||
2060 | mutex_unlock(&dev->dev_lock); | ||
2061 | return ret; | ||
2062 | } | ||
2063 | |||
2064 | static int opal_erase_locking_range(struct opal_dev *dev, | ||
2065 | struct opal_session_info *opal_session) | ||
2066 | { | ||
2067 | void *data[3] = { NULL }; | ||
2068 | static const opal_step erase_funcs[] = { | ||
2069 | opal_discovery0, | ||
2070 | start_auth_opal_session, | ||
2071 | erase_locking_range, | ||
2072 | end_opal_session, | ||
2073 | NULL, | ||
2074 | }; | ||
2075 | int ret; | ||
2076 | |||
2077 | mutex_lock(&dev->dev_lock); | ||
2078 | setup_opal_dev(dev, erase_funcs); | ||
2079 | |||
2080 | dev->func_data = data; | ||
2081 | dev->func_data[1] = opal_session; | ||
2082 | dev->func_data[2] = opal_session; | ||
2083 | |||
2084 | ret = next(dev); | ||
2085 | mutex_unlock(&dev->dev_lock); | ||
2086 | return ret; | ||
2087 | } | ||
2088 | |||
2089 | static int opal_enable_disable_shadow_mbr(struct opal_dev *dev, | ||
2090 | struct opal_mbr_data *opal_mbr) | ||
2091 | { | ||
2092 | void *func_data[6] = { NULL }; | ||
2093 | static const opal_step mbr_funcs[] = { | ||
2094 | opal_discovery0, | ||
2095 | start_admin1LSP_opal_session, | ||
2096 | set_mbr_done, | ||
2097 | end_opal_session, | ||
2098 | start_admin1LSP_opal_session, | ||
2099 | set_mbr_enable_disable, | ||
2100 | end_opal_session, | ||
2101 | NULL, | ||
2102 | }; | ||
2103 | int ret; | ||
2104 | |||
2105 | if (opal_mbr->enable_disable != OPAL_MBR_ENABLE && | ||
2106 | opal_mbr->enable_disable != OPAL_MBR_DISABLE) | ||
2107 | return -EINVAL; | ||
2108 | |||
2109 | mutex_lock(&dev->dev_lock); | ||
2110 | setup_opal_dev(dev, mbr_funcs); | ||
2111 | dev->func_data = func_data; | ||
2112 | dev->func_data[1] = &opal_mbr->key; | ||
2113 | dev->func_data[2] = &opal_mbr->enable_disable; | ||
2114 | dev->func_data[4] = &opal_mbr->key; | ||
2115 | dev->func_data[5] = &opal_mbr->enable_disable; | ||
2116 | ret = next(dev); | ||
2117 | mutex_unlock(&dev->dev_lock); | ||
2118 | return ret; | ||
2119 | } | ||
2120 | |||
2121 | static int opal_save(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) | ||
2122 | { | ||
2123 | struct opal_suspend_data *suspend; | ||
2124 | |||
2125 | suspend = kzalloc(sizeof(*suspend), GFP_KERNEL); | ||
2126 | if (!suspend) | ||
2127 | return -ENOMEM; | ||
2128 | |||
2129 | suspend->unlk = *lk_unlk; | ||
2130 | suspend->lr = lk_unlk->session.opal_key.lr; | ||
2131 | |||
2132 | mutex_lock(&dev->dev_lock); | ||
2133 | setup_opal_dev(dev, NULL); | ||
2134 | add_suspend_info(dev, suspend); | ||
2135 | mutex_unlock(&dev->dev_lock); | ||
2136 | return 0; | ||
2137 | } | ||
2138 | |||
2139 | static int opal_add_user_to_lr(struct opal_dev *dev, | ||
2140 | struct opal_lock_unlock *lk_unlk) | ||
2141 | { | ||
2142 | void *func_data[3] = { NULL }; | ||
2143 | static const opal_step funcs[] = { | ||
2144 | opal_discovery0, | ||
2145 | start_admin1LSP_opal_session, | ||
2146 | add_user_to_lr, | ||
2147 | end_opal_session, | ||
2148 | NULL | ||
2149 | }; | ||
2150 | int ret; | ||
2151 | |||
2152 | if (lk_unlk->l_state != OPAL_RO && | ||
2153 | lk_unlk->l_state != OPAL_RW) { | ||
2154 | pr_err("Locking state was not RO or RW\n"); | ||
2155 | return -EINVAL; | ||
2156 | } | ||
2157 | if (lk_unlk->session.who < OPAL_USER1 || | ||
2158 | lk_unlk->session.who > OPAL_USER9) { | ||
2159 | pr_err("Authority was not within the range of users: %d\n", | ||
2160 | lk_unlk->session.who); | ||
2161 | return -EINVAL; | ||
2162 | } | ||
2163 | if (lk_unlk->session.sum) { | ||
2164 | pr_err("%s not supported in sum. Use setup locking range\n", | ||
2165 | __func__); | ||
2166 | return -EINVAL; | ||
2167 | } | ||
2168 | |||
2169 | mutex_lock(&dev->dev_lock); | ||
2170 | setup_opal_dev(dev, funcs); | ||
2171 | dev->func_data = func_data; | ||
2172 | dev->func_data[1] = &lk_unlk->session.opal_key; | ||
2173 | dev->func_data[2] = lk_unlk; | ||
2174 | ret = next(dev); | ||
2175 | mutex_unlock(&dev->dev_lock); | ||
2176 | return ret; | ||
2177 | } | ||
2178 | |||
2179 | static int opal_reverttper(struct opal_dev *dev, struct opal_key *opal) | ||
2180 | { | ||
2181 | void *data[2] = { NULL }; | ||
2182 | static const opal_step revert_funcs[] = { | ||
2183 | opal_discovery0, | ||
2184 | start_SIDASP_opal_session, | ||
2185 | revert_tper, /* controller will terminate session */ | ||
2186 | NULL, | ||
2187 | }; | ||
2188 | int ret; | ||
2189 | |||
2190 | mutex_lock(&dev->dev_lock); | ||
2191 | setup_opal_dev(dev, revert_funcs); | ||
2192 | dev->func_data = data; | ||
2193 | dev->func_data[1] = opal; | ||
2194 | ret = next(dev); | ||
2195 | mutex_unlock(&dev->dev_lock); | ||
2196 | return ret; | ||
2197 | } | ||
2198 | |||
2199 | static int __opal_lock_unlock_sum(struct opal_dev *dev) | ||
2200 | { | ||
2201 | static const opal_step ulk_funcs_sum[] = { | ||
2202 | opal_discovery0, | ||
2203 | start_auth_opal_session, | ||
2204 | lock_unlock_locking_range_sum, | ||
2205 | end_opal_session, | ||
2206 | NULL | ||
2207 | }; | ||
2208 | |||
2209 | dev->funcs = ulk_funcs_sum; | ||
2210 | return next(dev); | ||
2211 | } | ||
2212 | |||
2213 | static int __opal_lock_unlock(struct opal_dev *dev) | ||
2214 | { | ||
2215 | static const opal_step _unlock_funcs[] = { | ||
2216 | opal_discovery0, | ||
2217 | start_auth_opal_session, | ||
2218 | lock_unlock_locking_range, | ||
2219 | end_opal_session, | ||
2220 | NULL | ||
2221 | }; | ||
2222 | |||
2223 | dev->funcs = _unlock_funcs; | ||
2224 | return next(dev); | ||
2225 | } | ||
2226 | |||
2227 | static int opal_lock_unlock(struct opal_dev *dev, struct opal_lock_unlock *lk_unlk) | ||
2228 | { | ||
2229 | void *func_data[3] = { NULL }; | ||
2230 | int ret; | ||
2231 | |||
2232 | if (lk_unlk->session.who < OPAL_ADMIN1 || | ||
2233 | lk_unlk->session.who > OPAL_USER9) | ||
2234 | return -EINVAL; | ||
2235 | |||
2236 | mutex_lock(&dev->dev_lock); | ||
2237 | setup_opal_dev(dev, NULL); | ||
2238 | dev->func_data = func_data; | ||
2239 | dev->func_data[1] = &lk_unlk->session; | ||
2240 | dev->func_data[2] = lk_unlk; | ||
2241 | |||
2242 | if (lk_unlk->session.sum) | ||
2243 | ret = __opal_lock_unlock_sum(dev); | ||
2244 | else | ||
2245 | ret = __opal_lock_unlock(dev); | ||
2246 | |||
2247 | mutex_unlock(&dev->dev_lock); | ||
2248 | return ret; | ||
2249 | } | ||
2250 | |||
2251 | static int opal_take_ownership(struct opal_dev *dev, struct opal_key *opal) | ||
2252 | { | ||
2253 | static const opal_step owner_funcs[] = { | ||
2254 | opal_discovery0, | ||
2255 | start_anybodyASP_opal_session, | ||
2256 | get_msid_cpin_pin, | ||
2257 | end_opal_session, | ||
2258 | start_SIDASP_opal_session, | ||
2259 | set_sid_cpin_pin, | ||
2260 | end_opal_session, | ||
2261 | NULL | ||
2262 | }; | ||
2263 | void *data[6] = { NULL }; | ||
2264 | int ret; | ||
2265 | |||
2266 | if (!dev) | ||
2267 | return -ENODEV; | ||
2268 | |||
2269 | mutex_lock(&dev->dev_lock); | ||
2270 | setup_opal_dev(dev, owner_funcs); | ||
2271 | dev->func_data = data; | ||
2272 | dev->func_data[4] = opal; | ||
2273 | dev->func_data[5] = opal; | ||
2274 | ret = next(dev); | ||
2275 | mutex_unlock(&dev->dev_lock); | ||
2276 | return ret; | ||
2277 | } | ||
2278 | |||
2279 | static int opal_activate_lsp(struct opal_dev *dev, struct opal_lr_act *opal_lr_act) | ||
2280 | { | ||
2281 | void *data[4] = { NULL }; | ||
2282 | static const opal_step active_funcs[] = { | ||
2283 | opal_discovery0, | ||
2284 | start_SIDASP_opal_session, /* Open session as SID auth */ | ||
2285 | get_lsp_lifecycle, | ||
2286 | activate_lsp, | ||
2287 | end_opal_session, | ||
2288 | NULL | ||
2289 | }; | ||
2290 | int ret; | ||
2291 | |||
2292 | if (!opal_lr_act->num_lrs || opal_lr_act->num_lrs > OPAL_MAX_LRS) | ||
2293 | return -EINVAL; | ||
2294 | |||
2295 | mutex_lock(&dev->dev_lock); | ||
2296 | setup_opal_dev(dev, active_funcs); | ||
2297 | dev->func_data = data; | ||
2298 | dev->func_data[1] = &opal_lr_act->key; | ||
2299 | dev->func_data[3] = opal_lr_act; | ||
2300 | ret = next(dev); | ||
2301 | mutex_unlock(&dev->dev_lock); | ||
2302 | return ret; | ||
2303 | } | ||
2304 | |||
2305 | static int opal_setup_locking_range(struct opal_dev *dev, | ||
2306 | struct opal_user_lr_setup *opal_lrs) | ||
2307 | { | ||
2308 | void *data[3] = { NULL }; | ||
2309 | static const opal_step lr_funcs[] = { | ||
2310 | opal_discovery0, | ||
2311 | start_auth_opal_session, | ||
2312 | setup_locking_range, | ||
2313 | end_opal_session, | ||
2314 | NULL, | ||
2315 | }; | ||
2316 | int ret; | ||
2317 | |||
2318 | mutex_lock(&dev->dev_lock); | ||
2319 | setup_opal_dev(dev, lr_funcs); | ||
2320 | dev->func_data = data; | ||
2321 | dev->func_data[1] = &opal_lrs->session; | ||
2322 | dev->func_data[2] = opal_lrs; | ||
2323 | ret = next(dev); | ||
2324 | mutex_unlock(&dev->dev_lock); | ||
2325 | return ret; | ||
2326 | } | ||
2327 | |||
2328 | static int opal_set_new_pw(struct opal_dev *dev, struct opal_new_pw *opal_pw) | ||
2329 | { | ||
2330 | static const opal_step pw_funcs[] = { | ||
2331 | opal_discovery0, | ||
2332 | start_auth_opal_session, | ||
2333 | set_new_pw, | ||
2334 | end_opal_session, | ||
2335 | NULL | ||
2336 | }; | ||
2337 | void *data[3] = { NULL }; | ||
2338 | int ret; | ||
2339 | |||
2340 | if (opal_pw->session.who < OPAL_ADMIN1 || | ||
2341 | opal_pw->session.who > OPAL_USER9 || | ||
2342 | opal_pw->new_user_pw.who < OPAL_ADMIN1 || | ||
2343 | opal_pw->new_user_pw.who > OPAL_USER9) | ||
2344 | return -EINVAL; | ||
2345 | |||
2346 | mutex_lock(&dev->dev_lock); | ||
2347 | setup_opal_dev(dev, pw_funcs); | ||
2348 | dev->func_data = data; | ||
2349 | dev->func_data[1] = (void *) &opal_pw->session; | ||
2350 | dev->func_data[2] = (void *) &opal_pw->new_user_pw; | ||
2351 | |||
2352 | ret = next(dev); | ||
2353 | mutex_unlock(&dev->dev_lock); | ||
2354 | return ret; | ||
2355 | } | ||
2356 | |||
2357 | static int opal_activate_user(struct opal_dev *dev, | ||
2358 | struct opal_session_info *opal_session) | ||
2359 | { | ||
2360 | static const opal_step act_funcs[] = { | ||
2361 | opal_discovery0, | ||
2362 | start_admin1LSP_opal_session, | ||
2363 | internal_activate_user, | ||
2364 | end_opal_session, | ||
2365 | NULL | ||
2366 | }; | ||
2367 | void *data[3] = { NULL }; | ||
2368 | int ret; | ||
2369 | |||
2370 | /* We can't activate Admin1; it's active as manufactured */ | ||
2371 | if (opal_session->who < OPAL_USER1 || | ||
2372 | opal_session->who > OPAL_USER9) { | ||
2373 | pr_err("Who was not a valid user: %d\n", opal_session->who); | ||
2374 | return -EINVAL; | ||
2375 | } | ||
2376 | |||
2377 | mutex_lock(&dev->dev_lock); | ||
2378 | setup_opal_dev(dev, act_funcs); | ||
2379 | dev->func_data = data; | ||
2380 | dev->func_data[1] = &opal_session->opal_key; | ||
2381 | dev->func_data[2] = opal_session; | ||
2382 | ret = next(dev); | ||
2383 | mutex_unlock(&dev->dev_lock); | ||
2384 | return ret; | ||
2385 | } | ||
2386 | |||
2387 | bool opal_unlock_from_suspend(struct opal_dev *dev) | ||
2388 | { | ||
2389 | struct opal_suspend_data *suspend; | ||
2390 | void *func_data[3] = { NULL }; | ||
2391 | bool was_failure = false; | ||
2392 | int ret = 0; | ||
2393 | |||
2394 | if (!dev) | ||
2395 | return false; | ||
2396 | if (!dev->supported) | ||
2397 | return false; | ||
2398 | |||
2399 | mutex_lock(&dev->dev_lock); | ||
2400 | setup_opal_dev(dev, NULL); | ||
2401 | dev->func_data = func_data; | ||
2402 | |||
2403 | list_for_each_entry(suspend, &dev->unlk_lst, node) { | ||
2404 | dev->state = 0; | ||
2405 | dev->func_data[1] = &suspend->unlk.session; | ||
2406 | dev->func_data[2] = &suspend->unlk; | ||
2407 | dev->tsn = 0; | ||
2408 | dev->hsn = 0; | ||
2409 | |||
2410 | if (suspend->unlk.session.sum) | ||
2411 | ret = __opal_lock_unlock_sum(dev); | ||
2412 | else | ||
2413 | ret = __opal_lock_unlock(dev); | ||
2414 | if (ret) { | ||
2415 | pr_warn("Failed to unlock LR %hhu with sum %d\n", | ||
2416 | suspend->unlk.session.opal_key.lr, | ||
2417 | suspend->unlk.session.sum); | ||
2418 | was_failure = true; | ||
2419 | } | ||
2420 | } | ||
2421 | mutex_unlock(&dev->dev_lock); | ||
2422 | return was_failure; | ||
2423 | } | ||
2424 | EXPORT_SYMBOL(opal_unlock_from_suspend); | ||
2425 | |||
2426 | int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) | ||
2427 | { | ||
2428 | void *p; | ||
2429 | int ret = -ENOTTY; | ||
2430 | |||
2431 | if (!capable(CAP_SYS_ADMIN)) | ||
2432 | return -EACCES; | ||
2433 | if (!dev) | ||
2434 | return -ENOTSUPP; | ||
2435 | if (!dev->supported) { | ||
2436 | pr_err("Not supported\n"); | ||
2437 | return -ENOTSUPP; | ||
2438 | } | ||
2439 | |||
2440 | p = memdup_user(arg, _IOC_SIZE(cmd)); | ||
2441 | if (IS_ERR(p)) | ||
2442 | return PTR_ERR(p); | ||
2443 | |||
2444 | switch (cmd) { | ||
2445 | case IOC_OPAL_SAVE: | ||
2446 | ret = opal_save(dev, p); | ||
2447 | break; | ||
2448 | case IOC_OPAL_LOCK_UNLOCK: | ||
2449 | ret = opal_lock_unlock(dev, p); | ||
2450 | break; | ||
2451 | case IOC_OPAL_TAKE_OWNERSHIP: | ||
2452 | ret = opal_take_ownership(dev, p); | ||
2453 | break; | ||
2454 | case IOC_OPAL_ACTIVATE_LSP: | ||
2455 | ret = opal_activate_lsp(dev, p); | ||
2456 | break; | ||
2457 | case IOC_OPAL_SET_PW: | ||
2458 | ret = opal_set_new_pw(dev, p); | ||
2459 | break; | ||
2460 | case IOC_OPAL_ACTIVATE_USR: | ||
2461 | ret = opal_activate_user(dev, p); | ||
2462 | break; | ||
2463 | case IOC_OPAL_REVERT_TPR: | ||
2464 | ret = opal_reverttper(dev, p); | ||
2465 | break; | ||
2466 | case IOC_OPAL_LR_SETUP: | ||
2467 | ret = opal_setup_locking_range(dev, p); | ||
2468 | break; | ||
2469 | case IOC_OPAL_ADD_USR_TO_LR: | ||
2470 | ret = opal_add_user_to_lr(dev, p); | ||
2471 | break; | ||
2472 | case IOC_OPAL_ENABLE_DISABLE_MBR: | ||
2473 | ret = opal_enable_disable_shadow_mbr(dev, p); | ||
2474 | break; | ||
2475 | case IOC_OPAL_ERASE_LR: | ||
2476 | ret = opal_erase_locking_range(dev, p); | ||
2477 | break; | ||
2478 | case IOC_OPAL_SECURE_ERASE_LR: | ||
2479 | ret = opal_secure_erase_locking_range(dev, p); | ||
2480 | break; | ||
2481 | default: | ||
2482 | pr_warn("No such Opal Ioctl %u\n", cmd); | ||
2483 | } | ||
2484 | |||
2485 | kfree(p); | ||
2486 | return ret; | ||
2487 | } | ||
2488 | EXPORT_SYMBOL_GPL(sed_ioctl); | ||
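
The sed-opal code above drives every user-visible operation through the same pattern: a NULL-terminated table of opal_step callbacks is installed in dev->funcs, next() walks it with dev->state as the index, and end_opal_session_error() provides the error path that closes a half-open session. The following is a minimal, self-contained sketch of that control flow only; the names (demo_dev, demo_step, run_steps) are hypothetical, and this is plain userspace C, not the driver's code.

#include <stdio.h>

struct demo_dev;
typedef int (*demo_step)(struct demo_dev *dev);

struct demo_dev {
	const demo_step *funcs;	/* NULL-terminated table, like dev->funcs */
	int state;		/* index of the step being run, like dev->state */
};

static int discovery(struct demo_dev *dev)     { printf("discovery\n");     return 0; }
static int start_session(struct demo_dev *dev) { printf("start session\n"); return 0; }
static int end_session(struct demo_dev *dev)   { printf("end session\n");   return 0; }

/* Analogous to next(): run each step in order until NULL or an error. */
static int run_steps(struct demo_dev *dev)
{
	int ret = 0;

	while (dev->funcs[dev->state]) {
		ret = dev->funcs[dev->state](dev);
		if (ret)
			break;	/* the real code would also close the open session here */
		dev->state++;
	}
	return ret;
}

int main(void)
{
	static const demo_step funcs[] = {
		discovery, start_session, end_session, NULL
	};
	struct demo_dev dev = { .funcs = funcs, .state = 0 };

	return run_steps(&dev);
}
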
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index e5c5b8eb14a9..3a44438a1195 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c | |||
@@ -4074,41 +4074,27 @@ clean_up: | |||
4074 | 4074 | ||
4075 | static void cciss_interrupt_mode(ctlr_info_t *h) | 4075 | static void cciss_interrupt_mode(ctlr_info_t *h) |
4076 | { | 4076 | { |
4077 | #ifdef CONFIG_PCI_MSI | 4077 | int ret; |
4078 | int err; | ||
4079 | struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1}, | ||
4080 | {0, 2}, {0, 3} | ||
4081 | }; | ||
4082 | 4078 | ||
4083 | /* Some boards advertise MSI but don't really support it */ | 4079 | /* Some boards advertise MSI but don't really support it */ |
4084 | if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) || | 4080 | if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) || |
4085 | (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11)) | 4081 | (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11)) |
4086 | goto default_int_mode; | 4082 | goto default_int_mode; |
4087 | 4083 | ||
4088 | if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { | 4084 | ret = pci_alloc_irq_vectors(h->pdev, 4, 4, PCI_IRQ_MSIX); |
4089 | err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4); | 4085 | if (ret >= 0) { |
4090 | if (!err) { | 4086 | h->intr[0] = pci_irq_vector(h->pdev, 0); |
4091 | h->intr[0] = cciss_msix_entries[0].vector; | 4087 | h->intr[1] = pci_irq_vector(h->pdev, 1); |
4092 | h->intr[1] = cciss_msix_entries[1].vector; | 4088 | h->intr[2] = pci_irq_vector(h->pdev, 2); |
4093 | h->intr[2] = cciss_msix_entries[2].vector; | 4089 | h->intr[3] = pci_irq_vector(h->pdev, 3); |
4094 | h->intr[3] = cciss_msix_entries[3].vector; | 4090 | return; |
4095 | h->msix_vector = 1; | ||
4096 | return; | ||
4097 | } else { | ||
4098 | dev_warn(&h->pdev->dev, | ||
4099 | "MSI-X init failed %d\n", err); | ||
4100 | } | ||
4101 | } | ||
4102 | if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) { | ||
4103 | if (!pci_enable_msi(h->pdev)) | ||
4104 | h->msi_vector = 1; | ||
4105 | else | ||
4106 | dev_warn(&h->pdev->dev, "MSI init failed\n"); | ||
4107 | } | 4091 | } |
4092 | |||
4093 | ret = pci_alloc_irq_vectors(h->pdev, 1, 1, PCI_IRQ_MSI); | ||
4094 | |||
4108 | default_int_mode: | 4095 | default_int_mode: |
4109 | #endif /* CONFIG_PCI_MSI */ | ||
4110 | /* if we get here we're going to use the default interrupt mode */ | 4096 | /* if we get here we're going to use the default interrupt mode */ |
4111 | h->intr[h->intr_mode] = h->pdev->irq; | 4097 | h->intr[h->intr_mode] = pci_irq_vector(h->pdev, 0); |
4112 | return; | 4098 | return; |
4113 | } | 4099 | } |
4114 | 4100 | ||
@@ -4888,7 +4874,7 @@ static int cciss_request_irq(ctlr_info_t *h, | |||
4888 | irqreturn_t (*msixhandler)(int, void *), | 4874 | irqreturn_t (*msixhandler)(int, void *), |
4889 | irqreturn_t (*intxhandler)(int, void *)) | 4875 | irqreturn_t (*intxhandler)(int, void *)) |
4890 | { | 4876 | { |
4891 | if (h->msix_vector || h->msi_vector) { | 4877 | if (h->pdev->msi_enabled || h->pdev->msix_enabled) { |
4892 | if (!request_irq(h->intr[h->intr_mode], msixhandler, | 4878 | if (!request_irq(h->intr[h->intr_mode], msixhandler, |
4893 | 0, h->devname, h)) | 4879 | 0, h->devname, h)) |
4894 | return 0; | 4880 | return 0; |
@@ -4934,12 +4920,7 @@ static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h) | |||
4934 | int ctlr = h->ctlr; | 4920 | int ctlr = h->ctlr; |
4935 | 4921 | ||
4936 | free_irq(h->intr[h->intr_mode], h); | 4922 | free_irq(h->intr[h->intr_mode], h); |
4937 | #ifdef CONFIG_PCI_MSI | 4923 | pci_free_irq_vectors(h->pdev); |
4938 | if (h->msix_vector) | ||
4939 | pci_disable_msix(h->pdev); | ||
4940 | else if (h->msi_vector) | ||
4941 | pci_disable_msi(h->pdev); | ||
4942 | #endif /* CONFIG_PCI_MSI */ | ||
4943 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); | 4924 | cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); |
4944 | cciss_free_scatterlists(h); | 4925 | cciss_free_scatterlists(h); |
4945 | cciss_free_cmd_pool(h); | 4926 | cciss_free_cmd_pool(h); |
@@ -5295,12 +5276,7 @@ static void cciss_remove_one(struct pci_dev *pdev) | |||
5295 | 5276 | ||
5296 | cciss_shutdown(pdev); | 5277 | cciss_shutdown(pdev); |
5297 | 5278 | ||
5298 | #ifdef CONFIG_PCI_MSI | 5279 | pci_free_irq_vectors(h->pdev); |
5299 | if (h->msix_vector) | ||
5300 | pci_disable_msix(h->pdev); | ||
5301 | else if (h->msi_vector) | ||
5302 | pci_disable_msi(h->pdev); | ||
5303 | #endif /* CONFIG_PCI_MSI */ | ||
5304 | 5280 | ||
5305 | iounmap(h->transtable); | 5281 | iounmap(h->transtable); |
5306 | iounmap(h->cfgtable); | 5282 | iounmap(h->cfgtable); |
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index 7fda30e4a241..4affa94ca17b 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h | |||
@@ -90,8 +90,6 @@ struct ctlr_info | |||
90 | # define SIMPLE_MODE_INT 2 | 90 | # define SIMPLE_MODE_INT 2 |
91 | # define MEMQ_MODE_INT 3 | 91 | # define MEMQ_MODE_INT 3 |
92 | unsigned int intr[4]; | 92 | unsigned int intr[4]; |
93 | unsigned int msix_vector; | ||
94 | unsigned int msi_vector; | ||
95 | int intr_mode; | 93 | int intr_mode; |
96 | int cciss_max_sectors; | 94 | int cciss_max_sectors; |
97 | BYTE cciss_read; | 95 | BYTE cciss_read; |
@@ -333,7 +331,7 @@ static unsigned long SA5_performant_completed(ctlr_info_t *h) | |||
333 | */ | 331 | */ |
334 | register_value = readl(h->vaddr + SA5_OUTDB_STATUS); | 332 | register_value = readl(h->vaddr + SA5_OUTDB_STATUS); |
335 | /* msi auto clears the interrupt pending bit. */ | 333 | /* msi auto clears the interrupt pending bit. */ |
336 | if (!(h->msi_vector || h->msix_vector)) { | 334 | if (!(h->pdev->msi_enabled || h->pdev->msix_enabled)) { |
337 | writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR); | 335 | writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR); |
338 | /* Do a read in order to flush the write to the controller | 336 | /* Do a read in order to flush the write to the controller |
339 | * (as per spec.) | 337 | * (as per spec.) |
@@ -393,7 +391,7 @@ static bool SA5_performant_intr_pending(ctlr_info_t *h) | |||
393 | if (!register_value) | 391 | if (!register_value) |
394 | return false; | 392 | return false; |
395 | 393 | ||
396 | if (h->msi_vector || h->msix_vector) | 394 | if (h->pdev->msi_enabled || h->pdev->msix_enabled) |
397 | return true; | 395 | return true; |
398 | 396 | ||
399 | /* Read outbound doorbell to flush */ | 397 | /* Read outbound doorbell to flush */ |
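
The cciss conversion above replaces the driver's hand-rolled MSI-X/MSI bookkeeping with pci_alloc_irq_vectors(), reading the resulting Linux IRQ numbers back through pci_irq_vector(). A rough sketch of that idiom, using a hypothetical demo_setup_irqs() helper rather than the driver's own function:

#include <linux/pci.h>

static int demo_setup_irqs(struct pci_dev *pdev, unsigned int want)
{
	int nvecs;

	/* Try for exactly 'want' MSI-X vectors first. */
	nvecs = pci_alloc_irq_vectors(pdev, want, want, PCI_IRQ_MSIX);
	if (nvecs < 0)
		/* Fall back to a single MSI or legacy INTx vector. */
		nvecs = pci_alloc_irq_vectors(pdev, 1, 1,
					      PCI_IRQ_MSI | PCI_IRQ_LEGACY);
	if (nvecs < 0)
		return nvecs;

	/* Vector 0 always exists; pci_irq_vector() maps it to an IRQ number. */
	return pci_irq_vector(pdev, 0);
}

Whichever mode is granted is recorded by the PCI core itself, which is why the cciss.h hunks can test pdev->msi_enabled || pdev->msix_enabled instead of the removed msi_vector/msix_vector fields.
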
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a391a3cfb3fe..184887af4b9f 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c | |||
@@ -3119,7 +3119,7 @@ static int raw_cmd_copyin(int cmd, void __user *param, | |||
3119 | *rcmd = NULL; | 3119 | *rcmd = NULL; |
3120 | 3120 | ||
3121 | loop: | 3121 | loop: |
3122 | ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_USER); | 3122 | ptr = kmalloc(sizeof(struct floppy_raw_cmd), GFP_KERNEL); |
3123 | if (!ptr) | 3123 | if (!ptr) |
3124 | return -ENOMEM; | 3124 | return -ENOMEM; |
3125 | *rcmd = ptr; | 3125 | *rcmd = ptr; |
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f347285c67ec..304377182c1a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -1097,9 +1097,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | |||
1097 | if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) | 1097 | if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) |
1098 | return -EINVAL; | 1098 | return -EINVAL; |
1099 | 1099 | ||
1100 | /* I/O needs to be drained during transfer transition */ | ||
1101 | blk_mq_freeze_queue(lo->lo_queue); | ||
1102 | |||
1100 | err = loop_release_xfer(lo); | 1103 | err = loop_release_xfer(lo); |
1101 | if (err) | 1104 | if (err) |
1102 | return err; | 1105 | goto exit; |
1103 | 1106 | ||
1104 | if (info->lo_encrypt_type) { | 1107 | if (info->lo_encrypt_type) { |
1105 | unsigned int type = info->lo_encrypt_type; | 1108 | unsigned int type = info->lo_encrypt_type; |
@@ -1114,12 +1117,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | |||
1114 | 1117 | ||
1115 | err = loop_init_xfer(lo, xfer, info); | 1118 | err = loop_init_xfer(lo, xfer, info); |
1116 | if (err) | 1119 | if (err) |
1117 | return err; | 1120 | goto exit; |
1118 | 1121 | ||
1119 | if (lo->lo_offset != info->lo_offset || | 1122 | if (lo->lo_offset != info->lo_offset || |
1120 | lo->lo_sizelimit != info->lo_sizelimit) | 1123 | lo->lo_sizelimit != info->lo_sizelimit) |
1121 | if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) | 1124 | if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { |
1122 | return -EFBIG; | 1125 | err = -EFBIG; |
1126 | goto exit; | ||
1127 | } | ||
1123 | 1128 | ||
1124 | loop_config_discard(lo); | 1129 | loop_config_discard(lo); |
1125 | 1130 | ||
@@ -1156,7 +1161,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) | |||
1156 | /* update dio if lo_offset or transfer is changed */ | 1161 | /* update dio if lo_offset or transfer is changed */ |
1157 | __loop_update_dio(lo, lo->use_dio); | 1162 | __loop_update_dio(lo, lo->use_dio); |
1158 | 1163 | ||
1159 | return 0; | 1164 | exit: |
1165 | blk_mq_unfreeze_queue(lo->lo_queue); | ||
1166 | return err; | ||
1160 | } | 1167 | } |
1161 | 1168 | ||
1162 | static int | 1169 | static int |
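
The loop_set_status() hunks above bracket the transfer reconfiguration with blk_mq_freeze_queue()/blk_mq_unfreeze_queue(), so the queue is drained of in-flight requests while the transfer is swapped, and every early return is redirected through the common exit label so the queue is always unfrozen again. A minimal sketch of that bracket pattern, with a hypothetical demo_reconfigure() helper rather than the loop driver's code:

#include <linux/blk-mq.h>

static int demo_reconfigure(struct request_queue *q,
			    int (*change)(void *data), void *data)
{
	int err;

	blk_mq_freeze_queue(q);		/* block new I/O and wait for in-flight requests */
	err = change(data);		/* apply the new configuration */
	blk_mq_unfreeze_queue(q);	/* resume I/O whether change() succeeded or not */

	return err;
}
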
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index c0e14e54909b..a67b7ea1e3bf 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c | |||
@@ -420,7 +420,8 @@ static void null_lnvm_end_io(struct request *rq, int error) | |||
420 | { | 420 | { |
421 | struct nvm_rq *rqd = rq->end_io_data; | 421 | struct nvm_rq *rqd = rq->end_io_data; |
422 | 422 | ||
423 | nvm_end_io(rqd, error); | 423 | rqd->error = error; |
424 | nvm_end_io(rqd); | ||
424 | 425 | ||
425 | blk_put_request(rq); | 426 | blk_put_request(rq); |
426 | } | 427 | } |
@@ -460,7 +461,6 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) | |||
460 | 461 | ||
461 | id->ver_id = 0x1; | 462 | id->ver_id = 0x1; |
462 | id->vmnt = 0; | 463 | id->vmnt = 0; |
463 | id->cgrps = 1; | ||
464 | id->cap = 0x2; | 464 | id->cap = 0x2; |
465 | id->dom = 0x1; | 465 | id->dom = 0x1; |
466 | 466 | ||
@@ -479,7 +479,7 @@ static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id) | |||
479 | 479 | ||
480 | sector_div(size, bs); /* convert size to pages */ | 480 | sector_div(size, bs); /* convert size to pages */ |
481 | size >>= 8; /* convert size to pages per block */ | 481 | size >>= 8; /* convert size to pages per block */ |
482 | grp = &id->groups[0]; | 482 | grp = &id->grp; |
483 | grp->mtype = 0; | 483 | grp->mtype = 0; |
484 | grp->fmtype = 0; | 484 | grp->fmtype = 0; |
485 | grp->num_ch = 1; | 485 | grp->num_ch = 1; |
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 5fd2d0e25567..10aed84244f5 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c | |||
@@ -273,7 +273,7 @@ static const struct block_device_operations pcd_bdops = { | |||
273 | .check_events = pcd_block_check_events, | 273 | .check_events = pcd_block_check_events, |
274 | }; | 274 | }; |
275 | 275 | ||
276 | static struct cdrom_device_ops pcd_dops = { | 276 | static const struct cdrom_device_ops pcd_dops = { |
277 | .open = pcd_open, | 277 | .open = pcd_open, |
278 | .release = pcd_release, | 278 | .release = pcd_release, |
279 | .drive_status = pcd_drive_status, | 279 | .drive_status = pcd_drive_status, |
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 59cca72647a6..bbbd3caa927c 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c | |||
@@ -342,8 +342,8 @@ static void cdrom_sysctl_register(void); | |||
342 | 342 | ||
343 | static LIST_HEAD(cdrom_list); | 343 | static LIST_HEAD(cdrom_list); |
344 | 344 | ||
345 | static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, | 345 | int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, |
346 | struct packet_command *cgc) | 346 | struct packet_command *cgc) |
347 | { | 347 | { |
348 | if (cgc->sense) { | 348 | if (cgc->sense) { |
349 | cgc->sense->sense_key = 0x05; | 349 | cgc->sense->sense_key = 0x05; |
@@ -354,6 +354,7 @@ static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, | |||
354 | cgc->stat = -EIO; | 354 | cgc->stat = -EIO; |
355 | return -EIO; | 355 | return -EIO; |
356 | } | 356 | } |
357 | EXPORT_SYMBOL(cdrom_dummy_generic_packet); | ||
357 | 358 | ||
358 | static int cdrom_flush_cache(struct cdrom_device_info *cdi) | 359 | static int cdrom_flush_cache(struct cdrom_device_info *cdi) |
359 | { | 360 | { |
@@ -371,7 +372,7 @@ static int cdrom_flush_cache(struct cdrom_device_info *cdi) | |||
371 | static int cdrom_get_disc_info(struct cdrom_device_info *cdi, | 372 | static int cdrom_get_disc_info(struct cdrom_device_info *cdi, |
372 | disc_information *di) | 373 | disc_information *di) |
373 | { | 374 | { |
374 | struct cdrom_device_ops *cdo = cdi->ops; | 375 | const struct cdrom_device_ops *cdo = cdi->ops; |
375 | struct packet_command cgc; | 376 | struct packet_command cgc; |
376 | int ret, buflen; | 377 | int ret, buflen; |
377 | 378 | ||
@@ -586,7 +587,7 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space) | |||
586 | int register_cdrom(struct cdrom_device_info *cdi) | 587 | int register_cdrom(struct cdrom_device_info *cdi) |
587 | { | 588 | { |
588 | static char banner_printed; | 589 | static char banner_printed; |
589 | struct cdrom_device_ops *cdo = cdi->ops; | 590 | const struct cdrom_device_ops *cdo = cdi->ops; |
590 | int *change_capability = (int *)&cdo->capability; /* hack */ | 591 | int *change_capability = (int *)&cdo->capability; /* hack */ |
591 | 592 | ||
592 | cd_dbg(CD_OPEN, "entering register_cdrom\n"); | 593 | cd_dbg(CD_OPEN, "entering register_cdrom\n"); |
@@ -610,7 +611,6 @@ int register_cdrom(struct cdrom_device_info *cdi) | |||
610 | ENSURE(reset, CDC_RESET); | 611 | ENSURE(reset, CDC_RESET); |
611 | ENSURE(generic_packet, CDC_GENERIC_PACKET); | 612 | ENSURE(generic_packet, CDC_GENERIC_PACKET); |
612 | cdi->mc_flags = 0; | 613 | cdi->mc_flags = 0; |
613 | cdo->n_minors = 0; | ||
614 | cdi->options = CDO_USE_FFLAGS; | 614 | cdi->options = CDO_USE_FFLAGS; |
615 | 615 | ||
616 | if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY)) | 616 | if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY)) |
@@ -630,8 +630,7 @@ int register_cdrom(struct cdrom_device_info *cdi) | |||
630 | else | 630 | else |
631 | cdi->cdda_method = CDDA_OLD; | 631 | cdi->cdda_method = CDDA_OLD; |
632 | 632 | ||
633 | if (!cdo->generic_packet) | 633 | WARN_ON(!cdo->generic_packet); |
634 | cdo->generic_packet = cdrom_dummy_generic_packet; | ||
635 | 634 | ||
636 | cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); | 635 | cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); |
637 | mutex_lock(&cdrom_mutex); | 636 | mutex_lock(&cdrom_mutex); |
@@ -652,7 +651,6 @@ void unregister_cdrom(struct cdrom_device_info *cdi) | |||
652 | if (cdi->exit) | 651 | if (cdi->exit) |
653 | cdi->exit(cdi); | 652 | cdi->exit(cdi); |
654 | 653 | ||
655 | cdi->ops->n_minors--; | ||
656 | cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); | 654 | cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); |
657 | } | 655 | } |
658 | 656 | ||
@@ -1036,7 +1034,7 @@ static | |||
1036 | int open_for_data(struct cdrom_device_info *cdi) | 1034 | int open_for_data(struct cdrom_device_info *cdi) |
1037 | { | 1035 | { |
1038 | int ret; | 1036 | int ret; |
1039 | struct cdrom_device_ops *cdo = cdi->ops; | 1037 | const struct cdrom_device_ops *cdo = cdi->ops; |
1040 | tracktype tracks; | 1038 | tracktype tracks; |
1041 | cd_dbg(CD_OPEN, "entering open_for_data\n"); | 1039 | cd_dbg(CD_OPEN, "entering open_for_data\n"); |
1042 | /* Check if the driver can report drive status. If it can, we | 1040 | /* Check if the driver can report drive status. If it can, we |
@@ -1198,8 +1196,8 @@ err: | |||
1198 | /* This code is similar to that in open_for_data. The routine is called | 1196 | /* This code is similar to that in open_for_data. The routine is called |
1199 | whenever an audio play operation is requested. | 1197 | whenever an audio play operation is requested. |
1200 | */ | 1198 | */ |
1201 | static int check_for_audio_disc(struct cdrom_device_info * cdi, | 1199 | static int check_for_audio_disc(struct cdrom_device_info *cdi, |
1202 | struct cdrom_device_ops * cdo) | 1200 | const struct cdrom_device_ops *cdo) |
1203 | { | 1201 | { |
1204 | int ret; | 1202 | int ret; |
1205 | tracktype tracks; | 1203 | tracktype tracks; |
@@ -1254,7 +1252,7 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi, | |||
1254 | 1252 | ||
1255 | void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) | 1253 | void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) |
1256 | { | 1254 | { |
1257 | struct cdrom_device_ops *cdo = cdi->ops; | 1255 | const struct cdrom_device_ops *cdo = cdi->ops; |
1258 | int opened_for_data; | 1256 | int opened_for_data; |
1259 | 1257 | ||
1260 | cd_dbg(CD_CLOSE, "entering cdrom_release\n"); | 1258 | cd_dbg(CD_CLOSE, "entering cdrom_release\n"); |
@@ -1294,7 +1292,7 @@ static int cdrom_read_mech_status(struct cdrom_device_info *cdi, | |||
1294 | struct cdrom_changer_info *buf) | 1292 | struct cdrom_changer_info *buf) |
1295 | { | 1293 | { |
1296 | struct packet_command cgc; | 1294 | struct packet_command cgc; |
1297 | struct cdrom_device_ops *cdo = cdi->ops; | 1295 | const struct cdrom_device_ops *cdo = cdi->ops; |
1298 | int length; | 1296 | int length; |
1299 | 1297 | ||
1300 | /* | 1298 | /* |
@@ -1643,7 +1641,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1643 | int ret; | 1641 | int ret; |
1644 | u_char buf[20]; | 1642 | u_char buf[20]; |
1645 | struct packet_command cgc; | 1643 | struct packet_command cgc; |
1646 | struct cdrom_device_ops *cdo = cdi->ops; | 1644 | const struct cdrom_device_ops *cdo = cdi->ops; |
1647 | rpc_state_t rpc_state; | 1645 | rpc_state_t rpc_state; |
1648 | 1646 | ||
1649 | memset(buf, 0, sizeof(buf)); | 1647 | memset(buf, 0, sizeof(buf)); |
@@ -1791,7 +1789,7 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1791 | { | 1789 | { |
1792 | unsigned char buf[21], *base; | 1790 | unsigned char buf[21], *base; |
1793 | struct dvd_layer *layer; | 1791 | struct dvd_layer *layer; |
1794 | struct cdrom_device_ops *cdo = cdi->ops; | 1792 | const struct cdrom_device_ops *cdo = cdi->ops; |
1795 | int ret, layer_num = s->physical.layer_num; | 1793 | int ret, layer_num = s->physical.layer_num; |
1796 | 1794 | ||
1797 | if (layer_num >= DVD_LAYERS) | 1795 | if (layer_num >= DVD_LAYERS) |
@@ -1842,7 +1840,7 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1842 | { | 1840 | { |
1843 | int ret; | 1841 | int ret; |
1844 | u_char buf[8]; | 1842 | u_char buf[8]; |
1845 | struct cdrom_device_ops *cdo = cdi->ops; | 1843 | const struct cdrom_device_ops *cdo = cdi->ops; |
1846 | 1844 | ||
1847 | init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ); | 1845 | init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ); |
1848 | cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; | 1846 | cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE; |
@@ -1866,7 +1864,7 @@ static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1866 | { | 1864 | { |
1867 | int ret, size; | 1865 | int ret, size; |
1868 | u_char *buf; | 1866 | u_char *buf; |
1869 | struct cdrom_device_ops *cdo = cdi->ops; | 1867 | const struct cdrom_device_ops *cdo = cdi->ops; |
1870 | 1868 | ||
1871 | size = sizeof(s->disckey.value) + 4; | 1869 | size = sizeof(s->disckey.value) + 4; |
1872 | 1870 | ||
@@ -1894,7 +1892,7 @@ static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1894 | { | 1892 | { |
1895 | int ret, size = 4 + 188; | 1893 | int ret, size = 4 + 188; |
1896 | u_char *buf; | 1894 | u_char *buf; |
1897 | struct cdrom_device_ops *cdo = cdi->ops; | 1895 | const struct cdrom_device_ops *cdo = cdi->ops; |
1898 | 1896 | ||
1899 | buf = kmalloc(size, GFP_KERNEL); | 1897 | buf = kmalloc(size, GFP_KERNEL); |
1900 | if (!buf) | 1898 | if (!buf) |
@@ -1928,7 +1926,7 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1928 | { | 1926 | { |
1929 | int ret = 0, size; | 1927 | int ret = 0, size; |
1930 | u_char *buf; | 1928 | u_char *buf; |
1931 | struct cdrom_device_ops *cdo = cdi->ops; | 1929 | const struct cdrom_device_ops *cdo = cdi->ops; |
1932 | 1930 | ||
1933 | size = sizeof(s->manufact.value) + 4; | 1931 | size = sizeof(s->manufact.value) + 4; |
1934 | 1932 | ||
@@ -1995,7 +1993,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi, | |||
1995 | struct packet_command *cgc, | 1993 | struct packet_command *cgc, |
1996 | int page_code, int page_control) | 1994 | int page_code, int page_control) |
1997 | { | 1995 | { |
1998 | struct cdrom_device_ops *cdo = cdi->ops; | 1996 | const struct cdrom_device_ops *cdo = cdi->ops; |
1999 | 1997 | ||
2000 | memset(cgc->cmd, 0, sizeof(cgc->cmd)); | 1998 | memset(cgc->cmd, 0, sizeof(cgc->cmd)); |
2001 | 1999 | ||
@@ -2010,7 +2008,7 @@ int cdrom_mode_sense(struct cdrom_device_info *cdi, | |||
2010 | int cdrom_mode_select(struct cdrom_device_info *cdi, | 2008 | int cdrom_mode_select(struct cdrom_device_info *cdi, |
2011 | struct packet_command *cgc) | 2009 | struct packet_command *cgc) |
2012 | { | 2010 | { |
2013 | struct cdrom_device_ops *cdo = cdi->ops; | 2011 | const struct cdrom_device_ops *cdo = cdi->ops; |
2014 | 2012 | ||
2015 | memset(cgc->cmd, 0, sizeof(cgc->cmd)); | 2013 | memset(cgc->cmd, 0, sizeof(cgc->cmd)); |
2016 | memset(cgc->buffer, 0, 2); | 2014 | memset(cgc->buffer, 0, 2); |
@@ -2025,7 +2023,7 @@ int cdrom_mode_select(struct cdrom_device_info *cdi, | |||
2025 | static int cdrom_read_subchannel(struct cdrom_device_info *cdi, | 2023 | static int cdrom_read_subchannel(struct cdrom_device_info *cdi, |
2026 | struct cdrom_subchnl *subchnl, int mcn) | 2024 | struct cdrom_subchnl *subchnl, int mcn) |
2027 | { | 2025 | { |
2028 | struct cdrom_device_ops *cdo = cdi->ops; | 2026 | const struct cdrom_device_ops *cdo = cdi->ops; |
2029 | struct packet_command cgc; | 2027 | struct packet_command cgc; |
2030 | char buffer[32]; | 2028 | char buffer[32]; |
2031 | int ret; | 2029 | int ret; |
@@ -2073,7 +2071,7 @@ static int cdrom_read_cd(struct cdrom_device_info *cdi, | |||
2073 | struct packet_command *cgc, int lba, | 2071 | struct packet_command *cgc, int lba, |
2074 | int blocksize, int nblocks) | 2072 | int blocksize, int nblocks) |
2075 | { | 2073 | { |
2076 | struct cdrom_device_ops *cdo = cdi->ops; | 2074 | const struct cdrom_device_ops *cdo = cdi->ops; |
2077 | 2075 | ||
2078 | memset(&cgc->cmd, 0, sizeof(cgc->cmd)); | 2076 | memset(&cgc->cmd, 0, sizeof(cgc->cmd)); |
2079 | cgc->cmd[0] = GPCMD_READ_10; | 2077 | cgc->cmd[0] = GPCMD_READ_10; |
@@ -2093,7 +2091,7 @@ static int cdrom_read_block(struct cdrom_device_info *cdi, | |||
2093 | struct packet_command *cgc, | 2091 | struct packet_command *cgc, |
2094 | int lba, int nblocks, int format, int blksize) | 2092 | int lba, int nblocks, int format, int blksize) |
2095 | { | 2093 | { |
2096 | struct cdrom_device_ops *cdo = cdi->ops; | 2094 | const struct cdrom_device_ops *cdo = cdi->ops; |
2097 | 2095 | ||
2098 | memset(&cgc->cmd, 0, sizeof(cgc->cmd)); | 2096 | memset(&cgc->cmd, 0, sizeof(cgc->cmd)); |
2099 | cgc->cmd[0] = GPCMD_READ_CD; | 2097 | cgc->cmd[0] = GPCMD_READ_CD; |
@@ -2764,7 +2762,7 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi, | |||
2764 | */ | 2762 | */ |
2765 | static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) | 2763 | static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) |
2766 | { | 2764 | { |
2767 | struct cdrom_device_ops *cdo = cdi->ops; | 2765 | const struct cdrom_device_ops *cdo = cdi->ops; |
2768 | struct packet_command cgc; | 2766 | struct packet_command cgc; |
2769 | struct modesel_head mh; | 2767 | struct modesel_head mh; |
2770 | 2768 | ||
@@ -2790,7 +2788,7 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) | |||
2790 | static int cdrom_get_track_info(struct cdrom_device_info *cdi, | 2788 | static int cdrom_get_track_info(struct cdrom_device_info *cdi, |
2791 | __u16 track, __u8 type, track_information *ti) | 2789 | __u16 track, __u8 type, track_information *ti) |
2792 | { | 2790 | { |
2793 | struct cdrom_device_ops *cdo = cdi->ops; | 2791 | const struct cdrom_device_ops *cdo = cdi->ops; |
2794 | struct packet_command cgc; | 2792 | struct packet_command cgc; |
2795 | int ret, buflen; | 2793 | int ret, buflen; |
2796 | 2794 | ||
@@ -3049,7 +3047,7 @@ static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi, | |||
3049 | void __user *arg, | 3047 | void __user *arg, |
3050 | struct packet_command *cgc) | 3048 | struct packet_command *cgc) |
3051 | { | 3049 | { |
3052 | struct cdrom_device_ops *cdo = cdi->ops; | 3050 | const struct cdrom_device_ops *cdo = cdi->ops; |
3053 | struct cdrom_msf msf; | 3051 | struct cdrom_msf msf; |
3054 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); | 3052 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); |
3055 | if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf))) | 3053 | if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf))) |
@@ -3069,7 +3067,7 @@ static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi, | |||
3069 | void __user *arg, | 3067 | void __user *arg, |
3070 | struct packet_command *cgc) | 3068 | struct packet_command *cgc) |
3071 | { | 3069 | { |
3072 | struct cdrom_device_ops *cdo = cdi->ops; | 3070 | const struct cdrom_device_ops *cdo = cdi->ops; |
3073 | struct cdrom_blk blk; | 3071 | struct cdrom_blk blk; |
3074 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); | 3072 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); |
3075 | if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk))) | 3073 | if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk))) |
@@ -3164,7 +3162,7 @@ static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi, | |||
3164 | struct packet_command *cgc, | 3162 | struct packet_command *cgc, |
3165 | int cmd) | 3163 | int cmd) |
3166 | { | 3164 | { |
3167 | struct cdrom_device_ops *cdo = cdi->ops; | 3165 | const struct cdrom_device_ops *cdo = cdi->ops; |
3168 | cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); | 3166 | cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); |
3169 | cgc->cmd[0] = GPCMD_START_STOP_UNIT; | 3167 | cgc->cmd[0] = GPCMD_START_STOP_UNIT; |
3170 | cgc->cmd[1] = 1; | 3168 | cgc->cmd[1] = 1; |
@@ -3177,7 +3175,7 @@ static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi, | |||
3177 | struct packet_command *cgc, | 3175 | struct packet_command *cgc, |
3178 | int cmd) | 3176 | int cmd) |
3179 | { | 3177 | { |
3180 | struct cdrom_device_ops *cdo = cdi->ops; | 3178 | const struct cdrom_device_ops *cdo = cdi->ops; |
3181 | cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); | 3179 | cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); |
3182 | cgc->cmd[0] = GPCMD_PAUSE_RESUME; | 3180 | cgc->cmd[0] = GPCMD_PAUSE_RESUME; |
3183 | cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; | 3181 | cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; |
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 584bc3126403..1afab6558d0c 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c | |||
@@ -481,7 +481,7 @@ static int gdrom_audio_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, | |||
481 | return -EINVAL; | 481 | return -EINVAL; |
482 | } | 482 | } |
483 | 483 | ||
484 | static struct cdrom_device_ops gdrom_ops = { | 484 | static const struct cdrom_device_ops gdrom_ops = { |
485 | .open = gdrom_open, | 485 | .open = gdrom_open, |
486 | .release = gdrom_release, | 486 | .release = gdrom_release, |
487 | .drive_status = gdrom_drivestatus, | 487 | .drive_status = gdrom_drivestatus, |
@@ -489,9 +489,9 @@ static struct cdrom_device_ops gdrom_ops = { | |||
489 | .get_last_session = gdrom_get_last_session, | 489 | .get_last_session = gdrom_get_last_session, |
490 | .reset = gdrom_hardreset, | 490 | .reset = gdrom_hardreset, |
491 | .audio_ioctl = gdrom_audio_ioctl, | 491 | .audio_ioctl = gdrom_audio_ioctl, |
492 | .generic_packet = cdrom_dummy_generic_packet, | ||
492 | .capability = CDC_MULTI_SESSION | CDC_MEDIA_CHANGED | | 493 | .capability = CDC_MULTI_SESSION | CDC_MEDIA_CHANGED | |
493 | CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R, | 494 | CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R, |
494 | .n_minors = 1, | ||
495 | }; | 495 | }; |
496 | 496 | ||
497 | static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) | 497 | static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) |
@@ -807,16 +807,20 @@ static int probe_gdrom(struct platform_device *devptr) | |||
807 | if (err) | 807 | if (err) |
808 | goto probe_fail_cmdirq_register; | 808 | goto probe_fail_cmdirq_register; |
809 | gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock); | 809 | gd.gdrom_rq = blk_init_queue(gdrom_request, &gdrom_lock); |
810 | if (!gd.gdrom_rq) | 810 | if (!gd.gdrom_rq) { |
811 | err = -ENOMEM; | ||
811 | goto probe_fail_requestq; | 812 | goto probe_fail_requestq; |
813 | } | ||
812 | 814 | ||
813 | err = probe_gdrom_setupqueue(); | 815 | err = probe_gdrom_setupqueue(); |
814 | if (err) | 816 | if (err) |
815 | goto probe_fail_toc; | 817 | goto probe_fail_toc; |
816 | 818 | ||
817 | gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL); | 819 | gd.toc = kzalloc(sizeof(struct gdromtoc), GFP_KERNEL); |
818 | if (!gd.toc) | 820 | if (!gd.toc) { |
821 | err = -ENOMEM; | ||
819 | goto probe_fail_toc; | 822 | goto probe_fail_toc; |
823 | } | ||
820 | add_disk(gd.disk); | 824 | add_disk(gd.disk); |
821 | return 0; | 825 | return 0; |
822 | 826 | ||
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 9cbd217bc0c9..ab9232e1e16f 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c | |||
@@ -1166,7 +1166,7 @@ void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf) | |||
1166 | CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \ | 1166 | CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \ |
1167 | CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM) | 1167 | CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM) |
1168 | 1168 | ||
1169 | static struct cdrom_device_ops ide_cdrom_dops = { | 1169 | static const struct cdrom_device_ops ide_cdrom_dops = { |
1170 | .open = ide_cdrom_open_real, | 1170 | .open = ide_cdrom_open_real, |
1171 | .release = ide_cdrom_release_real, | 1171 | .release = ide_cdrom_release_real, |
1172 | .drive_status = ide_cdrom_drive_status, | 1172 | .drive_status = ide_cdrom_drive_status, |
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 2f5d5f4a4c75..052714106b7b 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig | |||
@@ -26,15 +26,6 @@ config NVM_DEBUG | |||
26 | 26 | ||
27 | It is required to create/remove targets without IOCTLs. | 27 | It is required to create/remove targets without IOCTLs. |
28 | 28 | ||
29 | config NVM_GENNVM | ||
30 | tristate "General Non-Volatile Memory Manager for Open-Channel SSDs" | ||
31 | ---help--- | ||
32 | Non-volatile memory media manager for Open-Channel SSDs that implements | ||
33 | physical media metadata management and block provisioning API. | ||
34 | |||
35 | This is the standard media manager for using Open-Channel SSDs, and | ||
36 | required for targets to be instantiated. | ||
37 | |||
38 | config NVM_RRPC | 29 | config NVM_RRPC |
39 | tristate "Round-robin Hybrid Open-Channel SSD target" | 30 | tristate "Round-robin Hybrid Open-Channel SSD target" |
40 | ---help--- | 31 | ---help--- |
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile index a7a0a22cf1a5..b2a39e2d2895 100644 --- a/drivers/lightnvm/Makefile +++ b/drivers/lightnvm/Makefile | |||
@@ -2,6 +2,5 @@ | |||
2 | # Makefile for Open-Channel SSDs. | 2 | # Makefile for Open-Channel SSDs. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_NVM) := core.o sysblk.o | 5 | obj-$(CONFIG_NVM) := core.o |
6 | obj-$(CONFIG_NVM_GENNVM) += gennvm.o | ||
7 | obj-$(CONFIG_NVM_RRPC) += rrpc.o | 6 | obj-$(CONFIG_NVM_RRPC) += rrpc.o |
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 02240a0b39c9..5262ba66a7a7 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c | |||
@@ -29,10 +29,483 @@ | |||
29 | 29 | ||
30 | static LIST_HEAD(nvm_tgt_types); | 30 | static LIST_HEAD(nvm_tgt_types); |
31 | static DECLARE_RWSEM(nvm_tgtt_lock); | 31 | static DECLARE_RWSEM(nvm_tgtt_lock); |
32 | static LIST_HEAD(nvm_mgrs); | ||
33 | static LIST_HEAD(nvm_devices); | 32 | static LIST_HEAD(nvm_devices); |
34 | static DECLARE_RWSEM(nvm_lock); | 33 | static DECLARE_RWSEM(nvm_lock); |
35 | 34 | ||
35 | /* Map between virtual and physical channel and lun */ | ||
36 | struct nvm_ch_map { | ||
37 | int ch_off; | ||
38 | int nr_luns; | ||
39 | int *lun_offs; | ||
40 | }; | ||
41 | |||
42 | struct nvm_dev_map { | ||
43 | struct nvm_ch_map *chnls; | ||
44 | int nr_chnls; | ||
45 | }; | ||
46 | |||
47 | struct nvm_area { | ||
48 | struct list_head list; | ||
49 | sector_t begin; | ||
50 | sector_t end; /* end is exclusive */ | ||
51 | }; | ||
52 | |||
53 | static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) | ||
54 | { | ||
55 | struct nvm_target *tgt; | ||
56 | |||
57 | list_for_each_entry(tgt, &dev->targets, list) | ||
58 | if (!strcmp(name, tgt->disk->disk_name)) | ||
59 | return tgt; | ||
60 | |||
61 | return NULL; | ||
62 | } | ||
63 | |||
64 | static int nvm_reserve_luns(struct nvm_dev *dev, int lun_begin, int lun_end) | ||
65 | { | ||
66 | int i; | ||
67 | |||
68 | for (i = lun_begin; i <= lun_end; i++) { | ||
69 | if (test_and_set_bit(i, dev->lun_map)) { | ||
70 | pr_err("nvm: lun %d already allocated\n", i); | ||
71 | goto err; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | return 0; | ||
76 | err: | ||
77 | while (--i >= lun_begin) | ||
78 | clear_bit(i, dev->lun_map); | ||
79 | |||
80 | return -EBUSY; | ||
81 | } | ||
82 | |||
83 | static void nvm_release_luns_err(struct nvm_dev *dev, int lun_begin, | ||
84 | int lun_end) | ||
85 | { | ||
86 | int i; | ||
87 | |||
88 | for (i = lun_begin; i <= lun_end; i++) | ||
89 | WARN_ON(!test_and_clear_bit(i, dev->lun_map)); | ||
90 | } | ||
91 | |||
92 | static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) | ||
93 | { | ||
94 | struct nvm_dev *dev = tgt_dev->parent; | ||
95 | struct nvm_dev_map *dev_map = tgt_dev->map; | ||
96 | int i, j; | ||
97 | |||
98 | for (i = 0; i < dev_map->nr_chnls; i++) { | ||
99 | struct nvm_ch_map *ch_map = &dev_map->chnls[i]; | ||
100 | int *lun_offs = ch_map->lun_offs; | ||
101 | int ch = i + ch_map->ch_off; | ||
102 | |||
103 | for (j = 0; j < ch_map->nr_luns; j++) { | ||
104 | int lun = j + lun_offs[j]; | ||
105 | int lunid = (ch * dev->geo.luns_per_chnl) + lun; | ||
106 | |||
107 | WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); | ||
108 | } | ||
109 | |||
110 | kfree(ch_map->lun_offs); | ||
111 | } | ||
112 | |||
113 | kfree(dev_map->chnls); | ||
114 | kfree(dev_map); | ||
115 | |||
116 | kfree(tgt_dev->luns); | ||
117 | kfree(tgt_dev); | ||
118 | } | ||
119 | |||
120 | static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, | ||
121 | int lun_begin, int lun_end) | ||
122 | { | ||
123 | struct nvm_tgt_dev *tgt_dev = NULL; | ||
124 | struct nvm_dev_map *dev_rmap = dev->rmap; | ||
125 | struct nvm_dev_map *dev_map; | ||
126 | struct ppa_addr *luns; | ||
127 | int nr_luns = lun_end - lun_begin + 1; | ||
128 | int luns_left = nr_luns; | ||
129 | int nr_chnls = nr_luns / dev->geo.luns_per_chnl; | ||
130 | int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl; | ||
131 | int bch = lun_begin / dev->geo.luns_per_chnl; | ||
132 | int blun = lun_begin % dev->geo.luns_per_chnl; | ||
133 | int lunid = 0; | ||
134 | int lun_balanced = 1; | ||
135 | int prev_nr_luns; | ||
136 | int i, j; | ||
137 | |||
138 | nr_chnls = nr_luns / dev->geo.luns_per_chnl; | ||
139 | nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; | ||
140 | |||
141 | dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); | ||
142 | if (!dev_map) | ||
143 | goto err_dev; | ||
144 | |||
145 | dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map), | ||
146 | GFP_KERNEL); | ||
147 | if (!dev_map->chnls) | ||
148 | goto err_chnls; | ||
149 | |||
150 | luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL); | ||
151 | if (!luns) | ||
152 | goto err_luns; | ||
153 | |||
154 | prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ? | ||
155 | dev->geo.luns_per_chnl : luns_left; | ||
156 | for (i = 0; i < nr_chnls; i++) { | ||
157 | struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; | ||
158 | int *lun_roffs = ch_rmap->lun_offs; | ||
159 | struct nvm_ch_map *ch_map = &dev_map->chnls[i]; | ||
160 | int *lun_offs; | ||
161 | int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ? | ||
162 | dev->geo.luns_per_chnl : luns_left; | ||
163 | |||
164 | if (lun_balanced && prev_nr_luns != luns_in_chnl) | ||
165 | lun_balanced = 0; | ||
166 | |||
167 | ch_map->ch_off = ch_rmap->ch_off = bch; | ||
168 | ch_map->nr_luns = luns_in_chnl; | ||
169 | |||
170 | lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); | ||
171 | if (!lun_offs) | ||
172 | goto err_ch; | ||
173 | |||
174 | for (j = 0; j < luns_in_chnl; j++) { | ||
175 | luns[lunid].ppa = 0; | ||
176 | luns[lunid].g.ch = i; | ||
177 | luns[lunid++].g.lun = j; | ||
178 | |||
179 | lun_offs[j] = blun; | ||
180 | lun_roffs[j + blun] = blun; | ||
181 | } | ||
182 | |||
183 | ch_map->lun_offs = lun_offs; | ||
184 | |||
185 | /* when starting a new channel, lun offset is reset */ | ||
186 | blun = 0; | ||
187 | luns_left -= luns_in_chnl; | ||
188 | } | ||
189 | |||
190 | dev_map->nr_chnls = nr_chnls; | ||
191 | |||
192 | tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); | ||
193 | if (!tgt_dev) | ||
194 | goto err_ch; | ||
195 | |||
196 | memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); | ||
197 | /* Target device only owns a portion of the physical device */ | ||
198 | tgt_dev->geo.nr_chnls = nr_chnls; | ||
199 | tgt_dev->geo.nr_luns = nr_luns; | ||
200 | tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1; | ||
201 | tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; | ||
202 | tgt_dev->q = dev->q; | ||
203 | tgt_dev->map = dev_map; | ||
204 | tgt_dev->luns = luns; | ||
205 | memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id)); | ||
206 | |||
207 | tgt_dev->parent = dev; | ||
208 | |||
209 | return tgt_dev; | ||
210 | err_ch: | ||
211 | while (--i >= 0) | ||
212 | kfree(dev_map->chnls[i].lun_offs); | ||
213 | kfree(luns); | ||
214 | err_luns: | ||
215 | kfree(dev_map->chnls); | ||
216 | err_chnls: | ||
217 | kfree(dev_map); | ||
218 | err_dev: | ||
219 | return tgt_dev; | ||
220 | } | ||
221 | |||
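To make the index arithmetic in nvm_create_tgt_dev() concrete, a small worked example with made-up geometry: 4 LUNs per channel and a create request for device LUNs 4..11.

    int nr_luns  = 11 - 4 + 1;      /* 8 LUNs requested */
    int nr_chnls = 8 / 4;           /* 2 channels; no remainder, so no extra channel */
    int bch      = 4 / 4;           /* first owned device channel: 1 */
    int blun     = 4 % 4;           /* LUN offset inside that channel: 0 */

Both channels then contribute 4 LUNs, so lun_balanced stays 1 and the target's geo.luns_per_chnl remains 4; an uneven split would instead leave it at -1, as the ternary above shows.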
222 | static const struct block_device_operations nvm_fops = { | ||
223 | .owner = THIS_MODULE, | ||
224 | }; | ||
225 | |||
226 | static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) | ||
227 | { | ||
228 | struct nvm_ioctl_create_simple *s = &create->conf.s; | ||
229 | struct request_queue *tqueue; | ||
230 | struct gendisk *tdisk; | ||
231 | struct nvm_tgt_type *tt; | ||
232 | struct nvm_target *t; | ||
233 | struct nvm_tgt_dev *tgt_dev; | ||
234 | void *targetdata; | ||
235 | |||
236 | tt = nvm_find_target_type(create->tgttype, 1); | ||
237 | if (!tt) { | ||
238 | pr_err("nvm: target type %s not found\n", create->tgttype); | ||
239 | return -EINVAL; | ||
240 | } | ||
241 | |||
242 | mutex_lock(&dev->mlock); | ||
243 | t = nvm_find_target(dev, create->tgtname); | ||
244 | if (t) { | ||
245 | pr_err("nvm: target name already exists.\n"); | ||
246 | mutex_unlock(&dev->mlock); | ||
247 | return -EINVAL; | ||
248 | } | ||
249 | mutex_unlock(&dev->mlock); | ||
250 | |||
251 | if (nvm_reserve_luns(dev, s->lun_begin, s->lun_end)) | ||
252 | return -ENOMEM; | ||
253 | |||
254 | t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); | ||
255 | if (!t) | ||
256 | goto err_reserve; | ||
257 | |||
258 | tgt_dev = nvm_create_tgt_dev(dev, s->lun_begin, s->lun_end); | ||
259 | if (!tgt_dev) { | ||
260 | pr_err("nvm: could not create target device\n"); | ||
261 | goto err_t; | ||
262 | } | ||
263 | |||
264 | tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); | ||
265 | if (!tqueue) | ||
266 | goto err_dev; | ||
267 | blk_queue_make_request(tqueue, tt->make_rq); | ||
268 | |||
269 | tdisk = alloc_disk(0); | ||
270 | if (!tdisk) | ||
271 | goto err_queue; | ||
272 | |||
273 | sprintf(tdisk->disk_name, "%s", create->tgtname); | ||
274 | tdisk->flags = GENHD_FL_EXT_DEVT; | ||
275 | tdisk->major = 0; | ||
276 | tdisk->first_minor = 0; | ||
277 | tdisk->fops = &nvm_fops; | ||
278 | tdisk->queue = tqueue; | ||
279 | |||
280 | targetdata = tt->init(tgt_dev, tdisk); | ||
281 | if (IS_ERR(targetdata)) | ||
282 | goto err_init; | ||
283 | |||
284 | tdisk->private_data = targetdata; | ||
285 | tqueue->queuedata = targetdata; | ||
286 | |||
287 | blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); | ||
288 | |||
289 | set_capacity(tdisk, tt->capacity(targetdata)); | ||
290 | add_disk(tdisk); | ||
291 | |||
292 | if (tt->sysfs_init && tt->sysfs_init(tdisk)) | ||
293 | goto err_sysfs; | ||
294 | |||
295 | t->type = tt; | ||
296 | t->disk = tdisk; | ||
297 | t->dev = tgt_dev; | ||
298 | |||
299 | mutex_lock(&dev->mlock); | ||
300 | list_add_tail(&t->list, &dev->targets); | ||
301 | mutex_unlock(&dev->mlock); | ||
302 | |||
303 | return 0; | ||
304 | err_sysfs: | ||
305 | if (tt->exit) | ||
306 | tt->exit(targetdata); | ||
307 | err_init: | ||
308 | put_disk(tdisk); | ||
309 | err_queue: | ||
310 | blk_cleanup_queue(tqueue); | ||
311 | err_dev: | ||
312 | nvm_remove_tgt_dev(tgt_dev); | ||
313 | err_t: | ||
314 | kfree(t); | ||
315 | err_reserve: | ||
316 | nvm_release_luns_err(dev, s->lun_begin, s->lun_end); | ||
317 | return -ENOMEM; | ||
318 | } | ||
319 | |||
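nvm_create_tgt() drives the new target through the nvm_tgt_type hooks it looks up via nvm_find_target_type(): make_rq is wired into the fresh request queue, init() receives the nvm_tgt_dev and gendisk, capacity() sizes the disk, and the optional sysfs_init()/sysfs_exit() bracket its lifetime. Below is a minimal sketch of such a target type, modeled on how rrpc registers itself; the foo_* names are hypothetical and the hook signatures are assumed from the calls made in the function above.

    struct foo_tgt {
            struct nvm_tgt_dev *dev;
    };

    static blk_qc_t foo_make_rq(struct request_queue *q, struct bio *bio)
    {
            /* translate the bio into one or more nvm_rq and nvm_submit_io() them */
            return BLK_QC_T_NONE;
    }

    static void *foo_init(struct nvm_tgt_dev *tgt_dev, struct gendisk *disk)
    {
            struct foo_tgt *foo = kzalloc(sizeof(*foo), GFP_KERNEL);

            if (!foo)
                    return ERR_PTR(-ENOMEM);
            foo->dev = tgt_dev;
            return foo;
    }

    static sector_t foo_capacity(void *private)
    {
            return 0;       /* user-visible capacity, in 512-byte sectors */
    }

    static void foo_exit(void *private)
    {
            kfree(private);
    }

    static struct nvm_tgt_type tt_foo = {
            .name           = "foo",
            .version        = {1, 0, 0},
            .make_rq        = foo_make_rq,
            .capacity       = foo_capacity,
            .init           = foo_init,
            .exit           = foo_exit,
    };
    /* registered from module init with nvm_register_tgt_type(&tt_foo),
     * the same call rrpc uses */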
320 | static void __nvm_remove_target(struct nvm_target *t) | ||
321 | { | ||
322 | struct nvm_tgt_type *tt = t->type; | ||
323 | struct gendisk *tdisk = t->disk; | ||
324 | struct request_queue *q = tdisk->queue; | ||
325 | |||
326 | del_gendisk(tdisk); | ||
327 | blk_cleanup_queue(q); | ||
328 | |||
329 | if (tt->sysfs_exit) | ||
330 | tt->sysfs_exit(tdisk); | ||
331 | |||
332 | if (tt->exit) | ||
333 | tt->exit(tdisk->private_data); | ||
334 | |||
335 | nvm_remove_tgt_dev(t->dev); | ||
336 | put_disk(tdisk); | ||
337 | |||
338 | list_del(&t->list); | ||
339 | kfree(t); | ||
340 | } | ||
341 | |||
342 | /** | ||
343 | * nvm_remove_tgt - Removes a target from the device | ||
344 | * @dev: device | ||
345 | * @remove: ioctl structure with target name to remove. | ||
346 | * | ||
347 | * Returns: | ||
348 | * 0: on success | ||
349 | * 1: on not found | ||
350 | * <0: on error | ||
351 | */ | ||
352 | static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) | ||
353 | { | ||
354 | struct nvm_target *t; | ||
355 | |||
356 | mutex_lock(&dev->mlock); | ||
357 | t = nvm_find_target(dev, remove->tgtname); | ||
358 | if (!t) { | ||
359 | mutex_unlock(&dev->mlock); | ||
360 | return 1; | ||
361 | } | ||
362 | __nvm_remove_target(t); | ||
363 | mutex_unlock(&dev->mlock); | ||
364 | |||
365 | return 0; | ||
366 | } | ||
367 | |||
368 | static int nvm_register_map(struct nvm_dev *dev) | ||
369 | { | ||
370 | struct nvm_dev_map *rmap; | ||
371 | int i, j; | ||
372 | |||
373 | rmap = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); | ||
374 | if (!rmap) | ||
375 | goto err_rmap; | ||
376 | |||
377 | rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map), | ||
378 | GFP_KERNEL); | ||
379 | if (!rmap->chnls) | ||
380 | goto err_chnls; | ||
381 | |||
382 | for (i = 0; i < dev->geo.nr_chnls; i++) { | ||
383 | struct nvm_ch_map *ch_rmap; | ||
384 | int *lun_roffs; | ||
385 | int luns_in_chnl = dev->geo.luns_per_chnl; | ||
386 | |||
387 | ch_rmap = &rmap->chnls[i]; | ||
388 | |||
389 | ch_rmap->ch_off = -1; | ||
390 | ch_rmap->nr_luns = luns_in_chnl; | ||
391 | |||
392 | lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); | ||
393 | if (!lun_roffs) | ||
394 | goto err_ch; | ||
395 | |||
396 | for (j = 0; j < luns_in_chnl; j++) | ||
397 | lun_roffs[j] = -1; | ||
398 | |||
399 | ch_rmap->lun_offs = lun_roffs; | ||
400 | } | ||
401 | |||
402 | dev->rmap = rmap; | ||
403 | |||
404 | return 0; | ||
405 | err_ch: | ||
406 | while (--i >= 0) | ||
407 | kfree(rmap->chnls[i].lun_offs); | ||
408 | err_chnls: | ||
409 | kfree(rmap); | ||
410 | err_rmap: | ||
411 | return -ENOMEM; | ||
412 | } | ||
413 | |||
414 | static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) | ||
415 | { | ||
416 | struct nvm_dev_map *dev_map = tgt_dev->map; | ||
417 | struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch]; | ||
418 | int lun_off = ch_map->lun_offs[p->g.lun]; | ||
419 | |||
420 | p->g.ch += ch_map->ch_off; | ||
421 | p->g.lun += lun_off; | ||
422 | } | ||
423 | |||
424 | static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) | ||
425 | { | ||
426 | struct nvm_dev *dev = tgt_dev->parent; | ||
427 | struct nvm_dev_map *dev_rmap = dev->rmap; | ||
428 | struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch]; | ||
429 | int lun_roff = ch_rmap->lun_offs[p->g.lun]; | ||
430 | |||
431 | p->g.ch -= ch_rmap->ch_off; | ||
432 | p->g.lun -= lun_roff; | ||
433 | } | ||
434 | |||
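Continuing the hypothetical layout sketched above (target owns device channels 1..2, all LUN offsets 0), these two helpers shift an address between the two spaces: nvm_map_to_dev() adds the target's offsets on the way to the hardware, and nvm_map_to_tgt() uses the device's reverse map to undo them on completion. A short round trip inside core.c:

    struct ppa_addr p;

    p.ppa = 0;
    p.g.ch = 0;                     /* target-relative */
    p.g.lun = 2;

    nvm_map_to_dev(tgt_dev, &p);    /* now p.g.ch == 1, p.g.lun == 2 */
    nvm_map_to_tgt(tgt_dev, &p);    /* back to p.g.ch == 0, p.g.lun == 2 */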
435 | static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, | ||
436 | struct ppa_addr *ppa_list, int nr_ppas) | ||
437 | { | ||
438 | int i; | ||
439 | |||
440 | for (i = 0; i < nr_ppas; i++) { | ||
441 | nvm_map_to_dev(tgt_dev, &ppa_list[i]); | ||
442 | ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, | ||
447 | struct ppa_addr *ppa_list, int nr_ppas) | ||
448 | { | ||
449 | int i; | ||
450 | |||
451 | for (i = 0; i < nr_ppas; i++) { | ||
452 | ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]); | ||
453 | nvm_map_to_tgt(tgt_dev, &ppa_list[i]); | ||
454 | } | ||
455 | } | ||
456 | |||
457 | static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) | ||
458 | { | ||
459 | if (rqd->nr_ppas == 1) { | ||
460 | nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1); | ||
461 | return; | ||
462 | } | ||
463 | |||
464 | nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas); | ||
465 | } | ||
466 | |||
467 | static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) | ||
468 | { | ||
469 | if (rqd->nr_ppas == 1) { | ||
470 | nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1); | ||
471 | return; | ||
472 | } | ||
473 | |||
474 | nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas); | ||
475 | } | ||
476 | |||
477 | void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, | ||
478 | int len) | ||
479 | { | ||
480 | struct nvm_geo *geo = &dev->geo; | ||
481 | struct nvm_dev_map *dev_rmap = dev->rmap; | ||
482 | u64 i; | ||
483 | |||
484 | for (i = 0; i < len; i++) { | ||
485 | struct nvm_ch_map *ch_rmap; | ||
486 | int *lun_roffs; | ||
487 | struct ppa_addr gaddr; | ||
488 | u64 pba = le64_to_cpu(entries[i]); | ||
489 | int off; | ||
490 | u64 diff; | ||
491 | |||
492 | if (!pba) | ||
493 | continue; | ||
494 | |||
495 | gaddr = linear_to_generic_addr(geo, pba); | ||
496 | ch_rmap = &dev_rmap->chnls[gaddr.g.ch]; | ||
497 | lun_roffs = ch_rmap->lun_offs; | ||
498 | |||
499 | off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun; | ||
500 | |||
501 | diff = ((ch_rmap->ch_off * geo->luns_per_chnl) + | ||
502 | (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun; | ||
503 | |||
504 | entries[i] -= cpu_to_le64(diff); | ||
505 | } | ||
506 | } | ||
507 | EXPORT_SYMBOL(nvm_part_to_tgt); | ||
508 | |||
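nvm_part_to_tgt() rebases device-linear addresses (for example L2P table entries) into the target's own linear space by subtracting the sectors that sit before the target's first LUN. With the same hypothetical geometry as above (4 LUNs per channel, target starting at device channel 1 with LUN offset 0), every non-zero entry is shifted down by four LUNs' worth of sectors:

    /* illustrative numbers only */
    diff = ((1 /* ch_off */ * 4 /* luns_per_chnl */) + 0 /* lun_roffs */)
                    * geo->sec_per_lun;     /* = 4 * sec_per_lun sectors */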
36 | struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) | 509 | struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) |
37 | { | 510 | { |
38 | struct nvm_tgt_type *tmp, *tt = NULL; | 511 | struct nvm_tgt_type *tmp, *tt = NULL; |
@@ -92,78 +565,6 @@ void nvm_dev_dma_free(struct nvm_dev *dev, void *addr, dma_addr_t dma_handler) | |||
92 | } | 565 | } |
93 | EXPORT_SYMBOL(nvm_dev_dma_free); | 566 | EXPORT_SYMBOL(nvm_dev_dma_free); |
94 | 567 | ||
95 | static struct nvmm_type *nvm_find_mgr_type(const char *name) | ||
96 | { | ||
97 | struct nvmm_type *mt; | ||
98 | |||
99 | list_for_each_entry(mt, &nvm_mgrs, list) | ||
100 | if (!strcmp(name, mt->name)) | ||
101 | return mt; | ||
102 | |||
103 | return NULL; | ||
104 | } | ||
105 | |||
106 | static struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev) | ||
107 | { | ||
108 | struct nvmm_type *mt; | ||
109 | int ret; | ||
110 | |||
111 | lockdep_assert_held(&nvm_lock); | ||
112 | |||
113 | list_for_each_entry(mt, &nvm_mgrs, list) { | ||
114 | if (strncmp(dev->sb.mmtype, mt->name, NVM_MMTYPE_LEN)) | ||
115 | continue; | ||
116 | |||
117 | ret = mt->register_mgr(dev); | ||
118 | if (ret < 0) { | ||
119 | pr_err("nvm: media mgr failed to init (%d) on dev %s\n", | ||
120 | ret, dev->name); | ||
121 | return NULL; /* initialization failed */ | ||
122 | } else if (ret > 0) | ||
123 | return mt; | ||
124 | } | ||
125 | |||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | int nvm_register_mgr(struct nvmm_type *mt) | ||
130 | { | ||
131 | struct nvm_dev *dev; | ||
132 | int ret = 0; | ||
133 | |||
134 | down_write(&nvm_lock); | ||
135 | if (nvm_find_mgr_type(mt->name)) { | ||
136 | ret = -EEXIST; | ||
137 | goto finish; | ||
138 | } else { | ||
139 | list_add(&mt->list, &nvm_mgrs); | ||
140 | } | ||
141 | |||
142 | /* try to register media mgr if any device have none configured */ | ||
143 | list_for_each_entry(dev, &nvm_devices, devices) { | ||
144 | if (dev->mt) | ||
145 | continue; | ||
146 | |||
147 | dev->mt = nvm_init_mgr(dev); | ||
148 | } | ||
149 | finish: | ||
150 | up_write(&nvm_lock); | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | EXPORT_SYMBOL(nvm_register_mgr); | ||
155 | |||
156 | void nvm_unregister_mgr(struct nvmm_type *mt) | ||
157 | { | ||
158 | if (!mt) | ||
159 | return; | ||
160 | |||
161 | down_write(&nvm_lock); | ||
162 | list_del(&mt->list); | ||
163 | up_write(&nvm_lock); | ||
164 | } | ||
165 | EXPORT_SYMBOL(nvm_unregister_mgr); | ||
166 | |||
167 | static struct nvm_dev *nvm_find_nvm_dev(const char *name) | 568 | static struct nvm_dev *nvm_find_nvm_dev(const char *name) |
168 | { | 569 | { |
169 | struct nvm_dev *dev; | 570 | struct nvm_dev *dev; |
@@ -175,53 +576,6 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name) | |||
175 | return NULL; | 576 | return NULL; |
176 | } | 577 | } |
177 | 578 | ||
178 | static void nvm_tgt_generic_to_addr_mode(struct nvm_tgt_dev *tgt_dev, | ||
179 | struct nvm_rq *rqd) | ||
180 | { | ||
181 | struct nvm_dev *dev = tgt_dev->parent; | ||
182 | int i; | ||
183 | |||
184 | if (rqd->nr_ppas > 1) { | ||
185 | for (i = 0; i < rqd->nr_ppas; i++) { | ||
186 | rqd->ppa_list[i] = dev->mt->trans_ppa(tgt_dev, | ||
187 | rqd->ppa_list[i], TRANS_TGT_TO_DEV); | ||
188 | rqd->ppa_list[i] = generic_to_dev_addr(dev, | ||
189 | rqd->ppa_list[i]); | ||
190 | } | ||
191 | } else { | ||
192 | rqd->ppa_addr = dev->mt->trans_ppa(tgt_dev, rqd->ppa_addr, | ||
193 | TRANS_TGT_TO_DEV); | ||
194 | rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, | ||
199 | int type) | ||
200 | { | ||
201 | struct nvm_rq rqd; | ||
202 | int ret; | ||
203 | |||
204 | if (nr_ppas > dev->ops->max_phys_sect) { | ||
205 | pr_err("nvm: unable to update all sysblocks atomically\n"); | ||
206 | return -EINVAL; | ||
207 | } | ||
208 | |||
209 | memset(&rqd, 0, sizeof(struct nvm_rq)); | ||
210 | |||
211 | nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); | ||
212 | nvm_generic_to_addr_mode(dev, &rqd); | ||
213 | |||
214 | ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); | ||
215 | nvm_free_rqd_ppalist(dev, &rqd); | ||
216 | if (ret) { | ||
217 | pr_err("nvm: sysblk failed bb mark\n"); | ||
218 | return -EINVAL; | ||
219 | } | ||
220 | |||
221 | return 0; | ||
222 | } | ||
223 | EXPORT_SYMBOL(nvm_set_bb_tbl); | ||
224 | |||
225 | int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, | 579 | int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, |
226 | int nr_ppas, int type) | 580 | int nr_ppas, int type) |
227 | { | 581 | { |
@@ -237,12 +591,12 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, | |||
237 | memset(&rqd, 0, sizeof(struct nvm_rq)); | 591 | memset(&rqd, 0, sizeof(struct nvm_rq)); |
238 | 592 | ||
239 | nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); | 593 | nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); |
240 | nvm_tgt_generic_to_addr_mode(tgt_dev, &rqd); | 594 | nvm_rq_tgt_to_dev(tgt_dev, &rqd); |
241 | 595 | ||
242 | ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); | 596 | ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); |
243 | nvm_free_rqd_ppalist(dev, &rqd); | 597 | nvm_free_rqd_ppalist(dev, &rqd); |
244 | if (ret) { | 598 | if (ret) { |
245 | pr_err("nvm: sysblk failed bb mark\n"); | 599 | pr_err("nvm: failed bb mark\n"); |
246 | return -EINVAL; | 600 | return -EINVAL; |
247 | } | 601 | } |
248 | 602 | ||
@@ -262,15 +616,42 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) | |||
262 | { | 616 | { |
263 | struct nvm_dev *dev = tgt_dev->parent; | 617 | struct nvm_dev *dev = tgt_dev->parent; |
264 | 618 | ||
265 | return dev->mt->submit_io(tgt_dev, rqd); | 619 | if (!dev->ops->submit_io) |
620 | return -ENODEV; | ||
621 | |||
622 | nvm_rq_tgt_to_dev(tgt_dev, rqd); | ||
623 | |||
624 | rqd->dev = tgt_dev; | ||
625 | return dev->ops->submit_io(dev, rqd); | ||
266 | } | 626 | } |
267 | EXPORT_SYMBOL(nvm_submit_io); | 627 | EXPORT_SYMBOL(nvm_submit_io); |
268 | 628 | ||
269 | int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, int flags) | 629 | int nvm_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int flags) |
270 | { | 630 | { |
271 | struct nvm_dev *dev = tgt_dev->parent; | 631 | struct nvm_dev *dev = tgt_dev->parent; |
632 | struct nvm_rq rqd; | ||
633 | int ret; | ||
634 | |||
635 | if (!dev->ops->erase_block) | ||
636 | return 0; | ||
637 | |||
638 | nvm_map_to_dev(tgt_dev, ppas); | ||
639 | |||
640 | memset(&rqd, 0, sizeof(struct nvm_rq)); | ||
641 | |||
642 | ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, 1, 1); | ||
643 | if (ret) | ||
644 | return ret; | ||
645 | |||
646 | nvm_rq_tgt_to_dev(tgt_dev, &rqd); | ||
647 | |||
648 | rqd.flags = flags; | ||
649 | |||
650 | ret = dev->ops->erase_block(dev, &rqd); | ||
272 | 651 | ||
273 | return dev->mt->erase_blk(tgt_dev, p, flags); | 652 | nvm_free_rqd_ppalist(dev, &rqd); |
653 | |||
654 | return ret; | ||
274 | } | 655 | } |
275 | EXPORT_SYMBOL(nvm_erase_blk); | 656 | EXPORT_SYMBOL(nvm_erase_blk); |
276 | 657 | ||
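With the media manager folded into core, a target erases a block by passing nvm_erase_blk() an address in its own space; the function above takes care of the translation and of expanding the per-plane PPA list. A minimal sketch, where tgt_dev is the handle given to the target's init hook and blk_id is a placeholder block index:

    struct ppa_addr ppa;
    int ret;

    ppa.ppa = 0;
    ppa.g.ch = 0;           /* target-relative channel */
    ppa.g.lun = 2;          /* target-relative LUN */
    ppa.g.blk = blk_id;     /* hypothetical block index */

    ret = nvm_erase_blk(tgt_dev, &ppa, 0);  /* 0: no special erase flags assumed */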
@@ -289,46 +670,67 @@ EXPORT_SYMBOL(nvm_get_l2p_tbl); | |||
289 | int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len) | 670 | int nvm_get_area(struct nvm_tgt_dev *tgt_dev, sector_t *lba, sector_t len) |
290 | { | 671 | { |
291 | struct nvm_dev *dev = tgt_dev->parent; | 672 | struct nvm_dev *dev = tgt_dev->parent; |
673 | struct nvm_geo *geo = &dev->geo; | ||
674 | struct nvm_area *area, *prev, *next; | ||
675 | sector_t begin = 0; | ||
676 | sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9; | ||
292 | 677 | ||
293 | return dev->mt->get_area(dev, lba, len); | 678 | if (len > max_sectors) |
294 | } | 679 | return -EINVAL; |
295 | EXPORT_SYMBOL(nvm_get_area); | ||
296 | 680 | ||
297 | void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t lba) | 681 | area = kmalloc(sizeof(struct nvm_area), GFP_KERNEL); |
298 | { | 682 | if (!area) |
299 | struct nvm_dev *dev = tgt_dev->parent; | 683 | return -ENOMEM; |
300 | 684 | ||
301 | dev->mt->put_area(dev, lba); | 685 | prev = NULL; |
302 | } | ||
303 | EXPORT_SYMBOL(nvm_put_area); | ||
304 | 686 | ||
305 | void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd) | 687 | spin_lock(&dev->lock); |
306 | { | 688 | list_for_each_entry(next, &dev->area_list, list) { |
307 | int i; | 689 | if (begin + len > next->begin) { |
690 | begin = next->end; | ||
691 | prev = next; | ||
692 | continue; | ||
693 | } | ||
694 | break; | ||
695 | } | ||
308 | 696 | ||
309 | if (rqd->nr_ppas > 1) { | 697 | if ((begin + len) > max_sectors) { |
310 | for (i = 0; i < rqd->nr_ppas; i++) | 698 | spin_unlock(&dev->lock); |
311 | rqd->ppa_list[i] = dev_to_generic_addr(dev, | 699 | kfree(area); |
312 | rqd->ppa_list[i]); | 700 | return -EINVAL; |
313 | } else { | ||
314 | rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr); | ||
315 | } | 701 | } |
702 | |||
703 | area->begin = *lba = begin; | ||
704 | area->end = begin + len; | ||
705 | |||
706 | if (prev) /* insert into sorted order */ | ||
707 | list_add(&area->list, &prev->list); | ||
708 | else | ||
709 | list_add(&area->list, &dev->area_list); | ||
710 | spin_unlock(&dev->lock); | ||
711 | |||
712 | return 0; | ||
316 | } | 713 | } |
317 | EXPORT_SYMBOL(nvm_addr_to_generic_mode); | 714 | EXPORT_SYMBOL(nvm_get_area); |
318 | 715 | ||
319 | void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) | 716 | void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) |
320 | { | 717 | { |
321 | int i; | 718 | struct nvm_dev *dev = tgt_dev->parent; |
719 | struct nvm_area *area; | ||
322 | 720 | ||
323 | if (rqd->nr_ppas > 1) { | 721 | spin_lock(&dev->lock); |
324 | for (i = 0; i < rqd->nr_ppas; i++) | 722 | list_for_each_entry(area, &dev->area_list, list) { |
325 | rqd->ppa_list[i] = generic_to_dev_addr(dev, | 723 | if (area->begin != begin) |
326 | rqd->ppa_list[i]); | 724 | continue; |
327 | } else { | 725 | |
328 | rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); | 726 | list_del(&area->list); |
727 | spin_unlock(&dev->lock); | ||
728 | kfree(area); | ||
729 | return; | ||
329 | } | 730 | } |
731 | spin_unlock(&dev->lock); | ||
330 | } | 732 | } |
331 | EXPORT_SYMBOL(nvm_generic_to_addr_mode); | 733 | EXPORT_SYMBOL(nvm_put_area); |
332 | 734 | ||
333 | int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, | 735 | int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd, |
334 | const struct ppa_addr *ppas, int nr_ppas, int vblk) | 736 | const struct ppa_addr *ppas, int nr_ppas, int vblk) |
@@ -380,149 +782,19 @@ void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd) | |||
380 | } | 782 | } |
381 | EXPORT_SYMBOL(nvm_free_rqd_ppalist); | 783 | EXPORT_SYMBOL(nvm_free_rqd_ppalist); |
382 | 784 | ||
383 | int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, | 785 | void nvm_end_io(struct nvm_rq *rqd) |
384 | int flags) | ||
385 | { | 786 | { |
386 | struct nvm_rq rqd; | 787 | struct nvm_tgt_dev *tgt_dev = rqd->dev; |
387 | int ret; | ||
388 | 788 | ||
389 | if (!dev->ops->erase_block) | 789 | /* Convert address space */ |
390 | return 0; | 790 | if (tgt_dev) |
791 | nvm_rq_dev_to_tgt(tgt_dev, rqd); | ||
391 | 792 | ||
392 | memset(&rqd, 0, sizeof(struct nvm_rq)); | 793 | if (rqd->end_io) |
393 | 794 | rqd->end_io(rqd); | |
394 | ret = nvm_set_rqd_ppalist(dev, &rqd, ppas, nr_ppas, 1); | ||
395 | if (ret) | ||
396 | return ret; | ||
397 | |||
398 | nvm_generic_to_addr_mode(dev, &rqd); | ||
399 | |||
400 | rqd.flags = flags; | ||
401 | |||
402 | ret = dev->ops->erase_block(dev, &rqd); | ||
403 | |||
404 | nvm_free_rqd_ppalist(dev, &rqd); | ||
405 | |||
406 | return ret; | ||
407 | } | ||
408 | EXPORT_SYMBOL(nvm_erase_ppa); | ||
409 | |||
410 | void nvm_end_io(struct nvm_rq *rqd, int error) | ||
411 | { | ||
412 | rqd->error = error; | ||
413 | rqd->end_io(rqd); | ||
414 | } | 795 | } |
415 | EXPORT_SYMBOL(nvm_end_io); | 796 | EXPORT_SYMBOL(nvm_end_io); |
416 | 797 | ||
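The completion path is now symmetric with submission: nvm_submit_io() maps the rqd's PPAs into device space, records rqd->dev and calls the driver's submit_io(); when the command completes, the driver calls nvm_end_io(), which maps the PPAs back and invokes the target's rqd->end_io callback. A hedged sketch of the target side, using the nvm_rq fields seen above (foo_* names are hypothetical, and rqd/bio are assumed to be set up by the target's make_rq path):

    static void foo_end_io(struct nvm_rq *rqd)
    {
            /* PPAs in rqd are back in target address space at this point;
             * rqd->error carries the driver-reported status. */
            if (rqd->error)
                    pr_err("foo: I/O failed: %d\n", rqd->error);
            /* complete the original bio, release per-request resources, ... */
    }

    /* submission side */
    rqd->opcode = NVM_OP_PWRITE;
    rqd->bio = bio;
    rqd->end_io = foo_end_io;
    ret = nvm_submit_io(tgt_dev, rqd);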
417 | static void nvm_end_io_sync(struct nvm_rq *rqd) | ||
418 | { | ||
419 | struct completion *waiting = rqd->wait; | ||
420 | |||
421 | rqd->wait = NULL; | ||
422 | |||
423 | complete(waiting); | ||
424 | } | ||
425 | |||
426 | static int __nvm_submit_ppa(struct nvm_dev *dev, struct nvm_rq *rqd, int opcode, | ||
427 | int flags, void *buf, int len) | ||
428 | { | ||
429 | DECLARE_COMPLETION_ONSTACK(wait); | ||
430 | struct bio *bio; | ||
431 | int ret; | ||
432 | unsigned long hang_check; | ||
433 | |||
434 | bio = bio_map_kern(dev->q, buf, len, GFP_KERNEL); | ||
435 | if (IS_ERR_OR_NULL(bio)) | ||
436 | return -ENOMEM; | ||
437 | |||
438 | nvm_generic_to_addr_mode(dev, rqd); | ||
439 | |||
440 | rqd->dev = NULL; | ||
441 | rqd->opcode = opcode; | ||
442 | rqd->flags = flags; | ||
443 | rqd->bio = bio; | ||
444 | rqd->wait = &wait; | ||
445 | rqd->end_io = nvm_end_io_sync; | ||
446 | |||
447 | ret = dev->ops->submit_io(dev, rqd); | ||
448 | if (ret) { | ||
449 | bio_put(bio); | ||
450 | return ret; | ||
451 | } | ||
452 | |||
453 | /* Prevent hang_check timer from firing at us during very long I/O */ | ||
454 | hang_check = sysctl_hung_task_timeout_secs; | ||
455 | if (hang_check) | ||
456 | while (!wait_for_completion_io_timeout(&wait, | ||
457 | hang_check * (HZ/2))) | ||
458 | ; | ||
459 | else | ||
460 | wait_for_completion_io(&wait); | ||
461 | |||
462 | return rqd->error; | ||
463 | } | ||
464 | |||
465 | /** | ||
466 | * nvm_submit_ppa_list - submit user-defined ppa list to device. The user must | ||
467 | * take to free ppa list if necessary. | ||
468 | * @dev: device | ||
469 | * @ppa_list: user created ppa_list | ||
470 | * @nr_ppas: length of ppa_list | ||
471 | * @opcode: device opcode | ||
472 | * @flags: device flags | ||
473 | * @buf: data buffer | ||
474 | * @len: data buffer length | ||
475 | */ | ||
476 | int nvm_submit_ppa_list(struct nvm_dev *dev, struct ppa_addr *ppa_list, | ||
477 | int nr_ppas, int opcode, int flags, void *buf, int len) | ||
478 | { | ||
479 | struct nvm_rq rqd; | ||
480 | |||
481 | if (dev->ops->max_phys_sect < nr_ppas) | ||
482 | return -EINVAL; | ||
483 | |||
484 | memset(&rqd, 0, sizeof(struct nvm_rq)); | ||
485 | |||
486 | rqd.nr_ppas = nr_ppas; | ||
487 | if (nr_ppas > 1) | ||
488 | rqd.ppa_list = ppa_list; | ||
489 | else | ||
490 | rqd.ppa_addr = ppa_list[0]; | ||
491 | |||
492 | return __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len); | ||
493 | } | ||
494 | EXPORT_SYMBOL(nvm_submit_ppa_list); | ||
495 | |||
496 | /** | ||
497 | * nvm_submit_ppa - submit PPAs to device. PPAs will automatically be unfolded | ||
498 | * as single, dual, quad plane PPAs depending on device type. | ||
499 | * @dev: device | ||
500 | * @ppa: user created ppa_list | ||
501 | * @nr_ppas: length of ppa_list | ||
502 | * @opcode: device opcode | ||
503 | * @flags: device flags | ||
504 | * @buf: data buffer | ||
505 | * @len: data buffer length | ||
506 | */ | ||
507 | int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas, | ||
508 | int opcode, int flags, void *buf, int len) | ||
509 | { | ||
510 | struct nvm_rq rqd; | ||
511 | int ret; | ||
512 | |||
513 | memset(&rqd, 0, sizeof(struct nvm_rq)); | ||
514 | ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas, 1); | ||
515 | if (ret) | ||
516 | return ret; | ||
517 | |||
518 | ret = __nvm_submit_ppa(dev, &rqd, opcode, flags, buf, len); | ||
519 | |||
520 | nvm_free_rqd_ppalist(dev, &rqd); | ||
521 | |||
522 | return ret; | ||
523 | } | ||
524 | EXPORT_SYMBOL(nvm_submit_ppa); | ||
525 | |||
526 | /* | 798 | /* |
527 | * folds a bad block list from its plane representation to its virtual | 799 | * folds a bad block list from its plane representation to its virtual |
528 | * block representation. The fold is done in place and reduced size is | 800 | * block representation. The fold is done in place and reduced size is |
@@ -559,21 +831,14 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) | |||
559 | } | 831 | } |
560 | EXPORT_SYMBOL(nvm_bb_tbl_fold); | 832 | EXPORT_SYMBOL(nvm_bb_tbl_fold); |
561 | 833 | ||
562 | int nvm_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa, u8 *blks) | ||
563 | { | ||
564 | ppa = generic_to_dev_addr(dev, ppa); | ||
565 | |||
566 | return dev->ops->get_bb_tbl(dev, ppa, blks); | ||
567 | } | ||
568 | EXPORT_SYMBOL(nvm_get_bb_tbl); | ||
569 | |||
570 | int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, | 834 | int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa, |
571 | u8 *blks) | 835 | u8 *blks) |
572 | { | 836 | { |
573 | struct nvm_dev *dev = tgt_dev->parent; | 837 | struct nvm_dev *dev = tgt_dev->parent; |
574 | 838 | ||
575 | ppa = dev->mt->trans_ppa(tgt_dev, ppa, TRANS_TGT_TO_DEV); | 839 | nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); |
576 | return nvm_get_bb_tbl(dev, ppa, blks); | 840 | |
841 | return dev->ops->get_bb_tbl(dev, ppa, blks); | ||
577 | } | 842 | } |
578 | EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); | 843 | EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); |
579 | 844 | ||
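nvm_get_tgt_bb_tbl() takes a target-space channel/LUN address, translates it, and queries the driver's per-plane bad-block table, which is then usually collapsed with nvm_bb_tbl_fold() above. Roughly how a target scans one of its LUNs at init time, modeled on rrpc's bad-block discovery; sizing the buffer by plane_mode follows that convention and is an assumption here:

    struct nvm_geo *geo = &tgt_dev->geo;
    int nr_blks = geo->blks_per_lun * geo->plane_mode;
    u8 *blks = kmalloc(nr_blks, GFP_KERNEL);
    struct ppa_addr ppa;
    int ret;

    if (!blks)
            return -ENOMEM;

    ppa.ppa = 0;
    ppa.g.ch = 0;           /* target-relative channel */
    ppa.g.lun = 0;          /* target-relative LUN */

    ret = nvm_get_tgt_bb_tbl(tgt_dev, ppa, blks);
    if (!ret)
            ret = nvm_bb_tbl_fold(tgt_dev->parent, blks, nr_blks);
    /* after a successful fold, blks[] holds one entry per block;
     * non-zero entries mark blocks that are bad or reserved */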
@@ -627,7 +892,7 @@ static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp) | |||
627 | static int nvm_core_init(struct nvm_dev *dev) | 892 | static int nvm_core_init(struct nvm_dev *dev) |
628 | { | 893 | { |
629 | struct nvm_id *id = &dev->identity; | 894 | struct nvm_id *id = &dev->identity; |
630 | struct nvm_id_group *grp = &id->groups[0]; | 895 | struct nvm_id_group *grp = &id->grp; |
631 | struct nvm_geo *geo = &dev->geo; | 896 | struct nvm_geo *geo = &dev->geo; |
632 | int ret; | 897 | int ret; |
633 | 898 | ||
@@ -691,36 +956,31 @@ static int nvm_core_init(struct nvm_dev *dev) | |||
691 | goto err_fmtype; | 956 | goto err_fmtype; |
692 | } | 957 | } |
693 | 958 | ||
959 | INIT_LIST_HEAD(&dev->area_list); | ||
960 | INIT_LIST_HEAD(&dev->targets); | ||
694 | mutex_init(&dev->mlock); | 961 | mutex_init(&dev->mlock); |
695 | spin_lock_init(&dev->lock); | 962 | spin_lock_init(&dev->lock); |
696 | 963 | ||
697 | blk_queue_logical_block_size(dev->q, geo->sec_size); | 964 | ret = nvm_register_map(dev); |
965 | if (ret) | ||
966 | goto err_fmtype; | ||
698 | 967 | ||
968 | blk_queue_logical_block_size(dev->q, geo->sec_size); | ||
699 | return 0; | 969 | return 0; |
700 | err_fmtype: | 970 | err_fmtype: |
701 | kfree(dev->lun_map); | 971 | kfree(dev->lun_map); |
702 | return ret; | 972 | return ret; |
703 | } | 973 | } |
704 | 974 | ||
705 | static void nvm_free_mgr(struct nvm_dev *dev) | ||
706 | { | ||
707 | if (!dev->mt) | ||
708 | return; | ||
709 | |||
710 | dev->mt->unregister_mgr(dev); | ||
711 | dev->mt = NULL; | ||
712 | } | ||
713 | |||
714 | void nvm_free(struct nvm_dev *dev) | 975 | void nvm_free(struct nvm_dev *dev) |
715 | { | 976 | { |
716 | if (!dev) | 977 | if (!dev) |
717 | return; | 978 | return; |
718 | 979 | ||
719 | nvm_free_mgr(dev); | ||
720 | |||
721 | if (dev->dma_pool) | 980 | if (dev->dma_pool) |
722 | dev->ops->destroy_dma_pool(dev->dma_pool); | 981 | dev->ops->destroy_dma_pool(dev->dma_pool); |
723 | 982 | ||
983 | kfree(dev->rmap); | ||
724 | kfree(dev->lptbl); | 984 | kfree(dev->lptbl); |
725 | kfree(dev->lun_map); | 985 | kfree(dev->lun_map); |
726 | kfree(dev); | 986 | kfree(dev); |
@@ -731,28 +991,19 @@ static int nvm_init(struct nvm_dev *dev) | |||
731 | struct nvm_geo *geo = &dev->geo; | 991 | struct nvm_geo *geo = &dev->geo; |
732 | int ret = -EINVAL; | 992 | int ret = -EINVAL; |
733 | 993 | ||
734 | if (!dev->q || !dev->ops) | ||
735 | return ret; | ||
736 | |||
737 | if (dev->ops->identity(dev, &dev->identity)) { | 994 | if (dev->ops->identity(dev, &dev->identity)) { |
738 | pr_err("nvm: device could not be identified\n"); | 995 | pr_err("nvm: device could not be identified\n"); |
739 | goto err; | 996 | goto err; |
740 | } | 997 | } |
741 | 998 | ||
742 | pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n", | 999 | pr_debug("nvm: ver:%x nvm_vendor:%x\n", |
743 | dev->identity.ver_id, dev->identity.vmnt, | 1000 | dev->identity.ver_id, dev->identity.vmnt); |
744 | dev->identity.cgrps); | ||
745 | 1001 | ||
746 | if (dev->identity.ver_id != 1) { | 1002 | if (dev->identity.ver_id != 1) { |
747 | pr_err("nvm: device not supported by kernel."); | 1003 | pr_err("nvm: device not supported by kernel."); |
748 | goto err; | 1004 | goto err; |
749 | } | 1005 | } |
750 | 1006 | ||
751 | if (dev->identity.cgrps != 1) { | ||
752 | pr_err("nvm: only one group configuration supported."); | ||
753 | goto err; | ||
754 | } | ||
755 | |||
756 | ret = nvm_core_init(dev); | 1007 | ret = nvm_core_init(dev); |
757 | if (ret) { | 1008 | if (ret) { |
758 | pr_err("nvm: could not initialize core structures.\n"); | 1009 | pr_err("nvm: could not initialize core structures.\n"); |
@@ -779,49 +1030,50 @@ int nvm_register(struct nvm_dev *dev) | |||
779 | { | 1030 | { |
780 | int ret; | 1031 | int ret; |
781 | 1032 | ||
782 | ret = nvm_init(dev); | 1033 | if (!dev->q || !dev->ops) |
783 | if (ret) | 1034 | return -EINVAL; |
784 | goto err_init; | ||
785 | 1035 | ||
786 | if (dev->ops->max_phys_sect > 256) { | 1036 | if (dev->ops->max_phys_sect > 256) { |
787 | pr_info("nvm: max sectors supported is 256.\n"); | 1037 | pr_info("nvm: max sectors supported is 256.\n"); |
788 | ret = -EINVAL; | 1038 | return -EINVAL; |
789 | goto err_init; | ||
790 | } | 1039 | } |
791 | 1040 | ||
792 | if (dev->ops->max_phys_sect > 1) { | 1041 | if (dev->ops->max_phys_sect > 1) { |
793 | dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); | 1042 | dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); |
794 | if (!dev->dma_pool) { | 1043 | if (!dev->dma_pool) { |
795 | pr_err("nvm: could not create dma pool\n"); | 1044 | pr_err("nvm: could not create dma pool\n"); |
796 | ret = -ENOMEM; | 1045 | return -ENOMEM; |
797 | goto err_init; | ||
798 | } | 1046 | } |
799 | } | 1047 | } |
800 | 1048 | ||
801 | if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { | 1049 | ret = nvm_init(dev); |
802 | ret = nvm_get_sysblock(dev, &dev->sb); | 1050 | if (ret) |
803 | if (!ret) | 1051 | goto err_init; |
804 | pr_err("nvm: device not initialized.\n"); | ||
805 | else if (ret < 0) | ||
806 | pr_err("nvm: err (%d) on device initialization\n", ret); | ||
807 | } | ||
808 | 1052 | ||
809 | /* register device with a supported media manager */ | 1053 | /* register device with a supported media manager */ |
810 | down_write(&nvm_lock); | 1054 | down_write(&nvm_lock); |
811 | if (ret > 0) | ||
812 | dev->mt = nvm_init_mgr(dev); | ||
813 | list_add(&dev->devices, &nvm_devices); | 1055 | list_add(&dev->devices, &nvm_devices); |
814 | up_write(&nvm_lock); | 1056 | up_write(&nvm_lock); |
815 | 1057 | ||
816 | return 0; | 1058 | return 0; |
817 | err_init: | 1059 | err_init: |
818 | kfree(dev->lun_map); | 1060 | dev->ops->destroy_dma_pool(dev->dma_pool); |
819 | return ret; | 1061 | return ret; |
820 | } | 1062 | } |
821 | EXPORT_SYMBOL(nvm_register); | 1063 | EXPORT_SYMBOL(nvm_register); |
822 | 1064 | ||
823 | void nvm_unregister(struct nvm_dev *dev) | 1065 | void nvm_unregister(struct nvm_dev *dev) |
824 | { | 1066 | { |
1067 | struct nvm_target *t, *tmp; | ||
1068 | |||
1069 | mutex_lock(&dev->mlock); | ||
1070 | list_for_each_entry_safe(t, tmp, &dev->targets, list) { | ||
1071 | if (t->dev->parent != dev) | ||
1072 | continue; | ||
1073 | __nvm_remove_target(t); | ||
1074 | } | ||
1075 | mutex_unlock(&dev->mlock); | ||
1076 | |||
825 | down_write(&nvm_lock); | 1077 | down_write(&nvm_lock); |
826 | list_del(&dev->devices); | 1078 | list_del(&dev->devices); |
827 | up_write(&nvm_lock); | 1079 | up_write(&nvm_lock); |
@@ -844,24 +1096,24 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) | |||
844 | return -EINVAL; | 1096 | return -EINVAL; |
845 | } | 1097 | } |
846 | 1098 | ||
847 | if (!dev->mt) { | ||
848 | pr_info("nvm: device has no media manager registered.\n"); | ||
849 | return -ENODEV; | ||
850 | } | ||
851 | |||
852 | if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { | 1099 | if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { |
853 | pr_err("nvm: config type not valid\n"); | 1100 | pr_err("nvm: config type not valid\n"); |
854 | return -EINVAL; | 1101 | return -EINVAL; |
855 | } | 1102 | } |
856 | s = &create->conf.s; | 1103 | s = &create->conf.s; |
857 | 1104 | ||
858 | if (s->lun_begin > s->lun_end || s->lun_end > dev->geo.nr_luns) { | 1105 | if (s->lun_begin == -1 && s->lun_end == -1) { |
1106 | s->lun_begin = 0; | ||
1107 | s->lun_end = dev->geo.nr_luns - 1; | ||
1108 | } | ||
1109 | |||
1110 | if (s->lun_begin > s->lun_end || s->lun_end >= dev->geo.nr_luns) { | ||
859 | pr_err("nvm: lun out of bound (%u:%u > %u)\n", | 1111 | pr_err("nvm: lun out of bound (%u:%u > %u)\n", |
860 | s->lun_begin, s->lun_end, dev->geo.nr_luns); | 1112 | s->lun_begin, s->lun_end, dev->geo.nr_luns - 1); |
861 | return -EINVAL; | 1113 | return -EINVAL; |
862 | } | 1114 | } |
863 | 1115 | ||
864 | return dev->mt->create_tgt(dev, create); | 1116 | return nvm_create_tgt(dev, create); |
865 | } | 1117 | } |
866 | 1118 | ||
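The hunk above also adds a convenience on the ioctl path: passing lun_begin = lun_end = -1 in the simple config now means "give the target the whole device". A hedged userspace sketch of such a request, assuming the UAPI in include/uapi/linux/lightnvm.h and the /dev/lightnvm/control node used by the lnvm tooling (device and target names are made up):

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/lightnvm.h>

    int create_whole_device_target(void)
    {
            struct nvm_ioctl_create c;
            int fd, ret;

            memset(&c, 0, sizeof(c));
            strncpy(c.dev, "nvme0n1", DISK_NAME_LEN - 1);           /* hypothetical device */
            strncpy(c.tgttype, "rrpc", NVM_TTYPE_NAME_MAX - 1);
            strncpy(c.tgtname, "mydev", DISK_NAME_LEN - 1);
            c.conf.type = NVM_CONFIG_TYPE_SIMPLE;
            c.conf.s.lun_begin = -1;        /* new: -1/-1 selects all LUNs */
            c.conf.s.lun_end = -1;

            fd = open("/dev/lightnvm/control", O_RDWR);
            if (fd < 0)
                    return -1;
            ret = ioctl(fd, NVM_DEV_CREATE, &c);
            close(fd);
            return ret;
    }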
867 | static long nvm_ioctl_info(struct file *file, void __user *arg) | 1119 | static long nvm_ioctl_info(struct file *file, void __user *arg) |
@@ -923,16 +1175,14 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg) | |||
923 | struct nvm_ioctl_device_info *info = &devices->info[i]; | 1175 | struct nvm_ioctl_device_info *info = &devices->info[i]; |
924 | 1176 | ||
925 | sprintf(info->devname, "%s", dev->name); | 1177 | sprintf(info->devname, "%s", dev->name); |
926 | if (dev->mt) { | ||
927 | info->bmversion[0] = dev->mt->version[0]; | ||
928 | info->bmversion[1] = dev->mt->version[1]; | ||
929 | info->bmversion[2] = dev->mt->version[2]; | ||
930 | sprintf(info->bmname, "%s", dev->mt->name); | ||
931 | } else { | ||
932 | sprintf(info->bmname, "none"); | ||
933 | } | ||
934 | 1178 | ||
1179 | /* kept for compatibility */ | ||
1180 | info->bmversion[0] = 1; | ||
1181 | info->bmversion[1] = 0; | ||
1182 | info->bmversion[2] = 0; | ||
1183 | sprintf(info->bmname, "%s", "gennvm"); | ||
935 | i++; | 1184 | i++; |
1185 | |||
936 | if (i > 31) { | 1186 | if (i > 31) { |
937 | pr_err("nvm: max 31 devices can be reported.\n"); | 1187 | pr_err("nvm: max 31 devices can be reported.\n"); |
938 | break; | 1188 | break; |
@@ -994,7 +1244,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) | |||
994 | } | 1244 | } |
995 | 1245 | ||
996 | list_for_each_entry(dev, &nvm_devices, devices) { | 1246 | list_for_each_entry(dev, &nvm_devices, devices) { |
997 | ret = dev->mt->remove_tgt(dev, &remove); | 1247 | ret = nvm_remove_tgt(dev, &remove); |
998 | if (!ret) | 1248 | if (!ret) |
999 | break; | 1249 | break; |
1000 | } | 1250 | } |
@@ -1002,47 +1252,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) | |||
1002 | return ret; | 1252 | return ret; |
1003 | } | 1253 | } |
1004 | 1254 | ||
1005 | static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info) | 1255 | /* kept for compatibility reasons */ |
1006 | { | ||
1007 | info->seqnr = 1; | ||
1008 | info->erase_cnt = 0; | ||
1009 | info->version = 1; | ||
1010 | } | ||
1011 | |||
1012 | static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init) | ||
1013 | { | ||
1014 | struct nvm_dev *dev; | ||
1015 | struct nvm_sb_info info; | ||
1016 | int ret; | ||
1017 | |||
1018 | down_write(&nvm_lock); | ||
1019 | dev = nvm_find_nvm_dev(init->dev); | ||
1020 | up_write(&nvm_lock); | ||
1021 | if (!dev) { | ||
1022 | pr_err("nvm: device not found\n"); | ||
1023 | return -EINVAL; | ||
1024 | } | ||
1025 | |||
1026 | nvm_setup_nvm_sb_info(&info); | ||
1027 | |||
1028 | strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN); | ||
1029 | info.fs_ppa.ppa = -1; | ||
1030 | |||
1031 | if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) { | ||
1032 | ret = nvm_init_sysblock(dev, &info); | ||
1033 | if (ret) | ||
1034 | return ret; | ||
1035 | } | ||
1036 | |||
1037 | memcpy(&dev->sb, &info, sizeof(struct nvm_sb_info)); | ||
1038 | |||
1039 | down_write(&nvm_lock); | ||
1040 | dev->mt = nvm_init_mgr(dev); | ||
1041 | up_write(&nvm_lock); | ||
1042 | |||
1043 | return 0; | ||
1044 | } | ||
1045 | |||
1046 | static long nvm_ioctl_dev_init(struct file *file, void __user *arg) | 1256 | static long nvm_ioctl_dev_init(struct file *file, void __user *arg) |
1047 | { | 1257 | { |
1048 | struct nvm_ioctl_dev_init init; | 1258 | struct nvm_ioctl_dev_init init; |
@@ -1058,15 +1268,13 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg) | |||
1058 | return -EINVAL; | 1268 | return -EINVAL; |
1059 | } | 1269 | } |
1060 | 1270 | ||
1061 | init.dev[DISK_NAME_LEN - 1] = '\0'; | 1271 | return 0; |
1062 | |||
1063 | return __nvm_ioctl_dev_init(&init); | ||
1064 | } | 1272 | } |
1065 | 1273 | ||
1274 | /* Kept for compatibility reasons */ | ||
1066 | static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) | 1275 | static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) |
1067 | { | 1276 | { |
1068 | struct nvm_ioctl_dev_factory fact; | 1277 | struct nvm_ioctl_dev_factory fact; |
1069 | struct nvm_dev *dev; | ||
1070 | 1278 | ||
1071 | if (!capable(CAP_SYS_ADMIN)) | 1279 | if (!capable(CAP_SYS_ADMIN)) |
1072 | return -EPERM; | 1280 | return -EPERM; |
@@ -1079,19 +1287,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) | |||
1079 | if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1)) | 1287 | if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1)) |
1080 | return -EINVAL; | 1288 | return -EINVAL; |
1081 | 1289 | ||
1082 | down_write(&nvm_lock); | ||
1083 | dev = nvm_find_nvm_dev(fact.dev); | ||
1084 | up_write(&nvm_lock); | ||
1085 | if (!dev) { | ||
1086 | pr_err("nvm: device not found\n"); | ||
1087 | return -EINVAL; | ||
1088 | } | ||
1089 | |||
1090 | nvm_free_mgr(dev); | ||
1091 | |||
1092 | if (dev->identity.cap & NVM_ID_DCAP_BBLKMGMT) | ||
1093 | return nvm_dev_factory(dev, fact.flags); | ||
1094 | |||
1095 | return 0; | 1290 | return 0; |
1096 | } | 1291 | } |
1097 | 1292 | ||
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c deleted file mode 100644 index ca7880082d80..000000000000 --- a/drivers/lightnvm/gennvm.c +++ /dev/null | |||
@@ -1,657 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Matias Bjorling <m@bjorling.me> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License version | ||
6 | * 2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; see the file COPYING. If not, write to | ||
15 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
16 | * USA. | ||
17 | * | ||
18 | * Implementation of a general nvm manager for Open-Channel SSDs. | ||
19 | */ | ||
20 | |||
21 | #include "gennvm.h" | ||
22 | |||
23 | static struct nvm_target *gen_find_target(struct gen_dev *gn, const char *name) | ||
24 | { | ||
25 | struct nvm_target *tgt; | ||
26 | |||
27 | list_for_each_entry(tgt, &gn->targets, list) | ||
28 | if (!strcmp(name, tgt->disk->disk_name)) | ||
29 | return tgt; | ||
30 | |||
31 | return NULL; | ||
32 | } | ||
33 | |||
34 | static const struct block_device_operations gen_fops = { | ||
35 | .owner = THIS_MODULE, | ||
36 | }; | ||
37 | |||
38 | static int gen_reserve_luns(struct nvm_dev *dev, struct nvm_target *t, | ||
39 | int lun_begin, int lun_end) | ||
40 | { | ||
41 | int i; | ||
42 | |||
43 | for (i = lun_begin; i <= lun_end; i++) { | ||
44 | if (test_and_set_bit(i, dev->lun_map)) { | ||
45 | pr_err("nvm: lun %d already allocated\n", i); | ||
46 | goto err; | ||
47 | } | ||
48 | } | ||
49 | |||
50 | return 0; | ||
51 | |||
52 | err: | ||
53 | while (--i > lun_begin) | ||
54 | clear_bit(i, dev->lun_map); | ||
55 | |||
56 | return -EBUSY; | ||
57 | } | ||
58 | |||
59 | static void gen_release_luns_err(struct nvm_dev *dev, int lun_begin, | ||
60 | int lun_end) | ||
61 | { | ||
62 | int i; | ||
63 | |||
64 | for (i = lun_begin; i <= lun_end; i++) | ||
65 | WARN_ON(!test_and_clear_bit(i, dev->lun_map)); | ||
66 | } | ||
67 | |||
68 | static void gen_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev) | ||
69 | { | ||
70 | struct nvm_dev *dev = tgt_dev->parent; | ||
71 | struct gen_dev_map *dev_map = tgt_dev->map; | ||
72 | int i, j; | ||
73 | |||
74 | for (i = 0; i < dev_map->nr_chnls; i++) { | ||
75 | struct gen_ch_map *ch_map = &dev_map->chnls[i]; | ||
76 | int *lun_offs = ch_map->lun_offs; | ||
77 | int ch = i + ch_map->ch_off; | ||
78 | |||
79 | for (j = 0; j < ch_map->nr_luns; j++) { | ||
80 | int lun = j + lun_offs[j]; | ||
81 | int lunid = (ch * dev->geo.luns_per_chnl) + lun; | ||
82 | |||
83 | WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); | ||
84 | } | ||
85 | |||
86 | kfree(ch_map->lun_offs); | ||
87 | } | ||
88 | |||
89 | kfree(dev_map->chnls); | ||
90 | kfree(dev_map); | ||
91 | kfree(tgt_dev->luns); | ||
92 | kfree(tgt_dev); | ||
93 | } | ||
94 | |||
95 | static struct nvm_tgt_dev *gen_create_tgt_dev(struct nvm_dev *dev, | ||
96 | int lun_begin, int lun_end) | ||
97 | { | ||
98 | struct nvm_tgt_dev *tgt_dev = NULL; | ||
99 | struct gen_dev_map *dev_rmap = dev->rmap; | ||
100 | struct gen_dev_map *dev_map; | ||
101 | struct ppa_addr *luns; | ||
102 | int nr_luns = lun_end - lun_begin + 1; | ||
103 | int luns_left = nr_luns; | ||
104 | int nr_chnls = nr_luns / dev->geo.luns_per_chnl; | ||
105 | int nr_chnls_mod = nr_luns % dev->geo.luns_per_chnl; | ||
106 | int bch = lun_begin / dev->geo.luns_per_chnl; | ||
107 | int blun = lun_begin % dev->geo.luns_per_chnl; | ||
108 | int lunid = 0; | ||
109 | int lun_balanced = 1; | ||
110 | int prev_nr_luns; | ||
111 | int i, j; | ||
112 | |||
113 | nr_chnls = nr_luns / dev->geo.luns_per_chnl; | ||
114 | nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; | ||
115 | |||
116 | dev_map = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL); | ||
117 | if (!dev_map) | ||
118 | goto err_dev; | ||
119 | |||
120 | dev_map->chnls = kcalloc(nr_chnls, sizeof(struct gen_ch_map), | ||
121 | GFP_KERNEL); | ||
122 | if (!dev_map->chnls) | ||
123 | goto err_chnls; | ||
124 | |||
125 | luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL); | ||
126 | if (!luns) | ||
127 | goto err_luns; | ||
128 | |||
129 | prev_nr_luns = (luns_left > dev->geo.luns_per_chnl) ? | ||
130 | dev->geo.luns_per_chnl : luns_left; | ||
131 | for (i = 0; i < nr_chnls; i++) { | ||
132 | struct gen_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; | ||
133 | int *lun_roffs = ch_rmap->lun_offs; | ||
134 | struct gen_ch_map *ch_map = &dev_map->chnls[i]; | ||
135 | int *lun_offs; | ||
136 | int luns_in_chnl = (luns_left > dev->geo.luns_per_chnl) ? | ||
137 | dev->geo.luns_per_chnl : luns_left; | ||
138 | |||
139 | if (lun_balanced && prev_nr_luns != luns_in_chnl) | ||
140 | lun_balanced = 0; | ||
141 | |||
142 | ch_map->ch_off = ch_rmap->ch_off = bch; | ||
143 | ch_map->nr_luns = luns_in_chnl; | ||
144 | |||
145 | lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); | ||
146 | if (!lun_offs) | ||
147 | goto err_ch; | ||
148 | |||
149 | for (j = 0; j < luns_in_chnl; j++) { | ||
150 | luns[lunid].ppa = 0; | ||
151 | luns[lunid].g.ch = i; | ||
152 | luns[lunid++].g.lun = j; | ||
153 | |||
154 | lun_offs[j] = blun; | ||
155 | lun_roffs[j + blun] = blun; | ||
156 | } | ||
157 | |||
158 | ch_map->lun_offs = lun_offs; | ||
159 | |||
160 | /* when starting a new channel, lun offset is reset */ | ||
161 | blun = 0; | ||
162 | luns_left -= luns_in_chnl; | ||
163 | } | ||
164 | |||
165 | dev_map->nr_chnls = nr_chnls; | ||
166 | |||
167 | tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); | ||
168 | if (!tgt_dev) | ||
169 | goto err_ch; | ||
170 | |||
171 | memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); | ||
172 | /* Target device only owns a portion of the physical device */ | ||
173 | tgt_dev->geo.nr_chnls = nr_chnls; | ||
174 | tgt_dev->geo.nr_luns = nr_luns; | ||
175 | tgt_dev->geo.luns_per_chnl = (lun_balanced) ? prev_nr_luns : -1; | ||
176 | tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; | ||
177 | tgt_dev->q = dev->q; | ||
178 | tgt_dev->map = dev_map; | ||
179 | tgt_dev->luns = luns; | ||
180 | memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id)); | ||
181 | |||
182 | tgt_dev->parent = dev; | ||
183 | |||
184 | return tgt_dev; | ||
185 | err_ch: | ||
186 | while (--i > 0) | ||
187 | kfree(dev_map->chnls[i].lun_offs); | ||
188 | kfree(luns); | ||
189 | err_luns: | ||
190 | kfree(dev_map->chnls); | ||
191 | err_chnls: | ||
192 | kfree(dev_map); | ||
193 | err_dev: | ||
194 | return tgt_dev; | ||
195 | } | ||
196 | |||
197 | static int gen_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) | ||
198 | { | ||
199 | struct gen_dev *gn = dev->mp; | ||
200 | struct nvm_ioctl_create_simple *s = &create->conf.s; | ||
201 | struct request_queue *tqueue; | ||
202 | struct gendisk *tdisk; | ||
203 | struct nvm_tgt_type *tt; | ||
204 | struct nvm_target *t; | ||
205 | struct nvm_tgt_dev *tgt_dev; | ||
206 | void *targetdata; | ||
207 | |||
208 | tt = nvm_find_target_type(create->tgttype, 1); | ||
209 | if (!tt) { | ||
210 | pr_err("nvm: target type %s not found\n", create->tgttype); | ||
211 | return -EINVAL; | ||
212 | } | ||
213 | |||
214 | mutex_lock(&gn->lock); | ||
215 | t = gen_find_target(gn, create->tgtname); | ||
216 | if (t) { | ||
217 | pr_err("nvm: target name already exists.\n"); | ||
218 | mutex_unlock(&gn->lock); | ||
219 | return -EINVAL; | ||
220 | } | ||
221 | mutex_unlock(&gn->lock); | ||
222 | |||
223 | t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); | ||
224 | if (!t) | ||
225 | return -ENOMEM; | ||
226 | |||
227 | if (gen_reserve_luns(dev, t, s->lun_begin, s->lun_end)) | ||
228 | goto err_t; | ||
229 | |||
230 | tgt_dev = gen_create_tgt_dev(dev, s->lun_begin, s->lun_end); | ||
231 | if (!tgt_dev) { | ||
232 | pr_err("nvm: could not create target device\n"); | ||
233 | goto err_reserve; | ||
234 | } | ||
235 | |||
236 | tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); | ||
237 | if (!tqueue) | ||
238 | goto err_dev; | ||
239 | blk_queue_make_request(tqueue, tt->make_rq); | ||
240 | |||
241 | tdisk = alloc_disk(0); | ||
242 | if (!tdisk) | ||
243 | goto err_queue; | ||
244 | |||
245 | sprintf(tdisk->disk_name, "%s", create->tgtname); | ||
246 | tdisk->flags = GENHD_FL_EXT_DEVT; | ||
247 | tdisk->major = 0; | ||
248 | tdisk->first_minor = 0; | ||
249 | tdisk->fops = &gen_fops; | ||
250 | tdisk->queue = tqueue; | ||
251 | |||
252 | targetdata = tt->init(tgt_dev, tdisk); | ||
253 | if (IS_ERR(targetdata)) | ||
254 | goto err_init; | ||
255 | |||
256 | tdisk->private_data = targetdata; | ||
257 | tqueue->queuedata = targetdata; | ||
258 | |||
259 | blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); | ||
260 | |||
261 | set_capacity(tdisk, tt->capacity(targetdata)); | ||
262 | add_disk(tdisk); | ||
263 | |||
264 | t->type = tt; | ||
265 | t->disk = tdisk; | ||
266 | t->dev = tgt_dev; | ||
267 | |||
268 | mutex_lock(&gn->lock); | ||
269 | list_add_tail(&t->list, &gn->targets); | ||
270 | mutex_unlock(&gn->lock); | ||
271 | |||
272 | return 0; | ||
273 | err_init: | ||
274 | put_disk(tdisk); | ||
275 | err_queue: | ||
276 | blk_cleanup_queue(tqueue); | ||
277 | err_dev: | ||
278 | kfree(tgt_dev); | ||
279 | err_reserve: | ||
280 | gen_release_luns_err(dev, s->lun_begin, s->lun_end); | ||
281 | err_t: | ||
282 | kfree(t); | ||
283 | return -ENOMEM; | ||
284 | } | ||
285 | |||
286 | static void __gen_remove_target(struct nvm_target *t) | ||
287 | { | ||
288 | struct nvm_tgt_type *tt = t->type; | ||
289 | struct gendisk *tdisk = t->disk; | ||
290 | struct request_queue *q = tdisk->queue; | ||
291 | |||
292 | del_gendisk(tdisk); | ||
293 | blk_cleanup_queue(q); | ||
294 | |||
295 | if (tt->exit) | ||
296 | tt->exit(tdisk->private_data); | ||
297 | |||
298 | gen_remove_tgt_dev(t->dev); | ||
299 | put_disk(tdisk); | ||
300 | |||
301 | list_del(&t->list); | ||
302 | kfree(t); | ||
303 | } | ||
304 | |||
305 | /** | ||
306 | * gen_remove_tgt - Removes a target from the media manager | ||
307 | * @dev: device | ||
308 | * @remove: ioctl structure with target name to remove. | ||
309 | * | ||
310 | * Returns: | ||
311 | * 0: on success | ||
312 | * 1: on not found | ||
313 | * <0: on error | ||
314 | */ | ||
315 | static int gen_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) | ||
316 | { | ||
317 | struct gen_dev *gn = dev->mp; | ||
318 | struct nvm_target *t; | ||
319 | |||
320 | if (!gn) | ||
321 | return 1; | ||
322 | |||
323 | mutex_lock(&gn->lock); | ||
324 | t = gen_find_target(gn, remove->tgtname); | ||
325 | if (!t) { | ||
326 | mutex_unlock(&gn->lock); | ||
327 | return 1; | ||
328 | } | ||
329 | __gen_remove_target(t); | ||
330 | mutex_unlock(&gn->lock); | ||
331 | |||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | static int gen_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len) | ||
336 | { | ||
337 | struct nvm_geo *geo = &dev->geo; | ||
338 | struct gen_dev *gn = dev->mp; | ||
339 | struct gen_area *area, *prev, *next; | ||
340 | sector_t begin = 0; | ||
341 | sector_t max_sectors = (geo->sec_size * dev->total_secs) >> 9; | ||
342 | |||
343 | if (len > max_sectors) | ||
344 | return -EINVAL; | ||
345 | |||
346 | area = kmalloc(sizeof(struct gen_area), GFP_KERNEL); | ||
347 | if (!area) | ||
348 | return -ENOMEM; | ||
349 | |||
350 | prev = NULL; | ||
351 | |||
352 | spin_lock(&dev->lock); | ||
353 | list_for_each_entry(next, &gn->area_list, list) { | ||
354 | if (begin + len > next->begin) { | ||
355 | begin = next->end; | ||
356 | prev = next; | ||
357 | continue; | ||
358 | } | ||
359 | break; | ||
360 | } | ||
361 | |||
362 | if ((begin + len) > max_sectors) { | ||
363 | spin_unlock(&dev->lock); | ||
364 | kfree(area); | ||
365 | return -EINVAL; | ||
366 | } | ||
367 | |||
368 | area->begin = *lba = begin; | ||
369 | area->end = begin + len; | ||
370 | |||
371 | if (prev) /* insert into sorted order */ | ||
372 | list_add(&area->list, &prev->list); | ||
373 | else | ||
374 | list_add(&area->list, &gn->area_list); | ||
375 | spin_unlock(&dev->lock); | ||
376 | |||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | static void gen_put_area(struct nvm_dev *dev, sector_t begin) | ||
381 | { | ||
382 | struct gen_dev *gn = dev->mp; | ||
383 | struct gen_area *area; | ||
384 | |||
385 | spin_lock(&dev->lock); | ||
386 | list_for_each_entry(area, &gn->area_list, list) { | ||
387 | if (area->begin != begin) | ||
388 | continue; | ||
389 | |||
390 | list_del(&area->list); | ||
391 | spin_unlock(&dev->lock); | ||
392 | kfree(area); | ||
393 | return; | ||
394 | } | ||
395 | spin_unlock(&dev->lock); | ||
396 | } | ||
397 | |||
398 | static void gen_free(struct nvm_dev *dev) | ||
399 | { | ||
400 | kfree(dev->mp); | ||
401 | kfree(dev->rmap); | ||
402 | dev->mp = NULL; | ||
403 | } | ||
404 | |||
405 | static int gen_register(struct nvm_dev *dev) | ||
406 | { | ||
407 | struct gen_dev *gn; | ||
408 | struct gen_dev_map *dev_rmap; | ||
409 | int i, j; | ||
410 | |||
411 | if (!try_module_get(THIS_MODULE)) | ||
412 | return -ENODEV; | ||
413 | |||
414 | gn = kzalloc(sizeof(struct gen_dev), GFP_KERNEL); | ||
415 | if (!gn) | ||
416 | goto err_gn; | ||
417 | |||
418 | dev_rmap = kmalloc(sizeof(struct gen_dev_map), GFP_KERNEL); | ||
419 | if (!dev_rmap) | ||
420 | goto err_rmap; | ||
421 | |||
422 | dev_rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct gen_ch_map), | ||
423 | GFP_KERNEL); | ||
424 | if (!dev_rmap->chnls) | ||
425 | goto err_chnls; | ||
426 | |||
427 | for (i = 0; i < dev->geo.nr_chnls; i++) { | ||
428 | struct gen_ch_map *ch_rmap; | ||
429 | int *lun_roffs; | ||
430 | int luns_in_chnl = dev->geo.luns_per_chnl; | ||
431 | |||
432 | ch_rmap = &dev_rmap->chnls[i]; | ||
433 | |||
434 | ch_rmap->ch_off = -1; | ||
435 | ch_rmap->nr_luns = luns_in_chnl; | ||
436 | |||
437 | lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); | ||
438 | if (!lun_roffs) | ||
439 | goto err_ch; | ||
440 | |||
441 | for (j = 0; j < luns_in_chnl; j++) | ||
442 | lun_roffs[j] = -1; | ||
443 | |||
444 | ch_rmap->lun_offs = lun_roffs; | ||
445 | } | ||
446 | |||
447 | gn->dev = dev; | ||
448 | gn->nr_luns = dev->geo.nr_luns; | ||
449 | INIT_LIST_HEAD(&gn->area_list); | ||
450 | mutex_init(&gn->lock); | ||
451 | INIT_LIST_HEAD(&gn->targets); | ||
452 | dev->mp = gn; | ||
453 | dev->rmap = dev_rmap; | ||
454 | |||
455 | return 1; | ||
456 | err_ch: | ||
457 | while (--i >= 0) | ||
458 | kfree(dev_rmap->chnls[i].lun_offs); | ||
459 | err_chnls: | ||
460 | kfree(dev_rmap); | ||
461 | err_rmap: | ||
462 | gen_free(dev); | ||
463 | err_gn: | ||
464 | module_put(THIS_MODULE); | ||
465 | return -ENOMEM; | ||
466 | } | ||
467 | |||
468 | static void gen_unregister(struct nvm_dev *dev) | ||
469 | { | ||
470 | struct gen_dev *gn = dev->mp; | ||
471 | struct nvm_target *t, *tmp; | ||
472 | |||
473 | mutex_lock(&gn->lock); | ||
474 | list_for_each_entry_safe(t, tmp, &gn->targets, list) { | ||
475 | if (t->dev->parent != dev) | ||
476 | continue; | ||
477 | __gen_remove_target(t); | ||
478 | } | ||
479 | mutex_unlock(&gn->lock); | ||
480 | |||
481 | gen_free(dev); | ||
482 | module_put(THIS_MODULE); | ||
483 | } | ||
484 | |||
485 | static int gen_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) | ||
486 | { | ||
487 | struct gen_dev_map *dev_map = tgt_dev->map; | ||
488 | struct gen_ch_map *ch_map = &dev_map->chnls[p->g.ch]; | ||
489 | int lun_off = ch_map->lun_offs[p->g.lun]; | ||
490 | struct nvm_dev *dev = tgt_dev->parent; | ||
491 | struct gen_dev_map *dev_rmap = dev->rmap; | ||
492 | struct gen_ch_map *ch_rmap; | ||
493 | int lun_roff; | ||
494 | |||
495 | p->g.ch += ch_map->ch_off; | ||
496 | p->g.lun += lun_off; | ||
497 | |||
498 | ch_rmap = &dev_rmap->chnls[p->g.ch]; | ||
499 | lun_roff = ch_rmap->lun_offs[p->g.lun]; | ||
500 | |||
501 | if (unlikely(ch_rmap->ch_off < 0 || lun_roff < 0)) { | ||
502 | pr_err("nvm: corrupted device partition table\n"); | ||
503 | return -EINVAL; | ||
504 | } | ||
505 | |||
506 | return 0; | ||
507 | } | ||
508 | |||
509 | static int gen_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) | ||
510 | { | ||
511 | struct nvm_dev *dev = tgt_dev->parent; | ||
512 | struct gen_dev_map *dev_rmap = dev->rmap; | ||
513 | struct gen_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch]; | ||
514 | int lun_roff = ch_rmap->lun_offs[p->g.lun]; | ||
515 | |||
516 | p->g.ch -= ch_rmap->ch_off; | ||
517 | p->g.lun -= lun_roff; | ||
518 | |||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | static int gen_trans_rq(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, | ||
523 | int flag) | ||
524 | { | ||
525 | gen_trans_fn *f; | ||
526 | int i; | ||
527 | int ret = 0; | ||
528 | |||
529 | f = (flag == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt; | ||
530 | |||
531 | if (rqd->nr_ppas == 1) | ||
532 | return f(tgt_dev, &rqd->ppa_addr); | ||
533 | |||
534 | for (i = 0; i < rqd->nr_ppas; i++) { | ||
535 | ret = f(tgt_dev, &rqd->ppa_list[i]); | ||
536 | if (ret) | ||
537 | goto out; | ||
538 | } | ||
539 | |||
540 | out: | ||
541 | return ret; | ||
542 | } | ||
543 | |||
544 | static void gen_end_io(struct nvm_rq *rqd) | ||
545 | { | ||
546 | struct nvm_tgt_dev *tgt_dev = rqd->dev; | ||
547 | struct nvm_tgt_instance *ins = rqd->ins; | ||
548 | |||
549 | /* Convert address space */ | ||
550 | if (tgt_dev) | ||
551 | gen_trans_rq(tgt_dev, rqd, TRANS_DEV_TO_TGT); | ||
552 | |||
553 | ins->tt->end_io(rqd); | ||
554 | } | ||
555 | |||
556 | static int gen_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) | ||
557 | { | ||
558 | struct nvm_dev *dev = tgt_dev->parent; | ||
559 | |||
560 | if (!dev->ops->submit_io) | ||
561 | return -ENODEV; | ||
562 | |||
563 | /* Convert address space */ | ||
564 | gen_trans_rq(tgt_dev, rqd, TRANS_TGT_TO_DEV); | ||
565 | nvm_generic_to_addr_mode(dev, rqd); | ||
566 | |||
567 | rqd->dev = tgt_dev; | ||
568 | rqd->end_io = gen_end_io; | ||
569 | return dev->ops->submit_io(dev, rqd); | ||
570 | } | ||
571 | |||
572 | static int gen_erase_blk(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p, | ||
573 | int flags) | ||
574 | { | ||
575 | /* Convert address space */ | ||
576 | gen_map_to_dev(tgt_dev, p); | ||
577 | |||
578 | return nvm_erase_ppa(tgt_dev->parent, p, 1, flags); | ||
579 | } | ||
580 | |||
581 | static struct ppa_addr gen_trans_ppa(struct nvm_tgt_dev *tgt_dev, | ||
582 | struct ppa_addr p, int direction) | ||
583 | { | ||
584 | gen_trans_fn *f; | ||
585 | struct ppa_addr ppa = p; | ||
586 | |||
587 | f = (direction == TRANS_TGT_TO_DEV) ? gen_map_to_dev : gen_map_to_tgt; | ||
588 | f(tgt_dev, &ppa); | ||
589 | |||
590 | return ppa; | ||
591 | } | ||
592 | |||
593 | static void gen_part_to_tgt(struct nvm_dev *dev, sector_t *entries, | ||
594 | int len) | ||
595 | { | ||
596 | struct nvm_geo *geo = &dev->geo; | ||
597 | struct gen_dev_map *dev_rmap = dev->rmap; | ||
598 | u64 i; | ||
599 | |||
600 | for (i = 0; i < len; i++) { | ||
601 | struct gen_ch_map *ch_rmap; | ||
602 | int *lun_roffs; | ||
603 | struct ppa_addr gaddr; | ||
604 | u64 pba = le64_to_cpu(entries[i]); | ||
605 | int off; | ||
606 | u64 diff; | ||
607 | |||
608 | if (!pba) | ||
609 | continue; | ||
610 | |||
611 | gaddr = linear_to_generic_addr(geo, pba); | ||
612 | ch_rmap = &dev_rmap->chnls[gaddr.g.ch]; | ||
613 | lun_roffs = ch_rmap->lun_offs; | ||
614 | |||
615 | off = gaddr.g.ch * geo->luns_per_chnl + gaddr.g.lun; | ||
616 | |||
617 | diff = ((ch_rmap->ch_off * geo->luns_per_chnl) + | ||
618 | (lun_roffs[gaddr.g.lun])) * geo->sec_per_lun; | ||
619 | |||
620 | entries[i] -= cpu_to_le64(diff); | ||
621 | } | ||
622 | } | ||
623 | |||
624 | static struct nvmm_type gen = { | ||
625 | .name = "gennvm", | ||
626 | .version = {0, 1, 0}, | ||
627 | |||
628 | .register_mgr = gen_register, | ||
629 | .unregister_mgr = gen_unregister, | ||
630 | |||
631 | .create_tgt = gen_create_tgt, | ||
632 | .remove_tgt = gen_remove_tgt, | ||
633 | |||
634 | .submit_io = gen_submit_io, | ||
635 | .erase_blk = gen_erase_blk, | ||
636 | |||
637 | .get_area = gen_get_area, | ||
638 | .put_area = gen_put_area, | ||
639 | |||
640 | .trans_ppa = gen_trans_ppa, | ||
641 | .part_to_tgt = gen_part_to_tgt, | ||
642 | }; | ||
643 | |||
644 | static int __init gen_module_init(void) | ||
645 | { | ||
646 | return nvm_register_mgr(&gen); | ||
647 | } | ||
648 | |||
649 | static void gen_module_exit(void) | ||
650 | { | ||
651 | nvm_unregister_mgr(&gen); | ||
652 | } | ||
653 | |||
654 | module_init(gen_module_init); | ||
655 | module_exit(gen_module_exit); | ||
656 | MODULE_LICENSE("GPL v2"); | ||
657 | MODULE_DESCRIPTION("General media manager for Open-Channel SSDs"); | ||
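The gen_map_to_dev()/gen_map_to_tgt() pair above is the core of the media manager: a target sees a dense (channel, LUN) space starting at zero, and the per-channel ch_off plus per-LUN lun_offs values shift those coordinates onto the device's real geometry, with the reverse map undoing the shift on completion. The standalone sketch below models only that arithmetic in userspace, assuming a made-up geometry; the names loosely mirror the driver and a single offset table stands in for both the forward and reverse maps the kernel keeps.

#include <stdio.h>

#define TGT_LUNS 2

/* Made-up geometry: device LUNs 4..5 on channel 3 are exposed to the
 * target as channel 0, LUNs 0..1. */
static const int ch_off = 3;			/* target ch 0 -> device ch 3 */
static const int lun_offs[TGT_LUNS] = { 4, 4 };	/* target lun i -> device lun i + 4 */

struct ppa { int ch, lun; };

/* target-relative -> device-absolute, the job of gen_map_to_dev() */
static void map_to_dev(struct ppa *p)
{
	int lun_off = lun_offs[p->lun];

	p->ch += ch_off;
	p->lun += lun_off;
}

/* device-absolute -> target-relative; the driver looks these offsets up
 * in a reverse map indexed by device coordinates, here the constants are
 * simply reused since all LUNs share one offset */
static void map_to_tgt(struct ppa *p)
{
	p->ch -= ch_off;
	p->lun -= 4;
}

int main(void)
{
	struct ppa p = { .ch = 0, .lun = 1 };	/* address as the target sees it */

	map_to_dev(&p);
	printf("device ppa: ch=%d lun=%d\n", p.ch, p.lun);	/* ch=3 lun=5 */
	map_to_tgt(&p);
	printf("target ppa: ch=%d lun=%d\n", p.ch, p.lun);	/* ch=0 lun=1 */
	return 0;
}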
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h deleted file mode 100644 index 6a4b3f368848..000000000000 --- a/drivers/lightnvm/gennvm.h +++ /dev/null | |||
@@ -1,62 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright: Matias Bjorling <mb@bjorling.me> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License version | ||
6 | * 2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #ifndef GENNVM_H_ | ||
16 | #define GENNVM_H_ | ||
17 | |||
18 | #include <linux/module.h> | ||
19 | #include <linux/vmalloc.h> | ||
20 | |||
21 | #include <linux/lightnvm.h> | ||
22 | |||
23 | struct gen_dev { | ||
24 | struct nvm_dev *dev; | ||
25 | |||
26 | int nr_luns; | ||
27 | struct list_head area_list; | ||
28 | |||
29 | struct mutex lock; | ||
30 | struct list_head targets; | ||
31 | }; | ||
32 | |||
33 | /* Map between virtual and physical channel and lun */ | ||
34 | struct gen_ch_map { | ||
35 | int ch_off; | ||
36 | int nr_luns; | ||
37 | int *lun_offs; | ||
38 | }; | ||
39 | |||
40 | struct gen_dev_map { | ||
41 | struct gen_ch_map *chnls; | ||
42 | int nr_chnls; | ||
43 | }; | ||
44 | |||
45 | struct gen_area { | ||
46 | struct list_head list; | ||
47 | sector_t begin; | ||
48 | sector_t end; /* end is excluded */ | ||
49 | }; | ||
50 | |||
51 | static inline void *ch_map_to_lun_offs(struct gen_ch_map *ch_map) | ||
52 | { | ||
53 | return ch_map + 1; | ||
54 | } | ||
55 | |||
56 | typedef int (gen_trans_fn)(struct nvm_tgt_dev *, struct ppa_addr *); | ||
57 | |||
58 | #define gen_for_each_lun(bm, lun, i) \ | ||
59 | for ((i) = 0, lun = &(bm)->luns[0]; \ | ||
60 | (i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)]) | ||
61 | |||
62 | #endif /* GENNVM_H_ */ | ||
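ch_map_to_lun_offs() above leans on a common allocation idiom: the per-channel array of LUN offsets is placed in the same allocation, immediately after struct gen_ch_map, so "ch_map + 1" is the address of its first element. The userspace sketch below shows just that pointer arithmetic with a trimmed-down struct; the -1 "unmapped" sentinel matches the register path shown earlier, the rest is illustrative.

#include <stdio.h>
#include <stdlib.h>

struct ch_map {
	int ch_off;
	int nr_luns;
};

/* the offsets array starts right after the struct itself */
static int *ch_map_to_lun_offs(struct ch_map *map)
{
	return (int *)(map + 1);
}

int main(void)
{
	int nr_luns = 4, i;
	struct ch_map *map;
	int *lun_offs;

	/* one allocation carries the struct and its trailing int array */
	map = malloc(sizeof(*map) + nr_luns * sizeof(int));
	if (!map)
		return 1;

	map->ch_off = -1;
	map->nr_luns = nr_luns;

	lun_offs = ch_map_to_lun_offs(map);
	for (i = 0; i < nr_luns; i++)
		lun_offs[i] = -1;	/* -1 marks a LUN as not mapped */

	printf("struct ends at %p, lun_offs[] begins at %p\n",
	       (void *)(map + 1), (void *)lun_offs);
	free(map);
	return 0;
}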
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c index 9fb7de395915..e00b1d7b976f 100644 --- a/drivers/lightnvm/rrpc.c +++ b/drivers/lightnvm/rrpc.c | |||
@@ -779,7 +779,7 @@ static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, | |||
779 | 779 | ||
780 | static void rrpc_end_io(struct nvm_rq *rqd) | 780 | static void rrpc_end_io(struct nvm_rq *rqd) |
781 | { | 781 | { |
782 | struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); | 782 | struct rrpc *rrpc = rqd->private; |
783 | struct nvm_tgt_dev *dev = rrpc->dev; | 783 | struct nvm_tgt_dev *dev = rrpc->dev; |
784 | struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); | 784 | struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); |
785 | uint8_t npages = rqd->nr_ppas; | 785 | uint8_t npages = rqd->nr_ppas; |
@@ -972,8 +972,9 @@ static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, | |||
972 | 972 | ||
973 | bio_get(bio); | 973 | bio_get(bio); |
974 | rqd->bio = bio; | 974 | rqd->bio = bio; |
975 | rqd->ins = &rrpc->instance; | 975 | rqd->private = rrpc; |
976 | rqd->nr_ppas = nr_pages; | 976 | rqd->nr_ppas = nr_pages; |
977 | rqd->end_io = rrpc_end_io; | ||
977 | rrq->flags = flags; | 978 | rrq->flags = flags; |
978 | 979 | ||
979 | err = nvm_submit_io(dev, rqd); | 980 | err = nvm_submit_io(dev, rqd); |
@@ -1532,7 +1533,6 @@ static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk) | |||
1532 | if (!rrpc) | 1533 | if (!rrpc) |
1533 | return ERR_PTR(-ENOMEM); | 1534 | return ERR_PTR(-ENOMEM); |
1534 | 1535 | ||
1535 | rrpc->instance.tt = &tt_rrpc; | ||
1536 | rrpc->dev = dev; | 1536 | rrpc->dev = dev; |
1537 | rrpc->disk = tdisk; | 1537 | rrpc->disk = tdisk; |
1538 | 1538 | ||
@@ -1611,7 +1611,6 @@ static struct nvm_tgt_type tt_rrpc = { | |||
1611 | 1611 | ||
1612 | .make_rq = rrpc_make_rq, | 1612 | .make_rq = rrpc_make_rq, |
1613 | .capacity = rrpc_capacity, | 1613 | .capacity = rrpc_capacity, |
1614 | .end_io = rrpc_end_io, | ||
1615 | 1614 | ||
1616 | .init = rrpc_init, | 1615 | .init = rrpc_init, |
1617 | .exit = rrpc_exit, | 1616 | .exit = rrpc_exit, |
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h index 94e4d73116b2..fdb6ff902903 100644 --- a/drivers/lightnvm/rrpc.h +++ b/drivers/lightnvm/rrpc.h | |||
@@ -102,9 +102,6 @@ struct rrpc_lun { | |||
102 | }; | 102 | }; |
103 | 103 | ||
104 | struct rrpc { | 104 | struct rrpc { |
105 | /* instance must be kept at the top to resolve rrpc in unprep */ | ||
106 | struct nvm_tgt_instance instance; | ||
107 | |||
108 | struct nvm_tgt_dev *dev; | 105 | struct nvm_tgt_dev *dev; |
109 | struct gendisk *disk; | 106 | struct gendisk *disk; |
110 | 107 | ||
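The rrpc hunks above drop the embedded nvm_tgt_instance, which had to remain the first member of struct rrpc so that container_of() could recover the parent, and instead store the parent directly in rqd->private and set a per-request end_io callback. Below is a generic userspace illustration of the two back-pointer styles being swapped; every type in it is invented for the demo and none of it is the kernel code.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct instance { int id; };

struct parent {
	struct instance instance;	/* old style: recovered via container_of() */
	int data;
};

struct request {
	struct instance *ins;		/* old style back-pointer */
	void *private;			/* new style: parent stored directly */
};

int main(void)
{
	struct parent p = { .instance = { .id = 1 }, .data = 42 };
	struct request rq = { .ins = &p.instance, .private = &p };

	struct parent *via_container = container_of(rq.ins, struct parent, instance);
	struct parent *via_private = rq.private;

	printf("%d %d\n", via_container->data, via_private->data);
	return 0;
}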
diff --git a/drivers/lightnvm/sysblk.c b/drivers/lightnvm/sysblk.c deleted file mode 100644 index 12002bf4efc2..000000000000 --- a/drivers/lightnvm/sysblk.c +++ /dev/null | |||
@@ -1,733 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Matias Bjorling. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License version | ||
6 | * 2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; see the file COPYING. If not, write to | ||
15 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
16 | * USA. | ||
17 | * | ||
18 | */ | ||
19 | |||
20 | #include <linux/lightnvm.h> | ||
21 | |||
22 | #define MAX_SYSBLKS 3 /* remember to update mapping scheme on change */ | ||
23 | #define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases | ||
24 | * enables ~1.5M updates per sysblk unit | ||
25 | */ | ||
26 | |||
27 | struct sysblk_scan { | ||
28 | /* A row is a collection of flash blocks for a system block. */ | ||
29 | int nr_rows; | ||
30 | int row; | ||
31 | int act_blk[MAX_SYSBLKS]; | ||
32 | |||
33 | int nr_ppas; | ||
34 | struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */ | ||
35 | }; | ||
36 | |||
37 | static inline int scan_ppa_idx(int row, int blkid) | ||
38 | { | ||
39 | return (row * MAX_BLKS_PR_SYSBLK) + blkid; | ||
40 | } | ||
41 | |||
42 | static void nvm_sysblk_to_cpu(struct nvm_sb_info *info, | ||
43 | struct nvm_system_block *sb) | ||
44 | { | ||
45 | info->seqnr = be32_to_cpu(sb->seqnr); | ||
46 | info->erase_cnt = be32_to_cpu(sb->erase_cnt); | ||
47 | info->version = be16_to_cpu(sb->version); | ||
48 | strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN); | ||
49 | info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa); | ||
50 | } | ||
51 | |||
52 | static void nvm_cpu_to_sysblk(struct nvm_system_block *sb, | ||
53 | struct nvm_sb_info *info) | ||
54 | { | ||
55 | sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC); | ||
56 | sb->seqnr = cpu_to_be32(info->seqnr); | ||
57 | sb->erase_cnt = cpu_to_be32(info->erase_cnt); | ||
58 | sb->version = cpu_to_be16(info->version); | ||
59 | strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN); | ||
60 | sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa); | ||
61 | } | ||
62 | |||
63 | static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas) | ||
64 | { | ||
65 | struct nvm_geo *geo = &dev->geo; | ||
66 | int nr_rows = min_t(int, MAX_SYSBLKS, geo->nr_chnls); | ||
67 | int i; | ||
68 | |||
69 | for (i = 0; i < nr_rows; i++) | ||
70 | sysblk_ppas[i].ppa = 0; | ||
71 | |||
72 | /* if possible, place sysblk at first channel, middle channel and last | ||
73 | * channel of the device. If not, create only one or two sys blocks | ||
74 | */ | ||
75 | switch (geo->nr_chnls) { | ||
76 | case 2: | ||
77 | sysblk_ppas[1].g.ch = 1; | ||
78 | /* fall-through */ | ||
79 | case 1: | ||
80 | sysblk_ppas[0].g.ch = 0; | ||
81 | break; | ||
82 | default: | ||
83 | sysblk_ppas[0].g.ch = 0; | ||
84 | sysblk_ppas[1].g.ch = geo->nr_chnls / 2; | ||
85 | sysblk_ppas[2].g.ch = geo->nr_chnls - 1; | ||
86 | break; | ||
87 | } | ||
88 | |||
89 | return nr_rows; | ||
90 | } | ||
91 | |||
92 | static void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s, | ||
93 | struct ppa_addr *sysblk_ppas) | ||
94 | { | ||
95 | memset(s, 0, sizeof(struct sysblk_scan)); | ||
96 | s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas); | ||
97 | } | ||
98 | |||
99 | static int sysblk_get_free_blks(struct nvm_dev *dev, struct ppa_addr ppa, | ||
100 | u8 *blks, int nr_blks, | ||
101 | struct sysblk_scan *s) | ||
102 | { | ||
103 | struct ppa_addr *sppa; | ||
104 | int i, blkid = 0; | ||
105 | |||
106 | nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); | ||
107 | if (nr_blks < 0) | ||
108 | return nr_blks; | ||
109 | |||
110 | for (i = 0; i < nr_blks; i++) { | ||
111 | if (blks[i] == NVM_BLK_T_HOST) | ||
112 | return -EEXIST; | ||
113 | |||
114 | if (blks[i] != NVM_BLK_T_FREE) | ||
115 | continue; | ||
116 | |||
117 | sppa = &s->ppas[scan_ppa_idx(s->row, blkid)]; | ||
118 | sppa->g.ch = ppa.g.ch; | ||
119 | sppa->g.lun = ppa.g.lun; | ||
120 | sppa->g.blk = i; | ||
121 | s->nr_ppas++; | ||
122 | blkid++; | ||
123 | |||
124 | pr_debug("nvm: use (%u %u %u) as sysblk\n", | ||
125 | sppa->g.ch, sppa->g.lun, sppa->g.blk); | ||
126 | if (blkid > MAX_BLKS_PR_SYSBLK - 1) | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | pr_err("nvm: sysblk failed to get free sysblk blocks\n"); | ||
131 | return -EINVAL; | ||
132 | } | ||
133 | |||
134 | static int sysblk_get_host_blks(struct nvm_dev *dev, struct ppa_addr ppa, | ||
135 | u8 *blks, int nr_blks, | ||
136 | struct sysblk_scan *s) | ||
137 | { | ||
138 | int i, nr_sysblk = 0; | ||
139 | |||
140 | nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); | ||
141 | if (nr_blks < 0) | ||
142 | return nr_blks; | ||
143 | |||
144 | for (i = 0; i < nr_blks; i++) { | ||
145 | if (blks[i] != NVM_BLK_T_HOST) | ||
146 | continue; | ||
147 | |||
148 | if (s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) { | ||
149 | pr_err("nvm: too many host blks\n"); | ||
150 | return -EINVAL; | ||
151 | } | ||
152 | |||
153 | ppa.g.blk = i; | ||
154 | |||
155 | s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa; | ||
156 | s->nr_ppas++; | ||
157 | nr_sysblk++; | ||
158 | } | ||
159 | |||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s, | ||
164 | struct ppa_addr *ppas, int get_free) | ||
165 | { | ||
166 | struct nvm_geo *geo = &dev->geo; | ||
167 | int i, nr_blks, ret = 0; | ||
168 | u8 *blks; | ||
169 | |||
170 | s->nr_ppas = 0; | ||
171 | nr_blks = geo->blks_per_lun * geo->plane_mode; | ||
172 | |||
173 | blks = kmalloc(nr_blks, GFP_KERNEL); | ||
174 | if (!blks) | ||
175 | return -ENOMEM; | ||
176 | |||
177 | for (i = 0; i < s->nr_rows; i++) { | ||
178 | s->row = i; | ||
179 | |||
180 | ret = nvm_get_bb_tbl(dev, ppas[i], blks); | ||
181 | if (ret) { | ||
182 | pr_err("nvm: failed bb tbl for ppa (%u %u)\n", | ||
183 | ppas[i].g.ch, | ||
184 | ppas[i].g.blk); | ||
185 | goto err_get; | ||
186 | } | ||
187 | |||
188 | if (get_free) | ||
189 | ret = sysblk_get_free_blks(dev, ppas[i], blks, nr_blks, | ||
190 | s); | ||
191 | else | ||
192 | ret = sysblk_get_host_blks(dev, ppas[i], blks, nr_blks, | ||
193 | s); | ||
194 | |||
195 | if (ret) | ||
196 | goto err_get; | ||
197 | } | ||
198 | |||
199 | err_get: | ||
200 | kfree(blks); | ||
201 | return ret; | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * scans a block for latest sysblk. | ||
206 | * Returns: | ||
207 | * 0 - newer sysblk not found. PPA is updated to latest page. | ||
208 | * 1 - newer sysblk found and stored in *cur. PPA is updated to | ||
209 | * next valid page. | ||
210 | * <0- error. | ||
211 | */ | ||
212 | static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa, | ||
213 | struct nvm_system_block *sblk) | ||
214 | { | ||
215 | struct nvm_geo *geo = &dev->geo; | ||
216 | struct nvm_system_block *cur; | ||
217 | int pg, ret, found = 0; | ||
218 | |||
219 | /* the full buffer for a flash page is allocated. Only the first part of it | ||
220 | * contains the system block information | ||
221 | */ | ||
222 | cur = kmalloc(geo->pfpg_size, GFP_KERNEL); | ||
223 | if (!cur) | ||
224 | return -ENOMEM; | ||
225 | |||
226 | /* perform linear scan through the block */ | ||
227 | for (pg = 0; pg < dev->lps_per_blk; pg++) { | ||
228 | ppa->g.pg = ppa_to_slc(dev, pg); | ||
229 | |||
230 | ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE, | ||
231 | cur, geo->pfpg_size); | ||
232 | if (ret) { | ||
233 | if (ret == NVM_RSP_ERR_EMPTYPAGE) { | ||
234 | pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n", | ||
235 | ppa->g.ch, | ||
236 | ppa->g.lun, | ||
237 | ppa->g.blk, | ||
238 | ppa->g.pg); | ||
239 | break; | ||
240 | } | ||
241 | pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)", | ||
242 | ret, | ||
243 | ppa->g.ch, | ||
244 | ppa->g.lun, | ||
245 | ppa->g.blk, | ||
246 | ppa->g.pg); | ||
247 | break; /* if we can't read a page, continue to the | ||
248 | * next blk | ||
249 | */ | ||
250 | } | ||
251 | |||
252 | if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) { | ||
253 | pr_debug("nvm: scan break for ppa (%u %u %u %u)\n", | ||
254 | ppa->g.ch, | ||
255 | ppa->g.lun, | ||
256 | ppa->g.blk, | ||
257 | ppa->g.pg); | ||
258 | break; /* last valid page already found */ | ||
259 | } | ||
260 | |||
261 | if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr)) | ||
262 | continue; | ||
263 | |||
264 | memcpy(sblk, cur, sizeof(struct nvm_system_block)); | ||
265 | found = 1; | ||
266 | } | ||
267 | |||
268 | kfree(cur); | ||
269 | |||
270 | return found; | ||
271 | } | ||
272 | |||
273 | static int nvm_sysblk_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, | ||
274 | int type) | ||
275 | { | ||
276 | return nvm_set_bb_tbl(dev, s->ppas, s->nr_ppas, type); | ||
277 | } | ||
278 | |||
279 | static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info, | ||
280 | struct sysblk_scan *s) | ||
281 | { | ||
282 | struct nvm_geo *geo = &dev->geo; | ||
283 | struct nvm_system_block nvmsb; | ||
284 | void *buf; | ||
285 | int i, sect, ret = 0; | ||
286 | struct ppa_addr *ppas; | ||
287 | |||
288 | nvm_cpu_to_sysblk(&nvmsb, info); | ||
289 | |||
290 | buf = kzalloc(geo->pfpg_size, GFP_KERNEL); | ||
291 | if (!buf) | ||
292 | return -ENOMEM; | ||
293 | memcpy(buf, &nvmsb, sizeof(struct nvm_system_block)); | ||
294 | |||
295 | ppas = kcalloc(geo->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL); | ||
296 | if (!ppas) { | ||
297 | ret = -ENOMEM; | ||
298 | goto err; | ||
299 | } | ||
300 | |||
301 | /* Write and verify */ | ||
302 | for (i = 0; i < s->nr_rows; i++) { | ||
303 | ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])]; | ||
304 | |||
305 | pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n", | ||
306 | ppas[0].g.ch, | ||
307 | ppas[0].g.lun, | ||
308 | ppas[0].g.blk, | ||
309 | ppas[0].g.pg); | ||
310 | |||
311 | /* Expand to all sectors within a flash page */ | ||
312 | if (geo->sec_per_pg > 1) { | ||
313 | for (sect = 1; sect < geo->sec_per_pg; sect++) { | ||
314 | ppas[sect].ppa = ppas[0].ppa; | ||
315 | ppas[sect].g.sec = sect; | ||
316 | } | ||
317 | } | ||
318 | |||
319 | ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PWRITE, | ||
320 | NVM_IO_SLC_MODE, buf, geo->pfpg_size); | ||
321 | if (ret) { | ||
322 | pr_err("nvm: sysblk failed program (%u %u %u)\n", | ||
323 | ppas[0].g.ch, | ||
324 | ppas[0].g.lun, | ||
325 | ppas[0].g.blk); | ||
326 | break; | ||
327 | } | ||
328 | |||
329 | ret = nvm_submit_ppa(dev, ppas, geo->sec_per_pg, NVM_OP_PREAD, | ||
330 | NVM_IO_SLC_MODE, buf, geo->pfpg_size); | ||
331 | if (ret) { | ||
332 | pr_err("nvm: sysblk failed read (%u %u %u)\n", | ||
333 | ppas[0].g.ch, | ||
334 | ppas[0].g.lun, | ||
335 | ppas[0].g.blk); | ||
336 | break; | ||
337 | } | ||
338 | |||
339 | if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) { | ||
340 | pr_err("nvm: sysblk failed verify (%u %u %u)\n", | ||
341 | ppas[0].g.ch, | ||
342 | ppas[0].g.lun, | ||
343 | ppas[0].g.blk); | ||
344 | ret = -EINVAL; | ||
345 | break; | ||
346 | } | ||
347 | } | ||
348 | |||
349 | kfree(ppas); | ||
350 | err: | ||
351 | kfree(buf); | ||
352 | |||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s) | ||
357 | { | ||
358 | int i, ret; | ||
359 | unsigned long nxt_blk; | ||
360 | struct ppa_addr *ppa; | ||
361 | |||
362 | for (i = 0; i < s->nr_rows; i++) { | ||
363 | nxt_blk = (s->act_blk[i] + 1) % MAX_BLKS_PR_SYSBLK; | ||
364 | ppa = &s->ppas[scan_ppa_idx(i, nxt_blk)]; | ||
365 | ppa->g.pg = ppa_to_slc(dev, 0); | ||
366 | |||
367 | ret = nvm_erase_ppa(dev, ppa, 1, 0); | ||
368 | if (ret) | ||
369 | return ret; | ||
370 | |||
371 | s->act_blk[i] = nxt_blk; | ||
372 | } | ||
373 | |||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info) | ||
378 | { | ||
379 | struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; | ||
380 | struct sysblk_scan s; | ||
381 | struct nvm_system_block *cur; | ||
382 | int i, j, found = 0; | ||
383 | int ret = -ENOMEM; | ||
384 | |||
385 | /* | ||
386 | * 1. setup sysblk locations | ||
387 | * 2. get bad block list | ||
388 | * 3. filter on host-specific (type 3) | ||
389 | * 4. iterate through all and find the highest seq nr. | ||
390 | * 5. return superblock information | ||
391 | */ | ||
392 | |||
393 | if (!dev->ops->get_bb_tbl) | ||
394 | return -EINVAL; | ||
395 | |||
396 | nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); | ||
397 | |||
398 | mutex_lock(&dev->mlock); | ||
399 | ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); | ||
400 | if (ret) | ||
401 | goto err_sysblk; | ||
402 | |||
403 | /* no sysblocks initialized */ | ||
404 | if (!s.nr_ppas) | ||
405 | goto err_sysblk; | ||
406 | |||
407 | cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL); | ||
408 | if (!cur) | ||
409 | goto err_sysblk; | ||
410 | |||
411 | /* find the latest block across all sysblocks */ | ||
412 | for (i = 0; i < s.nr_rows; i++) { | ||
413 | for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) { | ||
414 | struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)]; | ||
415 | |||
416 | ret = nvm_scan_block(dev, &ppa, cur); | ||
417 | if (ret > 0) | ||
418 | found = 1; | ||
419 | else if (ret < 0) | ||
420 | break; | ||
421 | } | ||
422 | } | ||
423 | |||
424 | nvm_sysblk_to_cpu(info, cur); | ||
425 | |||
426 | kfree(cur); | ||
427 | err_sysblk: | ||
428 | mutex_unlock(&dev->mlock); | ||
429 | |||
430 | if (found) | ||
431 | return 1; | ||
432 | return ret; | ||
433 | } | ||
434 | |||
435 | int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new) | ||
436 | { | ||
437 | /* 1. for each latest superblock | ||
438 | * 2. if room | ||
439 | * a. write new flash page entry with the updated information | ||
440 | * 3. if no room | ||
441 | * a. find next available block on lun (linear search) | ||
442 | * if none, continue to next lun | ||
443 | * if none at all, report error. also report that it wasn't | ||
444 | * possible to write to all superblocks. | ||
445 | * c. write data to block. | ||
446 | */ | ||
447 | struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; | ||
448 | struct sysblk_scan s; | ||
449 | struct nvm_system_block *cur; | ||
450 | int i, j, ppaidx, found = 0; | ||
451 | int ret = -ENOMEM; | ||
452 | |||
453 | if (!dev->ops->get_bb_tbl) | ||
454 | return -EINVAL; | ||
455 | |||
456 | nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); | ||
457 | |||
458 | mutex_lock(&dev->mlock); | ||
459 | ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); | ||
460 | if (ret) | ||
461 | goto err_sysblk; | ||
462 | |||
463 | cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL); | ||
464 | if (!cur) | ||
465 | goto err_sysblk; | ||
466 | |||
467 | /* Get the latest sysblk for each sysblk row */ | ||
468 | for (i = 0; i < s.nr_rows; i++) { | ||
469 | found = 0; | ||
470 | for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) { | ||
471 | ppaidx = scan_ppa_idx(i, j); | ||
472 | ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur); | ||
473 | if (ret > 0) { | ||
474 | s.act_blk[i] = j; | ||
475 | found = 1; | ||
476 | } else if (ret < 0) | ||
477 | break; | ||
478 | } | ||
479 | } | ||
480 | |||
481 | if (!found) { | ||
482 | pr_err("nvm: no valid sysblks found to update\n"); | ||
483 | ret = -EINVAL; | ||
484 | goto err_cur; | ||
485 | } | ||
486 | |||
487 | /* | ||
488 | * All sysblocks found. Check that they have the same page id in their flash | ||
489 | * blocks | ||
490 | */ | ||
491 | for (i = 1; i < s.nr_rows; i++) { | ||
492 | struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])]; | ||
493 | struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])]; | ||
494 | |||
495 | if (l.g.pg != r.g.pg) { | ||
496 | pr_err("nvm: sysblks not on same page. Previous update failed.\n"); | ||
497 | ret = -EINVAL; | ||
498 | goto err_cur; | ||
499 | } | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Check that there hasn't been another update to the seqnr since we | ||
504 | * began | ||
505 | */ | ||
506 | if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) { | ||
507 | pr_err("nvm: seq is not sequential\n"); | ||
508 | ret = -EINVAL; | ||
509 | goto err_cur; | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * When all pages in a block have been written, a new block is selected | ||
514 | * and writing is performed on the new block. | ||
515 | */ | ||
516 | if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg == | ||
517 | dev->lps_per_blk - 1) { | ||
518 | ret = nvm_prepare_new_sysblks(dev, &s); | ||
519 | if (ret) | ||
520 | goto err_cur; | ||
521 | } | ||
522 | |||
523 | ret = nvm_write_and_verify(dev, new, &s); | ||
524 | err_cur: | ||
525 | kfree(cur); | ||
526 | err_sysblk: | ||
527 | mutex_unlock(&dev->mlock); | ||
528 | |||
529 | return ret; | ||
530 | } | ||
531 | |||
532 | int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info) | ||
533 | { | ||
534 | struct nvm_geo *geo = &dev->geo; | ||
535 | struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; | ||
536 | struct sysblk_scan s; | ||
537 | int ret; | ||
538 | |||
539 | /* | ||
540 | * 1. select master blocks and select first available blks | ||
541 | * 2. get bad block list | ||
542 | * 3. mark MAX_SYSBLKS block as host-based device allocated. | ||
543 | * 4. write and verify data to block | ||
544 | */ | ||
545 | |||
546 | if (!dev->ops->get_bb_tbl || !dev->ops->set_bb_tbl) | ||
547 | return -EINVAL; | ||
548 | |||
549 | if (!(geo->mccap & NVM_ID_CAP_SLC) || !dev->lps_per_blk) { | ||
550 | pr_err("nvm: memory does not support SLC access\n"); | ||
551 | return -EINVAL; | ||
552 | } | ||
553 | |||
554 | /* Index all sysblocks and mark them as host-driven */ | ||
555 | nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); | ||
556 | |||
557 | mutex_lock(&dev->mlock); | ||
558 | ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 1); | ||
559 | if (ret) | ||
560 | goto err_mark; | ||
561 | |||
562 | ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_HOST); | ||
563 | if (ret) | ||
564 | goto err_mark; | ||
565 | |||
566 | /* Write to the first block of each row */ | ||
567 | ret = nvm_write_and_verify(dev, info, &s); | ||
568 | err_mark: | ||
569 | mutex_unlock(&dev->mlock); | ||
570 | return ret; | ||
571 | } | ||
572 | |||
573 | static int factory_nblks(int nblks) | ||
574 | { | ||
575 | /* Round up to nearest BITS_PER_LONG */ | ||
576 | return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); | ||
577 | } | ||
578 | |||
579 | static unsigned int factory_blk_offset(struct nvm_geo *geo, struct ppa_addr ppa) | ||
580 | { | ||
581 | int nblks = factory_nblks(geo->blks_per_lun); | ||
582 | |||
583 | return ((ppa.g.ch * geo->luns_per_chnl * nblks) + (ppa.g.lun * nblks)) / | ||
584 | BITS_PER_LONG; | ||
585 | } | ||
586 | |||
587 | static int nvm_factory_blks(struct nvm_dev *dev, struct ppa_addr ppa, | ||
588 | u8 *blks, int nr_blks, | ||
589 | unsigned long *blk_bitmap, int flags) | ||
590 | { | ||
591 | int i, lunoff; | ||
592 | |||
593 | nr_blks = nvm_bb_tbl_fold(dev, blks, nr_blks); | ||
594 | if (nr_blks < 0) | ||
595 | return nr_blks; | ||
596 | |||
597 | lunoff = factory_blk_offset(&dev->geo, ppa); | ||
598 | |||
599 | /* non-set bits correspond to blocks that must be erased */ | ||
600 | for (i = 0; i < nr_blks; i++) { | ||
601 | switch (blks[i]) { | ||
602 | case NVM_BLK_T_FREE: | ||
603 | if (flags & NVM_FACTORY_ERASE_ONLY_USER) | ||
604 | set_bit(i, &blk_bitmap[lunoff]); | ||
605 | break; | ||
606 | case NVM_BLK_T_HOST: | ||
607 | if (!(flags & NVM_FACTORY_RESET_HOST_BLKS)) | ||
608 | set_bit(i, &blk_bitmap[lunoff]); | ||
609 | break; | ||
610 | case NVM_BLK_T_GRWN_BAD: | ||
611 | if (!(flags & NVM_FACTORY_RESET_GRWN_BBLKS)) | ||
612 | set_bit(i, &blk_bitmap[lunoff]); | ||
613 | break; | ||
614 | default: | ||
615 | set_bit(i, &blk_bitmap[lunoff]); | ||
616 | break; | ||
617 | } | ||
618 | } | ||
619 | |||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list, | ||
624 | int max_ppas, unsigned long *blk_bitmap) | ||
625 | { | ||
626 | struct nvm_geo *geo = &dev->geo; | ||
627 | struct ppa_addr ppa; | ||
628 | int ch, lun, blkid, idx, done = 0, ppa_cnt = 0; | ||
629 | unsigned long *offset; | ||
630 | |||
631 | while (!done) { | ||
632 | done = 1; | ||
633 | nvm_for_each_lun_ppa(geo, ppa, ch, lun) { | ||
634 | idx = factory_blk_offset(geo, ppa); | ||
635 | offset = &blk_bitmap[idx]; | ||
636 | |||
637 | blkid = find_first_zero_bit(offset, geo->blks_per_lun); | ||
638 | if (blkid >= geo->blks_per_lun) | ||
639 | continue; | ||
640 | set_bit(blkid, offset); | ||
641 | |||
642 | ppa.g.blk = blkid; | ||
643 | pr_debug("nvm: erase ppa (%u %u %u)\n", | ||
644 | ppa.g.ch, | ||
645 | ppa.g.lun, | ||
646 | ppa.g.blk); | ||
647 | |||
648 | erase_list[ppa_cnt] = ppa; | ||
649 | ppa_cnt++; | ||
650 | done = 0; | ||
651 | |||
652 | if (ppa_cnt == max_ppas) | ||
653 | return ppa_cnt; | ||
654 | } | ||
655 | } | ||
656 | |||
657 | return ppa_cnt; | ||
658 | } | ||
659 | |||
660 | static int nvm_fact_select_blks(struct nvm_dev *dev, unsigned long *blk_bitmap, | ||
661 | int flags) | ||
662 | { | ||
663 | struct nvm_geo *geo = &dev->geo; | ||
664 | struct ppa_addr ppa; | ||
665 | int ch, lun, nr_blks, ret = 0; | ||
666 | u8 *blks; | ||
667 | |||
668 | nr_blks = geo->blks_per_lun * geo->plane_mode; | ||
669 | blks = kmalloc(nr_blks, GFP_KERNEL); | ||
670 | if (!blks) | ||
671 | return -ENOMEM; | ||
672 | |||
673 | nvm_for_each_lun_ppa(geo, ppa, ch, lun) { | ||
674 | ret = nvm_get_bb_tbl(dev, ppa, blks); | ||
675 | if (ret) | ||
676 | pr_err("nvm: failed bb tbl for ch%u lun%u\n", | ||
677 | ppa.g.ch, ppa.g.lun); | ||
678 | |||
679 | ret = nvm_factory_blks(dev, ppa, blks, nr_blks, blk_bitmap, | ||
680 | flags); | ||
681 | if (ret) | ||
682 | break; | ||
683 | } | ||
684 | |||
685 | kfree(blks); | ||
686 | return ret; | ||
687 | } | ||
688 | |||
689 | int nvm_dev_factory(struct nvm_dev *dev, int flags) | ||
690 | { | ||
691 | struct nvm_geo *geo = &dev->geo; | ||
692 | struct ppa_addr *ppas; | ||
693 | int ppa_cnt, ret = -ENOMEM; | ||
694 | int max_ppas = dev->ops->max_phys_sect / geo->nr_planes; | ||
695 | struct ppa_addr sysblk_ppas[MAX_SYSBLKS]; | ||
696 | struct sysblk_scan s; | ||
697 | unsigned long *blk_bitmap; | ||
698 | |||
699 | blk_bitmap = kzalloc(factory_nblks(geo->blks_per_lun) * geo->nr_luns, | ||
700 | GFP_KERNEL); | ||
701 | if (!blk_bitmap) | ||
702 | return ret; | ||
703 | |||
704 | ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL); | ||
705 | if (!ppas) | ||
706 | goto err_blks; | ||
707 | |||
708 | /* create list of blks to be erased */ | ||
709 | ret = nvm_fact_select_blks(dev, blk_bitmap, flags); | ||
710 | if (ret) | ||
711 | goto err_ppas; | ||
712 | |||
713 | /* continue to erase until the list of blks is empty */ | ||
714 | while ((ppa_cnt = | ||
715 | nvm_fact_get_blks(dev, ppas, max_ppas, blk_bitmap)) > 0) | ||
716 | nvm_erase_ppa(dev, ppas, ppa_cnt, 0); | ||
717 | |||
718 | /* mark host reserved blocks free */ | ||
719 | if (flags & NVM_FACTORY_RESET_HOST_BLKS) { | ||
720 | nvm_setup_sysblk_scan(dev, &s, sysblk_ppas); | ||
721 | mutex_lock(&dev->mlock); | ||
722 | ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, 0); | ||
723 | if (!ret) | ||
724 | ret = nvm_sysblk_set_bb_tbl(dev, &s, NVM_BLK_T_FREE); | ||
725 | mutex_unlock(&dev->mlock); | ||
726 | } | ||
727 | err_ppas: | ||
728 | kfree(ppas); | ||
729 | err_blks: | ||
730 | kfree(blk_bitmap); | ||
731 | return ret; | ||
732 | } | ||
733 | EXPORT_SYMBOL(nvm_dev_factory); | ||
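nvm_dev_factory() above drives its erase pass off a per-LUN bitmap whose indexing is easy to misread: factory_nblks() rounds the blocks-per-LUN count up to a multiple of BITS_PER_LONG so every LUN owns whole bitmap words, and factory_blk_offset() turns a (channel, LUN) pair into the word index where that LUN's bits begin. The standalone model below reproduces only that arithmetic; the geometry numbers are made up and a 64-bit BITS_PER_LONG is assumed.

#include <stdio.h>

#define BITS_PER_LONG	64	/* assumption: 64-bit build */

/* round up so each LUN gets a whole number of bitmap words */
static int factory_nblks(int nblks)
{
	return (nblks + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
}

/* word index where the bits of LUN (ch, lun) start */
static unsigned int factory_blk_offset(int luns_per_chnl, int blks_per_lun,
				       int ch, int lun)
{
	int nblks = factory_nblks(blks_per_lun);

	return ((ch * luns_per_chnl * nblks) + (lun * nblks)) / BITS_PER_LONG;
}

int main(void)
{
	/* 1020 blocks per LUN round up to 1024 bits, i.e. 16 words per LUN */
	printf("rounded blocks: %d\n", factory_nblks(1020));

	/* with 4 LUNs per channel, (ch=1, lun=2) starts at word 96 */
	printf("word offset: %u\n", factory_blk_offset(4, 1020, 1, 2));
	return 0;
}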
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 76d20875503c..01035e718c1c 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
@@ -666,7 +666,7 @@ static inline struct search *search_alloc(struct bio *bio, | |||
666 | s->iop.write_prio = 0; | 666 | s->iop.write_prio = 0; |
667 | s->iop.error = 0; | 667 | s->iop.error = 0; |
668 | s->iop.flags = 0; | 668 | s->iop.flags = 0; |
669 | s->iop.flush_journal = (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0; | 669 | s->iop.flush_journal = op_is_flush(bio->bi_opf); |
670 | s->iop.wq = bcache_wq; | 670 | s->iop.wq = bcache_wq; |
671 | 671 | ||
672 | return s; | 672 | return s; |
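This bcache hunk and the dm-cache/dm-thin hunks that follow replace open-coded "bi_opf & (REQ_PREFLUSH | REQ_FUA)" tests with the new op_is_flush() helper, so every caller asks the same question the same way. The userspace model below only demonstrates the shape of that test; the flag bit positions are invented for the demo and are not the kernel's REQ_* values.

#include <stdbool.h>
#include <stdio.h>

#define REQ_FUA		(1u << 0)	/* illustrative bit positions only */
#define REQ_PREFLUSH	(1u << 1)

/* a flush-like request carries either flag */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}

int main(void)
{
	unsigned int plain_write = 0;
	unsigned int fua_write = REQ_FUA;
	unsigned int preflush_write = REQ_PREFLUSH;

	printf("%d %d %d\n", op_is_flush(plain_write),
	       op_is_flush(fua_write), op_is_flush(preflush_write));
	return 0;
}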
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index e04c61e0839e..5b9cf56de8ef 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c | |||
@@ -787,8 +787,7 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) | |||
787 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); | 787 | struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); |
788 | 788 | ||
789 | spin_lock_irqsave(&cache->lock, flags); | 789 | spin_lock_irqsave(&cache->lock, flags); |
790 | if (cache->need_tick_bio && | 790 | if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) && |
791 | !(bio->bi_opf & (REQ_FUA | REQ_PREFLUSH)) && | ||
792 | bio_op(bio) != REQ_OP_DISCARD) { | 791 | bio_op(bio) != REQ_OP_DISCARD) { |
793 | pb->tick = true; | 792 | pb->tick = true; |
794 | cache->need_tick_bio = false; | 793 | cache->need_tick_bio = false; |
@@ -828,11 +827,6 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) | |||
828 | return to_oblock(block_nr); | 827 | return to_oblock(block_nr); |
829 | } | 828 | } |
830 | 829 | ||
831 | static int bio_triggers_commit(struct cache *cache, struct bio *bio) | ||
832 | { | ||
833 | return bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); | ||
834 | } | ||
835 | |||
836 | /* | 830 | /* |
837 | * You must increment the deferred set whilst the prison cell is held. To | 831 | * You must increment the deferred set whilst the prison cell is held. To |
838 | * encourage this, we ask for 'cell' to be passed in. | 832 | * encourage this, we ask for 'cell' to be passed in. |
@@ -884,7 +878,7 @@ static void issue(struct cache *cache, struct bio *bio) | |||
884 | { | 878 | { |
885 | unsigned long flags; | 879 | unsigned long flags; |
886 | 880 | ||
887 | if (!bio_triggers_commit(cache, bio)) { | 881 | if (!op_is_flush(bio->bi_opf)) { |
888 | accounted_request(cache, bio); | 882 | accounted_request(cache, bio); |
889 | return; | 883 | return; |
890 | } | 884 | } |
@@ -1069,8 +1063,7 @@ static void dec_io_migrations(struct cache *cache) | |||
1069 | 1063 | ||
1070 | static bool discard_or_flush(struct bio *bio) | 1064 | static bool discard_or_flush(struct bio *bio) |
1071 | { | 1065 | { |
1072 | return bio_op(bio) == REQ_OP_DISCARD || | 1066 | return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf); |
1073 | bio->bi_opf & (REQ_PREFLUSH | REQ_FUA); | ||
1074 | } | 1067 | } |
1075 | 1068 | ||
1076 | static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) | 1069 | static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell) |
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index d1c05c12a9db..110982db4b48 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c | |||
@@ -699,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) | |||
699 | 699 | ||
700 | static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) | 700 | static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) |
701 | { | 701 | { |
702 | return (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) && | 702 | return op_is_flush(bio->bi_opf) && |
703 | dm_thin_changed_this_transaction(tc->td); | 703 | dm_thin_changed_this_transaction(tc->td); |
704 | } | 704 | } |
705 | 705 | ||
@@ -870,8 +870,7 @@ static void __inc_remap_and_issue_cell(void *context, | |||
870 | struct bio *bio; | 870 | struct bio *bio; |
871 | 871 | ||
872 | while ((bio = bio_list_pop(&cell->bios))) { | 872 | while ((bio = bio_list_pop(&cell->bios))) { |
873 | if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || | 873 | if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) |
874 | bio_op(bio) == REQ_OP_DISCARD) | ||
875 | bio_list_add(&info->defer_bios, bio); | 874 | bio_list_add(&info->defer_bios, bio); |
876 | else { | 875 | else { |
877 | inc_all_io_entry(info->tc->pool, bio); | 876 | inc_all_io_entry(info->tc->pool, bio); |
@@ -1716,9 +1715,8 @@ static void __remap_and_issue_shared_cell(void *context, | |||
1716 | struct bio *bio; | 1715 | struct bio *bio; |
1717 | 1716 | ||
1718 | while ((bio = bio_list_pop(&cell->bios))) { | 1717 | while ((bio = bio_list_pop(&cell->bios))) { |
1719 | if ((bio_data_dir(bio) == WRITE) || | 1718 | if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) || |
1720 | (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || | 1719 | bio_op(bio) == REQ_OP_DISCARD) |
1721 | bio_op(bio) == REQ_OP_DISCARD)) | ||
1722 | bio_list_add(&info->defer_bios, bio); | 1720 | bio_list_add(&info->defer_bios, bio); |
1723 | else { | 1721 | else { |
1724 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));; | 1722 | struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));; |
@@ -2635,8 +2633,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) | |||
2635 | return DM_MAPIO_SUBMITTED; | 2633 | return DM_MAPIO_SUBMITTED; |
2636 | } | 2634 | } |
2637 | 2635 | ||
2638 | if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA) || | 2636 | if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) { |
2639 | bio_op(bio) == REQ_OP_DISCARD) { | ||
2640 | thin_defer_bio_with_throttle(tc, bio); | 2637 | thin_defer_bio_with_throttle(tc, bio); |
2641 | return DM_MAPIO_SUBMITTED; | 2638 | return DM_MAPIO_SUBMITTED; |
2642 | } | 2639 | } |
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8a3c3e32a704..138c6fa00cd5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c | |||
@@ -784,6 +784,13 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, | |||
784 | return nvme_sg_io(ns, (void __user *)arg); | 784 | return nvme_sg_io(ns, (void __user *)arg); |
785 | #endif | 785 | #endif |
786 | default: | 786 | default: |
787 | #ifdef CONFIG_NVM | ||
788 | if (ns->ndev) | ||
789 | return nvme_nvm_ioctl(ns, cmd, arg); | ||
790 | #endif | ||
791 | if (is_sed_ioctl(cmd)) | ||
792 | return sed_ioctl(ns->ctrl->opal_dev, cmd, | ||
793 | (void __user *) arg); | ||
787 | return -ENOTTY; | 794 | return -ENOTTY; |
788 | } | 795 | } |
789 | } | 796 | } |
@@ -1051,6 +1058,28 @@ static const struct pr_ops nvme_pr_ops = { | |||
1051 | .pr_clear = nvme_pr_clear, | 1058 | .pr_clear = nvme_pr_clear, |
1052 | }; | 1059 | }; |
1053 | 1060 | ||
1061 | #ifdef CONFIG_BLK_SED_OPAL | ||
1062 | int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, | ||
1063 | bool send) | ||
1064 | { | ||
1065 | struct nvme_ctrl *ctrl = data; | ||
1066 | struct nvme_command cmd; | ||
1067 | |||
1068 | memset(&cmd, 0, sizeof(cmd)); | ||
1069 | if (send) | ||
1070 | cmd.common.opcode = nvme_admin_security_send; | ||
1071 | else | ||
1072 | cmd.common.opcode = nvme_admin_security_recv; | ||
1073 | cmd.common.nsid = 0; | ||
1074 | cmd.common.cdw10[0] = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8); | ||
1075 | cmd.common.cdw10[1] = cpu_to_le32(len); | ||
1076 | |||
1077 | return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, | ||
1078 | ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0); | ||
1079 | } | ||
1080 | EXPORT_SYMBOL_GPL(nvme_sec_submit); | ||
1081 | #endif /* CONFIG_BLK_SED_OPAL */ | ||
1082 | |||
1054 | static const struct block_device_operations nvme_fops = { | 1083 | static const struct block_device_operations nvme_fops = { |
1055 | .owner = THIS_MODULE, | 1084 | .owner = THIS_MODULE, |
1056 | .ioctl = nvme_ioctl, | 1085 | .ioctl = nvme_ioctl, |
@@ -1230,6 +1259,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) | |||
1230 | return -EIO; | 1259 | return -EIO; |
1231 | } | 1260 | } |
1232 | 1261 | ||
1262 | ctrl->oacs = le16_to_cpu(id->oacs); | ||
1233 | ctrl->vid = le16_to_cpu(id->vid); | 1263 | ctrl->vid = le16_to_cpu(id->vid); |
1234 | ctrl->oncs = le16_to_cpup(&id->oncs); | 1264 | ctrl->oncs = le16_to_cpup(&id->oncs); |
1235 | atomic_set(&ctrl->abort_limit, id->acl + 1); | 1265 | atomic_set(&ctrl->abort_limit, id->acl + 1); |
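nvme_sec_submit() above packs the OPAL parameters into the admin command's dwords: the security protocol (SECP) lands in bits 31:24 and the protocol-specific field (SPSP) in bits 23:8 of CDW10 (cmd.common.cdw10[0]), with the transfer length in the following dword. The sketch below reproduces just that packing in userspace; the example SECP/SPSP/length values are arbitrary and endian conversion is left out.

#include <stdint.h>
#include <stdio.h>

/* build CDW10 for Security Send/Receive: SECP in 31:24, SPSP in 23:8 */
static uint32_t sec_cdw10(uint8_t secp, uint16_t spsp)
{
	return ((uint32_t)secp << 24) | ((uint32_t)spsp << 8);
}

int main(void)
{
	/* demo values only: SECP 0x01, SPSP 0x0001, 512-byte transfer */
	printf("cdw10 = 0x%08x\n", sec_cdw10(0x01, 0x0001));
	printf("cdw11 = %u\n", 512u);
	return 0;
}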
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 588d4a34c083..21cac8523bd8 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
27 | #include <linux/lightnvm.h> | 27 | #include <linux/lightnvm.h> |
28 | #include <linux/vmalloc.h> | 28 | #include <linux/vmalloc.h> |
29 | #include <linux/sched/sysctl.h> | ||
30 | #include <uapi/linux/lightnvm.h> | ||
29 | 31 | ||
30 | enum nvme_nvm_admin_opcode { | 32 | enum nvme_nvm_admin_opcode { |
31 | nvme_nvm_admin_identity = 0xe2, | 33 | nvme_nvm_admin_identity = 0xe2, |
@@ -248,50 +250,48 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) | |||
248 | { | 250 | { |
249 | struct nvme_nvm_id_group *src; | 251 | struct nvme_nvm_id_group *src; |
250 | struct nvm_id_group *dst; | 252 | struct nvm_id_group *dst; |
251 | int i, end; | ||
252 | |||
253 | end = min_t(u32, 4, nvm_id->cgrps); | ||
254 | |||
255 | for (i = 0; i < end; i++) { | ||
256 | src = &nvme_nvm_id->groups[i]; | ||
257 | dst = &nvm_id->groups[i]; | ||
258 | |||
259 | dst->mtype = src->mtype; | ||
260 | dst->fmtype = src->fmtype; | ||
261 | dst->num_ch = src->num_ch; | ||
262 | dst->num_lun = src->num_lun; | ||
263 | dst->num_pln = src->num_pln; | ||
264 | |||
265 | dst->num_pg = le16_to_cpu(src->num_pg); | ||
266 | dst->num_blk = le16_to_cpu(src->num_blk); | ||
267 | dst->fpg_sz = le16_to_cpu(src->fpg_sz); | ||
268 | dst->csecs = le16_to_cpu(src->csecs); | ||
269 | dst->sos = le16_to_cpu(src->sos); | ||
270 | |||
271 | dst->trdt = le32_to_cpu(src->trdt); | ||
272 | dst->trdm = le32_to_cpu(src->trdm); | ||
273 | dst->tprt = le32_to_cpu(src->tprt); | ||
274 | dst->tprm = le32_to_cpu(src->tprm); | ||
275 | dst->tbet = le32_to_cpu(src->tbet); | ||
276 | dst->tbem = le32_to_cpu(src->tbem); | ||
277 | dst->mpos = le32_to_cpu(src->mpos); | ||
278 | dst->mccap = le32_to_cpu(src->mccap); | ||
279 | |||
280 | dst->cpar = le16_to_cpu(src->cpar); | ||
281 | |||
282 | if (dst->fmtype == NVM_ID_FMTYPE_MLC) { | ||
283 | memcpy(dst->lptbl.id, src->lptbl.id, 8); | ||
284 | dst->lptbl.mlc.num_pairs = | ||
285 | le16_to_cpu(src->lptbl.mlc.num_pairs); | ||
286 | |||
287 | if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { | ||
288 | pr_err("nvm: number of MLC pairs not supported\n"); | ||
289 | return -EINVAL; | ||
290 | } | ||
291 | 253 | ||
292 | memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, | 254 | if (nvme_nvm_id->cgrps != 1) |
293 | dst->lptbl.mlc.num_pairs); | 255 | return -EINVAL; |
256 | |||
257 | src = &nvme_nvm_id->groups[0]; | ||
258 | dst = &nvm_id->grp; | ||
259 | |||
260 | dst->mtype = src->mtype; | ||
261 | dst->fmtype = src->fmtype; | ||
262 | dst->num_ch = src->num_ch; | ||
263 | dst->num_lun = src->num_lun; | ||
264 | dst->num_pln = src->num_pln; | ||
265 | |||
266 | dst->num_pg = le16_to_cpu(src->num_pg); | ||
267 | dst->num_blk = le16_to_cpu(src->num_blk); | ||
268 | dst->fpg_sz = le16_to_cpu(src->fpg_sz); | ||
269 | dst->csecs = le16_to_cpu(src->csecs); | ||
270 | dst->sos = le16_to_cpu(src->sos); | ||
271 | |||
272 | dst->trdt = le32_to_cpu(src->trdt); | ||
273 | dst->trdm = le32_to_cpu(src->trdm); | ||
274 | dst->tprt = le32_to_cpu(src->tprt); | ||
275 | dst->tprm = le32_to_cpu(src->tprm); | ||
276 | dst->tbet = le32_to_cpu(src->tbet); | ||
277 | dst->tbem = le32_to_cpu(src->tbem); | ||
278 | dst->mpos = le32_to_cpu(src->mpos); | ||
279 | dst->mccap = le32_to_cpu(src->mccap); | ||
280 | |||
281 | dst->cpar = le16_to_cpu(src->cpar); | ||
282 | |||
283 | if (dst->fmtype == NVM_ID_FMTYPE_MLC) { | ||
284 | memcpy(dst->lptbl.id, src->lptbl.id, 8); | ||
285 | dst->lptbl.mlc.num_pairs = | ||
286 | le16_to_cpu(src->lptbl.mlc.num_pairs); | ||
287 | |||
288 | if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { | ||
289 | pr_err("nvm: number of MLC pairs not supported\n"); | ||
290 | return -EINVAL; | ||
294 | } | 291 | } |
292 | |||
293 | memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, | ||
294 | dst->lptbl.mlc.num_pairs); | ||
295 | } | 295 | } |
296 | 296 | ||
297 | return 0; | 297 | return 0; |
@@ -321,7 +321,6 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) | |||
321 | 321 | ||
322 | nvm_id->ver_id = nvme_nvm_id->ver_id; | 322 | nvm_id->ver_id = nvme_nvm_id->ver_id; |
323 | nvm_id->vmnt = nvme_nvm_id->vmnt; | 323 | nvm_id->vmnt = nvme_nvm_id->vmnt; |
324 | nvm_id->cgrps = nvme_nvm_id->cgrps; | ||
325 | nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); | 324 | nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); |
326 | nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); | 325 | nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); |
327 | memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, | 326 | memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, |
@@ -372,7 +371,7 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, | |||
372 | } | 371 | } |
373 | 372 | ||
374 | /* Transform physical address to target address space */ | 373 | /* Transform physical address to target address space */ |
375 | nvmdev->mt->part_to_tgt(nvmdev, entries, cmd_nlb); | 374 | nvm_part_to_tgt(nvmdev, entries, cmd_nlb); |
376 | 375 | ||
377 | if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { | 376 | if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { |
378 | ret = -EINTR; | 377 | ret = -EINTR; |
@@ -485,7 +484,8 @@ static void nvme_nvm_end_io(struct request *rq, int error) | |||
485 | struct nvm_rq *rqd = rq->end_io_data; | 484 | struct nvm_rq *rqd = rq->end_io_data; |
486 | 485 | ||
487 | rqd->ppa_status = nvme_req(rq)->result.u64; | 486 | rqd->ppa_status = nvme_req(rq)->result.u64; |
488 | nvm_end_io(rqd, error); | 487 | rqd->error = error; |
488 | nvm_end_io(rqd); | ||
489 | 489 | ||
490 | kfree(nvme_req(rq)->cmd); | 490 | kfree(nvme_req(rq)->cmd); |
491 | blk_mq_free_request(rq); | 491 | blk_mq_free_request(rq); |
@@ -586,6 +586,224 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { | |||
586 | .max_phys_sect = 64, | 586 | .max_phys_sect = 64, |
587 | }; | 587 | }; |
588 | 588 | ||
589 | static void nvme_nvm_end_user_vio(struct request *rq, int error) | ||
590 | { | ||
591 | struct completion *waiting = rq->end_io_data; | ||
592 | |||
593 | complete(waiting); | ||
594 | } | ||
595 | |||
596 | static int nvme_nvm_submit_user_cmd(struct request_queue *q, | ||
597 | struct nvme_ns *ns, | ||
598 | struct nvme_nvm_command *vcmd, | ||
599 | void __user *ubuf, unsigned int bufflen, | ||
600 | void __user *meta_buf, unsigned int meta_len, | ||
601 | void __user *ppa_buf, unsigned int ppa_len, | ||
602 | u32 *result, u64 *status, unsigned int timeout) | ||
603 | { | ||
604 | bool write = nvme_is_write((struct nvme_command *)vcmd); | ||
605 | struct nvm_dev *dev = ns->ndev; | ||
606 | struct gendisk *disk = ns->disk; | ||
607 | struct request *rq; | ||
608 | struct bio *bio = NULL; | ||
609 | __le64 *ppa_list = NULL; | ||
610 | dma_addr_t ppa_dma; | ||
611 | __le64 *metadata = NULL; | ||
612 | dma_addr_t metadata_dma; | ||
613 | DECLARE_COMPLETION_ONSTACK(wait); | ||
614 | int ret; | ||
615 | |||
616 | rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, | ||
617 | NVME_QID_ANY); | ||
618 | if (IS_ERR(rq)) { | ||
619 | ret = -ENOMEM; | ||
620 | goto err_cmd; | ||
621 | } | ||
622 | |||
623 | rq->timeout = timeout ? timeout : ADMIN_TIMEOUT; | ||
624 | |||
625 | rq->cmd_flags &= ~REQ_FAILFAST_DRIVER; | ||
626 | rq->end_io_data = &wait; | ||
627 | |||
628 | if (ppa_buf && ppa_len) { | ||
629 | ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); | ||
630 | if (!ppa_list) { | ||
631 | ret = -ENOMEM; | ||
632 | goto err_rq; | ||
633 | } | ||
634 | if (copy_from_user(ppa_list, (void __user *)ppa_buf, | ||
635 | sizeof(u64) * (ppa_len + 1))) { | ||
636 | ret = -EFAULT; | ||
637 | goto err_ppa; | ||
638 | } | ||
639 | vcmd->ph_rw.spba = cpu_to_le64(ppa_dma); | ||
640 | } else { | ||
641 | vcmd->ph_rw.spba = cpu_to_le64((uintptr_t)ppa_buf); | ||
642 | } | ||
643 | |||
644 | if (ubuf && bufflen) { | ||
645 | ret = blk_rq_map_user(q, rq, NULL, ubuf, bufflen, GFP_KERNEL); | ||
646 | if (ret) | ||
647 | goto err_ppa; | ||
648 | bio = rq->bio; | ||
649 | |||
650 | if (meta_buf && meta_len) { | ||
651 | metadata = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, | ||
652 | &metadata_dma); | ||
653 | if (!metadata) { | ||
654 | ret = -ENOMEM; | ||
655 | goto err_map; | ||
656 | } | ||
657 | |||
658 | if (write) { | ||
659 | if (copy_from_user(metadata, | ||
660 | (void __user *)meta_buf, | ||
661 | meta_len)) { | ||
662 | ret = -EFAULT; | ||
663 | goto err_meta; | ||
664 | } | ||
665 | } | ||
666 | vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma); | ||
667 | } | ||
668 | |||
669 | if (!disk) | ||
670 | goto submit; | ||
671 | |||
672 | bio->bi_bdev = bdget_disk(disk, 0); | ||
673 | if (!bio->bi_bdev) { | ||
674 | ret = -ENODEV; | ||
675 | goto err_meta; | ||
676 | } | ||
677 | } | ||
678 | |||
679 | submit: | ||
680 | blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_user_vio); | ||
681 | |||
682 | wait_for_completion_io(&wait); | ||
683 | |||
684 | ret = nvme_error_status(rq->errors); | ||
685 | if (result) | ||
686 | *result = rq->errors & 0x7ff; | ||
687 | if (status) | ||
688 | *status = le64_to_cpu(nvme_req(rq)->result.u64); | ||
689 | |||
690 | if (metadata && !ret && !write) { | ||
691 | if (copy_to_user(meta_buf, (void *)metadata, meta_len)) | ||
692 | ret = -EFAULT; | ||
693 | } | ||
694 | err_meta: | ||
695 | if (meta_buf && meta_len) | ||
696 | dma_pool_free(dev->dma_pool, metadata, metadata_dma); | ||
697 | err_map: | ||
698 | if (bio) { | ||
699 | if (disk && bio->bi_bdev) | ||
700 | bdput(bio->bi_bdev); | ||
701 | blk_rq_unmap_user(bio); | ||
702 | } | ||
703 | err_ppa: | ||
704 | if (ppa_buf && ppa_len) | ||
705 | dma_pool_free(dev->dma_pool, ppa_list, ppa_dma); | ||
706 | err_rq: | ||
707 | blk_mq_free_request(rq); | ||
708 | err_cmd: | ||
709 | return ret; | ||
710 | } | ||
711 | |||
712 | static int nvme_nvm_submit_vio(struct nvme_ns *ns, | ||
713 | struct nvm_user_vio __user *uvio) | ||
714 | { | ||
715 | struct nvm_user_vio vio; | ||
716 | struct nvme_nvm_command c; | ||
717 | unsigned int length; | ||
718 | int ret; | ||
719 | |||
720 | if (copy_from_user(&vio, uvio, sizeof(vio))) | ||
721 | return -EFAULT; | ||
722 | if (vio.flags) | ||
723 | return -EINVAL; | ||
724 | |||
725 | memset(&c, 0, sizeof(c)); | ||
726 | c.ph_rw.opcode = vio.opcode; | ||
727 | c.ph_rw.nsid = cpu_to_le32(ns->ns_id); | ||
728 | c.ph_rw.control = cpu_to_le16(vio.control); | ||
729 | c.ph_rw.length = cpu_to_le16(vio.nppas); | ||
730 | |||
731 | length = (vio.nppas + 1) << ns->lba_shift; | ||
732 | |||
733 | ret = nvme_nvm_submit_user_cmd(ns->queue, ns, &c, | ||
734 | (void __user *)(uintptr_t)vio.addr, length, | ||
735 | (void __user *)(uintptr_t)vio.metadata, | ||
736 | vio.metadata_len, | ||
737 | (void __user *)(uintptr_t)vio.ppa_list, vio.nppas, | ||
738 | &vio.result, &vio.status, 0); | ||
739 | |||
740 | if (ret && copy_to_user(uvio, &vio, sizeof(vio))) | ||
741 | return -EFAULT; | ||
742 | |||
743 | return ret; | ||
744 | } | ||
745 | |||
746 | static int nvme_nvm_user_vcmd(struct nvme_ns *ns, int admin, | ||
747 | struct nvm_passthru_vio __user *uvcmd) | ||
748 | { | ||
749 | struct nvm_passthru_vio vcmd; | ||
750 | struct nvme_nvm_command c; | ||
751 | struct request_queue *q; | ||
752 | unsigned int timeout = 0; | ||
753 | int ret; | ||
754 | |||
755 | if (copy_from_user(&vcmd, uvcmd, sizeof(vcmd))) | ||
756 | return -EFAULT; | ||
757 | if ((vcmd.opcode != 0xF2) && (!capable(CAP_SYS_ADMIN))) | ||
758 | return -EACCES; | ||
759 | if (vcmd.flags) | ||
760 | return -EINVAL; | ||
761 | |||
762 | memset(&c, 0, sizeof(c)); | ||
763 | c.common.opcode = vcmd.opcode; | ||
764 | c.common.nsid = cpu_to_le32(ns->ns_id); | ||
765 | c.common.cdw2[0] = cpu_to_le32(vcmd.cdw2); | ||
766 | c.common.cdw2[1] = cpu_to_le32(vcmd.cdw3); | ||
767 | /* cdw11-12 */ | ||
768 | c.ph_rw.length = cpu_to_le16(vcmd.nppas); | ||
769 | c.ph_rw.control = cpu_to_le32(vcmd.control); | ||
770 | c.common.cdw10[3] = cpu_to_le32(vcmd.cdw13); | ||
771 | c.common.cdw10[4] = cpu_to_le32(vcmd.cdw14); | ||
772 | c.common.cdw10[5] = cpu_to_le32(vcmd.cdw15); | ||
773 | |||
774 | if (vcmd.timeout_ms) | ||
775 | timeout = msecs_to_jiffies(vcmd.timeout_ms); | ||
776 | |||
777 | q = admin ? ns->ctrl->admin_q : ns->queue; | ||
778 | |||
779 | ret = nvme_nvm_submit_user_cmd(q, ns, | ||
780 | (struct nvme_nvm_command *)&c, | ||
781 | (void __user *)(uintptr_t)vcmd.addr, vcmd.data_len, | ||
782 | (void __user *)(uintptr_t)vcmd.metadata, | ||
783 | vcmd.metadata_len, | ||
784 | (void __user *)(uintptr_t)vcmd.ppa_list, vcmd.nppas, | ||
785 | &vcmd.result, &vcmd.status, timeout); | ||
786 | |||
787 | if (ret && copy_to_user(uvcmd, &vcmd, sizeof(vcmd))) | ||
788 | return -EFAULT; | ||
789 | |||
790 | return ret; | ||
791 | } | ||
792 | |||
793 | int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) | ||
794 | { | ||
795 | switch (cmd) { | ||
796 | case NVME_NVM_IOCTL_ADMIN_VIO: | ||
797 | return nvme_nvm_user_vcmd(ns, 1, (void __user *)arg); | ||
798 | case NVME_NVM_IOCTL_IO_VIO: | ||
799 | return nvme_nvm_user_vcmd(ns, 0, (void __user *)arg); | ||
800 | case NVME_NVM_IOCTL_SUBMIT_VIO: | ||
801 | return nvme_nvm_submit_vio(ns, (void __user *)arg); | ||
802 | default: | ||
803 | return -ENOTTY; | ||
804 | } | ||
805 | } | ||
806 | |||
589 | int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) | 807 | int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) |
590 | { | 808 | { |
591 | struct request_queue *q = ns->queue; | 809 | struct request_queue *q = ns->queue; |
@@ -622,7 +840,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev, | |||
622 | return 0; | 840 | return 0; |
623 | 841 | ||
624 | id = &ndev->identity; | 842 | id = &ndev->identity; |
625 | grp = &id->groups[0]; | 843 | grp = &id->grp; |
626 | attr = &dattr->attr; | 844 | attr = &dattr->attr; |
627 | 845 | ||
628 | if (strcmp(attr->name, "version") == 0) { | 846 | if (strcmp(attr->name, "version") == 0) { |
@@ -633,10 +851,9 @@ static ssize_t nvm_dev_attr_show(struct device *dev, | |||
633 | return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); | 851 | return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); |
634 | } else if (strcmp(attr->name, "device_mode") == 0) { | 852 | } else if (strcmp(attr->name, "device_mode") == 0) { |
635 | return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); | 853 | return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); |
854 | /* kept for compatibility */ | ||
636 | } else if (strcmp(attr->name, "media_manager") == 0) { | 855 | } else if (strcmp(attr->name, "media_manager") == 0) { |
637 | if (!ndev->mt) | 856 | return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); |
638 | return scnprintf(page, PAGE_SIZE, "%s\n", "none"); | ||
639 | return scnprintf(page, PAGE_SIZE, "%s\n", ndev->mt->name); | ||
640 | } else if (strcmp(attr->name, "ppa_format") == 0) { | 857 | } else if (strcmp(attr->name, "ppa_format") == 0) { |
641 | return scnprintf(page, PAGE_SIZE, | 858 | return scnprintf(page, PAGE_SIZE, |
642 | "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", | 859 | "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", |
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index aead6d08ed2c..14cfc6f7facb 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/kref.h> | 19 | #include <linux/kref.h> |
20 | #include <linux/blk-mq.h> | 20 | #include <linux/blk-mq.h> |
21 | #include <linux/lightnvm.h> | 21 | #include <linux/lightnvm.h> |
22 | #include <linux/sed-opal.h> | ||
22 | 23 | ||
23 | enum { | 24 | enum { |
24 | /* | 25 | /* |
@@ -125,6 +126,8 @@ struct nvme_ctrl { | |||
125 | struct list_head node; | 126 | struct list_head node; |
126 | struct ida ns_ida; | 127 | struct ida ns_ida; |
127 | 128 | ||
129 | struct opal_dev *opal_dev; | ||
130 | |||
128 | char name[12]; | 131 | char name[12]; |
129 | char serial[20]; | 132 | char serial[20]; |
130 | char model[40]; | 133 | char model[40]; |
@@ -137,6 +140,7 @@ struct nvme_ctrl { | |||
137 | u32 max_hw_sectors; | 140 | u32 max_hw_sectors; |
138 | u16 oncs; | 141 | u16 oncs; |
139 | u16 vid; | 142 | u16 vid; |
143 | u16 oacs; | ||
140 | atomic_t abort_limit; | 144 | atomic_t abort_limit; |
141 | u8 event_limit; | 145 | u8 event_limit; |
142 | u8 vwc; | 146 | u8 vwc; |
@@ -267,6 +271,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl); | |||
267 | void nvme_queue_scan(struct nvme_ctrl *ctrl); | 271 | void nvme_queue_scan(struct nvme_ctrl *ctrl); |
268 | void nvme_remove_namespaces(struct nvme_ctrl *ctrl); | 272 | void nvme_remove_namespaces(struct nvme_ctrl *ctrl); |
269 | 273 | ||
274 | int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, | ||
275 | bool send); | ||
276 | |||
270 | #define NVME_NR_AERS 1 | 277 | #define NVME_NR_AERS 1 |
271 | void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, | 278 | void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, |
272 | union nvme_result *res); | 279 | union nvme_result *res); |
@@ -318,6 +325,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); | |||
318 | void nvme_nvm_unregister(struct nvme_ns *ns); | 325 | void nvme_nvm_unregister(struct nvme_ns *ns); |
319 | int nvme_nvm_register_sysfs(struct nvme_ns *ns); | 326 | int nvme_nvm_register_sysfs(struct nvme_ns *ns); |
320 | void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); | 327 | void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); |
328 | int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); | ||
321 | #else | 329 | #else |
322 | static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, | 330 | static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, |
323 | int node) | 331 | int node) |
@@ -335,6 +343,11 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i | |||
335 | { | 343 | { |
336 | return 0; | 344 | return 0; |
337 | } | 345 | } |
346 | static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, | ||
347 | unsigned long arg) | ||
348 | { | ||
349 | return -ENOTTY; | ||
350 | } | ||
338 | #endif /* CONFIG_NVM */ | 351 | #endif /* CONFIG_NVM */ |
339 | 352 | ||
340 | static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) | 353 | static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) |
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3faefabf339c..d67d0d0a3bc0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <linux/types.h> | 43 | #include <linux/types.h> |
44 | #include <linux/io-64-nonatomic-lo-hi.h> | 44 | #include <linux/io-64-nonatomic-lo-hi.h> |
45 | #include <asm/unaligned.h> | 45 | #include <asm/unaligned.h> |
46 | #include <linux/sed-opal.h> | ||
46 | 47 | ||
47 | #include "nvme.h" | 48 | #include "nvme.h" |
48 | 49 | ||
@@ -895,12 +896,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) | |||
895 | return BLK_EH_HANDLED; | 896 | return BLK_EH_HANDLED; |
896 | } | 897 | } |
897 | 898 | ||
898 | iod->aborted = 1; | ||
899 | |||
900 | if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { | 899 | if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { |
901 | atomic_inc(&dev->ctrl.abort_limit); | 900 | atomic_inc(&dev->ctrl.abort_limit); |
902 | return BLK_EH_RESET_TIMER; | 901 | return BLK_EH_RESET_TIMER; |
903 | } | 902 | } |
903 | iod->aborted = 1; | ||
904 | 904 | ||
905 | memset(&cmd, 0, sizeof(cmd)); | 905 | memset(&cmd, 0, sizeof(cmd)); |
906 | cmd.abort.opcode = nvme_admin_abort_cmd; | 906 | cmd.abort.opcode = nvme_admin_abort_cmd; |
@@ -1178,6 +1178,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) | |||
1178 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; | 1178 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; |
1179 | dev->admin_tagset.numa_node = dev_to_node(dev->dev); | 1179 | dev->admin_tagset.numa_node = dev_to_node(dev->dev); |
1180 | dev->admin_tagset.cmd_size = nvme_cmd_size(dev); | 1180 | dev->admin_tagset.cmd_size = nvme_cmd_size(dev); |
1181 | dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; | ||
1181 | dev->admin_tagset.driver_data = dev; | 1182 | dev->admin_tagset.driver_data = dev; |
1182 | 1183 | ||
1183 | if (blk_mq_alloc_tag_set(&dev->admin_tagset)) | 1184 | if (blk_mq_alloc_tag_set(&dev->admin_tagset)) |
@@ -1738,6 +1739,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) | |||
1738 | if (dev->ctrl.admin_q) | 1739 | if (dev->ctrl.admin_q) |
1739 | blk_put_queue(dev->ctrl.admin_q); | 1740 | blk_put_queue(dev->ctrl.admin_q); |
1740 | kfree(dev->queues); | 1741 | kfree(dev->queues); |
1742 | kfree(dev->ctrl.opal_dev); | ||
1741 | kfree(dev); | 1743 | kfree(dev); |
1742 | } | 1744 | } |
1743 | 1745 | ||
@@ -1754,6 +1756,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) | |||
1754 | static void nvme_reset_work(struct work_struct *work) | 1756 | static void nvme_reset_work(struct work_struct *work) |
1755 | { | 1757 | { |
1756 | struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); | 1758 | struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); |
1759 | bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); | ||
1757 | int result = -ENODEV; | 1760 | int result = -ENODEV; |
1758 | 1761 | ||
1759 | if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) | 1762 | if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING)) |
@@ -1786,6 +1789,14 @@ static void nvme_reset_work(struct work_struct *work) | |||
1786 | if (result) | 1789 | if (result) |
1787 | goto out; | 1790 | goto out; |
1788 | 1791 | ||
1792 | if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) { | ||
1793 | dev->ctrl.opal_dev = | ||
1794 | init_opal_dev(&dev->ctrl, &nvme_sec_submit); | ||
1795 | } | ||
1796 | |||
1797 | if (was_suspend) | ||
1798 | opal_unlock_from_suspend(dev->ctrl.opal_dev); | ||
1799 | |||
1789 | result = nvme_setup_io_queues(dev); | 1800 | result = nvme_setup_io_queues(dev); |
1790 | if (result) | 1801 | if (result) |
1791 | goto out; | 1802 | goto out; |
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 94352e4df831..013bfe049a48 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c | |||
@@ -117,7 +117,7 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi, | |||
117 | unsigned int clearing, int slot); | 117 | unsigned int clearing, int slot); |
118 | static int sr_packet(struct cdrom_device_info *, struct packet_command *); | 118 | static int sr_packet(struct cdrom_device_info *, struct packet_command *); |
119 | 119 | ||
120 | static struct cdrom_device_ops sr_dops = { | 120 | static const struct cdrom_device_ops sr_dops = { |
121 | .open = sr_open, | 121 | .open = sr_open, |
122 | .release = sr_release, | 122 | .release = sr_release, |
123 | .drive_status = sr_drive_status, | 123 | .drive_status = sr_drive_status, |
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4a2ab5d99ff7..8e4df3d6c8cd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h | |||
@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx { | |||
22 | 22 | ||
23 | unsigned long flags; /* BLK_MQ_F_* flags */ | 23 | unsigned long flags; /* BLK_MQ_F_* flags */ |
24 | 24 | ||
25 | void *sched_data; | ||
25 | struct request_queue *queue; | 26 | struct request_queue *queue; |
26 | struct blk_flush_queue *fq; | 27 | struct blk_flush_queue *fq; |
27 | 28 | ||
@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx { | |||
35 | atomic_t wait_index; | 36 | atomic_t wait_index; |
36 | 37 | ||
37 | struct blk_mq_tags *tags; | 38 | struct blk_mq_tags *tags; |
39 | struct blk_mq_tags *sched_tags; | ||
38 | 40 | ||
39 | struct srcu_struct queue_rq_srcu; | 41 | struct srcu_struct queue_rq_srcu; |
40 | 42 | ||
@@ -60,7 +62,7 @@ struct blk_mq_hw_ctx { | |||
60 | 62 | ||
61 | struct blk_mq_tag_set { | 63 | struct blk_mq_tag_set { |
62 | unsigned int *mq_map; | 64 | unsigned int *mq_map; |
63 | struct blk_mq_ops *ops; | 65 | const struct blk_mq_ops *ops; |
64 | unsigned int nr_hw_queues; | 66 | unsigned int nr_hw_queues; |
65 | unsigned int queue_depth; /* max hw supported */ | 67 | unsigned int queue_depth; /* max hw supported */ |
66 | unsigned int reserved_tags; | 68 | unsigned int reserved_tags; |
@@ -151,11 +153,13 @@ enum { | |||
151 | BLK_MQ_F_SG_MERGE = 1 << 2, | 153 | BLK_MQ_F_SG_MERGE = 1 << 2, |
152 | BLK_MQ_F_DEFER_ISSUE = 1 << 4, | 154 | BLK_MQ_F_DEFER_ISSUE = 1 << 4, |
153 | BLK_MQ_F_BLOCKING = 1 << 5, | 155 | BLK_MQ_F_BLOCKING = 1 << 5, |
156 | BLK_MQ_F_NO_SCHED = 1 << 6, | ||
154 | BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, | 157 | BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, |
155 | BLK_MQ_F_ALLOC_POLICY_BITS = 1, | 158 | BLK_MQ_F_ALLOC_POLICY_BITS = 1, |
156 | 159 | ||
157 | BLK_MQ_S_STOPPED = 0, | 160 | BLK_MQ_S_STOPPED = 0, |
158 | BLK_MQ_S_TAG_ACTIVE = 1, | 161 | BLK_MQ_S_TAG_ACTIVE = 1, |
162 | BLK_MQ_S_SCHED_RESTART = 2, | ||
159 | 163 | ||
160 | BLK_MQ_MAX_DEPTH = 10240, | 164 | BLK_MQ_MAX_DEPTH = 10240, |
161 | 165 | ||
@@ -179,14 +183,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set); | |||
179 | 183 | ||
180 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); | 184 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); |
181 | 185 | ||
182 | void blk_mq_insert_request(struct request *, bool, bool, bool); | ||
183 | void blk_mq_free_request(struct request *rq); | 186 | void blk_mq_free_request(struct request *rq); |
184 | void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq); | ||
185 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); | 187 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); |
186 | 188 | ||
187 | enum { | 189 | enum { |
188 | BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ | 190 | BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ |
189 | BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ | 191 | BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ |
192 | BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ | ||
190 | }; | 193 | }; |
191 | 194 | ||
192 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, | 195 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 519ea2c9df61..37c9a43c5e78 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -221,6 +221,15 @@ static inline bool op_is_write(unsigned int op) | |||
221 | } | 221 | } |
222 | 222 | ||
223 | /* | 223 | /* |
224 | * Check if the bio or request is one that needs special treatment in the | ||
225 | * flush state machine. | ||
226 | */ | ||
227 | static inline bool op_is_flush(unsigned int op) | ||
228 | { | ||
229 | return op & (REQ_FUA | REQ_PREFLUSH); | ||
230 | } | ||
231 | |||
232 | /* | ||
224 | * Reads are always treated as synchronous, as are requests with the FUA or | 233 | * Reads are always treated as synchronous, as are requests with the FUA or |
225 | * PREFLUSH flag. Other operations may be marked as synchronous using the | 234 | * PREFLUSH flag. Other operations may be marked as synchronous using the |
226 | * REQ_SYNC flag. | 235 | * REQ_SYNC flag. |
@@ -232,22 +241,29 @@ static inline bool op_is_sync(unsigned int op) | |||
232 | } | 241 | } |
233 | 242 | ||
234 | typedef unsigned int blk_qc_t; | 243 | typedef unsigned int blk_qc_t; |
235 | #define BLK_QC_T_NONE -1U | 244 | #define BLK_QC_T_NONE -1U |
236 | #define BLK_QC_T_SHIFT 16 | 245 | #define BLK_QC_T_SHIFT 16 |
246 | #define BLK_QC_T_INTERNAL (1U << 31) | ||
237 | 247 | ||
238 | static inline bool blk_qc_t_valid(blk_qc_t cookie) | 248 | static inline bool blk_qc_t_valid(blk_qc_t cookie) |
239 | { | 249 | { |
240 | return cookie != BLK_QC_T_NONE; | 250 | return cookie != BLK_QC_T_NONE; |
241 | } | 251 | } |
242 | 252 | ||
243 | static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) | 253 | static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num, |
254 | bool internal) | ||
244 | { | 255 | { |
245 | return tag | (queue_num << BLK_QC_T_SHIFT); | 256 | blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT); |
257 | |||
258 | if (internal) | ||
259 | ret |= BLK_QC_T_INTERNAL; | ||
260 | |||
261 | return ret; | ||
246 | } | 262 | } |
247 | 263 | ||
248 | static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) | 264 | static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) |
249 | { | 265 | { |
250 | return cookie >> BLK_QC_T_SHIFT; | 266 | return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; |
251 | } | 267 | } |
252 | 268 | ||
253 | static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) | 269 | static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) |
@@ -255,6 +271,11 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) | |||
255 | return cookie & ((1u << BLK_QC_T_SHIFT) - 1); | 271 | return cookie & ((1u << BLK_QC_T_SHIFT) - 1); |
256 | } | 272 | } |
257 | 273 | ||
274 | static inline bool blk_qc_t_is_internal(blk_qc_t cookie) | ||
275 | { | ||
276 | return (cookie & BLK_QC_T_INTERNAL) != 0; | ||
277 | } | ||
278 | |||
258 | struct blk_issue_stat { | 279 | struct blk_issue_stat { |
259 | u64 time; | 280 | u64 time; |
260 | }; | 281 | }; |
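The blk_types.h hunk above reserves bit 31 of the polling cookie for scheduler-internal tags: the low 16 bits still carry the tag, the bits above BLK_QC_T_SHIFT carry the hardware queue number, and BLK_QC_T_INTERNAL is masked off again when the queue number is extracted. A standalone sketch of that layout (constants copied from the hunk, the demo values in main() are invented):

/* Standalone sketch of the blk_qc_t cookie layout added above. */
#include <assert.h>
#include <stdbool.h>

typedef unsigned int blk_qc_t;

#define BLK_QC_T_NONE		-1U
#define BLK_QC_T_SHIFT		16
#define BLK_QC_T_INTERNAL	(1U << 31)

static blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num,
				bool internal)
{
	blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT);

	if (internal)
		ret |= BLK_QC_T_INTERNAL;	/* bit 31 marks a scheduler tag */
	return ret;
}

static unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
	/* strip the internal flag before shifting out the hctx number */
	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
}

static unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
{
	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}

int main(void)
{
	blk_qc_t c = blk_tag_to_qc_t(42, 3, true);	/* tag 42, hctx 3, sched tag */

	assert(blk_qc_t_to_tag(c) == 42);
	assert(blk_qc_t_to_queue_num(c) == 3);
	assert(c & BLK_QC_T_INTERNAL);
	return 0;
}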
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1ca8e8fd1078..05675b1dfd20 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -154,6 +154,7 @@ struct request { | |||
154 | 154 | ||
155 | /* the following two fields are internal, NEVER access directly */ | 155 | /* the following two fields are internal, NEVER access directly */ |
156 | unsigned int __data_len; /* total data len */ | 156 | unsigned int __data_len; /* total data len */ |
157 | int tag; | ||
157 | sector_t __sector; /* sector cursor */ | 158 | sector_t __sector; /* sector cursor */ |
158 | 159 | ||
159 | struct bio *bio; | 160 | struct bio *bio; |
@@ -220,9 +221,10 @@ struct request { | |||
220 | 221 | ||
221 | unsigned short ioprio; | 222 | unsigned short ioprio; |
222 | 223 | ||
224 | int internal_tag; | ||
225 | |||
223 | void *special; /* opaque pointer available for LLD use */ | 226 | void *special; /* opaque pointer available for LLD use */ |
224 | 227 | ||
225 | int tag; | ||
226 | int errors; | 228 | int errors; |
227 | 229 | ||
228 | /* | 230 | /* |
@@ -407,7 +409,7 @@ struct request_queue { | |||
407 | dma_drain_needed_fn *dma_drain_needed; | 409 | dma_drain_needed_fn *dma_drain_needed; |
408 | lld_busy_fn *lld_busy_fn; | 410 | lld_busy_fn *lld_busy_fn; |
409 | 411 | ||
410 | struct blk_mq_ops *mq_ops; | 412 | const struct blk_mq_ops *mq_ops; |
411 | 413 | ||
412 | unsigned int *mq_map; | 414 | unsigned int *mq_map; |
413 | 415 | ||
@@ -569,6 +571,11 @@ struct request_queue { | |||
569 | struct list_head tag_set_list; | 571 | struct list_head tag_set_list; |
570 | struct bio_set *bio_split; | 572 | struct bio_set *bio_split; |
571 | 573 | ||
574 | #ifdef CONFIG_DEBUG_FS | ||
575 | struct dentry *debugfs_dir; | ||
576 | struct dentry *mq_debugfs_dir; | ||
577 | #endif | ||
578 | |||
572 | bool mq_sysfs_init_done; | 579 | bool mq_sysfs_init_done; |
573 | }; | 580 | }; |
574 | 581 | ||
@@ -600,6 +607,7 @@ struct request_queue { | |||
600 | #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueable */ | 607 | #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueable */ |
601 | #define QUEUE_FLAG_DAX 26 /* device supports DAX */ | 608 | #define QUEUE_FLAG_DAX 26 /* device supports DAX */ |
602 | #define QUEUE_FLAG_STATS 27 /* track rq completion times */ | 609 | #define QUEUE_FLAG_STATS 27 /* track rq completion times */ |
610 | #define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ | ||
603 | 611 | ||
604 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | 612 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
605 | (1 << QUEUE_FLAG_STACKABLE) | \ | 613 | (1 << QUEUE_FLAG_STACKABLE) | \ |
@@ -1620,6 +1628,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, | |||
1620 | return __bvec_gap_to_prev(q, bprv, offset); | 1628 | return __bvec_gap_to_prev(q, bprv, offset); |
1621 | } | 1629 | } |
1622 | 1630 | ||
1631 | /* | ||
1632 | * Check if the two bvecs from two bios can be merged to one segment. | ||
1633 | * If so, there is no need to check the gap between the two bios, since | ||
1634 | * the last bvec of the 1st bio and the 1st bvec of the 2nd bio fit in one segment. | ||
1635 | */ | ||
1636 | static inline bool bios_segs_mergeable(struct request_queue *q, | ||
1637 | struct bio *prev, struct bio_vec *prev_last_bv, | ||
1638 | struct bio_vec *next_first_bv) | ||
1639 | { | ||
1640 | if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) | ||
1641 | return false; | ||
1642 | if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) | ||
1643 | return false; | ||
1644 | if (prev->bi_seg_back_size + next_first_bv->bv_len > | ||
1645 | queue_max_segment_size(q)) | ||
1646 | return false; | ||
1647 | return true; | ||
1648 | } | ||
1649 | |||
1623 | static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, | 1650 | static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, |
1624 | struct bio *next) | 1651 | struct bio *next) |
1625 | { | 1652 | { |
@@ -1629,7 +1656,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, | |||
1629 | bio_get_last_bvec(prev, &pb); | 1656 | bio_get_last_bvec(prev, &pb); |
1630 | bio_get_first_bvec(next, &nb); | 1657 | bio_get_first_bvec(next, &nb); |
1631 | 1658 | ||
1632 | return __bvec_gap_to_prev(q, &pb, nb.bv_offset); | 1659 | if (!bios_segs_mergeable(q, prev, &pb, &nb)) |
1660 | return __bvec_gap_to_prev(q, &pb, nb.bv_offset); | ||
1633 | } | 1661 | } |
1634 | 1662 | ||
1635 | return false; | 1663 | return false; |
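bios_segs_mergeable() above lets bio_will_gap() skip the gap check whenever the last bvec of the previous bio and the first bvec of the next one would collapse into a single segment anyway: they must be physically contiguous, stay inside the queue's segment boundary, and keep the combined segment under max_segment_size. A simplified userspace model of those three checks, with stand-in types replacing bio_vec and request_queue:

/* Toy model of the merge check; fields and limits are stand-ins. */
#include <stdbool.h>
#include <stdio.h>

struct toy_bvec {
	unsigned long phys_addr;	/* stand-in for page + offset */
	unsigned int len;
};

struct toy_queue_limits {
	unsigned long seg_boundary_mask;
	unsigned int max_segment_size;
};

static bool toy_segs_mergeable(const struct toy_queue_limits *lim,
			       unsigned int prev_seg_size,
			       const struct toy_bvec *prev_last,
			       const struct toy_bvec *next_first)
{
	/* physically contiguous? (BIOVEC_PHYS_MERGEABLE stand-in) */
	if (prev_last->phys_addr + prev_last->len != next_first->phys_addr)
		return false;
	/* same segment-boundary window? (BIOVEC_SEG_BOUNDARY stand-in) */
	if ((prev_last->phys_addr | lim->seg_boundary_mask) !=
	    ((next_first->phys_addr + next_first->len - 1) |
	     lim->seg_boundary_mask))
		return false;
	/* combined segment still within max_segment_size? */
	if (prev_seg_size + next_first->len > lim->max_segment_size)
		return false;
	return true;
}

int main(void)
{
	struct toy_queue_limits lim = { .seg_boundary_mask = 0xffffffff,
					.max_segment_size = 65536 };
	struct toy_bvec a = { .phys_addr = 0x1000, .len = 4096 };
	struct toy_bvec b = { .phys_addr = 0x2000, .len = 4096 };

	printf("mergeable: %d\n", toy_segs_mergeable(&lim, 4096, &a, &b));
	return 0;
}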
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h index 8609d577bb66..6e8f209a6dff 100644 --- a/include/linux/cdrom.h +++ b/include/linux/cdrom.h | |||
@@ -36,7 +36,7 @@ struct packet_command | |||
36 | 36 | ||
37 | /* Uniform cdrom data structures for cdrom.c */ | 37 | /* Uniform cdrom data structures for cdrom.c */ |
38 | struct cdrom_device_info { | 38 | struct cdrom_device_info { |
39 | struct cdrom_device_ops *ops; /* link to device_ops */ | 39 | const struct cdrom_device_ops *ops; /* link to device_ops */ |
40 | struct list_head list; /* linked list of all device_info */ | 40 | struct list_head list; /* linked list of all device_info */ |
41 | struct gendisk *disk; /* matching block layer disk */ | 41 | struct gendisk *disk; /* matching block layer disk */ |
42 | void *handle; /* driver-dependent data */ | 42 | void *handle; /* driver-dependent data */ |
@@ -87,7 +87,6 @@ struct cdrom_device_ops { | |||
87 | 87 | ||
88 | /* driver specifications */ | 88 | /* driver specifications */ |
89 | const int capability; /* capability flags */ | 89 | const int capability; /* capability flags */ |
90 | int n_minors; /* number of active minor devices */ | ||
91 | /* handle uniform packets for scsi type devices (scsi,atapi) */ | 90 | /* handle uniform packets for scsi type devices (scsi,atapi) */ |
92 | int (*generic_packet) (struct cdrom_device_info *, | 91 | int (*generic_packet) (struct cdrom_device_info *, |
93 | struct packet_command *); | 92 | struct packet_command *); |
@@ -123,6 +122,8 @@ extern int cdrom_mode_sense(struct cdrom_device_info *cdi, | |||
123 | int page_code, int page_control); | 122 | int page_code, int page_control); |
124 | extern void init_cdrom_command(struct packet_command *cgc, | 123 | extern void init_cdrom_command(struct packet_command *cgc, |
125 | void *buffer, int len, int type); | 124 | void *buffer, int len, int type); |
125 | extern int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, | ||
126 | struct packet_command *cgc); | ||
126 | 127 | ||
127 | /* The SCSI spec says there could be 256 slots. */ | 128 | /* The SCSI spec says there could be 256 slots. */ |
128 | #define CDROM_MAX_SLOTS 256 | 129 | #define CDROM_MAX_SLOTS 256 |
diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b276e9ef0e0b..b5825c4f06f7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h | |||
@@ -77,6 +77,34 @@ struct elevator_ops | |||
77 | elevator_registered_fn *elevator_registered_fn; | 77 | elevator_registered_fn *elevator_registered_fn; |
78 | }; | 78 | }; |
79 | 79 | ||
80 | struct blk_mq_alloc_data; | ||
81 | struct blk_mq_hw_ctx; | ||
82 | |||
83 | struct elevator_mq_ops { | ||
84 | int (*init_sched)(struct request_queue *, struct elevator_type *); | ||
85 | void (*exit_sched)(struct elevator_queue *); | ||
86 | |||
87 | bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); | ||
88 | bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); | ||
89 | int (*request_merge)(struct request_queue *q, struct request **, struct bio *); | ||
90 | void (*request_merged)(struct request_queue *, struct request *, int); | ||
91 | void (*requests_merged)(struct request_queue *, struct request *, struct request *); | ||
92 | struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *); | ||
93 | void (*put_request)(struct request *); | ||
94 | void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); | ||
95 | struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); | ||
96 | bool (*has_work)(struct blk_mq_hw_ctx *); | ||
97 | void (*completed_request)(struct blk_mq_hw_ctx *, struct request *); | ||
98 | void (*started_request)(struct request *); | ||
99 | void (*requeue_request)(struct request *); | ||
100 | struct request *(*former_request)(struct request_queue *, struct request *); | ||
101 | struct request *(*next_request)(struct request_queue *, struct request *); | ||
102 | int (*get_rq_priv)(struct request_queue *, struct request *); | ||
103 | void (*put_rq_priv)(struct request_queue *, struct request *); | ||
104 | void (*init_icq)(struct io_cq *); | ||
105 | void (*exit_icq)(struct io_cq *); | ||
106 | }; | ||
107 | |||
80 | #define ELV_NAME_MAX (16) | 108 | #define ELV_NAME_MAX (16) |
81 | 109 | ||
82 | struct elv_fs_entry { | 110 | struct elv_fs_entry { |
@@ -94,12 +122,16 @@ struct elevator_type | |||
94 | struct kmem_cache *icq_cache; | 122 | struct kmem_cache *icq_cache; |
95 | 123 | ||
96 | /* fields provided by elevator implementation */ | 124 | /* fields provided by elevator implementation */ |
97 | struct elevator_ops ops; | 125 | union { |
126 | struct elevator_ops sq; | ||
127 | struct elevator_mq_ops mq; | ||
128 | } ops; | ||
98 | size_t icq_size; /* see iocontext.h */ | 129 | size_t icq_size; /* see iocontext.h */ |
99 | size_t icq_align; /* ditto */ | 130 | size_t icq_align; /* ditto */ |
100 | struct elv_fs_entry *elevator_attrs; | 131 | struct elv_fs_entry *elevator_attrs; |
101 | char elevator_name[ELV_NAME_MAX]; | 132 | char elevator_name[ELV_NAME_MAX]; |
102 | struct module *elevator_owner; | 133 | struct module *elevator_owner; |
134 | bool uses_mq; | ||
103 | 135 | ||
104 | /* managed by elevator core */ | 136 | /* managed by elevator core */ |
105 | char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ | 137 | char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ |
@@ -123,6 +155,7 @@ struct elevator_queue | |||
123 | struct kobject kobj; | 155 | struct kobject kobj; |
124 | struct mutex sysfs_lock; | 156 | struct mutex sysfs_lock; |
125 | unsigned int registered:1; | 157 | unsigned int registered:1; |
158 | unsigned int uses_mq:1; | ||
126 | DECLARE_HASHTABLE(hash, ELV_HASH_BITS); | 159 | DECLARE_HASHTABLE(hash, ELV_HASH_BITS); |
127 | }; | 160 | }; |
128 | 161 | ||
@@ -139,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *, | |||
139 | extern void elv_merged_request(struct request_queue *, struct request *, int); | 172 | extern void elv_merged_request(struct request_queue *, struct request *, int); |
140 | extern void elv_bio_merged(struct request_queue *q, struct request *, | 173 | extern void elv_bio_merged(struct request_queue *q, struct request *, |
141 | struct bio *); | 174 | struct bio *); |
175 | extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); | ||
142 | extern void elv_requeue_request(struct request_queue *, struct request *); | 176 | extern void elv_requeue_request(struct request_queue *, struct request *); |
143 | extern struct request *elv_former_request(struct request_queue *, struct request *); | 177 | extern struct request *elv_former_request(struct request_queue *, struct request *); |
144 | extern struct request *elv_latter_request(struct request_queue *, struct request *); | 178 | extern struct request *elv_latter_request(struct request_queue *, struct request *); |
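The elevator_mq_ops table and the new ops union above are what a blk-mq I/O scheduler registers through; callbacks it leaves NULL are skipped by the core. A minimal skeleton of such a scheduler, sketched under the assumption that registration still goes through elv_register()/elv_unregister() and that init_sched attaches an elevator_queue obtained from elevator_alloc(); the "noop_mq" name and the trivial callbacks are illustrative only:

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/elevator.h>

static int noop_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
	struct elevator_queue *eq;

	/* Assumed pattern: allocate the elevator_queue and attach it; a real
	 * scheduler would also hang its state off eq->elevator_data. */
	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;
	q->elevator = eq;
	return 0;
}

static void noop_mq_exit_sched(struct elevator_queue *eq)
{
	/* nothing allocated beyond eq itself, which the core frees */
}

static struct request *noop_mq_dispatch(struct blk_mq_hw_ctx *hctx)
{
	return NULL;	/* no internal queueing, nothing to hand back */
}

static bool noop_mq_has_work(struct blk_mq_hw_ctx *hctx)
{
	return false;
}

static struct elevator_type noop_mq_sched = {
	.ops.mq = {
		.init_sched		= noop_mq_init_sched,
		.exit_sched		= noop_mq_exit_sched,
		.dispatch_request	= noop_mq_dispatch,
		.has_work		= noop_mq_has_work,
	},
	.uses_mq	= true,
	.elevator_name	= "noop_mq",
	.elevator_owner	= THIS_MODULE,
};

static int __init noop_mq_init(void)
{
	return elv_register(&noop_mq_sched);
}

static void __exit noop_mq_exit(void)
{
	elv_unregister(&noop_mq_sched);
}

module_init(noop_mq_init);
module_exit(noop_mq_exit);
MODULE_LICENSE("GPL");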
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7c273bbc5351..ca45e4a088a9 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h | |||
@@ -80,8 +80,6 @@ struct nvm_dev_ops { | |||
80 | unsigned int max_phys_sect; | 80 | unsigned int max_phys_sect; |
81 | }; | 81 | }; |
82 | 82 | ||
83 | |||
84 | |||
85 | #ifdef CONFIG_NVM | 83 | #ifdef CONFIG_NVM |
86 | 84 | ||
87 | #include <linux/blkdev.h> | 85 | #include <linux/blkdev.h> |
@@ -109,6 +107,7 @@ enum { | |||
109 | NVM_RSP_ERR_FAILWRITE = 0x40ff, | 107 | NVM_RSP_ERR_FAILWRITE = 0x40ff, |
110 | NVM_RSP_ERR_EMPTYPAGE = 0x42ff, | 108 | NVM_RSP_ERR_EMPTYPAGE = 0x42ff, |
111 | NVM_RSP_ERR_FAILECC = 0x4281, | 109 | NVM_RSP_ERR_FAILECC = 0x4281, |
110 | NVM_RSP_ERR_FAILCRC = 0x4004, | ||
112 | NVM_RSP_WARN_HIGHECC = 0x4700, | 111 | NVM_RSP_WARN_HIGHECC = 0x4700, |
113 | 112 | ||
114 | /* Device opcodes */ | 113 | /* Device opcodes */ |
@@ -202,11 +201,10 @@ struct nvm_addr_format { | |||
202 | struct nvm_id { | 201 | struct nvm_id { |
203 | u8 ver_id; | 202 | u8 ver_id; |
204 | u8 vmnt; | 203 | u8 vmnt; |
205 | u8 cgrps; | ||
206 | u32 cap; | 204 | u32 cap; |
207 | u32 dom; | 205 | u32 dom; |
208 | struct nvm_addr_format ppaf; | 206 | struct nvm_addr_format ppaf; |
209 | struct nvm_id_group groups[4]; | 207 | struct nvm_id_group grp; |
210 | } __packed; | 208 | } __packed; |
211 | 209 | ||
212 | struct nvm_target { | 210 | struct nvm_target { |
@@ -216,10 +214,6 @@ struct nvm_target { | |||
216 | struct gendisk *disk; | 214 | struct gendisk *disk; |
217 | }; | 215 | }; |
218 | 216 | ||
219 | struct nvm_tgt_instance { | ||
220 | struct nvm_tgt_type *tt; | ||
221 | }; | ||
222 | |||
223 | #define ADDR_EMPTY (~0ULL) | 217 | #define ADDR_EMPTY (~0ULL) |
224 | 218 | ||
225 | #define NVM_VERSION_MAJOR 1 | 219 | #define NVM_VERSION_MAJOR 1 |
@@ -230,7 +224,6 @@ struct nvm_rq; | |||
230 | typedef void (nvm_end_io_fn)(struct nvm_rq *); | 224 | typedef void (nvm_end_io_fn)(struct nvm_rq *); |
231 | 225 | ||
232 | struct nvm_rq { | 226 | struct nvm_rq { |
233 | struct nvm_tgt_instance *ins; | ||
234 | struct nvm_tgt_dev *dev; | 227 | struct nvm_tgt_dev *dev; |
235 | 228 | ||
236 | struct bio *bio; | 229 | struct bio *bio; |
@@ -254,6 +247,8 @@ struct nvm_rq { | |||
254 | 247 | ||
255 | u64 ppa_status; /* ppa media status */ | 248 | u64 ppa_status; /* ppa media status */ |
256 | int error; | 249 | int error; |
250 | |||
251 | void *private; | ||
257 | }; | 252 | }; |
258 | 253 | ||
259 | static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) | 254 | static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) |
@@ -272,15 +267,6 @@ enum { | |||
272 | NVM_BLK_ST_BAD = 0x8, /* Bad block */ | 267 | NVM_BLK_ST_BAD = 0x8, /* Bad block */ |
273 | }; | 268 | }; |
274 | 269 | ||
275 | /* system block cpu representation */ | ||
276 | struct nvm_sb_info { | ||
277 | unsigned long seqnr; | ||
278 | unsigned long erase_cnt; | ||
279 | unsigned int version; | ||
280 | char mmtype[NVM_MMTYPE_LEN]; | ||
281 | struct ppa_addr fs_ppa; | ||
282 | }; | ||
283 | |||
284 | /* Device generic information */ | 270 | /* Device generic information */ |
285 | struct nvm_geo { | 271 | struct nvm_geo { |
286 | int nr_chnls; | 272 | int nr_chnls; |
@@ -308,6 +294,7 @@ struct nvm_geo { | |||
308 | int sec_per_lun; | 294 | int sec_per_lun; |
309 | }; | 295 | }; |
310 | 296 | ||
297 | /* sub-device structure */ | ||
311 | struct nvm_tgt_dev { | 298 | struct nvm_tgt_dev { |
312 | /* Device information */ | 299 | /* Device information */ |
313 | struct nvm_geo geo; | 300 | struct nvm_geo geo; |
@@ -329,17 +316,10 @@ struct nvm_dev { | |||
329 | 316 | ||
330 | struct list_head devices; | 317 | struct list_head devices; |
331 | 318 | ||
332 | /* Media manager */ | ||
333 | struct nvmm_type *mt; | ||
334 | void *mp; | ||
335 | |||
336 | /* System blocks */ | ||
337 | struct nvm_sb_info sb; | ||
338 | |||
339 | /* Device information */ | 319 | /* Device information */ |
340 | struct nvm_geo geo; | 320 | struct nvm_geo geo; |
341 | 321 | ||
342 | /* lower page table */ | 322 | /* lower page table */ |
343 | int lps_per_blk; | 323 | int lps_per_blk; |
344 | int *lptbl; | 324 | int *lptbl; |
345 | 325 | ||
@@ -359,6 +339,10 @@ struct nvm_dev { | |||
359 | 339 | ||
360 | struct mutex mlock; | 340 | struct mutex mlock; |
361 | spinlock_t lock; | 341 | spinlock_t lock; |
342 | |||
343 | /* target management */ | ||
344 | struct list_head area_list; | ||
345 | struct list_head targets; | ||
362 | }; | 346 | }; |
363 | 347 | ||
364 | static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, | 348 | static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, |
@@ -391,10 +375,10 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, | |||
391 | return l; | 375 | return l; |
392 | } | 376 | } |
393 | 377 | ||
394 | static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, | 378 | static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev, |
395 | struct ppa_addr r) | 379 | struct ppa_addr r) |
396 | { | 380 | { |
397 | struct nvm_geo *geo = &dev->geo; | 381 | struct nvm_geo *geo = &tgt_dev->geo; |
398 | struct ppa_addr l; | 382 | struct ppa_addr l; |
399 | 383 | ||
400 | l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; | 384 | l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; |
@@ -407,10 +391,10 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, | |||
407 | return l; | 391 | return l; |
408 | } | 392 | } |
409 | 393 | ||
410 | static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, | 394 | static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev, |
411 | struct ppa_addr r) | 395 | struct ppa_addr r) |
412 | { | 396 | { |
413 | struct nvm_geo *geo = &dev->geo; | 397 | struct nvm_geo *geo = &tgt_dev->geo; |
414 | struct ppa_addr l; | 398 | struct ppa_addr l; |
415 | 399 | ||
416 | l.ppa = 0; | 400 | l.ppa = 0; |
@@ -452,15 +436,12 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) | |||
452 | (ppa1.g.blk == ppa2.g.blk)); | 436 | (ppa1.g.blk == ppa2.g.blk)); |
453 | } | 437 | } |
454 | 438 | ||
455 | static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg) | ||
456 | { | ||
457 | return dev->lptbl[slc_pg]; | ||
458 | } | ||
459 | |||
460 | typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); | 439 | typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); |
461 | typedef sector_t (nvm_tgt_capacity_fn)(void *); | 440 | typedef sector_t (nvm_tgt_capacity_fn)(void *); |
462 | typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); | 441 | typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); |
463 | typedef void (nvm_tgt_exit_fn)(void *); | 442 | typedef void (nvm_tgt_exit_fn)(void *); |
443 | typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); | ||
444 | typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); | ||
464 | 445 | ||
465 | struct nvm_tgt_type { | 446 | struct nvm_tgt_type { |
466 | const char *name; | 447 | const char *name; |
@@ -469,12 +450,15 @@ struct nvm_tgt_type { | |||
469 | /* target entry points */ | 450 | /* target entry points */ |
470 | nvm_tgt_make_rq_fn *make_rq; | 451 | nvm_tgt_make_rq_fn *make_rq; |
471 | nvm_tgt_capacity_fn *capacity; | 452 | nvm_tgt_capacity_fn *capacity; |
472 | nvm_end_io_fn *end_io; | ||
473 | 453 | ||
474 | /* module-specific init/teardown */ | 454 | /* module-specific init/teardown */ |
475 | nvm_tgt_init_fn *init; | 455 | nvm_tgt_init_fn *init; |
476 | nvm_tgt_exit_fn *exit; | 456 | nvm_tgt_exit_fn *exit; |
477 | 457 | ||
458 | /* sysfs */ | ||
459 | nvm_tgt_sysfs_init_fn *sysfs_init; | ||
460 | nvm_tgt_sysfs_exit_fn *sysfs_exit; | ||
461 | |||
478 | /* For internal use */ | 462 | /* For internal use */ |
479 | struct list_head list; | 463 | struct list_head list; |
480 | }; | 464 | }; |
@@ -487,103 +471,29 @@ extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); | |||
487 | extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); | 471 | extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); |
488 | extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); | 472 | extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); |
489 | 473 | ||
490 | typedef int (nvmm_register_fn)(struct nvm_dev *); | ||
491 | typedef void (nvmm_unregister_fn)(struct nvm_dev *); | ||
492 | |||
493 | typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *); | ||
494 | typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); | ||
495 | typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *); | ||
496 | typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int); | ||
497 | typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); | ||
498 | typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); | ||
499 | typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *, | ||
500 | struct ppa_addr, int); | ||
501 | typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int); | ||
502 | |||
503 | enum { | ||
504 | TRANS_TGT_TO_DEV = 0x0, | ||
505 | TRANS_DEV_TO_TGT = 0x1, | ||
506 | }; | ||
507 | |||
508 | struct nvmm_type { | ||
509 | const char *name; | ||
510 | unsigned int version[3]; | ||
511 | |||
512 | nvmm_register_fn *register_mgr; | ||
513 | nvmm_unregister_fn *unregister_mgr; | ||
514 | |||
515 | nvmm_create_tgt_fn *create_tgt; | ||
516 | nvmm_remove_tgt_fn *remove_tgt; | ||
517 | |||
518 | nvmm_submit_io_fn *submit_io; | ||
519 | nvmm_erase_blk_fn *erase_blk; | ||
520 | |||
521 | nvmm_get_area_fn *get_area; | ||
522 | nvmm_put_area_fn *put_area; | ||
523 | |||
524 | nvmm_trans_ppa_fn *trans_ppa; | ||
525 | nvmm_part_to_tgt_fn *part_to_tgt; | ||
526 | |||
527 | struct list_head list; | ||
528 | }; | ||
529 | |||
530 | extern int nvm_register_mgr(struct nvmm_type *); | ||
531 | extern void nvm_unregister_mgr(struct nvmm_type *); | ||
532 | |||
533 | extern struct nvm_dev *nvm_alloc_dev(int); | 474 | extern struct nvm_dev *nvm_alloc_dev(int); |
534 | extern int nvm_register(struct nvm_dev *); | 475 | extern int nvm_register(struct nvm_dev *); |
535 | extern void nvm_unregister(struct nvm_dev *); | 476 | extern void nvm_unregister(struct nvm_dev *); |
536 | 477 | ||
537 | extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int); | ||
538 | extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, | 478 | extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, |
539 | int, int); | 479 | int, int); |
540 | extern int nvm_max_phys_sects(struct nvm_tgt_dev *); | 480 | extern int nvm_max_phys_sects(struct nvm_tgt_dev *); |
541 | extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); | 481 | extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); |
542 | extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); | ||
543 | extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); | ||
544 | extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, | 482 | extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, |
545 | const struct ppa_addr *, int, int); | 483 | const struct ppa_addr *, int, int); |
546 | extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); | 484 | extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); |
547 | extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int); | ||
548 | extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); | 485 | extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); |
549 | extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, | 486 | extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, |
550 | void *); | 487 | void *); |
551 | extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); | 488 | extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); |
552 | extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); | 489 | extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); |
553 | extern void nvm_end_io(struct nvm_rq *, int); | 490 | extern void nvm_end_io(struct nvm_rq *); |
554 | extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, | ||
555 | void *, int); | ||
556 | extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, | ||
557 | int, void *, int); | ||
558 | extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); | 491 | extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); |
559 | extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); | ||
560 | extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); | 492 | extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); |
561 | 493 | ||
562 | /* sysblk.c */ | ||
563 | #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ | ||
564 | |||
565 | /* system block on disk representation */ | ||
566 | struct nvm_system_block { | ||
567 | __be32 magic; /* magic signature */ | ||
568 | __be32 seqnr; /* sequence number */ | ||
569 | __be32 erase_cnt; /* erase count */ | ||
570 | __be16 version; /* version number */ | ||
571 | u8 mmtype[NVM_MMTYPE_LEN]; /* media manager name */ | ||
572 | __be64 fs_ppa; /* PPA for media manager | ||
573 | * superblock */ | ||
574 | }; | ||
575 | |||
576 | extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *); | ||
577 | extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *); | ||
578 | extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *); | ||
579 | |||
580 | extern int nvm_dev_factory(struct nvm_dev *, int flags); | 494 | extern int nvm_dev_factory(struct nvm_dev *, int flags); |
581 | 495 | ||
582 | #define nvm_for_each_lun_ppa(geo, ppa, chid, lunid) \ | 496 | extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int); |
583 | for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls; \ | ||
584 | (chid)++, (ppa).g.ch = (chid)) \ | ||
585 | for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl; \ | ||
586 | (lunid)++, (ppa).g.lun = (lunid)) | ||
587 | 497 | ||
588 | #else /* CONFIG_NVM */ | 498 | #else /* CONFIG_NVM */ |
589 | struct nvm_dev_ops; | 499 | struct nvm_dev_ops; |
diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3d1c6f1b15c9..00eac863a9c7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h | |||
@@ -244,6 +244,7 @@ enum { | |||
244 | NVME_CTRL_ONCS_DSM = 1 << 2, | 244 | NVME_CTRL_ONCS_DSM = 1 << 2, |
245 | NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, | 245 | NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, |
246 | NVME_CTRL_VWC_PRESENT = 1 << 0, | 246 | NVME_CTRL_VWC_PRESENT = 1 << 0, |
247 | NVME_CTRL_OACS_SEC_SUPP = 1 << 0, | ||
247 | }; | 248 | }; |
248 | 249 | ||
249 | struct nvme_lbaf { | 250 | struct nvme_lbaf { |
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index f017fd6e69c4..d4e0a204c118 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h | |||
@@ -259,6 +259,26 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) | |||
259 | unsigned int sbitmap_weight(const struct sbitmap *sb); | 259 | unsigned int sbitmap_weight(const struct sbitmap *sb); |
260 | 260 | ||
261 | /** | 261 | /** |
262 | * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. | ||
263 | * @sb: Bitmap to show. | ||
264 | * @m: struct seq_file to write to. | ||
265 | * | ||
266 | * This is intended for debugging. The format may change at any time. | ||
267 | */ | ||
268 | void sbitmap_show(struct sbitmap *sb, struct seq_file *m); | ||
269 | |||
270 | /** | ||
271 | * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct | ||
272 | * seq_file. | ||
273 | * @sb: Bitmap to show. | ||
274 | * @m: struct seq_file to write to. | ||
275 | * | ||
276 | * This is intended for debugging. The output isn't guaranteed to be internally | ||
277 | * consistent. | ||
278 | */ | ||
279 | void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m); | ||
280 | |||
281 | /** | ||
262 | * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific | 282 | * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific |
263 | * memory node. | 283 | * memory node. |
264 | * @sbq: Bitmap queue to initialize. | 284 | * @sbq: Bitmap queue to initialize. |
@@ -370,4 +390,14 @@ static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, | |||
370 | */ | 390 | */ |
371 | void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); | 391 | void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); |
372 | 392 | ||
393 | /** | ||
394 | * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct | ||
395 | * seq_file. | ||
396 | * @sbq: Bitmap queue to show. | ||
397 | * @m: struct seq_file to write to. | ||
398 | * | ||
399 | * This is intended for debugging. The format may change at any time. | ||
400 | */ | ||
401 | void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); | ||
402 | |||
373 | #endif /* __LINUX_SCALE_BITMAP_H */ | 403 | #endif /* __LINUX_SCALE_BITMAP_H */ |
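sbitmap_show(), sbitmap_bitmap_show() and sbitmap_queue_show() above are debugging dumps written to a seq_file, which is how the blk-mq debugfs code in this series consumes them. A sketch of wiring them into a standalone debugfs file; the bitmap, directory and file names are assumptions:

#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/numa.h>
#include <linux/sbitmap.h>
#include <linux/seq_file.h>

static struct sbitmap example_sb;
static struct dentry *example_dir;

static int example_sb_show(struct seq_file *m, void *data)
{
	sbitmap_show(&example_sb, m);		/* depth/busy/bits_per_word/map_nr */
	sbitmap_bitmap_show(&example_sb, m);	/* hex dump of the bitmap words */
	return 0;
}

static int example_sb_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_sb_show, inode->i_private);
}

static const struct file_operations example_sb_fops = {
	.owner		= THIS_MODULE,
	.open		= example_sb_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init example_init(void)
{
	int ret;

	ret = sbitmap_init_node(&example_sb, 128, -1, GFP_KERNEL,
				NUMA_NO_NODE);
	if (ret)
		return ret;

	example_dir = debugfs_create_dir("sbitmap_example", NULL);
	debugfs_create_file("bitmap", 0400, example_dir, NULL,
			    &example_sb_fops);
	return 0;
}

static void __exit example_exit(void)
{
	debugfs_remove_recursive(example_dir);
	sbitmap_free(&example_sb);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");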
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h new file mode 100644 index 000000000000..deee23d012e7 --- /dev/null +++ b/include/linux/sed-opal.h | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | * Copyright © 2016 Intel Corporation | ||
3 | * | ||
4 | * Authors: | ||
5 | * Rafael Antognolli <rafael.antognolli@intel.com> | ||
6 | * Scott Bauer <scott.bauer@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #ifndef LINUX_OPAL_H | ||
19 | #define LINUX_OPAL_H | ||
20 | |||
21 | #include <uapi/linux/sed-opal.h> | ||
22 | #include <linux/kernel.h> | ||
23 | |||
24 | struct opal_dev; | ||
25 | |||
26 | typedef int (sec_send_recv)(void *data, u16 spsp, u8 secp, void *buffer, | ||
27 | size_t len, bool send); | ||
28 | |||
29 | #ifdef CONFIG_BLK_SED_OPAL | ||
30 | bool opal_unlock_from_suspend(struct opal_dev *dev); | ||
31 | struct opal_dev *init_opal_dev(void *data, sec_send_recv *send_recv); | ||
32 | int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *ioctl_ptr); | ||
33 | |||
34 | static inline bool is_sed_ioctl(unsigned int cmd) | ||
35 | { | ||
36 | switch (cmd) { | ||
37 | case IOC_OPAL_SAVE: | ||
38 | case IOC_OPAL_LOCK_UNLOCK: | ||
39 | case IOC_OPAL_TAKE_OWNERSHIP: | ||
40 | case IOC_OPAL_ACTIVATE_LSP: | ||
41 | case IOC_OPAL_SET_PW: | ||
42 | case IOC_OPAL_ACTIVATE_USR: | ||
43 | case IOC_OPAL_REVERT_TPR: | ||
44 | case IOC_OPAL_LR_SETUP: | ||
45 | case IOC_OPAL_ADD_USR_TO_LR: | ||
46 | case IOC_OPAL_ENABLE_DISABLE_MBR: | ||
47 | case IOC_OPAL_ERASE_LR: | ||
48 | case IOC_OPAL_SECURE_ERASE_LR: | ||
49 | return true; | ||
50 | } | ||
51 | return false; | ||
52 | } | ||
53 | #else | ||
54 | static inline bool is_sed_ioctl(unsigned int cmd) | ||
55 | { | ||
56 | return false; | ||
57 | } | ||
58 | |||
59 | static inline int sed_ioctl(struct opal_dev *dev, unsigned int cmd, | ||
60 | void __user *ioctl_ptr) | ||
61 | { | ||
62 | return 0; | ||
63 | } | ||
64 | static inline bool opal_unlock_from_suspend(struct opal_dev *dev) | ||
65 | { | ||
66 | return false; | ||
67 | } | ||
68 | #define init_opal_dev(data, send_recv) NULL | ||
69 | #endif /* CONFIG_BLK_SED_OPAL */ | ||
70 | #endif /* LINUX_OPAL_H */ | ||
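The header above defines the whole driver-side contract: supply a sec_send_recv callback, create an opal_dev with init_opal_dev(), forward matching ioctls to sed_ioctl(), and call opal_unlock_from_suspend() on resume, which is exactly the shape of the NVMe wiring elsewhere in this merge. A sketch of that contract for a hypothetical driver; the mydrv_* names and the transport stub are assumptions:

#include <linux/sed-opal.h>

struct mydrv_ctrl {
	struct opal_dev *opal_dev;
	/* ... transport state ... */
};

static int mydrv_sec_xfer(struct mydrv_ctrl *ctrl, u16 spsp, u8 secp,
			  void *buf, size_t len, bool send)
{
	/* Stand-in: a real driver would issue a Security Send (send=true)
	 * or Security Receive (send=false) command over its transport. */
	return -EOPNOTSUPP;
}

static int mydrv_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
			    size_t len, bool send)
{
	struct mydrv_ctrl *ctrl = data;

	return mydrv_sec_xfer(ctrl, spsp, secp, buffer, len, send);
}

static void mydrv_setup_opal(struct mydrv_ctrl *ctrl)
{
	/* init_opal_dev() returns NULL when CONFIG_BLK_SED_OPAL is off */
	if (!ctrl->opal_dev)
		ctrl->opal_dev = init_opal_dev(ctrl, &mydrv_sec_submit);
}

static int mydrv_ioctl(struct mydrv_ctrl *ctrl, unsigned int cmd,
		       void __user *arg)
{
	if (is_sed_ioctl(cmd))
		return sed_ioctl(ctrl->opal_dev, cmd, arg);
	return -ENOTTY;
}

static void mydrv_resume(struct mydrv_ctrl *ctrl)
{
	/* Replay any unlock commands recorded via IOC_OPAL_SAVE. */
	opal_unlock_from_suspend(ctrl->opal_dev);
}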
diff --git a/include/uapi/linux/lightnvm.h b/include/uapi/linux/lightnvm.h index 774a43128a7a..fd19f36b3129 100644 --- a/include/uapi/linux/lightnvm.h +++ b/include/uapi/linux/lightnvm.h | |||
@@ -122,6 +122,44 @@ struct nvm_ioctl_dev_factory { | |||
122 | __u32 flags; | 122 | __u32 flags; |
123 | }; | 123 | }; |
124 | 124 | ||
125 | struct nvm_user_vio { | ||
126 | __u8 opcode; | ||
127 | __u8 flags; | ||
128 | __u16 control; | ||
129 | __u16 nppas; | ||
130 | __u16 rsvd; | ||
131 | __u64 metadata; | ||
132 | __u64 addr; | ||
133 | __u64 ppa_list; | ||
134 | __u32 metadata_len; | ||
135 | __u32 data_len; | ||
136 | __u64 status; | ||
137 | __u32 result; | ||
138 | __u32 rsvd3[3]; | ||
139 | }; | ||
140 | |||
141 | struct nvm_passthru_vio { | ||
142 | __u8 opcode; | ||
143 | __u8 flags; | ||
144 | __u8 rsvd[2]; | ||
145 | __u32 nsid; | ||
146 | __u32 cdw2; | ||
147 | __u32 cdw3; | ||
148 | __u64 metadata; | ||
149 | __u64 addr; | ||
150 | __u32 metadata_len; | ||
151 | __u32 data_len; | ||
152 | __u64 ppa_list; | ||
153 | __u16 nppas; | ||
154 | __u16 control; | ||
155 | __u32 cdw13; | ||
156 | __u32 cdw14; | ||
157 | __u32 cdw15; | ||
158 | __u64 status; | ||
159 | __u32 result; | ||
160 | __u32 timeout_ms; | ||
161 | }; | ||
162 | |||
125 | /* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ | 163 | /* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */ |
126 | enum { | 164 | enum { |
127 | /* top level cmds */ | 165 | /* top level cmds */ |
@@ -137,6 +175,11 @@ enum { | |||
137 | 175 | ||
138 | /* Factory reset device */ | 176 | /* Factory reset device */ |
139 | NVM_DEV_FACTORY_CMD, | 177 | NVM_DEV_FACTORY_CMD, |
178 | |||
179 | /* Vector user I/O */ | ||
180 | NVM_DEV_VIO_ADMIN_CMD = 0x41, | ||
181 | NVM_DEV_VIO_CMD = 0x42, | ||
182 | NVM_DEV_VIO_USER_CMD = 0x43, | ||
140 | }; | 183 | }; |
141 | 184 | ||
142 | #define NVM_IOCTL 'L' /* 0x4c */ | 185 | #define NVM_IOCTL 'L' /* 0x4c */ |
@@ -154,6 +197,13 @@ enum { | |||
154 | #define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \ | 197 | #define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \ |
155 | struct nvm_ioctl_dev_factory) | 198 | struct nvm_ioctl_dev_factory) |
156 | 199 | ||
200 | #define NVME_NVM_IOCTL_IO_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_USER_CMD, \ | ||
201 | struct nvm_passthru_vio) | ||
202 | #define NVME_NVM_IOCTL_ADMIN_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_ADMIN_CMD,\ | ||
203 | struct nvm_passthru_vio) | ||
204 | #define NVME_NVM_IOCTL_SUBMIT_VIO _IOWR(NVM_IOCTL, NVM_DEV_VIO_CMD,\ | ||
205 | struct nvm_user_vio) | ||
206 | |||
157 | #define NVM_VERSION_MAJOR 1 | 207 | #define NVM_VERSION_MAJOR 1 |
158 | #define NVM_VERSION_MINOR 0 | 208 | #define NVM_VERSION_MINOR 0 |
159 | #define NVM_VERSION_PATCHLEVEL 0 | 209 | #define NVM_VERSION_PATCHLEVEL 0 |
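struct nvm_user_vio and NVME_NVM_IOCTL_SUBMIT_VIO above are the userspace side of nvme_nvm_submit_vio() earlier in this diff; note that nppas is zero-based, so the kernel reads nppas + 1 PPAs and maps (nppas + 1) sectors of data, ignoring data_len for this ioctl. A userspace sketch of a vector read; the device node, opcode value, sector size and PPA values are assumptions:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/lightnvm.h>

#define SECT_SIZE	4096		/* assumed LBA size */
#define NR_PPAS		4

int main(void)
{
	struct nvm_user_vio vio;
	uint64_t ppas[NR_PPAS] = { 0 };	/* device-format PPAs, assumed */
	void *buf = malloc(NR_PPAS * SECT_SIZE);
	int fd, ret;

	fd = open("/dev/nvme0n1", O_RDWR);	/* assumed device node */
	if (fd < 0 || !buf) {
		perror("setup");
		return 1;
	}

	memset(&vio, 0, sizeof(vio));		/* flags must stay 0 */
	vio.opcode = 0x92;			/* assumed: vector (physical page) read */
	vio.nppas = NR_PPAS - 1;		/* zero-based: kernel uses nppas + 1 */
	vio.ppa_list = (uint64_t)(uintptr_t)ppas;
	vio.addr = (uint64_t)(uintptr_t)buf;
	/* data_len is not consulted for SUBMIT_VIO; length = (nppas + 1) sectors */

	ret = ioctl(fd, NVME_NVM_IOCTL_SUBMIT_VIO, &vio);
	printf("ret=%d result=0x%x status=0x%llx\n",
	       ret, vio.result, (unsigned long long)vio.status);

	free(buf);
	close(fd);
	return ret ? 1 : 0;
}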
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h new file mode 100644 index 000000000000..c72e0735532d --- /dev/null +++ b/include/uapi/linux/sed-opal.h | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * Copyright © 2016 Intel Corporation | ||
3 | * | ||
4 | * Authors: | ||
5 | * Rafael Antognolli <rafael.antognolli@intel.com> | ||
6 | * Scott Bauer <scott.bauer@intel.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify it | ||
9 | * under the terms and conditions of the GNU General Public License, | ||
10 | * version 2, as published by the Free Software Foundation. | ||
11 | * | ||
12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
15 | * more details. | ||
16 | */ | ||
17 | |||
18 | #ifndef _UAPI_SED_OPAL_H | ||
19 | #define _UAPI_SED_OPAL_H | ||
20 | |||
21 | #include <linux/types.h> | ||
22 | |||
23 | #define OPAL_KEY_MAX 256 | ||
24 | #define OPAL_MAX_LRS 9 | ||
25 | |||
26 | enum opal_mbr { | ||
27 | OPAL_MBR_ENABLE = 0x0, | ||
28 | OPAL_MBR_DISABLE = 0x01, | ||
29 | }; | ||
30 | |||
31 | enum opal_user { | ||
32 | OPAL_ADMIN1 = 0x0, | ||
33 | OPAL_USER1 = 0x01, | ||
34 | OPAL_USER2 = 0x02, | ||
35 | OPAL_USER3 = 0x03, | ||
36 | OPAL_USER4 = 0x04, | ||
37 | OPAL_USER5 = 0x05, | ||
38 | OPAL_USER6 = 0x06, | ||
39 | OPAL_USER7 = 0x07, | ||
40 | OPAL_USER8 = 0x08, | ||
41 | OPAL_USER9 = 0x09, | ||
42 | }; | ||
43 | |||
44 | enum opal_lock_state { | ||
45 | OPAL_RO = 0x01, /* 0001 */ | ||
46 | OPAL_RW = 0x02, /* 0010 */ | ||
47 | OPAL_LK = 0x04, /* 0100 */ | ||
48 | }; | ||
49 | |||
50 | struct opal_key { | ||
51 | __u8 lr; | ||
52 | __u8 key_len; | ||
53 | __u8 __align[6]; | ||
54 | __u8 key[OPAL_KEY_MAX]; | ||
55 | }; | ||
56 | |||
57 | struct opal_lr_act { | ||
58 | struct opal_key key; | ||
59 | __u32 sum; | ||
60 | __u8 num_lrs; | ||
61 | __u8 lr[OPAL_MAX_LRS]; | ||
62 | __u8 align[2]; /* Align to 8 byte boundary */ | ||
63 | }; | ||
64 | |||
65 | struct opal_session_info { | ||
66 | __u32 sum; | ||
67 | __u32 who; | ||
68 | struct opal_key opal_key; | ||
69 | }; | ||
70 | |||
71 | struct opal_user_lr_setup { | ||
72 | __u64 range_start; | ||
73 | __u64 range_length; | ||
74 | __u32 RLE; /* Read Lock enabled */ | ||
75 | __u32 WLE; /* Write Lock Enabled */ | ||
76 | struct opal_session_info session; | ||
77 | }; | ||
78 | |||
79 | struct opal_lock_unlock { | ||
80 | struct opal_session_info session; | ||
81 | __u32 l_state; | ||
82 | __u8 __align[4]; | ||
83 | }; | ||
84 | |||
85 | struct opal_new_pw { | ||
86 | struct opal_session_info session; | ||
87 | |||
88 | /* When we're not operating in sum, and we first set | ||
89 | * passwords, we need to set them via the ADMIN authority. | ||
90 | * After passwords are changed, we can set them via | ||
91 | * User authorities. | ||
92 | * Because of this restriction we need to know about | ||
93 | * two different users: one in 'session', which we will use | ||
94 | * to start the session, and new_user_pw as the user we're | ||
95 | * changing the pw for. | ||
96 | */ | ||
97 | struct opal_session_info new_user_pw; | ||
98 | }; | ||
99 | |||
100 | struct opal_mbr_data { | ||
101 | struct opal_key key; | ||
102 | __u8 enable_disable; | ||
103 | __u8 __align[7]; | ||
104 | }; | ||
105 | |||
106 | #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) | ||
107 | #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) | ||
108 | #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) | ||
109 | #define IOC_OPAL_ACTIVATE_LSP _IOW('p', 223, struct opal_lr_act) | ||
110 | #define IOC_OPAL_SET_PW _IOW('p', 224, struct opal_new_pw) | ||
111 | #define IOC_OPAL_ACTIVATE_USR _IOW('p', 225, struct opal_session_info) | ||
112 | #define IOC_OPAL_REVERT_TPR _IOW('p', 226, struct opal_key) | ||
113 | #define IOC_OPAL_LR_SETUP _IOW('p', 227, struct opal_user_lr_setup) | ||
114 | #define IOC_OPAL_ADD_USR_TO_LR _IOW('p', 228, struct opal_lock_unlock) | ||
115 | #define IOC_OPAL_ENABLE_DISABLE_MBR _IOW('p', 229, struct opal_mbr_data) | ||
116 | #define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info) | ||
117 | #define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info) | ||
118 | |||
119 | #endif /* _UAPI_SED_OPAL_H */ | ||
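As a rough illustration of how this new UAPI might be driven from user space, here is a minimal sketch that fills in struct opal_lock_unlock and issues IOC_OPAL_LOCK_UNLOCK against a block device node. The device path and passphrase are invented for the example, error handling is kept to a minimum, and whether the request succeeds depends on how the drive's locking ranges were provisioned.

/* Illustrative only: unlock locking range 0 for read/write as Admin1. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/sed-opal.h>

int main(void)
{
	struct opal_lock_unlock lk = { 0 };
	const char *pw = "example-passphrase";	/* hypothetical key */
	int fd, ret;

	lk.session.sum = 0;			/* not single-user mode */
	lk.session.who = OPAL_ADMIN1;
	lk.session.opal_key.lr = 0;		/* locking range 0 */
	lk.session.opal_key.key_len = strlen(pw);
	memcpy(lk.session.opal_key.key, pw, lk.session.opal_key.key_len);
	lk.l_state = OPAL_RW;			/* unlock for read/write */

	fd = open("/dev/nvme0n1", O_RDWR);	/* hypothetical device */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	ret = ioctl(fd, IOC_OPAL_LOCK_UNLOCK, &lk);
	if (ret < 0)
		perror("IOC_OPAL_LOCK_UNLOCK");
	close(fd);
	return ret < 0;
}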
diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2cecf05c82fd..55e11c4b2f3b 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #include <linux/random.h> | 18 | #include <linux/random.h> |
19 | #include <linux/sbitmap.h> | 19 | #include <linux/sbitmap.h> |
20 | #include <linux/seq_file.h> | ||
20 | 21 | ||
21 | int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, | 22 | int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, |
22 | gfp_t flags, int node) | 23 | gfp_t flags, int node) |
@@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb) | |||
180 | } | 181 | } |
181 | EXPORT_SYMBOL_GPL(sbitmap_weight); | 182 | EXPORT_SYMBOL_GPL(sbitmap_weight); |
182 | 183 | ||
184 | void sbitmap_show(struct sbitmap *sb, struct seq_file *m) | ||
185 | { | ||
186 | seq_printf(m, "depth=%u\n", sb->depth); | ||
187 | seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); | ||
188 | seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); | ||
189 | seq_printf(m, "map_nr=%u\n", sb->map_nr); | ||
190 | } | ||
191 | EXPORT_SYMBOL_GPL(sbitmap_show); | ||
192 | |||
193 | static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) | ||
194 | { | ||
195 | if ((offset & 0xf) == 0) { | ||
196 | if (offset != 0) | ||
197 | seq_putc(m, '\n'); | ||
198 | seq_printf(m, "%08x:", offset); | ||
199 | } | ||
200 | if ((offset & 0x1) == 0) | ||
201 | seq_putc(m, ' '); | ||
202 | seq_printf(m, "%02x", byte); | ||
203 | } | ||
204 | |||
205 | void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) | ||
206 | { | ||
207 | u8 byte = 0; | ||
208 | unsigned int byte_bits = 0; | ||
209 | unsigned int offset = 0; | ||
210 | int i; | ||
211 | |||
212 | for (i = 0; i < sb->map_nr; i++) { | ||
213 | unsigned long word = READ_ONCE(sb->map[i].word); | ||
214 | unsigned int word_bits = READ_ONCE(sb->map[i].depth); | ||
215 | |||
216 | while (word_bits > 0) { | ||
217 | unsigned int bits = min(8 - byte_bits, word_bits); | ||
218 | |||
219 | byte |= (word & (BIT(bits) - 1)) << byte_bits; | ||
220 | byte_bits += bits; | ||
221 | if (byte_bits == 8) { | ||
222 | emit_byte(m, offset, byte); | ||
223 | byte = 0; | ||
224 | byte_bits = 0; | ||
225 | offset++; | ||
226 | } | ||
227 | word >>= bits; | ||
228 | word_bits -= bits; | ||
229 | } | ||
230 | } | ||
231 | if (byte_bits) { | ||
232 | emit_byte(m, offset, byte); | ||
233 | offset++; | ||
234 | } | ||
235 | if (offset) | ||
236 | seq_putc(m, '\n'); | ||
237 | } | ||
238 | EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); | ||
239 | |||
183 | static unsigned int sbq_calc_wake_batch(unsigned int depth) | 240 | static unsigned int sbq_calc_wake_batch(unsigned int depth) |
184 | { | 241 | { |
185 | unsigned int wake_batch; | 242 | unsigned int wake_batch; |
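The bit-to-byte packing in sbitmap_bitmap_show() is easier to follow in isolation. The sketch below is a self-contained userspace analogue of the same scheme (the sample words and their depths are invented): bits are drained from each word, least significant first, into 8-bit groups and printed in the same hexdump-style layout, a 16-byte row prefixed by its offset and grouped two bytes at a time.

/* Userspace analogue of the packing done by sbitmap_bitmap_show(). */
#include <stdint.h>
#include <stdio.h>

static void emit_byte(unsigned int offset, uint8_t byte)
{
	if ((offset & 0xf) == 0) {
		if (offset != 0)
			putchar('\n');
		printf("%08x:", offset);
	}
	if ((offset & 0x1) == 0)
		putchar(' ');
	printf("%02x", byte);
}

int main(void)
{
	/* Two "words" of 24 valid bits each, mimicking map[i].word/depth. */
	unsigned long words[] = { 0x00ab55ful, 0x0f0f0ful };
	unsigned int depths[] = { 24, 24 };
	uint8_t byte = 0;
	unsigned int byte_bits = 0, offset = 0;

	for (int i = 0; i < 2; i++) {
		unsigned long word = words[i];
		unsigned int word_bits = depths[i];

		while (word_bits > 0) {
			unsigned int bits = 8 - byte_bits < word_bits ?
					    8 - byte_bits : word_bits;

			byte |= (word & ((1UL << bits) - 1)) << byte_bits;
			byte_bits += bits;
			if (byte_bits == 8) {
				emit_byte(offset++, byte);
				byte = 0;
				byte_bits = 0;
			}
			word >>= bits;
			word_bits -= bits;
		}
	}
	if (byte_bits)
		emit_byte(offset++, byte);
	if (offset)
		putchar('\n');
	return 0;
}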
@@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); | |||
239 | 296 | ||
240 | void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) | 297 | void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) |
241 | { | 298 | { |
242 | sbq->wake_batch = sbq_calc_wake_batch(depth); | 299 | unsigned int wake_batch = sbq_calc_wake_batch(depth); |
300 | int i; | ||
301 | |||
302 | if (sbq->wake_batch != wake_batch) { | ||
303 | WRITE_ONCE(sbq->wake_batch, wake_batch); | ||
304 | /* | ||
305 | * Pairs with the memory barrier in sbq_wake_up() to ensure that | ||
306 | * the batch size is updated before the wait counts. | ||
307 | */ | ||
308 | smp_mb__before_atomic(); | ||
309 | for (i = 0; i < SBQ_WAIT_QUEUES; i++) | ||
310 | atomic_set(&sbq->ws[i].wait_cnt, 1); | ||
311 | } | ||
243 | sbitmap_resize(&sbq->sb, depth); | 312 | sbitmap_resize(&sbq->sb, depth); |
244 | } | 313 | } |
245 | EXPORT_SYMBOL_GPL(sbitmap_queue_resize); | 314 | EXPORT_SYMBOL_GPL(sbitmap_queue_resize); |
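For context, a hedged sketch of how a caller might exercise this path, assuming the sbitmap_queue_init_node()/sbitmap_queue_resize()/sbitmap_queue_free() API declared in include/linux/sbitmap.h (the function name and depths here are made up). sbitmap_queue_resize() does not reallocate the map, so the new depth must not exceed the depth used at init time.

#include <linux/sbitmap.h>

/* Hypothetical user: shrink the visible depth when the device reports
 * a smaller queue depth at runtime.
 */
static int example_shrink_tags(struct sbitmap_queue *sbq)
{
	int ret;

	ret = sbitmap_queue_init_node(sbq, 256, -1 /* default shift */,
				      false /* round_robin */,
				      GFP_KERNEL, NUMA_NO_NODE);
	if (ret)
		return ret;

	/* Later: the resize updates wake_batch and resets the wait counts. */
	sbitmap_queue_resize(sbq, 64);

	sbitmap_queue_free(sbq);
	return 0;
}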
@@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) | |||
297 | static void sbq_wake_up(struct sbitmap_queue *sbq) | 366 | static void sbq_wake_up(struct sbitmap_queue *sbq) |
298 | { | 367 | { |
299 | struct sbq_wait_state *ws; | 368 | struct sbq_wait_state *ws; |
369 | unsigned int wake_batch; | ||
300 | int wait_cnt; | 370 | int wait_cnt; |
301 | 371 | ||
302 | /* Ensure that the wait list checks occur after clear_bit(). */ | 372 | /* |
303 | smp_mb(); | 373 | * Pairs with the memory barrier in set_current_state() to ensure the |
374 | * proper ordering of clear_bit()/waitqueue_active() in the waker and | ||
375 | * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See | ||
376 | * the comment on waitqueue_active(). This is __after_atomic because we | ||
377 | * just did clear_bit() in the caller. | ||
378 | */ | ||
379 | smp_mb__after_atomic(); | ||
304 | 380 | ||
305 | ws = sbq_wake_ptr(sbq); | 381 | ws = sbq_wake_ptr(sbq); |
306 | if (!ws) | 382 | if (!ws) |
307 | return; | 383 | return; |
308 | 384 | ||
309 | wait_cnt = atomic_dec_return(&ws->wait_cnt); | 385 | wait_cnt = atomic_dec_return(&ws->wait_cnt); |
310 | if (unlikely(wait_cnt < 0)) | 386 | if (wait_cnt <= 0) { |
311 | wait_cnt = atomic_inc_return(&ws->wait_cnt); | 387 | wake_batch = READ_ONCE(sbq->wake_batch); |
312 | if (wait_cnt == 0) { | 388 | /* |
313 | atomic_add(sbq->wake_batch, &ws->wait_cnt); | 389 | * Pairs with the memory barrier in sbitmap_queue_resize() to |
390 | * ensure that we see the batch size update before the wait | ||
391 | * count is reset. | ||
392 | */ | ||
393 | smp_mb__before_atomic(); | ||
394 | /* | ||
395 | * If there are concurrent callers of sbq_wake_up(), the last | ||
396 | * one to decrement the wait count below zero will bump it back | ||
397 | * up. If there is a concurrent resize, the count reset will | ||
398 | * either cause the cmpxchg to fail or will overwrite its | ||
399 | * result afterwards. | ||
400 | */ | ||
401 | atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); | ||
314 | sbq_index_atomic_inc(&sbq->wake_index); | 402 | sbq_index_atomic_inc(&sbq->wake_index); |
315 | wake_up(&ws->wait); | 403 | wake_up(&ws->wait); |
316 | } | 404 | } |
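The decrement-then-replenish logic above is subtle, so here is a hedged userspace analogue using C11 atomics (the names and the batch value are invented). It mirrors the structure of sbq_wake_up(): the caller that takes the count to zero or below replenishes it by a whole batch, and the compare-exchange ensures a concurrent reset (the analogue of sbitmap_queue_resize() storing a fresh count) is not silently undone.

#include <stdatomic.h>
#include <stdbool.h>

/* Assume wait_cnt starts out equal to wake_batch. */
static atomic_int wait_cnt = 8;
static atomic_uint wake_batch = 8;

/* Returns true when the caller should issue a wake-up. */
static bool batched_wake(void)
{
	int cnt = atomic_fetch_sub(&wait_cnt, 1) - 1; /* like atomic_dec_return() */

	if (cnt <= 0) {
		unsigned int batch = atomic_load(&wake_batch);

		/*
		 * Replenish only if the counter still holds the value we
		 * observed; if a concurrent caller or a concurrent reset
		 * got there first, the compare-exchange simply fails.
		 */
		atomic_compare_exchange_strong(&wait_cnt, &cnt,
					       cnt + (int)batch);
		return true;
	}
	return false;
}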
@@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) | |||
331 | int i, wake_index; | 419 | int i, wake_index; |
332 | 420 | ||
333 | /* | 421 | /* |
334 | * Make sure all changes prior to this are visible from other CPUs. | 422 | * Pairs with the memory barrier in set_current_state() like in |
423 | * sbq_wake_up(). | ||
335 | */ | 424 | */ |
336 | smp_mb(); | 425 | smp_mb(); |
337 | wake_index = atomic_read(&sbq->wake_index); | 426 | wake_index = atomic_read(&sbq->wake_index); |
@@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) | |||
345 | } | 434 | } |
346 | } | 435 | } |
347 | EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); | 436 | EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); |
437 | |||
438 | void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) | ||
439 | { | ||
440 | bool first; | ||
441 | int i; | ||
442 | |||
443 | sbitmap_show(&sbq->sb, m); | ||
444 | |||
445 | seq_puts(m, "alloc_hint={"); | ||
446 | first = true; | ||
447 | for_each_possible_cpu(i) { | ||
448 | if (!first) | ||
449 | seq_puts(m, ", "); | ||
450 | first = false; | ||
451 | seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); | ||
452 | } | ||
453 | seq_puts(m, "}\n"); | ||
454 | |||
455 | seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); | ||
456 | seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); | ||
457 | |||
458 | seq_puts(m, "ws={\n"); | ||
459 | for (i = 0; i < SBQ_WAIT_QUEUES; i++) { | ||
460 | struct sbq_wait_state *ws = &sbq->ws[i]; | ||
461 | |||
462 | seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", | ||
463 | atomic_read(&ws->wait_cnt), | ||
464 | waitqueue_active(&ws->wait) ? "active" : "inactive"); | ||
465 | } | ||
466 | seq_puts(m, "}\n"); | ||
467 | |||
468 | seq_printf(m, "round_robin=%d\n", sbq->round_robin); | ||
469 | } | ||
470 | EXPORT_SYMBOL_GPL(sbitmap_queue_show); | ||
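For reference, the output of sbitmap_queue_show() follows the format strings above. A hypothetical dump for a fully allocated 64-tag queue on a two-CPU machine might look like this (all values invented; SBQ_WAIT_QUEUES is 8):

depth=64
busy=64
bits_per_word=64
map_nr=1
alloc_hint={12, 41}
wake_batch=8
wake_index=0
ws={
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=5, .wait=active},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
	{.wait_cnt=8, .wait=inactive},
}
round_robin=0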