author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-21 22:58:02 -0500
---|---|---
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-21 22:58:02 -0500
commit | 3e1e21c7bfcfa9bf06c07f48a13faca2f62b3339 (patch) |
tree | b26e480594c8e978c48118e2e3d624d1386f51df /drivers/nvme |
parent | 0a13daedf7ffc71b0c374a036355da7fddb20d6d (diff) |
parent | a9cf8284b45110a4d98aea180a89c857e53bf850 (diff) |
Merge branch 'for-4.5/nvme' of git://git.kernel.dk/linux-block
Pull NVMe updates from Jens Axboe:
"Last branch for this series is the nvme changes. It's in a separate
branch to avoid splitting too much between core and NVMe changes,
since NVMe is still helping drive some blk-mq changes. That said, not
a huge amount of core changes in here. The grunt of the work is the
continued split of the code"
* 'for-4.5/nvme' of git://git.kernel.dk/linux-block: (67 commits)
uapi: update install list after nvme.h rename
NVMe: Export NVMe attributes to sysfs group
NVMe: Shutdown controller only for power-off
NVMe: IO queue deletion re-write
NVMe: Remove queue freezing on resets
NVMe: Use a retryable error code on reset
NVMe: Fix admin queue ring wrap
nvme: make SG_IO support optional
nvme: fixes for NVME_IOCTL_IO_CMD on the char device
nvme: synchronize access to ctrl->namespaces
nvme: Move nvme_freeze/unfreeze_queues to nvme core
PCI/AER: include header file
NVMe: Export namespace attributes to sysfs
NVMe: Add pci error handlers
block: remove REQ_NO_TIMEOUT flag
nvme: merge iod and cmd_info
nvme: meta_sg doesn't have to be an array
nvme: properly free resources for cancelled command
nvme: simplify completion handling
nvme: special case AEN requests
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/Kconfig | 11
-rw-r--r-- | drivers/nvme/host/Makefile | 5
-rw-r--r-- | drivers/nvme/host/core.c | 1472
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 35
-rw-r--r-- | drivers/nvme/host/nvme.h | 242
-rw-r--r-- | drivers/nvme/host/pci.c | 2700
-rw-r--r-- | drivers/nvme/host/scsi.c | 212
7 files changed, 2527 insertions, 2150 deletions
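The diffstat shows the bulk of the driver body moving out of pci.c into a new, transport-agnostic core.c. The split works by having core.c reach back into the transport through the nvme_ctrl_ops table passed to nvme_init_ctrl(). The sketch below is only an illustrative reconstruction from the callbacks core.c can be seen invoking (reg_read32, reg_write32, reg_read64, reset_ctrl, free_ctrl); the authoritative (and possibly larger) definition lives in nvme.h, whose diff is not shown in this excerpt.

```c
/*
 * Illustrative reconstruction, not the actual header: the real
 * struct nvme_ctrl_ops is defined in drivers/nvme/host/nvme.h and may
 * contain more members.  Names and argument patterns below match the
 * calls made in core.c; the return type of free_ctrl is assumed void.
 */
struct nvme_ctrl_ops {
	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);	/* e.g. CSTS polling */
	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);	/* e.g. CC updates */
	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);	/* e.g. CAP */
	int (*reset_ctrl)(struct nvme_ctrl *ctrl);			/* NVME_IOCTL_RESET, sysfs reset */
	void (*free_ctrl)(struct nvme_ctrl *ctrl);			/* final teardown from kref */
};
```

Keeping register access and reset behind this table is what lets the bring-up helpers in core.c (nvme_enable_ctrl(), nvme_shutdown_ctrl(), nvme_init_identify()) stay free of PCI-specific code.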
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 002a94abdbc4..5d6237391dcd 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -8,3 +8,14 @@ config BLK_DEV_NVME
8 | 8 | ||
9 | To compile this driver as a module, choose M here: the | 9 | To compile this driver as a module, choose M here: the |
10 | module will be called nvme. | 10 | module will be called nvme. |
11 | |||
12 | config BLK_DEV_NVME_SCSI | ||
13 | bool "SCSI emulation for NVMe device nodes" | ||
14 | depends on BLK_DEV_NVME | ||
15 | ---help--- | ||
16 | This adds support for the SG_IO ioctl on the NVMe character | ||
17 | and block device nodes, as well as a translation for a small | ||
18 | number of selected SCSI commands to NVMe commands to the NVMe | ||
19 | driver. If you don't know what this means you probably want | ||
20 | to say N here, and if you know what it means you probably | ||
21 | want to say N as well. | ||
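The user-visible effect of the new option is that the SG_IO/SCSI-translation ioctls on NVMe block and character nodes become optional: with CONFIG_BLK_DEV_NVME_SCSI=n they fall through to the default -ENOTTY case in nvme_ioctl() (see the #ifdef block in core.c below). A minimal userspace sketch of how that can be observed — the device path and error handling here are hypothetical, not part of this patch:

```c
/* Hypothetical probe: SG_GET_VERSION_NUM is only handled by the nvme
 * driver when CONFIG_BLK_DEV_NVME_SCSI is enabled; otherwise the ioctl
 * falls through to -ENOTTY. */
#include <fcntl.h>
#include <scsi/sg.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int version, fd = open("/dev/nvme0n1", O_RDONLY);

	if (fd < 0)
		return 1;
	if (ioctl(fd, SG_GET_VERSION_NUM, &version) < 0)
		perror("SG_IO emulation unavailable");	/* expect ENOTTY when disabled */
	else
		printf("sg version %d\n", version);
	close(fd);
	return 0;
}
```

The Makefile hunk that follows makes the same split at build time: scsi.o is only linked into the nvme module when the option is set.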
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index a5fe23952586..51bf90871549 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,5 +1,6 @@
1 | 1 | ||
2 | obj-$(CONFIG_BLK_DEV_NVME) += nvme.o | 2 | obj-$(CONFIG_BLK_DEV_NVME) += nvme.o |
3 | 3 | ||
4 | lightnvm-$(CONFIG_NVM) := lightnvm.o | 4 | lightnvm-$(CONFIG_NVM) := lightnvm.o |
5 | nvme-y += pci.o scsi.o $(lightnvm-y) | 5 | nvme-y += core.o pci.o $(lightnvm-y) |
6 | nvme-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o | ||
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
new file mode 100644
index 000000000000..c5bf001af559
--- /dev/null
+++ b/drivers/nvme/host/core.c
@@ -0,0 +1,1472 @@
1 | /* | ||
2 | * NVM Express device driver | ||
3 | * Copyright (c) 2011-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | */ | ||
14 | |||
15 | #include <linux/blkdev.h> | ||
16 | #include <linux/blk-mq.h> | ||
17 | #include <linux/delay.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/hdreg.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/list_sort.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/pr.h> | ||
26 | #include <linux/ptrace.h> | ||
27 | #include <linux/nvme_ioctl.h> | ||
28 | #include <linux/t10-pi.h> | ||
29 | #include <scsi/sg.h> | ||
30 | #include <asm/unaligned.h> | ||
31 | |||
32 | #include "nvme.h" | ||
33 | |||
34 | #define NVME_MINORS (1U << MINORBITS) | ||
35 | |||
36 | static int nvme_major; | ||
37 | module_param(nvme_major, int, 0); | ||
38 | |||
39 | static int nvme_char_major; | ||
40 | module_param(nvme_char_major, int, 0); | ||
41 | |||
42 | static LIST_HEAD(nvme_ctrl_list); | ||
43 | DEFINE_SPINLOCK(dev_list_lock); | ||
44 | |||
45 | static struct class *nvme_class; | ||
46 | |||
47 | static void nvme_free_ns(struct kref *kref) | ||
48 | { | ||
49 | struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); | ||
50 | |||
51 | if (ns->type == NVME_NS_LIGHTNVM) | ||
52 | nvme_nvm_unregister(ns->queue, ns->disk->disk_name); | ||
53 | |||
54 | spin_lock(&dev_list_lock); | ||
55 | ns->disk->private_data = NULL; | ||
56 | spin_unlock(&dev_list_lock); | ||
57 | |||
58 | nvme_put_ctrl(ns->ctrl); | ||
59 | put_disk(ns->disk); | ||
60 | kfree(ns); | ||
61 | } | ||
62 | |||
63 | static void nvme_put_ns(struct nvme_ns *ns) | ||
64 | { | ||
65 | kref_put(&ns->kref, nvme_free_ns); | ||
66 | } | ||
67 | |||
68 | static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk) | ||
69 | { | ||
70 | struct nvme_ns *ns; | ||
71 | |||
72 | spin_lock(&dev_list_lock); | ||
73 | ns = disk->private_data; | ||
74 | if (ns && !kref_get_unless_zero(&ns->kref)) | ||
75 | ns = NULL; | ||
76 | spin_unlock(&dev_list_lock); | ||
77 | |||
78 | return ns; | ||
79 | } | ||
80 | |||
81 | void nvme_requeue_req(struct request *req) | ||
82 | { | ||
83 | unsigned long flags; | ||
84 | |||
85 | blk_mq_requeue_request(req); | ||
86 | spin_lock_irqsave(req->q->queue_lock, flags); | ||
87 | if (!blk_queue_stopped(req->q)) | ||
88 | blk_mq_kick_requeue_list(req->q); | ||
89 | spin_unlock_irqrestore(req->q->queue_lock, flags); | ||
90 | } | ||
91 | |||
92 | struct request *nvme_alloc_request(struct request_queue *q, | ||
93 | struct nvme_command *cmd, unsigned int flags) | ||
94 | { | ||
95 | bool write = cmd->common.opcode & 1; | ||
96 | struct request *req; | ||
97 | |||
98 | req = blk_mq_alloc_request(q, write, flags); | ||
99 | if (IS_ERR(req)) | ||
100 | return req; | ||
101 | |||
102 | req->cmd_type = REQ_TYPE_DRV_PRIV; | ||
103 | req->cmd_flags |= REQ_FAILFAST_DRIVER; | ||
104 | req->__data_len = 0; | ||
105 | req->__sector = (sector_t) -1; | ||
106 | req->bio = req->biotail = NULL; | ||
107 | |||
108 | req->cmd = (unsigned char *)cmd; | ||
109 | req->cmd_len = sizeof(struct nvme_command); | ||
110 | req->special = (void *)0; | ||
111 | |||
112 | return req; | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * Returns 0 on success. If the result is negative, it's a Linux error code; | ||
117 | * if the result is positive, it's an NVM Express status code | ||
118 | */ | ||
119 | int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
120 | void *buffer, unsigned bufflen, u32 *result, unsigned timeout) | ||
121 | { | ||
122 | struct request *req; | ||
123 | int ret; | ||
124 | |||
125 | req = nvme_alloc_request(q, cmd, 0); | ||
126 | if (IS_ERR(req)) | ||
127 | return PTR_ERR(req); | ||
128 | |||
129 | req->timeout = timeout ? timeout : ADMIN_TIMEOUT; | ||
130 | |||
131 | if (buffer && bufflen) { | ||
132 | ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); | ||
133 | if (ret) | ||
134 | goto out; | ||
135 | } | ||
136 | |||
137 | blk_execute_rq(req->q, NULL, req, 0); | ||
138 | if (result) | ||
139 | *result = (u32)(uintptr_t)req->special; | ||
140 | ret = req->errors; | ||
141 | out: | ||
142 | blk_mq_free_request(req); | ||
143 | return ret; | ||
144 | } | ||
145 | |||
146 | int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
147 | void *buffer, unsigned bufflen) | ||
148 | { | ||
149 | return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0); | ||
150 | } | ||
151 | |||
152 | int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
153 | void __user *ubuffer, unsigned bufflen, | ||
154 | void __user *meta_buffer, unsigned meta_len, u32 meta_seed, | ||
155 | u32 *result, unsigned timeout) | ||
156 | { | ||
157 | bool write = cmd->common.opcode & 1; | ||
158 | struct nvme_ns *ns = q->queuedata; | ||
159 | struct gendisk *disk = ns ? ns->disk : NULL; | ||
160 | struct request *req; | ||
161 | struct bio *bio = NULL; | ||
162 | void *meta = NULL; | ||
163 | int ret; | ||
164 | |||
165 | req = nvme_alloc_request(q, cmd, 0); | ||
166 | if (IS_ERR(req)) | ||
167 | return PTR_ERR(req); | ||
168 | |||
169 | req->timeout = timeout ? timeout : ADMIN_TIMEOUT; | ||
170 | |||
171 | if (ubuffer && bufflen) { | ||
172 | ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, | ||
173 | GFP_KERNEL); | ||
174 | if (ret) | ||
175 | goto out; | ||
176 | bio = req->bio; | ||
177 | |||
178 | if (!disk) | ||
179 | goto submit; | ||
180 | bio->bi_bdev = bdget_disk(disk, 0); | ||
181 | if (!bio->bi_bdev) { | ||
182 | ret = -ENODEV; | ||
183 | goto out_unmap; | ||
184 | } | ||
185 | |||
186 | if (meta_buffer) { | ||
187 | struct bio_integrity_payload *bip; | ||
188 | |||
189 | meta = kmalloc(meta_len, GFP_KERNEL); | ||
190 | if (!meta) { | ||
191 | ret = -ENOMEM; | ||
192 | goto out_unmap; | ||
193 | } | ||
194 | |||
195 | if (write) { | ||
196 | if (copy_from_user(meta, meta_buffer, | ||
197 | meta_len)) { | ||
198 | ret = -EFAULT; | ||
199 | goto out_free_meta; | ||
200 | } | ||
201 | } | ||
202 | |||
203 | bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); | ||
204 | if (IS_ERR(bip)) { | ||
205 | ret = PTR_ERR(bip); | ||
206 | goto out_free_meta; | ||
207 | } | ||
208 | |||
209 | bip->bip_iter.bi_size = meta_len; | ||
210 | bip->bip_iter.bi_sector = meta_seed; | ||
211 | |||
212 | ret = bio_integrity_add_page(bio, virt_to_page(meta), | ||
213 | meta_len, offset_in_page(meta)); | ||
214 | if (ret != meta_len) { | ||
215 | ret = -ENOMEM; | ||
216 | goto out_free_meta; | ||
217 | } | ||
218 | } | ||
219 | } | ||
220 | submit: | ||
221 | blk_execute_rq(req->q, disk, req, 0); | ||
222 | ret = req->errors; | ||
223 | if (result) | ||
224 | *result = (u32)(uintptr_t)req->special; | ||
225 | if (meta && !ret && !write) { | ||
226 | if (copy_to_user(meta_buffer, meta, meta_len)) | ||
227 | ret = -EFAULT; | ||
228 | } | ||
229 | out_free_meta: | ||
230 | kfree(meta); | ||
231 | out_unmap: | ||
232 | if (bio) { | ||
233 | if (disk && bio->bi_bdev) | ||
234 | bdput(bio->bi_bdev); | ||
235 | blk_rq_unmap_user(bio); | ||
236 | } | ||
237 | out: | ||
238 | blk_mq_free_request(req); | ||
239 | return ret; | ||
240 | } | ||
241 | |||
242 | int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
243 | void __user *ubuffer, unsigned bufflen, u32 *result, | ||
244 | unsigned timeout) | ||
245 | { | ||
246 | return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, | ||
247 | result, timeout); | ||
248 | } | ||
249 | |||
250 | int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) | ||
251 | { | ||
252 | struct nvme_command c = { }; | ||
253 | int error; | ||
254 | |||
255 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ | ||
256 | c.identify.opcode = nvme_admin_identify; | ||
257 | c.identify.cns = cpu_to_le32(1); | ||
258 | |||
259 | *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); | ||
260 | if (!*id) | ||
261 | return -ENOMEM; | ||
262 | |||
263 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, | ||
264 | sizeof(struct nvme_id_ctrl)); | ||
265 | if (error) | ||
266 | kfree(*id); | ||
267 | return error; | ||
268 | } | ||
269 | |||
270 | static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list) | ||
271 | { | ||
272 | struct nvme_command c = { }; | ||
273 | |||
274 | c.identify.opcode = nvme_admin_identify; | ||
275 | c.identify.cns = cpu_to_le32(2); | ||
276 | c.identify.nsid = cpu_to_le32(nsid); | ||
277 | return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); | ||
278 | } | ||
279 | |||
280 | int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, | ||
281 | struct nvme_id_ns **id) | ||
282 | { | ||
283 | struct nvme_command c = { }; | ||
284 | int error; | ||
285 | |||
286 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ | ||
287 | c.identify.opcode = nvme_admin_identify, | ||
288 | c.identify.nsid = cpu_to_le32(nsid), | ||
289 | |||
290 | *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); | ||
291 | if (!*id) | ||
292 | return -ENOMEM; | ||
293 | |||
294 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, | ||
295 | sizeof(struct nvme_id_ns)); | ||
296 | if (error) | ||
297 | kfree(*id); | ||
298 | return error; | ||
299 | } | ||
300 | |||
301 | int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, | ||
302 | dma_addr_t dma_addr, u32 *result) | ||
303 | { | ||
304 | struct nvme_command c; | ||
305 | |||
306 | memset(&c, 0, sizeof(c)); | ||
307 | c.features.opcode = nvme_admin_get_features; | ||
308 | c.features.nsid = cpu_to_le32(nsid); | ||
309 | c.features.prp1 = cpu_to_le64(dma_addr); | ||
310 | c.features.fid = cpu_to_le32(fid); | ||
311 | |||
312 | return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); | ||
313 | } | ||
314 | |||
315 | int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, | ||
316 | dma_addr_t dma_addr, u32 *result) | ||
317 | { | ||
318 | struct nvme_command c; | ||
319 | |||
320 | memset(&c, 0, sizeof(c)); | ||
321 | c.features.opcode = nvme_admin_set_features; | ||
322 | c.features.prp1 = cpu_to_le64(dma_addr); | ||
323 | c.features.fid = cpu_to_le32(fid); | ||
324 | c.features.dword11 = cpu_to_le32(dword11); | ||
325 | |||
326 | return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0); | ||
327 | } | ||
328 | |||
329 | int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log) | ||
330 | { | ||
331 | struct nvme_command c = { }; | ||
332 | int error; | ||
333 | |||
334 | c.common.opcode = nvme_admin_get_log_page, | ||
335 | c.common.nsid = cpu_to_le32(0xFFFFFFFF), | ||
336 | c.common.cdw10[0] = cpu_to_le32( | ||
337 | (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | | ||
338 | NVME_LOG_SMART), | ||
339 | |||
340 | *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); | ||
341 | if (!*log) | ||
342 | return -ENOMEM; | ||
343 | |||
344 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, | ||
345 | sizeof(struct nvme_smart_log)); | ||
346 | if (error) | ||
347 | kfree(*log); | ||
348 | return error; | ||
349 | } | ||
350 | |||
351 | int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) | ||
352 | { | ||
353 | u32 q_count = (*count - 1) | ((*count - 1) << 16); | ||
354 | u32 result; | ||
355 | int status, nr_io_queues; | ||
356 | |||
357 | status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0, | ||
358 | &result); | ||
359 | if (status) | ||
360 | return status; | ||
361 | |||
362 | nr_io_queues = min(result & 0xffff, result >> 16) + 1; | ||
363 | *count = min(*count, nr_io_queues); | ||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | ||
368 | { | ||
369 | struct nvme_user_io io; | ||
370 | struct nvme_command c; | ||
371 | unsigned length, meta_len; | ||
372 | void __user *metadata; | ||
373 | |||
374 | if (copy_from_user(&io, uio, sizeof(io))) | ||
375 | return -EFAULT; | ||
376 | |||
377 | switch (io.opcode) { | ||
378 | case nvme_cmd_write: | ||
379 | case nvme_cmd_read: | ||
380 | case nvme_cmd_compare: | ||
381 | break; | ||
382 | default: | ||
383 | return -EINVAL; | ||
384 | } | ||
385 | |||
386 | length = (io.nblocks + 1) << ns->lba_shift; | ||
387 | meta_len = (io.nblocks + 1) * ns->ms; | ||
388 | metadata = (void __user *)(uintptr_t)io.metadata; | ||
389 | |||
390 | if (ns->ext) { | ||
391 | length += meta_len; | ||
392 | meta_len = 0; | ||
393 | } else if (meta_len) { | ||
394 | if ((io.metadata & 3) || !io.metadata) | ||
395 | return -EINVAL; | ||
396 | } | ||
397 | |||
398 | memset(&c, 0, sizeof(c)); | ||
399 | c.rw.opcode = io.opcode; | ||
400 | c.rw.flags = io.flags; | ||
401 | c.rw.nsid = cpu_to_le32(ns->ns_id); | ||
402 | c.rw.slba = cpu_to_le64(io.slba); | ||
403 | c.rw.length = cpu_to_le16(io.nblocks); | ||
404 | c.rw.control = cpu_to_le16(io.control); | ||
405 | c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); | ||
406 | c.rw.reftag = cpu_to_le32(io.reftag); | ||
407 | c.rw.apptag = cpu_to_le16(io.apptag); | ||
408 | c.rw.appmask = cpu_to_le16(io.appmask); | ||
409 | |||
410 | return __nvme_submit_user_cmd(ns->queue, &c, | ||
411 | (void __user *)(uintptr_t)io.addr, length, | ||
412 | metadata, meta_len, io.slba, NULL, 0); | ||
413 | } | ||
414 | |||
415 | static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, | ||
416 | struct nvme_passthru_cmd __user *ucmd) | ||
417 | { | ||
418 | struct nvme_passthru_cmd cmd; | ||
419 | struct nvme_command c; | ||
420 | unsigned timeout = 0; | ||
421 | int status; | ||
422 | |||
423 | if (!capable(CAP_SYS_ADMIN)) | ||
424 | return -EACCES; | ||
425 | if (copy_from_user(&cmd, ucmd, sizeof(cmd))) | ||
426 | return -EFAULT; | ||
427 | |||
428 | memset(&c, 0, sizeof(c)); | ||
429 | c.common.opcode = cmd.opcode; | ||
430 | c.common.flags = cmd.flags; | ||
431 | c.common.nsid = cpu_to_le32(cmd.nsid); | ||
432 | c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); | ||
433 | c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); | ||
434 | c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); | ||
435 | c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); | ||
436 | c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); | ||
437 | c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); | ||
438 | c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); | ||
439 | c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); | ||
440 | |||
441 | if (cmd.timeout_ms) | ||
442 | timeout = msecs_to_jiffies(cmd.timeout_ms); | ||
443 | |||
444 | status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, | ||
445 | (void __user *)(uintptr_t)cmd.addr, cmd.data_len, | ||
446 | &cmd.result, timeout); | ||
447 | if (status >= 0) { | ||
448 | if (put_user(cmd.result, &ucmd->result)) | ||
449 | return -EFAULT; | ||
450 | } | ||
451 | |||
452 | return status; | ||
453 | } | ||
454 | |||
455 | static int nvme_ioctl(struct block_device *bdev, fmode_t mode, | ||
456 | unsigned int cmd, unsigned long arg) | ||
457 | { | ||
458 | struct nvme_ns *ns = bdev->bd_disk->private_data; | ||
459 | |||
460 | switch (cmd) { | ||
461 | case NVME_IOCTL_ID: | ||
462 | force_successful_syscall_return(); | ||
463 | return ns->ns_id; | ||
464 | case NVME_IOCTL_ADMIN_CMD: | ||
465 | return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg); | ||
466 | case NVME_IOCTL_IO_CMD: | ||
467 | return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg); | ||
468 | case NVME_IOCTL_SUBMIT_IO: | ||
469 | return nvme_submit_io(ns, (void __user *)arg); | ||
470 | #ifdef CONFIG_BLK_DEV_NVME_SCSI | ||
471 | case SG_GET_VERSION_NUM: | ||
472 | return nvme_sg_get_version_num((void __user *)arg); | ||
473 | case SG_IO: | ||
474 | return nvme_sg_io(ns, (void __user *)arg); | ||
475 | #endif | ||
476 | default: | ||
477 | return -ENOTTY; | ||
478 | } | ||
479 | } | ||
480 | |||
481 | #ifdef CONFIG_COMPAT | ||
482 | static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, | ||
483 | unsigned int cmd, unsigned long arg) | ||
484 | { | ||
485 | switch (cmd) { | ||
486 | case SG_IO: | ||
487 | return -ENOIOCTLCMD; | ||
488 | } | ||
489 | return nvme_ioctl(bdev, mode, cmd, arg); | ||
490 | } | ||
491 | #else | ||
492 | #define nvme_compat_ioctl NULL | ||
493 | #endif | ||
494 | |||
495 | static int nvme_open(struct block_device *bdev, fmode_t mode) | ||
496 | { | ||
497 | return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO; | ||
498 | } | ||
499 | |||
500 | static void nvme_release(struct gendisk *disk, fmode_t mode) | ||
501 | { | ||
502 | nvme_put_ns(disk->private_data); | ||
503 | } | ||
504 | |||
505 | static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) | ||
506 | { | ||
507 | /* some standard values */ | ||
508 | geo->heads = 1 << 6; | ||
509 | geo->sectors = 1 << 5; | ||
510 | geo->cylinders = get_capacity(bdev->bd_disk) >> 11; | ||
511 | return 0; | ||
512 | } | ||
513 | |||
514 | #ifdef CONFIG_BLK_DEV_INTEGRITY | ||
515 | static void nvme_init_integrity(struct nvme_ns *ns) | ||
516 | { | ||
517 | struct blk_integrity integrity; | ||
518 | |||
519 | switch (ns->pi_type) { | ||
520 | case NVME_NS_DPS_PI_TYPE3: | ||
521 | integrity.profile = &t10_pi_type3_crc; | ||
522 | break; | ||
523 | case NVME_NS_DPS_PI_TYPE1: | ||
524 | case NVME_NS_DPS_PI_TYPE2: | ||
525 | integrity.profile = &t10_pi_type1_crc; | ||
526 | break; | ||
527 | default: | ||
528 | integrity.profile = NULL; | ||
529 | break; | ||
530 | } | ||
531 | integrity.tuple_size = ns->ms; | ||
532 | blk_integrity_register(ns->disk, &integrity); | ||
533 | blk_queue_max_integrity_segments(ns->queue, 1); | ||
534 | } | ||
535 | #else | ||
536 | static void nvme_init_integrity(struct nvme_ns *ns) | ||
537 | { | ||
538 | } | ||
539 | #endif /* CONFIG_BLK_DEV_INTEGRITY */ | ||
540 | |||
541 | static void nvme_config_discard(struct nvme_ns *ns) | ||
542 | { | ||
543 | u32 logical_block_size = queue_logical_block_size(ns->queue); | ||
544 | ns->queue->limits.discard_zeroes_data = 0; | ||
545 | ns->queue->limits.discard_alignment = logical_block_size; | ||
546 | ns->queue->limits.discard_granularity = logical_block_size; | ||
547 | blk_queue_max_discard_sectors(ns->queue, 0xffffffff); | ||
548 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); | ||
549 | } | ||
550 | |||
551 | static int nvme_revalidate_disk(struct gendisk *disk) | ||
552 | { | ||
553 | struct nvme_ns *ns = disk->private_data; | ||
554 | struct nvme_id_ns *id; | ||
555 | u8 lbaf, pi_type; | ||
556 | u16 old_ms; | ||
557 | unsigned short bs; | ||
558 | |||
559 | if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) { | ||
560 | dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n", | ||
561 | __func__, ns->ctrl->instance, ns->ns_id); | ||
562 | return -ENODEV; | ||
563 | } | ||
564 | if (id->ncap == 0) { | ||
565 | kfree(id); | ||
566 | return -ENODEV; | ||
567 | } | ||
568 | |||
569 | if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { | ||
570 | if (nvme_nvm_register(ns->queue, disk->disk_name)) { | ||
571 | dev_warn(ns->ctrl->dev, | ||
572 | "%s: LightNVM init failure\n", __func__); | ||
573 | kfree(id); | ||
574 | return -ENODEV; | ||
575 | } | ||
576 | ns->type = NVME_NS_LIGHTNVM; | ||
577 | } | ||
578 | |||
579 | if (ns->ctrl->vs >= NVME_VS(1, 1)) | ||
580 | memcpy(ns->eui, id->eui64, sizeof(ns->eui)); | ||
581 | if (ns->ctrl->vs >= NVME_VS(1, 2)) | ||
582 | memcpy(ns->uuid, id->nguid, sizeof(ns->uuid)); | ||
583 | |||
584 | old_ms = ns->ms; | ||
585 | lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; | ||
586 | ns->lba_shift = id->lbaf[lbaf].ds; | ||
587 | ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); | ||
588 | ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); | ||
589 | |||
590 | /* | ||
591 | * If identify namespace failed, use default 512 byte block size so | ||
592 | * block layer can use before failing read/write for 0 capacity. | ||
593 | */ | ||
594 | if (ns->lba_shift == 0) | ||
595 | ns->lba_shift = 9; | ||
596 | bs = 1 << ns->lba_shift; | ||
597 | /* XXX: PI implementation requires metadata equal t10 pi tuple size */ | ||
598 | pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? | ||
599 | id->dps & NVME_NS_DPS_PI_MASK : 0; | ||
600 | |||
601 | blk_mq_freeze_queue(disk->queue); | ||
602 | if (blk_get_integrity(disk) && (ns->pi_type != pi_type || | ||
603 | ns->ms != old_ms || | ||
604 | bs != queue_logical_block_size(disk->queue) || | ||
605 | (ns->ms && ns->ext))) | ||
606 | blk_integrity_unregister(disk); | ||
607 | |||
608 | ns->pi_type = pi_type; | ||
609 | blk_queue_logical_block_size(ns->queue, bs); | ||
610 | |||
611 | if (ns->ms && !blk_get_integrity(disk) && !ns->ext) | ||
612 | nvme_init_integrity(ns); | ||
613 | if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk)) | ||
614 | set_capacity(disk, 0); | ||
615 | else | ||
616 | set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); | ||
617 | |||
618 | if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM) | ||
619 | nvme_config_discard(ns); | ||
620 | blk_mq_unfreeze_queue(disk->queue); | ||
621 | |||
622 | kfree(id); | ||
623 | return 0; | ||
624 | } | ||
625 | |||
626 | static char nvme_pr_type(enum pr_type type) | ||
627 | { | ||
628 | switch (type) { | ||
629 | case PR_WRITE_EXCLUSIVE: | ||
630 | return 1; | ||
631 | case PR_EXCLUSIVE_ACCESS: | ||
632 | return 2; | ||
633 | case PR_WRITE_EXCLUSIVE_REG_ONLY: | ||
634 | return 3; | ||
635 | case PR_EXCLUSIVE_ACCESS_REG_ONLY: | ||
636 | return 4; | ||
637 | case PR_WRITE_EXCLUSIVE_ALL_REGS: | ||
638 | return 5; | ||
639 | case PR_EXCLUSIVE_ACCESS_ALL_REGS: | ||
640 | return 6; | ||
641 | default: | ||
642 | return 0; | ||
643 | } | ||
644 | }; | ||
645 | |||
646 | static int nvme_pr_command(struct block_device *bdev, u32 cdw10, | ||
647 | u64 key, u64 sa_key, u8 op) | ||
648 | { | ||
649 | struct nvme_ns *ns = bdev->bd_disk->private_data; | ||
650 | struct nvme_command c; | ||
651 | u8 data[16] = { 0, }; | ||
652 | |||
653 | put_unaligned_le64(key, &data[0]); | ||
654 | put_unaligned_le64(sa_key, &data[8]); | ||
655 | |||
656 | memset(&c, 0, sizeof(c)); | ||
657 | c.common.opcode = op; | ||
658 | c.common.nsid = cpu_to_le32(ns->ns_id); | ||
659 | c.common.cdw10[0] = cpu_to_le32(cdw10); | ||
660 | |||
661 | return nvme_submit_sync_cmd(ns->queue, &c, data, 16); | ||
662 | } | ||
663 | |||
664 | static int nvme_pr_register(struct block_device *bdev, u64 old, | ||
665 | u64 new, unsigned flags) | ||
666 | { | ||
667 | u32 cdw10; | ||
668 | |||
669 | if (flags & ~PR_FL_IGNORE_KEY) | ||
670 | return -EOPNOTSUPP; | ||
671 | |||
672 | cdw10 = old ? 2 : 0; | ||
673 | cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; | ||
674 | cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ | ||
675 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); | ||
676 | } | ||
677 | |||
678 | static int nvme_pr_reserve(struct block_device *bdev, u64 key, | ||
679 | enum pr_type type, unsigned flags) | ||
680 | { | ||
681 | u32 cdw10; | ||
682 | |||
683 | if (flags & ~PR_FL_IGNORE_KEY) | ||
684 | return -EOPNOTSUPP; | ||
685 | |||
686 | cdw10 = nvme_pr_type(type) << 8; | ||
687 | cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); | ||
688 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); | ||
689 | } | ||
690 | |||
691 | static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, | ||
692 | enum pr_type type, bool abort) | ||
693 | { | ||
694 | u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; | ||
695 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); | ||
696 | } | ||
697 | |||
698 | static int nvme_pr_clear(struct block_device *bdev, u64 key) | ||
699 | { | ||
700 | u32 cdw10 = 1 | (key ? 1 << 3 : 0); | ||
701 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); | ||
702 | } | ||
703 | |||
704 | static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) | ||
705 | { | ||
706 | u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; | ||
707 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); | ||
708 | } | ||
709 | |||
710 | static const struct pr_ops nvme_pr_ops = { | ||
711 | .pr_register = nvme_pr_register, | ||
712 | .pr_reserve = nvme_pr_reserve, | ||
713 | .pr_release = nvme_pr_release, | ||
714 | .pr_preempt = nvme_pr_preempt, | ||
715 | .pr_clear = nvme_pr_clear, | ||
716 | }; | ||
717 | |||
718 | static const struct block_device_operations nvme_fops = { | ||
719 | .owner = THIS_MODULE, | ||
720 | .ioctl = nvme_ioctl, | ||
721 | .compat_ioctl = nvme_compat_ioctl, | ||
722 | .open = nvme_open, | ||
723 | .release = nvme_release, | ||
724 | .getgeo = nvme_getgeo, | ||
725 | .revalidate_disk= nvme_revalidate_disk, | ||
726 | .pr_ops = &nvme_pr_ops, | ||
727 | }; | ||
728 | |||
729 | static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) | ||
730 | { | ||
731 | unsigned long timeout = | ||
732 | ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; | ||
733 | u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; | ||
734 | int ret; | ||
735 | |||
736 | while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { | ||
737 | if ((csts & NVME_CSTS_RDY) == bit) | ||
738 | break; | ||
739 | |||
740 | msleep(100); | ||
741 | if (fatal_signal_pending(current)) | ||
742 | return -EINTR; | ||
743 | if (time_after(jiffies, timeout)) { | ||
744 | dev_err(ctrl->dev, | ||
745 | "Device not ready; aborting %s\n", enabled ? | ||
746 | "initialisation" : "reset"); | ||
747 | return -ENODEV; | ||
748 | } | ||
749 | } | ||
750 | |||
751 | return ret; | ||
752 | } | ||
753 | |||
754 | /* | ||
755 | * If the device has been passed off to us in an enabled state, just clear | ||
756 | * the enabled bit. The spec says we should set the 'shutdown notification | ||
757 | * bits', but doing so may cause the device to complete commands to the | ||
758 | * admin queue ... and we don't know what memory that might be pointing at! | ||
759 | */ | ||
760 | int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap) | ||
761 | { | ||
762 | int ret; | ||
763 | |||
764 | ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; | ||
765 | ctrl->ctrl_config &= ~NVME_CC_ENABLE; | ||
766 | |||
767 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); | ||
768 | if (ret) | ||
769 | return ret; | ||
770 | return nvme_wait_ready(ctrl, cap, false); | ||
771 | } | ||
772 | |||
773 | int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) | ||
774 | { | ||
775 | /* | ||
776 | * Default to a 4K page size, with the intention to update this | ||
777 | * path in the future to accommodate architectures with differing | ||
778 | * kernel and IO page sizes. | ||
779 | */ | ||
780 | unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12; | ||
781 | int ret; | ||
782 | |||
783 | if (page_shift < dev_page_min) { | ||
784 | dev_err(ctrl->dev, | ||
785 | "Minimum device page size %u too large for host (%u)\n", | ||
786 | 1 << dev_page_min, 1 << page_shift); | ||
787 | return -ENODEV; | ||
788 | } | ||
789 | |||
790 | ctrl->page_size = 1 << page_shift; | ||
791 | |||
792 | ctrl->ctrl_config = NVME_CC_CSS_NVM; | ||
793 | ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; | ||
794 | ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; | ||
795 | ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; | ||
796 | ctrl->ctrl_config |= NVME_CC_ENABLE; | ||
797 | |||
798 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); | ||
799 | if (ret) | ||
800 | return ret; | ||
801 | return nvme_wait_ready(ctrl, cap, true); | ||
802 | } | ||
803 | |||
804 | int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) | ||
805 | { | ||
806 | unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies; | ||
807 | u32 csts; | ||
808 | int ret; | ||
809 | |||
810 | ctrl->ctrl_config &= ~NVME_CC_SHN_MASK; | ||
811 | ctrl->ctrl_config |= NVME_CC_SHN_NORMAL; | ||
812 | |||
813 | ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); | ||
814 | if (ret) | ||
815 | return ret; | ||
816 | |||
817 | while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { | ||
818 | if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT) | ||
819 | break; | ||
820 | |||
821 | msleep(100); | ||
822 | if (fatal_signal_pending(current)) | ||
823 | return -EINTR; | ||
824 | if (time_after(jiffies, timeout)) { | ||
825 | dev_err(ctrl->dev, | ||
826 | "Device shutdown incomplete; abort shutdown\n"); | ||
827 | return -ENODEV; | ||
828 | } | ||
829 | } | ||
830 | |||
831 | return ret; | ||
832 | } | ||
833 | |||
834 | /* | ||
835 | * Initialize the cached copies of the Identify data and various controller | ||
836 | * register in our nvme_ctrl structure. This should be called as soon as | ||
837 | * the admin queue is fully up and running. | ||
838 | */ | ||
839 | int nvme_init_identify(struct nvme_ctrl *ctrl) | ||
840 | { | ||
841 | struct nvme_id_ctrl *id; | ||
842 | u64 cap; | ||
843 | int ret, page_shift; | ||
844 | |||
845 | ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); | ||
846 | if (ret) { | ||
847 | dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret); | ||
848 | return ret; | ||
849 | } | ||
850 | |||
851 | ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap); | ||
852 | if (ret) { | ||
853 | dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret); | ||
854 | return ret; | ||
855 | } | ||
856 | page_shift = NVME_CAP_MPSMIN(cap) + 12; | ||
857 | |||
858 | if (ctrl->vs >= NVME_VS(1, 1)) | ||
859 | ctrl->subsystem = NVME_CAP_NSSRC(cap); | ||
860 | |||
861 | ret = nvme_identify_ctrl(ctrl, &id); | ||
862 | if (ret) { | ||
863 | dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret); | ||
864 | return -EIO; | ||
865 | } | ||
866 | |||
867 | ctrl->oncs = le16_to_cpup(&id->oncs); | ||
868 | atomic_set(&ctrl->abort_limit, id->acl + 1); | ||
869 | ctrl->vwc = id->vwc; | ||
870 | memcpy(ctrl->serial, id->sn, sizeof(id->sn)); | ||
871 | memcpy(ctrl->model, id->mn, sizeof(id->mn)); | ||
872 | memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr)); | ||
873 | if (id->mdts) | ||
874 | ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9); | ||
875 | else | ||
876 | ctrl->max_hw_sectors = UINT_MAX; | ||
877 | |||
878 | if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) { | ||
879 | unsigned int max_hw_sectors; | ||
880 | |||
881 | ctrl->stripe_size = 1 << (id->vs[3] + page_shift); | ||
882 | max_hw_sectors = ctrl->stripe_size >> (page_shift - 9); | ||
883 | if (ctrl->max_hw_sectors) { | ||
884 | ctrl->max_hw_sectors = min(max_hw_sectors, | ||
885 | ctrl->max_hw_sectors); | ||
886 | } else { | ||
887 | ctrl->max_hw_sectors = max_hw_sectors; | ||
888 | } | ||
889 | } | ||
890 | |||
891 | kfree(id); | ||
892 | return 0; | ||
893 | } | ||
894 | |||
895 | static int nvme_dev_open(struct inode *inode, struct file *file) | ||
896 | { | ||
897 | struct nvme_ctrl *ctrl; | ||
898 | int instance = iminor(inode); | ||
899 | int ret = -ENODEV; | ||
900 | |||
901 | spin_lock(&dev_list_lock); | ||
902 | list_for_each_entry(ctrl, &nvme_ctrl_list, node) { | ||
903 | if (ctrl->instance != instance) | ||
904 | continue; | ||
905 | |||
906 | if (!ctrl->admin_q) { | ||
907 | ret = -EWOULDBLOCK; | ||
908 | break; | ||
909 | } | ||
910 | if (!kref_get_unless_zero(&ctrl->kref)) | ||
911 | break; | ||
912 | file->private_data = ctrl; | ||
913 | ret = 0; | ||
914 | break; | ||
915 | } | ||
916 | spin_unlock(&dev_list_lock); | ||
917 | |||
918 | return ret; | ||
919 | } | ||
920 | |||
921 | static int nvme_dev_release(struct inode *inode, struct file *file) | ||
922 | { | ||
923 | nvme_put_ctrl(file->private_data); | ||
924 | return 0; | ||
925 | } | ||
926 | |||
927 | static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) | ||
928 | { | ||
929 | struct nvme_ns *ns; | ||
930 | int ret; | ||
931 | |||
932 | mutex_lock(&ctrl->namespaces_mutex); | ||
933 | if (list_empty(&ctrl->namespaces)) { | ||
934 | ret = -ENOTTY; | ||
935 | goto out_unlock; | ||
936 | } | ||
937 | |||
938 | ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list); | ||
939 | if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) { | ||
940 | dev_warn(ctrl->dev, | ||
941 | "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n"); | ||
942 | ret = -EINVAL; | ||
943 | goto out_unlock; | ||
944 | } | ||
945 | |||
946 | dev_warn(ctrl->dev, | ||
947 | "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); | ||
948 | kref_get(&ns->kref); | ||
949 | mutex_unlock(&ctrl->namespaces_mutex); | ||
950 | |||
951 | ret = nvme_user_cmd(ctrl, ns, argp); | ||
952 | nvme_put_ns(ns); | ||
953 | return ret; | ||
954 | |||
955 | out_unlock: | ||
956 | mutex_unlock(&ctrl->namespaces_mutex); | ||
957 | return ret; | ||
958 | } | ||
959 | |||
960 | static long nvme_dev_ioctl(struct file *file, unsigned int cmd, | ||
961 | unsigned long arg) | ||
962 | { | ||
963 | struct nvme_ctrl *ctrl = file->private_data; | ||
964 | void __user *argp = (void __user *)arg; | ||
965 | |||
966 | switch (cmd) { | ||
967 | case NVME_IOCTL_ADMIN_CMD: | ||
968 | return nvme_user_cmd(ctrl, NULL, argp); | ||
969 | case NVME_IOCTL_IO_CMD: | ||
970 | return nvme_dev_user_cmd(ctrl, argp); | ||
971 | case NVME_IOCTL_RESET: | ||
972 | dev_warn(ctrl->dev, "resetting controller\n"); | ||
973 | return ctrl->ops->reset_ctrl(ctrl); | ||
974 | case NVME_IOCTL_SUBSYS_RESET: | ||
975 | return nvme_reset_subsystem(ctrl); | ||
976 | default: | ||
977 | return -ENOTTY; | ||
978 | } | ||
979 | } | ||
980 | |||
981 | static const struct file_operations nvme_dev_fops = { | ||
982 | .owner = THIS_MODULE, | ||
983 | .open = nvme_dev_open, | ||
984 | .release = nvme_dev_release, | ||
985 | .unlocked_ioctl = nvme_dev_ioctl, | ||
986 | .compat_ioctl = nvme_dev_ioctl, | ||
987 | }; | ||
988 | |||
989 | static ssize_t nvme_sysfs_reset(struct device *dev, | ||
990 | struct device_attribute *attr, const char *buf, | ||
991 | size_t count) | ||
992 | { | ||
993 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); | ||
994 | int ret; | ||
995 | |||
996 | ret = ctrl->ops->reset_ctrl(ctrl); | ||
997 | if (ret < 0) | ||
998 | return ret; | ||
999 | return count; | ||
1000 | } | ||
1001 | static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); | ||
1002 | |||
1003 | static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, | ||
1004 | char *buf) | ||
1005 | { | ||
1006 | struct nvme_ns *ns = dev_to_disk(dev)->private_data; | ||
1007 | return sprintf(buf, "%pU\n", ns->uuid); | ||
1008 | } | ||
1009 | static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL); | ||
1010 | |||
1011 | static ssize_t eui_show(struct device *dev, struct device_attribute *attr, | ||
1012 | char *buf) | ||
1013 | { | ||
1014 | struct nvme_ns *ns = dev_to_disk(dev)->private_data; | ||
1015 | return sprintf(buf, "%8phd\n", ns->eui); | ||
1016 | } | ||
1017 | static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL); | ||
1018 | |||
1019 | static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, | ||
1020 | char *buf) | ||
1021 | { | ||
1022 | struct nvme_ns *ns = dev_to_disk(dev)->private_data; | ||
1023 | return sprintf(buf, "%d\n", ns->ns_id); | ||
1024 | } | ||
1025 | static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL); | ||
1026 | |||
1027 | static struct attribute *nvme_ns_attrs[] = { | ||
1028 | &dev_attr_uuid.attr, | ||
1029 | &dev_attr_eui.attr, | ||
1030 | &dev_attr_nsid.attr, | ||
1031 | NULL, | ||
1032 | }; | ||
1033 | |||
1034 | static umode_t nvme_attrs_are_visible(struct kobject *kobj, | ||
1035 | struct attribute *a, int n) | ||
1036 | { | ||
1037 | struct device *dev = container_of(kobj, struct device, kobj); | ||
1038 | struct nvme_ns *ns = dev_to_disk(dev)->private_data; | ||
1039 | |||
1040 | if (a == &dev_attr_uuid.attr) { | ||
1041 | if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid))) | ||
1042 | return 0; | ||
1043 | } | ||
1044 | if (a == &dev_attr_eui.attr) { | ||
1045 | if (!memchr_inv(ns->eui, 0, sizeof(ns->eui))) | ||
1046 | return 0; | ||
1047 | } | ||
1048 | return a->mode; | ||
1049 | } | ||
1050 | |||
1051 | static const struct attribute_group nvme_ns_attr_group = { | ||
1052 | .attrs = nvme_ns_attrs, | ||
1053 | .is_visible = nvme_attrs_are_visible, | ||
1054 | }; | ||
1055 | |||
1056 | #define nvme_show_function(field) \ | ||
1057 | static ssize_t field##_show(struct device *dev, \ | ||
1058 | struct device_attribute *attr, char *buf) \ | ||
1059 | { \ | ||
1060 | struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ | ||
1061 | return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field); \ | ||
1062 | } \ | ||
1063 | static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); | ||
1064 | |||
1065 | nvme_show_function(model); | ||
1066 | nvme_show_function(serial); | ||
1067 | nvme_show_function(firmware_rev); | ||
1068 | |||
1069 | static struct attribute *nvme_dev_attrs[] = { | ||
1070 | &dev_attr_reset_controller.attr, | ||
1071 | &dev_attr_model.attr, | ||
1072 | &dev_attr_serial.attr, | ||
1073 | &dev_attr_firmware_rev.attr, | ||
1074 | NULL | ||
1075 | }; | ||
1076 | |||
1077 | static struct attribute_group nvme_dev_attrs_group = { | ||
1078 | .attrs = nvme_dev_attrs, | ||
1079 | }; | ||
1080 | |||
1081 | static const struct attribute_group *nvme_dev_attr_groups[] = { | ||
1082 | &nvme_dev_attrs_group, | ||
1083 | NULL, | ||
1084 | }; | ||
1085 | |||
1086 | static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
1087 | { | ||
1088 | struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); | ||
1089 | struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); | ||
1090 | |||
1091 | return nsa->ns_id - nsb->ns_id; | ||
1092 | } | ||
1093 | |||
1094 | static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid) | ||
1095 | { | ||
1096 | struct nvme_ns *ns; | ||
1097 | |||
1098 | lockdep_assert_held(&ctrl->namespaces_mutex); | ||
1099 | |||
1100 | list_for_each_entry(ns, &ctrl->namespaces, list) { | ||
1101 | if (ns->ns_id == nsid) | ||
1102 | return ns; | ||
1103 | if (ns->ns_id > nsid) | ||
1104 | break; | ||
1105 | } | ||
1106 | return NULL; | ||
1107 | } | ||
1108 | |||
1109 | static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) | ||
1110 | { | ||
1111 | struct nvme_ns *ns; | ||
1112 | struct gendisk *disk; | ||
1113 | int node = dev_to_node(ctrl->dev); | ||
1114 | |||
1115 | lockdep_assert_held(&ctrl->namespaces_mutex); | ||
1116 | |||
1117 | ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); | ||
1118 | if (!ns) | ||
1119 | return; | ||
1120 | |||
1121 | ns->queue = blk_mq_init_queue(ctrl->tagset); | ||
1122 | if (IS_ERR(ns->queue)) | ||
1123 | goto out_free_ns; | ||
1124 | queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); | ||
1125 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); | ||
1126 | ns->queue->queuedata = ns; | ||
1127 | ns->ctrl = ctrl; | ||
1128 | |||
1129 | disk = alloc_disk_node(0, node); | ||
1130 | if (!disk) | ||
1131 | goto out_free_queue; | ||
1132 | |||
1133 | kref_init(&ns->kref); | ||
1134 | ns->ns_id = nsid; | ||
1135 | ns->disk = disk; | ||
1136 | ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ | ||
1137 | |||
1138 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); | ||
1139 | if (ctrl->max_hw_sectors) { | ||
1140 | blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors); | ||
1141 | blk_queue_max_segments(ns->queue, | ||
1142 | (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1); | ||
1143 | } | ||
1144 | if (ctrl->stripe_size) | ||
1145 | blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9); | ||
1146 | if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) | ||
1147 | blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); | ||
1148 | blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1); | ||
1149 | |||
1150 | disk->major = nvme_major; | ||
1151 | disk->first_minor = 0; | ||
1152 | disk->fops = &nvme_fops; | ||
1153 | disk->private_data = ns; | ||
1154 | disk->queue = ns->queue; | ||
1155 | disk->driverfs_dev = ctrl->device; | ||
1156 | disk->flags = GENHD_FL_EXT_DEVT; | ||
1157 | sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid); | ||
1158 | |||
1159 | if (nvme_revalidate_disk(ns->disk)) | ||
1160 | goto out_free_disk; | ||
1161 | |||
1162 | list_add_tail(&ns->list, &ctrl->namespaces); | ||
1163 | kref_get(&ctrl->kref); | ||
1164 | if (ns->type == NVME_NS_LIGHTNVM) | ||
1165 | return; | ||
1166 | |||
1167 | add_disk(ns->disk); | ||
1168 | if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, | ||
1169 | &nvme_ns_attr_group)) | ||
1170 | pr_warn("%s: failed to create sysfs group for identification\n", | ||
1171 | ns->disk->disk_name); | ||
1172 | return; | ||
1173 | out_free_disk: | ||
1174 | kfree(disk); | ||
1175 | out_free_queue: | ||
1176 | blk_cleanup_queue(ns->queue); | ||
1177 | out_free_ns: | ||
1178 | kfree(ns); | ||
1179 | } | ||
1180 | |||
1181 | static void nvme_ns_remove(struct nvme_ns *ns) | ||
1182 | { | ||
1183 | bool kill = nvme_io_incapable(ns->ctrl) && | ||
1184 | !blk_queue_dying(ns->queue); | ||
1185 | |||
1186 | lockdep_assert_held(&ns->ctrl->namespaces_mutex); | ||
1187 | |||
1188 | if (kill) { | ||
1189 | blk_set_queue_dying(ns->queue); | ||
1190 | |||
1191 | /* | ||
1192 | * The controller was shutdown first if we got here through | ||
1193 | * device removal. The shutdown may requeue outstanding | ||
1194 | * requests. These need to be aborted immediately so | ||
1195 | * del_gendisk doesn't block indefinitely for their completion. | ||
1196 | */ | ||
1197 | blk_mq_abort_requeue_list(ns->queue); | ||
1198 | } | ||
1199 | if (ns->disk->flags & GENHD_FL_UP) { | ||
1200 | if (blk_get_integrity(ns->disk)) | ||
1201 | blk_integrity_unregister(ns->disk); | ||
1202 | sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, | ||
1203 | &nvme_ns_attr_group); | ||
1204 | del_gendisk(ns->disk); | ||
1205 | } | ||
1206 | if (kill || !blk_queue_dying(ns->queue)) { | ||
1207 | blk_mq_abort_requeue_list(ns->queue); | ||
1208 | blk_cleanup_queue(ns->queue); | ||
1209 | } | ||
1210 | list_del_init(&ns->list); | ||
1211 | nvme_put_ns(ns); | ||
1212 | } | ||
1213 | |||
1214 | static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) | ||
1215 | { | ||
1216 | struct nvme_ns *ns; | ||
1217 | |||
1218 | ns = nvme_find_ns(ctrl, nsid); | ||
1219 | if (ns) { | ||
1220 | if (revalidate_disk(ns->disk)) | ||
1221 | nvme_ns_remove(ns); | ||
1222 | } else | ||
1223 | nvme_alloc_ns(ctrl, nsid); | ||
1224 | } | ||
1225 | |||
1226 | static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) | ||
1227 | { | ||
1228 | struct nvme_ns *ns; | ||
1229 | __le32 *ns_list; | ||
1230 | unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024); | ||
1231 | int ret = 0; | ||
1232 | |||
1233 | ns_list = kzalloc(0x1000, GFP_KERNEL); | ||
1234 | if (!ns_list) | ||
1235 | return -ENOMEM; | ||
1236 | |||
1237 | for (i = 0; i < num_lists; i++) { | ||
1238 | ret = nvme_identify_ns_list(ctrl, prev, ns_list); | ||
1239 | if (ret) | ||
1240 | goto out; | ||
1241 | |||
1242 | for (j = 0; j < min(nn, 1024U); j++) { | ||
1243 | nsid = le32_to_cpu(ns_list[j]); | ||
1244 | if (!nsid) | ||
1245 | goto out; | ||
1246 | |||
1247 | nvme_validate_ns(ctrl, nsid); | ||
1248 | |||
1249 | while (++prev < nsid) { | ||
1250 | ns = nvme_find_ns(ctrl, prev); | ||
1251 | if (ns) | ||
1252 | nvme_ns_remove(ns); | ||
1253 | } | ||
1254 | } | ||
1255 | nn -= j; | ||
1256 | } | ||
1257 | out: | ||
1258 | kfree(ns_list); | ||
1259 | return ret; | ||
1260 | } | ||
1261 | |||
1262 | static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn) | ||
1263 | { | ||
1264 | struct nvme_ns *ns, *next; | ||
1265 | unsigned i; | ||
1266 | |||
1267 | lockdep_assert_held(&ctrl->namespaces_mutex); | ||
1268 | |||
1269 | for (i = 1; i <= nn; i++) | ||
1270 | nvme_validate_ns(ctrl, i); | ||
1271 | |||
1272 | list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { | ||
1273 | if (ns->ns_id > nn) | ||
1274 | nvme_ns_remove(ns); | ||
1275 | } | ||
1276 | } | ||
1277 | |||
1278 | void nvme_scan_namespaces(struct nvme_ctrl *ctrl) | ||
1279 | { | ||
1280 | struct nvme_id_ctrl *id; | ||
1281 | unsigned nn; | ||
1282 | |||
1283 | if (nvme_identify_ctrl(ctrl, &id)) | ||
1284 | return; | ||
1285 | |||
1286 | mutex_lock(&ctrl->namespaces_mutex); | ||
1287 | nn = le32_to_cpu(id->nn); | ||
1288 | if (ctrl->vs >= NVME_VS(1, 1) && | ||
1289 | !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { | ||
1290 | if (!nvme_scan_ns_list(ctrl, nn)) | ||
1291 | goto done; | ||
1292 | } | ||
1293 | __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn)); | ||
1294 | done: | ||
1295 | list_sort(NULL, &ctrl->namespaces, ns_cmp); | ||
1296 | mutex_unlock(&ctrl->namespaces_mutex); | ||
1297 | kfree(id); | ||
1298 | } | ||
1299 | |||
1300 | void nvme_remove_namespaces(struct nvme_ctrl *ctrl) | ||
1301 | { | ||
1302 | struct nvme_ns *ns, *next; | ||
1303 | |||
1304 | mutex_lock(&ctrl->namespaces_mutex); | ||
1305 | list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) | ||
1306 | nvme_ns_remove(ns); | ||
1307 | mutex_unlock(&ctrl->namespaces_mutex); | ||
1308 | } | ||
1309 | |||
1310 | static DEFINE_IDA(nvme_instance_ida); | ||
1311 | |||
1312 | static int nvme_set_instance(struct nvme_ctrl *ctrl) | ||
1313 | { | ||
1314 | int instance, error; | ||
1315 | |||
1316 | do { | ||
1317 | if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) | ||
1318 | return -ENODEV; | ||
1319 | |||
1320 | spin_lock(&dev_list_lock); | ||
1321 | error = ida_get_new(&nvme_instance_ida, &instance); | ||
1322 | spin_unlock(&dev_list_lock); | ||
1323 | } while (error == -EAGAIN); | ||
1324 | |||
1325 | if (error) | ||
1326 | return -ENODEV; | ||
1327 | |||
1328 | ctrl->instance = instance; | ||
1329 | return 0; | ||
1330 | } | ||
1331 | |||
1332 | static void nvme_release_instance(struct nvme_ctrl *ctrl) | ||
1333 | { | ||
1334 | spin_lock(&dev_list_lock); | ||
1335 | ida_remove(&nvme_instance_ida, ctrl->instance); | ||
1336 | spin_unlock(&dev_list_lock); | ||
1337 | } | ||
1338 | |||
1339 | void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) | ||
1340 | { | ||
1341 | device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance)); | ||
1342 | |||
1343 | spin_lock(&dev_list_lock); | ||
1344 | list_del(&ctrl->node); | ||
1345 | spin_unlock(&dev_list_lock); | ||
1346 | } | ||
1347 | |||
1348 | static void nvme_free_ctrl(struct kref *kref) | ||
1349 | { | ||
1350 | struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref); | ||
1351 | |||
1352 | put_device(ctrl->device); | ||
1353 | nvme_release_instance(ctrl); | ||
1354 | |||
1355 | ctrl->ops->free_ctrl(ctrl); | ||
1356 | } | ||
1357 | |||
1358 | void nvme_put_ctrl(struct nvme_ctrl *ctrl) | ||
1359 | { | ||
1360 | kref_put(&ctrl->kref, nvme_free_ctrl); | ||
1361 | } | ||
1362 | |||
1363 | /* | ||
1364 | * Initialize an NVMe controller structure. This needs to be called during | ||
1365 | * earliest initialization so that we have the initialized structure around | ||
1366 | * during probing. | ||
1367 | */ | ||
1368 | int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, | ||
1369 | const struct nvme_ctrl_ops *ops, unsigned long quirks) | ||
1370 | { | ||
1371 | int ret; | ||
1372 | |||
1373 | INIT_LIST_HEAD(&ctrl->namespaces); | ||
1374 | mutex_init(&ctrl->namespaces_mutex); | ||
1375 | kref_init(&ctrl->kref); | ||
1376 | ctrl->dev = dev; | ||
1377 | ctrl->ops = ops; | ||
1378 | ctrl->quirks = quirks; | ||
1379 | |||
1380 | ret = nvme_set_instance(ctrl); | ||
1381 | if (ret) | ||
1382 | goto out; | ||
1383 | |||
1384 | ctrl->device = device_create_with_groups(nvme_class, ctrl->dev, | ||
1385 | MKDEV(nvme_char_major, ctrl->instance), | ||
1386 | dev, nvme_dev_attr_groups, | ||
1387 | "nvme%d", ctrl->instance); | ||
1388 | if (IS_ERR(ctrl->device)) { | ||
1389 | ret = PTR_ERR(ctrl->device); | ||
1390 | goto out_release_instance; | ||
1391 | } | ||
1392 | get_device(ctrl->device); | ||
1393 | dev_set_drvdata(ctrl->device, ctrl); | ||
1394 | |||
1395 | spin_lock(&dev_list_lock); | ||
1396 | list_add_tail(&ctrl->node, &nvme_ctrl_list); | ||
1397 | spin_unlock(&dev_list_lock); | ||
1398 | |||
1399 | return 0; | ||
1400 | out_release_instance: | ||
1401 | nvme_release_instance(ctrl); | ||
1402 | out: | ||
1403 | return ret; | ||
1404 | } | ||
1405 | |||
1406 | void nvme_stop_queues(struct nvme_ctrl *ctrl) | ||
1407 | { | ||
1408 | struct nvme_ns *ns; | ||
1409 | |||
1410 | mutex_lock(&ctrl->namespaces_mutex); | ||
1411 | list_for_each_entry(ns, &ctrl->namespaces, list) { | ||
1412 | spin_lock_irq(ns->queue->queue_lock); | ||
1413 | queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); | ||
1414 | spin_unlock_irq(ns->queue->queue_lock); | ||
1415 | |||
1416 | blk_mq_cancel_requeue_work(ns->queue); | ||
1417 | blk_mq_stop_hw_queues(ns->queue); | ||
1418 | } | ||
1419 | mutex_unlock(&ctrl->namespaces_mutex); | ||
1420 | } | ||
1421 | |||
1422 | void nvme_start_queues(struct nvme_ctrl *ctrl) | ||
1423 | { | ||
1424 | struct nvme_ns *ns; | ||
1425 | |||
1426 | mutex_lock(&ctrl->namespaces_mutex); | ||
1427 | list_for_each_entry(ns, &ctrl->namespaces, list) { | ||
1428 | queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue); | ||
1429 | blk_mq_start_stopped_hw_queues(ns->queue, true); | ||
1430 | blk_mq_kick_requeue_list(ns->queue); | ||
1431 | } | ||
1432 | mutex_unlock(&ctrl->namespaces_mutex); | ||
1433 | } | ||
1434 | |||
1435 | int __init nvme_core_init(void) | ||
1436 | { | ||
1437 | int result; | ||
1438 | |||
1439 | result = register_blkdev(nvme_major, "nvme"); | ||
1440 | if (result < 0) | ||
1441 | return result; | ||
1442 | else if (result > 0) | ||
1443 | nvme_major = result; | ||
1444 | |||
1445 | result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", | ||
1446 | &nvme_dev_fops); | ||
1447 | if (result < 0) | ||
1448 | goto unregister_blkdev; | ||
1449 | else if (result > 0) | ||
1450 | nvme_char_major = result; | ||
1451 | |||
1452 | nvme_class = class_create(THIS_MODULE, "nvme"); | ||
1453 | if (IS_ERR(nvme_class)) { | ||
1454 | result = PTR_ERR(nvme_class); | ||
1455 | goto unregister_chrdev; | ||
1456 | } | ||
1457 | |||
1458 | return 0; | ||
1459 | |||
1460 | unregister_chrdev: | ||
1461 | __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); | ||
1462 | unregister_blkdev: | ||
1463 | unregister_blkdev(nvme_major, "nvme"); | ||
1464 | return result; | ||
1465 | } | ||
1466 | |||
1467 | void nvme_core_exit(void) | ||
1468 | { | ||
1469 | unregister_blkdev(nvme_major, "nvme"); | ||
1470 | class_destroy(nvme_class); | ||
1471 | __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); | ||
1472 | } | ||
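That is the whole of the new core: it contains no register mappings, interrupt handling or queue setup of its own. A transport driver such as pci.c (whose diff follows the lightnvm.c hunks) supplies those through nvme_ctrl_ops and drives the exported lifecycle helpers. The sketch below is a rough outline of that call order inferred from the helpers above — it is not the actual pci.c code, and error paths and transport-specific setup (BARs, queues, IRQs) are omitted.

```c
/*
 * Rough sketch of the expected call order for a transport driver, based
 * only on the helpers exported by core.c above -- not the actual pci.c
 * code.  Transport-specific setup and most error handling are omitted.
 */
static int nvme_transport_probe_sketch(struct device *dev, struct nvme_ctrl *ctrl,
				       const struct nvme_ctrl_ops *ops, u64 cap)
{
	int ret;

	ret = nvme_init_ctrl(ctrl, dev, ops, 0);	/* instance, char dev, sysfs groups */
	if (ret)
		return ret;

	ret = nvme_disable_ctrl(ctrl, cap);		/* clear a possibly stale CC.EN */
	if (!ret)
		ret = nvme_enable_ctrl(ctrl, cap);	/* set CC.EN, wait for CSTS.RDY */
	if (!ret)
		ret = nvme_init_identify(ctrl);		/* cache Identify Controller data */
	if (ret)
		goto out;

	nvme_scan_namespaces(ctrl);			/* one gendisk per namespace */
	return 0;
out:
	nvme_uninit_ctrl(ctrl);
	nvme_put_ctrl(ctrl);
	return ret;
}

static void nvme_transport_remove_sketch(struct nvme_ctrl *ctrl)
{
	nvme_stop_queues(ctrl);		/* quiesce I/O queues */
	nvme_remove_namespaces(ctrl);	/* tear down gendisks and request queues */
	nvme_uninit_ctrl(ctrl);		/* drop char dev, leave controller list */
	nvme_put_ctrl(ctrl);		/* release the final reference */
}
```

The same ctrl-centred interface is what the lightnvm.c hunks below switch to: they now reach the admin queue through ns->ctrl->admin_q instead of the old PCI-private struct nvme_dev.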
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 71f2bbc865cf..5cd3725e2fa4 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -294,7 +294,6 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
294 | static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) | 294 | static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) |
295 | { | 295 | { |
296 | struct nvme_ns *ns = nvmdev->q->queuedata; | 296 | struct nvme_ns *ns = nvmdev->q->queuedata; |
297 | struct nvme_dev *dev = ns->dev; | ||
298 | struct nvme_nvm_id *nvme_nvm_id; | 297 | struct nvme_nvm_id *nvme_nvm_id; |
299 | struct nvme_nvm_command c = {}; | 298 | struct nvme_nvm_command c = {}; |
300 | int ret; | 299 | int ret; |
@@ -307,7 +306,7 @@ static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
307 | if (!nvme_nvm_id) | 306 | if (!nvme_nvm_id) |
308 | return -ENOMEM; | 307 | return -ENOMEM; |
309 | 308 | ||
310 | ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c, | 309 | ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, |
311 | nvme_nvm_id, sizeof(struct nvme_nvm_id)); | 310 | nvme_nvm_id, sizeof(struct nvme_nvm_id)); |
312 | if (ret) { | 311 | if (ret) { |
313 | ret = -EIO; | 312 | ret = -EIO; |
@@ -332,9 +331,8 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
332 | nvm_l2p_update_fn *update_l2p, void *priv) | 331 | nvm_l2p_update_fn *update_l2p, void *priv) |
333 | { | 332 | { |
334 | struct nvme_ns *ns = nvmdev->q->queuedata; | 333 | struct nvme_ns *ns = nvmdev->q->queuedata; |
335 | struct nvme_dev *dev = ns->dev; | ||
336 | struct nvme_nvm_command c = {}; | 334 | struct nvme_nvm_command c = {}; |
337 | u32 len = queue_max_hw_sectors(dev->admin_q) << 9; | 335 | u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9; |
338 | u32 nlb_pr_rq = len / sizeof(u64); | 336 | u32 nlb_pr_rq = len / sizeof(u64); |
339 | u64 cmd_slba = slba; | 337 | u64 cmd_slba = slba; |
340 | void *entries; | 338 | void *entries; |
@@ -352,10 +350,10 @@ static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, | |||
352 | c.l2p.slba = cpu_to_le64(cmd_slba); | 350 | c.l2p.slba = cpu_to_le64(cmd_slba); |
353 | c.l2p.nlb = cpu_to_le32(cmd_nlb); | 351 | c.l2p.nlb = cpu_to_le32(cmd_nlb); |
354 | 352 | ||
355 | ret = nvme_submit_sync_cmd(dev->admin_q, | 353 | ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, |
356 | (struct nvme_command *)&c, entries, len); | 354 | (struct nvme_command *)&c, entries, len); |
357 | if (ret) { | 355 | if (ret) { |
358 | dev_err(dev->dev, "L2P table transfer failed (%d)\n", | 356 | dev_err(ns->ctrl->dev, "L2P table transfer failed (%d)\n", |
359 | ret); | 357 | ret); |
360 | ret = -EIO; | 358 | ret = -EIO; |
361 | goto out; | 359 | goto out; |
@@ -381,7 +379,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, | |||
381 | { | 379 | { |
382 | struct request_queue *q = nvmdev->q; | 380 | struct request_queue *q = nvmdev->q; |
383 | struct nvme_ns *ns = q->queuedata; | 381 | struct nvme_ns *ns = q->queuedata; |
384 | struct nvme_dev *dev = ns->dev; | 382 | struct nvme_ctrl *ctrl = ns->ctrl; |
385 | struct nvme_nvm_command c = {}; | 383 | struct nvme_nvm_command c = {}; |
386 | struct nvme_nvm_bb_tbl *bb_tbl; | 384 | struct nvme_nvm_bb_tbl *bb_tbl; |
387 | int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks; | 385 | int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks; |
@@ -395,30 +393,30 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, | |||
395 | if (!bb_tbl) | 393 | if (!bb_tbl) |
396 | return -ENOMEM; | 394 | return -ENOMEM; |
397 | 395 | ||
398 | ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c, | 396 | ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c, |
399 | bb_tbl, tblsz); | 397 | bb_tbl, tblsz); |
400 | if (ret) { | 398 | if (ret) { |
401 | dev_err(dev->dev, "get bad block table failed (%d)\n", ret); | 399 | dev_err(ctrl->dev, "get bad block table failed (%d)\n", ret); |
402 | ret = -EIO; | 400 | ret = -EIO; |
403 | goto out; | 401 | goto out; |
404 | } | 402 | } |
405 | 403 | ||
406 | if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' || | 404 | if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' || |
407 | bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') { | 405 | bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') { |
408 | dev_err(dev->dev, "bbt format mismatch\n"); | 406 | dev_err(ctrl->dev, "bbt format mismatch\n"); |
409 | ret = -EINVAL; | 407 | ret = -EINVAL; |
410 | goto out; | 408 | goto out; |
411 | } | 409 | } |
412 | 410 | ||
413 | if (le16_to_cpu(bb_tbl->verid) != 1) { | 411 | if (le16_to_cpu(bb_tbl->verid) != 1) { |
414 | ret = -EINVAL; | 412 | ret = -EINVAL; |
415 | dev_err(dev->dev, "bbt version not supported\n"); | 413 | dev_err(ctrl->dev, "bbt version not supported\n"); |
416 | goto out; | 414 | goto out; |
417 | } | 415 | } |
418 | 416 | ||
419 | if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) { | 417 | if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) { |
420 | ret = -EINVAL; | 418 | ret = -EINVAL; |
421 | dev_err(dev->dev, "bbt unexpected blocks returned (%u!=%u)", | 419 | dev_err(ctrl->dev, "bbt unexpected blocks returned (%u!=%u)", |
422 | le32_to_cpu(bb_tbl->tblks), nr_blocks); | 420 | le32_to_cpu(bb_tbl->tblks), nr_blocks); |
423 | goto out; | 421 | goto out; |
424 | } | 422 | } |
@@ -434,7 +432,6 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd, | |||
434 | int type) | 432 | int type) |
435 | { | 433 | { |
436 | struct nvme_ns *ns = nvmdev->q->queuedata; | 434 | struct nvme_ns *ns = nvmdev->q->queuedata; |
437 | struct nvme_dev *dev = ns->dev; | ||
438 | struct nvme_nvm_command c = {}; | 435 | struct nvme_nvm_command c = {}; |
439 | int ret = 0; | 436 | int ret = 0; |
440 | 437 | ||
@@ -444,10 +441,10 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd, | |||
444 | c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1); | 441 | c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1); |
445 | c.set_bb.value = type; | 442 | c.set_bb.value = type; |
446 | 443 | ||
447 | ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c, | 444 | ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, |
448 | NULL, 0); | 445 | NULL, 0); |
449 | if (ret) | 446 | if (ret) |
450 | dev_err(dev->dev, "set bad block table failed (%d)\n", ret); | 447 | dev_err(ns->ctrl->dev, "set bad block table failed (%d)\n", ret); |
451 | return ret; | 448 | return ret; |
452 | } | 449 | } |
453 | 450 | ||
@@ -532,9 +529,8 @@ static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd) | |||
532 | static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) | 529 | static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name) |
533 | { | 530 | { |
534 | struct nvme_ns *ns = nvmdev->q->queuedata; | 531 | struct nvme_ns *ns = nvmdev->q->queuedata; |
535 | struct nvme_dev *dev = ns->dev; | ||
536 | 532 | ||
537 | return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0); | 533 | return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0); |
538 | } | 534 | } |
539 | 535 | ||
540 | static void nvme_nvm_destroy_dma_pool(void *pool) | 536 | static void nvme_nvm_destroy_dma_pool(void *pool) |
@@ -592,8 +588,9 @@ void nvme_nvm_unregister(struct request_queue *q, char *disk_name) | |||
592 | 588 | ||
593 | int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) | 589 | int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) |
594 | { | 590 | { |
595 | struct nvme_dev *dev = ns->dev; | 591 | struct nvme_ctrl *ctrl = ns->ctrl; |
596 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 592 | /* XXX: this is poking into PCI structures from generic code! */ |
593 | struct pci_dev *pdev = to_pci_dev(ctrl->dev); | ||
597 | 594 | ||
598 | /* QEMU NVMe simulator - PCI ID + Vendor specific bit */ | 595 | /* QEMU NVMe simulator - PCI ID + Vendor specific bit */ |
599 | if (pdev->vendor == PCI_VENDOR_ID_CNEX && | 596 | if (pdev->vendor == PCI_VENDOR_ID_CNEX && |
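Every hunk above applies the same mechanical substitution: the LightNVM callbacks stop dereferencing the PCI-specific struct nvme_dev and reach the admin queue and the logging device through the transport-neutral struct nvme_ctrl attached to the namespace. A condensed sketch of the pattern, with a hypothetical function name standing in for the real callbacks:

static int nvme_nvm_admin_cmd_sketch(struct nvm_dev *nvmdev,
				     struct nvme_nvm_command *c,
				     void *buf, unsigned int buflen)
{
	struct nvme_ns *ns = nvmdev->q->queuedata;

	/* was: ns->dev->admin_q -- the admin queue now lives on the ctrl */
	return nvme_submit_sync_cmd(ns->ctrl->admin_q,
				    (struct nvme_command *)c, buf, buflen);
}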
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 044253dca30a..4fb5bb737868 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h | |||
@@ -19,58 +19,77 @@ | |||
19 | #include <linux/kref.h> | 19 | #include <linux/kref.h> |
20 | #include <linux/blk-mq.h> | 20 | #include <linux/blk-mq.h> |
21 | 21 | ||
22 | enum { | ||
23 | /* | ||
24 | * Driver internal status code for commands that were cancelled due | ||
25 | * to timeouts or controller shutdown. The value is negative so | ||
26 | * that it a) doesn't overlap with the unsigned hardware error codes, | ||
27 | * and b) can easily be tested for. | ||
28 | */ | ||
29 | NVME_SC_CANCELLED = -EINTR, | ||
30 | }; | ||
31 | |||
22 | extern unsigned char nvme_io_timeout; | 32 | extern unsigned char nvme_io_timeout; |
23 | #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) | 33 | #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) |
24 | 34 | ||
35 | extern unsigned char admin_timeout; | ||
36 | #define ADMIN_TIMEOUT (admin_timeout * HZ) | ||
37 | |||
38 | extern unsigned char shutdown_timeout; | ||
39 | #define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ) | ||
40 | |||
25 | enum { | 41 | enum { |
26 | NVME_NS_LBA = 0, | 42 | NVME_NS_LBA = 0, |
27 | NVME_NS_LIGHTNVM = 1, | 43 | NVME_NS_LIGHTNVM = 1, |
28 | }; | 44 | }; |
29 | 45 | ||
30 | /* | 46 | /* |
31 | * Represents an NVM Express device. Each nvme_dev is a PCI function. | 47 | * List of workarounds for devices that required behavior not specified in |
48 | * the standard. | ||
32 | */ | 49 | */ |
33 | struct nvme_dev { | 50 | enum nvme_quirks { |
34 | struct list_head node; | 51 | /* |
35 | struct nvme_queue **queues; | 52 | * Prefers I/O aligned to a stripe size specified in a vendor |
53 | * specific Identify field. | ||
54 | */ | ||
55 | NVME_QUIRK_STRIPE_SIZE = (1 << 0), | ||
56 | |||
57 | /* | ||
58 | * The controller doesn't handle Identify values other than 0 or 1 | ||
59 | * correctly. | ||
60 | */ | ||
61 | NVME_QUIRK_IDENTIFY_CNS = (1 << 1), | ||
62 | }; | ||
63 | |||
64 | struct nvme_ctrl { | ||
65 | const struct nvme_ctrl_ops *ops; | ||
36 | struct request_queue *admin_q; | 66 | struct request_queue *admin_q; |
37 | struct blk_mq_tag_set tagset; | ||
38 | struct blk_mq_tag_set admin_tagset; | ||
39 | u32 __iomem *dbs; | ||
40 | struct device *dev; | 67 | struct device *dev; |
41 | struct dma_pool *prp_page_pool; | 68 | struct kref kref; |
42 | struct dma_pool *prp_small_pool; | ||
43 | int instance; | 69 | int instance; |
44 | unsigned queue_count; | 70 | struct blk_mq_tag_set *tagset; |
45 | unsigned online_queues; | ||
46 | unsigned max_qid; | ||
47 | int q_depth; | ||
48 | u32 db_stride; | ||
49 | u32 ctrl_config; | ||
50 | struct msix_entry *entry; | ||
51 | struct nvme_bar __iomem *bar; | ||
52 | struct list_head namespaces; | 71 | struct list_head namespaces; |
53 | struct kref kref; | 72 | struct mutex namespaces_mutex; |
54 | struct device *device; | 73 | struct device *device; /* char device */ |
55 | struct work_struct reset_work; | 74 | struct list_head node; |
56 | struct work_struct probe_work; | 75 | |
57 | struct work_struct scan_work; | ||
58 | char name[12]; | 76 | char name[12]; |
59 | char serial[20]; | 77 | char serial[20]; |
60 | char model[40]; | 78 | char model[40]; |
61 | char firmware_rev[8]; | 79 | char firmware_rev[8]; |
62 | bool subsystem; | 80 | |
81 | u32 ctrl_config; | ||
82 | |||
83 | u32 page_size; | ||
63 | u32 max_hw_sectors; | 84 | u32 max_hw_sectors; |
64 | u32 stripe_size; | 85 | u32 stripe_size; |
65 | u32 page_size; | ||
66 | void __iomem *cmb; | ||
67 | dma_addr_t cmb_dma_addr; | ||
68 | u64 cmb_size; | ||
69 | u32 cmbsz; | ||
70 | u16 oncs; | 86 | u16 oncs; |
71 | u16 abort_limit; | 87 | atomic_t abort_limit; |
72 | u8 event_limit; | 88 | u8 event_limit; |
73 | u8 vwc; | 89 | u8 vwc; |
90 | u32 vs; | ||
91 | bool subsystem; | ||
92 | unsigned long quirks; | ||
74 | }; | 93 | }; |
75 | 94 | ||
76 | /* | 95 | /* |
@@ -79,11 +98,14 @@ struct nvme_dev { | |||
79 | struct nvme_ns { | 98 | struct nvme_ns { |
80 | struct list_head list; | 99 | struct list_head list; |
81 | 100 | ||
82 | struct nvme_dev *dev; | 101 | struct nvme_ctrl *ctrl; |
83 | struct request_queue *queue; | 102 | struct request_queue *queue; |
84 | struct gendisk *disk; | 103 | struct gendisk *disk; |
85 | struct kref kref; | 104 | struct kref kref; |
86 | 105 | ||
106 | u8 eui[8]; | ||
107 | u8 uuid[16]; | ||
108 | |||
87 | unsigned ns_id; | 109 | unsigned ns_id; |
88 | int lba_shift; | 110 | int lba_shift; |
89 | u16 ms; | 111 | u16 ms; |
@@ -94,41 +116,156 @@ struct nvme_ns { | |||
94 | u32 mode_select_block_len; | 116 | u32 mode_select_block_len; |
95 | }; | 117 | }; |
96 | 118 | ||
97 | /* | 119 | struct nvme_ctrl_ops { |
98 | * The nvme_iod describes the data in an I/O, including the list of PRP | 120 | int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); |
99 | * entries. You can't see it in this data structure because C doesn't let | 121 | int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); |
100 | * me express that. Use nvme_alloc_iod to ensure there's enough space | 122 | int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); |
101 | * allocated to store the PRP list. | 123 | bool (*io_incapable)(struct nvme_ctrl *ctrl); |
102 | */ | 124 | int (*reset_ctrl)(struct nvme_ctrl *ctrl); |
103 | struct nvme_iod { | 125 | void (*free_ctrl)(struct nvme_ctrl *ctrl); |
104 | unsigned long private; /* For the use of the submitter of the I/O */ | ||
105 | int npages; /* In the PRP list. 0 means small pool in use */ | ||
106 | int offset; /* Of PRP list */ | ||
107 | int nents; /* Used in scatterlist */ | ||
108 | int length; /* Of data, in bytes */ | ||
109 | dma_addr_t first_dma; | ||
110 | struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ | ||
111 | struct scatterlist sg[0]; | ||
112 | }; | 126 | }; |
113 | 127 | ||
128 | static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) | ||
129 | { | ||
130 | u32 val = 0; | ||
131 | |||
132 | if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val)) | ||
133 | return false; | ||
134 | return val & NVME_CSTS_RDY; | ||
135 | } | ||
136 | |||
137 | static inline bool nvme_io_incapable(struct nvme_ctrl *ctrl) | ||
138 | { | ||
139 | u32 val = 0; | ||
140 | |||
141 | if (ctrl->ops->io_incapable(ctrl)) | ||
142 | return false; | ||
143 | if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val)) | ||
144 | return false; | ||
145 | return val & NVME_CSTS_CFS; | ||
146 | } | ||
147 | |||
148 | static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) | ||
149 | { | ||
150 | if (!ctrl->subsystem) | ||
151 | return -ENOTTY; | ||
152 | return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); | ||
153 | } | ||
154 | |||
114 | static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) | 155 | static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) |
115 | { | 156 | { |
116 | return (sector >> (ns->lba_shift - 9)); | 157 | return (sector >> (ns->lba_shift - 9)); |
117 | } | 158 | } |
118 | 159 | ||
160 | static inline void nvme_setup_flush(struct nvme_ns *ns, | ||
161 | struct nvme_command *cmnd) | ||
162 | { | ||
163 | memset(cmnd, 0, sizeof(*cmnd)); | ||
164 | cmnd->common.opcode = nvme_cmd_flush; | ||
165 | cmnd->common.nsid = cpu_to_le32(ns->ns_id); | ||
166 | } | ||
167 | |||
168 | static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req, | ||
169 | struct nvme_command *cmnd) | ||
170 | { | ||
171 | u16 control = 0; | ||
172 | u32 dsmgmt = 0; | ||
173 | |||
174 | if (req->cmd_flags & REQ_FUA) | ||
175 | control |= NVME_RW_FUA; | ||
176 | if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) | ||
177 | control |= NVME_RW_LR; | ||
178 | |||
179 | if (req->cmd_flags & REQ_RAHEAD) | ||
180 | dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; | ||
181 | |||
182 | memset(cmnd, 0, sizeof(*cmnd)); | ||
183 | cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); | ||
184 | cmnd->rw.command_id = req->tag; | ||
185 | cmnd->rw.nsid = cpu_to_le32(ns->ns_id); | ||
186 | cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); | ||
187 | cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); | ||
188 | |||
189 | if (ns->ms) { | ||
190 | switch (ns->pi_type) { | ||
191 | case NVME_NS_DPS_PI_TYPE3: | ||
192 | control |= NVME_RW_PRINFO_PRCHK_GUARD; | ||
193 | break; | ||
194 | case NVME_NS_DPS_PI_TYPE1: | ||
195 | case NVME_NS_DPS_PI_TYPE2: | ||
196 | control |= NVME_RW_PRINFO_PRCHK_GUARD | | ||
197 | NVME_RW_PRINFO_PRCHK_REF; | ||
198 | cmnd->rw.reftag = cpu_to_le32( | ||
199 | nvme_block_nr(ns, blk_rq_pos(req))); | ||
200 | break; | ||
201 | } | ||
202 | if (!blk_integrity_rq(req)) | ||
203 | control |= NVME_RW_PRINFO_PRACT; | ||
204 | } | ||
205 | |||
206 | cmnd->rw.control = cpu_to_le16(control); | ||
207 | cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); | ||
208 | } | ||
209 | |||
210 | |||
211 | static inline int nvme_error_status(u16 status) | ||
212 | { | ||
213 | switch (status & 0x7ff) { | ||
214 | case NVME_SC_SUCCESS: | ||
215 | return 0; | ||
216 | case NVME_SC_CAP_EXCEEDED: | ||
217 | return -ENOSPC; | ||
218 | default: | ||
219 | return -EIO; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | static inline bool nvme_req_needs_retry(struct request *req, u16 status) | ||
224 | { | ||
225 | return !(status & NVME_SC_DNR || blk_noretry_request(req)) && | ||
226 | (jiffies - req->start_time) < req->timeout; | ||
227 | } | ||
228 | |||
229 | int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap); | ||
230 | int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap); | ||
231 | int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); | ||
232 | int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, | ||
233 | const struct nvme_ctrl_ops *ops, unsigned long quirks); | ||
234 | void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); | ||
235 | void nvme_put_ctrl(struct nvme_ctrl *ctrl); | ||
236 | int nvme_init_identify(struct nvme_ctrl *ctrl); | ||
237 | |||
238 | void nvme_scan_namespaces(struct nvme_ctrl *ctrl); | ||
239 | void nvme_remove_namespaces(struct nvme_ctrl *ctrl); | ||
240 | |||
241 | void nvme_stop_queues(struct nvme_ctrl *ctrl); | ||
242 | void nvme_start_queues(struct nvme_ctrl *ctrl); | ||
243 | |||
244 | struct request *nvme_alloc_request(struct request_queue *q, | ||
245 | struct nvme_command *cmd, unsigned int flags); | ||
246 | void nvme_requeue_req(struct request *req); | ||
119 | int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | 247 | int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, |
120 | void *buf, unsigned bufflen); | 248 | void *buf, unsigned bufflen); |
121 | int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | 249 | int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, |
122 | void *buffer, void __user *ubuffer, unsigned bufflen, | 250 | void *buffer, unsigned bufflen, u32 *result, unsigned timeout); |
251 | int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
252 | void __user *ubuffer, unsigned bufflen, u32 *result, | ||
253 | unsigned timeout); | ||
254 | int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
255 | void __user *ubuffer, unsigned bufflen, | ||
256 | void __user *meta_buffer, unsigned meta_len, u32 meta_seed, | ||
123 | u32 *result, unsigned timeout); | 257 | u32 *result, unsigned timeout); |
124 | int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); | 258 | int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id); |
125 | int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, | 259 | int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, |
126 | struct nvme_id_ns **id); | 260 | struct nvme_id_ns **id); |
127 | int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); | 261 | int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log); |
128 | int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, | 262 | int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, |
129 | dma_addr_t dma_addr, u32 *result); | 263 | dma_addr_t dma_addr, u32 *result); |
130 | int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, | 264 | int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, |
131 | dma_addr_t dma_addr, u32 *result); | 265 | dma_addr_t dma_addr, u32 *result); |
266 | int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); | ||
267 | |||
268 | extern spinlock_t dev_list_lock; | ||
132 | 269 | ||
133 | struct sg_io_hdr; | 270 | struct sg_io_hdr; |
134 | 271 | ||
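struct nvme_ctrl deliberately carries only transport-neutral state; anything bus-specific stays in the transport's own structure, which embeds the ctrl and converts back with container_of(). A minimal sketch of that idiom, with hypothetical type and field names (the PCI driver does the same thing with its to_nvme_dev() helper):

struct example_ctrl {				/* hypothetical transport */
	void __iomem *bar;			/* transport-private resources */
	struct nvme_ctrl ctrl;			/* shared state from this header */
};

static inline struct example_ctrl *to_example_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct example_ctrl, ctrl);
}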
@@ -154,4 +291,7 @@ static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *i | |||
154 | } | 291 | } |
155 | #endif /* CONFIG_NVM */ | 292 | #endif /* CONFIG_NVM */ |
156 | 293 | ||
294 | int __init nvme_core_init(void); | ||
295 | void nvme_core_exit(void); | ||
296 | |||
157 | #endif /* _NVME_H */ | 297 | #endif /* _NVME_H */ |
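A transport built on this header supplies an nvme_ctrl_ops so the core can read and write controller registers without knowing how they are mapped. A sketch for a memory-mapped transport, reusing the hypothetical example_ctrl above; the PCI driver in this series implements the same callbacks against its BAR:

static int example_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	*val = readl(to_example_ctrl(ctrl)->bar + off);
	return 0;
}

static int example_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
	writel(val, to_example_ctrl(ctrl)->bar + off);
	return 0;
}

static const struct nvme_ctrl_ops example_ctrl_ops = {
	.reg_read32	= example_reg_read32,
	.reg_write32	= example_reg_write32,
	/* .reg_read64, .io_incapable, .reset_ctrl and .free_ctrl are omitted
	 * from this sketch; a real transport must provide them all. */
};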
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f5c0e2613c7c..72ef8322d32a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c | |||
@@ -12,6 +12,7 @@ | |||
12 | * more details. | 12 | * more details. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/aer.h> | ||
15 | #include <linux/bitops.h> | 16 | #include <linux/bitops.h> |
16 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
17 | #include <linux/blk-mq.h> | 18 | #include <linux/blk-mq.h> |
@@ -28,10 +29,10 @@ | |||
28 | #include <linux/kdev_t.h> | 29 | #include <linux/kdev_t.h> |
29 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
30 | #include <linux/kernel.h> | 31 | #include <linux/kernel.h> |
31 | #include <linux/list_sort.h> | ||
32 | #include <linux/mm.h> | 32 | #include <linux/mm.h> |
33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
34 | #include <linux/moduleparam.h> | 34 | #include <linux/moduleparam.h> |
35 | #include <linux/mutex.h> | ||
35 | #include <linux/pci.h> | 36 | #include <linux/pci.h> |
36 | #include <linux/poison.h> | 37 | #include <linux/poison.h> |
37 | #include <linux/ptrace.h> | 38 | #include <linux/ptrace.h> |
@@ -39,23 +40,24 @@ | |||
39 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
40 | #include <linux/t10-pi.h> | 41 | #include <linux/t10-pi.h> |
41 | #include <linux/types.h> | 42 | #include <linux/types.h> |
42 | #include <linux/pr.h> | ||
43 | #include <scsi/sg.h> | ||
44 | #include <linux/io-64-nonatomic-lo-hi.h> | 43 | #include <linux/io-64-nonatomic-lo-hi.h> |
45 | #include <asm/unaligned.h> | 44 | #include <asm/unaligned.h> |
46 | 45 | ||
47 | #include <uapi/linux/nvme_ioctl.h> | ||
48 | #include "nvme.h" | 46 | #include "nvme.h" |
49 | 47 | ||
50 | #define NVME_MINORS (1U << MINORBITS) | ||
51 | #define NVME_Q_DEPTH 1024 | 48 | #define NVME_Q_DEPTH 1024 |
52 | #define NVME_AQ_DEPTH 256 | 49 | #define NVME_AQ_DEPTH 256 |
53 | #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) | 50 | #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) |
54 | #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) | 51 | #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) |
55 | #define ADMIN_TIMEOUT (admin_timeout * HZ) | 52 | |
56 | #define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ) | 53 | /* |
54 | * We handle AEN commands ourselves and don't even let the | ||
55 | * block layer know about them. | ||
56 | */ | ||
57 | #define NVME_NR_AEN_COMMANDS 1 | ||
58 | #define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) | ||
57 | 59 | ||
58 | static unsigned char admin_timeout = 60; | 60 | unsigned char admin_timeout = 60; |
59 | module_param(admin_timeout, byte, 0644); | 61 | module_param(admin_timeout, byte, 0644); |
60 | MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); | 62 | MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); |
61 | 63 | ||
@@ -63,16 +65,10 @@ unsigned char nvme_io_timeout = 30; | |||
63 | module_param_named(io_timeout, nvme_io_timeout, byte, 0644); | 65 | module_param_named(io_timeout, nvme_io_timeout, byte, 0644); |
64 | MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); | 66 | MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); |
65 | 67 | ||
66 | static unsigned char shutdown_timeout = 5; | 68 | unsigned char shutdown_timeout = 5; |
67 | module_param(shutdown_timeout, byte, 0644); | 69 | module_param(shutdown_timeout, byte, 0644); |
68 | MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); | 70 | MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); |
69 | 71 | ||
70 | static int nvme_major; | ||
71 | module_param(nvme_major, int, 0); | ||
72 | |||
73 | static int nvme_char_major; | ||
74 | module_param(nvme_char_major, int, 0); | ||
75 | |||
76 | static int use_threaded_interrupts; | 72 | static int use_threaded_interrupts; |
77 | module_param(use_threaded_interrupts, int, 0); | 73 | module_param(use_threaded_interrupts, int, 0); |
78 | 74 | ||
@@ -80,28 +76,60 @@ static bool use_cmb_sqes = true; | |||
80 | module_param(use_cmb_sqes, bool, 0644); | 76 | module_param(use_cmb_sqes, bool, 0644); |
81 | MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); | 77 | MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); |
82 | 78 | ||
83 | static DEFINE_SPINLOCK(dev_list_lock); | ||
84 | static LIST_HEAD(dev_list); | 79 | static LIST_HEAD(dev_list); |
85 | static struct task_struct *nvme_thread; | 80 | static struct task_struct *nvme_thread; |
86 | static struct workqueue_struct *nvme_workq; | 81 | static struct workqueue_struct *nvme_workq; |
87 | static wait_queue_head_t nvme_kthread_wait; | 82 | static wait_queue_head_t nvme_kthread_wait; |
88 | 83 | ||
89 | static struct class *nvme_class; | 84 | struct nvme_dev; |
85 | struct nvme_queue; | ||
90 | 86 | ||
91 | static int __nvme_reset(struct nvme_dev *dev); | ||
92 | static int nvme_reset(struct nvme_dev *dev); | 87 | static int nvme_reset(struct nvme_dev *dev); |
93 | static void nvme_process_cq(struct nvme_queue *nvmeq); | 88 | static void nvme_process_cq(struct nvme_queue *nvmeq); |
94 | static void nvme_dead_ctrl(struct nvme_dev *dev); | 89 | static void nvme_remove_dead_ctrl(struct nvme_dev *dev); |
90 | static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); | ||
95 | 91 | ||
96 | struct async_cmd_info { | 92 | /* |
97 | struct kthread_work work; | 93 | * Represents an NVM Express device. Each nvme_dev is a PCI function. |
98 | struct kthread_worker *worker; | 94 | */ |
99 | struct request *req; | 95 | struct nvme_dev { |
100 | u32 result; | 96 | struct list_head node; |
101 | int status; | 97 | struct nvme_queue **queues; |
102 | void *ctx; | 98 | struct blk_mq_tag_set tagset; |
99 | struct blk_mq_tag_set admin_tagset; | ||
100 | u32 __iomem *dbs; | ||
101 | struct device *dev; | ||
102 | struct dma_pool *prp_page_pool; | ||
103 | struct dma_pool *prp_small_pool; | ||
104 | unsigned queue_count; | ||
105 | unsigned online_queues; | ||
106 | unsigned max_qid; | ||
107 | int q_depth; | ||
108 | u32 db_stride; | ||
109 | struct msix_entry *entry; | ||
110 | void __iomem *bar; | ||
111 | struct work_struct reset_work; | ||
112 | struct work_struct scan_work; | ||
113 | struct work_struct remove_work; | ||
114 | struct mutex shutdown_lock; | ||
115 | bool subsystem; | ||
116 | void __iomem *cmb; | ||
117 | dma_addr_t cmb_dma_addr; | ||
118 | u64 cmb_size; | ||
119 | u32 cmbsz; | ||
120 | unsigned long flags; | ||
121 | |||
122 | #define NVME_CTRL_RESETTING 0 | ||
123 | |||
124 | struct nvme_ctrl ctrl; | ||
125 | struct completion ioq_wait; | ||
103 | }; | 126 | }; |
104 | 127 | ||
128 | static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) | ||
129 | { | ||
130 | return container_of(ctrl, struct nvme_dev, ctrl); | ||
131 | } | ||
132 | |||
105 | /* | 133 | /* |
106 | * An NVM Express queue. Each device has at least two (one for admin | 134 | * An NVM Express queue. Each device has at least two (one for admin |
107 | * commands and one for I/O commands). | 135 | * commands and one for I/O commands). |
@@ -126,7 +154,24 @@ struct nvme_queue { | |||
126 | u16 qid; | 154 | u16 qid; |
127 | u8 cq_phase; | 155 | u8 cq_phase; |
128 | u8 cqe_seen; | 156 | u8 cqe_seen; |
129 | struct async_cmd_info cmdinfo; | 157 | }; |
158 | |||
159 | /* | ||
160 | * The nvme_iod describes the data in an I/O, including the list of PRP | ||
161 | * entries. You can't see it in this data structure because C doesn't let | ||
162 | * me express that. Use nvme_init_iod to ensure there's enough space | ||
163 | * allocated to store the PRP list. | ||
164 | */ | ||
165 | struct nvme_iod { | ||
166 | struct nvme_queue *nvmeq; | ||
167 | int aborted; | ||
168 | int npages; /* In the PRP list. 0 means small pool in use */ | ||
169 | int nents; /* Used in scatterlist */ | ||
170 | int length; /* Of data, in bytes */ | ||
171 | dma_addr_t first_dma; | ||
172 | struct scatterlist meta_sg; /* metadata requires single contiguous buffer */ | ||
173 | struct scatterlist *sg; | ||
174 | struct scatterlist inline_sg[0]; | ||
130 | }; | 175 | }; |
131 | 176 | ||
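The nvme_iod now sits at the front of the blk-mq per-request data, with the scatterlist and the PRP-list pointer area trailing it; nvme_cmd_size() below reserves exactly that much. A rough layout sketch for the inline (small-request) case:

/*
 *   +-----------------+---------------------------+---------------------+
 *   | struct nvme_iod | inline_sg[NVME_INT_PAGES] | PRP-list page ptrs  |
 *   +-----------------+---------------------------+---------------------+
 *
 * Small requests point iod->sg at inline_sg; larger ones get a separate
 * kmalloc'ed scatterlist plus pointer area in nvme_init_iod(), and
 * iod_list() always finds the PRP-list pointers behind the sg entries.
 */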
132 | /* | 177 | /* |
@@ -148,23 +193,11 @@ static inline void _nvme_check_size(void) | |||
148 | BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); | 193 | BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); |
149 | } | 194 | } |
150 | 195 | ||
151 | typedef void (*nvme_completion_fn)(struct nvme_queue *, void *, | ||
152 | struct nvme_completion *); | ||
153 | |||
154 | struct nvme_cmd_info { | ||
155 | nvme_completion_fn fn; | ||
156 | void *ctx; | ||
157 | int aborted; | ||
158 | struct nvme_queue *nvmeq; | ||
159 | struct nvme_iod iod[0]; | ||
160 | }; | ||
161 | |||
162 | /* | 196 | /* |
163 | * Max size of iod being embedded in the request payload | 197 | * Max size of iod being embedded in the request payload |
164 | */ | 198 | */ |
165 | #define NVME_INT_PAGES 2 | 199 | #define NVME_INT_PAGES 2 |
166 | #define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size) | 200 | #define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->ctrl.page_size) |
167 | #define NVME_INT_MASK 0x01 | ||
168 | 201 | ||
169 | /* | 202 | /* |
170 | * Will slightly overestimate the number of pages needed. This is OK | 203 | * Will slightly overestimate the number of pages needed. This is OK |
@@ -173,19 +206,22 @@ struct nvme_cmd_info { | |||
173 | */ | 206 | */ |
174 | static int nvme_npages(unsigned size, struct nvme_dev *dev) | 207 | static int nvme_npages(unsigned size, struct nvme_dev *dev) |
175 | { | 208 | { |
176 | unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size); | 209 | unsigned nprps = DIV_ROUND_UP(size + dev->ctrl.page_size, |
210 | dev->ctrl.page_size); | ||
177 | return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); | 211 | return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); |
178 | } | 212 | } |
179 | 213 | ||
180 | static unsigned int nvme_cmd_size(struct nvme_dev *dev) | 214 | static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev, |
215 | unsigned int size, unsigned int nseg) | ||
181 | { | 216 | { |
182 | unsigned int ret = sizeof(struct nvme_cmd_info); | 217 | return sizeof(__le64 *) * nvme_npages(size, dev) + |
183 | 218 | sizeof(struct scatterlist) * nseg; | |
184 | ret += sizeof(struct nvme_iod); | 219 | } |
185 | ret += sizeof(__le64 *) * nvme_npages(NVME_INT_BYTES(dev), dev); | ||
186 | ret += sizeof(struct scatterlist) * NVME_INT_PAGES; | ||
187 | 220 | ||
188 | return ret; | 221 | static unsigned int nvme_cmd_size(struct nvme_dev *dev) |
222 | { | ||
223 | return sizeof(struct nvme_iod) + | ||
224 | nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES); | ||
189 | } | 225 | } |
190 | 226 | ||
191 | static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, | 227 | static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
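A worked example of the sizing above, assuming a 4 KiB controller page size and a 32-byte struct scatterlist (x86-64 without CONFIG_DEBUG_SG); the exact figures vary with configuration:

/*
 *   NVME_INT_BYTES(dev)               = 2 * 4096 = 8192 bytes of inline I/O
 *   nvme_npages(8192, dev):     nprps = DIV_ROUND_UP(8192 + 4096, 4096) = 3
 *                              return   DIV_ROUND_UP(8 * 3, 4096 - 8)   = 1
 *   nvme_iod_alloc_size(dev, 8192, 2) = 1 * sizeof(__le64 *) + 2 * 32   = 72
 *   nvme_cmd_size(dev)                = sizeof(struct nvme_iod) + 72
 *
 * Requests with more than NVME_INT_PAGES segments or more than
 * NVME_INT_BYTES of data take the kmalloc path in nvme_init_iod() instead.
 */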
@@ -215,11 +251,11 @@ static int nvme_admin_init_request(void *data, struct request *req, | |||
215 | unsigned int numa_node) | 251 | unsigned int numa_node) |
216 | { | 252 | { |
217 | struct nvme_dev *dev = data; | 253 | struct nvme_dev *dev = data; |
218 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); | 254 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
219 | struct nvme_queue *nvmeq = dev->queues[0]; | 255 | struct nvme_queue *nvmeq = dev->queues[0]; |
220 | 256 | ||
221 | BUG_ON(!nvmeq); | 257 | BUG_ON(!nvmeq); |
222 | cmd->nvmeq = nvmeq; | 258 | iod->nvmeq = nvmeq; |
223 | return 0; | 259 | return 0; |
224 | } | 260 | } |
225 | 261 | ||
@@ -242,148 +278,36 @@ static int nvme_init_request(void *data, struct request *req, | |||
242 | unsigned int numa_node) | 278 | unsigned int numa_node) |
243 | { | 279 | { |
244 | struct nvme_dev *dev = data; | 280 | struct nvme_dev *dev = data; |
245 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); | 281 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
246 | struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; | 282 | struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; |
247 | 283 | ||
248 | BUG_ON(!nvmeq); | 284 | BUG_ON(!nvmeq); |
249 | cmd->nvmeq = nvmeq; | 285 | iod->nvmeq = nvmeq; |
250 | return 0; | 286 | return 0; |
251 | } | 287 | } |
252 | 288 | ||
253 | static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx, | 289 | static void nvme_complete_async_event(struct nvme_dev *dev, |
254 | nvme_completion_fn handler) | 290 | struct nvme_completion *cqe) |
255 | { | ||
256 | cmd->fn = handler; | ||
257 | cmd->ctx = ctx; | ||
258 | cmd->aborted = 0; | ||
259 | blk_mq_start_request(blk_mq_rq_from_pdu(cmd)); | ||
260 | } | ||
261 | |||
262 | static void *iod_get_private(struct nvme_iod *iod) | ||
263 | { | ||
264 | return (void *) (iod->private & ~0x1UL); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * If bit 0 is set, the iod is embedded in the request payload. | ||
269 | */ | ||
270 | static bool iod_should_kfree(struct nvme_iod *iod) | ||
271 | { | ||
272 | return (iod->private & NVME_INT_MASK) == 0; | ||
273 | } | ||
274 | |||
275 | /* Special values must be less than 0x1000 */ | ||
276 | #define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA) | ||
277 | #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) | ||
278 | #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) | ||
279 | #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) | ||
280 | |||
281 | static void special_completion(struct nvme_queue *nvmeq, void *ctx, | ||
282 | struct nvme_completion *cqe) | ||
283 | { | ||
284 | if (ctx == CMD_CTX_CANCELLED) | ||
285 | return; | ||
286 | if (ctx == CMD_CTX_COMPLETED) { | ||
287 | dev_warn(nvmeq->q_dmadev, | ||
288 | "completed id %d twice on queue %d\n", | ||
289 | cqe->command_id, le16_to_cpup(&cqe->sq_id)); | ||
290 | return; | ||
291 | } | ||
292 | if (ctx == CMD_CTX_INVALID) { | ||
293 | dev_warn(nvmeq->q_dmadev, | ||
294 | "invalid id %d completed on queue %d\n", | ||
295 | cqe->command_id, le16_to_cpup(&cqe->sq_id)); | ||
296 | return; | ||
297 | } | ||
298 | dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx); | ||
299 | } | ||
300 | |||
301 | static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn) | ||
302 | { | ||
303 | void *ctx; | ||
304 | |||
305 | if (fn) | ||
306 | *fn = cmd->fn; | ||
307 | ctx = cmd->ctx; | ||
308 | cmd->fn = special_completion; | ||
309 | cmd->ctx = CMD_CTX_CANCELLED; | ||
310 | return ctx; | ||
311 | } | ||
312 | |||
313 | static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, | ||
314 | struct nvme_completion *cqe) | ||
315 | { | 291 | { |
316 | u32 result = le32_to_cpup(&cqe->result); | 292 | u16 status = le16_to_cpu(cqe->status) >> 1; |
317 | u16 status = le16_to_cpup(&cqe->status) >> 1; | 293 | u32 result = le32_to_cpu(cqe->result); |
318 | 294 | ||
319 | if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) | 295 | if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) |
320 | ++nvmeq->dev->event_limit; | 296 | ++dev->ctrl.event_limit; |
321 | if (status != NVME_SC_SUCCESS) | 297 | if (status != NVME_SC_SUCCESS) |
322 | return; | 298 | return; |
323 | 299 | ||
324 | switch (result & 0xff07) { | 300 | switch (result & 0xff07) { |
325 | case NVME_AER_NOTICE_NS_CHANGED: | 301 | case NVME_AER_NOTICE_NS_CHANGED: |
326 | dev_info(nvmeq->q_dmadev, "rescanning\n"); | 302 | dev_info(dev->dev, "rescanning\n"); |
327 | schedule_work(&nvmeq->dev->scan_work); | 303 | queue_work(nvme_workq, &dev->scan_work); |
328 | default: | 304 | default: |
329 | dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result); | 305 | dev_warn(dev->dev, "async event result %08x\n", result); |
330 | } | 306 | } |
331 | } | 307 | } |
332 | 308 | ||
333 | static void abort_completion(struct nvme_queue *nvmeq, void *ctx, | ||
334 | struct nvme_completion *cqe) | ||
335 | { | ||
336 | struct request *req = ctx; | ||
337 | |||
338 | u16 status = le16_to_cpup(&cqe->status) >> 1; | ||
339 | u32 result = le32_to_cpup(&cqe->result); | ||
340 | |||
341 | blk_mq_free_request(req); | ||
342 | |||
343 | dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); | ||
344 | ++nvmeq->dev->abort_limit; | ||
345 | } | ||
346 | |||
347 | static void async_completion(struct nvme_queue *nvmeq, void *ctx, | ||
348 | struct nvme_completion *cqe) | ||
349 | { | ||
350 | struct async_cmd_info *cmdinfo = ctx; | ||
351 | cmdinfo->result = le32_to_cpup(&cqe->result); | ||
352 | cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; | ||
353 | queue_kthread_work(cmdinfo->worker, &cmdinfo->work); | ||
354 | blk_mq_free_request(cmdinfo->req); | ||
355 | } | ||
356 | |||
357 | static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq, | ||
358 | unsigned int tag) | ||
359 | { | ||
360 | struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag); | ||
361 | |||
362 | return blk_mq_rq_to_pdu(req); | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * Called with local interrupts disabled and the q_lock held. May not sleep. | ||
367 | */ | ||
368 | static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag, | ||
369 | nvme_completion_fn *fn) | ||
370 | { | ||
371 | struct nvme_cmd_info *cmd = get_cmd_from_tag(nvmeq, tag); | ||
372 | void *ctx; | ||
373 | if (tag >= nvmeq->q_depth) { | ||
374 | *fn = special_completion; | ||
375 | return CMD_CTX_INVALID; | ||
376 | } | ||
377 | if (fn) | ||
378 | *fn = cmd->fn; | ||
379 | ctx = cmd->ctx; | ||
380 | cmd->fn = special_completion; | ||
381 | cmd->ctx = CMD_CTX_COMPLETED; | ||
382 | return ctx; | ||
383 | } | ||
384 | |||
385 | /** | 309 | /** |
386 | * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell | 310 | * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell |
387 | * @nvmeq: The queue to use | 311 | * @nvmeq: The queue to use |
388 | * @cmd: The command to send | 312 | * @cmd: The command to send |
389 | * | 313 | * |
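With the ctx/fn pairs and the special-completion sentinels gone, the command_id written at submission time is simply the blk-mq tag, so a completion entry maps straight back to its request. A rough sketch of that shape (the real nvme_process_cq in this series additionally special-cases AEN tags and checks the phase bit):

static void example_handle_cqe(struct nvme_queue *nvmeq,
			       struct nvme_completion *cqe)
{
	int error = nvme_error_status(le16_to_cpu(cqe->status) >> 1);
	struct request *req;

	/* cqe->command_id was set to req->tag at submission time */
	req = blk_mq_tag_to_rq(*nvmeq->tags, le16_to_cpu(cqe->command_id));

	/* hand the raw result back to passthrough submitters */
	if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
		memcpy(req->special, cqe, sizeof(*cqe));

	blk_mq_complete_request(req, error);
}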
@@ -405,69 +329,44 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, | |||
405 | nvmeq->sq_tail = tail; | 329 | nvmeq->sq_tail = tail; |
406 | } | 330 | } |
407 | 331 | ||
408 | static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) | 332 | static __le64 **iod_list(struct request *req) |
409 | { | ||
410 | unsigned long flags; | ||
411 | spin_lock_irqsave(&nvmeq->q_lock, flags); | ||
412 | __nvme_submit_cmd(nvmeq, cmd); | ||
413 | spin_unlock_irqrestore(&nvmeq->q_lock, flags); | ||
414 | } | ||
415 | |||
416 | static __le64 **iod_list(struct nvme_iod *iod) | ||
417 | { | 333 | { |
418 | return ((void *)iod) + iod->offset; | 334 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
335 | return (__le64 **)(iod->sg + req->nr_phys_segments); | ||
419 | } | 336 | } |
420 | 337 | ||
421 | static inline void iod_init(struct nvme_iod *iod, unsigned nbytes, | 338 | static int nvme_init_iod(struct request *rq, struct nvme_dev *dev) |
422 | unsigned nseg, unsigned long private) | ||
423 | { | 339 | { |
424 | iod->private = private; | 340 | struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); |
425 | iod->offset = offsetof(struct nvme_iod, sg[nseg]); | 341 | int nseg = rq->nr_phys_segments; |
426 | iod->npages = -1; | 342 | unsigned size; |
427 | iod->length = nbytes; | ||
428 | iod->nents = 0; | ||
429 | } | ||
430 | |||
431 | static struct nvme_iod * | ||
432 | __nvme_alloc_iod(unsigned nseg, unsigned bytes, struct nvme_dev *dev, | ||
433 | unsigned long priv, gfp_t gfp) | ||
434 | { | ||
435 | struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) + | ||
436 | sizeof(__le64 *) * nvme_npages(bytes, dev) + | ||
437 | sizeof(struct scatterlist) * nseg, gfp); | ||
438 | |||
439 | if (iod) | ||
440 | iod_init(iod, bytes, nseg, priv); | ||
441 | |||
442 | return iod; | ||
443 | } | ||
444 | |||
445 | static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, | ||
446 | gfp_t gfp) | ||
447 | { | ||
448 | unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) : | ||
449 | sizeof(struct nvme_dsm_range); | ||
450 | struct nvme_iod *iod; | ||
451 | 343 | ||
452 | if (rq->nr_phys_segments <= NVME_INT_PAGES && | 344 | if (rq->cmd_flags & REQ_DISCARD) |
453 | size <= NVME_INT_BYTES(dev)) { | 345 | size = sizeof(struct nvme_dsm_range); |
454 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq); | 346 | else |
347 | size = blk_rq_bytes(rq); | ||
455 | 348 | ||
456 | iod = cmd->iod; | 349 | if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { |
457 | iod_init(iod, size, rq->nr_phys_segments, | 350 | iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); |
458 | (unsigned long) rq | NVME_INT_MASK); | 351 | if (!iod->sg) |
459 | return iod; | 352 | return BLK_MQ_RQ_QUEUE_BUSY; |
353 | } else { | ||
354 | iod->sg = iod->inline_sg; | ||
460 | } | 355 | } |
461 | 356 | ||
462 | return __nvme_alloc_iod(rq->nr_phys_segments, size, dev, | 357 | iod->aborted = 0; |
463 | (unsigned long) rq, gfp); | 358 | iod->npages = -1; |
359 | iod->nents = 0; | ||
360 | iod->length = size; | ||
361 | return 0; | ||
464 | } | 362 | } |
465 | 363 | ||
466 | static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) | 364 | static void nvme_free_iod(struct nvme_dev *dev, struct request *req) |
467 | { | 365 | { |
468 | const int last_prp = dev->page_size / 8 - 1; | 366 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
367 | const int last_prp = dev->ctrl.page_size / 8 - 1; | ||
469 | int i; | 368 | int i; |
470 | __le64 **list = iod_list(iod); | 369 | __le64 **list = iod_list(req); |
471 | dma_addr_t prp_dma = iod->first_dma; | 370 | dma_addr_t prp_dma = iod->first_dma; |
472 | 371 | ||
473 | if (iod->npages == 0) | 372 | if (iod->npages == 0) |
@@ -479,20 +378,8 @@ static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) | |||
479 | prp_dma = next_prp_dma; | 378 | prp_dma = next_prp_dma; |
480 | } | 379 | } |
481 | 380 | ||
482 | if (iod_should_kfree(iod)) | 381 | if (iod->sg != iod->inline_sg) |
483 | kfree(iod); | 382 | kfree(iod->sg); |
484 | } | ||
485 | |||
486 | static int nvme_error_status(u16 status) | ||
487 | { | ||
488 | switch (status & 0x7ff) { | ||
489 | case NVME_SC_SUCCESS: | ||
490 | return 0; | ||
491 | case NVME_SC_CAP_EXCEEDED: | ||
492 | return -ENOSPC; | ||
493 | default: | ||
494 | return -EIO; | ||
495 | } | ||
496 | } | 383 | } |
497 | 384 | ||
498 | #ifdef CONFIG_BLK_DEV_INTEGRITY | 385 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
@@ -549,27 +436,6 @@ static void nvme_dif_remap(struct request *req, | |||
549 | } | 436 | } |
550 | kunmap_atomic(pmap); | 437 | kunmap_atomic(pmap); |
551 | } | 438 | } |
552 | |||
553 | static void nvme_init_integrity(struct nvme_ns *ns) | ||
554 | { | ||
555 | struct blk_integrity integrity; | ||
556 | |||
557 | switch (ns->pi_type) { | ||
558 | case NVME_NS_DPS_PI_TYPE3: | ||
559 | integrity.profile = &t10_pi_type3_crc; | ||
560 | break; | ||
561 | case NVME_NS_DPS_PI_TYPE1: | ||
562 | case NVME_NS_DPS_PI_TYPE2: | ||
563 | integrity.profile = &t10_pi_type1_crc; | ||
564 | break; | ||
565 | default: | ||
566 | integrity.profile = NULL; | ||
567 | break; | ||
568 | } | ||
569 | integrity.tuple_size = ns->ms; | ||
570 | blk_integrity_register(ns->disk, &integrity); | ||
571 | blk_queue_max_integrity_segments(ns->queue, 1); | ||
572 | } | ||
573 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | 439 | #else /* CONFIG_BLK_DEV_INTEGRITY */ |
574 | static void nvme_dif_remap(struct request *req, | 440 | static void nvme_dif_remap(struct request *req, |
575 | void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi)) | 441 | void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi)) |
@@ -581,91 +447,27 @@ static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi) | |||
581 | static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) | 447 | static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) |
582 | { | 448 | { |
583 | } | 449 | } |
584 | static void nvme_init_integrity(struct nvme_ns *ns) | ||
585 | { | ||
586 | } | ||
587 | #endif | 450 | #endif |
588 | 451 | ||
589 | static void req_completion(struct nvme_queue *nvmeq, void *ctx, | 452 | static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req, |
590 | struct nvme_completion *cqe) | 453 | int total_len) |
591 | { | ||
592 | struct nvme_iod *iod = ctx; | ||
593 | struct request *req = iod_get_private(iod); | ||
594 | struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); | ||
595 | u16 status = le16_to_cpup(&cqe->status) >> 1; | ||
596 | bool requeue = false; | ||
597 | int error = 0; | ||
598 | |||
599 | if (unlikely(status)) { | ||
600 | if (!(status & NVME_SC_DNR || blk_noretry_request(req)) | ||
601 | && (jiffies - req->start_time) < req->timeout) { | ||
602 | unsigned long flags; | ||
603 | |||
604 | requeue = true; | ||
605 | blk_mq_requeue_request(req); | ||
606 | spin_lock_irqsave(req->q->queue_lock, flags); | ||
607 | if (!blk_queue_stopped(req->q)) | ||
608 | blk_mq_kick_requeue_list(req->q); | ||
609 | spin_unlock_irqrestore(req->q->queue_lock, flags); | ||
610 | goto release_iod; | ||
611 | } | ||
612 | |||
613 | if (req->cmd_type == REQ_TYPE_DRV_PRIV) { | ||
614 | if (cmd_rq->ctx == CMD_CTX_CANCELLED) | ||
615 | error = -EINTR; | ||
616 | else | ||
617 | error = status; | ||
618 | } else { | ||
619 | error = nvme_error_status(status); | ||
620 | } | ||
621 | } | ||
622 | |||
623 | if (req->cmd_type == REQ_TYPE_DRV_PRIV) { | ||
624 | u32 result = le32_to_cpup(&cqe->result); | ||
625 | req->special = (void *)(uintptr_t)result; | ||
626 | } | ||
627 | |||
628 | if (cmd_rq->aborted) | ||
629 | dev_warn(nvmeq->dev->dev, | ||
630 | "completing aborted command with status:%04x\n", | ||
631 | error); | ||
632 | |||
633 | release_iod: | ||
634 | if (iod->nents) { | ||
635 | dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, | ||
636 | rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
637 | if (blk_integrity_rq(req)) { | ||
638 | if (!rq_data_dir(req)) | ||
639 | nvme_dif_remap(req, nvme_dif_complete); | ||
640 | dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1, | ||
641 | rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
642 | } | ||
643 | } | ||
644 | nvme_free_iod(nvmeq->dev, iod); | ||
645 | |||
646 | if (likely(!requeue)) | ||
647 | blk_mq_complete_request(req, error); | ||
648 | } | ||
649 | |||
650 | /* length is in bytes. gfp flags indicates whether we may sleep. */ | ||
651 | static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, | ||
652 | int total_len, gfp_t gfp) | ||
653 | { | 454 | { |
455 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | ||
654 | struct dma_pool *pool; | 456 | struct dma_pool *pool; |
655 | int length = total_len; | 457 | int length = total_len; |
656 | struct scatterlist *sg = iod->sg; | 458 | struct scatterlist *sg = iod->sg; |
657 | int dma_len = sg_dma_len(sg); | 459 | int dma_len = sg_dma_len(sg); |
658 | u64 dma_addr = sg_dma_address(sg); | 460 | u64 dma_addr = sg_dma_address(sg); |
659 | u32 page_size = dev->page_size; | 461 | u32 page_size = dev->ctrl.page_size; |
660 | int offset = dma_addr & (page_size - 1); | 462 | int offset = dma_addr & (page_size - 1); |
661 | __le64 *prp_list; | 463 | __le64 *prp_list; |
662 | __le64 **list = iod_list(iod); | 464 | __le64 **list = iod_list(req); |
663 | dma_addr_t prp_dma; | 465 | dma_addr_t prp_dma; |
664 | int nprps, i; | 466 | int nprps, i; |
665 | 467 | ||
666 | length -= (page_size - offset); | 468 | length -= (page_size - offset); |
667 | if (length <= 0) | 469 | if (length <= 0) |
668 | return total_len; | 470 | return true; |
669 | 471 | ||
670 | dma_len -= (page_size - offset); | 472 | dma_len -= (page_size - offset); |
671 | if (dma_len) { | 473 | if (dma_len) { |
@@ -678,7 +480,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, | |||
678 | 480 | ||
679 | if (length <= page_size) { | 481 | if (length <= page_size) { |
680 | iod->first_dma = dma_addr; | 482 | iod->first_dma = dma_addr; |
681 | return total_len; | 483 | return true; |
682 | } | 484 | } |
683 | 485 | ||
684 | nprps = DIV_ROUND_UP(length, page_size); | 486 | nprps = DIV_ROUND_UP(length, page_size); |
@@ -690,11 +492,11 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, | |||
690 | iod->npages = 1; | 492 | iod->npages = 1; |
691 | } | 493 | } |
692 | 494 | ||
693 | prp_list = dma_pool_alloc(pool, gfp, &prp_dma); | 495 | prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); |
694 | if (!prp_list) { | 496 | if (!prp_list) { |
695 | iod->first_dma = dma_addr; | 497 | iod->first_dma = dma_addr; |
696 | iod->npages = -1; | 498 | iod->npages = -1; |
697 | return (total_len - length) + page_size; | 499 | return false; |
698 | } | 500 | } |
699 | list[0] = prp_list; | 501 | list[0] = prp_list; |
700 | iod->first_dma = prp_dma; | 502 | iod->first_dma = prp_dma; |
@@ -702,9 +504,9 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, | |||
702 | for (;;) { | 504 | for (;;) { |
703 | if (i == page_size >> 3) { | 505 | if (i == page_size >> 3) { |
704 | __le64 *old_prp_list = prp_list; | 506 | __le64 *old_prp_list = prp_list; |
705 | prp_list = dma_pool_alloc(pool, gfp, &prp_dma); | 507 | prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); |
706 | if (!prp_list) | 508 | if (!prp_list) |
707 | return total_len - length; | 509 | return false; |
708 | list[iod->npages++] = prp_list; | 510 | list[iod->npages++] = prp_list; |
709 | prp_list[0] = old_prp_list[i - 1]; | 511 | prp_list[0] = old_prp_list[i - 1]; |
710 | old_prp_list[i - 1] = cpu_to_le64(prp_dma); | 512 | old_prp_list[i - 1] = cpu_to_le64(prp_dma); |
@@ -724,115 +526,105 @@ static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, | |||
724 | dma_len = sg_dma_len(sg); | 526 | dma_len = sg_dma_len(sg); |
725 | } | 527 | } |
726 | 528 | ||
727 | return total_len; | 529 | return true; |
728 | } | 530 | } |
729 | 531 | ||
730 | static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req, | 532 | static int nvme_map_data(struct nvme_dev *dev, struct request *req, |
731 | struct nvme_iod *iod) | 533 | struct nvme_command *cmnd) |
732 | { | 534 | { |
733 | struct nvme_command cmnd; | 535 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
536 | struct request_queue *q = req->q; | ||
537 | enum dma_data_direction dma_dir = rq_data_dir(req) ? | ||
538 | DMA_TO_DEVICE : DMA_FROM_DEVICE; | ||
539 | int ret = BLK_MQ_RQ_QUEUE_ERROR; | ||
734 | 540 | ||
735 | memcpy(&cmnd, req->cmd, sizeof(cmnd)); | 541 | sg_init_table(iod->sg, req->nr_phys_segments); |
736 | cmnd.rw.command_id = req->tag; | 542 | iod->nents = blk_rq_map_sg(q, req, iod->sg); |
737 | if (req->nr_phys_segments) { | 543 | if (!iod->nents) |
738 | cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); | 544 | goto out; |
739 | cmnd.rw.prp2 = cpu_to_le64(iod->first_dma); | ||
740 | } | ||
741 | 545 | ||
742 | __nvme_submit_cmd(nvmeq, &cmnd); | 546 | ret = BLK_MQ_RQ_QUEUE_BUSY; |
743 | } | 547 | if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir)) |
548 | goto out; | ||
744 | 549 | ||
745 | /* | 550 | if (!nvme_setup_prps(dev, req, blk_rq_bytes(req))) |
746 | * We reuse the small pool to allocate the 16-byte range here as it is not | 551 | goto out_unmap; |
747 | * worth having a special pool for these or additional cases to handle freeing | ||
748 | * the iod. | ||
749 | */ | ||
750 | static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, | ||
751 | struct request *req, struct nvme_iod *iod) | ||
752 | { | ||
753 | struct nvme_dsm_range *range = | ||
754 | (struct nvme_dsm_range *)iod_list(iod)[0]; | ||
755 | struct nvme_command cmnd; | ||
756 | 552 | ||
757 | range->cattr = cpu_to_le32(0); | 553 | ret = BLK_MQ_RQ_QUEUE_ERROR; |
758 | range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift); | 554 | if (blk_integrity_rq(req)) { |
759 | range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); | 555 | if (blk_rq_count_integrity_sg(q, req->bio) != 1) |
556 | goto out_unmap; | ||
760 | 557 | ||
761 | memset(&cmnd, 0, sizeof(cmnd)); | 558 | sg_init_table(&iod->meta_sg, 1); |
762 | cmnd.dsm.opcode = nvme_cmd_dsm; | 559 | if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) |
763 | cmnd.dsm.command_id = req->tag; | 560 | goto out_unmap; |
764 | cmnd.dsm.nsid = cpu_to_le32(ns->ns_id); | ||
765 | cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma); | ||
766 | cmnd.dsm.nr = 0; | ||
767 | cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); | ||
768 | 561 | ||
769 | __nvme_submit_cmd(nvmeq, &cmnd); | 562 | if (rq_data_dir(req)) |
770 | } | 563 | nvme_dif_remap(req, nvme_dif_prep); |
771 | 564 | ||
772 | static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, | 565 | if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) |
773 | int cmdid) | 566 | goto out_unmap; |
774 | { | 567 | } |
775 | struct nvme_command cmnd; | ||
776 | 568 | ||
777 | memset(&cmnd, 0, sizeof(cmnd)); | 569 | cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); |
778 | cmnd.common.opcode = nvme_cmd_flush; | 570 | cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); |
779 | cmnd.common.command_id = cmdid; | 571 | if (blk_integrity_rq(req)) |
780 | cmnd.common.nsid = cpu_to_le32(ns->ns_id); | 572 | cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); |
573 | return BLK_MQ_RQ_QUEUE_OK; | ||
781 | 574 | ||
782 | __nvme_submit_cmd(nvmeq, &cmnd); | 575 | out_unmap: |
576 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); | ||
577 | out: | ||
578 | return ret; | ||
783 | } | 579 | } |
784 | 580 | ||
785 | static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod, | 581 | static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) |
786 | struct nvme_ns *ns) | ||
787 | { | 582 | { |
788 | struct request *req = iod_get_private(iod); | 583 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
789 | struct nvme_command cmnd; | 584 | enum dma_data_direction dma_dir = rq_data_dir(req) ? |
790 | u16 control = 0; | 585 | DMA_TO_DEVICE : DMA_FROM_DEVICE; |
791 | u32 dsmgmt = 0; | 586 | |
792 | 587 | if (iod->nents) { | |
793 | if (req->cmd_flags & REQ_FUA) | 588 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); |
794 | control |= NVME_RW_FUA; | 589 | if (blk_integrity_rq(req)) { |
795 | if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) | 590 | if (!rq_data_dir(req)) |
796 | control |= NVME_RW_LR; | 591 | nvme_dif_remap(req, nvme_dif_complete); |
797 | 592 | dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); | |
798 | if (req->cmd_flags & REQ_RAHEAD) | ||
799 | dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; | ||
800 | |||
801 | memset(&cmnd, 0, sizeof(cmnd)); | ||
802 | cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); | ||
803 | cmnd.rw.command_id = req->tag; | ||
804 | cmnd.rw.nsid = cpu_to_le32(ns->ns_id); | ||
805 | cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); | ||
806 | cmnd.rw.prp2 = cpu_to_le64(iod->first_dma); | ||
807 | cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); | ||
808 | cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); | ||
809 | |||
810 | if (ns->ms) { | ||
811 | switch (ns->pi_type) { | ||
812 | case NVME_NS_DPS_PI_TYPE3: | ||
813 | control |= NVME_RW_PRINFO_PRCHK_GUARD; | ||
814 | break; | ||
815 | case NVME_NS_DPS_PI_TYPE1: | ||
816 | case NVME_NS_DPS_PI_TYPE2: | ||
817 | control |= NVME_RW_PRINFO_PRCHK_GUARD | | ||
818 | NVME_RW_PRINFO_PRCHK_REF; | ||
819 | cmnd.rw.reftag = cpu_to_le32( | ||
820 | nvme_block_nr(ns, blk_rq_pos(req))); | ||
821 | break; | ||
822 | } | 593 | } |
823 | if (blk_integrity_rq(req)) | ||
824 | cmnd.rw.metadata = | ||
825 | cpu_to_le64(sg_dma_address(iod->meta_sg)); | ||
826 | else | ||
827 | control |= NVME_RW_PRINFO_PRACT; | ||
828 | } | 594 | } |
829 | 595 | ||
830 | cmnd.rw.control = cpu_to_le16(control); | 596 | nvme_free_iod(dev, req); |
831 | cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt); | 597 | } |
832 | 598 | ||
833 | __nvme_submit_cmd(nvmeq, &cmnd); | 599 | /* |
600 | * We reuse the small pool to allocate the 16-byte range here as it is not | ||
601 | * worth having a special pool for these or additional cases to handle freeing | ||
602 | * the iod. | ||
603 | */ | ||
604 | static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, | ||
605 | struct request *req, struct nvme_command *cmnd) | ||
606 | { | ||
607 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | ||
608 | struct nvme_dsm_range *range; | ||
834 | 609 | ||
835 | return 0; | 610 | range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC, |
611 | &iod->first_dma); | ||
612 | if (!range) | ||
613 | return BLK_MQ_RQ_QUEUE_BUSY; | ||
614 | iod_list(req)[0] = (__le64 *)range; | ||
615 | iod->npages = 0; | ||
616 | |||
617 | range->cattr = cpu_to_le32(0); | ||
618 | range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift); | ||
619 | range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); | ||
620 | |||
621 | memset(cmnd, 0, sizeof(*cmnd)); | ||
622 | cmnd->dsm.opcode = nvme_cmd_dsm; | ||
623 | cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); | ||
624 | cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma); | ||
625 | cmnd->dsm.nr = 0; | ||
626 | cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); | ||
627 | return BLK_MQ_RQ_QUEUE_OK; | ||
836 | } | 628 | } |
837 | 629 | ||
838 | /* | 630 | /* |
@@ -845,9 +637,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
845 | struct nvme_queue *nvmeq = hctx->driver_data; | 637 | struct nvme_queue *nvmeq = hctx->driver_data; |
846 | struct nvme_dev *dev = nvmeq->dev; | 638 | struct nvme_dev *dev = nvmeq->dev; |
847 | struct request *req = bd->rq; | 639 | struct request *req = bd->rq; |
848 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); | 640 | struct nvme_command cmnd; |
849 | struct nvme_iod *iod; | 641 | int ret = BLK_MQ_RQ_QUEUE_OK; |
850 | enum dma_data_direction dma_dir; | ||
851 | 642 | ||
852 | /* | 643 | /* |
853 | * If formatted with metadata, require the block layer provide a buffer | 644 | * If formatted with metadata, require the block layer provide a buffer
@@ -857,91 +648,72 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
857 | if (ns && ns->ms && !blk_integrity_rq(req)) { | 648 | if (ns && ns->ms && !blk_integrity_rq(req)) { |
858 | if (!(ns->pi_type && ns->ms == 8) && | 649 | if (!(ns->pi_type && ns->ms == 8) && |
859 | req->cmd_type != REQ_TYPE_DRV_PRIV) { | 650 | req->cmd_type != REQ_TYPE_DRV_PRIV) { |
860 | blk_mq_complete_request(req, -EFAULT); | 651 | blk_mq_end_request(req, -EFAULT); |
861 | return BLK_MQ_RQ_QUEUE_OK; | 652 | return BLK_MQ_RQ_QUEUE_OK; |
862 | } | 653 | } |
863 | } | 654 | } |
864 | 655 | ||
865 | iod = nvme_alloc_iod(req, dev, GFP_ATOMIC); | 656 | ret = nvme_init_iod(req, dev); |
866 | if (!iod) | 657 | if (ret) |
867 | return BLK_MQ_RQ_QUEUE_BUSY; | 658 | return ret; |
868 | 659 | ||
869 | if (req->cmd_flags & REQ_DISCARD) { | 660 | if (req->cmd_flags & REQ_DISCARD) { |
870 | void *range; | 661 | ret = nvme_setup_discard(nvmeq, ns, req, &cmnd); |
871 | /* | 662 | } else { |
872 | * We reuse the small pool to allocate the 16-byte range here | 663 | if (req->cmd_type == REQ_TYPE_DRV_PRIV) |
873 | * as it is not worth having a special pool for these or | 664 | memcpy(&cmnd, req->cmd, sizeof(cmnd)); |
874 | * additional cases to handle freeing the iod. | 665 | else if (req->cmd_flags & REQ_FLUSH) |
875 | */ | 666 | nvme_setup_flush(ns, &cmnd); |
876 | range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, | 667 | else |
877 | &iod->first_dma); | 668 | nvme_setup_rw(ns, req, &cmnd); |
878 | if (!range) | ||
879 | goto retry_cmd; | ||
880 | iod_list(iod)[0] = (__le64 *)range; | ||
881 | iod->npages = 0; | ||
882 | } else if (req->nr_phys_segments) { | ||
883 | dma_dir = rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; | ||
884 | 669 | ||
885 | sg_init_table(iod->sg, req->nr_phys_segments); | 670 | if (req->nr_phys_segments) |
886 | iod->nents = blk_rq_map_sg(req->q, req, iod->sg); | 671 | ret = nvme_map_data(dev, req, &cmnd); |
887 | if (!iod->nents) | 672 | } |
888 | goto error_cmd; | ||
889 | 673 | ||
890 | if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir)) | 674 | if (ret) |
891 | goto retry_cmd; | 675 | goto out; |
892 | 676 | ||
893 | if (blk_rq_bytes(req) != | 677 | cmnd.common.command_id = req->tag; |
894 | nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { | 678 | blk_mq_start_request(req); |
895 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); | ||
896 | goto retry_cmd; | ||
897 | } | ||
898 | if (blk_integrity_rq(req)) { | ||
899 | if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) { | ||
900 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, | ||
901 | dma_dir); | ||
902 | goto error_cmd; | ||
903 | } | ||
904 | 679 | ||
905 | sg_init_table(iod->meta_sg, 1); | 680 | spin_lock_irq(&nvmeq->q_lock); |
906 | if (blk_rq_map_integrity_sg( | 681 | __nvme_submit_cmd(nvmeq, &cmnd); |
907 | req->q, req->bio, iod->meta_sg) != 1) { | 682 | nvme_process_cq(nvmeq); |
908 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, | 683 | spin_unlock_irq(&nvmeq->q_lock); |
909 | dma_dir); | 684 | return BLK_MQ_RQ_QUEUE_OK; |
910 | goto error_cmd; | 685 | out: |
911 | } | 686 | nvme_free_iod(dev, req); |
687 | return ret; | ||
688 | } | ||
912 | 689 | ||
913 | if (rq_data_dir(req)) | 690 | static void nvme_complete_rq(struct request *req) |
914 | nvme_dif_remap(req, nvme_dif_prep); | 691 | { |
692 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); | ||
693 | struct nvme_dev *dev = iod->nvmeq->dev; | ||
694 | int error = 0; | ||
915 | 695 | ||
916 | if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) { | 696 | nvme_unmap_data(dev, req); |
917 | dma_unmap_sg(dev->dev, iod->sg, iod->nents, | 697 | |
918 | dma_dir); | 698 | if (unlikely(req->errors)) { |
919 | goto error_cmd; | 699 | if (nvme_req_needs_retry(req, req->errors)) { |
920 | } | 700 | nvme_requeue_req(req); |
701 | return; | ||
921 | } | 702 | } |
922 | } | ||
923 | 703 | ||
924 | nvme_set_info(cmd, iod, req_completion); | 704 | if (req->cmd_type == REQ_TYPE_DRV_PRIV) |
925 | spin_lock_irq(&nvmeq->q_lock); | 705 | error = req->errors; |
926 | if (req->cmd_type == REQ_TYPE_DRV_PRIV) | 706 | else |
927 | nvme_submit_priv(nvmeq, req, iod); | 707 | error = nvme_error_status(req->errors); |
928 | else if (req->cmd_flags & REQ_DISCARD) | 708 | } |
929 | nvme_submit_discard(nvmeq, ns, req, iod); | ||
930 | else if (req->cmd_flags & REQ_FLUSH) | ||
931 | nvme_submit_flush(nvmeq, ns, req->tag); | ||
932 | else | ||
933 | nvme_submit_iod(nvmeq, iod, ns); | ||
934 | 709 | ||
935 | nvme_process_cq(nvmeq); | 710 | if (unlikely(iod->aborted)) { |
936 | spin_unlock_irq(&nvmeq->q_lock); | 711 | dev_warn(dev->dev, |
937 | return BLK_MQ_RQ_QUEUE_OK; | 712 | "completing aborted command with status: %04x\n", |
713 | req->errors); | ||
714 | } | ||
938 | 715 | ||
939 | error_cmd: | 716 | blk_mq_end_request(req, error); |
940 | nvme_free_iod(dev, iod); | ||
941 | return BLK_MQ_RQ_QUEUE_ERROR; | ||
942 | retry_cmd: | ||
943 | nvme_free_iod(dev, iod); | ||
944 | return BLK_MQ_RQ_QUEUE_BUSY; | ||
945 | } | 717 | } |
946 | 718 | ||
947 | static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) | 719 | static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) |
@@ -952,20 +724,47 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) | |||
952 | phase = nvmeq->cq_phase; | 724 | phase = nvmeq->cq_phase; |
953 | 725 | ||
954 | for (;;) { | 726 | for (;;) { |
955 | void *ctx; | ||
956 | nvme_completion_fn fn; | ||
957 | struct nvme_completion cqe = nvmeq->cqes[head]; | 727 | struct nvme_completion cqe = nvmeq->cqes[head]; |
958 | if ((le16_to_cpu(cqe.status) & 1) != phase) | 728 | u16 status = le16_to_cpu(cqe.status); |
729 | struct request *req; | ||
730 | |||
731 | if ((status & 1) != phase) | ||
959 | break; | 732 | break; |
960 | nvmeq->sq_head = le16_to_cpu(cqe.sq_head); | 733 | nvmeq->sq_head = le16_to_cpu(cqe.sq_head); |
961 | if (++head == nvmeq->q_depth) { | 734 | if (++head == nvmeq->q_depth) { |
962 | head = 0; | 735 | head = 0; |
963 | phase = !phase; | 736 | phase = !phase; |
964 | } | 737 | } |
738 | |||
965 | if (tag && *tag == cqe.command_id) | 739 | if (tag && *tag == cqe.command_id) |
966 | *tag = -1; | 740 | *tag = -1; |
967 | ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn); | 741 | |
968 | fn(nvmeq, ctx, &cqe); | 742 | if (unlikely(cqe.command_id >= nvmeq->q_depth)) { |
743 | dev_warn(nvmeq->q_dmadev, | ||
744 | "invalid id %d completed on queue %d\n", | ||
745 | cqe.command_id, le16_to_cpu(cqe.sq_id)); | ||
746 | continue; | ||
747 | } | ||
748 | |||
749 | /* | ||
750 | * AEN requests are special as they don't time out and can | ||
751 | * survive any kind of queue freeze and often don't respond to | ||
752 | * aborts. We don't even bother to allocate a struct request | ||
753 | * for them but rather special case them here. | ||
754 | */ | ||
755 | if (unlikely(nvmeq->qid == 0 && | ||
756 | cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) { | ||
757 | nvme_complete_async_event(nvmeq->dev, &cqe); | ||
758 | continue; | ||
759 | } | ||
760 | |||
761 | req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); | ||
762 | if (req->cmd_type == REQ_TYPE_DRV_PRIV) { | ||
763 | u32 result = le32_to_cpu(cqe.result); | ||
764 | req->special = (void *)(uintptr_t)result; | ||
765 | } | ||
766 | blk_mq_complete_request(req, status >> 1); | ||
767 | |||
969 | } | 768 | } |
970 | 769 | ||
971 | /* If the controller ignores the cq head doorbell and continuously | 770 | /* If the controller ignores the cq head doorbell and continuously |
@@ -1028,112 +827,15 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) | |||
1028 | return 0; | 827 | return 0; |
1029 | } | 828 | } |
1030 | 829 | ||
1031 | /* | 830 | static void nvme_submit_async_event(struct nvme_dev *dev) |
1032 | * Returns 0 on success. If the result is negative, it's a Linux error code; | ||
1033 | * if the result is positive, it's an NVM Express status code | ||
1034 | */ | ||
1035 | int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
1036 | void *buffer, void __user *ubuffer, unsigned bufflen, | ||
1037 | u32 *result, unsigned timeout) | ||
1038 | { | ||
1039 | bool write = cmd->common.opcode & 1; | ||
1040 | struct bio *bio = NULL; | ||
1041 | struct request *req; | ||
1042 | int ret; | ||
1043 | |||
1044 | req = blk_mq_alloc_request(q, write, 0); | ||
1045 | if (IS_ERR(req)) | ||
1046 | return PTR_ERR(req); | ||
1047 | |||
1048 | req->cmd_type = REQ_TYPE_DRV_PRIV; | ||
1049 | req->cmd_flags |= REQ_FAILFAST_DRIVER; | ||
1050 | req->__data_len = 0; | ||
1051 | req->__sector = (sector_t) -1; | ||
1052 | req->bio = req->biotail = NULL; | ||
1053 | |||
1054 | req->timeout = timeout ? timeout : ADMIN_TIMEOUT; | ||
1055 | |||
1056 | req->cmd = (unsigned char *)cmd; | ||
1057 | req->cmd_len = sizeof(struct nvme_command); | ||
1058 | req->special = (void *)0; | ||
1059 | |||
1060 | if (buffer && bufflen) { | ||
1061 | ret = blk_rq_map_kern(q, req, buffer, bufflen, | ||
1062 | __GFP_DIRECT_RECLAIM); | ||
1063 | if (ret) | ||
1064 | goto out; | ||
1065 | } else if (ubuffer && bufflen) { | ||
1066 | ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, | ||
1067 | __GFP_DIRECT_RECLAIM); | ||
1068 | if (ret) | ||
1069 | goto out; | ||
1070 | bio = req->bio; | ||
1071 | } | ||
1072 | |||
1073 | blk_execute_rq(req->q, NULL, req, 0); | ||
1074 | if (bio) | ||
1075 | blk_rq_unmap_user(bio); | ||
1076 | if (result) | ||
1077 | *result = (u32)(uintptr_t)req->special; | ||
1078 | ret = req->errors; | ||
1079 | out: | ||
1080 | blk_mq_free_request(req); | ||
1081 | return ret; | ||
1082 | } | ||
1083 | |||
1084 | int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, | ||
1085 | void *buffer, unsigned bufflen) | ||
1086 | { | ||
1087 | return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0); | ||
1088 | } | ||
1089 | |||
1090 | static int nvme_submit_async_admin_req(struct nvme_dev *dev) | ||
1091 | { | 831 | { |
1092 | struct nvme_queue *nvmeq = dev->queues[0]; | ||
1093 | struct nvme_command c; | 832 | struct nvme_command c; |
1094 | struct nvme_cmd_info *cmd_info; | ||
1095 | struct request *req; | ||
1096 | |||
1097 | req = blk_mq_alloc_request(dev->admin_q, WRITE, | ||
1098 | BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED); | ||
1099 | if (IS_ERR(req)) | ||
1100 | return PTR_ERR(req); | ||
1101 | |||
1102 | req->cmd_flags |= REQ_NO_TIMEOUT; | ||
1103 | cmd_info = blk_mq_rq_to_pdu(req); | ||
1104 | nvme_set_info(cmd_info, NULL, async_req_completion); | ||
1105 | 833 | ||
1106 | memset(&c, 0, sizeof(c)); | 834 | memset(&c, 0, sizeof(c)); |
1107 | c.common.opcode = nvme_admin_async_event; | 835 | c.common.opcode = nvme_admin_async_event; |
1108 | c.common.command_id = req->tag; | 836 | c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit; |
1109 | |||
1110 | blk_mq_free_request(req); | ||
1111 | __nvme_submit_cmd(nvmeq, &c); | ||
1112 | return 0; | ||
1113 | } | ||
1114 | |||
1115 | static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, | ||
1116 | struct nvme_command *cmd, | ||
1117 | struct async_cmd_info *cmdinfo, unsigned timeout) | ||
1118 | { | ||
1119 | struct nvme_queue *nvmeq = dev->queues[0]; | ||
1120 | struct request *req; | ||
1121 | struct nvme_cmd_info *cmd_rq; | ||
1122 | |||
1123 | req = blk_mq_alloc_request(dev->admin_q, WRITE, 0); | ||
1124 | if (IS_ERR(req)) | ||
1125 | return PTR_ERR(req); | ||
1126 | |||
1127 | req->timeout = timeout; | ||
1128 | cmd_rq = blk_mq_rq_to_pdu(req); | ||
1129 | cmdinfo->req = req; | ||
1130 | nvme_set_info(cmd_rq, cmdinfo, async_completion); | ||
1131 | cmdinfo->status = -EINTR; | ||
1132 | 837 | ||
1133 | cmd->common.command_id = req->tag; | 838 | __nvme_submit_cmd(dev->queues[0], &c); |
1134 | |||
1135 | nvme_submit_cmd(nvmeq, cmd); | ||
1136 | return 0; | ||
1137 | } | 839 | } |
1138 | 840 | ||
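The async-event machinery above hinges on partitioning the command-id space: ids below NVME_AQ_BLKMQ_DEPTH come from blk-mq tags, while nvme_submit_async_event() hands out ids at or above that depth, which is exactly what the special case in __nvme_process_cq() keys on. A rough, self-contained sketch of that classification; the depth constant and the helper are illustrative stand-ins, not the driver's definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define AQ_BLKMQ_DEPTH 30    /* stand-in for NVME_AQ_BLKMQ_DEPTH */

    enum cqe_kind { CQE_BLKMQ_TAG = 0, CQE_ASYNC_EVENT = 1 };

    /*
     * Admin-queue completions (qid 0) whose command id is at or above the
     * blk-mq depth were posted by the async-event path and have no struct
     * request behind them; everything else maps back to a blk-mq tag.
     */
    static enum cqe_kind classify(uint16_t qid, uint16_t command_id)
    {
            if (qid == 0 && command_id >= AQ_BLKMQ_DEPTH)
                    return CQE_ASYNC_EVENT;
            return CQE_BLKMQ_TAG;
    }

    int main(void)
    {
            printf("%d %d\n", classify(0, AQ_BLKMQ_DEPTH), classify(0, 5));
            return 0;
    }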
1139 | static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) | 841 | static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) |
@@ -1144,7 +846,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) | |||
1144 | c.delete_queue.opcode = opcode; | 846 | c.delete_queue.opcode = opcode; |
1145 | c.delete_queue.qid = cpu_to_le16(id); | 847 | c.delete_queue.qid = cpu_to_le16(id); |
1146 | 848 | ||
1147 | return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); | 849 | return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); |
1148 | } | 850 | } |
1149 | 851 | ||
1150 | static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, | 852 | static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, |
@@ -1165,7 +867,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, | |||
1165 | c.create_cq.cq_flags = cpu_to_le16(flags); | 867 | c.create_cq.cq_flags = cpu_to_le16(flags); |
1166 | c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); | 868 | c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); |
1167 | 869 | ||
1168 | return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); | 870 | return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); |
1169 | } | 871 | } |
1170 | 872 | ||
1171 | static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, | 873 | static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, |
@@ -1186,7 +888,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, | |||
1186 | c.create_sq.sq_flags = cpu_to_le16(flags); | 888 | c.create_sq.sq_flags = cpu_to_le16(flags); |
1187 | c.create_sq.cqid = cpu_to_le16(qid); | 889 | c.create_sq.cqid = cpu_to_le16(qid); |
1188 | 890 | ||
1189 | return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); | 891 | return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); |
1190 | } | 892 | } |
1191 | 893 | ||
1192 | static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) | 894 | static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) |
@@ -1199,195 +901,111 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) | |||
1199 | return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); | 901 | return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); |
1200 | } | 902 | } |
1201 | 903 | ||
1202 | int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) | 904 | static void abort_endio(struct request *req, int error) |
1203 | { | ||
1204 | struct nvme_command c = { }; | ||
1205 | int error; | ||
1206 | |||
1207 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ | ||
1208 | c.identify.opcode = nvme_admin_identify; | ||
1209 | c.identify.cns = cpu_to_le32(1); | ||
1210 | |||
1211 | *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); | ||
1212 | if (!*id) | ||
1213 | return -ENOMEM; | ||
1214 | |||
1215 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, | ||
1216 | sizeof(struct nvme_id_ctrl)); | ||
1217 | if (error) | ||
1218 | kfree(*id); | ||
1219 | return error; | ||
1220 | } | ||
1221 | |||
1222 | int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, | ||
1223 | struct nvme_id_ns **id) | ||
1224 | { | ||
1225 | struct nvme_command c = { }; | ||
1226 | int error; | ||
1227 | |||
1228 | /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ | ||
1229 | c.identify.opcode = nvme_admin_identify, | ||
1230 | c.identify.nsid = cpu_to_le32(nsid), | ||
1231 | |||
1232 | *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); | ||
1233 | if (!*id) | ||
1234 | return -ENOMEM; | ||
1235 | |||
1236 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, | ||
1237 | sizeof(struct nvme_id_ns)); | ||
1238 | if (error) | ||
1239 | kfree(*id); | ||
1240 | return error; | ||
1241 | } | ||
1242 | |||
1243 | int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, | ||
1244 | dma_addr_t dma_addr, u32 *result) | ||
1245 | { | ||
1246 | struct nvme_command c; | ||
1247 | |||
1248 | memset(&c, 0, sizeof(c)); | ||
1249 | c.features.opcode = nvme_admin_get_features; | ||
1250 | c.features.nsid = cpu_to_le32(nsid); | ||
1251 | c.features.prp1 = cpu_to_le64(dma_addr); | ||
1252 | c.features.fid = cpu_to_le32(fid); | ||
1253 | |||
1254 | return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, | ||
1255 | result, 0); | ||
1256 | } | ||
1257 | |||
1258 | int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, | ||
1259 | dma_addr_t dma_addr, u32 *result) | ||
1260 | { | ||
1261 | struct nvme_command c; | ||
1262 | |||
1263 | memset(&c, 0, sizeof(c)); | ||
1264 | c.features.opcode = nvme_admin_set_features; | ||
1265 | c.features.prp1 = cpu_to_le64(dma_addr); | ||
1266 | c.features.fid = cpu_to_le32(fid); | ||
1267 | c.features.dword11 = cpu_to_le32(dword11); | ||
1268 | |||
1269 | return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0, | ||
1270 | result, 0); | ||
1271 | } | ||
1272 | |||
1273 | int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) | ||
1274 | { | 905 | { |
1275 | struct nvme_command c = { }; | 906 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
1276 | int error; | 907 | struct nvme_queue *nvmeq = iod->nvmeq; |
1277 | 908 | u32 result = (u32)(uintptr_t)req->special; | |
1278 | c.common.opcode = nvme_admin_get_log_page, | 909 | u16 status = req->errors; |
1279 | c.common.nsid = cpu_to_le32(0xFFFFFFFF), | ||
1280 | c.common.cdw10[0] = cpu_to_le32( | ||
1281 | (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | | ||
1282 | NVME_LOG_SMART), | ||
1283 | 910 | ||
1284 | *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); | 911 | dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); |
1285 | if (!*log) | 912 | atomic_inc(&nvmeq->dev->ctrl.abort_limit); |
1286 | return -ENOMEM; | ||
1287 | 913 | ||
1288 | error = nvme_submit_sync_cmd(dev->admin_q, &c, *log, | 914 | blk_mq_free_request(req); |
1289 | sizeof(struct nvme_smart_log)); | ||
1290 | if (error) | ||
1291 | kfree(*log); | ||
1292 | return error; | ||
1293 | } | 915 | } |
1294 | 916 | ||
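The removed nvme_get_log_page() above (the sync command helpers move into core.c) builds Get Log Page dword 10 as the zero-based dword count in bits 27:16 plus the log identifier in the low byte. A small sketch of that encoding; the 512-byte size and identifier 0x02 are the usual SMART log values, used here only as an example:

    #include <stdio.h>

    /*
     * Get Log Page cdw10: low byte is the log id, bits 27:16 hold the number
     * of dwords to transfer minus one, matching the expression in the
     * removed helper above.
     */
    static unsigned get_log_page_cdw10(unsigned log_bytes, unsigned log_id)
    {
            unsigned numd = log_bytes / 4 - 1;

            return (numd << 16) | log_id;
    }

    int main(void)
    {
            printf("cdw10 = 0x%08x\n", get_log_page_cdw10(512, 0x02));
            return 0;
    }

which prints 0x007f0002.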
1295 | /** | 917 | static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) |
1296 | * nvme_abort_req - Attempt aborting a request | ||
1297 | * | ||
1298 | * Schedule controller reset if the command was already aborted once before and | ||
1299 | * still hasn't been returned to the driver, or if this is the admin queue. | ||
1300 | */ | ||
1301 | static void nvme_abort_req(struct request *req) | ||
1302 | { | 918 | { |
1303 | struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); | 919 | struct nvme_iod *iod = blk_mq_rq_to_pdu(req); |
1304 | struct nvme_queue *nvmeq = cmd_rq->nvmeq; | 920 | struct nvme_queue *nvmeq = iod->nvmeq; |
1305 | struct nvme_dev *dev = nvmeq->dev; | 921 | struct nvme_dev *dev = nvmeq->dev; |
1306 | struct request *abort_req; | 922 | struct request *abort_req; |
1307 | struct nvme_cmd_info *abort_cmd; | ||
1308 | struct nvme_command cmd; | 923 | struct nvme_command cmd; |
1309 | 924 | ||
1310 | if (!nvmeq->qid || cmd_rq->aborted) { | 925 | /* |
1311 | spin_lock(&dev_list_lock); | 926 | * Shutdown immediately if controller times out while starting. The |
1312 | if (!__nvme_reset(dev)) { | 927 | * reset work will see the pci device disabled when it gets the forced |
1313 | dev_warn(dev->dev, | 928 | * cancellation error. All outstanding requests are completed on |
1314 | "I/O %d QID %d timeout, reset controller\n", | 929 | * shutdown, so we return BLK_EH_HANDLED. |
1315 | req->tag, nvmeq->qid); | 930 | */ |
1316 | } | 931 | if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) { |
1317 | spin_unlock(&dev_list_lock); | 932 | dev_warn(dev->dev, |
1318 | return; | 933 | "I/O %d QID %d timeout, disable controller\n", |
934 | req->tag, nvmeq->qid); | ||
935 | nvme_dev_disable(dev, false); | ||
936 | req->errors = NVME_SC_CANCELLED; | ||
937 | return BLK_EH_HANDLED; | ||
1319 | } | 938 | } |
1320 | 939 | ||
1321 | if (!dev->abort_limit) | 940 | /* |
1322 | return; | 941 | * Shutdown the controller immediately and schedule a reset if the |
942 | * command was already aborted once before and still hasn't been | ||
943 | * returned to the driver, or if this is the admin queue. | ||
944 | */ | ||
945 | if (!nvmeq->qid || iod->aborted) { | ||
946 | dev_warn(dev->dev, | ||
947 | "I/O %d QID %d timeout, reset controller\n", | ||
948 | req->tag, nvmeq->qid); | ||
949 | nvme_dev_disable(dev, false); | ||
950 | queue_work(nvme_workq, &dev->reset_work); | ||
1323 | 951 | ||
1324 | abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, | 952 | /* |
1325 | BLK_MQ_REQ_NOWAIT); | 953 | * Mark the request as handled, since the inline shutdown |
1326 | if (IS_ERR(abort_req)) | 954 | * forces all outstanding requests to complete. |
1327 | return; | 955 | */ |
956 | req->errors = NVME_SC_CANCELLED; | ||
957 | return BLK_EH_HANDLED; | ||
958 | } | ||
1328 | 959 | ||
1329 | abort_cmd = blk_mq_rq_to_pdu(abort_req); | 960 | iod->aborted = 1; |
1330 | nvme_set_info(abort_cmd, abort_req, abort_completion); | 961 | |
962 | if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { | ||
963 | atomic_inc(&dev->ctrl.abort_limit); | ||
964 | return BLK_EH_RESET_TIMER; | ||
965 | } | ||
1331 | 966 | ||
1332 | memset(&cmd, 0, sizeof(cmd)); | 967 | memset(&cmd, 0, sizeof(cmd)); |
1333 | cmd.abort.opcode = nvme_admin_abort_cmd; | 968 | cmd.abort.opcode = nvme_admin_abort_cmd; |
1334 | cmd.abort.cid = req->tag; | 969 | cmd.abort.cid = req->tag; |
1335 | cmd.abort.sqid = cpu_to_le16(nvmeq->qid); | 970 | cmd.abort.sqid = cpu_to_le16(nvmeq->qid); |
1336 | cmd.abort.command_id = abort_req->tag; | ||
1337 | 971 | ||
1338 | --dev->abort_limit; | 972 | dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n", |
1339 | cmd_rq->aborted = 1; | 973 | req->tag, nvmeq->qid); |
1340 | 974 | ||
1341 | dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag, | 975 | abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, |
1342 | nvmeq->qid); | 976 | BLK_MQ_REQ_NOWAIT); |
1343 | nvme_submit_cmd(dev->queues[0], &cmd); | 977 | if (IS_ERR(abort_req)) { |
978 | atomic_inc(&dev->ctrl.abort_limit); | ||
979 | return BLK_EH_RESET_TIMER; | ||
980 | } | ||
981 | |||
982 | abort_req->timeout = ADMIN_TIMEOUT; | ||
983 | abort_req->end_io_data = NULL; | ||
984 | blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio); | ||
985 | |||
986 | /* | ||
987 | * The aborted req will be completed on receiving the abort req. | ||
988 | * We enable the timer again. If hit twice, it'll cause a device reset, | ||
989 | * as the device then is in a faulty state. | ||
990 | */ | ||
991 | return BLK_EH_RESET_TIMER; | ||
1344 | } | 992 | } |
1345 | 993 | ||
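Taken together, the rewritten nvme_timeout() above is a three-way decision: report the command handled (as cancelled) when a reset is already tearing the controller down, disable and schedule a reset when the admin queue or an already-aborted command times out, and otherwise spend one abort credit and rearm the timer. A condensed sketch of that decision tree; the struct, enum and field names below are stand-ins for the block-layer and driver definitions:

    #include <stdbool.h>
    #include <stdio.h>

    enum eh_ret { EH_HANDLED, EH_RESET_TIMER };   /* stand-ins for BLK_EH_* */

    struct timeout_ctx {
            bool ctrl_resetting;    /* NVME_CTRL_RESETTING set */
            bool admin_queue;       /* nvmeq->qid == 0 */
            bool already_aborted;   /* iod->aborted */
            int  abort_credits;     /* dev->ctrl.abort_limit */
    };

    static enum eh_ret handle_timeout(struct timeout_ctx *c)
    {
            /* Reset in progress: the forced cancellation completes the
             * request, so report it handled. */
            if (c->ctrl_resetting)
                    return EH_HANDLED;

            /* Admin command, or second timeout of the same command:
             * disable and reset the controller instead of aborting again. */
            if (c->admin_queue || c->already_aborted)
                    return EH_HANDLED;

            /* Otherwise send an Abort if credit remains, and let the
             * timer run again either way. */
            if (c->abort_credits > 0)
                    c->abort_credits--;     /* would queue nvme_admin_abort_cmd */
            return EH_RESET_TIMER;
    }

    int main(void)
    {
            struct timeout_ctx c = { .abort_credits = 1 };

            printf("%d\n", handle_timeout(&c));
            return 0;
    }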
1346 | static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved) | 994 | static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved) |
1347 | { | 995 | { |
1348 | struct nvme_queue *nvmeq = data; | 996 | struct nvme_queue *nvmeq = data; |
1349 | void *ctx; | 997 | int status; |
1350 | nvme_completion_fn fn; | ||
1351 | struct nvme_cmd_info *cmd; | ||
1352 | struct nvme_completion cqe; | ||
1353 | 998 | ||
1354 | if (!blk_mq_request_started(req)) | 999 | if (!blk_mq_request_started(req)) |
1355 | return; | 1000 | return; |
1356 | 1001 | ||
1357 | cmd = blk_mq_rq_to_pdu(req); | 1002 | dev_warn(nvmeq->q_dmadev, |
1358 | 1003 | "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid); | |
1359 | if (cmd->ctx == CMD_CTX_CANCELLED) | ||
1360 | return; | ||
1361 | 1004 | ||
1005 | status = NVME_SC_ABORT_REQ; | ||
1362 | if (blk_queue_dying(req->q)) | 1006 | if (blk_queue_dying(req->q)) |
1363 | cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); | 1007 | status |= NVME_SC_DNR; |
1364 | else | 1008 | blk_mq_complete_request(req, status); |
1365 | cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); | ||
1366 | |||
1367 | |||
1368 | dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", | ||
1369 | req->tag, nvmeq->qid); | ||
1370 | ctx = cancel_cmd_info(cmd, &fn); | ||
1371 | fn(nvmeq, ctx, &cqe); | ||
1372 | } | ||
1373 | |||
1374 | static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) | ||
1375 | { | ||
1376 | struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); | ||
1377 | struct nvme_queue *nvmeq = cmd->nvmeq; | ||
1378 | |||
1379 | dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag, | ||
1380 | nvmeq->qid); | ||
1381 | spin_lock_irq(&nvmeq->q_lock); | ||
1382 | nvme_abort_req(req); | ||
1383 | spin_unlock_irq(&nvmeq->q_lock); | ||
1384 | |||
1385 | /* | ||
1386 | * The aborted req will be completed on receiving the abort req. | ||
1387 | * We enable the timer again. If hit twice, it'll cause a device reset, | ||
1388 | * as the device then is in a faulty state. | ||
1389 | */ | ||
1390 | return BLK_EH_RESET_TIMER; | ||
1391 | } | 1009 | } |
1392 | 1010 | ||
1393 | static void nvme_free_queue(struct nvme_queue *nvmeq) | 1011 | static void nvme_free_queue(struct nvme_queue *nvmeq) |
@@ -1430,8 +1048,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) | |||
1430 | nvmeq->cq_vector = -1; | 1048 | nvmeq->cq_vector = -1; |
1431 | spin_unlock_irq(&nvmeq->q_lock); | 1049 | spin_unlock_irq(&nvmeq->q_lock); |
1432 | 1050 | ||
1433 | if (!nvmeq->qid && nvmeq->dev->admin_q) | 1051 | if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) |
1434 | blk_mq_freeze_queue_start(nvmeq->dev->admin_q); | 1052 | blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q); |
1435 | 1053 | ||
1436 | irq_set_affinity_hint(vector, NULL); | 1054 | irq_set_affinity_hint(vector, NULL); |
1437 | free_irq(vector, nvmeq); | 1055 | free_irq(vector, nvmeq); |
@@ -1447,21 +1065,20 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq) | |||
1447 | spin_unlock_irq(&nvmeq->q_lock); | 1065 | spin_unlock_irq(&nvmeq->q_lock);
1448 | } | 1066 | } |
1449 | 1067 | ||
1450 | static void nvme_disable_queue(struct nvme_dev *dev, int qid) | 1068 | static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) |
1451 | { | 1069 | { |
1452 | struct nvme_queue *nvmeq = dev->queues[qid]; | 1070 | struct nvme_queue *nvmeq = dev->queues[0]; |
1453 | 1071 | ||
1454 | if (!nvmeq) | 1072 | if (!nvmeq) |
1455 | return; | 1073 | return; |
1456 | if (nvme_suspend_queue(nvmeq)) | 1074 | if (nvme_suspend_queue(nvmeq)) |
1457 | return; | 1075 | return; |
1458 | 1076 | ||
1459 | /* Don't tell the adapter to delete the admin queue. | 1077 | if (shutdown) |
1460 | * Don't tell a removed adapter to delete IO queues. */ | 1078 | nvme_shutdown_ctrl(&dev->ctrl); |
1461 | if (qid && readl(&dev->bar->csts) != -1) { | 1079 | else |
1462 | adapter_delete_sq(dev, qid); | 1080 | nvme_disable_ctrl(&dev->ctrl, lo_hi_readq( |
1463 | adapter_delete_cq(dev, qid); | 1081 | dev->bar + NVME_REG_CAP)); |
1464 | } | ||
1465 | 1082 | ||
1466 | spin_lock_irq(&nvmeq->q_lock); | 1083 | spin_lock_irq(&nvmeq->q_lock); |
1467 | nvme_process_cq(nvmeq); | 1084 | nvme_process_cq(nvmeq); |
@@ -1472,11 +1089,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, | |||
1472 | int entry_size) | 1089 | int entry_size) |
1473 | { | 1090 | { |
1474 | int q_depth = dev->q_depth; | 1091 | int q_depth = dev->q_depth; |
1475 | unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size); | 1092 | unsigned q_size_aligned = roundup(q_depth * entry_size, |
1093 | dev->ctrl.page_size); | ||
1476 | 1094 | ||
1477 | if (q_size_aligned * nr_io_queues > dev->cmb_size) { | 1095 | if (q_size_aligned * nr_io_queues > dev->cmb_size) { |
1478 | u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues); | 1096 | u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues); |
1479 | mem_per_q = round_down(mem_per_q, dev->page_size); | 1097 | mem_per_q = round_down(mem_per_q, dev->ctrl.page_size); |
1480 | q_depth = div_u64(mem_per_q, entry_size); | 1098 | q_depth = div_u64(mem_per_q, entry_size); |
1481 | 1099 | ||
1482 | /* | 1100 | /* |
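The nvme_cmb_qdepth() shrink rule above is easiest to see with concrete numbers. A small sketch with the same arithmetic; page size, SQ entry size, CMB size and queue count are illustrative values only:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t roundup_to(uint64_t v, uint64_t align)
    {
            return (v + align - 1) / align * align;
    }

    /*
     * Same rule as nvme_cmb_qdepth(): if the page-aligned queues do not all
     * fit in the CMB, give each queue an equal page-aligned slice and derive
     * the depth from that.
     */
    static int cmb_qdepth(int q_depth, int nr_io_queues, int entry_size,
                          uint64_t page_size, uint64_t cmb_size)
    {
            uint64_t q_size_aligned = roundup_to((uint64_t)q_depth * entry_size,
                                                 page_size);

            if (q_size_aligned * nr_io_queues > cmb_size) {
                    uint64_t mem_per_q = cmb_size / nr_io_queues;

                    mem_per_q -= mem_per_q % page_size;     /* round_down() */
                    q_depth = mem_per_q / entry_size;
            }
            return q_depth;
    }

    int main(void)
    {
            /* 1024-entry queues of 64-byte SQEs, 8 queues, 4 KiB pages and a
             * 256 KiB CMB: 8 * 64 KiB does not fit, each queue gets 32 KiB,
             * so the depth drops to 512. */
            printf("%d\n", cmb_qdepth(1024, 8, 64, 4096, 256 * 1024));
            return 0;
    }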
@@ -1495,8 +1113,8 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, | |||
1495 | int qid, int depth) | 1113 | int qid, int depth) |
1496 | { | 1114 | { |
1497 | if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { | 1115 | if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { |
1498 | unsigned offset = (qid - 1) * | 1116 | unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth), |
1499 | roundup(SQ_SIZE(depth), dev->page_size); | 1117 | dev->ctrl.page_size); |
1500 | nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset; | 1118 | nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset; |
1501 | nvmeq->sq_cmds_io = dev->cmb + offset; | 1119 | nvmeq->sq_cmds_io = dev->cmb + offset; |
1502 | } else { | 1120 | } else { |
@@ -1527,7 +1145,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, | |||
1527 | nvmeq->q_dmadev = dev->dev; | 1145 | nvmeq->q_dmadev = dev->dev; |
1528 | nvmeq->dev = dev; | 1146 | nvmeq->dev = dev; |
1529 | snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", | 1147 | snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", |
1530 | dev->instance, qid); | 1148 | dev->ctrl.instance, qid); |
1531 | spin_lock_init(&nvmeq->q_lock); | 1149 | spin_lock_init(&nvmeq->q_lock); |
1532 | nvmeq->cq_head = 0; | 1150 | nvmeq->cq_head = 0; |
1533 | nvmeq->cq_phase = 1; | 1151 | nvmeq->cq_phase = 1; |
@@ -1604,79 +1222,9 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) | |||
1604 | return result; | 1222 | return result; |
1605 | } | 1223 | } |
1606 | 1224 | ||
1607 | static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) | ||
1608 | { | ||
1609 | unsigned long timeout; | ||
1610 | u32 bit = enabled ? NVME_CSTS_RDY : 0; | ||
1611 | |||
1612 | timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; | ||
1613 | |||
1614 | while ((readl(&dev->bar->csts) & NVME_CSTS_RDY) != bit) { | ||
1615 | msleep(100); | ||
1616 | if (fatal_signal_pending(current)) | ||
1617 | return -EINTR; | ||
1618 | if (time_after(jiffies, timeout)) { | ||
1619 | dev_err(dev->dev, | ||
1620 | "Device not ready; aborting %s\n", enabled ? | ||
1621 | "initialisation" : "reset"); | ||
1622 | return -ENODEV; | ||
1623 | } | ||
1624 | } | ||
1625 | |||
1626 | return 0; | ||
1627 | } | ||
1628 | |||
1629 | /* | ||
1630 | * If the device has been passed off to us in an enabled state, just clear | ||
1631 | * the enabled bit. The spec says we should set the 'shutdown notification | ||
1632 | * bits', but doing so may cause the device to complete commands to the | ||
1633 | * admin queue ... and we don't know what memory that might be pointing at! | ||
1634 | */ | ||
1635 | static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap) | ||
1636 | { | ||
1637 | dev->ctrl_config &= ~NVME_CC_SHN_MASK; | ||
1638 | dev->ctrl_config &= ~NVME_CC_ENABLE; | ||
1639 | writel(dev->ctrl_config, &dev->bar->cc); | ||
1640 | |||
1641 | return nvme_wait_ready(dev, cap, false); | ||
1642 | } | ||
1643 | |||
1644 | static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap) | ||
1645 | { | ||
1646 | dev->ctrl_config &= ~NVME_CC_SHN_MASK; | ||
1647 | dev->ctrl_config |= NVME_CC_ENABLE; | ||
1648 | writel(dev->ctrl_config, &dev->bar->cc); | ||
1649 | |||
1650 | return nvme_wait_ready(dev, cap, true); | ||
1651 | } | ||
1652 | |||
1653 | static int nvme_shutdown_ctrl(struct nvme_dev *dev) | ||
1654 | { | ||
1655 | unsigned long timeout; | ||
1656 | |||
1657 | dev->ctrl_config &= ~NVME_CC_SHN_MASK; | ||
1658 | dev->ctrl_config |= NVME_CC_SHN_NORMAL; | ||
1659 | |||
1660 | writel(dev->ctrl_config, &dev->bar->cc); | ||
1661 | |||
1662 | timeout = SHUTDOWN_TIMEOUT + jiffies; | ||
1663 | while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) != | ||
1664 | NVME_CSTS_SHST_CMPLT) { | ||
1665 | msleep(100); | ||
1666 | if (fatal_signal_pending(current)) | ||
1667 | return -EINTR; | ||
1668 | if (time_after(jiffies, timeout)) { | ||
1669 | dev_err(dev->dev, | ||
1670 | "Device shutdown incomplete; abort shutdown\n"); | ||
1671 | return -ENODEV; | ||
1672 | } | ||
1673 | } | ||
1674 | |||
1675 | return 0; | ||
1676 | } | ||
1677 | |||
1678 | static struct blk_mq_ops nvme_mq_admin_ops = { | 1225 | static struct blk_mq_ops nvme_mq_admin_ops = { |
1679 | .queue_rq = nvme_queue_rq, | 1226 | .queue_rq = nvme_queue_rq, |
1227 | .complete = nvme_complete_rq, | ||
1680 | .map_queue = blk_mq_map_queue, | 1228 | .map_queue = blk_mq_map_queue, |
1681 | .init_hctx = nvme_admin_init_hctx, | 1229 | .init_hctx = nvme_admin_init_hctx, |
1682 | .exit_hctx = nvme_admin_exit_hctx, | 1230 | .exit_hctx = nvme_admin_exit_hctx, |
@@ -1686,6 +1234,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = { | |||
1686 | 1234 | ||
1687 | static struct blk_mq_ops nvme_mq_ops = { | 1235 | static struct blk_mq_ops nvme_mq_ops = { |
1688 | .queue_rq = nvme_queue_rq, | 1236 | .queue_rq = nvme_queue_rq, |
1237 | .complete = nvme_complete_rq, | ||
1689 | .map_queue = blk_mq_map_queue, | 1238 | .map_queue = blk_mq_map_queue, |
1690 | .init_hctx = nvme_init_hctx, | 1239 | .init_hctx = nvme_init_hctx, |
1691 | .init_request = nvme_init_request, | 1240 | .init_request = nvme_init_request, |
@@ -1695,19 +1244,23 @@ static struct blk_mq_ops nvme_mq_ops = { | |||
1695 | 1244 | ||
1696 | static void nvme_dev_remove_admin(struct nvme_dev *dev) | 1245 | static void nvme_dev_remove_admin(struct nvme_dev *dev) |
1697 | { | 1246 | { |
1698 | if (dev->admin_q && !blk_queue_dying(dev->admin_q)) { | 1247 | if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) { |
1699 | blk_cleanup_queue(dev->admin_q); | 1248 | blk_cleanup_queue(dev->ctrl.admin_q); |
1700 | blk_mq_free_tag_set(&dev->admin_tagset); | 1249 | blk_mq_free_tag_set(&dev->admin_tagset); |
1701 | } | 1250 | } |
1702 | } | 1251 | } |
1703 | 1252 | ||
1704 | static int nvme_alloc_admin_tags(struct nvme_dev *dev) | 1253 | static int nvme_alloc_admin_tags(struct nvme_dev *dev) |
1705 | { | 1254 | { |
1706 | if (!dev->admin_q) { | 1255 | if (!dev->ctrl.admin_q) { |
1707 | dev->admin_tagset.ops = &nvme_mq_admin_ops; | 1256 | dev->admin_tagset.ops = &nvme_mq_admin_ops; |
1708 | dev->admin_tagset.nr_hw_queues = 1; | 1257 | dev->admin_tagset.nr_hw_queues = 1; |
1709 | dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; | 1258 | |
1710 | dev->admin_tagset.reserved_tags = 1; | 1259 | /* |
1260 | * Subtract one to leave an empty queue entry for 'Full Queue' | ||
1261 | * condition. See NVM-Express 1.2 specification, section 4.1.2. | ||
1262 | */ | ||
1263 | dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1; | ||
1711 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; | 1264 | dev->admin_tagset.timeout = ADMIN_TIMEOUT; |
1712 | dev->admin_tagset.numa_node = dev_to_node(dev->dev); | 1265 | dev->admin_tagset.numa_node = dev_to_node(dev->dev); |
1713 | dev->admin_tagset.cmd_size = nvme_cmd_size(dev); | 1266 | dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
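The "subtract one" above follows from the NVMe Full Queue definition: a queue of depth d can never hold more than d - 1 entries, because full is detected as the tail sitting one slot behind the head. A tiny sketch of that condition; the depth is an arbitrary example value:

    #include <stdbool.h>
    #include <stdio.h>

    #define Q_DEPTH 32      /* arbitrary example depth */

    /*
     * Full Queue per NVM Express 1.2, section 4.1.2: advancing the tail onto
     * the head would look identical to an empty queue, so one entry always
     * stays unused.
     */
    static bool sq_full(unsigned head, unsigned tail)
    {
            return (tail + 1) % Q_DEPTH == head;
    }

    int main(void)
    {
            /* With head at 0, the queue is full after Q_DEPTH - 1 submissions. */
            printf("%d\n", sq_full(0, Q_DEPTH - 1));
            return 0;
    }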
@@ -1716,18 +1269,18 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) | |||
1716 | if (blk_mq_alloc_tag_set(&dev->admin_tagset)) | 1269 | if (blk_mq_alloc_tag_set(&dev->admin_tagset)) |
1717 | return -ENOMEM; | 1270 | return -ENOMEM; |
1718 | 1271 | ||
1719 | dev->admin_q = blk_mq_init_queue(&dev->admin_tagset); | 1272 | dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); |
1720 | if (IS_ERR(dev->admin_q)) { | 1273 | if (IS_ERR(dev->ctrl.admin_q)) { |
1721 | blk_mq_free_tag_set(&dev->admin_tagset); | 1274 | blk_mq_free_tag_set(&dev->admin_tagset); |
1722 | return -ENOMEM; | 1275 | return -ENOMEM; |
1723 | } | 1276 | } |
1724 | if (!blk_get_queue(dev->admin_q)) { | 1277 | if (!blk_get_queue(dev->ctrl.admin_q)) { |
1725 | nvme_dev_remove_admin(dev); | 1278 | nvme_dev_remove_admin(dev); |
1726 | dev->admin_q = NULL; | 1279 | dev->ctrl.admin_q = NULL; |
1727 | return -ENODEV; | 1280 | return -ENODEV; |
1728 | } | 1281 | } |
1729 | } else | 1282 | } else |
1730 | blk_mq_unfreeze_queue(dev->admin_q); | 1283 | blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true); |
1731 | 1284 | ||
1732 | return 0; | 1285 | return 0; |
1733 | } | 1286 | } |
@@ -1736,31 +1289,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) | |||
1736 | { | 1289 | { |
1737 | int result; | 1290 | int result; |
1738 | u32 aqa; | 1291 | u32 aqa; |
1739 | u64 cap = lo_hi_readq(&dev->bar->cap); | 1292 | u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); |
1740 | struct nvme_queue *nvmeq; | 1293 | struct nvme_queue *nvmeq; |
1741 | /* | ||
1742 | * default to a 4K page size, with the intention to update this | ||
1743 | * path in the future to accomodate architectures with differing | ||
1744 | * kernel and IO page sizes. | ||
1745 | */ | ||
1746 | unsigned page_shift = 12; | ||
1747 | unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12; | ||
1748 | |||
1749 | if (page_shift < dev_page_min) { | ||
1750 | dev_err(dev->dev, | ||
1751 | "Minimum device page size (%u) too large for " | ||
1752 | "host (%u)\n", 1 << dev_page_min, | ||
1753 | 1 << page_shift); | ||
1754 | return -ENODEV; | ||
1755 | } | ||
1756 | 1294 | ||
1757 | dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ? | 1295 | dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ? |
1758 | NVME_CAP_NSSRC(cap) : 0; | 1296 | NVME_CAP_NSSRC(cap) : 0; |
1759 | 1297 | ||
1760 | if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO)) | 1298 | if (dev->subsystem && |
1761 | writel(NVME_CSTS_NSSRO, &dev->bar->csts); | 1299 | (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) |
1300 | writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); | ||
1762 | 1301 | ||
1763 | result = nvme_disable_ctrl(dev, cap); | 1302 | result = nvme_disable_ctrl(&dev->ctrl, cap); |
1764 | if (result < 0) | 1303 | if (result < 0) |
1765 | return result; | 1304 | return result;
1766 | 1305 | ||
@@ -1774,18 +1313,11 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) | |||
1774 | aqa = nvmeq->q_depth - 1; | 1313 | aqa = nvmeq->q_depth - 1; |
1775 | aqa |= aqa << 16; | 1314 | aqa |= aqa << 16; |
1776 | 1315 | ||
1777 | dev->page_size = 1 << page_shift; | 1316 | writel(aqa, dev->bar + NVME_REG_AQA); |
1778 | 1317 | lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); | |
1779 | dev->ctrl_config = NVME_CC_CSS_NVM; | 1318 | lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); |
1780 | dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; | ||
1781 | dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; | ||
1782 | dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; | ||
1783 | |||
1784 | writel(aqa, &dev->bar->aqa); | ||
1785 | lo_hi_writeq(nvmeq->sq_dma_addr, &dev->bar->asq); | ||
1786 | lo_hi_writeq(nvmeq->cq_dma_addr, &dev->bar->acq); | ||
1787 | 1319 | ||
1788 | result = nvme_enable_ctrl(dev, cap); | 1320 | result = nvme_enable_ctrl(&dev->ctrl, cap); |
1789 | if (result) | 1321 | if (result) |
1790 | goto free_nvmeq; | 1322 | goto free_nvmeq; |
1791 | 1323 | ||
@@ -1803,406 +1335,6 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) | |||
1803 | return result; | 1335 | return result; |
1804 | } | 1336 | } |
1805 | 1337 | ||
1806 | static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | ||
1807 | { | ||
1808 | struct nvme_dev *dev = ns->dev; | ||
1809 | struct nvme_user_io io; | ||
1810 | struct nvme_command c; | ||
1811 | unsigned length, meta_len; | ||
1812 | int status, write; | ||
1813 | dma_addr_t meta_dma = 0; | ||
1814 | void *meta = NULL; | ||
1815 | void __user *metadata; | ||
1816 | |||
1817 | if (copy_from_user(&io, uio, sizeof(io))) | ||
1818 | return -EFAULT; | ||
1819 | |||
1820 | switch (io.opcode) { | ||
1821 | case nvme_cmd_write: | ||
1822 | case nvme_cmd_read: | ||
1823 | case nvme_cmd_compare: | ||
1824 | break; | ||
1825 | default: | ||
1826 | return -EINVAL; | ||
1827 | } | ||
1828 | |||
1829 | length = (io.nblocks + 1) << ns->lba_shift; | ||
1830 | meta_len = (io.nblocks + 1) * ns->ms; | ||
1831 | metadata = (void __user *)(uintptr_t)io.metadata; | ||
1832 | write = io.opcode & 1; | ||
1833 | |||
1834 | if (ns->ext) { | ||
1835 | length += meta_len; | ||
1836 | meta_len = 0; | ||
1837 | } | ||
1838 | if (meta_len) { | ||
1839 | if (((io.metadata & 3) || !io.metadata) && !ns->ext) | ||
1840 | return -EINVAL; | ||
1841 | |||
1842 | meta = dma_alloc_coherent(dev->dev, meta_len, | ||
1843 | &meta_dma, GFP_KERNEL); | ||
1844 | |||
1845 | if (!meta) { | ||
1846 | status = -ENOMEM; | ||
1847 | goto unmap; | ||
1848 | } | ||
1849 | if (write) { | ||
1850 | if (copy_from_user(meta, metadata, meta_len)) { | ||
1851 | status = -EFAULT; | ||
1852 | goto unmap; | ||
1853 | } | ||
1854 | } | ||
1855 | } | ||
1856 | |||
1857 | memset(&c, 0, sizeof(c)); | ||
1858 | c.rw.opcode = io.opcode; | ||
1859 | c.rw.flags = io.flags; | ||
1860 | c.rw.nsid = cpu_to_le32(ns->ns_id); | ||
1861 | c.rw.slba = cpu_to_le64(io.slba); | ||
1862 | c.rw.length = cpu_to_le16(io.nblocks); | ||
1863 | c.rw.control = cpu_to_le16(io.control); | ||
1864 | c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); | ||
1865 | c.rw.reftag = cpu_to_le32(io.reftag); | ||
1866 | c.rw.apptag = cpu_to_le16(io.apptag); | ||
1867 | c.rw.appmask = cpu_to_le16(io.appmask); | ||
1868 | c.rw.metadata = cpu_to_le64(meta_dma); | ||
1869 | |||
1870 | status = __nvme_submit_sync_cmd(ns->queue, &c, NULL, | ||
1871 | (void __user *)(uintptr_t)io.addr, length, NULL, 0); | ||
1872 | unmap: | ||
1873 | if (meta) { | ||
1874 | if (status == NVME_SC_SUCCESS && !write) { | ||
1875 | if (copy_to_user(metadata, meta, meta_len)) | ||
1876 | status = -EFAULT; | ||
1877 | } | ||
1878 | dma_free_coherent(dev->dev, meta_len, meta, meta_dma); | ||
1879 | } | ||
1880 | return status; | ||
1881 | } | ||
1882 | |||
1883 | static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, | ||
1884 | struct nvme_passthru_cmd __user *ucmd) | ||
1885 | { | ||
1886 | struct nvme_passthru_cmd cmd; | ||
1887 | struct nvme_command c; | ||
1888 | unsigned timeout = 0; | ||
1889 | int status; | ||
1890 | |||
1891 | if (!capable(CAP_SYS_ADMIN)) | ||
1892 | return -EACCES; | ||
1893 | if (copy_from_user(&cmd, ucmd, sizeof(cmd))) | ||
1894 | return -EFAULT; | ||
1895 | |||
1896 | memset(&c, 0, sizeof(c)); | ||
1897 | c.common.opcode = cmd.opcode; | ||
1898 | c.common.flags = cmd.flags; | ||
1899 | c.common.nsid = cpu_to_le32(cmd.nsid); | ||
1900 | c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); | ||
1901 | c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); | ||
1902 | c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); | ||
1903 | c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); | ||
1904 | c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); | ||
1905 | c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); | ||
1906 | c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); | ||
1907 | c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); | ||
1908 | |||
1909 | if (cmd.timeout_ms) | ||
1910 | timeout = msecs_to_jiffies(cmd.timeout_ms); | ||
1911 | |||
1912 | status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c, | ||
1913 | NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, | ||
1914 | &cmd.result, timeout); | ||
1915 | if (status >= 0) { | ||
1916 | if (put_user(cmd.result, &ucmd->result)) | ||
1917 | return -EFAULT; | ||
1918 | } | ||
1919 | |||
1920 | return status; | ||
1921 | } | ||
1922 | |||
1923 | static int nvme_subsys_reset(struct nvme_dev *dev) | ||
1924 | { | ||
1925 | if (!dev->subsystem) | ||
1926 | return -ENOTTY; | ||
1927 | |||
1928 | writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */ | ||
1929 | return 0; | ||
1930 | } | ||
1931 | |||
1932 | static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, | ||
1933 | unsigned long arg) | ||
1934 | { | ||
1935 | struct nvme_ns *ns = bdev->bd_disk->private_data; | ||
1936 | |||
1937 | switch (cmd) { | ||
1938 | case NVME_IOCTL_ID: | ||
1939 | force_successful_syscall_return(); | ||
1940 | return ns->ns_id; | ||
1941 | case NVME_IOCTL_ADMIN_CMD: | ||
1942 | return nvme_user_cmd(ns->dev, NULL, (void __user *)arg); | ||
1943 | case NVME_IOCTL_IO_CMD: | ||
1944 | return nvme_user_cmd(ns->dev, ns, (void __user *)arg); | ||
1945 | case NVME_IOCTL_SUBMIT_IO: | ||
1946 | return nvme_submit_io(ns, (void __user *)arg); | ||
1947 | case SG_GET_VERSION_NUM: | ||
1948 | return nvme_sg_get_version_num((void __user *)arg); | ||
1949 | case SG_IO: | ||
1950 | return nvme_sg_io(ns, (void __user *)arg); | ||
1951 | default: | ||
1952 | return -ENOTTY; | ||
1953 | } | ||
1954 | } | ||
1955 | |||
1956 | #ifdef CONFIG_COMPAT | ||
1957 | static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, | ||
1958 | unsigned int cmd, unsigned long arg) | ||
1959 | { | ||
1960 | switch (cmd) { | ||
1961 | case SG_IO: | ||
1962 | return -ENOIOCTLCMD; | ||
1963 | } | ||
1964 | return nvme_ioctl(bdev, mode, cmd, arg); | ||
1965 | } | ||
1966 | #else | ||
1967 | #define nvme_compat_ioctl NULL | ||
1968 | #endif | ||
1969 | |||
1970 | static void nvme_free_dev(struct kref *kref); | ||
1971 | static void nvme_free_ns(struct kref *kref) | ||
1972 | { | ||
1973 | struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); | ||
1974 | |||
1975 | if (ns->type == NVME_NS_LIGHTNVM) | ||
1976 | nvme_nvm_unregister(ns->queue, ns->disk->disk_name); | ||
1977 | |||
1978 | spin_lock(&dev_list_lock); | ||
1979 | ns->disk->private_data = NULL; | ||
1980 | spin_unlock(&dev_list_lock); | ||
1981 | |||
1982 | kref_put(&ns->dev->kref, nvme_free_dev); | ||
1983 | put_disk(ns->disk); | ||
1984 | kfree(ns); | ||
1985 | } | ||
1986 | |||
1987 | static int nvme_open(struct block_device *bdev, fmode_t mode) | ||
1988 | { | ||
1989 | int ret = 0; | ||
1990 | struct nvme_ns *ns; | ||
1991 | |||
1992 | spin_lock(&dev_list_lock); | ||
1993 | ns = bdev->bd_disk->private_data; | ||
1994 | if (!ns) | ||
1995 | ret = -ENXIO; | ||
1996 | else if (!kref_get_unless_zero(&ns->kref)) | ||
1997 | ret = -ENXIO; | ||
1998 | spin_unlock(&dev_list_lock); | ||
1999 | |||
2000 | return ret; | ||
2001 | } | ||
2002 | |||
2003 | static void nvme_release(struct gendisk *disk, fmode_t mode) | ||
2004 | { | ||
2005 | struct nvme_ns *ns = disk->private_data; | ||
2006 | kref_put(&ns->kref, nvme_free_ns); | ||
2007 | } | ||
2008 | |||
2009 | static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) | ||
2010 | { | ||
2011 | /* some standard values */ | ||
2012 | geo->heads = 1 << 6; | ||
2013 | geo->sectors = 1 << 5; | ||
2014 | geo->cylinders = get_capacity(bd->bd_disk) >> 11; | ||
2015 | return 0; | ||
2016 | } | ||
2017 | |||
2018 | static void nvme_config_discard(struct nvme_ns *ns) | ||
2019 | { | ||
2020 | u32 logical_block_size = queue_logical_block_size(ns->queue); | ||
2021 | ns->queue->limits.discard_zeroes_data = 0; | ||
2022 | ns->queue->limits.discard_alignment = logical_block_size; | ||
2023 | ns->queue->limits.discard_granularity = logical_block_size; | ||
2024 | blk_queue_max_discard_sectors(ns->queue, 0xffffffff); | ||
2025 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); | ||
2026 | } | ||
2027 | |||
2028 | static int nvme_revalidate_disk(struct gendisk *disk) | ||
2029 | { | ||
2030 | struct nvme_ns *ns = disk->private_data; | ||
2031 | struct nvme_dev *dev = ns->dev; | ||
2032 | struct nvme_id_ns *id; | ||
2033 | u8 lbaf, pi_type; | ||
2034 | u16 old_ms; | ||
2035 | unsigned short bs; | ||
2036 | |||
2037 | if (nvme_identify_ns(dev, ns->ns_id, &id)) { | ||
2038 | dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__, | ||
2039 | dev->instance, ns->ns_id); | ||
2040 | return -ENODEV; | ||
2041 | } | ||
2042 | if (id->ncap == 0) { | ||
2043 | kfree(id); | ||
2044 | return -ENODEV; | ||
2045 | } | ||
2046 | |||
2047 | if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) { | ||
2048 | if (nvme_nvm_register(ns->queue, disk->disk_name)) { | ||
2049 | dev_warn(dev->dev, | ||
2050 | "%s: LightNVM init failure\n", __func__); | ||
2051 | kfree(id); | ||
2052 | return -ENODEV; | ||
2053 | } | ||
2054 | ns->type = NVME_NS_LIGHTNVM; | ||
2055 | } | ||
2056 | |||
2057 | old_ms = ns->ms; | ||
2058 | lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; | ||
2059 | ns->lba_shift = id->lbaf[lbaf].ds; | ||
2060 | ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); | ||
2061 | ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); | ||
2062 | |||
2063 | /* | ||
2064 | * If identify namespace failed, use default 512 byte block size so | ||
2065 | * block layer can use before failing read/write for 0 capacity. | ||
2066 | */ | ||
2067 | if (ns->lba_shift == 0) | ||
2068 | ns->lba_shift = 9; | ||
2069 | bs = 1 << ns->lba_shift; | ||
2070 | |||
2071 | /* XXX: PI implementation requires metadata equal t10 pi tuple size */ | ||
2072 | pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? | ||
2073 | id->dps & NVME_NS_DPS_PI_MASK : 0; | ||
2074 | |||
2075 | blk_mq_freeze_queue(disk->queue); | ||
2076 | if (blk_get_integrity(disk) && (ns->pi_type != pi_type || | ||
2077 | ns->ms != old_ms || | ||
2078 | bs != queue_logical_block_size(disk->queue) || | ||
2079 | (ns->ms && ns->ext))) | ||
2080 | blk_integrity_unregister(disk); | ||
2081 | |||
2082 | ns->pi_type = pi_type; | ||
2083 | blk_queue_logical_block_size(ns->queue, bs); | ||
2084 | |||
2085 | if (ns->ms && !ns->ext) | ||
2086 | nvme_init_integrity(ns); | ||
2087 | |||
2088 | if ((ns->ms && !(ns->ms == 8 && ns->pi_type) && | ||
2089 | !blk_get_integrity(disk)) || | ||
2090 | ns->type == NVME_NS_LIGHTNVM) | ||
2091 | set_capacity(disk, 0); | ||
2092 | else | ||
2093 | set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); | ||
2094 | |||
2095 | if (dev->oncs & NVME_CTRL_ONCS_DSM) | ||
2096 | nvme_config_discard(ns); | ||
2097 | blk_mq_unfreeze_queue(disk->queue); | ||
2098 | |||
2099 | kfree(id); | ||
2100 | return 0; | ||
2101 | } | ||
2102 | |||
2103 | static char nvme_pr_type(enum pr_type type) | ||
2104 | { | ||
2105 | switch (type) { | ||
2106 | case PR_WRITE_EXCLUSIVE: | ||
2107 | return 1; | ||
2108 | case PR_EXCLUSIVE_ACCESS: | ||
2109 | return 2; | ||
2110 | case PR_WRITE_EXCLUSIVE_REG_ONLY: | ||
2111 | return 3; | ||
2112 | case PR_EXCLUSIVE_ACCESS_REG_ONLY: | ||
2113 | return 4; | ||
2114 | case PR_WRITE_EXCLUSIVE_ALL_REGS: | ||
2115 | return 5; | ||
2116 | case PR_EXCLUSIVE_ACCESS_ALL_REGS: | ||
2117 | return 6; | ||
2118 | default: | ||
2119 | return 0; | ||
2120 | } | ||
2121 | }; | ||
2122 | |||
2123 | static int nvme_pr_command(struct block_device *bdev, u32 cdw10, | ||
2124 | u64 key, u64 sa_key, u8 op) | ||
2125 | { | ||
2126 | struct nvme_ns *ns = bdev->bd_disk->private_data; | ||
2127 | struct nvme_command c; | ||
2128 | u8 data[16] = { 0, }; | ||
2129 | |||
2130 | put_unaligned_le64(key, &data[0]); | ||
2131 | put_unaligned_le64(sa_key, &data[8]); | ||
2132 | |||
2133 | memset(&c, 0, sizeof(c)); | ||
2134 | c.common.opcode = op; | ||
2135 | c.common.nsid = cpu_to_le32(ns->ns_id); | ||
2136 | c.common.cdw10[0] = cpu_to_le32(cdw10); | ||
2137 | |||
2138 | return nvme_submit_sync_cmd(ns->queue, &c, data, 16); | ||
2139 | } | ||
2140 | |||
2141 | static int nvme_pr_register(struct block_device *bdev, u64 old, | ||
2142 | u64 new, unsigned flags) | ||
2143 | { | ||
2144 | u32 cdw10; | ||
2145 | |||
2146 | if (flags & ~PR_FL_IGNORE_KEY) | ||
2147 | return -EOPNOTSUPP; | ||
2148 | |||
2149 | cdw10 = old ? 2 : 0; | ||
2150 | cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0; | ||
2151 | cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */ | ||
2152 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register); | ||
2153 | } | ||
2154 | |||
2155 | static int nvme_pr_reserve(struct block_device *bdev, u64 key, | ||
2156 | enum pr_type type, unsigned flags) | ||
2157 | { | ||
2158 | u32 cdw10; | ||
2159 | |||
2160 | if (flags & ~PR_FL_IGNORE_KEY) | ||
2161 | return -EOPNOTSUPP; | ||
2162 | |||
2163 | cdw10 = nvme_pr_type(type) << 8; | ||
2164 | cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); | ||
2165 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); | ||
2166 | } | ||
2167 | |||
2168 | static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, | ||
2169 | enum pr_type type, bool abort) | ||
2170 | { | ||
2171 | u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; | ||
2172 | return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); | ||
2173 | } | ||
2174 | |||
2175 | static int nvme_pr_clear(struct block_device *bdev, u64 key) | ||
2176 | { | ||
2177 | u32 cdw10 = 1 | (key ? 1 << 3 : 0); | ||
2178 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); | ||
2179 | } | ||
2180 | |||
2181 | static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) | ||
2182 | { | ||
2183 | u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; | ||
2184 | return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); | ||
2185 | } | ||
2186 | |||
2187 | static const struct pr_ops nvme_pr_ops = { | ||
2188 | .pr_register = nvme_pr_register, | ||
2189 | .pr_reserve = nvme_pr_reserve, | ||
2190 | .pr_release = nvme_pr_release, | ||
2191 | .pr_preempt = nvme_pr_preempt, | ||
2192 | .pr_clear = nvme_pr_clear, | ||
2193 | }; | ||
2194 | |||
2195 | static const struct block_device_operations nvme_fops = { | ||
2196 | .owner = THIS_MODULE, | ||
2197 | .ioctl = nvme_ioctl, | ||
2198 | .compat_ioctl = nvme_compat_ioctl, | ||
2199 | .open = nvme_open, | ||
2200 | .release = nvme_release, | ||
2201 | .getgeo = nvme_getgeo, | ||
2202 | .revalidate_disk= nvme_revalidate_disk, | ||
2203 | .pr_ops = &nvme_pr_ops, | ||
2204 | }; | ||
2205 | |||
2206 | static int nvme_kthread(void *data) | 1338 | static int nvme_kthread(void *data) |
2207 | { | 1339 | { |
2208 | struct nvme_dev *dev, *next; | 1340 | struct nvme_dev *dev, *next; |
@@ -2212,14 +1344,20 @@ static int nvme_kthread(void *data) | |||
2212 | spin_lock(&dev_list_lock); | 1344 | spin_lock(&dev_list_lock); |
2213 | list_for_each_entry_safe(dev, next, &dev_list, node) { | 1345 | list_for_each_entry_safe(dev, next, &dev_list, node) { |
2214 | int i; | 1346 | int i; |
2215 | u32 csts = readl(&dev->bar->csts); | 1347 | u32 csts = readl(dev->bar + NVME_REG_CSTS); |
1348 | |||
1349 | /* | ||
1350 | * Skip controllers currently under reset. | ||
1351 | */ | ||
1352 | if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work)) | ||
1353 | continue; | ||
2216 | 1354 | ||
2217 | if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) || | 1355 | if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) || |
2218 | csts & NVME_CSTS_CFS) { | 1356 | csts & NVME_CSTS_CFS) { |
2219 | if (!__nvme_reset(dev)) { | 1357 | if (queue_work(nvme_workq, &dev->reset_work)) { |
2220 | dev_warn(dev->dev, | 1358 | dev_warn(dev->dev, |
2221 | "Failed status: %x, reset controller\n", | 1359 | "Failed status: %x, reset controller\n", |
2222 | readl(&dev->bar->csts)); | 1360 | readl(dev->bar + NVME_REG_CSTS)); |
2223 | } | 1361 | } |
2224 | continue; | 1362 | continue; |
2225 | } | 1363 | } |
@@ -2230,11 +1368,8 @@ static int nvme_kthread(void *data) | |||
2230 | spin_lock_irq(&nvmeq->q_lock); | 1368 | spin_lock_irq(&nvmeq->q_lock); |
2231 | nvme_process_cq(nvmeq); | 1369 | nvme_process_cq(nvmeq); |
2232 | 1370 | ||
2233 | while ((i == 0) && (dev->event_limit > 0)) { | 1371 | while (i == 0 && dev->ctrl.event_limit > 0) |
2234 | if (nvme_submit_async_admin_req(dev)) | 1372 | nvme_submit_async_event(dev); |
2235 | break; | ||
2236 | dev->event_limit--; | ||
2237 | } | ||
2238 | spin_unlock_irq(&nvmeq->q_lock); | 1373 | spin_unlock_irq(&nvmeq->q_lock);
2239 | } | 1374 | } |
2240 | } | 1375 | } |
@@ -2244,127 +1379,33 @@ static int nvme_kthread(void *data) | |||
2244 | return 0; | 1379 | return 0; |
2245 | } | 1380 | } |
2246 | 1381 | ||
2247 | static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) | 1382 | static int nvme_create_io_queues(struct nvme_dev *dev) |
2248 | { | ||
2249 | struct nvme_ns *ns; | ||
2250 | struct gendisk *disk; | ||
2251 | int node = dev_to_node(dev->dev); | ||
2252 | |||
2253 | ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); | ||
2254 | if (!ns) | ||
2255 | return; | ||
2256 | |||
2257 | ns->queue = blk_mq_init_queue(&dev->tagset); | ||
2258 | if (IS_ERR(ns->queue)) | ||
2259 | goto out_free_ns; | ||
2260 | queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); | ||
2261 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); | ||
2262 | ns->dev = dev; | ||
2263 | ns->queue->queuedata = ns; | ||
2264 | |||
2265 | disk = alloc_disk_node(0, node); | ||
2266 | if (!disk) | ||
2267 | goto out_free_queue; | ||
2268 | |||
2269 | kref_init(&ns->kref); | ||
2270 | ns->ns_id = nsid; | ||
2271 | ns->disk = disk; | ||
2272 | ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ | ||
2273 | list_add_tail(&ns->list, &dev->namespaces); | ||
2274 | |||
2275 | blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); | ||
2276 | if (dev->max_hw_sectors) { | ||
2277 | blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); | ||
2278 | blk_queue_max_segments(ns->queue, | ||
2279 | (dev->max_hw_sectors / (dev->page_size >> 9)) + 1); | ||
2280 | } | ||
2281 | if (dev->stripe_size) | ||
2282 | blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9); | ||
2283 | if (dev->vwc & NVME_CTRL_VWC_PRESENT) | ||
2284 | blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); | ||
2285 | blk_queue_virt_boundary(ns->queue, dev->page_size - 1); | ||
2286 | |||
2287 | disk->major = nvme_major; | ||
2288 | disk->first_minor = 0; | ||
2289 | disk->fops = &nvme_fops; | ||
2290 | disk->private_data = ns; | ||
2291 | disk->queue = ns->queue; | ||
2292 | disk->driverfs_dev = dev->device; | ||
2293 | disk->flags = GENHD_FL_EXT_DEVT; | ||
2294 | sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid); | ||
2295 | |||
2296 | /* | ||
2297 | * Initialize capacity to 0 until we establish the namespace format and | ||
2298 | * set up integrity extensions if necessary. The revalidate_disk after | ||
2299 | * add_disk allows the driver to register with integrity if the format | ||
2300 | * requires it. | ||
2301 | */ | ||
2302 | set_capacity(disk, 0); | ||
2303 | if (nvme_revalidate_disk(ns->disk)) | ||
2304 | goto out_free_disk; | ||
2305 | |||
2306 | kref_get(&dev->kref); | ||
2307 | if (ns->type != NVME_NS_LIGHTNVM) { | ||
2308 | add_disk(ns->disk); | ||
2309 | if (ns->ms) { | ||
2310 | struct block_device *bd = bdget_disk(ns->disk, 0); | ||
2311 | if (!bd) | ||
2312 | return; | ||
2313 | if (blkdev_get(bd, FMODE_READ, NULL)) { | ||
2314 | bdput(bd); | ||
2315 | return; | ||
2316 | } | ||
2317 | blkdev_reread_part(bd); | ||
2318 | blkdev_put(bd, FMODE_READ); | ||
2319 | } | ||
2320 | } | ||
2321 | return; | ||
2322 | out_free_disk: | ||
2323 | kfree(disk); | ||
2324 | list_del(&ns->list); | ||
2325 | out_free_queue: | ||
2326 | blk_cleanup_queue(ns->queue); | ||
2327 | out_free_ns: | ||
2328 | kfree(ns); | ||
2329 | } | ||
2330 | |||
2331 | /* | ||
2332 | * Create I/O queues. Failing to create an I/O queue is not an issue, | ||
2333 | * we can continue with less than the desired amount of queues, and | ||
2334 | * even a controller without I/O queues can still be used to issue | ||
2335 | * admin commands. This might be useful to upgrade a buggy firmware | ||
2336 | * for example. | ||
2337 | */ | ||
2338 | static void nvme_create_io_queues(struct nvme_dev *dev) | ||
2339 | { | 1383 | { |
2340 | unsigned i; | 1384 | unsigned i; |
1385 | int ret = 0; | ||
2341 | 1386 | ||
2342 | for (i = dev->queue_count; i <= dev->max_qid; i++) | 1387 | for (i = dev->queue_count; i <= dev->max_qid; i++) { |
2343 | if (!nvme_alloc_queue(dev, i, dev->q_depth)) | 1388 | if (!nvme_alloc_queue(dev, i, dev->q_depth)) { |
1389 | ret = -ENOMEM; | ||
2344 | break; | 1390 | break; |
1391 | } | ||
1392 | } | ||
2345 | 1393 | ||
2346 | for (i = dev->online_queues; i <= dev->queue_count - 1; i++) | 1394 | for (i = dev->online_queues; i <= dev->queue_count - 1; i++) { |
2347 | if (nvme_create_queue(dev->queues[i], i)) { | 1395 | ret = nvme_create_queue(dev->queues[i], i); |
1396 | if (ret) { | ||
2348 | nvme_free_queues(dev, i); | 1397 | nvme_free_queues(dev, i); |
2349 | break; | 1398 | break; |
2350 | } | 1399 | } |
2351 | } | ||
2352 | |||
2353 | static int set_queue_count(struct nvme_dev *dev, int count) | ||
2354 | { | ||
2355 | int status; | ||
2356 | u32 result; | ||
2357 | u32 q_count = (count - 1) | ((count - 1) << 16); | ||
2358 | |||
2359 | status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, | ||
2360 | &result); | ||
2361 | if (status < 0) | ||
2362 | return status; | ||
2363 | if (status > 0) { | ||
2364 | dev_err(dev->dev, "Could not set queue count (%d)\n", status); | ||
2365 | return 0; | ||
2366 | } | 1400 | } |
2367 | return min(result & 0xffff, result >> 16) + 1; | 1401 | |
1402 | /* | ||
1403 | * Ignore failing Create SQ/CQ commands, we can continue with less | ||
1404 | * than the desired amount of queues, and even a controller without | ||
1405 | * I/O queues can still be used to issue admin commands. This might | ||
1406 | * be useful to upgrade a buggy firmware for example. | ||
1407 | */ | ||
1408 | return ret >= 0 ? 0 : ret; | ||
2368 | } | 1409 | } |
2369 | 1410 | ||
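For reference, the Number of Queues feature encoding used by the removed set_queue_count() is easy to get wrong: both fields are zero-based, with the requested submission-queue count in bits 15:0 and the completion-queue count in bits 31:16, and the controller's reply is decoded the same way. A minimal sketch of that encoding (the helper name is invented for illustration; the arithmetic mirrors the deleted code above):

/* Sketch only: encode a request for 'count' I/O queue pairs the way the
 * removed set_queue_count() did. Both fields are zero-based. */
static inline u32 nvme_q_count_dword(unsigned int count)
{
	return (count - 1) | ((count - 1) << 16);	/* count = 4 -> 0x00030003 */
}

The old code then took min(result & 0xffff, result >> 16) + 1 from the controller's completion dword as the usable queue count; the new flow delegates all of this to nvme_set_queue_count() in the shared core.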
2370 | static void __iomem *nvme_map_cmb(struct nvme_dev *dev) | 1411 | static void __iomem *nvme_map_cmb(struct nvme_dev *dev) |
@@ -2379,11 +1420,11 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) | |||
2379 | if (!use_cmb_sqes) | 1420 | if (!use_cmb_sqes) |
2380 | return NULL; | 1421 | return NULL; |
2381 | 1422 | ||
2382 | dev->cmbsz = readl(&dev->bar->cmbsz); | 1423 | dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); |
2383 | if (!(NVME_CMB_SZ(dev->cmbsz))) | 1424 | if (!(NVME_CMB_SZ(dev->cmbsz))) |
2384 | return NULL; | 1425 | return NULL; |
2385 | 1426 | ||
2386 | cmbloc = readl(&dev->bar->cmbloc); | 1427 | cmbloc = readl(dev->bar + NVME_REG_CMBLOC); |
2387 | 1428 | ||
2388 | szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); | 1429 | szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); |
2389 | size = szu * NVME_CMB_SZ(dev->cmbsz); | 1430 | size = szu * NVME_CMB_SZ(dev->cmbsz); |
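The size computation above follows the CMBSZ register layout: SZU selects the size unit as a power of 16 starting at 4 KiB (1 << (12 + 4 * SZU)), and SZ counts how many of those units the controller exposes. A worked example with assumed register values:

/* Worked example (assumed values, illustration only):
 *   NVME_CMB_SZU(cmbsz) == 1   ->  szu  = 1ULL << (12 + 4 * 1) = 64 KiB
 *   NVME_CMB_SZ(cmbsz)  == 256 ->  size = 256 * 64 KiB         = 16 MiB
 * i.e. such a controller would advertise a 16 MiB controller memory buffer.
 */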
@@ -2431,11 +1472,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
2431 | int result, i, vecs, nr_io_queues, size; | 1472 | int result, i, vecs, nr_io_queues, size; |
2432 | 1473 | ||
2433 | nr_io_queues = num_possible_cpus(); | 1474 | nr_io_queues = num_possible_cpus(); |
2434 | result = set_queue_count(dev, nr_io_queues); | 1475 | result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); |
2435 | if (result <= 0) | 1476 | if (result < 0) |
2436 | return result; | 1477 | return result; |
2437 | if (result < nr_io_queues) | 1478 | |
2438 | nr_io_queues = result; | 1479 | /* |
1480 | * Degraded controllers might return an error when setting the queue | ||
1481 | * count. We still want to be able to bring them online and offer | ||
1482 | * access to the admin queue, as that might be the only way to fix them up. | ||
1483 | */ | ||
1484 | if (result > 0) { | ||
1485 | dev_err(dev->dev, "Could not set queue count (%d)\n", result); | ||
1486 | nr_io_queues = 0; | ||
1487 | result = 0; | ||
1488 | } | ||
2439 | 1489 | ||
2440 | if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) { | 1490 | if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) { |
2441 | result = nvme_cmb_qdepth(dev, nr_io_queues, | 1491 | result = nvme_cmb_qdepth(dev, nr_io_queues, |
@@ -2457,7 +1507,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
2457 | return -ENOMEM; | 1507 | return -ENOMEM; |
2458 | size = db_bar_size(dev, nr_io_queues); | 1508 | size = db_bar_size(dev, nr_io_queues); |
2459 | } while (1); | 1509 | } while (1); |
2460 | dev->dbs = ((void __iomem *)dev->bar) + 4096; | 1510 | dev->dbs = dev->bar + 4096; |
2461 | adminq->q_db = dev->dbs; | 1511 | adminq->q_db = dev->dbs; |
2462 | } | 1512 | } |
2463 | 1513 | ||
@@ -2501,115 +1551,115 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
2501 | 1551 | ||
2502 | /* Free previously allocated queues that are no longer usable */ | 1552 | /* Free previously allocated queues that are no longer usable */ |
2503 | nvme_free_queues(dev, nr_io_queues + 1); | 1553 | nvme_free_queues(dev, nr_io_queues + 1); |
2504 | nvme_create_io_queues(dev); | 1554 | return nvme_create_io_queues(dev); |
2505 | |||
2506 | return 0; | ||
2507 | 1555 | ||
2508 | free_queues: | 1556 | free_queues: |
2509 | nvme_free_queues(dev, 1); | 1557 | nvme_free_queues(dev, 1); |
2510 | return result; | 1558 | return result; |
2511 | } | 1559 | } |
2512 | 1560 | ||
2513 | static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) | 1561 | static void nvme_set_irq_hints(struct nvme_dev *dev) |
2514 | { | 1562 | { |
2515 | struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); | 1563 | struct nvme_queue *nvmeq; |
2516 | struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); | 1564 | int i; |
2517 | 1565 | ||
2518 | return nsa->ns_id - nsb->ns_id; | 1566 | for (i = 0; i < dev->online_queues; i++) { |
2519 | } | 1567 | nvmeq = dev->queues[i]; |
2520 | 1568 | ||
2521 | static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid) | 1569 | if (!nvmeq->tags || !(*nvmeq->tags)) |
2522 | { | 1570 | continue; |
2523 | struct nvme_ns *ns; | ||
2524 | 1571 | ||
2525 | list_for_each_entry(ns, &dev->namespaces, list) { | 1572 | irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, |
2526 | if (ns->ns_id == nsid) | 1573 | blk_mq_tags_cpumask(*nvmeq->tags)); |
2527 | return ns; | ||
2528 | if (ns->ns_id > nsid) | ||
2529 | break; | ||
2530 | } | 1574 | } |
2531 | return NULL; | ||
2532 | } | 1575 | } |
2533 | 1576 | ||
2534 | static inline bool nvme_io_incapable(struct nvme_dev *dev) | 1577 | static void nvme_dev_scan(struct work_struct *work) |
2535 | { | 1578 | { |
2536 | return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS || | 1579 | struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); |
2537 | dev->online_queues < 2); | 1580 | |
1581 | if (!dev->tagset.tags) | ||
1582 | return; | ||
1583 | nvme_scan_namespaces(&dev->ctrl); | ||
1584 | nvme_set_irq_hints(dev); | ||
2538 | } | 1585 | } |
2539 | 1586 | ||
2540 | static void nvme_ns_remove(struct nvme_ns *ns) | 1587 | static void nvme_del_queue_end(struct request *req, int error) |
2541 | { | 1588 | { |
2542 | bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue); | 1589 | struct nvme_queue *nvmeq = req->end_io_data; |
2543 | 1590 | ||
2544 | if (kill) { | 1591 | blk_mq_free_request(req); |
2545 | blk_set_queue_dying(ns->queue); | 1592 | complete(&nvmeq->dev->ioq_wait); |
2546 | |||
2547 | /* | ||
2548 | * The controller was shutdown first if we got here through | ||
2549 | * device removal. The shutdown may requeue outstanding | ||
2550 | * requests. These need to be aborted immediately so | ||
2551 | * del_gendisk doesn't block indefinitely for their completion. | ||
2552 | */ | ||
2553 | blk_mq_abort_requeue_list(ns->queue); | ||
2554 | } | ||
2555 | if (ns->disk->flags & GENHD_FL_UP) | ||
2556 | del_gendisk(ns->disk); | ||
2557 | if (kill || !blk_queue_dying(ns->queue)) { | ||
2558 | blk_mq_abort_requeue_list(ns->queue); | ||
2559 | blk_cleanup_queue(ns->queue); | ||
2560 | } | ||
2561 | list_del_init(&ns->list); | ||
2562 | kref_put(&ns->kref, nvme_free_ns); | ||
2563 | } | 1593 | } |
2564 | 1594 | ||
2565 | static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) | 1595 | static void nvme_del_cq_end(struct request *req, int error) |
2566 | { | 1596 | { |
2567 | struct nvme_ns *ns, *next; | 1597 | struct nvme_queue *nvmeq = req->end_io_data; |
2568 | unsigned i; | ||
2569 | 1598 | ||
2570 | for (i = 1; i <= nn; i++) { | 1599 | if (!error) { |
2571 | ns = nvme_find_ns(dev, i); | 1600 | unsigned long flags; |
2572 | if (ns) { | 1601 | |
2573 | if (revalidate_disk(ns->disk)) | 1602 | spin_lock_irqsave(&nvmeq->q_lock, flags); |
2574 | nvme_ns_remove(ns); | 1603 | nvme_process_cq(nvmeq); |
2575 | } else | 1604 | spin_unlock_irqrestore(&nvmeq->q_lock, flags); |
2576 | nvme_alloc_ns(dev, i); | ||
2577 | } | ||
2578 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) { | ||
2579 | if (ns->ns_id > nn) | ||
2580 | nvme_ns_remove(ns); | ||
2581 | } | 1605 | } |
2582 | list_sort(NULL, &dev->namespaces, ns_cmp); | 1606 | |
1607 | nvme_del_queue_end(req, error); | ||
2583 | } | 1608 | } |
2584 | 1609 | ||
2585 | static void nvme_set_irq_hints(struct nvme_dev *dev) | 1610 | static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) |
2586 | { | 1611 | { |
2587 | struct nvme_queue *nvmeq; | 1612 | struct request_queue *q = nvmeq->dev->ctrl.admin_q; |
2588 | int i; | 1613 | struct request *req; |
1614 | struct nvme_command cmd; | ||
2589 | 1615 | ||
2590 | for (i = 0; i < dev->online_queues; i++) { | 1616 | memset(&cmd, 0, sizeof(cmd)); |
2591 | nvmeq = dev->queues[i]; | 1617 | cmd.delete_queue.opcode = opcode; |
1618 | cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); | ||
2592 | 1619 | ||
2593 | if (!nvmeq->tags || !(*nvmeq->tags)) | 1620 | req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT); |
2594 | continue; | 1621 | if (IS_ERR(req)) |
1622 | return PTR_ERR(req); | ||
2595 | 1623 | ||
2596 | irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, | 1624 | req->timeout = ADMIN_TIMEOUT; |
2597 | blk_mq_tags_cpumask(*nvmeq->tags)); | 1625 | req->end_io_data = nvmeq; |
2598 | } | 1626 | |
1627 | blk_execute_rq_nowait(q, NULL, req, false, | ||
1628 | opcode == nvme_admin_delete_cq ? | ||
1629 | nvme_del_cq_end : nvme_del_queue_end); | ||
1630 | return 0; | ||
2599 | } | 1631 | } |
2600 | 1632 | ||
2601 | static void nvme_dev_scan(struct work_struct *work) | 1633 | static void nvme_disable_io_queues(struct nvme_dev *dev) |
2602 | { | 1634 | { |
2603 | struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); | 1635 | int pass; |
2604 | struct nvme_id_ctrl *ctrl; | 1636 | unsigned long timeout; |
1637 | u8 opcode = nvme_admin_delete_sq; | ||
2605 | 1638 | ||
2606 | if (!dev->tagset.tags) | 1639 | for (pass = 0; pass < 2; pass++) { |
2607 | return; | 1640 | int sent = 0, i = dev->queue_count - 1; |
2608 | if (nvme_identify_ctrl(dev, &ctrl)) | 1641 | |
2609 | return; | 1642 | reinit_completion(&dev->ioq_wait); |
2610 | nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn)); | 1643 | retry: |
2611 | kfree(ctrl); | 1644 | timeout = ADMIN_TIMEOUT; |
2612 | nvme_set_irq_hints(dev); | 1645 | for (; i > 0; i--) { |
1646 | struct nvme_queue *nvmeq = dev->queues[i]; | ||
1647 | |||
1648 | if (!pass) | ||
1649 | nvme_suspend_queue(nvmeq); | ||
1650 | if (nvme_delete_queue(nvmeq, opcode)) | ||
1651 | break; | ||
1652 | ++sent; | ||
1653 | } | ||
1654 | while (sent--) { | ||
1655 | timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout); | ||
1656 | if (timeout == 0) | ||
1657 | return; | ||
1658 | if (i) | ||
1659 | goto retry; | ||
1660 | } | ||
1661 | opcode = nvme_admin_delete_cq; | ||
1662 | } | ||
2613 | } | 1663 | } |
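One detail worth calling out in the rewritten teardown: NVMe requires that a submission queue be deleted before its completion queue, which is why nvme_disable_io_queues() makes two passes over the queues and only switches to nvme_admin_delete_cq after every Delete SQ command from the first pass has signalled dev->ioq_wait. Condensed to a single queue, the sequence is roughly (a sketch using the names from this patch, not additional code):

	/* pass 0: quiesce the queue and delete its submission queue */
	nvme_suspend_queue(nvmeq);
	nvme_delete_queue(nvmeq, nvme_admin_delete_sq);
	wait_for_completion_io_timeout(&dev->ioq_wait, ADMIN_TIMEOUT);

	/* pass 1: only now is it safe to delete the completion queue */
	nvme_delete_queue(nvmeq, nvme_admin_delete_cq);
	wait_for_completion_io_timeout(&dev->ioq_wait, ADMIN_TIMEOUT);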
2614 | 1664 | ||
2615 | /* | 1665 | /* |
@@ -2620,42 +1670,7 @@ static void nvme_dev_scan(struct work_struct *work) | |||
2620 | */ | 1670 | */ |
2621 | static int nvme_dev_add(struct nvme_dev *dev) | 1671 | static int nvme_dev_add(struct nvme_dev *dev) |
2622 | { | 1672 | { |
2623 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 1673 | if (!dev->ctrl.tagset) { |
2624 | int res; | ||
2625 | struct nvme_id_ctrl *ctrl; | ||
2626 | int shift = NVME_CAP_MPSMIN(lo_hi_readq(&dev->bar->cap)) + 12; | ||
2627 | |||
2628 | res = nvme_identify_ctrl(dev, &ctrl); | ||
2629 | if (res) { | ||
2630 | dev_err(dev->dev, "Identify Controller failed (%d)\n", res); | ||
2631 | return -EIO; | ||
2632 | } | ||
2633 | |||
2634 | dev->oncs = le16_to_cpup(&ctrl->oncs); | ||
2635 | dev->abort_limit = ctrl->acl + 1; | ||
2636 | dev->vwc = ctrl->vwc; | ||
2637 | memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); | ||
2638 | memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); | ||
2639 | memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); | ||
2640 | if (ctrl->mdts) | ||
2641 | dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); | ||
2642 | else | ||
2643 | dev->max_hw_sectors = UINT_MAX; | ||
2644 | if ((pdev->vendor == PCI_VENDOR_ID_INTEL) && | ||
2645 | (pdev->device == 0x0953) && ctrl->vs[3]) { | ||
2646 | unsigned int max_hw_sectors; | ||
2647 | |||
2648 | dev->stripe_size = 1 << (ctrl->vs[3] + shift); | ||
2649 | max_hw_sectors = dev->stripe_size >> (shift - 9); | ||
2650 | if (dev->max_hw_sectors) { | ||
2651 | dev->max_hw_sectors = min(max_hw_sectors, | ||
2652 | dev->max_hw_sectors); | ||
2653 | } else | ||
2654 | dev->max_hw_sectors = max_hw_sectors; | ||
2655 | } | ||
2656 | kfree(ctrl); | ||
2657 | |||
2658 | if (!dev->tagset.tags) { | ||
2659 | dev->tagset.ops = &nvme_mq_ops; | 1674 | dev->tagset.ops = &nvme_mq_ops; |
2660 | dev->tagset.nr_hw_queues = dev->online_queues - 1; | 1675 | dev->tagset.nr_hw_queues = dev->online_queues - 1; |
2661 | dev->tagset.timeout = NVME_IO_TIMEOUT; | 1676 | dev->tagset.timeout = NVME_IO_TIMEOUT; |
@@ -2668,8 +1683,9 @@ static int nvme_dev_add(struct nvme_dev *dev) | |||
2668 | 1683 | ||
2669 | if (blk_mq_alloc_tag_set(&dev->tagset)) | 1684 | if (blk_mq_alloc_tag_set(&dev->tagset)) |
2670 | return 0; | 1685 | return 0; |
1686 | dev->ctrl.tagset = &dev->tagset; | ||
2671 | } | 1687 | } |
2672 | schedule_work(&dev->scan_work); | 1688 | queue_work(nvme_workq, &dev->scan_work); |
2673 | return 0; | 1689 | return 0; |
2674 | } | 1690 | } |
2675 | 1691 | ||
@@ -2699,7 +1715,7 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
2699 | if (!dev->bar) | 1715 | if (!dev->bar) |
2700 | goto disable; | 1716 | goto disable; |
2701 | 1717 | ||
2702 | if (readl(&dev->bar->csts) == -1) { | 1718 | if (readl(dev->bar + NVME_REG_CSTS) == -1) { |
2703 | result = -ENODEV; | 1719 | result = -ENODEV; |
2704 | goto unmap; | 1720 | goto unmap; |
2705 | } | 1721 | } |
@@ -2714,10 +1730,11 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
2714 | goto unmap; | 1730 | goto unmap; |
2715 | } | 1731 | } |
2716 | 1732 | ||
2717 | cap = lo_hi_readq(&dev->bar->cap); | 1733 | cap = lo_hi_readq(dev->bar + NVME_REG_CAP); |
1734 | |||
2718 | dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); | 1735 | dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); |
2719 | dev->db_stride = 1 << NVME_CAP_STRIDE(cap); | 1736 | dev->db_stride = 1 << NVME_CAP_STRIDE(cap); |
2720 | dev->dbs = ((void __iomem *)dev->bar) + 4096; | 1737 | dev->dbs = dev->bar + 4096; |
2721 | 1738 | ||
2722 | /* | 1739 | /* |
2723 | * Temporary fix for the Apple controller found in the MacBook8,1 and | 1740 | * Temporary fix for the Apple controller found in the MacBook8,1 and |
@@ -2730,9 +1747,11 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
2730 | dev->q_depth); | 1747 | dev->q_depth); |
2731 | } | 1748 | } |
2732 | 1749 | ||
2733 | if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) | 1750 | if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) |
2734 | dev->cmb = nvme_map_cmb(dev); | 1751 | dev->cmb = nvme_map_cmb(dev); |
2735 | 1752 | ||
1753 | pci_enable_pcie_error_reporting(pdev); | ||
1754 | pci_save_state(pdev); | ||
2736 | return 0; | 1755 | return 0; |
2737 | 1756 | ||
2738 | unmap: | 1757 | unmap: |
@@ -2760,152 +1779,34 @@ static void nvme_dev_unmap(struct nvme_dev *dev) | |||
2760 | pci_release_regions(pdev); | 1779 | pci_release_regions(pdev); |
2761 | } | 1780 | } |
2762 | 1781 | ||
2763 | if (pci_is_enabled(pdev)) | 1782 | if (pci_is_enabled(pdev)) { |
1783 | pci_disable_pcie_error_reporting(pdev); | ||
2764 | pci_disable_device(pdev); | 1784 | pci_disable_device(pdev); |
2765 | } | ||
2766 | |||
2767 | struct nvme_delq_ctx { | ||
2768 | struct task_struct *waiter; | ||
2769 | struct kthread_worker *worker; | ||
2770 | atomic_t refcount; | ||
2771 | }; | ||
2772 | |||
2773 | static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev) | ||
2774 | { | ||
2775 | dq->waiter = current; | ||
2776 | mb(); | ||
2777 | |||
2778 | for (;;) { | ||
2779 | set_current_state(TASK_KILLABLE); | ||
2780 | if (!atomic_read(&dq->refcount)) | ||
2781 | break; | ||
2782 | if (!schedule_timeout(ADMIN_TIMEOUT) || | ||
2783 | fatal_signal_pending(current)) { | ||
2784 | /* | ||
2785 | * Disable the controller first since we can't trust it | ||
2786 | * at this point, but leave the admin queue enabled | ||
2787 | * until all queue deletion requests are flushed. | ||
2788 | * FIXME: This may take a while if there are more h/w | ||
2789 | * queues than admin tags. | ||
2790 | */ | ||
2791 | set_current_state(TASK_RUNNING); | ||
2792 | nvme_disable_ctrl(dev, lo_hi_readq(&dev->bar->cap)); | ||
2793 | nvme_clear_queue(dev->queues[0]); | ||
2794 | flush_kthread_worker(dq->worker); | ||
2795 | nvme_disable_queue(dev, 0); | ||
2796 | return; | ||
2797 | } | ||
2798 | } | 1785 | } |
2799 | set_current_state(TASK_RUNNING); | ||
2800 | } | ||
2801 | |||
2802 | static void nvme_put_dq(struct nvme_delq_ctx *dq) | ||
2803 | { | ||
2804 | atomic_dec(&dq->refcount); | ||
2805 | if (dq->waiter) | ||
2806 | wake_up_process(dq->waiter); | ||
2807 | } | ||
2808 | |||
2809 | static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq) | ||
2810 | { | ||
2811 | atomic_inc(&dq->refcount); | ||
2812 | return dq; | ||
2813 | } | ||
2814 | |||
2815 | static void nvme_del_queue_end(struct nvme_queue *nvmeq) | ||
2816 | { | ||
2817 | struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx; | ||
2818 | nvme_put_dq(dq); | ||
2819 | |||
2820 | spin_lock_irq(&nvmeq->q_lock); | ||
2821 | nvme_process_cq(nvmeq); | ||
2822 | spin_unlock_irq(&nvmeq->q_lock); | ||
2823 | } | ||
2824 | |||
2825 | static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode, | ||
2826 | kthread_work_func_t fn) | ||
2827 | { | ||
2828 | struct nvme_command c; | ||
2829 | |||
2830 | memset(&c, 0, sizeof(c)); | ||
2831 | c.delete_queue.opcode = opcode; | ||
2832 | c.delete_queue.qid = cpu_to_le16(nvmeq->qid); | ||
2833 | |||
2834 | init_kthread_work(&nvmeq->cmdinfo.work, fn); | ||
2835 | return nvme_submit_admin_async_cmd(nvmeq->dev, &c, &nvmeq->cmdinfo, | ||
2836 | ADMIN_TIMEOUT); | ||
2837 | } | ||
2838 | |||
2839 | static void nvme_del_cq_work_handler(struct kthread_work *work) | ||
2840 | { | ||
2841 | struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, | ||
2842 | cmdinfo.work); | ||
2843 | nvme_del_queue_end(nvmeq); | ||
2844 | } | ||
2845 | |||
2846 | static int nvme_delete_cq(struct nvme_queue *nvmeq) | ||
2847 | { | ||
2848 | return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq, | ||
2849 | nvme_del_cq_work_handler); | ||
2850 | } | ||
2851 | |||
2852 | static void nvme_del_sq_work_handler(struct kthread_work *work) | ||
2853 | { | ||
2854 | struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, | ||
2855 | cmdinfo.work); | ||
2856 | int status = nvmeq->cmdinfo.status; | ||
2857 | |||
2858 | if (!status) | ||
2859 | status = nvme_delete_cq(nvmeq); | ||
2860 | if (status) | ||
2861 | nvme_del_queue_end(nvmeq); | ||
2862 | } | ||
2863 | |||
2864 | static int nvme_delete_sq(struct nvme_queue *nvmeq) | ||
2865 | { | ||
2866 | return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq, | ||
2867 | nvme_del_sq_work_handler); | ||
2868 | } | 1786 | } |
2869 | 1787 | ||
2870 | static void nvme_del_queue_start(struct kthread_work *work) | 1788 | static int nvme_dev_list_add(struct nvme_dev *dev) |
2871 | { | 1789 | { |
2872 | struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, | 1790 | bool start_thread = false; |
2873 | cmdinfo.work); | ||
2874 | if (nvme_delete_sq(nvmeq)) | ||
2875 | nvme_del_queue_end(nvmeq); | ||
2876 | } | ||
2877 | 1791 | ||
2878 | static void nvme_disable_io_queues(struct nvme_dev *dev) | 1792 | spin_lock(&dev_list_lock); |
2879 | { | 1793 | if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) { |
2880 | int i; | 1794 | start_thread = true; |
2881 | DEFINE_KTHREAD_WORKER_ONSTACK(worker); | 1795 | nvme_thread = NULL; |
2882 | struct nvme_delq_ctx dq; | ||
2883 | struct task_struct *kworker_task = kthread_run(kthread_worker_fn, | ||
2884 | &worker, "nvme%d", dev->instance); | ||
2885 | |||
2886 | if (IS_ERR(kworker_task)) { | ||
2887 | dev_err(dev->dev, | ||
2888 | "Failed to create queue del task\n"); | ||
2889 | for (i = dev->queue_count - 1; i > 0; i--) | ||
2890 | nvme_disable_queue(dev, i); | ||
2891 | return; | ||
2892 | } | 1796 | } |
1797 | list_add(&dev->node, &dev_list); | ||
1798 | spin_unlock(&dev_list_lock); | ||
2893 | 1799 | ||
2894 | dq.waiter = NULL; | 1800 | if (start_thread) { |
2895 | atomic_set(&dq.refcount, 0); | 1801 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); |
2896 | dq.worker = &worker; | 1802 | wake_up_all(&nvme_kthread_wait); |
2897 | for (i = dev->queue_count - 1; i > 0; i--) { | 1803 | } else |
2898 | struct nvme_queue *nvmeq = dev->queues[i]; | 1804 | wait_event_killable(nvme_kthread_wait, nvme_thread); |
2899 | 1805 | ||
2900 | if (nvme_suspend_queue(nvmeq)) | 1806 | if (IS_ERR_OR_NULL(nvme_thread)) |
2901 | continue; | 1807 | return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR; |
2902 | nvmeq->cmdinfo.ctx = nvme_get_dq(&dq); | 1808 | |
2903 | nvmeq->cmdinfo.worker = dq.worker; | 1809 | return 0; |
2904 | init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start); | ||
2905 | queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work); | ||
2906 | } | ||
2907 | nvme_wait_dq(&dq, dev); | ||
2908 | kthread_stop(kworker_task); | ||
2909 | } | 1810 | } |
2910 | 1811 | ||
2911 | /* | 1812 | /* |
@@ -2928,44 +1829,17 @@ static void nvme_dev_list_remove(struct nvme_dev *dev) | |||
2928 | kthread_stop(tmp); | 1829 | kthread_stop(tmp); |
2929 | } | 1830 | } |
2930 | 1831 | ||
2931 | static void nvme_freeze_queues(struct nvme_dev *dev) | 1832 | static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) |
2932 | { | ||
2933 | struct nvme_ns *ns; | ||
2934 | |||
2935 | list_for_each_entry(ns, &dev->namespaces, list) { | ||
2936 | blk_mq_freeze_queue_start(ns->queue); | ||
2937 | |||
2938 | spin_lock_irq(ns->queue->queue_lock); | ||
2939 | queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); | ||
2940 | spin_unlock_irq(ns->queue->queue_lock); | ||
2941 | |||
2942 | blk_mq_cancel_requeue_work(ns->queue); | ||
2943 | blk_mq_stop_hw_queues(ns->queue); | ||
2944 | } | ||
2945 | } | ||
2946 | |||
2947 | static void nvme_unfreeze_queues(struct nvme_dev *dev) | ||
2948 | { | ||
2949 | struct nvme_ns *ns; | ||
2950 | |||
2951 | list_for_each_entry(ns, &dev->namespaces, list) { | ||
2952 | queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue); | ||
2953 | blk_mq_unfreeze_queue(ns->queue); | ||
2954 | blk_mq_start_stopped_hw_queues(ns->queue, true); | ||
2955 | blk_mq_kick_requeue_list(ns->queue); | ||
2956 | } | ||
2957 | } | ||
2958 | |||
2959 | static void nvme_dev_shutdown(struct nvme_dev *dev) | ||
2960 | { | 1833 | { |
2961 | int i; | 1834 | int i; |
2962 | u32 csts = -1; | 1835 | u32 csts = -1; |
2963 | 1836 | ||
2964 | nvme_dev_list_remove(dev); | 1837 | nvme_dev_list_remove(dev); |
2965 | 1838 | ||
1839 | mutex_lock(&dev->shutdown_lock); | ||
2966 | if (dev->bar) { | 1840 | if (dev->bar) { |
2967 | nvme_freeze_queues(dev); | 1841 | nvme_stop_queues(&dev->ctrl); |
2968 | csts = readl(&dev->bar->csts); | 1842 | csts = readl(dev->bar + NVME_REG_CSTS); |
2969 | } | 1843 | } |
2970 | if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { | 1844 | if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { |
2971 | for (i = dev->queue_count - 1; i >= 0; i--) { | 1845 | for (i = dev->queue_count - 1; i >= 0; i--) { |
@@ -2974,30 +1848,13 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) | |||
2974 | } | 1848 | } |
2975 | } else { | 1849 | } else { |
2976 | nvme_disable_io_queues(dev); | 1850 | nvme_disable_io_queues(dev); |
2977 | nvme_shutdown_ctrl(dev); | 1851 | nvme_disable_admin_queue(dev, shutdown); |
2978 | nvme_disable_queue(dev, 0); | ||
2979 | } | 1852 | } |
2980 | nvme_dev_unmap(dev); | 1853 | nvme_dev_unmap(dev); |
2981 | 1854 | ||
2982 | for (i = dev->queue_count - 1; i >= 0; i--) | 1855 | for (i = dev->queue_count - 1; i >= 0; i--) |
2983 | nvme_clear_queue(dev->queues[i]); | 1856 | nvme_clear_queue(dev->queues[i]); |
2984 | } | 1857 | mutex_unlock(&dev->shutdown_lock); |
2985 | |||
2986 | static void nvme_dev_remove(struct nvme_dev *dev) | ||
2987 | { | ||
2988 | struct nvme_ns *ns, *next; | ||
2989 | |||
2990 | if (nvme_io_incapable(dev)) { | ||
2991 | /* | ||
2992 | * If the device is not capable of IO (surprise hot-removal, | ||
2993 | * for example), we need to quiesce prior to deleting the | ||
2994 | * namespaces. This will end outstanding requests and prevent | ||
2995 | * attempts to sync dirty data. | ||
2996 | */ | ||
2997 | nvme_dev_shutdown(dev); | ||
2998 | } | ||
2999 | list_for_each_entry_safe(ns, next, &dev->namespaces, list) | ||
3000 | nvme_ns_remove(ns); | ||
3001 | } | 1858 | } |
3002 | 1859 | ||
3003 | static int nvme_setup_prp_pools(struct nvme_dev *dev) | 1860 | static int nvme_setup_prp_pools(struct nvme_dev *dev) |
@@ -3023,119 +1880,36 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) | |||
3023 | dma_pool_destroy(dev->prp_small_pool); | 1880 | dma_pool_destroy(dev->prp_small_pool); |
3024 | } | 1881 | } |
3025 | 1882 | ||
3026 | static DEFINE_IDA(nvme_instance_ida); | 1883 | static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) |
3027 | |||
3028 | static int nvme_set_instance(struct nvme_dev *dev) | ||
3029 | { | ||
3030 | int instance, error; | ||
3031 | |||
3032 | do { | ||
3033 | if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) | ||
3034 | return -ENODEV; | ||
3035 | |||
3036 | spin_lock(&dev_list_lock); | ||
3037 | error = ida_get_new(&nvme_instance_ida, &instance); | ||
3038 | spin_unlock(&dev_list_lock); | ||
3039 | } while (error == -EAGAIN); | ||
3040 | |||
3041 | if (error) | ||
3042 | return -ENODEV; | ||
3043 | |||
3044 | dev->instance = instance; | ||
3045 | return 0; | ||
3046 | } | ||
3047 | |||
3048 | static void nvme_release_instance(struct nvme_dev *dev) | ||
3049 | { | 1884 | { |
3050 | spin_lock(&dev_list_lock); | 1885 | struct nvme_dev *dev = to_nvme_dev(ctrl); |
3051 | ida_remove(&nvme_instance_ida, dev->instance); | ||
3052 | spin_unlock(&dev_list_lock); | ||
3053 | } | ||
3054 | |||
3055 | static void nvme_free_dev(struct kref *kref) | ||
3056 | { | ||
3057 | struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); | ||
3058 | 1886 | ||
3059 | put_device(dev->dev); | 1887 | put_device(dev->dev); |
3060 | put_device(dev->device); | ||
3061 | nvme_release_instance(dev); | ||
3062 | if (dev->tagset.tags) | 1888 | if (dev->tagset.tags) |
3063 | blk_mq_free_tag_set(&dev->tagset); | 1889 | blk_mq_free_tag_set(&dev->tagset); |
3064 | if (dev->admin_q) | 1890 | if (dev->ctrl.admin_q) |
3065 | blk_put_queue(dev->admin_q); | 1891 | blk_put_queue(dev->ctrl.admin_q); |
3066 | kfree(dev->queues); | 1892 | kfree(dev->queues); |
3067 | kfree(dev->entry); | 1893 | kfree(dev->entry); |
3068 | kfree(dev); | 1894 | kfree(dev); |
3069 | } | 1895 | } |
3070 | 1896 | ||
3071 | static int nvme_dev_open(struct inode *inode, struct file *f) | 1897 | static void nvme_reset_work(struct work_struct *work) |
3072 | { | 1898 | { |
3073 | struct nvme_dev *dev; | 1899 | struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); |
3074 | int instance = iminor(inode); | 1900 | int result; |
3075 | int ret = -ENODEV; | ||
3076 | |||
3077 | spin_lock(&dev_list_lock); | ||
3078 | list_for_each_entry(dev, &dev_list, node) { | ||
3079 | if (dev->instance == instance) { | ||
3080 | if (!dev->admin_q) { | ||
3081 | ret = -EWOULDBLOCK; | ||
3082 | break; | ||
3083 | } | ||
3084 | if (!kref_get_unless_zero(&dev->kref)) | ||
3085 | break; | ||
3086 | f->private_data = dev; | ||
3087 | ret = 0; | ||
3088 | break; | ||
3089 | } | ||
3090 | } | ||
3091 | spin_unlock(&dev_list_lock); | ||
3092 | |||
3093 | return ret; | ||
3094 | } | ||
3095 | 1901 | ||
3096 | static int nvme_dev_release(struct inode *inode, struct file *f) | 1902 | if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags))) |
3097 | { | 1903 | goto out; |
3098 | struct nvme_dev *dev = f->private_data; | ||
3099 | kref_put(&dev->kref, nvme_free_dev); | ||
3100 | return 0; | ||
3101 | } | ||
3102 | 1904 | ||
3103 | static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | 1905 | /* |
3104 | { | 1906 | * If we're called to reset a live controller first shut it down before |
3105 | struct nvme_dev *dev = f->private_data; | 1907 | * moving on. |
3106 | struct nvme_ns *ns; | 1908 | */ |
3107 | 1909 | if (dev->bar) | |
3108 | switch (cmd) { | 1910 | nvme_dev_disable(dev, false); |
3109 | case NVME_IOCTL_ADMIN_CMD: | ||
3110 | return nvme_user_cmd(dev, NULL, (void __user *)arg); | ||
3111 | case NVME_IOCTL_IO_CMD: | ||
3112 | if (list_empty(&dev->namespaces)) | ||
3113 | return -ENOTTY; | ||
3114 | ns = list_first_entry(&dev->namespaces, struct nvme_ns, list); | ||
3115 | return nvme_user_cmd(dev, ns, (void __user *)arg); | ||
3116 | case NVME_IOCTL_RESET: | ||
3117 | dev_warn(dev->dev, "resetting controller\n"); | ||
3118 | return nvme_reset(dev); | ||
3119 | case NVME_IOCTL_SUBSYS_RESET: | ||
3120 | return nvme_subsys_reset(dev); | ||
3121 | default: | ||
3122 | return -ENOTTY; | ||
3123 | } | ||
3124 | } | ||
3125 | 1911 | ||
3126 | static const struct file_operations nvme_dev_fops = { | 1912 | set_bit(NVME_CTRL_RESETTING, &dev->flags); |
3127 | .owner = THIS_MODULE, | ||
3128 | .open = nvme_dev_open, | ||
3129 | .release = nvme_dev_release, | ||
3130 | .unlocked_ioctl = nvme_dev_ioctl, | ||
3131 | .compat_ioctl = nvme_dev_ioctl, | ||
3132 | }; | ||
3133 | |||
3134 | static void nvme_probe_work(struct work_struct *work) | ||
3135 | { | ||
3136 | struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); | ||
3137 | bool start_thread = false; | ||
3138 | int result; | ||
3139 | 1913 | ||
3140 | result = nvme_dev_map(dev); | 1914 | result = nvme_dev_map(dev); |
3141 | if (result) | 1915 | if (result) |
@@ -3145,35 +1919,24 @@ static void nvme_probe_work(struct work_struct *work) | |||
3145 | if (result) | 1919 | if (result) |
3146 | goto unmap; | 1920 | goto unmap; |
3147 | 1921 | ||
3148 | spin_lock(&dev_list_lock); | ||
3149 | if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) { | ||
3150 | start_thread = true; | ||
3151 | nvme_thread = NULL; | ||
3152 | } | ||
3153 | list_add(&dev->node, &dev_list); | ||
3154 | spin_unlock(&dev_list_lock); | ||
3155 | |||
3156 | if (start_thread) { | ||
3157 | nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); | ||
3158 | wake_up_all(&nvme_kthread_wait); | ||
3159 | } else | ||
3160 | wait_event_killable(nvme_kthread_wait, nvme_thread); | ||
3161 | |||
3162 | if (IS_ERR_OR_NULL(nvme_thread)) { | ||
3163 | result = nvme_thread ? PTR_ERR(nvme_thread) : -EINTR; | ||
3164 | goto disable; | ||
3165 | } | ||
3166 | |||
3167 | nvme_init_queue(dev->queues[0], 0); | 1922 | nvme_init_queue(dev->queues[0], 0); |
3168 | result = nvme_alloc_admin_tags(dev); | 1923 | result = nvme_alloc_admin_tags(dev); |
3169 | if (result) | 1924 | if (result) |
3170 | goto disable; | 1925 | goto disable; |
3171 | 1926 | ||
1927 | result = nvme_init_identify(&dev->ctrl); | ||
1928 | if (result) | ||
1929 | goto free_tags; | ||
1930 | |||
3172 | result = nvme_setup_io_queues(dev); | 1931 | result = nvme_setup_io_queues(dev); |
3173 | if (result) | 1932 | if (result) |
3174 | goto free_tags; | 1933 | goto free_tags; |
3175 | 1934 | ||
3176 | dev->event_limit = 1; | 1935 | dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS; |
1936 | |||
1937 | result = nvme_dev_list_add(dev); | ||
1938 | if (result) | ||
1939 | goto remove; | ||
3177 | 1940 | ||
3178 | /* | 1941 | /* |
3179 | * Keep the controller around but remove all namespaces if we don't have | 1942 | * Keep the controller around but remove all namespaces if we don't have |
@@ -3181,117 +1944,98 @@ static void nvme_probe_work(struct work_struct *work) | |||
3181 | */ | 1944 | */ |
3182 | if (dev->online_queues < 2) { | 1945 | if (dev->online_queues < 2) { |
3183 | dev_warn(dev->dev, "IO queues not created\n"); | 1946 | dev_warn(dev->dev, "IO queues not created\n"); |
3184 | nvme_dev_remove(dev); | 1947 | nvme_remove_namespaces(&dev->ctrl); |
3185 | } else { | 1948 | } else { |
3186 | nvme_unfreeze_queues(dev); | 1949 | nvme_start_queues(&dev->ctrl); |
3187 | nvme_dev_add(dev); | 1950 | nvme_dev_add(dev); |
3188 | } | 1951 | } |
3189 | 1952 | ||
1953 | clear_bit(NVME_CTRL_RESETTING, &dev->flags); | ||
3190 | return; | 1954 | return; |
3191 | 1955 | ||
1956 | remove: | ||
1957 | nvme_dev_list_remove(dev); | ||
3192 | free_tags: | 1958 | free_tags: |
3193 | nvme_dev_remove_admin(dev); | 1959 | nvme_dev_remove_admin(dev); |
3194 | blk_put_queue(dev->admin_q); | 1960 | blk_put_queue(dev->ctrl.admin_q); |
3195 | dev->admin_q = NULL; | 1961 | dev->ctrl.admin_q = NULL; |
3196 | dev->queues[0]->tags = NULL; | 1962 | dev->queues[0]->tags = NULL; |
3197 | disable: | 1963 | disable: |
3198 | nvme_disable_queue(dev, 0); | 1964 | nvme_disable_admin_queue(dev, false); |
3199 | nvme_dev_list_remove(dev); | ||
3200 | unmap: | 1965 | unmap: |
3201 | nvme_dev_unmap(dev); | 1966 | nvme_dev_unmap(dev); |
3202 | out: | 1967 | out: |
3203 | if (!work_busy(&dev->reset_work)) | 1968 | nvme_remove_dead_ctrl(dev); |
3204 | nvme_dead_ctrl(dev); | ||
3205 | } | 1969 | } |
3206 | 1970 | ||
3207 | static int nvme_remove_dead_ctrl(void *arg) | 1971 | static void nvme_remove_dead_ctrl_work(struct work_struct *work) |
3208 | { | 1972 | { |
3209 | struct nvme_dev *dev = (struct nvme_dev *)arg; | 1973 | struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); |
3210 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 1974 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
3211 | 1975 | ||
3212 | if (pci_get_drvdata(pdev)) | 1976 | if (pci_get_drvdata(pdev)) |
3213 | pci_stop_and_remove_bus_device_locked(pdev); | 1977 | pci_stop_and_remove_bus_device_locked(pdev); |
3214 | kref_put(&dev->kref, nvme_free_dev); | 1978 | nvme_put_ctrl(&dev->ctrl); |
3215 | return 0; | ||
3216 | } | 1979 | } |
3217 | 1980 | ||
3218 | static void nvme_dead_ctrl(struct nvme_dev *dev) | 1981 | static void nvme_remove_dead_ctrl(struct nvme_dev *dev) |
3219 | { | 1982 | { |
3220 | dev_warn(dev->dev, "Device failed to resume\n"); | 1983 | dev_warn(dev->dev, "Removing after probe failure\n"); |
3221 | kref_get(&dev->kref); | 1984 | kref_get(&dev->ctrl.kref); |
3222 | if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", | 1985 | if (!schedule_work(&dev->remove_work)) |
3223 | dev->instance))) { | 1986 | nvme_put_ctrl(&dev->ctrl); |
3224 | dev_err(dev->dev, | ||
3225 | "Failed to start controller remove task\n"); | ||
3226 | kref_put(&dev->kref, nvme_free_dev); | ||
3227 | } | ||
3228 | } | 1987 | } |
3229 | 1988 | ||
3230 | static void nvme_reset_work(struct work_struct *ws) | 1989 | static int nvme_reset(struct nvme_dev *dev) |
3231 | { | 1990 | { |
3232 | struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); | 1991 | if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) |
3233 | bool in_probe = work_busy(&dev->probe_work); | 1992 | return -ENODEV; |
3234 | |||
3235 | nvme_dev_shutdown(dev); | ||
3236 | 1993 | ||
3237 | /* Synchronize with device probe so that work will see failure status | 1994 | if (!queue_work(nvme_workq, &dev->reset_work)) |
3238 | * and exit gracefully without trying to schedule another reset */ | 1995 | return -EBUSY; |
3239 | flush_work(&dev->probe_work); | ||
3240 | 1996 | ||
3241 | /* Fail this device if reset occurred during probe to avoid | ||
3242 | * infinite initialization loops. */ | 1998 | return 0; |
3243 | if (in_probe) { | ||
3244 | nvme_dead_ctrl(dev); | ||
3245 | return; | ||
3246 | } | ||
3247 | /* Schedule device resume asynchronously so the reset work is available | ||
3248 | * to cleanup errors that may occur during reinitialization */ | ||
3249 | schedule_work(&dev->probe_work); | ||
3250 | } | 1999 | } |
3251 | 2000 | ||
3252 | static int __nvme_reset(struct nvme_dev *dev) | 2001 | static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) |
3253 | { | 2002 | { |
3254 | if (work_pending(&dev->reset_work)) | 2003 | *val = readl(to_nvme_dev(ctrl)->bar + off); |
3255 | return -EBUSY; | ||
3256 | list_del_init(&dev->node); | ||
3257 | queue_work(nvme_workq, &dev->reset_work); | ||
3258 | return 0; | 2004 | return 0; |
3259 | } | 2005 | } |
3260 | 2006 | ||
3261 | static int nvme_reset(struct nvme_dev *dev) | 2007 | static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) |
3262 | { | 2008 | { |
3263 | int ret; | 2009 | writel(val, to_nvme_dev(ctrl)->bar + off); |
3264 | 2010 | return 0; | |
3265 | if (!dev->admin_q || blk_queue_dying(dev->admin_q)) | 2011 | } |
3266 | return -ENODEV; | ||
3267 | |||
3268 | spin_lock(&dev_list_lock); | ||
3269 | ret = __nvme_reset(dev); | ||
3270 | spin_unlock(&dev_list_lock); | ||
3271 | |||
3272 | if (!ret) { | ||
3273 | flush_work(&dev->reset_work); | ||
3274 | flush_work(&dev->probe_work); | ||
3275 | return 0; | ||
3276 | } | ||
3277 | 2012 | ||
3278 | return ret; | 2013 | static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) |
2014 | { | ||
2015 | *val = readq(to_nvme_dev(ctrl)->bar + off); | ||
2016 | return 0; | ||
3279 | } | 2017 | } |
3280 | 2018 | ||
3281 | static ssize_t nvme_sysfs_reset(struct device *dev, | 2019 | static bool nvme_pci_io_incapable(struct nvme_ctrl *ctrl) |
3282 | struct device_attribute *attr, const char *buf, | ||
3283 | size_t count) | ||
3284 | { | 2020 | { |
3285 | struct nvme_dev *ndev = dev_get_drvdata(dev); | 2021 | struct nvme_dev *dev = to_nvme_dev(ctrl); |
3286 | int ret; | ||
3287 | 2022 | ||
3288 | ret = nvme_reset(ndev); | 2023 | return !dev->bar || dev->online_queues < 2; |
3289 | if (ret < 0) | 2024 | } |
3290 | return ret; | ||
3291 | 2025 | ||
3292 | return count; | 2026 | static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) |
2027 | { | ||
2028 | return nvme_reset(to_nvme_dev(ctrl)); | ||
3293 | } | 2029 | } |
3294 | static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); | 2030 | |
2031 | static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { | ||
2032 | .reg_read32 = nvme_pci_reg_read32, | ||
2033 | .reg_write32 = nvme_pci_reg_write32, | ||
2034 | .reg_read64 = nvme_pci_reg_read64, | ||
2035 | .io_incapable = nvme_pci_io_incapable, | ||
2036 | .reset_ctrl = nvme_pci_reset_ctrl, | ||
2037 | .free_ctrl = nvme_pci_free_ctrl, | ||
2038 | }; | ||
3295 | 2039 | ||
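The ops table above is the seam along which this series splits the PCIe transport from the new shared core: core code no longer touches dev->bar directly, it goes through these callbacks. As a minimal sketch of what the transport-neutral side can do with them (the helper below is invented for illustration; only the ->reg_read32 callback, NVME_REG_CSTS and NVME_CSTS_CFS come from this patch):

/* Sketch only: check for a fatal controller status without knowing the
 * transport, by reading CSTS through the ops table. */
static bool nvme_ctrl_fatal_example(struct nvme_ctrl *ctrl)
{
	u32 csts;

	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts))
		return false;	/* register space not reachable */
	return csts & NVME_CSTS_CFS;
}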
3296 | static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | 2040 | static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) |
3297 | { | 2041 | { |
@@ -3314,46 +2058,30 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
3314 | if (!dev->queues) | 2058 | if (!dev->queues) |
3315 | goto free; | 2059 | goto free; |
3316 | 2060 | ||
3317 | INIT_LIST_HEAD(&dev->namespaces); | ||
3318 | INIT_WORK(&dev->reset_work, nvme_reset_work); | ||
3319 | dev->dev = get_device(&pdev->dev); | 2061 | dev->dev = get_device(&pdev->dev); |
3320 | pci_set_drvdata(pdev, dev); | 2062 | pci_set_drvdata(pdev, dev); |
3321 | result = nvme_set_instance(dev); | 2063 | |
3322 | if (result) | 2064 | INIT_LIST_HEAD(&dev->node); |
3323 | goto put_pci; | 2065 | INIT_WORK(&dev->scan_work, nvme_dev_scan); |
2066 | INIT_WORK(&dev->reset_work, nvme_reset_work); | ||
2067 | INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); | ||
2068 | mutex_init(&dev->shutdown_lock); | ||
2069 | init_completion(&dev->ioq_wait); | ||
3324 | 2070 | ||
3325 | result = nvme_setup_prp_pools(dev); | 2071 | result = nvme_setup_prp_pools(dev); |
3326 | if (result) | 2072 | if (result) |
3327 | goto release; | 2073 | goto put_pci; |
3328 | |||
3329 | kref_init(&dev->kref); | ||
3330 | dev->device = device_create(nvme_class, &pdev->dev, | ||
3331 | MKDEV(nvme_char_major, dev->instance), | ||
3332 | dev, "nvme%d", dev->instance); | ||
3333 | if (IS_ERR(dev->device)) { | ||
3334 | result = PTR_ERR(dev->device); | ||
3335 | goto release_pools; | ||
3336 | } | ||
3337 | get_device(dev->device); | ||
3338 | dev_set_drvdata(dev->device, dev); | ||
3339 | 2074 | ||
3340 | result = device_create_file(dev->device, &dev_attr_reset_controller); | 2075 | result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, |
2076 | id->driver_data); | ||
3341 | if (result) | 2077 | if (result) |
3342 | goto put_dev; | 2078 | goto release_pools; |
3343 | 2079 | ||
3344 | INIT_LIST_HEAD(&dev->node); | 2080 | queue_work(nvme_workq, &dev->reset_work); |
3345 | INIT_WORK(&dev->scan_work, nvme_dev_scan); | ||
3346 | INIT_WORK(&dev->probe_work, nvme_probe_work); | ||
3347 | schedule_work(&dev->probe_work); | ||
3348 | return 0; | 2081 | return 0; |
3349 | 2082 | ||
3350 | put_dev: | ||
3351 | device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); | ||
3352 | put_device(dev->device); | ||
3353 | release_pools: | 2083 | release_pools: |
3354 | nvme_release_prp_pools(dev); | 2084 | nvme_release_prp_pools(dev); |
3355 | release: | ||
3356 | nvme_release_instance(dev); | ||
3357 | put_pci: | 2085 | put_pci: |
3358 | put_device(dev->dev); | 2086 | put_device(dev->dev); |
3359 | free: | 2087 | free: |
@@ -3368,15 +2096,15 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) | |||
3368 | struct nvme_dev *dev = pci_get_drvdata(pdev); | 2096 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
3369 | 2097 | ||
3370 | if (prepare) | 2098 | if (prepare) |
3371 | nvme_dev_shutdown(dev); | 2099 | nvme_dev_disable(dev, false); |
3372 | else | 2100 | else |
3373 | schedule_work(&dev->probe_work); | 2101 | queue_work(nvme_workq, &dev->reset_work); |
3374 | } | 2102 | } |
3375 | 2103 | ||
3376 | static void nvme_shutdown(struct pci_dev *pdev) | 2104 | static void nvme_shutdown(struct pci_dev *pdev) |
3377 | { | 2105 | { |
3378 | struct nvme_dev *dev = pci_get_drvdata(pdev); | 2106 | struct nvme_dev *dev = pci_get_drvdata(pdev); |
3379 | nvme_dev_shutdown(dev); | 2107 | nvme_dev_disable(dev, true); |
3380 | } | 2108 | } |
3381 | 2109 | ||
3382 | static void nvme_remove(struct pci_dev *pdev) | 2110 | static void nvme_remove(struct pci_dev *pdev) |
@@ -3388,34 +2116,25 @@ static void nvme_remove(struct pci_dev *pdev) | |||
3388 | spin_unlock(&dev_list_lock); | 2116 | spin_unlock(&dev_list_lock); |
3389 | 2117 | ||
3390 | pci_set_drvdata(pdev, NULL); | 2118 | pci_set_drvdata(pdev, NULL); |
3391 | flush_work(&dev->probe_work); | ||
3392 | flush_work(&dev->reset_work); | 2119 | flush_work(&dev->reset_work); |
3393 | flush_work(&dev->scan_work); | 2120 | flush_work(&dev->scan_work); |
3394 | device_remove_file(dev->device, &dev_attr_reset_controller); | 2121 | nvme_remove_namespaces(&dev->ctrl); |
3395 | nvme_dev_remove(dev); | 2122 | nvme_uninit_ctrl(&dev->ctrl); |
3396 | nvme_dev_shutdown(dev); | 2123 | nvme_dev_disable(dev, true); |
3397 | nvme_dev_remove_admin(dev); | 2124 | nvme_dev_remove_admin(dev); |
3398 | device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); | ||
3399 | nvme_free_queues(dev, 0); | 2125 | nvme_free_queues(dev, 0); |
3400 | nvme_release_cmb(dev); | 2126 | nvme_release_cmb(dev); |
3401 | nvme_release_prp_pools(dev); | 2127 | nvme_release_prp_pools(dev); |
3402 | kref_put(&dev->kref, nvme_free_dev); | 2128 | nvme_put_ctrl(&dev->ctrl); |
3403 | } | 2129 | } |
3404 | 2130 | ||
3405 | /* These functions are yet to be implemented */ | ||
3406 | #define nvme_error_detected NULL | ||
3407 | #define nvme_dump_registers NULL | ||
3408 | #define nvme_link_reset NULL | ||
3409 | #define nvme_slot_reset NULL | ||
3410 | #define nvme_error_resume NULL | ||
3411 | |||
3412 | #ifdef CONFIG_PM_SLEEP | 2131 | #ifdef CONFIG_PM_SLEEP |
3413 | static int nvme_suspend(struct device *dev) | 2132 | static int nvme_suspend(struct device *dev) |
3414 | { | 2133 | { |
3415 | struct pci_dev *pdev = to_pci_dev(dev); | 2134 | struct pci_dev *pdev = to_pci_dev(dev); |
3416 | struct nvme_dev *ndev = pci_get_drvdata(pdev); | 2135 | struct nvme_dev *ndev = pci_get_drvdata(pdev); |
3417 | 2136 | ||
3418 | nvme_dev_shutdown(ndev); | 2137 | nvme_dev_disable(ndev, true); |
3419 | return 0; | 2138 | return 0; |
3420 | } | 2139 | } |
3421 | 2140 | ||
@@ -3424,17 +2143,53 @@ static int nvme_resume(struct device *dev) | |||
3424 | struct pci_dev *pdev = to_pci_dev(dev); | 2143 | struct pci_dev *pdev = to_pci_dev(dev); |
3425 | struct nvme_dev *ndev = pci_get_drvdata(pdev); | 2144 | struct nvme_dev *ndev = pci_get_drvdata(pdev); |
3426 | 2145 | ||
3427 | schedule_work(&ndev->probe_work); | 2146 | queue_work(nvme_workq, &ndev->reset_work); |
3428 | return 0; | 2147 | return 0; |
3429 | } | 2148 | } |
3430 | #endif | 2149 | #endif |
3431 | 2150 | ||
3432 | static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); | 2151 | static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); |
3433 | 2152 | ||
2153 | static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, | ||
2154 | pci_channel_state_t state) | ||
2155 | { | ||
2156 | struct nvme_dev *dev = pci_get_drvdata(pdev); | ||
2157 | |||
2158 | /* | ||
2159 | * A frozen channel requires a reset. When detected, this method will | ||
2160 | * shut down the controller to quiesce. The controller will be restarted | ||
2161 | * after the slot reset through the driver's slot_reset callback. | ||
2162 | */ | ||
2163 | dev_warn(&pdev->dev, "error detected: state:%d\n", state); | ||
2164 | switch (state) { | ||
2165 | case pci_channel_io_normal: | ||
2166 | return PCI_ERS_RESULT_CAN_RECOVER; | ||
2167 | case pci_channel_io_frozen: | ||
2168 | nvme_dev_disable(dev, false); | ||
2169 | return PCI_ERS_RESULT_NEED_RESET; | ||
2170 | case pci_channel_io_perm_failure: | ||
2171 | return PCI_ERS_RESULT_DISCONNECT; | ||
2172 | } | ||
2173 | return PCI_ERS_RESULT_NEED_RESET; | ||
2174 | } | ||
2175 | |||
2176 | static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) | ||
2177 | { | ||
2178 | struct nvme_dev *dev = pci_get_drvdata(pdev); | ||
2179 | |||
2180 | dev_info(&pdev->dev, "restart after slot reset\n"); | ||
2181 | pci_restore_state(pdev); | ||
2182 | queue_work(nvme_workq, &dev->reset_work); | ||
2183 | return PCI_ERS_RESULT_RECOVERED; | ||
2184 | } | ||
2185 | |||
2186 | static void nvme_error_resume(struct pci_dev *pdev) | ||
2187 | { | ||
2188 | pci_cleanup_aer_uncorrect_error_status(pdev); | ||
2189 | } | ||
2190 | |||
3434 | static const struct pci_error_handlers nvme_err_handler = { | 2191 | static const struct pci_error_handlers nvme_err_handler = { |
3435 | .error_detected = nvme_error_detected, | 2192 | .error_detected = nvme_error_detected, |
3436 | .mmio_enabled = nvme_dump_registers, | ||
3437 | .link_reset = nvme_link_reset, | ||
3438 | .slot_reset = nvme_slot_reset, | 2193 | .slot_reset = nvme_slot_reset, |
3439 | .resume = nvme_error_resume, | 2194 | .resume = nvme_error_resume, |
3440 | .reset_notify = nvme_reset_notify, | 2195 | .reset_notify = nvme_reset_notify, |
@@ -3444,6 +2199,10 @@ static const struct pci_error_handlers nvme_err_handler = { | |||
3444 | #define PCI_CLASS_STORAGE_EXPRESS 0x010802 | 2199 | #define PCI_CLASS_STORAGE_EXPRESS 0x010802 |
3445 | 2200 | ||
3446 | static const struct pci_device_id nvme_id_table[] = { | 2201 | static const struct pci_device_id nvme_id_table[] = { |
2202 | { PCI_VDEVICE(INTEL, 0x0953), | ||
2203 | .driver_data = NVME_QUIRK_STRIPE_SIZE, }, | ||
2204 | { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ | ||
2205 | .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, | ||
3447 | { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, | 2206 | { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, |
3448 | { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, | 2207 | { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, |
3449 | { 0, } | 2208 | { 0, } |
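The two Intel entries added here route per-device workarounds through driver_data: nvme_probe() passes id->driver_data into nvme_init_ctrl() (see above), so quirk handling can live in the shared core as a flag test instead of the vendor/device check that the old nvme_dev_add() hard-coded for the 0x0953 stripe size. A hedged sketch of the consumer side (the function is invented for illustration; the quirk names are the ones added in this table):

/* Sketch only: core code can key behaviour off ctrl->quirks rather than
 * PCI IDs, e.g. when parsing the Identify Controller data. */
static void nvme_note_quirks_example(struct nvme_ctrl *ctrl)
{
	if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
		pr_debug("nvme: stripe-size quirk in effect\n");
	if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
		pr_debug("nvme: Identify CNS quirk in effect\n");
}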
@@ -3468,40 +2227,21 @@ static int __init nvme_init(void) | |||
3468 | 2227 | ||
3469 | init_waitqueue_head(&nvme_kthread_wait); | 2228 | init_waitqueue_head(&nvme_kthread_wait); |
3470 | 2229 | ||
3471 | nvme_workq = create_singlethread_workqueue("nvme"); | 2230 | nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0); |
3472 | if (!nvme_workq) | 2231 | if (!nvme_workq) |
3473 | return -ENOMEM; | 2232 | return -ENOMEM; |
3474 | 2233 | ||
3475 | result = register_blkdev(nvme_major, "nvme"); | 2234 | result = nvme_core_init(); |
3476 | if (result < 0) | 2235 | if (result < 0) |
3477 | goto kill_workq; | 2236 | goto kill_workq; |
3478 | else if (result > 0) | ||
3479 | nvme_major = result; | ||
3480 | |||
3481 | result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", | ||
3482 | &nvme_dev_fops); | ||
3483 | if (result < 0) | ||
3484 | goto unregister_blkdev; | ||
3485 | else if (result > 0) | ||
3486 | nvme_char_major = result; | ||
3487 | |||
3488 | nvme_class = class_create(THIS_MODULE, "nvme"); | ||
3489 | if (IS_ERR(nvme_class)) { | ||
3490 | result = PTR_ERR(nvme_class); | ||
3491 | goto unregister_chrdev; | ||
3492 | } | ||
3493 | 2237 | ||
3494 | result = pci_register_driver(&nvme_driver); | 2238 | result = pci_register_driver(&nvme_driver); |
3495 | if (result) | 2239 | if (result) |
3496 | goto destroy_class; | 2240 | goto core_exit; |
3497 | return 0; | 2241 | return 0; |
3498 | 2242 | ||
3499 | destroy_class: | 2243 | core_exit: |
3500 | class_destroy(nvme_class); | 2244 | nvme_core_exit(); |
3501 | unregister_chrdev: | ||
3502 | __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); | ||
3503 | unregister_blkdev: | ||
3504 | unregister_blkdev(nvme_major, "nvme"); | ||
3505 | kill_workq: | 2245 | kill_workq: |
3506 | destroy_workqueue(nvme_workq); | 2246 | destroy_workqueue(nvme_workq); |
3507 | return result; | 2247 | return result; |
@@ -3510,10 +2250,8 @@ static int __init nvme_init(void) | |||
3510 | static void __exit nvme_exit(void) | 2250 | static void __exit nvme_exit(void) |
3511 | { | 2251 | { |
3512 | pci_unregister_driver(&nvme_driver); | 2252 | pci_unregister_driver(&nvme_driver); |
3513 | unregister_blkdev(nvme_major, "nvme"); | 2253 | nvme_core_exit(); |
3514 | destroy_workqueue(nvme_workq); | 2254 | destroy_workqueue(nvme_workq); |
3515 | class_destroy(nvme_class); | ||
3516 | __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); | ||
3517 | BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); | 2255 | BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); |
3518 | _nvme_check_size(); | 2256 | _nvme_check_size(); |
3519 | } | 2257 | } |
diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index c3d8d3887a31..e947e298a737 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c | |||
@@ -524,7 +524,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, | |||
524 | struct sg_io_hdr *hdr, u8 *inq_response, | 524 | struct sg_io_hdr *hdr, u8 *inq_response, |
525 | int alloc_len) | 525 | int alloc_len) |
526 | { | 526 | { |
527 | struct nvme_dev *dev = ns->dev; | 527 | struct nvme_ctrl *ctrl = ns->ctrl; |
528 | struct nvme_id_ns *id_ns; | 528 | struct nvme_id_ns *id_ns; |
529 | int res; | 529 | int res; |
530 | int nvme_sc; | 530 | int nvme_sc; |
@@ -532,10 +532,10 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, | |||
532 | u8 resp_data_format = 0x02; | 532 | u8 resp_data_format = 0x02; |
533 | u8 protect; | 533 | u8 protect; |
534 | u8 cmdque = 0x01 << 1; | 534 | u8 cmdque = 0x01 << 1; |
535 | u8 fw_offset = sizeof(dev->firmware_rev); | 535 | u8 fw_offset = sizeof(ctrl->firmware_rev); |
536 | 536 | ||
537 | /* nvme ns identify - use DPS value for PROTECT field */ | 537 | /* nvme ns identify - use DPS value for PROTECT field */ |
538 | nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); | 538 | nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns); |
539 | res = nvme_trans_status_code(hdr, nvme_sc); | 539 | res = nvme_trans_status_code(hdr, nvme_sc); |
540 | if (res) | 540 | if (res) |
541 | return res; | 541 | return res; |
@@ -553,12 +553,12 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, | |||
553 | inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */ | 553 | inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */ |
554 | inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */ | 554 | inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */ |
555 | strncpy(&inq_response[8], "NVMe ", 8); | 555 | strncpy(&inq_response[8], "NVMe ", 8); |
556 | strncpy(&inq_response[16], dev->model, 16); | 556 | strncpy(&inq_response[16], ctrl->model, 16); |
557 | 557 | ||
558 | while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4) | 558 | while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4) |
559 | fw_offset--; | 559 | fw_offset--; |
560 | fw_offset -= 4; | 560 | fw_offset -= 4; |
561 | strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4); | 561 | strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4); |
562 | 562 | ||
563 | xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); | 563 | xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); |
564 | return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); | 564 | return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); |
@@ -588,82 +588,113 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns, | |||
588 | struct sg_io_hdr *hdr, u8 *inq_response, | 588 | struct sg_io_hdr *hdr, u8 *inq_response, |
589 | int alloc_len) | 589 | int alloc_len) |
590 | { | 590 | { |
591 | struct nvme_dev *dev = ns->dev; | ||
592 | int xfer_len; | 591 | int xfer_len; |
593 | 592 | ||
594 | memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); | 593 | memset(inq_response, 0, STANDARD_INQUIRY_LENGTH); |
595 | inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */ | 594 | inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */ |
596 | inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */ | 595 | inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */ |
597 | strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH); | 596 | strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH); |
598 | 597 | ||
599 | xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); | 598 | xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); |
600 | return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); | 599 | return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); |
601 | } | 600 | } |
602 | 601 | ||
603 | static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, | 602 | static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr, |
604 | u8 *inq_response, int alloc_len) | 603 | u8 *inq_response, int alloc_len) |
605 | { | 604 | { |
606 | struct nvme_dev *dev = ns->dev; | 605 | struct nvme_id_ns *id_ns; |
607 | int res; | 606 | int nvme_sc, res; |
608 | int nvme_sc; | 607 | size_t len; |
609 | int xfer_len; | 608 | void *eui; |
610 | __be32 tmp_id = cpu_to_be32(ns->ns_id); | ||
611 | 609 | ||
612 | memset(inq_response, 0, alloc_len); | 610 | nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns); |
613 | inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */ | 611 | res = nvme_trans_status_code(hdr, nvme_sc); |
614 | if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) { | 612 | if (res) |
615 | struct nvme_id_ns *id_ns; | 613 | return res; |
616 | void *eui; | ||
617 | int len; | ||
618 | 614 | ||
619 | nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); | 615 | eui = id_ns->eui64; |
620 | res = nvme_trans_status_code(hdr, nvme_sc); | 616 | len = sizeof(id_ns->eui64); |
621 | if (res) | ||
622 | return res; | ||
623 | 617 | ||
624 | eui = id_ns->eui64; | 618 | if (ns->ctrl->vs >= NVME_VS(1, 2)) { |
625 | len = sizeof(id_ns->eui64); | ||
626 | if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) { | ||
627 | if (bitmap_empty(eui, len * 8)) { | ||
628 | eui = id_ns->nguid; | ||
629 | len = sizeof(id_ns->nguid); | ||
630 | } | ||
631 | } | ||
632 | if (bitmap_empty(eui, len * 8)) { | 619 | if (bitmap_empty(eui, len * 8)) { |
633 | kfree(id_ns); | 620 | eui = id_ns->nguid; |
634 | goto scsi_string; | 621 | len = sizeof(id_ns->nguid); |
635 | } | 622 | } |
623 | } | ||
636 | 624 | ||
637 | inq_response[3] = 4 + len; /* Page Length */ | 625 | if (bitmap_empty(eui, len * 8)) { |
638 | /* Designation Descriptor start */ | 626 | res = -EOPNOTSUPP; |
639 | inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */ | 627 | goto out_free_id; |
640 | inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */ | ||
641 | inq_response[6] = 0x00; /* Rsvd */ | ||
642 | inq_response[7] = len; /* Designator Length */ | ||
643 | memcpy(&inq_response[8], eui, len); | ||
644 | kfree(id_ns); | ||
645 | } else { | ||
646 | scsi_string: | ||
647 | if (alloc_len < 72) { | ||
648 | return nvme_trans_completion(hdr, | ||
649 | SAM_STAT_CHECK_CONDITION, | ||
650 | ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, | ||
651 | SCSI_ASCQ_CAUSE_NOT_REPORTABLE); | ||
652 | } | ||
653 | inq_response[3] = 0x48; /* Page Length */ | ||
654 | /* Designation Descriptor start */ | ||
655 | inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */ | ||
656 | inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */ | ||
657 | inq_response[6] = 0x00; /* Rsvd */ | ||
658 | inq_response[7] = 0x44; /* Designator Length */ | ||
659 | |||
660 | sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor); | ||
661 | memcpy(&inq_response[12], dev->model, sizeof(dev->model)); | ||
662 | sprintf(&inq_response[52], "%04x", tmp_id); | ||
663 | memcpy(&inq_response[56], dev->serial, sizeof(dev->serial)); | ||
664 | } | 628 | } |
665 | xfer_len = alloc_len; | 629 | |
666 | return nvme_trans_copy_to_user(hdr, inq_response, xfer_len); | 630 | memset(inq_response, 0, alloc_len); |
631 | inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; | ||
632 | inq_response[3] = 4 + len; /* Page Length */ | ||
633 | |||
634 | /* Designation Descriptor start */ | ||
635 | inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */ | ||
636 | inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */ | ||
637 | inq_response[6] = 0x00; /* Rsvd */ | ||
638 | inq_response[7] = len; /* Designator Length */ | ||
639 | memcpy(&inq_response[8], eui, len); | ||
640 | |||
641 | res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len); | ||
642 | out_free_id: | ||
643 | kfree(id_ns); | ||
644 | return res; | ||
645 | } | ||
646 | |||
647 | static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns, | ||
648 | struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len) | ||
649 | { | ||
650 | struct nvme_ctrl *ctrl = ns->ctrl; | ||
651 | struct nvme_id_ctrl *id_ctrl; | ||
652 | int nvme_sc, res; | ||
653 | |||
654 | if (alloc_len < 72) { | ||
655 | return nvme_trans_completion(hdr, | ||
656 | SAM_STAT_CHECK_CONDITION, | ||
657 | ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB, | ||
658 | SCSI_ASCQ_CAUSE_NOT_REPORTABLE); | ||
659 | } | ||
660 | |||
661 | nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl); | ||
662 | res = nvme_trans_status_code(hdr, nvme_sc); | ||
663 | if (res) | ||
664 | return res; | ||
665 | |||
666 | memset(inq_response, 0, alloc_len); | ||
667 | inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; | ||
668 | inq_response[3] = 0x48; /* Page Length */ | ||
669 | |||
670 | /* Designation Descriptor start */ | ||
671 | inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */ | ||
672 | inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */ | ||
673 | inq_response[6] = 0x00; /* Rsvd */ | ||
674 | inq_response[7] = 0x44; /* Designator Length */ | ||
675 | |||
676 | sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid)); | ||
677 | memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model)); | ||
678 | sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id)); | ||
679 | memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial)); | ||
680 | |||
681 | res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len); | ||
682 | kfree(id_ctrl); | ||
683 | return res; | ||
684 | } | ||
685 | |||
686 | static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, | ||
687 | u8 *resp, int alloc_len) | ||
688 | { | ||
689 | int res; | ||
690 | |||
691 | if (ns->ctrl->vs >= NVME_VS(1, 1)) { | ||
692 | res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len); | ||
693 | if (res != -EOPNOTSUPP) | ||
694 | return res; | ||
695 | } | ||
696 | |||
697 | return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len); | ||
667 | } | 698 | } |
668 | 699 | ||
669 | static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, | 700 | static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, |
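The rewrite above splits the Device Identification VPD page into two helpers: an EUI-64/NGUID designator for controllers reporting NVMe 1.1 or later (trying NGUID on 1.2+ when the EUI-64 is all zeroes), and a SCSI name-string designator as the fallback, selected when the first helper returns -EOPNOTSUPP. A compact userspace sketch of that dispatch, with stand-in fill functions and comments mirroring the hunk:

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static bool all_zero(const uint8_t *id, size_t len)
{
	for (size_t i = 0; i < len; i++)
		if (id[i])
			return false;
	return true;
}

/* stand-in for nvme_fill_device_id_eui64(): binary EUI-64/NGUID designator */
static int fill_id_eui64(const uint8_t *eui, size_t len, uint8_t *resp)
{
	if (all_zero(eui, len))
		return -EOPNOTSUPP;	/* nothing usable, let the caller fall back */
	resp[4] = 0x01;			/* Proto ID=0h | Code set=1h */
	resp[5] = 0x02;			/* Designator Type=2h */
	resp[7] = (uint8_t)len;		/* Designator Length */
	memcpy(&resp[8], eui, len);
	return 0;
}

/* stand-in for nvme_fill_device_id_scsi_string(): vendor+model+nsid+serial */
static int fill_id_scsi_string(uint8_t *resp)
{
	resp[4] = 0x03;			/* Proto ID=0h | Code set=3h */
	resp[5] = 0x08;			/* Designator Type=8h */
	resp[7] = 0x44;			/* 0x44 = 4 + 40 + 4 + 20 identifier bytes */
	return 0;
}

static int fill_device_id_page(const uint8_t *eui, size_t len, uint8_t *resp)
{
	int res = fill_id_eui64(eui, len, resp);

	if (res != -EOPNOTSUPP)
		return res;
	return fill_id_scsi_string(resp);
}

int main(void)
{
	uint8_t resp[72] = { 0 };
	const uint8_t no_eui[8] = { 0 };	/* forces the SCSI-string fallback */

	fill_device_id_page(no_eui, sizeof(no_eui), resp);
	printf("designator type 0x%02x, length 0x%02x\n", resp[5], resp[7]);
	return 0;
}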
@@ -672,7 +703,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
672 | u8 *inq_response; | 703 | u8 *inq_response; |
673 | int res; | 704 | int res; |
674 | int nvme_sc; | 705 | int nvme_sc; |
675 | struct nvme_dev *dev = ns->dev; | 706 | struct nvme_ctrl *ctrl = ns->ctrl; |
676 | struct nvme_id_ctrl *id_ctrl; | 707 | struct nvme_id_ctrl *id_ctrl; |
677 | struct nvme_id_ns *id_ns; | 708 | struct nvme_id_ns *id_ns; |
678 | int xfer_len; | 709 | int xfer_len; |
@@ -688,7 +719,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
688 | if (inq_response == NULL) | 719 | if (inq_response == NULL) |
689 | return -ENOMEM; | 720 | return -ENOMEM; |
690 | 721 | ||
691 | nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); | 722 | nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns); |
692 | res = nvme_trans_status_code(hdr, nvme_sc); | 723 | res = nvme_trans_status_code(hdr, nvme_sc); |
693 | if (res) | 724 | if (res) |
694 | goto out_free_inq; | 725 | goto out_free_inq; |
@@ -704,7 +735,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
704 | app_chk = protect << 1; | 735 | app_chk = protect << 1; |
705 | ref_chk = protect; | 736 | ref_chk = protect; |
706 | 737 | ||
707 | nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); | 738 | nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl); |
708 | res = nvme_trans_status_code(hdr, nvme_sc); | 739 | res = nvme_trans_status_code(hdr, nvme_sc); |
709 | if (res) | 740 | if (res) |
710 | goto out_free_inq; | 741 | goto out_free_inq; |
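nvme_trans_ext_inq_page() derives its checking bits from whether the namespace has end-to-end protection enabled; the hunk above shows APP_CHK and REF_CHK mirroring a single protect flag. A hedged sketch of that derivation (the DPS decoding below is an assumption, not copied from the driver):

#include <stdint.h>
#include <stdio.h>

static uint8_t ext_inquiry_check_bits(uint8_t dps)
{
	uint8_t protect = (dps & 0x7) ? 1 : 0;	/* assumed: low bits select the PI type */
	uint8_t app_chk = protect << 1;
	uint8_t ref_chk = protect;

	return app_chk | ref_chk;		/* combined into the response byte */
}

int main(void)
{
	printf("0x%02x\n", ext_inquiry_check_bits(1));	/* 0x03 */
	return 0;
}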
@@ -815,7 +846,6 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, | |||
815 | int res; | 846 | int res; |
816 | int xfer_len; | 847 | int xfer_len; |
817 | u8 *log_response; | 848 | u8 *log_response; |
818 | struct nvme_dev *dev = ns->dev; | ||
819 | struct nvme_smart_log *smart_log; | 849 | struct nvme_smart_log *smart_log; |
820 | u8 temp_c; | 850 | u8 temp_c; |
821 | u16 temp_k; | 851 | u16 temp_k; |
@@ -824,7 +854,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, | |||
824 | if (log_response == NULL) | 854 | if (log_response == NULL) |
825 | return -ENOMEM; | 855 | return -ENOMEM; |
826 | 856 | ||
827 | res = nvme_get_log_page(dev, &smart_log); | 857 | res = nvme_get_log_page(ns->ctrl, &smart_log); |
828 | if (res < 0) | 858 | if (res < 0) |
829 | goto out_free_response; | 859 | goto out_free_response; |
830 | 860 | ||
@@ -862,7 +892,6 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
862 | int res; | 892 | int res; |
863 | int xfer_len; | 893 | int xfer_len; |
864 | u8 *log_response; | 894 | u8 *log_response; |
865 | struct nvme_dev *dev = ns->dev; | ||
866 | struct nvme_smart_log *smart_log; | 895 | struct nvme_smart_log *smart_log; |
867 | u32 feature_resp; | 896 | u32 feature_resp; |
868 | u8 temp_c_cur, temp_c_thresh; | 897 | u8 temp_c_cur, temp_c_thresh; |
@@ -872,7 +901,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
872 | if (log_response == NULL) | 901 | if (log_response == NULL) |
873 | return -ENOMEM; | 902 | return -ENOMEM; |
874 | 903 | ||
875 | res = nvme_get_log_page(dev, &smart_log); | 904 | res = nvme_get_log_page(ns->ctrl, &smart_log); |
876 | if (res < 0) | 905 | if (res < 0) |
877 | goto out_free_response; | 906 | goto out_free_response; |
878 | 907 | ||
@@ -886,7 +915,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
886 | kfree(smart_log); | 915 | kfree(smart_log); |
887 | 916 | ||
888 | /* Get Features for Temp Threshold */ | 917 | /* Get Features for Temp Threshold */ |
889 | res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0, | 918 | res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0, |
890 | &feature_resp); | 919 | &feature_resp); |
891 | if (res != NVME_SC_SUCCESS) | 920 | if (res != NVME_SC_SUCCESS) |
892 | temp_c_thresh = LOG_TEMP_UNKNOWN; | 921 | temp_c_thresh = LOG_TEMP_UNKNOWN; |
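Both log translations above now fetch the SMART / health log through nvme_get_log_page(ns->ctrl, ...). That log reports the composite temperature as a 16-bit value in Kelvin, while the SCSI temperature log page wants degrees Celsius; a small sketch of the conversion, using the LOG_TEMP_UNKNOWN sentinel visible above (its value here is an assumption):

#include <stdint.h>
#include <stdio.h>

#define LOG_TEMP_UNKNOWN 0xff		/* assumed sentinel value */

static uint8_t smart_temp_to_celsius(const uint8_t temp[2])
{
	uint16_t temp_k = (uint16_t)temp[1] << 8 | temp[0];	/* little endian */

	if (temp_k < 273)		/* implausible reading, report unknown */
		return LOG_TEMP_UNKNOWN;
	return (uint8_t)(temp_k - 273);
}

int main(void)
{
	const uint8_t raw[2] = { 0x40, 0x01 };	/* 0x0140 = 320 K */

	printf("%u C\n", smart_temp_to_celsius(raw));	/* 47 C */
	return 0;
}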
@@ -948,7 +977,6 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
948 | { | 977 | { |
949 | int res; | 978 | int res; |
950 | int nvme_sc; | 979 | int nvme_sc; |
951 | struct nvme_dev *dev = ns->dev; | ||
952 | struct nvme_id_ns *id_ns; | 980 | struct nvme_id_ns *id_ns; |
953 | u8 flbas; | 981 | u8 flbas; |
954 | u32 lba_length; | 982 | u32 lba_length; |
@@ -958,7 +986,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
958 | else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) | 986 | else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN) |
959 | return -EINVAL; | 987 | return -EINVAL; |
960 | 988 | ||
961 | nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); | 989 | nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns); |
962 | res = nvme_trans_status_code(hdr, nvme_sc); | 990 | res = nvme_trans_status_code(hdr, nvme_sc); |
963 | if (res) | 991 | if (res) |
964 | return res; | 992 | return res; |
@@ -1014,14 +1042,13 @@ static int nvme_trans_fill_caching_page(struct nvme_ns *ns, | |||
1014 | { | 1042 | { |
1015 | int res = 0; | 1043 | int res = 0; |
1016 | int nvme_sc; | 1044 | int nvme_sc; |
1017 | struct nvme_dev *dev = ns->dev; | ||
1018 | u32 feature_resp; | 1045 | u32 feature_resp; |
1019 | u8 vwc; | 1046 | u8 vwc; |
1020 | 1047 | ||
1021 | if (len < MODE_PAGE_CACHING_LEN) | 1048 | if (len < MODE_PAGE_CACHING_LEN) |
1022 | return -EINVAL; | 1049 | return -EINVAL; |
1023 | 1050 | ||
1024 | nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0, | 1051 | nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0, |
1025 | &feature_resp); | 1052 | &feature_resp); |
1026 | res = nvme_trans_status_code(hdr, nvme_sc); | 1053 | res = nvme_trans_status_code(hdr, nvme_sc); |
1027 | if (res) | 1054 | if (res) |
@@ -1207,12 +1234,11 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
1207 | { | 1234 | { |
1208 | int res; | 1235 | int res; |
1209 | int nvme_sc; | 1236 | int nvme_sc; |
1210 | struct nvme_dev *dev = ns->dev; | ||
1211 | struct nvme_id_ctrl *id_ctrl; | 1237 | struct nvme_id_ctrl *id_ctrl; |
1212 | int lowest_pow_st; /* max npss = lowest power consumption */ | 1238 | int lowest_pow_st; /* max npss = lowest power consumption */ |
1213 | unsigned ps_desired = 0; | 1239 | unsigned ps_desired = 0; |
1214 | 1240 | ||
1215 | nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); | 1241 | nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl); |
1216 | res = nvme_trans_status_code(hdr, nvme_sc); | 1242 | res = nvme_trans_status_code(hdr, nvme_sc); |
1217 | if (res) | 1243 | if (res) |
1218 | return res; | 1244 | return res; |
@@ -1256,7 +1282,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
1256 | SCSI_ASCQ_CAUSE_NOT_REPORTABLE); | 1282 | SCSI_ASCQ_CAUSE_NOT_REPORTABLE); |
1257 | break; | 1283 | break; |
1258 | } | 1284 | } |
1259 | nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0, | 1285 | nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0, |
1260 | NULL); | 1286 | NULL); |
1261 | return nvme_trans_status_code(hdr, nvme_sc); | 1287 | return nvme_trans_status_code(hdr, nvme_sc); |
1262 | } | 1288 | } |
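The power-condition translation reads NPSS from Identify Controller (the comment above notes that the highest-numbered state is the lowest-power one) and programs the chosen state index through Set Features / Power Management. A hedged sketch of that selection; the mapping of SCSI power conditions to state indices is simplified here and the dword11 layout is an assumption:

#include <stdint.h>
#include <stdio.h>

enum power_condition { PC_ACTIVE, PC_STANDBY };

/* pick an NVMe power state index: 0 = full performance, npss = lowest power */
static uint32_t pick_power_state(enum power_condition pc, uint8_t npss)
{
	uint8_t ps_desired = (pc == PC_STANDBY) ? npss : 0;

	return ps_desired & 0x1f;	/* assumed: PS sits in dword11 bits 4:0 */
}

int main(void)
{
	printf("dword11=%u\n", pick_power_state(PC_STANDBY, 4));
	return 0;
}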
@@ -1280,7 +1306,6 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr | |||
1280 | u8 buffer_id) | 1306 | u8 buffer_id) |
1281 | { | 1307 | { |
1282 | int nvme_sc; | 1308 | int nvme_sc; |
1283 | struct nvme_dev *dev = ns->dev; | ||
1284 | struct nvme_command c; | 1309 | struct nvme_command c; |
1285 | 1310 | ||
1286 | if (hdr->iovec_count > 0) { | 1311 | if (hdr->iovec_count > 0) { |
@@ -1297,7 +1322,7 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr | |||
1297 | c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); | 1322 | c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1); |
1298 | c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); | 1323 | c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS); |
1299 | 1324 | ||
1300 | nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, | 1325 | nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c, |
1301 | hdr->dxferp, tot_len, NULL, 0); | 1326 | hdr->dxferp, tot_len, NULL, 0); |
1302 | return nvme_trans_status_code(hdr, nvme_sc); | 1327 | return nvme_trans_status_code(hdr, nvme_sc); |
1303 | } | 1328 | } |
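Firmware download now goes through nvme_submit_user_cmd() with the user buffer from hdr->dxferp, but the command fields keep their dword granularity: NUMD is a zero-based count of 32-bit dwords and the offset is also expressed in dwords. A sketch of that arithmetic, assuming BYTES_TO_DWORDS is 4:

#include <stdint.h>
#include <stdio.h>

#define BYTES_TO_DWORDS 4		/* assumed value of the driver constant */

struct fw_download_fields {
	uint32_t numd;			/* number of dwords, zero based */
	uint32_t offset;		/* offset into the firmware image, in dwords */
};

static struct fw_download_fields fw_download_calc(uint32_t tot_len, uint32_t offset)
{
	struct fw_download_fields f = {
		.numd = tot_len / BYTES_TO_DWORDS - 1,
		.offset = offset / BYTES_TO_DWORDS,
	};
	return f;
}

int main(void)
{
	struct fw_download_fields f = fw_download_calc(4096, 8192);

	printf("numd=%u offset=%u\n", f.numd, f.offset);	/* 1023, 2048 */
	return 0;
}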
@@ -1364,14 +1389,13 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
1364 | { | 1389 | { |
1365 | int res = 0; | 1390 | int res = 0; |
1366 | int nvme_sc; | 1391 | int nvme_sc; |
1367 | struct nvme_dev *dev = ns->dev; | ||
1368 | unsigned dword11; | 1392 | unsigned dword11; |
1369 | 1393 | ||
1370 | switch (page_code) { | 1394 | switch (page_code) { |
1371 | case MODE_PAGE_CACHING: | 1395 | case MODE_PAGE_CACHING: |
1372 | dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0); | 1396 | dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0); |
1373 | nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11, | 1397 | nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, |
1374 | 0, NULL); | 1398 | dword11, 0, NULL); |
1375 | res = nvme_trans_status_code(hdr, nvme_sc); | 1399 | res = nvme_trans_status_code(hdr, nvme_sc); |
1376 | break; | 1400 | break; |
1377 | case MODE_PAGE_CONTROL: | 1401 | case MODE_PAGE_CONTROL: |
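MODE SELECT on the caching page is translated into the volatile write cache feature, and MODE SENSE goes the other way via Get Features in the earlier hunk. A sketch of the two-way mapping this suggests; the WCE mask value below is an assumption, only the constant's name comes from the hunk:

#include <stdint.h>
#include <stdio.h>

#define CACHING_MODE_PAGE_WCE_MASK	0x04	/* assumed: byte 2, bit 2 of page 08h */

/* MODE SELECT direction: caching page byte 2 -> Set Features dword11 */
static uint32_t wce_to_dword11(uint8_t mode_page_byte2)
{
	return (mode_page_byte2 & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0;
}

/* MODE SENSE direction: Get Features result -> caching page byte 2 */
static uint8_t feature_to_wce(uint32_t feature_resp)
{
	return (feature_resp & 0x1) ? CACHING_MODE_PAGE_WCE_MASK : 0;
}

int main(void)
{
	printf("dword11=%u byte2=0x%02x\n",
	       wce_to_dword11(0x04), feature_to_wce(1));
	return 0;
}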
@@ -1473,7 +1497,6 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, | |||
1473 | { | 1497 | { |
1474 | int res = 0; | 1498 | int res = 0; |
1475 | int nvme_sc; | 1499 | int nvme_sc; |
1476 | struct nvme_dev *dev = ns->dev; | ||
1477 | u8 flbas; | 1500 | u8 flbas; |
1478 | 1501 | ||
1479 | /* | 1502 | /* |
@@ -1486,7 +1509,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns, | |||
1486 | if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { | 1509 | if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) { |
1487 | struct nvme_id_ns *id_ns; | 1510 | struct nvme_id_ns *id_ns; |
1488 | 1511 | ||
1489 | nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); | 1512 | nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns); |
1490 | res = nvme_trans_status_code(hdr, nvme_sc); | 1513 | res = nvme_trans_status_code(hdr, nvme_sc); |
1491 | if (res) | 1514 | if (res) |
1492 | return res; | 1515 | return res; |
@@ -1570,7 +1593,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
1570 | { | 1593 | { |
1571 | int res; | 1594 | int res; |
1572 | int nvme_sc; | 1595 | int nvme_sc; |
1573 | struct nvme_dev *dev = ns->dev; | ||
1574 | struct nvme_id_ns *id_ns; | 1596 | struct nvme_id_ns *id_ns; |
1575 | u8 i; | 1597 | u8 i; |
1576 | u8 flbas, nlbaf; | 1598 | u8 flbas, nlbaf; |
@@ -1579,7 +1601,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
1579 | struct nvme_command c; | 1601 | struct nvme_command c; |
1580 | 1602 | ||
1581 | /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ | 1603 | /* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */ |
1582 | nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); | 1604 | nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns); |
1583 | res = nvme_trans_status_code(hdr, nvme_sc); | 1605 | res = nvme_trans_status_code(hdr, nvme_sc); |
1584 | if (res) | 1606 | if (res) |
1585 | return res; | 1607 | return res; |
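The comment above ("Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10") describes picking the LBA format whose data size matches the block length requested by the caller; that index then feeds Format NVM. A sketch of the lookup, assuming each format advertises its data size as a power-of-two exponent (lbads):

#include <stdint.h>
#include <stdio.h>

struct lbaf {
	uint8_t lbads;			/* block size = 2^lbads bytes */
};

static int find_lbaf(const struct lbaf *lbafs, int nlbaf, uint32_t block_len)
{
	for (int i = 0; i < nlbaf; i++)
		if ((1u << lbafs[i].lbads) == block_len)
			return i;	/* index goes into Format NVM cdw10 */
	return -1;			/* no matching format */
}

int main(void)
{
	const struct lbaf lbafs[] = { { 9 }, { 12 } };	/* 512 B and 4 KiB */

	printf("lbaf index for 4096: %d\n", find_lbaf(lbafs, 2, 4096));
	return 0;
}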
@@ -1611,7 +1633,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
1611 | c.format.nsid = cpu_to_le32(ns->ns_id); | 1633 | c.format.nsid = cpu_to_le32(ns->ns_id); |
1612 | c.format.cdw10 = cpu_to_le32(cdw10); | 1634 | c.format.cdw10 = cpu_to_le32(cdw10); |
1613 | 1635 | ||
1614 | nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0); | 1636 | nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0); |
1615 | res = nvme_trans_status_code(hdr, nvme_sc); | 1637 | res = nvme_trans_status_code(hdr, nvme_sc); |
1616 | 1638 | ||
1617 | kfree(id_ns); | 1639 | kfree(id_ns); |
@@ -1704,7 +1726,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
1704 | nvme_sc = NVME_SC_LBA_RANGE; | 1726 | nvme_sc = NVME_SC_LBA_RANGE; |
1705 | break; | 1727 | break; |
1706 | } | 1728 | } |
1707 | nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL, | 1729 | nvme_sc = nvme_submit_user_cmd(ns->queue, &c, |
1708 | next_mapping_addr, unit_len, NULL, 0); | 1730 | next_mapping_addr, unit_len, NULL, 0); |
1709 | if (nvme_sc) | 1731 | if (nvme_sc) |
1710 | break; | 1732 | break; |
@@ -2040,7 +2062,6 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
2040 | u32 alloc_len; | 2062 | u32 alloc_len; |
2041 | u32 resp_size; | 2063 | u32 resp_size; |
2042 | u32 xfer_len; | 2064 | u32 xfer_len; |
2043 | struct nvme_dev *dev = ns->dev; | ||
2044 | struct nvme_id_ns *id_ns; | 2065 | struct nvme_id_ns *id_ns; |
2045 | u8 *response; | 2066 | u8 *response; |
2046 | 2067 | ||
@@ -2052,7 +2073,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
2052 | resp_size = READ_CAP_10_RESP_SIZE; | 2073 | resp_size = READ_CAP_10_RESP_SIZE; |
2053 | } | 2074 | } |
2054 | 2075 | ||
2055 | nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns); | 2076 | nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns); |
2056 | res = nvme_trans_status_code(hdr, nvme_sc); | 2077 | res = nvme_trans_status_code(hdr, nvme_sc); |
2057 | if (res) | 2078 | if (res) |
2058 | return res; | 2079 | return res; |
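READ CAPACITY is answered from Identify Namespace; the standard READ CAPACITY(10) payload is the last LBA followed by the block length, both big-endian, with the LBA clamped to 0xffffffff when the namespace is too large for the 10-byte form. A small sketch of that packing (illustrative, not the driver's helper):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void fill_read_cap10(uint8_t resp[8], uint64_t nsze, uint32_t block_len)
{
	uint64_t last_lba = nsze - 1;
	uint32_t be_lba = htonl(last_lba > 0xffffffff ? 0xffffffff
						       : (uint32_t)last_lba);
	uint32_t be_len = htonl(block_len);

	memcpy(&resp[0], &be_lba, sizeof(be_lba));	/* returned LBA */
	memcpy(&resp[4], &be_len, sizeof(be_len));	/* block length in bytes */
}

int main(void)
{
	uint8_t resp[8];

	fill_read_cap10(resp, 1 << 20, 512);	/* 1Mi blocks of 512 B */
	printf("%02x%02x%02x%02x\n", resp[0], resp[1], resp[2], resp[3]);
	return 0;
}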
@@ -2080,7 +2101,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
2080 | int nvme_sc; | 2101 | int nvme_sc; |
2081 | u32 alloc_len, xfer_len, resp_size; | 2102 | u32 alloc_len, xfer_len, resp_size; |
2082 | u8 *response; | 2103 | u8 *response; |
2083 | struct nvme_dev *dev = ns->dev; | ||
2084 | struct nvme_id_ctrl *id_ctrl; | 2104 | struct nvme_id_ctrl *id_ctrl; |
2085 | u32 ll_length, lun_id; | 2105 | u32 ll_length, lun_id; |
2086 | u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; | 2106 | u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET; |
@@ -2094,7 +2114,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, | |||
2094 | case ALL_LUNS_RETURNED: | 2114 | case ALL_LUNS_RETURNED: |
2095 | case ALL_WELL_KNOWN_LUNS_RETURNED: | 2115 | case ALL_WELL_KNOWN_LUNS_RETURNED: |
2096 | case RESTRICTED_LUNS_RETURNED: | 2116 | case RESTRICTED_LUNS_RETURNED: |
2097 | nvme_sc = nvme_identify_ctrl(dev, &id_ctrl); | 2117 | nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl); |
2098 | res = nvme_trans_status_code(hdr, nvme_sc); | 2118 | res = nvme_trans_status_code(hdr, nvme_sc); |
2099 | if (res) | 2119 | if (res) |
2100 | return res; | 2120 | return res; |
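REPORT LUNS sizes its response from the controller's namespace count obtained via Identify Controller. A sketch of the sizing this implies, assuming the standard layout of an 8-byte header (whose first field is the list length in bytes) followed by one 8-byte LUN entry per namespace:

#include <stdint.h>
#include <stdio.h>

struct report_luns_size {
	uint32_t ll_length;	/* LUN list length field: entries only */
	uint32_t resp_size;	/* full response: 8-byte header + entries */
};

static struct report_luns_size report_luns_sizes(uint32_t nr_namespaces)
{
	struct report_luns_size s = {
		.ll_length = nr_namespaces * 8,
		.resp_size = nr_namespaces * 8 + 8,
	};
	return s;
}

int main(void)
{
	struct report_luns_size s = report_luns_sizes(4);

	printf("list=%u total=%u\n", s.ll_length, s.resp_size);	/* 32, 40 */
	return 0;
}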
@@ -2295,9 +2315,7 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns, | |||
2295 | struct sg_io_hdr *hdr, | 2315 | struct sg_io_hdr *hdr, |
2296 | u8 *cmd) | 2316 | u8 *cmd) |
2297 | { | 2317 | { |
2298 | struct nvme_dev *dev = ns->dev; | 2318 | if (nvme_ctrl_ready(ns->ctrl)) |
2299 | |||
2300 | if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) | ||
2301 | return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, | 2319 | return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, |
2302 | NOT_READY, SCSI_ASC_LUN_NOT_READY, | 2320 | NOT_READY, SCSI_ASC_LUN_NOT_READY, |
2303 | SCSI_ASCQ_CAUSE_NOT_REPORTABLE); | 2321 | SCSI_ASCQ_CAUSE_NOT_REPORTABLE); |
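The TEST UNIT READY translation no longer peeks at the PCI BAR's CSTS register itself; readiness is now asked of the core through a helper on struct nvme_ctrl, which keeps register access behind the controller's transport. A rough userspace sketch of reading a ready bit through an ops table rather than a driver-private mapping (names, layout, and the register offset here are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CSTS_RDY 0x1

struct ctrl;

struct ctrl_ops {
	int (*reg_read32)(struct ctrl *c, uint32_t off, uint32_t *val);
};

struct ctrl {
	const struct ctrl_ops *ops;
	uint32_t fake_csts;		/* stands in for a memory-mapped register */
};

static int demo_reg_read32(struct ctrl *c, uint32_t off, uint32_t *val)
{
	(void)off;
	*val = c->fake_csts;
	return 0;
}

static bool ctrl_ready(struct ctrl *c)
{
	uint32_t csts = 0;

	if (c->ops->reg_read32(c, 0x1c /* CSTS */, &csts))
		return false;
	return csts & CSTS_RDY;
}

int main(void)
{
	const struct ctrl_ops ops = { .reg_read32 = demo_reg_read32 };
	struct ctrl c = { .ops = &ops, .fake_csts = CSTS_RDY };

	printf("ready=%d\n", ctrl_ready(&c));
	return 0;
}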