author		Matthew Wilcox <matthew.r.wilcox@intel.com>	2011-01-20 12:50:14 -0500
committer	Matthew Wilcox <matthew.r.wilcox@intel.com>	2011-11-04 15:52:51 -0400
commit		b60503ba432b16fc84442a84e29a7aad2c0c363d (patch)
tree		43dca7cd57965ce1a2b7b6f94437f0364fbc0034
parent		0b934ccd707ff33a87f15a35a9916d1d8e85d30e (diff)
NVMe: New driver

This driver is for devices that follow the NVM Express standard.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
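
A minimal sketch of how the new 'N' (0x40-0x42) ioctls added below might be
exercised from userspace follows. It is only a sketch: it assumes the
definitions from include/linux/nvme.h are visible to the userspace build
(this patch does not export the header) and that the driver has created a
namespace node following the nvme%dn%d naming used by nvme_alloc_ns()
(e.g. /dev/nvme0n0); both are assumptions, not something the patch
guarantees.

/* Hypothetical userspace sketch, not part of this patch. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <linux/nvme.h>	/* assumes the new header is reachable from userspace */

int main(void)
{
	struct nvme_id_ctrl ctrl;	/* 4096-byte Identify Controller data */
	int fd = open("/dev/nvme0n0", O_RDONLY);	/* assumed node name */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* nvme_ioctl() services this with an Identify (CNS=1) admin command */
	if (ioctl(fd, NVME_IOCTL_IDENTIFY_CTRL, &ctrl) < 0) {
		perror("NVME_IOCTL_IDENTIFY_CTRL");
		close(fd);
		return 1;
	}

	/* mn/sn/fr are fixed-width, space-padded byte fields, not C strings */
	printf("model %.40s serial %.20s firmware %.8s\n",
	       ctrl.mn, ctrl.sn, ctrl.fr);

	close(fd);
	return 0;
}

NVME_IOCTL_IDENTIFY_NS and NVME_IOCTL_GET_RANGE_TYPE follow the same pattern
with struct nvme_id_ns and struct nvme_lba_range_type respectively.
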
-rw-r--r--	Documentation/ioctl/ioctl-number.txt	|    1
-rw-r--r--	drivers/block/Kconfig			|   11
-rw-r--r--	drivers/block/Makefile			|    1
-rw-r--r--	drivers/block/nvme.c			| 1043
-rw-r--r--	include/linux/nvme.h			|  343
5 files changed, 1399 insertions(+), 0 deletions(-)
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 54078ed96b37..4840334ea97b 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -149,6 +149,7 @@ Code Seq#(hex) Include File Comments
 'M'	01-03	drivers/scsi/megaraid/megaraid_sas.h
 'M'	00-0F	drivers/video/fsl-diu-fb.h	conflict!
 'N'	00-1F	drivers/usb/scanner.h
+'N'	40-7F	drivers/block/nvme.c
 'O'	00-06	mtd/ubi-user.h		UBI
 'P'	all	linux/soundcard.h	conflict!
 'P'	60-6F	sound/sscape_ioctl.h	conflict!
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 6f07ec1c2f58..35e56e1c948f 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -315,6 +315,17 @@ config BLK_DEV_NBD
 
 	  If unsure, say N.
 
+config BLK_DEV_NVME
+	tristate "NVM Express block device"
+	depends on PCI
+	---help---
+	  The NVM Express driver is for solid state drives directly
+	  connected to the PCI or PCI Express bus.  If you know you
+	  don't have one of these, it is safe to answer N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called nvme.
+
 config BLK_DEV_OSD
 	tristate "OSD object-as-blkdev support"
 	depends on SCSI_OSD_ULD
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 76646e9a1c91..349539ad3ad9 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_MG_DISK)		+= mg_disk.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
+obj-$(CONFIG_BLK_DEV_NVME)	+= nvme.o
 obj-$(CONFIG_BLK_DEV_OSD)	+= osdblk.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
new file mode 100644
index 000000000000..ef66eccc2aa2
--- /dev/null
+++ b/drivers/block/nvme.c
@@ -0,0 +1,1043 @@
1/*
2 * NVM Express device driver
3 * Copyright (c) 2011, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#include <linux/nvme.h>
20#include <linux/bio.h>
21#include <linux/blkdev.h>
22#include <linux/errno.h>
23#include <linux/fs.h>
24#include <linux/genhd.h>
25#include <linux/init.h>
26#include <linux/interrupt.h>
27#include <linux/io.h>
28#include <linux/kdev_t.h>
29#include <linux/kernel.h>
30#include <linux/mm.h>
31#include <linux/module.h>
32#include <linux/moduleparam.h>
33#include <linux/pci.h>
34#include <linux/sched.h>
35#include <linux/slab.h>
36#include <linux/types.h>
37#include <linux/version.h>
38
39#define NVME_Q_DEPTH 1024
40#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
41#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
42#define NVME_MINORS 64
43
44static int nvme_major;
45module_param(nvme_major, int, 0);
46
47/*
48 * Represents an NVM Express device. Each nvme_dev is a PCI function.
49 */
50struct nvme_dev {
51 struct list_head node;
52 struct nvme_queue **queues;
53 u32 __iomem *dbs;
54 struct pci_dev *pci_dev;
55 int instance;
56 int queue_count;
57 u32 ctrl_config;
58 struct msix_entry *entry;
59 struct nvme_bar __iomem *bar;
60 struct list_head namespaces;
61};
62
63/*
64 * An NVM Express namespace is equivalent to a SCSI LUN
65 */
66struct nvme_ns {
67 struct list_head list;
68
69 struct nvme_dev *dev;
70 struct request_queue *queue;
71 struct gendisk *disk;
72
73 int ns_id;
74 int lba_shift;
75};
76
77/*
78 * An NVM Express queue. Each device has at least two (one for admin
79 * commands and one for I/O commands).
80 */
81struct nvme_queue {
82 struct device *q_dmadev;
83 spinlock_t q_lock;
84 struct nvme_command *sq_cmds;
85 volatile struct nvme_completion *cqes;
86 dma_addr_t sq_dma_addr;
87 dma_addr_t cq_dma_addr;
88 wait_queue_head_t sq_full;
89 struct bio_list sq_cong;
90 u32 __iomem *q_db;
91 u16 q_depth;
92 u16 cq_vector;
93 u16 sq_head;
94 u16 sq_tail;
95 u16 cq_head;
96 u16 cq_cycle;
97 unsigned long cmdid_data[];
98};
99
100/*
101 * Check we didn't inadvertently grow the command struct
102 */
103static inline void _nvme_check_size(void)
104{
105 BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
106 BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
107 BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
108 BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
109 BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
110 BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
111 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096);
112 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096);
113 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
114}
115
116/**
117 * alloc_cmdid - Allocate a Command ID
118 * @nvmeq: The queue that will be used for this command
119 * @ctx: A pointer that will be passed to the handler
120 * @handler: The ID of the handler to call
121 *
122 * Allocate a Command ID for a queue. The data passed in will
123 * be passed to the completion handler. This is implemented by using
124 * the bottom two bits of the ctx pointer to store the handler ID.
125 * Passing in a pointer that's not 4-byte aligned will cause a BUG.
126 * We can change this if it becomes a problem.
127 */
128static int alloc_cmdid(struct nvme_queue *nvmeq, void *ctx, int handler)
129{
130 int depth = nvmeq->q_depth;
131 unsigned long data = (unsigned long)ctx | handler;
132 int cmdid;
133
134 BUG_ON((unsigned long)ctx & 3);
135
136 do {
137 cmdid = find_first_zero_bit(nvmeq->cmdid_data, depth);
138 if (cmdid >= depth)
139 return -EBUSY;
140 } while (test_and_set_bit(cmdid, nvmeq->cmdid_data));
141
142 nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(depth)] = data;
143 return cmdid;
144}
145
146static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
147 int handler)
148{
149 int cmdid;
150 wait_event_killable(nvmeq->sq_full,
151 (cmdid = alloc_cmdid(nvmeq, ctx, handler)) >= 0);
152 return (cmdid < 0) ? -EINTR : cmdid;
153}
154
155/* If you need more than four handlers, you'll need to change how
156 * alloc_cmdid and nvme_process_cq work
157 */
158enum {
159 sync_completion_id = 0,
160 bio_completion_id,
161};
162
163static unsigned long free_cmdid(struct nvme_queue *nvmeq, int cmdid)
164{
165 unsigned long data;
166
167 data = nvmeq->cmdid_data[cmdid + BITS_TO_LONGS(nvmeq->q_depth)];
168 clear_bit(cmdid, nvmeq->cmdid_data);
169 wake_up(&nvmeq->sq_full);
170 return data;
171}
172
173static struct nvme_queue *get_nvmeq(struct nvme_ns *ns)
174{
175 return ns->dev->queues[1];
176}
177
178static void put_nvmeq(struct nvme_queue *nvmeq)
179{
180}
181
182/**
183 * nvme_submit_cmd - Copy a command into a queue and ring the doorbell
184 * @nvmeq: The queue to use
185 * @cmd: The command to send
186 *
187 * Safe to use from interrupt context
188 */
189static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
190{
191 unsigned long flags;
192 u16 tail;
193 /* XXX: Need to check tail isn't going to overrun head */
194 spin_lock_irqsave(&nvmeq->q_lock, flags);
195 tail = nvmeq->sq_tail;
196 memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
197 writel(tail, nvmeq->q_db);
198 if (++tail == nvmeq->q_depth)
199 tail = 0;
200 nvmeq->sq_tail = tail;
201 spin_unlock_irqrestore(&nvmeq->q_lock, flags);
202
203 return 0;
204}
205
206struct nvme_req_info {
207 struct bio *bio;
208 int nents;
209 struct scatterlist sg[0];
210};
211
212/* XXX: use a mempool */
213static struct nvme_req_info *alloc_info(unsigned nseg, gfp_t gfp)
214{
215 return kmalloc(sizeof(struct nvme_req_info) +
216 sizeof(struct scatterlist) * nseg, gfp);
217}
218
219static void free_info(struct nvme_req_info *info)
220{
221 kfree(info);
222}
223
224static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
225 struct nvme_completion *cqe)
226{
227 struct nvme_req_info *info = ctx;
228 struct bio *bio = info->bio;
229 u16 status = le16_to_cpup(&cqe->status) >> 1;
230
231 dma_unmap_sg(nvmeq->q_dmadev, info->sg, info->nents,
232 bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
233 free_info(info);
234 bio_endio(bio, status ? -EIO : 0);
235}
236
237static int nvme_map_bio(struct device *dev, struct nvme_req_info *info,
238 struct bio *bio, enum dma_data_direction dma_dir, int psegs)
239{
240 struct bio_vec *bvec;
241 struct scatterlist *sg = info->sg;
242	int i, nsegs = 0;
243
244 sg_init_table(sg, psegs);
245 bio_for_each_segment(bvec, bio, i) {
246 sg_set_page(sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
247		/* XXX: handle non-mergeable here */
248 nsegs++;
249 }
250 info->nents = nsegs;
251
252 return dma_map_sg(dev, info->sg, info->nents, dma_dir);
253}
254
255static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
256 struct bio *bio)
257{
258 struct nvme_rw_command *cmnd;
259 struct nvme_req_info *info;
260 enum dma_data_direction dma_dir;
261 int cmdid;
262 u16 control;
263 u32 dsmgmt;
264 unsigned long flags;
265 int psegs = bio_phys_segments(ns->queue, bio);
266
267 info = alloc_info(psegs, GFP_NOIO);
268 if (!info)
269 goto congestion;
270 info->bio = bio;
271
272 cmdid = alloc_cmdid(nvmeq, info, bio_completion_id);
273 if (unlikely(cmdid < 0))
274 goto free_info;
275
276 control = 0;
277 if (bio->bi_rw & REQ_FUA)
278 control |= NVME_RW_FUA;
279 if (bio->bi_rw & (REQ_FAILFAST_DEV | REQ_RAHEAD))
280 control |= NVME_RW_LR;
281
282 dsmgmt = 0;
283 if (bio->bi_rw & REQ_RAHEAD)
284 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
285
286 spin_lock_irqsave(&nvmeq->q_lock, flags);
287 cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail].rw;
288
289 if (bio_data_dir(bio)) {
290 cmnd->opcode = nvme_cmd_write;
291 dma_dir = DMA_TO_DEVICE;
292 } else {
293 cmnd->opcode = nvme_cmd_read;
294 dma_dir = DMA_FROM_DEVICE;
295 }
296
297 nvme_map_bio(nvmeq->q_dmadev, info, bio, dma_dir, psegs);
298
299 cmnd->flags = 1;
300 cmnd->command_id = cmdid;
301 cmnd->nsid = cpu_to_le32(ns->ns_id);
302 cmnd->prp1 = cpu_to_le64(sg_phys(info->sg));
303 /* XXX: Support more than one PRP */
304 cmnd->slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
305 cmnd->length = cpu_to_le16((bio->bi_size >> ns->lba_shift) - 1);
306 cmnd->control = cpu_to_le16(control);
307 cmnd->dsmgmt = cpu_to_le32(dsmgmt);
308
309 writel(nvmeq->sq_tail, nvmeq->q_db);
310 if (++nvmeq->sq_tail == nvmeq->q_depth)
311 nvmeq->sq_tail = 0;
312
313 spin_unlock_irqrestore(&nvmeq->q_lock, flags);
314
315 return 0;
316
317 free_info:
318 free_info(info);
319 congestion:
320 return -EBUSY;
321}
322
323/*
324 * NB: return value of non-zero would mean that we were a stacking driver.
325 * make_request must always succeed.
326 */
327static int nvme_make_request(struct request_queue *q, struct bio *bio)
328{
329 struct nvme_ns *ns = q->queuedata;
330 struct nvme_queue *nvmeq = get_nvmeq(ns);
331
332 if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
333 blk_set_queue_congested(q, rw_is_sync(bio->bi_rw));
334 bio_list_add(&nvmeq->sq_cong, bio);
335 }
336 put_nvmeq(nvmeq);
337
338 return 0;
339}
340
341struct sync_cmd_info {
342 struct task_struct *task;
343 u32 result;
344 int status;
345};
346
347static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
348 struct nvme_completion *cqe)
349{
350 struct sync_cmd_info *cmdinfo = ctx;
351 cmdinfo->result = le32_to_cpup(&cqe->result);
352 cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
353 wake_up_process(cmdinfo->task);
354}
355
356typedef void (*completion_fn)(struct nvme_queue *, void *,
357 struct nvme_completion *);
358
359static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
360{
361 u16 head, cycle;
362
363 static const completion_fn completions[4] = {
364 [sync_completion_id] = sync_completion,
365 [bio_completion_id] = bio_completion,
366 };
367
368 head = nvmeq->cq_head;
369 cycle = nvmeq->cq_cycle;
370
371 for (;;) {
372 unsigned long data;
373 void *ptr;
374 unsigned char handler;
375 struct nvme_completion cqe = nvmeq->cqes[head];
376 if ((le16_to_cpu(cqe.status) & 1) != cycle)
377 break;
378 nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
379 if (++head == nvmeq->q_depth) {
380 head = 0;
381 cycle = !cycle;
382 }
383
384 data = free_cmdid(nvmeq, cqe.command_id);
385 handler = data & 3;
386 ptr = (void *)(data & ~3UL);
387 completions[handler](nvmeq, ptr, &cqe);
388 }
389
390 /* If the controller ignores the cq head doorbell and continuously
391 * writes to the queue, it is theoretically possible to wrap around
392 * the queue twice and mistakenly return IRQ_NONE. Linux only
393 * requires that 0.1% of your interrupts are handled, so this isn't
394 * a big problem.
395 */
396 if (head == nvmeq->cq_head && cycle == nvmeq->cq_cycle)
397 return IRQ_NONE;
398
399 writel(head, nvmeq->q_db + 1);
400 nvmeq->cq_head = head;
401 nvmeq->cq_cycle = cycle;
402
403 return IRQ_HANDLED;
404}
405
406static irqreturn_t nvme_irq(int irq, void *data)
407{
408 return nvme_process_cq(data);
409}
410
411/*
412 * Returns 0 on success. If the result is negative, it's a Linux error code;
413 * if the result is positive, it's an NVM Express status code
414 */
415static int nvme_submit_sync_cmd(struct nvme_queue *q, struct nvme_command *cmd,
416 u32 *result)
417{
418 int cmdid;
419 struct sync_cmd_info cmdinfo;
420
421 cmdinfo.task = current;
422 cmdinfo.status = -EINTR;
423
424 cmdid = alloc_cmdid_killable(q, &cmdinfo, sync_completion_id);
425 if (cmdid < 0)
426 return cmdid;
427 cmd->common.command_id = cmdid;
428
429 set_current_state(TASK_UNINTERRUPTIBLE);
430 nvme_submit_cmd(q, cmd);
431 schedule();
432
433 if (result)
434 *result = cmdinfo.result;
435
436 return cmdinfo.status;
437}
438
439static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
440 u32 *result)
441{
442 return nvme_submit_sync_cmd(dev->queues[0], cmd, result);
443}
444
445static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
446{
447 int status;
448 struct nvme_command c;
449
450 memset(&c, 0, sizeof(c));
451 c.delete_queue.opcode = opcode;
452 c.delete_queue.qid = cpu_to_le16(id);
453
454 status = nvme_submit_admin_cmd(dev, &c, NULL);
455 if (status)
456 return -EIO;
457 return 0;
458}
459
460static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
461 struct nvme_queue *nvmeq)
462{
463 int status;
464 struct nvme_command c;
465 int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
466
467 memset(&c, 0, sizeof(c));
468 c.create_cq.opcode = nvme_admin_create_cq;
469 c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
470 c.create_cq.cqid = cpu_to_le16(qid);
471 c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
472 c.create_cq.cq_flags = cpu_to_le16(flags);
473 c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
474
475 status = nvme_submit_admin_cmd(dev, &c, NULL);
476 if (status)
477 return -EIO;
478 return 0;
479}
480
481static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
482 struct nvme_queue *nvmeq)
483{
484 int status;
485 struct nvme_command c;
486 int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
487
488 memset(&c, 0, sizeof(c));
489 c.create_sq.opcode = nvme_admin_create_sq;
490 c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
491 c.create_sq.sqid = cpu_to_le16(qid);
492 c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
493 c.create_sq.sq_flags = cpu_to_le16(flags);
494 c.create_sq.cqid = cpu_to_le16(qid);
495
496 status = nvme_submit_admin_cmd(dev, &c, NULL);
497 if (status)
498 return -EIO;
499 return 0;
500}
501
502static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
503{
504 return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
505}
506
507static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
508{
509 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
510}
511
512static void nvme_free_queue(struct nvme_dev *dev, int qid)
513{
514 struct nvme_queue *nvmeq = dev->queues[qid];
515
516 free_irq(dev->entry[nvmeq->cq_vector].vector, nvmeq);
517
518 /* Don't tell the adapter to delete the admin queue */
519 if (qid) {
520 adapter_delete_sq(dev, qid);
521 adapter_delete_cq(dev, qid);
522 }
523
524 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
525 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
526 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
527 nvmeq->sq_cmds, nvmeq->sq_dma_addr);
528 kfree(nvmeq);
529}
530
531static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
532 int depth, int vector)
533{
534 struct device *dmadev = &dev->pci_dev->dev;
535 unsigned extra = (depth + BITS_TO_LONGS(depth)) * sizeof(long);
536 struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
537 if (!nvmeq)
538 return NULL;
539
540 nvmeq->cqes = dma_alloc_coherent(dmadev, CQ_SIZE(depth),
541 &nvmeq->cq_dma_addr, GFP_KERNEL);
542 if (!nvmeq->cqes)
543 goto free_nvmeq;
544 memset((void *)nvmeq->cqes, 0, CQ_SIZE(depth));
545
546 nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
547 &nvmeq->sq_dma_addr, GFP_KERNEL);
548 if (!nvmeq->sq_cmds)
549 goto free_cqdma;
550
551 nvmeq->q_dmadev = dmadev;
552 spin_lock_init(&nvmeq->q_lock);
553 nvmeq->cq_head = 0;
554 nvmeq->cq_cycle = 1;
555 init_waitqueue_head(&nvmeq->sq_full);
556 bio_list_init(&nvmeq->sq_cong);
557 nvmeq->q_db = &dev->dbs[qid * 2];
558 nvmeq->q_depth = depth;
559 nvmeq->cq_vector = vector;
560
561 return nvmeq;
562
563 free_cqdma:
564 dma_free_coherent(dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes,
565 nvmeq->cq_dma_addr);
566 free_nvmeq:
567 kfree(nvmeq);
568 return NULL;
569}
570
571static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
572 int qid, int cq_size, int vector)
573{
574 int result;
575 struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
576
577 result = adapter_alloc_cq(dev, qid, nvmeq);
578 if (result < 0)
579 goto free_nvmeq;
580
581 result = adapter_alloc_sq(dev, qid, nvmeq);
582 if (result < 0)
583 goto release_cq;
584
585 result = request_irq(dev->entry[vector].vector, nvme_irq,
586 IRQF_DISABLED | IRQF_SHARED, "nvme", nvmeq);
587 if (result < 0)
588 goto release_sq;
589
590 return nvmeq;
591
592 release_sq:
593 adapter_delete_sq(dev, qid);
594 release_cq:
595 adapter_delete_cq(dev, qid);
596 free_nvmeq:
597 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
598 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
599 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
600 nvmeq->sq_cmds, nvmeq->sq_dma_addr);
601 kfree(nvmeq);
602 return NULL;
603}
604
605static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
606{
607 int result;
608 u32 aqa;
609 struct nvme_queue *nvmeq;
610
611 dev->dbs = ((void __iomem *)dev->bar) + 4096;
612
613 nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
614
615 aqa = nvmeq->q_depth - 1;
616 aqa |= aqa << 16;
617
618 dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
619 dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
620 dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
621
622 writel(aqa, &dev->bar->aqa);
623 writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
624 writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
625 writel(dev->ctrl_config, &dev->bar->cc);
626
627 while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
628 msleep(100);
629 if (fatal_signal_pending(current))
630 return -EINTR;
631 }
632
633 result = request_irq(dev->entry[0].vector, nvme_irq,
634 IRQF_DISABLED | IRQF_SHARED, "nvme admin", nvmeq);
635 dev->queues[0] = nvmeq;
636 return result;
637}
638
639static int nvme_identify(struct nvme_ns *ns, void __user *addr, int cns)
640{
641 struct nvme_dev *dev = ns->dev;
642 int status;
643 struct nvme_command c;
644 void *page;
645 dma_addr_t dma_addr;
646
647 page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
648 GFP_KERNEL);
649
650 memset(&c, 0, sizeof(c));
651 c.identify.opcode = nvme_admin_identify;
652 c.identify.nsid = cns ? 0 : cpu_to_le32(ns->ns_id);
653 c.identify.prp1 = cpu_to_le64(dma_addr);
654 c.identify.cns = cpu_to_le32(cns);
655
656 status = nvme_submit_admin_cmd(dev, &c, NULL);
657
658 if (status)
659 status = -EIO;
660 else if (copy_to_user(addr, page, 4096))
661 status = -EFAULT;
662
663 dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
664
665 return status;
666}
667
668static int nvme_get_range_type(struct nvme_ns *ns, void __user *addr)
669{
670 struct nvme_dev *dev = ns->dev;
671 int status;
672 struct nvme_command c;
673 void *page;
674 dma_addr_t dma_addr;
675
676 page = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
677 GFP_KERNEL);
678
679 memset(&c, 0, sizeof(c));
680 c.features.opcode = nvme_admin_get_features;
681 c.features.nsid = cpu_to_le32(ns->ns_id);
682 c.features.prp1 = cpu_to_le64(dma_addr);
683 c.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
684
685 status = nvme_submit_admin_cmd(dev, &c, NULL);
686
687 /* XXX: Assuming first range for now */
688 if (status)
689 status = -EIO;
690 else if (copy_to_user(addr, page, 64))
691 status = -EFAULT;
692
693 dma_free_coherent(&dev->pci_dev->dev, 4096, page, dma_addr);
694
695 return status;
696}
697
698static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
699 unsigned long arg)
700{
701 struct nvme_ns *ns = bdev->bd_disk->private_data;
702
703 switch (cmd) {
704 case NVME_IOCTL_IDENTIFY_NS:
705 return nvme_identify(ns, (void __user *)arg, 0);
706 case NVME_IOCTL_IDENTIFY_CTRL:
707 return nvme_identify(ns, (void __user *)arg, 1);
708 case NVME_IOCTL_GET_RANGE_TYPE:
709 return nvme_get_range_type(ns, (void __user *)arg);
710 default:
711 return -ENOTTY;
712 }
713}
714
715static const struct block_device_operations nvme_fops = {
716 .owner = THIS_MODULE,
717 .ioctl = nvme_ioctl,
718};
719
720static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int index,
721 struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
722{
723 struct nvme_ns *ns;
724 struct gendisk *disk;
725 int lbaf;
726
727 if (rt->attributes & NVME_LBART_ATTRIB_HIDE)
728 return NULL;
729
730 ns = kzalloc(sizeof(*ns), GFP_KERNEL);
731 if (!ns)
732 return NULL;
733 ns->queue = blk_alloc_queue(GFP_KERNEL);
734 if (!ns->queue)
735 goto out_free_ns;
736 ns->queue->queue_flags = QUEUE_FLAG_DEFAULT | QUEUE_FLAG_NOMERGES |
737 QUEUE_FLAG_NONROT | QUEUE_FLAG_DISCARD;
738 blk_queue_make_request(ns->queue, nvme_make_request);
739 ns->dev = dev;
740 ns->queue->queuedata = ns;
741
742 disk = alloc_disk(NVME_MINORS);
743 if (!disk)
744 goto out_free_queue;
745 ns->ns_id = index;
746 ns->disk = disk;
747 lbaf = id->flbas & 0xf;
748 ns->lba_shift = id->lbaf[lbaf].ds;
749
750 disk->major = nvme_major;
751 disk->minors = NVME_MINORS;
752 disk->first_minor = NVME_MINORS * index;
753 disk->fops = &nvme_fops;
754 disk->private_data = ns;
755 disk->queue = ns->queue;
756 sprintf(disk->disk_name, "nvme%dn%d", dev->instance, index);
757 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
758
759 return ns;
760
761 out_free_queue:
762 blk_cleanup_queue(ns->queue);
763 out_free_ns:
764 kfree(ns);
765 return NULL;
766}
767
768static void nvme_ns_free(struct nvme_ns *ns)
769{
770 put_disk(ns->disk);
771 blk_cleanup_queue(ns->queue);
772 kfree(ns);
773}
774
775static int set_queue_count(struct nvme_dev *dev, int sq_count, int cq_count)
776{
777 int status;
778 u32 result;
779 struct nvme_command c;
780 u32 q_count = (sq_count - 1) | ((cq_count - 1) << 16);
781
782 memset(&c, 0, sizeof(c));
783 c.features.opcode = nvme_admin_get_features;
784 c.features.fid = cpu_to_le32(NVME_FEAT_NUM_QUEUES);
785 c.features.dword11 = cpu_to_le32(q_count);
786
787 status = nvme_submit_admin_cmd(dev, &c, &result);
788 if (status)
789 return -EIO;
790 return min(result & 0xffff, result >> 16) + 1;
791}
792
793/* XXX: Create per-CPU queues */
794static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
795{
796 int this_cpu;
797
798 set_queue_count(dev, 1, 1);
799
800 this_cpu = get_cpu();
801 dev->queues[1] = nvme_create_queue(dev, 1, NVME_Q_DEPTH, this_cpu);
802 put_cpu();
803 if (!dev->queues[1])
804 return -ENOMEM;
805 dev->queue_count++;
806
807 return 0;
808}
809
810static void nvme_free_queues(struct nvme_dev *dev)
811{
812 int i;
813
814 for (i = dev->queue_count - 1; i >= 0; i--)
815 nvme_free_queue(dev, i);
816}
817
818static int __devinit nvme_dev_add(struct nvme_dev *dev)
819{
820 int res, nn, i;
821 struct nvme_ns *ns, *next;
822 void *id;
823 dma_addr_t dma_addr;
824 struct nvme_command cid, crt;
825
826 res = nvme_setup_io_queues(dev);
827 if (res)
828 return res;
829
830 /* XXX: Switch to a SG list once prp2 works */
831 id = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
832 GFP_KERNEL);
833
834 memset(&cid, 0, sizeof(cid));
835 cid.identify.opcode = nvme_admin_identify;
836 cid.identify.nsid = 0;
837 cid.identify.prp1 = cpu_to_le64(dma_addr);
838 cid.identify.cns = cpu_to_le32(1);
839
840 res = nvme_submit_admin_cmd(dev, &cid, NULL);
841 if (res) {
842 res = -EIO;
843 goto out_free;
844 }
845
846 nn = le32_to_cpup(&((struct nvme_id_ctrl *)id)->nn);
847
848 cid.identify.cns = 0;
849 memset(&crt, 0, sizeof(crt));
850 crt.features.opcode = nvme_admin_get_features;
851 crt.features.prp1 = cpu_to_le64(dma_addr + 4096);
852 crt.features.fid = cpu_to_le32(NVME_FEAT_LBA_RANGE);
853
854 for (i = 0; i < nn; i++) {
855 cid.identify.nsid = cpu_to_le32(i);
856 res = nvme_submit_admin_cmd(dev, &cid, NULL);
857 if (res)
858 continue;
859
860 if (((struct nvme_id_ns *)id)->ncap == 0)
861 continue;
862
863 crt.features.nsid = cpu_to_le32(i);
864 res = nvme_submit_admin_cmd(dev, &crt, NULL);
865 if (res)
866 continue;
867
868 ns = nvme_alloc_ns(dev, i, id, id + 4096);
869 if (ns)
870 list_add_tail(&ns->list, &dev->namespaces);
871 }
872 list_for_each_entry(ns, &dev->namespaces, list)
873 add_disk(ns->disk);
874
875 dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
876 return 0;
877
878 out_free:
879 list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
880 list_del(&ns->list);
881 nvme_ns_free(ns);
882 }
883
884 dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
885 return res;
886}
887
888static int nvme_dev_remove(struct nvme_dev *dev)
889{
890 struct nvme_ns *ns, *next;
891
892 /* TODO: wait all I/O finished or cancel them */
893
894 list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
895 list_del(&ns->list);
896 del_gendisk(ns->disk);
897 nvme_ns_free(ns);
898 }
899
900 nvme_free_queues(dev);
901
902 return 0;
903}
904
905/* XXX: Use an ida or something to let remove / add work correctly */
906static void nvme_set_instance(struct nvme_dev *dev)
907{
908 static int instance;
909 dev->instance = instance++;
910}
911
912static void nvme_release_instance(struct nvme_dev *dev)
913{
914}
915
916static int __devinit nvme_probe(struct pci_dev *pdev,
917 const struct pci_device_id *id)
918{
919 int result = -ENOMEM;
920 struct nvme_dev *dev;
921
922 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
923 if (!dev)
924 return -ENOMEM;
925 dev->entry = kcalloc(num_possible_cpus(), sizeof(*dev->entry),
926 GFP_KERNEL);
927 if (!dev->entry)
928 goto free;
929 dev->queues = kcalloc(2, sizeof(void *), GFP_KERNEL);
930 if (!dev->queues)
931 goto free;
932
933 INIT_LIST_HEAD(&dev->namespaces);
934 dev->pci_dev = pdev;
935 pci_set_drvdata(pdev, dev);
936 dma_set_mask(&dev->pci_dev->dev, DMA_BIT_MASK(64));
937 nvme_set_instance(dev);
938
939 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
940 if (!dev->bar) {
941 result = -ENOMEM;
942 goto disable;
943 }
944
945 result = nvme_configure_admin_queue(dev);
946 if (result)
947 goto unmap;
948 dev->queue_count++;
949
950 result = nvme_dev_add(dev);
951 if (result)
952 goto delete;
953 return 0;
954
955 delete:
956 nvme_free_queues(dev);
957 unmap:
958 iounmap(dev->bar);
959 disable:
960 pci_disable_msix(pdev);
961 nvme_release_instance(dev);
962 free:
963 kfree(dev->queues);
964 kfree(dev->entry);
965 kfree(dev);
966 return result;
967}
968
969static void __devexit nvme_remove(struct pci_dev *pdev)
970{
971 struct nvme_dev *dev = pci_get_drvdata(pdev);
972 nvme_dev_remove(dev);
973 pci_disable_msix(pdev);
974 iounmap(dev->bar);
975 nvme_release_instance(dev);
976 kfree(dev->queues);
977 kfree(dev->entry);
978 kfree(dev);
979}
980
981/* These functions are yet to be implemented */
982#define nvme_error_detected NULL
983#define nvme_dump_registers NULL
984#define nvme_link_reset NULL
985#define nvme_slot_reset NULL
986#define nvme_error_resume NULL
987#define nvme_suspend NULL
988#define nvme_resume NULL
989
990static struct pci_error_handlers nvme_err_handler = {
991 .error_detected = nvme_error_detected,
992 .mmio_enabled = nvme_dump_registers,
993 .link_reset = nvme_link_reset,
994 .slot_reset = nvme_slot_reset,
995 .resume = nvme_error_resume,
996};
997
998/* Move to pci_ids.h later */
999#define PCI_CLASS_STORAGE_EXPRESS 0x010802
1000
1001static DEFINE_PCI_DEVICE_TABLE(nvme_id_table) = {
1002 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
1003 { 0, }
1004};
1005MODULE_DEVICE_TABLE(pci, nvme_id_table);
1006
1007static struct pci_driver nvme_driver = {
1008 .name = "nvme",
1009 .id_table = nvme_id_table,
1010 .probe = nvme_probe,
1011 .remove = __devexit_p(nvme_remove),
1012 .suspend = nvme_suspend,
1013 .resume = nvme_resume,
1014 .err_handler = &nvme_err_handler,
1015};
1016
1017static int __init nvme_init(void)
1018{
1019 int result;
1020
1021 nvme_major = register_blkdev(nvme_major, "nvme");
1022 if (nvme_major <= 0)
1023 return -EBUSY;
1024
1025 result = pci_register_driver(&nvme_driver);
1026 if (!result)
1027 return 0;
1028
1029 unregister_blkdev(nvme_major, "nvme");
1030 return result;
1031}
1032
1033static void __exit nvme_exit(void)
1034{
1035 pci_unregister_driver(&nvme_driver);
1036 unregister_blkdev(nvme_major, "nvme");
1037}
1038
1039MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
1040MODULE_LICENSE("GPL");
1041MODULE_VERSION("0.1");
1042module_init(nvme_init);
1043module_exit(nvme_exit);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
new file mode 100644
index 000000000000..9ba53584f722
--- /dev/null
+++ b/include/linux/nvme.h
@@ -0,0 +1,343 @@
1/*
2 * Definitions for the NVM Express interface
3 * Copyright (c) 2011, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#ifndef _LINUX_NVME_H
20#define _LINUX_NVME_H
21
22#include <linux/types.h>
23
24struct nvme_bar {
25 __u64 cap; /* Controller Capabilities */
26 __u32 vs; /* Version */
27 __u32 ims; /* Interrupt Mask Set */
28 __u32 imc; /* Interrupt Mask Clear */
29 __u32 cc; /* Controller Configuration */
30 __u32 csts; /* Controller Status */
31 __u32 aqa; /* Admin Queue Attributes */
32 __u64 asq; /* Admin SQ Base Address */
33 __u64 acq; /* Admin CQ Base Address */
34};
35
36enum {
37 NVME_CC_ENABLE = 1 << 0,
38 NVME_CC_CSS_NVM = 0 << 4,
39 NVME_CC_MPS_SHIFT = 7,
40 NVME_CC_ARB_RR = 0 << 11,
41 NVME_CC_ARB_WRRU = 1 << 11,
42 NVME_CC_ARB_VS = 3 << 11,
43 NVME_CC_SHN_NONE = 0 << 13,
44 NVME_CC_SHN_NORMAL = 1 << 13,
45 NVME_CC_SHN_ABRUPT = 2 << 13,
46 NVME_CSTS_RDY = 1 << 0,
47 NVME_CSTS_CFS = 1 << 1,
48 NVME_CSTS_SHST_NORMAL = 0 << 2,
49 NVME_CSTS_SHST_OCCUR = 1 << 2,
50 NVME_CSTS_SHST_CMPLT = 2 << 2,
51};
52
53#define NVME_VS(major, minor) (major << 16 | minor)
54
55struct nvme_id_ctrl {
56 __le16 vid;
57 __le16 ssvid;
58 char sn[20];
59 char mn[40];
60 char fr[8];
61 __le32 nn;
62 __u8 rab;
63 __u8 rsvd77[178];
64 __le16 oacs;
65 __u8 acl;
66 __u8 aerl;
67 __u8 frmw;
68 __u8 lpa;
69 __u8 elpe;
70 __u8 npss;
71 __u8 rsvd264[248];
72 __le64 psd[32];
73 __le16 oncs;
74 __le16 fuses;
75 __u8 fna;
76 __u8 vwc;
77 __le16 awun;
78 __le16 awupf;
79 __u8 rsvd778[246];
80 __u8 cmdset[2048];
81 __u8 vs[1024];
82};
83
84struct nvme_lbaf {
85 __le16 ms;
86 __u8 ds;
87 __u8 rp;
88};
89
90struct nvme_id_ns {
91 __le64 nsze;
92 __le64 ncap;
93 __le64 nuse;
94 __u8 nsfeat;
95 __u8 nlbaf;
96 __u8 flbas;
97 __u8 mc;
98 __u8 dpc;
99 __u8 dps;
100 __u8 rsvd30[98];
101 struct nvme_lbaf lbaf[16];
102 __u8 rsvd192[192];
103 __u8 vs[3712];
104};
105
106enum {
107 NVME_NS_FEAT_THIN = 1 << 0,
108 NVME_LBAF_RP_BEST = 0,
109 NVME_LBAF_RP_BETTER = 1,
110 NVME_LBAF_RP_GOOD = 2,
111 NVME_LBAF_RP_DEGRADED = 3,
112};
113
114struct nvme_lba_range_type {
115 __u8 type;
116 __u8 attributes;
117 __u8 rsvd2[14];
118 __u64 slba;
119 __u64 nlb;
120 __u8 guid[16];
121 __u8 rsvd48[16];
122};
123
124enum {
125 NVME_LBART_TYPE_FS = 0x01,
126 NVME_LBART_TYPE_RAID = 0x02,
127 NVME_LBART_TYPE_CACHE = 0x03,
128 NVME_LBART_TYPE_SWAP = 0x04,
129
130 NVME_LBART_ATTRIB_TEMP = 1 << 0,
131 NVME_LBART_ATTRIB_HIDE = 1 << 1,
132};
133
134/* I/O commands */
135
136enum nvme_opcode {
137 nvme_cmd_flush = 0x00,
138 nvme_cmd_write = 0x01,
139 nvme_cmd_read = 0x02,
140 nvme_cmd_write_uncor = 0x04,
141 nvme_cmd_compare = 0x05,
142 nvme_cmd_dsm = 0x09,
143};
144
145struct nvme_rw_command {
146 __u8 opcode;
147 __u8 flags;
148 __u16 command_id;
149 __le32 nsid;
150 __u64 rsvd2;
151 __le64 metadata;
152 __le64 prp1;
153 __le64 prp2;
154 __le64 slba;
155 __le16 length;
156 __le16 control;
157 __le32 dsmgmt;
158 __le32 reftag;
159 __le16 apptag;
160 __le16 appmask;
161};
162
163enum {
164 NVME_RW_LR = 1 << 15,
165 NVME_RW_FUA = 1 << 14,
166 NVME_RW_DSM_FREQ_UNSPEC = 0,
167 NVME_RW_DSM_FREQ_TYPICAL = 1,
168 NVME_RW_DSM_FREQ_RARE = 2,
169 NVME_RW_DSM_FREQ_READS = 3,
170 NVME_RW_DSM_FREQ_WRITES = 4,
171 NVME_RW_DSM_FREQ_RW = 5,
172 NVME_RW_DSM_FREQ_ONCE = 6,
173 NVME_RW_DSM_FREQ_PREFETCH = 7,
174 NVME_RW_DSM_FREQ_TEMP = 8,
175 NVME_RW_DSM_LATENCY_NONE = 0 << 4,
176 NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
177 NVME_RW_DSM_LATENCY_NORM = 2 << 4,
178 NVME_RW_DSM_LATENCY_LOW = 3 << 4,
179 NVME_RW_DSM_SEQ_REQ = 1 << 6,
180 NVME_RW_DSM_COMPRESSED = 1 << 7,
181};
182
183/* Admin commands */
184
185enum nvme_admin_opcode {
186 nvme_admin_delete_sq = 0x00,
187 nvme_admin_create_sq = 0x01,
188 nvme_admin_get_features = 0x02,
189 nvme_admin_delete_cq = 0x04,
190 nvme_admin_create_cq = 0x05,
191 nvme_admin_identify = 0x06,
192 nvme_admin_abort_cmd = 0x08,
193 nvme_admin_set_features = 0x09,
194 nvme_admin_get_log_page = 0x0a,
195 nvme_admin_async_event = 0x0c,
196 nvme_admin_download_fw = 0x0d,
197 nvme_admin_security_recv = 0x0e,
198 nvme_admin_format_nvm = 0x10,
199 nvme_admin_security_send = 0x11,
200 nvme_admin_activate_fw = 0x14,
201};
202
203enum {
204 NVME_QUEUE_PHYS_CONTIG = (1 << 0),
205 NVME_CQ_IRQ_ENABLED = (1 << 1),
206 NVME_SQ_PRIO_URGENT = (0 << 1),
207 NVME_SQ_PRIO_HIGH = (1 << 1),
208 NVME_SQ_PRIO_MEDIUM = (2 << 1),
209 NVME_SQ_PRIO_LOW = (3 << 1),
210 NVME_FEAT_ARBITRATION = 0x01,
211 NVME_FEAT_POWER_MGMT = 0x02,
212 NVME_FEAT_LBA_RANGE = 0x03,
213 NVME_FEAT_TEMP_THRESH = 0x04,
214 NVME_FEAT_ERR_RECOVERY = 0x05,
215 NVME_FEAT_VOLATILE_WC = 0x06,
216 NVME_FEAT_NUM_QUEUES = 0x07,
217 NVME_FEAT_IRQ_COALESCE = 0x08,
218 NVME_FEAT_IRQ_CONFIG = 0x09,
219 NVME_FEAT_WRITE_ATOMIC = 0x0a,
220 NVME_FEAT_ASYNC_EVENT = 0x0b,
221 NVME_FEAT_SW_PROGRESS = 0x0c,
222};
223
224struct nvme_identify {
225 __u8 opcode;
226 __u8 flags;
227 __u16 command_id;
228 __le32 nsid;
229 __u64 rsvd2[2];
230 __le64 prp1;
231 __le64 prp2;
232 __le32 cns;
233 __u32 rsvd11[5];
234};
235
236struct nvme_features {
237 __u8 opcode;
238 __u8 flags;
239 __u16 command_id;
240 __le32 nsid;
241 __u64 rsvd2[2];
242 __le64 prp1;
243 __le64 prp2;
244 __le32 fid;
245 __le32 dword11;
246 __u32 rsvd12[4];
247};
248
249struct nvme_create_cq {
250 __u8 opcode;
251 __u8 flags;
252 __u16 command_id;
253 __le32 rsvd1[5];
254 __le64 prp1;
255 __u64 rsvd8;
256 __le16 cqid;
257 __le16 qsize;
258 __le16 cq_flags;
259 __le16 irq_vector;
260 __u32 rsvd12[4];
261};
262
263struct nvme_create_sq {
264 __u8 opcode;
265 __u8 flags;
266 __u16 command_id;
267 __le32 rsvd1[5];
268 __le64 prp1;
269 __u64 rsvd8;
270 __le16 sqid;
271 __le16 qsize;
272 __le16 sq_flags;
273 __le16 cqid;
274 __le32 rsvd12[4];
275};
276
277struct nvme_delete_queue {
278 __u8 opcode;
279 __u8 flags;
280 __u16 command_id;
281 __u32 rsvd1[9];
282 __le16 qid;
283 __le16 rsvd10;
284 __le32 rsvd11[5];
285};
286
287struct nvme_common_command {
288 __u8 opcode;
289 __u8 flags;
290 __u16 command_id;
291 __le32 nsid;
292 __u32 rsvd2[14];
293};
294
295struct nvme_command {
296 union {
297 struct nvme_common_command common;
298 struct nvme_rw_command rw;
299 struct nvme_identify identify;
300 struct nvme_features features;
301 struct nvme_create_cq create_cq;
302 struct nvme_create_sq create_sq;
303 struct nvme_delete_queue delete_queue;
304 };
305};
306
307/* XXX: Sync with spec */
308enum {
309 NVME_SC_SUCCESS = 0x0,
310 NVME_SC_INVALID_OPCODE = 0x1,
311 NVME_SC_INVALID_FIELD = 0x2,
312 NVME_SC_CMDID_CONFLICT = 0x3,
313 NVME_SC_DATA_XFER_ERROR = 0x4,
314 NVME_SC_POWER_LOSS = 0x5,
315 NVME_SC_INTERNAL = 0x6,
316 NVME_SC_ABORT_REQ = 0x7,
317 NVME_SC_ABORT_QUEUE = 0x8,
318 NVME_SC_FUSED_FAIL = 0x9,
319 NVME_SC_FUSED_MISSING = 0xa,
320 NVME_SC_LBA_RANGE = 0x80,
321 NVME_SC_CAP_EXCEEDED = 0x81,
322 NVME_SC_NS_NOT_READY = 0x82,
323 NVME_SC_CQ_INVALID = 0x100,
324 NVME_SC_QID_INVALID = 0x101,
325 NVME_SC_QUEUE_SIZE = 0x102,
326 NVME_SC_WRITE_FAULT = 0x280,
327 NVME_SC_READ_ERROR = 0x281,
328};
329
330struct nvme_completion {
331 __le32 result; /* Used by admin commands to return data */
332 __le32 rsvd;
333 __le16 sq_head; /* how much of this queue may be reclaimed */
334 __le16 sq_id; /* submission queue that generated this entry */
335 __u16 command_id; /* of the command which completed */
336 __le16 status; /* did the command fail, and if so, why? */
337};
338
339#define NVME_IOCTL_IDENTIFY_NS _IOW('N', 0x40, struct nvme_id_ns)
340#define NVME_IOCTL_IDENTIFY_CTRL _IOW('N', 0x41, struct nvme_id_ctrl)
341#define NVME_IOCTL_GET_RANGE_TYPE _IOW('N', 0x42, struct nvme_lba_range_type)
342
343#endif /* _LINUX_NVME_H */
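
For readers following the cmdid bookkeeping in drivers/block/nvme.c above:
alloc_cmdid() relies on completion contexts being at least 4-byte aligned so
that the handler index fits in the two low bits of the ctx pointer, and
nvme_process_cq() splits the two apart again. The fragment below is a
standalone model of that encode/decode step (ordinary userspace C with
hypothetical names, not kernel code), included only to illustrate the trick.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the handler IDs used by alloc_cmdid() callers */
enum { sync_completion_id = 0, bio_completion_id = 1 };

int main(void)
{
	static int ctx;			/* stands in for a per-command context */
	unsigned long handler = bio_completion_id;

	/* alloc_cmdid(): the pointer must be 4-byte aligned (cf. its BUG_ON) */
	assert(((uintptr_t)&ctx & 3) == 0);
	unsigned long data = (uintptr_t)&ctx | handler;

	/* nvme_process_cq(): recover the handler index and original pointer */
	unsigned long h = data & 3;
	void *ptr = (void *)(data & ~3UL);

	printf("handler %lu, ptr %p (original %p)\n", h, ptr, (void *)&ctx);
	return 0;
}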