author	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-07 23:19:02 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-07 23:19:02 -0400
commit	b409624ad5a99c2e84df6657bd0f7931ac470d2d (patch)
tree	a4d2197ed560300b831504789744fd10a3c58039
parent	c4c17252283a13c0d63a8d9df828da109c116411 (diff)
parent	d82e8bfdef9afae83b894be49af4644d9ac3c359 (diff)
Merge git://git.infradead.org/users/willy/linux-nvme
Pull NVM Express driver update from Matthew Wilcox.
* git://git.infradead.org/users/willy/linux-nvme:
NVMe: Merge issue on character device bring-up
NVMe: Handle ioremap failure
NVMe: Add pci suspend/resume driver callbacks
NVMe: Use normal shutdown
NVMe: Separate controller init from disk discovery
NVMe: Separate queue alloc/free from create/delete
NVMe: Group pci related actions in functions
NVMe: Disk stats for read/write commands only
NVMe: Bring up cdev on set feature failure
NVMe: Fix checkpatch issues
NVMe: Namespace IDs are unsigned
NVMe: Update nvme_id_power_state with latest spec
NVMe: Split header file into user-visible and kernel-visible pieces
NVMe: Call nvme_process_cq from submission path
NVMe: Remove "process_cq did something" message
NVMe: Return correct value from interrupt handler
NVMe: Disk IO statistics
NVMe: Restructure MSI / MSI-X setup
NVMe: Use kzalloc instead of kmalloc+memset
-rw-r--r--	drivers/block/nvme-core.c	585
-rw-r--r--	drivers/block/nvme-scsi.c	24
-rw-r--r--	include/linux/nvme.h	466
-rw-r--r--	include/uapi/linux/Kbuild	1
-rw-r--r--	include/uapi/linux/nvme.h	477
5 files changed, 895 insertions, 658 deletions
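
A note on the completion path before the diff: nvme_process_cq(), which several of these patches rework (it is now also called from the submission path and the polling kthread), consumes the completion ring by watching the NVMe phase tag. Each completion entry carries a phase bit that the controller inverts on every pass through the ring, so an entry is new only while that bit matches the queue's expected cq_phase. The reaping loop itself is elided from the hunks below; the following is a minimal illustrative sketch of the convention, not the driver's exact code (the command-completion step is stubbed out):

	/* Illustrative phase-bit reaping loop (sketch, not the driver's code). */
	static int process_cq_sketch(struct nvme_queue *nvmeq)
	{
		u16 head = nvmeq->cq_head;
		u8 phase = nvmeq->cq_phase;
		int found = 0;

		/* Bit 0 of a CQE's status word is the phase tag. */
		while ((le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase) {
			/* ... look up and complete cqes[head].command_id ... */
			found++;
			if (++head == nvmeq->q_depth) {
				head = 0;
				phase = !phase;	/* controller flips the tag each lap */
			}
		}

		nvmeq->cq_head = head;
		nvmeq->cq_phase = phase;
		return found;
	}

This is also why "Return correct value from interrupt handler" introduces the cqe_seen flag: once submitters can reap completions under the queue lock, nvme_irq() may find the ring already drained, and the flag lets it still report IRQ_HANDLED for work done since the last interrupt.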
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index ce79a590b45b..da52092980e2 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -36,6 +36,7 @@
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/poison.h>
+#include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -79,7 +80,9 @@ struct nvme_queue {
 	u16 sq_head;
 	u16 sq_tail;
 	u16 cq_head;
-	u16 cq_phase;
+	u8 cq_phase;
+	u8 cqe_seen;
+	u8 q_suspended;
 	unsigned long cmdid_data[];
 };
 
@@ -115,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
 	return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
 }
 
+static unsigned nvme_queue_extra(int depth)
+{
+	return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info));
+}
+
 /**
  * alloc_cmdid() - Allocate a Command ID
  * @nvmeq: The queue that will be used for this command
@@ -285,6 +293,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
 		iod->npages = -1;
 		iod->length = nbytes;
 		iod->nents = 0;
+		iod->start_time = jiffies;
 	}
 
 	return iod;
@@ -308,6 +317,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 	kfree(iod);
 }
 
+static void nvme_start_io_acct(struct bio *bio)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	const int rw = bio_data_dir(bio);
+	int cpu = part_stat_lock();
+	part_round_stats(cpu, &disk->part0);
+	part_stat_inc(cpu, &disk->part0, ios[rw]);
+	part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
+	part_inc_in_flight(&disk->part0, rw);
+	part_stat_unlock();
+}
+
+static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	const int rw = bio_data_dir(bio);
+	unsigned long duration = jiffies - start_time;
+	int cpu = part_stat_lock();
+	part_stat_add(cpu, &disk->part0, ticks[rw], duration);
+	part_round_stats(cpu, &disk->part0);
+	part_dec_in_flight(&disk->part0, rw);
+	part_stat_unlock();
+}
+
 static void bio_completion(struct nvme_dev *dev, void *ctx,
 						struct nvme_completion *cqe)
 {
@@ -315,9 +348,11 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
 	struct bio *bio = iod->private;
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 
-	if (iod->nents)
+	if (iod->nents) {
 		dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
 			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		nvme_end_io_acct(bio, iod->start_time);
+	}
 	nvme_free_iod(dev, iod);
 	if (status)
 		bio_endio(bio, -EIO);
@@ -422,10 +457,8 @@ static void nvme_bio_pair_endio(struct bio *bio, int err)
 
 	if (atomic_dec_and_test(&bp->cnt)) {
 		bio_endio(bp->parent, bp->err);
-		if (bp->bv1)
-			kfree(bp->bv1);
-		if (bp->bv2)
-			kfree(bp->bv2);
+		kfree(bp->bv1);
+		kfree(bp->bv2);
 		kfree(bp);
 	}
 }
@@ -695,6 +728,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
+	nvme_start_io_acct(bio);
 	if (++nvmeq->sq_tail == nvmeq->q_depth)
 		nvmeq->sq_tail = 0;
 	writel(nvmeq->sq_tail, nvmeq->q_db);
@@ -709,26 +743,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	return result;
 }
 
-static void nvme_make_request(struct request_queue *q, struct bio *bio)
-{
-	struct nvme_ns *ns = q->queuedata;
-	struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
-	int result = -EBUSY;
-
-	spin_lock_irq(&nvmeq->q_lock);
-	if (bio_list_empty(&nvmeq->sq_cong))
-		result = nvme_submit_bio_queue(nvmeq, ns, bio);
-	if (unlikely(result)) {
-		if (bio_list_empty(&nvmeq->sq_cong))
-			add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
-		bio_list_add(&nvmeq->sq_cong, bio);
-	}
-
-	spin_unlock_irq(&nvmeq->q_lock);
-	put_nvmeq(nvmeq);
-}
-
-static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
+static int nvme_process_cq(struct nvme_queue *nvmeq)
 {
 	u16 head, phase;
 
@@ -758,13 +773,40 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
 	 * a big problem.
 	 */
 	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
-		return IRQ_NONE;
+		return 0;
 
 	writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
 	nvmeq->cq_head = head;
 	nvmeq->cq_phase = phase;
 
-	return IRQ_HANDLED;
+	nvmeq->cqe_seen = 1;
+	return 1;
+}
+
+static void nvme_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
+	int result = -EBUSY;
+
+	if (!nvmeq) {
+		put_nvmeq(NULL);
+		bio_endio(bio, -EIO);
+		return;
+	}
+
+	spin_lock_irq(&nvmeq->q_lock);
+	if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
+		result = nvme_submit_bio_queue(nvmeq, ns, bio);
+	if (unlikely(result)) {
+		if (bio_list_empty(&nvmeq->sq_cong))
+			add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+		bio_list_add(&nvmeq->sq_cong, bio);
+	}
+
+	nvme_process_cq(nvmeq);
+	spin_unlock_irq(&nvmeq->q_lock);
+	put_nvmeq(nvmeq);
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
@@ -772,7 +814,9 @@ static irqreturn_t nvme_irq(int irq, void *data)
 	irqreturn_t result;
 	struct nvme_queue *nvmeq = data;
 	spin_lock(&nvmeq->q_lock);
-	result = nvme_process_cq(nvmeq);
+	nvme_process_cq(nvmeq);
+	result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
+	nvmeq->cqe_seen = 0;
 	spin_unlock(&nvmeq->q_lock);
 	return result;
 }
@@ -986,8 +1030,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
 	}
 }
 
-static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
+	spin_lock_irq(&nvmeq->q_lock);
+	while (bio_list_peek(&nvmeq->sq_cong)) {
+		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
+		bio_endio(bio, -EIO);
+	}
+	spin_unlock_irq(&nvmeq->q_lock);
+
 	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
 	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
@@ -995,17 +1046,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
 	kfree(nvmeq);
 }
 
-static void nvme_free_queue(struct nvme_dev *dev, int qid)
+static void nvme_free_queues(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->queue_count - 1; i >= 0; i--) {
+		nvme_free_queue(dev->queues[i]);
+		dev->queue_count--;
+		dev->queues[i] = NULL;
+	}
+}
+
+static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 {
 	struct nvme_queue *nvmeq = dev->queues[qid];
 	int vector = dev->entry[nvmeq->cq_vector].vector;
 
 	spin_lock_irq(&nvmeq->q_lock);
-	nvme_cancel_ios(nvmeq, false);
-	while (bio_list_peek(&nvmeq->sq_cong)) {
-		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
-		bio_endio(bio, -EIO);
+	if (nvmeq->q_suspended) {
+		spin_unlock_irq(&nvmeq->q_lock);
+		return;
 	}
+	nvmeq->q_suspended = 1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	irq_set_affinity_hint(vector, NULL);
@@ -1017,15 +1079,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_cq(dev, qid);
 	}
 
-	nvme_free_queue_mem(nvmeq);
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_process_cq(nvmeq);
+	nvme_cancel_ios(nvmeq, false);
+	spin_unlock_irq(&nvmeq->q_lock);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 							int depth, int vector)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
-	unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
-					sizeof(struct nvme_cmd_info));
+	unsigned extra = nvme_queue_extra(depth);
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
 	if (!nvmeq)
 		return NULL;
@@ -1052,6 +1116,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
 	nvmeq->q_depth = depth;
 	nvmeq->cq_vector = vector;
+	nvmeq->q_suspended = 1;
+	dev->queue_count++;
 
 	return nvmeq;
 
@@ -1075,18 +1141,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 				IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
 }
 
-static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
-						int cq_size, int vector)
+static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
 {
-	int result;
-	struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
+	struct nvme_dev *dev = nvmeq->dev;
+	unsigned extra = nvme_queue_extra(nvmeq->q_depth);
 
-	if (!nvmeq)
-		return ERR_PTR(-ENOMEM);
+	nvmeq->sq_tail = 0;
+	nvmeq->cq_head = 0;
+	nvmeq->cq_phase = 1;
+	nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
+	memset(nvmeq->cmdid_data, 0, extra);
+	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+	nvme_cancel_ios(nvmeq, false);
+	nvmeq->q_suspended = 0;
+}
+
+static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
+{
+	struct nvme_dev *dev = nvmeq->dev;
+	int result;
 
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
-		goto free_nvmeq;
+		return result;
 
 	result = adapter_alloc_sq(dev, qid, nvmeq);
 	if (result < 0)
@@ -1096,19 +1173,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
 	if (result < 0)
 		goto release_sq;
 
-	return nvmeq;
+	spin_lock(&nvmeq->q_lock);
+	nvme_init_queue(nvmeq, qid);
+	spin_unlock(&nvmeq->q_lock);
+
+	return result;
 
  release_sq:
 	adapter_delete_sq(dev, qid);
  release_cq:
 	adapter_delete_cq(dev, qid);
- free_nvmeq:
-	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
-				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-				nvmeq->sq_cmds, nvmeq->sq_dma_addr);
-	kfree(nvmeq);
-	return ERR_PTR(result);
+	return result;
 }
 
 static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
@@ -1152,6 +1227,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
 	return nvme_wait_ready(dev, cap, true);
 }
 
+static int nvme_shutdown_ctrl(struct nvme_dev *dev)
+{
+	unsigned long timeout;
+	u32 cc;
+
+	cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL;
+	writel(cc, &dev->bar->cc);
+
+	timeout = 2 * HZ + jiffies;
+	while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
+							NVME_CSTS_SHST_CMPLT) {
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;
+		if (time_after(jiffies, timeout)) {
+			dev_err(&dev->pci_dev->dev,
+				"Device shutdown incomplete; abort shutdown\n");
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1159,16 +1258,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	u64 cap = readq(&dev->bar->cap);
 	struct nvme_queue *nvmeq;
 
-	dev->dbs = ((void __iomem *)dev->bar) + 4096;
-	dev->db_stride = NVME_CAP_STRIDE(cap);
-
 	result = nvme_disable_ctrl(dev, cap);
 	if (result < 0)
 		return result;
 
-	nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
-	if (!nvmeq)
-		return -ENOMEM;
+	nvmeq = dev->queues[0];
+	if (!nvmeq) {
+		nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
+		if (!nvmeq)
+			return -ENOMEM;
+		dev->queues[0] = nvmeq;
+	}
 
 	aqa = nvmeq->q_depth - 1;
 	aqa |= aqa << 16;
@@ -1185,17 +1285,15 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	result = nvme_enable_ctrl(dev, cap);
 	if (result)
-		goto free_q;
+		return result;
 
 	result = queue_request_irq(dev, nvmeq, "nvme admin");
 	if (result)
-		goto free_q;
-
-	dev->queues[0] = nvmeq;
-	return result;
+		return result;
 
- free_q:
-	nvme_free_queue_mem(nvmeq);
+	spin_lock(&nvmeq->q_lock);
+	nvme_init_queue(nvmeq, 0);
+	spin_unlock(&nvmeq->q_lock);
 	return result;
 }
 
@@ -1314,7 +1412,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.appmask = cpu_to_le16(io.appmask);
 
 	if (meta_len) {
-		meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, meta_len);
+		meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
+								meta_len);
 		if (IS_ERR(meta_iod)) {
 			status = PTR_ERR(meta_iod);
 			meta_iod = NULL;
@@ -1356,6 +1455,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	put_nvmeq(nvmeq);
 	if (length != (io.nblocks + 1) << ns->lba_shift)
 		status = -ENOMEM;
+	else if (!nvmeq || nvmeq->q_suspended)
+		status = -EBUSY;
 	else
 		status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
 
@@ -1453,6 +1554,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 
 	switch (cmd) {
 	case NVME_IOCTL_ID:
+		force_successful_syscall_return();
 		return ns->ns_id;
 	case NVME_IOCTL_ADMIN_CMD:
 		return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
@@ -1506,10 +1608,12 @@ static int nvme_kthread(void *data)
 			if (!nvmeq)
 				continue;
 			spin_lock_irq(&nvmeq->q_lock);
-			if (nvme_process_cq(nvmeq))
-				printk("process_cq did something\n");
+			if (nvmeq->q_suspended)
+				goto unlock;
+			nvme_process_cq(nvmeq);
 			nvme_cancel_ios(nvmeq, true);
 			nvme_resubmit_bios(nvmeq);
+ unlock:
 			spin_unlock_irq(&nvmeq->q_lock);
 		}
 	}
@@ -1556,7 +1660,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
 }
 
-static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
+static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
 			struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
 {
 	struct nvme_ns *ns;
@@ -1631,14 +1735,19 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
 								&result);
 	if (status)
-		return -EIO;
+		return status < 0 ? -EIO : -EBUSY;
 	return min(result & 0xffff, result >> 16) + 1;
 }
 
+static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
+{
+	return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
+}
+
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = dev->pci_dev;
-	int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count;
+	int result, cpu, i, vecs, nr_io_queues, size, q_depth;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1647,53 +1756,80 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (result < nr_io_queues)
 		nr_io_queues = result;
 
-	q_count = nr_io_queues;
-	/* Deregister the admin queue's interrupt */
-	free_irq(dev->entry[0].vector, dev->queues[0]);
-
-	db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
-	if (db_bar_size > 8192) {
+	size = db_bar_size(dev, nr_io_queues);
+	if (size > 8192) {
 		iounmap(dev->bar);
-		dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size);
+		do {
+			dev->bar = ioremap(pci_resource_start(pdev, 0), size);
+			if (dev->bar)
+				break;
+			if (!--nr_io_queues)
+				return -ENOMEM;
+			size = db_bar_size(dev, nr_io_queues);
+		} while (1);
 		dev->dbs = ((void __iomem *)dev->bar) + 4096;
 		dev->queues[0]->q_db = dev->dbs;
 	}
 
-	for (i = 0; i < nr_io_queues; i++)
+	/* Deregister the admin queue's interrupt */
+	free_irq(dev->entry[0].vector, dev->queues[0]);
+
+	vecs = nr_io_queues;
+	for (i = 0; i < vecs; i++)
 		dev->entry[i].entry = i;
 	for (;;) {
-		result = pci_enable_msix(pdev, dev->entry, nr_io_queues);
-		if (result == 0) {
-			break;
-		} else if (result > 0) {
-			nr_io_queues = result;
-			continue;
-		} else {
-			nr_io_queues = 0;
+		result = pci_enable_msix(pdev, dev->entry, vecs);
+		if (result <= 0)
 			break;
-		}
+		vecs = result;
 	}
 
-	if (nr_io_queues == 0) {
-		nr_io_queues = q_count;
+	if (result < 0) {
+		vecs = nr_io_queues;
+		if (vecs > 32)
+			vecs = 32;
 		for (;;) {
-			result = pci_enable_msi_block(pdev, nr_io_queues);
+			result = pci_enable_msi_block(pdev, vecs);
 			if (result == 0) {
-				for (i = 0; i < nr_io_queues; i++)
+				for (i = 0; i < vecs; i++)
 					dev->entry[i].vector = i + pdev->irq;
 				break;
-			} else if (result > 0) {
-				nr_io_queues = result;
-				continue;
-			} else {
-				nr_io_queues = 1;
+			} else if (result < 0) {
+				vecs = 1;
 				break;
 			}
+			vecs = result;
 		}
 	}
 
+	/*
+	 * Should investigate if there's a performance win from allocating
+	 * more queues than interrupt vectors; it might allow the submission
+	 * path to scale better, even if the receive path is limited by the
+	 * number of interrupts.
+	 */
+	nr_io_queues = vecs;
+
 	result = queue_request_irq(dev, dev->queues[0], "nvme admin");
-	/* XXX: handle failure here */
+	if (result) {
+		dev->queues[0]->q_suspended = 1;
+		goto free_queues;
+	}
+
+	/* Free previously allocated queues that are no longer usable */
+	spin_lock(&dev_list_lock);
+	for (i = dev->queue_count - 1; i > nr_io_queues; i--) {
+		struct nvme_queue *nvmeq = dev->queues[i];
+
+		spin_lock(&nvmeq->q_lock);
+		nvme_cancel_ios(nvmeq, false);
+		spin_unlock(&nvmeq->q_lock);
+
+		nvme_free_queue(nvmeq);
+		dev->queue_count--;
+		dev->queues[i] = NULL;
+	}
+	spin_unlock(&dev_list_lock);
 
 	cpu = cpumask_first(cpu_online_mask);
 	for (i = 0; i < nr_io_queues; i++) {
@@ -1703,11 +1839,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 
 	q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
 								NVME_Q_DEPTH);
-	for (i = 0; i < nr_io_queues; i++) {
-		dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
-		if (IS_ERR(dev->queues[i + 1]))
-			return PTR_ERR(dev->queues[i + 1]);
-		dev->queue_count++;
+	for (i = dev->queue_count - 1; i < nr_io_queues; i++) {
+		dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
+		if (!dev->queues[i + 1]) {
+			result = -ENOMEM;
+			goto free_queues;
+		}
 	}
 
 	for (; i < num_possible_cpus(); i++) {
@@ -1715,15 +1852,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 		dev->queues[i + 1] = dev->queues[target + 1];
 	}
 
-	return 0;
-}
+	for (i = 1; i < dev->queue_count; i++) {
+		result = nvme_create_queue(dev->queues[i], i);
+		if (result) {
+			for (--i; i > 0; i--)
+				nvme_disable_queue(dev, i);
+			goto free_queues;
+		}
+	}
 
-static void nvme_free_queues(struct nvme_dev *dev)
-{
-	int i;
+	return 0;
 
-	for (i = dev->queue_count - 1; i >= 0; i--)
-		nvme_free_queue(dev, i);
+ free_queues:
+	nvme_free_queues(dev);
+	return result;
 }
 
 /*
@@ -1734,7 +1876,8 @@ static void nvme_free_queues(struct nvme_dev *dev)
  */
 static int nvme_dev_add(struct nvme_dev *dev)
 {
-	int res, nn, i;
+	int res;
+	unsigned nn, i;
 	struct nvme_ns *ns;
 	struct nvme_id_ctrl *ctrl;
 	struct nvme_id_ns *id_ns;
@@ -1742,10 +1885,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dma_addr_t dma_addr;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
-	res = nvme_setup_io_queues(dev);
-	if (res)
-		return res;
-
 	mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
 								GFP_KERNEL);
 	if (!mem)
@@ -1796,23 +1935,86 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	return res;
 }
 
-static int nvme_dev_remove(struct nvme_dev *dev)
+static int nvme_dev_map(struct nvme_dev *dev)
 {
-	struct nvme_ns *ns, *next;
+	int bars, result = -ENOMEM;
+	struct pci_dev *pdev = dev->pci_dev;
+
+	if (pci_enable_device_mem(pdev))
+		return result;
+
+	dev->entry[0].vector = pdev->irq;
+	pci_set_master(pdev);
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	if (pci_request_selected_regions(pdev, bars, "nvme"))
+		goto disable_pci;
+
+	if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+	else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
+		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+	else
+		goto disable_pci;
+
+	pci_set_drvdata(pdev, dev);
+	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+	if (!dev->bar)
+		goto disable;
+
+	dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap));
+	dev->dbs = ((void __iomem *)dev->bar) + 4096;
+
+	return 0;
+
+ disable:
+	pci_release_regions(pdev);
+ disable_pci:
+	pci_disable_device(pdev);
+	return result;
+}
+
+static void nvme_dev_unmap(struct nvme_dev *dev)
+{
+	if (dev->pci_dev->msi_enabled)
+		pci_disable_msi(dev->pci_dev);
+	else if (dev->pci_dev->msix_enabled)
+		pci_disable_msix(dev->pci_dev);
+
+	if (dev->bar) {
+		iounmap(dev->bar);
+		dev->bar = NULL;
+	}
+
+	pci_release_regions(dev->pci_dev);
+	if (pci_is_enabled(dev->pci_dev))
+		pci_disable_device(dev->pci_dev);
+}
+
+static void nvme_dev_shutdown(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->queue_count - 1; i >= 0; i--)
+		nvme_disable_queue(dev, i);
 
 	spin_lock(&dev_list_lock);
-	list_del(&dev->node);
+	list_del_init(&dev->node);
 	spin_unlock(&dev_list_lock);
 
+	if (dev->bar)
+		nvme_shutdown_ctrl(dev);
+	nvme_dev_unmap(dev);
+}
+
+static void nvme_dev_remove(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns, *next;
+
 	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
 		list_del(&ns->list);
 		del_gendisk(ns->disk);
 		nvme_ns_free(ns);
 	}
-
-	nvme_free_queues(dev);
-
-	return 0;
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -1872,15 +2074,10 @@ static void nvme_free_dev(struct kref *kref)
 {
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 	nvme_dev_remove(dev);
-	if (dev->pci_dev->msi_enabled)
-		pci_disable_msi(dev->pci_dev);
-	else if (dev->pci_dev->msix_enabled)
-		pci_disable_msix(dev->pci_dev);
-	iounmap(dev->bar);
+	nvme_dev_shutdown(dev);
+	nvme_free_queues(dev);
 	nvme_release_instance(dev);
 	nvme_release_prp_pools(dev);
-	pci_disable_device(dev->pci_dev);
-	pci_release_regions(dev->pci_dev);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -1921,9 +2118,40 @@ static const struct file_operations nvme_dev_fops = {
 	.compat_ioctl = nvme_dev_ioctl,
 };
 
+static int nvme_dev_start(struct nvme_dev *dev)
+{
+	int result;
+
+	result = nvme_dev_map(dev);
+	if (result)
+		return result;
+
+	result = nvme_configure_admin_queue(dev);
+	if (result)
+		goto unmap;
+
+	spin_lock(&dev_list_lock);
+	list_add(&dev->node, &dev_list);
+	spin_unlock(&dev_list_lock);
+
+	result = nvme_setup_io_queues(dev);
+	if (result && result != -EBUSY)
+		goto disable;
+
+	return result;
+
+ disable:
+	spin_lock(&dev_list_lock);
+	list_del_init(&dev->node);
+	spin_unlock(&dev_list_lock);
+ unmap:
+	nvme_dev_unmap(dev);
+	return result;
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	int bars, result = -ENOMEM;
+	int result = -ENOMEM;
 	struct nvme_dev *dev;
 
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1938,53 +2166,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!dev->queues)
 		goto free;
 
-	if (pci_enable_device_mem(pdev))
-		goto free;
-	pci_set_master(pdev);
-	bars = pci_select_bars(pdev, IORESOURCE_MEM);
-	if (pci_request_selected_regions(pdev, bars, "nvme"))
-		goto disable;
-
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->pci_dev = pdev;
-	pci_set_drvdata(pdev, dev);
-
-	if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
-		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
-	else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
-		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
-	else
-		goto disable;
-
 	result = nvme_set_instance(dev);
 	if (result)
-		goto disable;
-
-	dev->entry[0].vector = pdev->irq;
+		goto free;
 
 	result = nvme_setup_prp_pools(dev);
 	if (result)
-		goto disable_msix;
+		goto release;
 
-	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
-	if (!dev->bar) {
-		result = -ENOMEM;
-		goto disable_msix;
+	result = nvme_dev_start(dev);
+	if (result) {
+		if (result == -EBUSY)
+			goto create_cdev;
+		goto release_pools;
 	}
 
-	result = nvme_configure_admin_queue(dev);
-	if (result)
-		goto unmap;
-	dev->queue_count++;
-
-	spin_lock(&dev_list_lock);
-	list_add(&dev->node, &dev_list);
-	spin_unlock(&dev_list_lock);
-
 	result = nvme_dev_add(dev);
 	if (result)
-		goto delete;
+		goto shutdown;
 
+ create_cdev:
 	scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
 	dev->miscdev.minor = MISC_DYNAMIC_MINOR;
 	dev->miscdev.parent = &pdev->dev;
@@ -1999,24 +2202,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
  remove:
 	nvme_dev_remove(dev);
-delete:
-	spin_lock(&dev_list_lock);
-	list_del(&dev->node);
-	spin_unlock(&dev_list_lock);
-
+ shutdown:
+	nvme_dev_shutdown(dev);
+ release_pools:
 	nvme_free_queues(dev);
-unmap:
-	iounmap(dev->bar);
-disable_msix:
-	if (dev->pci_dev->msi_enabled)
-		pci_disable_msi(dev->pci_dev);
-	else if (dev->pci_dev->msix_enabled)
-		pci_disable_msix(dev->pci_dev);
-	nvme_release_instance(dev);
 	nvme_release_prp_pools(dev);
-disable:
-	pci_disable_device(pdev);
-	pci_release_regions(pdev);
+ release:
+	nvme_release_instance(dev);
 free:
 	kfree(dev->queues);
 	kfree(dev->entry);
@@ -2037,8 +2229,30 @@ static void nvme_remove(struct pci_dev *pdev)
 #define nvme_link_reset NULL
 #define nvme_slot_reset NULL
 #define nvme_error_resume NULL
-#define nvme_suspend NULL
-#define nvme_resume NULL
+
+static int nvme_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct nvme_dev *ndev = pci_get_drvdata(pdev);
+
+	nvme_dev_shutdown(ndev);
+	return 0;
+}
+
+static int nvme_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct nvme_dev *ndev = pci_get_drvdata(pdev);
+	int ret;
+
+	ret = nvme_dev_start(ndev);
+	/* XXX: should remove gendisks if resume fails */
+	if (ret)
+		nvme_free_queues(ndev);
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
 
 static const struct pci_error_handlers nvme_err_handler = {
 	.error_detected = nvme_error_detected,
@@ -2062,8 +2276,9 @@ static struct pci_driver nvme_driver = {
 	.id_table = nvme_id_table,
 	.probe = nvme_probe,
 	.remove = nvme_remove,
-	.suspend = nvme_suspend,
-	.resume = nvme_resume,
+	.driver = {
+		.pm = &nvme_dev_pm_ops,
+	},
 	.err_handler = &nvme_err_handler,
 };
 
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 102de2f52b5c..4a4ff4eb8e23 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -933,13 +933,12 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res = SNTI_TRANSLATION_SUCCESS;
 	int xfer_len;
 
-	inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
+	inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
 	if (inq_response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
 	}
 
-	memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
 	inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
 	inq_response[2] = 0x00; /* Page Length MSB */
 	inq_response[3] = 0x3C; /* Page Length LSB */
@@ -964,12 +963,11 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int xfer_len;
 	u8 *log_response;
 
-	log_response = kmalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
+	log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
 	if (log_response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
 	}
-	memset(log_response, 0, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
 
 	log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
 	/* Subpage=0x00, Page Length MSB=0 */
@@ -1000,12 +998,11 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	u8 temp_c;
 	u16 temp_k;
 
-	log_response = kmalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
+	log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
 	if (log_response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
 	}
-	memset(log_response, 0, LOG_INFO_EXCP_PAGE_LENGTH);
 
 	mem = dma_alloc_coherent(&dev->pci_dev->dev,
 					sizeof(struct nvme_smart_log),
@@ -1069,12 +1066,11 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 temp_c_cur, temp_c_thresh;
 	u16 temp_k;
 
-	log_response = kmalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
+	log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
 	if (log_response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
 	}
-	memset(log_response, 0, LOG_TEMP_PAGE_LENGTH);
 
 	mem = dma_alloc_coherent(&dev->pci_dev->dev,
 					sizeof(struct nvme_smart_log),
@@ -1380,12 +1376,11 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
 	blk_desc_offset = mph_size;
 	mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
 
-	response = kmalloc(resp_size, GFP_KERNEL);
+	response = kzalloc(resp_size, GFP_KERNEL);
 	if (response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
 	}
-	memset(response, 0, resp_size);
 
 	res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
 					llbaa, mode_data_length, blk_desc_len);
@@ -2480,12 +2475,11 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 	id_ns = mem;
 
-	response = kmalloc(resp_size, GFP_KERNEL);
+	response = kzalloc(resp_size, GFP_KERNEL);
 	if (response == NULL) {
 		res = -ENOMEM;
 		goto out_dma;
 	}
-	memset(response, 0, resp_size);
 	nvme_trans_fill_read_cap(response, id_ns, cdb16);
 
 	xfer_len = min(alloc_len, resp_size);
@@ -2554,12 +2548,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out_dma;
 	}
 
-	response = kmalloc(resp_size, GFP_KERNEL);
+	response = kzalloc(resp_size, GFP_KERNEL);
 	if (response == NULL) {
 		res = -ENOMEM;
 		goto out_dma;
 	}
-	memset(response, 0, resp_size);
 
 	/* The first LUN ID will always be 0 per the SAM spec */
 	for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
@@ -2600,12 +2593,11 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
 					(FIXED_FMT_SENSE_DATA_SIZE));
-	response = kmalloc(resp_size, GFP_KERNEL);
+	response = kzalloc(resp_size, GFP_KERNEL);
 	if (response == NULL) {
 		res = -ENOMEM;
 		goto out;
 	}
-	memset(response, 0, resp_size);
 
 	if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
 		/* Descriptor Format Sense Data */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f451c8d6e231..26ebcf41c213 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011, Intel Corporation.
+ * Copyright (c) 2011-2013, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -19,7 +19,10 @@
 #ifndef _LINUX_NVME_H
 #define _LINUX_NVME_H
 
-#include <linux/types.h>
+#include <uapi/linux/nvme.h>
+#include <linux/pci.h>
+#include <linux/miscdevice.h>
+#include <linux/kref.h>
 
 struct nvme_bar {
 	__u64 cap;	/* Controller Capabilities */
@@ -50,6 +53,7 @@ enum {
 	NVME_CC_SHN_NONE = 0 << 14,
 	NVME_CC_SHN_NORMAL = 1 << 14,
 	NVME_CC_SHN_ABRUPT = 2 << 14,
+	NVME_CC_SHN_MASK = 3 << 14,
 	NVME_CC_IOSQES = 6 << 16,
 	NVME_CC_IOCQES = 4 << 20,
 	NVME_CSTS_RDY = 1 << 0,
@@ -57,462 +61,11 @@ enum {
 	NVME_CSTS_SHST_NORMAL = 0 << 2,
 	NVME_CSTS_SHST_OCCUR = 1 << 2,
 	NVME_CSTS_SHST_CMPLT = 2 << 2,
-};
-
-struct nvme_id_power_state {
-	__le16 max_power;	/* centiwatts */
-	__u16 rsvd2;
-	__le32 entry_lat;	/* microseconds */
-	__le32 exit_lat;	/* microseconds */
-	__u8 read_tput;
-	__u8 read_lat;
-	__u8 write_tput;
-	__u8 write_lat;
-	__u8 rsvd16[16];
+	NVME_CSTS_SHST_MASK = 3 << 2,
 };
 
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
-struct nvme_id_ctrl {
-	__le16 vid;
-	__le16 ssvid;
-	char sn[20];
-	char mn[40];
-	char fr[8];
-	__u8 rab;
-	__u8 ieee[3];
-	__u8 mic;
-	__u8 mdts;
-	__u8 rsvd78[178];
-	__le16 oacs;
-	__u8 acl;
-	__u8 aerl;
-	__u8 frmw;
-	__u8 lpa;
-	__u8 elpe;
-	__u8 npss;
-	__u8 rsvd264[248];
-	__u8 sqes;
-	__u8 cqes;
-	__u8 rsvd514[2];
-	__le32 nn;
-	__le16 oncs;
-	__le16 fuses;
-	__u8 fna;
-	__u8 vwc;
-	__le16 awun;
-	__le16 awupf;
-	__u8 rsvd530[1518];
-	struct nvme_id_power_state psd[32];
-	__u8 vs[1024];
-};
-
-enum {
-	NVME_CTRL_ONCS_COMPARE = 1 << 0,
-	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
-	NVME_CTRL_ONCS_DSM = 1 << 2,
-};
-
-struct nvme_lbaf {
-	__le16 ms;
-	__u8 ds;
-	__u8 rp;
-};
-
-struct nvme_id_ns {
-	__le64 nsze;
-	__le64 ncap;
-	__le64 nuse;
-	__u8 nsfeat;
-	__u8 nlbaf;
-	__u8 flbas;
-	__u8 mc;
-	__u8 dpc;
-	__u8 dps;
-	__u8 rsvd30[98];
-	struct nvme_lbaf lbaf[16];
-	__u8 rsvd192[192];
-	__u8 vs[3712];
-};
-
-enum {
-	NVME_NS_FEAT_THIN = 1 << 0,
-	NVME_LBAF_RP_BEST = 0,
-	NVME_LBAF_RP_BETTER = 1,
-	NVME_LBAF_RP_GOOD = 2,
-	NVME_LBAF_RP_DEGRADED = 3,
-};
-
-struct nvme_smart_log {
-	__u8 critical_warning;
-	__u8 temperature[2];
-	__u8 avail_spare;
-	__u8 spare_thresh;
-	__u8 percent_used;
-	__u8 rsvd6[26];
-	__u8 data_units_read[16];
-	__u8 data_units_written[16];
-	__u8 host_reads[16];
-	__u8 host_writes[16];
-	__u8 ctrl_busy_time[16];
-	__u8 power_cycles[16];
-	__u8 power_on_hours[16];
-	__u8 unsafe_shutdowns[16];
-	__u8 media_errors[16];
-	__u8 num_err_log_entries[16];
-	__u8 rsvd192[320];
-};
-
-enum {
-	NVME_SMART_CRIT_SPARE = 1 << 0,
-	NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
-	NVME_SMART_CRIT_RELIABILITY = 1 << 2,
-	NVME_SMART_CRIT_MEDIA = 1 << 3,
-	NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
-};
-
-struct nvme_lba_range_type {
-	__u8 type;
-	__u8 attributes;
-	__u8 rsvd2[14];
-	__u64 slba;
-	__u64 nlb;
-	__u8 guid[16];
-	__u8 rsvd48[16];
-};
-
-enum {
-	NVME_LBART_TYPE_FS = 0x01,
-	NVME_LBART_TYPE_RAID = 0x02,
-	NVME_LBART_TYPE_CACHE = 0x03,
-	NVME_LBART_TYPE_SWAP = 0x04,
-
-	NVME_LBART_ATTRIB_TEMP = 1 << 0,
-	NVME_LBART_ATTRIB_HIDE = 1 << 1,
-};
-
-/* I/O commands */
-
-enum nvme_opcode {
-	nvme_cmd_flush = 0x00,
-	nvme_cmd_write = 0x01,
-	nvme_cmd_read = 0x02,
-	nvme_cmd_write_uncor = 0x04,
-	nvme_cmd_compare = 0x05,
-	nvme_cmd_dsm = 0x09,
-};
-
-struct nvme_common_command {
-	__u8 opcode;
-	__u8 flags;
-	__u16 command_id;
-	__le32 nsid;
-	__le32 cdw2[2];
-	__le64 metadata;
-	__le64 prp1;
-	__le64 prp2;
-	__le32 cdw10[6];
-};
-
-struct nvme_rw_command {
-	__u8 opcode;
-	__u8 flags;
-	__u16 command_id;
-	__le32 nsid;
-	__u64 rsvd2;
-	__le64 metadata;
-	__le64 prp1;
-	__le64 prp2;
-	__le64 slba;
-	__le16 length;
-	__le16 control;
-	__le32 dsmgmt;
-	__le32 reftag;
-	__le16 apptag;
-	__le16 appmask;
-};
-
-enum {
-	NVME_RW_LR = 1 << 15,
-	NVME_RW_FUA = 1 << 14,
-	NVME_RW_DSM_FREQ_UNSPEC = 0,
-	NVME_RW_DSM_FREQ_TYPICAL = 1,
-	NVME_RW_DSM_FREQ_RARE = 2,
-	NVME_RW_DSM_FREQ_READS = 3,
-	NVME_RW_DSM_FREQ_WRITES = 4,
-	NVME_RW_DSM_FREQ_RW = 5,
-	NVME_RW_DSM_FREQ_ONCE = 6,
-	NVME_RW_DSM_FREQ_PREFETCH = 7,
-	NVME_RW_DSM_FREQ_TEMP = 8,
-	NVME_RW_DSM_LATENCY_NONE = 0 << 4,
-	NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
-	NVME_RW_DSM_LATENCY_NORM = 2 << 4,
-	NVME_RW_DSM_LATENCY_LOW = 3 << 4,
-	NVME_RW_DSM_SEQ_REQ = 1 << 6,
-	NVME_RW_DSM_COMPRESSED = 1 << 7,
-};
-
-struct nvme_dsm_cmd {
-	__u8 opcode;
-	__u8 flags;
-	__u16 command_id;
-	__le32 nsid;
-	__u64 rsvd2[2];
-	__le64 prp1;
-	__le64 prp2;
-	__le32 nr;
-	__le32 attributes;
-	__u32 rsvd12[4];
-};
-
-enum {
-	NVME_DSMGMT_IDR = 1 << 0,
-	NVME_DSMGMT_IDW = 1 << 1,
-	NVME_DSMGMT_AD = 1 << 2,
-};
-
-struct nvme_dsm_range {
-	__le32 cattr;
-	__le32 nlb;
-	__le64 slba;
-};
-
-/* Admin commands */
-
-enum nvme_admin_opcode {
-	nvme_admin_delete_sq = 0x00,
-	nvme_admin_create_sq = 0x01,
-	nvme_admin_get_log_page = 0x02,
-	nvme_admin_delete_cq = 0x04,
-	nvme_admin_create_cq = 0x05,
-	nvme_admin_identify = 0x06,
-	nvme_admin_abort_cmd = 0x08,
-	nvme_admin_set_features = 0x09,
-	nvme_admin_get_features = 0x0a,
-	nvme_admin_async_event = 0x0c,
-	nvme_admin_activate_fw = 0x10,
-	nvme_admin_download_fw = 0x11,
-	nvme_admin_format_nvm = 0x80,
-	nvme_admin_security_send = 0x81,
-	nvme_admin_security_recv = 0x82,
-};
-
-enum {
-	NVME_QUEUE_PHYS_CONTIG = (1 << 0),
-	NVME_CQ_IRQ_ENABLED = (1 << 1),
-	NVME_SQ_PRIO_URGENT = (0 << 1),
-	NVME_SQ_PRIO_HIGH = (1 << 1),
-	NVME_SQ_PRIO_MEDIUM = (2 << 1),
-	NVME_SQ_PRIO_LOW = (3 << 1),
-	NVME_FEAT_ARBITRATION = 0x01,
-	NVME_FEAT_POWER_MGMT = 0x02,
-	NVME_FEAT_LBA_RANGE = 0x03,
-	NVME_FEAT_TEMP_THRESH = 0x04,
-	NVME_FEAT_ERR_RECOVERY = 0x05,
-	NVME_FEAT_VOLATILE_WC = 0x06,
-	NVME_FEAT_NUM_QUEUES = 0x07,
-	NVME_FEAT_IRQ_COALESCE = 0x08,
-	NVME_FEAT_IRQ_CONFIG = 0x09,
-	NVME_FEAT_WRITE_ATOMIC = 0x0a,
-	NVME_FEAT_ASYNC_EVENT = 0x0b,
-	NVME_FEAT_SW_PROGRESS = 0x0c,
319 | NVME_FWACT_REPL = (0 << 3), | ||
320 | NVME_FWACT_REPL_ACTV = (1 << 3), | ||
321 | NVME_FWACT_ACTV = (2 << 3), | ||
322 | }; | ||
323 | |||
324 | struct nvme_identify { | ||
325 | __u8 opcode; | ||
326 | __u8 flags; | ||
327 | __u16 command_id; | ||
328 | __le32 nsid; | ||
329 | __u64 rsvd2[2]; | ||
330 | __le64 prp1; | ||
331 | __le64 prp2; | ||
332 | __le32 cns; | ||
333 | __u32 rsvd11[5]; | ||
334 | }; | ||
335 | |||
336 | struct nvme_features { | ||
337 | __u8 opcode; | ||
338 | __u8 flags; | ||
339 | __u16 command_id; | ||
340 | __le32 nsid; | ||
341 | __u64 rsvd2[2]; | ||
342 | __le64 prp1; | ||
343 | __le64 prp2; | ||
344 | __le32 fid; | ||
345 | __le32 dword11; | ||
346 | __u32 rsvd12[4]; | ||
347 | }; | ||
348 | |||
349 | struct nvme_create_cq { | ||
350 | __u8 opcode; | ||
351 | __u8 flags; | ||
352 | __u16 command_id; | ||
353 | __u32 rsvd1[5]; | ||
354 | __le64 prp1; | ||
355 | __u64 rsvd8; | ||
356 | __le16 cqid; | ||
357 | __le16 qsize; | ||
358 | __le16 cq_flags; | ||
359 | __le16 irq_vector; | ||
360 | __u32 rsvd12[4]; | ||
361 | }; | ||
362 | |||
363 | struct nvme_create_sq { | ||
364 | __u8 opcode; | ||
365 | __u8 flags; | ||
366 | __u16 command_id; | ||
367 | __u32 rsvd1[5]; | ||
368 | __le64 prp1; | ||
369 | __u64 rsvd8; | ||
370 | __le16 sqid; | ||
371 | __le16 qsize; | ||
372 | __le16 sq_flags; | ||
373 | __le16 cqid; | ||
374 | __u32 rsvd12[4]; | ||
375 | }; | ||
376 | |||
377 | struct nvme_delete_queue { | ||
378 | __u8 opcode; | ||
379 | __u8 flags; | ||
380 | __u16 command_id; | ||
381 | __u32 rsvd1[9]; | ||
382 | __le16 qid; | ||
383 | __u16 rsvd10; | ||
384 | __u32 rsvd11[5]; | ||
385 | }; | ||
386 | |||
387 | struct nvme_download_firmware { | ||
388 | __u8 opcode; | ||
389 | __u8 flags; | ||
390 | __u16 command_id; | ||
391 | __u32 rsvd1[5]; | ||
392 | __le64 prp1; | ||
393 | __le64 prp2; | ||
394 | __le32 numd; | ||
395 | __le32 offset; | ||
396 | __u32 rsvd12[4]; | ||
397 | }; | ||
398 | |||
399 | struct nvme_format_cmd { | ||
400 | __u8 opcode; | ||
401 | __u8 flags; | ||
402 | __u16 command_id; | ||
403 | __le32 nsid; | ||
404 | __u64 rsvd2[4]; | ||
405 | __le32 cdw10; | ||
406 | __u32 rsvd11[5]; | ||
407 | }; | ||
408 | |||
409 | struct nvme_command { | ||
410 | union { | ||
411 | struct nvme_common_command common; | ||
412 | struct nvme_rw_command rw; | ||
413 | struct nvme_identify identify; | ||
414 | struct nvme_features features; | ||
415 | struct nvme_create_cq create_cq; | ||
416 | struct nvme_create_sq create_sq; | ||
417 | struct nvme_delete_queue delete_queue; | ||
418 | struct nvme_download_firmware dlfw; | ||
419 | struct nvme_format_cmd format; | ||
420 | struct nvme_dsm_cmd dsm; | ||
421 | }; | ||
422 | }; | ||
423 | |||
424 | enum { | ||
425 | NVME_SC_SUCCESS = 0x0, | ||
426 | NVME_SC_INVALID_OPCODE = 0x1, | ||
427 | NVME_SC_INVALID_FIELD = 0x2, | ||
428 | NVME_SC_CMDID_CONFLICT = 0x3, | ||
429 | NVME_SC_DATA_XFER_ERROR = 0x4, | ||
430 | NVME_SC_POWER_LOSS = 0x5, | ||
431 | NVME_SC_INTERNAL = 0x6, | ||
432 | NVME_SC_ABORT_REQ = 0x7, | ||
433 | NVME_SC_ABORT_QUEUE = 0x8, | ||
434 | NVME_SC_FUSED_FAIL = 0x9, | ||
435 | NVME_SC_FUSED_MISSING = 0xa, | ||
436 | NVME_SC_INVALID_NS = 0xb, | ||
437 | NVME_SC_CMD_SEQ_ERROR = 0xc, | ||
438 | NVME_SC_LBA_RANGE = 0x80, | ||
439 | NVME_SC_CAP_EXCEEDED = 0x81, | ||
440 | NVME_SC_NS_NOT_READY = 0x82, | ||
441 | NVME_SC_CQ_INVALID = 0x100, | ||
442 | NVME_SC_QID_INVALID = 0x101, | ||
443 | NVME_SC_QUEUE_SIZE = 0x102, | ||
444 | NVME_SC_ABORT_LIMIT = 0x103, | ||
445 | NVME_SC_ABORT_MISSING = 0x104, | ||
446 | NVME_SC_ASYNC_LIMIT = 0x105, | ||
447 | NVME_SC_FIRMWARE_SLOT = 0x106, | ||
448 | NVME_SC_FIRMWARE_IMAGE = 0x107, | ||
449 | NVME_SC_INVALID_VECTOR = 0x108, | ||
450 | NVME_SC_INVALID_LOG_PAGE = 0x109, | ||
451 | NVME_SC_INVALID_FORMAT = 0x10a, | ||
452 | NVME_SC_BAD_ATTRIBUTES = 0x180, | ||
453 | NVME_SC_WRITE_FAULT = 0x280, | ||
454 | NVME_SC_READ_ERROR = 0x281, | ||
455 | NVME_SC_GUARD_CHECK = 0x282, | ||
456 | NVME_SC_APPTAG_CHECK = 0x283, | ||
457 | NVME_SC_REFTAG_CHECK = 0x284, | ||
458 | NVME_SC_COMPARE_FAILED = 0x285, | ||
459 | NVME_SC_ACCESS_DENIED = 0x286, | ||
460 | }; | ||
461 | |||
462 | struct nvme_completion { | ||
463 | __le32 result; /* Used by admin commands to return data */ | ||
464 | __u32 rsvd; | ||
465 | __le16 sq_head; /* how much of this queue may be reclaimed */ | ||
466 | __le16 sq_id; /* submission queue that generated this entry */ | ||
467 | __u16 command_id; /* of the command which completed */ | ||
468 | __le16 status; /* did the command fail, and if so, why? */ | ||
469 | }; | ||
470 | |||
471 | struct nvme_user_io { | ||
472 | __u8 opcode; | ||
473 | __u8 flags; | ||
474 | __u16 control; | ||
475 | __u16 nblocks; | ||
476 | __u16 rsvd; | ||
477 | __u64 metadata; | ||
478 | __u64 addr; | ||
479 | __u64 slba; | ||
480 | __u32 dsmgmt; | ||
481 | __u32 reftag; | ||
482 | __u16 apptag; | ||
483 | __u16 appmask; | ||
484 | }; | ||
485 | |||
486 | struct nvme_admin_cmd { | ||
487 | __u8 opcode; | ||
488 | __u8 flags; | ||
489 | __u16 rsvd1; | ||
490 | __u32 nsid; | ||
491 | __u32 cdw2; | ||
492 | __u32 cdw3; | ||
493 | __u64 metadata; | ||
494 | __u64 addr; | ||
495 | __u32 metadata_len; | ||
496 | __u32 data_len; | ||
497 | __u32 cdw10; | ||
498 | __u32 cdw11; | ||
499 | __u32 cdw12; | ||
500 | __u32 cdw13; | ||
501 | __u32 cdw14; | ||
502 | __u32 cdw15; | ||
503 | __u32 timeout_ms; | ||
504 | __u32 result; | ||
505 | }; | ||
506 | |||
507 | #define NVME_IOCTL_ID _IO('N', 0x40) | ||
508 | #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) | ||
509 | #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) | ||
510 | |||
511 | #ifdef __KERNEL__ | ||
512 | #include <linux/pci.h> | ||
513 | #include <linux/miscdevice.h> | ||
514 | #include <linux/kref.h> | ||
515 | |||
516 | #define NVME_IO_TIMEOUT (5 * HZ) | 69 | #define NVME_IO_TIMEOUT (5 * HZ) |
517 | 70 | ||
518 | /* | 71 | /* |
@@ -553,7 +106,7 @@ struct nvme_ns { | |||
553 | struct request_queue *queue; | 106 | struct request_queue *queue; |
554 | struct gendisk *disk; | 107 | struct gendisk *disk; |
555 | 108 | ||
556 | int ns_id; | 109 | unsigned ns_id; |
557 | int lba_shift; | 110 | int lba_shift; |
558 | int ms; | 111 | int ms; |
559 | u64 mode_select_num_blocks; | 112 | u64 mode_select_num_blocks; |
@@ -572,6 +125,7 @@ struct nvme_iod { | |||
572 | int offset; /* Of PRP list */ | 125 | int offset; /* Of PRP list */ |
573 | int nents; /* Used in scatterlist */ | 126 | int nents; /* Used in scatterlist */ |
574 | int length; /* Of data, in bytes */ | 127 | int length; /* Of data, in bytes */ |
128 | unsigned long start_time; | ||
575 | dma_addr_t first_dma; | 129 | dma_addr_t first_dma; |
576 | struct scatterlist sg[0]; | 130 | struct scatterlist sg[0]; |
577 | }; | 131 | }; |
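
The new start_time field stamps each I/O descriptor at submission so that completion can charge the elapsed time to the disk's statistics (the "Disk IO statistics" change in this pull). A sketch of the submission-side pattern; the surrounding names (disk, bio, rw, cpu) are assumed from the submission context, not taken from this diff:

	/* Assumed accounting sketch: stamp the iod, then bump the per-disk
	 * counters for this transfer direction. */
	iod->start_time = jiffies;
	cpu = part_stat_lock();
	part_round_stats(cpu, &disk->part0);
	part_stat_inc(cpu, &disk->part0, ios[rw]);
	part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
	part_stat_unlock();
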
@@ -613,6 +167,4 @@ struct sg_io_hdr; | |||
613 | int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); | 167 | int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); |
614 | int nvme_sg_get_version_num(int __user *ip); | 168 | int nvme_sg_get_version_num(int __user *ip); |
615 | 169 | ||
616 | #endif | ||
617 | |||
618 | #endif /* _LINUX_NVME_H */ | 170 | #endif /* _LINUX_NVME_H */ |
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index e7c94eeb9475..115add2515aa 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild | |||
@@ -284,6 +284,7 @@ header-y += nfs_mount.h | |||
284 | header-y += nfsacl.h | 284 | header-y += nfsacl.h |
285 | header-y += nl80211.h | 285 | header-y += nl80211.h |
286 | header-y += nubus.h | 286 | header-y += nubus.h |
287 | header-y += nvme.h | ||
287 | header-y += nvram.h | 288 | header-y += nvram.h |
288 | header-y += omap3isp.h | 289 | header-y += omap3isp.h |
289 | header-y += omapfb.h | 290 | header-y += omapfb.h |
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h new file mode 100644 index 000000000000..989c04e0c563 --- /dev/null +++ b/include/uapi/linux/nvme.h | |||
@@ -0,0 +1,477 @@ | |||
1 | /* | ||
2 | * Definitions for the NVM Express interface | ||
3 | * Copyright (c) 2011-2013, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc., | ||
16 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
17 | */ | ||
18 | |||
19 | #ifndef _UAPI_LINUX_NVME_H | ||
20 | #define _UAPI_LINUX_NVME_H | ||
21 | |||
22 | #include <linux/types.h> | ||
23 | |||
24 | struct nvme_id_power_state { | ||
25 | __le16 max_power; /* centiwatts */ | ||
26 | __u8 rsvd2; | ||
27 | __u8 flags; | ||
28 | __le32 entry_lat; /* microseconds */ | ||
29 | __le32 exit_lat; /* microseconds */ | ||
30 | __u8 read_tput; | ||
31 | __u8 read_lat; | ||
32 | __u8 write_tput; | ||
33 | __u8 write_lat; | ||
34 | __u8 rsvd16[16]; | ||
35 | }; | ||
36 | |||
37 | enum { | ||
38 | NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, | ||
39 | NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, | ||
40 | }; | ||
41 | |||
42 | struct nvme_id_ctrl { | ||
43 | __le16 vid; | ||
44 | __le16 ssvid; | ||
45 | char sn[20]; | ||
46 | char mn[40]; | ||
47 | char fr[8]; | ||
48 | __u8 rab; | ||
49 | __u8 ieee[3]; | ||
50 | __u8 mic; | ||
51 | __u8 mdts; | ||
52 | __u8 rsvd78[178]; | ||
53 | __le16 oacs; | ||
54 | __u8 acl; | ||
55 | __u8 aerl; | ||
56 | __u8 frmw; | ||
57 | __u8 lpa; | ||
58 | __u8 elpe; | ||
59 | __u8 npss; | ||
60 | __u8 rsvd264[248]; | ||
61 | __u8 sqes; | ||
62 | __u8 cqes; | ||
63 | __u8 rsvd514[2]; | ||
64 | __le32 nn; | ||
65 | __le16 oncs; | ||
66 | __le16 fuses; | ||
67 | __u8 fna; | ||
68 | __u8 vwc; | ||
69 | __le16 awun; | ||
70 | __le16 awupf; | ||
71 | __u8 rsvd530[1518]; | ||
72 | struct nvme_id_power_state psd[32]; | ||
73 | __u8 vs[1024]; | ||
74 | }; | ||
75 | |||
76 | enum { | ||
77 | NVME_CTRL_ONCS_COMPARE = 1 << 0, | ||
78 | NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, | ||
79 | NVME_CTRL_ONCS_DSM = 1 << 2, | ||
80 | }; | ||
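
oncs advertises which optional NVM commands the controller implements, so a driver should test the relevant bit before relying on, say, Dataset Management. A hedged fragment, assuming "id" holds the result of an Identify Controller command:

	if (le16_to_cpu(id->oncs) & NVME_CTRL_ONCS_DSM) {
		/* safe to issue nvme_cmd_dsm (e.g. to implement discard) */
	}
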
81 | |||
82 | struct nvme_lbaf { | ||
83 | __le16 ms; | ||
84 | __u8 ds; | ||
85 | __u8 rp; | ||
86 | }; | ||
87 | |||
88 | struct nvme_id_ns { | ||
89 | __le64 nsze; | ||
90 | __le64 ncap; | ||
91 | __le64 nuse; | ||
92 | __u8 nsfeat; | ||
93 | __u8 nlbaf; | ||
94 | __u8 flbas; | ||
95 | __u8 mc; | ||
96 | __u8 dpc; | ||
97 | __u8 dps; | ||
98 | __u8 rsvd30[98]; | ||
99 | struct nvme_lbaf lbaf[16]; | ||
100 | __u8 rsvd192[192]; | ||
101 | __u8 vs[3712]; | ||
102 | }; | ||
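
The low four bits of flbas index into lbaf[], and each format's ds field is the log2 of its data block size, so the namespace's logical block size and capacity fall out directly. A sketch, assuming "id" holds an Identify Namespace result:

	int fmt = id->flbas & 0xf;		/* currently formatted LBA format */
	int lba_shift = id->lbaf[fmt].ds;	/* log2(logical block size) */
	u64 size_bytes = le64_to_cpu(id->nsze) << lba_shift;
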
103 | |||
104 | enum { | ||
105 | NVME_NS_FEAT_THIN = 1 << 0, | ||
106 | NVME_LBAF_RP_BEST = 0, | ||
107 | NVME_LBAF_RP_BETTER = 1, | ||
108 | NVME_LBAF_RP_GOOD = 2, | ||
109 | NVME_LBAF_RP_DEGRADED = 3, | ||
110 | }; | ||
111 | |||
112 | struct nvme_smart_log { | ||
113 | __u8 critical_warning; | ||
114 | __u8 temperature[2]; | ||
115 | __u8 avail_spare; | ||
116 | __u8 spare_thresh; | ||
117 | __u8 percent_used; | ||
118 | __u8 rsvd6[26]; | ||
119 | __u8 data_units_read[16]; | ||
120 | __u8 data_units_written[16]; | ||
121 | __u8 host_reads[16]; | ||
122 | __u8 host_writes[16]; | ||
123 | __u8 ctrl_busy_time[16]; | ||
124 | __u8 power_cycles[16]; | ||
125 | __u8 power_on_hours[16]; | ||
126 | __u8 unsafe_shutdowns[16]; | ||
127 | __u8 media_errors[16]; | ||
128 | __u8 num_err_log_entries[16]; | ||
129 | __u8 rsvd192[320]; | ||
130 | }; | ||
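
The 16-byte SMART counters are 128-bit little-endian values, and temperature is a 16-bit value in kelvins split across two bytes. A fragment that reads the low 64 bits of one counter (enough in practice), assuming "log" points at a fetched SMART / health log page:

	u64 units_written = le64_to_cpup((__le64 *)log->data_units_written);
	unsigned temp_kelvin = log->temperature[0] | (log->temperature[1] << 8);
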
131 | |||
132 | enum { | ||
133 | NVME_SMART_CRIT_SPARE = 1 << 0, | ||
134 | NVME_SMART_CRIT_TEMPERATURE = 1 << 1, | ||
135 | NVME_SMART_CRIT_RELIABILITY = 1 << 2, | ||
136 | NVME_SMART_CRIT_MEDIA = 1 << 3, | ||
137 | NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, | ||
138 | }; | ||
139 | |||
140 | struct nvme_lba_range_type { | ||
141 | __u8 type; | ||
142 | __u8 attributes; | ||
143 | __u8 rsvd2[14]; | ||
144 | __u64 slba; | ||
145 | __u64 nlb; | ||
146 | __u8 guid[16]; | ||
147 | __u8 rsvd48[16]; | ||
148 | }; | ||
149 | |||
150 | enum { | ||
151 | NVME_LBART_TYPE_FS = 0x01, | ||
152 | NVME_LBART_TYPE_RAID = 0x02, | ||
153 | NVME_LBART_TYPE_CACHE = 0x03, | ||
154 | NVME_LBART_TYPE_SWAP = 0x04, | ||
155 | |||
156 | NVME_LBART_ATTRIB_TEMP = 1 << 0, | ||
157 | NVME_LBART_ATTRIB_HIDE = 1 << 1, | ||
158 | }; | ||
159 | |||
160 | /* I/O commands */ | ||
161 | |||
162 | enum nvme_opcode { | ||
163 | nvme_cmd_flush = 0x00, | ||
164 | nvme_cmd_write = 0x01, | ||
165 | nvme_cmd_read = 0x02, | ||
166 | nvme_cmd_write_uncor = 0x04, | ||
167 | nvme_cmd_compare = 0x05, | ||
168 | nvme_cmd_dsm = 0x09, | ||
169 | }; | ||
170 | |||
171 | struct nvme_common_command { | ||
172 | __u8 opcode; | ||
173 | __u8 flags; | ||
174 | __u16 command_id; | ||
175 | __le32 nsid; | ||
176 | __le32 cdw2[2]; | ||
177 | __le64 metadata; | ||
178 | __le64 prp1; | ||
179 | __le64 prp2; | ||
180 | __le32 cdw10[6]; | ||
181 | }; | ||
182 | |||
183 | struct nvme_rw_command { | ||
184 | __u8 opcode; | ||
185 | __u8 flags; | ||
186 | __u16 command_id; | ||
187 | __le32 nsid; | ||
188 | __u64 rsvd2; | ||
189 | __le64 metadata; | ||
190 | __le64 prp1; | ||
191 | __le64 prp2; | ||
192 | __le64 slba; | ||
193 | __le16 length; | ||
194 | __le16 control; | ||
195 | __le32 dsmgmt; | ||
196 | __le32 reftag; | ||
197 | __le16 apptag; | ||
198 | __le16 appmask; | ||
199 | }; | ||
200 | |||
201 | enum { | ||
202 | NVME_RW_LR = 1 << 15, | ||
203 | NVME_RW_FUA = 1 << 14, | ||
204 | NVME_RW_DSM_FREQ_UNSPEC = 0, | ||
205 | NVME_RW_DSM_FREQ_TYPICAL = 1, | ||
206 | NVME_RW_DSM_FREQ_RARE = 2, | ||
207 | NVME_RW_DSM_FREQ_READS = 3, | ||
208 | NVME_RW_DSM_FREQ_WRITES = 4, | ||
209 | NVME_RW_DSM_FREQ_RW = 5, | ||
210 | NVME_RW_DSM_FREQ_ONCE = 6, | ||
211 | NVME_RW_DSM_FREQ_PREFETCH = 7, | ||
212 | NVME_RW_DSM_FREQ_TEMP = 8, | ||
213 | NVME_RW_DSM_LATENCY_NONE = 0 << 4, | ||
214 | NVME_RW_DSM_LATENCY_IDLE = 1 << 4, | ||
215 | NVME_RW_DSM_LATENCY_NORM = 2 << 4, | ||
216 | NVME_RW_DSM_LATENCY_LOW = 3 << 4, | ||
217 | NVME_RW_DSM_SEQ_REQ = 1 << 6, | ||
218 | NVME_RW_DSM_COMPRESSED = 1 << 7, | ||
219 | }; | ||
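
In the read/write command, length is a 0's-based count of logical blocks and all multi-byte fields are little-endian. A hedged sketch of filling a read; nsid, dma_addr, slba and nblocks are assumed locals:

	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.rw.opcode  = nvme_cmd_read;
	c.rw.nsid    = cpu_to_le32(nsid);
	c.rw.prp1    = cpu_to_le64(dma_addr);
	c.rw.slba    = cpu_to_le64(slba);
	c.rw.length  = cpu_to_le16(nblocks - 1);	/* 0's-based count */
	c.rw.control = cpu_to_le16(NVME_RW_FUA);	/* optional: force unit access */
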
220 | |||
221 | struct nvme_dsm_cmd { | ||
222 | __u8 opcode; | ||
223 | __u8 flags; | ||
224 | __u16 command_id; | ||
225 | __le32 nsid; | ||
226 | __u64 rsvd2[2]; | ||
227 | __le64 prp1; | ||
228 | __le64 prp2; | ||
229 | __le32 nr; | ||
230 | __le32 attributes; | ||
231 | __u32 rsvd12[4]; | ||
232 | }; | ||
233 | |||
234 | enum { | ||
235 | NVME_DSMGMT_IDR = 1 << 0, | ||
236 | NVME_DSMGMT_IDW = 1 << 1, | ||
237 | NVME_DSMGMT_AD = 1 << 2, | ||
238 | }; | ||
239 | |||
240 | struct nvme_dsm_range { | ||
241 | __le32 cattr; | ||
242 | __le32 nlb; | ||
243 | __le64 slba; | ||
244 | }; | ||
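
A Dataset Management command points prp1 at a DMA buffer of nvme_dsm_range entries; nr is a 0's-based range count, and setting NVME_DSMGMT_AD makes it a deallocate (the basis for discard). A sketch for a single range, with dma_buf/dma_addr/nsid/slba/nblocks assumed:

	struct nvme_dsm_range *range = dma_buf;	/* assumed coherent DMA buffer */
	range->cattr = cpu_to_le32(0);
	range->nlb   = cpu_to_le32(nblocks);
	range->slba  = cpu_to_le64(slba);

	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.dsm.opcode     = nvme_cmd_dsm;
	c.dsm.nsid       = cpu_to_le32(nsid);
	c.dsm.prp1       = cpu_to_le64(dma_addr);
	c.dsm.nr         = cpu_to_le32(0);		/* 0's-based: one range */
	c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);	/* deallocate */
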
245 | |||
246 | /* Admin commands */ | ||
247 | |||
248 | enum nvme_admin_opcode { | ||
249 | nvme_admin_delete_sq = 0x00, | ||
250 | nvme_admin_create_sq = 0x01, | ||
251 | nvme_admin_get_log_page = 0x02, | ||
252 | nvme_admin_delete_cq = 0x04, | ||
253 | nvme_admin_create_cq = 0x05, | ||
254 | nvme_admin_identify = 0x06, | ||
255 | nvme_admin_abort_cmd = 0x08, | ||
256 | nvme_admin_set_features = 0x09, | ||
257 | nvme_admin_get_features = 0x0a, | ||
258 | nvme_admin_async_event = 0x0c, | ||
259 | nvme_admin_activate_fw = 0x10, | ||
260 | nvme_admin_download_fw = 0x11, | ||
261 | nvme_admin_format_nvm = 0x80, | ||
262 | nvme_admin_security_send = 0x81, | ||
263 | nvme_admin_security_recv = 0x82, | ||
264 | }; | ||
265 | |||
266 | enum { | ||
267 | NVME_QUEUE_PHYS_CONTIG = (1 << 0), | ||
268 | NVME_CQ_IRQ_ENABLED = (1 << 1), | ||
269 | NVME_SQ_PRIO_URGENT = (0 << 1), | ||
270 | NVME_SQ_PRIO_HIGH = (1 << 1), | ||
271 | NVME_SQ_PRIO_MEDIUM = (2 << 1), | ||
272 | NVME_SQ_PRIO_LOW = (3 << 1), | ||
273 | NVME_FEAT_ARBITRATION = 0x01, | ||
274 | NVME_FEAT_POWER_MGMT = 0x02, | ||
275 | NVME_FEAT_LBA_RANGE = 0x03, | ||
276 | NVME_FEAT_TEMP_THRESH = 0x04, | ||
277 | NVME_FEAT_ERR_RECOVERY = 0x05, | ||
278 | NVME_FEAT_VOLATILE_WC = 0x06, | ||
279 | NVME_FEAT_NUM_QUEUES = 0x07, | ||
280 | NVME_FEAT_IRQ_COALESCE = 0x08, | ||
281 | NVME_FEAT_IRQ_CONFIG = 0x09, | ||
282 | NVME_FEAT_WRITE_ATOMIC = 0x0a, | ||
283 | NVME_FEAT_ASYNC_EVENT = 0x0b, | ||
284 | NVME_FEAT_SW_PROGRESS = 0x0c, | ||
285 | NVME_FWACT_REPL = (0 << 3), | ||
286 | NVME_FWACT_REPL_ACTV = (1 << 3), | ||
287 | NVME_FWACT_ACTV = (2 << 3), | ||
288 | }; | ||
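
NVME_FEAT_NUM_QUEUES is how a driver negotiates its I/O queue count: dword11 carries the requested submission-queue and completion-queue counts, both 0's-based, and the controller returns the granted (also 0's-based) counts in the completion's result field. A hedged sketch requesting "count" pairs:

	struct nvme_command c;
	u32 q = (count - 1) | ((count - 1) << 16);

	memset(&c, 0, sizeof(c));
	c.features.opcode  = nvme_admin_set_features;
	c.features.fid     = cpu_to_le32(NVME_FEAT_NUM_QUEUES);
	c.features.dword11 = cpu_to_le32(q);
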
289 | |||
290 | struct nvme_identify { | ||
291 | __u8 opcode; | ||
292 | __u8 flags; | ||
293 | __u16 command_id; | ||
294 | __le32 nsid; | ||
295 | __u64 rsvd2[2]; | ||
296 | __le64 prp1; | ||
297 | __le64 prp2; | ||
298 | __le32 cns; | ||
299 | __u32 rsvd11[5]; | ||
300 | }; | ||
301 | |||
302 | struct nvme_features { | ||
303 | __u8 opcode; | ||
304 | __u8 flags; | ||
305 | __u16 command_id; | ||
306 | __le32 nsid; | ||
307 | __u64 rsvd2[2]; | ||
308 | __le64 prp1; | ||
309 | __le64 prp2; | ||
310 | __le32 fid; | ||
311 | __le32 dword11; | ||
312 | __u32 rsvd12[4]; | ||
313 | }; | ||
314 | |||
315 | struct nvme_create_cq { | ||
316 | __u8 opcode; | ||
317 | __u8 flags; | ||
318 | __u16 command_id; | ||
319 | __u32 rsvd1[5]; | ||
320 | __le64 prp1; | ||
321 | __u64 rsvd8; | ||
322 | __le16 cqid; | ||
323 | __le16 qsize; | ||
324 | __le16 cq_flags; | ||
325 | __le16 irq_vector; | ||
326 | __u32 rsvd12[4]; | ||
327 | }; | ||
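
Creating an I/O completion queue ties a queue ID to its DMA memory and an interrupt vector; qsize is again 0's-based. A sketch, assuming cq_dma_addr, qid, depth and vector come from the caller:

	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode     = nvme_admin_create_cq;
	c.create_cq.prp1       = cpu_to_le64(cq_dma_addr);
	c.create_cq.cqid       = cpu_to_le16(qid);
	c.create_cq.qsize      = cpu_to_le16(depth - 1);	/* 0's-based */
	c.create_cq.cq_flags   = cpu_to_le16(NVME_QUEUE_PHYS_CONTIG |
					     NVME_CQ_IRQ_ENABLED);
	c.create_cq.irq_vector = cpu_to_le16(vector);
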
328 | |||
329 | struct nvme_create_sq { | ||
330 | __u8 opcode; | ||
331 | __u8 flags; | ||
332 | __u16 command_id; | ||
333 | __u32 rsvd1[5]; | ||
334 | __le64 prp1; | ||
335 | __u64 rsvd8; | ||
336 | __le16 sqid; | ||
337 | __le16 qsize; | ||
338 | __le16 sq_flags; | ||
339 | __le16 cqid; | ||
340 | __u32 rsvd12[4]; | ||
341 | }; | ||
342 | |||
343 | struct nvme_delete_queue { | ||
344 | __u8 opcode; | ||
345 | __u8 flags; | ||
346 | __u16 command_id; | ||
347 | __u32 rsvd1[9]; | ||
348 | __le16 qid; | ||
349 | __u16 rsvd10; | ||
350 | __u32 rsvd11[5]; | ||
351 | }; | ||
352 | |||
353 | struct nvme_download_firmware { | ||
354 | __u8 opcode; | ||
355 | __u8 flags; | ||
356 | __u16 command_id; | ||
357 | __u32 rsvd1[5]; | ||
358 | __le64 prp1; | ||
359 | __le64 prp2; | ||
360 | __le32 numd; | ||
361 | __le32 offset; | ||
362 | __u32 rsvd12[4]; | ||
363 | }; | ||
364 | |||
365 | struct nvme_format_cmd { | ||
366 | __u8 opcode; | ||
367 | __u8 flags; | ||
368 | __u16 command_id; | ||
369 | __le32 nsid; | ||
370 | __u64 rsvd2[4]; | ||
371 | __le32 cdw10; | ||
372 | __u32 rsvd11[5]; | ||
373 | }; | ||
374 | |||
375 | struct nvme_command { | ||
376 | union { | ||
377 | struct nvme_common_command common; | ||
378 | struct nvme_rw_command rw; | ||
379 | struct nvme_identify identify; | ||
380 | struct nvme_features features; | ||
381 | struct nvme_create_cq create_cq; | ||
382 | struct nvme_create_sq create_sq; | ||
383 | struct nvme_delete_queue delete_queue; | ||
384 | struct nvme_download_firmware dlfw; | ||
385 | struct nvme_format_cmd format; | ||
386 | struct nvme_dsm_cmd dsm; | ||
387 | }; | ||
388 | }; | ||
389 | |||
390 | enum { | ||
391 | NVME_SC_SUCCESS = 0x0, | ||
392 | NVME_SC_INVALID_OPCODE = 0x1, | ||
393 | NVME_SC_INVALID_FIELD = 0x2, | ||
394 | NVME_SC_CMDID_CONFLICT = 0x3, | ||
395 | NVME_SC_DATA_XFER_ERROR = 0x4, | ||
396 | NVME_SC_POWER_LOSS = 0x5, | ||
397 | NVME_SC_INTERNAL = 0x6, | ||
398 | NVME_SC_ABORT_REQ = 0x7, | ||
399 | NVME_SC_ABORT_QUEUE = 0x8, | ||
400 | NVME_SC_FUSED_FAIL = 0x9, | ||
401 | NVME_SC_FUSED_MISSING = 0xa, | ||
402 | NVME_SC_INVALID_NS = 0xb, | ||
403 | NVME_SC_CMD_SEQ_ERROR = 0xc, | ||
404 | NVME_SC_LBA_RANGE = 0x80, | ||
405 | NVME_SC_CAP_EXCEEDED = 0x81, | ||
406 | NVME_SC_NS_NOT_READY = 0x82, | ||
407 | NVME_SC_CQ_INVALID = 0x100, | ||
408 | NVME_SC_QID_INVALID = 0x101, | ||
409 | NVME_SC_QUEUE_SIZE = 0x102, | ||
410 | NVME_SC_ABORT_LIMIT = 0x103, | ||
411 | NVME_SC_ABORT_MISSING = 0x104, | ||
412 | NVME_SC_ASYNC_LIMIT = 0x105, | ||
413 | NVME_SC_FIRMWARE_SLOT = 0x106, | ||
414 | NVME_SC_FIRMWARE_IMAGE = 0x107, | ||
415 | NVME_SC_INVALID_VECTOR = 0x108, | ||
416 | NVME_SC_INVALID_LOG_PAGE = 0x109, | ||
417 | NVME_SC_INVALID_FORMAT = 0x10a, | ||
418 | NVME_SC_BAD_ATTRIBUTES = 0x180, | ||
419 | NVME_SC_WRITE_FAULT = 0x280, | ||
420 | NVME_SC_READ_ERROR = 0x281, | ||
421 | NVME_SC_GUARD_CHECK = 0x282, | ||
422 | NVME_SC_APPTAG_CHECK = 0x283, | ||
423 | NVME_SC_REFTAG_CHECK = 0x284, | ||
424 | NVME_SC_COMPARE_FAILED = 0x285, | ||
425 | NVME_SC_ACCESS_DENIED = 0x286, | ||
426 | }; | ||
427 | |||
428 | struct nvme_completion { | ||
429 | __le32 result; /* Used by admin commands to return data */ | ||
430 | __u32 rsvd; | ||
431 | __le16 sq_head; /* how much of this queue may be reclaimed */ | ||
432 | __le16 sq_id; /* submission queue that generated this entry */ | ||
433 | __u16 command_id; /* of the command which completed */ | ||
434 | __le16 status; /* did the command fail, and if so, why? */ | ||
435 | }; | ||
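
Bit 0 of status is the phase tag, which the controller flips each time it wraps the queue, so an entry is new only while that bit still matches the phase the host expects. A hedged polling fragment; cq, head, phase and q_depth are assumed ring-state variables:

	struct nvme_completion cqe = cq[head];

	if ((le16_to_cpu(cqe.status) & 1) == phase) {
		u16 status = le16_to_cpu(cqe.status) >> 1;	/* drop the phase bit */
		if (status == NVME_SC_SUCCESS) {
			/* command cqe.command_id completed cleanly */
		}
		if (++head == q_depth) {
			head = 0;
			phase = !phase;		/* phase flips on wrap */
		}
	}
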
436 | |||
437 | struct nvme_user_io { | ||
438 | __u8 opcode; | ||
439 | __u8 flags; | ||
440 | __u16 control; | ||
441 | __u16 nblocks; | ||
442 | __u16 rsvd; | ||
443 | __u64 metadata; | ||
444 | __u64 addr; | ||
445 | __u64 slba; | ||
446 | __u32 dsmgmt; | ||
447 | __u32 reftag; | ||
448 | __u16 apptag; | ||
449 | __u16 appmask; | ||
450 | }; | ||
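
From userspace, this structure is handed to the NVME_IOCTL_SUBMIT_IO ioctl (defined just below) on a namespace block device; nblocks is 0's-based like the on-the-wire length field. A hedged fragment, with buf and fd assumed:

	struct nvme_user_io io;

	memset(&io, 0, sizeof(io));
	io.opcode  = nvme_cmd_read;
	io.addr    = (__u64)(unsigned long)buf;	/* assumed block-sized buffer */
	io.slba    = 0;				/* first logical block */
	io.nblocks = 0;				/* 0's-based: one block */
	ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);	/* fd: an open /dev/nvme0n1 */
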
451 | |||
452 | struct nvme_admin_cmd { | ||
453 | __u8 opcode; | ||
454 | __u8 flags; | ||
455 | __u16 rsvd1; | ||
456 | __u32 nsid; | ||
457 | __u32 cdw2; | ||
458 | __u32 cdw3; | ||
459 | __u64 metadata; | ||
460 | __u64 addr; | ||
461 | __u32 metadata_len; | ||
462 | __u32 data_len; | ||
463 | __u32 cdw10; | ||
464 | __u32 cdw11; | ||
465 | __u32 cdw12; | ||
466 | __u32 cdw13; | ||
467 | __u32 cdw14; | ||
468 | __u32 cdw15; | ||
469 | __u32 timeout_ms; | ||
470 | __u32 result; | ||
471 | }; | ||
472 | |||
473 | #define NVME_IOCTL_ID _IO('N', 0x40) | ||
474 | #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) | ||
475 | #define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) | ||
476 | |||
477 | #endif /* _UAPI_LINUX_NVME_H */ | ||
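
To show the newly exported interface end to end, here is a small, hedged userspace example that issues Identify Controller through NVME_IOCTL_ADMIN_CMD; the device path and the minimal error handling are assumptions for the sketch, not part of the header:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/nvme.h>

	int main(void)
	{
		struct nvme_id_ctrl id;
		struct nvme_admin_cmd cmd;
		int fd = open("/dev/nvme0", O_RDWR);	/* assumed device node */

		if (fd < 0)
			return 1;
		memset(&cmd, 0, sizeof(cmd));
		cmd.opcode   = nvme_admin_identify;
		cmd.addr     = (__u64)(unsigned long)&id;
		cmd.data_len = sizeof(id);
		cmd.cdw10    = 1;			/* CNS=1: identify the controller */
		if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) == 0)
			printf("model: %.40s serial: %.20s\n", id.mn, id.sn);
		close(fd);
		return 0;
	}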