author     Linus Torvalds <torvalds@linux-foundation.org>    2013-09-07 23:19:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2013-09-07 23:19:02 -0400
commit     b409624ad5a99c2e84df6657bd0f7931ac470d2d (patch)
tree       a4d2197ed560300b831504789744fd10a3c58039
parent     c4c17252283a13c0d63a8d9df828da109c116411 (diff)
parent     d82e8bfdef9afae83b894be49af4644d9ac3c359 (diff)
Merge git://git.infradead.org/users/willy/linux-nvme
Pull NVM Express driver update from Matthew Wilcox.

* git://git.infradead.org/users/willy/linux-nvme:
  NVMe: Merge issue on character device bring-up
  NVMe: Handle ioremap failure
  NVMe: Add pci suspend/resume driver callbacks
  NVMe: Use normal shutdown
  NVMe: Separate controller init from disk discovery
  NVMe: Separate queue alloc/free from create/delete
  NVMe: Group pci related actions in functions
  NVMe: Disk stats for read/write commands only
  NVMe: Bring up cdev on set feature failure
  NVMe: Fix checkpatch issues
  NVMe: Namespace IDs are unsigned
  NVMe: Update nvme_id_power_state with latest spec
  NVMe: Split header file into user-visible and kernel-visible pieces
  NVMe: Call nvme_process_cq from submission path
  NVMe: Remove "process_cq did something" message
  NVMe: Return correct value from interrupt handler
  NVMe: Disk IO statistics
  NVMe: Restructure MSI / MSI-X setup
  NVMe: Use kzalloc instead of kmalloc+memset
-rw-r--r--  drivers/block/nvme-core.c    585
-rw-r--r--  drivers/block/nvme-scsi.c     24
-rw-r--r--  include/linux/nvme.h         466
-rw-r--r--  include/uapi/linux/Kbuild      1
-rw-r--r--  include/uapi/linux/nvme.h    477
5 files changed, 895 insertions(+), 658 deletions(-)
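
Note on the "Add pci suspend/resume driver callbacks" item above: the series moves the driver from the legacy pci_driver .suspend/.resume hooks to a struct dev_pm_ops wired in through .driver.pm. The following is a minimal sketch of that wiring pattern only; the mydrv_* names are hypothetical stand-ins, and the real driver calls its own nvme_dev_shutdown()/nvme_dev_start() in the marked spots (see the diff below).

```c
#include <linux/pci.h>
#include <linux/pm.h>

/* Hypothetical driver names; only the dev_pm_ops wiring is the point. */

static int mydrv_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	/* Quiesce the hardware here (the NVMe patch calls nvme_dev_shutdown()). */
	dev_info(&pdev->dev, "suspending\n");
	return 0;
}

static int mydrv_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	/* Re-initialise the hardware here (nvme_dev_start() in the patch). */
	dev_info(&pdev->dev, "resuming\n");
	return 0;
}

/* Expands to a struct dev_pm_ops with the suspend/resume slots filled in. */
static SIMPLE_DEV_PM_OPS(mydrv_pm_ops, mydrv_suspend, mydrv_resume);

static struct pci_driver mydrv_driver = {
	.name	= "mydrv",
	/* .id_table / .probe / .remove omitted for brevity */
	.driver	= {
		.pm = &mydrv_pm_ops,	/* replaces the legacy .suspend/.resume members */
	},
};
```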
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index ce79a590b45b..da52092980e2 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -36,6 +36,7 @@
36#include <linux/moduleparam.h> 36#include <linux/moduleparam.h>
37#include <linux/pci.h> 37#include <linux/pci.h>
38#include <linux/poison.h> 38#include <linux/poison.h>
39#include <linux/ptrace.h>
39#include <linux/sched.h> 40#include <linux/sched.h>
40#include <linux/slab.h> 41#include <linux/slab.h>
41#include <linux/types.h> 42#include <linux/types.h>
@@ -79,7 +80,9 @@ struct nvme_queue {
79 u16 sq_head; 80 u16 sq_head;
80 u16 sq_tail; 81 u16 sq_tail;
81 u16 cq_head; 82 u16 cq_head;
82 u16 cq_phase; 83 u8 cq_phase;
84 u8 cqe_seen;
85 u8 q_suspended;
83 unsigned long cmdid_data[]; 86 unsigned long cmdid_data[];
84}; 87};
85 88
@@ -115,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
115 return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; 118 return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
116} 119}
117 120
121static unsigned nvme_queue_extra(int depth)
122{
123 return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info));
124}
125
118/** 126/**
119 * alloc_cmdid() - Allocate a Command ID 127 * alloc_cmdid() - Allocate a Command ID
120 * @nvmeq: The queue that will be used for this command 128 * @nvmeq: The queue that will be used for this command
@@ -285,6 +293,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
285 iod->npages = -1; 293 iod->npages = -1;
286 iod->length = nbytes; 294 iod->length = nbytes;
287 iod->nents = 0; 295 iod->nents = 0;
296 iod->start_time = jiffies;
288 } 297 }
289 298
290 return iod; 299 return iod;
@@ -308,6 +317,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
308 kfree(iod); 317 kfree(iod);
309} 318}
310 319
320static void nvme_start_io_acct(struct bio *bio)
321{
322 struct gendisk *disk = bio->bi_bdev->bd_disk;
323 const int rw = bio_data_dir(bio);
324 int cpu = part_stat_lock();
325 part_round_stats(cpu, &disk->part0);
326 part_stat_inc(cpu, &disk->part0, ios[rw]);
327 part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
328 part_inc_in_flight(&disk->part0, rw);
329 part_stat_unlock();
330}
331
332static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
333{
334 struct gendisk *disk = bio->bi_bdev->bd_disk;
335 const int rw = bio_data_dir(bio);
336 unsigned long duration = jiffies - start_time;
337 int cpu = part_stat_lock();
338 part_stat_add(cpu, &disk->part0, ticks[rw], duration);
339 part_round_stats(cpu, &disk->part0);
340 part_dec_in_flight(&disk->part0, rw);
341 part_stat_unlock();
342}
343
311static void bio_completion(struct nvme_dev *dev, void *ctx, 344static void bio_completion(struct nvme_dev *dev, void *ctx,
312 struct nvme_completion *cqe) 345 struct nvme_completion *cqe)
313{ 346{
@@ -315,9 +348,11 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
315 struct bio *bio = iod->private; 348 struct bio *bio = iod->private;
316 u16 status = le16_to_cpup(&cqe->status) >> 1; 349 u16 status = le16_to_cpup(&cqe->status) >> 1;
317 350
318 if (iod->nents) 351 if (iod->nents) {
319 dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, 352 dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
320 bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 353 bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
354 nvme_end_io_acct(bio, iod->start_time);
355 }
321 nvme_free_iod(dev, iod); 356 nvme_free_iod(dev, iod);
322 if (status) 357 if (status)
323 bio_endio(bio, -EIO); 358 bio_endio(bio, -EIO);
@@ -422,10 +457,8 @@ static void nvme_bio_pair_endio(struct bio *bio, int err)
422 457
423 if (atomic_dec_and_test(&bp->cnt)) { 458 if (atomic_dec_and_test(&bp->cnt)) {
424 bio_endio(bp->parent, bp->err); 459 bio_endio(bp->parent, bp->err);
425 if (bp->bv1) 460 kfree(bp->bv1);
426 kfree(bp->bv1); 461 kfree(bp->bv2);
427 if (bp->bv2)
428 kfree(bp->bv2);
429 kfree(bp); 462 kfree(bp);
430 } 463 }
431} 464}
@@ -695,6 +728,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
695 cmnd->rw.control = cpu_to_le16(control); 728 cmnd->rw.control = cpu_to_le16(control);
696 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); 729 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
697 730
731 nvme_start_io_acct(bio);
698 if (++nvmeq->sq_tail == nvmeq->q_depth) 732 if (++nvmeq->sq_tail == nvmeq->q_depth)
699 nvmeq->sq_tail = 0; 733 nvmeq->sq_tail = 0;
700 writel(nvmeq->sq_tail, nvmeq->q_db); 734 writel(nvmeq->sq_tail, nvmeq->q_db);
@@ -709,26 +743,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
709 return result; 743 return result;
710} 744}
711 745
712static void nvme_make_request(struct request_queue *q, struct bio *bio) 746static int nvme_process_cq(struct nvme_queue *nvmeq)
713{
714 struct nvme_ns *ns = q->queuedata;
715 struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
716 int result = -EBUSY;
717
718 spin_lock_irq(&nvmeq->q_lock);
719 if (bio_list_empty(&nvmeq->sq_cong))
720 result = nvme_submit_bio_queue(nvmeq, ns, bio);
721 if (unlikely(result)) {
722 if (bio_list_empty(&nvmeq->sq_cong))
723 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
724 bio_list_add(&nvmeq->sq_cong, bio);
725 }
726
727 spin_unlock_irq(&nvmeq->q_lock);
728 put_nvmeq(nvmeq);
729}
730
731static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
732{ 747{
733 u16 head, phase; 748 u16 head, phase;
734 749
@@ -758,13 +773,40 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
758 * a big problem. 773 * a big problem.
759 */ 774 */
760 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) 775 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
761 return IRQ_NONE; 776 return 0;
762 777
763 writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride)); 778 writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
764 nvmeq->cq_head = head; 779 nvmeq->cq_head = head;
765 nvmeq->cq_phase = phase; 780 nvmeq->cq_phase = phase;
766 781
767 return IRQ_HANDLED; 782 nvmeq->cqe_seen = 1;
783 return 1;
784}
785
786static void nvme_make_request(struct request_queue *q, struct bio *bio)
787{
788 struct nvme_ns *ns = q->queuedata;
789 struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
790 int result = -EBUSY;
791
792 if (!nvmeq) {
793 put_nvmeq(NULL);
794 bio_endio(bio, -EIO);
795 return;
796 }
797
798 spin_lock_irq(&nvmeq->q_lock);
799 if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
800 result = nvme_submit_bio_queue(nvmeq, ns, bio);
801 if (unlikely(result)) {
802 if (bio_list_empty(&nvmeq->sq_cong))
803 add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
804 bio_list_add(&nvmeq->sq_cong, bio);
805 }
806
807 nvme_process_cq(nvmeq);
808 spin_unlock_irq(&nvmeq->q_lock);
809 put_nvmeq(nvmeq);
768} 810}
769 811
770static irqreturn_t nvme_irq(int irq, void *data) 812static irqreturn_t nvme_irq(int irq, void *data)
@@ -772,7 +814,9 @@ static irqreturn_t nvme_irq(int irq, void *data)
772 irqreturn_t result; 814 irqreturn_t result;
773 struct nvme_queue *nvmeq = data; 815 struct nvme_queue *nvmeq = data;
774 spin_lock(&nvmeq->q_lock); 816 spin_lock(&nvmeq->q_lock);
775 result = nvme_process_cq(nvmeq); 817 nvme_process_cq(nvmeq);
818 result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
819 nvmeq->cqe_seen = 0;
776 spin_unlock(&nvmeq->q_lock); 820 spin_unlock(&nvmeq->q_lock);
777 return result; 821 return result;
778} 822}
@@ -986,8 +1030,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
986 } 1030 }
987} 1031}
988 1032
989static void nvme_free_queue_mem(struct nvme_queue *nvmeq) 1033static void nvme_free_queue(struct nvme_queue *nvmeq)
990{ 1034{
1035 spin_lock_irq(&nvmeq->q_lock);
1036 while (bio_list_peek(&nvmeq->sq_cong)) {
1037 struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
1038 bio_endio(bio, -EIO);
1039 }
1040 spin_unlock_irq(&nvmeq->q_lock);
1041
991 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), 1042 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
992 (void *)nvmeq->cqes, nvmeq->cq_dma_addr); 1043 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
993 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), 1044 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
@@ -995,17 +1046,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
995 kfree(nvmeq); 1046 kfree(nvmeq);
996} 1047}
997 1048
998static void nvme_free_queue(struct nvme_dev *dev, int qid) 1049static void nvme_free_queues(struct nvme_dev *dev)
1050{
1051 int i;
1052
1053 for (i = dev->queue_count - 1; i >= 0; i--) {
1054 nvme_free_queue(dev->queues[i]);
1055 dev->queue_count--;
1056 dev->queues[i] = NULL;
1057 }
1058}
1059
1060static void nvme_disable_queue(struct nvme_dev *dev, int qid)
999{ 1061{
1000 struct nvme_queue *nvmeq = dev->queues[qid]; 1062 struct nvme_queue *nvmeq = dev->queues[qid];
1001 int vector = dev->entry[nvmeq->cq_vector].vector; 1063 int vector = dev->entry[nvmeq->cq_vector].vector;
1002 1064
1003 spin_lock_irq(&nvmeq->q_lock); 1065 spin_lock_irq(&nvmeq->q_lock);
1004 nvme_cancel_ios(nvmeq, false); 1066 if (nvmeq->q_suspended) {
1005 while (bio_list_peek(&nvmeq->sq_cong)) { 1067 spin_unlock_irq(&nvmeq->q_lock);
1006 struct bio *bio = bio_list_pop(&nvmeq->sq_cong); 1068 return;
1007 bio_endio(bio, -EIO);
1008 } 1069 }
1070 nvmeq->q_suspended = 1;
1009 spin_unlock_irq(&nvmeq->q_lock); 1071 spin_unlock_irq(&nvmeq->q_lock);
1010 1072
1011 irq_set_affinity_hint(vector, NULL); 1073 irq_set_affinity_hint(vector, NULL);
@@ -1017,15 +1079,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
1017 adapter_delete_cq(dev, qid); 1079 adapter_delete_cq(dev, qid);
1018 } 1080 }
1019 1081
1020 nvme_free_queue_mem(nvmeq); 1082 spin_lock_irq(&nvmeq->q_lock);
1083 nvme_process_cq(nvmeq);
1084 nvme_cancel_ios(nvmeq, false);
1085 spin_unlock_irq(&nvmeq->q_lock);
1021} 1086}
1022 1087
1023static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, 1088static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
1024 int depth, int vector) 1089 int depth, int vector)
1025{ 1090{
1026 struct device *dmadev = &dev->pci_dev->dev; 1091 struct device *dmadev = &dev->pci_dev->dev;
1027 unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * 1092 unsigned extra = nvme_queue_extra(depth);
1028 sizeof(struct nvme_cmd_info));
1029 struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); 1093 struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
1030 if (!nvmeq) 1094 if (!nvmeq)
1031 return NULL; 1095 return NULL;
@@ -1052,6 +1116,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
1052 nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; 1116 nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
1053 nvmeq->q_depth = depth; 1117 nvmeq->q_depth = depth;
1054 nvmeq->cq_vector = vector; 1118 nvmeq->cq_vector = vector;
1119 nvmeq->q_suspended = 1;
1120 dev->queue_count++;
1055 1121
1056 return nvmeq; 1122 return nvmeq;
1057 1123
@@ -1075,18 +1141,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
1075 IRQF_DISABLED | IRQF_SHARED, name, nvmeq); 1141 IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
1076} 1142}
1077 1143
1078static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, 1144static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
1079 int cq_size, int vector)
1080{ 1145{
1081 int result; 1146 struct nvme_dev *dev = nvmeq->dev;
1082 struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector); 1147 unsigned extra = nvme_queue_extra(nvmeq->q_depth);
1083 1148
1084 if (!nvmeq) 1149 nvmeq->sq_tail = 0;
1085 return ERR_PTR(-ENOMEM); 1150 nvmeq->cq_head = 0;
1151 nvmeq->cq_phase = 1;
1152 nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
1153 memset(nvmeq->cmdid_data, 0, extra);
1154 memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
1155 nvme_cancel_ios(nvmeq, false);
1156 nvmeq->q_suspended = 0;
1157}
1158
1159static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
1160{
1161 struct nvme_dev *dev = nvmeq->dev;
1162 int result;
1086 1163
1087 result = adapter_alloc_cq(dev, qid, nvmeq); 1164 result = adapter_alloc_cq(dev, qid, nvmeq);
1088 if (result < 0) 1165 if (result < 0)
1089 goto free_nvmeq; 1166 return result;
1090 1167
1091 result = adapter_alloc_sq(dev, qid, nvmeq); 1168 result = adapter_alloc_sq(dev, qid, nvmeq);
1092 if (result < 0) 1169 if (result < 0)
@@ -1096,19 +1173,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
1096 if (result < 0) 1173 if (result < 0)
1097 goto release_sq; 1174 goto release_sq;
1098 1175
1099 return nvmeq; 1176 spin_lock(&nvmeq->q_lock);
1177 nvme_init_queue(nvmeq, qid);
1178 spin_unlock(&nvmeq->q_lock);
1179
1180 return result;
1100 1181
1101 release_sq: 1182 release_sq:
1102 adapter_delete_sq(dev, qid); 1183 adapter_delete_sq(dev, qid);
1103 release_cq: 1184 release_cq:
1104 adapter_delete_cq(dev, qid); 1185 adapter_delete_cq(dev, qid);
1105 free_nvmeq: 1186 return result;
1106 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
1107 (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
1108 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
1109 nvmeq->sq_cmds, nvmeq->sq_dma_addr);
1110 kfree(nvmeq);
1111 return ERR_PTR(result);
1112} 1187}
1113 1188
1114static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) 1189static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
@@ -1152,6 +1227,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
1152 return nvme_wait_ready(dev, cap, true); 1227 return nvme_wait_ready(dev, cap, true);
1153} 1228}
1154 1229
1230static int nvme_shutdown_ctrl(struct nvme_dev *dev)
1231{
1232 unsigned long timeout;
1233 u32 cc;
1234
1235 cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL;
1236 writel(cc, &dev->bar->cc);
1237
1238 timeout = 2 * HZ + jiffies;
1239 while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
1240 NVME_CSTS_SHST_CMPLT) {
1241 msleep(100);
1242 if (fatal_signal_pending(current))
1243 return -EINTR;
1244 if (time_after(jiffies, timeout)) {
1245 dev_err(&dev->pci_dev->dev,
1246 "Device shutdown incomplete; abort shutdown\n");
1247 return -ENODEV;
1248 }
1249 }
1250
1251 return 0;
1252}
1253
1155static int nvme_configure_admin_queue(struct nvme_dev *dev) 1254static int nvme_configure_admin_queue(struct nvme_dev *dev)
1156{ 1255{
1157 int result; 1256 int result;
@@ -1159,16 +1258,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1159 u64 cap = readq(&dev->bar->cap); 1258 u64 cap = readq(&dev->bar->cap);
1160 struct nvme_queue *nvmeq; 1259 struct nvme_queue *nvmeq;
1161 1260
1162 dev->dbs = ((void __iomem *)dev->bar) + 4096;
1163 dev->db_stride = NVME_CAP_STRIDE(cap);
1164
1165 result = nvme_disable_ctrl(dev, cap); 1261 result = nvme_disable_ctrl(dev, cap);
1166 if (result < 0) 1262 if (result < 0)
1167 return result; 1263 return result;
1168 1264
1169 nvmeq = nvme_alloc_queue(dev, 0, 64, 0); 1265 nvmeq = dev->queues[0];
1170 if (!nvmeq) 1266 if (!nvmeq) {
1171 return -ENOMEM; 1267 nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
1268 if (!nvmeq)
1269 return -ENOMEM;
1270 dev->queues[0] = nvmeq;
1271 }
1172 1272
1173 aqa = nvmeq->q_depth - 1; 1273 aqa = nvmeq->q_depth - 1;
1174 aqa |= aqa << 16; 1274 aqa |= aqa << 16;
@@ -1185,17 +1285,15 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
1185 1285
1186 result = nvme_enable_ctrl(dev, cap); 1286 result = nvme_enable_ctrl(dev, cap);
1187 if (result) 1287 if (result)
1188 goto free_q; 1288 return result;
1189 1289
1190 result = queue_request_irq(dev, nvmeq, "nvme admin"); 1290 result = queue_request_irq(dev, nvmeq, "nvme admin");
1191 if (result) 1291 if (result)
1192 goto free_q; 1292 return result;
1193
1194 dev->queues[0] = nvmeq;
1195 return result;
1196 1293
1197 free_q: 1294 spin_lock(&nvmeq->q_lock);
1198 nvme_free_queue_mem(nvmeq); 1295 nvme_init_queue(nvmeq, 0);
1296 spin_unlock(&nvmeq->q_lock);
1199 return result; 1297 return result;
1200} 1298}
1201 1299
@@ -1314,7 +1412,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1314 c.rw.appmask = cpu_to_le16(io.appmask); 1412 c.rw.appmask = cpu_to_le16(io.appmask);
1315 1413
1316 if (meta_len) { 1414 if (meta_len) {
1317 meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, meta_len); 1415 meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
1416 meta_len);
1318 if (IS_ERR(meta_iod)) { 1417 if (IS_ERR(meta_iod)) {
1319 status = PTR_ERR(meta_iod); 1418 status = PTR_ERR(meta_iod);
1320 meta_iod = NULL; 1419 meta_iod = NULL;
@@ -1356,6 +1455,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1356 put_nvmeq(nvmeq); 1455 put_nvmeq(nvmeq);
1357 if (length != (io.nblocks + 1) << ns->lba_shift) 1456 if (length != (io.nblocks + 1) << ns->lba_shift)
1358 status = -ENOMEM; 1457 status = -ENOMEM;
1458 else if (!nvmeq || nvmeq->q_suspended)
1459 status = -EBUSY;
1359 else 1460 else
1360 status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); 1461 status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
1361 1462
@@ -1453,6 +1554,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
1453 1554
1454 switch (cmd) { 1555 switch (cmd) {
1455 case NVME_IOCTL_ID: 1556 case NVME_IOCTL_ID:
1557 force_successful_syscall_return();
1456 return ns->ns_id; 1558 return ns->ns_id;
1457 case NVME_IOCTL_ADMIN_CMD: 1559 case NVME_IOCTL_ADMIN_CMD:
1458 return nvme_user_admin_cmd(ns->dev, (void __user *)arg); 1560 return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
@@ -1506,10 +1608,12 @@ static int nvme_kthread(void *data)
1506 if (!nvmeq) 1608 if (!nvmeq)
1507 continue; 1609 continue;
1508 spin_lock_irq(&nvmeq->q_lock); 1610 spin_lock_irq(&nvmeq->q_lock);
1509 if (nvme_process_cq(nvmeq)) 1611 if (nvmeq->q_suspended)
1510 printk("process_cq did something\n"); 1612 goto unlock;
1613 nvme_process_cq(nvmeq);
1511 nvme_cancel_ios(nvmeq, true); 1614 nvme_cancel_ios(nvmeq, true);
1512 nvme_resubmit_bios(nvmeq); 1615 nvme_resubmit_bios(nvmeq);
1616 unlock:
1513 spin_unlock_irq(&nvmeq->q_lock); 1617 spin_unlock_irq(&nvmeq->q_lock);
1514 } 1618 }
1515 } 1619 }
@@ -1556,7 +1660,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
1556 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); 1660 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
1557} 1661}
1558 1662
1559static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, 1663static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
1560 struct nvme_id_ns *id, struct nvme_lba_range_type *rt) 1664 struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
1561{ 1665{
1562 struct nvme_ns *ns; 1666 struct nvme_ns *ns;
@@ -1631,14 +1735,19 @@ static int set_queue_count(struct nvme_dev *dev, int count)
1631 status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, 1735 status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
1632 &result); 1736 &result);
1633 if (status) 1737 if (status)
1634 return -EIO; 1738 return status < 0 ? -EIO : -EBUSY;
1635 return min(result & 0xffff, result >> 16) + 1; 1739 return min(result & 0xffff, result >> 16) + 1;
1636} 1740}
1637 1741
1742static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
1743{
1744 return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
1745}
1746
1638static int nvme_setup_io_queues(struct nvme_dev *dev) 1747static int nvme_setup_io_queues(struct nvme_dev *dev)
1639{ 1748{
1640 struct pci_dev *pdev = dev->pci_dev; 1749 struct pci_dev *pdev = dev->pci_dev;
1641 int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count; 1750 int result, cpu, i, vecs, nr_io_queues, size, q_depth;
1642 1751
1643 nr_io_queues = num_online_cpus(); 1752 nr_io_queues = num_online_cpus();
1644 result = set_queue_count(dev, nr_io_queues); 1753 result = set_queue_count(dev, nr_io_queues);
@@ -1647,53 +1756,80 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1647 if (result < nr_io_queues) 1756 if (result < nr_io_queues)
1648 nr_io_queues = result; 1757 nr_io_queues = result;
1649 1758
1650 q_count = nr_io_queues; 1759 size = db_bar_size(dev, nr_io_queues);
1651 /* Deregister the admin queue's interrupt */ 1760 if (size > 8192) {
1652 free_irq(dev->entry[0].vector, dev->queues[0]);
1653
1654 db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
1655 if (db_bar_size > 8192) {
1656 iounmap(dev->bar); 1761 iounmap(dev->bar);
1657 dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size); 1762 do {
1763 dev->bar = ioremap(pci_resource_start(pdev, 0), size);
1764 if (dev->bar)
1765 break;
1766 if (!--nr_io_queues)
1767 return -ENOMEM;
1768 size = db_bar_size(dev, nr_io_queues);
1769 } while (1);
1658 dev->dbs = ((void __iomem *)dev->bar) + 4096; 1770 dev->dbs = ((void __iomem *)dev->bar) + 4096;
1659 dev->queues[0]->q_db = dev->dbs; 1771 dev->queues[0]->q_db = dev->dbs;
1660 } 1772 }
1661 1773
1662 for (i = 0; i < nr_io_queues; i++) 1774 /* Deregister the admin queue's interrupt */
1775 free_irq(dev->entry[0].vector, dev->queues[0]);
1776
1777 vecs = nr_io_queues;
1778 for (i = 0; i < vecs; i++)
1663 dev->entry[i].entry = i; 1779 dev->entry[i].entry = i;
1664 for (;;) { 1780 for (;;) {
1665 result = pci_enable_msix(pdev, dev->entry, nr_io_queues); 1781 result = pci_enable_msix(pdev, dev->entry, vecs);
1666 if (result == 0) { 1782 if (result <= 0)
1667 break;
1668 } else if (result > 0) {
1669 nr_io_queues = result;
1670 continue;
1671 } else {
1672 nr_io_queues = 0;
1673 break; 1783 break;
1674 } 1784 vecs = result;
1675 } 1785 }
1676 1786
1677 if (nr_io_queues == 0) { 1787 if (result < 0) {
1678 nr_io_queues = q_count; 1788 vecs = nr_io_queues;
1789 if (vecs > 32)
1790 vecs = 32;
1679 for (;;) { 1791 for (;;) {
1680 result = pci_enable_msi_block(pdev, nr_io_queues); 1792 result = pci_enable_msi_block(pdev, vecs);
1681 if (result == 0) { 1793 if (result == 0) {
1682 for (i = 0; i < nr_io_queues; i++) 1794 for (i = 0; i < vecs; i++)
1683 dev->entry[i].vector = i + pdev->irq; 1795 dev->entry[i].vector = i + pdev->irq;
1684 break; 1796 break;
1685 } else if (result > 0) { 1797 } else if (result < 0) {
1686 nr_io_queues = result; 1798 vecs = 1;
1687 continue;
1688 } else {
1689 nr_io_queues = 1;
1690 break; 1799 break;
1691 } 1800 }
1801 vecs = result;
1692 } 1802 }
1693 } 1803 }
1694 1804
1805 /*
1806 * Should investigate if there's a performance win from allocating
1807 * more queues than interrupt vectors; it might allow the submission
1808 * path to scale better, even if the receive path is limited by the
1809 * number of interrupts.
1810 */
1811 nr_io_queues = vecs;
1812
1695 result = queue_request_irq(dev, dev->queues[0], "nvme admin"); 1813 result = queue_request_irq(dev, dev->queues[0], "nvme admin");
1696 /* XXX: handle failure here */ 1814 if (result) {
1815 dev->queues[0]->q_suspended = 1;
1816 goto free_queues;
1817 }
1818
1819 /* Free previously allocated queues that are no longer usable */
1820 spin_lock(&dev_list_lock);
1821 for (i = dev->queue_count - 1; i > nr_io_queues; i--) {
1822 struct nvme_queue *nvmeq = dev->queues[i];
1823
1824 spin_lock(&nvmeq->q_lock);
1825 nvme_cancel_ios(nvmeq, false);
1826 spin_unlock(&nvmeq->q_lock);
1827
1828 nvme_free_queue(nvmeq);
1829 dev->queue_count--;
1830 dev->queues[i] = NULL;
1831 }
1832 spin_unlock(&dev_list_lock);
1697 1833
1698 cpu = cpumask_first(cpu_online_mask); 1834 cpu = cpumask_first(cpu_online_mask);
1699 for (i = 0; i < nr_io_queues; i++) { 1835 for (i = 0; i < nr_io_queues; i++) {
@@ -1703,11 +1839,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1703 1839
1704 q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, 1840 q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
1705 NVME_Q_DEPTH); 1841 NVME_Q_DEPTH);
1706 for (i = 0; i < nr_io_queues; i++) { 1842 for (i = dev->queue_count - 1; i < nr_io_queues; i++) {
1707 dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); 1843 dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
1708 if (IS_ERR(dev->queues[i + 1])) 1844 if (!dev->queues[i + 1]) {
1709 return PTR_ERR(dev->queues[i + 1]); 1845 result = -ENOMEM;
1710 dev->queue_count++; 1846 goto free_queues;
1847 }
1711 } 1848 }
1712 1849
1713 for (; i < num_possible_cpus(); i++) { 1850 for (; i < num_possible_cpus(); i++) {
@@ -1715,15 +1852,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
1715 dev->queues[i + 1] = dev->queues[target + 1]; 1852 dev->queues[i + 1] = dev->queues[target + 1];
1716 } 1853 }
1717 1854
1718 return 0; 1855 for (i = 1; i < dev->queue_count; i++) {
1719} 1856 result = nvme_create_queue(dev->queues[i], i);
1857 if (result) {
1858 for (--i; i > 0; i--)
1859 nvme_disable_queue(dev, i);
1860 goto free_queues;
1861 }
1862 }
1720 1863
1721static void nvme_free_queues(struct nvme_dev *dev) 1864 return 0;
1722{
1723 int i;
1724 1865
1725 for (i = dev->queue_count - 1; i >= 0; i--) 1866 free_queues:
1726 nvme_free_queue(dev, i); 1867 nvme_free_queues(dev);
1868 return result;
1727} 1869}
1728 1870
1729/* 1871/*
@@ -1734,7 +1876,8 @@ static void nvme_free_queues(struct nvme_dev *dev)
1734 */ 1876 */
1735static int nvme_dev_add(struct nvme_dev *dev) 1877static int nvme_dev_add(struct nvme_dev *dev)
1736{ 1878{
1737 int res, nn, i; 1879 int res;
1880 unsigned nn, i;
1738 struct nvme_ns *ns; 1881 struct nvme_ns *ns;
1739 struct nvme_id_ctrl *ctrl; 1882 struct nvme_id_ctrl *ctrl;
1740 struct nvme_id_ns *id_ns; 1883 struct nvme_id_ns *id_ns;
@@ -1742,10 +1885,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
1742 dma_addr_t dma_addr; 1885 dma_addr_t dma_addr;
1743 int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; 1886 int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
1744 1887
1745 res = nvme_setup_io_queues(dev);
1746 if (res)
1747 return res;
1748
1749 mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr, 1888 mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
1750 GFP_KERNEL); 1889 GFP_KERNEL);
1751 if (!mem) 1890 if (!mem)
@@ -1796,23 +1935,86 @@ static int nvme_dev_add(struct nvme_dev *dev)
1796 return res; 1935 return res;
1797} 1936}
1798 1937
1799static int nvme_dev_remove(struct nvme_dev *dev) 1938static int nvme_dev_map(struct nvme_dev *dev)
1800{ 1939{
1801 struct nvme_ns *ns, *next; 1940 int bars, result = -ENOMEM;
1941 struct pci_dev *pdev = dev->pci_dev;
1942
1943 if (pci_enable_device_mem(pdev))
1944 return result;
1945
1946 dev->entry[0].vector = pdev->irq;
1947 pci_set_master(pdev);
1948 bars = pci_select_bars(pdev, IORESOURCE_MEM);
1949 if (pci_request_selected_regions(pdev, bars, "nvme"))
1950 goto disable_pci;
1951
1952 if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
1953 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1954 else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
1955 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1956 else
1957 goto disable_pci;
1958
1959 pci_set_drvdata(pdev, dev);
1960 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
1961 if (!dev->bar)
1962 goto disable;
1963
1964 dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap));
1965 dev->dbs = ((void __iomem *)dev->bar) + 4096;
1966
1967 return 0;
1968
1969 disable:
1970 pci_release_regions(pdev);
1971 disable_pci:
1972 pci_disable_device(pdev);
1973 return result;
1974}
1975
1976static void nvme_dev_unmap(struct nvme_dev *dev)
1977{
1978 if (dev->pci_dev->msi_enabled)
1979 pci_disable_msi(dev->pci_dev);
1980 else if (dev->pci_dev->msix_enabled)
1981 pci_disable_msix(dev->pci_dev);
1982
1983 if (dev->bar) {
1984 iounmap(dev->bar);
1985 dev->bar = NULL;
1986 }
1987
1988 pci_release_regions(dev->pci_dev);
1989 if (pci_is_enabled(dev->pci_dev))
1990 pci_disable_device(dev->pci_dev);
1991}
1992
1993static void nvme_dev_shutdown(struct nvme_dev *dev)
1994{
1995 int i;
1996
1997 for (i = dev->queue_count - 1; i >= 0; i--)
1998 nvme_disable_queue(dev, i);
1802 1999
1803 spin_lock(&dev_list_lock); 2000 spin_lock(&dev_list_lock);
1804 list_del(&dev->node); 2001 list_del_init(&dev->node);
1805 spin_unlock(&dev_list_lock); 2002 spin_unlock(&dev_list_lock);
1806 2003
2004 if (dev->bar)
2005 nvme_shutdown_ctrl(dev);
2006 nvme_dev_unmap(dev);
2007}
2008
2009static void nvme_dev_remove(struct nvme_dev *dev)
2010{
2011 struct nvme_ns *ns, *next;
2012
1807 list_for_each_entry_safe(ns, next, &dev->namespaces, list) { 2013 list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
1808 list_del(&ns->list); 2014 list_del(&ns->list);
1809 del_gendisk(ns->disk); 2015 del_gendisk(ns->disk);
1810 nvme_ns_free(ns); 2016 nvme_ns_free(ns);
1811 } 2017 }
1812
1813 nvme_free_queues(dev);
1814
1815 return 0;
1816} 2018}
1817 2019
1818static int nvme_setup_prp_pools(struct nvme_dev *dev) 2020static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -1872,15 +2074,10 @@ static void nvme_free_dev(struct kref *kref)
1872{ 2074{
1873 struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); 2075 struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
1874 nvme_dev_remove(dev); 2076 nvme_dev_remove(dev);
1875 if (dev->pci_dev->msi_enabled) 2077 nvme_dev_shutdown(dev);
1876 pci_disable_msi(dev->pci_dev); 2078 nvme_free_queues(dev);
1877 else if (dev->pci_dev->msix_enabled)
1878 pci_disable_msix(dev->pci_dev);
1879 iounmap(dev->bar);
1880 nvme_release_instance(dev); 2079 nvme_release_instance(dev);
1881 nvme_release_prp_pools(dev); 2080 nvme_release_prp_pools(dev);
1882 pci_disable_device(dev->pci_dev);
1883 pci_release_regions(dev->pci_dev);
1884 kfree(dev->queues); 2081 kfree(dev->queues);
1885 kfree(dev->entry); 2082 kfree(dev->entry);
1886 kfree(dev); 2083 kfree(dev);
@@ -1921,9 +2118,40 @@ static const struct file_operations nvme_dev_fops = {
1921 .compat_ioctl = nvme_dev_ioctl, 2118 .compat_ioctl = nvme_dev_ioctl,
1922}; 2119};
1923 2120
2121static int nvme_dev_start(struct nvme_dev *dev)
2122{
2123 int result;
2124
2125 result = nvme_dev_map(dev);
2126 if (result)
2127 return result;
2128
2129 result = nvme_configure_admin_queue(dev);
2130 if (result)
2131 goto unmap;
2132
2133 spin_lock(&dev_list_lock);
2134 list_add(&dev->node, &dev_list);
2135 spin_unlock(&dev_list_lock);
2136
2137 result = nvme_setup_io_queues(dev);
2138 if (result && result != -EBUSY)
2139 goto disable;
2140
2141 return result;
2142
2143 disable:
2144 spin_lock(&dev_list_lock);
2145 list_del_init(&dev->node);
2146 spin_unlock(&dev_list_lock);
2147 unmap:
2148 nvme_dev_unmap(dev);
2149 return result;
2150}
2151
1924static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) 2152static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1925{ 2153{
1926 int bars, result = -ENOMEM; 2154 int result = -ENOMEM;
1927 struct nvme_dev *dev; 2155 struct nvme_dev *dev;
1928 2156
1929 dev = kzalloc(sizeof(*dev), GFP_KERNEL); 2157 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -1938,53 +2166,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1938 if (!dev->queues) 2166 if (!dev->queues)
1939 goto free; 2167 goto free;
1940 2168
1941 if (pci_enable_device_mem(pdev))
1942 goto free;
1943 pci_set_master(pdev);
1944 bars = pci_select_bars(pdev, IORESOURCE_MEM);
1945 if (pci_request_selected_regions(pdev, bars, "nvme"))
1946 goto disable;
1947
1948 INIT_LIST_HEAD(&dev->namespaces); 2169 INIT_LIST_HEAD(&dev->namespaces);
1949 dev->pci_dev = pdev; 2170 dev->pci_dev = pdev;
1950 pci_set_drvdata(pdev, dev);
1951
1952 if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
1953 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1954 else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
1955 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1956 else
1957 goto disable;
1958
1959 result = nvme_set_instance(dev); 2171 result = nvme_set_instance(dev);
1960 if (result) 2172 if (result)
1961 goto disable; 2173 goto free;
1962
1963 dev->entry[0].vector = pdev->irq;
1964 2174
1965 result = nvme_setup_prp_pools(dev); 2175 result = nvme_setup_prp_pools(dev);
1966 if (result) 2176 if (result)
1967 goto disable_msix; 2177 goto release;
1968 2178
1969 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); 2179 result = nvme_dev_start(dev);
1970 if (!dev->bar) { 2180 if (result) {
1971 result = -ENOMEM; 2181 if (result == -EBUSY)
1972 goto disable_msix; 2182 goto create_cdev;
2183 goto release_pools;
1973 } 2184 }
1974 2185
1975 result = nvme_configure_admin_queue(dev);
1976 if (result)
1977 goto unmap;
1978 dev->queue_count++;
1979
1980 spin_lock(&dev_list_lock);
1981 list_add(&dev->node, &dev_list);
1982 spin_unlock(&dev_list_lock);
1983
1984 result = nvme_dev_add(dev); 2186 result = nvme_dev_add(dev);
1985 if (result) 2187 if (result)
1986 goto delete; 2188 goto shutdown;
1987 2189
2190 create_cdev:
1988 scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); 2191 scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
1989 dev->miscdev.minor = MISC_DYNAMIC_MINOR; 2192 dev->miscdev.minor = MISC_DYNAMIC_MINOR;
1990 dev->miscdev.parent = &pdev->dev; 2193 dev->miscdev.parent = &pdev->dev;
@@ -1999,24 +2202,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1999 2202
2000 remove: 2203 remove:
2001 nvme_dev_remove(dev); 2204 nvme_dev_remove(dev);
2002 delete: 2205 shutdown:
2003 spin_lock(&dev_list_lock); 2206 nvme_dev_shutdown(dev);
2004 list_del(&dev->node); 2207 release_pools:
2005 spin_unlock(&dev_list_lock);
2006
2007 nvme_free_queues(dev); 2208 nvme_free_queues(dev);
2008 unmap:
2009 iounmap(dev->bar);
2010 disable_msix:
2011 if (dev->pci_dev->msi_enabled)
2012 pci_disable_msi(dev->pci_dev);
2013 else if (dev->pci_dev->msix_enabled)
2014 pci_disable_msix(dev->pci_dev);
2015 nvme_release_instance(dev);
2016 nvme_release_prp_pools(dev); 2209 nvme_release_prp_pools(dev);
2017 disable: 2210 release:
2018 pci_disable_device(pdev); 2211 nvme_release_instance(dev);
2019 pci_release_regions(pdev);
2020 free: 2212 free:
2021 kfree(dev->queues); 2213 kfree(dev->queues);
2022 kfree(dev->entry); 2214 kfree(dev->entry);
@@ -2037,8 +2229,30 @@ static void nvme_remove(struct pci_dev *pdev)
2037#define nvme_link_reset NULL 2229#define nvme_link_reset NULL
2038#define nvme_slot_reset NULL 2230#define nvme_slot_reset NULL
2039#define nvme_error_resume NULL 2231#define nvme_error_resume NULL
2040#define nvme_suspend NULL 2232
2041#define nvme_resume NULL 2233static int nvme_suspend(struct device *dev)
2234{
2235 struct pci_dev *pdev = to_pci_dev(dev);
2236 struct nvme_dev *ndev = pci_get_drvdata(pdev);
2237
2238 nvme_dev_shutdown(ndev);
2239 return 0;
2240}
2241
2242static int nvme_resume(struct device *dev)
2243{
2244 struct pci_dev *pdev = to_pci_dev(dev);
2245 struct nvme_dev *ndev = pci_get_drvdata(pdev);
2246 int ret;
2247
2248 ret = nvme_dev_start(ndev);
2249 /* XXX: should remove gendisks if resume fails */
2250 if (ret)
2251 nvme_free_queues(ndev);
2252 return ret;
2253}
2254
2255static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
2042 2256
2043static const struct pci_error_handlers nvme_err_handler = { 2257static const struct pci_error_handlers nvme_err_handler = {
2044 .error_detected = nvme_error_detected, 2258 .error_detected = nvme_error_detected,
@@ -2062,8 +2276,9 @@ static struct pci_driver nvme_driver = {
2062 .id_table = nvme_id_table, 2276 .id_table = nvme_id_table,
2063 .probe = nvme_probe, 2277 .probe = nvme_probe,
2064 .remove = nvme_remove, 2278 .remove = nvme_remove,
2065 .suspend = nvme_suspend, 2279 .driver = {
2066 .resume = nvme_resume, 2280 .pm = &nvme_dev_pm_ops,
2281 },
2067 .err_handler = &nvme_err_handler, 2282 .err_handler = &nvme_err_handler,
2068}; 2283};
2069 2284
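
The "Restructure MSI / MSI-X setup" hunks above depend on the return convention pci_enable_msix() had in this era: 0 on success, a positive "try this many instead" hint, or a negative errno. A hedged sketch of that negotiate-then-fall-back pattern, with a hypothetical mydrv_setup_vectors() standing in for the inline logic in nvme_setup_io_queues():

```c
#include <linux/kernel.h>
#include <linux/pci.h>

/*
 * Sketch only: ask for `want` MSI-X vectors, retrying with whatever count
 * the PCI core says it can supply, then fall back to MSI (capped at 32, as
 * in the patch) and finally to a single vector.  Returns the vector count
 * the caller should use.
 */
static int mydrv_setup_vectors(struct pci_dev *pdev,
			       struct msix_entry *entries, int want)
{
	int i, ret, vecs = want;

	for (i = 0; i < vecs; i++)
		entries[i].entry = i;

	for (;;) {
		ret = pci_enable_msix(pdev, entries, vecs);
		if (ret <= 0)
			break;		/* 0 = success, < 0 = give up on MSI-X */
		vecs = ret;		/* > 0 = retry with the suggested count */
	}

	if (ret < 0) {			/* MSI-X failed: try plain MSI */
		vecs = min(want, 32);
		for (;;) {
			ret = pci_enable_msi_block(pdev, vecs);
			if (ret == 0) {
				for (i = 0; i < vecs; i++)
					entries[i].vector = i + pdev->irq;
				break;
			}
			if (ret < 0) {
				vecs = 1;	/* last resort: one vector */
				break;
			}
			vecs = ret;	/* retry with the supported MSI count */
		}
	}

	return vecs;
}
```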
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 102de2f52b5c..4a4ff4eb8e23 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -933,13 +933,12 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
933 int res = SNTI_TRANSLATION_SUCCESS; 933 int res = SNTI_TRANSLATION_SUCCESS;
934 int xfer_len; 934 int xfer_len;
935 935
936 inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); 936 inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
937 if (inq_response == NULL) { 937 if (inq_response == NULL) {
938 res = -ENOMEM; 938 res = -ENOMEM;
939 goto out_mem; 939 goto out_mem;
940 } 940 }
941 941
942 memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
943 inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */ 942 inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
944 inq_response[2] = 0x00; /* Page Length MSB */ 943 inq_response[2] = 0x00; /* Page Length MSB */
945 inq_response[3] = 0x3C; /* Page Length LSB */ 944 inq_response[3] = 0x3C; /* Page Length LSB */
@@ -964,12 +963,11 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
964 int xfer_len; 963 int xfer_len;
965 u8 *log_response; 964 u8 *log_response;
966 965
967 log_response = kmalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL); 966 log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
968 if (log_response == NULL) { 967 if (log_response == NULL) {
969 res = -ENOMEM; 968 res = -ENOMEM;
970 goto out_mem; 969 goto out_mem;
971 } 970 }
972 memset(log_response, 0, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
973 971
974 log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; 972 log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
975 /* Subpage=0x00, Page Length MSB=0 */ 973 /* Subpage=0x00, Page Length MSB=0 */
@@ -1000,12 +998,11 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
1000 u8 temp_c; 998 u8 temp_c;
1001 u16 temp_k; 999 u16 temp_k;
1002 1000
1003 log_response = kmalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); 1001 log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
1004 if (log_response == NULL) { 1002 if (log_response == NULL) {
1005 res = -ENOMEM; 1003 res = -ENOMEM;
1006 goto out_mem; 1004 goto out_mem;
1007 } 1005 }
1008 memset(log_response, 0, LOG_INFO_EXCP_PAGE_LENGTH);
1009 1006
1010 mem = dma_alloc_coherent(&dev->pci_dev->dev, 1007 mem = dma_alloc_coherent(&dev->pci_dev->dev,
1011 sizeof(struct nvme_smart_log), 1008 sizeof(struct nvme_smart_log),
@@ -1069,12 +1066,11 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
1069 u8 temp_c_cur, temp_c_thresh; 1066 u8 temp_c_cur, temp_c_thresh;
1070 u16 temp_k; 1067 u16 temp_k;
1071 1068
1072 log_response = kmalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); 1069 log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
1073 if (log_response == NULL) { 1070 if (log_response == NULL) {
1074 res = -ENOMEM; 1071 res = -ENOMEM;
1075 goto out_mem; 1072 goto out_mem;
1076 } 1073 }
1077 memset(log_response, 0, LOG_TEMP_PAGE_LENGTH);
1078 1074
1079 mem = dma_alloc_coherent(&dev->pci_dev->dev, 1075 mem = dma_alloc_coherent(&dev->pci_dev->dev,
1080 sizeof(struct nvme_smart_log), 1076 sizeof(struct nvme_smart_log),
@@ -1380,12 +1376,11 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
1380 blk_desc_offset = mph_size; 1376 blk_desc_offset = mph_size;
1381 mode_pages_offset_1 = blk_desc_offset + blk_desc_len; 1377 mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
1382 1378
1383 response = kmalloc(resp_size, GFP_KERNEL); 1379 response = kzalloc(resp_size, GFP_KERNEL);
1384 if (response == NULL) { 1380 if (response == NULL) {
1385 res = -ENOMEM; 1381 res = -ENOMEM;
1386 goto out_mem; 1382 goto out_mem;
1387 } 1383 }
1388 memset(response, 0, resp_size);
1389 1384
1390 res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, 1385 res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
1391 llbaa, mode_data_length, blk_desc_len); 1386 llbaa, mode_data_length, blk_desc_len);
@@ -2480,12 +2475,11 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2480 } 2475 }
2481 id_ns = mem; 2476 id_ns = mem;
2482 2477
2483 response = kmalloc(resp_size, GFP_KERNEL); 2478 response = kzalloc(resp_size, GFP_KERNEL);
2484 if (response == NULL) { 2479 if (response == NULL) {
2485 res = -ENOMEM; 2480 res = -ENOMEM;
2486 goto out_dma; 2481 goto out_dma;
2487 } 2482 }
2488 memset(response, 0, resp_size);
2489 nvme_trans_fill_read_cap(response, id_ns, cdb16); 2483 nvme_trans_fill_read_cap(response, id_ns, cdb16);
2490 2484
2491 xfer_len = min(alloc_len, resp_size); 2485 xfer_len = min(alloc_len, resp_size);
@@ -2554,12 +2548,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2554 goto out_dma; 2548 goto out_dma;
2555 } 2549 }
2556 2550
2557 response = kmalloc(resp_size, GFP_KERNEL); 2551 response = kzalloc(resp_size, GFP_KERNEL);
2558 if (response == NULL) { 2552 if (response == NULL) {
2559 res = -ENOMEM; 2553 res = -ENOMEM;
2560 goto out_dma; 2554 goto out_dma;
2561 } 2555 }
2562 memset(response, 0, resp_size);
2563 2556
2564 /* The first LUN ID will always be 0 per the SAM spec */ 2557 /* The first LUN ID will always be 0 per the SAM spec */
2565 for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) { 2558 for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
@@ -2600,12 +2593,11 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
2600 2593
2601 resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : 2594 resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
2602 (FIXED_FMT_SENSE_DATA_SIZE)); 2595 (FIXED_FMT_SENSE_DATA_SIZE));
2603 response = kmalloc(resp_size, GFP_KERNEL); 2596 response = kzalloc(resp_size, GFP_KERNEL);
2604 if (response == NULL) { 2597 if (response == NULL) {
2605 res = -ENOMEM; 2598 res = -ENOMEM;
2606 goto out; 2599 goto out;
2607 } 2600 }
2608 memset(response, 0, resp_size);
2609 2601
2610 if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { 2602 if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
2611 /* Descriptor Format Sense Data */ 2603 /* Descriptor Format Sense Data */
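
The nvme-scsi.c hunks above are a mechanical cleanup: every kmalloc() that was immediately followed by memset(..., 0, ...) is collapsed into kzalloc(), which returns already-zeroed memory. A minimal sketch of the before/after shape (the helper names and length parameter are placeholders):

```c
#include <linux/slab.h>
#include <linux/string.h>

/* Before: two calls, and the memset is easy to forget on new code paths. */
static void *alloc_response_old(size_t len)
{
	void *buf = kmalloc(len, GFP_KERNEL);

	if (buf)
		memset(buf, 0, len);
	return buf;
}

/* After: one call that returns zeroed memory (or NULL on failure). */
static void *alloc_response_new(size_t len)
{
	return kzalloc(len, GFP_KERNEL);
}
```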
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f451c8d6e231..26ebcf41c213 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Definitions for the NVM Express interface 2 * Definitions for the NVM Express interface
3 * Copyright (c) 2011, Intel Corporation. 3 * Copyright (c) 2011-2013, Intel Corporation.
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify it 5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License, 6 * under the terms and conditions of the GNU General Public License,
@@ -19,7 +19,10 @@
19#ifndef _LINUX_NVME_H 19#ifndef _LINUX_NVME_H
20#define _LINUX_NVME_H 20#define _LINUX_NVME_H
21 21
22#include <linux/types.h> 22#include <uapi/linux/nvme.h>
23#include <linux/pci.h>
24#include <linux/miscdevice.h>
25#include <linux/kref.h>
23 26
24struct nvme_bar { 27struct nvme_bar {
25 __u64 cap; /* Controller Capabilities */ 28 __u64 cap; /* Controller Capabilities */
@@ -50,6 +53,7 @@ enum {
50 NVME_CC_SHN_NONE = 0 << 14, 53 NVME_CC_SHN_NONE = 0 << 14,
51 NVME_CC_SHN_NORMAL = 1 << 14, 54 NVME_CC_SHN_NORMAL = 1 << 14,
52 NVME_CC_SHN_ABRUPT = 2 << 14, 55 NVME_CC_SHN_ABRUPT = 2 << 14,
56 NVME_CC_SHN_MASK = 3 << 14,
53 NVME_CC_IOSQES = 6 << 16, 57 NVME_CC_IOSQES = 6 << 16,
54 NVME_CC_IOCQES = 4 << 20, 58 NVME_CC_IOCQES = 4 << 20,
55 NVME_CSTS_RDY = 1 << 0, 59 NVME_CSTS_RDY = 1 << 0,
@@ -57,462 +61,11 @@ enum {
57 NVME_CSTS_SHST_NORMAL = 0 << 2, 61 NVME_CSTS_SHST_NORMAL = 0 << 2,
58 NVME_CSTS_SHST_OCCUR = 1 << 2, 62 NVME_CSTS_SHST_OCCUR = 1 << 2,
59 NVME_CSTS_SHST_CMPLT = 2 << 2, 63 NVME_CSTS_SHST_CMPLT = 2 << 2,
60}; 64 NVME_CSTS_SHST_MASK = 3 << 2,
61
62struct nvme_id_power_state {
63 __le16 max_power; /* centiwatts */
64 __u16 rsvd2;
65 __le32 entry_lat; /* microseconds */
66 __le32 exit_lat; /* microseconds */
67 __u8 read_tput;
68 __u8 read_lat;
69 __u8 write_tput;
70 __u8 write_lat;
71 __u8 rsvd16[16];
72}; 65};
73 66
74#define NVME_VS(major, minor) (major << 16 | minor) 67#define NVME_VS(major, minor) (major << 16 | minor)
75 68
76struct nvme_id_ctrl {
77 __le16 vid;
78 __le16 ssvid;
79 char sn[20];
80 char mn[40];
81 char fr[8];
82 __u8 rab;
83 __u8 ieee[3];
84 __u8 mic;
85 __u8 mdts;
86 __u8 rsvd78[178];
87 __le16 oacs;
88 __u8 acl;
89 __u8 aerl;
90 __u8 frmw;
91 __u8 lpa;
92 __u8 elpe;
93 __u8 npss;
94 __u8 rsvd264[248];
95 __u8 sqes;
96 __u8 cqes;
97 __u8 rsvd514[2];
98 __le32 nn;
99 __le16 oncs;
100 __le16 fuses;
101 __u8 fna;
102 __u8 vwc;
103 __le16 awun;
104 __le16 awupf;
105 __u8 rsvd530[1518];
106 struct nvme_id_power_state psd[32];
107 __u8 vs[1024];
108};
109
110enum {
111 NVME_CTRL_ONCS_COMPARE = 1 << 0,
112 NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
113 NVME_CTRL_ONCS_DSM = 1 << 2,
114};
115
116struct nvme_lbaf {
117 __le16 ms;
118 __u8 ds;
119 __u8 rp;
120};
121
122struct nvme_id_ns {
123 __le64 nsze;
124 __le64 ncap;
125 __le64 nuse;
126 __u8 nsfeat;
127 __u8 nlbaf;
128 __u8 flbas;
129 __u8 mc;
130 __u8 dpc;
131 __u8 dps;
132 __u8 rsvd30[98];
133 struct nvme_lbaf lbaf[16];
134 __u8 rsvd192[192];
135 __u8 vs[3712];
136};
137
138enum {
139 NVME_NS_FEAT_THIN = 1 << 0,
140 NVME_LBAF_RP_BEST = 0,
141 NVME_LBAF_RP_BETTER = 1,
142 NVME_LBAF_RP_GOOD = 2,
143 NVME_LBAF_RP_DEGRADED = 3,
144};
145
146struct nvme_smart_log {
147 __u8 critical_warning;
148 __u8 temperature[2];
149 __u8 avail_spare;
150 __u8 spare_thresh;
151 __u8 percent_used;
152 __u8 rsvd6[26];
153 __u8 data_units_read[16];
154 __u8 data_units_written[16];
155 __u8 host_reads[16];
156 __u8 host_writes[16];
157 __u8 ctrl_busy_time[16];
158 __u8 power_cycles[16];
159 __u8 power_on_hours[16];
160 __u8 unsafe_shutdowns[16];
161 __u8 media_errors[16];
162 __u8 num_err_log_entries[16];
163 __u8 rsvd192[320];
164};
165
166enum {
167 NVME_SMART_CRIT_SPARE = 1 << 0,
168 NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
169 NVME_SMART_CRIT_RELIABILITY = 1 << 2,
170 NVME_SMART_CRIT_MEDIA = 1 << 3,
171 NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
172};
173
174struct nvme_lba_range_type {
175 __u8 type;
176 __u8 attributes;
177 __u8 rsvd2[14];
178 __u64 slba;
179 __u64 nlb;
180 __u8 guid[16];
181 __u8 rsvd48[16];
182};
183
184enum {
185 NVME_LBART_TYPE_FS = 0x01,
186 NVME_LBART_TYPE_RAID = 0x02,
187 NVME_LBART_TYPE_CACHE = 0x03,
188 NVME_LBART_TYPE_SWAP = 0x04,
189
190 NVME_LBART_ATTRIB_TEMP = 1 << 0,
191 NVME_LBART_ATTRIB_HIDE = 1 << 1,
192};
193
194/* I/O commands */
195
196enum nvme_opcode {
197 nvme_cmd_flush = 0x00,
198 nvme_cmd_write = 0x01,
199 nvme_cmd_read = 0x02,
200 nvme_cmd_write_uncor = 0x04,
201 nvme_cmd_compare = 0x05,
202 nvme_cmd_dsm = 0x09,
203};
204
205struct nvme_common_command {
206 __u8 opcode;
207 __u8 flags;
208 __u16 command_id;
209 __le32 nsid;
210 __le32 cdw2[2];
211 __le64 metadata;
212 __le64 prp1;
213 __le64 prp2;
214 __le32 cdw10[6];
215};
216
217struct nvme_rw_command {
218 __u8 opcode;
219 __u8 flags;
220 __u16 command_id;
221 __le32 nsid;
222 __u64 rsvd2;
223 __le64 metadata;
224 __le64 prp1;
225 __le64 prp2;
226 __le64 slba;
227 __le16 length;
228 __le16 control;
229 __le32 dsmgmt;
230 __le32 reftag;
231 __le16 apptag;
232 __le16 appmask;
233};
234
235enum {
236 NVME_RW_LR = 1 << 15,
237 NVME_RW_FUA = 1 << 14,
238 NVME_RW_DSM_FREQ_UNSPEC = 0,
239 NVME_RW_DSM_FREQ_TYPICAL = 1,
240 NVME_RW_DSM_FREQ_RARE = 2,
241 NVME_RW_DSM_FREQ_READS = 3,
242 NVME_RW_DSM_FREQ_WRITES = 4,
243 NVME_RW_DSM_FREQ_RW = 5,
244 NVME_RW_DSM_FREQ_ONCE = 6,
245 NVME_RW_DSM_FREQ_PREFETCH = 7,
246 NVME_RW_DSM_FREQ_TEMP = 8,
247 NVME_RW_DSM_LATENCY_NONE = 0 << 4,
248 NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
249 NVME_RW_DSM_LATENCY_NORM = 2 << 4,
250 NVME_RW_DSM_LATENCY_LOW = 3 << 4,
251 NVME_RW_DSM_SEQ_REQ = 1 << 6,
252 NVME_RW_DSM_COMPRESSED = 1 << 7,
253};
254
255struct nvme_dsm_cmd {
256 __u8 opcode;
257 __u8 flags;
258 __u16 command_id;
259 __le32 nsid;
260 __u64 rsvd2[2];
261 __le64 prp1;
262 __le64 prp2;
263 __le32 nr;
264 __le32 attributes;
265 __u32 rsvd12[4];
266};
267
268enum {
269 NVME_DSMGMT_IDR = 1 << 0,
270 NVME_DSMGMT_IDW = 1 << 1,
271 NVME_DSMGMT_AD = 1 << 2,
272};
273
274struct nvme_dsm_range {
275 __le32 cattr;
276 __le32 nlb;
277 __le64 slba;
278};
279
280/* Admin commands */
281
282enum nvme_admin_opcode {
283 nvme_admin_delete_sq = 0x00,
284 nvme_admin_create_sq = 0x01,
285 nvme_admin_get_log_page = 0x02,
286 nvme_admin_delete_cq = 0x04,
287 nvme_admin_create_cq = 0x05,
288 nvme_admin_identify = 0x06,
289 nvme_admin_abort_cmd = 0x08,
290 nvme_admin_set_features = 0x09,
291 nvme_admin_get_features = 0x0a,
292 nvme_admin_async_event = 0x0c,
293 nvme_admin_activate_fw = 0x10,
294 nvme_admin_download_fw = 0x11,
295 nvme_admin_format_nvm = 0x80,
296 nvme_admin_security_send = 0x81,
297 nvme_admin_security_recv = 0x82,
298};
299
300enum {
301 NVME_QUEUE_PHYS_CONTIG = (1 << 0),
302 NVME_CQ_IRQ_ENABLED = (1 << 1),
303 NVME_SQ_PRIO_URGENT = (0 << 1),
304 NVME_SQ_PRIO_HIGH = (1 << 1),
305 NVME_SQ_PRIO_MEDIUM = (2 << 1),
306 NVME_SQ_PRIO_LOW = (3 << 1),
307 NVME_FEAT_ARBITRATION = 0x01,
308 NVME_FEAT_POWER_MGMT = 0x02,
309 NVME_FEAT_LBA_RANGE = 0x03,
310 NVME_FEAT_TEMP_THRESH = 0x04,
311 NVME_FEAT_ERR_RECOVERY = 0x05,
312 NVME_FEAT_VOLATILE_WC = 0x06,
313 NVME_FEAT_NUM_QUEUES = 0x07,
314 NVME_FEAT_IRQ_COALESCE = 0x08,
315 NVME_FEAT_IRQ_CONFIG = 0x09,
316 NVME_FEAT_WRITE_ATOMIC = 0x0a,
317 NVME_FEAT_ASYNC_EVENT = 0x0b,
318 NVME_FEAT_SW_PROGRESS = 0x0c,
319 NVME_FWACT_REPL = (0 << 3),
320 NVME_FWACT_REPL_ACTV = (1 << 3),
321 NVME_FWACT_ACTV = (2 << 3),
322};
323
324struct nvme_identify {
325 __u8 opcode;
326 __u8 flags;
327 __u16 command_id;
328 __le32 nsid;
329 __u64 rsvd2[2];
330 __le64 prp1;
331 __le64 prp2;
332 __le32 cns;
333 __u32 rsvd11[5];
334};
335
336struct nvme_features {
337 __u8 opcode;
338 __u8 flags;
339 __u16 command_id;
340 __le32 nsid;
341 __u64 rsvd2[2];
342 __le64 prp1;
343 __le64 prp2;
344 __le32 fid;
345 __le32 dword11;
346 __u32 rsvd12[4];
347};
348
349struct nvme_create_cq {
350 __u8 opcode;
351 __u8 flags;
352 __u16 command_id;
353 __u32 rsvd1[5];
354 __le64 prp1;
355 __u64 rsvd8;
356 __le16 cqid;
357 __le16 qsize;
358 __le16 cq_flags;
359 __le16 irq_vector;
360 __u32 rsvd12[4];
361};
362
363struct nvme_create_sq {
364 __u8 opcode;
365 __u8 flags;
366 __u16 command_id;
367 __u32 rsvd1[5];
368 __le64 prp1;
369 __u64 rsvd8;
370 __le16 sqid;
371 __le16 qsize;
372 __le16 sq_flags;
373 __le16 cqid;
374 __u32 rsvd12[4];
375};
376
377struct nvme_delete_queue {
378 __u8 opcode;
379 __u8 flags;
380 __u16 command_id;
381 __u32 rsvd1[9];
382 __le16 qid;
383 __u16 rsvd10;
384 __u32 rsvd11[5];
385};
386
387struct nvme_download_firmware {
388 __u8 opcode;
389 __u8 flags;
390 __u16 command_id;
391 __u32 rsvd1[5];
392 __le64 prp1;
393 __le64 prp2;
394 __le32 numd;
395 __le32 offset;
396 __u32 rsvd12[4];
397};
398
399struct nvme_format_cmd {
400 __u8 opcode;
401 __u8 flags;
402 __u16 command_id;
403 __le32 nsid;
404 __u64 rsvd2[4];
405 __le32 cdw10;
406 __u32 rsvd11[5];
407};
408
409struct nvme_command {
410 union {
411 struct nvme_common_command common;
412 struct nvme_rw_command rw;
413 struct nvme_identify identify;
414 struct nvme_features features;
415 struct nvme_create_cq create_cq;
416 struct nvme_create_sq create_sq;
417 struct nvme_delete_queue delete_queue;
418 struct nvme_download_firmware dlfw;
419 struct nvme_format_cmd format;
420 struct nvme_dsm_cmd dsm;
421 };
422};
423
424enum {
425 NVME_SC_SUCCESS = 0x0,
426 NVME_SC_INVALID_OPCODE = 0x1,
427 NVME_SC_INVALID_FIELD = 0x2,
428 NVME_SC_CMDID_CONFLICT = 0x3,
429 NVME_SC_DATA_XFER_ERROR = 0x4,
430 NVME_SC_POWER_LOSS = 0x5,
431 NVME_SC_INTERNAL = 0x6,
432 NVME_SC_ABORT_REQ = 0x7,
433 NVME_SC_ABORT_QUEUE = 0x8,
434 NVME_SC_FUSED_FAIL = 0x9,
435 NVME_SC_FUSED_MISSING = 0xa,
436 NVME_SC_INVALID_NS = 0xb,
437 NVME_SC_CMD_SEQ_ERROR = 0xc,
438 NVME_SC_LBA_RANGE = 0x80,
439 NVME_SC_CAP_EXCEEDED = 0x81,
440 NVME_SC_NS_NOT_READY = 0x82,
441 NVME_SC_CQ_INVALID = 0x100,
442 NVME_SC_QID_INVALID = 0x101,
443 NVME_SC_QUEUE_SIZE = 0x102,
444 NVME_SC_ABORT_LIMIT = 0x103,
445 NVME_SC_ABORT_MISSING = 0x104,
446 NVME_SC_ASYNC_LIMIT = 0x105,
447 NVME_SC_FIRMWARE_SLOT = 0x106,
448 NVME_SC_FIRMWARE_IMAGE = 0x107,
449 NVME_SC_INVALID_VECTOR = 0x108,
450 NVME_SC_INVALID_LOG_PAGE = 0x109,
451 NVME_SC_INVALID_FORMAT = 0x10a,
452 NVME_SC_BAD_ATTRIBUTES = 0x180,
453 NVME_SC_WRITE_FAULT = 0x280,
454 NVME_SC_READ_ERROR = 0x281,
455 NVME_SC_GUARD_CHECK = 0x282,
456 NVME_SC_APPTAG_CHECK = 0x283,
457 NVME_SC_REFTAG_CHECK = 0x284,
458 NVME_SC_COMPARE_FAILED = 0x285,
459 NVME_SC_ACCESS_DENIED = 0x286,
460};
461
462struct nvme_completion {
463 __le32 result; /* Used by admin commands to return data */
464 __u32 rsvd;
465 __le16 sq_head; /* how much of this queue may be reclaimed */
466 __le16 sq_id; /* submission queue that generated this entry */
467 __u16 command_id; /* of the command which completed */
468 __le16 status; /* did the command fail, and if so, why? */
469};
470
471struct nvme_user_io {
472 __u8 opcode;
473 __u8 flags;
474 __u16 control;
475 __u16 nblocks;
476 __u16 rsvd;
477 __u64 metadata;
478 __u64 addr;
479 __u64 slba;
480 __u32 dsmgmt;
481 __u32 reftag;
482 __u16 apptag;
483 __u16 appmask;
484};
485
486struct nvme_admin_cmd {
487 __u8 opcode;
488 __u8 flags;
489 __u16 rsvd1;
490 __u32 nsid;
491 __u32 cdw2;
492 __u32 cdw3;
493 __u64 metadata;
494 __u64 addr;
495 __u32 metadata_len;
496 __u32 data_len;
497 __u32 cdw10;
498 __u32 cdw11;
499 __u32 cdw12;
500 __u32 cdw13;
501 __u32 cdw14;
502 __u32 cdw15;
503 __u32 timeout_ms;
504 __u32 result;
505};
506
507#define NVME_IOCTL_ID _IO('N', 0x40)
508#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
509#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
510
511#ifdef __KERNEL__
512#include <linux/pci.h>
513#include <linux/miscdevice.h>
514#include <linux/kref.h>
515
516#define NVME_IO_TIMEOUT (5 * HZ) 69#define NVME_IO_TIMEOUT (5 * HZ)
517 70
518/* 71/*
@@ -553,7 +106,7 @@ struct nvme_ns {
553 struct request_queue *queue; 106 struct request_queue *queue;
554 struct gendisk *disk; 107 struct gendisk *disk;
555 108
556 int ns_id; 109 unsigned ns_id;
557 int lba_shift; 110 int lba_shift;
558 int ms; 111 int ms;
559 u64 mode_select_num_blocks; 112 u64 mode_select_num_blocks;
@@ -572,6 +125,7 @@ struct nvme_iod {
572 int offset; /* Of PRP list */ 125 int offset; /* Of PRP list */
573 int nents; /* Used in scatterlist */ 126 int nents; /* Used in scatterlist */
574 int length; /* Of data, in bytes */ 127 int length; /* Of data, in bytes */
128 unsigned long start_time;
575 dma_addr_t first_dma; 129 dma_addr_t first_dma;
576 struct scatterlist sg[0]; 130 struct scatterlist sg[0];
577}; 131};
@@ -613,6 +167,4 @@ struct sg_io_hdr;
613int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); 167int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
614int nvme_sg_get_version_num(int __user *ip); 168int nvme_sg_get_version_num(int __user *ip);
615 169
616#endif
617
618#endif /* _LINUX_NVME_H */ 170#endif /* _LINUX_NVME_H */
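
The include/linux/nvme.h changes above implement the "Split header file into user-visible and kernel-visible pieces" item: the on-wire structures and ioctl definitions move to include/uapi/linux/nvme.h (exported to userspace via the Kbuild change below), while the kernel header keeps only driver-internal types and pulls the uapi piece in. A sketch of the resulting layering, condensed from the diff:

```c
/* include/uapi/linux/nvme.h — exported to userspace ("header-y += nvme.h") */
#ifndef _UAPI_LINUX_NVME_H
#define _UAPI_LINUX_NVME_H

#include <linux/types.h>
/* nvme_id_ctrl, nvme_id_ns, command/completion formats, status codes,
 * NVME_IOCTL_* definitions ... */

#endif /* _UAPI_LINUX_NVME_H */

/* include/linux/nvme.h — kernel-internal; builds on the uapi header */
#ifndef _LINUX_NVME_H
#define _LINUX_NVME_H

#include <uapi/linux/nvme.h>
#include <linux/pci.h>
#include <linux/miscdevice.h>
#include <linux/kref.h>
/* struct nvme_bar, struct nvme_dev, struct nvme_ns, driver-only helpers ... */

#endif /* _LINUX_NVME_H */
```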
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index e7c94eeb9475..115add2515aa 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -284,6 +284,7 @@ header-y += nfs_mount.h
284header-y += nfsacl.h 284header-y += nfsacl.h
285header-y += nl80211.h 285header-y += nl80211.h
286header-y += nubus.h 286header-y += nubus.h
287header-y += nvme.h
287header-y += nvram.h 288header-y += nvram.h
288header-y += omap3isp.h 289header-y += omap3isp.h
289header-y += omapfb.h 290header-y += omapfb.h
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
new file mode 100644
index 000000000000..989c04e0c563
--- /dev/null
+++ b/include/uapi/linux/nvme.h
@@ -0,0 +1,477 @@
1/*
2 * Definitions for the NVM Express interface
3 * Copyright (c) 2011-2013, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#ifndef _UAPI_LINUX_NVME_H
20#define _UAPI_LINUX_NVME_H
21
22#include <linux/types.h>
23
24struct nvme_id_power_state {
25 __le16 max_power; /* centiwatts */
26 __u8 rsvd2;
27 __u8 flags;
28 __le32 entry_lat; /* microseconds */
29 __le32 exit_lat; /* microseconds */
30 __u8 read_tput;
31 __u8 read_lat;
32 __u8 write_tput;
33 __u8 write_lat;
34 __u8 rsvd16[16];
35};
36
37enum {
38 NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0,
39 NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
40};
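
As a point of reference, max_power is in centiwatts unless NVME_PS_FLAGS_MAX_POWER_SCALE is set, in which case the unit drops to 0.0001 W. A minimal userspace-style helper, assuming this header plus <endian.h> for le16toh; the exact scaling should be checked against the spec revision in use:

/* Illustrative only: max_power of one power-state descriptor, in microwatts. */
static inline unsigned int nvme_ps_max_power_uw(const struct nvme_id_power_state *ps)
{
	unsigned int mp = le16toh(ps->max_power);

	/* Flag set: units of 0.0001 W; flag clear: units of 0.01 W. */
	return (ps->flags & NVME_PS_FLAGS_MAX_POWER_SCALE) ? mp * 100 : mp * 10000;
}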
41
42struct nvme_id_ctrl {
43 __le16 vid;
44 __le16 ssvid;
45 char sn[20];
46 char mn[40];
47 char fr[8];
48 __u8 rab;
49 __u8 ieee[3];
50 __u8 mic;
51 __u8 mdts;
52 __u8 rsvd78[178];
53 __le16 oacs;
54 __u8 acl;
55 __u8 aerl;
56 __u8 frmw;
57 __u8 lpa;
58 __u8 elpe;
59 __u8 npss;
60 __u8 rsvd264[248];
61 __u8 sqes;
62 __u8 cqes;
63 __u8 rsvd514[2];
64 __le32 nn;
65 __le16 oncs;
66 __le16 fuses;
67 __u8 fna;
68 __u8 vwc;
69 __le16 awun;
70 __le16 awupf;
71 __u8 rsvd530[1518];
72 struct nvme_id_power_state psd[32];
73 __u8 vs[1024];
74};
75
76enum {
77 NVME_CTRL_ONCS_COMPARE = 1 << 0,
78 NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
79 NVME_CTRL_ONCS_DSM = 1 << 2,
80};
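
The oncs word advertises which optional commands the controller implements; a small sketch (assuming this header and <endian.h>) of testing for Dataset Management support before issuing discards:

/* Example: does this controller accept Dataset Management (e.g. TRIM) commands? */
static inline int nvme_ctrl_supports_dsm(const struct nvme_id_ctrl *ctrl)
{
	return (le16toh(ctrl->oncs) & NVME_CTRL_ONCS_DSM) != 0;
}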
81
82struct nvme_lbaf {
83 __le16 ms;
84 __u8 ds;
85 __u8 rp;
86};
87
88struct nvme_id_ns {
89 __le64 nsze;
90 __le64 ncap;
91 __le64 nuse;
92 __u8 nsfeat;
93 __u8 nlbaf;
94 __u8 flbas;
95 __u8 mc;
96 __u8 dpc;
97 __u8 dps;
98 __u8 rsvd30[98];
99 struct nvme_lbaf lbaf[16];
100 __u8 rsvd192[192];
101 __u8 vs[3712];
102};
103
104enum {
105 NVME_NS_FEAT_THIN = 1 << 0,
106 NVME_LBAF_RP_BEST = 0,
107 NVME_LBAF_RP_BETTER = 1,
108 NVME_LBAF_RP_GOOD = 2,
109 NVME_LBAF_RP_DEGRADED = 3,
110};
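
The formatted block size follows from flbas, whose low four bits index lbaf[], and from ds, which is log2 of the data size; a hedged helper assuming this header is included:

/* Example: bytes per logical block for the namespace's current format. */
static inline unsigned int nvme_ns_lba_size(const struct nvme_id_ns *id)
{
	const struct nvme_lbaf *lbaf = &id->lbaf[id->flbas & 0xf];

	return 1u << lbaf->ds;	/* ms (metadata bytes per block) is carried separately */
}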
111
112struct nvme_smart_log {
113 __u8 critical_warning;
114 __u8 temperature[2];
115 __u8 avail_spare;
116 __u8 spare_thresh;
117 __u8 percent_used;
118 __u8 rsvd6[26];
119 __u8 data_units_read[16];
120 __u8 data_units_written[16];
121 __u8 host_reads[16];
122 __u8 host_writes[16];
123 __u8 ctrl_busy_time[16];
124 __u8 power_cycles[16];
125 __u8 power_on_hours[16];
126 __u8 unsafe_shutdowns[16];
127 __u8 media_errors[16];
128 __u8 num_err_log_entries[16];
129 __u8 rsvd192[320];
130};
131
132enum {
133 NVME_SMART_CRIT_SPARE = 1 << 0,
134 NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
135 NVME_SMART_CRIT_RELIABILITY = 1 << 2,
136 NVME_SMART_CRIT_MEDIA = 1 << 3,
137 NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
138};
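
The composite temperature in the SMART/health log is a two-byte little-endian value in Kelvin per the spec; an illustrative conversion:

/* Example: SMART composite temperature in degrees Celsius. */
static inline int nvme_smart_temp_celsius(const struct nvme_smart_log *log)
{
	int kelvin = log->temperature[0] | (log->temperature[1] << 8);

	return kelvin - 273;
}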
139
140struct nvme_lba_range_type {
141 __u8 type;
142 __u8 attributes;
143 __u8 rsvd2[14];
144 __u64 slba;
145 __u64 nlb;
146 __u8 guid[16];
147 __u8 rsvd48[16];
148};
149
150enum {
151 NVME_LBART_TYPE_FS = 0x01,
152 NVME_LBART_TYPE_RAID = 0x02,
153 NVME_LBART_TYPE_CACHE = 0x03,
154 NVME_LBART_TYPE_SWAP = 0x04,
155
156 NVME_LBART_ATTRIB_TEMP = 1 << 0,
157 NVME_LBART_ATTRIB_HIDE = 1 << 1,
158};
159
160/* I/O commands */
161
162enum nvme_opcode {
163 nvme_cmd_flush = 0x00,
164 nvme_cmd_write = 0x01,
165 nvme_cmd_read = 0x02,
166 nvme_cmd_write_uncor = 0x04,
167 nvme_cmd_compare = 0x05,
168 nvme_cmd_dsm = 0x09,
169};
170
171struct nvme_common_command {
172 __u8 opcode;
173 __u8 flags;
174 __u16 command_id;
175 __le32 nsid;
176 __le32 cdw2[2];
177 __le64 metadata;
178 __le64 prp1;
179 __le64 prp2;
180 __le32 cdw10[6];
181};
182
183struct nvme_rw_command {
184 __u8 opcode;
185 __u8 flags;
186 __u16 command_id;
187 __le32 nsid;
188 __u64 rsvd2;
189 __le64 metadata;
190 __le64 prp1;
191 __le64 prp2;
192 __le64 slba;
193 __le16 length;
194 __le16 control;
195 __le32 dsmgmt;
196 __le32 reftag;
197 __le16 apptag;
198 __le16 appmask;
199};
200
201enum {
202 NVME_RW_LR = 1 << 15,
203 NVME_RW_FUA = 1 << 14,
204 NVME_RW_DSM_FREQ_UNSPEC = 0,
205 NVME_RW_DSM_FREQ_TYPICAL = 1,
206 NVME_RW_DSM_FREQ_RARE = 2,
207 NVME_RW_DSM_FREQ_READS = 3,
208 NVME_RW_DSM_FREQ_WRITES = 4,
209 NVME_RW_DSM_FREQ_RW = 5,
210 NVME_RW_DSM_FREQ_ONCE = 6,
211 NVME_RW_DSM_FREQ_PREFETCH = 7,
212 NVME_RW_DSM_FREQ_TEMP = 8,
213 NVME_RW_DSM_LATENCY_NONE = 0 << 4,
214 NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
215 NVME_RW_DSM_LATENCY_NORM = 2 << 4,
216 NVME_RW_DSM_LATENCY_LOW = 3 << 4,
217 NVME_RW_DSM_SEQ_REQ = 1 << 6,
218 NVME_RW_DSM_COMPRESSED = 1 << 7,
219};
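
The NVME_RW_DSM_* values OR together into the low byte of the rw command's dsmgmt field: access frequency in bits 3:0, access latency in bits 5:4, plus the sequential and compressibility hints. A hedged example (htole32 from <endian.h>; in-kernel code would use cpu_to_le32):

/* Example: dataset-management hint for a sequential, prefetch-friendly stream. */
static inline __le32 nvme_rw_seq_hint(void)
{
	return htole32(NVME_RW_DSM_FREQ_PREFETCH |
		       NVME_RW_DSM_LATENCY_LOW |
		       NVME_RW_DSM_SEQ_REQ);
}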
220
221struct nvme_dsm_cmd {
222 __u8 opcode;
223 __u8 flags;
224 __u16 command_id;
225 __le32 nsid;
226 __u64 rsvd2[2];
227 __le64 prp1;
228 __le64 prp2;
229 __le32 nr;
230 __le32 attributes;
231 __u32 rsvd12[4];
232};
233
234enum {
235 NVME_DSMGMT_IDR = 1 << 0,
236 NVME_DSMGMT_IDW = 1 << 1,
237 NVME_DSMGMT_AD = 1 << 2,
238};
239
240struct nvme_dsm_range {
241 __le32 cattr;
242 __le32 nlb;
243 __le64 slba;
244};
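
A deallocate (TRIM) request is encoded as an array of these ranges: each entry gives a starting LBA and a length in blocks, while the enclosing nvme_dsm_cmd carries a 0-based range count in nr and NVME_DSMGMT_AD in attributes. A userspace-style sketch of filling one range (the driver itself uses cpu_to_le32/le64 and a DMA-mapped buffer):

/* Example: describe 'nlb' blocks starting at 'slba' for deallocation. */
static inline void nvme_fill_discard_range(struct nvme_dsm_range *range,
					   __u64 slba, __u32 nlb)
{
	range->cattr = htole32(0);	/* no context attributes */
	range->nlb   = htole32(nlb);	/* length in logical blocks */
	range->slba  = htole64(slba);
}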
245
246/* Admin commands */
247
248enum nvme_admin_opcode {
249 nvme_admin_delete_sq = 0x00,
250 nvme_admin_create_sq = 0x01,
251 nvme_admin_get_log_page = 0x02,
252 nvme_admin_delete_cq = 0x04,
253 nvme_admin_create_cq = 0x05,
254 nvme_admin_identify = 0x06,
255 nvme_admin_abort_cmd = 0x08,
256 nvme_admin_set_features = 0x09,
257 nvme_admin_get_features = 0x0a,
258 nvme_admin_async_event = 0x0c,
259 nvme_admin_activate_fw = 0x10,
260 nvme_admin_download_fw = 0x11,
261 nvme_admin_format_nvm = 0x80,
262 nvme_admin_security_send = 0x81,
263 nvme_admin_security_recv = 0x82,
264};
265
266enum {
267 NVME_QUEUE_PHYS_CONTIG = (1 << 0),
268 NVME_CQ_IRQ_ENABLED = (1 << 1),
269 NVME_SQ_PRIO_URGENT = (0 << 1),
270 NVME_SQ_PRIO_HIGH = (1 << 1),
271 NVME_SQ_PRIO_MEDIUM = (2 << 1),
272 NVME_SQ_PRIO_LOW = (3 << 1),
273 NVME_FEAT_ARBITRATION = 0x01,
274 NVME_FEAT_POWER_MGMT = 0x02,
275 NVME_FEAT_LBA_RANGE = 0x03,
276 NVME_FEAT_TEMP_THRESH = 0x04,
277 NVME_FEAT_ERR_RECOVERY = 0x05,
278 NVME_FEAT_VOLATILE_WC = 0x06,
279 NVME_FEAT_NUM_QUEUES = 0x07,
280 NVME_FEAT_IRQ_COALESCE = 0x08,
281 NVME_FEAT_IRQ_CONFIG = 0x09,
282 NVME_FEAT_WRITE_ATOMIC = 0x0a,
283 NVME_FEAT_ASYNC_EVENT = 0x0b,
284 NVME_FEAT_SW_PROGRESS = 0x0c,
285 NVME_FWACT_REPL = (0 << 3),
286 NVME_FWACT_REPL_ACTV = (1 << 3),
287 NVME_FWACT_ACTV = (2 << 3),
288};
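
One encoding worth spelling out is NVME_FEAT_NUM_QUEUES: the Set Features dword11 carries the requested submission- and completion-queue counts as 0-based values in its low and high halves. A sketch, assuming both counts are at least one:

/* Example: dword11 for Set Features / NVME_FEAT_NUM_QUEUES. */
static inline __u32 nvme_nr_queues_dword11(unsigned int nr_sq, unsigned int nr_cq)
{
	return (nr_sq - 1) | ((nr_cq - 1) << 16);	/* both halves are 0-based */
}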
289
290struct nvme_identify {
291 __u8 opcode;
292 __u8 flags;
293 __u16 command_id;
294 __le32 nsid;
295 __u64 rsvd2[2];
296 __le64 prp1;
297 __le64 prp2;
298 __le32 cns;
299 __u32 rsvd11[5];
300};
301
302struct nvme_features {
303 __u8 opcode;
304 __u8 flags;
305 __u16 command_id;
306 __le32 nsid;
307 __u64 rsvd2[2];
308 __le64 prp1;
309 __le64 prp2;
310 __le32 fid;
311 __le32 dword11;
312 __u32 rsvd12[4];
313};
314
315struct nvme_create_cq {
316 __u8 opcode;
317 __u8 flags;
318 __u16 command_id;
319 __u32 rsvd1[5];
320 __le64 prp1;
321 __u64 rsvd8;
322 __le16 cqid;
323 __le16 qsize;
324 __le16 cq_flags;
325 __le16 irq_vector;
326 __u32 rsvd12[4];
327};
328
329struct nvme_create_sq {
330 __u8 opcode;
331 __u8 flags;
332 __u16 command_id;
333 __u32 rsvd1[5];
334 __le64 prp1;
335 __u64 rsvd8;
336 __le16 sqid;
337 __le16 qsize;
338 __le16 sq_flags;
339 __le16 cqid;
340 __u32 rsvd12[4];
341};
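
For orientation, a sketch of how a Create I/O Completion Queue entry is laid out; qsize is 0-based, and the driver would use cpu_to_le16/le64 with a DMA address in prp1 (plain htole*() and <string.h> are assumed here purely for illustration):

/* Example: field encoding for a physically contiguous, interrupt-driven CQ. */
static inline void nvme_fill_create_cq(struct nvme_create_cq *c, __u16 qid,
				       __u16 depth, __u16 vector, __u64 prp1)
{
	memset(c, 0, sizeof(*c));		/* reserved fields must be zero */
	c->opcode     = nvme_admin_create_cq;
	c->prp1       = htole64(prp1);		/* base of the queue memory */
	c->cqid       = htole16(qid);
	c->qsize      = htole16(depth - 1);	/* 0-based queue size */
	c->cq_flags   = htole16(NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED);
	c->irq_vector = htole16(vector);
}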
342
343struct nvme_delete_queue {
344 __u8 opcode;
345 __u8 flags;
346 __u16 command_id;
347 __u32 rsvd1[9];
348 __le16 qid;
349 __u16 rsvd10;
350 __u32 rsvd11[5];
351};
352
353struct nvme_download_firmware {
354 __u8 opcode;
355 __u8 flags;
356 __u16 command_id;
357 __u32 rsvd1[5];
358 __le64 prp1;
359 __le64 prp2;
360 __le32 numd;
361 __le32 offset;
362 __u32 rsvd12[4];
363};
364
365struct nvme_format_cmd {
366 __u8 opcode;
367 __u8 flags;
368 __u16 command_id;
369 __le32 nsid;
370 __u64 rsvd2[4];
371 __le32 cdw10;
372 __u32 rsvd11[5];
373};
374
375struct nvme_command {
376 union {
377 struct nvme_common_command common;
378 struct nvme_rw_command rw;
379 struct nvme_identify identify;
380 struct nvme_features features;
381 struct nvme_create_cq create_cq;
382 struct nvme_create_sq create_sq;
383 struct nvme_delete_queue delete_queue;
384 struct nvme_download_firmware dlfw;
385 struct nvme_format_cmd format;
386 struct nvme_dsm_cmd dsm;
387 };
388};
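
Every submission-queue entry is 64 bytes, so the usual pattern is to zero a struct nvme_command and then fill exactly one union member; opcode, flags, command_id and nsid alias the common view. A minimal sketch assuming <string.h> and <endian.h>:

/* Example: build an Identify command in a zeroed SQ entry. */
static inline void nvme_build_identify(struct nvme_command *cmd, __u32 nsid, int ctrl)
{
	memset(cmd, 0, sizeof(*cmd));		/* reserved fields must be zero */
	cmd->identify.opcode = nvme_admin_identify;
	cmd->identify.nsid   = htole32(nsid);
	cmd->identify.cns    = htole32(ctrl ? 1 : 0);	/* 1 = controller, 0 = namespace */
	/* prp1/prp2 would point at the 4096-byte identify data buffer */
}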
389
390enum {
391 NVME_SC_SUCCESS = 0x0,
392 NVME_SC_INVALID_OPCODE = 0x1,
393 NVME_SC_INVALID_FIELD = 0x2,
394 NVME_SC_CMDID_CONFLICT = 0x3,
395 NVME_SC_DATA_XFER_ERROR = 0x4,
396 NVME_SC_POWER_LOSS = 0x5,
397 NVME_SC_INTERNAL = 0x6,
398 NVME_SC_ABORT_REQ = 0x7,
399 NVME_SC_ABORT_QUEUE = 0x8,
400 NVME_SC_FUSED_FAIL = 0x9,
401 NVME_SC_FUSED_MISSING = 0xa,
402 NVME_SC_INVALID_NS = 0xb,
403 NVME_SC_CMD_SEQ_ERROR = 0xc,
404 NVME_SC_LBA_RANGE = 0x80,
405 NVME_SC_CAP_EXCEEDED = 0x81,
406 NVME_SC_NS_NOT_READY = 0x82,
407 NVME_SC_CQ_INVALID = 0x100,
408 NVME_SC_QID_INVALID = 0x101,
409 NVME_SC_QUEUE_SIZE = 0x102,
410 NVME_SC_ABORT_LIMIT = 0x103,
411 NVME_SC_ABORT_MISSING = 0x104,
412 NVME_SC_ASYNC_LIMIT = 0x105,
413 NVME_SC_FIRMWARE_SLOT = 0x106,
414 NVME_SC_FIRMWARE_IMAGE = 0x107,
415 NVME_SC_INVALID_VECTOR = 0x108,
416 NVME_SC_INVALID_LOG_PAGE = 0x109,
417 NVME_SC_INVALID_FORMAT = 0x10a,
418 NVME_SC_BAD_ATTRIBUTES = 0x180,
419 NVME_SC_WRITE_FAULT = 0x280,
420 NVME_SC_READ_ERROR = 0x281,
421 NVME_SC_GUARD_CHECK = 0x282,
422 NVME_SC_APPTAG_CHECK = 0x283,
423 NVME_SC_REFTAG_CHECK = 0x284,
424 NVME_SC_COMPARE_FAILED = 0x285,
425 NVME_SC_ACCESS_DENIED = 0x286,
426};
427
428struct nvme_completion {
429 __le32 result; /* Used by admin commands to return data */
430 __u32 rsvd;
431 __le16 sq_head; /* how much of this queue may be reclaimed */
432 __le16 sq_id; /* submission queue that generated this entry */
433 __u16 command_id; /* of the command which completed */
434 __le16 status; /* did the command fail, and if so, why? */
435};
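
The status word packs the phase tag in bit 0 and the status field in bits 15:1, so the NVME_SC_* values above correspond to status >> 1. A hedged decoding helper, assuming <endian.h>:

/* Example: split a completion's status word into phase, status code and type. */
static inline void nvme_decode_status(const struct nvme_completion *cqe,
				      int *phase, int *sc, int *sct)
{
	__u16 status = le16toh(cqe->status);

	*phase = status & 1;		/* toggles on each pass through the queue */
	*sc    = (status >> 1) & 0xff;	/* status code */
	*sct   = (status >> 9) & 0x7;	/* status code type (0 = generic) */
}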
436
437struct nvme_user_io {
438 __u8 opcode;
439 __u8 flags;
440 __u16 control;
441 __u16 nblocks;
442 __u16 rsvd;
443 __u64 metadata;
444 __u64 addr;
445 __u64 slba;
446 __u32 dsmgmt;
447 __u32 reftag;
448 __u16 apptag;
449 __u16 appmask;
450};
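
This structure is the payload of NVME_IOCTL_SUBMIT_IO (defined below): addr points at a user buffer and nblocks is 0-based, matching the on-wire NLB field. A sketch of describing a read, assuming <string.h> and <stdint.h>:

/* Example: describe a read of 'count' blocks at 'slba' into 'buf'. */
static inline void nvme_fill_user_read(struct nvme_user_io *io, void *buf,
				       __u64 slba, __u16 count)
{
	memset(io, 0, sizeof(*io));
	io->opcode  = nvme_cmd_read;
	io->addr    = (__u64)(uintptr_t)buf;
	io->slba    = slba;		/* native endian; the driver byte-swaps */
	io->nblocks = count - 1;	/* 0-based block count */
}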
451
452struct nvme_admin_cmd {
453 __u8 opcode;
454 __u8 flags;
455 __u16 rsvd1;
456 __u32 nsid;
457 __u32 cdw2;
458 __u32 cdw3;
459 __u64 metadata;
460 __u64 addr;
461 __u32 metadata_len;
462 __u32 data_len;
463 __u32 cdw10;
464 __u32 cdw11;
465 __u32 cdw12;
466 __u32 cdw13;
467 __u32 cdw14;
468 __u32 cdw15;
469 __u32 timeout_ms;
470 __u32 result;
471};
472
473#define NVME_IOCTL_ID _IO('N', 0x40)
474#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
475#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
476
477#endif /* _UAPI_LINUX_NVME_H */
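
Taken together, the three ioctls above are the user-space surface of this header. A hedged usage sketch follows; the device node name is only an example and error handling is minimal:

/* Illustrative user-space use of the NVMe ioctls. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvme.h>

int main(void)
{
	struct nvme_admin_cmd cmd;
	void *id;
	int fd, nsid;

	fd = open("/dev/nvme0n1", O_RDONLY);	/* example namespace block device */
	if (fd < 0)
		return 1;

	nsid = ioctl(fd, NVME_IOCTL_ID);	/* namespace ID of this block device */
	printf("nsid: %d\n", nsid);

	if (posix_memalign(&id, 4096, 4096))
		return 1;

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode   = nvme_admin_identify;
	cmd.addr     = (__u64)(uintptr_t)id;
	cmd.data_len = 4096;
	cmd.cdw10    = 1;			/* CNS=1: identify controller */

	if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) == 0)
		printf("model: %.40s\n", (char *)id + 24);	/* offset of mn in nvme_id_ctrl */

	free(id);
	close(fd);
	return 0;
}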