diff options
author | Keith Busch <keith.busch@intel.com> | 2013-07-15 17:02:20 -0400 |
---|---|---|
committer | Matthew Wilcox <matthew.r.wilcox@intel.com> | 2013-09-03 16:39:28 -0400 |
commit | 224042742582c9938788b81165180c876e997a07 (patch) | |
tree | 0d2593b96ff6f1639d7188e9b79ea3d6c1459202 | |
parent | 0877cb0d285c7f1d53d0b84b360bdea4be4f3f59 (diff) |
NVMe: Separate queue alloc/free from create/delete
This separates NVMe queue allocation from creation, and queue deletion
from freeing. The goal is to allow queues to be temporarily disabled in
the future and the same memory reused when they are brought back online,
for example when resuming from a suspended state.
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
-rw-r--r-- | drivers/block/nvme-core.c | 133 |
1 files changed, 94 insertions, 39 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index a93f52c48036..95e28b6bcd09 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -82,6 +82,7 @@ struct nvme_queue { | |||
82 | u16 cq_head; | 82 | u16 cq_head; |
83 | u8 cq_phase; | 83 | u8 cq_phase; |
84 | u8 cqe_seen; | 84 | u8 cqe_seen; |
85 | u8 q_suspended; | ||
85 | unsigned long cmdid_data[]; | 86 | unsigned long cmdid_data[]; |
86 | }; | 87 | }; |
87 | 88 | ||
@@ -117,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) | |||
117 | return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; | 118 | return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; |
118 | } | 119 | } |
119 | 120 | ||
121 | static unsigned nvme_queue_extra(int depth) | ||
122 | { | ||
123 | return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info)); | ||
124 | } | ||
125 | |||
120 | /** | 126 | /** |
121 | * alloc_cmdid() - Allocate a Command ID | 127 | * alloc_cmdid() - Allocate a Command ID |
122 | * @nvmeq: The queue that will be used for this command | 128 | * @nvmeq: The queue that will be used for this command |
@@ -784,7 +790,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) | |||
784 | int result = -EBUSY; | 790 | int result = -EBUSY; |
785 | 791 | ||
786 | spin_lock_irq(&nvmeq->q_lock); | 792 | spin_lock_irq(&nvmeq->q_lock); |
787 | if (bio_list_empty(&nvmeq->sq_cong)) | 793 | if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong)) |
788 | result = nvme_submit_bio_queue(nvmeq, ns, bio); | 794 | result = nvme_submit_bio_queue(nvmeq, ns, bio); |
789 | if (unlikely(result)) { | 795 | if (unlikely(result)) { |
790 | if (bio_list_empty(&nvmeq->sq_cong)) | 796 | if (bio_list_empty(&nvmeq->sq_cong)) |
@@ -1018,8 +1024,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) | |||
1018 | } | 1024 | } |
1019 | } | 1025 | } |
1020 | 1026 | ||
1021 | static void nvme_free_queue_mem(struct nvme_queue *nvmeq) | 1027 | static void nvme_free_queue(struct nvme_queue *nvmeq) |
1022 | { | 1028 | { |
1029 | spin_lock_irq(&nvmeq->q_lock); | ||
1030 | while (bio_list_peek(&nvmeq->sq_cong)) { | ||
1031 | struct bio *bio = bio_list_pop(&nvmeq->sq_cong); | ||
1032 | bio_endio(bio, -EIO); | ||
1033 | } | ||
1034 | spin_unlock_irq(&nvmeq->q_lock); | ||
1035 | |||
1023 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | 1036 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), |
1024 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | 1037 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); |
1025 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | 1038 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), |
@@ -1027,17 +1040,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq) | |||
1027 | kfree(nvmeq); | 1040 | kfree(nvmeq); |
1028 | } | 1041 | } |
1029 | 1042 | ||
1030 | static void nvme_free_queue(struct nvme_dev *dev, int qid) | 1043 | static void nvme_free_queues(struct nvme_dev *dev) |
1044 | { | ||
1045 | int i; | ||
1046 | |||
1047 | for (i = dev->queue_count - 1; i >= 0; i--) { | ||
1048 | nvme_free_queue(dev->queues[i]); | ||
1049 | dev->queue_count--; | ||
1050 | dev->queues[i] = NULL; | ||
1051 | } | ||
1052 | } | ||
1053 | |||
1054 | static void nvme_disable_queue(struct nvme_dev *dev, int qid) | ||
1031 | { | 1055 | { |
1032 | struct nvme_queue *nvmeq = dev->queues[qid]; | 1056 | struct nvme_queue *nvmeq = dev->queues[qid]; |
1033 | int vector = dev->entry[nvmeq->cq_vector].vector; | 1057 | int vector = dev->entry[nvmeq->cq_vector].vector; |
1034 | 1058 | ||
1035 | spin_lock_irq(&nvmeq->q_lock); | 1059 | spin_lock_irq(&nvmeq->q_lock); |
1036 | nvme_cancel_ios(nvmeq, false); | 1060 | if (nvmeq->q_suspended) { |
1037 | while (bio_list_peek(&nvmeq->sq_cong)) { | 1061 | spin_unlock_irq(&nvmeq->q_lock); |
1038 | struct bio *bio = bio_list_pop(&nvmeq->sq_cong); | 1062 | return; |
1039 | bio_endio(bio, -EIO); | ||
1040 | } | 1063 | } |
1064 | nvmeq->q_suspended = 1; | ||
1041 | spin_unlock_irq(&nvmeq->q_lock); | 1065 | spin_unlock_irq(&nvmeq->q_lock); |
1042 | 1066 | ||
1043 | irq_set_affinity_hint(vector, NULL); | 1067 | irq_set_affinity_hint(vector, NULL); |
@@ -1049,15 +1073,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) | |||
1049 | adapter_delete_cq(dev, qid); | 1073 | adapter_delete_cq(dev, qid); |
1050 | } | 1074 | } |
1051 | 1075 | ||
1052 | nvme_free_queue_mem(nvmeq); | 1076 | spin_lock_irq(&nvmeq->q_lock); |
1077 | nvme_process_cq(nvmeq); | ||
1078 | nvme_cancel_ios(nvmeq, false); | ||
1079 | spin_unlock_irq(&nvmeq->q_lock); | ||
1053 | } | 1080 | } |
1054 | 1081 | ||
1055 | static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, | 1082 | static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, |
1056 | int depth, int vector) | 1083 | int depth, int vector) |
1057 | { | 1084 | { |
1058 | struct device *dmadev = &dev->pci_dev->dev; | 1085 | struct device *dmadev = &dev->pci_dev->dev; |
1059 | unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * | 1086 | unsigned extra = nvme_queue_extra(depth); |
1060 | sizeof(struct nvme_cmd_info)); | ||
1061 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); | 1087 | struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); |
1062 | if (!nvmeq) | 1088 | if (!nvmeq) |
1063 | return NULL; | 1089 | return NULL; |
@@ -1084,6 +1110,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, | |||
1084 | nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; | 1110 | nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; |
1085 | nvmeq->q_depth = depth; | 1111 | nvmeq->q_depth = depth; |
1086 | nvmeq->cq_vector = vector; | 1112 | nvmeq->cq_vector = vector; |
1113 | nvmeq->q_suspended = 1; | ||
1114 | dev->queue_count++; | ||
1087 | 1115 | ||
1088 | return nvmeq; | 1116 | return nvmeq; |
1089 | 1117 | ||
@@ -1107,18 +1135,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, | |||
1107 | IRQF_DISABLED | IRQF_SHARED, name, nvmeq); | 1135 | IRQF_DISABLED | IRQF_SHARED, name, nvmeq); |
1108 | } | 1136 | } |
1109 | 1137 | ||
1110 | static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, | 1138 | static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) |
1111 | int cq_size, int vector) | ||
1112 | { | 1139 | { |
1113 | int result; | 1140 | struct nvme_dev *dev = nvmeq->dev; |
1114 | struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector); | 1141 | unsigned extra = nvme_queue_extra(nvmeq->q_depth); |
1115 | 1142 | ||
1116 | if (!nvmeq) | 1143 | nvmeq->sq_tail = 0; |
1117 | return ERR_PTR(-ENOMEM); | 1144 | nvmeq->cq_head = 0; |
1145 | nvmeq->cq_phase = 1; | ||
1146 | nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; | ||
1147 | memset(nvmeq->cmdid_data, 0, extra); | ||
1148 | memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); | ||
1149 | nvme_cancel_ios(nvmeq, false); | ||
1150 | nvmeq->q_suspended = 0; | ||
1151 | } | ||
1152 | |||
1153 | static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) | ||
1154 | { | ||
1155 | struct nvme_dev *dev = nvmeq->dev; | ||
1156 | int result; | ||
1118 | 1157 | ||
1119 | result = adapter_alloc_cq(dev, qid, nvmeq); | 1158 | result = adapter_alloc_cq(dev, qid, nvmeq); |
1120 | if (result < 0) | 1159 | if (result < 0) |
1121 | goto free_nvmeq; | 1160 | return result; |
1122 | 1161 | ||
1123 | result = adapter_alloc_sq(dev, qid, nvmeq); | 1162 | result = adapter_alloc_sq(dev, qid, nvmeq); |
1124 | if (result < 0) | 1163 | if (result < 0) |
@@ -1128,19 +1167,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, | |||
1128 | if (result < 0) | 1167 | if (result < 0) |
1129 | goto release_sq; | 1168 | goto release_sq; |
1130 | 1169 | ||
1131 | return nvmeq; | 1170 | spin_lock(&nvmeq->q_lock); |
1171 | nvme_init_queue(nvmeq, qid); | ||
1172 | spin_unlock(&nvmeq->q_lock); | ||
1173 | |||
1174 | return result; | ||
1132 | 1175 | ||
1133 | release_sq: | 1176 | release_sq: |
1134 | adapter_delete_sq(dev, qid); | 1177 | adapter_delete_sq(dev, qid); |
1135 | release_cq: | 1178 | release_cq: |
1136 | adapter_delete_cq(dev, qid); | 1179 | adapter_delete_cq(dev, qid); |
1137 | free_nvmeq: | 1180 | return result; |
1138 | dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), | ||
1139 | (void *)nvmeq->cqes, nvmeq->cq_dma_addr); | ||
1140 | dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), | ||
1141 | nvmeq->sq_cmds, nvmeq->sq_dma_addr); | ||
1142 | kfree(nvmeq); | ||
1143 | return ERR_PTR(result); | ||
1144 | } | 1181 | } |
1145 | 1182 | ||
1146 | static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) | 1183 | static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) |
@@ -1221,10 +1258,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) | |||
1221 | goto free_q; | 1258 | goto free_q; |
1222 | 1259 | ||
1223 | dev->queues[0] = nvmeq; | 1260 | dev->queues[0] = nvmeq; |
1261 | spin_lock(&nvmeq->q_lock); | ||
1262 | nvme_init_queue(nvmeq, 0); | ||
1263 | spin_unlock(&nvmeq->q_lock); | ||
1224 | return result; | 1264 | return result; |
1225 | 1265 | ||
1226 | free_q: | 1266 | free_q: |
1227 | nvme_free_queue_mem(nvmeq); | 1267 | nvme_free_queue(nvmeq); |
1228 | return result; | 1268 | return result; |
1229 | } | 1269 | } |
1230 | 1270 | ||
@@ -1386,6 +1426,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) | |||
1386 | put_nvmeq(nvmeq); | 1426 | put_nvmeq(nvmeq); |
1387 | if (length != (io.nblocks + 1) << ns->lba_shift) | 1427 | if (length != (io.nblocks + 1) << ns->lba_shift) |
1388 | status = -ENOMEM; | 1428 | status = -ENOMEM; |
1429 | else if (!nvmeq || nvmeq->q_suspended) | ||
1430 | status = -EBUSY; | ||
1389 | else | 1431 | else |
1390 | status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); | 1432 | status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); |
1391 | 1433 | ||
@@ -1537,9 +1579,12 @@ static int nvme_kthread(void *data) | |||
1537 | if (!nvmeq) | 1579 | if (!nvmeq) |
1538 | continue; | 1580 | continue; |
1539 | spin_lock_irq(&nvmeq->q_lock); | 1581 | spin_lock_irq(&nvmeq->q_lock); |
1582 | if (nvmeq->q_suspended) | ||
1583 | goto unlock; | ||
1540 | nvme_process_cq(nvmeq); | 1584 | nvme_process_cq(nvmeq); |
1541 | nvme_cancel_ios(nvmeq, true); | 1585 | nvme_cancel_ios(nvmeq, true); |
1542 | nvme_resubmit_bios(nvmeq); | 1586 | nvme_resubmit_bios(nvmeq); |
1587 | unlock: | ||
1543 | spin_unlock_irq(&nvmeq->q_lock); | 1588 | spin_unlock_irq(&nvmeq->q_lock); |
1544 | } | 1589 | } |
1545 | } | 1590 | } |
@@ -1725,7 +1770,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
1725 | nr_io_queues = vecs; | 1770 | nr_io_queues = vecs; |
1726 | 1771 | ||
1727 | result = queue_request_irq(dev, dev->queues[0], "nvme admin"); | 1772 | result = queue_request_irq(dev, dev->queues[0], "nvme admin"); |
1728 | /* XXX: handle failure here */ | 1773 | if (result) |
1774 | goto free_queues; | ||
1729 | 1775 | ||
1730 | cpu = cpumask_first(cpu_online_mask); | 1776 | cpu = cpumask_first(cpu_online_mask); |
1731 | for (i = 0; i < nr_io_queues; i++) { | 1777 | for (i = 0; i < nr_io_queues; i++) { |
@@ -1736,10 +1782,11 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
1736 | q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, | 1782 | q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, |
1737 | NVME_Q_DEPTH); | 1783 | NVME_Q_DEPTH); |
1738 | for (i = 0; i < nr_io_queues; i++) { | 1784 | for (i = 0; i < nr_io_queues; i++) { |
1739 | dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); | 1785 | dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i); |
1740 | if (IS_ERR(dev->queues[i + 1])) | 1786 | if (!dev->queues[i + 1]) { |
1741 | return PTR_ERR(dev->queues[i + 1]); | 1787 | result = -ENOMEM; |
1742 | dev->queue_count++; | 1788 | goto free_queues; |
1789 | } | ||
1743 | } | 1790 | } |
1744 | 1791 | ||
1745 | for (; i < num_possible_cpus(); i++) { | 1792 | for (; i < num_possible_cpus(); i++) { |
@@ -1747,15 +1794,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
1747 | dev->queues[i + 1] = dev->queues[target + 1]; | 1794 | dev->queues[i + 1] = dev->queues[target + 1]; |
1748 | } | 1795 | } |
1749 | 1796 | ||
1750 | return 0; | 1797 | for (i = 1; i < dev->queue_count; i++) { |
1751 | } | 1798 | result = nvme_create_queue(dev->queues[i], i); |
1799 | if (result) { | ||
1800 | for (--i; i > 0; i--) | ||
1801 | nvme_disable_queue(dev, i); | ||
1802 | goto free_queues; | ||
1803 | } | ||
1804 | } | ||
1752 | 1805 | ||
1753 | static void nvme_free_queues(struct nvme_dev *dev) | 1806 | return 0; |
1754 | { | ||
1755 | int i; | ||
1756 | 1807 | ||
1757 | for (i = dev->queue_count - 1; i >= 0; i--) | 1808 | free_queues: |
1758 | nvme_free_queue(dev, i); | 1809 | nvme_free_queues(dev); |
1810 | return result; | ||
1759 | } | 1811 | } |
1760 | 1812 | ||
1761 | /* | 1813 | /* |
@@ -1887,6 +1939,10 @@ static void nvme_dev_unmap(struct nvme_dev *dev) | |||
1887 | static int nvme_dev_remove(struct nvme_dev *dev) | 1939 | static int nvme_dev_remove(struct nvme_dev *dev) |
1888 | { | 1940 | { |
1889 | struct nvme_ns *ns, *next; | 1941 | struct nvme_ns *ns, *next; |
1942 | int i; | ||
1943 | |||
1944 | for (i = dev->queue_count - 1; i >= 0; i--) | ||
1945 | nvme_disable_queue(dev, i); | ||
1890 | 1946 | ||
1891 | spin_lock(&dev_list_lock); | 1947 | spin_lock(&dev_list_lock); |
1892 | list_del(&dev->node); | 1948 | list_del(&dev->node); |
@@ -2037,7 +2093,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2037 | result = nvme_configure_admin_queue(dev); | 2093 | result = nvme_configure_admin_queue(dev); |
2038 | if (result) | 2094 | if (result) |
2039 | goto unmap; | 2095 | goto unmap; |
2040 | dev->queue_count++; | ||
2041 | 2096 | ||
2042 | spin_lock(&dev_list_lock); | 2097 | spin_lock(&dev_list_lock); |
2043 | list_add(&dev->node, &dev_list); | 2098 | list_add(&dev->node, &dev_list); |