author	Logan Gunthorpe <logang@deltatee.com>	2018-10-04 17:27:43 -0400
committer	Bjorn Helgaas <bhelgaas@google.com>	2018-10-17 13:18:21 -0400
commit	0f238ff5cc92554fe8ddc6c3776386f31a4d38fa (patch)
tree	9f8640a038ba227ba7db1b7ccb0fe9cbf410a982
parent	50b7d22079f74571a0fa73420586a7ad1ffebe2f (diff)
nvme-pci: Use PCI p2pmem subsystem to manage the CMB
Register the CMB buffer as p2pmem and use the appropriate allocation
functions to create and destroy the IO submission queues.

If the CMB supports WDS and RDS, publish it for use as P2P memory by
other devices.

Kernels without CONFIG_PCI_P2PDMA will also no longer support NVMe CMB.
However, seeing that the main use case for the CMB is P2P operations,
this seems like a reasonable dependency.

We drop the __iomem safety on the buffer because, by convention, it is
safe to directly access memory mapped by memremap()/devm_memremap_pages().
Architectures where this is not safe will not be supported by memremap()
and will therefore support neither PCI P2P nor the CMB.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
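The provider-side change described in the first two paragraphs is small. As a minimal sketch (the function name is hypothetical; it abbreviates the new tail of nvme_map_cmb() from the diff below, omitting the size/offset computation and the sysfs registration), registering a CMB BAR region with the P2PDMA subsystem and publishing it to peers looks like this:

static void map_cmb_sketch(struct nvme_dev *dev, struct pci_dev *pdev,
                           int bar, u64 size, u64 offset)
{
        /* Hand the BAR region over to the P2PDMA subsystem. */
        if (pci_p2pdma_add_resource(pdev, bar, size, offset)) {
                dev_warn(dev->ctrl.device, "failed to register the CMB\n");
                return;
        }

        dev->cmb_size = size;
        dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS);

        /*
         * Advertise the memory to peer devices only if the controller
         * supports both writes (WDS) and reads (RDS) of data in the CMB.
         */
        if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
            (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
                pci_p2pmem_publish(pdev, true);
}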
-rw-r--r--	drivers/nvme/host/pci.c | 80
1 file changed, 45 insertions(+), 35 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d668682f91df..f434706a04e8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -30,6 +30,7 @@
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/sed-opal.h>
+#include <linux/pci-p2pdma.h>
 
 #include "nvme.h"
 
@@ -99,9 +100,8 @@ struct nvme_dev {
 	struct work_struct remove_work;
 	struct mutex shutdown_lock;
 	bool subsystem;
-	void __iomem *cmb;
-	pci_bus_addr_t cmb_bus_addr;
 	u64 cmb_size;
+	bool cmb_use_sqes;
 	u32 cmbsz;
 	u32 cmbloc;
 	struct nvme_ctrl ctrl;
@@ -158,7 +158,7 @@ struct nvme_queue {
 	struct nvme_dev *dev;
 	spinlock_t sq_lock;
 	struct nvme_command *sq_cmds;
-	struct nvme_command __iomem *sq_cmds_io;
+	bool sq_cmds_is_io;
 	spinlock_t cq_lock ____cacheline_aligned_in_smp;
 	volatile struct nvme_completion *cqes;
 	struct blk_mq_tags **tags;
@@ -447,11 +447,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
 	spin_lock(&nvmeq->sq_lock);
-	if (nvmeq->sq_cmds_io)
-		memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd,
-				sizeof(*cmd));
-	else
-		memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+
+	memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
 
 	if (++nvmeq->sq_tail == nvmeq->q_depth)
 		nvmeq->sq_tail = 0;
@@ -1232,9 +1229,18 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
 	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-	if (nvmeq->sq_cmds)
-		dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+
+	if (nvmeq->sq_cmds) {
+		if (nvmeq->sq_cmds_is_io)
+			pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
+					nvmeq->sq_cmds,
+					SQ_SIZE(nvmeq->q_depth));
+		else
+			dma_free_coherent(nvmeq->q_dmadev,
+					  SQ_SIZE(nvmeq->q_depth),
+					  nvmeq->sq_cmds,
+					  nvmeq->sq_dma_addr);
+	}
 }
 
 static void nvme_free_queues(struct nvme_dev *dev, int lowest)
@@ -1323,12 +1329,21 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
 static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 			      int qid, int depth)
 {
-	/* CMB SQEs will be mapped before creation */
-	if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
-		return 0;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+		nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+		nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
+							    nvmeq->sq_cmds);
+		nvmeq->sq_cmds_is_io = true;
+	}
+
+	if (!nvmeq->sq_cmds) {
+		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+					&nvmeq->sq_dma_addr, GFP_KERNEL);
+		nvmeq->sq_cmds_is_io = false;
+	}
 
-	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
-					    &nvmeq->sq_dma_addr, GFP_KERNEL);
 	if (!nvmeq->sq_cmds)
 		return -ENOMEM;
 	return 0;
@@ -1405,13 +1420,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	int result;
 	s16 vector;
 
-	if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
-		unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
-						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
-		nvmeq->sq_cmds_io = dev->cmb + offset;
-	}
-
 	/*
 	 * A queue's vector matches the queue identifier unless the controller
 	 * has only one vector available.
@@ -1652,9 +1660,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 		return;
 	dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
 
-	if (!use_cmb_sqes)
-		return;
-
 	size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
 	offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
 	bar = NVME_CMB_BIR(dev->cmbloc);
@@ -1671,11 +1676,18 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 	if (size > bar_size - offset)
 		size = bar_size - offset;
 
-	dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
-	if (!dev->cmb)
+	if (pci_p2pdma_add_resource(pdev, bar, size, offset)) {
+		dev_warn(dev->ctrl.device,
+			 "failed to register the CMB\n");
 		return;
-	dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
+	}
+
 	dev->cmb_size = size;
+	dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS);
+
+	if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
+			(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
+		pci_p2pmem_publish(pdev, true);
 
 	if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
 				    &dev_attr_cmb.attr, NULL))
@@ -1685,12 +1697,10 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 
 static inline void nvme_release_cmb(struct nvme_dev *dev)
 {
-	if (dev->cmb) {
-		iounmap(dev->cmb);
-		dev->cmb = NULL;
+	if (dev->cmb_size) {
 		sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
 					     &dev_attr_cmb.attr, NULL);
-		dev->cmbsz = 0;
+		dev->cmb_size = 0;
 	}
 }
 
@@ -1889,13 +1899,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (nr_io_queues == 0)
 		return 0;
 
-	if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+	if (dev->cmb_use_sqes) {
 		result = nvme_cmb_qdepth(dev, nr_io_queues,
 				sizeof(struct nvme_command));
 		if (result > 0)
			dev->q_depth = result;
 		else
-			nvme_release_cmb(dev);
+			dev->cmb_use_sqes = false;
 	}
 
 	do {
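One invariant worth calling out: after this patch a submission queue buffer can come from either pci_alloc_p2pmem() or dma_alloc_coherent(), and it must be returned to the allocator it came from, which is exactly what the new sq_cmds_is_io flag records. A condensed sketch of that paired discipline, using only names from the hunks above (error handling reduced to the host-memory fallback):

        /*
         * Allocate: prefer CMB p2pmem; fall back to DMA-coherent host
         * memory when pci_alloc_p2pmem() returns NULL.
         */
        nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
        if (nvmeq->sq_cmds) {
                nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
                                                            nvmeq->sq_cmds);
                nvmeq->sq_cmds_is_io = true;
        } else {
                nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
                                                    &nvmeq->sq_dma_addr,
                                                    GFP_KERNEL);
                nvmeq->sq_cmds_is_io = false;
        }

        /* Free: the flag selects the allocator the buffer came from. */
        if (nvmeq->sq_cmds_is_io)
                pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev), nvmeq->sq_cmds,
                                SQ_SIZE(nvmeq->q_depth));
        else
                dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
                                  nvmeq->sq_cmds, nvmeq->sq_dma_addr);

Because p2pmem is backed by devm_memremap_pages(), both cases are plain kernel pointers, which is why nvme_submit_cmd() can drop the memcpy_toio() branch and always use memcpy().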