Diffstat (limited to 'drivers/nvme/host/pci.c')

 drivers/nvme/host/pci.c | 126
 1 file changed, 81 insertions(+), 45 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5a0bf6a24d50..7fee665ec45e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -95,6 +95,7 @@ struct nvme_dev;
 struct nvme_queue;
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
 
 /*
  * Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -1019,9 +1020,11 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
 
 static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 {
-	if (++nvmeq->cq_head == nvmeq->q_depth) {
+	if (nvmeq->cq_head == nvmeq->q_depth - 1) {
 		nvmeq->cq_head = 0;
 		nvmeq->cq_phase = !nvmeq->cq_phase;
+	} else {
+		nvmeq->cq_head++;
 	}
 }
 
@@ -1420,6 +1423,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	return 0;
 }
 
+static void nvme_suspend_io_queues(struct nvme_dev *dev)
+{
+	int i;
+
+	for (i = dev->ctrl.queue_count - 1; i > 0; i--)
+		nvme_suspend_queue(&dev->queues[i]);
+}
+
 static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
 	struct nvme_queue *nvmeq = &dev->queues[0];
@@ -1485,8 +1496,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 	if (dev->ctrl.queue_count > qid)
 		return 0;
 
-	nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
-					  &nvmeq->cq_dma_addr, GFP_KERNEL);
+	nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth),
+					 &nvmeq->cq_dma_addr, GFP_KERNEL);
 	if (!nvmeq->cqes)
 		goto free_nvmeq;
 
@@ -1885,8 +1896,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 		struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
 		size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
 
-		dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
-				  le64_to_cpu(desc->addr));
+		dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i],
+			       le64_to_cpu(desc->addr),
+			       DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
 	}
 
 	kfree(dev->host_mem_desc_bufs);
@@ -1915,8 +1927,8 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
 	if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
 		max_entries = dev->ctrl.hmmaxd;
 
-	descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
-				    &descs_dma, GFP_KERNEL);
+	descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
+				   &descs_dma, GFP_KERNEL);
 	if (!descs)
 		goto out;
 
@@ -1952,8 +1964,9 @@ out_free_bufs:
 	while (--i >= 0) {
 		size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
 
-		dma_free_coherent(dev->dev, size, bufs[i],
-				  le64_to_cpu(descs[i].addr));
+		dma_free_attrs(dev->dev, size, bufs[i],
+			       le64_to_cpu(descs[i].addr),
+			       DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
 	}
 
 	kfree(bufs);
@@ -2028,14 +2041,18 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
 	return ret;
 }
 
+/* irq_queues covers admin queue */
 static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
 {
 	unsigned int this_w_queues = write_queues;
 
+	WARN_ON(!irq_queues);
+
 	/*
-	 * Setup read/write queue split
+	 * Setup read/write queue split, assign admin queue one independent
+	 * irq vector if irq_queues is > 1.
 	 */
-	if (irq_queues == 1) {
+	if (irq_queues <= 2) {
 		dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
 		dev->io_queues[HCTX_TYPE_READ] = 0;
 		return;
@@ -2043,21 +2060,21 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
 
 	/*
 	 * If 'write_queues' is set, ensure it leaves room for at least
-	 * one read queue
+	 * one read queue and one admin queue
 	 */
 	if (this_w_queues >= irq_queues)
-		this_w_queues = irq_queues - 1;
+		this_w_queues = irq_queues - 2;
 
 	/*
 	 * If 'write_queues' is set to zero, reads and writes will share
 	 * a queue set.
 	 */
 	if (!this_w_queues) {
-		dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues;
+		dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues - 1;
 		dev->io_queues[HCTX_TYPE_READ] = 0;
 	} else {
 		dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
-		dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues;
+		dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues - 1;
 	}
 }
 
@@ -2082,7 +2099,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
 		this_p_queues = nr_io_queues - 1;
 		irq_queues = 1;
 	} else {
-		irq_queues = nr_io_queues - this_p_queues;
+		irq_queues = nr_io_queues - this_p_queues + 1;
 	}
 	dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
 
@@ -2102,8 +2119,9 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
 		 * If we got a failure and we're down to asking for just
 		 * 1 + 1 queues, just ask for a single vector. We'll share
 		 * that between the single IO queue and the admin queue.
+		 * Otherwise, we assign one independent vector to admin queue.
 		 */
-		if (result >= 0 && irq_queues > 1)
+		if (irq_queues > 1)
 			irq_queues = irq_sets[0] + irq_sets[1] + 1;
 
 		result = pci_alloc_irq_vectors_affinity(pdev, irq_queues,
@@ -2132,6 +2150,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
 	return result;
 }
 
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+	if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq))
+		__nvme_disable_io_queues(dev, nvme_admin_delete_cq);
+}
+
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = &dev->queues[0];
@@ -2168,6 +2192,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	} while (1);
 	adminq->q_db = dev->dbs;
 
+ retry:
 	/* Deregister the admin queue's interrupt */
 	pci_free_irq(pdev, 0, adminq);
 
@@ -2185,25 +2210,34 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 		result = max(result - 1, 1);
 		dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
 
-	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
-					dev->io_queues[HCTX_TYPE_DEFAULT],
-					dev->io_queues[HCTX_TYPE_READ],
-					dev->io_queues[HCTX_TYPE_POLL]);
-
 	/*
 	 * Should investigate if there's a performance win from allocating
 	 * more queues than interrupt vectors; it might allow the submission
 	 * path to scale better, even if the receive path is limited by the
 	 * number of interrupts.
 	 */
-
 	result = queue_request_irq(adminq);
 	if (result) {
 		adminq->cq_vector = -1;
 		return result;
 	}
 	set_bit(NVMEQ_ENABLED, &adminq->flags);
-	return nvme_create_io_queues(dev);
+
+	result = nvme_create_io_queues(dev);
+	if (result || dev->online_queues < 2)
+		return result;
+
+	if (dev->online_queues - 1 < dev->max_qid) {
+		nr_io_queues = dev->online_queues - 1;
+		nvme_disable_io_queues(dev);
+		nvme_suspend_io_queues(dev);
+		goto retry;
+	}
+	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
+					dev->io_queues[HCTX_TYPE_DEFAULT],
+					dev->io_queues[HCTX_TYPE_READ],
+					dev->io_queues[HCTX_TYPE_POLL]);
+	return 0;
 }
 
 static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2248,7 +2282,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 	return 0;
 }
 
-static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
 {
 	int nr_queues = dev->online_queues - 1, sent = 0;
 	unsigned long timeout;
@@ -2294,7 +2328,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		dev->tagset.nr_maps = 2; /* default + read */
 		if (dev->io_queues[HCTX_TYPE_POLL])
 			dev->tagset.nr_maps++;
-		dev->tagset.nr_maps = HCTX_MAX_TYPES;
 		dev->tagset.timeout = NVME_IO_TIMEOUT;
 		dev->tagset.numa_node = dev_to_node(dev->dev);
 		dev->tagset.queue_depth =
@@ -2410,7 +2443,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
-	int i;
 	bool dead = true;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -2437,13 +2469,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_stop_queues(&dev->ctrl);
 
 	if (!dead && dev->ctrl.queue_count > 0) {
-		if (nvme_disable_io_queues(dev, nvme_admin_delete_sq))
-			nvme_disable_io_queues(dev, nvme_admin_delete_cq);
+		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
 	}
-	for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
-		nvme_suspend_queue(&dev->queues[i]);
-
+	nvme_suspend_io_queues(dev);
+	nvme_suspend_queue(&dev->queues[0]);
 	nvme_pci_disable(dev);
 
 	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
@@ -2527,27 +2557,18 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
-	/*
-	 * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
-	 * initializing procedure here.
-	 */
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
-		dev_warn(dev->ctrl.device,
-			"failed to mark controller CONNECTING\n");
-		goto out;
-	}
-
+	mutex_lock(&dev->shutdown_lock);
 	result = nvme_pci_enable(dev);
 	if (result)
-		goto out;
+		goto out_unlock;
 
 	result = nvme_pci_configure_admin_queue(dev);
 	if (result)
-		goto out;
+		goto out_unlock;
 
 	result = nvme_alloc_admin_tags(dev);
 	if (result)
-		goto out;
+		goto out_unlock;
 
 	/*
 	 * Limit the max command size to prevent iod->sg allocations going
@@ -2555,6 +2576,17 @@ static void nvme_reset_work(struct work_struct *work)
 	 */
 	dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
 	dev->ctrl.max_segments = NVME_MAX_SEGS;
+	mutex_unlock(&dev->shutdown_lock);
+
+	/*
+	 * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
+	 * initializing procedure here.
+	 */
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
+		dev_warn(dev->ctrl.device,
+			"failed to mark controller CONNECTING\n");
+		goto out;
+	}
 
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
@@ -2619,6 +2651,8 @@ static void nvme_reset_work(struct work_struct *work)
 	nvme_start_ctrl(&dev->ctrl);
 	return;
 
+ out_unlock:
+	mutex_unlock(&dev->shutdown_lock);
  out:
 	nvme_remove_dead_ctrl(dev, result);
 }
@@ -2946,6 +2980,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
 		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
 				NVME_QUIRK_MEDIUM_PRIO_SQ },
+	{ PCI_VDEVICE(INTEL, 0xf1a6),	/* Intel 760p/Pro 7600p */
+		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1bb1, 0x0100),	/* Seagate Nytro Flash Storage */