author    Keith Busch <keith.busch@intel.com>    2016-01-12 16:41:17 -0500
committer Jens Axboe <axboe@fb.com>              2016-01-12 16:47:56 -0500
commit    db3cbfff5bcc0b9a82d8c71f00b9d60fad215871
tree      7c625cc2d49994454d56ae7c9823b598bb6203cd
parent    25646264e15af96c5c630fc742708b1eb3339222
NVMe: IO queue deletion re-write

The nvme driver deletes IO queues asynchronously, since this operation
may take an undesirable amount of time with a large number of queues if
done serially. The driver previously coordinated these asynchronous
deletions itself. This patch simplifies that by leveraging the block
layer rather than kthread workers and chained callbacks.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
 drivers/nvme/host/pci.c | 251
 1 file changed, 81 insertions(+), 170 deletions(-)
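In outline, the new path (shown in full in the hunks below) drives queue deletion through the block layer: each delete command is submitted as a no-wait admin request, its end_io callback signals the per-device completion dev->ioq_wait, and nvme_disable_io_queues() waits once per command it managed to send. The following is only a condensed sketch of that flow; the CQ-reaping end_io variant, queue suspension, and the timeout/retry accounting are omitted, and the authoritative code is the diff itself.

/* Condensed sketch -- not the literal patch text; see the hunks below. */
static void nvme_del_queue_end(struct request *req, int error)
{
	struct nvme_queue *nvmeq = req->end_io_data;

	blk_mq_free_request(req);
	complete(&nvmeq->dev->ioq_wait);	/* one completion per finished delete */
}

static void nvme_disable_io_queues(struct nvme_dev *dev)
{
	u8 opcode = nvme_admin_delete_sq;
	int pass;

	/* Pass 0 deletes the submission queues, pass 1 the completion queues. */
	for (pass = 0; pass < 2; pass++) {
		int sent = 0, i;

		reinit_completion(&dev->ioq_wait);
		/* Fire a delete command at every IO queue without blocking... */
		for (i = dev->queue_count - 1; i > 0; i--)
			if (!nvme_delete_queue(dev->queues[i], opcode))
				sent++;
		/* ...then wait for exactly as many completions as were sent. */
		while (sent--)
			wait_for_completion_io_timeout(&dev->ioq_wait, ADMIN_TIMEOUT);
		opcode = nvme_admin_delete_cq;
	}
}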
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 953fe485a258..72f284ff42b6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -89,13 +89,6 @@ static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
 static void nvme_dev_shutdown(struct nvme_dev *dev);
 
-struct async_cmd_info {
-	struct kthread_work work;
-	struct kthread_worker *worker;
-	int status;
-	void *ctx;
-};
-
 /*
  * Represents an NVM Express device. Each nvme_dev is a PCI function.
  */
@@ -125,9 +118,11 @@ struct nvme_dev {
 	u64 cmb_size;
 	u32 cmbsz;
 	unsigned long flags;
+
 #define NVME_CTRL_RESETTING	0
 
 	struct nvme_ctrl ctrl;
+	struct completion ioq_wait;
 };
 
 static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
@@ -159,7 +154,6 @@ struct nvme_queue {
 	u16 qid;
 	u8 cq_phase;
 	u8 cqe_seen;
-	struct async_cmd_info cmdinfo;
 };
 
 /*
@@ -844,15 +838,6 @@ static void nvme_submit_async_event(struct nvme_dev *dev)
 	__nvme_submit_cmd(dev->queues[0], &c);
 }
 
-static void async_cmd_info_endio(struct request *req, int error)
-{
-	struct async_cmd_info *cmdinfo = req->end_io_data;
-
-	cmdinfo->status = req->errors;
-	queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
-	blk_mq_free_request(req);
-}
-
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
 	struct nvme_command c;
@@ -1600,6 +1585,84 @@ static void nvme_dev_scan(struct work_struct *work)
 	nvme_set_irq_hints(dev);
 }
 
+static void nvme_del_queue_end(struct request *req, int error)
+{
+	struct nvme_queue *nvmeq = req->end_io_data;
+
+	blk_mq_free_request(req);
+	complete(&nvmeq->dev->ioq_wait);
+}
+
+static void nvme_del_cq_end(struct request *req, int error)
+{
+	struct nvme_queue *nvmeq = req->end_io_data;
+
+	if (!error) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&nvmeq->q_lock, flags);
+		nvme_process_cq(nvmeq);
+		spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+	}
+
+	nvme_del_queue_end(req, error);
+}
+
+static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
+{
+	struct request_queue *q = nvmeq->dev->ctrl.admin_q;
+	struct request *req;
+	struct nvme_command cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.delete_queue.opcode = opcode;
+	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
+
+	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->timeout = ADMIN_TIMEOUT;
+	req->end_io_data = nvmeq;
+
+	blk_execute_rq_nowait(q, NULL, req, false,
+			opcode == nvme_admin_delete_cq ?
+				nvme_del_cq_end : nvme_del_queue_end);
+	return 0;
+}
+
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+	int pass;
+	unsigned long timeout;
+	u8 opcode = nvme_admin_delete_sq;
+
+	for (pass = 0; pass < 2; pass++) {
+		int sent = 0, i = dev->queue_count - 1;
+
+		reinit_completion(&dev->ioq_wait);
+ retry:
+		timeout = ADMIN_TIMEOUT;
+		for (; i > 0; i--) {
+			struct nvme_queue *nvmeq = dev->queues[i];
+
+			if (!pass)
+				nvme_suspend_queue(nvmeq);
+			if (nvme_delete_queue(nvmeq, opcode))
+				break;
+			++sent;
+		}
+		while (sent--) {
+			timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
+			if (timeout == 0)
+				return;
+			if (i)
+				goto retry;
+		}
+		opcode = nvme_admin_delete_cq;
+	}
+}
+
 /*
  * Return: error value if an error occurred setting up the queues or calling
  * Identify Device. 0 if these succeeded, even if adding some of the
@@ -1711,159 +1774,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
 	}
 }
 
-struct nvme_delq_ctx {
-	struct task_struct *waiter;
-	struct kthread_worker *worker;
-	atomic_t refcount;
-};
-
-static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
-{
-	dq->waiter = current;
-	mb();
-
-	for (;;) {
-		set_current_state(TASK_KILLABLE);
-		if (!atomic_read(&dq->refcount))
-			break;
-		if (!schedule_timeout(ADMIN_TIMEOUT) ||
-					fatal_signal_pending(current)) {
-			/*
-			 * Disable the controller first since we can't trust it
-			 * at this point, but leave the admin queue enabled
-			 * until all queue deletion requests are flushed.
-			 * FIXME: This may take a while if there are more h/w
-			 * queues than admin tags.
-			 */
-			set_current_state(TASK_RUNNING);
-			nvme_disable_ctrl(&dev->ctrl,
-				lo_hi_readq(dev->bar + NVME_REG_CAP));
-			nvme_clear_queue(dev->queues[0]);
-			flush_kthread_worker(dq->worker);
-			nvme_disable_queue(dev, 0);
-			return;
-		}
-	}
-	set_current_state(TASK_RUNNING);
-}
-
-static void nvme_put_dq(struct nvme_delq_ctx *dq)
-{
-	atomic_dec(&dq->refcount);
-	if (dq->waiter)
-		wake_up_process(dq->waiter);
-}
-
-static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
-{
-	atomic_inc(&dq->refcount);
-	return dq;
-}
-
-static void nvme_del_queue_end(struct nvme_queue *nvmeq)
-{
-	struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
-	nvme_put_dq(dq);
-
-	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
-	spin_unlock_irq(&nvmeq->q_lock);
-}
-
-static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
-						kthread_work_func_t fn)
-{
-	struct request *req;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.delete_queue.opcode = opcode;
-	c.delete_queue.qid = cpu_to_le16(nvmeq->qid);
-
-	init_kthread_work(&nvmeq->cmdinfo.work, fn);
-
-	req = nvme_alloc_request(nvmeq->dev->ctrl.admin_q, &c, 0);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-
-	req->timeout = ADMIN_TIMEOUT;
-	req->end_io_data = &nvmeq->cmdinfo;
-	blk_execute_rq_nowait(req->q, NULL, req, 0, async_cmd_info_endio);
-	return 0;
-}
-
-static void nvme_del_cq_work_handler(struct kthread_work *work)
-{
-	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-						cmdinfo.work);
-	nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_cq(struct nvme_queue *nvmeq)
-{
-	return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
-						nvme_del_cq_work_handler);
-}
-
-static void nvme_del_sq_work_handler(struct kthread_work *work)
-{
-	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-						cmdinfo.work);
-	int status = nvmeq->cmdinfo.status;
-
-	if (!status)
-		status = nvme_delete_cq(nvmeq);
-	if (status)
-		nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_sq(struct nvme_queue *nvmeq)
-{
-	return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
-						nvme_del_sq_work_handler);
-}
-
-static void nvme_del_queue_start(struct kthread_work *work)
-{
-	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
-						cmdinfo.work);
-	if (nvme_delete_sq(nvmeq))
-		nvme_del_queue_end(nvmeq);
-}
-
-static void nvme_disable_io_queues(struct nvme_dev *dev)
-{
-	int i;
-	DEFINE_KTHREAD_WORKER_ONSTACK(worker);
-	struct nvme_delq_ctx dq;
-	struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
-					&worker, "nvme%d", dev->ctrl.instance);
-
-	if (IS_ERR(kworker_task)) {
-		dev_err(dev->dev,
-			"Failed to create queue del task\n");
-		for (i = dev->queue_count - 1; i > 0; i--)
-			nvme_disable_queue(dev, i);
-		return;
-	}
-
-	dq.waiter = NULL;
-	atomic_set(&dq.refcount, 0);
-	dq.worker = &worker;
-	for (i = dev->queue_count - 1; i > 0; i--) {
-		struct nvme_queue *nvmeq = dev->queues[i];
-
-		if (nvme_suspend_queue(nvmeq))
-			continue;
-		nvmeq->cmdinfo.ctx = nvme_get_dq(&dq);
-		nvmeq->cmdinfo.worker = dq.worker;
-		init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start);
-		queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work);
-	}
-	nvme_wait_dq(&dq, dev);
-	kthread_stop(kworker_task);
-}
-
 static int nvme_dev_list_add(struct nvme_dev *dev)
 {
 	bool start_thread = false;
@@ -2146,6 +2056,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_WORK(&dev->reset_work, nvme_reset_work);
 	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
 	mutex_init(&dev->shutdown_lock);
+	init_completion(&dev->ioq_wait);
 
 	result = nvme_setup_prp_pools(dev);
 	if (result)