author     Keith Busch <keith.busch@intel.com>  2014-03-03 18:39:13 -0500
committer  Matthew Wilcox <matthew.r.wilcox@intel.com>  2014-03-24 08:54:40 -0400
commit     4f5099af4f3d5f999d8ab7784472d93e810e3912 (patch)
tree       f0dc2bc897e723037ae63a5f7f41bb3bff710399 /drivers/block
parent     5a92e700af2e5e0e6404988d6a7f2ed3dad3f46f (diff)
NVMe: IOCTL path RCU protect queue access

This adds RCU-protected access to a queue in the NVMe IOCTL path to fix
potential races between a surprise removal and queue usage in
nvme_submit_sync_cmd. The fix holds rcu_read_lock() here to prevent the
nvme_queue from being freed while this path is executing. Since the path
cannot sleep under the RCU read lock, it will no longer wait for an
available command id should they all be in use at the time a passthrough
IOCTL request is received.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
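For context, a minimal sketch of the RCU pattern the fix relies on, in the
shape of the new lock_nvmeq()/unlock_nvmeq() helpers: a reader pins the
queue pointer inside an RCU read-side section, while the removal path
unpublishes the pointer and waits for readers before freeing it. The demo_*
names and the removal-side function are hypothetical illustrations, not code
from this patch; the driver's actual teardown path is outside this diff.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_queue {
	int qid;
};

struct demo_dev {
	struct demo_queue __rcu *queues[2];	/* pointers published under RCU */
};

/* Reader side: pin the queue for the duration of the RCU read section. */
static struct demo_queue *demo_lock_queue(struct demo_dev *dev, int idx)
							__acquires(RCU)
{
	rcu_read_lock();	/* must not sleep until demo_unlock_queue() */
	return rcu_dereference(dev->queues[idx]);	/* may be NULL after removal */
}

static void demo_unlock_queue(struct demo_queue *q) __releases(RCU)
{
	rcu_read_unlock();
}

/* Removal side: unpublish, wait out current readers, then free safely. */
static void demo_remove_queue(struct demo_dev *dev, int idx)
{
	/* caller guarantees no concurrent updaters here */
	struct demo_queue *q = rcu_dereference_protected(dev->queues[idx], 1);

	RCU_INIT_POINTER(dev->queues[idx], NULL);
	synchronize_rcu();	/* every in-flight reader has unlocked */
	kfree(q);
}

Because the reader may not block between demo_lock_queue() and
demo_unlock_queue(), the patch switches the sync path from
alloc_cmdid_killable() to the non-sleeping alloc_cmdid() and returns
-EBUSY or -ENODEV instead of waiting, as the diff below shows.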
Diffstat (limited to 'drivers/block')
 -rw-r--r--  drivers/block/nvme-core.c  82
 -rw-r--r--  drivers/block/nvme-scsi.c  31
 2 files changed, 59 insertions, 54 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index b66ab1db4629..04664cadadfa 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -268,18 +268,30 @@ static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid)
 	return rcu_dereference_raw(dev->queues[qid]);
 }
 
-struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
+static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU)
 {
 	rcu_read_lock();
 	return rcu_dereference(dev->queues[get_cpu() + 1]);
 }
 
-void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
+static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
 {
 	put_cpu();
 	rcu_read_unlock();
 }
 
+static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx)
+							__acquires(RCU)
+{
+	rcu_read_lock();
+	return rcu_dereference(dev->queues[q_idx]);
+}
+
+static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU)
+{
+	rcu_read_unlock();
+}
+
 /**
  * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
@@ -292,6 +304,10 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 	unsigned long flags;
 	u16 tail;
 	spin_lock_irqsave(&nvmeq->q_lock, flags);
+	if (nvmeq->q_suspended) {
+		spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+		return -EBUSY;
+	}
 	tail = nvmeq->sq_tail;
 	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
 	if (++tail == nvmeq->q_depth)
@@ -812,27 +828,46 @@ static void sync_completion(struct nvme_dev *dev, void *ctx,
  * Returns 0 on success. If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
  */
-int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
+static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx,
+						struct nvme_command *cmd,
 						u32 *result, unsigned timeout)
 {
-	int cmdid;
+	int cmdid, ret;
 	struct sync_cmd_info cmdinfo;
+	struct nvme_queue *nvmeq;
+
+	nvmeq = lock_nvmeq(dev, q_idx);
+	if (!nvmeq) {
+		unlock_nvmeq(nvmeq);
+		return -ENODEV;
+	}
 
 	cmdinfo.task = current;
 	cmdinfo.status = -EINTR;
 
-	cmdid = alloc_cmdid_killable(nvmeq, &cmdinfo, sync_completion,
-								timeout);
-	if (cmdid < 0)
+	cmdid = alloc_cmdid(nvmeq, &cmdinfo, sync_completion, timeout);
+	if (cmdid < 0) {
+		unlock_nvmeq(nvmeq);
 		return cmdid;
+	}
 	cmd->common.command_id = cmdid;
 
 	set_current_state(TASK_KILLABLE);
-	nvme_submit_cmd(nvmeq, cmd);
+	ret = nvme_submit_cmd(nvmeq, cmd);
+	if (ret) {
+		free_cmdid(nvmeq, cmdid, NULL);
+		unlock_nvmeq(nvmeq);
+		set_current_state(TASK_RUNNING);
+		return ret;
+	}
+	unlock_nvmeq(nvmeq);
 	schedule_timeout(timeout);
 
 	if (cmdinfo.status == -EINTR) {
-		nvme_abort_command(nvmeq, cmdid);
+		nvmeq = lock_nvmeq(dev, q_idx);
+		if (nvmeq)
+			nvme_abort_command(nvmeq, cmdid);
+		unlock_nvmeq(nvmeq);
 		return -EINTR;
 	}
 
@@ -853,15 +888,20 @@ static int nvme_submit_async_cmd(struct nvme_queue *nvmeq,
 		return cmdid;
 	cmdinfo->status = -EINTR;
 	cmd->common.command_id = cmdid;
-	nvme_submit_cmd(nvmeq, cmd);
-	return 0;
+	return nvme_submit_cmd(nvmeq, cmd);
 }
 
 int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
 						u32 *result)
 {
-	return nvme_submit_sync_cmd(raw_nvmeq(dev, 0), cmd, result,
-								ADMIN_TIMEOUT);
+	return nvme_submit_sync_cmd(dev, 0, cmd, result, ADMIN_TIMEOUT);
+}
+
+int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
+						u32 *result)
+{
+	return nvme_submit_sync_cmd(dev, smp_processor_id() + 1, cmd, result,
+							NVME_IO_TIMEOUT);
 }
 
 static int nvme_submit_admin_cmd_async(struct nvme_dev *dev,
@@ -1434,7 +1474,6 @@ void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_dev *dev = ns->dev;
-	struct nvme_queue *nvmeq;
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length, meta_len;
@@ -1510,20 +1549,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL);
 
-	nvmeq = get_nvmeq(dev);
-	/*
-	 * Since nvme_submit_sync_cmd sleeps, we can't keep preemption
-	 * disabled. We may be preempted at any point, and be rescheduled
-	 * to a different CPU. That will cause cacheline bouncing, but no
-	 * additional races since q_lock already protects against other CPUs.
-	 */
-	put_nvmeq(nvmeq);
 	if (length != (io.nblocks + 1) << ns->lba_shift)
 		status = -ENOMEM;
-	else if (!nvmeq || nvmeq->q_suspended)
-		status = -EBUSY;
 	else
-		status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+		status = nvme_submit_io_cmd(dev, &c, NULL);
 
 	if (meta_len) {
 		if (status == NVME_SC_SUCCESS && !(io.opcode & 1)) {
@@ -1597,8 +1626,7 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev,
 	if (length != cmd.data_len)
 		status = -ENOMEM;
 	else
-		status = nvme_submit_sync_cmd(raw_nvmeq(dev, 0), &c,
-						&cmd.result, timeout);
+		status = nvme_submit_sync_cmd(dev, 0, &c, &cmd.result, timeout);
 
 	if (cmd.data_len) {
 		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 4a0ceb64e269..e157e85bb5d7 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -2033,7 +2033,6 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res = SNTI_TRANSLATION_SUCCESS;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	struct nvme_queue *nvmeq;
 	u32 num_cmds;
 	struct nvme_iod *iod;
 	u64 unit_len;
@@ -2106,18 +2105,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 		nvme_offset += unit_num_blocks;
 
-		nvmeq = get_nvmeq(dev);
-		/*
-		 * Since nvme_submit_sync_cmd sleeps, we can't keep
-		 * preemption disabled. We may be preempted at any
-		 * point, and be rescheduled to a different CPU. That
-		 * will cause cacheline bouncing, but no additional
-		 * races since q_lock already protects against other
-		 * CPUs.
-		 */
-		put_nvmeq(nvmeq);
-		nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL,
-						NVME_IO_TIMEOUT);
+		nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
 		if (nvme_sc != NVME_SC_SUCCESS) {
 			nvme_unmap_user_pages(dev,
 				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2644,7 +2632,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
 	int res = SNTI_TRANSLATION_SUCCESS;
 	int nvme_sc;
-	struct nvme_queue *nvmeq;
 	struct nvme_command c;
 	u8 immed, pcmod, pc, no_flush, start;
 
@@ -2671,10 +2658,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		c.common.opcode = nvme_cmd_flush;
 		c.common.nsid = cpu_to_le32(ns->ns_id);
 
-		nvmeq = get_nvmeq(ns->dev);
-		put_nvmeq(nvmeq);
-		nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
-
+		nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			goto out;
@@ -2697,15 +2681,12 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
 	int res = SNTI_TRANSLATION_SUCCESS;
 	int nvme_sc;
 	struct nvme_command c;
-	struct nvme_queue *nvmeq;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_cmd_flush;
 	c.common.nsid = cpu_to_le32(ns->ns_id);
 
-	nvmeq = get_nvmeq(ns->dev);
-	put_nvmeq(nvmeq);
-	nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+	nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL);
 
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
@@ -2872,7 +2853,6 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_dev *dev = ns->dev;
 	struct scsi_unmap_parm_list *plist;
 	struct nvme_dsm_range *range;
-	struct nvme_queue *nvmeq;
 	struct nvme_command c;
 	int i, nvme_sc, res = -ENOMEM;
 	u16 ndesc, list_len;
@@ -2914,10 +2894,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	c.dsm.nr = cpu_to_le32(ndesc - 1);
 	c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-	nvmeq = get_nvmeq(dev);
-	put_nvmeq(nvmeq);
-
-	nvme_sc = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
+	nvme_sc = nvme_submit_io_cmd(dev, &c, NULL);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
 	dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),