Diffstat (limited to 'drivers/block/nvme.c')
 -rw-r--r--  drivers/block/nvme.c  153
 1 files changed, 103 insertions, 50 deletions
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 38a2d0631882..ad16c68c8645 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -79,6 +79,7 @@ struct nvme_dev {
 	char serial[20];
 	char model[40];
 	char firmware_rev[8];
+	u32 max_hw_sectors;
 };
 
 /*
@@ -835,15 +836,15 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
 }
 
 static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
-				unsigned dword11, dma_addr_t dma_addr)
+				unsigned nsid, dma_addr_t dma_addr)
 {
 	struct nvme_command c;
 
 	memset(&c, 0, sizeof(c));
 	c.features.opcode = nvme_admin_get_features;
+	c.features.nsid = cpu_to_le32(nsid);
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
-	c.features.dword11 = cpu_to_le32(dword11);
 
 	return nvme_submit_admin_cmd(dev, &c, NULL);
 }
@@ -862,11 +863,51 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid,
 	return nvme_submit_admin_cmd(dev, &c, result);
 }
 
+/**
+ * nvme_cancel_ios - Cancel outstanding I/Os
+ * @queue: The queue to cancel I/Os on
+ * @timeout: True to only cancel I/Os which have timed out
+ */
+static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
+{
+	int depth = nvmeq->q_depth - 1;
+	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
+	unsigned long now = jiffies;
+	int cmdid;
+
+	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
+		void *ctx;
+		nvme_completion_fn fn;
+		static struct nvme_completion cqe = {
+			.status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1,
+		};
+
+		if (timeout && !time_after(now, info[cmdid].timeout))
+			continue;
+		dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid);
+		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
+		fn(nvmeq->dev, ctx, &cqe);
+	}
+}
+
+static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+{
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+}
+
 static void nvme_free_queue(struct nvme_dev *dev, int qid)
 {
 	struct nvme_queue *nvmeq = dev->queues[qid];
 	int vector = dev->entry[nvmeq->cq_vector].vector;
 
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_cancel_ios(nvmeq, false);
+	spin_unlock_irq(&nvmeq->q_lock);
+
 	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);
 
@@ -876,18 +917,15 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_cq(dev, qid);
 	}
 
-	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
-				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
-	kfree(nvmeq);
+	nvme_free_queue_mem(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 							int depth, int vector)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
-	unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info));
+	unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
+						sizeof(struct nvme_cmd_info));
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
 	if (!nvmeq)
 		return NULL;
@@ -975,7 +1013,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
 
 static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 {
-	int result;
+	int result = 0;
 	u32 aqa;
 	u64 cap;
 	unsigned long timeout;
@@ -1005,17 +1043,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
 	dev->db_stride = NVME_CAP_STRIDE(cap);
 
-	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+	while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
 		if (fatal_signal_pending(current))
-			return -EINTR;
+			result = -EINTR;
 		if (time_after(jiffies, timeout)) {
 			dev_err(&dev->pci_dev->dev,
 				"Device not ready; aborting initialisation\n");
-			return -ENODEV;
+			result = -ENODEV;
 		}
 	}
 
+	if (result) {
+		nvme_free_queue_mem(nvmeq);
+		return result;
+	}
+
 	result = queue_request_irq(dev, nvmeq, "nvme admin");
 	dev->queues[0] = nvmeq;
 	return result;
@@ -1037,6 +1080,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 	offset = offset_in_page(addr);
 	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
 	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
 
 	err = get_user_pages_fast(addr, count, 1, pages);
 	if (err < count) {
@@ -1146,14 +1191,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
-static int nvme_user_admin_cmd(struct nvme_ns *ns,
+static int nvme_user_admin_cmd(struct nvme_dev *dev,
 					struct nvme_admin_cmd __user *ucmd)
 {
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_admin_cmd cmd;
 	struct nvme_command c;
 	int status, length;
-	struct nvme_iod *iod;
+	struct nvme_iod *uninitialized_var(iod);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -1204,7 +1248,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 	case NVME_IOCTL_ID:
 		return ns->ns_id;
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_admin_cmd(ns, (void __user *)arg);
+		return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
 	default:
@@ -1218,26 +1262,6 @@ static const struct block_device_operations nvme_fops = {
 	.compat_ioctl = nvme_ioctl,
 };
 
-static void nvme_timeout_ios(struct nvme_queue *nvmeq)
-{
-	int depth = nvmeq->q_depth - 1;
-	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
-	unsigned long now = jiffies;
-	int cmdid;
-
-	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
-		void *ctx;
-		nvme_completion_fn fn;
-		static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, };
-
-		if (!time_after(now, info[cmdid].timeout))
-			continue;
-		dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid);
-		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
-		fn(nvmeq->dev, ctx, &cqe);
-	}
-}
-
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
 {
 	while (bio_list_peek(&nvmeq->sq_cong)) {
@@ -1269,7 +1293,7 @@ static int nvme_kthread(void *data)
 			spin_lock_irq(&nvmeq->q_lock);
 			if (nvme_process_cq(nvmeq))
 				printk("process_cq did something\n");
-			nvme_timeout_ios(nvmeq);
+			nvme_cancel_ios(nvmeq, true);
 			nvme_resubmit_bios(nvmeq);
 			spin_unlock_irq(&nvmeq->q_lock);
 		}
@@ -1339,6 +1363,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
 	ns->disk = disk;
 	lbaf = id->flbas & 0xf;
 	ns->lba_shift = id->lbaf[lbaf].ds;
+	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+	if (dev->max_hw_sectors)
+		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
 
 	disk->major = nvme_major;
 	disk->minors = NVME_MINORS;
@@ -1383,7 +1410,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 
 static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 {
-	int result, cpu, i, nr_io_queues, db_bar_size;
+	int result, cpu, i, nr_io_queues, db_bar_size, q_depth;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1429,9 +1456,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 		cpu = cpumask_next(cpu, cpu_online_mask);
 	}
 
+	q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
+								NVME_Q_DEPTH);
 	for (i = 0; i < nr_io_queues; i++) {
-		dev->queues[i + 1] = nvme_create_queue(dev, i + 1,
-							NVME_Q_DEPTH, i);
+		dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
 		if (IS_ERR(dev->queues[i + 1]))
 			return PTR_ERR(dev->queues[i + 1]);
 		dev->queue_count++;
@@ -1480,6 +1508,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev)
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
+	if (ctrl->mdts) {
+		int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
+		dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
+	}
 
 	id_ns = mem;
 	for (i = 1; i <= nn; i++) {
@@ -1523,8 +1555,6 @@ static int nvme_dev_remove(struct nvme_dev *dev)
 	list_del(&dev->node);
 	spin_unlock(&dev_list_lock);
 
-	/* TODO: wait all I/O finished or cancel them */
-
 	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
 		list_del(&ns->list);
 		del_gendisk(ns->disk);
@@ -1560,15 +1590,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 	dma_pool_destroy(dev->prp_small_pool);
 }
 
-/* XXX: Use an ida or something to let remove / add work correctly */
-static void nvme_set_instance(struct nvme_dev *dev)
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_dev *dev)
 {
-	static int instance;
-	dev->instance = instance++;
+	int instance, error;
+
+	do {
+		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+			return -ENODEV;
+
+		spin_lock(&dev_list_lock);
+		error = ida_get_new(&nvme_instance_ida, &instance);
+		spin_unlock(&dev_list_lock);
+	} while (error == -EAGAIN);
+
+	if (error)
+		return -ENODEV;
+
+	dev->instance = instance;
+	return 0;
 }
 
 static void nvme_release_instance(struct nvme_dev *dev)
 {
+	spin_lock(&dev_list_lock);
+	ida_remove(&nvme_instance_ida, dev->instance);
+	spin_unlock(&dev_list_lock);
 }
 
 static int __devinit nvme_probe(struct pci_dev *pdev,
@@ -1601,7 +1649,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev,
 	pci_set_drvdata(pdev, dev);
 	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
 	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
-	nvme_set_instance(dev);
+	result = nvme_set_instance(dev);
+	if (result)
+		goto disable;
+
 	dev->entry[0].vector = pdev->irq;
 
 	result = nvme_setup_prp_pools(dev);
@@ -1704,15 +1755,17 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
-	int result = -EBUSY;
+	int result;
 
 	nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
 	if (IS_ERR(nvme_thread))
 		return PTR_ERR(nvme_thread);
 
-	nvme_major = register_blkdev(nvme_major, "nvme");
-	if (nvme_major <= 0)
+	result = register_blkdev(nvme_major, "nvme");
+	if (result < 0)
 		goto kill_kthread;
+	else if (result > 0)
+		nvme_major = result;
 
 	result = pci_register_driver(&nvme_driver);
 	if (result)