Diffstat (limited to 'drivers/block/nvme.c')
-rw-r--r--	drivers/block/nvme.c	153
1 file changed, 103 insertions, 50 deletions
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 38a2d0631882..ad16c68c8645 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -79,6 +79,7 @@ struct nvme_dev {
 	char serial[20];
 	char model[40];
 	char firmware_rev[8];
+	u32 max_hw_sectors;
 };
 
 /*
@@ -835,15 +836,15 @@ static int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
 }
 
 static int nvme_get_features(struct nvme_dev *dev, unsigned fid,
-				unsigned dword11, dma_addr_t dma_addr)
+				unsigned nsid, dma_addr_t dma_addr)
 {
 	struct nvme_command c;
 
 	memset(&c, 0, sizeof(c));
 	c.features.opcode = nvme_admin_get_features;
+	c.features.nsid = cpu_to_le32(nsid);
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
-	c.features.dword11 = cpu_to_le32(dword11);
 
 	return nvme_submit_admin_cmd(dev, &c, NULL);
 }
@@ -862,11 +863,51 @@ static int nvme_set_features(struct nvme_dev *dev, unsigned fid,
 	return nvme_submit_admin_cmd(dev, &c, result);
 }
 
+/**
+ * nvme_cancel_ios - Cancel outstanding I/Os
+ * @queue: The queue to cancel I/Os on
+ * @timeout: True to only cancel I/Os which have timed out
+ */
+static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
+{
+	int depth = nvmeq->q_depth - 1;
+	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
+	unsigned long now = jiffies;
+	int cmdid;
+
+	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
+		void *ctx;
+		nvme_completion_fn fn;
+		static struct nvme_completion cqe = {
+			.status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1,
+		};
+
+		if (timeout && !time_after(now, info[cmdid].timeout))
+			continue;
+		dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid);
+		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
+		fn(nvmeq->dev, ctx, &cqe);
+	}
+}
+
+static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+{
+	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
+	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	kfree(nvmeq);
+}
+
 static void nvme_free_queue(struct nvme_dev *dev, int qid)
 {
 	struct nvme_queue *nvmeq = dev->queues[qid];
 	int vector = dev->entry[nvmeq->cq_vector].vector;
 
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_cancel_ios(nvmeq, false);
+	spin_unlock_irq(&nvmeq->q_lock);
+
 	irq_set_affinity_hint(vector, NULL);
 	free_irq(vector, nvmeq);
 
@@ -876,18 +917,15 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_cq(dev, qid);
 	}
 
-	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
-				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
-	dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
-	kfree(nvmeq);
+	nvme_free_queue_mem(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 							int depth, int vector)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
-	unsigned extra = (depth / 8) + (depth * sizeof(struct nvme_cmd_info));
+	unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
+						sizeof(struct nvme_cmd_info));
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
 	if (!nvmeq)
 		return NULL;
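
(Note on the DIV_ROUND_UP change above: `depth / 8` truncates, so the cmdid bitmap was under-sized whenever the queue depth was not a multiple of 8 — which can now happen, since this patch derives the depth from the controller's CAP.MQES field instead of a fixed constant. A rough userspace sketch of the arithmetic, illustrative names only:)

#include <stdio.h>

/* illustrative only: bytes needed for a bitmap with one bit per command id */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned depth = 63;	/* e.g. a controller reporting MQES + 1 = 63 */

	printf("truncating:  %u bytes (%u bits, too few)\n",
	       depth / 8, (depth / 8) * 8);
	printf("rounding up: %u bytes (%u bits)\n",
	       DIV_ROUND_UP(depth, 8), DIV_ROUND_UP(depth, 8) * 8);
	return 0;
}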
@@ -975,7 +1013,7 @@ static __devinit struct nvme_queue *nvme_create_queue(struct nvme_dev *dev,
 
 static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 {
-	int result;
+	int result = 0;
 	u32 aqa;
 	u64 cap;
 	unsigned long timeout;
@@ -1005,17 +1043,22 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
 	dev->db_stride = NVME_CAP_STRIDE(cap);
 
-	while (!(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
+	while (!result && !(readl(&dev->bar->csts) & NVME_CSTS_RDY)) {
 		msleep(100);
 		if (fatal_signal_pending(current))
-			return -EINTR;
+			result = -EINTR;
 		if (time_after(jiffies, timeout)) {
 			dev_err(&dev->pci_dev->dev,
 				"Device not ready; aborting initialisation\n");
-			return -ENODEV;
+			result = -ENODEV;
 		}
 	}
 
+	if (result) {
+		nvme_free_queue_mem(nvmeq);
+		return result;
+	}
+
 	result = queue_request_irq(dev, nvmeq, "nvme admin");
 	dev->queues[0] = nvmeq;
 	return result;
@@ -1037,6 +1080,8 @@ static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 	offset = offset_in_page(addr);
 	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
 	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
 
 	err = get_user_pages_fast(addr, count, 1, pages);
 	if (err < count) {
@@ -1146,14 +1191,13 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	return status;
 }
 
-static int nvme_user_admin_cmd(struct nvme_ns *ns,
+static int nvme_user_admin_cmd(struct nvme_dev *dev,
 					struct nvme_admin_cmd __user *ucmd)
 {
-	struct nvme_dev *dev = ns->dev;
 	struct nvme_admin_cmd cmd;
 	struct nvme_command c;
 	int status, length;
-	struct nvme_iod *iod;
+	struct nvme_iod *uninitialized_var(iod);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -1204,7 +1248,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 	case NVME_IOCTL_ID:
 		return ns->ns_id;
 	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_admin_cmd(ns, (void __user *)arg);
+		return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
 	case NVME_IOCTL_SUBMIT_IO:
 		return nvme_submit_io(ns, (void __user *)arg);
 	default:
@@ -1218,26 +1262,6 @@ static const struct block_device_operations nvme_fops = {
 	.compat_ioctl	= nvme_ioctl,
 };
 
-static void nvme_timeout_ios(struct nvme_queue *nvmeq)
-{
-	int depth = nvmeq->q_depth - 1;
-	struct nvme_cmd_info *info = nvme_cmd_info(nvmeq);
-	unsigned long now = jiffies;
-	int cmdid;
-
-	for_each_set_bit(cmdid, nvmeq->cmdid_data, depth) {
-		void *ctx;
-		nvme_completion_fn fn;
-		static struct nvme_completion cqe = { .status = cpu_to_le16(NVME_SC_ABORT_REQ) << 1, };
-
-		if (!time_after(now, info[cmdid].timeout))
-			continue;
-		dev_warn(nvmeq->q_dmadev, "Timing out I/O %d\n", cmdid);
-		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
-		fn(nvmeq->dev, ctx, &cqe);
-	}
-}
-
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
 {
 	while (bio_list_peek(&nvmeq->sq_cong)) {
@@ -1269,7 +1293,7 @@ static int nvme_kthread(void *data)
 		spin_lock_irq(&nvmeq->q_lock);
 		if (nvme_process_cq(nvmeq))
 			printk("process_cq did something\n");
-		nvme_timeout_ios(nvmeq);
+		nvme_cancel_ios(nvmeq, true);
 		nvme_resubmit_bios(nvmeq);
 		spin_unlock_irq(&nvmeq->q_lock);
 	}
@@ -1339,6 +1363,9 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid,
 	ns->disk = disk;
 	lbaf = id->flbas & 0xf;
 	ns->lba_shift = id->lbaf[lbaf].ds;
+	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+	if (dev->max_hw_sectors)
+		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
 
 	disk->major = nvme_major;
 	disk->minors = NVME_MINORS;
@@ -1383,7 +1410,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 
 static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 {
-	int result, cpu, i, nr_io_queues, db_bar_size;
+	int result, cpu, i, nr_io_queues, db_bar_size, q_depth;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1429,9 +1456,10 @@ static int __devinit nvme_setup_io_queues(struct nvme_dev *dev)
 		cpu = cpumask_next(cpu, cpu_online_mask);
 	}
 
+	q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
+								NVME_Q_DEPTH);
 	for (i = 0; i < nr_io_queues; i++) {
-		dev->queues[i + 1] = nvme_create_queue(dev, i + 1,
-							NVME_Q_DEPTH, i);
+		dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
 		if (IS_ERR(dev->queues[i + 1]))
 			return PTR_ERR(dev->queues[i + 1]);
 		dev->queue_count++;
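
(Per the NVMe spec, CAP.MQES (bits 15:0) is zero-based, so the controller's limit is MQES + 1 entries per queue; the hunk above clamps the driver's preferred depth to that limit instead of always requesting NVME_Q_DEPTH. A hedged userspace sketch of the clamping — the 1024 default and the field mask are assumptions for illustration:)

#include <stdio.h>
#include <stdint.h>

#define NVME_Q_DEPTH	1024			/* assumed driver default */
#define CAP_MQES(cap)	((cap) & 0xffff)	/* bits 15:0, zero-based */

static int queue_depth(uint64_t cap)
{
	int mqes = CAP_MQES(cap) + 1;		/* controller limit */

	return mqes < NVME_Q_DEPTH ? mqes : NVME_Q_DEPTH;
}

int main(void)
{
	printf("%d\n", queue_depth(0x3e));	/* MQES = 62 -> depth 63 */
	printf("%d\n", queue_depth(0xffff));	/* large limit -> clamped to 1024 */
	return 0;
}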
@@ -1480,6 +1508,10 @@ static int __devinit nvme_dev_add(struct nvme_dev *dev)
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
+	if (ctrl->mdts) {
+		int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
+		dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
+	}
 
 	id_ns = mem;
 	for (i = 1; i <= nn; i++) {
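
(MDTS is a power-of-two multiple of the minimum memory page size, 2^(12 + CAP.MPSMIN) bytes, while max_hw_sectors is in 512-byte sectors, hence the `+ shift - 9` above. For example, mdts = 5 with MPSMIN = 0 gives 1 << (5 + 12 - 9) = 256 sectors, i.e. 128 KiB. A small userspace sketch of the conversion; the CAP field mask follows the spec and is not taken from this driver:)

#include <stdio.h>
#include <stdint.h>

#define CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)	/* bits 51:48 */

static unsigned max_hw_sectors(uint8_t mdts, uint64_t cap)
{
	unsigned shift = CAP_MPSMIN(cap) + 12;	/* log2 of minimum page size */

	return 1u << (mdts + shift - 9);	/* 512-byte sectors */
}

int main(void)
{
	/* mdts = 5, MPSMIN = 0: 2^5 * 4 KiB = 128 KiB = 256 sectors */
	printf("%u sectors\n", max_hw_sectors(5, 0));
	return 0;
}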
@@ -1523,8 +1555,6 @@ static int nvme_dev_remove(struct nvme_dev *dev)
 	list_del(&dev->node);
 	spin_unlock(&dev_list_lock);
 
-	/* TODO: wait all I/O finished or cancel them */
-
 	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
 		list_del(&ns->list);
 		del_gendisk(ns->disk);
@@ -1560,15 +1590,33 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 	dma_pool_destroy(dev->prp_small_pool);
 }
 
-/* XXX: Use an ida or something to let remove / add work correctly */
-static void nvme_set_instance(struct nvme_dev *dev)
+static DEFINE_IDA(nvme_instance_ida);
+
+static int nvme_set_instance(struct nvme_dev *dev)
 {
-	static int instance;
-	dev->instance = instance++;
+	int instance, error;
+
+	do {
+		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+			return -ENODEV;
+
+		spin_lock(&dev_list_lock);
+		error = ida_get_new(&nvme_instance_ida, &instance);
+		spin_unlock(&dev_list_lock);
+	} while (error == -EAGAIN);
+
+	if (error)
+		return -ENODEV;
+
+	dev->instance = instance;
+	return 0;
 }
 
 static void nvme_release_instance(struct nvme_dev *dev)
 {
+	spin_lock(&dev_list_lock);
+	ida_remove(&nvme_instance_ida, dev->instance);
+	spin_unlock(&dev_list_lock);
 }
 
 static int __devinit nvme_probe(struct pci_dev *pdev,
@@ -1601,7 +1649,10 @@ static int __devinit nvme_probe(struct pci_dev *pdev,
 	pci_set_drvdata(pdev, dev);
 	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
 	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
-	nvme_set_instance(dev);
+	result = nvme_set_instance(dev);
+	if (result)
+		goto disable;
+
 	dev->entry[0].vector = pdev->irq;
 
 	result = nvme_setup_prp_pools(dev);
@@ -1704,15 +1755,17 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
-	int result = -EBUSY;
+	int result;
 
 	nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
 	if (IS_ERR(nvme_thread))
 		return PTR_ERR(nvme_thread);
 
-	nvme_major = register_blkdev(nvme_major, "nvme");
-	if (nvme_major <= 0)
+	result = register_blkdev(nvme_major, "nvme");
+	if (result < 0)
 		goto kill_kthread;
+	else if (result > 0)
+		nvme_major = result;
 
 	result = pci_register_driver(&nvme_driver);
 	if (result)
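
(register_blkdev() returns a freshly allocated major when called with major == 0, returns 0 when a caller-supplied major is registered successfully, and a negative errno on failure. The old code assigned the return value straight to nvme_major, so registering a preset major successfully would store 0 and then be treated as an error by the `<= 0` test; the hunk above keeps the three cases apart. A minimal userspace sketch of that handling against a stand-in for the API, illustrative only:)

#include <stdio.h>

/* stand-in for register_blkdev(): 0 = requested major granted, >0 = dynamic major */
static int fake_register_blkdev(unsigned int major, const char *name)
{
	(void)name;
	return major ? 0 : 259;
}

int main(void)
{
	int nvme_major = 0;	/* 0 asks for a dynamically allocated major */
	int result = fake_register_blkdev(nvme_major, "nvme");

	if (result < 0)
		return result;		/* registration failed */
	else if (result > 0)
		nvme_major = result;	/* dynamic major handed back */

	printf("using major %d\n", nvme_major);
	return 0;
}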