author    Keith Busch <keith.busch@intel.com>  2014-06-23 13:34:01 -0400
committer Jens Axboe <axboe@fb.com>            2014-11-04 15:17:07 -0500
commit    1d0906246095184d1624c643c2088152d330c40a (patch)
tree      3d7ea112d661961bd94c293edcee8bb2e6dcdce0 /drivers/block
parent    a7dd7957acf798ac406afd6631e64a27ac4a5bf1 (diff)
NVMe: Mismatched host/device page size support
Adds support for devices whose maximum page size is smaller than the host's. If we encounter such a host/device combination, the driver splits a host page into as many PRP entries as the device's page size requires. If the device's reported minimum page size is greater than the host's, the driver does not attempt to enable the device and returns an error instead.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
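As a rough standalone sketch of the negotiation described above (not driver code): the CAP.MPSMIN/MPSMAX macros below mirror the kernel's definitions of the controller capability fields, while negotiate_page_shift() and the sample values are purely illustrative.

#include <stdio.h>

#define NVME_CAP_MPSMIN(cap)	(((cap) >> 48) & 0xf)
#define NVME_CAP_MPSMAX(cap)	(((cap) >> 52) & 0xf)

/* Returns the page shift the controller would be programmed with, or -1. */
static int negotiate_page_shift(unsigned long long cap, unsigned host_shift)
{
	unsigned dev_min = NVME_CAP_MPSMIN(cap) + 12;	/* MPSMIN/MPSMAX are powers of two above 4K */
	unsigned dev_max = NVME_CAP_MPSMAX(cap) + 12;

	if (host_shift < dev_min)
		return -1;		/* device cannot use pages as small as the host's */
	if (host_shift > dev_max)
		return dev_max;		/* work-around: split host pages into device pages */
	return host_shift;
}

int main(void)
{
	/* e.g. a controller limited to 4K pages (MPSMIN = MPSMAX = 0) on a 64K-page host */
	unsigned long long cap = 0;

	printf("programmed page shift: %d\n", negotiate_page_shift(cap, 16));	/* 12 -> 4K device pages */
	return 0;
}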
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/nvme-core.c  59
1 file changed, 40 insertions, 19 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 42a62bbf4a11..e60bb0fec7e3 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -373,17 +373,17 @@ static __le64 **iod_list(struct nvme_iod *iod)
  * as it only leads to a small amount of wasted memory for the lifetime of
  * the I/O.
  */
-static int nvme_npages(unsigned size)
+static int nvme_npages(unsigned size, struct nvme_dev *dev)
 {
-	unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
-	return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
+	unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size);
+	return DIV_ROUND_UP(8 * nprps, dev->page_size - 8);
 }
 
 static struct nvme_iod *
-nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
+nvme_alloc_iod(unsigned nseg, unsigned nbytes, struct nvme_dev *dev, gfp_t gfp)
 {
 	struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
-				sizeof(__le64 *) * nvme_npages(nbytes) +
+				sizeof(__le64 *) * nvme_npages(nbytes, dev) +
 				sizeof(struct scatterlist) * nseg, gfp);
 
 	if (iod) {
@@ -400,7 +400,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
 
 void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 {
-	const int last_prp = PAGE_SIZE / 8 - 1;
+	const int last_prp = dev->page_size / 8 - 1;
 	int i;
 	__le64 **list = iod_list(iod);
 	dma_addr_t prp_dma = iod->first_dma;
@@ -491,26 +491,27 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	__le64 **list = iod_list(iod);
 	dma_addr_t prp_dma;
 	int nprps, i;
+	u32 page_size = dev->page_size;
 
-	length -= (PAGE_SIZE - offset);
+	length -= (page_size - offset);
 	if (length <= 0)
 		return total_len;
 
-	dma_len -= (PAGE_SIZE - offset);
+	dma_len -= (page_size - offset);
 	if (dma_len) {
-		dma_addr += (PAGE_SIZE - offset);
+		dma_addr += (page_size - offset);
 	} else {
 		sg = sg_next(sg);
 		dma_addr = sg_dma_address(sg);
 		dma_len = sg_dma_len(sg);
 	}
 
-	if (length <= PAGE_SIZE) {
+	if (length <= page_size) {
 		iod->first_dma = dma_addr;
 		return total_len;
 	}
 
-	nprps = DIV_ROUND_UP(length, PAGE_SIZE);
+	nprps = DIV_ROUND_UP(length, page_size);
 	if (nprps <= (256 / 8)) {
 		pool = dev->prp_small_pool;
 		iod->npages = 0;
@@ -523,13 +524,13 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	if (!prp_list) {
 		iod->first_dma = dma_addr;
 		iod->npages = -1;
-		return (total_len - length) + PAGE_SIZE;
+		return (total_len - length) + page_size;
 	}
 	list[0] = prp_list;
 	iod->first_dma = prp_dma;
 	i = 0;
 	for (;;) {
-		if (i == PAGE_SIZE / 8) {
+		if (i == page_size >> 3) {
 			__le64 *old_prp_list = prp_list;
 			prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
 			if (!prp_list)
@@ -540,9 +541,9 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 			i = 1;
 		}
 		prp_list[i++] = cpu_to_le64(dma_addr);
-		dma_len -= PAGE_SIZE;
-		dma_addr += PAGE_SIZE;
-		length -= PAGE_SIZE;
+		dma_len -= page_size;
+		dma_addr += page_size;
+		length -= page_size;
 		if (length <= 0)
 			break;
 		if (dma_len > 0)
@@ -749,7 +750,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	if ((bio->bi_rw & REQ_FLUSH) && psegs)
 		return nvme_split_flush_data(nvmeq, bio);
 
-	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
+	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, ns->dev, GFP_ATOMIC);
 	if (!iod)
 		return -ENOMEM;
 
@@ -1463,6 +1464,24 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	u32 aqa;
 	u64 cap = readq(&dev->bar->cap);
 	struct nvme_queue *nvmeq;
+	unsigned page_shift = PAGE_SHIFT;
+	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
+	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
+
+	if (page_shift < dev_page_min) {
+		dev_err(&dev->pci_dev->dev,
+				"Minimum device page size (%u) too large for "
+				"host (%u)\n", 1 << dev_page_min,
+				1 << page_shift);
+		return -ENODEV;
+	}
+	if (page_shift > dev_page_max) {
+		dev_info(&dev->pci_dev->dev,
+				"Device maximum page size (%u) smaller than "
+				"host (%u); enabling work-around\n",
+				1 << dev_page_max, 1 << page_shift);
+		page_shift = dev_page_max;
+	}
 
 	result = nvme_disable_ctrl(dev, cap);
 	if (result < 0)
@@ -1478,8 +1497,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	aqa = nvmeq->q_depth - 1;
 	aqa |= aqa << 16;
 
+	dev->page_size = 1 << page_shift;
+
 	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
-	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
 	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
 	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
@@ -1529,7 +1550,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 	}
 
 	err = -ENOMEM;
-	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
+	iod = nvme_alloc_iod(count, length, dev, GFP_KERNEL);
 	if (!iod)
 		goto put_pages;
 
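For a sense of what the work-around costs, here is a small standalone sketch; the helper name show_prp_cost() is invented for illustration, and the arithmetic simply mirrors nvme_npages() in the hunk above. With a smaller device page size, the same transfer needs proportionally more PRP entries.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static void show_prp_cost(unsigned size, unsigned page_size)
{
	/* worst case: the buffer is not page aligned, so allow one extra entry */
	unsigned nprps = DIV_ROUND_UP(size + page_size, page_size);
	/* a PRP list page holds page_size/8 entries; the last one chains to the next page */
	unsigned list_pages = DIV_ROUND_UP(8 * nprps, page_size - 8);

	printf("%5u-byte pages: %3u PRP entries, %u PRP list page(s)\n",
	       page_size, nprps, list_pages);
}

int main(void)
{
	/* a 1 MiB transfer on a 64K-page host vs. a device capped at 4K pages */
	show_prp_cost(1024 * 1024, 65536);	/* 17 entries */
	show_prp_cost(1024 * 1024, 4096);	/* 257 entries */
	return 0;
}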