aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 22:08:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-16 22:08:15 -0400
commit1718de78e6235c04ecb7f87a6875fdf90aafe382 (patch)
treea8b5c2f89bd2c71bd5b1dc47a0fa46446ba2cd0f
parent815d469d8c9a3360ee0a8b7857dd95352a6c7bde (diff)
parent7a102d9044e720ac887c0cd82b6d5cad236f6d71 (diff)
Merge tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block
Pull more block updates from Jens Axboe: "This is mainly some late lightnvm changes that came in just before the merge window, as well as fixes that have been queued up since the initial pull request was frozen. This contains: - lightnvm changes, fixing race conditions, improving memory utilization, and improving pblk compatibility (Chansol, Igor, Marcin) - NVMe pull request with minor fixes all over the map (via Christoph) - remove redundant error print in sata_rcar (Geert) - struct_size() cleanup (Jackie) - dasd CONFIG_LBADF warning fix (Ming) - brd cond_resched() improvement (Mikulas)" * tag 'for-5.2/block-post-20190516' of git://git.kernel.dk/linux-block: (41 commits) block/bio-integrity: use struct_size() in kmalloc() nvme: validate cntlid during controller initialisation nvme: change locking for the per-subsystem controller list nvme: trace all async notice events nvme: fix typos in nvme status code values nvme-fabrics: remove unused argument nvme-multipath: avoid crash on invalid subsystem cntlid enumeration nvme-fc: use separate work queue to avoid warning nvme-rdma: remove redundant reference between ib_device and tagset nvme-pci: mark expected switch fall-through nvme-pci: add known admin effects to augument admin effects log page nvme-pci: init shadow doorbell after each reset brd: add cond_resched to brd_free_pages sata_rcar: Remove ata_host_alloc() error printing s390/dasd: fix build warning in dasd_eckd_build_cp_raw lightnvm: pblk: use nvm_rq_to_ppa_list() lightnvm: pblk: simplify partial read path lightnvm: do not remove instance under global lock lightnvm: track inflight target creations lightnvm: pblk: recover only written metadata ...
-rw-r--r--block/bio-integrity.c3
-rw-r--r--drivers/ata/sata_rcar.c1
-rw-r--r--drivers/block/brd.c6
-rw-r--r--drivers/lightnvm/core.c82
-rw-r--r--drivers/lightnvm/pblk-cache.c8
-rw-r--r--drivers/lightnvm/pblk-core.c65
-rw-r--r--drivers/lightnvm/pblk-gc.c52
-rw-r--r--drivers/lightnvm/pblk-init.c65
-rw-r--r--drivers/lightnvm/pblk-map.c1
-rw-r--r--drivers/lightnvm/pblk-rb.c13
-rw-r--r--drivers/lightnvm/pblk-read.c394
-rw-r--r--drivers/lightnvm/pblk-recovery.c74
-rw-r--r--drivers/lightnvm/pblk-write.c1
-rw-r--r--drivers/lightnvm/pblk.h28
-rw-r--r--drivers/nvme/host/core.c79
-rw-r--r--drivers/nvme/host/fabrics.c4
-rw-r--r--drivers/nvme/host/fc.c14
-rw-r--r--drivers/nvme/host/lightnvm.c1
-rw-r--r--drivers/nvme/host/multipath.c2
-rw-r--r--drivers/nvme/host/pci.c4
-rw-r--r--drivers/nvme/host/rdma.c34
-rw-r--r--drivers/nvme/host/trace.h1
-rw-r--r--drivers/s390/block/dasd_eckd.c2
-rw-r--r--include/linux/lightnvm.h2
-rw-r--r--include/linux/nvme.h4
25 files changed, 398 insertions, 542 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 42536674020a..4db620849515 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -43,8 +43,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
43 unsigned inline_vecs; 43 unsigned inline_vecs;
44 44
45 if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) { 45 if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
46 bip = kmalloc(sizeof(struct bio_integrity_payload) + 46 bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
47 sizeof(struct bio_vec) * nr_vecs, gfp_mask);
48 inline_vecs = nr_vecs; 47 inline_vecs = nr_vecs;
49 } else { 48 } else {
50 bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask); 49 bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 59b2317acea9..3495e1733a8e 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -909,7 +909,6 @@ static int sata_rcar_probe(struct platform_device *pdev)
909 909
910 host = ata_host_alloc(dev, 1); 910 host = ata_host_alloc(dev, 1);
911 if (!host) { 911 if (!host) {
912 dev_err(dev, "ata_host_alloc failed\n");
913 ret = -ENOMEM; 912 ret = -ENOMEM;
914 goto err_pm_put; 913 goto err_pm_put;
915 } 914 }
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 17defbf4f332..2da615b45b31 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -153,6 +153,12 @@ static void brd_free_pages(struct brd_device *brd)
153 pos++; 153 pos++;
154 154
155 /* 155 /*
156 * It takes 3.4 seconds to remove 80GiB ramdisk.
157 * So, we need cond_resched to avoid stalling the CPU.
158 */
159 cond_resched();
160
161 /*
156 * This assumes radix_tree_gang_lookup always returns as 162 * This assumes radix_tree_gang_lookup always returns as
157 * many pages as possible. If the radix-tree code changes, 163 * many pages as possible. If the radix-tree code changes,
158 * so will this have to. 164 * so will this have to.
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 5f82036fe322..0df7454832ef 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -45,6 +45,8 @@ struct nvm_dev_map {
45 int num_ch; 45 int num_ch;
46}; 46};
47 47
48static void nvm_free(struct kref *ref);
49
48static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) 50static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name)
49{ 51{
50 struct nvm_target *tgt; 52 struct nvm_target *tgt;
@@ -325,6 +327,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
325 struct nvm_target *t; 327 struct nvm_target *t;
326 struct nvm_tgt_dev *tgt_dev; 328 struct nvm_tgt_dev *tgt_dev;
327 void *targetdata; 329 void *targetdata;
330 unsigned int mdts;
328 int ret; 331 int ret;
329 332
330 switch (create->conf.type) { 333 switch (create->conf.type) {
@@ -412,8 +415,12 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
412 tdisk->private_data = targetdata; 415 tdisk->private_data = targetdata;
413 tqueue->queuedata = targetdata; 416 tqueue->queuedata = targetdata;
414 417
415 blk_queue_max_hw_sectors(tqueue, 418 mdts = (dev->geo.csecs >> 9) * NVM_MAX_VLBA;
416 (dev->geo.csecs >> 9) * NVM_MAX_VLBA); 419 if (dev->geo.mdts) {
420 mdts = min_t(u32, dev->geo.mdts,
421 (dev->geo.csecs >> 9) * NVM_MAX_VLBA);
422 }
423 blk_queue_max_hw_sectors(tqueue, mdts);
417 424
418 set_capacity(tdisk, tt->capacity(targetdata)); 425 set_capacity(tdisk, tt->capacity(targetdata));
419 add_disk(tdisk); 426 add_disk(tdisk);
@@ -476,7 +483,6 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
476 483
477/** 484/**
478 * nvm_remove_tgt - Removes a target from the media manager 485 * nvm_remove_tgt - Removes a target from the media manager
479 * @dev: device
480 * @remove: ioctl structure with target name to remove. 486 * @remove: ioctl structure with target name to remove.
481 * 487 *
482 * Returns: 488 * Returns:
@@ -484,18 +490,28 @@ static void __nvm_remove_target(struct nvm_target *t, bool graceful)
484 * 1: on not found 490 * 1: on not found
485 * <0: on error 491 * <0: on error
486 */ 492 */
487static int nvm_remove_tgt(struct nvm_dev *dev, struct nvm_ioctl_remove *remove) 493static int nvm_remove_tgt(struct nvm_ioctl_remove *remove)
488{ 494{
489 struct nvm_target *t; 495 struct nvm_target *t;
496 struct nvm_dev *dev;
490 497
491 mutex_lock(&dev->mlock); 498 down_read(&nvm_lock);
492 t = nvm_find_target(dev, remove->tgtname); 499 list_for_each_entry(dev, &nvm_devices, devices) {
493 if (!t) { 500 mutex_lock(&dev->mlock);
501 t = nvm_find_target(dev, remove->tgtname);
502 if (t) {
503 mutex_unlock(&dev->mlock);
504 break;
505 }
494 mutex_unlock(&dev->mlock); 506 mutex_unlock(&dev->mlock);
495 return 1;
496 } 507 }
508 up_read(&nvm_lock);
509
510 if (!t)
511 return 1;
512
497 __nvm_remove_target(t, true); 513 __nvm_remove_target(t, true);
498 mutex_unlock(&dev->mlock); 514 kref_put(&dev->ref, nvm_free);
499 515
500 return 0; 516 return 0;
501} 517}
@@ -1089,15 +1105,16 @@ err_fmtype:
1089 return ret; 1105 return ret;
1090} 1106}
1091 1107
1092static void nvm_free(struct nvm_dev *dev) 1108static void nvm_free(struct kref *ref)
1093{ 1109{
1094 if (!dev) 1110 struct nvm_dev *dev = container_of(ref, struct nvm_dev, ref);
1095 return;
1096 1111
1097 if (dev->dma_pool) 1112 if (dev->dma_pool)
1098 dev->ops->destroy_dma_pool(dev->dma_pool); 1113 dev->ops->destroy_dma_pool(dev->dma_pool);
1099 1114
1100 nvm_unregister_map(dev); 1115 if (dev->rmap)
1116 nvm_unregister_map(dev);
1117
1101 kfree(dev->lun_map); 1118 kfree(dev->lun_map);
1102 kfree(dev); 1119 kfree(dev);
1103} 1120}
@@ -1134,7 +1151,13 @@ err:
1134 1151
1135struct nvm_dev *nvm_alloc_dev(int node) 1152struct nvm_dev *nvm_alloc_dev(int node)
1136{ 1153{
1137 return kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node); 1154 struct nvm_dev *dev;
1155
1156 dev = kzalloc_node(sizeof(struct nvm_dev), GFP_KERNEL, node);
1157 if (dev)
1158 kref_init(&dev->ref);
1159
1160 return dev;
1138} 1161}
1139EXPORT_SYMBOL(nvm_alloc_dev); 1162EXPORT_SYMBOL(nvm_alloc_dev);
1140 1163
@@ -1142,12 +1165,16 @@ int nvm_register(struct nvm_dev *dev)
1142{ 1165{
1143 int ret, exp_pool_size; 1166 int ret, exp_pool_size;
1144 1167
1145 if (!dev->q || !dev->ops) 1168 if (!dev->q || !dev->ops) {
1169 kref_put(&dev->ref, nvm_free);
1146 return -EINVAL; 1170 return -EINVAL;
1171 }
1147 1172
1148 ret = nvm_init(dev); 1173 ret = nvm_init(dev);
1149 if (ret) 1174 if (ret) {
1175 kref_put(&dev->ref, nvm_free);
1150 return ret; 1176 return ret;
1177 }
1151 1178
1152 exp_pool_size = max_t(int, PAGE_SIZE, 1179 exp_pool_size = max_t(int, PAGE_SIZE,
1153 (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos))); 1180 (NVM_MAX_VLBA * (sizeof(u64) + dev->geo.sos)));
@@ -1157,7 +1184,7 @@ int nvm_register(struct nvm_dev *dev)
1157 exp_pool_size); 1184 exp_pool_size);
1158 if (!dev->dma_pool) { 1185 if (!dev->dma_pool) {
1159 pr_err("nvm: could not create dma pool\n"); 1186 pr_err("nvm: could not create dma pool\n");
1160 nvm_free(dev); 1187 kref_put(&dev->ref, nvm_free);
1161 return -ENOMEM; 1188 return -ENOMEM;
1162 } 1189 }
1163 1190
@@ -1179,6 +1206,7 @@ void nvm_unregister(struct nvm_dev *dev)
1179 if (t->dev->parent != dev) 1206 if (t->dev->parent != dev)
1180 continue; 1207 continue;
1181 __nvm_remove_target(t, false); 1208 __nvm_remove_target(t, false);
1209 kref_put(&dev->ref, nvm_free);
1182 } 1210 }
1183 mutex_unlock(&dev->mlock); 1211 mutex_unlock(&dev->mlock);
1184 1212
@@ -1186,13 +1214,14 @@ void nvm_unregister(struct nvm_dev *dev)
1186 list_del(&dev->devices); 1214 list_del(&dev->devices);
1187 up_write(&nvm_lock); 1215 up_write(&nvm_lock);
1188 1216
1189 nvm_free(dev); 1217 kref_put(&dev->ref, nvm_free);
1190} 1218}
1191EXPORT_SYMBOL(nvm_unregister); 1219EXPORT_SYMBOL(nvm_unregister);
1192 1220
1193static int __nvm_configure_create(struct nvm_ioctl_create *create) 1221static int __nvm_configure_create(struct nvm_ioctl_create *create)
1194{ 1222{
1195 struct nvm_dev *dev; 1223 struct nvm_dev *dev;
1224 int ret;
1196 1225
1197 down_write(&nvm_lock); 1226 down_write(&nvm_lock);
1198 dev = nvm_find_nvm_dev(create->dev); 1227 dev = nvm_find_nvm_dev(create->dev);
@@ -1203,7 +1232,12 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create)
1203 return -EINVAL; 1232 return -EINVAL;
1204 } 1233 }
1205 1234
1206 return nvm_create_tgt(dev, create); 1235 kref_get(&dev->ref);
1236 ret = nvm_create_tgt(dev, create);
1237 if (ret)
1238 kref_put(&dev->ref, nvm_free);
1239
1240 return ret;
1207} 1241}
1208 1242
1209static long nvm_ioctl_info(struct file *file, void __user *arg) 1243static long nvm_ioctl_info(struct file *file, void __user *arg)
@@ -1322,8 +1356,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg)
1322static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) 1356static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
1323{ 1357{
1324 struct nvm_ioctl_remove remove; 1358 struct nvm_ioctl_remove remove;
1325 struct nvm_dev *dev;
1326 int ret = 0;
1327 1359
1328 if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) 1360 if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove)))
1329 return -EFAULT; 1361 return -EFAULT;
@@ -1335,13 +1367,7 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg)
1335 return -EINVAL; 1367 return -EINVAL;
1336 } 1368 }
1337 1369
1338 list_for_each_entry(dev, &nvm_devices, devices) { 1370 return nvm_remove_tgt(&remove);
1339 ret = nvm_remove_tgt(dev, &remove);
1340 if (!ret)
1341 break;
1342 }
1343
1344 return ret;
1345} 1371}
1346 1372
1347/* kept for compatibility reasons */ 1373/* kept for compatibility reasons */
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index c9fa26f95659..5c1034c22197 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -18,7 +18,8 @@
18 18
19#include "pblk.h" 19#include "pblk.h"
20 20
21int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags) 21void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
22 unsigned long flags)
22{ 23{
23 struct request_queue *q = pblk->dev->q; 24 struct request_queue *q = pblk->dev->q;
24 struct pblk_w_ctx w_ctx; 25 struct pblk_w_ctx w_ctx;
@@ -43,6 +44,7 @@ retry:
43 goto retry; 44 goto retry;
44 case NVM_IO_ERR: 45 case NVM_IO_ERR:
45 pblk_pipeline_stop(pblk); 46 pblk_pipeline_stop(pblk);
47 bio_io_error(bio);
46 goto out; 48 goto out;
47 } 49 }
48 50
@@ -79,7 +81,9 @@ retry:
79out: 81out:
80 generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time); 82 generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
81 pblk_write_should_kick(pblk); 83 pblk_write_should_kick(pblk);
82 return ret; 84
85 if (ret == NVM_IO_DONE)
86 bio_endio(bio);
83} 87}
84 88
85/* 89/*
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 6ca868868fee..773537804319 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -562,11 +562,9 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
562 562
563int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd) 563int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
564{ 564{
565 struct ppa_addr *ppa_list; 565 struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
566 int ret; 566 int ret;
567 567
568 ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
569
570 pblk_down_chunk(pblk, ppa_list[0]); 568 pblk_down_chunk(pblk, ppa_list[0]);
571 ret = pblk_submit_io_sync(pblk, rqd); 569 ret = pblk_submit_io_sync(pblk, rqd);
572 pblk_up_chunk(pblk, ppa_list[0]); 570 pblk_up_chunk(pblk, ppa_list[0]);
@@ -725,6 +723,7 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
725 struct nvm_tgt_dev *dev = pblk->dev; 723 struct nvm_tgt_dev *dev = pblk->dev;
726 struct pblk_line_meta *lm = &pblk->lm; 724 struct pblk_line_meta *lm = &pblk->lm;
727 struct bio *bio; 725 struct bio *bio;
726 struct ppa_addr *ppa_list;
728 struct nvm_rq rqd; 727 struct nvm_rq rqd;
729 u64 paddr = pblk_line_smeta_start(pblk, line); 728 u64 paddr = pblk_line_smeta_start(pblk, line);
730 int i, ret; 729 int i, ret;
@@ -748,9 +747,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
748 rqd.opcode = NVM_OP_PREAD; 747 rqd.opcode = NVM_OP_PREAD;
749 rqd.nr_ppas = lm->smeta_sec; 748 rqd.nr_ppas = lm->smeta_sec;
750 rqd.is_seq = 1; 749 rqd.is_seq = 1;
750 ppa_list = nvm_rq_to_ppa_list(&rqd);
751 751
752 for (i = 0; i < lm->smeta_sec; i++, paddr++) 752 for (i = 0; i < lm->smeta_sec; i++, paddr++)
753 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); 753 ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
754 754
755 ret = pblk_submit_io_sync(pblk, &rqd); 755 ret = pblk_submit_io_sync(pblk, &rqd);
756 if (ret) { 756 if (ret) {
@@ -761,8 +761,10 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
761 761
762 atomic_dec(&pblk->inflight_io); 762 atomic_dec(&pblk->inflight_io);
763 763
764 if (rqd.error) 764 if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
765 pblk_log_read_err(pblk, &rqd); 765 pblk_log_read_err(pblk, &rqd);
766 ret = -EIO;
767 }
766 768
767clear_rqd: 769clear_rqd:
768 pblk_free_rqd_meta(pblk, &rqd); 770 pblk_free_rqd_meta(pblk, &rqd);
@@ -775,6 +777,7 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
775 struct nvm_tgt_dev *dev = pblk->dev; 777 struct nvm_tgt_dev *dev = pblk->dev;
776 struct pblk_line_meta *lm = &pblk->lm; 778 struct pblk_line_meta *lm = &pblk->lm;
777 struct bio *bio; 779 struct bio *bio;
780 struct ppa_addr *ppa_list;
778 struct nvm_rq rqd; 781 struct nvm_rq rqd;
779 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); 782 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
780 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); 783 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
@@ -799,12 +802,13 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
799 rqd.opcode = NVM_OP_PWRITE; 802 rqd.opcode = NVM_OP_PWRITE;
800 rqd.nr_ppas = lm->smeta_sec; 803 rqd.nr_ppas = lm->smeta_sec;
801 rqd.is_seq = 1; 804 rqd.is_seq = 1;
805 ppa_list = nvm_rq_to_ppa_list(&rqd);
802 806
803 for (i = 0; i < lm->smeta_sec; i++, paddr++) { 807 for (i = 0; i < lm->smeta_sec; i++, paddr++) {
804 struct pblk_sec_meta *meta = pblk_get_meta(pblk, 808 struct pblk_sec_meta *meta = pblk_get_meta(pblk,
805 rqd.meta_list, i); 809 rqd.meta_list, i);
806 810
807 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); 811 ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
808 meta->lba = lba_list[paddr] = addr_empty; 812 meta->lba = lba_list[paddr] = addr_empty;
809 } 813 }
810 814
@@ -834,8 +838,9 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
834 struct nvm_geo *geo = &dev->geo; 838 struct nvm_geo *geo = &dev->geo;
835 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 839 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
836 struct pblk_line_meta *lm = &pblk->lm; 840 struct pblk_line_meta *lm = &pblk->lm;
837 void *ppa_list, *meta_list; 841 void *ppa_list_buf, *meta_list;
838 struct bio *bio; 842 struct bio *bio;
843 struct ppa_addr *ppa_list;
839 struct nvm_rq rqd; 844 struct nvm_rq rqd;
840 u64 paddr = line->emeta_ssec; 845 u64 paddr = line->emeta_ssec;
841 dma_addr_t dma_ppa_list, dma_meta_list; 846 dma_addr_t dma_ppa_list, dma_meta_list;
@@ -851,7 +856,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
851 if (!meta_list) 856 if (!meta_list)
852 return -ENOMEM; 857 return -ENOMEM;
853 858
854 ppa_list = meta_list + pblk_dma_meta_size(pblk); 859 ppa_list_buf = meta_list + pblk_dma_meta_size(pblk);
855 dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk); 860 dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
856 861
857next_rq: 862next_rq:
@@ -872,11 +877,12 @@ next_rq:
872 877
873 rqd.bio = bio; 878 rqd.bio = bio;
874 rqd.meta_list = meta_list; 879 rqd.meta_list = meta_list;
875 rqd.ppa_list = ppa_list; 880 rqd.ppa_list = ppa_list_buf;
876 rqd.dma_meta_list = dma_meta_list; 881 rqd.dma_meta_list = dma_meta_list;
877 rqd.dma_ppa_list = dma_ppa_list; 882 rqd.dma_ppa_list = dma_ppa_list;
878 rqd.opcode = NVM_OP_PREAD; 883 rqd.opcode = NVM_OP_PREAD;
879 rqd.nr_ppas = rq_ppas; 884 rqd.nr_ppas = rq_ppas;
885 ppa_list = nvm_rq_to_ppa_list(&rqd);
880 886
881 for (i = 0; i < rqd.nr_ppas; ) { 887 for (i = 0; i < rqd.nr_ppas; ) {
882 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id); 888 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, line_id);
@@ -904,7 +910,7 @@ next_rq:
904 } 910 }
905 911
906 for (j = 0; j < min; j++, i++, paddr++) 912 for (j = 0; j < min; j++, i++, paddr++)
907 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id); 913 ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
908 } 914 }
909 915
910 ret = pblk_submit_io_sync(pblk, &rqd); 916 ret = pblk_submit_io_sync(pblk, &rqd);
@@ -916,8 +922,11 @@ next_rq:
916 922
917 atomic_dec(&pblk->inflight_io); 923 atomic_dec(&pblk->inflight_io);
918 924
919 if (rqd.error) 925 if (rqd.error && rqd.error != NVM_RSP_WARN_HIGHECC) {
920 pblk_log_read_err(pblk, &rqd); 926 pblk_log_read_err(pblk, &rqd);
927 ret = -EIO;
928 goto free_rqd_dma;
929 }
921 930
922 emeta_buf += rq_len; 931 emeta_buf += rq_len;
923 left_ppas -= rq_ppas; 932 left_ppas -= rq_ppas;
@@ -1162,7 +1171,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
1162 off = bit * geo->ws_opt; 1171 off = bit * geo->ws_opt;
1163 bitmap_set(line->map_bitmap, off, lm->smeta_sec); 1172 bitmap_set(line->map_bitmap, off, lm->smeta_sec);
1164 line->sec_in_line -= lm->smeta_sec; 1173 line->sec_in_line -= lm->smeta_sec;
1165 line->smeta_ssec = off;
1166 line->cur_sec = off + lm->smeta_sec; 1174 line->cur_sec = off + lm->smeta_sec;
1167 1175
1168 if (init && pblk_line_smeta_write(pblk, line, off)) { 1176 if (init && pblk_line_smeta_write(pblk, line, off)) {
@@ -1521,11 +1529,9 @@ void pblk_ppa_to_line_put(struct pblk *pblk, struct ppa_addr ppa)
1521 1529
1522void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd) 1530void pblk_rq_to_line_put(struct pblk *pblk, struct nvm_rq *rqd)
1523{ 1531{
1524 struct ppa_addr *ppa_list; 1532 struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
1525 int i; 1533 int i;
1526 1534
1527 ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
1528
1529 for (i = 0; i < rqd->nr_ppas; i++) 1535 for (i = 0; i < rqd->nr_ppas; i++)
1530 pblk_ppa_to_line_put(pblk, ppa_list[i]); 1536 pblk_ppa_to_line_put(pblk, ppa_list[i]);
1531} 1537}
@@ -1699,6 +1705,14 @@ static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
1699 1705
1700 spin_lock(&line->lock); 1706 spin_lock(&line->lock);
1701 WARN_ON(line->state != PBLK_LINESTATE_GC); 1707 WARN_ON(line->state != PBLK_LINESTATE_GC);
1708 if (line->w_err_gc->has_gc_err) {
1709 spin_unlock(&line->lock);
1710 pblk_err(pblk, "line %d had errors during GC\n", line->id);
1711 pblk_put_line_back(pblk, line);
1712 line->w_err_gc->has_gc_err = 0;
1713 return;
1714 }
1715
1702 line->state = PBLK_LINESTATE_FREE; 1716 line->state = PBLK_LINESTATE_FREE;
1703 trace_pblk_line_state(pblk_disk_name(pblk), line->id, 1717 trace_pblk_line_state(pblk_disk_name(pblk), line->id,
1704 line->state); 1718 line->state);
@@ -2023,7 +2037,7 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
2023 struct ppa_addr ppa_l2p; 2037 struct ppa_addr ppa_l2p;
2024 2038
2025 /* logic error: lba out-of-bounds. Ignore update */ 2039 /* logic error: lba out-of-bounds. Ignore update */
2026 if (!(lba < pblk->rl.nr_secs)) { 2040 if (!(lba < pblk->capacity)) {
2027 WARN(1, "pblk: corrupted L2P map request\n"); 2041 WARN(1, "pblk: corrupted L2P map request\n");
2028 return; 2042 return;
2029 } 2043 }
@@ -2063,7 +2077,7 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new,
2063#endif 2077#endif
2064 2078
2065 /* logic error: lba out-of-bounds. Ignore update */ 2079 /* logic error: lba out-of-bounds. Ignore update */
2066 if (!(lba < pblk->rl.nr_secs)) { 2080 if (!(lba < pblk->capacity)) {
2067 WARN(1, "pblk: corrupted L2P map request\n"); 2081 WARN(1, "pblk: corrupted L2P map request\n");
2068 return 0; 2082 return 0;
2069 } 2083 }
@@ -2109,7 +2123,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
2109 } 2123 }
2110 2124
2111 /* logic error: lba out-of-bounds. Ignore update */ 2125 /* logic error: lba out-of-bounds. Ignore update */
2112 if (!(lba < pblk->rl.nr_secs)) { 2126 if (!(lba < pblk->capacity)) {
2113 WARN(1, "pblk: corrupted L2P map request\n"); 2127 WARN(1, "pblk: corrupted L2P map request\n");
2114 return; 2128 return;
2115 } 2129 }
@@ -2135,8 +2149,8 @@ out:
2135 spin_unlock(&pblk->trans_lock); 2149 spin_unlock(&pblk->trans_lock);
2136} 2150}
2137 2151
2138void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, 2152int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
2139 sector_t blba, int nr_secs) 2153 sector_t blba, int nr_secs, bool *from_cache)
2140{ 2154{
2141 int i; 2155 int i;
2142 2156
@@ -2150,10 +2164,19 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
2150 if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { 2164 if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) {
2151 struct pblk_line *line = pblk_ppa_to_line(pblk, ppa); 2165 struct pblk_line *line = pblk_ppa_to_line(pblk, ppa);
2152 2166
2167 if (i > 0 && *from_cache)
2168 break;
2169 *from_cache = false;
2170
2153 kref_get(&line->ref); 2171 kref_get(&line->ref);
2172 } else {
2173 if (i > 0 && !*from_cache)
2174 break;
2175 *from_cache = true;
2154 } 2176 }
2155 } 2177 }
2156 spin_unlock(&pblk->trans_lock); 2178 spin_unlock(&pblk->trans_lock);
2179 return i;
2157} 2180}
2158 2181
2159void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, 2182void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
@@ -2167,7 +2190,7 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
2167 lba = lba_list[i]; 2190 lba = lba_list[i];
2168 if (lba != ADDR_EMPTY) { 2191 if (lba != ADDR_EMPTY) {
2169 /* logic error: lba out-of-bounds. Ignore update */ 2192 /* logic error: lba out-of-bounds. Ignore update */
2170 if (!(lba < pblk->rl.nr_secs)) { 2193 if (!(lba < pblk->capacity)) {
2171 WARN(1, "pblk: corrupted L2P map request\n"); 2194 WARN(1, "pblk: corrupted L2P map request\n");
2172 continue; 2195 continue;
2173 } 2196 }
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index 26a52ea7ec45..63ee205b41c4 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -59,24 +59,28 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
59 wake_up_process(gc->gc_writer_ts); 59 wake_up_process(gc->gc_writer_ts);
60} 60}
61 61
62static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) 62void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
63{ 63{
64 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 64 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
65 struct list_head *move_list; 65 struct list_head *move_list;
66 66
67 spin_lock(&l_mg->gc_lock);
67 spin_lock(&line->lock); 68 spin_lock(&line->lock);
68 WARN_ON(line->state != PBLK_LINESTATE_GC); 69 WARN_ON(line->state != PBLK_LINESTATE_GC);
69 line->state = PBLK_LINESTATE_CLOSED; 70 line->state = PBLK_LINESTATE_CLOSED;
70 trace_pblk_line_state(pblk_disk_name(pblk), line->id, 71 trace_pblk_line_state(pblk_disk_name(pblk), line->id,
71 line->state); 72 line->state);
73
74 /* We need to reset gc_group in order to ensure that
75 * pblk_line_gc_list will return proper move_list
76 * since right now current line is not on any of the
77 * gc lists.
78 */
79 line->gc_group = PBLK_LINEGC_NONE;
72 move_list = pblk_line_gc_list(pblk, line); 80 move_list = pblk_line_gc_list(pblk, line);
73 spin_unlock(&line->lock); 81 spin_unlock(&line->lock);
74 82 list_add_tail(&line->list, move_list);
75 if (move_list) { 83 spin_unlock(&l_mg->gc_lock);
76 spin_lock(&l_mg->gc_lock);
77 list_add_tail(&line->list, move_list);
78 spin_unlock(&l_mg->gc_lock);
79 }
80} 84}
81 85
82static void pblk_gc_line_ws(struct work_struct *work) 86static void pblk_gc_line_ws(struct work_struct *work)
@@ -84,8 +88,6 @@ static void pblk_gc_line_ws(struct work_struct *work)
84 struct pblk_line_ws *gc_rq_ws = container_of(work, 88 struct pblk_line_ws *gc_rq_ws = container_of(work,
85 struct pblk_line_ws, ws); 89 struct pblk_line_ws, ws);
86 struct pblk *pblk = gc_rq_ws->pblk; 90 struct pblk *pblk = gc_rq_ws->pblk;
87 struct nvm_tgt_dev *dev = pblk->dev;
88 struct nvm_geo *geo = &dev->geo;
89 struct pblk_gc *gc = &pblk->gc; 91 struct pblk_gc *gc = &pblk->gc;
90 struct pblk_line *line = gc_rq_ws->line; 92 struct pblk_line *line = gc_rq_ws->line;
91 struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; 93 struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;
@@ -93,18 +95,10 @@ static void pblk_gc_line_ws(struct work_struct *work)
93 95
94 up(&gc->gc_sem); 96 up(&gc->gc_sem);
95 97
96 gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
97 if (!gc_rq->data) {
98 pblk_err(pblk, "could not GC line:%d (%d/%d)\n",
99 line->id, *line->vsc, gc_rq->nr_secs);
100 goto out;
101 }
102
103 /* Read from GC victim block */ 98 /* Read from GC victim block */
104 ret = pblk_submit_read_gc(pblk, gc_rq); 99 ret = pblk_submit_read_gc(pblk, gc_rq);
105 if (ret) { 100 if (ret) {
106 pblk_err(pblk, "failed GC read in line:%d (err:%d)\n", 101 line->w_err_gc->has_gc_err = 1;
107 line->id, ret);
108 goto out; 102 goto out;
109 } 103 }
110 104
@@ -189,6 +183,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work)
189 struct pblk_line *line = line_ws->line; 183 struct pblk_line *line = line_ws->line;
190 struct pblk_line_mgmt *l_mg = &pblk->l_mg; 184 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
191 struct pblk_line_meta *lm = &pblk->lm; 185 struct pblk_line_meta *lm = &pblk->lm;
186 struct nvm_tgt_dev *dev = pblk->dev;
187 struct nvm_geo *geo = &dev->geo;
192 struct pblk_gc *gc = &pblk->gc; 188 struct pblk_gc *gc = &pblk->gc;
193 struct pblk_line_ws *gc_rq_ws; 189 struct pblk_line_ws *gc_rq_ws;
194 struct pblk_gc_rq *gc_rq; 190 struct pblk_gc_rq *gc_rq;
@@ -247,9 +243,13 @@ next_rq:
247 gc_rq->nr_secs = nr_secs; 243 gc_rq->nr_secs = nr_secs;
248 gc_rq->line = line; 244 gc_rq->line = line;
249 245
246 gc_rq->data = vmalloc(array_size(gc_rq->nr_secs, geo->csecs));
247 if (!gc_rq->data)
248 goto fail_free_gc_rq;
249
250 gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); 250 gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
251 if (!gc_rq_ws) 251 if (!gc_rq_ws)
252 goto fail_free_gc_rq; 252 goto fail_free_gc_data;
253 253
254 gc_rq_ws->pblk = pblk; 254 gc_rq_ws->pblk = pblk;
255 gc_rq_ws->line = line; 255 gc_rq_ws->line = line;
@@ -281,6 +281,8 @@ out:
281 281
282 return; 282 return;
283 283
284fail_free_gc_data:
285 vfree(gc_rq->data);
284fail_free_gc_rq: 286fail_free_gc_rq:
285 kfree(gc_rq); 287 kfree(gc_rq);
286fail_free_lba_list: 288fail_free_lba_list:
@@ -290,8 +292,11 @@ fail_free_invalid_bitmap:
290fail_free_ws: 292fail_free_ws:
291 kfree(line_ws); 293 kfree(line_ws);
292 294
295 /* Line goes back to closed state, so we cannot release additional
296 * reference for line, since we do that only when we want to do
297 * gc to free line state transition.
298 */
293 pblk_put_line_back(pblk, line); 299 pblk_put_line_back(pblk, line);
294 kref_put(&line->ref, pblk_line_put);
295 atomic_dec(&gc->read_inflight_gc); 300 atomic_dec(&gc->read_inflight_gc);
296 301
297 pblk_err(pblk, "failed to GC line %d\n", line->id); 302 pblk_err(pblk, "failed to GC line %d\n", line->id);
@@ -355,8 +360,13 @@ static int pblk_gc_read(struct pblk *pblk)
355 360
356 pblk_gc_kick(pblk); 361 pblk_gc_kick(pblk);
357 362
358 if (pblk_gc_line(pblk, line)) 363 if (pblk_gc_line(pblk, line)) {
359 pblk_err(pblk, "failed to GC line %d\n", line->id); 364 pblk_err(pblk, "failed to GC line %d\n", line->id);
365 /* rollback */
366 spin_lock(&gc->r_lock);
367 list_add_tail(&line->list, &gc->r_list);
368 spin_unlock(&gc->r_lock);
369 }
360 370
361 return 0; 371 return 0;
362} 372}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 8b643d0bffae..b351c7f002de 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -47,33 +47,6 @@ static struct pblk_global_caches pblk_caches = {
47 47
48struct bio_set pblk_bio_set; 48struct bio_set pblk_bio_set;
49 49
50static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
51 struct bio *bio)
52{
53 int ret;
54
55 /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
56 * constraint. Writes can be of arbitrary size.
57 */
58 if (bio_data_dir(bio) == READ) {
59 blk_queue_split(q, &bio);
60 ret = pblk_submit_read(pblk, bio);
61 if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
62 bio_put(bio);
63
64 return ret;
65 }
66
67 /* Prevent deadlock in the case of a modest LUN configuration and large
68 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
69 * available for user I/O.
70 */
71 if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
72 blk_queue_split(q, &bio);
73
74 return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
75}
76
77static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) 50static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
78{ 51{
79 struct pblk *pblk = q->queuedata; 52 struct pblk *pblk = q->queuedata;
@@ -86,13 +59,21 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
86 } 59 }
87 } 60 }
88 61
89 switch (pblk_rw_io(q, pblk, bio)) { 62 /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
90 case NVM_IO_ERR: 63 * constraint. Writes can be of arbitrary size.
91 bio_io_error(bio); 64 */
92 break; 65 if (bio_data_dir(bio) == READ) {
93 case NVM_IO_DONE: 66 blk_queue_split(q, &bio);
94 bio_endio(bio); 67 pblk_submit_read(pblk, bio);
95 break; 68 } else {
69 /* Prevent deadlock in the case of a modest LUN configuration
70 * and large user I/Os. Unless stalled, the rate limiter
71 * leaves at least 256KB available for user I/O.
72 */
73 if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
74 blk_queue_split(q, &bio);
75
76 pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
96 } 77 }
97 78
98 return BLK_QC_T_NONE; 79 return BLK_QC_T_NONE;
@@ -105,7 +86,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk)
105 if (pblk->addrf_len < 32) 86 if (pblk->addrf_len < 32)
106 entry_size = 4; 87 entry_size = 4;
107 88
108 return entry_size * pblk->rl.nr_secs; 89 return entry_size * pblk->capacity;
109} 90}
110 91
111#ifdef CONFIG_NVM_PBLK_DEBUG 92#ifdef CONFIG_NVM_PBLK_DEBUG
@@ -164,13 +145,18 @@ static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
164 int ret = 0; 145 int ret = 0;
165 146
166 map_size = pblk_trans_map_size(pblk); 147 map_size = pblk_trans_map_size(pblk);
167 pblk->trans_map = vmalloc(map_size); 148 pblk->trans_map = __vmalloc(map_size, GFP_KERNEL | __GFP_NOWARN
168 if (!pblk->trans_map) 149 | __GFP_RETRY_MAYFAIL | __GFP_HIGHMEM,
150 PAGE_KERNEL);
151 if (!pblk->trans_map) {
152 pblk_err(pblk, "failed to allocate L2P (need %zu of memory)\n",
153 map_size);
169 return -ENOMEM; 154 return -ENOMEM;
155 }
170 156
171 pblk_ppa_set_empty(&ppa); 157 pblk_ppa_set_empty(&ppa);
172 158
173 for (i = 0; i < pblk->rl.nr_secs; i++) 159 for (i = 0; i < pblk->capacity; i++)
174 pblk_trans_map_set(pblk, i, ppa); 160 pblk_trans_map_set(pblk, i, ppa);
175 161
176 ret = pblk_l2p_recover(pblk, factory_init); 162 ret = pblk_l2p_recover(pblk, factory_init);
@@ -701,7 +687,6 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
701 * on user capacity consider only provisioned blocks 687 * on user capacity consider only provisioned blocks
702 */ 688 */
703 pblk->rl.total_blocks = nr_free_chks; 689 pblk->rl.total_blocks = nr_free_chks;
704 pblk->rl.nr_secs = nr_free_chks * geo->clba;
705 690
706 /* Consider sectors used for metadata */ 691 /* Consider sectors used for metadata */
707 sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; 692 sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
@@ -1284,7 +1269,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
1284 1269
1285 pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n", 1270 pblk_info(pblk, "luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
1286 geo->all_luns, pblk->l_mg.nr_lines, 1271 geo->all_luns, pblk->l_mg.nr_lines,
1287 (unsigned long long)pblk->rl.nr_secs, 1272 (unsigned long long)pblk->capacity,
1288 pblk->rwb.nr_entries); 1273 pblk->rwb.nr_entries);
1289 1274
1290 wake_up_process(pblk->writer_ts); 1275 wake_up_process(pblk->writer_ts);
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 7fbc99b60cac..5408e32b2f13 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -162,6 +162,7 @@ int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
162 162
163 *erase_ppa = ppa_list[i]; 163 *erase_ppa = ppa_list[i];
164 erase_ppa->a.blk = e_line->id; 164 erase_ppa->a.blk = e_line->id;
165 erase_ppa->a.reserved = 0;
165 166
166 spin_unlock(&e_line->lock); 167 spin_unlock(&e_line->lock);
167 168
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 03c241b340ea..5abb1705b039 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -642,7 +642,7 @@ try:
642 * be directed to disk. 642 * be directed to disk.
643 */ 643 */
644int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 644int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
645 struct ppa_addr ppa, int bio_iter, bool advanced_bio) 645 struct ppa_addr ppa)
646{ 646{
647 struct pblk *pblk = container_of(rb, struct pblk, rwb); 647 struct pblk *pblk = container_of(rb, struct pblk, rwb);
648 struct pblk_rb_entry *entry; 648 struct pblk_rb_entry *entry;
@@ -673,15 +673,6 @@ int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
673 ret = 0; 673 ret = 0;
674 goto out; 674 goto out;
675 } 675 }
676
677 /* Only advance the bio if it hasn't been advanced already. If advanced,
678 * this bio is at least a partial bio (i.e., it has partially been
679 * filled with data from the cache). If part of the data resides on the
680 * media, we will read later on
681 */
682 if (unlikely(!advanced_bio))
683 bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
684
685 data = bio_data(bio); 676 data = bio_data(bio);
686 memcpy(data, entry->data, rb->seg_size); 677 memcpy(data, entry->data, rb->seg_size);
687 678
@@ -799,8 +790,8 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb)
799 } 790 }
800 791
801out: 792out:
802 spin_unlock(&rb->w_lock);
803 spin_unlock_irq(&rb->s_lock); 793 spin_unlock_irq(&rb->s_lock);
794 spin_unlock(&rb->w_lock);
804 795
805 return ret; 796 return ret;
806} 797}
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 0b7d5fb4548d..d98ea392fe33 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -26,8 +26,7 @@
26 * issued. 26 * issued.
27 */ 27 */
28static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, 28static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
29 sector_t lba, struct ppa_addr ppa, 29 sector_t lba, struct ppa_addr ppa)
30 int bio_iter, bool advanced_bio)
31{ 30{
32#ifdef CONFIG_NVM_PBLK_DEBUG 31#ifdef CONFIG_NVM_PBLK_DEBUG
33 /* Callers must ensure that the ppa points to a cache address */ 32 /* Callers must ensure that the ppa points to a cache address */
@@ -35,73 +34,75 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
35 BUG_ON(!pblk_addr_in_cache(ppa)); 34 BUG_ON(!pblk_addr_in_cache(ppa));
36#endif 35#endif
37 36
38 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa, 37 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
39 bio_iter, advanced_bio);
40} 38}
41 39
42static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, 40static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
43 struct bio *bio, sector_t blba, 41 struct bio *bio, sector_t blba,
44 unsigned long *read_bitmap) 42 bool *from_cache)
45{ 43{
46 void *meta_list = rqd->meta_list; 44 void *meta_list = rqd->meta_list;
47 struct ppa_addr ppas[NVM_MAX_VLBA]; 45 int nr_secs, i;
48 int nr_secs = rqd->nr_ppas;
49 bool advanced_bio = false;
50 int i, j = 0;
51 46
52 pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); 47retry:
48 nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
49 from_cache);
50
51 if (!*from_cache)
52 goto end;
53 53
54 for (i = 0; i < nr_secs; i++) { 54 for (i = 0; i < nr_secs; i++) {
55 struct ppa_addr p = ppas[i];
56 struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i); 55 struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
57 sector_t lba = blba + i; 56 sector_t lba = blba + i;
58 57
59retry: 58 if (pblk_ppa_empty(rqd->ppa_list[i])) {
60 if (pblk_ppa_empty(p)) {
61 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); 59 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
62 60
63 WARN_ON(test_and_set_bit(i, read_bitmap));
64 meta->lba = addr_empty; 61 meta->lba = addr_empty;
65 62 } else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
66 if (unlikely(!advanced_bio)) { 63 /*
67 bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); 64 * Try to read from write buffer. The address is later
68 advanced_bio = true; 65 * checked on the write buffer to prevent retrieving
66 * overwritten data.
67 */
68 if (!pblk_read_from_cache(pblk, bio, lba,
69 rqd->ppa_list[i])) {
70 if (i == 0) {
71 /*
72 * We didn't call with bio_advance()
73 * yet, so we can just retry.
74 */
75 goto retry;
76 } else {
77 /*
78 * We already call bio_advance()
79 * so we cannot retry and we need
80 * to quit that function in order
81 * to allow caller to handle the bio
82 * splitting in the current sector
83 * position.
84 */
85 nr_secs = i;
86 goto end;
87 }
69 } 88 }
70
71 goto next;
72 }
73
74 /* Try to read from write buffer. The address is later checked
75 * on the write buffer to prevent retrieving overwritten data.
76 */
77 if (pblk_addr_in_cache(p)) {
78 if (!pblk_read_from_cache(pblk, bio, lba, p, i,
79 advanced_bio)) {
80 pblk_lookup_l2p_seq(pblk, &p, lba, 1);
81 goto retry;
82 }
83 WARN_ON(test_and_set_bit(i, read_bitmap));
84 meta->lba = cpu_to_le64(lba); 89 meta->lba = cpu_to_le64(lba);
85 advanced_bio = true;
86#ifdef CONFIG_NVM_PBLK_DEBUG 90#ifdef CONFIG_NVM_PBLK_DEBUG
87 atomic_long_inc(&pblk->cache_reads); 91 atomic_long_inc(&pblk->cache_reads);
88#endif 92#endif
89 } else {
90 /* Read from media non-cached sectors */
91 rqd->ppa_list[j++] = p;
92 } 93 }
93 94 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
94next:
95 if (advanced_bio)
96 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
97 } 95 }
98 96
97end:
99 if (pblk_io_aligned(pblk, nr_secs)) 98 if (pblk_io_aligned(pblk, nr_secs))
100 rqd->is_seq = 1; 99 rqd->is_seq = 1;
101 100
102#ifdef CONFIG_NVM_PBLK_DEBUG 101#ifdef CONFIG_NVM_PBLK_DEBUG
103 atomic_long_add(nr_secs, &pblk->inflight_reads); 102 atomic_long_add(nr_secs, &pblk->inflight_reads);
104#endif 103#endif
104
105 return nr_secs;
105} 106}
106 107
107 108
@@ -175,12 +176,12 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
175 WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n"); 176 WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
176} 177}
177 178
178static void pblk_end_user_read(struct bio *bio) 179static void pblk_end_user_read(struct bio *bio, int error)
179{ 180{
180#ifdef CONFIG_NVM_PBLK_DEBUG 181 if (error && error != NVM_RSP_WARN_HIGHECC)
181 WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); 182 bio_io_error(bio);
182#endif 183 else
183 bio_endio(bio); 184 bio_endio(bio);
184} 185}
185 186
186static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, 187static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
@@ -197,9 +198,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
197 pblk_log_read_err(pblk, rqd); 198 pblk_log_read_err(pblk, rqd);
198 199
199 pblk_read_check_seq(pblk, rqd, r_ctx->lba); 200 pblk_read_check_seq(pblk, rqd, r_ctx->lba);
200 201 bio_put(int_bio);
201 if (int_bio)
202 bio_put(int_bio);
203 202
204 if (put_line) 203 if (put_line)
205 pblk_rq_to_line_put(pblk, rqd); 204 pblk_rq_to_line_put(pblk, rqd);
@@ -219,188 +218,17 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
219 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); 218 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
220 struct bio *bio = (struct bio *)r_ctx->private; 219 struct bio *bio = (struct bio *)r_ctx->private;
221 220
222 pblk_end_user_read(bio); 221 pblk_end_user_read(bio, rqd->error);
223 __pblk_end_io_read(pblk, rqd, true); 222 __pblk_end_io_read(pblk, rqd, true);
224} 223}
225 224
226static void pblk_end_partial_read(struct nvm_rq *rqd)
227{
228 struct pblk *pblk = rqd->private;
229 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
230 struct pblk_pr_ctx *pr_ctx = r_ctx->private;
231 struct pblk_sec_meta *meta;
232 struct bio *new_bio = rqd->bio;
233 struct bio *bio = pr_ctx->orig_bio;
234 void *meta_list = rqd->meta_list;
235 unsigned long *read_bitmap = pr_ctx->bitmap;
236 struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT;
237 struct bvec_iter new_iter = BVEC_ITER_ALL_INIT;
238 int nr_secs = pr_ctx->orig_nr_secs;
239 int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
240 void *src_p, *dst_p;
241 int bit, i;
242
243 if (unlikely(nr_holes == 1)) {
244 struct ppa_addr ppa;
245
246 ppa = rqd->ppa_addr;
247 rqd->ppa_list = pr_ctx->ppa_ptr;
248 rqd->dma_ppa_list = pr_ctx->dma_ppa_list;
249 rqd->ppa_list[0] = ppa;
250 }
251
252 for (i = 0; i < nr_secs; i++) {
253 meta = pblk_get_meta(pblk, meta_list, i);
254 pr_ctx->lba_list_media[i] = le64_to_cpu(meta->lba);
255 meta->lba = cpu_to_le64(pr_ctx->lba_list_mem[i]);
256 }
257
258 /* Fill the holes in the original bio */
259 i = 0;
260 for (bit = 0; bit < nr_secs; bit++) {
261 if (!test_bit(bit, read_bitmap)) {
262 struct bio_vec dst_bv, src_bv;
263 struct pblk_line *line;
264
265 line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
266 kref_put(&line->ref, pblk_line_put);
267
268 meta = pblk_get_meta(pblk, meta_list, bit);
269 meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
270
271 dst_bv = bio_iter_iovec(bio, orig_iter);
272 src_bv = bio_iter_iovec(new_bio, new_iter);
273
274 src_p = kmap_atomic(src_bv.bv_page);
275 dst_p = kmap_atomic(dst_bv.bv_page);
276
277 memcpy(dst_p + dst_bv.bv_offset,
278 src_p + src_bv.bv_offset,
279 PBLK_EXPOSED_PAGE_SIZE);
280
281 kunmap_atomic(src_p);
282 kunmap_atomic(dst_p);
283
284 flush_dcache_page(dst_bv.bv_page);
285 mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
286
287 bio_advance_iter(new_bio, &new_iter,
288 PBLK_EXPOSED_PAGE_SIZE);
289 i++;
290 }
291 bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE);
292 }
293
294 bio_put(new_bio);
295 kfree(pr_ctx);
296
297 /* restore original request */
298 rqd->bio = NULL;
299 rqd->nr_ppas = nr_secs;
300
301 bio_endio(bio);
302 __pblk_end_io_read(pblk, rqd, false);
303}
304
305static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
306 unsigned int bio_init_idx,
307 unsigned long *read_bitmap,
308 int nr_holes)
309{
310 void *meta_list = rqd->meta_list;
311 struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
312 struct pblk_pr_ctx *pr_ctx;
313 struct bio *new_bio, *bio = r_ctx->private;
314 int nr_secs = rqd->nr_ppas;
315 int i;
316
317 new_bio = bio_alloc(GFP_KERNEL, nr_holes);
318
319 if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
320 goto fail_bio_put;
321
322 if (nr_holes != new_bio->bi_vcnt) {
323 WARN_ONCE(1, "pblk: malformed bio\n");
324 goto fail_free_pages;
325 }
326
327 pr_ctx = kzalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
328 if (!pr_ctx)
329 goto fail_free_pages;
330
331 for (i = 0; i < nr_secs; i++) {
332 struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
333
334 pr_ctx->lba_list_mem[i] = le64_to_cpu(meta->lba);
335 }
336
337 new_bio->bi_iter.bi_sector = 0; /* internal bio */
338 bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
339
340 rqd->bio = new_bio;
341 rqd->nr_ppas = nr_holes;
342
343 pr_ctx->orig_bio = bio;
344 bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA);
345 pr_ctx->bio_init_idx = bio_init_idx;
346 pr_ctx->orig_nr_secs = nr_secs;
347 r_ctx->private = pr_ctx;
348
349 if (unlikely(nr_holes == 1)) {
350 pr_ctx->ppa_ptr = rqd->ppa_list;
351 pr_ctx->dma_ppa_list = rqd->dma_ppa_list;
352 rqd->ppa_addr = rqd->ppa_list[0];
353 }
354 return 0;
355
356fail_free_pages:
357 pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
358fail_bio_put:
359 bio_put(new_bio);
360
361 return -ENOMEM;
362}
363
364static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
365 unsigned int bio_init_idx,
366 unsigned long *read_bitmap, int nr_secs)
367{
368 int nr_holes;
369 int ret;
370
371 nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
372
373 if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap,
374 nr_holes))
375 return NVM_IO_ERR;
376
377 rqd->end_io = pblk_end_partial_read;
378
379 ret = pblk_submit_io(pblk, rqd);
380 if (ret) {
381 bio_put(rqd->bio);
382 pblk_err(pblk, "partial read IO submission failed\n");
383 goto err;
384 }
385
386 return NVM_IO_OK;
387
388err:
389 pblk_err(pblk, "failed to perform partial read\n");
390
391 /* Free allocated pages in new bio */
392 pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt);
393 __pblk_end_io_read(pblk, rqd, false);
394 return NVM_IO_ERR;
395}
396
397static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio, 225static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
398 sector_t lba, unsigned long *read_bitmap) 226 sector_t lba, bool *from_cache)
399{ 227{
400 struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0); 228 struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
401 struct ppa_addr ppa; 229 struct ppa_addr ppa;
402 230
403 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); 231 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
404 232
405#ifdef CONFIG_NVM_PBLK_DEBUG 233#ifdef CONFIG_NVM_PBLK_DEBUG
406 atomic_long_inc(&pblk->inflight_reads); 234 atomic_long_inc(&pblk->inflight_reads);
@@ -410,7 +238,6 @@ retry:
410 if (pblk_ppa_empty(ppa)) { 238 if (pblk_ppa_empty(ppa)) {
411 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); 239 __le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
412 240
413 WARN_ON(test_and_set_bit(0, read_bitmap));
414 meta->lba = addr_empty; 241 meta->lba = addr_empty;
415 return; 242 return;
416 } 243 }
@@ -419,12 +246,11 @@ retry:
419 * write buffer to prevent retrieving overwritten data. 246 * write buffer to prevent retrieving overwritten data.
420 */ 247 */
421 if (pblk_addr_in_cache(ppa)) { 248 if (pblk_addr_in_cache(ppa)) {
422 if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0, 1)) { 249 if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
423 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); 250 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
424 goto retry; 251 goto retry;
425 } 252 }
426 253
427 WARN_ON(test_and_set_bit(0, read_bitmap));
428 meta->lba = cpu_to_le64(lba); 254 meta->lba = cpu_to_le64(lba);
429 255
430#ifdef CONFIG_NVM_PBLK_DEBUG 256#ifdef CONFIG_NVM_PBLK_DEBUG
@@ -435,95 +261,92 @@ retry:
435 } 261 }
436} 262}
437 263
438int pblk_submit_read(struct pblk *pblk, struct bio *bio) 264void pblk_submit_read(struct pblk *pblk, struct bio *bio)
439{ 265{
440 struct nvm_tgt_dev *dev = pblk->dev; 266 struct nvm_tgt_dev *dev = pblk->dev;
441 struct request_queue *q = dev->q; 267 struct request_queue *q = dev->q;
442 sector_t blba = pblk_get_lba(bio); 268 sector_t blba = pblk_get_lba(bio);
443 unsigned int nr_secs = pblk_get_secs(bio); 269 unsigned int nr_secs = pblk_get_secs(bio);
270 bool from_cache;
444 struct pblk_g_ctx *r_ctx; 271 struct pblk_g_ctx *r_ctx;
445 struct nvm_rq *rqd; 272 struct nvm_rq *rqd;
446 unsigned int bio_init_idx; 273 struct bio *int_bio, *split_bio;
447 DECLARE_BITMAP(read_bitmap, NVM_MAX_VLBA);
448 int ret = NVM_IO_ERR;
449 274
450 generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio), 275 generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
451 &pblk->disk->part0); 276 &pblk->disk->part0);
452 277
453 bitmap_zero(read_bitmap, nr_secs);
454
455 rqd = pblk_alloc_rqd(pblk, PBLK_READ); 278 rqd = pblk_alloc_rqd(pblk, PBLK_READ);
456 279
457 rqd->opcode = NVM_OP_PREAD; 280 rqd->opcode = NVM_OP_PREAD;
458 rqd->nr_ppas = nr_secs; 281 rqd->nr_ppas = nr_secs;
459 rqd->bio = NULL; /* cloned bio if needed */
460 rqd->private = pblk; 282 rqd->private = pblk;
461 rqd->end_io = pblk_end_io_read; 283 rqd->end_io = pblk_end_io_read;
462 284
463 r_ctx = nvm_rq_to_pdu(rqd); 285 r_ctx = nvm_rq_to_pdu(rqd);
464 r_ctx->start_time = jiffies; 286 r_ctx->start_time = jiffies;
465 r_ctx->lba = blba; 287 r_ctx->lba = blba;
466 r_ctx->private = bio; /* original bio */
467 288
468 /* Save the index for this bio's start. This is needed in case 289 if (pblk_alloc_rqd_meta(pblk, rqd)) {
469 * we need to fill a partial read. 290 bio_io_error(bio);
470 */ 291 pblk_free_rqd(pblk, rqd, PBLK_READ);
471 bio_init_idx = pblk_get_bi_idx(bio); 292 return;
293 }
472 294
473 if (pblk_alloc_rqd_meta(pblk, rqd)) 295 /* Clone read bio to deal internally with:
474 goto fail_rqd_free; 296 * -read errors when reading from drive
297 * -bio_advance() calls during cache reads
298 */
299 int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
475 300
476 if (nr_secs > 1) 301 if (nr_secs > 1)
477 pblk_read_ppalist_rq(pblk, rqd, bio, blba, read_bitmap); 302 nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
303 &from_cache);
478 else 304 else
479 pblk_read_rq(pblk, rqd, bio, blba, read_bitmap); 305 pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
480 306
481 if (bitmap_full(read_bitmap, nr_secs)) { 307split_retry:
308 r_ctx->private = bio; /* original bio */
309 rqd->bio = int_bio; /* internal bio */
310
311 if (from_cache && nr_secs == rqd->nr_ppas) {
312 /* All data was read from cache, we can complete the IO. */
313 pblk_end_user_read(bio, 0);
482 atomic_inc(&pblk->inflight_io); 314 atomic_inc(&pblk->inflight_io);
483 __pblk_end_io_read(pblk, rqd, false); 315 __pblk_end_io_read(pblk, rqd, false);
484 return NVM_IO_DONE; 316 } else if (nr_secs != rqd->nr_ppas) {
485 } 317 /* The read bio request could be partially filled by the write
486 318 * buffer, but there are some holes that need to be read from
487 /* All sectors are to be read from the device */ 319 * the drive. In order to handle this, we will use block layer
488 if (bitmap_empty(read_bitmap, rqd->nr_ppas)) { 320 * mechanism to split this request in to smaller ones and make
489 struct bio *int_bio = NULL; 321 * a chain of it.
322 */
323 split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
324 &pblk_bio_set);
325 bio_chain(split_bio, bio);
326 generic_make_request(bio);
327
328 /* New bio contains first N sectors of the previous one, so
329 * we can continue to use existing rqd, but we need to shrink
330 * the number of PPAs in it. New bio is also guaranteed that
331 * it contains only either data from cache or from drive, newer
332 * mix of them.
333 */
334 bio = split_bio;
335 rqd->nr_ppas = nr_secs;
336 if (rqd->nr_ppas == 1)
337 rqd->ppa_addr = rqd->ppa_list[0];
490 338
491 /* Clone read bio to deal with read errors internally */ 339 /* Recreate int_bio - existing might have some needed internal
340 * fields modified already.
341 */
342 bio_put(int_bio);
492 int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set); 343 int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
493 if (!int_bio) { 344 goto split_retry;
494 pblk_err(pblk, "could not clone read bio\n"); 345 } else if (pblk_submit_io(pblk, rqd)) {
495 goto fail_end_io; 346 /* Submitting IO to drive failed, let's report an error */
496 } 347 rqd->error = -ENODEV;
497 348 pblk_end_io_read(rqd);
498 rqd->bio = int_bio;
499
500 if (pblk_submit_io(pblk, rqd)) {
501 pblk_err(pblk, "read IO submission failed\n");
502 ret = NVM_IO_ERR;
503 goto fail_end_io;
504 }
505
506 return NVM_IO_OK;
507 } 349 }
508
509 /* The read bio request could be partially filled by the write buffer,
510 * but there are some holes that need to be read from the drive.
511 */
512 ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, read_bitmap,
513 nr_secs);
514 if (ret)
515 goto fail_meta_free;
516
517 return NVM_IO_OK;
518
519fail_meta_free:
520 nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
521fail_rqd_free:
522 pblk_free_rqd(pblk, rqd, PBLK_READ);
523 return ret;
524fail_end_io:
525 __pblk_end_io_read(pblk, rqd, false);
526 return ret;
527} 350}
528 351
529static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, 352static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
@@ -568,7 +391,7 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
568 goto out; 391 goto out;
569 392
570 /* logic error: lba out-of-bounds */ 393 /* logic error: lba out-of-bounds */
571 if (lba >= pblk->rl.nr_secs) { 394 if (lba >= pblk->capacity) {
572 WARN(1, "pblk: read lba out of bounds\n"); 395 WARN(1, "pblk: read lba out of bounds\n");
573 goto out; 396 goto out;
574 } 397 }
@@ -642,7 +465,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
642 465
643 if (pblk_submit_io_sync(pblk, &rqd)) { 466 if (pblk_submit_io_sync(pblk, &rqd)) {
644 ret = -EIO; 467 ret = -EIO;
645 pblk_err(pblk, "GC read request failed\n");
646 goto err_free_bio; 468 goto err_free_bio;
647 } 469 }
648 470
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index d86f580036d3..e6dda04de144 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -93,10 +93,24 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
93static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line, 93static void pblk_update_line_wp(struct pblk *pblk, struct pblk_line *line,
94 u64 written_secs) 94 u64 written_secs)
95{ 95{
96 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
96 int i; 97 int i;
97 98
98 for (i = 0; i < written_secs; i += pblk->min_write_pgs) 99 for (i = 0; i < written_secs; i += pblk->min_write_pgs)
99 pblk_alloc_page(pblk, line, pblk->min_write_pgs); 100 __pblk_alloc_page(pblk, line, pblk->min_write_pgs);
101
102 spin_lock(&l_mg->free_lock);
103 if (written_secs > line->left_msecs) {
104 /*
105 * We have all data sectors written
106 * and some emeta sectors written too.
107 */
108 line->left_msecs = 0;
109 } else {
110 /* We have only some data sectors written. */
111 line->left_msecs -= written_secs;
112 }
113 spin_unlock(&l_mg->free_lock);
100} 114}
101 115
102static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line) 116static u64 pblk_sec_in_open_line(struct pblk *pblk, struct pblk_line *line)
@@ -165,6 +179,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
165 struct pblk_pad_rq *pad_rq; 179 struct pblk_pad_rq *pad_rq;
166 struct nvm_rq *rqd; 180 struct nvm_rq *rqd;
167 struct bio *bio; 181 struct bio *bio;
182 struct ppa_addr *ppa_list;
168 void *data; 183 void *data;
169 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); 184 __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
170 u64 w_ptr = line->cur_sec; 185 u64 w_ptr = line->cur_sec;
@@ -194,7 +209,7 @@ next_pad_rq:
194 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false); 209 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
195 if (rq_ppas < pblk->min_write_pgs) { 210 if (rq_ppas < pblk->min_write_pgs) {
196 pblk_err(pblk, "corrupted pad line %d\n", line->id); 211 pblk_err(pblk, "corrupted pad line %d\n", line->id);
197 goto fail_free_pad; 212 goto fail_complete;
198 } 213 }
199 214
200 rq_len = rq_ppas * geo->csecs; 215 rq_len = rq_ppas * geo->csecs;
@@ -203,7 +218,7 @@ next_pad_rq:
203 PBLK_VMALLOC_META, GFP_KERNEL); 218 PBLK_VMALLOC_META, GFP_KERNEL);
204 if (IS_ERR(bio)) { 219 if (IS_ERR(bio)) {
205 ret = PTR_ERR(bio); 220 ret = PTR_ERR(bio);
206 goto fail_free_pad; 221 goto fail_complete;
207 } 222 }
208 223
209 bio->bi_iter.bi_sector = 0; /* internal bio */ 224 bio->bi_iter.bi_sector = 0; /* internal bio */
@@ -212,8 +227,11 @@ next_pad_rq:
212 rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); 227 rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
213 228
214 ret = pblk_alloc_rqd_meta(pblk, rqd); 229 ret = pblk_alloc_rqd_meta(pblk, rqd);
215 if (ret) 230 if (ret) {
216 goto fail_free_rqd; 231 pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
232 bio_put(bio);
233 goto fail_complete;
234 }
217 235
218 rqd->bio = bio; 236 rqd->bio = bio;
219 rqd->opcode = NVM_OP_PWRITE; 237 rqd->opcode = NVM_OP_PWRITE;
@@ -222,6 +240,7 @@ next_pad_rq:
222 rqd->end_io = pblk_end_io_recov; 240 rqd->end_io = pblk_end_io_recov;
223 rqd->private = pad_rq; 241 rqd->private = pad_rq;
224 242
243 ppa_list = nvm_rq_to_ppa_list(rqd);
225 meta_list = rqd->meta_list; 244 meta_list = rqd->meta_list;
226 245
227 for (i = 0; i < rqd->nr_ppas; ) { 246 for (i = 0; i < rqd->nr_ppas; ) {
@@ -249,18 +268,21 @@ next_pad_rq:
249 lba_list[w_ptr] = addr_empty; 268 lba_list[w_ptr] = addr_empty;
250 meta = pblk_get_meta(pblk, meta_list, i); 269 meta = pblk_get_meta(pblk, meta_list, i);
251 meta->lba = addr_empty; 270 meta->lba = addr_empty;
252 rqd->ppa_list[i] = dev_ppa; 271 ppa_list[i] = dev_ppa;
253 } 272 }
254 } 273 }
255 274
256 kref_get(&pad_rq->ref); 275 kref_get(&pad_rq->ref);
257 pblk_down_chunk(pblk, rqd->ppa_list[0]); 276 pblk_down_chunk(pblk, ppa_list[0]);
258 277
259 ret = pblk_submit_io(pblk, rqd); 278 ret = pblk_submit_io(pblk, rqd);
260 if (ret) { 279 if (ret) {
261 pblk_err(pblk, "I/O submission failed: %d\n", ret); 280 pblk_err(pblk, "I/O submission failed: %d\n", ret);
262 pblk_up_chunk(pblk, rqd->ppa_list[0]); 281 pblk_up_chunk(pblk, ppa_list[0]);
263 goto fail_free_rqd; 282 kref_put(&pad_rq->ref, pblk_recov_complete);
283 pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
284 bio_put(bio);
285 goto fail_complete;
264 } 286 }
265 287
266 left_line_ppas -= rq_ppas; 288 left_line_ppas -= rq_ppas;
@@ -268,13 +290,9 @@ next_pad_rq:
268 if (left_ppas && left_line_ppas) 290 if (left_ppas && left_line_ppas)
269 goto next_pad_rq; 291 goto next_pad_rq;
270 292
293fail_complete:
271 kref_put(&pad_rq->ref, pblk_recov_complete); 294 kref_put(&pad_rq->ref, pblk_recov_complete);
272 295 wait_for_completion(&pad_rq->wait);
273 if (!wait_for_completion_io_timeout(&pad_rq->wait,
274 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
275 pblk_err(pblk, "pad write timed out\n");
276 ret = -ETIME;
277 }
278 296
279 if (!pblk_line_is_full(line)) 297 if (!pblk_line_is_full(line))
280 pblk_err(pblk, "corrupted padded line: %d\n", line->id); 298 pblk_err(pblk, "corrupted padded line: %d\n", line->id);
@@ -283,14 +301,6 @@ next_pad_rq:
283free_rq: 301free_rq:
284 kfree(pad_rq); 302 kfree(pad_rq);
285 return ret; 303 return ret;
286
287fail_free_rqd:
288 pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
289 bio_put(bio);
290fail_free_pad:
291 kfree(pad_rq);
292 vfree(data);
293 return ret;
294} 304}
295 305
296static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line) 306static int pblk_pad_distance(struct pblk *pblk, struct pblk_line *line)
@@ -412,6 +422,7 @@ retry_rq:
412 rqd->ppa_list = ppa_list; 422 rqd->ppa_list = ppa_list;
413 rqd->dma_ppa_list = dma_ppa_list; 423 rqd->dma_ppa_list = dma_ppa_list;
414 rqd->dma_meta_list = dma_meta_list; 424 rqd->dma_meta_list = dma_meta_list;
425 ppa_list = nvm_rq_to_ppa_list(rqd);
415 426
416 if (pblk_io_aligned(pblk, rq_ppas)) 427 if (pblk_io_aligned(pblk, rq_ppas))
417 rqd->is_seq = 1; 428 rqd->is_seq = 1;
@@ -430,7 +441,7 @@ retry_rq:
430 } 441 }
431 442
432 for (j = 0; j < pblk->min_write_pgs; j++, i++) 443 for (j = 0; j < pblk->min_write_pgs; j++, i++)
433 rqd->ppa_list[i] = 444 ppa_list[i] =
434 addr_to_gen_ppa(pblk, paddr + j, line->id); 445 addr_to_gen_ppa(pblk, paddr + j, line->id);
435 } 446 }
436 447
@@ -444,7 +455,7 @@ retry_rq:
444 atomic_dec(&pblk->inflight_io); 455 atomic_dec(&pblk->inflight_io);
445 456
446 /* If a read fails, do a best effort by padding the line and retrying */ 457 /* If a read fails, do a best effort by padding the line and retrying */
447 if (rqd->error) { 458 if (rqd->error && rqd->error != NVM_RSP_WARN_HIGHECC) {
448 int pad_distance, ret; 459 int pad_distance, ret;
449 460
450 if (padded) { 461 if (padded) {
@@ -474,11 +485,11 @@ retry_rq:
474 485
475 lba_list[paddr++] = cpu_to_le64(lba); 486 lba_list[paddr++] = cpu_to_le64(lba);
476 487
477 if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) 488 if (lba == ADDR_EMPTY || lba >= pblk->capacity)
478 continue; 489 continue;
479 490
480 line->nr_valid_lbas++; 491 line->nr_valid_lbas++;
481 pblk_update_map(pblk, lba, rqd->ppa_list[i]); 492 pblk_update_map(pblk, lba, ppa_list[i]);
482 } 493 }
483 494
484 left_ppas -= rq_ppas; 495 left_ppas -= rq_ppas;
@@ -647,10 +658,12 @@ static int pblk_line_was_written(struct pblk_line *line,
647 bppa = pblk->luns[smeta_blk].bppa; 658 bppa = pblk->luns[smeta_blk].bppa;
648 chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)]; 659 chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
649 660
650 if (chunk->state & NVM_CHK_ST_FREE) 661 if (chunk->state & NVM_CHK_ST_CLOSED ||
651 return 0; 662 (chunk->state & NVM_CHK_ST_OPEN
663 && chunk->wp >= lm->smeta_sec))
664 return 1;
652 665
653 return 1; 666 return 0;
654} 667}
655 668
656static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line) 669static bool pblk_line_is_open(struct pblk *pblk, struct pblk_line *line)
@@ -844,6 +857,7 @@ next:
844 spin_unlock(&l_mg->free_lock); 857 spin_unlock(&l_mg->free_lock);
845 } else { 858 } else {
846 spin_lock(&l_mg->free_lock); 859 spin_lock(&l_mg->free_lock);
860 l_mg->data_line = data_line;
847 /* Allocate next line for preparation */ 861 /* Allocate next line for preparation */
848 l_mg->data_next = pblk_line_get(pblk); 862 l_mg->data_next = pblk_line_get(pblk);
849 if (l_mg->data_next) { 863 if (l_mg->data_next) {
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 6593deab52da..4e63f9b5954c 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -228,6 +228,7 @@ static void pblk_submit_rec(struct work_struct *work)
228 mempool_free(recovery, &pblk->rec_pool); 228 mempool_free(recovery, &pblk->rec_pool);
229 229
230 atomic_dec(&pblk->inflight_io); 230 atomic_dec(&pblk->inflight_io);
231 pblk_write_kick(pblk);
231} 232}
232 233
233 234
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index ac3ab778e976..a67855387f53 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -43,8 +43,6 @@
43 43
44#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) 44#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
45 45
46#define PBLK_COMMAND_TIMEOUT_MS 30000
47
48/* Max 512 LUNs per device */ 46/* Max 512 LUNs per device */
49#define PBLK_MAX_LUNS_BITMAP (4) 47#define PBLK_MAX_LUNS_BITMAP (4)
50 48
@@ -123,18 +121,6 @@ struct pblk_g_ctx {
123 u64 lba; 121 u64 lba;
124}; 122};
125 123
126/* partial read context */
127struct pblk_pr_ctx {
128 struct bio *orig_bio;
129 DECLARE_BITMAP(bitmap, NVM_MAX_VLBA);
130 unsigned int orig_nr_secs;
131 unsigned int bio_init_idx;
132 void *ppa_ptr;
133 dma_addr_t dma_ppa_list;
134 u64 lba_list_mem[NVM_MAX_VLBA];
135 u64 lba_list_media[NVM_MAX_VLBA];
136};
137
138/* Pad context */ 124/* Pad context */
139struct pblk_pad_rq { 125struct pblk_pad_rq {
140 struct pblk *pblk; 126 struct pblk *pblk;
@@ -305,7 +291,6 @@ struct pblk_rl {
305 291
306 struct timer_list u_timer; 292 struct timer_list u_timer;
307 293
308 unsigned long long nr_secs;
309 unsigned long total_blocks; 294 unsigned long total_blocks;
310 295
311 atomic_t free_blocks; /* Total number of free blocks (+ OP) */ 296 atomic_t free_blocks; /* Total number of free blocks (+ OP) */
@@ -440,6 +425,7 @@ struct pblk_smeta {
440 425
441struct pblk_w_err_gc { 426struct pblk_w_err_gc {
442 int has_write_err; 427 int has_write_err;
428 int has_gc_err;
443 __le64 *lba_list; 429 __le64 *lba_list;
444}; 430};
445 431
@@ -465,7 +451,6 @@ struct pblk_line {
465 int meta_line; /* Metadata line id */ 451 int meta_line; /* Metadata line id */
466 int meta_distance; /* Distance between data and metadata */ 452 int meta_distance; /* Distance between data and metadata */
467 453
468 u64 smeta_ssec; /* Sector where smeta starts */
469 u64 emeta_ssec; /* Sector where emeta starts */ 454 u64 emeta_ssec; /* Sector where emeta starts */
470 455
471 unsigned int sec_in_line; /* Number of usable secs in line */ 456 unsigned int sec_in_line; /* Number of usable secs in line */
@@ -762,7 +747,7 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
762 unsigned int pos, unsigned int nr_entries, 747 unsigned int pos, unsigned int nr_entries,
763 unsigned int count); 748 unsigned int count);
764int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba, 749int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
765 struct ppa_addr ppa, int bio_iter, bool advanced_bio); 750 struct ppa_addr ppa);
766unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries); 751unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
767 752
768unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags); 753unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
@@ -862,15 +847,15 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
862 struct pblk_line *gc_line, u64 paddr); 847 struct pblk_line *gc_line, u64 paddr);
863void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, 848void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
864 u64 *lba_list, int nr_secs); 849 u64 *lba_list, int nr_secs);
865void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, 850int pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
866 sector_t blba, int nr_secs); 851 sector_t blba, int nr_secs, bool *from_cache);
867void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd); 852void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
868void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd); 853void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
869 854
870/* 855/*
871 * pblk user I/O write path 856 * pblk user I/O write path
872 */ 857 */
873int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, 858void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
874 unsigned long flags); 859 unsigned long flags);
875int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); 860int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
876 861
@@ -896,7 +881,7 @@ void pblk_write_kick(struct pblk *pblk);
896 * pblk read path 881 * pblk read path
897 */ 882 */
898extern struct bio_set pblk_bio_set; 883extern struct bio_set pblk_bio_set;
899int pblk_submit_read(struct pblk *pblk, struct bio *bio); 884void pblk_submit_read(struct pblk *pblk, struct bio *bio);
900int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); 885int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
901/* 886/*
902 * pblk recovery 887 * pblk recovery
@@ -921,6 +906,7 @@ void pblk_gc_free_full_lines(struct pblk *pblk);
921void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, 906void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
922 int *gc_active); 907 int *gc_active);
923int pblk_gc_sysfs_force(struct pblk *pblk, int force); 908int pblk_gc_sysfs_force(struct pblk *pblk, int force);
909void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line);
924 910
925/* 911/*
926 * pblk rate limiter 912 * pblk rate limiter
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a6644a2c3ef7..7da80f375315 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1257,10 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
1257 return 0; 1257 return 0;
1258 } 1258 }
1259 1259
1260 effects |= nvme_known_admin_effects(opcode);
1260 if (ctrl->effects) 1261 if (ctrl->effects)
1261 effects = le32_to_cpu(ctrl->effects->acs[opcode]); 1262 effects = le32_to_cpu(ctrl->effects->acs[opcode]);
1262 else
1263 effects = nvme_known_admin_effects(opcode);
1264 1263
1265 /* 1264 /*
1266 * For simplicity, IO to all namespaces is quiesced even if the command 1265 * For simplicity, IO to all namespaces is quiesced even if the command
@@ -2342,20 +2341,35 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
2342 NULL, 2341 NULL,
2343}; 2342};
2344 2343
2345static int nvme_active_ctrls(struct nvme_subsystem *subsys) 2344static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
2345 struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
2346{ 2346{
2347 int count = 0; 2347 struct nvme_ctrl *tmp;
2348 struct nvme_ctrl *ctrl; 2348
2349 lockdep_assert_held(&nvme_subsystems_lock);
2350
2351 list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
2352 if (ctrl->state == NVME_CTRL_DELETING ||
2353 ctrl->state == NVME_CTRL_DEAD)
2354 continue;
2355
2356 if (tmp->cntlid == ctrl->cntlid) {
2357 dev_err(ctrl->device,
2358 "Duplicate cntlid %u with %s, rejecting\n",
2359 ctrl->cntlid, dev_name(tmp->device));
2360 return false;
2361 }
2349 2362
2350 mutex_lock(&subsys->lock); 2363 if ((id->cmic & (1 << 1)) ||
2351 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 2364 (ctrl->opts && ctrl->opts->discovery_nqn))
2352 if (ctrl->state != NVME_CTRL_DELETING && 2365 continue;
2353 ctrl->state != NVME_CTRL_DEAD) 2366
2354 count++; 2367 dev_err(ctrl->device,
2368 "Subsystem does not support multiple controllers\n");
2369 return false;
2355 } 2370 }
2356 mutex_unlock(&subsys->lock);
2357 2371
2358 return count; 2372 return true;
2359} 2373}
2360 2374
2361static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) 2375static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
@@ -2395,22 +2409,13 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
2395 mutex_lock(&nvme_subsystems_lock); 2409 mutex_lock(&nvme_subsystems_lock);
2396 found = __nvme_find_get_subsystem(subsys->subnqn); 2410 found = __nvme_find_get_subsystem(subsys->subnqn);
2397 if (found) { 2411 if (found) {
2398 /*
2399 * Verify that the subsystem actually supports multiple
2400 * controllers, else bail out.
2401 */
2402 if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
2403 nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
2404 dev_err(ctrl->device,
2405 "ignoring ctrl due to duplicate subnqn (%s).\n",
2406 found->subnqn);
2407 nvme_put_subsystem(found);
2408 ret = -EINVAL;
2409 goto out_unlock;
2410 }
2411
2412 __nvme_release_subsystem(subsys); 2412 __nvme_release_subsystem(subsys);
2413 subsys = found; 2413 subsys = found;
2414
2415 if (!nvme_validate_cntlid(subsys, ctrl, id)) {
2416 ret = -EINVAL;
2417 goto out_put_subsystem;
2418 }
2414 } else { 2419 } else {
2415 ret = device_add(&subsys->dev); 2420 ret = device_add(&subsys->dev);
2416 if (ret) { 2421 if (ret) {
@@ -2422,23 +2427,20 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
2422 list_add_tail(&subsys->entry, &nvme_subsystems); 2427 list_add_tail(&subsys->entry, &nvme_subsystems);
2423 } 2428 }
2424 2429
2425 ctrl->subsys = subsys;
2426 mutex_unlock(&nvme_subsystems_lock);
2427
2428 if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, 2430 if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
2429 dev_name(ctrl->device))) { 2431 dev_name(ctrl->device))) {
2430 dev_err(ctrl->device, 2432 dev_err(ctrl->device,
2431 "failed to create sysfs link from subsystem.\n"); 2433 "failed to create sysfs link from subsystem.\n");
2432 /* the transport driver will eventually put the subsystem */ 2434 goto out_put_subsystem;
2433 return -EINVAL;
2434 } 2435 }
2435 2436
2436 mutex_lock(&subsys->lock); 2437 ctrl->subsys = subsys;
2437 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); 2438 list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
2438 mutex_unlock(&subsys->lock); 2439 mutex_unlock(&nvme_subsystems_lock);
2439
2440 return 0; 2440 return 0;
2441 2441
2442out_put_subsystem:
2443 nvme_put_subsystem(subsys);
2442out_unlock: 2444out_unlock:
2443 mutex_unlock(&nvme_subsystems_lock); 2445 mutex_unlock(&nvme_subsystems_lock);
2444 put_device(&subsys->dev); 2446 put_device(&subsys->dev);
@@ -3605,19 +3607,18 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
3605{ 3607{
3606 u32 aer_notice_type = (result & 0xff00) >> 8; 3608 u32 aer_notice_type = (result & 0xff00) >> 8;
3607 3609
3610 trace_nvme_async_event(ctrl, aer_notice_type);
3611
3608 switch (aer_notice_type) { 3612 switch (aer_notice_type) {
3609 case NVME_AER_NOTICE_NS_CHANGED: 3613 case NVME_AER_NOTICE_NS_CHANGED:
3610 trace_nvme_async_event(ctrl, aer_notice_type);
3611 set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); 3614 set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
3612 nvme_queue_scan(ctrl); 3615 nvme_queue_scan(ctrl);
3613 break; 3616 break;
3614 case NVME_AER_NOTICE_FW_ACT_STARTING: 3617 case NVME_AER_NOTICE_FW_ACT_STARTING:
3615 trace_nvme_async_event(ctrl, aer_notice_type);
3616 queue_work(nvme_wq, &ctrl->fw_act_work); 3618 queue_work(nvme_wq, &ctrl->fw_act_work);
3617 break; 3619 break;
3618#ifdef CONFIG_NVME_MULTIPATH 3620#ifdef CONFIG_NVME_MULTIPATH
3619 case NVME_AER_NOTICE_ANA: 3621 case NVME_AER_NOTICE_ANA:
3620 trace_nvme_async_event(ctrl, aer_notice_type);
3621 if (!ctrl->ana_log_buf) 3622 if (!ctrl->ana_log_buf)
3622 break; 3623 break;
3623 queue_work(nvme_wq, &ctrl->ana_work); 3624 queue_work(nvme_wq, &ctrl->ana_work);
@@ -3696,10 +3697,10 @@ static void nvme_free_ctrl(struct device *dev)
3696 __free_page(ctrl->discard_page); 3697 __free_page(ctrl->discard_page);
3697 3698
3698 if (subsys) { 3699 if (subsys) {
3699 mutex_lock(&subsys->lock); 3700 mutex_lock(&nvme_subsystems_lock);
3700 list_del(&ctrl->subsys_entry); 3701 list_del(&ctrl->subsys_entry);
3701 mutex_unlock(&subsys->lock);
3702 sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device)); 3702 sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device));
3703 mutex_unlock(&nvme_subsystems_lock);
3703 } 3704 }
3704 3705
3705 ctrl->ops->free_ctrl(ctrl); 3706 ctrl->ops->free_ctrl(ctrl);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 592d1e61ef7e..5838f7cd53ac 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -978,7 +978,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
978 NVMF_OPT_DISABLE_SQFLOW) 978 NVMF_OPT_DISABLE_SQFLOW)
979 979
980static struct nvme_ctrl * 980static struct nvme_ctrl *
981nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) 981nvmf_create_ctrl(struct device *dev, const char *buf)
982{ 982{
983 struct nvmf_ctrl_options *opts; 983 struct nvmf_ctrl_options *opts;
984 struct nvmf_transport_ops *ops; 984 struct nvmf_transport_ops *ops;
@@ -1073,7 +1073,7 @@ static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf,
1073 goto out_unlock; 1073 goto out_unlock;
1074 } 1074 }
1075 1075
1076 ctrl = nvmf_create_ctrl(nvmf_device, buf, count); 1076 ctrl = nvmf_create_ctrl(nvmf_device, buf);
1077 if (IS_ERR(ctrl)) { 1077 if (IS_ERR(ctrl)) {
1078 ret = PTR_ERR(ctrl); 1078 ret = PTR_ERR(ctrl);
1079 goto out_unlock; 1079 goto out_unlock;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9544eb60f725..dd8169bbf0d2 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -202,7 +202,7 @@ static LIST_HEAD(nvme_fc_lport_list);
202static DEFINE_IDA(nvme_fc_local_port_cnt); 202static DEFINE_IDA(nvme_fc_local_port_cnt);
203static DEFINE_IDA(nvme_fc_ctrl_cnt); 203static DEFINE_IDA(nvme_fc_ctrl_cnt);
204 204
205 205static struct workqueue_struct *nvme_fc_wq;
206 206
207/* 207/*
208 * These items are short-term. They will eventually be moved into 208 * These items are short-term. They will eventually be moved into
@@ -2054,7 +2054,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
2054 */ 2054 */
2055 if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { 2055 if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
2056 active = atomic_xchg(&ctrl->err_work_active, 1); 2056 active = atomic_xchg(&ctrl->err_work_active, 1);
2057 if (!active && !schedule_work(&ctrl->err_work)) { 2057 if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) {
2058 atomic_set(&ctrl->err_work_active, 0); 2058 atomic_set(&ctrl->err_work_active, 0);
2059 WARN_ON(1); 2059 WARN_ON(1);
2060 } 2060 }
@@ -3399,6 +3399,10 @@ static int __init nvme_fc_init_module(void)
3399{ 3399{
3400 int ret; 3400 int ret;
3401 3401
3402 nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0);
3403 if (!nvme_fc_wq)
3404 return -ENOMEM;
3405
3402 /* 3406 /*
3403 * NOTE: 3407 * NOTE:
3404 * It is expected that in the future the kernel will combine 3408 * It is expected that in the future the kernel will combine
@@ -3416,7 +3420,7 @@ static int __init nvme_fc_init_module(void)
3416 ret = class_register(&fc_class); 3420 ret = class_register(&fc_class);
3417 if (ret) { 3421 if (ret) {
3418 pr_err("couldn't register class fc\n"); 3422 pr_err("couldn't register class fc\n");
3419 return ret; 3423 goto out_destroy_wq;
3420 } 3424 }
3421 3425
3422 /* 3426 /*
@@ -3440,6 +3444,9 @@ out_destroy_device:
3440 device_destroy(&fc_class, MKDEV(0, 0)); 3444 device_destroy(&fc_class, MKDEV(0, 0));
3441out_destroy_class: 3445out_destroy_class:
3442 class_unregister(&fc_class); 3446 class_unregister(&fc_class);
3447out_destroy_wq:
3448 destroy_workqueue(nvme_fc_wq);
3449
3443 return ret; 3450 return ret;
3444} 3451}
3445 3452
@@ -3456,6 +3463,7 @@ static void __exit nvme_fc_exit_module(void)
3456 3463
3457 device_destroy(&fc_class, MKDEV(0, 0)); 3464 device_destroy(&fc_class, MKDEV(0, 0));
3458 class_unregister(&fc_class); 3465 class_unregister(&fc_class);
3466 destroy_workqueue(nvme_fc_wq);
3459} 3467}
3460 3468
3461module_init(nvme_fc_init_module); 3469module_init(nvme_fc_init_module);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 949e29e1d782..4f20a10b39d3 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -977,6 +977,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
977 geo->csecs = 1 << ns->lba_shift; 977 geo->csecs = 1 << ns->lba_shift;
978 geo->sos = ns->ms; 978 geo->sos = ns->ms;
979 geo->ext = ns->ext; 979 geo->ext = ns->ext;
980 geo->mdts = ns->ctrl->max_hw_sectors;
980 981
981 dev->q = q; 982 dev->q = q;
982 memcpy(dev->name, disk_name, DISK_NAME_LEN); 983 memcpy(dev->name, disk_name, DISK_NAME_LEN);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 5c9429d41120..499acf07d61a 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -31,7 +31,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
31 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); 31 sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
32 } else if (ns->head->disk) { 32 } else if (ns->head->disk) {
33 sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, 33 sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
34 ctrl->cntlid, ns->head->instance); 34 ctrl->instance, ns->head->instance);
35 *flags = GENHD_FL_HIDDEN; 35 *flags = GENHD_FL_HIDDEN;
36 } else { 36 } else {
37 sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, 37 sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3e4fb891a95a..2a8708c9ac18 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1296,6 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
1296 switch (dev->ctrl.state) { 1296 switch (dev->ctrl.state) {
1297 case NVME_CTRL_DELETING: 1297 case NVME_CTRL_DELETING:
1298 shutdown = true; 1298 shutdown = true;
1299 /* fall through */
1299 case NVME_CTRL_CONNECTING: 1300 case NVME_CTRL_CONNECTING:
1300 case NVME_CTRL_RESETTING: 1301 case NVME_CTRL_RESETTING:
1301 dev_warn_ratelimited(dev->ctrl.device, 1302 dev_warn_ratelimited(dev->ctrl.device,
@@ -2280,8 +2281,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
2280 return ret; 2281 return ret;
2281 } 2282 }
2282 dev->ctrl.tagset = &dev->tagset; 2283 dev->ctrl.tagset = &dev->tagset;
2283
2284 nvme_dbbuf_set(dev);
2285 } else { 2284 } else {
2286 blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); 2285 blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
2287 2286
@@ -2289,6 +2288,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
2289 nvme_free_queues(dev, dev->online_queues); 2288 nvme_free_queues(dev, dev->online_queues);
2290 } 2289 }
2291 2290
2291 nvme_dbbuf_set(dev);
2292 return 0; 2292 return 0;
2293} 2293}
2294 2294
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index e1824c2e0a1c..f383146e7d0f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -697,15 +697,6 @@ out_free_queues:
697 return ret; 697 return ret;
698} 698}
699 699
700static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
701 struct blk_mq_tag_set *set)
702{
703 struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
704
705 blk_mq_free_tag_set(set);
706 nvme_rdma_dev_put(ctrl->device);
707}
708
709static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, 700static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
710 bool admin) 701 bool admin)
711{ 702{
@@ -744,24 +735,9 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
744 735
745 ret = blk_mq_alloc_tag_set(set); 736 ret = blk_mq_alloc_tag_set(set);
746 if (ret) 737 if (ret)
747 goto out; 738 return ERR_PTR(ret);
748
749 /*
750 * We need a reference on the device as long as the tag_set is alive,
751 * as the MRs in the request structures need a valid ib_device.
752 */
753 ret = nvme_rdma_dev_get(ctrl->device);
754 if (!ret) {
755 ret = -EINVAL;
756 goto out_free_tagset;
757 }
758 739
759 return set; 740 return set;
760
761out_free_tagset:
762 blk_mq_free_tag_set(set);
763out:
764 return ERR_PTR(ret);
765} 741}
766 742
767static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, 743static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
@@ -769,7 +745,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
769{ 745{
770 if (remove) { 746 if (remove) {
771 blk_cleanup_queue(ctrl->ctrl.admin_q); 747 blk_cleanup_queue(ctrl->ctrl.admin_q);
772 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 748 blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
773 } 749 }
774 if (ctrl->async_event_sqe.data) { 750 if (ctrl->async_event_sqe.data) {
775 nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, 751 nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
@@ -847,7 +823,7 @@ out_cleanup_queue:
847 blk_cleanup_queue(ctrl->ctrl.admin_q); 823 blk_cleanup_queue(ctrl->ctrl.admin_q);
848out_free_tagset: 824out_free_tagset:
849 if (new) 825 if (new)
850 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); 826 blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
851out_free_async_qe: 827out_free_async_qe:
852 nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, 828 nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
853 sizeof(struct nvme_command), DMA_TO_DEVICE); 829 sizeof(struct nvme_command), DMA_TO_DEVICE);
@@ -862,7 +838,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
862{ 838{
863 if (remove) { 839 if (remove) {
864 blk_cleanup_queue(ctrl->ctrl.connect_q); 840 blk_cleanup_queue(ctrl->ctrl.connect_q);
865 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 841 blk_mq_free_tag_set(ctrl->ctrl.tagset);
866 } 842 }
867 nvme_rdma_free_io_queues(ctrl); 843 nvme_rdma_free_io_queues(ctrl);
868} 844}
@@ -903,7 +879,7 @@ out_cleanup_connect_q:
903 blk_cleanup_queue(ctrl->ctrl.connect_q); 879 blk_cleanup_queue(ctrl->ctrl.connect_q);
904out_free_tag_set: 880out_free_tag_set:
905 if (new) 881 if (new)
906 nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); 882 blk_mq_free_tag_set(ctrl->ctrl.tagset);
907out_free_io_queues: 883out_free_io_queues:
908 nvme_rdma_free_io_queues(ctrl); 884 nvme_rdma_free_io_queues(ctrl);
909 return ret; 885 return ret;
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index 97d3c77365b8..e71502d141ed 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -167,6 +167,7 @@ TRACE_EVENT(nvme_async_event,
167 aer_name(NVME_AER_NOTICE_NS_CHANGED), 167 aer_name(NVME_AER_NOTICE_NS_CHANGED),
168 aer_name(NVME_AER_NOTICE_ANA), 168 aer_name(NVME_AER_NOTICE_ANA),
169 aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), 169 aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
170 aer_name(NVME_AER_NOTICE_DISC_CHANGED),
170 aer_name(NVME_AER_ERROR), 171 aer_name(NVME_AER_ERROR),
171 aer_name(NVME_AER_SMART), 172 aer_name(NVME_AER_SMART),
172 aer_name(NVME_AER_CSS), 173 aer_name(NVME_AER_CSS),
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index f89f9d02e788..c09039eea707 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -3827,7 +3827,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
3827 if ((start_padding_sectors || end_padding_sectors) && 3827 if ((start_padding_sectors || end_padding_sectors) &&
3828 (rq_data_dir(req) == WRITE)) { 3828 (rq_data_dir(req) == WRITE)) {
3829 DBF_DEV_EVENT(DBF_ERR, basedev, 3829 DBF_DEV_EVENT(DBF_ERR, basedev,
3830 "raw write not track aligned (%lu,%lu) req %p", 3830 "raw write not track aligned (%llu,%llu) req %p",
3831 start_padding_sectors, end_padding_sectors, req); 3831 start_padding_sectors, end_padding_sectors, req);
3832 return ERR_PTR(-EINVAL); 3832 return ERR_PTR(-EINVAL);
3833 } 3833 }
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 5d865a5d5cdc..4d0d5655c7b2 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -358,6 +358,7 @@ struct nvm_geo {
358 u16 csecs; /* sector size */ 358 u16 csecs; /* sector size */
359 u16 sos; /* out-of-band area size */ 359 u16 sos; /* out-of-band area size */
360 bool ext; /* metadata in extended data buffer */ 360 bool ext; /* metadata in extended data buffer */
361 u32 mdts; /* Max data transfer size*/
361 362
362 /* device write constrains */ 363 /* device write constrains */
363 u32 ws_min; /* minimum write size */ 364 u32 ws_min; /* minimum write size */
@@ -427,6 +428,7 @@ struct nvm_dev {
427 char name[DISK_NAME_LEN]; 428 char name[DISK_NAME_LEN];
428 void *private_data; 429 void *private_data;
429 430
431 struct kref ref;
430 void *rmap; 432 void *rmap;
431 433
432 struct mutex mlock; 434 struct mutex mlock;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c40720cb59ac..8028adacaff3 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1246,9 +1246,9 @@ enum {
1246 NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, 1246 NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110,
1247 NVME_SC_FW_NEEDS_RESET = 0x111, 1247 NVME_SC_FW_NEEDS_RESET = 0x111,
1248 NVME_SC_FW_NEEDS_MAX_TIME = 0x112, 1248 NVME_SC_FW_NEEDS_MAX_TIME = 0x112,
1249 NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, 1249 NVME_SC_FW_ACTIVATE_PROHIBITED = 0x113,
1250 NVME_SC_OVERLAPPING_RANGE = 0x114, 1250 NVME_SC_OVERLAPPING_RANGE = 0x114,
1251 NVME_SC_NS_INSUFFICENT_CAP = 0x115, 1251 NVME_SC_NS_INSUFFICIENT_CAP = 0x115,
1252 NVME_SC_NS_ID_UNAVAILABLE = 0x116, 1252 NVME_SC_NS_ID_UNAVAILABLE = 0x116,
1253 NVME_SC_NS_ALREADY_ATTACHED = 0x118, 1253 NVME_SC_NS_ALREADY_ATTACHED = 0x118,
1254 NVME_SC_NS_IS_PRIVATE = 0x119, 1254 NVME_SC_NS_IS_PRIVATE = 0x119,