From 900148296b78c61aa8c443dc594c0da968c3be53 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:50 +0200 Subject: lightnvm: prevent target type module removal when in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If target type module e.g. pblk here is unloaded (rmmod) while module is in use (after creating target) system crashes. We fix this by using module API refcnt. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 1b0f61233c21..6df65d14a2c5 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1044,6 +1044,7 @@ static struct nvm_tgt_type tt_pblk = { .sysfs_init = pblk_sysfs_init, .sysfs_exit = pblk_sysfs_exit, + .owner = THIS_MODULE, }; static int __init pblk_module_init(void) -- cgit v1.2.2 From c9d84b350f9b253872fc24f4dcfea166c884ee15 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:54 +0200 Subject: lightnvm: pblk: fix error path in pblk_lines_alloc_metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use appropriate memory free calls based on allocation type used and also fix number of times free is called if kmalloc fails. Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 6df65d14a2c5..05665a7e648c 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -630,7 +630,10 @@ static int pblk_lines_alloc_metadata(struct pblk *pblk) fail_free_emeta: while (--i >= 0) { - vfree(l_mg->eline_meta[i]->buf); + if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META) + vfree(l_mg->eline_meta[i]->buf); + else + kfree(l_mg->eline_meta[i]->buf); kfree(l_mg->eline_meta[i]); } -- cgit v1.2.2 From 4e76af53e132bff9e2b94f018457fadabf5ab419 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:57 +0200 Subject: lightnvm: pblk: fix message if L2P MAP is in device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This usually happens if we are developing with qemu and ll2pmode has default value. Improve description. Signed-off-by: Rakesh Pandit Reviewed-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 05665a7e648c..8c85779e9635 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -914,7 +914,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, int ret; if (dev->identity.dom & NVM_RSP_L2P) { - pr_err("pblk: device-side L2P table not supported. (%x)\n", + pr_err("pblk: host-side L2P table not supported. (%x)\n", dev->identity.dom); return ERR_PTR(-EINVAL); } -- cgit v1.2.2 From a1121176ff757e3c073490a69608ea0b18a00ec1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:01 +0200 Subject: lightnvm: pblk: initialize debug stat counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize the stat counter for garbage collected reads. Fixes: a4bd217b43268 ("lightnvm: physical block device (pblk) target") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 8c85779e9635..83445115a922 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -947,6 +947,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, atomic_long_set(&pblk->recov_writes, 0); atomic_long_set(&pblk->recov_writes, 0); atomic_long_set(&pblk->recov_gc_writes, 0); + atomic_long_set(&pblk->recov_gc_reads, 0); #endif atomic_long_set(&pblk->read_failed, 0); -- cgit v1.2.2 From da67e68fb9d37fb9072b20cc75d4337a73bc01b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:05 +0200 Subject: lightnvm: pblk: avoid deadlock on low LUN config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On low LUN configurations, make sure not to send bios that are bigger than the buffer size. Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target") Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 83445115a922..eee4eeb47d07 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -46,7 +46,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, * user I/Os. Unless stalled, the rate limiter leaves at least 256KB * available for user I/O. */ - if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) + if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) blk_queue_split(q, &bio); return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); -- cgit v1.2.2 From bd432417681a224d9fa4a9d43be7d4edc82135b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:06 +0200 Subject: lightnvm: pblk: fix min size for page mempool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk uses an internal page mempool for allocating pages on internal bios. The main two users of this memory pool are partial reads (reads with some sectors in cache and some on media) and padded writes, which need to add dummy pages to an existing bio already containing valid data (and with a large enough bioset allocated). In both cases, the maximum number of pages per bio is defined by the maximum number of physical sectors supported by the underlying device. This patch fixes a bad mempool allocation, where the min_nr of elements on the pool was fixed (to 16), which is lower than the maximum number of sectors supported by NVMe (as of the time for this patch). Instead, use the maximum number of allowed sectors reported by the device. Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index eee4eeb47d07..7b1f29c71338 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -132,7 +132,6 @@ static int pblk_rwb_init(struct pblk *pblk) } /* Minimum pages needed within a lun */ -#define PAGE_POOL_SIZE 16 #define ADDR_POOL_SIZE 64 static int pblk_set_ppaf(struct pblk *pblk) @@ -247,14 +246,16 @@ static int pblk_core_init(struct pblk *pblk) if (pblk_init_global_caches(pblk)) return -ENOMEM; - pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); - if (!pblk->page_pool) + /* internal bios can be at most the sectors signaled by the device. */ + pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), + 0); + if (!pblk->page_bio_pool) return -ENOMEM; pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE, pblk_blk_ws_cache); if (!pblk->line_ws_pool) - goto free_page_pool; + goto free_page_bio_pool; pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); if (!pblk->rec_pool) @@ -309,8 +310,8 @@ free_rec_pool: mempool_destroy(pblk->rec_pool); free_blk_ws_pool: mempool_destroy(pblk->line_ws_pool); -free_page_pool: - mempool_destroy(pblk->page_pool); +free_page_bio_pool: + mempool_destroy(pblk->page_bio_pool); return -ENOMEM; } @@ -322,7 +323,7 @@ static void pblk_core_free(struct pblk *pblk) if (pblk->bb_wq) destroy_workqueue(pblk->bb_wq); - mempool_destroy(pblk->page_pool); + mempool_destroy(pblk->page_bio_pool); mempool_destroy(pblk->line_ws_pool); mempool_destroy(pblk->rec_pool); mempool_destroy(pblk->g_rq_pool); -- cgit v1.2.2 From b84ae4a8b883b96b95fff0e3979ff2c65bbf96b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:07 +0200 Subject: lightnvm: pblk: simplify work_queue mempool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In pblk, we have a mempool to allocate a generic structure that we pass along workqueues. This is heavily used in the GC path in order to have enough inflight reads and fully utilize the GC bandwidth. However, the current GC path copies data to the host memory and puts it back into the write buffer. This requires a vmalloc allocation for the data and a memory copy. Thus, guaranteeing the allocation by using a mempool for the structure in itself does not give us much. Until we implement support for vector copy to avoid moving data through the host, just allocate the workqueue structure using kmalloc. This allows us to have a much smaller mempool. Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 7b1f29c71338..340552253580 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -20,7 +20,7 @@ #include "pblk.h" -static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, +static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, *pblk_w_rq_cache, *pblk_line_meta_cache; static DECLARE_RWSEM(pblk_lock); struct bio_set *pblk_bio_set; @@ -184,9 +184,9 @@ static int pblk_init_global_caches(struct pblk *pblk) char cache_name[PBLK_CACHE_NAME_LEN]; down_write(&pblk_lock); - pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws", + pblk_ws_cache = kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_blk_ws_cache) { + if (!pblk_ws_cache) { up_write(&pblk_lock); return -ENOMEM; } @@ -194,7 +194,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_rec_cache = kmem_cache_create("pblk_rec", sizeof(struct pblk_rec_ctx), 0, 0, NULL); if (!pblk_rec_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); up_write(&pblk_lock); return -ENOMEM; } @@ -202,7 +202,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, 0, 0, NULL); if (!pblk_g_rq_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); up_write(&pblk_lock); return -ENOMEM; @@ -211,7 +211,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, 0, 0, NULL); if (!pblk_w_rq_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); up_write(&pblk_lock); @@ -223,7 +223,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_line_meta_cache = kmem_cache_create(cache_name, pblk->lm.sec_bitmap_len, 0, 0, NULL); if (!pblk_line_meta_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); kmem_cache_destroy(pblk_w_rq_cache); @@ -246,20 +246,20 @@ static int pblk_core_init(struct pblk *pblk) if (pblk_init_global_caches(pblk)) return -ENOMEM; - /* internal bios can be at most the sectors signaled by the device. */ + /* Internal bios can be at most the sectors signaled by the device. */ pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), 0); if (!pblk->page_bio_pool) return -ENOMEM; - pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE, - pblk_blk_ws_cache); - if (!pblk->line_ws_pool) + pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE, + pblk_ws_cache); + if (!pblk->gen_ws_pool) goto free_page_bio_pool; pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); if (!pblk->rec_pool) - goto free_blk_ws_pool; + goto free_gen_ws_pool; pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE, pblk_g_rq_cache); @@ -308,8 +308,8 @@ free_g_rq_pool: mempool_destroy(pblk->g_rq_pool); free_rec_pool: mempool_destroy(pblk->rec_pool); -free_blk_ws_pool: - mempool_destroy(pblk->line_ws_pool); +free_gen_ws_pool: + mempool_destroy(pblk->gen_ws_pool); free_page_bio_pool: mempool_destroy(pblk->page_bio_pool); return -ENOMEM; @@ -324,13 +324,13 @@ static void pblk_core_free(struct pblk *pblk) destroy_workqueue(pblk->bb_wq); mempool_destroy(pblk->page_bio_pool); - mempool_destroy(pblk->line_ws_pool); + mempool_destroy(pblk->gen_ws_pool); mempool_destroy(pblk->rec_pool); mempool_destroy(pblk->g_rq_pool); mempool_destroy(pblk->w_rq_pool); mempool_destroy(pblk->line_meta_pool); - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); kmem_cache_destroy(pblk_w_rq_cache); -- cgit v1.2.2 From 0d880398cb6254ab3e110e2a8a659da65a56ffee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:08 +0200 Subject: lightnvm: pblk: decouple read/erase mempools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since read and erase paths offer different guarantees for inflight I/Os, separate the mempools to set the right min_nr for each on creation. Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 340552253580..2f8d3f9ffbaf 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -261,15 +261,20 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->rec_pool) goto free_gen_ws_pool; - pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE, + pblk->r_rq_pool = mempool_create_slab_pool(geo->nr_luns, pblk_g_rq_cache); - if (!pblk->g_rq_pool) + if (!pblk->r_rq_pool) goto free_rec_pool; - pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2, + pblk->e_rq_pool = mempool_create_slab_pool(geo->nr_luns, + pblk_g_rq_cache); + if (!pblk->e_rq_pool) + goto free_r_rq_pool; + + pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns, pblk_w_rq_cache); if (!pblk->w_rq_pool) - goto free_g_rq_pool; + goto free_e_rq_pool; pblk->line_meta_pool = mempool_create_slab_pool(PBLK_META_POOL_SIZE, @@ -304,8 +309,10 @@ free_line_meta_pool: mempool_destroy(pblk->line_meta_pool); free_w_rq_pool: mempool_destroy(pblk->w_rq_pool); -free_g_rq_pool: - mempool_destroy(pblk->g_rq_pool); +free_e_rq_pool: + mempool_destroy(pblk->e_rq_pool); +free_r_rq_pool: + mempool_destroy(pblk->r_rq_pool); free_rec_pool: mempool_destroy(pblk->rec_pool); free_gen_ws_pool: @@ -326,7 +333,8 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->page_bio_pool); mempool_destroy(pblk->gen_ws_pool); mempool_destroy(pblk->rec_pool); - mempool_destroy(pblk->g_rq_pool); + mempool_destroy(pblk->r_rq_pool); + mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); mempool_destroy(pblk->line_meta_pool); -- cgit v1.2.2 From e72ec1d31bcb6dffe325418c6d96f2fcab7c2654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:09 +0200 Subject: lightnvm: pblk: do not use a mempool for line bitmaps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk holds two sector bitmaps: one to keep track of the mapped sectors while the line is active and another one to keep track of the invalid sectors. The latter is kept during the whole live of the line, until it is recycled. Since we cannot guarantee forward progress for the mempool in this case, get rid of the mempool and simply allocate memory through kmalloc. Reported-by: Jens Axboe Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 2f8d3f9ffbaf..4d719782f65b 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -21,7 +21,7 @@ #include "pblk.h" static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, - *pblk_w_rq_cache, *pblk_line_meta_cache; + *pblk_w_rq_cache; static DECLARE_RWSEM(pblk_lock); struct bio_set *pblk_bio_set; @@ -181,8 +181,6 @@ static int pblk_set_ppaf(struct pblk *pblk) static int pblk_init_global_caches(struct pblk *pblk) { - char cache_name[PBLK_CACHE_NAME_LEN]; - down_write(&pblk_lock); pblk_ws_cache = kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); @@ -217,19 +215,6 @@ static int pblk_init_global_caches(struct pblk *pblk) up_write(&pblk_lock); return -ENOMEM; } - - snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s", - pblk->disk->disk_name); - pblk_line_meta_cache = kmem_cache_create(cache_name, - pblk->lm.sec_bitmap_len, 0, 0, NULL); - if (!pblk_line_meta_cache) { - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); - up_write(&pblk_lock); - return -ENOMEM; - } up_write(&pblk_lock); return 0; @@ -276,16 +261,10 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->w_rq_pool) goto free_e_rq_pool; - pblk->line_meta_pool = - mempool_create_slab_pool(PBLK_META_POOL_SIZE, - pblk_line_meta_cache); - if (!pblk->line_meta_pool) - goto free_w_rq_pool; - pblk->close_wq = alloc_workqueue("pblk-close-wq", WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS); if (!pblk->close_wq) - goto free_line_meta_pool; + goto free_w_rq_pool; pblk->bb_wq = alloc_workqueue("pblk-bb-wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); @@ -305,8 +284,6 @@ free_bb_wq: destroy_workqueue(pblk->bb_wq); free_close_wq: destroy_workqueue(pblk->close_wq); -free_line_meta_pool: - mempool_destroy(pblk->line_meta_pool); free_w_rq_pool: mempool_destroy(pblk->w_rq_pool); free_e_rq_pool: @@ -336,13 +313,11 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->r_rq_pool); mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); - mempool_destroy(pblk->line_meta_pool); kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); kmem_cache_destroy(pblk_w_rq_cache); - kmem_cache_destroy(pblk_line_meta_cache); } static void pblk_luns_free(struct pblk *pblk) -- cgit v1.2.2 From 7bd4d370db6090004a06deb526f0f01fa99a3f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:23 +0200 Subject: lightnvm: pblk: guarantee line integrity on reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a line is recycled during garbage collection, reads can still be issued to the line. If the line is freed in the middle of this process, data corruption might occur. This patch guarantees that lines are not freed in the middle of reads that target them (lines). Specifically, we use the existing line reference to decide when a line is eligible for being freed after the recycle process. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 4d719782f65b..34527646c01b 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -271,15 +271,22 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->bb_wq) goto free_close_wq; - if (pblk_set_ppaf(pblk)) + pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (!pblk->r_end_wq) goto free_bb_wq; + if (pblk_set_ppaf(pblk)) + goto free_r_end_wq; + if (pblk_rwb_init(pblk)) - goto free_bb_wq; + goto free_r_end_wq; INIT_LIST_HEAD(&pblk->compl_list); return 0; +free_r_end_wq: + destroy_workqueue(pblk->r_end_wq); free_bb_wq: destroy_workqueue(pblk->bb_wq); free_close_wq: @@ -304,6 +311,9 @@ static void pblk_core_free(struct pblk *pblk) if (pblk->close_wq) destroy_workqueue(pblk->close_wq); + if (pblk->r_end_wq) + destroy_workqueue(pblk->r_end_wq); + if (pblk->bb_wq) destroy_workqueue(pblk->bb_wq); -- cgit v1.2.2 From 21d2287119e843929c29fb1adbd271bde1fac7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:26 +0200 Subject: lightnvm: pblk: enable 1 LUN configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Metadata I/Os are scheduled to minimize their impact on user data I/Os. When there are enough LUNs instantiated (i.e., enough bandwidth), it is easy to interleave metadata and data one after the other so that metadata I/Os are the ones being blocked and not vice-versa. We do this by calculating the distance between the I/Os in terms of the LUNs that are not in used, and selecting a free LUN that satisfies a the simple heuristic that metadata is scheduled behind. The per-LUN semaphores guarantee consistency. This works fine on >1 LUN configuration. However, when a single LUN is instantiated, this design leads to a deadlock, where metadata waits to be scheduled on a free LUN. This patch implements the 1 LUN case by simply scheduling the metadada I/O after the data I/O. In the process, we refactor the way a line is replaced to ensure that metadata writes are submitted after data writes in order to guarantee block sequentiality. Note that, since there is only one LUN, both I/Os will block each other by design. However, such configuration only pursues tight read latencies, not write bandwidth. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 34527646c01b..3c3749186053 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -711,8 +711,12 @@ add_emeta_page: } lm->emeta_bb = geo->nr_luns - i; - lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0], - geo->sec_per_blk); + + lm->min_blk_line = 1; + if (geo->nr_luns > 1) + lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + + lm->emeta_sec[0], geo->sec_per_blk); + if (lm->min_blk_line > lm->blk_per_line) { pr_err("pblk: config. not supported. Min. LUN in line:%d\n", lm->blk_per_line); -- cgit v1.2.2 From e6b754c252bacebdfbe3c57e790431ab8f445d1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:27 +0200 Subject: lightnvm: pblk: ensure right bad block calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that the variable controlling block threshold for allocating extra metadata sectors in case of a line with bad blocks does not get a negative value. Otherwise, the line will be marked as corrupted and wasted. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 3c3749186053..c7239c41ba40 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -710,7 +710,7 @@ add_emeta_page: goto add_emeta_page; } - lm->emeta_bb = geo->nr_luns - i; + lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0; lm->min_blk_line = 1; if (geo->nr_luns > 1) -- cgit v1.2.2 From 27b978725d895e704aab44b99242a0514485d798 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:28 +0200 Subject: lightnvm: pblk: fix changing GC group list for a line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pblk_line_gc_list seems to had a bug since the introduction of pblk in getting GC list for a line. In b20ba1bc7 while redesigning the GC algorithm, the naming for the GC thresholds was altered, but the values for high_thrs and mid_thrs were not. The result is that when moving to the GC lists, the mid threshold is never evaluated. Fixes: a4bd217b4("lightnvm: physical block device (pblk) target") Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index c7239c41ba40..56ece7dfac0e 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -678,8 +678,8 @@ static int pblk_lines_init(struct pblk *pblk) lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); - lm->high_thrs = lm->sec_per_line / 2; - lm->mid_thrs = lm->sec_per_line / 4; + lm->mid_thrs = lm->sec_per_line / 2; + lm->high_thrs = lm->sec_per_line / 4; lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs; /* Calculate necessary pages for smeta. See comment over struct -- cgit v1.2.2 From 22a4e061ea11cc754785a12b3ba5f3e135bc0c63 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:33 +0200 Subject: lightnvm: pblk: fix releases of kmem cache in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If pblk_core_init fails lets destroy all global caches. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 56ece7dfac0e..2e599738372d 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -220,6 +220,14 @@ static int pblk_init_global_caches(struct pblk *pblk) return 0; } +static void pblk_free_global_caches(struct pblk *pblk) +{ + kmem_cache_destroy(pblk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_g_rq_cache); + kmem_cache_destroy(pblk_w_rq_cache); +} + static int pblk_core_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; @@ -235,7 +243,7 @@ static int pblk_core_init(struct pblk *pblk) pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), 0); if (!pblk->page_bio_pool) - return -ENOMEM; + goto free_global_caches; pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE, pblk_ws_cache); @@ -303,6 +311,8 @@ free_gen_ws_pool: mempool_destroy(pblk->gen_ws_pool); free_page_bio_pool: mempool_destroy(pblk->page_bio_pool); +free_global_caches: + pblk_free_global_caches(pblk); return -ENOMEM; } @@ -324,10 +334,7 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); - kmem_cache_destroy(pblk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); + pblk_free_global_caches(pblk); } static void pblk_luns_free(struct pblk *pblk) -- cgit v1.2.2 From 3e3a5b8ebd5d3b1d68facc58b0674a2564653222 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:34 +0200 Subject: lightnvm: pblk: prevent gc kicks when gc is not operational MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GC can be kicked after it has been shut down when closing the last line during exit, resulting in accesses to freed structures. Make sure that GC is not triggered while it is not operational. Also make sure that GC won't be re-activated during exit when running on another processor by using timer_del_sync. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 2e599738372d..27eb430958ff 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -931,6 +931,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->dev = dev; pblk->disk = tdisk; pblk->state = PBLK_STATE_RUNNING; + pblk->gc.gc_enabled = 0; spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); -- cgit v1.2.2 From 37ce33d5756f4ba8bdd45371a1918ceeeba5b158 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:36 +0200 Subject: lightnvm: pblk: free full lines during recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When rebuilding the L2P table, any full lines (lines without any valid sectors) will be identified. If these lines are not freed, we risk not being able to allocate the first data line. This patch refactors the part of GC that frees empty lines into a separate function and adds a call to this after the L2P table has been rebuilt. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 27eb430958ff..f08fa2083fbc 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -508,6 +508,9 @@ static int pblk_lines_configure(struct pblk *pblk, int flags) } } + /* Free full lines directly as GC has not been started yet */ + pblk_gc_free_full_lines(pblk); + if (!line) { /* Configure next line for user data */ line = pblk_line_get_first_data(pblk); -- cgit v1.2.2 From 03661b5f756c92b9924869334a2afa19753c4fe7 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:37 +0200 Subject: lightnvm: pblk: start gc if needed during init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start GC if needed, directly after init, as we might need to garbage collect in order to make room for user writes. Create a helper function that allows to kick GC without exposing the internals of the GC/rate-limiter interaction. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index f08fa2083fbc..ad9f014a086b 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -1025,6 +1025,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->rwb.nr_entries); wake_up_process(pblk->writer_ts); + + /* Check if we need to start GC */ + pblk_gc_should_kick(pblk); + return pblk; fail_stop_writer: -- cgit v1.2.2 From c55861926a78bf129e06bd3372b34225f4968757 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:40 +0200 Subject: lightnvm: pblk: add l2p crc debug printouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Print the CRC of the logical-to-physical mapping during exit and after recovering the L2P table to facilitate detection of meta data corruption/recovery issues. The CRC printed after recovery should match the CRC printed during the previous exit - if it doesn't this indicates that either the meta data written to the disk is corrupt or recovery failed. Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index ad9f014a086b..52c85f4f672d 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -76,6 +76,28 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } +static size_t pblk_trans_map_size(struct pblk *pblk) +{ + int entry_size = 8; + + if (pblk->ppaf_bitsize < 32) + entry_size = 4; + + return entry_size * pblk->rl.nr_secs; +} + +#ifdef CONFIG_NVM_DEBUG +static u32 pblk_l2p_crc(struct pblk *pblk) +{ + size_t map_size; + u32 crc = ~(u32)0; + + map_size = pblk_trans_map_size(pblk); + crc = crc32_le(crc, pblk->trans_map, map_size); + return crc; +} +#endif + static void pblk_l2p_free(struct pblk *pblk) { vfree(pblk->trans_map); @@ -85,12 +107,10 @@ static int pblk_l2p_init(struct pblk *pblk) { sector_t i; struct ppa_addr ppa; - int entry_size = 8; - - if (pblk->ppaf_bitsize < 32) - entry_size = 4; + size_t map_size; - pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs); + map_size = pblk_trans_map_size(pblk); + pblk->trans_map = vmalloc(map_size); if (!pblk->trans_map) return -ENOMEM; @@ -508,6 +528,10 @@ static int pblk_lines_configure(struct pblk *pblk, int flags) } } +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk)); +#endif + /* Free full lines directly as GC has not been started yet */ pblk_gc_free_full_lines(pblk); @@ -901,6 +925,11 @@ static void pblk_exit(void *private) down_write(&pblk_lock); pblk_gc_exit(pblk); pblk_tear_down(pblk); + +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk)); +#endif + pblk_free(pblk); up_write(&pblk_lock); } -- cgit v1.2.2 From 28bd109411eaa4c541f2e240d1285c154de4dfb7 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 13 Oct 2017 14:46:43 +0200 Subject: lightnvm: pblk: remove spinlock when freeing line metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lockdep complains about being in atomic context while freeing line metadata - and rightly so as we take a spinlock and end up calling vfree that might sleep(in pblk_mfree). There is no need for holding the line manager free_lock while freeing line metadata as the pipeline as stopped, so remove the lock. Fixes: 588726d3ec68 ("lightnvm: pblk: fail gracefully on irrec. error") Signed-off-by: Hans Holmberg Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/pblk-init.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/lightnvm/pblk-init.c') diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 52c85f4f672d..f62112ba5482 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -393,13 +393,11 @@ static void pblk_line_meta_free(struct pblk *pblk) kfree(l_mg->bb_aux); kfree(l_mg->vsc_list); - spin_lock(&l_mg->free_lock); for (i = 0; i < PBLK_DATA_LINES; i++) { kfree(l_mg->sline_meta[i]); pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); kfree(l_mg->eline_meta[i]); } - spin_unlock(&l_mg->free_lock); kfree(pblk->lines); } -- cgit v1.2.2