author	Javier González <jg@lightnvm.io>	2017-04-15 14:55:50 -0400
committer	Jens Axboe <axboe@fb.com>	2017-04-16 12:06:33 -0400
commit	a4bd217b432685d6a177c28a2af187f041c473b7 (patch)
tree	3670d0322655bdef412c415e04c8515e865c1e37
parent	6eb082452df1218e9c0ce1168c456f839ce5acb2 (diff)
lightnvm: physical block device (pblk) target
This patch introduces pblk, a host-side translation layer for Open-Channel SSDs that exposes them as block devices. The translation layer allows data placement decisions and I/O scheduling to be managed by the host, enabling users to optimize the SSD for their specific workloads.

An open-channel SSD has a set of LUNs (parallel units) and a collection of blocks. Each block can be read in any order, but writes must be sequential. Writes may also fail, and a block must be reset before new writes can be applied to it.

To manage these constraints, pblk maintains a logical-to-physical address (L2P) table, a write cache, garbage collection logic, a recovery scheme, and logic to rate-limit user I/Os versus garbage collection I/Os.

The L2P table is fully associative and manages sectors at a 4KB granularity. pblk stores the L2P table in two places: in the out-of-band area of the media and on the last page of a line. In the case of a power failure, pblk performs a scan to recover the L2P table.

User data is organized into lines. A line is data striped across blocks and LUNs. Lines reduce the amount of metadata the host must maintain besides the user data, and they make it easier to implement RAID or erasure coding in the future.

pblk implements multi-tenant support and can be instantiated multiple times on the same drive. Each instance owns a portion of the SSD - both in terms of I/O bandwidth and capacity - providing I/O isolation between instances.

Finally, pblk exposes a sysfs interface that allows user space to peek into its internals. The interface is available at /dev/block/*/pblk/ where * is the exposed block device name.

This work also contains contributions from:
  Matias Bjørling <matias@cnexlabs.com>
  Simon A. F. Lund <slund@cnexlabs.com>
  Young Tack Jin <youngtack.jin@gmail.com>
  Huaicheng Li <huaicheng@cs.uchicago.edu>

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
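As a rough, self-contained illustration of the fully associative L2P table described above (this is not pblk's code: pblk packs entries as ppa_addr values, distinguishes cache addresses from device addresses, and protects the table with a spinlock; the names l2p_table, l2p_init, l2p_get and l2p_set below are hypothetical), the mapping boils down to one physical-address slot per 4KB logical sector:

/* Minimal user-space sketch of a fully associative L2P table at 4KB
 * granularity. Hypothetical types and names, for illustration only.
 */
#include <stdint.h>
#include <stdlib.h>

#define L2P_EMPTY UINT64_MAX			/* logical sector not mapped */

struct l2p_table {
	uint64_t *map;				/* one slot per 4KB logical sector */
	uint64_t nr_secs;			/* logical sectors exposed */
};

static int l2p_init(struct l2p_table *t, uint64_t nr_secs)
{
	uint64_t i;

	t->map = malloc(nr_secs * sizeof(*t->map));
	if (!t->map)
		return -1;
	for (i = 0; i < nr_secs; i++)
		t->map[i] = L2P_EMPTY;		/* start fully unmapped */
	t->nr_secs = nr_secs;
	return 0;
}

/* Fully associative: any logical sector may map to any physical sector */
static void l2p_set(struct l2p_table *t, uint64_t lba, uint64_t ppa)
{
	if (lba < t->nr_secs)
		t->map[lba] = ppa;
}

static uint64_t l2p_get(struct l2p_table *t, uint64_t lba)
{
	return (lba < t->nr_secs) ? t->map[lba] : L2P_EMPTY;
}

On a clean shutdown such a table can be persisted alongside the line metadata; after a power failure it has to be rebuilt by scanning the out-of-band area, which is the recovery path the commit message describes.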
-rw-r--r--Documentation/lightnvm/pblk.txt21
-rw-r--r--drivers/lightnvm/Kconfig9
-rw-r--r--drivers/lightnvm/Makefile5
-rw-r--r--drivers/lightnvm/pblk-cache.c114
-rw-r--r--drivers/lightnvm/pblk-core.c1655
-rw-r--r--drivers/lightnvm/pblk-gc.c555
-rw-r--r--drivers/lightnvm/pblk-init.c949
-rw-r--r--drivers/lightnvm/pblk-map.c136
-rw-r--r--drivers/lightnvm/pblk-rb.c852
-rw-r--r--drivers/lightnvm/pblk-read.c529
-rw-r--r--drivers/lightnvm/pblk-recovery.c998
-rw-r--r--drivers/lightnvm/pblk-rl.c182
-rw-r--r--drivers/lightnvm/pblk-sysfs.c507
-rw-r--r--drivers/lightnvm/pblk-write.c411
-rw-r--r--drivers/lightnvm/pblk.h1121
15 files changed, 8044 insertions, 0 deletions
diff --git a/Documentation/lightnvm/pblk.txt b/Documentation/lightnvm/pblk.txt
new file mode 100644
index 000000000000..1040ed1cec81
--- /dev/null
+++ b/Documentation/lightnvm/pblk.txt
@@ -0,0 +1,21 @@
1pblk: Physical Block Device Target
2==================================
3
4pblk implements a fully associative, host-based FTL that exposes a traditional
5block I/O interface. Its primary responsibilities are:
6
7 - Map logical addresses onto physical addresses (4KB granularity) in a
8 logical-to-physical (L2P) table.
9 - Maintain the integrity and consistency of the L2P table as well as its
10 recovery from normal tear down and power outage.
11 - Deal with controller- and media-specific constraints.
12 - Handle I/O errors.
13 - Implement garbage collection.
14 - Maintain consistency across the I/O stack during synchronization points.
15
16For more information please refer to:
17
18 http://lightnvm.io
19
20which maintains updated FAQs, manual pages, technical documentation, tools,
21contacts, etc.
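As a sketch of the garbage collection responsibility listed above (the thresholds, names and the gc_bucket type below are hypothetical and not part of pblk's API), victim lines can be grouped by how many of their sectors are still valid, so that lines with the least live data are reclaimed first:

/* Hypothetical victim classification by valid-sector count. A line with
 * no valid sectors is the cheapest victim (nothing needs rewriting); a
 * fully valid line contains no garbage to collect at all.
 */
enum gc_bucket { GC_FULL, GC_HIGH, GC_MID, GC_LOW, GC_NONE };

static enum gc_bucket gc_classify(unsigned int valid_secs,
				  unsigned int secs_in_line)
{
	unsigned int mid_thr = secs_in_line / 4;	/* example thresholds */
	unsigned int high_thr = secs_in_line / 2;

	if (valid_secs == 0)
		return GC_FULL;		/* everything invalid: free after a reset */
	if (valid_secs < mid_thr)
		return GC_HIGH;		/* mostly invalid: high-priority victim */
	if (valid_secs < high_thr)
		return GC_MID;
	if (valid_secs < secs_in_line)
		return GC_LOW;
	return GC_NONE;			/* fully valid: nothing to collect yet */
}

pblk itself keeps one list per bucket and moves a line between lists as its sectors are invalidated, which amortizes the cost of victim selection over normal writes.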
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 052714106b7b..ead61a93cb4e 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -33,4 +33,13 @@ config NVM_RRPC
33 host. The target is implemented using a linear mapping table and
34 cost-based garbage collection. It is optimized for 4K IO sizes.
35
36config NVM_PBLK
37 tristate "Physical Block Device Open-Channel SSD target"
38 ---help---
39 Allows an open-channel SSD to be exposed as a block device to the
40 host. The target assumes the device exposes raw flash and must be
41 explicitly managed by the host.
42
43 Please note the disk format is considered EXPERIMENTAL for now.
44
45endif # NVM
diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile
index b2a39e2d2895..82d1a117fb27 100644
--- a/drivers/lightnvm/Makefile
+++ b/drivers/lightnvm/Makefile
@@ -4,3 +4,8 @@
4
5obj-$(CONFIG_NVM) := core.o
6obj-$(CONFIG_NVM_RRPC) += rrpc.o
7obj-$(CONFIG_NVM_PBLK) += pblk.o
8pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
9 pblk-write.o pblk-cache.o pblk-read.o \
10 pblk-gc.o pblk-recovery.o pblk-map.o \
11 pblk-rl.o pblk-sysfs.o
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
new file mode 100644
index 000000000000..59bcea88db84
--- /dev/null
+++ b/drivers/lightnvm/pblk-cache.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-cache.c - pblk's write cache
16 */
17
18#include "pblk.h"
19
20int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
21{
22 struct pblk_w_ctx w_ctx;
23 sector_t lba = pblk_get_lba(bio);
24 unsigned int bpos, pos;
25 int nr_entries = pblk_get_secs(bio);
26 int i, ret;
27
28 /* Update the write buffer head (mem) with the entries that we can
29 * write. The write in itself cannot fail, so there is no need to
30 * rollback from here on.
31 */
32retry:
33 ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
34 if (ret == NVM_IO_REQUEUE) {
35 io_schedule();
36 goto retry;
37 }
38
39 if (unlikely(!bio_has_data(bio)))
40 goto out;
41
42 w_ctx.flags = flags;
43 pblk_ppa_set_empty(&w_ctx.ppa);
44
45 for (i = 0; i < nr_entries; i++) {
46 void *data = bio_data(bio);
47
48 w_ctx.lba = lba + i;
49
50 pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
51 pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
52
53 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
54 }
55
56#ifdef CONFIG_NVM_DEBUG
57 atomic_long_add(nr_entries, &pblk->inflight_writes);
58 atomic_long_add(nr_entries, &pblk->req_writes);
59#endif
60
61out:
62 pblk_write_should_kick(pblk);
63 return ret;
64}
65
66/*
67 * On GC the incoming lbas are not necessarily sequential. Also, some of the
68 * lbas might not be valid entries, which are marked as empty by the GC thread
69 */
70int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
71 unsigned int nr_entries, unsigned int nr_rec_entries,
72 struct pblk_line *gc_line, unsigned long flags)
73{
74 struct pblk_w_ctx w_ctx;
75 unsigned int bpos, pos;
76 int i, valid_entries;
77
78 /* Update the write buffer head (mem) with the entries that we can
79 * write. The write in itself cannot fail, so there is no need to
80 * rollback from here on.
81 */
82retry:
83 if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) {
84 io_schedule();
85 goto retry;
86 }
87
88 w_ctx.flags = flags;
89 pblk_ppa_set_empty(&w_ctx.ppa);
90
91 for (i = 0, valid_entries = 0; i < nr_entries; i++) {
92 if (lba_list[i] == ADDR_EMPTY)
93 continue;
94
95 w_ctx.lba = lba_list[i];
96
97 pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
98 pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos);
99
100 data += PBLK_EXPOSED_PAGE_SIZE;
101 valid_entries++;
102 }
103
104 WARN_ONCE(nr_rec_entries != valid_entries,
105 "pblk: inconsistent GC write\n");
106
107#ifdef CONFIG_NVM_DEBUG
108 atomic_long_add(valid_entries, &pblk->inflight_writes);
109 atomic_long_add(valid_entries, &pblk->recov_gc_writes);
110#endif
111
112 pblk_write_should_kick(pblk);
113 return NVM_IO_OK;
114}
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
new file mode 100644
index 000000000000..a2bcd098babc
--- /dev/null
+++ b/drivers/lightnvm/pblk-core.c
@@ -0,0 +1,1655 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-core.c - pblk's core functionality
16 *
17 */
18
19#include "pblk.h"
20#include <linux/time.h>
21
22static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
23 struct ppa_addr *ppa)
24{
25 struct nvm_tgt_dev *dev = pblk->dev;
26 struct nvm_geo *geo = &dev->geo;
27 int pos = pblk_dev_ppa_to_pos(geo, *ppa);
28
29 pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
30 atomic_long_inc(&pblk->erase_failed);
31
32 if (test_and_set_bit(pos, line->blk_bitmap))
33 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
34 line->id, pos);
35
36 pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
37}
38
39static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
40{
41 struct pblk_line *line;
42
43 line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
44 atomic_dec(&line->left_seblks);
45
46 if (rqd->error) {
47 struct ppa_addr *ppa;
48
49 ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
50 if (!ppa)
51 return;
52
53 *ppa = rqd->ppa_addr;
54 pblk_mark_bb(pblk, line, ppa);
55 }
56}
57
58/* Erase completion assumes that only one block is erased at a time */
59static void pblk_end_io_erase(struct nvm_rq *rqd)
60{
61 struct pblk *pblk = rqd->private;
62
63 up(&pblk->erase_sem);
64 __pblk_end_io_erase(pblk, rqd);
65 mempool_free(rqd, pblk->r_rq_pool);
66}
67
68static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
69 u64 paddr)
70{
71 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
72 struct list_head *move_list = NULL;
73
74 /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
75 * table is modified with reclaimed sectors, a check is done to ensure
76 * that newer updates are not overwritten.
77 */
78 spin_lock(&line->lock);
79 if (line->state == PBLK_LINESTATE_GC ||
80 line->state == PBLK_LINESTATE_FREE) {
81 spin_unlock(&line->lock);
82 return;
83 }
84
85 if (test_and_set_bit(paddr, line->invalid_bitmap)) {
86 WARN_ONCE(1, "pblk: double invalidate\n");
87 spin_unlock(&line->lock);
88 return;
89 }
90 line->vsc--;
91
92 if (line->state == PBLK_LINESTATE_CLOSED)
93 move_list = pblk_line_gc_list(pblk, line);
94 spin_unlock(&line->lock);
95
96 if (move_list) {
97 spin_lock(&l_mg->gc_lock);
98 spin_lock(&line->lock);
99 /* Prevent moving a line that has just been chosen for GC */
100 if (line->state == PBLK_LINESTATE_GC ||
101 line->state == PBLK_LINESTATE_FREE) {
102 spin_unlock(&line->lock);
103 spin_unlock(&l_mg->gc_lock);
104 return;
105 }
106 spin_unlock(&line->lock);
107
108 list_move_tail(&line->list, move_list);
109 spin_unlock(&l_mg->gc_lock);
110 }
111}
112
113void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
114{
115 struct pblk_line *line;
116 u64 paddr;
117 int line_id;
118
119#ifdef CONFIG_NVM_DEBUG
120 /* Callers must ensure that the ppa points to a device address */
121 BUG_ON(pblk_addr_in_cache(ppa));
122 BUG_ON(pblk_ppa_empty(ppa));
123#endif
124
125 line_id = pblk_tgt_ppa_to_line(ppa);
126 line = &pblk->lines[line_id];
127 paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
128
129 __pblk_map_invalidate(pblk, line, paddr);
130}
131
132void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
133 u64 paddr)
134{
135 __pblk_map_invalidate(pblk, line, paddr);
136
137 pblk_rb_sync_init(&pblk->rwb, NULL);
138 line->left_ssecs--;
139 if (!line->left_ssecs)
140 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
141 pblk_rb_sync_end(&pblk->rwb, NULL);
142}
143
144static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
145 unsigned int nr_secs)
146{
147 sector_t lba;
148
149 spin_lock(&pblk->trans_lock);
150 for (lba = slba; lba < slba + nr_secs; lba++) {
151 struct ppa_addr ppa;
152
153 ppa = pblk_trans_map_get(pblk, lba);
154
155 if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
156 pblk_map_invalidate(pblk, ppa);
157
158 pblk_ppa_set_empty(&ppa);
159 pblk_trans_map_set(pblk, lba, ppa);
160 }
161 spin_unlock(&pblk->trans_lock);
162}
163
164struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
165{
166 mempool_t *pool;
167 struct nvm_rq *rqd;
168 int rq_size;
169
170 if (rw == WRITE) {
171 pool = pblk->w_rq_pool;
172 rq_size = pblk_w_rq_size;
173 } else {
174 pool = pblk->r_rq_pool;
175 rq_size = pblk_r_rq_size;
176 }
177
178 rqd = mempool_alloc(pool, GFP_KERNEL);
179 memset(rqd, 0, rq_size);
180
181 return rqd;
182}
183
184void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
185{
186 mempool_t *pool;
187
188 if (rw == WRITE)
189 pool = pblk->w_rq_pool;
190 else
191 pool = pblk->r_rq_pool;
192
193 mempool_free(rqd, pool);
194}
195
196void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
197 int nr_pages)
198{
199 struct bio_vec bv;
200 int i;
201
202 WARN_ON(off + nr_pages != bio->bi_vcnt);
203
204 bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
205 for (i = off; i < nr_pages + off; i++) {
206 bv = bio->bi_io_vec[i];
207 mempool_free(bv.bv_page, pblk->page_pool);
208 }
209}
210
211int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
212 int nr_pages)
213{
214 struct request_queue *q = pblk->dev->q;
215 struct page *page;
216 int i, ret;
217
218 for (i = 0; i < nr_pages; i++) {
219 page = mempool_alloc(pblk->page_pool, flags);
220 if (!page)
221 goto err;
222
223 ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
224 if (ret != PBLK_EXPOSED_PAGE_SIZE) {
225 pr_err("pblk: could not add page to bio\n");
226 mempool_free(page, pblk->page_pool);
227 goto err;
228 }
229 }
230
231 return 0;
232err:
233 pblk_bio_free_pages(pblk, bio, 0, i);
234 return -1;
235}
236
237static void pblk_write_kick(struct pblk *pblk)
238{
239 wake_up_process(pblk->writer_ts);
240 mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
241}
242
243void pblk_write_timer_fn(unsigned long data)
244{
245 struct pblk *pblk = (struct pblk *)data;
246
247 /* kick the write thread every tick to flush outstanding data */
248 pblk_write_kick(pblk);
249}
250
251void pblk_write_should_kick(struct pblk *pblk)
252{
253 unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
254
255 if (secs_avail >= pblk->min_write_pgs)
256 pblk_write_kick(pblk);
257}
258
259void pblk_end_bio_sync(struct bio *bio)
260{
261 struct completion *waiting = bio->bi_private;
262
263 complete(waiting);
264}
265
266void pblk_end_io_sync(struct nvm_rq *rqd)
267{
268 struct completion *waiting = rqd->private;
269
270 complete(waiting);
271}
272
273void pblk_flush_writer(struct pblk *pblk)
274{
275 struct bio *bio;
276 int ret;
277 DECLARE_COMPLETION_ONSTACK(wait);
278
279 bio = bio_alloc(GFP_KERNEL, 1);
280 if (!bio)
281 return;
282
283 bio->bi_iter.bi_sector = 0; /* internal bio */
284 bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_PREFLUSH);
285 bio->bi_private = &wait;
286 bio->bi_end_io = pblk_end_bio_sync;
287
288 ret = pblk_write_to_cache(pblk, bio, 0);
289 if (ret == NVM_IO_OK) {
290 if (!wait_for_completion_io_timeout(&wait,
291 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
292 pr_err("pblk: flush cache timed out\n");
293 }
294 } else if (ret != NVM_IO_DONE) {
295 pr_err("pblk: tear down bio failed\n");
296 }
297
298 if (bio->bi_error)
299 pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
300
301 bio_put(bio);
302}
303
304struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
305{
306 struct pblk_line_meta *lm = &pblk->lm;
307 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
308 struct list_head *move_list = NULL;
309
310 if (!line->vsc) {
311 if (line->gc_group != PBLK_LINEGC_FULL) {
312 line->gc_group = PBLK_LINEGC_FULL;
313 move_list = &l_mg->gc_full_list;
314 }
315 } else if (line->vsc < lm->mid_thrs) {
316 if (line->gc_group != PBLK_LINEGC_HIGH) {
317 line->gc_group = PBLK_LINEGC_HIGH;
318 move_list = &l_mg->gc_high_list;
319 }
320 } else if (line->vsc < lm->high_thrs) {
321 if (line->gc_group != PBLK_LINEGC_MID) {
322 line->gc_group = PBLK_LINEGC_MID;
323 move_list = &l_mg->gc_mid_list;
324 }
325 } else if (line->vsc < line->sec_in_line) {
326 if (line->gc_group != PBLK_LINEGC_LOW) {
327 line->gc_group = PBLK_LINEGC_LOW;
328 move_list = &l_mg->gc_low_list;
329 }
330 } else if (line->vsc == line->sec_in_line) {
331 if (line->gc_group != PBLK_LINEGC_EMPTY) {
332 line->gc_group = PBLK_LINEGC_EMPTY;
333 move_list = &l_mg->gc_empty_list;
334 }
335 } else {
336 line->state = PBLK_LINESTATE_CORRUPT;
337 line->gc_group = PBLK_LINEGC_NONE;
338 move_list = &l_mg->corrupt_list;
339 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
340 line->id, line->vsc,
341 line->sec_in_line,
342 lm->high_thrs, lm->mid_thrs);
343 }
344
345 return move_list;
346}
347
348void pblk_discard(struct pblk *pblk, struct bio *bio)
349{
350 sector_t slba = pblk_get_lba(bio);
351 sector_t nr_secs = pblk_get_secs(bio);
352
353 pblk_invalidate_range(pblk, slba, nr_secs);
354}
355
356struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
357{
358 struct ppa_addr ppa;
359
360 spin_lock(&pblk->trans_lock);
361 ppa = pblk_trans_map_get(pblk, lba);
362 spin_unlock(&pblk->trans_lock);
363
364 return ppa;
365}
366
367void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
368{
369 atomic_long_inc(&pblk->write_failed);
370#ifdef CONFIG_NVM_DEBUG
371 pblk_print_failed_rqd(pblk, rqd, rqd->error);
372#endif
373}
374
375void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
376{
377 /* Empty page read is not necessarily an error (e.g., L2P recovery) */
378 if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
379 atomic_long_inc(&pblk->read_empty);
380 return;
381 }
382
383 switch (rqd->error) {
384 case NVM_RSP_WARN_HIGHECC:
385 atomic_long_inc(&pblk->read_high_ecc);
386 break;
387 case NVM_RSP_ERR_FAILECC:
388 case NVM_RSP_ERR_FAILCRC:
389 atomic_long_inc(&pblk->read_failed);
390 break;
391 default:
392 pr_err("pblk: unknown read error:%d\n", rqd->error);
393 }
394#ifdef CONFIG_NVM_DEBUG
395 pblk_print_failed_rqd(pblk, rqd, rqd->error);
396#endif
397}
398
399int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
400{
401 struct nvm_tgt_dev *dev = pblk->dev;
402
403#ifdef CONFIG_NVM_DEBUG
404 struct ppa_addr *ppa_list;
405
406 ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
407 if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
408 WARN_ON(1);
409 return -EINVAL;
410 }
411
412 if (rqd->opcode == NVM_OP_PWRITE) {
413 struct pblk_line *line;
414 struct ppa_addr ppa;
415 int i;
416
417 for (i = 0; i < rqd->nr_ppas; i++) {
418 ppa = ppa_list[i];
419 line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
420
421 spin_lock(&line->lock);
422 if (line->state != PBLK_LINESTATE_OPEN) {
423 pr_err("pblk: bad ppa: line:%d,state:%d\n",
424 line->id, line->state);
425 WARN_ON(1);
426 spin_unlock(&line->lock);
427 return -EINVAL;
428 }
429 spin_unlock(&line->lock);
430 }
431 }
432#endif
433 return nvm_submit_io(dev, rqd);
434}
435
436struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
437 unsigned int nr_secs, unsigned int len,
438 gfp_t gfp_mask)
439{
440 struct nvm_tgt_dev *dev = pblk->dev;
441 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
442 void *kaddr = data;
443 struct page *page;
444 struct bio *bio;
445 int i, ret;
446
447 if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
448 return bio_map_kern(dev->q, kaddr, len, gfp_mask);
449
450 bio = bio_kmalloc(gfp_mask, nr_secs);
451 if (!bio)
452 return ERR_PTR(-ENOMEM);
453
454 for (i = 0; i < nr_secs; i++) {
455 page = vmalloc_to_page(kaddr);
456 if (!page) {
457 pr_err("pblk: could not map vmalloc bio\n");
458 bio_put(bio);
459 bio = ERR_PTR(-ENOMEM);
460 goto out;
461 }
462
463 ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
464 if (ret != PAGE_SIZE) {
465 pr_err("pblk: could not add page to bio\n");
466 bio_put(bio);
467 bio = ERR_PTR(-ENOMEM);
468 goto out;
469 }
470
471 kaddr += PAGE_SIZE;
472 }
473out:
474 return bio;
475}
476
477int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
478 unsigned long secs_to_flush)
479{
480 int max = pblk->max_write_pgs;
481 int min = pblk->min_write_pgs;
482 int secs_to_sync = 0;
483
484 if (secs_avail >= max)
485 secs_to_sync = max;
486 else if (secs_avail >= min)
487 secs_to_sync = min * (secs_avail / min);
488 else if (secs_to_flush)
489 secs_to_sync = min;
490
491 return secs_to_sync;
492}
493
494static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
495 int nr_secs)
496{
497 u64 addr;
498 int i;
499
500 /* logic error: ppa out-of-bounds. Prevent generating bad address */
501 if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
502 WARN(1, "pblk: page allocation out of bounds\n");
503 nr_secs = pblk->lm.sec_per_line - line->cur_sec;
504 }
505
506 line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
507 pblk->lm.sec_per_line, line->cur_sec);
508 for (i = 0; i < nr_secs; i++, line->cur_sec++)
509 WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
510
511 return addr;
512}
513
514u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
515{
516 u64 addr;
517
518 /* Lock needed in case a write fails and a recovery needs to remap
519 * failed write buffer entries
520 */
521 spin_lock(&line->lock);
522 addr = __pblk_alloc_page(pblk, line, nr_secs);
523 line->left_msecs -= nr_secs;
524 WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
525 spin_unlock(&line->lock);
526
527 return addr;
528}
529
530/*
531 * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when
532 * taking the per-LUN semaphore.
533 */
534static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
535 u64 paddr, int dir)
536{
537 struct nvm_tgt_dev *dev = pblk->dev;
538 struct nvm_geo *geo = &dev->geo;
539 struct pblk_line_meta *lm = &pblk->lm;
540 struct bio *bio;
541 struct nvm_rq rqd;
542 struct ppa_addr *ppa_list;
543 dma_addr_t dma_ppa_list;
544 void *emeta = line->emeta;
545 int min = pblk->min_write_pgs;
546 int left_ppas = lm->emeta_sec;
547 int id = line->id;
548 int rq_ppas, rq_len;
549 int cmd_op, bio_op;
550 int flags;
551 int i, j;
552 int ret;
553 DECLARE_COMPLETION_ONSTACK(wait);
554
555 if (dir == WRITE) {
556 bio_op = REQ_OP_WRITE;
557 cmd_op = NVM_OP_PWRITE;
558 flags = pblk_set_progr_mode(pblk, WRITE);
559 } else if (dir == READ) {
560 bio_op = REQ_OP_READ;
561 cmd_op = NVM_OP_PREAD;
562 flags = pblk_set_read_mode(pblk);
563 } else
564 return -EINVAL;
565
566 ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
567 if (!ppa_list)
568 return -ENOMEM;
569
570next_rq:
571 memset(&rqd, 0, sizeof(struct nvm_rq));
572
573 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
574 rq_len = rq_ppas * geo->sec_size;
575
576 bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
577 if (IS_ERR(bio)) {
578 ret = PTR_ERR(bio);
579 goto free_rqd_dma;
580 }
581
582 bio->bi_iter.bi_sector = 0; /* internal bio */
583 bio_set_op_attrs(bio, bio_op, 0);
584
585 rqd.bio = bio;
586 rqd.opcode = cmd_op;
587 rqd.flags = flags;
588 rqd.nr_ppas = rq_ppas;
589 rqd.ppa_list = ppa_list;
590 rqd.dma_ppa_list = dma_ppa_list;
591 rqd.end_io = pblk_end_io_sync;
592 rqd.private = &wait;
593
594 if (dir == WRITE) {
595 for (i = 0; i < rqd.nr_ppas; ) {
596 spin_lock(&line->lock);
597 paddr = __pblk_alloc_page(pblk, line, min);
598 spin_unlock(&line->lock);
599 for (j = 0; j < min; j++, i++, paddr++)
600 rqd.ppa_list[i] =
601 addr_to_gen_ppa(pblk, paddr, id);
602 }
603 } else {
604 for (i = 0; i < rqd.nr_ppas; ) {
605 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
606 int pos = pblk_dev_ppa_to_pos(geo, ppa);
607
608 while (test_bit(pos, line->blk_bitmap)) {
609 paddr += min;
610 if (pblk_boundary_paddr_checks(pblk, paddr)) {
611 pr_err("pblk: corrupt emeta line:%d\n",
612 line->id);
613 bio_put(bio);
614 ret = -EINTR;
615 goto free_rqd_dma;
616 }
617
618 ppa = addr_to_gen_ppa(pblk, paddr, id);
619 pos = pblk_dev_ppa_to_pos(geo, ppa);
620 }
621
622 if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
623 pr_err("pblk: corrupt emeta line:%d\n",
624 line->id);
625 bio_put(bio);
626 ret = -EINTR;
627 goto free_rqd_dma;
628 }
629
630 for (j = 0; j < min; j++, i++, paddr++)
631 rqd.ppa_list[i] =
632 addr_to_gen_ppa(pblk, paddr, line->id);
633 }
634 }
635
636 ret = pblk_submit_io(pblk, &rqd);
637 if (ret) {
638 pr_err("pblk: emeta I/O submission failed: %d\n", ret);
639 bio_put(bio);
640 goto free_rqd_dma;
641 }
642
643 if (!wait_for_completion_io_timeout(&wait,
644 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
645 pr_err("pblk: emeta I/O timed out\n");
646 }
647 reinit_completion(&wait);
648
649 bio_put(bio);
650
651 if (rqd.error) {
652 if (dir == WRITE)
653 pblk_log_write_err(pblk, &rqd);
654 else
655 pblk_log_read_err(pblk, &rqd);
656 }
657
658 emeta += rq_len;
659 left_ppas -= rq_ppas;
660 if (left_ppas)
661 goto next_rq;
662free_rqd_dma:
663 nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
664 return ret;
665}
666
667u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
668{
669 struct nvm_tgt_dev *dev = pblk->dev;
670 struct nvm_geo *geo = &dev->geo;
671 struct pblk_line_meta *lm = &pblk->lm;
672 int bit;
673
674 /* This usually only happens on bad lines */
675 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
676 if (bit >= lm->blk_per_line)
677 return -1;
678
679 return bit * geo->sec_per_pl;
680}
681
682static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
683 u64 paddr, int dir)
684{
685 struct nvm_tgt_dev *dev = pblk->dev;
686 struct pblk_line_meta *lm = &pblk->lm;
687 struct bio *bio;
688 struct nvm_rq rqd;
689 __le64 *lba_list = NULL;
690 int i, ret;
691 int cmd_op, bio_op;
692 int flags;
693 DECLARE_COMPLETION_ONSTACK(wait);
694
695 if (dir == WRITE) {
696 bio_op = REQ_OP_WRITE;
697 cmd_op = NVM_OP_PWRITE;
698 flags = pblk_set_progr_mode(pblk, WRITE);
699 lba_list = pblk_line_emeta_to_lbas(line->emeta);
700 } else if (dir == READ) {
701 bio_op = REQ_OP_READ;
702 cmd_op = NVM_OP_PREAD;
703 flags = pblk_set_read_mode(pblk);
704 } else
705 return -EINVAL;
706
707 memset(&rqd, 0, sizeof(struct nvm_rq));
708
709 rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
710 &rqd.dma_ppa_list);
711 if (!rqd.ppa_list)
712 return -ENOMEM;
713
714 bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
715 if (IS_ERR(bio)) {
716 ret = PTR_ERR(bio);
717 goto free_ppa_list;
718 }
719
720 bio->bi_iter.bi_sector = 0; /* internal bio */
721 bio_set_op_attrs(bio, bio_op, 0);
722
723 rqd.bio = bio;
724 rqd.opcode = cmd_op;
725 rqd.flags = flags;
726 rqd.nr_ppas = lm->smeta_sec;
727 rqd.end_io = pblk_end_io_sync;
728 rqd.private = &wait;
729
730 for (i = 0; i < lm->smeta_sec; i++, paddr++) {
731 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
732 if (dir == WRITE)
733 lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
734 }
735
736 /*
737 * This I/O is sent by the write thread when a line is replaced. Since
738 * the write thread is the only one sending write and erase commands,
739 * there is no need to take the LUN semaphore.
740 */
741 ret = pblk_submit_io(pblk, &rqd);
742 if (ret) {
743 pr_err("pblk: smeta I/O submission failed: %d\n", ret);
744 bio_put(bio);
745 goto free_ppa_list;
746 }
747
748 if (!wait_for_completion_io_timeout(&wait,
749 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
750 pr_err("pblk: smeta I/O timed out\n");
751 }
752
753 if (rqd.error) {
754 if (dir == WRITE)
755 pblk_log_write_err(pblk, &rqd);
756 else
757 pblk_log_read_err(pblk, &rqd);
758 }
759
760free_ppa_list:
761 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
762
763 return ret;
764}
765
766int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
767{
768 u64 bpaddr = pblk_line_smeta_start(pblk, line);
769
770 return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
771}
772
773int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
774{
775 return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
776}
777
778static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
779 struct ppa_addr ppa)
780{
781 rqd->opcode = NVM_OP_ERASE;
782 rqd->ppa_addr = ppa;
783 rqd->nr_ppas = 1;
784 rqd->flags = pblk_set_progr_mode(pblk, ERASE);
785 rqd->bio = NULL;
786}
787
788static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
789{
790 struct nvm_rq rqd;
791 int ret;
792 DECLARE_COMPLETION_ONSTACK(wait);
793
794 memset(&rqd, 0, sizeof(struct nvm_rq));
795
796 pblk_setup_e_rq(pblk, &rqd, ppa);
797
798 rqd.end_io = pblk_end_io_sync;
799 rqd.private = &wait;
800
801 /* The write thread schedules erases so that it minimizes disturbances
802 * with writes. Thus, there is no need to take the LUN semaphore.
803 */
804 ret = pblk_submit_io(pblk, &rqd);
805 if (ret) {
806 struct nvm_tgt_dev *dev = pblk->dev;
807 struct nvm_geo *geo = &dev->geo;
808
809 pr_err("pblk: could not sync erase line:%d,blk:%d\n",
810 pblk_dev_ppa_to_line(ppa),
811 pblk_dev_ppa_to_pos(geo, ppa));
812
813 rqd.error = ret;
814 goto out;
815 }
816
817 if (!wait_for_completion_io_timeout(&wait,
818 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
819 pr_err("pblk: sync erase timed out\n");
820 }
821
822out:
823 rqd.private = pblk;
824 __pblk_end_io_erase(pblk, &rqd);
825
826 return 0;
827}
828
829int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
830{
831 struct pblk_line_meta *lm = &pblk->lm;
832 struct ppa_addr ppa;
833 int bit = -1;
834
835 /* Erase one block at a time and only erase good blocks */
836 while ((bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
837 bit + 1)) < lm->blk_per_line) {
838 ppa = pblk->luns[bit].bppa; /* set ch and lun */
839 ppa.g.blk = line->id;
840
841 /* If the erase fails, the block is bad and should be marked */
842 line->left_eblks--;
843 WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
844
845 if (pblk_blk_erase_sync(pblk, ppa)) {
846 pr_err("pblk: failed to erase line %d\n", line->id);
847 return -ENOMEM;
848 }
849 }
850
851 return 0;
852}
853
854/* For now, lines are always assumed to be full lines. Thus, the smeta former
855 * and current lun bitmaps are omitted.
856 */
857static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
858 struct pblk_line *cur)
859{
860 struct nvm_tgt_dev *dev = pblk->dev;
861 struct nvm_geo *geo = &dev->geo;
862 struct pblk_line_meta *lm = &pblk->lm;
863 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
864 struct line_smeta *smeta = line->smeta;
865 struct line_emeta *emeta = line->emeta;
866 int nr_blk_line;
867
868 /* After erasing the line, new bad blocks might appear and we risk
869 * having an invalid line
870 */
871 nr_blk_line = lm->blk_per_line -
872 bitmap_weight(line->blk_bitmap, lm->blk_per_line);
873 if (nr_blk_line < lm->min_blk_line) {
874 spin_lock(&l_mg->free_lock);
875 spin_lock(&line->lock);
876 line->state = PBLK_LINESTATE_BAD;
877 spin_unlock(&line->lock);
878
879 list_add_tail(&line->list, &l_mg->bad_list);
880 spin_unlock(&l_mg->free_lock);
881
882 pr_debug("pblk: line %d is bad\n", line->id);
883
884 return 0;
885 }
886
887 /* Run-time metadata */
888 line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
889
890 /* Mark LUNs allocated in this line (all for now) */
891 bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
892
893 smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
894 memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
895 smeta->header.id = cpu_to_le32(line->id);
896 smeta->header.type = cpu_to_le16(line->type);
897 smeta->header.version = cpu_to_le16(1);
898
899 /* Start metadata */
900 smeta->seq_nr = cpu_to_le64(line->seq_nr);
901 smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
902
903 /* Fill metadata among lines */
904 if (cur) {
905 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
906 smeta->prev_id = cpu_to_le32(cur->id);
907 cur->emeta->next_id = cpu_to_le32(line->id);
908 } else {
909 smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
910 }
911
912 /* All smeta must be set at this point */
913 smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
914 smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
915
916 /* End metadata */
917 memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
918 emeta->seq_nr = cpu_to_le64(line->seq_nr);
919 emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
920 emeta->nr_valid_lbas = cpu_to_le64(0);
921 emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
922 emeta->crc = cpu_to_le32(0);
923 emeta->prev_id = smeta->prev_id;
924
925 return 1;
926}
927
928/* For now, lines are always assumed to be full lines. Thus, the smeta former
929 * and current lun bitmaps are omitted.
930 */
931static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
932 int init)
933{
934 struct nvm_tgt_dev *dev = pblk->dev;
935 struct nvm_geo *geo = &dev->geo;
936 struct pblk_line_meta *lm = &pblk->lm;
937 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
938 int nr_bb = 0;
939 u64 off;
940 int bit = -1;
941
942 line->sec_in_line = lm->sec_per_line;
943
944 /* Capture bad block information on line mapping bitmaps */
945 while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
946 bit + 1)) < lm->blk_per_line) {
947 off = bit * geo->sec_per_pl;
948 bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
949 lm->sec_per_line);
950 bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
951 lm->sec_per_line);
952 line->sec_in_line -= geo->sec_per_blk;
953 if (bit >= lm->emeta_bb)
954 nr_bb++;
955 }
956
957 /* Mark smeta metadata sectors as bad sectors */
958 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
959 off = bit * geo->sec_per_pl;
960retry_smeta:
961 bitmap_set(line->map_bitmap, off, lm->smeta_sec);
962 line->sec_in_line -= lm->smeta_sec;
963 line->smeta_ssec = off;
964 line->cur_sec = off + lm->smeta_sec;
965
966 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
967 pr_debug("pblk: line smeta I/O failed. Retry\n");
968 off += geo->sec_per_pl;
969 goto retry_smeta;
970 }
971
972 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
973
974 /* Mark emeta metadata sectors as bad sectors. We need to consider bad
975 * blocks to make sure that there are enough sectors to store emeta
976 */
977 bit = lm->sec_per_line;
978 off = lm->sec_per_line - lm->emeta_sec;
979 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
980 while (nr_bb) {
981 off -= geo->sec_per_pl;
982 if (!test_bit(off, line->invalid_bitmap)) {
983 bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
984 nr_bb--;
985 }
986 }
987
988 line->sec_in_line -= lm->emeta_sec;
989 line->emeta_ssec = off;
990 line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
991
992 if (lm->sec_per_line - line->sec_in_line !=
993 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
994 spin_lock(&line->lock);
995 line->state = PBLK_LINESTATE_BAD;
996 spin_unlock(&line->lock);
997
998 list_add_tail(&line->list, &l_mg->bad_list);
999 pr_err("pblk: unexpected line %d is bad\n", line->id);
1000
1001 return 0;
1002 }
1003
1004 return 1;
1005}
1006
1007static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1008{
1009 struct pblk_line_meta *lm = &pblk->lm;
1010
1011 line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1012 if (!line->map_bitmap)
1013 return -ENOMEM;
1014 memset(line->map_bitmap, 0, lm->sec_bitmap_len);
1015
1016 /* invalid_bitmap is special since it is used when the line is closed. No
1017 * need to zero it; it will be initialized using bb info from
1018 * map_bitmap
1019 */
1020 line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1021 if (!line->invalid_bitmap) {
1022 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1023 return -ENOMEM;
1024 }
1025
1026 spin_lock(&line->lock);
1027 if (line->state != PBLK_LINESTATE_FREE) {
1028 spin_unlock(&line->lock);
1029 WARN(1, "pblk: corrupted line state\n");
1030 return -EINTR;
1031 }
1032 line->state = PBLK_LINESTATE_OPEN;
1033 spin_unlock(&line->lock);
1034
1035 /* Bad blocks do not need to be erased */
1036 bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
1037 line->left_eblks = line->blk_in_line;
1038 atomic_set(&line->left_seblks, line->left_eblks);
1039
1040 kref_init(&line->ref);
1041
1042 return 0;
1043}
1044
1045int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
1046{
1047 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1048 int ret;
1049
1050 spin_lock(&l_mg->free_lock);
1051 l_mg->data_line = line;
1052 list_del(&line->list);
1053 spin_unlock(&l_mg->free_lock);
1054
1055 ret = pblk_line_prepare(pblk, line);
1056 if (ret) {
1057 list_add(&line->list, &l_mg->free_list);
1058 return ret;
1059 }
1060
1061 pblk_rl_free_lines_dec(&pblk->rl, line);
1062
1063 if (!pblk_line_init_bb(pblk, line, 0)) {
1064 list_add(&line->list, &l_mg->free_list);
1065 return -EINTR;
1066 }
1067
1068 return 0;
1069}
1070
1071void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
1072{
1073 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1074 line->map_bitmap = NULL;
1075 line->smeta = NULL;
1076 line->emeta = NULL;
1077}
1078
1079struct pblk_line *pblk_line_get(struct pblk *pblk)
1080{
1081 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1082 struct pblk_line_meta *lm = &pblk->lm;
1083 struct pblk_line *line = NULL;
1084 int bit;
1085
1086 lockdep_assert_held(&l_mg->free_lock);
1087
1088retry_get:
1089 if (list_empty(&l_mg->free_list)) {
1090 pr_err("pblk: no free lines\n");
1091 goto out;
1092 }
1093
1094 line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
1095 list_del(&line->list);
1096 l_mg->nr_free_lines--;
1097
1098 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
1099 if (unlikely(bit >= lm->blk_per_line)) {
1100 spin_lock(&line->lock);
1101 line->state = PBLK_LINESTATE_BAD;
1102 spin_unlock(&line->lock);
1103
1104 list_add_tail(&line->list, &l_mg->bad_list);
1105
1106 pr_debug("pblk: line %d is bad\n", line->id);
1107 goto retry_get;
1108 }
1109
1110 if (pblk_line_prepare(pblk, line)) {
1111 pr_err("pblk: failed to prepare line %d\n", line->id);
1112 list_add(&line->list, &l_mg->free_list);
1113 return NULL;
1114 }
1115
1116out:
1117 return line;
1118}
1119
1120static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1121 struct pblk_line *line)
1122{
1123 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1124 struct pblk_line *retry_line;
1125
1126 spin_lock(&l_mg->free_lock);
1127 retry_line = pblk_line_get(pblk);
1128 if (!retry_line) {
1129 spin_unlock(&l_mg->free_lock);
1130 return NULL;
1131 }
1132
1133 retry_line->smeta = line->smeta;
1134 retry_line->emeta = line->emeta;
1135 retry_line->meta_line = line->meta_line;
1136 retry_line->map_bitmap = line->map_bitmap;
1137 retry_line->invalid_bitmap = line->invalid_bitmap;
1138
1139 line->map_bitmap = NULL;
1140 line->invalid_bitmap = NULL;
1141 line->smeta = NULL;
1142 line->emeta = NULL;
1143 spin_unlock(&l_mg->free_lock);
1144
1145 if (pblk_line_erase(pblk, retry_line))
1146 return NULL;
1147
1148 pblk_rl_free_lines_dec(&pblk->rl, retry_line);
1149
1150 l_mg->data_line = retry_line;
1151
1152 return retry_line;
1153}
1154
1155struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1156{
1157 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1158 struct pblk_line *line;
1159 int meta_line;
1160 int is_next = 0;
1161
1162 spin_lock(&l_mg->free_lock);
1163 line = pblk_line_get(pblk);
1164 if (!line) {
1165 spin_unlock(&l_mg->free_lock);
1166 return NULL;
1167 }
1168
1169 line->seq_nr = l_mg->d_seq_nr++;
1170 line->type = PBLK_LINETYPE_DATA;
1171 l_mg->data_line = line;
1172
1173 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
1174 set_bit(meta_line, &l_mg->meta_bitmap);
1175 line->smeta = l_mg->sline_meta[meta_line].meta;
1176 line->emeta = l_mg->eline_meta[meta_line].meta;
1177 line->meta_line = meta_line;
1178
1179 /* Allocate next line for preparation */
1180 l_mg->data_next = pblk_line_get(pblk);
1181 if (l_mg->data_next) {
1182 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1183 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1184 is_next = 1;
1185 }
1186 spin_unlock(&l_mg->free_lock);
1187
1188 pblk_rl_free_lines_dec(&pblk->rl, line);
1189 if (is_next)
1190 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1191
1192 if (pblk_line_erase(pblk, line))
1193 return NULL;
1194
1195retry_setup:
1196 if (!pblk_line_set_metadata(pblk, line, NULL)) {
1197 line = pblk_line_retry(pblk, line);
1198 if (!line)
1199 return NULL;
1200
1201 goto retry_setup;
1202 }
1203
1204 if (!pblk_line_init_bb(pblk, line, 1)) {
1205 line = pblk_line_retry(pblk, line);
1206 if (!line)
1207 return NULL;
1208
1209 goto retry_setup;
1210 }
1211
1212 return line;
1213}
1214
1215struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
1216{
1217 struct pblk_line_meta *lm = &pblk->lm;
1218 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1219 struct pblk_line *cur, *new;
1220 unsigned int left_seblks;
1221 int meta_line;
1222 int is_next = 0;
1223
1224 cur = l_mg->data_line;
1225 new = l_mg->data_next;
1226 if (!new)
1227 return NULL;
1228 l_mg->data_line = new;
1229
1230retry_line:
1231 left_seblks = atomic_read(&new->left_seblks);
1232 if (left_seblks) {
1233 /* If line is not fully erased, erase it */
1234 if (new->left_eblks) {
1235 if (pblk_line_erase(pblk, new))
1236 return NULL;
1237 } else {
1238 io_schedule();
1239 }
1240 goto retry_line;
1241 }
1242
1243 spin_lock(&l_mg->free_lock);
1244 /* Allocate next line for preparation */
1245 l_mg->data_next = pblk_line_get(pblk);
1246 if (l_mg->data_next) {
1247 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1248 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1249 is_next = 1;
1250 }
1251
1252retry_meta:
1253 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
1254 if (meta_line == PBLK_DATA_LINES) {
1255 spin_unlock(&l_mg->free_lock);
1256 io_schedule();
1257 spin_lock(&l_mg->free_lock);
1258 goto retry_meta;
1259 }
1260
1261 set_bit(meta_line, &l_mg->meta_bitmap);
1262 new->smeta = l_mg->sline_meta[meta_line].meta;
1263 new->emeta = l_mg->eline_meta[meta_line].meta;
1264 new->meta_line = meta_line;
1265
1266 memset(new->smeta, 0, lm->smeta_len);
1267 memset(new->emeta, 0, lm->emeta_len);
1268 spin_unlock(&l_mg->free_lock);
1269
1270 if (is_next)
1271 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1272
1273retry_setup:
1274 if (!pblk_line_set_metadata(pblk, new, cur)) {
1275 new = pblk_line_retry(pblk, new);
1276 if (!new)
1277 return NULL;
1278
1279 goto retry_setup;
1280 }
1281
1282 if (!pblk_line_init_bb(pblk, new, 1)) {
1283 new = pblk_line_retry(pblk, new);
1284 if (!new)
1285 return NULL;
1286
1287 goto retry_setup;
1288 }
1289
1290 return new;
1291}
1292
1293void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
1294{
1295 if (line->map_bitmap)
1296 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1297 if (line->invalid_bitmap)
1298 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1299
1300 line->map_bitmap = NULL;
1301 line->invalid_bitmap = NULL;
1302}
1303
1304void pblk_line_put(struct kref *ref)
1305{
1306 struct pblk_line *line = container_of(ref, struct pblk_line, ref);
1307 struct pblk *pblk = line->pblk;
1308 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1309
1310 spin_lock(&line->lock);
1311 WARN_ON(line->state != PBLK_LINESTATE_GC);
1312 line->state = PBLK_LINESTATE_FREE;
1313 line->gc_group = PBLK_LINEGC_NONE;
1314 pblk_line_free(pblk, line);
1315 spin_unlock(&line->lock);
1316
1317 spin_lock(&l_mg->free_lock);
1318 list_add_tail(&line->list, &l_mg->free_list);
1319 l_mg->nr_free_lines++;
1320 spin_unlock(&l_mg->free_lock);
1321
1322 pblk_rl_free_lines_inc(&pblk->rl, line);
1323}
1324
1325int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
1326{
1327 struct nvm_rq *rqd;
1328 int err;
1329
1330 rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
1331 memset(rqd, 0, pblk_r_rq_size);
1332
1333 pblk_setup_e_rq(pblk, rqd, ppa);
1334
1335 rqd->end_io = pblk_end_io_erase;
1336 rqd->private = pblk;
1337
1338 /* The write thread schedules erases so that it minimizes disturbances
1339 * with writes. Thus, there is no need to take the LUN semaphore.
1340 */
1341 err = pblk_submit_io(pblk, rqd);
1342 if (err) {
1343 struct nvm_tgt_dev *dev = pblk->dev;
1344 struct nvm_geo *geo = &dev->geo;
1345
1346 pr_err("pblk: could not async erase line:%d,blk:%d\n",
1347 pblk_dev_ppa_to_line(ppa),
1348 pblk_dev_ppa_to_pos(geo, ppa));
1349 }
1350
1351 return err;
1352}
1353
1354struct pblk_line *pblk_line_get_data(struct pblk *pblk)
1355{
1356 return pblk->l_mg.data_line;
1357}
1358
1359struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
1360{
1361 return pblk->l_mg.data_next;
1362}
1363
1364int pblk_line_is_full(struct pblk_line *line)
1365{
1366 return (line->left_msecs == 0);
1367}
1368
1369void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1370{
1371 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1372 struct list_head *move_list;
1373
1374 line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
1375
1376 if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
1377 pr_err("pblk: line %d close I/O failed\n", line->id);
1378
1379 WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
1380 "pblk: corrupt closed line %d\n", line->id);
1381
1382 spin_lock(&l_mg->free_lock);
1383 WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
1384 spin_unlock(&l_mg->free_lock);
1385
1386 spin_lock(&l_mg->gc_lock);
1387 spin_lock(&line->lock);
1388 WARN_ON(line->state != PBLK_LINESTATE_OPEN);
1389 line->state = PBLK_LINESTATE_CLOSED;
1390 move_list = pblk_line_gc_list(pblk, line);
1391
1392 list_add_tail(&line->list, move_list);
1393
1394 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1395 line->map_bitmap = NULL;
1396 line->smeta = NULL;
1397 line->emeta = NULL;
1398
1399 spin_unlock(&line->lock);
1400 spin_unlock(&l_mg->gc_lock);
1401}
1402
1403void pblk_line_close_ws(struct work_struct *work)
1404{
1405 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1406 ws);
1407 struct pblk *pblk = line_ws->pblk;
1408 struct pblk_line *line = line_ws->line;
1409
1410 pblk_line_close(pblk, line);
1411 mempool_free(line_ws, pblk->line_ws_pool);
1412}
1413
1414void pblk_line_mark_bb(struct work_struct *work)
1415{
1416 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1417 ws);
1418 struct pblk *pblk = line_ws->pblk;
1419 struct nvm_tgt_dev *dev = pblk->dev;
1420 struct ppa_addr *ppa = line_ws->priv;
1421 int ret;
1422
1423 ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
1424 if (ret) {
1425 struct pblk_line *line;
1426 int pos;
1427
1428 line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
1429 pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
1430
1431 pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
1432 line->id, pos);
1433 }
1434
1435 kfree(ppa);
1436 mempool_free(line_ws, pblk->line_ws_pool);
1437}
1438
1439void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1440 void (*work)(struct work_struct *))
1441{
1442 struct pblk_line_ws *line_ws;
1443
1444 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
1445 if (!line_ws)
1446 return;
1447
1448 line_ws->pblk = pblk;
1449 line_ws->line = line;
1450 line_ws->priv = priv;
1451
1452 INIT_WORK(&line_ws->ws, work);
1453 queue_work(pblk->kw_wq, &line_ws->ws);
1454}
1455
1456void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1457 unsigned long *lun_bitmap)
1458{
1459 struct nvm_tgt_dev *dev = pblk->dev;
1460 struct nvm_geo *geo = &dev->geo;
1461 struct pblk_lun *rlun;
1462 int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
1463 int ret;
1464
1465 /*
1466 * Only send one inflight I/O per LUN. Since we map at a page
1467 * granularity, all ppas in the I/O will map to the same LUN
1468 */
1469#ifdef CONFIG_NVM_DEBUG
1470 int i;
1471
1472 for (i = 1; i < nr_ppas; i++)
1473 WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
1474 ppa_list[0].g.ch != ppa_list[i].g.ch);
1475#endif
1476 /* If the LUN has been locked for this same request, do not attempt to
1477 * lock it again
1478 */
1479 if (test_and_set_bit(lun_id, lun_bitmap))
1480 return;
1481
1482 rlun = &pblk->luns[lun_id];
1483 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
1484 if (ret) {
1485 switch (ret) {
1486 case -ETIME:
1487 pr_err("pblk: lun semaphore timed out\n");
1488 break;
1489 case -EINTR:
1490 pr_err("pblk: lun semaphore interrupted\n");
1491 break;
1492 }
1493 }
1494}
1495
1496void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1497 unsigned long *lun_bitmap)
1498{
1499 struct nvm_tgt_dev *dev = pblk->dev;
1500 struct nvm_geo *geo = &dev->geo;
1501 struct pblk_lun *rlun;
1502 int nr_luns = geo->nr_luns;
1503 int bit = -1;
1504
1505 while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
1506 rlun = &pblk->luns[bit];
1507 up(&rlun->wr_sem);
1508 }
1509
1510 kfree(lun_bitmap);
1511}
1512
1513void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1514{
1515 struct ppa_addr l2p_ppa;
1516
1517 /* logic error: lba out-of-bounds. Ignore update */
1518 if (!(lba < pblk->rl.nr_secs)) {
1519 WARN(1, "pblk: corrupted L2P map request\n");
1520 return;
1521 }
1522
1523 spin_lock(&pblk->trans_lock);
1524 l2p_ppa = pblk_trans_map_get(pblk, lba);
1525
1526 if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
1527 pblk_map_invalidate(pblk, l2p_ppa);
1528
1529 pblk_trans_map_set(pblk, lba, ppa);
1530 spin_unlock(&pblk->trans_lock);
1531}
1532
1533void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1534{
1535#ifdef CONFIG_NVM_DEBUG
1536 /* Callers must ensure that the ppa points to a cache address */
1537 BUG_ON(!pblk_addr_in_cache(ppa));
1538 BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1539#endif
1540
1541 pblk_update_map(pblk, lba, ppa);
1542}
1543
1544int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1545 struct pblk_line *gc_line)
1546{
1547 struct ppa_addr l2p_ppa;
1548 int ret = 1;
1549
1550#ifdef CONFIG_NVM_DEBUG
1551 /* Callers must ensure that the ppa points to a cache address */
1552 BUG_ON(!pblk_addr_in_cache(ppa));
1553 BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1554#endif
1555
1556 /* logic error: lba out-of-bounds. Ignore update */
1557 if (!(lba < pblk->rl.nr_secs)) {
1558 WARN(1, "pblk: corrupted L2P map request\n");
1559 return 0;
1560 }
1561
1562 spin_lock(&pblk->trans_lock);
1563 l2p_ppa = pblk_trans_map_get(pblk, lba);
1564
1565 /* Prevent updated entries to be overwritten by GC */
1566 if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
1567 pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
1568 ret = 0;
1569 goto out;
1570 }
1571
1572 pblk_trans_map_set(pblk, lba, ppa);
1573out:
1574 spin_unlock(&pblk->trans_lock);
1575 return ret;
1576}
1577
1578void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1579 struct ppa_addr entry_line)
1580{
1581 struct ppa_addr l2p_line;
1582
1583#ifdef CONFIG_NVM_DEBUG
1584 /* Callers must ensure that the ppa points to a device address */
1585 BUG_ON(pblk_addr_in_cache(ppa));
1586#endif
1587 /* Invalidate and discard padded entries */
1588 if (lba == ADDR_EMPTY) {
1589#ifdef CONFIG_NVM_DEBUG
1590 atomic_long_inc(&pblk->padded_wb);
1591#endif
1592 pblk_map_invalidate(pblk, ppa);
1593 return;
1594 }
1595
1596 /* logic error: lba out-of-bounds. Ignore update */
1597 if (!(lba < pblk->rl.nr_secs)) {
1598 WARN(1, "pblk: corrupted L2P map request\n");
1599 return;
1600 }
1601
1602 spin_lock(&pblk->trans_lock);
1603 l2p_line = pblk_trans_map_get(pblk, lba);
1604
1605 /* Do not update L2P if the cacheline has been updated. In this case,
1606 * the mapped ppa must be invalidated
1607 */
1608 if (l2p_line.ppa != entry_line.ppa) {
1609 if (!pblk_ppa_empty(ppa))
1610 pblk_map_invalidate(pblk, ppa);
1611 goto out;
1612 }
1613
1614#ifdef CONFIG_NVM_DEBUG
1615 WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
1616#endif
1617
1618 pblk_trans_map_set(pblk, lba, ppa);
1619out:
1620 spin_unlock(&pblk->trans_lock);
1621}
1622
1623void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
1624 sector_t blba, int nr_secs)
1625{
1626 int i;
1627
1628 spin_lock(&pblk->trans_lock);
1629 for (i = 0; i < nr_secs; i++)
1630 ppas[i] = pblk_trans_map_get(pblk, blba + i);
1631 spin_unlock(&pblk->trans_lock);
1632}
1633
1634void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
1635 u64 *lba_list, int nr_secs)
1636{
1637 sector_t lba;
1638 int i;
1639
1640 spin_lock(&pblk->trans_lock);
1641 for (i = 0; i < nr_secs; i++) {
1642 lba = lba_list[i];
1643 if (lba == ADDR_EMPTY) {
1644 ppas[i].ppa = ADDR_EMPTY;
1645 } else {
1646 /* logic error: lba out-of-bounds. Ignore update */
1647 if (!(lba < pblk->rl.nr_secs)) {
1648 WARN(1, "pblk: corrupted L2P map request\n");
1649 continue;
1650 }
1651 ppas[i] = pblk_trans_map_get(pblk, lba);
1652 }
1653 }
1654 spin_unlock(&pblk->trans_lock);
1655}
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
new file mode 100644
index 000000000000..9b147cfd8a41
--- /dev/null
+++ b/drivers/lightnvm/pblk-gc.c
@@ -0,0 +1,555 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-gc.c - pblk's garbage collector
16 */
17
18#include "pblk.h"
19#include <linux/delay.h>
20
21static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
22{
23 kfree(gc_rq->data);
24 kfree(gc_rq->lba_list);
25 kfree(gc_rq);
26}
27
28static int pblk_gc_write(struct pblk *pblk)
29{
30 struct pblk_gc *gc = &pblk->gc;
31 struct pblk_gc_rq *gc_rq, *tgc_rq;
32 LIST_HEAD(w_list);
33
34 spin_lock(&gc->w_lock);
35 if (list_empty(&gc->w_list)) {
36 spin_unlock(&gc->w_lock);
37 return 1;
38 }
39
40 list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
41 list_move_tail(&gc_rq->list, &w_list);
42 gc->w_entries--;
43 }
44 spin_unlock(&gc->w_lock);
45
46 list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
47 pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
48 gc_rq->nr_secs, gc_rq->secs_to_gc,
49 gc_rq->line, PBLK_IOTYPE_GC);
50
51 kref_put(&gc_rq->line->ref, pblk_line_put);
52
53 list_del(&gc_rq->list);
54 pblk_gc_free_gc_rq(gc_rq);
55 }
56
57 return 0;
58}
59
60static void pblk_gc_writer_kick(struct pblk_gc *gc)
61{
62 wake_up_process(gc->gc_writer_ts);
63}
64
65/*
66 * Responsible for managing all memory related to a GC request, including
67 * freeing it on failure.
68 */
69static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
70 u64 *lba_list, unsigned int nr_secs)
71{
72 struct nvm_tgt_dev *dev = pblk->dev;
73 struct nvm_geo *geo = &dev->geo;
74 struct pblk_gc *gc = &pblk->gc;
75 struct pblk_gc_rq *gc_rq;
76 void *data;
77 unsigned int secs_to_gc;
78 int ret = NVM_IO_OK;
79
80 data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
81 if (!data) {
82 ret = NVM_IO_ERR;
83 goto free_lba_list;
84 }
85
86 /* Read from GC victim block */
87 if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
88 &secs_to_gc, line)) {
89 ret = NVM_IO_ERR;
90 goto free_data;
91 }
92
93 if (!secs_to_gc)
94 goto free_data;
95
96 gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
97 if (!gc_rq) {
98 ret = NVM_IO_ERR;
99 goto free_data;
100 }
101
102 gc_rq->line = line;
103 gc_rq->data = data;
104 gc_rq->lba_list = lba_list;
105 gc_rq->nr_secs = nr_secs;
106 gc_rq->secs_to_gc = secs_to_gc;
107
108 kref_get(&line->ref);
109
110retry:
111 spin_lock(&gc->w_lock);
112 if (gc->w_entries > 256) {
113 spin_unlock(&gc->w_lock);
114 usleep_range(256, 1024);
115 goto retry;
116 }
117 gc->w_entries++;
118 list_add_tail(&gc_rq->list, &gc->w_list);
119 spin_unlock(&gc->w_lock);
120
121 pblk_gc_writer_kick(&pblk->gc);
122
123 return NVM_IO_OK;
124
125free_data:
126 kfree(data);
127free_lba_list:
128 kfree(lba_list);
129
130 return ret;
131}
132
133static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
134{
135 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
136 struct list_head *move_list;
137
138 spin_lock(&line->lock);
139 WARN_ON(line->state != PBLK_LINESTATE_GC);
140 line->state = PBLK_LINESTATE_CLOSED;
141 move_list = pblk_line_gc_list(pblk, line);
142 spin_unlock(&line->lock);
143
144 if (move_list) {
145 spin_lock(&l_mg->gc_lock);
146 list_add_tail(&line->list, move_list);
147 spin_unlock(&l_mg->gc_lock);
148 }
149}
150
151static void pblk_gc_line_ws(struct work_struct *work)
152{
153 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
154 ws);
155 struct pblk *pblk = line_ws->pblk;
156 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
157 struct pblk_line *line = line_ws->line;
158 struct pblk_line_meta *lm = &pblk->lm;
159 __le64 *lba_list = line_ws->priv;
160 u64 *gc_list;
161 int sec_left;
162 int nr_ppas, bit;
163 int put_line = 1;
164
165 pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
166
167 spin_lock(&line->lock);
168 sec_left = line->vsc;
169 if (!sec_left) {
170 /* Lines are erased before being used (l_mg->data_/log_next) */
171 spin_unlock(&line->lock);
172 goto out;
173 }
174 spin_unlock(&line->lock);
175
176 if (sec_left < 0) {
177 pr_err("pblk: corrupted GC line (%d)\n", line->id);
178 put_line = 0;
179 pblk_put_line_back(pblk, line);
180 goto out;
181 }
182
183 bit = -1;
184next_rq:
185 gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
186 if (!gc_list) {
187 put_line = 0;
188 pblk_put_line_back(pblk, line);
189 goto out;
190 }
191
192 nr_ppas = 0;
193 do {
194 bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
195 bit + 1);
196 if (bit > line->emeta_ssec)
197 break;
198
199 gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
200 } while (nr_ppas < pblk->max_write_pgs);
201
202 if (unlikely(!nr_ppas)) {
203 kfree(gc_list);
204 goto out;
205 }
206
207 if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
208 pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
209 line->id, line->vsc,
210 nr_ppas, nr_ppas);
211 put_line = 0;
212 pblk_put_line_back(pblk, line);
213 goto out;
214 }
215
216 sec_left -= nr_ppas;
217 if (sec_left > 0)
218 goto next_rq;
219
220out:
221 pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
222 mempool_free(line_ws, pblk->line_ws_pool);
223 atomic_dec(&pblk->gc.inflight_gc);
224 if (put_line)
225 kref_put(&line->ref, pblk_line_put);
226}
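
pblk_gc_line_ws() batches valid sectors by walking the line's invalid_bitmap with find_next_zero_bit() and cutting a GC request every max_write_pgs entries. The sketch below shows only that batching pattern under simplified assumptions (a small uint64_t bitmap, a hand-rolled bit_is_set() instead of the kernel bitmap API, no emeta boundary); it is not taken from the patch.

/* Illustrative sketch of batching the still-valid (zero) bits of an
 * invalid-sector bitmap, roughly what pblk_gc_line_ws() does with
 * find_next_zero_bit(). Not kernel code; sizes and helpers are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define SEC_PER_LINE 16
#define MAX_BATCH 4

static int bit_is_set(const uint64_t *bm, int bit)
{
	return (bm[bit / 64] >> (bit % 64)) & 1;
}

int main(void)
{
	/* set bits = invalidated sectors; zero bits still hold valid data */
	uint64_t invalid_bitmap[1] = { 0xACF0 };
	int bit = -1;

	while (1) {
		int batch[MAX_BATCH], nr = 0;

		while (nr < MAX_BATCH) {
			do {
				bit++;
			} while (bit < SEC_PER_LINE &&
				 bit_is_set(invalid_bitmap, bit));
			if (bit >= SEC_PER_LINE)
				break;
			batch[nr++] = bit;
		}
		if (!nr)
			break;

		printf("GC request with %d valid sectors:", nr);
		for (int i = 0; i < nr; i++)
			printf(" %d", batch[i]);
		printf("\n");
	}
	return 0;
}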
227
228static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
229{
230 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
231 struct pblk_line_meta *lm = &pblk->lm;
232 struct pblk_line_ws *line_ws;
233 __le64 *lba_list;
234 int ret;
235
236 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
237 line->emeta = pblk_malloc(lm->emeta_len, l_mg->emeta_alloc_type,
238 GFP_KERNEL);
239 if (!line->emeta) {
240 pr_err("pblk: cannot use GC emeta\n");
241 goto fail_free_ws;
242 }
243
244 ret = pblk_line_read_emeta(pblk, line);
245 if (ret) {
246 pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
247 goto fail_free_emeta;
248 }
249
 250	/* If the lba list cannot be interpreted, the emeta is corrupted. For
 251	 * now, leave the line untouched. TODO: Implement a recovery routine
 252	 * that scans and moves all sectors on the line.
253 */
254 lba_list = pblk_recov_get_lba_list(pblk, line->emeta);
255 if (!lba_list) {
256 pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
257 goto fail_free_emeta;
258 }
259
260 line_ws->pblk = pblk;
261 line_ws->line = line;
262 line_ws->priv = lba_list;
263
264 INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
265 queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
266
267 return 0;
268
269fail_free_emeta:
270 pblk_mfree(line->emeta, l_mg->emeta_alloc_type);
271fail_free_ws:
272 mempool_free(line_ws, pblk->line_ws_pool);
273 pblk_put_line_back(pblk, line);
274
275 return 1;
276}
277
278static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
279{
280 struct pblk_line *line, *tline;
281
282 list_for_each_entry_safe(line, tline, gc_list, list) {
283 if (pblk_gc_line(pblk, line))
284 pr_err("pblk: failed to GC line %d\n", line->id);
285 list_del(&line->list);
286 }
287}
288
289/*
290 * Lines with no valid sectors will be returned to the free list immediately. If
291 * GC is activated - either because the free block count is under the determined
292 * threshold, or because it is being forced from user space - only lines with a
293 * high count of invalid sectors will be recycled.
294 */
295static void pblk_gc_run(struct pblk *pblk)
296{
297 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
298 struct pblk_gc *gc = &pblk->gc;
299 struct pblk_line *line, *tline;
300 unsigned int nr_blocks_free, nr_blocks_need;
301 struct list_head *group_list;
302 int run_gc, gc_group = 0;
303 int prev_gc = 0;
304 int inflight_gc = atomic_read(&gc->inflight_gc);
305 LIST_HEAD(gc_list);
306
307 spin_lock(&l_mg->gc_lock);
308 list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
309 spin_lock(&line->lock);
310 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
311 line->state = PBLK_LINESTATE_GC;
312 spin_unlock(&line->lock);
313
314 list_del(&line->list);
315 kref_put(&line->ref, pblk_line_put);
316 }
317 spin_unlock(&l_mg->gc_lock);
318
319 nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
320 nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
321 run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
322
323next_gc_group:
324 group_list = l_mg->gc_lists[gc_group++];
325 spin_lock(&l_mg->gc_lock);
326 while (run_gc && !list_empty(group_list)) {
327 /* No need to queue up more GC lines than we can handle */
328 if (!run_gc || inflight_gc > gc->gc_jobs_active) {
329 spin_unlock(&l_mg->gc_lock);
330 pblk_gc_lines(pblk, &gc_list);
331 return;
332 }
333
334 line = list_first_entry(group_list, struct pblk_line, list);
335 nr_blocks_free += line->blk_in_line;
336
337 spin_lock(&line->lock);
338 WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
339 line->state = PBLK_LINESTATE_GC;
340 list_move_tail(&line->list, &gc_list);
341 atomic_inc(&gc->inflight_gc);
342 inflight_gc++;
343 spin_unlock(&line->lock);
344
345 prev_gc = 1;
346 run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
347 }
348 spin_unlock(&l_mg->gc_lock);
349
350 pblk_gc_lines(pblk, &gc_list);
351
352 if (!prev_gc && pblk->rl.rb_state > gc_group &&
353 gc_group < PBLK_NR_GC_LISTS)
354 goto next_gc_group;
355}
356
357
358static void pblk_gc_kick(struct pblk *pblk)
359{
360 struct pblk_gc *gc = &pblk->gc;
361
362 wake_up_process(gc->gc_ts);
363 pblk_gc_writer_kick(gc);
364 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
365}
366
367static void pblk_gc_timer(unsigned long data)
368{
369 struct pblk *pblk = (struct pblk *)data;
370
371 pblk_gc_kick(pblk);
372}
373
374static int pblk_gc_ts(void *data)
375{
376 struct pblk *pblk = data;
377
378 while (!kthread_should_stop()) {
379 pblk_gc_run(pblk);
380 set_current_state(TASK_INTERRUPTIBLE);
381 io_schedule();
382 }
383
384 return 0;
385}
386
387static int pblk_gc_writer_ts(void *data)
388{
389 struct pblk *pblk = data;
390
391 while (!kthread_should_stop()) {
392 if (!pblk_gc_write(pblk))
393 continue;
394 set_current_state(TASK_INTERRUPTIBLE);
395 io_schedule();
396 }
397
398 return 0;
399}
400
401static void pblk_gc_start(struct pblk *pblk)
402{
403 pblk->gc.gc_active = 1;
404
405 pr_debug("pblk: gc start\n");
406}
407
408int pblk_gc_status(struct pblk *pblk)
409{
410 struct pblk_gc *gc = &pblk->gc;
411 int ret;
412
413 spin_lock(&gc->lock);
414 ret = gc->gc_active;
415 spin_unlock(&gc->lock);
416
417 return ret;
418}
419
420static void __pblk_gc_should_start(struct pblk *pblk)
421{
422 struct pblk_gc *gc = &pblk->gc;
423
424 lockdep_assert_held(&gc->lock);
425
426 if (gc->gc_enabled && !gc->gc_active)
427 pblk_gc_start(pblk);
428}
429
430void pblk_gc_should_start(struct pblk *pblk)
431{
432 struct pblk_gc *gc = &pblk->gc;
433
434 spin_lock(&gc->lock);
435 __pblk_gc_should_start(pblk);
436 spin_unlock(&gc->lock);
437}
438
439/*
440 * If flush_wq == 1 then no lock should be held by the caller since
441 * flush_workqueue can sleep
442 */
443static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
444{
445 spin_lock(&pblk->gc.lock);
446 pblk->gc.gc_active = 0;
447 spin_unlock(&pblk->gc.lock);
448
449 pr_debug("pblk: gc stop\n");
450}
451
452void pblk_gc_should_stop(struct pblk *pblk)
453{
454 struct pblk_gc *gc = &pblk->gc;
455
456 if (gc->gc_active && !gc->gc_forced)
457 pblk_gc_stop(pblk, 0);
458}
459
460void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
461 int *gc_active)
462{
463 struct pblk_gc *gc = &pblk->gc;
464
465 spin_lock(&gc->lock);
466 *gc_enabled = gc->gc_enabled;
467 *gc_active = gc->gc_active;
468 spin_unlock(&gc->lock);
469}
470
471void pblk_gc_sysfs_force(struct pblk *pblk, int force)
472{
473 struct pblk_gc *gc = &pblk->gc;
474 int rsv = 0;
475
476 spin_lock(&gc->lock);
477 if (force) {
478 gc->gc_enabled = 1;
479 rsv = 64;
480 }
481 pblk_rl_set_gc_rsc(&pblk->rl, rsv);
482 gc->gc_forced = force;
483 __pblk_gc_should_start(pblk);
484 spin_unlock(&gc->lock);
485}
486
487int pblk_gc_init(struct pblk *pblk)
488{
489 struct pblk_gc *gc = &pblk->gc;
490 int ret;
491
492 gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
493 if (IS_ERR(gc->gc_ts)) {
494 pr_err("pblk: could not allocate GC main kthread\n");
495 return PTR_ERR(gc->gc_ts);
496 }
497
498 gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
499 "pblk-gc-writer-ts");
500 if (IS_ERR(gc->gc_writer_ts)) {
501 pr_err("pblk: could not allocate GC writer kthread\n");
502 ret = PTR_ERR(gc->gc_writer_ts);
503 goto fail_free_main_kthread;
504 }
505
506 setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
507 mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
508
509 gc->gc_active = 0;
510 gc->gc_forced = 0;
511 gc->gc_enabled = 1;
512 gc->gc_jobs_active = 8;
513 gc->w_entries = 0;
514 atomic_set(&gc->inflight_gc, 0);
515
516 gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
517 WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
518 if (!gc->gc_reader_wq) {
519 pr_err("pblk: could not allocate GC reader workqueue\n");
520 ret = -ENOMEM;
521 goto fail_free_writer_kthread;
522 }
523
524 spin_lock_init(&gc->lock);
525 spin_lock_init(&gc->w_lock);
526 INIT_LIST_HEAD(&gc->w_list);
527
528 return 0;
529
 530fail_free_writer_kthread:
 531	kthread_stop(gc->gc_writer_ts);
 532fail_free_main_kthread:
 533	kthread_stop(gc->gc_ts);
534
535 return ret;
536}
537
538void pblk_gc_exit(struct pblk *pblk)
539{
540 struct pblk_gc *gc = &pblk->gc;
541
542 flush_workqueue(gc->gc_reader_wq);
543
544 del_timer(&gc->gc_timer);
545 pblk_gc_stop(pblk, 1);
546
547 if (gc->gc_ts)
548 kthread_stop(gc->gc_ts);
549
550 if (pblk->gc.gc_reader_wq)
551 destroy_workqueue(pblk->gc.gc_reader_wq);
552
553 if (gc->gc_writer_ts)
554 kthread_stop(gc->gc_writer_ts);
555}
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
new file mode 100644
index 000000000000..94653b1f1300
--- /dev/null
+++ b/drivers/lightnvm/pblk-init.c
@@ -0,0 +1,949 @@
1/*
2 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
3 * Copyright (C) 2016 CNEX Labs
4 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
5 * Matias Bjorling <matias@cnexlabs.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * Implementation of a physical block-device target for Open-channel SSDs.
17 *
18 * pblk-init.c - pblk's initialization.
19 */
20
21#include "pblk.h"
22
23static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
24 *pblk_w_rq_cache, *pblk_line_meta_cache;
25static DECLARE_RWSEM(pblk_lock);
26
27static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
28 struct bio *bio)
29{
30 int ret;
31
32 /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
33 * constraint. Writes can be of arbitrary size.
34 */
35 if (bio_data_dir(bio) == READ) {
36 blk_queue_split(q, &bio, q->bio_split);
37 ret = pblk_submit_read(pblk, bio);
38 if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
39 bio_put(bio);
40
41 return ret;
42 }
43
44 /* Prevent deadlock in the case of a modest LUN configuration and large
45 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
46 * available for user I/O.
47 */
48 if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
49 blk_queue_split(q, &bio, q->bio_split);
50
51 return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
52}
53
54static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
55{
56 struct pblk *pblk = q->queuedata;
57
58 if (bio_op(bio) == REQ_OP_DISCARD) {
59 pblk_discard(pblk, bio);
60 if (!(bio->bi_opf & REQ_PREFLUSH)) {
61 bio_endio(bio);
62 return BLK_QC_T_NONE;
63 }
64 }
65
66 switch (pblk_rw_io(q, pblk, bio)) {
67 case NVM_IO_ERR:
68 bio_io_error(bio);
69 break;
70 case NVM_IO_DONE:
71 bio_endio(bio);
72 break;
73 }
74
75 return BLK_QC_T_NONE;
76}
77
78static void pblk_l2p_free(struct pblk *pblk)
79{
80 vfree(pblk->trans_map);
81}
82
83static int pblk_l2p_init(struct pblk *pblk)
84{
85 sector_t i;
86 struct ppa_addr ppa;
87 int entry_size = 8;
88
89 if (pblk->ppaf_bitsize < 32)
90 entry_size = 4;
91
92 pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
93 if (!pblk->trans_map)
94 return -ENOMEM;
95
96 pblk_ppa_set_empty(&ppa);
97
98 for (i = 0; i < pblk->rl.nr_secs; i++)
99 pblk_trans_map_set(pblk, i, ppa);
100
101 return 0;
102}
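
pblk_l2p_init() allocates one entry per 4KB sector and picks 4-byte entries when the packed PPA format fits in 32 bits, 8-byte entries otherwise. The rough calculation below gives a feel for the resulting host-memory footprint; the 1 TiB capacity is an invented example, not a value from the patch.

/* Back-of-the-envelope L2P footprint, mirroring the sizing logic in
 * pblk_l2p_init(): one entry per 4KB sector, 4 or 8 bytes per entry
 * depending on whether the packed PPA fits in 32 bits. The 1 TiB example
 * capacity is made up for illustration.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t capacity = 1ULL << 40;		/* 1 TiB of managed data */
	uint64_t nr_secs = capacity / 4096;	/* 4KB mapping granularity */

	for (int entry_size = 4; entry_size <= 8; entry_size += 4)
		printf("%d-byte entries: %llu MiB of L2P\n", entry_size,
		       (unsigned long long)(nr_secs * entry_size >> 20));
	return 0;
}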
103
104static void pblk_rwb_free(struct pblk *pblk)
105{
106 if (pblk_rb_tear_down_check(&pblk->rwb))
107 pr_err("pblk: write buffer error on tear down\n");
108
109 pblk_rb_data_free(&pblk->rwb);
110 vfree(pblk_rb_entries_ref(&pblk->rwb));
111}
112
113static int pblk_rwb_init(struct pblk *pblk)
114{
115 struct nvm_tgt_dev *dev = pblk->dev;
116 struct nvm_geo *geo = &dev->geo;
117 struct pblk_rb_entry *entries;
118 unsigned long nr_entries;
119 unsigned int power_size, power_seg_sz;
120
121 nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
122
123 entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
124 if (!entries)
125 return -ENOMEM;
126
127 power_size = get_count_order(nr_entries);
128 power_seg_sz = get_count_order(geo->sec_size);
129
130 return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
131}
132
133/* Minimum pages needed within a lun */
134#define PAGE_POOL_SIZE 16
135#define ADDR_POOL_SIZE 64
136
137static int pblk_set_ppaf(struct pblk *pblk)
138{
139 struct nvm_tgt_dev *dev = pblk->dev;
140 struct nvm_geo *geo = &dev->geo;
141 struct nvm_addr_format ppaf = geo->ppaf;
142 int power_len;
143
144 /* Re-calculate channel and lun format to adapt to configuration */
145 power_len = get_count_order(geo->nr_chnls);
146 if (1 << power_len != geo->nr_chnls) {
147 pr_err("pblk: supports only power-of-two channel config.\n");
148 return -EINVAL;
149 }
150 ppaf.ch_len = power_len;
151
152 power_len = get_count_order(geo->luns_per_chnl);
153 if (1 << power_len != geo->luns_per_chnl) {
154 pr_err("pblk: supports only power-of-two LUN config.\n");
155 return -EINVAL;
156 }
157 ppaf.lun_len = power_len;
158
159 pblk->ppaf.sec_offset = 0;
160 pblk->ppaf.pln_offset = ppaf.sect_len;
161 pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
162 pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
163 pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
164 pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
165 pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
166 pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
167 pblk->ppaf.pln_offset;
168 pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
169 pblk->ppaf.ch_offset;
170 pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
171 pblk->ppaf.lun_offset;
172 pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
173 pblk->ppaf.pg_offset;
174 pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
175 pblk->ppaf.blk_offset;
176
177 pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;
178
179 return 0;
180}
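
pblk_set_ppaf() turns the device geometry into per-field bit offsets and masks so that a physical address can be packed into, and unpacked from, a single 64-bit PPA; channel and LUN counts must be powers of two for this to work. The sketch below packs and unpacks a PPA with invented field widths (2/1/3/2/8/10 bits for sector/plane/channel/LUN/page/block); it only illustrates the shift-and-mask scheme, not the real nvm_addr_format.

/* Sketch of the shift-and-mask packing that pblk_set_ppaf() sets up. The
 * field widths and offsets are invented for illustration; pblk derives them
 * from the device geometry at init time.
 */
#include <stdint.h>
#include <stdio.h>

struct ppaf {
	int sec_off, pln_off, ch_off, lun_off, pg_off, blk_off;
};

static uint64_t pack(const struct ppaf *f, uint64_t sec, uint64_t pln,
		     uint64_t ch, uint64_t lun, uint64_t pg, uint64_t blk)
{
	return (sec << f->sec_off) | (pln << f->pln_off) | (ch << f->ch_off) |
	       (lun << f->lun_off) | (pg << f->pg_off) | (blk << f->blk_off);
}

int main(void)
{
	/* widths: sec=2, pln=1, ch=3, lun=2, pg=8, blk=10 (example only) */
	struct ppaf f = { 0, 2, 3, 6, 8, 16 };
	uint64_t ppa = pack(&f, 3, 1, 5, 2, 0x2a, 0x155);

	printf("packed ppa = 0x%llx\n", (unsigned long long)ppa);
	printf("ch = %llu\n",
	       (unsigned long long)((ppa >> f.ch_off) & ((1ULL << 3) - 1)));
	return 0;
}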
181
182static int pblk_init_global_caches(struct pblk *pblk)
183{
184 char cache_name[PBLK_CACHE_NAME_LEN];
185
186 down_write(&pblk_lock);
187 pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
188 sizeof(struct pblk_line_ws), 0, 0, NULL);
189 if (!pblk_blk_ws_cache) {
190 up_write(&pblk_lock);
191 return -ENOMEM;
192 }
193
194 pblk_rec_cache = kmem_cache_create("pblk_rec",
195 sizeof(struct pblk_rec_ctx), 0, 0, NULL);
196 if (!pblk_rec_cache) {
197 kmem_cache_destroy(pblk_blk_ws_cache);
198 up_write(&pblk_lock);
199 return -ENOMEM;
200 }
201
202 pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
203 0, 0, NULL);
204 if (!pblk_r_rq_cache) {
205 kmem_cache_destroy(pblk_blk_ws_cache);
206 kmem_cache_destroy(pblk_rec_cache);
207 up_write(&pblk_lock);
208 return -ENOMEM;
209 }
210
211 pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
212 0, 0, NULL);
213 if (!pblk_w_rq_cache) {
214 kmem_cache_destroy(pblk_blk_ws_cache);
215 kmem_cache_destroy(pblk_rec_cache);
216 kmem_cache_destroy(pblk_r_rq_cache);
217 up_write(&pblk_lock);
218 return -ENOMEM;
219 }
220
221 snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
222 pblk->disk->disk_name);
223 pblk_line_meta_cache = kmem_cache_create(cache_name,
224 pblk->lm.sec_bitmap_len, 0, 0, NULL);
225 if (!pblk_line_meta_cache) {
226 kmem_cache_destroy(pblk_blk_ws_cache);
227 kmem_cache_destroy(pblk_rec_cache);
228 kmem_cache_destroy(pblk_r_rq_cache);
229 kmem_cache_destroy(pblk_w_rq_cache);
230 up_write(&pblk_lock);
231 return -ENOMEM;
232 }
233 up_write(&pblk_lock);
234
235 return 0;
236}
237
238static int pblk_core_init(struct pblk *pblk)
239{
240 struct nvm_tgt_dev *dev = pblk->dev;
241 struct nvm_geo *geo = &dev->geo;
242 int max_write_ppas;
243 int mod;
244
245 pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
246 max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
247 pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
248 max_write_ppas : nvm_max_phys_sects(dev);
249 pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
250 geo->nr_planes * geo->nr_luns;
251
252 if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
253 pr_err("pblk: cannot support device max_phys_sect\n");
254 return -EINVAL;
255 }
256
257 div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
258 if (mod) {
259 pr_err("pblk: bad configuration of sectors/pages\n");
260 return -EINVAL;
261 }
262
263 if (pblk_init_global_caches(pblk))
264 return -ENOMEM;
265
266 pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
267 if (!pblk->page_pool)
268 return -ENOMEM;
269
270 pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
271 pblk_blk_ws_cache);
272 if (!pblk->line_ws_pool)
273 goto free_page_pool;
274
275 pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
276 if (!pblk->rec_pool)
277 goto free_blk_ws_pool;
278
279 pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
280 if (!pblk->r_rq_pool)
281 goto free_rec_pool;
282
283 pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
284 if (!pblk->w_rq_pool)
285 goto free_r_rq_pool;
286
287 pblk->line_meta_pool =
288 mempool_create_slab_pool(16, pblk_line_meta_cache);
289 if (!pblk->line_meta_pool)
290 goto free_w_rq_pool;
291
292 pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
293 WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
294 if (!pblk->kw_wq)
295 goto free_line_meta_pool;
296
297 if (pblk_set_ppaf(pblk))
298 goto free_kw_wq;
299
300 if (pblk_rwb_init(pblk))
301 goto free_kw_wq;
302
303 INIT_LIST_HEAD(&pblk->compl_list);
304 return 0;
305
306free_kw_wq:
307 destroy_workqueue(pblk->kw_wq);
308free_line_meta_pool:
309 mempool_destroy(pblk->line_meta_pool);
310free_w_rq_pool:
311 mempool_destroy(pblk->w_rq_pool);
312free_r_rq_pool:
313 mempool_destroy(pblk->r_rq_pool);
314free_rec_pool:
315 mempool_destroy(pblk->rec_pool);
316free_blk_ws_pool:
317 mempool_destroy(pblk->line_ws_pool);
318free_page_pool:
319 mempool_destroy(pblk->page_pool);
320 return -ENOMEM;
321}
322
323static void pblk_core_free(struct pblk *pblk)
324{
325 if (pblk->kw_wq)
326 destroy_workqueue(pblk->kw_wq);
327
328 mempool_destroy(pblk->page_pool);
329 mempool_destroy(pblk->line_ws_pool);
330 mempool_destroy(pblk->rec_pool);
331 mempool_destroy(pblk->r_rq_pool);
332 mempool_destroy(pblk->w_rq_pool);
333 mempool_destroy(pblk->line_meta_pool);
334
335 kmem_cache_destroy(pblk_blk_ws_cache);
336 kmem_cache_destroy(pblk_rec_cache);
337 kmem_cache_destroy(pblk_r_rq_cache);
338 kmem_cache_destroy(pblk_w_rq_cache);
339 kmem_cache_destroy(pblk_line_meta_cache);
340}
341
342static void pblk_luns_free(struct pblk *pblk)
343{
344 kfree(pblk->luns);
345}
346
347static void pblk_lines_free(struct pblk *pblk)
348{
349 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
350 struct pblk_line *line;
351 int i;
352
353 spin_lock(&l_mg->free_lock);
354 for (i = 0; i < l_mg->nr_lines; i++) {
355 line = &pblk->lines[i];
356
357 pblk_line_free(pblk, line);
358 kfree(line->blk_bitmap);
359 kfree(line->erase_bitmap);
360 }
361 spin_unlock(&l_mg->free_lock);
362}
363
364static void pblk_line_meta_free(struct pblk *pblk)
365{
366 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
367 int i;
368
369 kfree(l_mg->bb_template);
370 kfree(l_mg->bb_aux);
371
372 for (i = 0; i < PBLK_DATA_LINES; i++) {
373 pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
374 pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
375 }
376
377 kfree(pblk->lines);
378}
379
380static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
381{
382 struct nvm_geo *geo = &dev->geo;
383 struct ppa_addr ppa;
384 u8 *blks;
385 int nr_blks, ret;
386
387 nr_blks = geo->blks_per_lun * geo->plane_mode;
388 blks = kmalloc(nr_blks, GFP_KERNEL);
389 if (!blks)
390 return -ENOMEM;
391
392 ppa.ppa = 0;
393 ppa.g.ch = rlun->bppa.g.ch;
394 ppa.g.lun = rlun->bppa.g.lun;
395
396 ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
397 if (ret)
398 goto out;
399
400 nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
 401	if (nr_blks < 0) {
 402		kfree(blks);
 403		return nr_blks;
 404	}
405
406 rlun->bb_list = blks;
407
408out:
409 return ret;
410}
411
412static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
413{
414 struct pblk_line_meta *lm = &pblk->lm;
415 struct pblk_lun *rlun;
416 int bb_cnt = 0;
417 int i;
418
419 line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
420 if (!line->blk_bitmap)
421 return -ENOMEM;
422
423 line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
424 if (!line->erase_bitmap) {
425 kfree(line->blk_bitmap);
426 return -ENOMEM;
427 }
428
429 for (i = 0; i < lm->blk_per_line; i++) {
430 rlun = &pblk->luns[i];
431 if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
432 continue;
433
434 set_bit(i, line->blk_bitmap);
435 bb_cnt++;
436 }
437
438 return bb_cnt;
439}
440
441static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
442{
443 struct nvm_tgt_dev *dev = pblk->dev;
444 struct nvm_geo *geo = &dev->geo;
445 struct pblk_lun *rlun;
446 int i, ret;
447
448 /* TODO: Implement unbalanced LUN support */
449 if (geo->luns_per_chnl < 0) {
450 pr_err("pblk: unbalanced LUN config.\n");
451 return -EINVAL;
452 }
453
454 pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
455 if (!pblk->luns)
456 return -ENOMEM;
457
458 for (i = 0; i < geo->nr_luns; i++) {
459 /* Stripe across channels */
460 int ch = i % geo->nr_chnls;
461 int lun_raw = i / geo->nr_chnls;
462 int lunid = lun_raw + ch * geo->luns_per_chnl;
463
464 rlun = &pblk->luns[i];
465 rlun->bppa = luns[lunid];
466
467 sema_init(&rlun->wr_sem, 1);
468
469 ret = pblk_bb_discovery(dev, rlun);
470 if (ret) {
471 while (--i >= 0)
472 kfree(pblk->luns[i].bb_list);
473 return ret;
474 }
475 }
476
477 return 0;
478}
479
480static int pblk_lines_configure(struct pblk *pblk, int flags)
481{
482 struct pblk_line *line = NULL;
483 int ret = 0;
484
485 if (!(flags & NVM_TARGET_FACTORY)) {
486 line = pblk_recov_l2p(pblk);
487 if (IS_ERR(line)) {
488 pr_err("pblk: could not recover l2p table\n");
489 ret = -EFAULT;
490 }
491 }
492
493 if (!line) {
494 /* Configure next line for user data */
495 line = pblk_line_get_first_data(pblk);
496 if (!line) {
497 pr_err("pblk: line list corrupted\n");
498 ret = -EFAULT;
499 }
500 }
501
502 return ret;
503}
504
505/* See comment over struct line_emeta definition */
506static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
507{
508 return (sizeof(struct line_emeta) +
509 ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
510 (pblk->l_mg.nr_lines * sizeof(u32)) +
511 lm->blk_bitmap_len);
512}
513
514static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
515{
516 struct nvm_tgt_dev *dev = pblk->dev;
517 struct nvm_geo *geo = &dev->geo;
518 sector_t provisioned;
519
520 pblk->over_pct = 20;
521
522 provisioned = nr_free_blks;
523 provisioned *= (100 - pblk->over_pct);
524 sector_div(provisioned, 100);
525
526 /* Internally pblk manages all free blocks, but all calculations based
527 * on user capacity consider only provisioned blocks
528 */
529 pblk->rl.total_blocks = nr_free_blks;
530 pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
531 pblk->capacity = provisioned * geo->sec_per_blk;
532 atomic_set(&pblk->rl.free_blocks, nr_free_blks);
533}
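
pblk_set_provision() reserves a fixed 20% of the free blocks as over-provisioning, so only 80% is exposed as user capacity while the remainder absorbs GC and metadata writes. A small worked example of that split, with invented block counts:

/* Worked example of the over-provisioning split in pblk_set_provision().
 * The block count and sectors-per-block are illustrative values only.
 */
#include <stdio.h>

int main(void)
{
	long nr_free_blks = 10000;	/* example: free blocks in the instance */
	int sec_per_blk = 4096;		/* example: 4K sectors per block */
	int over_pct = 20;		/* pblk->over_pct in this patch */

	long provisioned = nr_free_blks * (100 - over_pct) / 100;

	printf("user-visible blocks:  %ld\n", provisioned);
	printf("user-visible sectors: %ld\n", provisioned * sec_per_blk);
	printf("reserved for GC/meta: %ld blocks\n",
	       nr_free_blks - provisioned);
	return 0;
}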
534
535static int pblk_lines_init(struct pblk *pblk)
536{
537 struct nvm_tgt_dev *dev = pblk->dev;
538 struct nvm_geo *geo = &dev->geo;
539 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
540 struct pblk_line_meta *lm = &pblk->lm;
541 struct pblk_line *line;
542 unsigned int smeta_len, emeta_len;
543 long nr_bad_blks, nr_meta_blks, nr_free_blks;
544 int bb_distance;
545 int i;
546 int ret = 0;
547
548 lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
549 lm->blk_per_line = geo->nr_luns;
550 lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
551 lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
552 lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
553 lm->high_thrs = lm->sec_per_line / 2;
554 lm->mid_thrs = lm->sec_per_line / 4;
555
556 /* Calculate necessary pages for smeta. See comment over struct
557 * line_smeta definition
558 */
559 lm->smeta_len = sizeof(struct line_smeta) +
560 PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
561
562 i = 1;
563add_smeta_page:
564 lm->smeta_sec = i * geo->sec_per_pl;
565 lm->smeta_len = lm->smeta_sec * geo->sec_size;
566
567 smeta_len = sizeof(struct line_smeta) +
568 PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
569 if (smeta_len > lm->smeta_len) {
570 i++;
571 goto add_smeta_page;
572 }
573
574 /* Calculate necessary pages for emeta. See comment over struct
575 * line_emeta definition
576 */
577 i = 1;
578add_emeta_page:
579 lm->emeta_sec = i * geo->sec_per_pl;
580 lm->emeta_len = lm->emeta_sec * geo->sec_size;
581
582 emeta_len = calc_emeta_len(pblk, lm);
583 if (emeta_len > lm->emeta_len) {
584 i++;
585 goto add_emeta_page;
586 }
587 lm->emeta_bb = geo->nr_luns - i;
588
589 nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
590 (geo->sec_per_blk / 2)) / geo->sec_per_blk;
591 lm->min_blk_line = nr_meta_blks + 1;
592
593 l_mg->nr_lines = geo->blks_per_lun;
594 l_mg->log_line = l_mg->data_line = NULL;
595 l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
596 l_mg->nr_free_lines = 0;
597 bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);
598
599 /* smeta is always small enough to fit on a kmalloc memory allocation,
600 * emeta depends on the number of LUNs allocated to the pblk instance
601 */
602 l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
 603	for (i = 0; i < PBLK_DATA_LINES; i++) {
 604		l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
 605		if (!l_mg->sline_meta[i].meta) {
 606			while (--i >= 0)
 607				kfree(l_mg->sline_meta[i].meta);
 608			ret = -ENOMEM;
 609			goto fail;
 610		}
 611	}
612
613 if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
614 l_mg->emeta_alloc_type = PBLK_VMALLOC_META;
615
 616		for (i = 0; i < PBLK_DATA_LINES; i++) {
 617			l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
 618			if (!l_mg->eline_meta[i].meta) {
 619				while (--i >= 0)
 620					vfree(l_mg->eline_meta[i].meta);
 621				ret = -ENOMEM;
 622				goto fail;
 623			}
 624		}
625 } else {
626 l_mg->emeta_alloc_type = PBLK_KMALLOC_META;
627
 628		for (i = 0; i < PBLK_DATA_LINES; i++) {
 629			l_mg->eline_meta[i].meta =
 630					kmalloc(lm->emeta_len, GFP_KERNEL);
 631			if (!l_mg->eline_meta[i].meta) {
 632				while (--i >= 0)
 633					kfree(l_mg->eline_meta[i].meta);
 634				ret = -ENOMEM;
 635				goto fail;
 636			}
 637		}
 638	}
639
640 l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
641 if (!l_mg->bb_template)
642 goto fail_free_meta;
643
644 l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
645 if (!l_mg->bb_aux)
646 goto fail_free_bb_template;
647
648 bb_distance = (geo->nr_luns) * geo->sec_per_pl;
649 for (i = 0; i < lm->sec_per_line; i += bb_distance)
650 bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);
651
652 INIT_LIST_HEAD(&l_mg->free_list);
653 INIT_LIST_HEAD(&l_mg->corrupt_list);
654 INIT_LIST_HEAD(&l_mg->bad_list);
655 INIT_LIST_HEAD(&l_mg->gc_full_list);
656 INIT_LIST_HEAD(&l_mg->gc_high_list);
657 INIT_LIST_HEAD(&l_mg->gc_mid_list);
658 INIT_LIST_HEAD(&l_mg->gc_low_list);
659 INIT_LIST_HEAD(&l_mg->gc_empty_list);
660
661 l_mg->gc_lists[0] = &l_mg->gc_high_list;
662 l_mg->gc_lists[1] = &l_mg->gc_mid_list;
663 l_mg->gc_lists[2] = &l_mg->gc_low_list;
664
665 spin_lock_init(&l_mg->free_lock);
666 spin_lock_init(&l_mg->gc_lock);
667
668 pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
669 GFP_KERNEL);
670 if (!pblk->lines)
671 goto fail_free_bb_aux;
672
673 nr_free_blks = 0;
674 for (i = 0; i < l_mg->nr_lines; i++) {
675 line = &pblk->lines[i];
676
677 line->pblk = pblk;
678 line->id = i;
679 line->type = PBLK_LINETYPE_FREE;
680 line->state = PBLK_LINESTATE_FREE;
681 line->gc_group = PBLK_LINEGC_NONE;
682 spin_lock_init(&line->lock);
683
684 nr_bad_blks = pblk_bb_line(pblk, line);
685 if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line)
686 goto fail_free_lines;
687
688 line->blk_in_line = lm->blk_per_line - nr_bad_blks;
689 if (line->blk_in_line < lm->min_blk_line) {
690 line->state = PBLK_LINESTATE_BAD;
691 list_add_tail(&line->list, &l_mg->bad_list);
692 continue;
693 }
694
695 nr_free_blks += line->blk_in_line;
696
697 l_mg->nr_free_lines++;
698 list_add_tail(&line->list, &l_mg->free_list);
699 }
700
701 pblk_set_provision(pblk, nr_free_blks);
702
703 sema_init(&pblk->erase_sem, 1);
704
 705	/* Clean up per-LUN bad block lists - managed within lines at run-time */
706 for (i = 0; i < geo->nr_luns; i++)
707 kfree(pblk->luns[i].bb_list);
708
709 return 0;
710fail_free_lines:
711 kfree(pblk->lines);
712fail_free_bb_aux:
713 kfree(l_mg->bb_aux);
714fail_free_bb_template:
715 kfree(l_mg->bb_template);
716fail_free_meta:
717 for (i = 0; i < PBLK_DATA_LINES; i++) {
718 pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
719 pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
720 }
721fail:
722 for (i = 0; i < geo->nr_luns; i++)
723 kfree(pblk->luns[i].bb_list);
724
725 return ret;
726}
727
728static int pblk_writer_init(struct pblk *pblk)
729{
730 setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk);
731 mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));
732
733 pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
734 if (IS_ERR(pblk->writer_ts)) {
735 pr_err("pblk: could not allocate writer kthread\n");
736 return 1;
737 }
738
739 return 0;
740}
741
742static void pblk_writer_stop(struct pblk *pblk)
743{
744 if (pblk->writer_ts)
745 kthread_stop(pblk->writer_ts);
746 del_timer(&pblk->wtimer);
747}
748
749static void pblk_free(struct pblk *pblk)
750{
751 pblk_luns_free(pblk);
752 pblk_lines_free(pblk);
753 pblk_line_meta_free(pblk);
754 pblk_core_free(pblk);
755 pblk_l2p_free(pblk);
756
757 kfree(pblk);
758}
759
760static void pblk_tear_down(struct pblk *pblk)
761{
762 pblk_flush_writer(pblk);
763 pblk_writer_stop(pblk);
764 pblk_rb_sync_l2p(&pblk->rwb);
765 pblk_recov_pad(pblk);
766 pblk_rwb_free(pblk);
767 pblk_rl_free(&pblk->rl);
768
769 pr_debug("pblk: consistent tear down\n");
770}
771
772static void pblk_exit(void *private)
773{
774 struct pblk *pblk = private;
775
776 down_write(&pblk_lock);
777 pblk_gc_exit(pblk);
778 pblk_tear_down(pblk);
779 pblk_free(pblk);
780 up_write(&pblk_lock);
781}
782
783static sector_t pblk_capacity(void *private)
784{
785 struct pblk *pblk = private;
786
787 return pblk->capacity * NR_PHY_IN_LOG;
788}
789
790static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
791 int flags)
792{
793 struct nvm_geo *geo = &dev->geo;
794 struct request_queue *bqueue = dev->q;
795 struct request_queue *tqueue = tdisk->queue;
796 struct pblk *pblk;
797 int ret;
798
799 if (dev->identity.dom & NVM_RSP_L2P) {
800 pr_err("pblk: device-side L2P table not supported. (%x)\n",
801 dev->identity.dom);
802 return ERR_PTR(-EINVAL);
803 }
804
805 pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
806 if (!pblk)
807 return ERR_PTR(-ENOMEM);
808
809 pblk->dev = dev;
810 pblk->disk = tdisk;
811
812 spin_lock_init(&pblk->trans_lock);
813 spin_lock_init(&pblk->lock);
814
815 if (flags & NVM_TARGET_FACTORY)
816 pblk_setup_uuid(pblk);
817
818#ifdef CONFIG_NVM_DEBUG
819 atomic_long_set(&pblk->inflight_writes, 0);
820 atomic_long_set(&pblk->padded_writes, 0);
821 atomic_long_set(&pblk->padded_wb, 0);
822 atomic_long_set(&pblk->nr_flush, 0);
823 atomic_long_set(&pblk->req_writes, 0);
824 atomic_long_set(&pblk->sub_writes, 0);
825 atomic_long_set(&pblk->sync_writes, 0);
826 atomic_long_set(&pblk->compl_writes, 0);
827 atomic_long_set(&pblk->inflight_reads, 0);
828 atomic_long_set(&pblk->sync_reads, 0);
829 atomic_long_set(&pblk->recov_writes, 0);
830 atomic_long_set(&pblk->recov_writes, 0);
831 atomic_long_set(&pblk->recov_gc_writes, 0);
832#endif
833
834 atomic_long_set(&pblk->read_failed, 0);
835 atomic_long_set(&pblk->read_empty, 0);
836 atomic_long_set(&pblk->read_high_ecc, 0);
837 atomic_long_set(&pblk->read_failed_gc, 0);
838 atomic_long_set(&pblk->write_failed, 0);
839 atomic_long_set(&pblk->erase_failed, 0);
840
841 ret = pblk_luns_init(pblk, dev->luns);
842 if (ret) {
843 pr_err("pblk: could not initialize luns\n");
844 goto fail;
845 }
846
847 ret = pblk_lines_init(pblk);
848 if (ret) {
849 pr_err("pblk: could not initialize lines\n");
850 goto fail_free_luns;
851 }
852
853 ret = pblk_core_init(pblk);
854 if (ret) {
855 pr_err("pblk: could not initialize core\n");
856 goto fail_free_line_meta;
857 }
858
859 ret = pblk_l2p_init(pblk);
860 if (ret) {
861 pr_err("pblk: could not initialize maps\n");
862 goto fail_free_core;
863 }
864
865 ret = pblk_lines_configure(pblk, flags);
866 if (ret) {
867 pr_err("pblk: could not configure lines\n");
868 goto fail_free_l2p;
869 }
870
871 ret = pblk_writer_init(pblk);
872 if (ret) {
873 pr_err("pblk: could not initialize write thread\n");
874 goto fail_free_lines;
875 }
876
877 ret = pblk_gc_init(pblk);
878 if (ret) {
879 pr_err("pblk: could not initialize gc\n");
880 goto fail_stop_writer;
881 }
882
883 /* inherit the size from the underlying device */
884 blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
885 blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
886
887 blk_queue_write_cache(tqueue, true, false);
888
889 tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
890 tqueue->limits.discard_alignment = 0;
891 blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
892 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);
893
894 pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
895 geo->nr_luns, pblk->l_mg.nr_lines,
896 (unsigned long long)pblk->rl.nr_secs,
897 pblk->rwb.nr_entries);
898
899 wake_up_process(pblk->writer_ts);
900 return pblk;
901
902fail_stop_writer:
903 pblk_writer_stop(pblk);
904fail_free_lines:
905 pblk_lines_free(pblk);
906fail_free_l2p:
907 pblk_l2p_free(pblk);
908fail_free_core:
909 pblk_core_free(pblk);
910fail_free_line_meta:
911 pblk_line_meta_free(pblk);
912fail_free_luns:
913 pblk_luns_free(pblk);
914fail:
915 kfree(pblk);
916 return ERR_PTR(ret);
917}
918
919/* physical block device target */
920static struct nvm_tgt_type tt_pblk = {
921 .name = "pblk",
922 .version = {1, 0, 0},
923
924 .make_rq = pblk_make_rq,
925 .capacity = pblk_capacity,
926
927 .init = pblk_init,
928 .exit = pblk_exit,
929
930 .sysfs_init = pblk_sysfs_init,
931 .sysfs_exit = pblk_sysfs_exit,
932};
933
934static int __init pblk_module_init(void)
935{
936 return nvm_register_tgt_type(&tt_pblk);
937}
938
939static void pblk_module_exit(void)
940{
941 nvm_unregister_tgt_type(&tt_pblk);
942}
943
944module_init(pblk_module_init);
945module_exit(pblk_module_exit);
946MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
947MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
948MODULE_LICENSE("GPL v2");
949MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
new file mode 100644
index 000000000000..3f8bab4c4d5c
--- /dev/null
+++ b/drivers/lightnvm/pblk-map.c
@@ -0,0 +1,136 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-map.c - pblk's lba-ppa mapping strategy
16 *
17 */
18
19#include "pblk.h"
20
21static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
22 struct ppa_addr *ppa_list,
23 unsigned long *lun_bitmap,
24 struct pblk_sec_meta *meta_list,
25 unsigned int valid_secs)
26{
27 struct pblk_line *line = pblk_line_get_data(pblk);
28 struct line_emeta *emeta = line->emeta;
29 struct pblk_w_ctx *w_ctx;
30 __le64 *lba_list = pblk_line_emeta_to_lbas(emeta);
31 u64 paddr;
32 int nr_secs = pblk->min_write_pgs;
33 int i;
34
35 paddr = pblk_alloc_page(pblk, line, nr_secs);
36
37 for (i = 0; i < nr_secs; i++, paddr++) {
38 /* ppa to be sent to the device */
39 ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
40
41 /* Write context for target bio completion on write buffer. Note
42 * that the write buffer is protected by the sync backpointer,
 43		 * and a single writer thread has access to each specific entry
44 * at a time. Thus, it is safe to modify the context for the
45 * entry we are setting up for submission without taking any
46 * lock or memory barrier.
47 */
48 if (i < valid_secs) {
49 kref_get(&line->ref);
50 w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
51 w_ctx->ppa = ppa_list[i];
52 meta_list[i].lba = cpu_to_le64(w_ctx->lba);
53 lba_list[paddr] = cpu_to_le64(w_ctx->lba);
54 le64_add_cpu(&line->emeta->nr_valid_lbas, 1);
55 } else {
56 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
57 lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
58 pblk_map_pad_invalidate(pblk, line, paddr);
59 }
60 }
61
62 if (pblk_line_is_full(line)) {
63 line = pblk_line_replace_data(pblk);
64 if (!line)
65 return;
66 }
67
68 pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
69}
70
71void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
72 unsigned long *lun_bitmap, unsigned int valid_secs,
73 unsigned int off)
74{
75 struct pblk_sec_meta *meta_list = rqd->meta_list;
76 unsigned int map_secs;
77 int min = pblk->min_write_pgs;
78 int i;
79
80 for (i = off; i < rqd->nr_ppas; i += min) {
81 map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
82 pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
83 lun_bitmap, &meta_list[i], map_secs);
84 }
85}
86
87/* only if erase_ppa is set, acquire erase semaphore */
88void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
89 unsigned int sentry, unsigned long *lun_bitmap,
90 unsigned int valid_secs, struct ppa_addr *erase_ppa)
91{
92 struct nvm_tgt_dev *dev = pblk->dev;
93 struct nvm_geo *geo = &dev->geo;
94 struct pblk_line *e_line = pblk_line_get_data_next(pblk);
95 struct pblk_sec_meta *meta_list = rqd->meta_list;
96 unsigned int map_secs;
97 int min = pblk->min_write_pgs;
98 int i, erase_lun;
99
100 for (i = 0; i < rqd->nr_ppas; i += min) {
101 map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
102 pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
103 lun_bitmap, &meta_list[i], map_secs);
104
105 erase_lun = rqd->ppa_list[i].g.lun * geo->nr_chnls +
106 rqd->ppa_list[i].g.ch;
107
108 if (!test_bit(erase_lun, e_line->erase_bitmap)) {
109 if (down_trylock(&pblk->erase_sem))
110 continue;
111
112 set_bit(erase_lun, e_line->erase_bitmap);
113 e_line->left_eblks--;
114 *erase_ppa = rqd->ppa_list[i];
115 erase_ppa->g.blk = e_line->id;
116
117 /* Avoid evaluating e_line->left_eblks */
118 return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
119 valid_secs, i + min);
120 }
121 }
122
123 /* Erase blocks that are bad in this line but might not be in next */
124 if (unlikely(ppa_empty(*erase_ppa))) {
125 struct pblk_line_meta *lm = &pblk->lm;
126
127 i = find_first_zero_bit(e_line->erase_bitmap, lm->blk_per_line);
128 if (i == lm->blk_per_line)
129 return;
130
131 set_bit(i, e_line->erase_bitmap);
132 e_line->left_eblks--;
133 *erase_ppa = pblk->luns[i].bppa; /* set ch and lun */
134 erase_ppa->g.blk = e_line->id;
135 }
136}
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
new file mode 100644
index 000000000000..045384ddc1f9
--- /dev/null
+++ b/drivers/lightnvm/pblk-rb.c
@@ -0,0 +1,852 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 *
5 * Based upon the circular ringbuffer.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * pblk-rb.c - pblk's write buffer
17 */
18
19#include <linux/circ_buf.h>
20
21#include "pblk.h"
22
23static DECLARE_RWSEM(pblk_rb_lock);
24
25void pblk_rb_data_free(struct pblk_rb *rb)
26{
27 struct pblk_rb_pages *p, *t;
28
29 down_write(&pblk_rb_lock);
30 list_for_each_entry_safe(p, t, &rb->pages, list) {
31 free_pages((unsigned long)page_address(p->pages), p->order);
32 list_del(&p->list);
33 kfree(p);
34 }
35 up_write(&pblk_rb_lock);
36}
37
38/*
39 * Initialize ring buffer. The data and metadata buffers must be previously
40 * allocated and their size must be a power of two
41 * (Documentation/circular-buffers.txt)
42 */
43int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
44 unsigned int power_size, unsigned int power_seg_sz)
45{
46 struct pblk *pblk = container_of(rb, struct pblk, rwb);
47 unsigned int init_entry = 0;
48 unsigned int alloc_order = power_size;
49 unsigned int max_order = MAX_ORDER - 1;
50 unsigned int order, iter;
51
52 down_write(&pblk_rb_lock);
53 rb->entries = rb_entry_base;
54 rb->seg_size = (1 << power_seg_sz);
55 rb->nr_entries = (1 << power_size);
56 rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
57 rb->sync_point = EMPTY_ENTRY;
58
59 spin_lock_init(&rb->w_lock);
60 spin_lock_init(&rb->s_lock);
61
62 INIT_LIST_HEAD(&rb->pages);
63
64 if (alloc_order >= max_order) {
65 order = max_order;
66 iter = (1 << (alloc_order - max_order));
67 } else {
68 order = alloc_order;
69 iter = 1;
70 }
71
72 do {
73 struct pblk_rb_entry *entry;
74 struct pblk_rb_pages *page_set;
75 void *kaddr;
76 unsigned long set_size;
77 int i;
78
79 page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
80 if (!page_set) {
81 up_write(&pblk_rb_lock);
82 return -ENOMEM;
83 }
84
85 page_set->order = order;
86 page_set->pages = alloc_pages(GFP_KERNEL, order);
87 if (!page_set->pages) {
88 kfree(page_set);
89 pblk_rb_data_free(rb);
90 up_write(&pblk_rb_lock);
91 return -ENOMEM;
92 }
93 kaddr = page_address(page_set->pages);
94
95 entry = &rb->entries[init_entry];
96 entry->data = kaddr;
97 entry->cacheline = pblk_cacheline_to_addr(init_entry++);
98 entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
99
100 set_size = (1 << order);
101 for (i = 1; i < set_size; i++) {
102 entry = &rb->entries[init_entry];
103 entry->cacheline = pblk_cacheline_to_addr(init_entry++);
104 entry->data = kaddr + (i * rb->seg_size);
105 entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
106 bio_list_init(&entry->w_ctx.bios);
107 }
108
109 list_add_tail(&page_set->list, &rb->pages);
110 iter--;
111 } while (iter > 0);
112 up_write(&pblk_rb_lock);
113
114#ifdef CONFIG_NVM_DEBUG
115 atomic_set(&rb->inflight_sync_point, 0);
116#endif
117
118 /*
119 * Initialize rate-limiter, which controls access to the write buffer
 120	 * by user and GC I/O
121 */
122 pblk_rl_init(&pblk->rl, rb->nr_entries);
123
124 return 0;
125}
126
127/*
128 * pblk_rb_calculate_size -- calculate the size of the write buffer
129 */
130unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
131{
132 /* Alloc a write buffer that can at least fit 128 entries */
133 return (1 << max(get_count_order(nr_entries), 7));
134}
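
pblk_rb_calculate_size() rounds the requested entry count up to the next power of two and never goes below 128 entries (order 7), which is what keeps the ring-buffer index arithmetic to simple masking. A userspace sketch of the same computation, with ceil_log2() standing in for the kernel's get_count_order():

/* Userspace equivalent of pblk_rb_calculate_size(): round the requested
 * entry count up to a power of two, with a floor of 128 entries.
 * ceil_log2() is a stand-in for the kernel's get_count_order().
 */
#include <stdio.h>

static unsigned int ceil_log2(unsigned int n)
{
	unsigned int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

static unsigned int rb_calculate_size(unsigned int nr_entries)
{
	unsigned int order = ceil_log2(nr_entries);

	return 1u << (order > 7 ? order : 7);
}

int main(void)
{
	unsigned int sizes[] = { 60, 128, 129, 1000 };

	for (int i = 0; i < 4; i++)
		printf("%u entries -> buffer of %u entries\n",
		       sizes[i], rb_calculate_size(sizes[i]));
	return 0;
}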
135
136void *pblk_rb_entries_ref(struct pblk_rb *rb)
137{
138 return rb->entries;
139}
140
141static void clean_wctx(struct pblk_w_ctx *w_ctx)
142{
143 int flags;
144
145try:
146 flags = READ_ONCE(w_ctx->flags);
147 if (!(flags & PBLK_SUBMITTED_ENTRY))
148 goto try;
149
150 /* Release flags on context. Protect from writes and reads */
151 smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
152 pblk_ppa_set_empty(&w_ctx->ppa);
153}
154
155#define pblk_rb_ring_count(head, tail, size) CIRC_CNT(head, tail, size)
156#define pblk_rb_ring_space(rb, head, tail, size) \
157 (CIRC_SPACE(head, tail, size))
158
159/*
160 * Buffer space is calculated with respect to the back pointer signaling
161 * synchronized entries to the media.
162 */
163static unsigned int pblk_rb_space(struct pblk_rb *rb)
164{
165 unsigned int mem = READ_ONCE(rb->mem);
166 unsigned int sync = READ_ONCE(rb->sync);
167
168 return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
169}
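
The buffer accounting above relies on the standard circular-buffer trick: with a power-of-two number of entries, CIRC_CNT and CIRC_SPACE reduce to masked pointer differences, and one slot is kept free so that equal pointers mean an empty buffer. A minimal model of that arithmetic (RB_SIZE and the positions are invented for illustration):

/* Minimal model of the CIRC_CNT/CIRC_SPACE arithmetic used by the write
 * buffer. RB_SIZE and the head/tail positions are example values only.
 */
#include <stdio.h>

#define RB_SIZE 8	/* must be a power of two, like rb->nr_entries */

static unsigned int rb_count(unsigned int head, unsigned int tail)
{
	return (head - tail) & (RB_SIZE - 1);		/* CIRC_CNT */
}

static unsigned int rb_space(unsigned int head, unsigned int tail)
{
	return (tail - (head + 1)) & (RB_SIZE - 1);	/* CIRC_SPACE */
}

int main(void)
{
	unsigned int mem = 6, sync = 3;	/* producer/consumer positions */

	printf("entries in buffer: %u\n", rb_count(mem, sync));	/* 3 */
	printf("free entries:      %u\n", rb_space(mem, sync));	/* 4 */

	mem = 1; sync = 6;		/* wrapped case */
	printf("entries in buffer: %u\n", rb_count(mem, sync));	/* 3 */
	printf("free entries:      %u\n", rb_space(mem, sync));	/* 4 */
	return 0;
}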
170
171/*
172 * Buffer count is calculated with respect to the submission entry signaling the
173 * entries that are available to send to the media
174 */
175unsigned int pblk_rb_read_count(struct pblk_rb *rb)
176{
177 unsigned int mem = READ_ONCE(rb->mem);
178 unsigned int subm = READ_ONCE(rb->subm);
179
180 return pblk_rb_ring_count(mem, subm, rb->nr_entries);
181}
182
183unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
184{
185 unsigned int subm;
186
187 subm = READ_ONCE(rb->subm);
188 /* Commit read means updating submission pointer */
189 smp_store_release(&rb->subm,
190 (subm + nr_entries) & (rb->nr_entries - 1));
191
192 return subm;
193}
194
195static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
196 unsigned int to_update)
197{
198 struct pblk *pblk = container_of(rb, struct pblk, rwb);
199 struct pblk_line *line;
200 struct pblk_rb_entry *entry;
201 struct pblk_w_ctx *w_ctx;
202 unsigned int i;
203
204 for (i = 0; i < to_update; i++) {
205 entry = &rb->entries[*l2p_upd];
206 w_ctx = &entry->w_ctx;
207
208 pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
209 entry->cacheline);
210
211 line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)];
212 kref_put(&line->ref, pblk_line_put);
213 clean_wctx(w_ctx);
214 *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
215 }
216
217 return 0;
218}
219
220/*
221 * When we move the l2p_update pointer, we update the l2p table - lookups will
222 * point to the physical address instead of to the cacheline in the write buffer
223 * from this moment on.
224 */
225static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
226 unsigned int mem, unsigned int sync)
227{
228 unsigned int space, count;
229 int ret = 0;
230
231 lockdep_assert_held(&rb->w_lock);
232
233 /* Update l2p only as buffer entries are being overwritten */
234 space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
235 if (space > nr_entries)
236 goto out;
237
238 count = nr_entries - space;
239 /* l2p_update used exclusively under rb->w_lock */
240 ret = __pblk_rb_update_l2p(rb, &rb->l2p_update, count);
241
242out:
243 return ret;
244}
245
246/*
247 * Update the l2p entry for all sectors stored on the write buffer. This means
248 * that all future lookups to the l2p table will point to a device address, not
249 * to the cacheline in the write buffer.
250 */
251void pblk_rb_sync_l2p(struct pblk_rb *rb)
252{
253 unsigned int sync;
254 unsigned int to_update;
255
256 spin_lock(&rb->w_lock);
257
258 /* Protect from reads and writes */
259 sync = smp_load_acquire(&rb->sync);
260
261 to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
262 __pblk_rb_update_l2p(rb, &rb->l2p_update, to_update);
263
264 spin_unlock(&rb->w_lock);
265}
266
267/*
268 * Write @nr_entries to ring buffer from @data buffer if there is enough space.
269 * Typically, 4KB data chunks coming from a bio will be copied to the ring
270 * buffer, thus the write will fail if not all incoming data can be copied.
271 *
272 */
273static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
274 struct pblk_w_ctx w_ctx,
275 struct pblk_rb_entry *entry)
276{
277 memcpy(entry->data, data, rb->seg_size);
278
279 entry->w_ctx.lba = w_ctx.lba;
280 entry->w_ctx.ppa = w_ctx.ppa;
281}
282
283void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
284 struct pblk_w_ctx w_ctx, unsigned int ring_pos)
285{
286 struct pblk *pblk = container_of(rb, struct pblk, rwb);
287 struct pblk_rb_entry *entry;
288 int flags;
289
290 entry = &rb->entries[ring_pos];
291 flags = READ_ONCE(entry->w_ctx.flags);
292#ifdef CONFIG_NVM_DEBUG
293 /* Caller must guarantee that the entry is free */
294 BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
295#endif
296
297 __pblk_rb_write_entry(rb, data, w_ctx, entry);
298
299 pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
300 flags = w_ctx.flags | PBLK_WRITTEN_DATA;
301
302 /* Release flags on write context. Protect from writes */
303 smp_store_release(&entry->w_ctx.flags, flags);
304}
305
306void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
307 struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
308 unsigned int ring_pos)
309{
310 struct pblk *pblk = container_of(rb, struct pblk, rwb);
311 struct pblk_rb_entry *entry;
312 int flags;
313
314 entry = &rb->entries[ring_pos];
315 flags = READ_ONCE(entry->w_ctx.flags);
316#ifdef CONFIG_NVM_DEBUG
317 /* Caller must guarantee that the entry is free */
318 BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
319#endif
320
321 __pblk_rb_write_entry(rb, data, w_ctx, entry);
322
323 if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, gc_line))
324 entry->w_ctx.lba = ADDR_EMPTY;
325
326 flags = w_ctx.flags | PBLK_WRITTEN_DATA;
327
328 /* Release flags on write context. Protect from writes */
329 smp_store_release(&entry->w_ctx.flags, flags);
330}
331
332static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio,
333 unsigned int pos)
334{
335 struct pblk_rb_entry *entry;
336 unsigned int subm, sync_point;
337 int flags;
338
339 subm = READ_ONCE(rb->subm);
340
341#ifdef CONFIG_NVM_DEBUG
342 atomic_inc(&rb->inflight_sync_point);
343#endif
344
345 if (pos == subm)
346 return 0;
347
348 sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
349 entry = &rb->entries[sync_point];
350
351 flags = READ_ONCE(entry->w_ctx.flags);
352 flags |= PBLK_FLUSH_ENTRY;
353
354 /* Release flags on context. Protect from writes */
355 smp_store_release(&entry->w_ctx.flags, flags);
356
357 /* Protect syncs */
358 smp_store_release(&rb->sync_point, sync_point);
359
360 spin_lock_irq(&rb->s_lock);
361 bio_list_add(&entry->w_ctx.bios, bio);
362 spin_unlock_irq(&rb->s_lock);
363
364 return 1;
365}
366
367static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
368 unsigned int *pos)
369{
370 unsigned int mem;
371 unsigned int sync;
372
373 sync = READ_ONCE(rb->sync);
374 mem = READ_ONCE(rb->mem);
375
376 if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < nr_entries)
377 return 0;
378
379 if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
380 return 0;
381
382 *pos = mem;
383
384 return 1;
385}
386
387static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
388 unsigned int *pos)
389{
390 if (!__pblk_rb_may_write(rb, nr_entries, pos))
391 return 0;
392
393 /* Protect from read count */
394 smp_store_release(&rb->mem, (*pos + nr_entries) & (rb->nr_entries - 1));
395 return 1;
396}
397
398static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
399 unsigned int *pos, struct bio *bio,
400 int *io_ret)
401{
402 unsigned int mem;
403
404 if (!__pblk_rb_may_write(rb, nr_entries, pos))
405 return 0;
406
407 mem = (*pos + nr_entries) & (rb->nr_entries - 1);
408 *io_ret = NVM_IO_DONE;
409
410 if (bio->bi_opf & REQ_PREFLUSH) {
411 struct pblk *pblk = container_of(rb, struct pblk, rwb);
412
413#ifdef CONFIG_NVM_DEBUG
414 atomic_long_inc(&pblk->nr_flush);
415#endif
416 if (pblk_rb_sync_point_set(&pblk->rwb, bio, mem))
417 *io_ret = NVM_IO_OK;
418 }
419
420 /* Protect from read count */
421 smp_store_release(&rb->mem, mem);
422 return 1;
423}
424
425/*
426 * Atomically check that (i) there is space on the write buffer for the
427 * incoming I/O, and (ii) the current I/O type has enough budget in the write
428 * buffer (rate-limiter).
429 */
430int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
431 unsigned int nr_entries, unsigned int *pos)
432{
433 struct pblk *pblk = container_of(rb, struct pblk, rwb);
434 int flush_done;
435
436 spin_lock(&rb->w_lock);
437 if (!pblk_rl_user_may_insert(&pblk->rl, nr_entries)) {
438 spin_unlock(&rb->w_lock);
439 return NVM_IO_REQUEUE;
440 }
441
442 if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &flush_done)) {
443 spin_unlock(&rb->w_lock);
444 return NVM_IO_REQUEUE;
445 }
446
447 pblk_rl_user_in(&pblk->rl, nr_entries);
448 spin_unlock(&rb->w_lock);
449
450 return flush_done;
451}
452
453/*
454 * Look at pblk_rb_may_write_user comment
455 */
456int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
457 unsigned int *pos)
458{
459 struct pblk *pblk = container_of(rb, struct pblk, rwb);
460
461 spin_lock(&rb->w_lock);
462 if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
463 spin_unlock(&rb->w_lock);
464 return 0;
465 }
466
467 if (!pblk_rb_may_write(rb, nr_entries, pos)) {
468 spin_unlock(&rb->w_lock);
469 return 0;
470 }
471
472 pblk_rl_gc_in(&pblk->rl, nr_entries);
473 spin_unlock(&rb->w_lock);
474
475 return 1;
476}
477
478/*
479 * The caller of this function must ensure that the backpointer will not
480 * overwrite the entries passed on the list.
481 */
482unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
483 struct list_head *list,
484 unsigned int max)
485{
486 struct pblk_rb_entry *entry, *tentry;
487 struct page *page;
488 unsigned int read = 0;
489 int ret;
490
491 list_for_each_entry_safe(entry, tentry, list, index) {
492 if (read > max) {
493 pr_err("pblk: too many entries on list\n");
494 goto out;
495 }
496
497 page = virt_to_page(entry->data);
498 if (!page) {
499 pr_err("pblk: could not allocate write bio page\n");
500 goto out;
501 }
502
503 ret = bio_add_page(bio, page, rb->seg_size, 0);
504 if (ret != rb->seg_size) {
505 pr_err("pblk: could not add page to write bio\n");
506 goto out;
507 }
508
509 list_del(&entry->index);
510 read++;
511 }
512
513out:
514 return read;
515}
516
517/*
518 * Read available entries from the write buffer and add them to the given bio. To
519 * avoid a memory copy, a reference to the write buffer page is added to the bio.
520 *
521 * This function is used by the write thread to form the write bio that will
522 * persist data from the write buffer to the media.
523 */
524unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
525 struct pblk_c_ctx *c_ctx,
526 unsigned int pos,
527 unsigned int nr_entries,
528 unsigned int count)
529{
530 struct pblk *pblk = container_of(rb, struct pblk, rwb);
531 struct pblk_rb_entry *entry;
532 struct page *page;
533 unsigned int pad = 0, read = 0, to_read = nr_entries;
534 unsigned int user_io = 0, gc_io = 0;
535 unsigned int i;
536 int flags;
537 int ret;
538
539 if (count < nr_entries) {
540 pad = nr_entries - count;
541 to_read = count;
542 }
543
544 c_ctx->sentry = pos;
545 c_ctx->nr_valid = to_read;
546 c_ctx->nr_padded = pad;
547
548 for (i = 0; i < to_read; i++) {
549 entry = &rb->entries[pos];
550
551 /* A write has been allowed into the buffer, but data is still
552 * being copied to it. It is ok to busy wait.
553 */
554try:
555 flags = READ_ONCE(entry->w_ctx.flags);
556 if (!(flags & PBLK_WRITTEN_DATA))
557 goto try;
558
559 if (flags & PBLK_IOTYPE_USER)
560 user_io++;
561 else if (flags & PBLK_IOTYPE_GC)
562 gc_io++;
563 else
564 WARN(1, "pblk: unknown IO type\n");
565
566 page = virt_to_page(entry->data);
567 if (!page) {
568 pr_err("pblk: could not allocate write bio page\n");
569 flags &= ~PBLK_WRITTEN_DATA;
570 flags |= PBLK_SUBMITTED_ENTRY;
571 /* Release flags on context. Protect from writes */
572 smp_store_release(&entry->w_ctx.flags, flags);
573 goto out;
574 }
575
576 ret = bio_add_page(bio, page, rb->seg_size, 0);
577 if (ret != rb->seg_size) {
578 pr_err("pblk: could not add page to write bio\n");
579 flags &= ~PBLK_WRITTEN_DATA;
580 flags |= PBLK_SUBMITTED_ENTRY;
581 /* Release flags on context. Protect from writes */
582 smp_store_release(&entry->w_ctx.flags, flags);
583 goto out;
584 }
585
586 if (flags & PBLK_FLUSH_ENTRY) {
587 unsigned int sync_point;
588
589 sync_point = READ_ONCE(rb->sync_point);
590 if (sync_point == pos) {
591 /* Protect syncs */
592 smp_store_release(&rb->sync_point, EMPTY_ENTRY);
593 }
594
595 flags &= ~PBLK_FLUSH_ENTRY;
596#ifdef CONFIG_NVM_DEBUG
597 atomic_dec(&rb->inflight_sync_point);
598#endif
599 }
600
601 flags &= ~PBLK_WRITTEN_DATA;
602 flags |= PBLK_SUBMITTED_ENTRY;
603
604 /* Release flags on context. Protect from writes */
605 smp_store_release(&entry->w_ctx.flags, flags);
606
607 pos = (pos + 1) & (rb->nr_entries - 1);
608 }
609
610 read = to_read;
611 pblk_rl_out(&pblk->rl, user_io, gc_io);
612#ifdef CONFIG_NVM_DEBUG
613 atomic_long_add(pad, &((struct pblk *)
614 (container_of(rb, struct pblk, rwb)))->padded_writes);
615#endif
616out:
617 return read;
618}
619
620/*
621 * Copy to bio only if the lba matches the one on the given cache entry.
622 * Otherwise, it means that the entry has been overwritten, and the bio should
623 * be directed to disk.
624 */
625int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
626 u64 pos, int bio_iter)
627{
628 struct pblk_rb_entry *entry;
629 struct pblk_w_ctx *w_ctx;
630 void *data;
631 int flags;
632 int ret = 1;
633
634 spin_lock(&rb->w_lock);
635
636#ifdef CONFIG_NVM_DEBUG
637 /* Caller must ensure that the access will not cause an overflow */
638 BUG_ON(pos >= rb->nr_entries);
639#endif
640 entry = &rb->entries[pos];
641 w_ctx = &entry->w_ctx;
642 flags = READ_ONCE(w_ctx->flags);
643
644 /* Check if the entry has been overwritten or is scheduled to be */
645 if (w_ctx->lba != lba || flags & PBLK_WRITABLE_ENTRY) {
646 ret = 0;
647 goto out;
648 }
649
650 /* Only advance the bio if it hasn't been advanced already. If advanced,
651 * this bio is at least a partial bio (i.e., it has partially been
652 * filled with data from the cache). If part of the data resides on the
653 * media, it will be read from the device later on.
654 */
655 if (unlikely(!bio->bi_iter.bi_idx))
656 bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
657
658 data = bio_data(bio);
659 memcpy(data, entry->data, rb->seg_size);
660
661out:
662 spin_unlock(&rb->w_lock);
663 return ret;
664}
665
666struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
667{
668 unsigned int entry = pos & (rb->nr_entries - 1);
669
670 return &rb->entries[entry].w_ctx;
671}
672
673unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
674 __acquires(&rb->s_lock)
675{
676 if (flags)
677 spin_lock_irqsave(&rb->s_lock, *flags);
678 else
679 spin_lock_irq(&rb->s_lock);
680
681 return rb->sync;
682}
683
684void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
685 __releases(&rb->s_lock)
686{
687 lockdep_assert_held(&rb->s_lock);
688
689 if (flags)
690 spin_unlock_irqrestore(&rb->s_lock, *flags);
691 else
692 spin_unlock_irq(&rb->s_lock);
693}
694
695unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
696{
697 unsigned int sync;
698 unsigned int i;
699
700 lockdep_assert_held(&rb->s_lock);
701
702 sync = READ_ONCE(rb->sync);
703
704 for (i = 0; i < nr_entries; i++)
705 sync = (sync + 1) & (rb->nr_entries - 1);
706
707 /* Protect from counts */
708 smp_store_release(&rb->sync, sync);
709
710 return sync;
711}
712
713unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb)
714{
715 unsigned int subm, sync_point;
716 unsigned int count;
717
718 /* Protect syncs */
719 sync_point = smp_load_acquire(&rb->sync_point);
720 if (sync_point == EMPTY_ENTRY)
721 return 0;
722
723 subm = READ_ONCE(rb->subm);
724
725 /* The sync point itself counts as a sector to sync */
726 count = pblk_rb_ring_count(sync_point, subm, rb->nr_entries) + 1;
727
728 return count;
729}
730
731/*
732 * Scan from the current position of the sync pointer to find the entry that
733 * corresponds to the given ppa. This is necessary since write requests can be
734 * completed out of order. The assumption is that the ppa is close to the sync
735 * pointer thus the search will not take long.
736 *
737 * The caller of this function must guarantee that the sync pointer will not
738 * reach the entry while it is using the metadata associated with it. With this
739 * assumption in mind, there is no need to take the sync lock.
740 */
741struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
742 struct ppa_addr *ppa)
743{
744 unsigned int sync, subm, count;
745 unsigned int i;
746
747 sync = READ_ONCE(rb->sync);
748 subm = READ_ONCE(rb->subm);
749 count = pblk_rb_ring_count(subm, sync, rb->nr_entries);
750
751 for (i = 0; i < count; i++)
752 sync = (sync + 1) & (rb->nr_entries - 1);
753
754 return NULL;
755}
756
757int pblk_rb_tear_down_check(struct pblk_rb *rb)
758{
759 struct pblk_rb_entry *entry;
760 int i;
761 int ret = 0;
762
763 spin_lock(&rb->w_lock);
764 spin_lock_irq(&rb->s_lock);
765
766 if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
767 (rb->sync == rb->l2p_update) &&
768 (rb->sync_point == EMPTY_ENTRY)) {
769 goto out;
770 }
771
772 if (!rb->entries) {
773 ret = 1;
774 goto out;
775 }
776
777 for (i = 0; i < rb->nr_entries; i++) {
778 entry = &rb->entries[i];
779
780 if (!entry->data) {
781 ret = 1;
782 goto out;
783 }
784 }
785
786out:
787 spin_unlock(&rb->w_lock);
788 spin_unlock_irq(&rb->s_lock);
789
790 return ret;
791}
792
793unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
794{
795 return (pos & (rb->nr_entries - 1));
796}
797
798int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
799{
800 return (pos >= rb->nr_entries);
801}
802
803ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
804{
805 struct pblk *pblk = container_of(rb, struct pblk, rwb);
806 struct pblk_c_ctx *c;
807 ssize_t offset;
808 int queued_entries = 0;
809
810 spin_lock_irq(&rb->s_lock);
811 list_for_each_entry(c, &pblk->compl_list, list)
812 queued_entries++;
813 spin_unlock_irq(&rb->s_lock);
814
815 if (rb->sync_point != EMPTY_ENTRY)
816 offset = scnprintf(buf, PAGE_SIZE,
817 "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
818 rb->nr_entries,
819 rb->mem,
820 rb->subm,
821 rb->sync,
822 rb->l2p_update,
823#ifdef CONFIG_NVM_DEBUG
824 atomic_read(&rb->inflight_sync_point),
825#else
826 0,
827#endif
828 rb->sync_point,
829 pblk_rb_read_count(rb),
830 pblk_rb_space(rb),
831 pblk_rb_sync_point_count(rb),
832 queued_entries);
833 else
834 offset = scnprintf(buf, PAGE_SIZE,
835 "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
836 rb->nr_entries,
837 rb->mem,
838 rb->subm,
839 rb->sync,
840 rb->l2p_update,
841#ifdef CONFIG_NVM_DEBUG
842 atomic_read(&rb->inflight_sync_point),
843#else
844 0,
845#endif
846 pblk_rb_read_count(rb),
847 pblk_rb_space(rb),
848 pblk_rb_sync_point_count(rb),
849 queued_entries);
850
851 return offset;
852}
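All of the rb->mem, rb->subm, rb->sync, rb->l2p_update and rb->sync_point fields manipulated above are indices into a ring whose size is a power of two (every advance is masked with rb->nr_entries - 1), so occupancy and free space reduce to masked subtraction in the style of the kernel's CIRC_CNT()/CIRC_SPACE() helpers; the definitions of pblk_rb_ring_count() and pblk_rb_ring_space() live earlier in pblk-rb.c and are not shown here. The standalone program below illustrates that arithmetic with made-up index values; it is a sketch of the pattern, not pblk code.

#include <stdio.h>

/* Entries between tail and head in a ring whose size is a power of two */
static unsigned int ring_count(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	return (head - tail) & (size - 1);
}

/* Free slots left for the producer (one slot is kept empty so a full ring
 * can be told apart from an empty one).
 */
static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	return ring_count(tail, head + 1, size);
}

int main(void)
{
	unsigned int size = 8;		/* hypothetical nr_entries */
	unsigned int mem = 6, sync = 2;	/* producer and consumer indices */

	printf("in flight: %u\n", ring_count(mem, sync, size));	/* 4 */
	printf("space:     %u\n", ring_space(mem, sync, size));	/* 3 */
	return 0;
}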
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
new file mode 100644
index 000000000000..eff0982c076f
--- /dev/null
+++ b/drivers/lightnvm/pblk-read.c
@@ -0,0 +1,529 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-read.c - pblk's read path
16 */
17
18#include "pblk.h"
19
20/*
21 * There is no guarantee that the value read from the cache has not been updated
22 * and now resides at another location in the cache. We do guarantee, though,
23 * that if the value is read from the cache, it belongs to the mapped lba. To
24 * guarantee that writes and reads are ordered with respect to each other, a
25 * flush must be issued.
26 */
27static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
28 sector_t lba, struct ppa_addr ppa,
29 int bio_iter)
30{
31#ifdef CONFIG_NVM_DEBUG
32 /* Callers must ensure that the ppa points to a cache address */
33 BUG_ON(pblk_ppa_empty(ppa));
34 BUG_ON(!pblk_addr_in_cache(ppa));
35#endif
36
37 return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
38 pblk_addr_to_cacheline(ppa), bio_iter);
39}
40
41static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
42 unsigned long *read_bitmap)
43{
44 struct bio *bio = rqd->bio;
45 struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
46 sector_t blba = pblk_get_lba(bio);
47 int nr_secs = rqd->nr_ppas;
48 int advanced_bio = 0;
49 int i, j = 0;
50
51 /* logic error: lba out-of-bounds. Ignore read request */
52 if (!(blba + nr_secs < pblk->rl.nr_secs)) {
53 WARN_ON("pblk: read lbas out of bounds\n");
54 return;
55 }
56
57 pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);
58
59 for (i = 0; i < nr_secs; i++) {
60 struct ppa_addr p = ppas[i];
61 sector_t lba = blba + i;
62
63retry:
64 if (pblk_ppa_empty(p)) {
65 WARN_ON(test_and_set_bit(i, read_bitmap));
66 continue;
67 }
68
69 /* Try to read from write buffer. The address is later checked
70 * on the write buffer to prevent retrieving overwritten data.
71 */
72 if (pblk_addr_in_cache(p)) {
73 if (!pblk_read_from_cache(pblk, bio, lba, p, i)) {
74 pblk_lookup_l2p_seq(pblk, &p, lba, 1);
75 goto retry;
76 }
77 WARN_ON(test_and_set_bit(i, read_bitmap));
78 advanced_bio = 1;
79 } else {
80 /* Read from media non-cached sectors */
81 rqd->ppa_list[j++] = p;
82 }
83
84 if (advanced_bio)
85 bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
86 }
87
88#ifdef CONFIG_NVM_DEBUG
89 atomic_long_add(nr_secs, &pblk->inflight_reads);
90#endif
91}
92
93static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
94{
95 int err;
96
97 rqd->flags = pblk_set_read_mode(pblk);
98
99 err = pblk_submit_io(pblk, rqd);
100 if (err)
101 return NVM_IO_ERR;
102
103 return NVM_IO_OK;
104}
105
106static void pblk_end_io_read(struct nvm_rq *rqd)
107{
108 struct pblk *pblk = rqd->private;
109 struct nvm_tgt_dev *dev = pblk->dev;
110 struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
111 struct bio *bio = rqd->bio;
112
113 if (rqd->error)
114 pblk_log_read_err(pblk, rqd);
115#ifdef CONFIG_NVM_DEBUG
116 else
117 WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
118#endif
119
120 if (rqd->nr_ppas > 1)
121 nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
122
123 bio_put(bio);
124 if (r_ctx->orig_bio) {
125#ifdef CONFIG_NVM_DEBUG
126 WARN_ONCE(r_ctx->orig_bio->bi_error,
127 "pblk: corrupted read bio\n");
128#endif
129 bio_endio(r_ctx->orig_bio);
130 bio_put(r_ctx->orig_bio);
131 }
132
133#ifdef CONFIG_NVM_DEBUG
134 atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
135 atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
136#endif
137
138 pblk_free_rqd(pblk, rqd, READ);
139}
140
141static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
142 unsigned int bio_init_idx,
143 unsigned long *read_bitmap)
144{
145 struct bio *new_bio, *bio = rqd->bio;
146 struct bio_vec src_bv, dst_bv;
147 void *ppa_ptr = NULL;
148 void *src_p, *dst_p;
149 dma_addr_t dma_ppa_list = 0;
150 int nr_secs = rqd->nr_ppas;
151 int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
152 int i, ret, hole;
153 DECLARE_COMPLETION_ONSTACK(wait);
154
155 new_bio = bio_alloc(GFP_KERNEL, nr_holes);
156 if (!new_bio) {
157 pr_err("pblk: could not alloc read bio\n");
158 return NVM_IO_ERR;
159 }
160
161 if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
162 goto err;
163
164 if (nr_holes != new_bio->bi_vcnt) {
165 pr_err("pblk: malformed bio\n");
166 goto err;
167 }
168
169 new_bio->bi_iter.bi_sector = 0; /* internal bio */
170 bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
171 new_bio->bi_private = &wait;
172 new_bio->bi_end_io = pblk_end_bio_sync;
173
174 rqd->bio = new_bio;
175 rqd->nr_ppas = nr_holes;
176 rqd->end_io = NULL;
177
178 if (unlikely(nr_secs > 1 && nr_holes == 1)) {
179 ppa_ptr = rqd->ppa_list;
180 dma_ppa_list = rqd->dma_ppa_list;
181 rqd->ppa_addr = rqd->ppa_list[0];
182 }
183
184 ret = pblk_submit_read_io(pblk, rqd);
185 if (ret) {
186 bio_put(rqd->bio);
187 pr_err("pblk: read IO submission failed\n");
188 goto err;
189 }
190
191 if (!wait_for_completion_io_timeout(&wait,
192 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
193 pr_err("pblk: partial read I/O timed out\n");
194 }
195
196 if (rqd->error) {
197 atomic_long_inc(&pblk->read_failed);
198#ifdef CONFIG_NVM_DEBUG
199 pblk_print_failed_rqd(pblk, rqd, rqd->error);
200#endif
201 }
202
203 if (unlikely(nr_secs > 1 && nr_holes == 1)) {
204 rqd->ppa_list = ppa_ptr;
205 rqd->dma_ppa_list = dma_ppa_list;
206 }
207
208 /* Fill the holes in the original bio */
209 i = 0;
210 hole = find_first_zero_bit(read_bitmap, nr_secs);
211 do {
212 src_bv = new_bio->bi_io_vec[i++];
213 dst_bv = bio->bi_io_vec[bio_init_idx + hole];
214
215 src_p = kmap_atomic(src_bv.bv_page);
216 dst_p = kmap_atomic(dst_bv.bv_page);
217
218 memcpy(dst_p + dst_bv.bv_offset,
219 src_p + src_bv.bv_offset,
220 PBLK_EXPOSED_PAGE_SIZE);
221
222 kunmap_atomic(src_p);
223 kunmap_atomic(dst_p);
224
225 mempool_free(src_bv.bv_page, pblk->page_pool);
226
227 hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
228 } while (hole < nr_secs);
229
230 bio_put(new_bio);
231
232 /* Complete the original bio and associated request */
233 rqd->bio = bio;
234 rqd->nr_ppas = nr_secs;
235 rqd->private = pblk;
236
237 bio_endio(bio);
238 pblk_end_io_read(rqd);
239 return NVM_IO_OK;
240
241err:
242 /* Free allocated pages in new bio */
243 pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
244 rqd->private = pblk;
245 pblk_end_io_read(rqd);
246 return NVM_IO_ERR;
247}
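pblk_fill_partial_read_bio() treats read_bitmap as "bit set = sector already served from the write buffer, bit clear = hole that must come from the media": it allocates one page per hole, issues a single read for all holes, and then copies each returned page into the matching slot of the original bio. The standalone loop below sketches that hole iteration over a plain 64-bit bitmap with sample values; the bitmap helper is a simplified stand-in for the kernel's find_first_zero_bit()/find_next_zero_bit().

#include <stdio.h>
#include <stdint.h>

/* Index of the next clear bit at or after 'start', or 'nbits' if none */
static int next_zero_bit(uint64_t map, int nbits, int start)
{
	int i;

	for (i = start; i < nbits; i++)
		if (!(map & (1ULL << i)))
			return i;
	return nbits;
}

int main(void)
{
	uint64_t read_bitmap = 0x2d;	/* hypothetical: 0b101101 for 6 sectors */
	int nr_secs = 6;
	int nr_holes = nr_secs - __builtin_popcountll(read_bitmap);
	int i = 0, hole;

	printf("%d hole(s) to read from the media\n", nr_holes);

	/* Same shape as the fill loop above: walk every cleared bit and pair
	 * it with the i-th page of the intermediate read bio.
	 */
	hole = next_zero_bit(read_bitmap, nr_secs, 0);
	while (hole < nr_secs) {
		printf("copy intermediate page %d into original sector %d\n",
		       i++, hole);
		hole = next_zero_bit(read_bitmap, nr_secs, hole + 1);
	}
	return 0;
}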
248
249static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
250 unsigned long *read_bitmap)
251{
252 struct bio *bio = rqd->bio;
253 struct ppa_addr ppa;
254 sector_t lba = pblk_get_lba(bio);
255
256 /* logic error: lba out-of-bounds. Ignore read request */
257 if (!(lba < pblk->rl.nr_secs)) {
258 WARN_ON("pblk: read lba out of bounds\n");
259 return;
260 }
261
262 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
263
264#ifdef CONFIG_NVM_DEBUG
265 atomic_long_inc(&pblk->inflight_reads);
266#endif
267
268retry:
269 if (pblk_ppa_empty(ppa)) {
270 WARN_ON(test_and_set_bit(0, read_bitmap));
271 return;
272 }
273
274 /* Try to read from write buffer. The address is later checked on the
275 * write buffer to prevent retrieving overwritten data.
276 */
277 if (pblk_addr_in_cache(ppa)) {
278 if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0)) {
279 pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
280 goto retry;
281 }
282 WARN_ON(test_and_set_bit(0, read_bitmap));
283 } else {
284 rqd->ppa_addr = ppa;
285 }
286}
287
288int pblk_submit_read(struct pblk *pblk, struct bio *bio)
289{
290 struct nvm_tgt_dev *dev = pblk->dev;
291 int nr_secs = pblk_get_secs(bio);
292 struct nvm_rq *rqd;
293 unsigned long read_bitmap; /* Max 64 ppas per request */
294 unsigned int bio_init_idx;
295 int ret = NVM_IO_ERR;
296
297 if (nr_secs > PBLK_MAX_REQ_ADDRS)
298 return NVM_IO_ERR;
299
300 bitmap_zero(&read_bitmap, nr_secs);
301
302 rqd = pblk_alloc_rqd(pblk, READ);
303 if (IS_ERR(rqd)) {
304 pr_err_ratelimited("pblk: not able to alloc rqd");
305 return NVM_IO_ERR;
306 }
307
308 rqd->opcode = NVM_OP_PREAD;
309 rqd->bio = bio;
310 rqd->nr_ppas = nr_secs;
311 rqd->private = pblk;
312 rqd->end_io = pblk_end_io_read;
313
314 /* Save the index for this bio's start. This is needed in case
315 * we need to fill a partial read.
316 */
317 bio_init_idx = pblk_get_bi_idx(bio);
318
319 if (nr_secs > 1) {
320 rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
321 &rqd->dma_ppa_list);
322 if (!rqd->ppa_list) {
323 pr_err("pblk: not able to allocate ppa list\n");
324 goto fail_rqd_free;
325 }
326
327 pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
328 } else {
329 pblk_read_rq(pblk, rqd, &read_bitmap);
330 }
331
332 bio_get(bio);
333 if (bitmap_full(&read_bitmap, nr_secs)) {
334 bio_endio(bio);
335 pblk_end_io_read(rqd);
336 return NVM_IO_OK;
337 }
338
339 /* All sectors are to be read from the device */
340 if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
341 struct bio *int_bio = NULL;
342 struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
343
344 /* Clone read bio to deal with read errors internally */
345 int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
346 if (!int_bio) {
347 pr_err("pblk: could not clone read bio\n");
348 return NVM_IO_ERR;
349 }
350
351 rqd->bio = int_bio;
352 r_ctx->orig_bio = bio;
353
354 ret = pblk_submit_read_io(pblk, rqd);
355 if (ret) {
356 pr_err("pblk: read IO submission failed\n");
357 if (int_bio)
358 bio_put(int_bio);
359 return ret;
360 }
361
362 return NVM_IO_OK;
363 }
364
365 /* The read bio request could be partially filled by the write buffer,
366 * but there are some holes that need to be read from the drive.
367 */
368 ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
369 if (ret) {
370 pr_err("pblk: failed to perform partial read\n");
371 return ret;
372 }
373
374 return NVM_IO_OK;
375
376fail_rqd_free:
377 pblk_free_rqd(pblk, rqd, READ);
378 return ret;
379}
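pblk_submit_read() classifies the request by how much of read_bitmap got populated during the L2P lookup: a fully set bitmap means every sector was served from the write buffer and the bio completes immediately, a fully clear bitmap sends the whole request to the device through a cloned bio, and anything in between takes the partial-read path above. A tiny standalone classification of the same three cases follows, using made-up bitmaps.

#include <stdio.h>
#include <stdint.h>

/* Classify a read the way pblk_submit_read() does, from the number of
 * sectors already satisfied by the write buffer.
 */
static const char *classify(uint64_t read_bitmap, int nr_secs)
{
	int cached = __builtin_popcountll(read_bitmap);

	if (cached == nr_secs)
		return "all cached: complete bio now";
	if (cached == 0)
		return "all on media: clone bio and submit to device";
	return "partial: read holes from device, then merge";
}

int main(void)
{
	int nr_secs = 4;
	uint64_t samples[] = { 0xf, 0x0, 0x5 };	/* hypothetical bitmaps */
	int i;

	for (i = 0; i < 3; i++)
		printf("bitmap 0x%llx -> %s\n",
		       (unsigned long long)samples[i],
		       classify(samples[i], nr_secs));
	return 0;
}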
380
381static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
382 struct pblk_line *line, u64 *lba_list,
383 unsigned int nr_secs)
384{
385 struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
386 int valid_secs = 0;
387 int i;
388
389 pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs);
390
391 for (i = 0; i < nr_secs; i++) {
392 if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id ||
393 pblk_ppa_empty(ppas[i])) {
394 lba_list[i] = ADDR_EMPTY;
395 continue;
396 }
397
398 rqd->ppa_list[valid_secs++] = ppas[i];
399 }
400
401#ifdef CONFIG_NVM_DEBUG
402 atomic_long_add(valid_secs, &pblk->inflight_reads);
403#endif
404 return valid_secs;
405}
406
407static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
408 struct pblk_line *line, sector_t lba)
409{
410 struct ppa_addr ppa;
411 int valid_secs = 0;
412
413 /* logic error: lba out-of-bounds */
414 if (!(lba < pblk->rl.nr_secs)) {
415 WARN_ON("pblk: read lba out of bounds\n");
416 goto out;
417 }
418
419 if (lba == ADDR_EMPTY)
420 goto out;
421
422 spin_lock(&pblk->trans_lock);
423 ppa = pblk_trans_map_get(pblk, lba);
424 spin_unlock(&pblk->trans_lock);
425
426 /* Ignore lbas whose mapping has changed since this line was chosen for GC */
427 if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id ||
428 pblk_ppa_empty(ppa))
429 goto out;
430
431 rqd->ppa_addr = ppa;
432 valid_secs = 1;
433
434#ifdef CONFIG_NVM_DEBUG
435 atomic_long_inc(&pblk->inflight_reads);
436#endif
437
438out:
439 return valid_secs;
440}
441
442int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
443 unsigned int nr_secs, unsigned int *secs_to_gc,
444 struct pblk_line *line)
445{
446 struct nvm_tgt_dev *dev = pblk->dev;
447 struct nvm_geo *geo = &dev->geo;
448 struct request_queue *q = dev->q;
449 struct bio *bio;
450 struct nvm_rq rqd;
451 int ret, data_len;
452 DECLARE_COMPLETION_ONSTACK(wait);
453
454 memset(&rqd, 0, sizeof(struct nvm_rq));
455
456 if (nr_secs > 1) {
457 rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
458 &rqd.dma_ppa_list);
459 if (!rqd.ppa_list)
460 return NVM_IO_ERR;
461
462 *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
463 nr_secs);
464 if (*secs_to_gc == 1) {
465 struct ppa_addr ppa;
466
467 ppa = rqd.ppa_list[0];
468 nvm_dev_dma_free(dev->parent, rqd.ppa_list,
469 rqd.dma_ppa_list);
470 rqd.ppa_addr = ppa;
471 }
472 } else {
473 *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
474 }
475
476 if (!(*secs_to_gc))
477 goto out;
478
479 data_len = (*secs_to_gc) * geo->sec_size;
480 bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
481 if (IS_ERR(bio)) {
482 pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
483 goto err_free_dma;
484 }
485
486 bio->bi_iter.bi_sector = 0; /* internal bio */
487 bio_set_op_attrs(bio, REQ_OP_READ, 0);
488
489 rqd.opcode = NVM_OP_PREAD;
490 rqd.end_io = pblk_end_io_sync;
491 rqd.private = &wait;
492 rqd.nr_ppas = *secs_to_gc;
493 rqd.bio = bio;
494
495 ret = pblk_submit_read_io(pblk, &rqd);
496 if (ret) {
497 bio_endio(bio);
498 pr_err("pblk: GC read request failed\n");
499 goto err_free_dma;
500 }
501
502 if (!wait_for_completion_io_timeout(&wait,
503 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
504 pr_err("pblk: GC read I/O timed out\n");
505 }
506
507 if (rqd.error) {
508 atomic_long_inc(&pblk->read_failed_gc);
509#ifdef CONFIG_NVM_DEBUG
510 pblk_print_failed_rqd(pblk, &rqd, rqd.error);
511#endif
512 }
513
514#ifdef CONFIG_NVM_DEBUG
515 atomic_long_add(*secs_to_gc, &pblk->sync_reads);
516 atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads);
517 atomic_long_sub(*secs_to_gc, &pblk->inflight_reads);
518#endif
519
520out:
521 if (rqd.nr_ppas > 1)
522 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
523 return NVM_IO_OK;
524
525err_free_dma:
526 if (rqd.nr_ppas > 1)
527 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
528 return NVM_IO_ERR;
529}
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
new file mode 100644
index 000000000000..0d50f415cfde
--- /dev/null
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -0,0 +1,998 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial: Javier Gonzalez <javier@cnexlabs.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License version
7 * 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * pblk-recovery.c - pblk's recovery path
15 */
16
17#include "pblk.h"
18
19void pblk_submit_rec(struct work_struct *work)
20{
21 struct pblk_rec_ctx *recovery =
22 container_of(work, struct pblk_rec_ctx, ws_rec);
23 struct pblk *pblk = recovery->pblk;
24 struct nvm_tgt_dev *dev = pblk->dev;
25 struct nvm_rq *rqd = recovery->rqd;
26 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
27 int max_secs = nvm_max_phys_sects(dev);
28 struct bio *bio;
29 unsigned int nr_rec_secs;
30 unsigned int pgs_read;
31 int ret;
32
33 nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
34 max_secs);
35
36 bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
37 if (!bio) {
38 pr_err("pblk: not able to create recovery bio\n");
39 return;
40 }
41
42 bio->bi_iter.bi_sector = 0;
43 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
44 rqd->bio = bio;
45 rqd->nr_ppas = nr_rec_secs;
46
47 pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
48 nr_rec_secs);
49 if (pgs_read != nr_rec_secs) {
50 pr_err("pblk: could not read recovery entries\n");
51 goto err;
52 }
53
54 if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
55 pr_err("pblk: could not setup recovery request\n");
56 goto err;
57 }
58
59#ifdef CONFIG_NVM_DEBUG
60 atomic_long_add(nr_rec_secs, &pblk->recov_writes);
61#endif
62
63 ret = pblk_submit_io(pblk, rqd);
64 if (ret) {
65 pr_err("pblk: I/O submission failed: %d\n", ret);
66 goto err;
67 }
68
69 mempool_free(recovery, pblk->rec_pool);
70 return;
71
72err:
73 bio_put(bio);
74 pblk_free_rqd(pblk, rqd, WRITE);
75}
76
77int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
78 struct pblk_rec_ctx *recovery, u64 *comp_bits,
79 unsigned int comp)
80{
81 struct nvm_tgt_dev *dev = pblk->dev;
82 int max_secs = nvm_max_phys_sects(dev);
83 struct nvm_rq *rec_rqd;
84 struct pblk_c_ctx *rec_ctx;
85 int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
86
87 rec_rqd = pblk_alloc_rqd(pblk, WRITE);
88 if (IS_ERR(rec_rqd)) {
89 pr_err("pblk: could not create recovery req.\n");
90 return -ENOMEM;
91 }
92
93 rec_ctx = nvm_rq_to_pdu(rec_rqd);
94
95 /* Copy completion bitmap, but exclude the first X completed entries */
96 bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
97 (unsigned long int *)comp_bits,
98 comp, max_secs);
99
100 /* Save the context for the entries that need to be re-written and
101 * update current context with the completed entries.
102 */
103 rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
104 if (comp >= c_ctx->nr_valid) {
105 rec_ctx->nr_valid = 0;
106 rec_ctx->nr_padded = nr_entries - comp;
107
108 c_ctx->nr_padded = comp - c_ctx->nr_valid;
109 } else {
110 rec_ctx->nr_valid = c_ctx->nr_valid - comp;
111 rec_ctx->nr_padded = c_ctx->nr_padded;
112
113 c_ctx->nr_valid = comp;
114 c_ctx->nr_padded = 0;
115 }
116
117 recovery->rqd = rec_rqd;
118 recovery->pblk = pblk;
119
120 return 0;
121}
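pblk_recov_setup_rq() splits a partially completed write request in two: the first comp entries remain with the original context as completed, while the rest is handed to the recovery context for re-submission, keeping the valid/padded accounting consistent. The standalone example below reproduces that bookkeeping with made-up counts so the two branches are easy to follow; it mirrors the logic above but is not kernel code.

#include <stdio.h>

struct ctx {
	unsigned int nr_valid;
	unsigned int nr_padded;
};

/* Split 'orig' after 'comp' completed entries: 'rec' keeps what still has to
 * be rewritten, 'orig' keeps what the device already persisted. Mirrors the
 * bookkeeping in pblk_recov_setup_rq() above.
 */
static void split_completion(struct ctx *orig, struct ctx *rec,
			     unsigned int comp)
{
	unsigned int nr_entries = orig->nr_valid + orig->nr_padded;

	if (comp >= orig->nr_valid) {
		rec->nr_valid = 0;
		rec->nr_padded = nr_entries - comp;
		orig->nr_padded = comp - orig->nr_valid;
	} else {
		rec->nr_valid = orig->nr_valid - comp;
		rec->nr_padded = orig->nr_padded;
		orig->nr_valid = comp;
		orig->nr_padded = 0;
	}
}

int main(void)
{
	struct ctx orig = { .nr_valid = 12, .nr_padded = 4 };	/* hypothetical */
	struct ctx rec = { 0, 0 };

	split_completion(&orig, &rec, 5);	/* 5 of 16 entries completed */
	printf("orig: valid=%u padded=%u\n", orig.nr_valid, orig.nr_padded);
	printf("rec:  valid=%u padded=%u\n", rec.nr_valid, rec.nr_padded);
	return 0;
}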
122
123__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta)
124{
125 u32 crc;
126
127 crc = pblk_calc_emeta_crc(pblk, emeta);
128 if (le32_to_cpu(emeta->crc) != crc)
129 return NULL;
130
131 if (le32_to_cpu(emeta->header.identifier) != PBLK_MAGIC)
132 return NULL;
133
134 return pblk_line_emeta_to_lbas(emeta);
135}
136
137static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line)
138{
139 struct nvm_tgt_dev *dev = pblk->dev;
140 struct nvm_geo *geo = &dev->geo;
141 struct pblk_line_meta *lm = &pblk->lm;
142 struct line_emeta *emeta = line->emeta;
143 __le64 *lba_list;
144 int data_start;
145 int nr_data_lbas, nr_valid_lbas, nr_lbas = 0;
146 int i;
147
148 lba_list = pblk_recov_get_lba_list(pblk, emeta);
149 if (!lba_list)
150 return 1;
151
152 data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec;
153 nr_data_lbas = lm->sec_per_line - lm->emeta_sec;
154 nr_valid_lbas = le64_to_cpu(emeta->nr_valid_lbas);
155
156 for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) {
157 struct ppa_addr ppa;
158 int pos;
159
160 ppa = addr_to_pblk_ppa(pblk, i, line->id);
161 pos = pblk_ppa_to_pos(geo, ppa);
162
163 /* Do not update bad blocks */
164 if (test_bit(pos, line->blk_bitmap))
165 continue;
166
167 if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) {
168 spin_lock(&line->lock);
169 if (test_and_set_bit(i, line->invalid_bitmap))
170 WARN_ON_ONCE("pblk: rec. double invalidate:\n");
171 else
172 line->vsc--;
173 spin_unlock(&line->lock);
174
175 continue;
176 }
177
178 pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa);
179 nr_lbas++;
180 }
181
182 if (nr_valid_lbas != nr_lbas)
183 pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n",
184 line->id, line->emeta->nr_valid_lbas, nr_lbas);
185
186 line->left_msecs = 0;
187
188 return 0;
189}
190
191static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line)
192{
193 struct nvm_tgt_dev *dev = pblk->dev;
194 struct nvm_geo *geo = &dev->geo;
195 struct pblk_line_meta *lm = &pblk->lm;
196 int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
197
198 return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec -
199 nr_bb * geo->sec_per_blk;
200}
201
202struct pblk_recov_alloc {
203 struct ppa_addr *ppa_list;
204 struct pblk_sec_meta *meta_list;
205 struct nvm_rq *rqd;
206 void *data;
207 dma_addr_t dma_ppa_list;
208 dma_addr_t dma_meta_list;
209};
210
211static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line,
212 struct pblk_recov_alloc p, u64 r_ptr)
213{
214 struct nvm_tgt_dev *dev = pblk->dev;
215 struct nvm_geo *geo = &dev->geo;
216 struct ppa_addr *ppa_list;
217 struct pblk_sec_meta *meta_list;
218 struct nvm_rq *rqd;
219 struct bio *bio;
220 void *data;
221 dma_addr_t dma_ppa_list, dma_meta_list;
222 u64 r_ptr_int;
223 int left_ppas;
224 int rq_ppas, rq_len;
225 int i, j;
226 int ret = 0;
227 DECLARE_COMPLETION_ONSTACK(wait);
228
229 ppa_list = p.ppa_list;
230 meta_list = p.meta_list;
231 rqd = p.rqd;
232 data = p.data;
233 dma_ppa_list = p.dma_ppa_list;
234 dma_meta_list = p.dma_meta_list;
235
236 left_ppas = line->cur_sec - r_ptr;
237 if (!left_ppas)
238 return 0;
239
240 r_ptr_int = r_ptr;
241
242next_read_rq:
243 memset(rqd, 0, pblk_r_rq_size);
244
245 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
246 if (!rq_ppas)
247 rq_ppas = pblk->min_write_pgs;
248 rq_len = rq_ppas * geo->sec_size;
249
250 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
251 if (IS_ERR(bio))
252 return PTR_ERR(bio);
253
254 bio->bi_iter.bi_sector = 0; /* internal bio */
255 bio_set_op_attrs(bio, REQ_OP_READ, 0);
256
257 rqd->bio = bio;
258 rqd->opcode = NVM_OP_PREAD;
259 rqd->flags = pblk_set_read_mode(pblk);
260 rqd->meta_list = meta_list;
261 rqd->nr_ppas = rq_ppas;
262 rqd->ppa_list = ppa_list;
263 rqd->dma_ppa_list = dma_ppa_list;
264 rqd->dma_meta_list = dma_meta_list;
265 rqd->end_io = pblk_end_io_sync;
266 rqd->private = &wait;
267
268 for (i = 0; i < rqd->nr_ppas; ) {
269 struct ppa_addr ppa;
270 int pos;
271
272 ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
273 pos = pblk_dev_ppa_to_pos(geo, ppa);
274
275 while (test_bit(pos, line->blk_bitmap)) {
276 r_ptr_int += pblk->min_write_pgs;
277 ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id);
278 pos = pblk_dev_ppa_to_pos(geo, ppa);
279 }
280
281 for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
282 rqd->ppa_list[i] =
283 addr_to_gen_ppa(pblk, r_ptr_int, line->id);
284 }
285
286 /* If read fails, more padding is needed */
287 ret = pblk_submit_io(pblk, rqd);
288 if (ret) {
289 pr_err("pblk: I/O submission failed: %d\n", ret);
290 return ret;
291 }
292
293 if (!wait_for_completion_io_timeout(&wait,
294 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
295 pr_err("pblk: L2P recovery read timed out\n");
296 return -EINTR;
297 }
298
299 reinit_completion(&wait);
300
301 /* At this point, the read should not fail. If it does, it is a problem
302 * we cannot recover from here. Need FTL log.
303 */
304 if (rqd->error) {
305 pr_err("pblk: L2P recovery failed (%d)\n", rqd->error);
306 return -EINTR;
307 }
308
309 for (i = 0; i < rqd->nr_ppas; i++) {
310 u64 lba = le64_to_cpu(meta_list[i].lba);
311
312 if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
313 continue;
314
315 pblk_update_map(pblk, lba, rqd->ppa_list[i]);
316 }
317
318 left_ppas -= rq_ppas;
319 if (left_ppas > 0)
320 goto next_read_rq;
321
322 return 0;
323}
324
325static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line,
326 struct pblk_recov_alloc p, int left_ppas)
327{
328 struct nvm_tgt_dev *dev = pblk->dev;
329 struct nvm_geo *geo = &dev->geo;
330 struct ppa_addr *ppa_list;
331 struct pblk_sec_meta *meta_list;
332 struct nvm_rq *rqd;
333 struct bio *bio;
334 void *data;
335 dma_addr_t dma_ppa_list, dma_meta_list;
336 __le64 *lba_list = pblk_line_emeta_to_lbas(line->emeta);
337 u64 w_ptr = line->cur_sec;
338 int left_line_ppas = line->left_msecs;
339 int rq_ppas, rq_len;
340 int i, j;
341 int ret = 0;
342 DECLARE_COMPLETION_ONSTACK(wait);
343
344 ppa_list = p.ppa_list;
345 meta_list = p.meta_list;
346 rqd = p.rqd;
347 data = p.data;
348 dma_ppa_list = p.dma_ppa_list;
349 dma_meta_list = p.dma_meta_list;
350
351next_pad_rq:
352 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
353 if (!rq_ppas)
354 rq_ppas = pblk->min_write_pgs;
355 rq_len = rq_ppas * geo->sec_size;
356
357 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
358 if (IS_ERR(bio))
359 return PTR_ERR(bio);
360
361 bio->bi_iter.bi_sector = 0; /* internal bio */
362 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
363
364 memset(rqd, 0, pblk_r_rq_size);
365
366 rqd->bio = bio;
367 rqd->opcode = NVM_OP_PWRITE;
368 rqd->flags = pblk_set_progr_mode(pblk, WRITE);
369 rqd->meta_list = meta_list;
370 rqd->nr_ppas = rq_ppas;
371 rqd->ppa_list = ppa_list;
372 rqd->dma_ppa_list = dma_ppa_list;
373 rqd->dma_meta_list = dma_meta_list;
374 rqd->end_io = pblk_end_io_sync;
375 rqd->private = &wait;
376
377 for (i = 0; i < rqd->nr_ppas; ) {
378 struct ppa_addr ppa;
379 int pos;
380
381 w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
382 ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
383 pos = pblk_ppa_to_pos(geo, ppa);
384
385 while (test_bit(pos, line->blk_bitmap)) {
386 w_ptr += pblk->min_write_pgs;
387 ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id);
388 pos = pblk_ppa_to_pos(geo, ppa);
389 }
390
391 for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) {
392 struct ppa_addr dev_ppa;
393
394 dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
395
396 pblk_map_invalidate(pblk, dev_ppa);
397 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
398 lba_list[w_ptr] = cpu_to_le64(ADDR_EMPTY);
399 rqd->ppa_list[i] = dev_ppa;
400 }
401 }
402
403 ret = pblk_submit_io(pblk, rqd);
404 if (ret) {
405 pr_err("pblk: I/O submission failed: %d\n", ret);
406 return ret;
407 }
408
409 if (!wait_for_completion_io_timeout(&wait,
410 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
411 pr_err("pblk: L2P recovery write timed out\n");
412 }
413 reinit_completion(&wait);
414
415 left_line_ppas -= rq_ppas;
416 left_ppas -= rq_ppas;
417 if (left_ppas > 0 && left_line_ppas)
418 goto next_pad_rq;
419
420 return 0;
421}
422
423/* When this function is called, not all upper pages of a page holding valid
424 * data have been written yet. To recover this data, we first find the write
425 * pointer on the device, then pad all necessary sectors, and finally attempt
426 * to read back the valid data.
427 */
428static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line,
429 struct pblk_recov_alloc p)
430{
431 struct nvm_tgt_dev *dev = pblk->dev;
432 struct nvm_geo *geo = &dev->geo;
433 struct ppa_addr *ppa_list;
434 struct pblk_sec_meta *meta_list;
435 struct nvm_rq *rqd;
436 struct bio *bio;
437 void *data;
438 dma_addr_t dma_ppa_list, dma_meta_list;
439 u64 w_ptr = 0, r_ptr;
440 int rq_ppas, rq_len;
441 int i, j;
442 int ret = 0;
443 int rec_round;
444 int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
445 DECLARE_COMPLETION_ONSTACK(wait);
446
447 ppa_list = p.ppa_list;
448 meta_list = p.meta_list;
449 rqd = p.rqd;
450 data = p.data;
451 dma_ppa_list = p.dma_ppa_list;
452 dma_meta_list = p.dma_meta_list;
453
454 /* We can recover data up to the line's current write pointer */
455 r_ptr = line->cur_sec;
456 rec_round = 0;
457
458next_rq:
459 memset(rqd, 0, pblk_r_rq_size);
460
461 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
462 if (!rq_ppas)
463 rq_ppas = pblk->min_write_pgs;
464 rq_len = rq_ppas * geo->sec_size;
465
466 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
467 if (IS_ERR(bio))
468 return PTR_ERR(bio);
469
470 bio->bi_iter.bi_sector = 0; /* internal bio */
471 bio_set_op_attrs(bio, REQ_OP_READ, 0);
472
473 rqd->bio = bio;
474 rqd->opcode = NVM_OP_PREAD;
475 rqd->flags = pblk_set_read_mode(pblk);
476 rqd->meta_list = meta_list;
477 rqd->nr_ppas = rq_ppas;
478 rqd->ppa_list = ppa_list;
479 rqd->dma_ppa_list = dma_ppa_list;
480 rqd->dma_meta_list = dma_meta_list;
481 rqd->end_io = pblk_end_io_sync;
482 rqd->private = &wait;
483
484 for (i = 0; i < rqd->nr_ppas; ) {
485 struct ppa_addr ppa;
486 int pos;
487
488 w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
489 ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
490 pos = pblk_dev_ppa_to_pos(geo, ppa);
491
492 while (test_bit(pos, line->blk_bitmap)) {
493 w_ptr += pblk->min_write_pgs;
494 ppa = addr_to_gen_ppa(pblk, w_ptr, line->id);
495 pos = pblk_dev_ppa_to_pos(geo, ppa);
496 }
497
498 for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++)
499 rqd->ppa_list[i] =
500 addr_to_gen_ppa(pblk, w_ptr, line->id);
501 }
502
503 ret = pblk_submit_io(pblk, rqd);
504 if (ret) {
505 pr_err("pblk: I/O submission failed: %d\n", ret);
506 return ret;
507 }
508
509 if (!wait_for_completion_io_timeout(&wait,
510 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
511 pr_err("pblk: L2P recovery read timed out\n");
512 }
513 reinit_completion(&wait);
514
515 /* This should not happen since the read failed during normal recovery,
516 * but the media works funny sometimes...
517 */
518 if (!rec_round++ && !rqd->error) {
519 rec_round = 0;
520 for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) {
521 u64 lba = le64_to_cpu(meta_list[i].lba);
522
523 if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
524 continue;
525
526 pblk_update_map(pblk, lba, rqd->ppa_list[i]);
527 }
528 }
529
530 /* Reached the end of the written line */
531 if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
532 int pad_secs, nr_error_bits, bit;
533 int ret;
534
535 bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
536 nr_error_bits = rqd->nr_ppas - bit;
537
538 /* Roll back failed sectors */
539 line->cur_sec -= nr_error_bits;
540 line->left_msecs += nr_error_bits;
541 bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
542
543 pad_secs = pblk_pad_distance(pblk);
544 if (pad_secs > line->left_msecs)
545 pad_secs = line->left_msecs;
546
547 ret = pblk_recov_pad_oob(pblk, line, p, pad_secs);
548 if (ret)
549 pr_err("pblk: OOB padding failed (err:%d)\n", ret);
550
551 ret = pblk_recov_read_oob(pblk, line, p, r_ptr);
552 if (ret)
553 pr_err("pblk: OOB read failed (err:%d)\n", ret);
554
555 line->left_ssecs = line->left_msecs;
556 left_ppas = 0;
557 }
558
559 left_ppas -= rq_ppas;
560 if (left_ppas > 0)
561 goto next_rq;
562
563 return ret;
564}
565
566static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
567 struct pblk_recov_alloc p, int *done)
568{
569 struct nvm_tgt_dev *dev = pblk->dev;
570 struct nvm_geo *geo = &dev->geo;
571 struct ppa_addr *ppa_list;
572 struct pblk_sec_meta *meta_list;
573 struct nvm_rq *rqd;
574 struct bio *bio;
575 void *data;
576 dma_addr_t dma_ppa_list, dma_meta_list;
577 u64 paddr;
578 int rq_ppas, rq_len;
579 int i, j;
580 int ret = 0;
581 int left_ppas = pblk_calc_sec_in_line(pblk, line);
582 DECLARE_COMPLETION_ONSTACK(wait);
583
584 ppa_list = p.ppa_list;
585 meta_list = p.meta_list;
586 rqd = p.rqd;
587 data = p.data;
588 dma_ppa_list = p.dma_ppa_list;
589 dma_meta_list = p.dma_meta_list;
590
591 *done = 1;
592
593next_rq:
594 memset(rqd, 0, pblk_r_rq_size);
595
596 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
597 if (!rq_ppas)
598 rq_ppas = pblk->min_write_pgs;
599 rq_len = rq_ppas * geo->sec_size;
600
601 bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
602 if (IS_ERR(bio))
603 return PTR_ERR(bio);
604
605 bio->bi_iter.bi_sector = 0; /* internal bio */
606 bio_set_op_attrs(bio, REQ_OP_READ, 0);
607
608 rqd->bio = bio;
609 rqd->opcode = NVM_OP_PREAD;
610 rqd->flags = pblk_set_read_mode(pblk);
611 rqd->meta_list = meta_list;
612 rqd->nr_ppas = rq_ppas;
613 rqd->ppa_list = ppa_list;
614 rqd->dma_ppa_list = dma_ppa_list;
615 rqd->dma_meta_list = dma_meta_list;
616 rqd->end_io = pblk_end_io_sync;
617 rqd->private = &wait;
618
619 for (i = 0; i < rqd->nr_ppas; ) {
620 struct ppa_addr ppa;
621 int pos;
622
623 paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs);
624 ppa = addr_to_gen_ppa(pblk, paddr, line->id);
625 pos = pblk_dev_ppa_to_pos(geo, ppa);
626
627 while (test_bit(pos, line->blk_bitmap)) {
628 paddr += pblk->min_write_pgs;
629 ppa = addr_to_gen_ppa(pblk, paddr, line->id);
630 pos = pblk_dev_ppa_to_pos(geo, ppa);
631 }
632
633 for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++)
634 rqd->ppa_list[i] =
635 addr_to_gen_ppa(pblk, paddr, line->id);
636 }
637
638 ret = pblk_submit_io(pblk, rqd);
639 if (ret) {
640 pr_err("pblk: I/O submission failed: %d\n", ret);
641 bio_put(bio);
642 return ret;
643 }
644
645 if (!wait_for_completion_io_timeout(&wait,
646 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
647 pr_err("pblk: L2P recovery read timed out\n");
648 }
649 reinit_completion(&wait);
650
651 /* Reached the end of the written line */
652 if (rqd->error) {
653 int nr_error_bits, bit;
654
655 bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas);
656 nr_error_bits = rqd->nr_ppas - bit;
657
658 /* Roll back failed sectors */
659 line->cur_sec -= nr_error_bits;
660 line->left_msecs += nr_error_bits;
661 line->left_ssecs = line->left_msecs;
662 bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits);
663
664 left_ppas = 0;
665 rqd->nr_ppas = bit;
666
667 if (rqd->error != NVM_RSP_ERR_EMPTYPAGE)
668 *done = 0;
669 }
670
671 for (i = 0; i < rqd->nr_ppas; i++) {
672 u64 lba = le64_to_cpu(meta_list[i].lba);
673
674 if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
675 continue;
676
677 pblk_update_map(pblk, lba, rqd->ppa_list[i]);
678 }
679
680 left_ppas -= rq_ppas;
681 if (left_ppas > 0)
682 goto next_rq;
683
684 return ret;
685}
686
687/* Scan the line for lbas stored in the sectors' out-of-band (OOB) area */
688static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line)
689{
690 struct nvm_tgt_dev *dev = pblk->dev;
691 struct nvm_geo *geo = &dev->geo;
692 struct nvm_rq *rqd;
693 struct ppa_addr *ppa_list;
694 struct pblk_sec_meta *meta_list;
695 struct pblk_recov_alloc p;
696 void *data;
697 dma_addr_t dma_ppa_list, dma_meta_list;
698 int done, ret = 0;
699
700 rqd = pblk_alloc_rqd(pblk, READ);
701 if (IS_ERR(rqd))
702 return PTR_ERR(rqd);
703
704 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
705 if (!meta_list) {
706 ret = -ENOMEM;
707 goto free_rqd;
708 }
709
710 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
711 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
712
713 data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
714 if (!data) {
715 ret = -ENOMEM;
716 goto free_meta_list;
717 }
718
719 p.ppa_list = ppa_list;
720 p.meta_list = meta_list;
721 p.rqd = rqd;
722 p.data = data;
723 p.dma_ppa_list = dma_ppa_list;
724 p.dma_meta_list = dma_meta_list;
725
726 ret = pblk_recov_scan_oob(pblk, line, p, &done);
727 if (ret) {
728 pr_err("pblk: could not recover L2P from OOB\n");
729 goto out;
730 }
731
732 if (!done) {
733 ret = pblk_recov_scan_all_oob(pblk, line, p);
734 if (ret) {
735 pr_err("pblk: could not recover L2P from OOB\n");
736 goto out;
737 }
738 }
739
740 if (pblk_line_is_full(line))
741 pblk_line_recov_close(pblk, line);
742
743out:
744 kfree(data);
745free_meta_list:
746 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
747free_rqd:
748 pblk_free_rqd(pblk, rqd, READ);
749
750 return ret;
751}
752
753/* Insert lines ordered by sequence number (seq_num) on list */
754static void pblk_recov_line_add_ordered(struct list_head *head,
755 struct pblk_line *line)
756{
757 struct pblk_line *t = NULL;
758
759 list_for_each_entry(t, head, list)
760 if (t->seq_nr > line->seq_nr)
761 break;
762
763 __list_add(&line->list, t->list.prev, &t->list);
764}
765
766struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
767{
768 struct nvm_tgt_dev *dev = pblk->dev;
769 struct nvm_geo *geo = &dev->geo;
770 struct pblk_line_meta *lm = &pblk->lm;
771 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
772 struct pblk_line *line, *tline, *data_line = NULL;
773 struct line_smeta *smeta;
774 struct line_emeta *emeta;
775 int found_lines = 0, recovered_lines = 0, open_lines = 0;
776 int is_next = 0;
777 int meta_line;
778 int i, valid_uuid = 0;
779 LIST_HEAD(recov_list);
780
781 /* TODO: Implement FTL snapshot */
782
783 /* Scan recovery - takes place when FTL snapshot fails */
784 spin_lock(&l_mg->free_lock);
785 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
786 set_bit(meta_line, &l_mg->meta_bitmap);
787 smeta = l_mg->sline_meta[meta_line].meta;
788 emeta = l_mg->eline_meta[meta_line].meta;
789 spin_unlock(&l_mg->free_lock);
790
791 /* Order data lines using their sequence number */
792 for (i = 0; i < l_mg->nr_lines; i++) {
793 u32 crc;
794
795 line = &pblk->lines[i];
796
797 memset(smeta, 0, lm->smeta_len);
798 line->smeta = smeta;
799 line->lun_bitmap = ((void *)(smeta)) +
800 sizeof(struct line_smeta);
801
802 /* Lines whose smeta cannot be read are assumed not to have been written */
803 if (pblk_line_read_smeta(pblk, line))
804 continue;
805
806 crc = pblk_calc_smeta_crc(pblk, smeta);
807 if (le32_to_cpu(smeta->crc) != crc)
808 continue;
809
810 if (le32_to_cpu(smeta->header.identifier) != PBLK_MAGIC)
811 continue;
812
813 if (le16_to_cpu(smeta->header.version) != 1) {
814 pr_err("pblk: found incompatible line version %u\n",
815 smeta->header.version);
816 return ERR_PTR(-EINVAL);
817 }
818
819 /* The first valid instance uuid is used for initialization */
820 if (!valid_uuid) {
821 memcpy(pblk->instance_uuid, smeta->header.uuid, 16);
822 valid_uuid = 1;
823 }
824
825 if (memcmp(pblk->instance_uuid, smeta->header.uuid, 16)) {
826 pr_debug("pblk: ignore line %u due to uuid mismatch\n",
827 i);
828 continue;
829 }
830
831 /* Update line metadata */
832 spin_lock(&line->lock);
833 line->id = le32_to_cpu(line->smeta->header.id);
834 line->type = le16_to_cpu(line->smeta->header.type);
835 line->seq_nr = le64_to_cpu(line->smeta->seq_nr);
836 spin_unlock(&line->lock);
837
838 /* Update general metadata */
839 spin_lock(&l_mg->free_lock);
840 if (line->seq_nr >= l_mg->d_seq_nr)
841 l_mg->d_seq_nr = line->seq_nr + 1;
842 l_mg->nr_free_lines--;
843 spin_unlock(&l_mg->free_lock);
844
845 if (pblk_line_recov_alloc(pblk, line))
846 goto out;
847
848 pblk_recov_line_add_ordered(&recov_list, line);
849 found_lines++;
850 pr_debug("pblk: recovering data line %d, seq:%llu\n",
851 line->id, smeta->seq_nr);
852 }
853
854 if (!found_lines) {
855 pblk_setup_uuid(pblk);
856
857 spin_lock(&l_mg->free_lock);
858 WARN_ON_ONCE(!test_and_clear_bit(meta_line,
859 &l_mg->meta_bitmap));
860 spin_unlock(&l_mg->free_lock);
861
862 goto out;
863 }
864
865 /* Verify closed blocks and recover this portion of the L2P table */
866 list_for_each_entry_safe(line, tline, &recov_list, list) {
867 int off, nr_bb;
868
869 recovered_lines++;
870 /* Calculate where emeta starts based on the line bb */
871 off = lm->sec_per_line - lm->emeta_sec;
872 nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line);
873 off -= nr_bb * geo->sec_per_pl;
874
875 memset(emeta, 0, lm->emeta_len);
876 line->emeta = emeta;
877 line->emeta_ssec = off;
878
879 if (pblk_line_read_emeta(pblk, line)) {
880 pblk_recov_l2p_from_oob(pblk, line);
881 goto next;
882 }
883
884 if (pblk_recov_l2p_from_emeta(pblk, line))
885 pblk_recov_l2p_from_oob(pblk, line);
886
887next:
888 if (pblk_line_is_full(line)) {
889 struct list_head *move_list;
890
891 spin_lock(&line->lock);
892 line->state = PBLK_LINESTATE_CLOSED;
893 move_list = pblk_line_gc_list(pblk, line);
894 spin_unlock(&line->lock);
895
896 spin_lock(&l_mg->gc_lock);
897 list_move_tail(&line->list, move_list);
898 spin_unlock(&l_mg->gc_lock);
899
900 mempool_free(line->map_bitmap, pblk->line_meta_pool);
901 line->map_bitmap = NULL;
902 line->smeta = NULL;
903 line->emeta = NULL;
904 } else {
905 if (open_lines > 1)
906 pr_err("pblk: failed to recover L2P\n");
907
908 open_lines++;
909 line->meta_line = meta_line;
910 data_line = line;
911 }
912 }
913
914 spin_lock(&l_mg->free_lock);
915 if (!open_lines) {
916 WARN_ON_ONCE(!test_and_clear_bit(meta_line,
917 &l_mg->meta_bitmap));
918 pblk_line_replace_data(pblk);
919 } else {
920 /* Allocate next line for preparation */
921 l_mg->data_next = pblk_line_get(pblk);
922 if (l_mg->data_next) {
923 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
924 l_mg->data_next->type = PBLK_LINETYPE_DATA;
925 is_next = 1;
926 }
927 }
928 spin_unlock(&l_mg->free_lock);
929
930 if (is_next) {
931 pblk_line_erase(pblk, l_mg->data_next);
932 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
933 }
934
935out:
936 if (found_lines != recovered_lines)
937 pr_err("pblk: failed to recover all found lines %d/%d\n",
938 found_lines, recovered_lines);
939
940 return data_line;
941}
942
943/*
944 * Pad until smeta can be read on the current data line
945 */
946void pblk_recov_pad(struct pblk *pblk)
947{
948 struct nvm_tgt_dev *dev = pblk->dev;
949 struct nvm_geo *geo = &dev->geo;
950 struct pblk_line *line;
951 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
952 struct nvm_rq *rqd;
953 struct pblk_recov_alloc p;
954 struct ppa_addr *ppa_list;
955 struct pblk_sec_meta *meta_list;
956 void *data;
957 dma_addr_t dma_ppa_list, dma_meta_list;
958
959 spin_lock(&l_mg->free_lock);
960 line = l_mg->data_line;
961 spin_unlock(&l_mg->free_lock);
962
963 rqd = pblk_alloc_rqd(pblk, READ);
964 if (IS_ERR(rqd))
965 return;
966
967 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
968 if (!meta_list)
969 goto free_rqd;
970
971 ppa_list = (void *)(meta_list) + pblk_dma_meta_size;
972 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
973
974 data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL);
975 if (!data)
976 goto free_meta_list;
977
978 p.ppa_list = ppa_list;
979 p.meta_list = meta_list;
980 p.rqd = rqd;
981 p.data = data;
982 p.dma_ppa_list = dma_ppa_list;
983 p.dma_meta_list = dma_meta_list;
984
985 if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) {
986 pr_err("pblk: Tear down padding failed\n");
987 goto free_data;
988 }
989
990 pblk_line_close(pblk, line);
991
992free_data:
993 kfree(data);
994free_meta_list:
995 nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
996free_rqd:
997 pblk_free_rqd(pblk, rqd, READ);
998}
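Taken together, pblk_recov_l2p() implements a two-tier scan recovery: every line is first validated through its smeta (CRC, magic, instance uuid) and ordered by sequence number, and each surviving line is then replayed either from its emeta lba list or, when emeta is unreadable or fails its CRC, by scanning the per-sector out-of-band metadata. The sketch below condenses that control flow; the sequence-number ordering is omitted for brevity, and every helper is a hypothetical stub standing in for the corresponding pblk function rather than the real thing.

#include <stdio.h>

/* Hypothetical stubs: each returns 0 on success, mimicking the purpose of
 * the pblk helpers in this patch.
 */
static int read_and_check_smeta(int line)  { return line % 4 ? 0 : -1; }
static int read_and_check_emeta(int line)  { return line % 3 ? 0 : -1; }
static int recover_from_emeta(int line)
{
	printf("line %d: recovered from emeta\n", line);
	return 0;
}
static void recover_from_oob_scan(int line)
{
	printf("line %d: recovered by OOB scan\n", line);
}

int main(void)
{
	int nr_lines = 6;	/* hypothetical line count */
	int line;

	for (line = 0; line < nr_lines; line++) {
		/* Tier 1: lines whose smeta does not validate are treated as
		 * never written and skipped.
		 */
		if (read_and_check_smeta(line))
			continue;

		/* Tier 2: prefer the compact emeta lba list; fall back to the
		 * slow per-sector OOB scan when emeta is unusable.
		 */
		if (read_and_check_emeta(line) || recover_from_emeta(line))
			recover_from_oob_scan(line);
	}
	return 0;
}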
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
new file mode 100644
index 000000000000..4042162ec9bc
--- /dev/null
+++ b/drivers/lightnvm/pblk-rl.c
@@ -0,0 +1,182 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-rl.c - pblk's rate limiter for user I/O
16 *
17 */
18
19#include "pblk.h"
20
21static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
22{
23 mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
24}
25
26int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
27{
28 int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
29
30 return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
31}
32
33int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
34{
35 int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
36 int rb_user_active;
37
38 /* If there is no user I/O let GC take over space on the write buffer */
39 rb_user_active = READ_ONCE(rl->rb_user_active);
40 return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
41}
42
43void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
44{
45 atomic_add(nr_entries, &rl->rb_user_cnt);
46
47 /* Release user I/O state. Protect from GC */
48 smp_store_release(&rl->rb_user_active, 1);
49 pblk_rl_kick_u_timer(rl);
50}
51
52void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
53{
54 atomic_add(nr_entries, &rl->rb_gc_cnt);
55}
56
57void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
58{
59 atomic_sub(nr_user, &rl->rb_user_cnt);
60 atomic_sub(nr_gc, &rl->rb_gc_cnt);
61}
62
63unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
64{
65 return atomic_read(&rl->free_blocks);
66}
67
68/*
69 * We check for (i) the number of free blocks in the current LUN and (ii) the
70 * total number of free blocks in the pblk instance. This is to even out the
71 * number of free blocks on each LUN when GC kicks in.
72 *
73 * Only the total number of free blocks is used to configure the rate limiter.
74 */
75static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
76{
77 unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
78
79 if (free_blocks >= rl->high) {
80 rl->rb_user_max = max - rl->rb_gc_rsv;
81 rl->rb_gc_max = rl->rb_gc_rsv;
82 rl->rb_state = PBLK_RL_HIGH;
83 } else if (free_blocks < rl->high) {
84 int shift = rl->high_pw - rl->rb_windows_pw;
85 int user_windows = free_blocks >> shift;
86 int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
87 int gc_max;
88
89 rl->rb_user_max = user_max;
90 gc_max = max - rl->rb_user_max;
91 rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
92
93 if (free_blocks > rl->low)
94 rl->rb_state = PBLK_RL_MID;
95 else
96 rl->rb_state = PBLK_RL_LOW;
97 }
98
99 return rl->rb_state;
100}
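When free blocks fall below the high watermark, pblk_rl_update_rates() scales the user budget with the number of free blocks: free_blocks is shifted down by (high_pw - rb_windows_pw) to yield a number of write-buffer windows, and each window is then converted to buffer entries via the PBLK_MAX_REQ_ADDRS_PW shift. The standalone program below walks through that arithmetic with made-up geometry (watermarks, shifts and block counts are illustrative only, not values from this patch).

#include <stdio.h>

int main(void)
{
	/* Hypothetical instance: values chosen only to illustrate the math */
	int high_pw = 10;		/* get_count_order(high watermark)   */
	int rb_windows_pw = 6;		/* buffer budget expressed in windows */
	int max_req_addrs_pw = 6;	/* entries per window, as a shift     */
	unsigned long free_blocks;

	for (free_blocks = 896; free_blocks >= 128; free_blocks /= 2) {
		int shift = high_pw - rb_windows_pw;
		int user_windows = free_blocks >> shift;
		int user_max = user_windows << max_req_addrs_pw;

		printf("free=%4lu -> windows=%2d -> user_max=%4d entries\n",
		       free_blocks, user_windows, user_max);
	}
	return 0;
}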
101
102void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
103{
104 rl->rb_gc_rsv = rl->rb_gc_max = rsv;
105}
106
107void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
108{
109 struct pblk *pblk = container_of(rl, struct pblk, rl);
110 int ret;
111
112 atomic_add(line->blk_in_line, &rl->free_blocks);
113 /* Rates will not change that often - no need to lock update */
114 ret = pblk_rl_update_rates(rl, rl->rb_budget);
115
116 if (ret == (PBLK_RL_MID | PBLK_RL_LOW))
117 pblk_gc_should_start(pblk);
118 else
119 pblk_gc_should_stop(pblk);
120}
121
122void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
123{
124 struct pblk *pblk = container_of(rl, struct pblk, rl);
125 int ret;
126
127 atomic_sub(line->blk_in_line, &rl->free_blocks);
128
129 /* Rates will not change that often - no need to lock update */
130 ret = pblk_rl_update_rates(rl, rl->rb_budget);
131 if (ret == (PBLK_RL_MID | PBLK_RL_LOW))
132 pblk_gc_should_start(pblk);
133 else
134 pblk_gc_should_stop(pblk);
135}
136
137int pblk_rl_gc_thrs(struct pblk_rl *rl)
138{
139 return rl->high;
140}
141
142int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
143{
144 return rl->rb_user_max;
145}
146
147static void pblk_rl_u_timer(unsigned long data)
148{
149 struct pblk_rl *rl = (struct pblk_rl *)data;
150
151 /* Release user I/O state. Protect from GC */
152 smp_store_release(&rl->rb_user_active, 0);
153}
154
155void pblk_rl_free(struct pblk_rl *rl)
156{
157 del_timer(&rl->u_timer);
158}
159
160void pblk_rl_init(struct pblk_rl *rl, int budget)
161{
162 unsigned int rb_windows;
163
164 rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
165 rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
166 rl->high_pw = get_count_order(rl->high);
167
168 /* This will always be a power-of-2 */
169 rb_windows = budget / PBLK_MAX_REQ_ADDRS;
170 rl->rb_windows_pw = get_count_order(rb_windows) + 1;
171
172 /* To start with, the entire buffer is available to user I/O writers */
173 rl->rb_budget = budget;
174 rl->rb_user_max = budget;
175 atomic_set(&rl->rb_user_cnt, 0);
176 rl->rb_gc_max = 0;
177 rl->rb_state = PBLK_RL_HIGH;
178 atomic_set(&rl->rb_gc_cnt, 0);
179
180 setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
181 rl->rb_user_active = 0;
182}
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
new file mode 100644
index 000000000000..f0af1d1ceeff
--- /dev/null
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -0,0 +1,507 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Implementation of a physical block-device target for Open-channel SSDs.
16 *
17 * pblk-sysfs.c - pblk's sysfs
18 *
19 */
20
21#include "pblk.h"
22
23static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
24{
25 struct nvm_tgt_dev *dev = pblk->dev;
26 struct nvm_geo *geo = &dev->geo;
27 struct pblk_lun *rlun;
28 ssize_t sz = 0;
29 int i;
30
31 for (i = 0; i < geo->nr_luns; i++) {
32 int active = 1;
33
34 rlun = &pblk->luns[i];
35 if (!down_trylock(&rlun->wr_sem)) {
36 active = 0;
37 up(&rlun->wr_sem);
38 }
39 sz += snprintf(page + sz, PAGE_SIZE - sz,
40 "pblk: pos:%d, ch:%d, lun:%d - %d\n",
41 i,
42 rlun->bppa.g.ch,
43 rlun->bppa.g.lun,
44 active);
45 }
46
47 return sz;
48}
49
50static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
51{
52 struct nvm_tgt_dev *dev = pblk->dev;
53 struct nvm_geo *geo = &dev->geo;
54 int free_blocks, total_blocks;
55 int rb_user_max, rb_user_cnt;
56 int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
57
58 free_blocks = atomic_read(&pblk->rl.free_blocks);
59 rb_user_max = pblk->rl.rb_user_max;
60 rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
61 rb_gc_max = pblk->rl.rb_gc_max;
62 rb_gc_rsv = pblk->rl.rb_gc_rsv;
63 rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
64 rb_budget = pblk->rl.rb_budget;
65 rb_state = pblk->rl.rb_state;
66
67 total_blocks = geo->blks_per_lun * geo->nr_luns;
68
69 return snprintf(page, PAGE_SIZE,
70 "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
71 rb_user_cnt,
72 rb_user_max,
73 rb_gc_cnt,
74 rb_gc_max,
75 rb_gc_rsv,
76 rb_state,
77 rb_budget,
78 pblk->rl.low,
79 pblk->rl.high,
80 free_blocks,
81 total_blocks,
82 READ_ONCE(pblk->rl.rb_user_active));
83}
84
85static ssize_t pblk_sysfs_gc_state_show(struct pblk *pblk, char *page)
86{
87 int gc_enabled, gc_active;
88
89 pblk_gc_sysfs_state_show(pblk, &gc_enabled, &gc_active);
90 return snprintf(page, PAGE_SIZE, "gc_enabled=%d, gc_active=%d\n",
91 gc_enabled, gc_active);
92}
93
94static ssize_t pblk_sysfs_stats(struct pblk *pblk, char *page)
95{
96 ssize_t sz;
97
98 sz = snprintf(page, PAGE_SIZE,
99 "read_failed=%lu, read_high_ecc=%lu, read_empty=%lu, read_failed_gc=%lu, write_failed=%lu, erase_failed=%lu\n",
100 atomic_long_read(&pblk->read_failed),
101 atomic_long_read(&pblk->read_high_ecc),
102 atomic_long_read(&pblk->read_empty),
103 atomic_long_read(&pblk->read_failed_gc),
104 atomic_long_read(&pblk->write_failed),
105 atomic_long_read(&pblk->erase_failed));
106
107 return sz;
108}
109
110static ssize_t pblk_sysfs_write_buffer(struct pblk *pblk, char *page)
111{
112 return pblk_rb_sysfs(&pblk->rwb, page);
113}
114
115static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
116{
117 struct nvm_tgt_dev *dev = pblk->dev;
118 struct nvm_geo *geo = &dev->geo;
119 ssize_t sz = 0;
120
121 sz = snprintf(page, PAGE_SIZE - sz,
122 "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
123 pblk->ppaf_bitsize,
124 pblk->ppaf.blk_offset, geo->ppaf.blk_len,
125 pblk->ppaf.pg_offset, geo->ppaf.pg_len,
126 pblk->ppaf.lun_offset, geo->ppaf.lun_len,
127 pblk->ppaf.ch_offset, geo->ppaf.ch_len,
128 pblk->ppaf.pln_offset, geo->ppaf.pln_len,
129 pblk->ppaf.sec_offset, geo->ppaf.sect_len);
130
131 sz += snprintf(page + sz, PAGE_SIZE - sz,
132 "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
133 geo->ppaf.blk_offset, geo->ppaf.blk_len,
134 geo->ppaf.pg_offset, geo->ppaf.pg_len,
135 geo->ppaf.lun_offset, geo->ppaf.lun_len,
136 geo->ppaf.ch_offset, geo->ppaf.ch_len,
137 geo->ppaf.pln_offset, geo->ppaf.pln_len,
138 geo->ppaf.sect_offset, geo->ppaf.sect_len);
139
140 return sz;
141}
142
143static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
144{
145 struct nvm_tgt_dev *dev = pblk->dev;
146 struct nvm_geo *geo = &dev->geo;
147 struct pblk_line_meta *lm = &pblk->lm;
148 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
149 struct pblk_line *line;
150 ssize_t sz = 0;
151 int nr_free_lines;
152 int cur_data, cur_log;
153 int free_line_cnt = 0, closed_line_cnt = 0;
154 int d_line_cnt = 0, l_line_cnt = 0;
155 int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
156 int free = 0, bad = 0, cor = 0;
157 int msecs = 0, ssecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
158 int map_weight = 0, meta_weight = 0;
159
160 spin_lock(&l_mg->free_lock);
161 cur_data = (l_mg->data_line) ? l_mg->data_line->id : -1;
162 cur_log = (l_mg->log_line) ? l_mg->log_line->id : -1;
163 nr_free_lines = l_mg->nr_free_lines;
164
165 list_for_each_entry(line, &l_mg->free_list, list)
166 free_line_cnt++;
167 spin_unlock(&l_mg->free_lock);
168
169 spin_lock(&l_mg->gc_lock);
170 list_for_each_entry(line, &l_mg->gc_full_list, list) {
171 if (line->type == PBLK_LINETYPE_DATA)
172 d_line_cnt++;
173 else if (line->type == PBLK_LINETYPE_LOG)
174 l_line_cnt++;
175 closed_line_cnt++;
176 gc_full++;
177 }
178
179 list_for_each_entry(line, &l_mg->gc_high_list, list) {
180 if (line->type == PBLK_LINETYPE_DATA)
181 d_line_cnt++;
182 else if (line->type == PBLK_LINETYPE_LOG)
183 l_line_cnt++;
184 closed_line_cnt++;
185 gc_high++;
186 }
187
188 list_for_each_entry(line, &l_mg->gc_mid_list, list) {
189 if (line->type == PBLK_LINETYPE_DATA)
190 d_line_cnt++;
191 else if (line->type == PBLK_LINETYPE_LOG)
192 l_line_cnt++;
193 closed_line_cnt++;
194 gc_mid++;
195 }
196
197 list_for_each_entry(line, &l_mg->gc_low_list, list) {
198 if (line->type == PBLK_LINETYPE_DATA)
199 d_line_cnt++;
200 else if (line->type == PBLK_LINETYPE_LOG)
201 l_line_cnt++;
202 closed_line_cnt++;
203 gc_low++;
204 }
205
206 list_for_each_entry(line, &l_mg->gc_empty_list, list) {
207 if (line->type == PBLK_LINETYPE_DATA)
208 d_line_cnt++;
209 else if (line->type == PBLK_LINETYPE_LOG)
210 l_line_cnt++;
211 closed_line_cnt++;
212 gc_empty++;
213 }
214
215 list_for_each_entry(line, &l_mg->free_list, list)
216 free++;
217 list_for_each_entry(line, &l_mg->bad_list, list)
218 bad++;
219 list_for_each_entry(line, &l_mg->corrupt_list, list)
220 cor++;
221 spin_unlock(&l_mg->gc_lock);
222
223 spin_lock(&l_mg->free_lock);
224 if (l_mg->data_line) {
225 cur_sec = l_mg->data_line->cur_sec;
226 msecs = l_mg->data_line->left_msecs;
227 ssecs = l_mg->data_line->left_ssecs;
228 vsc = l_mg->data_line->vsc;
229 sec_in_line = l_mg->data_line->sec_in_line;
230 meta_weight = bitmap_weight(&l_mg->meta_bitmap,
231 PBLK_DATA_LINES);
232 map_weight = bitmap_weight(l_mg->data_line->map_bitmap,
233 lm->sec_per_line);
234 }
235 spin_unlock(&l_mg->free_lock);
236
237 if (nr_free_lines != free_line_cnt)
238 pr_err("pblk: corrupted free line list\n");
239
240 sz = snprintf(page, PAGE_SIZE - sz,
241 "line: nluns:%d, nblks:%d, nsecs:%d\n",
242 geo->nr_luns, lm->blk_per_line, lm->sec_per_line);
243
244 sz += snprintf(page + sz, PAGE_SIZE - sz,
245 "lines:d:%d,l:%d-f:%d(%d),b:%d,co:%d,c:%d(d:%d,l:%d)t:%d\n",
246 cur_data, cur_log,
247 free, nr_free_lines, bad, cor,
248 closed_line_cnt,
249 d_line_cnt, l_line_cnt,
250 l_mg->nr_lines);
251
252 sz += snprintf(page + sz, PAGE_SIZE - sz,
253 "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
254 gc_full, gc_high, gc_mid, gc_low, gc_empty,
255 atomic_read(&pblk->gc.inflight_gc));
256
257 sz += snprintf(page + sz, PAGE_SIZE - sz,
258 "data (%d) cur:%d, left:%d/%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
259 cur_data, cur_sec, msecs, ssecs, vsc, sec_in_line,
260 map_weight, lm->sec_per_line, meta_weight);
261
262 return sz;
263}
264
265static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
266{
267 struct nvm_tgt_dev *dev = pblk->dev;
268 struct nvm_geo *geo = &dev->geo;
269 struct pblk_line_meta *lm = &pblk->lm;
270 ssize_t sz = 0;
271
272 sz = snprintf(page, PAGE_SIZE - sz,
273 "smeta - len:%d, secs:%d\n",
274 lm->smeta_len, lm->smeta_sec);
275 sz += snprintf(page + sz, PAGE_SIZE - sz,
276 "emeta - len:%d, sec:%d, bb_start:%d\n",
277 lm->emeta_len, lm->emeta_sec,
278 lm->emeta_bb);
279 sz += snprintf(page + sz, PAGE_SIZE - sz,
280 "bitmap lengths: sec:%d, blk:%d, lun:%d\n",
281 lm->sec_bitmap_len,
282 lm->blk_bitmap_len,
283 lm->lun_bitmap_len);
284 sz += snprintf(page + sz, PAGE_SIZE - sz,
285 "blk_line:%d, sec_line:%d, sec_blk:%d\n",
286 lm->blk_per_line,
287 lm->sec_per_line,
288 geo->sec_per_blk);
289
290 return sz;
291}
292
293#ifdef CONFIG_NVM_DEBUG
294static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
295{
296 return snprintf(page, PAGE_SIZE,
297 "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
298 atomic_long_read(&pblk->inflight_writes),
299 atomic_long_read(&pblk->inflight_reads),
300 atomic_long_read(&pblk->req_writes),
301 atomic_long_read(&pblk->nr_flush),
302 atomic_long_read(&pblk->padded_writes),
303 atomic_long_read(&pblk->padded_wb),
304 atomic_long_read(&pblk->sub_writes),
305 atomic_long_read(&pblk->sync_writes),
306 atomic_long_read(&pblk->compl_writes),
307 atomic_long_read(&pblk->recov_writes),
308 atomic_long_read(&pblk->recov_gc_writes),
309 atomic_long_read(&pblk->recov_gc_reads),
310 atomic_long_read(&pblk->sync_reads));
311}
312#endif
313
314static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
315 size_t len)
316{
317 struct pblk_gc *gc = &pblk->gc;
318 size_t c_len;
319 int value;
320
321 c_len = strcspn(page, "\n");
322 if (c_len >= len)
323 return -EINVAL;
324
325 if (kstrtouint(page, 0, &value))
326 return -EINVAL;
327
328 spin_lock(&gc->lock);
329 pblk_rl_set_gc_rsc(&pblk->rl, value);
330 spin_unlock(&gc->lock);
331
332 return len;
333}
334
335static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
336 size_t len)
337{
338 size_t c_len;
339 int force;
340
341 c_len = strcspn(page, "\n");
342 if (c_len >= len)
343 return -EINVAL;
344
345 if (kstrtouint(page, 0, &force))
346 return -EINVAL;
347
348 if (force < 0 || force > 1)
349 return -EINVAL;
350
351 pblk_gc_sysfs_force(pblk, force);
352
353 return len;
354}
355
356static struct attribute sys_write_luns = {
357 .name = "write_luns",
358 .mode = 0444,
359};
360
361static struct attribute sys_rate_limiter_attr = {
362 .name = "rate_limiter",
363 .mode = 0444,
364};
365
366static struct attribute sys_gc_state = {
367 .name = "gc_state",
368 .mode = 0444,
369};
370
371static struct attribute sys_errors_attr = {
372 .name = "errors",
373 .mode = 0444,
374};
375
376static struct attribute sys_rb_attr = {
377 .name = "write_buffer",
378 .mode = 0444,
379};
380
381static struct attribute sys_stats_ppaf_attr = {
382 .name = "ppa_format",
383 .mode = 0444,
384};
385
386static struct attribute sys_lines_attr = {
387 .name = "lines",
388 .mode = 0444,
389};
390
391static struct attribute sys_lines_info_attr = {
392 .name = "lines_info",
393 .mode = 0444,
394};
395
396static struct attribute sys_gc_force = {
397 .name = "gc_force",
398 .mode = 0200,
399};
400
401static struct attribute sys_gc_rl_max = {
402 .name = "gc_rl_max",
403 .mode = 0200,
404};
405
406#ifdef CONFIG_NVM_DEBUG
407static struct attribute sys_stats_debug_attr = {
408 .name = "stats",
409 .mode = 0444,
410};
411#endif
412
413static struct attribute *pblk_attrs[] = {
414 &sys_write_luns,
415 &sys_rate_limiter_attr,
416 &sys_errors_attr,
417 &sys_gc_state,
418 &sys_gc_force,
419 &sys_gc_rl_max,
420 &sys_rb_attr,
421 &sys_stats_ppaf_attr,
422 &sys_lines_attr,
423 &sys_lines_info_attr,
424#ifdef CONFIG_NVM_DEBUG
425 &sys_stats_debug_attr,
426#endif
427 NULL,
428};
429
430static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
431 char *buf)
432{
433 struct pblk *pblk = container_of(kobj, struct pblk, kobj);
434
435 if (strcmp(attr->name, "rate_limiter") == 0)
436 return pblk_sysfs_rate_limiter(pblk, buf);
437 else if (strcmp(attr->name, "write_luns") == 0)
438 return pblk_sysfs_luns_show(pblk, buf);
439 else if (strcmp(attr->name, "gc_state") == 0)
440 return pblk_sysfs_gc_state_show(pblk, buf);
441 else if (strcmp(attr->name, "errors") == 0)
442 return pblk_sysfs_stats(pblk, buf);
443 else if (strcmp(attr->name, "write_buffer") == 0)
444 return pblk_sysfs_write_buffer(pblk, buf);
445 else if (strcmp(attr->name, "ppa_format") == 0)
446 return pblk_sysfs_ppaf(pblk, buf);
447 else if (strcmp(attr->name, "lines") == 0)
448 return pblk_sysfs_lines(pblk, buf);
449 else if (strcmp(attr->name, "lines_info") == 0)
450 return pblk_sysfs_lines_info(pblk, buf);
451#ifdef CONFIG_NVM_DEBUG
452 else if (strcmp(attr->name, "stats") == 0)
453 return pblk_sysfs_stats_debug(pblk, buf);
454#endif
455 return 0;
456}
457
458static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
459 const char *buf, size_t len)
460{
461 struct pblk *pblk = container_of(kobj, struct pblk, kobj);
462
463 if (strcmp(attr->name, "gc_rl_max") == 0)
464 return pblk_sysfs_rate_store(pblk, buf, len);
465 else if (strcmp(attr->name, "gc_force") == 0)
466 return pblk_sysfs_gc_force(pblk, buf, len);
467
468 return 0;
469}
470
471static const struct sysfs_ops pblk_sysfs_ops = {
472 .show = pblk_sysfs_show,
473 .store = pblk_sysfs_store,
474};
475
476static struct kobj_type pblk_ktype = {
477 .sysfs_ops = &pblk_sysfs_ops,
478 .default_attrs = pblk_attrs,
479};
480
481int pblk_sysfs_init(struct gendisk *tdisk)
482{
483 struct pblk *pblk = tdisk->private_data;
484 struct device *parent_dev = disk_to_dev(pblk->disk);
485 int ret;
486
487 ret = kobject_init_and_add(&pblk->kobj, &pblk_ktype,
488 kobject_get(&parent_dev->kobj),
489 "%s", "pblk");
490 if (ret) {
491 pr_err("pblk: could not register %s/pblk\n",
492 tdisk->disk_name);
493 return ret;
494 }
495
496 kobject_uevent(&pblk->kobj, KOBJ_ADD);
497 return 0;
498}
499
500void pblk_sysfs_exit(struct gendisk *tdisk)
501{
502 struct pblk *pblk = tdisk->private_data;
503
504 kobject_uevent(&pblk->kobj, KOBJ_REMOVE);
505 kobject_del(&pblk->kobj);
506 kobject_put(&pblk->kobj);
507}
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
new file mode 100644
index 000000000000..ee57db993cd1
--- /dev/null
+++ b/drivers/lightnvm/pblk-write.c
@@ -0,0 +1,411 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-write.c - pblk's write path from write buffer to media
16 */
17
18#include "pblk.h"
19
20static void pblk_sync_line(struct pblk *pblk, struct pblk_line *line)
21{
22#ifdef CONFIG_NVM_DEBUG
23 atomic_long_inc(&pblk->sync_writes);
24#endif
25
26 /* Counter protected by rb sync lock */
27 line->left_ssecs--;
28 if (!line->left_ssecs)
29 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
30}
31
32static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd,
33 struct pblk_c_ctx *c_ctx)
34{
35 struct nvm_tgt_dev *dev = pblk->dev;
36 struct bio *original_bio;
37 unsigned long ret;
38 int i;
39
40 for (i = 0; i < c_ctx->nr_valid; i++) {
41 struct pblk_w_ctx *w_ctx;
42 struct ppa_addr p;
43 struct pblk_line *line;
44
45 w_ctx = pblk_rb_w_ctx(&pblk->rwb, c_ctx->sentry + i);
46
47 p = rqd->ppa_list[i];
48 line = &pblk->lines[pblk_dev_ppa_to_line(p)];
49 pblk_sync_line(pblk, line);
50
51 while ((original_bio = bio_list_pop(&w_ctx->bios)))
52 bio_endio(original_bio);
53 }
54
55#ifdef CONFIG_NVM_DEBUG
56 atomic_long_add(c_ctx->nr_valid, &pblk->compl_writes);
57#endif
58
59 ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid);
60
61 if (rqd->meta_list)
62 nvm_dev_dma_free(dev->parent, rqd->meta_list,
63 rqd->dma_meta_list);
64
65 bio_put(rqd->bio);
66 pblk_free_rqd(pblk, rqd, WRITE);
67
68 return ret;
69}
70
71static unsigned long pblk_end_queued_w_bio(struct pblk *pblk,
72 struct nvm_rq *rqd,
73 struct pblk_c_ctx *c_ctx)
74{
75 list_del(&c_ctx->list);
76 return pblk_end_w_bio(pblk, rqd, c_ctx);
77}
78
79static void pblk_complete_write(struct pblk *pblk, struct nvm_rq *rqd,
80 struct pblk_c_ctx *c_ctx)
81{
82 struct pblk_c_ctx *c, *r;
83 unsigned long flags;
84 unsigned long pos;
85
86#ifdef CONFIG_NVM_DEBUG
87 atomic_long_sub(c_ctx->nr_valid, &pblk->inflight_writes);
88#endif
89
90 pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);
91
92 pos = pblk_rb_sync_init(&pblk->rwb, &flags);
93 if (pos == c_ctx->sentry) {
94 pos = pblk_end_w_bio(pblk, rqd, c_ctx);
95
96retry:
97 list_for_each_entry_safe(c, r, &pblk->compl_list, list) {
98 rqd = nvm_rq_from_c_ctx(c);
99 if (c->sentry == pos) {
100 pos = pblk_end_queued_w_bio(pblk, rqd, c);
101 goto retry;
102 }
103 }
104 } else {
105 WARN_ON(nvm_rq_from_c_ctx(c_ctx) != rqd);
106 list_add_tail(&c_ctx->list, &pblk->compl_list);
107 }
108 pblk_rb_sync_end(&pblk->rwb, &flags);
109}
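
The sentry bookkeeping above keeps the sync pointer advancing strictly in ring order even though device completions arrive out of order: a completion is only drained when its start position matches the current sync position, everything else is parked on compl_list and retried after each drain. A toy userspace model of that policy (illustrative only, not the pblk data structures):

#include <stdio.h>

struct toy_compl {
	int sentry;	/* first ring position covered by this completion */
	int nr;		/* number of entries it covers */
	int done;
};

int main(void)
{
	/* Three write completions arriving out of order. */
	struct toy_compl q[] = { { 8, 4, 0 }, { 0, 8, 0 }, { 12, 4, 0 } };
	int sync = 0, progress = 1;

	while (progress) {
		progress = 0;
		for (int i = 0; i < 3; i++) {
			if (!q[i].done && q[i].sentry == sync) {
				sync += q[i].nr;	/* in-order advance */
				q[i].done = 1;
				progress = 1;
				printf("drained sentry %d, sync now %d\n",
				       q[i].sentry, sync);
			}
		}
	}
	return 0;
}
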
110
111/* When a write fails, we are not sure whether the block has grown bad or a page
112 * range is more susceptible to write errors. If a high number of pages fail, we
113 * assume that the block is bad and we mark it accordingly. In all cases, we
114 * remap and resubmit the failed entries as fast as possible; if a flush is
115 * waiting on a completion, the whole stack would stall otherwise.
116 */
117static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
118{
119 void *comp_bits = &rqd->ppa_status;
120 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
121 struct pblk_rec_ctx *recovery;
122 struct ppa_addr *ppa_list = rqd->ppa_list;
123 int nr_ppas = rqd->nr_ppas;
124 unsigned int c_entries;
125 int bit, ret;
126
127 if (unlikely(nr_ppas == 1))
128 ppa_list = &rqd->ppa_addr;
129
130 recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
131 if (!recovery) {
132 pr_err("pblk: could not allocate recovery context\n");
133 return;
134 }
135 INIT_LIST_HEAD(&recovery->failed);
136
137 bit = -1;
138 while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
139 struct pblk_rb_entry *entry;
140 struct ppa_addr ppa;
141
142 /* Logic error */
143 if (bit > c_ctx->nr_valid) {
144			WARN_ONCE(1, "pblk: corrupted write request\n");
145 goto out;
146 }
147
148 ppa = ppa_list[bit];
149 entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
150 if (!entry) {
151 pr_err("pblk: could not scan entry on write failure\n");
152 goto out;
153 }
154
155		/* The list is filled first and emptied afterwards. No need
156		 * to protect it with a lock
157 */
158 list_add_tail(&entry->index, &recovery->failed);
159 }
160
161 c_entries = find_first_bit(comp_bits, nr_ppas);
162 ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
163 if (ret) {
164 pr_err("pblk: could not recover from write failure\n");
165 goto out;
166 }
167
168 INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
169 queue_work(pblk->kw_wq, &recovery->ws_rec);
170
171out:
172 pblk_complete_write(pblk, rqd, c_ctx);
173}
174
175static void pblk_end_io_write(struct nvm_rq *rqd)
176{
177 struct pblk *pblk = rqd->private;
178 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
179
180 if (rqd->error) {
181 pblk_log_write_err(pblk, rqd);
182 return pblk_end_w_fail(pblk, rqd);
183 }
184#ifdef CONFIG_NVM_DEBUG
185 else
186 WARN_ONCE(rqd->bio->bi_error, "pblk: corrupted write error\n");
187#endif
188
189 pblk_complete_write(pblk, rqd, c_ctx);
190}
191
192static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
193 unsigned int nr_secs)
194{
195 struct nvm_tgt_dev *dev = pblk->dev;
196
197 /* Setup write request */
198 rqd->opcode = NVM_OP_PWRITE;
199 rqd->nr_ppas = nr_secs;
200 rqd->flags = pblk_set_progr_mode(pblk, WRITE);
201 rqd->private = pblk;
202 rqd->end_io = pblk_end_io_write;
203
204 rqd->meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
205 &rqd->dma_meta_list);
206 if (!rqd->meta_list)
207 return -ENOMEM;
208
209 if (unlikely(nr_secs == 1))
210 return 0;
211
212 rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
213 rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
214
215 return 0;
216}
217
218static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
219 struct pblk_c_ctx *c_ctx)
220{
221 struct pblk_line_meta *lm = &pblk->lm;
222 struct pblk_line *e_line = pblk_line_get_data_next(pblk);
223 struct ppa_addr erase_ppa;
224 unsigned int valid = c_ctx->nr_valid;
225 unsigned int padded = c_ctx->nr_padded;
226 unsigned int nr_secs = valid + padded;
227 unsigned long *lun_bitmap;
228 int ret = 0;
229
230 lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
231 if (!lun_bitmap) {
232 ret = -ENOMEM;
233 goto out;
234 }
235 c_ctx->lun_bitmap = lun_bitmap;
236
237 ret = pblk_alloc_w_rq(pblk, rqd, nr_secs);
238 if (ret) {
239 kfree(lun_bitmap);
240 goto out;
241 }
242
243 ppa_set_empty(&erase_ppa);
244 if (likely(!e_line || !e_line->left_eblks))
245 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, valid, 0);
246 else
247 pblk_map_erase_rq(pblk, rqd, c_ctx->sentry, lun_bitmap,
248 valid, &erase_ppa);
249
250out:
251 if (unlikely(e_line && !ppa_empty(erase_ppa))) {
252 if (pblk_blk_erase_async(pblk, erase_ppa)) {
253 struct nvm_tgt_dev *dev = pblk->dev;
254 struct nvm_geo *geo = &dev->geo;
255 int bit;
256
257 e_line->left_eblks++;
258 bit = erase_ppa.g.lun * geo->nr_chnls + erase_ppa.g.ch;
259 WARN_ON(!test_and_clear_bit(bit, e_line->erase_bitmap));
260 up(&pblk->erase_sem);
261 }
262 }
263
264 return ret;
265}
266
267int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
268 struct pblk_c_ctx *c_ctx)
269{
270 struct pblk_line_meta *lm = &pblk->lm;
271 unsigned long *lun_bitmap;
272 int ret;
273
274 lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
275 if (!lun_bitmap)
276 return -ENOMEM;
277
278 c_ctx->lun_bitmap = lun_bitmap;
279
280 ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas);
281 if (ret)
282 return ret;
283
284 pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
285
286 rqd->ppa_status = (u64)0;
287 rqd->flags = pblk_set_progr_mode(pblk, WRITE);
288
289 return ret;
290}
291
292static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
293 unsigned int secs_to_flush)
294{
295 int secs_to_sync;
296
297 secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);
298
299#ifdef CONFIG_NVM_DEBUG
300 if ((!secs_to_sync && secs_to_flush)
301 || (secs_to_sync < 0)
302 || (secs_to_sync > secs_avail && !secs_to_flush)) {
303 pr_err("pblk: bad sector calculation (a:%d,s:%d,f:%d)\n",
304 secs_avail, secs_to_sync, secs_to_flush);
305 }
306#endif
307
308 return secs_to_sync;
309}
310
311static int pblk_submit_write(struct pblk *pblk)
312{
313 struct bio *bio;
314 struct nvm_rq *rqd;
315 struct pblk_c_ctx *c_ctx;
316 unsigned int pgs_read;
317 unsigned int secs_avail, secs_to_sync, secs_to_com;
318 unsigned int secs_to_flush;
319 unsigned long pos;
320 int err;
321
322 /* If there are no sectors in the cache, flushes (bios without data)
323 * will be cleared on the cache threads
324 */
325 secs_avail = pblk_rb_read_count(&pblk->rwb);
326 if (!secs_avail)
327 return 1;
328
329 secs_to_flush = pblk_rb_sync_point_count(&pblk->rwb);
330 if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
331 return 1;
332
333 rqd = pblk_alloc_rqd(pblk, WRITE);
334 if (IS_ERR(rqd)) {
335 pr_err("pblk: cannot allocate write req.\n");
336 return 1;
337 }
338 c_ctx = nvm_rq_to_pdu(rqd);
339
340 bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs);
341 if (!bio) {
342 pr_err("pblk: cannot allocate write bio\n");
343 goto fail_free_rqd;
344 }
345 bio->bi_iter.bi_sector = 0; /* internal bio */
346 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
347 rqd->bio = bio;
348
349 secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
350 if (secs_to_sync > pblk->max_write_pgs) {
351 pr_err("pblk: bad buffer sync calculation\n");
352 goto fail_put_bio;
353 }
354
355 secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
356 pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
357
358 pgs_read = pblk_rb_read_to_bio(&pblk->rwb, bio, c_ctx, pos,
359 secs_to_sync, secs_avail);
360 if (!pgs_read) {
361 pr_err("pblk: corrupted write bio\n");
362 goto fail_put_bio;
363 }
364
365 if (c_ctx->nr_padded)
366 if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, c_ctx->nr_padded))
367 goto fail_put_bio;
368
369 /* Assign lbas to ppas and populate request structure */
370 err = pblk_setup_w_rq(pblk, rqd, c_ctx);
371 if (err) {
372 pr_err("pblk: could not setup write request\n");
373 goto fail_free_bio;
374 }
375
376 err = pblk_submit_io(pblk, rqd);
377 if (err) {
378 pr_err("pblk: I/O submission failed: %d\n", err);
379 goto fail_free_bio;
380 }
381
382#ifdef CONFIG_NVM_DEBUG
383 atomic_long_add(secs_to_sync, &pblk->sub_writes);
384#endif
385
386 return 0;
387
388fail_free_bio:
389 if (c_ctx->nr_padded)
390 pblk_bio_free_pages(pblk, bio, secs_to_sync, c_ctx->nr_padded);
391fail_put_bio:
392 bio_put(bio);
393fail_free_rqd:
394 pblk_free_rqd(pblk, rqd, WRITE);
395
396 return 1;
397}
398
399int pblk_write_ts(void *data)
400{
401 struct pblk *pblk = data;
402
403 while (!kthread_should_stop()) {
404 if (!pblk_submit_write(pblk))
405 continue;
406 set_current_state(TASK_INTERRUPTIBLE);
407 io_schedule();
408 }
409
410 return 0;
411}
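
The io_schedule() loop above parks the writer until somebody wakes it; the kick and timer helpers declared in pblk.h (pblk_write_should_kick(), pblk_write_timer_fn()) are implemented outside this hunk, so the following is only a sketch of the assumed wake-up side of that handshake:

/* Assumed counterpart to the TASK_INTERRUPTIBLE + io_schedule() loop above.
 * The helper name is hypothetical; the real kick path is implemented
 * elsewhere in the patch.
 */
static inline void pblk_writer_kick_sketch(struct pblk *pblk)
{
	wake_up_process(pblk->writer_ts);
}
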
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
new file mode 100644
index 000000000000..c82120ce3be5
--- /dev/null
+++ b/drivers/lightnvm/pblk.h
@@ -0,0 +1,1121 @@
1/*
2 * Copyright (C) 2015 IT University of Copenhagen (rrpc.h)
3 * Copyright (C) 2016 CNEX Labs
4 * Initial release: Matias Bjorling <matias@cnexlabs.com>
5 * Write buffering: Javier Gonzalez <javier@cnexlabs.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * Implementation of a Physical Block-device target for Open-channel SSDs.
17 *
18 */
19
20#ifndef PBLK_H_
21#define PBLK_H_
22
23#include <linux/blkdev.h>
24#include <linux/blk-mq.h>
25#include <linux/bio.h>
26#include <linux/module.h>
27#include <linux/kthread.h>
28#include <linux/vmalloc.h>
29#include <linux/crc32.h>
30#include <linux/uuid.h>
31
32#include <linux/lightnvm.h>
33
34/* Run only GC if less than 1/X blocks are free */
35#define GC_LIMIT_INVERSE 5
36#define GC_TIME_MSECS 1000
37
38#define PBLK_SECTOR (512)
39#define PBLK_EXPOSED_PAGE_SIZE (4096)
40#define PBLK_MAX_REQ_ADDRS (64)
41#define PBLK_MAX_REQ_ADDRS_PW (6)
42
43#define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16)
44
45#define PBLK_COMMAND_TIMEOUT_MS 30000
46
47/* Max 512 LUNs per device */
48#define PBLK_MAX_LUNS_BITMAP (4)
49
50#define NR_PHY_IN_LOG (PBLK_EXPOSED_PAGE_SIZE / PBLK_SECTOR)
51
52#define pblk_for_each_lun(pblk, rlun, i) \
53 for ((i) = 0, rlun = &(pblk)->luns[0]; \
54 (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)])
55
56#define ERASE 2 /* READ = 0, WRITE = 1 */
57
58enum {
59 /* IO Types */
60 PBLK_IOTYPE_USER = 1 << 0,
61 PBLK_IOTYPE_GC = 1 << 1,
62
63 /* Write buffer flags */
64 PBLK_FLUSH_ENTRY = 1 << 2,
65 PBLK_WRITTEN_DATA = 1 << 3,
66 PBLK_SUBMITTED_ENTRY = 1 << 4,
67 PBLK_WRITABLE_ENTRY = 1 << 5,
68};
69
70enum {
71 PBLK_BLK_ST_OPEN = 0x1,
72 PBLK_BLK_ST_CLOSED = 0x2,
73};
74
75/* The number of GC lists and the rate-limiter states go together. This way the
76 * rate-limiter can dictate how much GC is needed based on resource utilization.
77 */
78#define PBLK_NR_GC_LISTS 3
79#define PBLK_MAX_GC_JOBS 32
80
81enum {
82 PBLK_RL_HIGH = 1,
83 PBLK_RL_MID = 2,
84 PBLK_RL_LOW = 3,
85};
86
87struct pblk_sec_meta {
88 u64 reserved;
89 __le64 lba;
90};
91
92#define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
93
94/* write completion context */
95struct pblk_c_ctx {
96 struct list_head list; /* Head for out-of-order completion */
97
98 unsigned long *lun_bitmap; /* Luns used on current request */
99 unsigned int sentry;
100 unsigned int nr_valid;
101 unsigned int nr_padded;
102};
103
104/* Read context */
105struct pblk_r_ctx {
106 struct bio *orig_bio;
107};
108
109/* Recovery context */
110struct pblk_rec_ctx {
111 struct pblk *pblk;
112 struct nvm_rq *rqd;
113 struct list_head failed;
114 struct work_struct ws_rec;
115};
116
117/* Write context */
118struct pblk_w_ctx {
119 struct bio_list bios; /* Original bios - used for completion
120 * in REQ_FUA, REQ_FLUSH case
121 */
122 sector_t lba; /* Logic addr. associated with entry */
123 struct ppa_addr ppa; /* Physic addr. associated with entry */
124 int flags; /* Write context flags */
125};
126
127struct pblk_rb_entry {
128 struct ppa_addr cacheline; /* Cacheline for this entry */
129 void *data; /* Pointer to data on this entry */
130 struct pblk_w_ctx w_ctx; /* Context for this entry */
131 struct list_head index; /* List head to enable indexes */
132};
133
134#define EMPTY_ENTRY (~0U)
135
136struct pblk_rb_pages {
137 struct page *pages;
138 int order;
139 struct list_head list;
140};
141
142struct pblk_rb {
143 struct pblk_rb_entry *entries; /* Ring buffer entries */
144 unsigned int mem; /* Write offset - points to next
145 * writable entry in memory
146 */
147 unsigned int subm; /* Read offset - points to last entry
148 * that has been submitted to the media
149 * to be persisted
150 */
151 unsigned int sync; /* Synced - backpointer that signals
152 * the last submitted entry that has
153 * been successfully persisted to media
154 */
155 unsigned int sync_point; /* Sync point - last entry that must be
156 * flushed to the media. Used with
157 * REQ_FLUSH and REQ_FUA
158 */
159 unsigned int l2p_update; /* l2p update point - next entry for
160 * which l2p mapping will be updated to
161 * contain a device ppa address (instead
162					 * of a cacheline)
163 */
164 unsigned int nr_entries; /* Number of entries in write buffer -
165 * must be a power of two
166 */
167 unsigned int seg_size; /* Size of the data segments being
168 * stored on each entry. Typically this
169 * will be 4KB
170 */
171
172 struct list_head pages; /* List of data pages */
173
174 spinlock_t w_lock; /* Write lock */
175 spinlock_t s_lock; /* Sync lock */
176
177#ifdef CONFIG_NVM_DEBUG
178 atomic_t inflight_sync_point; /* Not served REQ_FLUSH | REQ_FUA */
179#endif
180};
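
The mem/subm/sync/sync_point offsets chase one another around this ring, and because nr_entries is a power of two (as the field comment states) distances between them reduce to a mask. The real helpers live in pblk-rb.c, which is not part of this hunk; the standalone sketch below only shows the conventional arithmetic such counters are assumed to use:

#include <stdio.h>

/* Positions are assumed to be kept already wrapped to [0, nr_entries). */
static unsigned int rb_read_count(unsigned int mem, unsigned int subm,
				  unsigned int nr_entries)
{
	return (mem - subm) & (nr_entries - 1);	/* entries ready to submit */
}

static unsigned int rb_space(unsigned int mem, unsigned int sync,
			     unsigned int nr_entries)
{
	return (sync - mem - 1) & (nr_entries - 1);	/* writable entries */
}

int main(void)
{
	unsigned int nr = 1024, mem = 30, subm = 10, sync = 5;

	printf("count=%u space=%u\n",
	       rb_read_count(mem, subm, nr), rb_space(mem, sync, nr));
	return 0;
}
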
181
182#define PBLK_RECOVERY_SECTORS 16
183
184struct pblk_lun {
185 struct ppa_addr bppa;
186
187 u8 *bb_list; /* Bad block list for LUN. Only used on
188 * bring up. Bad blocks are managed
189 * within lines on run-time.
190 */
191
192 struct semaphore wr_sem;
193};
194
195struct pblk_gc_rq {
196 struct pblk_line *line;
197 void *data;
198 u64 *lba_list;
199 int nr_secs;
200 int secs_to_gc;
201 struct list_head list;
202};
203
204struct pblk_gc {
205 int gc_active;
206 int gc_enabled;
207 int gc_forced;
208 int gc_jobs_active;
209 atomic_t inflight_gc;
210
211 struct task_struct *gc_ts;
212 struct task_struct *gc_writer_ts;
213 struct workqueue_struct *gc_reader_wq;
214 struct timer_list gc_timer;
215
216 int w_entries;
217 struct list_head w_list;
218
219 spinlock_t lock;
220 spinlock_t w_lock;
221};
222
223struct pblk_rl {
224 unsigned int high; /* Upper threshold for rate limiter (free run -
225				 * user I/O rate limiter)
226 */
227 unsigned int low; /* Lower threshold for rate limiter (user I/O
228 * rate limiter - stall)
229 */
230 unsigned int high_pw; /* High rounded up as a power of 2 */
231
232#define PBLK_USER_HIGH_THRS 2 /* Begin write limit at 50 percent
233 * available blks
234 */
235#define PBLK_USER_LOW_THRS 20 /* Aggressive GC at 5% available blocks */
236
237 int rb_windows_pw; /* Number of rate windows in the write buffer
238 * given as a power-of-2. This guarantees that
239				 * when user I/O is being rate limited, enough
240				 * space is reserved for GC to place its
241				 * payload. A window is of
242 * pblk->max_write_pgs size, which in NVMe is
243 * 64, i.e., 256kb.
244 */
245 int rb_budget; /* Total number of entries available for I/O */
246 int rb_user_max; /* Max buffer entries available for user I/O */
247 atomic_t rb_user_cnt; /* User I/O buffer counter */
248 int rb_gc_max; /* Max buffer entries available for GC I/O */
249 int rb_gc_rsv; /* Reserved buffer entries for GC I/O */
250 int rb_state; /* Rate-limiter current state */
251 atomic_t rb_gc_cnt; /* GC I/O buffer counter */
252
253 int rb_user_active;
254 struct timer_list u_timer;
255
256 unsigned long long nr_secs;
257 unsigned long total_blocks;
258 atomic_t free_blocks;
259};
260
261#define PBLK_LINE_NR_LUN_BITMAP 2
262#define PBLK_LINE_NR_SEC_BITMAP 2
263#define PBLK_LINE_EMPTY (~0U)
264
265enum {
266 /* Line Types */
267 PBLK_LINETYPE_FREE = 0,
268 PBLK_LINETYPE_LOG = 1,
269 PBLK_LINETYPE_DATA = 2,
270
271 /* Line state */
272 PBLK_LINESTATE_FREE = 10,
273 PBLK_LINESTATE_OPEN = 11,
274 PBLK_LINESTATE_CLOSED = 12,
275 PBLK_LINESTATE_GC = 13,
276 PBLK_LINESTATE_BAD = 14,
277 PBLK_LINESTATE_CORRUPT = 15,
278
279 /* GC group */
280 PBLK_LINEGC_NONE = 20,
281 PBLK_LINEGC_EMPTY = 21,
282 PBLK_LINEGC_LOW = 22,
283 PBLK_LINEGC_MID = 23,
284 PBLK_LINEGC_HIGH = 24,
285 PBLK_LINEGC_FULL = 25,
286};
287
288#define PBLK_MAGIC 0x70626c6b /* pblk */
289
290struct line_header {
291 __le32 crc;
292 __le32 identifier; /* pblk identifier */
293 __u8 uuid[16]; /* instance uuid */
294 __le16 type; /* line type */
295 __le16 version; /* type version */
296 __le32 id; /* line id for current line */
297};
298
299struct line_smeta {
300 struct line_header header;
301
302 __le32 crc; /* Full structure including struct crc */
303 /* Previous line metadata */
304 __le32 prev_id; /* Line id for previous line */
305
306 /* Current line metadata */
307 __le64 seq_nr; /* Sequence number for current line */
308
309 /* Active writers */
310 __le32 window_wr_lun; /* Number of parallel LUNs to write */
311
312 __le32 rsvd[2];
313};
314
315/*
316 * Metadata Layout:
317 * 1. struct line_emeta
318 * 2. nr_lbas u64 forming lba list
319 * 3. nr_lines (all) u32 valid sector count (vsc) (~0U: non-alloc line)
320 * 4. nr_luns bits (u64 format) forming line bad block bitmap
321 *
322 * 3. and 4. will be part of FTL log
323 */
324struct line_emeta {
325 struct line_header header;
326
327 __le32 crc; /* Full structure including struct crc */
328
329 /* Previous line metadata */
330 __le32 prev_id; /* Line id for prev line */
331
332 /* Current line metadata */
333 __le64 seq_nr; /* Sequence number for current line */
334
335 /* Active writers */
336 __le32 window_wr_lun; /* Number of parallel LUNs to write */
337
338 /* Bookkeeping for recovery */
339 __le32 next_id; /* Line id for next line */
340 __le64 nr_lbas; /* Number of lbas mapped in line */
341 __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */
342};
343
344struct pblk_line {
345 struct pblk *pblk;
346 unsigned int id; /* Line number corresponds to the
347 * block line
348 */
349 unsigned int seq_nr; /* Unique line sequence number */
350
351 int state; /* PBLK_LINESTATE_X */
352 int type; /* PBLK_LINETYPE_X */
353 int gc_group; /* PBLK_LINEGC_X */
354 struct list_head list; /* Free, GC lists */
355
356 unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */
357
358 struct line_smeta *smeta; /* Start metadata */
359 struct line_emeta *emeta; /* End metadata */
360 int meta_line; /* Metadata line id */
361 u64 smeta_ssec; /* Sector where smeta starts */
362 u64 emeta_ssec; /* Sector where emeta starts */
363
364 unsigned int sec_in_line; /* Number of usable secs in line */
365
366 unsigned int blk_in_line; /* Number of good blocks in line */
367 unsigned long *blk_bitmap; /* Bitmap for valid/invalid blocks */
368 unsigned long *erase_bitmap; /* Bitmap for erased blocks */
369
370 unsigned long *map_bitmap; /* Bitmap for mapped sectors in line */
371 unsigned long *invalid_bitmap; /* Bitmap for invalid sectors in line */
372
373 int left_eblks; /* Blocks left for erasing */
374 atomic_t left_seblks; /* Blocks left for sync erasing */
375
376 int left_msecs; /* Sectors left for mapping */
377 int left_ssecs; /* Sectors left to sync */
378 unsigned int cur_sec; /* Sector map pointer */
379 unsigned int vsc; /* Valid sector count in line */
380
381 struct kref ref; /* Write buffer L2P references */
382
383 spinlock_t lock; /* Necessary for invalid_bitmap only */
384};
385
386#define PBLK_DATA_LINES 2
387
388enum {
389 PBLK_KMALLOC_META = 1,
390 PBLK_VMALLOC_META = 2,
391};
392
393struct pblk_line_metadata {
394 void *meta;
395};
396
397struct pblk_line_mgmt {
398 int nr_lines; /* Total number of full lines */
399 int nr_free_lines; /* Number of full lines in free list */
400
401 /* Free lists - use free_lock */
402 struct list_head free_list; /* Full lines ready to use */
403 struct list_head corrupt_list; /* Full lines corrupted */
404 struct list_head bad_list; /* Full lines bad */
405
406 /* GC lists - use gc_lock */
407 struct list_head *gc_lists[PBLK_NR_GC_LISTS];
408 struct list_head gc_high_list; /* Full lines ready to GC, high isc */
409 struct list_head gc_mid_list; /* Full lines ready to GC, mid isc */
410 struct list_head gc_low_list; /* Full lines ready to GC, low isc */
411
412 struct list_head gc_full_list; /* Full lines ready to GC, no valid */
413 struct list_head gc_empty_list; /* Full lines close, all valid */
414
415 struct pblk_line *log_line; /* Current FTL log line */
416 struct pblk_line *data_line; /* Current data line */
417 struct pblk_line *log_next; /* Next FTL log line */
418 struct pblk_line *data_next; /* Next data line */
419
420 /* Metadata allocation type: VMALLOC | KMALLOC */
421 int smeta_alloc_type;
422 int emeta_alloc_type;
423
424 /* Pre-allocated metadata for data lines */
425 struct pblk_line_metadata sline_meta[PBLK_DATA_LINES];
426 struct pblk_line_metadata eline_meta[PBLK_DATA_LINES];
427 unsigned long meta_bitmap;
428
429 /* Helpers for fast bitmap calculations */
430 unsigned long *bb_template;
431 unsigned long *bb_aux;
432
433 unsigned long d_seq_nr; /* Data line unique sequence number */
434 unsigned long l_seq_nr; /* Log line unique sequence number */
435
436 spinlock_t free_lock;
437 spinlock_t gc_lock;
438};
439
440struct pblk_line_meta {
441 unsigned int smeta_len; /* Total length for smeta */
442 unsigned int smeta_sec; /* Sectors needed for smeta*/
443 unsigned int emeta_len; /* Total length for emeta */
444 unsigned int emeta_sec; /* Sectors needed for emeta*/
445 unsigned int emeta_bb; /* Boundary for bb that affects emeta */
446 unsigned int sec_bitmap_len; /* Length for sector bitmap in line */
447 unsigned int blk_bitmap_len; /* Length for block bitmap in line */
448 unsigned int lun_bitmap_len; /* Length for lun bitmap in line */
449
450 unsigned int blk_per_line; /* Number of blocks in a full line */
451 unsigned int sec_per_line; /* Number of sectors in a line */
452 unsigned int min_blk_line; /* Min. number of good blocks in line */
453
454 unsigned int mid_thrs; /* Threshold for GC mid list */
455 unsigned int high_thrs; /* Threshold for GC high list */
456};
457
458struct pblk_addr_format {
459 u64 ch_mask;
460 u64 lun_mask;
461 u64 pln_mask;
462 u64 blk_mask;
463 u64 pg_mask;
464 u64 sec_mask;
465 u8 ch_offset;
466 u8 lun_offset;
467 u8 pln_offset;
468 u8 blk_offset;
469 u8 pg_offset;
470 u8 sec_offset;
471};
472
473struct pblk {
474 struct nvm_tgt_dev *dev;
475 struct gendisk *disk;
476
477 struct kobject kobj;
478
479 struct pblk_lun *luns;
480
481 struct pblk_line *lines; /* Line array */
482 struct pblk_line_mgmt l_mg; /* Line management */
483 struct pblk_line_meta lm; /* Line metadata */
484
485 int ppaf_bitsize;
486 struct pblk_addr_format ppaf;
487
488 struct pblk_rb rwb;
489
490 int min_write_pgs; /* Minimum amount of pages required by controller */
491 int max_write_pgs; /* Maximum amount of pages supported by controller */
492 int pgs_in_buffer; /* Number of pages that need to be held in buffer to
493 * guarantee successful reads.
494 */
495
496 sector_t capacity; /* Device capacity when bad blocks are subtracted */
497 int over_pct; /* Percentage of device used for over-provisioning */
498
499 /* pblk provisioning values. Used by rate limiter */
500 struct pblk_rl rl;
501
502 struct semaphore erase_sem;
503
504 unsigned char instance_uuid[16];
505#ifdef CONFIG_NVM_DEBUG
506 /* All debug counters apply to 4kb sector I/Os */
507 atomic_long_t inflight_writes; /* Inflight writes (user and gc) */
508 atomic_long_t padded_writes; /* Sectors padded due to flush/fua */
509 atomic_long_t padded_wb; /* Sectors padded in write buffer */
510 atomic_long_t nr_flush; /* Number of flush/fua I/O */
511 atomic_long_t req_writes; /* Sectors stored on write buffer */
512 atomic_long_t sub_writes; /* Sectors submitted from buffer */
513 atomic_long_t sync_writes; /* Sectors synced to media */
514 atomic_long_t compl_writes; /* Sectors completed in write bio */
515 atomic_long_t inflight_reads; /* Inflight sector read requests */
516 atomic_long_t sync_reads; /* Completed sector read requests */
517 atomic_long_t recov_writes; /* Sectors submitted from recovery */
518 atomic_long_t recov_gc_writes; /* Sectors submitted from write GC */
519 atomic_long_t recov_gc_reads; /* Sectors submitted from read GC */
520#endif
521
522 spinlock_t lock;
523
524 atomic_long_t read_failed;
525 atomic_long_t read_empty;
526 atomic_long_t read_high_ecc;
527 atomic_long_t read_failed_gc;
528 atomic_long_t write_failed;
529 atomic_long_t erase_failed;
530
531 struct task_struct *writer_ts;
532
533 /* Simple translation map of logical addresses to physical addresses.
534	 * The logical addresses are known by the host system, while the physical
535 * addresses are used when writing to the disk block device.
536 */
537 unsigned char *trans_map;
538 spinlock_t trans_lock;
539
540 struct list_head compl_list;
541
542 mempool_t *page_pool;
543 mempool_t *line_ws_pool;
544 mempool_t *rec_pool;
545 mempool_t *r_rq_pool;
546 mempool_t *w_rq_pool;
547 mempool_t *line_meta_pool;
548
549 struct workqueue_struct *kw_wq;
550 struct timer_list wtimer;
551
552 struct pblk_gc gc;
553};
554
555struct pblk_line_ws {
556 struct pblk *pblk;
557 struct pblk_line *line;
558 void *priv;
559 struct work_struct ws;
560};
561
562#define pblk_r_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_r_ctx))
563#define pblk_w_rq_size (sizeof(struct nvm_rq) + sizeof(struct pblk_c_ctx))
564
565/*
566 * pblk ring buffer operations
567 */
568int pblk_rb_init(struct pblk_rb *rb, struct pblk_rb_entry *rb_entry_base,
569 unsigned int power_size, unsigned int power_seg_sz);
570unsigned int pblk_rb_calculate_size(unsigned int nr_entries);
571void *pblk_rb_entries_ref(struct pblk_rb *rb);
572int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
573 unsigned int nr_entries, unsigned int *pos);
574int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
575 unsigned int *pos);
576void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
577 struct pblk_w_ctx w_ctx, unsigned int pos);
578void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
579 struct pblk_w_ctx w_ctx, struct pblk_line *gc_line,
580 unsigned int pos);
581struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos);
582
583void pblk_rb_sync_l2p(struct pblk_rb *rb);
584unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct bio *bio,
585 struct pblk_c_ctx *c_ctx,
586 unsigned int pos,
587 unsigned int nr_entries,
588 unsigned int count);
589unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
590 struct list_head *list,
591 unsigned int max);
592int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
593 u64 pos, int bio_iter);
594unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
595
596unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags);
597unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries);
598struct pblk_rb_entry *pblk_rb_sync_scan_entry(struct pblk_rb *rb,
599 struct ppa_addr *ppa);
600void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags);
601unsigned int pblk_rb_sync_point_count(struct pblk_rb *rb);
602
603unsigned int pblk_rb_read_count(struct pblk_rb *rb);
604unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos);
605
606int pblk_rb_tear_down_check(struct pblk_rb *rb);
607int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos);
608void pblk_rb_data_free(struct pblk_rb *rb);
609ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf);
610
611/*
612 * pblk core
613 */
614struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw);
615int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
616 struct pblk_c_ctx *c_ctx);
617void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw);
618void pblk_flush_writer(struct pblk *pblk);
619struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba);
620void pblk_discard(struct pblk *pblk, struct bio *bio);
621void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
622void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
623int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
624struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
625 unsigned int nr_secs, unsigned int len,
626 gfp_t gfp_mask);
627struct pblk_line *pblk_line_get(struct pblk *pblk);
628struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
629struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
630int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line);
631void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line);
632struct pblk_line *pblk_line_get_data(struct pblk *pblk);
633struct pblk_line *pblk_line_get_data_next(struct pblk *pblk);
634int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
635int pblk_line_is_full(struct pblk_line *line);
636void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
637void pblk_line_close_ws(struct work_struct *work);
638void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
639void pblk_line_mark_bb(struct work_struct *work);
640void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
641 void (*work)(struct work_struct *));
642u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line);
643int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line);
644int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line);
645int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa);
646void pblk_line_put(struct kref *ref);
647struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line);
648u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
649int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
650 unsigned long secs_to_flush);
651void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
652 unsigned long *lun_bitmap);
653void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
654 unsigned long *lun_bitmap);
655void pblk_end_bio_sync(struct bio *bio);
656void pblk_end_io_sync(struct nvm_rq *rqd);
657int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
658 int nr_pages);
659void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
660 u64 paddr);
661void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
662 int nr_pages);
663void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa);
664void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa);
665void pblk_update_map_cache(struct pblk *pblk, sector_t lba,
666 struct ppa_addr ppa);
667void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
668 struct ppa_addr ppa, struct ppa_addr entry_line);
669int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
670 struct pblk_line *gc_line);
671void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
672 u64 *lba_list, int nr_secs);
673void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
674 sector_t blba, int nr_secs);
675
676/*
677 * pblk user I/O write path
678 */
679int pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
680 unsigned long flags);
681int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list,
682 unsigned int nr_entries, unsigned int nr_rec_entries,
683 struct pblk_line *gc_line, unsigned long flags);
684
685/*
686 * pblk map
687 */
688void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
689 unsigned int sentry, unsigned long *lun_bitmap,
690 unsigned int valid_secs, struct ppa_addr *erase_ppa);
691void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
692 unsigned long *lun_bitmap, unsigned int valid_secs,
693 unsigned int off);
694
695/*
696 * pblk write thread
697 */
698int pblk_write_ts(void *data);
699void pblk_write_timer_fn(unsigned long data);
700void pblk_write_should_kick(struct pblk *pblk);
701
702/*
703 * pblk read path
704 */
705int pblk_submit_read(struct pblk *pblk, struct bio *bio);
706int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
707 unsigned int nr_secs, unsigned int *secs_to_gc,
708 struct pblk_line *line);
709/*
710 * pblk recovery
711 */
712void pblk_submit_rec(struct work_struct *work);
713struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
714void pblk_recov_pad(struct pblk *pblk);
715__le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta);
716int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
717 struct pblk_rec_ctx *recovery, u64 *comp_bits,
718 unsigned int comp);
719
720/*
721 * pblk gc
722 */
723#define PBLK_GC_TRIES 3
724
725int pblk_gc_init(struct pblk *pblk);
726void pblk_gc_exit(struct pblk *pblk);
727void pblk_gc_should_start(struct pblk *pblk);
728void pblk_gc_should_stop(struct pblk *pblk);
729int pblk_gc_status(struct pblk *pblk);
730void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
731 int *gc_active);
732void pblk_gc_sysfs_force(struct pblk *pblk, int force);
733
734/*
735 * pblk rate limiter
736 */
737void pblk_rl_init(struct pblk_rl *rl, int budget);
738void pblk_rl_free(struct pblk_rl *rl);
739int pblk_rl_gc_thrs(struct pblk_rl *rl);
740unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
741int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
742void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
743int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
744void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
745void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
746void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
747int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
748void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
749void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
750
751/*
752 * pblk sysfs
753 */
754int pblk_sysfs_init(struct gendisk *tdisk);
755void pblk_sysfs_exit(struct gendisk *tdisk);
756
757static inline void *pblk_malloc(size_t size, int type, gfp_t flags)
758{
759 if (type == PBLK_KMALLOC_META)
760 return kmalloc(size, flags);
761 return vmalloc(size);
762}
763
764static inline void pblk_mfree(void *ptr, int type)
765{
766 if (type == PBLK_KMALLOC_META)
767 kfree(ptr);
768 else
769 vfree(ptr);
770}
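
The allocation type cannot be recovered from the pointer itself, so callers record it (struct pblk_line_mgmt keeps smeta_alloc_type and emeta_alloc_type above) and must hand the same type back to pblk_mfree(). A hypothetical usage sketch:

/* Hypothetical helper, not part of the patch: allocate an emeta buffer with
 * whatever type the line manager selected, and later free it with the same
 * type: pblk_mfree(buf, l_mg->emeta_alloc_type);
 */
static inline void *pblk_emeta_buf_alloc(struct pblk_line_mgmt *l_mg,
					 size_t len)
{
	return pblk_malloc(len, l_mg->emeta_alloc_type, GFP_KERNEL);
}
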
771
772static inline struct nvm_rq *nvm_rq_from_c_ctx(void *c_ctx)
773{
774 return c_ctx - sizeof(struct nvm_rq);
775}
776
777static inline void *pblk_line_emeta_to_lbas(struct line_emeta *emeta)
778{
779 return (emeta) + 1;
780}
781
782#define NVM_MEM_PAGE_WRITE (8)
783
784static inline int pblk_pad_distance(struct pblk *pblk)
785{
786 struct nvm_tgt_dev *dev = pblk->dev;
787 struct nvm_geo *geo = &dev->geo;
788
789 return NVM_MEM_PAGE_WRITE * geo->nr_luns * geo->sec_per_pl;
790}
791
792static inline int pblk_dev_ppa_to_line(struct ppa_addr p)
793{
794 return p.g.blk;
795}
796
797static inline int pblk_tgt_ppa_to_line(struct ppa_addr p)
798{
799 return p.g.blk;
800}
801
802static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
803{
804 return p.g.lun * geo->nr_chnls + p.g.ch;
805}
806
807/* A block within a line corresponds to the lun */
808static inline int pblk_dev_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p)
809{
810 return p.g.lun * geo->nr_chnls + p.g.ch;
811}
812
813static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
814{
815 struct ppa_addr ppa64;
816
817 ppa64.ppa = 0;
818
819 if (ppa32 == -1) {
820 ppa64.ppa = ADDR_EMPTY;
821 } else if (ppa32 & (1U << 31)) {
822 ppa64.c.line = ppa32 & ((~0U) >> 1);
823 ppa64.c.is_cached = 1;
824 } else {
825 ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >>
826 pblk->ppaf.blk_offset;
827 ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >>
828 pblk->ppaf.pg_offset;
829 ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >>
830 pblk->ppaf.lun_offset;
831 ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >>
832 pblk->ppaf.ch_offset;
833 ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >>
834 pblk->ppaf.pln_offset;
835 ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >>
836 pblk->ppaf.sec_offset;
837 }
838
839 return ppa64;
840}
841
842static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
843 sector_t lba)
844{
845 struct ppa_addr ppa;
846
847 if (pblk->ppaf_bitsize < 32) {
848 u32 *map = (u32 *)pblk->trans_map;
849
850 ppa = pblk_ppa32_to_ppa64(pblk, map[lba]);
851 } else {
852 struct ppa_addr *map = (struct ppa_addr *)pblk->trans_map;
853
854 ppa = map[lba];
855 }
856
857 return ppa;
858}
859
860static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
861{
862 u32 ppa32 = 0;
863
864 if (ppa64.ppa == ADDR_EMPTY) {
865 ppa32 = ~0U;
866 } else if (ppa64.c.is_cached) {
867 ppa32 |= ppa64.c.line;
868 ppa32 |= 1U << 31;
869 } else {
870 ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset;
871 ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset;
872 ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset;
873 ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset;
874 ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset;
875 ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset;
876 }
877
878 return ppa32;
879}
880
881static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba,
882 struct ppa_addr ppa)
883{
884 if (pblk->ppaf_bitsize < 32) {
885 u32 *map = (u32 *)pblk->trans_map;
886
887 map[lba] = pblk_ppa64_to_ppa32(pblk, ppa);
888 } else {
889 u64 *map = (u64 *)pblk->trans_map;
890
891 map[lba] = ppa.ppa;
892 }
893}
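
The ppaf_bitsize check above is what selects between a packed 4-byte entry and a full 8-byte ppa_addr per mapped sector; a quick worked example of what that saves on a hypothetical 1 TB instance with 4 KB sectors:

#include <stdio.h>

int main(void)
{
	unsigned long long entries = (1ULL << 40) / 4096;	/* 256M lbas */

	/* 4 bytes per entry when ppaf_bitsize < 32, 8 bytes otherwise. */
	printf("l2p: %llu MiB (u32) vs %llu MiB (ppa_addr)\n",
	       (entries * 4) >> 20, (entries * 8) >> 20);
	return 0;
}
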
894
895static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
896 struct ppa_addr p)
897{
898 u64 paddr;
899
900 paddr = 0;
901 paddr |= (u64)p.g.pg << pblk->ppaf.pg_offset;
902 paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset;
903 paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset;
904 paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset;
905 paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset;
906
907 return paddr;
908}
909
910static inline int pblk_ppa_empty(struct ppa_addr ppa_addr)
911{
912 return (ppa_addr.ppa == ADDR_EMPTY);
913}
914
915static inline void pblk_ppa_set_empty(struct ppa_addr *ppa_addr)
916{
917 ppa_addr->ppa = ADDR_EMPTY;
918}
919
920static inline int pblk_addr_in_cache(struct ppa_addr ppa)
921{
922 return (ppa.ppa != ADDR_EMPTY && ppa.c.is_cached);
923}
924
925static inline int pblk_addr_to_cacheline(struct ppa_addr ppa)
926{
927 return ppa.c.line;
928}
929
930static inline struct ppa_addr pblk_cacheline_to_addr(int addr)
931{
932 struct ppa_addr p;
933
934 p.c.line = addr;
935 p.c.is_cached = 1;
936
937 return p;
938}
939
940static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
941 u64 line_id)
942{
943 struct ppa_addr ppa;
944
945 ppa.ppa = 0;
946 ppa.g.blk = line_id;
947 ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset;
948 ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset;
949 ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset;
950 ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset;
951 ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset;
952
953 return ppa;
954}
955
956static inline struct ppa_addr addr_to_pblk_ppa(struct pblk *pblk, u64 paddr,
957 u64 line_id)
958{
959 struct ppa_addr ppa;
960
961 ppa = addr_to_gen_ppa(pblk, paddr, line_id);
962
963 return ppa;
964}
965
966static inline u32 pblk_calc_meta_header_crc(struct pblk *pblk,
967 struct line_smeta *smeta)
968{
969 u32 crc = ~(u32)0;
970
971 crc = crc32_le(crc, (unsigned char *)smeta + sizeof(crc),
972 sizeof(struct line_header) - sizeof(crc));
973
974 return crc;
975}
976
977static inline u32 pblk_calc_smeta_crc(struct pblk *pblk,
978 struct line_smeta *smeta)
979{
980 struct pblk_line_meta *lm = &pblk->lm;
981 u32 crc = ~(u32)0;
982
983 crc = crc32_le(crc, (unsigned char *)smeta +
984 sizeof(struct line_header) + sizeof(crc),
985 lm->smeta_len -
986 sizeof(struct line_header) - sizeof(crc));
987
988 return crc;
989}
990
991static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
992 struct line_emeta *emeta)
993{
994 struct pblk_line_meta *lm = &pblk->lm;
995 u32 crc = ~(u32)0;
996
997 crc = crc32_le(crc, (unsigned char *)emeta +
998 sizeof(struct line_header) + sizeof(crc),
999 lm->emeta_len -
1000 sizeof(struct line_header) - sizeof(crc));
1001
1002 return crc;
1003}
1004
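/*
 * Per-command flag helpers: program/erase commands derive their
 * plane-access flag from the geometry (the NVM_IO_*_ACCESS values
 * appear to map to plane_mode >> 1, hence the shift), with scrambling
 * enabled for writes only. Reads use single-plane access plus
 * NVM_IO_SUSPEND and scrambling.
 */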
1005static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
1006{
1007 struct nvm_tgt_dev *dev = pblk->dev;
1008 struct nvm_geo *geo = &dev->geo;
1009 int flags;
1010
1011 flags = geo->plane_mode >> 1;
1012
1013 if (type == WRITE)
1014 flags |= NVM_IO_SCRAMBLE_ENABLE;
1015
1016 return flags;
1017}
1018
1019static inline int pblk_set_read_mode(struct pblk *pblk)
1020{
1021 return NVM_IO_SNGL_ACCESS | NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
1022}
1023
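/*
 * Debug-only dumpers: print a single ppa, or walk rqd->ppa_status and
 * print every ppa the device flagged as failed in a completed request.
 */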
1024#ifdef CONFIG_NVM_DEBUG
1025static inline void print_ppa(struct ppa_addr *p, char *msg, int error)
1026{
1027 if (p->c.is_cached) {
1028 pr_err("ppa: (%s: %x) cache line: %llu\n",
1029 msg, error, (u64)p->c.line);
1030 } else {
1031 pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n",
1032 msg, error,
1033 p->g.ch, p->g.lun, p->g.blk,
1034 p->g.pg, p->g.pl, p->g.sec);
1035 }
1036}
1037
1038static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd,
1039 int error)
1040{
1041 int bit = -1;
1042
1043 if (rqd->nr_ppas == 1) {
1044 print_ppa(&rqd->ppa_addr, "rqd", error);
1045 return;
1046 }
1047
1048 while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas,
1049 bit + 1)) < rqd->nr_ppas) {
1050 print_ppa(&rqd->ppa_list[bit], "rqd", error);
1051 }
1052
1053 pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
1054}
1055#endif
1056
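/*
 * Geometry sanity check: returns 1 (and, with CONFIG_NVM_DEBUG, prints
 * the offender) if any non-cached ppa in the list falls outside the
 * device geometry, 0 if all addresses are in range.
 */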
1057static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
1058 struct ppa_addr *ppas, int nr_ppas)
1059{
1060 struct nvm_geo *geo = &tgt_dev->geo;
1061 struct ppa_addr *ppa;
1062 int i;
1063
1064 for (i = 0; i < nr_ppas; i++) {
1065 ppa = &ppas[i];
1066
1067 if (!ppa->c.is_cached &&
1068 ppa->g.ch < geo->nr_chnls &&
1069 ppa->g.lun < geo->luns_per_chnl &&
1070 ppa->g.pl < geo->nr_planes &&
1071 ppa->g.blk < geo->blks_per_lun &&
1072 ppa->g.pg < geo->pgs_per_blk &&
1073 ppa->g.sec < geo->sec_per_pg)
1074 continue;
1075
1076#ifdef CONFIG_NVM_DEBUG
1077 print_ppa(ppa, "boundary", i);
1078#endif
1079 return 1;
1080 }
1081 return 0;
1082}
1083
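/*
 * Line-local counterpart: flags a paddr that lies beyond the number of
 * sectors in a line.
 */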
1084static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr)
1085{
1086 struct pblk_line_meta *lm = &pblk->lm;
1087
1088 if (paddr > lm->sec_per_line)
1089 return 1;
1090
1091 return 0;
1092}
1093
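/*
 * bio accessors: the block layer counts in 512-byte sectors while pblk
 * maps at 4KB granularity, so bi_sector is scaled by NR_PHY_IN_LOG
 * (512-byte sectors per exposed 4KB page) and bi_size by
 * PBLK_EXPOSED_PAGE_SIZE to convert to and from pblk LBAs and sector
 * counts.
 */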
1094static inline unsigned int pblk_get_bi_idx(struct bio *bio)
1095{
1096 return bio->bi_iter.bi_idx;
1097}
1098
1099static inline sector_t pblk_get_lba(struct bio *bio)
1100{
1101 return bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
1102}
1103
1104static inline unsigned int pblk_get_secs(struct bio *bio)
1105{
1106 return bio->bi_iter.bi_size / PBLK_EXPOSED_PAGE_SIZE;
1107}
1108
1109static inline sector_t pblk_get_sector(sector_t lba)
1110{
1111 return lba * NR_PHY_IN_LOG;
1112}
1113
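/*
 * Generate a random per-instance UUID, presumably stamped into the line
 * metadata so that recovery can tell lines written by this instance
 * apart from stale data.
 */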
1114static inline void pblk_setup_uuid(struct pblk *pblk)
1115{
1116 uuid_le uuid;
1117
1118 uuid_le_gen(&uuid);
1119 memcpy(pblk->instance_uuid, uuid.b, 16);
1120}
1121#endif /* PBLK_H_ */