path: root/drivers/lightnvm/pblk-core.c
author    Javier González <jg@lightnvm.io>    2017-04-15 14:55:50 -0400
committer Jens Axboe <axboe@fb.com>           2017-04-16 12:06:33 -0400
commit    a4bd217b432685d6a177c28a2af187f041c473b7
tree      3670d0322655bdef412c415e04c8515e865c1e37 /drivers/lightnvm/pblk-core.c
parent    6eb082452df1218e9c0ce1168c456f839ce5acb2
lightnvm: physical block device (pblk) target
This patch introduces pblk, a host-side translation layer for Open-Channel SSDs that exposes them as block devices. The translation layer allows data placement decisions and I/O scheduling to be managed by the host, enabling users to optimize the SSD for their specific workloads.

An open-channel SSD has a set of LUNs (parallel units) and a collection of blocks. Each block can be read in any order, but writes must be sequential. Writes may also fail, and if a block requires it, it must be reset before new writes can be applied.

To manage these constraints, pblk maintains a logical to physical address (L2P) table, a write cache, garbage collection logic, a recovery scheme, and logic to rate-limit user I/Os versus garbage collection I/Os.

The L2P table is fully associative and manages sectors at a 4KB granularity. Pblk stores the L2P table in two places: in the out-of-band area of the media and on the last page of a line. In the case of a power failure, pblk will perform a scan to recover the L2P table.

The user data is organized into lines. A line is data striped across blocks and LUNs. Lines enable the host to reduce the amount of metadata to maintain besides the user data and make it easier to implement RAID or erasure coding in the future.

pblk implements multi-tenant support and can be instantiated multiple times on the same drive. Each instance owns a portion of the SSD - both regarding I/O bandwidth and capacity - providing I/O isolation for each instance.

Finally, pblk also exposes a sysfs interface that allows user-space to peek into the internals of pblk. The interface is available at /dev/block/*/pblk/ where * is the block device name exposed.

This work also contains contributions from:
  Matias Bjørling <matias@cnexlabs.com>
  Simon A. F. Lund <slund@cnexlabs.com>
  Young Tack Jin <youngtack.jin@gmail.com>
  Huaicheng Li <huaicheng@cs.uchicago.edu>

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
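As an aside, the fully associative, 4KB-granularity mapping described above can be pictured with a minimal userspace sketch. The names and layout below are illustrative assumptions, not pblk's actual structures; in the patch itself the same idea is implemented by pblk_trans_map_get()/pblk_trans_map_set() under pblk->trans_lock. The sketch keeps one physical-address slot per logical sector and serializes lookups and updates with a single lock.

/* Illustrative sketch only -- not pblk's data structures. */
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

#define SECTOR_SIZE 4096ULL
#define PPA_EMPTY   UINT64_MAX          /* unmapped marker, like ADDR_EMPTY */

struct l2p_table {
	uint64_t *map;                  /* map[lba] = physical page address */
	uint64_t nr_secs;               /* capacity in 4KB sectors */
	pthread_mutex_t lock;           /* serializes lookups and updates */
};

static struct l2p_table *l2p_create(uint64_t capacity_bytes)
{
	struct l2p_table *t = malloc(sizeof(*t));
	uint64_t i;

	if (!t)
		return NULL;
	t->nr_secs = capacity_bytes / SECTOR_SIZE;
	t->map = malloc(t->nr_secs * sizeof(*t->map));
	if (!t->map) {
		free(t);
		return NULL;
	}
	for (i = 0; i < t->nr_secs; i++)
		t->map[i] = PPA_EMPTY;  /* every sector starts unmapped */
	pthread_mutex_init(&t->lock, NULL);
	return t;
}

static uint64_t l2p_get(struct l2p_table *t, uint64_t lba)
{
	uint64_t ppa;

	pthread_mutex_lock(&t->lock);
	ppa = (lba < t->nr_secs) ? t->map[lba] : PPA_EMPTY;
	pthread_mutex_unlock(&t->lock);
	return ppa;
}

static void l2p_set(struct l2p_table *t, uint64_t lba, uint64_t ppa)
{
	pthread_mutex_lock(&t->lock);
	if (lba < t->nr_secs)           /* out-of-bounds updates are ignored */
		t->map[lba] = ppa;
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct l2p_table *t = l2p_create(1ULL << 30); /* 1 GiB => 262144 sectors */

	if (!t)
		return 1;
	l2p_set(t, 42, 0x1234);         /* map LBA 42 to an arbitrary PPA */
	return l2p_get(t, 42) == 0x1234 ? 0 : 1;
}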
Diffstat (limited to 'drivers/lightnvm/pblk-core.c')
-rw-r--r--  drivers/lightnvm/pblk-core.c  1655
1 file changed, 1655 insertions, 0 deletions
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
new file mode 100644
index 000000000000..a2bcd098babc
--- /dev/null
+++ b/drivers/lightnvm/pblk-core.c
@@ -0,0 +1,1655 @@
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-core.c - pblk's core functionality
16 *
17 */
18
19#include "pblk.h"
20#include <linux/time.h>
21
22static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
23 struct ppa_addr *ppa)
24{
25 struct nvm_tgt_dev *dev = pblk->dev;
26 struct nvm_geo *geo = &dev->geo;
27 int pos = pblk_dev_ppa_to_pos(geo, *ppa);
28
29 pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
30 atomic_long_inc(&pblk->erase_failed);
31
32 if (test_and_set_bit(pos, line->blk_bitmap))
33 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
34 line->id, pos);
35
36 pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
37}
38
39static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
40{
41 struct pblk_line *line;
42
43 line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
44 atomic_dec(&line->left_seblks);
45
46 if (rqd->error) {
47 struct ppa_addr *ppa;
48
49 ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
50 if (!ppa)
51 return;
52
53 *ppa = rqd->ppa_addr;
54 pblk_mark_bb(pblk, line, ppa);
55 }
56}
57
58/* Erase completion assumes that only one block is erased at a time */
59static void pblk_end_io_erase(struct nvm_rq *rqd)
60{
61 struct pblk *pblk = rqd->private;
62
63 up(&pblk->erase_sem);
64 __pblk_end_io_erase(pblk, rqd);
65 mempool_free(rqd, pblk->r_rq_pool);
66}
67
68static void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
69 u64 paddr)
70{
71 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
72 struct list_head *move_list = NULL;
73
74 /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
75 * table is modified with reclaimed sectors, a check is done to ensure
76 * that newer updates are not overwritten.
77 */
78 spin_lock(&line->lock);
79 if (line->state == PBLK_LINESTATE_GC ||
80 line->state == PBLK_LINESTATE_FREE) {
81 spin_unlock(&line->lock);
82 return;
83 }
84
85 if (test_and_set_bit(paddr, line->invalid_bitmap)) {
86 WARN_ONCE(1, "pblk: double invalidate\n");
87 spin_unlock(&line->lock);
88 return;
89 }
90 line->vsc--;
91
92 if (line->state == PBLK_LINESTATE_CLOSED)
93 move_list = pblk_line_gc_list(pblk, line);
94 spin_unlock(&line->lock);
95
96 if (move_list) {
97 spin_lock(&l_mg->gc_lock);
98 spin_lock(&line->lock);
99 /* Prevent moving a line that has just been chosen for GC */
100 if (line->state == PBLK_LINESTATE_GC ||
101 line->state == PBLK_LINESTATE_FREE) {
102 spin_unlock(&line->lock);
103 spin_unlock(&l_mg->gc_lock);
104 return;
105 }
106 spin_unlock(&line->lock);
107
108 list_move_tail(&line->list, move_list);
109 spin_unlock(&l_mg->gc_lock);
110 }
111}
112
113void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
114{
115 struct pblk_line *line;
116 u64 paddr;
117 int line_id;
118
119#ifdef CONFIG_NVM_DEBUG
120 /* Callers must ensure that the ppa points to a device address */
121 BUG_ON(pblk_addr_in_cache(ppa));
122 BUG_ON(pblk_ppa_empty(ppa));
123#endif
124
125 line_id = pblk_tgt_ppa_to_line(ppa);
126 line = &pblk->lines[line_id];
127 paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
128
129 __pblk_map_invalidate(pblk, line, paddr);
130}
131
132void pblk_map_pad_invalidate(struct pblk *pblk, struct pblk_line *line,
133 u64 paddr)
134{
135 __pblk_map_invalidate(pblk, line, paddr);
136
137 pblk_rb_sync_init(&pblk->rwb, NULL);
138 line->left_ssecs--;
139 if (!line->left_ssecs)
140 pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws);
141 pblk_rb_sync_end(&pblk->rwb, NULL);
142}
143
144static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
145 unsigned int nr_secs)
146{
147 sector_t lba;
148
149 spin_lock(&pblk->trans_lock);
150 for (lba = slba; lba < slba + nr_secs; lba++) {
151 struct ppa_addr ppa;
152
153 ppa = pblk_trans_map_get(pblk, lba);
154
155 if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
156 pblk_map_invalidate(pblk, ppa);
157
158 pblk_ppa_set_empty(&ppa);
159 pblk_trans_map_set(pblk, lba, ppa);
160 }
161 spin_unlock(&pblk->trans_lock);
162}
163
164struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
165{
166 mempool_t *pool;
167 struct nvm_rq *rqd;
168 int rq_size;
169
170 if (rw == WRITE) {
171 pool = pblk->w_rq_pool;
172 rq_size = pblk_w_rq_size;
173 } else {
174 pool = pblk->r_rq_pool;
175 rq_size = pblk_r_rq_size;
176 }
177
178 rqd = mempool_alloc(pool, GFP_KERNEL);
179 memset(rqd, 0, rq_size);
180
181 return rqd;
182}
183
184void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
185{
186 mempool_t *pool;
187
188 if (rw == WRITE)
189 pool = pblk->w_rq_pool;
190 else
191 pool = pblk->r_rq_pool;
192
193 mempool_free(rqd, pool);
194}
195
196void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
197 int nr_pages)
198{
199 struct bio_vec bv;
200 int i;
201
202 WARN_ON(off + nr_pages != bio->bi_vcnt);
203
204 bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
205 for (i = off; i < nr_pages + off; i++) {
206 bv = bio->bi_io_vec[i];
207 mempool_free(bv.bv_page, pblk->page_pool);
208 }
209}
210
211int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
212 int nr_pages)
213{
214 struct request_queue *q = pblk->dev->q;
215 struct page *page;
216 int i, ret;
217
218 for (i = 0; i < nr_pages; i++) {
219 page = mempool_alloc(pblk->page_pool, flags);
220 if (!page)
221 goto err;
222
223 ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
224 if (ret != PBLK_EXPOSED_PAGE_SIZE) {
225 pr_err("pblk: could not add page to bio\n");
226 mempool_free(page, pblk->page_pool);
227 goto err;
228 }
229 }
230
231 return 0;
232err:
233 pblk_bio_free_pages(pblk, bio, 0, i);
234 return -1;
235}
236
237static void pblk_write_kick(struct pblk *pblk)
238{
239 wake_up_process(pblk->writer_ts);
240 mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
241}
242
243void pblk_write_timer_fn(unsigned long data)
244{
245 struct pblk *pblk = (struct pblk *)data;
246
247 /* kick the write thread every tick to flush outstanding data */
248 pblk_write_kick(pblk);
249}
250
251void pblk_write_should_kick(struct pblk *pblk)
252{
253 unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
254
255 if (secs_avail >= pblk->min_write_pgs)
256 pblk_write_kick(pblk);
257}
258
259void pblk_end_bio_sync(struct bio *bio)
260{
261 struct completion *waiting = bio->bi_private;
262
263 complete(waiting);
264}
265
266void pblk_end_io_sync(struct nvm_rq *rqd)
267{
268 struct completion *waiting = rqd->private;
269
270 complete(waiting);
271}
272
273void pblk_flush_writer(struct pblk *pblk)
274{
275 struct bio *bio;
276 int ret;
277 DECLARE_COMPLETION_ONSTACK(wait);
278
279 bio = bio_alloc(GFP_KERNEL, 1);
280 if (!bio)
281 return;
282
283 bio->bi_iter.bi_sector = 0; /* internal bio */
284 bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
285 bio->bi_private = &wait;
286 bio->bi_end_io = pblk_end_bio_sync;
287
288 ret = pblk_write_to_cache(pblk, bio, 0);
289 if (ret == NVM_IO_OK) {
290 if (!wait_for_completion_io_timeout(&wait,
291 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
292 pr_err("pblk: flush cache timed out\n");
293 }
294 } else if (ret != NVM_IO_DONE) {
295 pr_err("pblk: tear down bio failed\n");
296 }
297
298 if (bio->bi_error)
299 pr_err("pblk: flush sync write failed (%u)\n", bio->bi_error);
300
301 bio_put(bio);
302}
303
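/*
 * Pick the GC list that matches the line's valid sector count (vsc): lines
 * with no valid data go on the "full" list (cheapest to reclaim), partially
 * valid lines are binned into high/mid/low cost lists, fully valid lines go
 * on the "empty" list, and anything else indicates a corrupted vsc.
 */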
304struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
305{
306 struct pblk_line_meta *lm = &pblk->lm;
307 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
308 struct list_head *move_list = NULL;
309
310 if (!line->vsc) {
311 if (line->gc_group != PBLK_LINEGC_FULL) {
312 line->gc_group = PBLK_LINEGC_FULL;
313 move_list = &l_mg->gc_full_list;
314 }
315 } else if (line->vsc < lm->mid_thrs) {
316 if (line->gc_group != PBLK_LINEGC_HIGH) {
317 line->gc_group = PBLK_LINEGC_HIGH;
318 move_list = &l_mg->gc_high_list;
319 }
320 } else if (line->vsc < lm->high_thrs) {
321 if (line->gc_group != PBLK_LINEGC_MID) {
322 line->gc_group = PBLK_LINEGC_MID;
323 move_list = &l_mg->gc_mid_list;
324 }
325 } else if (line->vsc < line->sec_in_line) {
326 if (line->gc_group != PBLK_LINEGC_LOW) {
327 line->gc_group = PBLK_LINEGC_LOW;
328 move_list = &l_mg->gc_low_list;
329 }
330 } else if (line->vsc == line->sec_in_line) {
331 if (line->gc_group != PBLK_LINEGC_EMPTY) {
332 line->gc_group = PBLK_LINEGC_EMPTY;
333 move_list = &l_mg->gc_empty_list;
334 }
335 } else {
336 line->state = PBLK_LINESTATE_CORRUPT;
337 line->gc_group = PBLK_LINEGC_NONE;
338 move_list = &l_mg->corrupt_list;
339 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
340 line->id, line->vsc,
341 line->sec_in_line,
342 lm->high_thrs, lm->mid_thrs);
343 }
344
345 return move_list;
346}
347
348void pblk_discard(struct pblk *pblk, struct bio *bio)
349{
350 sector_t slba = pblk_get_lba(bio);
351 sector_t nr_secs = pblk_get_secs(bio);
352
353 pblk_invalidate_range(pblk, slba, nr_secs);
354}
355
356struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
357{
358 struct ppa_addr ppa;
359
360 spin_lock(&pblk->trans_lock);
361 ppa = pblk_trans_map_get(pblk, lba);
362 spin_unlock(&pblk->trans_lock);
363
364 return ppa;
365}
366
367void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
368{
369 atomic_long_inc(&pblk->write_failed);
370#ifdef CONFIG_NVM_DEBUG
371 pblk_print_failed_rqd(pblk, rqd, rqd->error);
372#endif
373}
374
375void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
376{
377 /* Empty page read is not necessarily an error (e.g., L2P recovery) */
378 if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
379 atomic_long_inc(&pblk->read_empty);
380 return;
381 }
382
383 switch (rqd->error) {
384 case NVM_RSP_WARN_HIGHECC:
385 atomic_long_inc(&pblk->read_high_ecc);
386 break;
387 case NVM_RSP_ERR_FAILECC:
388 case NVM_RSP_ERR_FAILCRC:
389 atomic_long_inc(&pblk->read_failed);
390 break;
391 default:
392 pr_err("pblk: unknown read error:%d\n", rqd->error);
393 }
394#ifdef CONFIG_NVM_DEBUG
395 pblk_print_failed_rqd(pblk, rqd, rqd->error);
396#endif
397}
398
399int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
400{
401 struct nvm_tgt_dev *dev = pblk->dev;
402
403#ifdef CONFIG_NVM_DEBUG
404 struct ppa_addr *ppa_list;
405
406 ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
407 if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
408 WARN_ON(1);
409 return -EINVAL;
410 }
411
412 if (rqd->opcode == NVM_OP_PWRITE) {
413 struct pblk_line *line;
414 struct ppa_addr ppa;
415 int i;
416
417 for (i = 0; i < rqd->nr_ppas; i++) {
418 ppa = ppa_list[i];
419 line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
420
421 spin_lock(&line->lock);
422 if (line->state != PBLK_LINESTATE_OPEN) {
423 pr_err("pblk: bad ppa: line:%d,state:%d\n",
424 line->id, line->state);
425 WARN_ON(1);
426 spin_unlock(&line->lock);
427 return -EINVAL;
428 }
429 spin_unlock(&line->lock);
430 }
431 }
432#endif
433 return nvm_submit_io(dev, rqd);
434}
435
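/*
 * Build a bio on top of an emeta buffer. A kmalloc'ed buffer is physically
 * contiguous and can be mapped directly; a vmalloc'ed buffer must be added
 * to the bio page by page via vmalloc_to_page().
 */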
436struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
437 unsigned int nr_secs, unsigned int len,
438 gfp_t gfp_mask)
439{
440 struct nvm_tgt_dev *dev = pblk->dev;
441 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
442 void *kaddr = data;
443 struct page *page;
444 struct bio *bio;
445 int i, ret;
446
447 if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
448 return bio_map_kern(dev->q, kaddr, len, gfp_mask);
449
450 bio = bio_kmalloc(gfp_mask, nr_secs);
451 if (!bio)
452 return ERR_PTR(-ENOMEM);
453
454 for (i = 0; i < nr_secs; i++) {
455 page = vmalloc_to_page(kaddr);
456 if (!page) {
457 pr_err("pblk: could not map vmalloc bio\n");
458 bio_put(bio);
459 bio = ERR_PTR(-ENOMEM);
460 goto out;
461 }
462
463 ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
464 if (ret != PAGE_SIZE) {
465 pr_err("pblk: could not add page to bio\n");
466 bio_put(bio);
467 bio = ERR_PTR(-ENOMEM);
468 goto out;
469 }
470
471 kaddr += PAGE_SIZE;
472 }
473out:
474 return bio;
475}
476
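/*
 * Decide how many buffered sectors to group into the next write: a full
 * max_write_pgs chunk if enough data is available, otherwise the largest
 * multiple of min_write_pgs that fits, and a single minimum-sized write
 * only when a flush forces partially buffered data out.
 */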
477int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
478 unsigned long secs_to_flush)
479{
480 int max = pblk->max_write_pgs;
481 int min = pblk->min_write_pgs;
482 int secs_to_sync = 0;
483
484 if (secs_avail >= max)
485 secs_to_sync = max;
486 else if (secs_avail >= min)
487 secs_to_sync = min * (secs_avail / min);
488 else if (secs_to_flush)
489 secs_to_sync = min;
490
491 return secs_to_sync;
492}
493
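/*
 * Reserve nr_secs sectors on the line, starting at the next unmapped sector
 * in the line's map bitmap. Callers must hold line->lock.
 */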
494static u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line,
495 int nr_secs)
496{
497 u64 addr;
498 int i;
499
500 /* logic error: ppa out-of-bounds. Prevent generating bad address */
501 if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
502 WARN(1, "pblk: page allocation out of bounds\n");
503 nr_secs = pblk->lm.sec_per_line - line->cur_sec;
504 }
505
506 line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
507 pblk->lm.sec_per_line, line->cur_sec);
508 for (i = 0; i < nr_secs; i++, line->cur_sec++)
509 WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
510
511 return addr;
512}
513
514u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
515{
516 u64 addr;
517
518 /* Lock needed in case a write fails and a recovery needs to remap
519 * failed write buffer entries
520 */
521 spin_lock(&line->lock);
522 addr = __pblk_alloc_page(pblk, line, nr_secs);
523 line->left_msecs -= nr_secs;
524 WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
525 spin_unlock(&line->lock);
526
527 return addr;
528}
529
530/*
531 * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when
532 * taking the per-LUN semaphore.
533 */
534static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
535 u64 paddr, int dir)
536{
537 struct nvm_tgt_dev *dev = pblk->dev;
538 struct nvm_geo *geo = &dev->geo;
539 struct pblk_line_meta *lm = &pblk->lm;
540 struct bio *bio;
541 struct nvm_rq rqd;
542 struct ppa_addr *ppa_list;
543 dma_addr_t dma_ppa_list;
544 void *emeta = line->emeta;
545 int min = pblk->min_write_pgs;
546 int left_ppas = lm->emeta_sec;
547 int id = line->id;
548 int rq_ppas, rq_len;
549 int cmd_op, bio_op;
550 int flags;
551 int i, j;
552 int ret;
553 DECLARE_COMPLETION_ONSTACK(wait);
554
555 if (dir == WRITE) {
556 bio_op = REQ_OP_WRITE;
557 cmd_op = NVM_OP_PWRITE;
558 flags = pblk_set_progr_mode(pblk, WRITE);
559 } else if (dir == READ) {
560 bio_op = REQ_OP_READ;
561 cmd_op = NVM_OP_PREAD;
562 flags = pblk_set_read_mode(pblk);
563 } else
564 return -EINVAL;
565
566 ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_ppa_list);
567 if (!ppa_list)
568 return -ENOMEM;
569
570next_rq:
571 memset(&rqd, 0, sizeof(struct nvm_rq));
572
573 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
574 rq_len = rq_ppas * geo->sec_size;
575
576 bio = pblk_bio_map_addr(pblk, emeta, rq_ppas, rq_len, GFP_KERNEL);
577 if (IS_ERR(bio)) {
578 ret = PTR_ERR(bio);
579 goto free_rqd_dma;
580 }
581
582 bio->bi_iter.bi_sector = 0; /* internal bio */
583 bio_set_op_attrs(bio, bio_op, 0);
584
585 rqd.bio = bio;
586 rqd.opcode = cmd_op;
587 rqd.flags = flags;
588 rqd.nr_ppas = rq_ppas;
589 rqd.ppa_list = ppa_list;
590 rqd.dma_ppa_list = dma_ppa_list;
591 rqd.end_io = pblk_end_io_sync;
592 rqd.private = &wait;
593
594 if (dir == WRITE) {
595 for (i = 0; i < rqd.nr_ppas; ) {
596 spin_lock(&line->lock);
597 paddr = __pblk_alloc_page(pblk, line, min);
598 spin_unlock(&line->lock);
599 for (j = 0; j < min; j++, i++, paddr++)
600 rqd.ppa_list[i] =
601 addr_to_gen_ppa(pblk, paddr, id);
602 }
603 } else {
604 for (i = 0; i < rqd.nr_ppas; ) {
605 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
606 int pos = pblk_dev_ppa_to_pos(geo, ppa);
607
608 while (test_bit(pos, line->blk_bitmap)) {
609 paddr += min;
610 if (pblk_boundary_paddr_checks(pblk, paddr)) {
611 pr_err("pblk: corrupt emeta line:%d\n",
612 line->id);
613 bio_put(bio);
614 ret = -EINTR;
615 goto free_rqd_dma;
616 }
617
618 ppa = addr_to_gen_ppa(pblk, paddr, id);
619 pos = pblk_dev_ppa_to_pos(geo, ppa);
620 }
621
622 if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
623 pr_err("pblk: corrupt emeta line:%d\n",
624 line->id);
625 bio_put(bio);
626 ret = -EINTR;
627 goto free_rqd_dma;
628 }
629
630 for (j = 0; j < min; j++, i++, paddr++)
631 rqd.ppa_list[i] =
632 addr_to_gen_ppa(pblk, paddr, line->id);
633 }
634 }
635
636 ret = pblk_submit_io(pblk, &rqd);
637 if (ret) {
638 pr_err("pblk: emeta I/O submission failed: %d\n", ret);
639 bio_put(bio);
640 goto free_rqd_dma;
641 }
642
643 if (!wait_for_completion_io_timeout(&wait,
644 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
645 pr_err("pblk: emeta I/O timed out\n");
646 }
647 reinit_completion(&wait);
648
649 bio_put(bio);
650
651 if (rqd.error) {
652 if (dir == WRITE)
653 pblk_log_write_err(pblk, &rqd);
654 else
655 pblk_log_read_err(pblk, &rqd);
656 }
657
658 emeta += rq_len;
659 left_ppas -= rq_ppas;
660 if (left_ppas)
661 goto next_rq;
662free_rqd_dma:
663 nvm_dev_dma_free(dev->parent, ppa_list, dma_ppa_list);
664 return ret;
665}
666
667u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
668{
669 struct nvm_tgt_dev *dev = pblk->dev;
670 struct nvm_geo *geo = &dev->geo;
671 struct pblk_line_meta *lm = &pblk->lm;
672 int bit;
673
674 /* This usually only happens on bad lines */
675 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
676 if (bit >= lm->blk_per_line)
677 return -1;
678
679 return bit * geo->sec_per_pl;
680}
681
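/*
 * Read or write the line's start metadata (smeta). smeta is placed at the
 * start of the line's first usable block; on writes, the lba list entries
 * covering these sectors are marked empty.
 */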
682static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
683 u64 paddr, int dir)
684{
685 struct nvm_tgt_dev *dev = pblk->dev;
686 struct pblk_line_meta *lm = &pblk->lm;
687 struct bio *bio;
688 struct nvm_rq rqd;
689 __le64 *lba_list = NULL;
690 int i, ret;
691 int cmd_op, bio_op;
692 int flags;
693 DECLARE_COMPLETION_ONSTACK(wait);
694
695 if (dir == WRITE) {
696 bio_op = REQ_OP_WRITE;
697 cmd_op = NVM_OP_PWRITE;
698 flags = pblk_set_progr_mode(pblk, WRITE);
699 lba_list = pblk_line_emeta_to_lbas(line->emeta);
700 } else if (dir == READ) {
701 bio_op = REQ_OP_READ;
702 cmd_op = NVM_OP_PREAD;
703 flags = pblk_set_read_mode(pblk);
704 } else
705 return -EINVAL;
706
707 memset(&rqd, 0, sizeof(struct nvm_rq));
708
709 rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
710 &rqd.dma_ppa_list);
711 if (!rqd.ppa_list)
712 return -ENOMEM;
713
714 bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
715 if (IS_ERR(bio)) {
716 ret = PTR_ERR(bio);
717 goto free_ppa_list;
718 }
719
720 bio->bi_iter.bi_sector = 0; /* internal bio */
721 bio_set_op_attrs(bio, bio_op, 0);
722
723 rqd.bio = bio;
724 rqd.opcode = cmd_op;
725 rqd.flags = flags;
726 rqd.nr_ppas = lm->smeta_sec;
727 rqd.end_io = pblk_end_io_sync;
728 rqd.private = &wait;
729
730 for (i = 0; i < lm->smeta_sec; i++, paddr++) {
731 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
732 if (dir == WRITE)
733 lba_list[paddr] = cpu_to_le64(ADDR_EMPTY);
734 }
735
736 /*
737 * This I/O is sent by the write thread when a line is replaced. Since
738 * the write thread is the only one sending write and erase commands,
739 * there is no need to take the LUN semaphore.
740 */
741 ret = pblk_submit_io(pblk, &rqd);
742 if (ret) {
743 pr_err("pblk: smeta I/O submission failed: %d\n", ret);
744 bio_put(bio);
745 goto free_ppa_list;
746 }
747
748 if (!wait_for_completion_io_timeout(&wait,
749 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
750 pr_err("pblk: smeta I/O timed out\n");
751 }
752
753 if (rqd.error) {
754 if (dir == WRITE)
755 pblk_log_write_err(pblk, &rqd);
756 else
757 pblk_log_read_err(pblk, &rqd);
758 }
759
760free_ppa_list:
761 nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
762
763 return ret;
764}
765
766int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
767{
768 u64 bpaddr = pblk_line_smeta_start(pblk, line);
769
770 return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
771}
772
773int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line)
774{
775 return pblk_line_submit_emeta_io(pblk, line, line->emeta_ssec, READ);
776}
777
778static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
779 struct ppa_addr ppa)
780{
781 rqd->opcode = NVM_OP_ERASE;
782 rqd->ppa_addr = ppa;
783 rqd->nr_ppas = 1;
784 rqd->flags = pblk_set_progr_mode(pblk, ERASE);
785 rqd->bio = NULL;
786}
787
788static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
789{
790 struct nvm_rq rqd;
791 int ret;
792 DECLARE_COMPLETION_ONSTACK(wait);
793
794 memset(&rqd, 0, sizeof(struct nvm_rq));
795
796 pblk_setup_e_rq(pblk, &rqd, ppa);
797
798 rqd.end_io = pblk_end_io_sync;
799 rqd.private = &wait;
800
801 /* The write thread schedules erases so that it minimizes disturbances
802 * with writes. Thus, there is no need to take the LUN semaphore.
803 */
804 ret = pblk_submit_io(pblk, &rqd);
805 if (ret) {
806 struct nvm_tgt_dev *dev = pblk->dev;
807 struct nvm_geo *geo = &dev->geo;
808
809 pr_err("pblk: could not sync erase line:%d,blk:%d\n",
810 pblk_dev_ppa_to_line(ppa),
811 pblk_dev_ppa_to_pos(geo, ppa));
812
813 rqd.error = ret;
814 goto out;
815 }
816
817 if (!wait_for_completion_io_timeout(&wait,
818 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
819 pr_err("pblk: sync erase timed out\n");
820 }
821
822out:
823 rqd.private = pblk;
824 __pblk_end_io_erase(pblk, &rqd);
825
826 return 0;
827}
828
829int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
830{
831 struct pblk_line_meta *lm = &pblk->lm;
832 struct ppa_addr ppa;
833 int bit = -1;
834
835 /* Erase one block at a time and only erase good blocks */
836 while ((bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
837 bit + 1)) < lm->blk_per_line) {
838 ppa = pblk->luns[bit].bppa; /* set ch and lun */
839 ppa.g.blk = line->id;
840
841 /* If the erase fails, the block is bad and should be marked */
842 line->left_eblks--;
843 WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
844
845 if (pblk_blk_erase_sync(pblk, ppa)) {
846 pr_err("pblk: failed to erase line %d\n", line->id);
847 return -ENOMEM;
848 }
849 }
850
851 return 0;
852}
853
854/* For now lines are always assumed full lines. Thus, smeta former and current
855 * lun bitmaps are omitted.
856 */
857static int pblk_line_set_metadata(struct pblk *pblk, struct pblk_line *line,
858 struct pblk_line *cur)
859{
860 struct nvm_tgt_dev *dev = pblk->dev;
861 struct nvm_geo *geo = &dev->geo;
862 struct pblk_line_meta *lm = &pblk->lm;
863 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
864 struct line_smeta *smeta = line->smeta;
865 struct line_emeta *emeta = line->emeta;
866 int nr_blk_line;
867
868 /* After erasing the line, new bad blocks might appear and we risk
869 * having an invalid line
870 */
871 nr_blk_line = lm->blk_per_line -
872 bitmap_weight(line->blk_bitmap, lm->blk_per_line);
873 if (nr_blk_line < lm->min_blk_line) {
874 spin_lock(&l_mg->free_lock);
875 spin_lock(&line->lock);
876 line->state = PBLK_LINESTATE_BAD;
877 spin_unlock(&line->lock);
878
879 list_add_tail(&line->list, &l_mg->bad_list);
880 spin_unlock(&l_mg->free_lock);
881
882 pr_debug("pblk: line %d is bad\n", line->id);
883
884 return 0;
885 }
886
887 /* Run-time metadata */
888 line->lun_bitmap = ((void *)(smeta)) + sizeof(struct line_smeta);
889
890 /* Mark LUNs allocated in this line (all for now) */
891 bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
892
893 smeta->header.identifier = cpu_to_le32(PBLK_MAGIC);
894 memcpy(smeta->header.uuid, pblk->instance_uuid, 16);
895 smeta->header.id = cpu_to_le32(line->id);
896 smeta->header.type = cpu_to_le16(line->type);
897 smeta->header.version = cpu_to_le16(1);
898
899 /* Start metadata */
900 smeta->seq_nr = cpu_to_le64(line->seq_nr);
901 smeta->window_wr_lun = cpu_to_le32(geo->nr_luns);
902
903 /* Fill metadata among lines */
904 if (cur) {
905 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
906 smeta->prev_id = cpu_to_le32(cur->id);
907 cur->emeta->next_id = cpu_to_le32(line->id);
908 } else {
909 smeta->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
910 }
911
912 /* All smeta must be set at this point */
913 smeta->header.crc = cpu_to_le32(pblk_calc_meta_header_crc(pblk, smeta));
914 smeta->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta));
915
916 /* End metadata */
917 memcpy(&emeta->header, &smeta->header, sizeof(struct line_header));
918 emeta->seq_nr = cpu_to_le64(line->seq_nr);
919 emeta->nr_lbas = cpu_to_le64(line->sec_in_line);
920 emeta->nr_valid_lbas = cpu_to_le64(0);
921 emeta->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
922 emeta->crc = cpu_to_le32(0);
923 emeta->prev_id = smeta->prev_id;
924
925 return 1;
926}
927
928/* Mark bad-block and metadata sectors on the line's mapping bitmaps, write
929 * smeta when initializing a new line, and compute the sectors left for
930 * user data.
931 */
931static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
932 int init)
933{
934 struct nvm_tgt_dev *dev = pblk->dev;
935 struct nvm_geo *geo = &dev->geo;
936 struct pblk_line_meta *lm = &pblk->lm;
937 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
938 int nr_bb = 0;
939 u64 off;
940 int bit = -1;
941
942 line->sec_in_line = lm->sec_per_line;
943
944 /* Capture bad block information on line mapping bitmaps */
945 while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
946 bit + 1)) < lm->blk_per_line) {
947 off = bit * geo->sec_per_pl;
948 bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
949 lm->sec_per_line);
950 bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
951 lm->sec_per_line);
952 line->sec_in_line -= geo->sec_per_blk;
953 if (bit >= lm->emeta_bb)
954 nr_bb++;
955 }
956
957 /* Mark smeta metadata sectors as bad sectors */
958 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
959 off = bit * geo->sec_per_pl;
960retry_smeta:
961 bitmap_set(line->map_bitmap, off, lm->smeta_sec);
962 line->sec_in_line -= lm->smeta_sec;
963 line->smeta_ssec = off;
964 line->cur_sec = off + lm->smeta_sec;
965
966 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
967 pr_debug("pblk: line smeta I/O failed. Retry\n");
968 off += geo->sec_per_pl;
969 goto retry_smeta;
970 }
971
972 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
973
974 /* Mark emeta metadata sectors as bad sectors. We need to consider bad
975 * blocks to make sure that there are enough sectors to store emeta
976 */
977 bit = lm->sec_per_line;
978 off = lm->sec_per_line - lm->emeta_sec;
979 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec);
980 while (nr_bb) {
981 off -= geo->sec_per_pl;
982 if (!test_bit(off, line->invalid_bitmap)) {
983 bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
984 nr_bb--;
985 }
986 }
987
988 line->sec_in_line -= lm->emeta_sec;
989 line->emeta_ssec = off;
990 line->vsc = line->left_ssecs = line->left_msecs = line->sec_in_line;
991
992 if (lm->sec_per_line - line->sec_in_line !=
993 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
994 spin_lock(&line->lock);
995 line->state = PBLK_LINESTATE_BAD;
996 spin_unlock(&line->lock);
997
998 list_add_tail(&line->list, &l_mg->bad_list);
999 pr_err("pblk: unexpected line %d is bad\n", line->id);
1000
1001 return 0;
1002 }
1003
1004 return 1;
1005}
1006
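/*
 * Take a line from the free state to the open state: allocate its map and
 * invalid bitmaps and account for the good blocks that still need erasing.
 */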
1007static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1008{
1009 struct pblk_line_meta *lm = &pblk->lm;
1010
1011 line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1012 if (!line->map_bitmap)
1013 return -ENOMEM;
1014 memset(line->map_bitmap, 0, lm->sec_bitmap_len);
1015
1016 /* invalid_bitmap is special since it is used when the line is closed. No
1017 * need to zero it; it will be initialized using bb info from
1018 * map_bitmap
1019 */
1020 line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1021 if (!line->invalid_bitmap) {
1022 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1023 return -ENOMEM;
1024 }
1025
1026 spin_lock(&line->lock);
1027 if (line->state != PBLK_LINESTATE_FREE) {
1028 spin_unlock(&line->lock);
1029 WARN(1, "pblk: corrupted line state\n");
1030 return -EINTR;
1031 }
1032 line->state = PBLK_LINESTATE_OPEN;
1033 spin_unlock(&line->lock);
1034
1035 /* Bad blocks do not need to be erased */
1036 bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
1037 line->left_eblks = line->blk_in_line;
1038 atomic_set(&line->left_seblks, line->left_eblks);
1039
1040 kref_init(&line->ref);
1041
1042 return 0;
1043}
1044
1045int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
1046{
1047 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1048 int ret;
1049
1050 spin_lock(&l_mg->free_lock);
1051 l_mg->data_line = line;
1052 list_del(&line->list);
1053 spin_unlock(&l_mg->free_lock);
1054
1055 ret = pblk_line_prepare(pblk, line);
1056 if (ret) {
1057 list_add(&line->list, &l_mg->free_list);
1058 return ret;
1059 }
1060
1061 pblk_rl_free_lines_dec(&pblk->rl, line);
1062
1063 if (!pblk_line_init_bb(pblk, line, 0)) {
1064 list_add(&line->list, &l_mg->free_list);
1065 return -EINTR;
1066 }
1067
1068 return 0;
1069}
1070
1071void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
1072{
1073 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1074 line->map_bitmap = NULL;
1075 line->smeta = NULL;
1076 line->emeta = NULL;
1077}
1078
1079struct pblk_line *pblk_line_get(struct pblk *pblk)
1080{
1081 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1082 struct pblk_line_meta *lm = &pblk->lm;
1083 struct pblk_line *line = NULL;
1084 int bit;
1085
1086 lockdep_assert_held(&l_mg->free_lock);
1087
1088retry_get:
1089 if (list_empty(&l_mg->free_list)) {
1090 pr_err("pblk: no free lines\n");
1091 goto out;
1092 }
1093
1094 line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
1095 list_del(&line->list);
1096 l_mg->nr_free_lines--;
1097
1098 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
1099 if (unlikely(bit >= lm->blk_per_line)) {
1100 spin_lock(&line->lock);
1101 line->state = PBLK_LINESTATE_BAD;
1102 spin_unlock(&line->lock);
1103
1104 list_add_tail(&line->list, &l_mg->bad_list);
1105
1106 pr_debug("pblk: line %d is bad\n", line->id);
1107 goto retry_get;
1108 }
1109
1110 if (pblk_line_prepare(pblk, line)) {
1111 pr_err("pblk: failed to prepare line %d\n", line->id);
1112 list_add(&line->list, &l_mg->free_list);
1113 return NULL;
1114 }
1115
1116out:
1117 return line;
1118}
1119
1120static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1121 struct pblk_line *line)
1122{
1123 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1124 struct pblk_line *retry_line;
1125
1126 spin_lock(&l_mg->free_lock);
1127 retry_line = pblk_line_get(pblk);
1128 if (!retry_line) {
1129 spin_unlock(&l_mg->free_lock);
1130 return NULL;
1131 }
1132
1133 retry_line->smeta = line->smeta;
1134 retry_line->emeta = line->emeta;
1135 retry_line->meta_line = line->meta_line;
1136 retry_line->map_bitmap = line->map_bitmap;
1137 retry_line->invalid_bitmap = line->invalid_bitmap;
1138
1139 line->map_bitmap = NULL;
1140 line->invalid_bitmap = NULL;
1141 line->smeta = NULL;
1142 line->emeta = NULL;
1143 spin_unlock(&l_mg->free_lock);
1144
1145 if (pblk_line_erase(pblk, retry_line))
1146 return NULL;
1147
1148 pblk_rl_free_lines_dec(&pblk->rl, retry_line);
1149
1150 l_mg->data_line = retry_line;
1151
1152 return retry_line;
1153}
1154
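/*
 * Called at target bring-up: allocate and erase the first data line, hook
 * up a metadata slot for it, and pre-allocate the next line so a
 * replacement is ready when the current line runs out of space.
 */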
1155struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1156{
1157 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1158 struct pblk_line *line;
1159 int meta_line;
1160 int is_next = 0;
1161
1162 spin_lock(&l_mg->free_lock);
1163 line = pblk_line_get(pblk);
1164 if (!line) {
1165 spin_unlock(&l_mg->free_lock);
1166 return NULL;
1167 }
1168
1169 line->seq_nr = l_mg->d_seq_nr++;
1170 line->type = PBLK_LINETYPE_DATA;
1171 l_mg->data_line = line;
1172
1173 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
1174 set_bit(meta_line, &l_mg->meta_bitmap);
1175 line->smeta = l_mg->sline_meta[meta_line].meta;
1176 line->emeta = l_mg->eline_meta[meta_line].meta;
1177 line->meta_line = meta_line;
1178
1179 /* Allocate next line for preparation */
1180 l_mg->data_next = pblk_line_get(pblk);
1181 if (l_mg->data_next) {
1182 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1183 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1184 is_next = 1;
1185 }
1186 spin_unlock(&l_mg->free_lock);
1187
1188 pblk_rl_free_lines_dec(&pblk->rl, line);
1189 if (is_next)
1190 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1191
1192 if (pblk_line_erase(pblk, line))
1193 return NULL;
1194
1195retry_setup:
1196 if (!pblk_line_set_metadata(pblk, line, NULL)) {
1197 line = pblk_line_retry(pblk, line);
1198 if (!line)
1199 return NULL;
1200
1201 goto retry_setup;
1202 }
1203
1204 if (!pblk_line_init_bb(pblk, line, 1)) {
1205 line = pblk_line_retry(pblk, line);
1206 if (!line)
1207 return NULL;
1208
1209 goto retry_setup;
1210 }
1211
1212 return line;
1213}
1214
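/*
 * Swap the current data line for the pre-allocated next one: wait for its
 * outstanding erases to complete, claim a free metadata slot, and set up
 * smeta/emeta before the first user write lands on the new line.
 */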
1215struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
1216{
1217 struct pblk_line_meta *lm = &pblk->lm;
1218 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1219 struct pblk_line *cur, *new;
1220 unsigned int left_seblks;
1221 int meta_line;
1222 int is_next = 0;
1223
1224 cur = l_mg->data_line;
1225 new = l_mg->data_next;
1226 if (!new)
1227 return NULL;
1228 l_mg->data_line = new;
1229
1230retry_line:
1231 left_seblks = atomic_read(&new->left_seblks);
1232 if (left_seblks) {
1233 /* If line is not fully erased, erase it */
1234 if (new->left_eblks) {
1235 if (pblk_line_erase(pblk, new))
1236 return NULL;
1237 } else {
1238 io_schedule();
1239 }
1240 goto retry_line;
1241 }
1242
1243 spin_lock(&l_mg->free_lock);
1244 /* Allocate next line for preparation */
1245 l_mg->data_next = pblk_line_get(pblk);
1246 if (l_mg->data_next) {
1247 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1248 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1249 is_next = 1;
1250 }
1251
1252retry_meta:
1253 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
1254 if (meta_line == PBLK_DATA_LINES) {
1255 spin_unlock(&l_mg->free_lock);
1256 io_schedule();
1257 spin_lock(&l_mg->free_lock);
1258 goto retry_meta;
1259 }
1260
1261 set_bit(meta_line, &l_mg->meta_bitmap);
1262 new->smeta = l_mg->sline_meta[meta_line].meta;
1263 new->emeta = l_mg->eline_meta[meta_line].meta;
1264 new->meta_line = meta_line;
1265
1266 memset(new->smeta, 0, lm->smeta_len);
1267 memset(new->emeta, 0, lm->emeta_len);
1268 spin_unlock(&l_mg->free_lock);
1269
1270 if (is_next)
1271 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1272
1273retry_setup:
1274 if (!pblk_line_set_metadata(pblk, new, cur)) {
1275 new = pblk_line_retry(pblk, new);
1276 if (!new)
1277 return NULL;
1278
1279 goto retry_setup;
1280 }
1281
1282 if (!pblk_line_init_bb(pblk, new, 1)) {
1283 new = pblk_line_retry(pblk, new);
1284 if (!new)
1285 return NULL;
1286
1287 goto retry_setup;
1288 }
1289
1290 return new;
1291}
1292
1293void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
1294{
1295 if (line->map_bitmap)
1296 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1297 if (line->invalid_bitmap)
1298 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1299
1300 line->map_bitmap = NULL;
1301 line->invalid_bitmap = NULL;
1302}
1303
1304void pblk_line_put(struct kref *ref)
1305{
1306 struct pblk_line *line = container_of(ref, struct pblk_line, ref);
1307 struct pblk *pblk = line->pblk;
1308 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1309
1310 spin_lock(&line->lock);
1311 WARN_ON(line->state != PBLK_LINESTATE_GC);
1312 line->state = PBLK_LINESTATE_FREE;
1313 line->gc_group = PBLK_LINEGC_NONE;
1314 pblk_line_free(pblk, line);
1315 spin_unlock(&line->lock);
1316
1317 spin_lock(&l_mg->free_lock);
1318 list_add_tail(&line->list, &l_mg->free_list);
1319 l_mg->nr_free_lines++;
1320 spin_unlock(&l_mg->free_lock);
1321
1322 pblk_rl_free_lines_inc(&pblk->rl, line);
1323}
1324
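/*
 * Asynchronous block erase; completion is handled by pblk_end_io_erase(),
 * which marks the block as bad if the erase failed.
 */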
1325int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
1326{
1327 struct nvm_rq *rqd;
1328 int err;
1329
1330 rqd = mempool_alloc(pblk->r_rq_pool, GFP_KERNEL);
1331 memset(rqd, 0, pblk_r_rq_size);
1332
1333 pblk_setup_e_rq(pblk, rqd, ppa);
1334
1335 rqd->end_io = pblk_end_io_erase;
1336 rqd->private = pblk;
1337
1338 /* The write thread schedules erases so that it minimizes disturbances
1339 * with writes. Thus, there is no need to take the LUN semaphore.
1340 */
1341 err = pblk_submit_io(pblk, rqd);
1342 if (err) {
1343 struct nvm_tgt_dev *dev = pblk->dev;
1344 struct nvm_geo *geo = &dev->geo;
1345
1346 pr_err("pblk: could not async erase line:%d,blk:%d\n",
1347 pblk_dev_ppa_to_line(ppa),
1348 pblk_dev_ppa_to_pos(geo, ppa));
1349 }
1350
1351 return err;
1352}
1353
1354struct pblk_line *pblk_line_get_data(struct pblk *pblk)
1355{
1356 return pblk->l_mg.data_line;
1357}
1358
1359struct pblk_line *pblk_line_get_data_next(struct pblk *pblk)
1360{
1361 return pblk->l_mg.data_next;
1362}
1363
1364int pblk_line_is_full(struct pblk_line *line)
1365{
1366 return (line->left_msecs == 0);
1367}
1368
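/*
 * Close a fully written line: persist its end metadata (emeta) with a
 * valid CRC, release the metadata slot, and queue the line on the GC list
 * matching its valid sector count.
 */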
1369void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1370{
1371 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1372 struct list_head *move_list;
1373
1374 line->emeta->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, line->emeta));
1375
1376 if (pblk_line_submit_emeta_io(pblk, line, line->cur_sec, WRITE))
1377 pr_err("pblk: line %d close I/O failed\n", line->id);
1378
1379 WARN(!bitmap_full(line->map_bitmap, line->sec_in_line),
1380 "pblk: corrupt closed line %d\n", line->id);
1381
1382 spin_lock(&l_mg->free_lock);
1383 WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
1384 spin_unlock(&l_mg->free_lock);
1385
1386 spin_lock(&l_mg->gc_lock);
1387 spin_lock(&line->lock);
1388 WARN_ON(line->state != PBLK_LINESTATE_OPEN);
1389 line->state = PBLK_LINESTATE_CLOSED;
1390 move_list = pblk_line_gc_list(pblk, line);
1391
1392 list_add_tail(&line->list, move_list);
1393
1394 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1395 line->map_bitmap = NULL;
1396 line->smeta = NULL;
1397 line->emeta = NULL;
1398
1399 spin_unlock(&line->lock);
1400 spin_unlock(&l_mg->gc_lock);
1401}
1402
1403void pblk_line_close_ws(struct work_struct *work)
1404{
1405 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1406 ws);
1407 struct pblk *pblk = line_ws->pblk;
1408 struct pblk_line *line = line_ws->line;
1409
1410 pblk_line_close(pblk, line);
1411 mempool_free(line_ws, pblk->line_ws_pool);
1412}
1413
1414void pblk_line_mark_bb(struct work_struct *work)
1415{
1416 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1417 ws);
1418 struct pblk *pblk = line_ws->pblk;
1419 struct nvm_tgt_dev *dev = pblk->dev;
1420 struct ppa_addr *ppa = line_ws->priv;
1421 int ret;
1422
1423 ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
1424 if (ret) {
1425 struct pblk_line *line;
1426 int pos;
1427
1428 line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
1429 pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
1430
1431 pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
1432 line->id, pos);
1433 }
1434
1435 kfree(ppa);
1436 mempool_free(line_ws, pblk->line_ws_pool);
1437}
1438
1439void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1440 void (*work)(struct work_struct *))
1441{
1442 struct pblk_line_ws *line_ws;
1443
1444 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
1445 if (!line_ws)
1446 return;
1447
1448 line_ws->pblk = pblk;
1449 line_ws->line = line;
1450 line_ws->priv = priv;
1451
1452 INIT_WORK(&line_ws->ws, work);
1453 queue_work(pblk->kw_wq, &line_ws->ws);
1454}
1455
1456void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1457 unsigned long *lun_bitmap)
1458{
1459 struct nvm_tgt_dev *dev = pblk->dev;
1460 struct nvm_geo *geo = &dev->geo;
1461 struct pblk_lun *rlun;
1462 int lun_id = ppa_list[0].g.ch * geo->luns_per_chnl + ppa_list[0].g.lun;
1463 int ret;
1464
1465 /*
1466 * Only send one inflight I/O per LUN. Since we map at a page
1467 * granularity, all ppas in the I/O will map to the same LUN
1468 */
1469#ifdef CONFIG_NVM_DEBUG
1470 int i;
1471
1472 for (i = 1; i < nr_ppas; i++)
1473 WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
1474 ppa_list[0].g.ch != ppa_list[i].g.ch);
1475#endif
1476 /* If the LUN has been locked for this same request, do not attempt to
1477 * lock it again
1478 */
1479 if (test_and_set_bit(lun_id, lun_bitmap))
1480 return;
1481
1482 rlun = &pblk->luns[lun_id];
1483 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
1484 if (ret) {
1485 switch (ret) {
1486 case -ETIME:
1487 pr_err("pblk: lun semaphore timed out\n");
1488 break;
1489 case -EINTR:
1490 pr_err("pblk: lun semaphore interrupted\n");
1491 break;
1492 }
1493 }
1494}
1495
1496void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1497 unsigned long *lun_bitmap)
1498{
1499 struct nvm_tgt_dev *dev = pblk->dev;
1500 struct nvm_geo *geo = &dev->geo;
1501 struct pblk_lun *rlun;
1502 int nr_luns = geo->nr_luns;
1503 int bit = -1;
1504
1505 while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
1506 rlun = &pblk->luns[bit];
1507 up(&rlun->wr_sem);
1508 }
1509
1510 kfree(lun_bitmap);
1511}
1512
1513void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1514{
1515 struct ppa_addr l2p_ppa;
1516
1517 /* logic error: lba out-of-bounds. Ignore update */
1518 if (!(lba < pblk->rl.nr_secs)) {
1519 WARN(1, "pblk: corrupted L2P map request\n");
1520 return;
1521 }
1522
1523 spin_lock(&pblk->trans_lock);
1524 l2p_ppa = pblk_trans_map_get(pblk, lba);
1525
1526 if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
1527 pblk_map_invalidate(pblk, l2p_ppa);
1528
1529 pblk_trans_map_set(pblk, lba, ppa);
1530 spin_unlock(&pblk->trans_lock);
1531}
1532
1533void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1534{
1535#ifdef CONFIG_NVM_DEBUG
1536 /* Callers must ensure that the ppa points to a cache address */
1537 BUG_ON(!pblk_addr_in_cache(ppa));
1538 BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1539#endif
1540
1541 pblk_update_map(pblk, lba, ppa);
1542}
1543
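/*
 * L2P update on behalf of garbage collection: only remap the sector if it
 * still lives on the line being collected. If a newer host write has moved
 * it to the cache or to another line, the GC copy is stale and is dropped.
 */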
1544int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1545 struct pblk_line *gc_line)
1546{
1547 struct ppa_addr l2p_ppa;
1548 int ret = 1;
1549
1550#ifdef CONFIG_NVM_DEBUG
1551 /* Callers must ensure that the ppa points to a cache address */
1552 BUG_ON(!pblk_addr_in_cache(ppa));
1553 BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1554#endif
1555
1556 /* logic error: lba out-of-bounds. Ignore update */
1557 if (!(lba < pblk->rl.nr_secs)) {
1558 WARN(1, "pblk: corrupted L2P map request\n");
1559 return 0;
1560 }
1561
1562 spin_lock(&pblk->trans_lock);
1563 l2p_ppa = pblk_trans_map_get(pblk, lba);
1564
1565 /* Prevent updated entries to be overwritten by GC */
1566 if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
1567 pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
1568 ret = 0;
1569 goto out;
1570 }
1571
1572 pblk_trans_map_set(pblk, lba, ppa);
1573out:
1574 spin_unlock(&pblk->trans_lock);
1575 return ret;
1576}
1577
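/*
 * L2P update when a cached entry is persisted to the device: commit the
 * device address only if the mapping still points to the same cache line;
 * otherwise the sector was overwritten while cached and the freshly written
 * physical address is invalidated instead. Padded entries are discarded.
 */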
1578void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1579 struct ppa_addr entry_line)
1580{
1581 struct ppa_addr l2p_line;
1582
1583#ifdef CONFIG_NVM_DEBUG
1584 /* Callers must ensure that the ppa points to a device address */
1585 BUG_ON(pblk_addr_in_cache(ppa));
1586#endif
1587 /* Invalidate and discard padded entries */
1588 if (lba == ADDR_EMPTY) {
1589#ifdef CONFIG_NVM_DEBUG
1590 atomic_long_inc(&pblk->padded_wb);
1591#endif
1592 pblk_map_invalidate(pblk, ppa);
1593 return;
1594 }
1595
1596 /* logic error: lba out-of-bounds. Ignore update */
1597 if (!(lba < pblk->rl.nr_secs)) {
1598 WARN(1, "pblk: corrupted L2P map request\n");
1599 return;
1600 }
1601
1602 spin_lock(&pblk->trans_lock);
1603 l2p_line = pblk_trans_map_get(pblk, lba);
1604
1605 /* Do not update L2P if the cacheline has been updated. In this case,
1606 * the mapped ppa must be invalidated
1607 */
1608 if (l2p_line.ppa != entry_line.ppa) {
1609 if (!pblk_ppa_empty(ppa))
1610 pblk_map_invalidate(pblk, ppa);
1611 goto out;
1612 }
1613
1614#ifdef CONFIG_NVM_DEBUG
1615 WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
1616#endif
1617
1618 pblk_trans_map_set(pblk, lba, ppa);
1619out:
1620 spin_unlock(&pblk->trans_lock);
1621}
1622
1623void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
1624 sector_t blba, int nr_secs)
1625{
1626 int i;
1627
1628 spin_lock(&pblk->trans_lock);
1629 for (i = 0; i < nr_secs; i++)
1630 ppas[i] = pblk_trans_map_get(pblk, blba + i);
1631 spin_unlock(&pblk->trans_lock);
1632}
1633
1634void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
1635 u64 *lba_list, int nr_secs)
1636{
1637 sector_t lba;
1638 int i;
1639
1640 spin_lock(&pblk->trans_lock);
1641 for (i = 0; i < nr_secs; i++) {
1642 lba = lba_list[i];
1643 if (lba == ADDR_EMPTY) {
1644 ppas[i].ppa = ADDR_EMPTY;
1645 } else {
1646 /* logic error: lba out-of-bounds. Ignore update */
1647 if (!(lba < pblk->rl.nr_secs)) {
1648 WARN(1, "pblk: corrupted L2P map request\n");
1649 continue;
1650 }
1651 ppas[i] = pblk_trans_map_get(pblk, lba);
1652 }
1653 }
1654 spin_unlock(&pblk->trans_lock);
1655}