author		Javier González <jg@lightnvm.io>	2017-04-15 14:55:50 -0400
committer	Jens Axboe <axboe@fb.com>	2017-04-16 12:06:33 -0400
commit		a4bd217b432685d6a177c28a2af187f041c473b7 (patch)
tree		3670d0322655bdef412c415e04c8515e865c1e37 /drivers/lightnvm/pblk-init.c
parent		6eb082452df1218e9c0ce1168c456f839ce5acb2 (diff)
lightnvm: physical block device (pblk) target
This patch introduces pblk, a host-side translation layer for Open-Channel SSDs that exposes them as block devices. The translation layer allows data placement and I/O scheduling decisions to be managed by the host, enabling users to optimize the SSD for their specific workloads.

An open-channel SSD has a set of LUNs (parallel units) and a collection of blocks. Each block can be read in any order, but writes must be sequential. Writes may also fail, and a block must then be reset before new writes can be applied to it.

To manage these constraints, pblk maintains a logical to physical address (L2P) table, a write cache, garbage collection logic, a recovery scheme, and logic to rate-limit user I/Os versus garbage collection I/Os.

The L2P table is fully associative and manages sectors at a 4KB granularity. pblk stores the L2P table in two places: in the out-of-band area of the media and on the last page of a line. In case of a power failure, pblk performs a scan to recover the L2P table.

The user data is organized into lines. A line is data striped across blocks and LUNs. Lines enable the host to reduce the amount of metadata maintained besides the user data, and make it easier to implement RAID or erasure coding in the future.

pblk implements multi-tenant support and can be instantiated multiple times on the same drive. Each instance owns a portion of the SSD - both regarding I/O bandwidth and capacity - providing I/O isolation for each instance.

Finally, pblk also exposes a sysfs interface that allows user-space to peek into the internals of pblk. The interface is available at /dev/block/*/pblk/ where * is the block device name exposed.

This work also contains contributions from:
  Matias Bjørling <matias@cnexlabs.com>
  Simon A. F. Lund <slund@cnexlabs.com>
  Young Tack Jin <youngtack.jin@gmail.com>
  Huaicheng Li <huaicheng@cs.uchicago.edu>

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
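As a rough illustration of the fully associative L2P scheme described above, here is a minimal userspace sketch (names here are illustrative only, not pblk symbols; the in-kernel table is built in pblk_l2p_init() below):

    /* One physical address per 4KB logical sector; any logical sector may
     * map to any physical sector, which is what "fully associative" means.
     */
    #include <stdint.h>
    #include <stdlib.h>

    #define EMPTY_PPA UINT64_MAX            /* unmapped marker */

    struct l2p {
            uint64_t *map;                  /* indexed by logical sector */
            uint64_t nr_secs;
    };

    static int l2p_init(struct l2p *t, uint64_t nr_secs)
    {
            uint64_t i;

            t->map = malloc(nr_secs * sizeof(uint64_t));
            if (!t->map)
                    return -1;
            t->nr_secs = nr_secs;
            for (i = 0; i < nr_secs; i++)
                    t->map[i] = EMPTY_PPA;  /* all sectors start unmapped */
            return 0;
    }

    /* On a write, data lands wherever the current line has room; only this
     * table records where. Reads consult it the same way.
     */
    static void l2p_update(struct l2p *t, uint64_t lba, uint64_t ppa)
    {
            t->map[lba] = ppa;
    }

    static uint64_t l2p_lookup(struct l2p *t, uint64_t lba)
    {
            return t->map[lba];
    }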
Diffstat (limited to 'drivers/lightnvm/pblk-init.c')
-rw-r--r--	drivers/lightnvm/pblk-init.c	949
1 file changed, 949 insertions(+), 0 deletions(-)
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
new file mode 100644
index 000000000000..94653b1f1300
--- /dev/null
+++ b/drivers/lightnvm/pblk-init.c
@@ -0,0 +1,949 @@
/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */

#include "pblk.h"

static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_r_rq_cache,
			*pblk_w_rq_cache, *pblk_line_meta_cache;
static DECLARE_RWSEM(pblk_lock);

static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
			  struct bio *bio)
{
	int ret;

	/* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
	 * constraint. Writes can be of arbitrary size.
	 */
	if (bio_data_dir(bio) == READ) {
		blk_queue_split(q, &bio, q->bio_split);
		ret = pblk_submit_read(pblk, bio);
		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
			bio_put(bio);

		return ret;
	}

	/* Prevent deadlock in the case of a modest LUN configuration and large
	 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
	 * available for user I/O.
	 */
	if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl)))
		blk_queue_split(q, &bio, q->bio_split);

	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}

static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
{
	struct pblk *pblk = q->queuedata;

	if (bio_op(bio) == REQ_OP_DISCARD) {
		pblk_discard(pblk, bio);
		if (!(bio->bi_opf & REQ_PREFLUSH)) {
			bio_endio(bio);
			return BLK_QC_T_NONE;
		}
	}

	switch (pblk_rw_io(q, pblk, bio)) {
	case NVM_IO_ERR:
		bio_io_error(bio);
		break;
	case NVM_IO_DONE:
		bio_endio(bio);
		break;
	}

	return BLK_QC_T_NONE;
}

static void pblk_l2p_free(struct pblk *pblk)
{
	vfree(pblk->trans_map);
}

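/*
 * The L2P table keeps one entry per 4KB logical sector. When the device's
 * physical address format fits in 32 bits, 4-byte entries are used instead
 * of 8-byte ones, halving the table footprint: e.g. 1TB worth of 4KB
 * sectors needs 1GB of map with 4-byte entries instead of 2GB.
 */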
static int pblk_l2p_init(struct pblk *pblk)
{
	sector_t i;
	struct ppa_addr ppa;
	int entry_size = 8;

	if (pblk->ppaf_bitsize < 32)
		entry_size = 4;

	pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs);
	if (!pblk->trans_map)
		return -ENOMEM;

	pblk_ppa_set_empty(&ppa);

	for (i = 0; i < pblk->rl.nr_secs; i++)
		pblk_trans_map_set(pblk, i, ppa);

	return 0;
}

static void pblk_rwb_free(struct pblk *pblk)
{
	if (pblk_rb_tear_down_check(&pblk->rwb))
		pr_err("pblk: write buffer error on tear down\n");

	pblk_rb_data_free(&pblk->rwb);
	vfree(pblk_rb_entries_ref(&pblk->rwb));
}

static int pblk_rwb_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_rb_entry *entries;
	unsigned long nr_entries;
	unsigned int power_size, power_seg_sz;

	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);

	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
	if (!entries)
		return -ENOMEM;

	power_size = get_count_order(nr_entries);
	power_seg_sz = get_count_order(geo->sec_size);

	return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
}

/* Minimum pages needed within a lun */
#define PAGE_POOL_SIZE 16
#define ADDR_POOL_SIZE 64

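/*
 * pblk packs a physical address into bit fields ordered, from least to most
 * significant: sector, plane, channel, LUN, page, block. The offsets and
 * masks computed below let each field be extracted with a shift and an AND.
 */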
static int pblk_set_ppaf(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_addr_format ppaf = geo->ppaf;
	int power_len;

	/* Re-calculate channel and lun format to adapt to configuration */
	power_len = get_count_order(geo->nr_chnls);
	if (1 << power_len != geo->nr_chnls) {
		pr_err("pblk: supports only power-of-two channel config.\n");
		return -EINVAL;
	}
	ppaf.ch_len = power_len;

	power_len = get_count_order(geo->luns_per_chnl);
	if (1 << power_len != geo->luns_per_chnl) {
		pr_err("pblk: supports only power-of-two LUN config.\n");
		return -EINVAL;
	}
	ppaf.lun_len = power_len;

	pblk->ppaf.sec_offset = 0;
	pblk->ppaf.pln_offset = ppaf.sect_len;
	pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
	pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
	pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
	pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
	pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
	pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
							pblk->ppaf.pln_offset;
	pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
							pblk->ppaf.ch_offset;
	pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
							pblk->ppaf.lun_offset;
	pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
							pblk->ppaf.pg_offset;
	pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
							pblk->ppaf.blk_offset;

	pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;

	return 0;
}

static int pblk_init_global_caches(struct pblk *pblk)
{
	char cache_name[PBLK_CACHE_NAME_LEN];

	down_write(&pblk_lock);
	pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws",
				sizeof(struct pblk_line_ws), 0, 0, NULL);
	if (!pblk_blk_ws_cache) {
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_rec_cache = kmem_cache_create("pblk_rec",
				sizeof(struct pblk_rec_ctx), 0, 0, NULL);
	if (!pblk_rec_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_r_rq_cache = kmem_cache_create("pblk_r_rq", pblk_r_rq_size,
				0, 0, NULL);
	if (!pblk_r_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
				0, 0, NULL);
	if (!pblk_w_rq_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_r_rq_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s",
							pblk->disk->disk_name);
	pblk_line_meta_cache = kmem_cache_create(cache_name,
				pblk->lm.sec_bitmap_len, 0, 0, NULL);
	if (!pblk_line_meta_cache) {
		kmem_cache_destroy(pblk_blk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_r_rq_cache);
		kmem_cache_destroy(pblk_w_rq_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}
	up_write(&pblk_lock);

	return 0;
}

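/*
 * Worked example of the write-unit sizing below (hypothetical geometry):
 * with 4KB sectors (sec_size == PAGE_SIZE), 4 sectors per plane-page and
 * 8 LUNs, min_write_pgs is 4 and max_write_ppas is 32, further capped by
 * the device's max_phys_sect and by PBLK_MAX_REQ_ADDRS.
 */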
static int pblk_core_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int max_write_ppas;
	int mod;

	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
	max_write_ppas = pblk->min_write_pgs * geo->nr_luns;
	pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ?
				max_write_ppas : nvm_max_phys_sects(dev);
	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
						geo->nr_planes * geo->nr_luns;

	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
		pr_err("pblk: cannot support device max_phys_sect\n");
		return -EINVAL;
	}

	div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod);
	if (mod) {
		pr_err("pblk: bad configuration of sectors/pages\n");
		return -EINVAL;
	}

	if (pblk_init_global_caches(pblk))
		return -ENOMEM;

	pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
	if (!pblk->page_pool)
		return -ENOMEM;

	pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns,
							pblk_blk_ws_cache);
	if (!pblk->line_ws_pool)
		goto free_page_pool;

	pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache);
	if (!pblk->rec_pool)
		goto free_blk_ws_pool;

	pblk->r_rq_pool = mempool_create_slab_pool(64, pblk_r_rq_cache);
	if (!pblk->r_rq_pool)
		goto free_rec_pool;

	pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache);
	if (!pblk->w_rq_pool)
		goto free_r_rq_pool;

	pblk->line_meta_pool =
			mempool_create_slab_pool(16, pblk_line_meta_cache);
	if (!pblk->line_meta_pool)
		goto free_w_rq_pool;

	pblk->kw_wq = alloc_workqueue("pblk-aux-wq",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!pblk->kw_wq)
		goto free_line_meta_pool;

	if (pblk_set_ppaf(pblk))
		goto free_kw_wq;

	if (pblk_rwb_init(pblk))
		goto free_kw_wq;

	INIT_LIST_HEAD(&pblk->compl_list);
	return 0;

free_kw_wq:
	destroy_workqueue(pblk->kw_wq);
free_line_meta_pool:
	mempool_destroy(pblk->line_meta_pool);
free_w_rq_pool:
	mempool_destroy(pblk->w_rq_pool);
free_r_rq_pool:
	mempool_destroy(pblk->r_rq_pool);
free_rec_pool:
	mempool_destroy(pblk->rec_pool);
free_blk_ws_pool:
	mempool_destroy(pblk->line_ws_pool);
free_page_pool:
	mempool_destroy(pblk->page_pool);
	return -ENOMEM;
}

static void pblk_core_free(struct pblk *pblk)
{
	if (pblk->kw_wq)
		destroy_workqueue(pblk->kw_wq);

	mempool_destroy(pblk->page_pool);
	mempool_destroy(pblk->line_ws_pool);
	mempool_destroy(pblk->rec_pool);
	mempool_destroy(pblk->r_rq_pool);
	mempool_destroy(pblk->w_rq_pool);
	mempool_destroy(pblk->line_meta_pool);

	kmem_cache_destroy(pblk_blk_ws_cache);
	kmem_cache_destroy(pblk_rec_cache);
	kmem_cache_destroy(pblk_r_rq_cache);
	kmem_cache_destroy(pblk_w_rq_cache);
	kmem_cache_destroy(pblk_line_meta_cache);
}

static void pblk_luns_free(struct pblk *pblk)
{
	kfree(pblk->luns);
}

static void pblk_lines_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int i;

	spin_lock(&l_mg->free_lock);
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];

		pblk_line_free(pblk, line);
		kfree(line->blk_bitmap);
		kfree(line->erase_bitmap);
	}
	spin_unlock(&l_mg->free_lock);
}

static void pblk_line_meta_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int i;

	kfree(l_mg->bb_template);
	kfree(l_mg->bb_aux);

	for (i = 0; i < PBLK_DATA_LINES; i++) {
		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
	}

	kfree(pblk->lines);
}

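/*
 * Query the device's bad block table for one LUN. The raw table carries one
 * entry per plane block; nvm_bb_tbl_fold() collapses it to one entry per
 * block, with bad marks on any plane taking precedence.
 */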
static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun)
{
	struct nvm_geo *geo = &dev->geo;
	struct ppa_addr ppa;
	u8 *blks;
	int nr_blks, ret;

	nr_blks = geo->blks_per_lun * geo->plane_mode;
	blks = kmalloc(nr_blks, GFP_KERNEL);
	if (!blks)
		return -ENOMEM;

	ppa.ppa = 0;
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;

	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
	if (ret)
		goto out_free;

	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
	if (nr_blks < 0) {
		ret = nr_blks;
		goto out_free;
	}

	rlun->bb_list = blks;

	return 0;

out_free:
	/* Do not keep a reference to a table that failed to populate */
	kfree(blks);
	return ret;
}

static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_lun *rlun;
	int bb_cnt = 0;
	int i;

	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->blk_bitmap)
		return -ENOMEM;

	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->erase_bitmap) {
		kfree(line->blk_bitmap);
		return -ENOMEM;
	}

	for (i = 0; i < lm->blk_per_line; i++) {
		rlun = &pblk->luns[i];
		if (rlun->bb_list[line->id] == NVM_BLK_T_FREE)
			continue;

		set_bit(i, line->blk_bitmap);
		bb_cnt++;
	}

	return bb_cnt;
}

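/*
 * Consecutive entries in pblk->luns stripe across channels: e.g. with two
 * channels and two LUNs per channel, the order is (ch0,lun0), (ch1,lun0),
 * (ch0,lun1), (ch1,lun1), so writes to a line hit all channels in parallel.
 */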
static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int i, ret;

	/* TODO: Implement unbalanced LUN support */
	if (geo->luns_per_chnl < 0) {
		pr_err("pblk: unbalanced LUN config.\n");
		return -EINVAL;
	}

	pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL);
	if (!pblk->luns)
		return -ENOMEM;

	for (i = 0; i < geo->nr_luns; i++) {
		/* Stripe across channels */
		int ch = i % geo->nr_chnls;
		int lun_raw = i / geo->nr_chnls;
		int lunid = lun_raw + ch * geo->luns_per_chnl;

		rlun = &pblk->luns[i];
		rlun->bppa = luns[lunid];

		sema_init(&rlun->wr_sem, 1);

		ret = pblk_bb_discovery(dev, rlun);
		if (ret) {
			while (--i >= 0)
				kfree(pblk->luns[i].bb_list);
			return ret;
		}
	}

	return 0;
}

static int pblk_lines_configure(struct pblk *pblk, int flags)
{
	struct pblk_line *line = NULL;
	int ret = 0;

	if (!(flags & NVM_TARGET_FACTORY)) {
		line = pblk_recov_l2p(pblk);
		if (IS_ERR(line)) {
			pr_err("pblk: could not recover l2p table\n");
			ret = -EFAULT;
		}
	}

	if (!line) {
		/* Configure next line for user data */
		line = pblk_line_get_first_data(pblk);
		if (!line) {
			pr_err("pblk: line list corrupted\n");
			ret = -EFAULT;
		}
	}

	return ret;
}

/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm)
{
	return (sizeof(struct line_emeta) +
			((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) +
			(pblk->l_mg.nr_lines * sizeof(u32)) +
			lm->blk_bitmap_len);
}

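/*
 * Worked example of the fixed 20% over-provisioning below: with 1000 free
 * blocks, 800 back the exposed user capacity and 200 stay reserved as
 * headroom for garbage collection.
 */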
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	sector_t provisioned;

	pblk->over_pct = 20;

	provisioned = nr_free_blks;
	provisioned *= (100 - pblk->over_pct);
	sector_div(provisioned, 100);

	/* Internally pblk manages all free blocks, but all calculations based
	 * on user capacity consider only provisioned blocks
	 */
	pblk->rl.total_blocks = nr_free_blks;
	pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk;
	pblk->capacity = provisioned * geo->sec_per_blk;
	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
}

static int pblk_lines_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line;
	unsigned int smeta_len, emeta_len;
	long nr_bad_blks, nr_meta_blks, nr_free_blks;
	int bb_distance;
	int i;
	int ret = 0;

	lm->sec_per_line = geo->sec_per_blk * geo->nr_luns;
	lm->blk_per_line = geo->nr_luns;
	lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
	lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long);
	lm->high_thrs = lm->sec_per_line / 2;
	lm->mid_thrs = lm->sec_per_line / 4;

	/* Calculate necessary pages for smeta. See comment over struct
	 * line_smeta definition
	 */
	lm->smeta_len = sizeof(struct line_smeta) +
				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;

	i = 1;
add_smeta_page:
	lm->smeta_sec = i * geo->sec_per_pl;
	lm->smeta_len = lm->smeta_sec * geo->sec_size;

	smeta_len = sizeof(struct line_smeta) +
				PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len;
	if (smeta_len > lm->smeta_len) {
		i++;
		goto add_smeta_page;
	}

	/* Calculate necessary pages for emeta. See comment over struct
	 * line_emeta definition
	 */
	i = 1;
add_emeta_page:
	lm->emeta_sec = i * geo->sec_per_pl;
	lm->emeta_len = lm->emeta_sec * geo->sec_size;

	emeta_len = calc_emeta_len(pblk, lm);
	if (emeta_len > lm->emeta_len) {
		i++;
		goto add_emeta_page;
	}
	lm->emeta_bb = geo->nr_luns - i;

	nr_meta_blks = (lm->smeta_sec + lm->emeta_sec +
				(geo->sec_per_blk / 2)) / geo->sec_per_blk;
	lm->min_blk_line = nr_meta_blks + 1;

	l_mg->nr_lines = geo->blks_per_lun;
	l_mg->log_line = l_mg->data_line = NULL;
	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
	l_mg->nr_free_lines = 0;
	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);

	/* smeta is always small enough to fit on a kmalloc memory allocation,
	 * emeta depends on the number of LUNs allocated to the pblk instance
	 */
	l_mg->smeta_alloc_type = PBLK_KMALLOC_META;
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL);
		if (!l_mg->sline_meta[i].meta) {
			ret = -ENOMEM;
			goto fail_free_meta;
		}
	}

	if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) {
		l_mg->emeta_alloc_type = PBLK_VMALLOC_META;

		for (i = 0; i < PBLK_DATA_LINES; i++) {
			l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len);
			if (!l_mg->eline_meta[i].meta) {
				ret = -ENOMEM;
				goto fail_free_meta;
			}
		}
	} else {
		l_mg->emeta_alloc_type = PBLK_KMALLOC_META;

		for (i = 0; i < PBLK_DATA_LINES; i++) {
			l_mg->eline_meta[i].meta =
					kmalloc(lm->emeta_len, GFP_KERNEL);
			if (!l_mg->eline_meta[i].meta) {
				ret = -ENOMEM;
				goto fail_free_meta;
			}
		}
	}

	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_template) {
		ret = -ENOMEM;
		goto fail_free_meta;
	}

	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_aux) {
		ret = -ENOMEM;
		goto fail_free_bb_template;
	}

	bb_distance = (geo->nr_luns) * geo->sec_per_pl;
	for (i = 0; i < lm->sec_per_line; i += bb_distance)
		bitmap_set(l_mg->bb_template, i, geo->sec_per_pl);

	INIT_LIST_HEAD(&l_mg->free_list);
	INIT_LIST_HEAD(&l_mg->corrupt_list);
	INIT_LIST_HEAD(&l_mg->bad_list);
	INIT_LIST_HEAD(&l_mg->gc_full_list);
	INIT_LIST_HEAD(&l_mg->gc_high_list);
	INIT_LIST_HEAD(&l_mg->gc_mid_list);
	INIT_LIST_HEAD(&l_mg->gc_low_list);
	INIT_LIST_HEAD(&l_mg->gc_empty_list);

	l_mg->gc_lists[0] = &l_mg->gc_high_list;
	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
	l_mg->gc_lists[2] = &l_mg->gc_low_list;

	spin_lock_init(&l_mg->free_lock);
	spin_lock_init(&l_mg->gc_lock);

	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
								GFP_KERNEL);
	if (!pblk->lines) {
		ret = -ENOMEM;
		goto fail_free_bb_aux;
	}

	nr_free_blks = 0;
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];

		line->pblk = pblk;
		line->id = i;
		line->type = PBLK_LINETYPE_FREE;
		line->state = PBLK_LINESTATE_FREE;
		line->gc_group = PBLK_LINEGC_NONE;
		spin_lock_init(&line->lock);

		nr_bad_blks = pblk_bb_line(pblk, line);
		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) {
			ret = (nr_bad_blks < 0) ? nr_bad_blks : -EINVAL;
			goto fail_free_lines;
		}

		line->blk_in_line = lm->blk_per_line - nr_bad_blks;
		if (line->blk_in_line < lm->min_blk_line) {
			line->state = PBLK_LINESTATE_BAD;
			list_add_tail(&line->list, &l_mg->bad_list);
			continue;
		}

		nr_free_blks += line->blk_in_line;

		l_mg->nr_free_lines++;
		list_add_tail(&line->list, &l_mg->free_list);
	}

	pblk_set_provision(pblk, nr_free_blks);

	sema_init(&pblk->erase_sem, 1);

	/* Cleanup per-LUN bad block lists - managed within lines on run-time */
	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);

	return 0;

fail_free_lines:
	/* Release per-line bitmaps allocated on earlier iterations */
	while (--i >= 0) {
		kfree(pblk->lines[i].blk_bitmap);
		kfree(pblk->lines[i].erase_bitmap);
	}
	kfree(pblk->lines);
fail_free_bb_aux:
	kfree(l_mg->bb_aux);
fail_free_bb_template:
	kfree(l_mg->bb_template);
fail_free_meta:
	/* kzalloc'd pblk leaves unallocated meta pointers NULL, so freeing
	 * all PBLK_DATA_LINES entries here is safe from any failure point
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type);
		pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type);
	}

	for (i = 0; i < geo->nr_luns; i++)
		kfree(pblk->luns[i].bb_list);

	return ret;
}

static int pblk_writer_init(struct pblk *pblk)
{
	setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));

	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
	if (IS_ERR(pblk->writer_ts)) {
		pr_err("pblk: could not allocate writer kthread\n");
		del_timer(&pblk->wtimer);
		return PTR_ERR(pblk->writer_ts);
	}

	return 0;
}

static void pblk_writer_stop(struct pblk *pblk)
{
	if (pblk->writer_ts)
		kthread_stop(pblk->writer_ts);
	del_timer(&pblk->wtimer);
}

static void pblk_free(struct pblk *pblk)
{
	pblk_luns_free(pblk);
	pblk_lines_free(pblk);
	pblk_line_meta_free(pblk);
	pblk_core_free(pblk);
	pblk_l2p_free(pblk);

	kfree(pblk);
}

static void pblk_tear_down(struct pblk *pblk)
{
	pblk_flush_writer(pblk);
	pblk_writer_stop(pblk);
	pblk_rb_sync_l2p(&pblk->rwb);
	pblk_recov_pad(pblk);
	pblk_rwb_free(pblk);
	pblk_rl_free(&pblk->rl);

	pr_debug("pblk: consistent tear down\n");
}

static void pblk_exit(void *private)
{
	struct pblk *pblk = private;

	down_write(&pblk_lock);
	pblk_gc_exit(pblk);
	pblk_tear_down(pblk);
	pblk_free(pblk);
	up_write(&pblk_lock);
}

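/*
 * pblk counts capacity in 4KB sectors while the block layer expects
 * 512-byte sectors; NR_PHY_IN_LOG (4096 / 512 = 8) converts between the
 * two units.
 */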
static sector_t pblk_capacity(void *private)
{
	struct pblk *pblk = private;

	return pblk->capacity * NR_PHY_IN_LOG;
}

static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
		       int flags)
{
	struct nvm_geo *geo = &dev->geo;
	struct request_queue *bqueue = dev->q;
	struct request_queue *tqueue = tdisk->queue;
	struct pblk *pblk;
	int ret;

	if (dev->identity.dom & NVM_RSP_L2P) {
		pr_err("pblk: device-side L2P table not supported. (%x)\n",
							dev->identity.dom);
		return ERR_PTR(-EINVAL);
	}

	pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
	if (!pblk)
		return ERR_PTR(-ENOMEM);

	pblk->dev = dev;
	pblk->disk = tdisk;

	spin_lock_init(&pblk->trans_lock);
	spin_lock_init(&pblk->lock);

	if (flags & NVM_TARGET_FACTORY)
		pblk_setup_uuid(pblk);

#ifdef CONFIG_NVM_DEBUG
	atomic_long_set(&pblk->inflight_writes, 0);
	atomic_long_set(&pblk->padded_writes, 0);
	atomic_long_set(&pblk->padded_wb, 0);
	atomic_long_set(&pblk->nr_flush, 0);
	atomic_long_set(&pblk->req_writes, 0);
	atomic_long_set(&pblk->sub_writes, 0);
	atomic_long_set(&pblk->sync_writes, 0);
	atomic_long_set(&pblk->compl_writes, 0);
	atomic_long_set(&pblk->inflight_reads, 0);
	atomic_long_set(&pblk->sync_reads, 0);
	atomic_long_set(&pblk->recov_writes, 0);
	atomic_long_set(&pblk->recov_gc_writes, 0);
#endif

	atomic_long_set(&pblk->read_failed, 0);
	atomic_long_set(&pblk->read_empty, 0);
	atomic_long_set(&pblk->read_high_ecc, 0);
	atomic_long_set(&pblk->read_failed_gc, 0);
	atomic_long_set(&pblk->write_failed, 0);
	atomic_long_set(&pblk->erase_failed, 0);

	ret = pblk_luns_init(pblk, dev->luns);
	if (ret) {
		pr_err("pblk: could not initialize luns\n");
		goto fail;
	}

	ret = pblk_lines_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize lines\n");
		goto fail_free_luns;
	}

	ret = pblk_core_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize core\n");
		goto fail_free_line_meta;
	}

	ret = pblk_l2p_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize maps\n");
		goto fail_free_core;
	}

	ret = pblk_lines_configure(pblk, flags);
	if (ret) {
		pr_err("pblk: could not configure lines\n");
		goto fail_free_l2p;
	}

	ret = pblk_writer_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize write thread\n");
		goto fail_free_lines;
	}

	ret = pblk_gc_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize gc\n");
		goto fail_stop_writer;
	}

	/* inherit the size from the underlying device */
	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

	blk_queue_write_cache(tqueue, true, false);

	tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size;
	tqueue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue);

	pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
			geo->nr_luns, pblk->l_mg.nr_lines,
			(unsigned long long)pblk->rl.nr_secs,
			pblk->rwb.nr_entries);

	wake_up_process(pblk->writer_ts);
	return pblk;

fail_stop_writer:
	pblk_writer_stop(pblk);
fail_free_lines:
	pblk_lines_free(pblk);
fail_free_l2p:
	pblk_l2p_free(pblk);
fail_free_core:
	pblk_core_free(pblk);
fail_free_line_meta:
	pblk_line_meta_free(pblk);
fail_free_luns:
	pblk_luns_free(pblk);
fail:
	kfree(pblk);
	return ERR_PTR(ret);
}

/* physical block device target */
static struct nvm_tgt_type tt_pblk = {
	.name		= "pblk",
	.version	= {1, 0, 0},

	.make_rq	= pblk_make_rq,
	.capacity	= pblk_capacity,

	.init		= pblk_init,
	.exit		= pblk_exit,

	.sysfs_init	= pblk_sysfs_init,
	.sysfs_exit	= pblk_sysfs_exit,
};

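/*
 * Once the module is loaded, instances are created through the LightNVM
 * management interface. With nvme-cli this looks roughly as follows
 * (device, name and LUN range are examples):
 *
 *   nvme lnvm create -d nvme0n1 -n mypblk -t pblk --lun-begin=0 --lun-end=3
 *
 * which exposes /dev/mypblk backed by LUNs 0-3 of nvme0n1.
 */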
static int __init pblk_module_init(void)
{
	return nvm_register_tgt_type(&tt_pblk);
}

static void pblk_module_exit(void)
{
	nvm_unregister_tgt_type(&tt_pblk);
}

module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");