author		Javier González <javier@cnexlabs.com>	2017-10-13 08:46:25 -0400
committer	Jens Axboe <axboe@kernel.dk>		2017-10-13 10:34:57 -0400
commit		1e82123da6a4c6019ef03bcd47e4b3dc18dd136e (patch)
tree		cb631249f72b69c72d2abba3cd1a69c005c8da86
parent		0f9248cf1e22333b2a0458540aafb1ad3b2b3337 (diff)
lightnvm: pblk: remove I/O dependency on write path
pblk schedules user I/O, metadata I/O and erases on the write path in order to minimize collisions at the media level. Until now, there has been a dependency between user and metadata I/Os that could lead to a deadlock, as both take the per-LUN semaphore to schedule submission.

This patch removes this dependency and guarantees forward progress at a per-I/O granularity.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--	drivers/lightnvm/pblk-write.c	145
1 file changed, 65 insertions(+), 80 deletions(-)
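For orientation before the diff, here is a minimal user-space sketch of the submission order the patch establishes in pblk_submit_io_set(): the metadata target is chosen up front, the data write and any pending erase are submitted first, and the metadata write goes out last, so no submission waits behind the metadata I/O. All types and the submit_* helpers below are stand-ins invented for this sketch (none are pblk symbols); only the ordering mirrors the patch.

/*
 * Simplified model of the reworked write path. Only the ordering mirrors the
 * patch: pick metadata candidate -> data write -> erase -> metadata write.
 */
#include <stdio.h>

struct write_req { int nr_secs; };
struct meta_line { int id; };

/* Stubs standing in for pblk_submit_io(), pblk_blk_erase_async(), etc. */
static int submit_data_io(struct write_req *rqd)
{
	printf("data write: %d sectors\n", rqd->nr_secs);
	return 0;
}

static int submit_erase(int block)
{
	printf("erase block %d\n", block);
	return 0;
}

static int submit_meta_io(struct meta_line *line)
{
	printf("metadata write for line %d\n", line->id);
	return 0;
}

/* Stand-in for pblk_should_submit_meta_io(): may return NULL. */
static struct meta_line *should_submit_meta(struct write_req *rqd,
					    struct meta_line *candidate)
{
	(void)rqd;
	return candidate;	/* assume the candidate's target LUN is safe */
}

static int submit_io_set(struct write_req *rqd, struct meta_line *candidate,
			 int erase_block)
{
	struct meta_line *meta = should_submit_meta(rqd, candidate);

	/* Data write goes first: it no longer waits behind a metadata I/O. */
	if (submit_data_io(rqd))
		return -1;
	if (erase_block >= 0 && submit_erase(erase_block))
		return -1;
	/* Metadata write last, submitted independently of the data I/O. */
	if (meta && submit_meta_io(meta))
		return -1;
	return 0;
}

int main(void)
{
	struct write_req rqd = { .nr_secs = 64 };
	struct meta_line line = { .id = 7 };

	return submit_io_set(&rqd, &line, 42) ? 1 : 0;
}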
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index f2e846fe9242..6c1cafafef53 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -220,15 +220,16 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 }
 
 static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			   struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
+			   struct ppa_addr *erase_ppa)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line *e_line = pblk_line_get_erase(pblk);
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 	unsigned int valid = c_ctx->nr_valid;
 	unsigned int padded = c_ctx->nr_padded;
 	unsigned int nr_secs = valid + padded;
 	unsigned long *lun_bitmap;
-	int ret = 0;
+	int ret;
 
 	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
 	if (!lun_bitmap)
@@ -294,55 +295,6 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 	return secs_to_sync;
 }
 
-static inline int pblk_valid_meta_ppa(struct pblk *pblk,
-				      struct pblk_line *meta_line,
-				      struct ppa_addr *ppa_list, int nr_ppas)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line *data_line;
-	struct ppa_addr ppa, ppa_opt;
-	u64 paddr;
-	int i;
-
-	data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
-	paddr = pblk_lookup_page(pblk, meta_line);
-	ppa = addr_to_gen_ppa(pblk, paddr, 0);
-
-	if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
-		return 1;
-
-	/* Schedule a metadata I/O that is half the distance from the data I/O
-	 * with regards to the number of LUNs forming the pblk instance. This
-	 * balances LUN conflicts across every I/O.
-	 *
-	 * When the LUN configuration changes (e.g., due to GC), this distance
-	 * can align, which would result on a LUN deadlock. In this case, modify
-	 * the distance to not be optimal, but allow metadata I/Os to succeed.
-	 */
-	ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
-	if (unlikely(ppa_opt.ppa == ppa.ppa)) {
-		data_line->meta_distance--;
-		return 0;
-	}
-
-	for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
-		if (ppa_list[i].g.ch == ppa_opt.g.ch &&
-					ppa_list[i].g.lun == ppa_opt.g.lun)
-			return 1;
-
-	if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
-		for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
-			if (ppa_list[i].g.ch == ppa.g.ch &&
-						ppa_list[i].g.lun == ppa.g.lun)
-				return 0;
-
-		return 1;
-	}
-
-	return 0;
-}
-
 int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -421,8 +373,44 @@ fail_free_rqd:
 	return ret;
 }
 
-static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
-			      int prev_n)
+static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
+				       struct pblk_line *meta_line,
+				       struct nvm_rq *data_rqd)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
+	struct pblk_line *data_line = pblk_line_get_data(pblk);
+	struct ppa_addr ppa, ppa_opt;
+	u64 paddr;
+	int pos_opt;
+
+	/* Schedule a metadata I/O that is half the distance from the data I/O
+	 * with regards to the number of LUNs forming the pblk instance. This
+	 * balances LUN conflicts across every I/O.
+	 *
+	 * When the LUN configuration changes (e.g., due to GC), this distance
+	 * can align, which would result on metadata and data I/Os colliding. In
+	 * this case, modify the distance to not be optimal, but move the
+	 * optimal in the right direction.
+	 */
+	paddr = pblk_lookup_page(pblk, meta_line);
+	ppa = addr_to_gen_ppa(pblk, paddr, 0);
+	ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+	pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
+
+	if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
+				test_bit(pos_opt, data_line->blk_bitmap))
+		return true;
+
+	if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
+		data_line->meta_distance--;
+
+	return false;
+}
+
+static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
+						    struct nvm_rq *data_rqd)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -432,57 +420,45 @@ static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
 retry:
 	if (list_empty(&l_mg->emeta_list)) {
 		spin_unlock(&l_mg->close_lock);
-		return 0;
+		return NULL;
 	}
 	meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
 	if (meta_line->emeta->mem >= lm->emeta_len[0])
 		goto retry;
 	spin_unlock(&l_mg->close_lock);
 
-	if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
-		return 0;
+	if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
+		return NULL;
 
-	return pblk_submit_meta_io(pblk, meta_line);
+	return meta_line;
 }
 
 static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
 {
-	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 	struct ppa_addr erase_ppa;
+	struct pblk_line *meta_line;
 	int err;
 
 	ppa_set_empty(&erase_ppa);
 
 	/* Assign lbas to ppas and populate request structure */
-	err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+	err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
 	if (err) {
 		pr_err("pblk: could not setup write request: %d\n", err);
 		return NVM_IO_ERR;
 	}
 
-	if (likely(ppa_empty(erase_ppa))) {
-		/* Submit metadata write for previous data line */
-		err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
-		if (err) {
-			pr_err("pblk: metadata I/O submission failed: %d", err);
-			return NVM_IO_ERR;
-		}
+	meta_line = pblk_should_submit_meta_io(pblk, rqd);
 
-		/* Submit data write for current data line */
-		err = pblk_submit_io(pblk, rqd);
-		if (err) {
-			pr_err("pblk: data I/O submission failed: %d\n", err);
-			return NVM_IO_ERR;
-		}
-	} else {
-		/* Submit data write for current data line */
-		err = pblk_submit_io(pblk, rqd);
-		if (err) {
-			pr_err("pblk: data I/O submission failed: %d\n", err);
-			return NVM_IO_ERR;
-		}
+	/* Submit data write for current data line */
+	err = pblk_submit_io(pblk, rqd);
+	if (err) {
+		pr_err("pblk: data I/O submission failed: %d\n", err);
+		return NVM_IO_ERR;
+	}
 
-		/* Submit available erase for next data line */
+	if (!ppa_empty(erase_ppa)) {
+		/* Submit erase for next data line */
 		if (pblk_blk_erase_async(pblk, erase_ppa)) {
 			struct pblk_line *e_line = pblk_line_get_erase(pblk);
 			struct nvm_tgt_dev *dev = pblk->dev;
@@ -495,6 +471,15 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
 		}
 	}
 
+	if (meta_line) {
+		/* Submit metadata write for previous data line */
+		err = pblk_submit_meta_io(pblk, meta_line);
+		if (err) {
+			pr_err("pblk: metadata I/O submission failed: %d", err);
+			return NVM_IO_ERR;
+		}
+	}
+
 	return NVM_IO_OK;
 }
 
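For reference, a small hedged sketch of the check performed by pblk_valid_meta_ppa() after this patch, with the pblk structures replaced by plain arrays: the metadata write aimed meta_distance pages ahead of the metadata line's current page is only handed back for submission when that position falls on a LUN already covered by the data request's LUN bitmap, or on a bad block of the data line; otherwise it is skipped for this round, and if the offset has aligned with the base position the distance is nudged down, as in the hunks above. The lun_of() mapping and the bitmap layout below are simplified stand-ins, not pblk code.

/*
 * User-space sketch of the decision in pblk_valid_meta_ppa() after the patch.
 * Only the return polarity and the meta_distance adjustment follow the diff.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_LUNS 16

struct data_line {
	int meta_distance;		/* preferred offset for metadata I/O */
	bool blk_bitmap[NR_LUNS];	/* bad blocks in the data line */
};

/* Map a page address within a line to a LUN position (assumed round-robin). */
static int lun_of(unsigned long paddr)
{
	return (int)(paddr % NR_LUNS);
}

static bool valid_meta_ppa(struct data_line *data_line,
			   const bool data_lun_bitmap[NR_LUNS],
			   unsigned long meta_paddr)
{
	int pos = lun_of(meta_paddr);
	int pos_opt = lun_of(meta_paddr + data_line->meta_distance);

	/* Schedulable when the optimal position falls on a LUN the data
	 * request already covers, or on a bad block of the data line. */
	if (data_lun_bitmap[pos_opt] || data_line->blk_bitmap[pos_opt])
		return true;

	/* Positions have aligned (e.g. after GC changed the LUN layout):
	 * nudge the distance so future attempts move off the base page. */
	if (pos_opt == pos)
		data_line->meta_distance--;

	return false;
}

int main(void)
{
	struct data_line line = { .meta_distance = NR_LUNS / 2 };
	bool data_luns[NR_LUNS] = { [8] = true };

	printf("submit metadata? %s\n",
	       valid_meta_ppa(&line, data_luns, 0) ? "yes" : "not this round");
	return 0;
}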