author     Javier González <javier@cnexlabs.com>    2017-10-13 08:46:25 -0400
committer  Jens Axboe <axboe@kernel.dk>             2017-10-13 10:34:57 -0400
commit     1e82123da6a4c6019ef03bcd47e4b3dc18dd136e (patch)
tree       cb631249f72b69c72d2abba3cd1a69c005c8da86
parent     0f9248cf1e22333b2a0458540aafb1ad3b2b3337 (diff)
lightnvm: pblk: remove I/O dependency on write path
pblk schedules user I/O, metadata I/O and erases on the write path in
order to minimize collisions at the media level. Until now, there has
been a dependency between user and metadata I/Os that could lead to a
deadlock, as both take the per-LUN semaphore to schedule submission.

This patch removes this dependency and guarantees forward progress at a
per-I/O granularity.
Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
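
Editorial note: the hunks below restructure pblk_submit_io_set() so that the
metadata write no longer gates the user data write. The following is a
minimal, self-contained sketch of the resulting submission order; the types
and helpers are simplified stand-ins for the pblk structures shown in the
diff, not the kernel API itself.

    /* Sketch of the reworked order: decide whether a metadata write is due,
     * then submit the data write, then any pending erase, and finally the
     * metadata write, so the metadata I/O never has to go out first. */
    #include <stdbool.h>
    #include <stdio.h>

    struct line { int id; };                      /* stand-in for struct pblk_line */

    static struct line *should_submit_meta(void)  /* stand-in for pblk_should_submit_meta_io() */
    {
            static struct line prev = { 7 };
            return &prev;
    }

    static int submit_data_io(void)               /* stand-in for pblk_submit_io() */
    {
            puts("data write: current line");
            return 0;
    }

    static bool erase_pending(void)               /* stand-in for !ppa_empty(erase_ppa) */
    {
            return true;
    }

    static void submit_erase(void)                /* stand-in for pblk_blk_erase_async() */
    {
            puts("erase: next line");
    }

    static int submit_meta_io(struct line *l)     /* stand-in for pblk_submit_meta_io() */
    {
            printf("emeta write: previous line %d\n", l->id);
            return 0;
    }

    static int submit_io_set(void)
    {
            struct line *meta_line = should_submit_meta();

            if (submit_data_io())                 /* user data first, always */
                    return -1;
            if (erase_pending())
                    submit_erase();               /* failure handling omitted in this sketch */
            if (meta_line && submit_meta_io(meta_line))
                    return -1;
            return 0;
    }

    int main(void)
    {
            return submit_io_set();
    }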
-rw-r--r--  drivers/lightnvm/pblk-write.c  145
1 file changed, 65 insertions(+), 80 deletions(-)
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index f2e846fe9242..6c1cafafef53 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -220,15 +220,16 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 }
 
 static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
-                        struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa)
+                        struct ppa_addr *erase_ppa)
 {
         struct pblk_line_meta *lm = &pblk->lm;
         struct pblk_line *e_line = pblk_line_get_erase(pblk);
+        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
         unsigned int valid = c_ctx->nr_valid;
         unsigned int padded = c_ctx->nr_padded;
         unsigned int nr_secs = valid + padded;
         unsigned long *lun_bitmap;
-        int ret = 0;
+        int ret;
 
         lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
         if (!lun_bitmap)
@@ -294,55 +295,6 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
         return secs_to_sync;
 }
 
-static inline int pblk_valid_meta_ppa(struct pblk *pblk,
-                                      struct pblk_line *meta_line,
-                                      struct ppa_addr *ppa_list, int nr_ppas)
-{
-        struct nvm_tgt_dev *dev = pblk->dev;
-        struct nvm_geo *geo = &dev->geo;
-        struct pblk_line *data_line;
-        struct ppa_addr ppa, ppa_opt;
-        u64 paddr;
-        int i;
-
-        data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])];
-        paddr = pblk_lookup_page(pblk, meta_line);
-        ppa = addr_to_gen_ppa(pblk, paddr, 0);
-
-        if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap))
-                return 1;
-
-        /* Schedule a metadata I/O that is half the distance from the data I/O
-         * with regards to the number of LUNs forming the pblk instance. This
-         * balances LUN conflicts across every I/O.
-         *
-         * When the LUN configuration changes (e.g., due to GC), this distance
-         * can align, which would result on a LUN deadlock. In this case, modify
-         * the distance to not be optimal, but allow metadata I/Os to succeed.
-         */
-        ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
-        if (unlikely(ppa_opt.ppa == ppa.ppa)) {
-                data_line->meta_distance--;
-                return 0;
-        }
-
-        for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
-                if (ppa_list[i].g.ch == ppa_opt.g.ch &&
-                                        ppa_list[i].g.lun == ppa_opt.g.lun)
-                        return 1;
-
-        if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) {
-                for (i = 0; i < nr_ppas; i += pblk->min_write_pgs)
-                        if (ppa_list[i].g.ch == ppa.g.ch &&
-                                                ppa_list[i].g.lun == ppa.g.lun)
-                                return 0;
-
-                return 1;
-        }
-
-        return 0;
-}
-
 int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 {
         struct nvm_tgt_dev *dev = pblk->dev;
@@ -421,8 +373,44 @@ fail_free_rqd:
         return ret;
 }
 
-static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
-                              int prev_n)
+static inline bool pblk_valid_meta_ppa(struct pblk *pblk,
+                                       struct pblk_line *meta_line,
+                                       struct nvm_rq *data_rqd)
+{
+        struct nvm_tgt_dev *dev = pblk->dev;
+        struct nvm_geo *geo = &dev->geo;
+        struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd);
+        struct pblk_line *data_line = pblk_line_get_data(pblk);
+        struct ppa_addr ppa, ppa_opt;
+        u64 paddr;
+        int pos_opt;
+
+        /* Schedule a metadata I/O that is half the distance from the data I/O
+         * with regards to the number of LUNs forming the pblk instance. This
+         * balances LUN conflicts across every I/O.
+         *
+         * When the LUN configuration changes (e.g., due to GC), this distance
+         * can align, which would result on metadata and data I/Os colliding. In
+         * this case, modify the distance to not be optimal, but move the
+         * optimal in the right direction.
+         */
+        paddr = pblk_lookup_page(pblk, meta_line);
+        ppa = addr_to_gen_ppa(pblk, paddr, 0);
+        ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0);
+        pos_opt = pblk_ppa_to_pos(geo, ppa_opt);
+
+        if (test_bit(pos_opt, data_c_ctx->lun_bitmap) ||
+                                test_bit(pos_opt, data_line->blk_bitmap))
+                return true;
+
+        if (unlikely(pblk_ppa_comp(ppa_opt, ppa)))
+                data_line->meta_distance--;
+
+        return false;
+}
+
+static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk,
+                                                    struct nvm_rq *data_rqd)
 {
         struct pblk_line_meta *lm = &pblk->lm;
         struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -432,57 +420,45 @@ static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list,
 retry:
         if (list_empty(&l_mg->emeta_list)) {
                 spin_unlock(&l_mg->close_lock);
-                return 0;
+                return NULL;
         }
         meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list);
         if (meta_line->emeta->mem >= lm->emeta_len[0])
                 goto retry;
         spin_unlock(&l_mg->close_lock);
 
-        if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n))
-                return 0;
+        if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd))
+                return NULL;
 
-        return pblk_submit_meta_io(pblk, meta_line);
+        return meta_line;
 }
 
 static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
 {
-        struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
         struct ppa_addr erase_ppa;
+        struct pblk_line *meta_line;
         int err;
 
         ppa_set_empty(&erase_ppa);
 
         /* Assign lbas to ppas and populate request structure */
-        err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa);
+        err = pblk_setup_w_rq(pblk, rqd, &erase_ppa);
         if (err) {
                 pr_err("pblk: could not setup write request: %d\n", err);
                 return NVM_IO_ERR;
         }
 
-        if (likely(ppa_empty(erase_ppa))) {
-                /* Submit metadata write for previous data line */
-                err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas);
-                if (err) {
-                        pr_err("pblk: metadata I/O submission failed: %d", err);
-                        return NVM_IO_ERR;
-                }
+        meta_line = pblk_should_submit_meta_io(pblk, rqd);
 
-                /* Submit data write for current data line */
-                err = pblk_submit_io(pblk, rqd);
-                if (err) {
-                        pr_err("pblk: data I/O submission failed: %d\n", err);
-                        return NVM_IO_ERR;
-                }
-        } else {
-                /* Submit data write for current data line */
-                err = pblk_submit_io(pblk, rqd);
-                if (err) {
-                        pr_err("pblk: data I/O submission failed: %d\n", err);
-                        return NVM_IO_ERR;
-                }
+        /* Submit data write for current data line */
+        err = pblk_submit_io(pblk, rqd);
+        if (err) {
+                pr_err("pblk: data I/O submission failed: %d\n", err);
+                return NVM_IO_ERR;
+        }
 
-                /* Submit available erase for next data line */
+        if (!ppa_empty(erase_ppa)) {
+                /* Submit erase for next data line */
                 if (pblk_blk_erase_async(pblk, erase_ppa)) {
                         struct pblk_line *e_line = pblk_line_get_erase(pblk);
                         struct nvm_tgt_dev *dev = pblk->dev;
@@ -495,6 +471,15 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
                 }
         }
 
+        if (meta_line) {
+                /* Submit metadata write for previous data line */
+                err = pblk_submit_meta_io(pblk, meta_line);
+                if (err) {
+                        pr_err("pblk: metadata I/O submission failed: %d", err);
+                        return NVM_IO_ERR;
+                }
+        }
+
         return NVM_IO_OK;
 }
 
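
Editorial note: the meta_distance heuristic carried over into the new
pblk_valid_meta_ppa() above aims the metadata write roughly half the LUN
stripe away from the data write, and nudges the distance down when the two
targets collapse onto the same address. The snippet below is a hypothetical
integer model of that heuristic (plain modular arithmetic standing in for
addr_to_gen_ppa()/pblk_ppa_to_pos()), not the pblk implementation.

    #include <stdio.h>

    /* Hypothetical model: the LUN a line-relative address lands on is taken
     * to be that address modulo the number of LUNs in the stripe. */
    static int lun_of(int paddr, int nr_luns)
    {
            return paddr % nr_luns;
    }

    int main(void)
    {
            int nr_luns = 8;                  /* example geometry */
            int meta_distance = nr_luns / 2;  /* "half the distance" heuristic */
            int meta_paddr = 3;               /* next free metadata page (example) */

            int lun = lun_of(meta_paddr, nr_luns);
            int lun_opt = lun_of(meta_paddr + meta_distance, nr_luns);

            /* When the shifted target wraps back onto the unshifted one, the
             * distance has aligned; the patch then shrinks meta_distance so a
             * later attempt lands on a different LUN. */
            if (lun_opt == lun)
                    meta_distance--;

            printf("meta LUN %d, shifted LUN %d, meta_distance now %d\n",
                   lun, lun_opt, meta_distance);
            return 0;
    }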