author	Matthew Wilcox <matthew.r.wilcox@intel.com>	2011-02-23 15:20:00 -0500
committer	Matthew Wilcox <matthew.r.wilcox@intel.com>	2011-11-04 15:52:59 -0400
commit	1ad2f8932a72bf375361727949ced2cb4e8cfcef (patch)
tree	5f95cca42d31666591987800db5c488c8ae09aa7 /drivers/block
parent	00df5cb4eb927078850086f8becc3286a69ea12e (diff)
NVMe: Handle bios that contain non-virtually contiguous addresses
NVMe scatterlists must be virtually contiguous, like almost all I/Os.
However, when the filesystem lays out files with a hole, it can be that
adjacent LBAs map to non-adjacent virtual addresses. Handle this by
submitting one NVMe command at a time for each virtually discontiguous
range.
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
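The predicate at the heart of the patch says that two consecutive bio segments may be covered by a single NVMe command only when the first segment ends exactly on a page boundary and the second begins at offset zero; anything else leaves a hole in the virtual mapping. A minimal userspace sketch of that rule (the struct below is a simplified stand-in for the kernel's struct bio_vec, not kernel code):

#include <stdio.h>
#include <stdbool.h>

#define PAGE_SIZE 4096

/* Simplified stand-in for the kernel's struct bio_vec. */
struct vec {
	unsigned int bv_offset;	/* byte offset within the page */
	unsigned int bv_len;	/* length of the segment in bytes */
};

/* Mirrors BIOVEC_NOT_VIRT_MERGEABLE(): vec2 must start at offset 0
 * and vec1 must end exactly on a page boundary, otherwise the two
 * segments cannot be part of one virtually contiguous mapping. */
static bool not_virt_mergeable(const struct vec *v1, const struct vec *v2)
{
	return v2->bv_offset || ((v1->bv_offset + v1->bv_len) % PAGE_SIZE);
}

int main(void)
{
	struct vec full = { .bv_offset = 0, .bv_len = PAGE_SIZE };
	struct vec half = { .bv_offset = 0, .bv_len = PAGE_SIZE / 2 };

	/* Full page followed by a page-aligned segment: one command. */
	printf("full+full: %s\n", not_virt_mergeable(&full, &full) ? "split" : "merge");
	/* Segment ending mid-page: the patch splits the I/O here. */
	printf("half+full: %s\n", not_virt_mergeable(&half, &full) ? "split" : "merge");
	return 0;
}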
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/nvme.c	38
1 files changed, 29 insertions, 9 deletions
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index d99b400ccd79..240922706a93 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -315,7 +315,14 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
 	dma_unmap_sg(nvmeq->q_dmadev, nbio->sg, nbio->nents,
 			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	free_nbio(nvmeq, nbio);
-	bio_endio(bio, status ? -EIO : 0);
+	if (status)
+		bio_endio(bio, -EIO);
+	if (bio->bi_vcnt > bio->bi_idx) {
+		bio_list_add(&nvmeq->sq_cong, bio);
+		wake_up_process(nvme_thread);
+	} else {
+		bio_endio(bio, 0);
+	}
 }
 
 /* length is in bytes */
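In this completion path, bio->bi_idx serves as a cursor: the mapping code below advances it past the segments it actually submitted, so a command that completes while bi_idx is still short of bi_vcnt belongs to a bio with unsubmitted segments, which is put back on the queue's sq_cong list for nvme_thread to resubmit rather than being completed. A rough userspace model of that decision (the struct is a simplified stand-in, not the kernel's struct bio):

#include <stdio.h>
#include <stdbool.h>

/* Simplified stand-in for the two struct bio fields involved. */
struct fake_bio {
	unsigned short bi_vcnt;	/* total number of segments */
	unsigned short bi_idx;	/* first segment not yet submitted */
};

/* True when the bio must be requeued for another command rather
 * than completed: some of its segments were never submitted. */
static bool needs_resubmit(const struct fake_bio *bio)
{
	return bio->bi_vcnt > bio->bi_idx;
}

int main(void)
{
	struct fake_bio done = { .bi_vcnt = 4, .bi_idx = 4 };
	struct fake_bio partial = { .bi_vcnt = 4, .bi_idx = 2 };

	printf("done:    %s\n", needs_resubmit(&done) ? "requeue" : "complete");
	printf("partial: %s\n", needs_resubmit(&partial) ? "requeue" : "complete");
	return 0;
}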
@@ -393,29 +400,41 @@ static struct nvme_prps *nvme_setup_prps(struct nvme_dev *dev,
 	return prps;
 }
 
+/* NVMe scatterlists require no holes in the virtual address */
+#define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2)	((vec2)->bv_offset || \
+			(((vec1)->bv_offset + (vec1)->bv_len) % PAGE_SIZE))
+
 static int nvme_map_bio(struct device *dev, struct nvme_bio *nbio,
 		struct bio *bio, enum dma_data_direction dma_dir, int psegs)
 {
 	struct bio_vec *bvec, *bvprv = NULL;
 	struct scatterlist *sg = NULL;
-	int i, nsegs = 0;
+	int i, old_idx, length = 0, nsegs = 0;
 
 	sg_init_table(nbio->sg, psegs);
+	old_idx = bio->bi_idx;
 	bio_for_each_segment(bvec, bio, i) {
 		if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) {
 			sg->length += bvec->bv_len;
 		} else {
-			/* Check bvprv && offset == 0 */
+			if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec))
+				break;
 			sg = sg ? sg + 1 : nbio->sg;
 			sg_set_page(sg, bvec->bv_page, bvec->bv_len,
 						bvec->bv_offset);
 			nsegs++;
 		}
+		length += bvec->bv_len;
 		bvprv = bvec;
 	}
+	bio->bi_idx = i;
 	nbio->nents = nsegs;
 	sg_mark_end(sg);
-	return dma_map_sg(dev, nbio->sg, nbio->nents, dma_dir);
+	if (dma_map_sg(dev, nbio->sg, nbio->nents, dma_dir) == 0) {
+		bio->bi_idx = old_idx;
+		return -ENOMEM;
+	}
+	return length;
 }
 
 static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
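After this change, nvme_map_bio() no longer returns the dma_map_sg() segment count: it returns the number of bytes it mapped before stopping at the first virtual hole (possibly less than bio->bi_size), advances bio->bi_idx to the resume point, and returns -ENOMEM with bi_idx restored when the DMA mapping fails. A rough userspace model of that contract, omitting DMA and physical merging, with a hypothetical map_round() helper standing in for nvme_map_bio():

#include <stdio.h>

#define PAGE_SIZE 4096

struct vec { unsigned int bv_offset, bv_len; };

static int not_virt_mergeable(struct vec *v1, struct vec *v2)
{
	return v2->bv_offset || ((v1->bv_offset + v1->bv_len) % PAGE_SIZE);
}

/* Walks segments starting at *idx, stops at the first virtual hole,
 * returns the byte count covered and leaves *idx at the resume point. */
static int map_round(struct vec *vecs, int nvecs, int *idx)
{
	int i, length = 0;

	for (i = *idx; i < nvecs; i++) {
		if (i > *idx && not_virt_mergeable(&vecs[i - 1], &vecs[i]))
			break;
		length += vecs[i].bv_len;
	}
	*idx = i;
	return length;
}

int main(void)
{
	/* Three segments, but the second ends mid-page: a virtual hole. */
	struct vec vecs[] = {
		{ 0, PAGE_SIZE }, { 0, PAGE_SIZE / 2 }, { 0, PAGE_SIZE },
	};
	int idx = 0, round = 1;

	while (idx < 3)
		printf("command %d: %d bytes\n", round++, map_round(vecs, 3, &idx));
	return 0;
}

Each call corresponds to one NVMe command; the outer loop here plays the role of the congestion-list resubmission in bio_completion() above.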
@@ -451,7 +470,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	struct nvme_command *cmnd;
 	struct nvme_bio *nbio;
 	enum dma_data_direction dma_dir;
-	int cmdid, result = -ENOMEM;
+	int cmdid, length, result = -ENOMEM;
 	u16 control;
 	u32 dsmgmt;
 	int psegs = bio_phys_segments(ns->queue, bio);
@@ -496,16 +515,17 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		dma_dir = DMA_FROM_DEVICE;
 	}
 
-	result = -ENOMEM;
-	if (nvme_map_bio(nvmeq->q_dmadev, nbio, bio, dma_dir, psegs) == 0)
+	result = nvme_map_bio(nvmeq->q_dmadev, nbio, bio, dma_dir, psegs);
+	if (result < 0)
 		goto free_nbio;
+	length = result;
 
 	cmnd->rw.command_id = cmdid;
 	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
 	nbio->prps = nvme_setup_prps(nvmeq->dev, &cmnd->common, nbio->sg,
-								bio->bi_size);
+								length);
 	cmnd->rw.slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
-	cmnd->rw.length = cpu_to_le16((bio->bi_size >> ns->lba_shift) - 1);
+	cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
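Because the command now covers only the mapped range, both the PRP list and the on-wire length are derived from length rather than bio->bi_size; the NVMe length field counts logical blocks zero-based. A worked example with assumed values (512-byte logical blocks, so lba_shift = 9):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t length = 8192;	/* bytes mapped by nvme_map_bio() (assumed) */
	int lba_shift = 9;	/* 512-byte logical blocks (assumed) */
	uint16_t nlb = (length >> lba_shift) - 1;

	/* 8192 bytes = 16 blocks, encoded as 15 (zero-based). */
	printf("length field: %u (%u blocks)\n", (unsigned)nlb, (unsigned)nlb + 1);
	return 0;
}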