author		Matthew Wilcox <matthew.r.wilcox@intel.com>	2011-12-20 13:34:52 -0500
committer	Matthew Wilcox <matthew.r.wilcox@intel.com>	2012-01-10 14:51:20 -0500
commit		eca18b2394a9387feeaf14cd884ddddd7a809d19 (patch)
tree		b20244dcfbe2fa1e86ca782c4f879a3174da25d1 /drivers/block/nvme.c
parent		5c1281a3bf5655ec1b90db495da3a2b77826ba88 (diff)
NVMe: Merge the nvme_bio and nvme_prp data structures
The new merged data structure is called nvme_iod. This improves performance
for mid-sized I/Os (in the 16k range) since we save a memory allocation.
It is also a slightly simpler interface to use.
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
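
The saved allocation comes from sizing one buffer to hold the iod header, the scatterlist, and the array of PRP-list pointers, where the old code allocated an nvme_bio and a separate nvme_prps. A rough userspace sketch of that sizing arithmetic, adapted from nvme_npages()/nvme_alloc_iod() in the diff below (PAGE_SIZE, the stand-in scatterlist entry, and the sketch_* names are illustrative, not the kernel's types):

```c
/*
 * Illustrative userspace sketch of the single-allocation layout used by
 * nvme_alloc_iod() in this patch: one buffer holds the iod header, then
 * nseg scatterlist entries, then the PRP-list pointer array (reached via
 * iod->offset). malloc/offsetof stand in for kmalloc and the kernel types.
 */
#include <stddef.h>
#include <stdlib.h>

#define PAGE_SIZE 4096
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

struct iod_sketch {
	void *private;			/* submitter's cookie (the bio) */
	int npages;			/* pages in the PRP list, -1 = none yet */
	int offset;			/* byte offset of the PRP pointer array */
	int nents;			/* scatterlist entries actually used */
	int length;			/* data length in bytes */
	unsigned long long first_dma;	/* dma address of the first PRP page */
	struct { void *page; unsigned len, off; } sg[];	/* stand-in scatterlist */
};

/* Worst-case number of PRP-list pages needed for an I/O of 'size' bytes. */
static int sketch_npages(unsigned size)
{
	unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
	return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
}

/* One allocation replaces the separate nvme_bio + nvme_prps allocations. */
static struct iod_sketch *sketch_alloc_iod(unsigned nseg, unsigned nbytes)
{
	struct iod_sketch *iod = malloc(sizeof(*iod) +
			sizeof(void *) * sketch_npages(nbytes) +
			sizeof(iod->sg[0]) * nseg);

	if (iod) {
		iod->offset = offsetof(struct iod_sketch, sg[nseg]);
		iod->npages = -1;	/* no PRP list built yet */
		iod->length = nbytes;
	}
	return iod;
}
```

For a mid-sized I/O this is one kmalloc instead of the old kzalloc for nvme_bio plus a second kmalloc for nvme_prps, which is where the commit message's 16k-range win comes from.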
Diffstat (limited to 'drivers/block/nvme.c')
-rw-r--r--	drivers/block/nvme.c	239
1 file changed, 124 insertions(+), 115 deletions(-)
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index b0e8a6dd33b1..4517608c068f 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -290,52 +290,70 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 	return 0;
 }
 
-struct nvme_prps {
-	int npages;		/* 0 means small pool in use */
+/*
+ * The nvme_iod describes the data in an I/O, including the list of PRP
+ * entries. You can't see it in this data structure because C doesn't let
+ * me express that. Use nvme_alloc_iod to ensure there's enough space
+ * allocated to store the PRP list.
+ */
+struct nvme_iod {
+	void *private;		/* For the use of the submitter of the I/O */
+	int npages;		/* In the PRP list. 0 means small pool in use */
+	int offset;		/* Of PRP list */
+	int nents;		/* Used in scatterlist */
+	int length;		/* Of data, in bytes */
 	dma_addr_t first_dma;
-	__le64 *list[0];
+	struct scatterlist sg[0];
 };
 
-static void nvme_free_prps(struct nvme_dev *dev, struct nvme_prps *prps)
+static __le64 **iod_list(struct nvme_iod *iod)
 {
-	const int last_prp = PAGE_SIZE / 8 - 1;
-	int i;
-	dma_addr_t prp_dma;
+	return ((void *)iod) + iod->offset;
+}
 
-	if (!prps)
-		return;
+/*
+ * Will slightly overestimate the number of pages needed. This is OK
+ * as it only leads to a small amount of wasted memory for the lifetime of
+ * the I/O.
+ */
+static int nvme_npages(unsigned size)
+{
+	unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
+	return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
+}
 
-	prp_dma = prps->first_dma;
+static struct nvme_iod *
+nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
+{
+	struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
+				sizeof(__le64 *) * nvme_npages(nbytes) +
+				sizeof(struct scatterlist) * nseg, gfp);
 
-	if (prps->npages == 0)
-		dma_pool_free(dev->prp_small_pool, prps->list[0], prp_dma);
-	for (i = 0; i < prps->npages; i++) {
-		__le64 *prp_list = prps->list[i];
-		dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]);
-		dma_pool_free(dev->prp_page_pool, prp_list, prp_dma);
-		prp_dma = next_prp_dma;
+	if (iod) {
+		iod->offset = offsetof(struct nvme_iod, sg[nseg]);
+		iod->npages = -1;
+		iod->length = nbytes;
 	}
-	kfree(prps);
-}
 
-struct nvme_bio {
-	struct bio *bio;
-	int nents;
-	struct nvme_prps *prps;
-	struct scatterlist sg[0];
-};
-
-/* XXX: use a mempool */
-static struct nvme_bio *alloc_nbio(unsigned nseg, gfp_t gfp)
-{
-	return kzalloc(sizeof(struct nvme_bio) +
-			sizeof(struct scatterlist) * nseg, gfp);
+	return iod;
 }
 
-static void free_nbio(struct nvme_dev *dev, struct nvme_bio *nbio)
+static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 {
-	nvme_free_prps(dev, nbio->prps);
-	kfree(nbio);
+	const int last_prp = PAGE_SIZE / 8 - 1;
+	int i;
+	__le64 **list = iod_list(iod);
+	dma_addr_t prp_dma = iod->first_dma;
+
+	if (iod->npages == 0)
+		dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
+	for (i = 0; i < iod->npages; i++) {
+		__le64 *prp_list = list[i];
+		dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]);
+		dma_pool_free(dev->prp_page_pool, prp_list, prp_dma);
+		prp_dma = next_prp_dma;
+	}
+	kfree(iod);
 }
 
 static void requeue_bio(struct nvme_dev *dev, struct bio *bio)
@@ -351,13 +369,13 @@ static void requeue_bio(struct nvme_dev *dev, struct bio *bio)
 static void bio_completion(struct nvme_dev *dev, void *ctx,
 						struct nvme_completion *cqe)
 {
-	struct nvme_bio *nbio = ctx;
-	struct bio *bio = nbio->bio;
+	struct nvme_iod *iod = ctx;
+	struct bio *bio = iod->private;
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 
-	dma_unmap_sg(&dev->pci_dev->dev, nbio->sg, nbio->nents,
+	dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
 			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	free_nbio(dev, nbio);
+	nvme_free_iod(dev, iod);
 	if (status) {
 		bio_endio(bio, -EIO);
 	} else if (bio->bi_vcnt > bio->bi_idx) {
@@ -368,25 +386,25 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
 }
 
 /* length is in bytes. gfp flags indicates whether we may sleep. */
-static struct nvme_prps *nvme_setup_prps(struct nvme_dev *dev,
-			struct nvme_common_command *cmd,
-			struct scatterlist *sg, int *len,
-			gfp_t gfp)
+static int nvme_setup_prps(struct nvme_dev *dev,
+			struct nvme_common_command *cmd, struct nvme_iod *iod,
+			int total_len, gfp_t gfp)
 {
 	struct dma_pool *pool;
-	int length = *len;
+	int length = total_len;
+	struct scatterlist *sg = iod->sg;
 	int dma_len = sg_dma_len(sg);
 	u64 dma_addr = sg_dma_address(sg);
 	int offset = offset_in_page(dma_addr);
 	__le64 *prp_list;
+	__le64 **list = iod_list(iod);
 	dma_addr_t prp_dma;
-	int nprps, npages, i;
-	struct nvme_prps *prps = NULL;
+	int nprps, i;
 
 	cmd->prp1 = cpu_to_le64(dma_addr);
 	length -= (PAGE_SIZE - offset);
 	if (length <= 0)
-		return prps;
+		return total_len;
 
 	dma_len -= (PAGE_SIZE - offset);
 	if (dma_len) {
@@ -399,46 +417,35 @@ static struct nvme_prps *nvme_setup_prps(struct nvme_dev *dev,
 
 	if (length <= PAGE_SIZE) {
 		cmd->prp2 = cpu_to_le64(dma_addr);
-		return prps;
+		return total_len;
 	}
 
 	nprps = DIV_ROUND_UP(length, PAGE_SIZE);
-	npages = DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
-	prps = kmalloc(sizeof(*prps) + sizeof(__le64 *) * npages, gfp);
-	if (!prps) {
-		cmd->prp2 = cpu_to_le64(dma_addr);
-		*len = (*len - length) + PAGE_SIZE;
-		return prps;
-	}
-
 	if (nprps <= (256 / 8)) {
 		pool = dev->prp_small_pool;
-		prps->npages = 0;
+		iod->npages = 0;
 	} else {
 		pool = dev->prp_page_pool;
-		prps->npages = 1;
+		iod->npages = 1;
 	}
 
 	prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
 	if (!prp_list) {
 		cmd->prp2 = cpu_to_le64(dma_addr);
-		*len = (*len - length) + PAGE_SIZE;
-		kfree(prps);
-		return NULL;
+		iod->npages = -1;
+		return (total_len - length) + PAGE_SIZE;
 	}
-	prps->list[0] = prp_list;
-	prps->first_dma = prp_dma;
+	list[0] = prp_list;
+	iod->first_dma = prp_dma;
 	cmd->prp2 = cpu_to_le64(prp_dma);
 	i = 0;
 	for (;;) {
 		if (i == PAGE_SIZE / 8) {
 			__le64 *old_prp_list = prp_list;
 			prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
-			if (!prp_list) {
-				*len = (*len - length);
-				return prps;
-			}
-			prps->list[prps->npages++] = prp_list;
+			if (!prp_list)
+				return total_len - length;
+			list[iod->npages++] = prp_list;
 			prp_list[0] = old_prp_list[i - 1];
 			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
 			i = 1;
@@ -457,21 +464,21 @@ static struct nvme_prps *nvme_setup_prps(struct nvme_dev *dev,
 		dma_len = sg_dma_len(sg);
 	}
 
-	return prps;
+	return total_len;
 }
 
 /* NVMe scatterlists require no holes in the virtual address */
 #define BIOVEC_NOT_VIRT_MERGEABLE(vec1, vec2)	((vec2)->bv_offset || \
 			(((vec1)->bv_offset + (vec1)->bv_len) % PAGE_SIZE))
 
-static int nvme_map_bio(struct device *dev, struct nvme_bio *nbio,
+static int nvme_map_bio(struct device *dev, struct nvme_iod *iod,
 		struct bio *bio, enum dma_data_direction dma_dir, int psegs)
 {
 	struct bio_vec *bvec, *bvprv = NULL;
 	struct scatterlist *sg = NULL;
 	int i, old_idx, length = 0, nsegs = 0;
 
-	sg_init_table(nbio->sg, psegs);
+	sg_init_table(iod->sg, psegs);
 	old_idx = bio->bi_idx;
 	bio_for_each_segment(bvec, bio, i) {
 		if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) {
@@ -479,7 +486,7 @@ static int nvme_map_bio(struct device *dev, struct nvme_bio *nbio,
 		} else {
 			if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec))
 				break;
-			sg = sg ? sg + 1 : nbio->sg;
+			sg = sg ? sg + 1 : iod->sg;
 			sg_set_page(sg, bvec->bv_page, bvec->bv_len,
 						bvec->bv_offset);
 			nsegs++;
@@ -488,9 +495,9 @@ static int nvme_map_bio(struct device *dev, struct nvme_bio *nbio,
 		bvprv = bvec;
 	}
 	bio->bi_idx = i;
-	nbio->nents = nsegs;
+	iod->nents = nsegs;
 	sg_mark_end(sg);
-	if (dma_map_sg(dev, nbio->sg, nbio->nents, dma_dir) == 0) {
+	if (dma_map_sg(dev, iod->sg, iod->nents, dma_dir) == 0) {
 		bio->bi_idx = old_idx;
 		return -ENOMEM;
 	}
@@ -531,7 +538,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 						struct bio *bio)
 {
 	struct nvme_command *cmnd;
-	struct nvme_bio *nbio;
+	struct nvme_iod *iod;
 	enum dma_data_direction dma_dir;
 	int cmdid, length, result = -ENOMEM;
 	u16 control;
@@ -544,15 +551,15 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		return result;
 	}
 
-	nbio = alloc_nbio(psegs, GFP_ATOMIC);
-	if (!nbio)
+	iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC);
+	if (!iod)
 		goto nomem;
-	nbio->bio = bio;
+	iod->private = bio;
 
 	result = -EBUSY;
-	cmdid = alloc_cmdid(nvmeq, nbio, bio_completion, IO_TIMEOUT);
+	cmdid = alloc_cmdid(nvmeq, iod, bio_completion, IO_TIMEOUT);
 	if (unlikely(cmdid < 0))
-		goto free_nbio;
+		goto free_iod;
 
 	if ((bio->bi_rw & REQ_FLUSH) && !psegs)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
@@ -578,15 +585,15 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		dma_dir = DMA_FROM_DEVICE;
 	}
 
-	result = nvme_map_bio(nvmeq->q_dmadev, nbio, bio, dma_dir, psegs);
+	result = nvme_map_bio(nvmeq->q_dmadev, iod, bio, dma_dir, psegs);
 	if (result < 0)
-		goto free_nbio;
+		goto free_iod;
 	length = result;
 
 	cmnd->rw.command_id = cmdid;
 	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
-	nbio->prps = nvme_setup_prps(nvmeq->dev, &cmnd->common, nbio->sg,
-						&length, GFP_ATOMIC);
+	length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
+								GFP_ATOMIC);
 	cmnd->rw.slba = cpu_to_le64(bio->bi_sector >> (ns->lba_shift - 9));
 	cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
 	cmnd->rw.control = cpu_to_le16(control);
@@ -600,8 +607,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 
 	return 0;
 
- free_nbio:
-	free_nbio(nvmeq->dev, nbio);
+ free_iod:
+	nvme_free_iod(nvmeq->dev, iod);
 nomem:
 	return result;
 }
@@ -1005,18 +1012,18 @@ static int __devinit nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-static int nvme_map_user_pages(struct nvme_dev *dev, int write,
-				unsigned long addr, unsigned length,
-				struct scatterlist **sgp)
+static struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
+				unsigned long addr, unsigned length)
 {
 	int i, err, count, nents, offset;
 	struct scatterlist *sg;
 	struct page **pages;
+	struct nvme_iod *iod;
 
 	if (addr & 3)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	if (!length)
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	offset = offset_in_page(addr);
 	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
@@ -1029,7 +1036,8 @@ static int nvme_map_user_pages(struct nvme_dev *dev, int write,
 		goto put_pages;
 	}
 
-	sg = kcalloc(count, sizeof(*sg), GFP_KERNEL);
+	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
+	sg = iod->sg;
 	sg_init_table(sg, count);
 	for (i = 0; i < count; i++) {
 		sg_set_page(&sg[i], pages[i],
@@ -1042,22 +1050,24 @@ static int nvme_map_user_pages(struct nvme_dev *dev, int write,
 	nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
 				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	if (!nents)
-		goto put_pages;
+		goto free_iod;
 
 	kfree(pages);
-	*sgp = sg;
-	return nents;
+	return iod;
 
+ free_iod:
+	kfree(iod);
 put_pages:
 	for (i = 0; i < count; i++)
 		put_page(pages[i]);
 	kfree(pages);
-	return err;
+	return ERR_PTR(err);
 }
 
 static void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-			unsigned long addr, int length, struct scatterlist *sg)
+			unsigned long addr, int length, struct nvme_iod *iod)
 {
+	struct scatterlist *sg = iod->sg;
 	int i, count;
 
 	count = DIV_ROUND_UP(offset_in_page(addr) + length, PAGE_SIZE);
@@ -1074,9 +1084,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	struct nvme_user_io io;
 	struct nvme_command c;
 	unsigned length;
-	int nents, status;
-	struct scatterlist *sg;
-	struct nvme_prps *prps;
+	int status;
+	struct nvme_iod *iod;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
@@ -1086,15 +1095,14 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	case nvme_cmd_write:
 	case nvme_cmd_read:
 	case nvme_cmd_compare:
-		nents = nvme_map_user_pages(dev, io.opcode & 1, io.addr,
-								length, &sg);
+		iod = nvme_map_user_pages(dev, io.opcode & 1, io.addr, length);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (nents < 0)
-		return nents;
+	if (IS_ERR(iod))
+		return PTR_ERR(iod);
 
 	memset(&c, 0, sizeof(c));
 	c.rw.opcode = io.opcode;
@@ -1108,7 +1116,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.apptag = io.apptag;
 	c.rw.appmask = io.appmask;
 	/* XXX: metadata */
-	prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
+	length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL);
 
 	nvmeq = get_nvmeq(dev);
 	/*
@@ -1123,8 +1131,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	else
 		status = nvme_submit_sync_cmd(nvmeq, &c, NULL, IO_TIMEOUT);
 
-	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, sg);
-	nvme_free_prps(dev, prps);
+	nvme_unmap_user_pages(dev, io.opcode & 1, io.addr, length, iod);
+	nvme_free_iod(dev, iod);
 	return status;
 }
 
@@ -1134,9 +1142,8 @@ static int nvme_user_admin_cmd(struct nvme_ns *ns,
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_admin_cmd cmd;
 	struct nvme_command c;
-	int status, length, nents = 0;
-	struct scatterlist *sg;
-	struct nvme_prps *prps = NULL;
+	int status, length;
+	struct nvme_iod *iod;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -1158,19 +1165,21 @@ static int nvme_user_admin_cmd(struct nvme_ns *ns,
 
 	length = cmd.data_len;
 	if (cmd.data_len) {
-		nents = nvme_map_user_pages(dev, 1, cmd.addr, length, &sg);
-		if (nents < 0)
-			return nents;
-		prps = nvme_setup_prps(dev, &c.common, sg, &length, GFP_KERNEL);
+		iod = nvme_map_user_pages(dev, 1, cmd.addr, length);
+		if (IS_ERR(iod))
+			return PTR_ERR(iod);
+		length = nvme_setup_prps(dev, &c.common, iod, length,
+								GFP_KERNEL);
 	}
 
 	if (length != cmd.data_len)
 		status = -ENOMEM;
 	else
 		status = nvme_submit_admin_cmd(dev, &c, NULL);
+
 	if (cmd.data_len) {
-		nvme_unmap_user_pages(dev, 0, cmd.addr, cmd.data_len, sg);
-		nvme_free_prps(dev, prps);
+		nvme_unmap_user_pages(dev, 0, cmd.addr, cmd.data_len, iod);
+		nvme_free_iod(dev, iod);
 	}
 	return status;
 }