aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMing Lei <ming.lei@canonical.com>2015-08-16 22:31:51 -0400
committerJens Axboe <axboe@fb.com>2015-09-23 13:01:16 -0400
commitbc07c10a3603a5ab3ef01ba42b3d41f9ac63d1b6 (patch)
tree1ebe0510f1b1f707635861e1e773b9176fbe0490
parentab1cb278bc7027663adbfb0b81404f8398437e11 (diff)
block: loop: support DIO & AIO
There are at least 3 advantages to using direct I/O and AIO to read/write the loop device's backing file:

1) double caching can be avoided, so memory usage decreases a lot
2) unlike user-space direct I/O, there is no cost of pinning pages
3) context switches are avoided while still obtaining good throughput
   - in buffered file reads, top random I/O throughput is often obtained only if requests are submitted concurrently from lots of tasks; but for sequential I/O, most of the time requests hit the page cache, so concurrent submissions often introduce unnecessary context switches and can't improve throughput much. There was a discussion[1] about using non-blocking I/O to improve this problem for applications.
   - with direct I/O and AIO, concurrent submissions can be avoided, and random read throughput isn't affected in the meantime

xfstests (-g auto, ext4) basically passes when running with direct I/O (aio); the one exception is generic/232, but it fails with loop buffered I/O (4.2-rc6-next-20150814) too.

The fio test results for performance follow: 4-job fio test inside an ext4 file system over a loop block device

1) How to run
   - KVM: 4 VCPUs, 2G RAM
   - linux kernel: 4.2-rc6-next-20150814 (base) with the patchset
   - the loop block device is over one image on SSD.
   - linux psync, 4 jobs, size 1500M, ext4 over loop block
   - test result: IOPS from fio output

2) Throughput (IOPS) becomes a bit better with direct I/O (aio)
   -------------------------------------------------------------
   test cases    |randread |read   |randwrite |write  |
   -------------------------------------------------------------
   base          |8015     |113811 |67442     |106978
   -------------------------------------------------------------
   base+loop aio |8136     |125040 |67811     |111376
   -------------------------------------------------------------
   - somehow, this should be caused by more page cache being available for applications, or by one extra page copy being avoided in the case of direct I/O

3) context switch
   - context switches decreased by ~50% with loop direct I/O (aio) compared with loop buffered I/O (4.2-rc6-next-20150814)

4) memory usage from /proc/meminfo
   -------------------------------------------------------------
                             | Buffers | Cached
   -------------------------------------------------------------
   base                      | > 760MB | ~950MB
   -------------------------------------------------------------
   base+loop direct I/O(aio) | < 5MB   | ~1.6GB
   -------------------------------------------------------------
   - so there is much more page cache available for applications with direct I/O

[1] https://lwn.net/Articles/612483/

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--drivers/block/loop.c98
-rw-r--r--drivers/block/loop.h2
2 files changed, 97 insertions, 3 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 75db3b98ec2b..23376084a5cb 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -445,6 +445,90 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
445 return ret; 445 return ret;
446} 446}
447 447
448static inline void handle_partial_read(struct loop_cmd *cmd, long bytes)
449{
450 if (bytes < 0 || (cmd->rq->cmd_flags & REQ_WRITE))
451 return;
452
453 if (unlikely(bytes < blk_rq_bytes(cmd->rq))) {
454 struct bio *bio = cmd->rq->bio;
455
456 bio_advance(bio, bytes);
457 zero_fill_bio(bio);
458 }
459}
460
461static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
462{
463 struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
464 struct request *rq = cmd->rq;
465
466 handle_partial_read(cmd, ret);
467
468 if (ret > 0)
469 ret = 0;
470 else if (ret < 0)
471 ret = -EIO;
472
473 rq->errors = ret;
474 blk_mq_complete_request(rq);
475}
476
477static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
478 loff_t pos, bool rw)
479{
480 struct iov_iter iter;
481 struct bio_vec *bvec;
482 struct bio *bio = cmd->rq->bio;
483 struct file *file = lo->lo_backing_file;
484 int ret;
485
486 /* nomerge for loop request queue */
487 WARN_ON(cmd->rq->bio != cmd->rq->biotail);
488
489 bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
490 iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
491 bio_segments(bio), blk_rq_bytes(cmd->rq));
492
493 cmd->iocb.ki_pos = pos;
494 cmd->iocb.ki_filp = file;
495 cmd->iocb.ki_complete = lo_rw_aio_complete;
496 cmd->iocb.ki_flags = IOCB_DIRECT;
497
498 if (rw == WRITE)
499 ret = file->f_op->write_iter(&cmd->iocb, &iter);
500 else
501 ret = file->f_op->read_iter(&cmd->iocb, &iter);
502
503 if (ret != -EIOCBQUEUED)
504 cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
505 return 0;
506}
507
508
509static inline int lo_rw_simple(struct loop_device *lo,
510 struct request *rq, loff_t pos, bool rw)
511{
512 struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
513
514 if (cmd->use_aio)
515 return lo_rw_aio(lo, cmd, pos, rw);
516
517 /*
518 * lo_write_simple and lo_read_simple should have been covered
519 * by io submit style function like lo_rw_aio(), one blocker
520 * is that lo_read_simple() need to call flush_dcache_page after
521 * the page is written from kernel, and it isn't easy to handle
522 * this in io submit style function which submits all segments
523 * of the req at one time. And direct read IO doesn't need to
524 * run flush_dcache_page().
525 */
526 if (rw == WRITE)
527 return lo_write_simple(lo, rq, pos);
528 else
529 return lo_read_simple(lo, rq, pos);
530}
531
448static int do_req_filebacked(struct loop_device *lo, struct request *rq) 532static int do_req_filebacked(struct loop_device *lo, struct request *rq)
449{ 533{
450 loff_t pos; 534 loff_t pos;
@@ -460,13 +544,13 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
460 else if (lo->transfer) 544 else if (lo->transfer)
461 ret = lo_write_transfer(lo, rq, pos); 545 ret = lo_write_transfer(lo, rq, pos);
462 else 546 else
463 ret = lo_write_simple(lo, rq, pos); 547 ret = lo_rw_simple(lo, rq, pos, WRITE);
464 548
465 } else { 549 } else {
466 if (lo->transfer) 550 if (lo->transfer)
467 ret = lo_read_transfer(lo, rq, pos); 551 ret = lo_read_transfer(lo, rq, pos);
468 else 552 else
469 ret = lo_read_simple(lo, rq, pos); 553 ret = lo_rw_simple(lo, rq, pos, READ);
470 } 554 }
471 555
472 return ret; 556 return ret;
@@ -1570,6 +1654,12 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1570 if (lo->lo_state != Lo_bound) 1654 if (lo->lo_state != Lo_bound)
1571 return -EIO; 1655 return -EIO;
1572 1656
1657 if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH |
1658 REQ_DISCARD)))
1659 cmd->use_aio = true;
1660 else
1661 cmd->use_aio = false;
1662
1573 queue_kthread_work(&lo->worker, &cmd->work); 1663 queue_kthread_work(&lo->worker, &cmd->work);
1574 1664
1575 return BLK_MQ_RQ_QUEUE_OK; 1665 return BLK_MQ_RQ_QUEUE_OK;
@@ -1589,7 +1679,9 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
1589 failed: 1679 failed:
1590 if (ret) 1680 if (ret)
1591 cmd->rq->errors = -EIO; 1681 cmd->rq->errors = -EIO;
1592 blk_mq_complete_request(cmd->rq); 1682 /* complete non-aio request */
1683 if (!cmd->use_aio || ret)
1684 blk_mq_complete_request(cmd->rq);
1593} 1685}
1594 1686
1595static void loop_queue_work(struct kthread_work *work) 1687static void loop_queue_work(struct kthread_work *work)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index d1de2217c09a..fb2237c73e61 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -69,6 +69,8 @@ struct loop_cmd {
69 struct kthread_work work; 69 struct kthread_work work;
70 struct request *rq; 70 struct request *rq;
71 struct list_head list; 71 struct list_head list;
72 bool use_aio; /* use AIO interface to handle I/O */
73 struct kiocb iocb;
72}; 74};
73 75
74/* Support for loadable transfer modules */ 76/* Support for loadable transfer modules */