diff options
author | Jeff Moyer <jmoyer@redhat.com> | 2009-10-02 18:57:36 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-10-28 04:29:25 -0400 |
commit | cfb1e33eed48165763edc7a4a067cf5f74898d0b (patch) | |
tree | d0e0bdd0664615b1f7be6cf770476e16dbcad116 /fs | |
parent | 1af60fbd759d31f565552fea315c2033947cfbe6 (diff) |
aio: implement request batching
Hi,
Some workloads issue batches of small I/O, and the performance is poor
due to the call to blk_run_address_space for every single iocb. Nathan
Roberts pointed this out, and suggested that by deferring this call
until all I/Os in the iocb array are submitted to the block layer, we
can realize some impressive performance gains (up to 30% for sequential
4k reads in batches of 16).
Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/aio.c | 61 | ||||
-rw-r--r-- | fs/direct-io.c | 8 |
2 files changed, 63 insertions, 6 deletions
@@ -32,6 +32,9 @@ | |||
32 | #include <linux/workqueue.h> | 32 | #include <linux/workqueue.h> |
33 | #include <linux/security.h> | 33 | #include <linux/security.h> |
34 | #include <linux/eventfd.h> | 34 | #include <linux/eventfd.h> |
35 | #include <linux/blkdev.h> | ||
36 | #include <linux/mempool.h> | ||
37 | #include <linux/hash.h> | ||
35 | 38 | ||
36 | #include <asm/kmap_types.h> | 39 | #include <asm/kmap_types.h> |
37 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
@@ -60,6 +63,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine); | |||
60 | static DEFINE_SPINLOCK(fput_lock); | 63 | static DEFINE_SPINLOCK(fput_lock); |
61 | static LIST_HEAD(fput_head); | 64 | static LIST_HEAD(fput_head); |
62 | 65 | ||
66 | #define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */ | ||
67 | #define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS) | ||
68 | struct aio_batch_entry { | ||
69 | struct hlist_node list; | ||
70 | struct address_space *mapping; | ||
71 | }; | ||
72 | mempool_t *abe_pool; | ||
73 | |||
63 | static void aio_kick_handler(struct work_struct *); | 74 | static void aio_kick_handler(struct work_struct *); |
64 | static void aio_queue_work(struct kioctx *); | 75 | static void aio_queue_work(struct kioctx *); |
65 | 76 | ||
@@ -73,6 +84,8 @@ static int __init aio_setup(void) | |||
73 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 84 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
74 | 85 | ||
75 | aio_wq = create_workqueue("aio"); | 86 | aio_wq = create_workqueue("aio"); |
87 | abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); | ||
88 | BUG_ON(!abe_pool); | ||
76 | 89 | ||
77 | pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); | 90 | pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page)); |
78 | 91 | ||
@@ -1531,8 +1544,44 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode, | |||
1531 | return 1; | 1544 | return 1; |
1532 | } | 1545 | } |
1533 | 1546 | ||
1547 | static void aio_batch_add(struct address_space *mapping, | ||
1548 | struct hlist_head *batch_hash) | ||
1549 | { | ||
1550 | struct aio_batch_entry *abe; | ||
1551 | struct hlist_node *pos; | ||
1552 | unsigned bucket; | ||
1553 | |||
1554 | bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS); | ||
1555 | hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) { | ||
1556 | if (abe->mapping == mapping) | ||
1557 | return; | ||
1558 | } | ||
1559 | |||
1560 | abe = mempool_alloc(abe_pool, GFP_KERNEL); | ||
1561 | BUG_ON(!igrab(mapping->host)); | ||
1562 | abe->mapping = mapping; | ||
1563 | hlist_add_head(&abe->list, &batch_hash[bucket]); | ||
1564 | return; | ||
1565 | } | ||
1566 | |||
1567 | static void aio_batch_free(struct hlist_head *batch_hash) | ||
1568 | { | ||
1569 | struct aio_batch_entry *abe; | ||
1570 | struct hlist_node *pos, *n; | ||
1571 | int i; | ||
1572 | |||
1573 | for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) { | ||
1574 | hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) { | ||
1575 | blk_run_address_space(abe->mapping); | ||
1576 | iput(abe->mapping->host); | ||
1577 | hlist_del(&abe->list); | ||
1578 | mempool_free(abe, abe_pool); | ||
1579 | } | ||
1580 | } | ||
1581 | } | ||
1582 | |||
1534 | static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | 1583 | static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, |
1535 | struct iocb *iocb) | 1584 | struct iocb *iocb, struct hlist_head *batch_hash) |
1536 | { | 1585 | { |
1537 | struct kiocb *req; | 1586 | struct kiocb *req; |
1538 | struct file *file; | 1587 | struct file *file; |
@@ -1608,6 +1657,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
1608 | ; | 1657 | ; |
1609 | } | 1658 | } |
1610 | spin_unlock_irq(&ctx->ctx_lock); | 1659 | spin_unlock_irq(&ctx->ctx_lock); |
1660 | if (req->ki_opcode == IOCB_CMD_PREAD || | ||
1661 | req->ki_opcode == IOCB_CMD_PREADV || | ||
1662 | req->ki_opcode == IOCB_CMD_PWRITE || | ||
1663 | req->ki_opcode == IOCB_CMD_PWRITEV) | ||
1664 | aio_batch_add(file->f_mapping, batch_hash); | ||
1665 | |||
1611 | aio_put_req(req); /* drop extra ref to req */ | 1666 | aio_put_req(req); /* drop extra ref to req */ |
1612 | return 0; | 1667 | return 0; |
1613 | 1668 | ||
@@ -1635,6 +1690,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, | |||
1635 | struct kioctx *ctx; | 1690 | struct kioctx *ctx; |
1636 | long ret = 0; | 1691 | long ret = 0; |
1637 | int i; | 1692 | int i; |
1693 | struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, }; | ||
1638 | 1694 | ||
1639 | if (unlikely(nr < 0)) | 1695 | if (unlikely(nr < 0)) |
1640 | return -EINVAL; | 1696 | return -EINVAL; |
@@ -1666,10 +1722,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, | |||
1666 | break; | 1722 | break; |
1667 | } | 1723 | } |
1668 | 1724 | ||
1669 | ret = io_submit_one(ctx, user_iocb, &tmp); | 1725 | ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash); |
1670 | if (ret) | 1726 | if (ret) |
1671 | break; | 1727 | break; |
1672 | } | 1728 | } |
1729 | aio_batch_free(batch_hash); | ||
1673 | 1730 | ||
1674 | put_ioctx(ctx); | 1731 | put_ioctx(ctx); |
1675 | return i ? i : ret; | 1732 | return i ? i : ret; |
diff --git a/fs/direct-io.c b/fs/direct-io.c
index c86d35f142de..3af761c8c5cc 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1028,9 +1028,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1028 | if (dio->bio) | 1028 | if (dio->bio) |
1029 | dio_bio_submit(dio); | 1029 | dio_bio_submit(dio); |
1030 | 1030 | ||
1031 | /* All IO is now issued, send it on its way */ | ||
1032 | blk_run_address_space(inode->i_mapping); | ||
1033 | |||
1034 | /* | 1031 | /* |
1035 | * It is possible that, we return short IO due to end of file. | 1032 | * It is possible that, we return short IO due to end of file. |
1036 | * In that case, we need to release all the pages we got hold on. | 1033 | * In that case, we need to release all the pages we got hold on. |
@@ -1057,8 +1054,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1057 | ((rw & READ) || (dio->result == dio->size))) | 1054 | ((rw & READ) || (dio->result == dio->size))) |
1058 | ret = -EIOCBQUEUED; | 1055 | ret = -EIOCBQUEUED; |
1059 | 1056 | ||
1060 | if (ret != -EIOCBQUEUED) | 1057 | if (ret != -EIOCBQUEUED) { |
1058 | /* All IO is now issued, send it on its way */ | ||
1059 | blk_run_address_space(inode->i_mapping); | ||
1061 | dio_await_completion(dio); | 1060 | dio_await_completion(dio); |
1061 | } | ||
1062 | 1062 | ||
1063 | /* | 1063 | /* |
1064 | * Sync will always be dropping the final ref and completing the | 1064 | * Sync will always be dropping the final ref and completing the |