author      Jeff Moyer <jmoyer@redhat.com>        2009-10-02 18:57:36 -0400
committer   Jens Axboe <jens.axboe@oracle.com>    2009-10-28 04:29:25 -0400
commit      cfb1e33eed48165763edc7a4a067cf5f74898d0b (patch)
tree        d0e0bdd0664615b1f7be6cf770476e16dbcad116
parent      1af60fbd759d31f565552fea315c2033947cfbe6 (diff)
aio: implement request batching
Hi,

Some workloads issue batches of small I/O, and the performance is poor
due to the call to blk_run_address_space for every single iocb. Nathan
Roberts pointed this out, and suggested that by deferring this call
until all I/Os in the iocb array are submitted to the block layer, we
can realize some impressive performance gains (up to 30% for sequential
4k reads in batches of 16).

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
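For reference, the submission pattern this patch targets looks like the following from userspace. This is not part of the patch: it is a minimal sketch assuming libaio and an O_DIRECT-capable test file (the name "testfile" is a placeholder). All sixteen 4k reads are queued with a single io_submit() call, which is exactly the case where deferring blk_run_address_space pays off.

/*
 * Illustrative sketch only -- not part of the patch.  Sixteen sequential
 * 4k O_DIRECT reads submitted in one io_submit() call via libaio; the
 * file name is a placeholder.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <libaio.h>

#define BATCH 16
#define BLKSZ 4096

int main(void)
{
        io_context_t ctx = 0;
        struct iocb cb[BATCH], *cbs[BATCH];
        struct io_event events[BATCH];
        void *buf[BATCH];
        int fd, i;

        fd = open("testfile", O_RDONLY | O_DIRECT);
        if (fd < 0 || io_setup(BATCH, &ctx) < 0)
                return 1;

        for (i = 0; i < BATCH; i++) {
                if (posix_memalign(&buf[i], BLKSZ, BLKSZ))
                        return 1;
                /* one 4k read at offset i * 4k */
                io_prep_pread(&cb[i], fd, buf[i], BLKSZ, (long long)i * BLKSZ);
                cbs[i] = &cb[i];
        }

        /*
         * A single syscall covers the whole batch; with request batching
         * the block device is kicked once, after all 16 iocbs have been
         * submitted, instead of once per iocb.
         */
        if (io_submit(ctx, BATCH, cbs) != BATCH)
                return 1;
        if (io_getevents(ctx, BATCH, BATCH, events, NULL) != BATCH)
                return 1;

        io_destroy(ctx);
        close(fd);
        return 0;
}

Before this change, each of the sixteen iocbs kicked the block device individually from the direct-io path; afterwards the kick happens once per unique mapping in aio_batch_free(), after the whole iocb array has been submitted.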
-rw-r--r--   fs/aio.c         61
-rw-r--r--   fs/direct-io.c    8
2 files changed, 63 insertions, 6 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c9340573..cf0bef428f88 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -32,6 +32,9 @@
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/eventfd.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/hash.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -60,6 +63,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
+#define AIO_BATCH_HASH_BITS	3 /* allocated on-stack, so don't go crazy */
+#define AIO_BATCH_HASH_SIZE	(1 << AIO_BATCH_HASH_BITS)
+struct aio_batch_entry {
+	struct hlist_node list;
+	struct address_space *mapping;
+};
+mempool_t *abe_pool;
+
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -73,6 +84,8 @@ static int __init aio_setup(void)
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	aio_wq = create_workqueue("aio");
+	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
+	BUG_ON(!abe_pool);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -1531,8 +1544,44 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
 	return 1;
 }
 
+static void aio_batch_add(struct address_space *mapping,
+			  struct hlist_head *batch_hash)
+{
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos;
+	unsigned bucket;
+
+	bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
+	hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
+		if (abe->mapping == mapping)
+			return;
+	}
+
+	abe = mempool_alloc(abe_pool, GFP_KERNEL);
+	BUG_ON(!igrab(mapping->host));
+	abe->mapping = mapping;
+	hlist_add_head(&abe->list, &batch_hash[bucket]);
+	return;
+}
+
+static void aio_batch_free(struct hlist_head *batch_hash)
+{
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos, *n;
+	int i;
+
+	for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
+		hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
+			blk_run_address_space(abe->mapping);
+			iput(abe->mapping->host);
+			hlist_del(&abe->list);
+			mempool_free(abe, abe_pool);
+		}
+	}
+}
+
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb)
+			 struct iocb *iocb, struct hlist_head *batch_hash)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1608,6 +1657,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			;
 	}
 	spin_unlock_irq(&ctx->ctx_lock);
+	if (req->ki_opcode == IOCB_CMD_PREAD ||
+	    req->ki_opcode == IOCB_CMD_PREADV ||
+	    req->ki_opcode == IOCB_CMD_PWRITE ||
+	    req->ki_opcode == IOCB_CMD_PWRITEV)
+		aio_batch_add(file->f_mapping, batch_hash);
+
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
 
@@ -1635,6 +1690,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
+	struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1666,10 +1722,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp);
+		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
 		if (ret)
 			break;
 	}
+	aio_batch_free(batch_hash);
 
 	put_ioctx(ctx);
 	return i ? i : ret;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index c86d35f142de..3af761c8c5cc 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1028,9 +1028,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	if (dio->bio)
 		dio_bio_submit(dio);
 
-	/* All IO is now issued, send it on its way */
-	blk_run_address_space(inode->i_mapping);
-
 	/*
 	 * It is possible that, we return short IO due to end of file.
 	 * In that case, we need to release all the pages we got hold on.
@@ -1057,8 +1054,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	    ((rw & READ) || (dio->result == dio->size)))
 		ret = -EIOCBQUEUED;
 
-	if (ret != -EIOCBQUEUED)
+	if (ret != -EIOCBQUEUED) {
+		/* All IO is now issued, send it on its way */
+		blk_run_address_space(inode->i_mapping);
 		dio_await_completion(dio);
+	}
 
 	/*
 	 * Sync will always be dropping the final ref and completing the