author    Jeff Moyer <jmoyer@redhat.com>          2009-10-02 18:57:36 -0400
committer Jens Axboe <jens.axboe@oracle.com>      2009-10-28 04:29:25 -0400
commit    cfb1e33eed48165763edc7a4a067cf5f74898d0b (patch)
tree      d0e0bdd0664615b1f7be6cf770476e16dbcad116 /fs/aio.c
parent    1af60fbd759d31f565552fea315c2033947cfbe6 (diff)
aio: implement request batching
Hi,

Some workloads issue batches of small I/O, and the performance is poor
due to the call to blk_run_address_space for every single iocb. Nathan
Roberts pointed this out, and suggested that by deferring this call
until all I/Os in the iocb array are submitted to the block layer, we
can realize some impressive performance gains (up to 30% for sequential
4k reads in batches of 16).

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
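The batching only pays off when userspace actually hands io_submit() an array
of iocbs in one call, so that the deferred queue run covers the whole batch.
Below is a minimal, hypothetical userspace sketch using libaio that issues 16
sequential 4k reads in a single submission; the file name, the NR_REQS
constant, and the error handling are illustrative assumptions, not part of
this patch.

/*
 * Illustrative only: submit 16 x 4k reads in a single io_submit() call
 * so the kernel can defer blk_run_address_space() until the whole
 * iocb array has been queued.  Build with: gcc -o batch batch.c -laio
 */
#define _GNU_SOURCE
#include <libaio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <err.h>

#define NR_REQS 16              /* batch size from the commit message */
#define BUF_SZ  4096

int main(void)
{
        io_context_t ctx = 0;
        struct iocb cbs[NR_REQS], *cbp[NR_REQS];
        struct io_event events[NR_REQS];
        int fd, i;

        /* "testfile" is a placeholder path for this sketch */
        fd = open("testfile", O_RDONLY | O_DIRECT);
        if (fd < 0)
                err(1, "open");
        if (io_setup(NR_REQS, &ctx) < 0)
                errx(1, "io_setup");

        for (i = 0; i < NR_REQS; i++) {
                void *buf;

                if (posix_memalign(&buf, BUF_SZ, BUF_SZ))
                        errx(1, "posix_memalign");
                io_prep_pread(&cbs[i], fd, buf, BUF_SZ, (long long)i * BUF_SZ);
                cbp[i] = &cbs[i];
        }

        /* one syscall for all 16 iocbs: this is the batch the patch targets */
        if (io_submit(ctx, NR_REQS, cbp) != NR_REQS)
                errx(1, "io_submit");
        if (io_getevents(ctx, NR_REQS, NR_REQS, events, NULL) != NR_REQS)
                errx(1, "io_getevents");

        io_destroy(ctx);
        return 0;
}

Submitted this way, the kernel-side batch_hash below records the file's
address_space once, and the queue is run a single time in aio_batch_free()
rather than once per iocb.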
Diffstat (limited to 'fs/aio.c')
-rw-r--r--  fs/aio.c  61
1 file changed, 59 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c9340573..cf0bef428f88 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -32,6 +32,9 @@
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/eventfd.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/hash.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -60,6 +63,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
+#define AIO_BATCH_HASH_BITS	3 /* allocated on-stack, so don't go crazy */
+#define AIO_BATCH_HASH_SIZE	(1 << AIO_BATCH_HASH_BITS)
+struct aio_batch_entry {
+	struct hlist_node list;
+	struct address_space *mapping;
+};
+mempool_t *abe_pool;
+
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -73,6 +84,8 @@ static int __init aio_setup(void)
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	aio_wq = create_workqueue("aio");
+	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
+	BUG_ON(!abe_pool);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -1531,8 +1544,44 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode,
 	return 1;
 }
 
+static void aio_batch_add(struct address_space *mapping,
+			  struct hlist_head *batch_hash)
+{
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos;
+	unsigned bucket;
+
+	bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
+	hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
+		if (abe->mapping == mapping)
+			return;
+	}
+
+	abe = mempool_alloc(abe_pool, GFP_KERNEL);
+	BUG_ON(!igrab(mapping->host));
+	abe->mapping = mapping;
+	hlist_add_head(&abe->list, &batch_hash[bucket]);
+	return;
+}
+
+static void aio_batch_free(struct hlist_head *batch_hash)
+{
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos, *n;
+	int i;
+
+	for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
+		hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
+			blk_run_address_space(abe->mapping);
+			iput(abe->mapping->host);
+			hlist_del(&abe->list);
+			mempool_free(abe, abe_pool);
+		}
+	}
+}
+
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb)
+			 struct iocb *iocb, struct hlist_head *batch_hash)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1608,6 +1657,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			;
 	}
 	spin_unlock_irq(&ctx->ctx_lock);
+	if (req->ki_opcode == IOCB_CMD_PREAD ||
+	    req->ki_opcode == IOCB_CMD_PREADV ||
+	    req->ki_opcode == IOCB_CMD_PWRITE ||
+	    req->ki_opcode == IOCB_CMD_PWRITEV)
+		aio_batch_add(file->f_mapping, batch_hash);
+
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
 
@@ -1635,6 +1690,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
+	struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1666,10 +1722,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp);
+		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
 		if (ret)
 			break;
 	}
+	aio_batch_free(batch_hash);
 
 	put_ioctx(ctx);
 	return i ? i : ret;