Diffstat (limited to 'fs/aio.c')
-rw-r--r--  fs/aio.c  94
1 file changed, 58 insertions(+), 36 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c9340573..1cf12b3dd83a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
 #include <linux/aio_abi.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/backing-dev.h>
 #include <linux/uio.h>
 
 #define DEBUG 0
@@ -32,6 +33,9 @@
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/eventfd.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/hash.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
+#define AIO_BATCH_HASH_BITS	3 /* allocated on-stack, so don't go crazy */
+#define AIO_BATCH_HASH_SIZE	(1 << AIO_BATCH_HASH_BITS)
+struct aio_batch_entry {
+	struct hlist_node list;
+	struct address_space *mapping;
+};
+mempool_t *abe_pool;
+
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -73,6 +85,8 @@ static int __init aio_setup(void)
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	aio_wq = create_workqueue("aio");
+	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
+	BUG_ON(!abe_pool);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -697,10 +711,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	 */
 	ret = retry(iocb);
 
-	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
-		BUG_ON(!list_empty(&iocb->ki_wait.task_list));
+	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
 		aio_complete(iocb, ret, 0);
-	}
 out:
 	spin_lock_irq(&ctx->ctx_lock);
 
@@ -852,13 +864,6 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
 	unsigned long flags;
 	int run = 0;
 
-	/* We're supposed to be the only path putting the iocb back on the run
-	 * list. If we find that the iocb is *back* on a wait queue already
-	 * than retry has happened before we could queue the iocb. This also
-	 * means that the retry could have completed and freed our iocb, no
-	 * good. */
-	BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
-
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	/* set this inside the lock so that we can't race with aio_run_iocb()
 	 * testing it and putting the iocb on the run list under the lock */
@@ -872,7 +877,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
 /*
  * kick_iocb:
  *	Called typically from a wait queue callback context
- *	(aio_wake_function) to trigger a retry of the iocb.
+ *	to trigger a retry of the iocb.
  *	The retry is usually executed by aio workqueue
  *	threads (See aio_kick_handler).
  */
@@ -1506,33 +1511,44 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 	return 0;
 }
 
-/*
- * aio_wake_function:
- *	wait queue callback function for aio notification,
- *	Simply triggers a retry of the operation via kick_iocb.
- *
- *	This callback is specified in the wait queue entry in
- *	a kiocb.
- *
- * Note:
- * This routine is executed with the wait queue lock held.
- * Since kick_iocb acquires iocb->ctx->ctx_lock, it nests
- * the ioctx lock inside the wait queue lock. This is safe
- * because this callback isn't used for wait queues which
- * are nested inside ioctx lock (i.e. ctx->wait)
- */
-static int aio_wake_function(wait_queue_t *wait, unsigned mode,
-			     int sync, void *key)
+static void aio_batch_add(struct address_space *mapping,
+			  struct hlist_head *batch_hash)
+{
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos;
+	unsigned bucket;
+
+	bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
+	hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
+		if (abe->mapping == mapping)
+			return;
+	}
+
+	abe = mempool_alloc(abe_pool, GFP_KERNEL);
+	BUG_ON(!igrab(mapping->host));
+	abe->mapping = mapping;
+	hlist_add_head(&abe->list, &batch_hash[bucket]);
+	return;
+}
+
+static void aio_batch_free(struct hlist_head *batch_hash)
 {
-	struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait);
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos, *n;
+	int i;
 
-	list_del_init(&wait->task_list);
-	kick_iocb(iocb);
-	return 1;
+	for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
+		hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
+			blk_run_address_space(abe->mapping);
+			iput(abe->mapping->host);
+			hlist_del(&abe->list);
+			mempool_free(abe, abe_pool);
+		}
+	}
 }
 
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb)
+			 struct iocb *iocb, struct hlist_head *batch_hash)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1592,8 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
 	req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
 	req->ki_opcode = iocb->aio_lio_opcode;
-	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
-	INIT_LIST_HEAD(&req->ki_wait.task_list);
 
 	ret = aio_setup_iocb(req);
 
@@ -1608,6 +1622,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		;
 	}
 	spin_unlock_irq(&ctx->ctx_lock);
+	if (req->ki_opcode == IOCB_CMD_PREAD ||
+	    req->ki_opcode == IOCB_CMD_PREADV ||
+	    req->ki_opcode == IOCB_CMD_PWRITE ||
+	    req->ki_opcode == IOCB_CMD_PWRITEV)
+		aio_batch_add(file->f_mapping, batch_hash);
+
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
 
@@ -1635,6 +1655,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
+	struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1666,10 +1687,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp);
+		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
 		if (ret)
 			break;
 	}
+	aio_batch_free(batch_hash);
 
 	put_ioctx(ctx);
 	return i ? i : ret;
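
For context, what the patch buys: every read or write queued through a single io_submit() call now records its backing address_space in a small on-stack hash, and aio_batch_free() unplugs each mapping's block queue once after the whole batch has been submitted, instead of once per iocb. Below is a minimal userspace sketch (not part of the patch) of the submission pattern that exercises this path; it assumes libaio (link with -laio), and the file path, block size, and request count are illustrative assumptions only.

/*
 * Hedged sketch: submit NR_IOS O_DIRECT reads with a single io_submit()
 * call, so the kernel runs blk_run_address_space() once per mapping (via
 * aio_batch_free()) rather than once per request.
 */
#define _GNU_SOURCE
#include <libaio.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define NR_IOS	8
#define BLKSZ	4096

int main(void)
{
	struct iocb iocbs[NR_IOS], *iocbps[NR_IOS];
	struct io_event events[NR_IOS];
	io_context_t ctx = 0;
	void *bufs[NR_IOS];
	int fd, i;

	/* hypothetical test file; any O_DIRECT-capable file works */
	fd = open("/tmp/aio-batch-test", O_RDONLY | O_DIRECT);
	if (fd < 0 || io_setup(NR_IOS, &ctx) < 0) {
		perror("setup");
		return 1;
	}

	for (i = 0; i < NR_IOS; i++) {
		if (posix_memalign(&bufs[i], BLKSZ, BLKSZ))
			return 1;
		io_prep_pread(&iocbs[i], fd, bufs[i], BLKSZ,
			      (long long)i * BLKSZ);
		iocbps[i] = &iocbs[i];
	}

	/* one syscall for the whole batch: each iocb goes through
	 * io_submit_one() -> aio_batch_add(), then aio_batch_free()
	 * unplugs the queues once at the end of sys_io_submit() */
	if (io_submit(ctx, NR_IOS, iocbps) != NR_IOS)
		return 1;

	if (io_getevents(ctx, NR_IOS, NR_IOS, events, NULL) != NR_IOS)
		return 1;

	io_destroy(ctx);
	close(fd);
	return 0;
}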