Diffstat (limited to 'fs/aio.c')

-rw-r--r--  fs/aio.c | 94
1 file changed, 58 insertions(+), 36 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
 #include <linux/aio_abi.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/backing-dev.h>
 #include <linux/uio.h>
 
 #define DEBUG 0
@@ -32,6 +33,9 @@
 #include <linux/workqueue.h>
 #include <linux/security.h>
 #include <linux/eventfd.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/hash.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -60,6 +64,14 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
 static DEFINE_SPINLOCK(fput_lock);
 static LIST_HEAD(fput_head);
 
+#define AIO_BATCH_HASH_BITS	3 /* allocated on-stack, so don't go crazy */
+#define AIO_BATCH_HASH_SIZE	(1 << AIO_BATCH_HASH_BITS)
+struct aio_batch_entry {
+	struct hlist_node list;
+	struct address_space *mapping;
+};
+mempool_t *abe_pool;
+
 static void aio_kick_handler(struct work_struct *);
 static void aio_queue_work(struct kioctx *);
 
@@ -73,6 +85,8 @@ static int __init aio_setup(void)
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	aio_wq = create_workqueue("aio");
+	abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
+	BUG_ON(!abe_pool);
 
 	pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
 
@@ -697,10 +711,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	 */
 	ret = retry(iocb);
 
-	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
-		BUG_ON(!list_empty(&iocb->ki_wait.task_list));
+	if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED)
 		aio_complete(iocb, ret, 0);
-	}
 out:
 	spin_lock_irq(&ctx->ctx_lock);
 
@@ -852,13 +864,6 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
 	unsigned long flags;
 	int run = 0;
 
-	/* We're supposed to be the only path putting the iocb back on the run
-	 * list. If we find that the iocb is *back* on a wait queue already
-	 * than retry has happened before we could queue the iocb. This also
-	 * means that the retry could have completed and freed our iocb, no
-	 * good. */
-	BUG_ON((!list_empty(&iocb->ki_wait.task_list)));
-
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	/* set this inside the lock so that we can't race with aio_run_iocb()
 	 * testing it and putting the iocb on the run list under the lock */
@@ -872,7 +877,7 @@ static void try_queue_kicked_iocb(struct kiocb *iocb)
 /*
  * kick_iocb:
  *      Called typically from a wait queue callback context
- *      (aio_wake_function) to trigger a retry of the iocb.
+ *      to trigger a retry of the iocb.
  *      The retry is usually executed by aio workqueue
  *      threads (See aio_kick_handler).
  */
@@ -1506,33 +1511,44 @@ static ssize_t aio_setup_iocb(struct kiocb *kiocb)
 	return 0;
 }
 
-/*
- * aio_wake_function:
- *	wait queue callback function for aio notification,
- *	Simply triggers a retry of the operation via kick_iocb.
- *
- *	This callback is specified in the wait queue entry in
- *	a kiocb.
- *
- * Note:
- * This routine is executed with the wait queue lock held.
- * Since kick_iocb acquires iocb->ctx->ctx_lock, it nests
- * the ioctx lock inside the wait queue lock. This is safe
- * because this callback isn't used for wait queues which
- * are nested inside ioctx lock (i.e. ctx->wait)
- */
-static int aio_wake_function(wait_queue_t *wait, unsigned mode,
-			     int sync, void *key)
+static void aio_batch_add(struct address_space *mapping,
+			  struct hlist_head *batch_hash)
+{
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos;
+	unsigned bucket;
+
+	bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
+	hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
+		if (abe->mapping == mapping)
+			return;
+	}
+
+	abe = mempool_alloc(abe_pool, GFP_KERNEL);
+	BUG_ON(!igrab(mapping->host));
+	abe->mapping = mapping;
+	hlist_add_head(&abe->list, &batch_hash[bucket]);
+	return;
+}
+
+static void aio_batch_free(struct hlist_head *batch_hash)
 {
-	struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait);
+	struct aio_batch_entry *abe;
+	struct hlist_node *pos, *n;
+	int i;
 
-	list_del_init(&wait->task_list);
-	kick_iocb(iocb);
-	return 1;
+	for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
+		hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
+			blk_run_address_space(abe->mapping);
+			iput(abe->mapping->host);
+			hlist_del(&abe->list);
+			mempool_free(abe, abe_pool);
+		}
+	}
 }
 
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-		struct iocb *iocb)
+		struct iocb *iocb, struct hlist_head *batch_hash)
 {
 	struct kiocb *req;
 	struct file *file;
@@ -1592,8 +1608,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
 	req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
 	req->ki_opcode = iocb->aio_lio_opcode;
-	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
-	INIT_LIST_HEAD(&req->ki_wait.task_list);
 
 	ret = aio_setup_iocb(req);
 
@@ -1608,6 +1622,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		;
 	}
 	spin_unlock_irq(&ctx->ctx_lock);
+	if (req->ki_opcode == IOCB_CMD_PREAD ||
+	    req->ki_opcode == IOCB_CMD_PREADV ||
+	    req->ki_opcode == IOCB_CMD_PWRITE ||
+	    req->ki_opcode == IOCB_CMD_PWRITEV)
+		aio_batch_add(file->f_mapping, batch_hash);
+
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
 
@@ -1635,6 +1655,7 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 	struct kioctx *ctx;
 	long ret = 0;
 	int i;
+	struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
@@ -1666,10 +1687,11 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 			break;
 		}
 
-		ret = io_submit_one(ctx, user_iocb, &tmp);
+		ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
 		if (ret)
 			break;
 	}
+	aio_batch_free(batch_hash);
 
 	put_ioctx(ctx);
 	return i ? i : ret;
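
For context, a minimal userspace sketch of how this batching path gets exercised. It is not part of the patch: it only uses the raw Linux AIO syscall ABI (io_setup/io_submit/io_getevents via syscall(2)) and O_DIRECT reads; the wrapper names, file argument, and 4 KiB sizes are illustrative. The point it shows is that batch_hash lives on the stack of sys_io_submit(), so only iocbs passed in the same io_submit(2) call can share a batch: four reads against the same file dedup to one address_space in the hash and get a single blk_run_address_space() from aio_batch_free().

/* Illustrative only; error handling kept minimal. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/aio_abi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

/* Thin wrappers over the raw syscalls (glibc does not export these). */
static int io_setup(unsigned nr, aio_context_t *ctxp)
{
	return syscall(__NR_io_setup, nr, ctxp);
}

static int io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	return syscall(__NR_io_submit, ctx, nr, iocbpp);
}

static int io_getevents(aio_context_t ctx, long min_nr, long nr,
			struct io_event *events, struct timespec *timeout)
{
	return syscall(__NR_io_getevents, ctx, min_nr, nr, events, timeout);
}

int main(int argc, char **argv)
{
	aio_context_t ctx = 0;
	struct iocb cbs[4], *cbps[4];
	struct io_event events[4];
	int fd, i;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (io_setup(4, &ctx) < 0) {
		perror("io_setup");
		return 1;
	}
	for (i = 0; i < 4; i++) {
		void *buf;

		/* O_DIRECT wants aligned buffers, offsets, and lengths. */
		if (posix_memalign(&buf, 4096, 4096))
			return 1;
		memset(&cbs[i], 0, sizeof(cbs[i]));
		cbs[i].aio_fildes = fd;
		cbs[i].aio_lio_opcode = IOCB_CMD_PREAD;
		cbs[i].aio_buf = (unsigned long)buf;
		cbs[i].aio_nbytes = 4096;
		cbs[i].aio_offset = (long long)i * 4096;
		cbps[i] = &cbs[i];
	}
	/*
	 * One io_submit() call covers all four reads: io_submit_one() sees
	 * the same file->f_mapping each time, aio_batch_add() dedups it in
	 * batch_hash, and aio_batch_free() issues a single
	 * blk_run_address_space() for the whole batch.
	 */
	if (io_submit(ctx, 4, cbps) != 4) {
		perror("io_submit");
		return 1;
	}
	if (io_getevents(ctx, 4, 4, events, NULL) < 0) {
		perror("io_getevents");
		return 1;
	}
	return 0;
}

Splitting the same four reads across four io_submit() calls would still work, but each call would build and free its own batch, so the unplug coalescing shown above would be lost.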