aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2010-10-27 21:30:10 -0400
committer Theodore Ts'o <tytso@mit.edu> 2010-10-27 21:30:10 -0400
commit bd2d0210cf22f2bd0cef72eb97cf94fc7d31d8cc (patch)
tree f0d1902b7ff4294114614cc706855c3d6b131f73 /fs/ext4/inode.c
parent 1de3e3df917459422cb2aecac440febc8879d410 (diff)
ext4: use bio layer instead of buffer layer in mpage_da_submit_io
Call the block I/O layer directly instead of going through the buffer layer. This should give us much better performance and scalability, as well as lowering our CPU utilization when doing buffered writeback.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 118
1 files changed, 14 insertions, 104 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c65d647378f9..58604fe11f4f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2016,8 +2016,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2016 struct buffer_head *bh, *page_bufs = NULL; 2016 struct buffer_head *bh, *page_bufs = NULL;
2017 int journal_data = ext4_should_journal_data(inode); 2017 int journal_data = ext4_should_journal_data(inode);
2018 sector_t pblock = 0, cur_logical = 0; 2018 sector_t pblock = 0, cur_logical = 0;
2019 struct ext4_io_submit io_submit;
2019 2020
2020 BUG_ON(mpd->next_page <= mpd->first_page); 2021 BUG_ON(mpd->next_page <= mpd->first_page);
2022 memset(&io_submit, 0, sizeof(io_submit));
2021 /* 2023 /*
2022 * We need to start from the first_page to the next_page - 1 2024 * We need to start from the first_page to the next_page - 1
2023 * to make sure we also write the mapped dirty buffer_heads. 2025 * to make sure we also write the mapped dirty buffer_heads.
@@ -2109,16 +2111,16 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2109 /* mark the buffer_heads as dirty & uptodate */ 2111 /* mark the buffer_heads as dirty & uptodate */
2110 block_commit_write(page, 0, len); 2112 block_commit_write(page, 0, len);
2111 2113
2112 if (journal_data && PageChecked(page)) 2114 /*
2115 * Delalloc doesn't support data journalling,
2116 * but eventually maybe we'll lift this
2117 * restriction.
2118 */
2119 if (unlikely(journal_data && PageChecked(page)))
2113 err = __ext4_journalled_writepage(page, len); 2120 err = __ext4_journalled_writepage(page, len);
2114 else if (buffer_uninit(page_bufs)) { 2121 else
2115 ext4_set_bh_endio(page_bufs, inode); 2122 err = ext4_bio_write_page(&io_submit, page,
2116 err = block_write_full_page_endio(page, 2123 len, mpd->wbc);
2117 noalloc_get_block_write,
2118 mpd->wbc, ext4_end_io_buffer_write);
2119 } else
2120 err = block_write_full_page(page,
2121 noalloc_get_block_write, mpd->wbc);
2122 2124
2123 if (!err) 2125 if (!err)
2124 mpd->pages_written++; 2126 mpd->pages_written++;
@@ -2131,6 +2133,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2131 } 2133 }
2132 pagevec_release(&pvec); 2134 pagevec_release(&pvec);
2133 } 2135 }
2136 ext4_io_submit(&io_submit);
2134 return ret; 2137 return ret;
2135} 2138}
2136 2139
@@ -3426,15 +3429,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
3426 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 3429 return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
3427} 3430}
3428 3431
3429static void ext4_free_io_end(ext4_io_end_t *io)
3430{
3431 BUG_ON(!io);
3432 if (io->page)
3433 put_page(io->page);
3434 iput(io->inode);
3435 kfree(io);
3436}
3437
3438static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) 3432static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
3439{ 3433{
3440 struct buffer_head *head, *bh; 3434 struct buffer_head *head, *bh;
@@ -3640,68 +3634,6 @@ static void dump_completed_IO(struct inode * inode)
3640} 3634}
3641 3635
3642/* 3636/*
3643 * check a range of space and convert unwritten extents to written.
3644 */
3645static int ext4_end_io_nolock(ext4_io_end_t *io)
3646{
3647 struct inode *inode = io->inode;
3648 loff_t offset = io->offset;
3649 ssize_t size = io->size;
3650 int ret = 0;
3651
3652 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
3653 "list->prev 0x%p\n",
3654 io, inode->i_ino, io->list.next, io->list.prev);
3655
3656 if (list_empty(&io->list))
3657 return ret;
3658
3659 if (io->flag != EXT4_IO_UNWRITTEN)
3660 return ret;
3661
3662 ret = ext4_convert_unwritten_extents(inode, offset, size);
3663 if (ret < 0) {
3664 printk(KERN_EMERG "%s: failed to convert unwritten"
3665 "extents to written extents, error is %d"
3666 " io is still on inode %lu aio dio list\n",
3667 __func__, ret, inode->i_ino);
3668 return ret;
3669 }
3670
3671 if (io->iocb)
3672 aio_complete(io->iocb, io->result, 0);
3673 /* clear the DIO AIO unwritten flag */
3674 io->flag = 0;
3675 return ret;
3676}
3677
3678/*
3679 * work on completed aio dio IO, to convert unwritten extents to extents
3680 */
3681static void ext4_end_io_work(struct work_struct *work)
3682{
3683 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
3684 struct inode *inode = io->inode;
3685 struct ext4_inode_info *ei = EXT4_I(inode);
3686 unsigned long flags;
3687 int ret;
3688
3689 mutex_lock(&inode->i_mutex);
3690 ret = ext4_end_io_nolock(io);
3691 if (ret < 0) {
3692 mutex_unlock(&inode->i_mutex);
3693 return;
3694 }
3695
3696 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
3697 if (!list_empty(&io->list))
3698 list_del_init(&io->list);
3699 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
3700 mutex_unlock(&inode->i_mutex);
3701 ext4_free_io_end(io);
3702}
3703
3704/*
3705 * This function is called from ext4_sync_file(). 3637 * This function is called from ext4_sync_file().
3706 * 3638 *
3707 * When IO is completed, the work to convert unwritten extents to 3639 * When IO is completed, the work to convert unwritten extents to
@@ -3756,28 +3688,6 @@ int flush_completed_IO(struct inode *inode)
3756 return (ret2 < 0) ? ret2 : 0; 3688 return (ret2 < 0) ? ret2 : 0;
3757} 3689}
3758 3690
3759static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
3760{
3761 ext4_io_end_t *io = NULL;
3762
3763 io = kmalloc(sizeof(*io), flags);
3764
3765 if (io) {
3766 igrab(inode);
3767 io->inode = inode;
3768 io->flag = 0;
3769 io->offset = 0;
3770 io->size = 0;
3771 io->page = NULL;
3772 io->iocb = NULL;
3773 io->result = 0;
3774 INIT_WORK(&io->work, ext4_end_io_work);
3775 INIT_LIST_HEAD(&io->list);
3776 }
3777
3778 return io;
3779}
3780
3781static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3691static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3782 ssize_t size, void *private, int ret, 3692 ssize_t size, void *private, int ret,
3783 bool is_async) 3693 bool is_async)
@@ -3797,7 +3707,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3797 size); 3707 size);
3798 3708
3799 /* if not aio dio with unwritten extents, just free io and return */ 3709 /* if not aio dio with unwritten extents, just free io and return */
3800 if (io_end->flag != EXT4_IO_UNWRITTEN){ 3710 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3801 ext4_free_io_end(io_end); 3711 ext4_free_io_end(io_end);
3802 iocb->private = NULL; 3712 iocb->private = NULL;
3803out: 3713out:
@@ -3842,7 +3752,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
3842 goto out; 3752 goto out;
3843 } 3753 }
3844 3754
3845 io_end->flag = EXT4_IO_UNWRITTEN; 3755 io_end->flag = EXT4_IO_END_UNWRITTEN;
3846 inode = io_end->inode; 3756 inode = io_end->inode;
3847 3757
3848 /* Add the io_end to per-inode completed io list*/ 3758 /* Add the io_end to per-inode completed io list*/