diff options
author | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 21:30:10 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-10-27 21:30:10 -0400 |
commit | bd2d0210cf22f2bd0cef72eb97cf94fc7d31d8cc (patch) | |
tree | f0d1902b7ff4294114614cc706855c3d6b131f73 /fs/ext4/inode.c | |
parent | 1de3e3df917459422cb2aecac440febc8879d410 (diff) |
ext4: use bio layer instead of buffer layer in mpage_da_submit_io
Call the block I/O layer directly instead of going through the buffer
layer. This should give us much better performance and scalability,
as well as lowering our CPU utilization when doing buffered writeback.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 118 |
1 files changed, 14 insertions, 104 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c65d647378f9..58604fe11f4f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2016,8 +2016,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2016 | struct buffer_head *bh, *page_bufs = NULL; | 2016 | struct buffer_head *bh, *page_bufs = NULL; |
2017 | int journal_data = ext4_should_journal_data(inode); | 2017 | int journal_data = ext4_should_journal_data(inode); |
2018 | sector_t pblock = 0, cur_logical = 0; | 2018 | sector_t pblock = 0, cur_logical = 0; |
2019 | struct ext4_io_submit io_submit; | ||
2019 | 2020 | ||
2020 | BUG_ON(mpd->next_page <= mpd->first_page); | 2021 | BUG_ON(mpd->next_page <= mpd->first_page); |
2022 | memset(&io_submit, 0, sizeof(io_submit)); | ||
2021 | /* | 2023 | /* |
2022 | * We need to start from the first_page to the next_page - 1 | 2024 | * We need to start from the first_page to the next_page - 1 |
2023 | * to make sure we also write the mapped dirty buffer_heads. | 2025 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -2109,16 +2111,16 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2109 | /* mark the buffer_heads as dirty & uptodate */ | 2111 | /* mark the buffer_heads as dirty & uptodate */ |
2110 | block_commit_write(page, 0, len); | 2112 | block_commit_write(page, 0, len); |
2111 | 2113 | ||
2112 | if (journal_data && PageChecked(page)) | 2114 | /* |
2115 | * Delalloc doesn't support data journalling, | ||
2116 | * but eventually maybe we'll lift this | ||
2117 | * restriction. | ||
2118 | */ | ||
2119 | if (unlikely(journal_data && PageChecked(page))) | ||
2113 | err = __ext4_journalled_writepage(page, len); | 2120 | err = __ext4_journalled_writepage(page, len); |
2114 | else if (buffer_uninit(page_bufs)) { | 2121 | else |
2115 | ext4_set_bh_endio(page_bufs, inode); | 2122 | err = ext4_bio_write_page(&io_submit, page, |
2116 | err = block_write_full_page_endio(page, | 2123 | len, mpd->wbc); |
2117 | noalloc_get_block_write, | ||
2118 | mpd->wbc, ext4_end_io_buffer_write); | ||
2119 | } else | ||
2120 | err = block_write_full_page(page, | ||
2121 | noalloc_get_block_write, mpd->wbc); | ||
2122 | 2124 | ||
2123 | if (!err) | 2125 | if (!err) |
2124 | mpd->pages_written++; | 2126 | mpd->pages_written++; |
@@ -2131,6 +2133,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2131 | } | 2133 | } |
2132 | pagevec_release(&pvec); | 2134 | pagevec_release(&pvec); |
2133 | } | 2135 | } |
2136 | ext4_io_submit(&io_submit); | ||
2134 | return ret; | 2137 | return ret; |
2135 | } | 2138 | } |
2136 | 2139 | ||
@@ -3426,15 +3429,6 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
3426 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 3429 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
3427 | } | 3430 | } |
3428 | 3431 | ||
3429 | static void ext4_free_io_end(ext4_io_end_t *io) | ||
3430 | { | ||
3431 | BUG_ON(!io); | ||
3432 | if (io->page) | ||
3433 | put_page(io->page); | ||
3434 | iput(io->inode); | ||
3435 | kfree(io); | ||
3436 | } | ||
3437 | |||
3438 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) | 3432 | static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) |
3439 | { | 3433 | { |
3440 | struct buffer_head *head, *bh; | 3434 | struct buffer_head *head, *bh; |
@@ -3640,68 +3634,6 @@ static void dump_completed_IO(struct inode * inode) | |||
3640 | } | 3634 | } |
3641 | 3635 | ||
3642 | /* | 3636 | /* |
3643 | * check a range of space and convert unwritten extents to written. | ||
3644 | */ | ||
3645 | static int ext4_end_io_nolock(ext4_io_end_t *io) | ||
3646 | { | ||
3647 | struct inode *inode = io->inode; | ||
3648 | loff_t offset = io->offset; | ||
3649 | ssize_t size = io->size; | ||
3650 | int ret = 0; | ||
3651 | |||
3652 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | ||
3653 | "list->prev 0x%p\n", | ||
3654 | io, inode->i_ino, io->list.next, io->list.prev); | ||
3655 | |||
3656 | if (list_empty(&io->list)) | ||
3657 | return ret; | ||
3658 | |||
3659 | if (io->flag != EXT4_IO_UNWRITTEN) | ||
3660 | return ret; | ||
3661 | |||
3662 | ret = ext4_convert_unwritten_extents(inode, offset, size); | ||
3663 | if (ret < 0) { | ||
3664 | printk(KERN_EMERG "%s: failed to convert unwritten" | ||
3665 | "extents to written extents, error is %d" | ||
3666 | " io is still on inode %lu aio dio list\n", | ||
3667 | __func__, ret, inode->i_ino); | ||
3668 | return ret; | ||
3669 | } | ||
3670 | |||
3671 | if (io->iocb) | ||
3672 | aio_complete(io->iocb, io->result, 0); | ||
3673 | /* clear the DIO AIO unwritten flag */ | ||
3674 | io->flag = 0; | ||
3675 | return ret; | ||
3676 | } | ||
3677 | |||
3678 | /* | ||
3679 | * work on completed aio dio IO, to convert unwritten extents to extents | ||
3680 | */ | ||
3681 | static void ext4_end_io_work(struct work_struct *work) | ||
3682 | { | ||
3683 | ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); | ||
3684 | struct inode *inode = io->inode; | ||
3685 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
3686 | unsigned long flags; | ||
3687 | int ret; | ||
3688 | |||
3689 | mutex_lock(&inode->i_mutex); | ||
3690 | ret = ext4_end_io_nolock(io); | ||
3691 | if (ret < 0) { | ||
3692 | mutex_unlock(&inode->i_mutex); | ||
3693 | return; | ||
3694 | } | ||
3695 | |||
3696 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | ||
3697 | if (!list_empty(&io->list)) | ||
3698 | list_del_init(&io->list); | ||
3699 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | ||
3700 | mutex_unlock(&inode->i_mutex); | ||
3701 | ext4_free_io_end(io); | ||
3702 | } | ||
3703 | |||
3704 | /* | ||
3705 | * This function is called from ext4_sync_file(). | 3637 | * This function is called from ext4_sync_file(). |
3706 | * | 3638 | * |
3707 | * When IO is completed, the work to convert unwritten extents to | 3639 | * When IO is completed, the work to convert unwritten extents to |
@@ -3756,28 +3688,6 @@ int flush_completed_IO(struct inode *inode) | |||
3756 | return (ret2 < 0) ? ret2 : 0; | 3688 | return (ret2 < 0) ? ret2 : 0; |
3757 | } | 3689 | } |
3758 | 3690 | ||
3759 | static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) | ||
3760 | { | ||
3761 | ext4_io_end_t *io = NULL; | ||
3762 | |||
3763 | io = kmalloc(sizeof(*io), flags); | ||
3764 | |||
3765 | if (io) { | ||
3766 | igrab(inode); | ||
3767 | io->inode = inode; | ||
3768 | io->flag = 0; | ||
3769 | io->offset = 0; | ||
3770 | io->size = 0; | ||
3771 | io->page = NULL; | ||
3772 | io->iocb = NULL; | ||
3773 | io->result = 0; | ||
3774 | INIT_WORK(&io->work, ext4_end_io_work); | ||
3775 | INIT_LIST_HEAD(&io->list); | ||
3776 | } | ||
3777 | |||
3778 | return io; | ||
3779 | } | ||
3780 | |||
3781 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 3691 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
3782 | ssize_t size, void *private, int ret, | 3692 | ssize_t size, void *private, int ret, |
3783 | bool is_async) | 3693 | bool is_async) |
@@ -3797,7 +3707,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3797 | size); | 3707 | size); |
3798 | 3708 | ||
3799 | /* if not aio dio with unwritten extents, just free io and return */ | 3709 | /* if not aio dio with unwritten extents, just free io and return */ |
3800 | if (io_end->flag != EXT4_IO_UNWRITTEN){ | 3710 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
3801 | ext4_free_io_end(io_end); | 3711 | ext4_free_io_end(io_end); |
3802 | iocb->private = NULL; | 3712 | iocb->private = NULL; |
3803 | out: | 3713 | out: |
@@ -3842,7 +3752,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
3842 | goto out; | 3752 | goto out; |
3843 | } | 3753 | } |
3844 | 3754 | ||
3845 | io_end->flag = EXT4_IO_UNWRITTEN; | 3755 | io_end->flag = EXT4_IO_END_UNWRITTEN; |
3846 | inode = io_end->inode; | 3756 | inode = io_end->inode; |
3847 | 3757 | ||
3848 | /* Add the io_end to per-inode completed io list*/ | 3758 | /* Add the io_end to per-inode completed io list*/ |