diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2008-07-11 19:27:31 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-07-11 19:27:31 -0400 |
commit | cd1aac32923a9c8adcc0ae85e33c1ca0c5855838 (patch) | |
tree | 3d55d9249ef960a7e345969404d537e36dbd9609 /fs/ext4 | |
parent | 61628a3f3a37af2bf25daf8e26fd6b76a78c4f76 (diff) |
ext4: Add ordered mode support for delalloc
This provides a new ordered mode implementation which gets rid of using
buffer heads to enforce the ordering between a metadata change and the
related data change. Instead, in the new ordering mode, it keeps track
of all of the inodes touched by each transaction on a list, and when
that transaction is committed, it flushes all of the dirty pages for
those inodes. In addition, the new ordered mode reverses the lock
ordering of the page lock and transaction lock, which provides easier
support for delayed allocation.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/inode.c | 30 |
1 files changed, 25 insertions, 5 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7923336ecf94..24518b57733e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2043,11 +2043,12 @@ static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | |||
2043 | return !buffer_mapped(bh) || buffer_delay(bh); | 2043 | return !buffer_mapped(bh) || buffer_delay(bh); |
2044 | } | 2044 | } |
2045 | 2045 | ||
2046 | /* FIXME!! only support data=writeback mode */ | ||
2047 | /* | 2046 | /* |
2048 | * get called vi ext4_da_writepages after taking page lock | 2047 | * get called vi ext4_da_writepages after taking page lock |
2049 | * We may end up doing block allocation here in case | 2048 | * We may end up doing block allocation here in case |
2050 | * mpage_da_map_blocks failed to allocate blocks. | 2049 | * mpage_da_map_blocks failed to allocate blocks. |
2050 | * | ||
2051 | * We also get called via journal_submit_inode_data_buffers | ||
2051 | */ | 2052 | */ |
2052 | static int ext4_da_writepage(struct page *page, | 2053 | static int ext4_da_writepage(struct page *page, |
2053 | struct writeback_control *wbc) | 2054 | struct writeback_control *wbc) |
@@ -2066,6 +2067,7 @@ static int ext4_da_writepage(struct page *page, | |||
2066 | * ext4_da_writepages() but directly (shrink_page_list). | 2067 | * ext4_da_writepages() but directly (shrink_page_list). |
2067 | * We cannot easily start a transaction here so we just skip | 2068 | * We cannot easily start a transaction here so we just skip |
2068 | * writing the page in case we would have to do so. | 2069 | * writing the page in case we would have to do so. |
2070 | * We reach here also via journal_submit_inode_data_buffers | ||
2069 | */ | 2071 | */ |
2070 | size = i_size_read(inode); | 2072 | size = i_size_read(inode); |
2071 | 2073 | ||
@@ -2081,8 +2083,11 @@ static int ext4_da_writepage(struct page *page, | |||
2081 | * We can't do block allocation under | 2083 | * We can't do block allocation under |
2082 | * page lock without a handle . So redirty | 2084 | * page lock without a handle . So redirty |
2083 | * the page and return | 2085 | * the page and return |
2086 | * We may reach here when we do a journal commit | ||
2087 | * via journal_submit_inode_data_buffers. | ||
2088 | * If we don't have mapping block we just ignore | ||
2089 | * them | ||
2084 | */ | 2090 | */ |
2085 | BUG_ON(wbc->sync_mode != WB_SYNC_NONE); | ||
2086 | redirty_page_for_writepage(wbc, page); | 2091 | redirty_page_for_writepage(wbc, page); |
2087 | unlock_page(page); | 2092 | unlock_page(page); |
2088 | return 0; | 2093 | return 0; |
@@ -2097,7 +2102,6 @@ static int ext4_da_writepage(struct page *page, | |||
2097 | return ret; | 2102 | return ret; |
2098 | } | 2103 | } |
2099 | 2104 | ||
2100 | |||
2101 | /* | 2105 | /* |
2102 | * For now just follow the DIO way to estimate the max credits | 2106 | * For now just follow the DIO way to estimate the max credits |
2103 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | 2107 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. |
@@ -2130,7 +2134,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2130 | return 0; | 2134 | return 0; |
2131 | 2135 | ||
2132 | /* | 2136 | /* |
2133 | * Estimate the worse case needed credits to write out | 2137 | * Estimate the worse case needed credits to write out |
2134 | * EXT4_MAX_BUF_BLOCKS pages | 2138 | * EXT4_MAX_BUF_BLOCKS pages |
2135 | */ | 2139 | */ |
2136 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | 2140 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; |
@@ -2152,6 +2156,19 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2152 | ret = PTR_ERR(handle); | 2156 | ret = PTR_ERR(handle); |
2153 | goto out_writepages; | 2157 | goto out_writepages; |
2154 | } | 2158 | } |
2159 | if (ext4_should_order_data(inode)) { | ||
2160 | /* | ||
2161 | * With ordered mode we need to add | ||
2162 | * the inode to the journal handle | ||
2163 | * when we do block allocation. | ||
2164 | */ | ||
2165 | ret = ext4_jbd2_file_inode(handle, inode); | ||
2166 | if (ret) { | ||
2167 | ext4_journal_stop(handle); | ||
2168 | goto out_writepages; | ||
2169 | } | ||
2170 | |||
2171 | } | ||
2155 | /* | 2172 | /* |
2156 | * set the max dirty pages could be write at a time | 2173 | * set the max dirty pages could be write at a time |
2157 | * to fit into the reserved transaction credits | 2174 | * to fit into the reserved transaction credits |
@@ -2735,7 +2752,10 @@ static const struct address_space_operations ext4_da_aops = { | |||
2735 | 2752 | ||
2736 | void ext4_set_aops(struct inode *inode) | 2753 | void ext4_set_aops(struct inode *inode) |
2737 | { | 2754 | { |
2738 | if (ext4_should_order_data(inode)) | 2755 | if (ext4_should_order_data(inode) && |
2756 | test_opt(inode->i_sb, DELALLOC)) | ||
2757 | inode->i_mapping->a_ops = &ext4_da_aops; | ||
2758 | else if (ext4_should_order_data(inode)) | ||
2739 | inode->i_mapping->a_ops = &ext4_ordered_aops; | 2759 | inode->i_mapping->a_ops = &ext4_ordered_aops; |
2740 | else if (ext4_should_writeback_data(inode) && | 2760 | else if (ext4_should_writeback_data(inode) && |
2741 | test_opt(inode->i_sb, DELALLOC)) | 2761 | test_opt(inode->i_sb, DELALLOC)) |