aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2008-07-11 19:27:31 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2008-07-11 19:27:31 -0400
commit: cd1aac32923a9c8adcc0ae85e33c1ca0c5855838 (patch)
tree: 3d55d9249ef960a7e345969404d537e36dbd9609 /fs/ext4
parent: 61628a3f3a37af2bf25daf8e26fd6b76a78c4f76 (diff)
ext4: Add ordered mode support for delalloc
This provides a new ordered mode implementation which gets rid of using buffer heads to enforce the ordering between metadata changes and the related data changes. Instead, in the new ordering mode, it keeps track of all of the inodes touched by each transaction on a list, and when that transaction is committed, it flushes all of the dirty pages for those inodes. In addition, the new ordered mode reverses the lock ordering of the page lock and transaction lock, which provides easier support for delayed allocation.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/inode.c 30
1 files changed, 25 insertions, 5 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7923336ecf94..24518b57733e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2043,11 +2043,12 @@ static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2043 return !buffer_mapped(bh) || buffer_delay(bh); 2043 return !buffer_mapped(bh) || buffer_delay(bh);
2044} 2044}
2045 2045
2046/* FIXME!! only support data=writeback mode */
2047/* 2046/*
2048 * get called vi ext4_da_writepages after taking page lock 2047 * get called vi ext4_da_writepages after taking page lock
2049 * We may end up doing block allocation here in case 2048 * We may end up doing block allocation here in case
2050 * mpage_da_map_blocks failed to allocate blocks. 2049 * mpage_da_map_blocks failed to allocate blocks.
2050 *
2051 * We also get called via journal_submit_inode_data_buffers
2051 */ 2052 */
2052static int ext4_da_writepage(struct page *page, 2053static int ext4_da_writepage(struct page *page,
2053 struct writeback_control *wbc) 2054 struct writeback_control *wbc)
@@ -2066,6 +2067,7 @@ static int ext4_da_writepage(struct page *page,
2066 * ext4_da_writepages() but directly (shrink_page_list). 2067 * ext4_da_writepages() but directly (shrink_page_list).
2067 * We cannot easily start a transaction here so we just skip 2068 * We cannot easily start a transaction here so we just skip
2068 * writing the page in case we would have to do so. 2069 * writing the page in case we would have to do so.
2070 * We reach here also via journal_submit_inode_data_buffers
2069 */ 2071 */
2070 size = i_size_read(inode); 2072 size = i_size_read(inode);
2071 2073
@@ -2081,8 +2083,11 @@ static int ext4_da_writepage(struct page *page,
2081 * We can't do block allocation under 2083 * We can't do block allocation under
2082 * page lock without a handle . So redirty 2084 * page lock without a handle . So redirty
2083 * the page and return 2085 * the page and return
2086 * We may reach here when we do a journal commit
2087 * via journal_submit_inode_data_buffers.
2088 * If we don't have mapping block we just ignore
2089 * them
2084 */ 2090 */
2085 BUG_ON(wbc->sync_mode != WB_SYNC_NONE);
2086 redirty_page_for_writepage(wbc, page); 2091 redirty_page_for_writepage(wbc, page);
2087 unlock_page(page); 2092 unlock_page(page);
2088 return 0; 2093 return 0;
@@ -2097,7 +2102,6 @@ static int ext4_da_writepage(struct page *page,
2097 return ret; 2102 return ret;
2098} 2103}
2099 2104
2100
2101/* 2105/*
2102 * For now just follow the DIO way to estimate the max credits 2106 * For now just follow the DIO way to estimate the max credits
2103 * needed to write out EXT4_MAX_WRITEBACK_PAGES. 2107 * needed to write out EXT4_MAX_WRITEBACK_PAGES.
@@ -2130,7 +2134,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2130 return 0; 2134 return 0;
2131 2135
2132 /* 2136 /*
2133 * Estimate the worse case needed credits to write out 2137 * Estimate the worse case needed credits to write out
2134 * EXT4_MAX_BUF_BLOCKS pages 2138 * EXT4_MAX_BUF_BLOCKS pages
2135 */ 2139 */
2136 needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; 2140 needed_blocks = EXT4_MAX_WRITEBACK_CREDITS;
@@ -2152,6 +2156,19 @@ static int ext4_da_writepages(struct address_space *mapping,
2152 ret = PTR_ERR(handle); 2156 ret = PTR_ERR(handle);
2153 goto out_writepages; 2157 goto out_writepages;
2154 } 2158 }
2159 if (ext4_should_order_data(inode)) {
2160 /*
2161 * With ordered mode we need to add
2162 * the inode to the journal handle
2163 * when we do block allocation.
2164 */
2165 ret = ext4_jbd2_file_inode(handle, inode);
2166 if (ret) {
2167 ext4_journal_stop(handle);
2168 goto out_writepages;
2169 }
2170
2171 }
2155 /* 2172 /*
2156 * set the max dirty pages could be write at a time 2173 * set the max dirty pages could be write at a time
2157 * to fit into the reserved transaction credits 2174 * to fit into the reserved transaction credits
@@ -2735,7 +2752,10 @@ static const struct address_space_operations ext4_da_aops = {
2735 2752
2736void ext4_set_aops(struct inode *inode) 2753void ext4_set_aops(struct inode *inode)
2737{ 2754{
2738 if (ext4_should_order_data(inode)) 2755 if (ext4_should_order_data(inode) &&
2756 test_opt(inode->i_sb, DELALLOC))
2757 inode->i_mapping->a_ops = &ext4_da_aops;
2758 else if (ext4_should_order_data(inode))
2739 inode->i_mapping->a_ops = &ext4_ordered_aops; 2759 inode->i_mapping->a_ops = &ext4_ordered_aops;
2740 else if (ext4_should_writeback_data(inode) && 2760 else if (ext4_should_writeback_data(inode) &&
2741 test_opt(inode->i_sb, DELALLOC)) 2761 test_opt(inode->i_sb, DELALLOC))