diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2008-07-11 19:27:31 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-07-11 19:27:31 -0400 |
commit | cd1aac32923a9c8adcc0ae85e33c1ca0c5855838 (patch) | |
tree | 3d55d9249ef960a7e345969404d537e36dbd9609 /fs/jbd2/commit.c | |
parent | 61628a3f3a37af2bf25daf8e26fd6b76a78c4f76 (diff) |
ext4: Add ordered mode support for delalloc
This provides a new ordered mode implementation which gets rid of using
buffer heads to enforce the ordering between a metadata change and the
related data change. Instead, in the new ordering mode, it keeps track
of all of the inodes touched by each transaction on a list, and when
that transaction is committed, it flushes all of the dirty pages for
those inodes. In addition, the new ordered mode reverses the lock
ordering of the page lock and transaction lock, which provides easier
support for delayed allocation.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r-- | fs/jbd2/commit.c | 38 |
1 files changed, 33 insertions, 5 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 483183d15ed..f8b3be87322 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
24 | #include <linux/crc32.h> | 24 | #include <linux/crc32.h> |
25 | #include <linux/writeback.h> | ||
26 | #include <linux/backing-dev.h> | ||
25 | 27 | ||
26 | /* | 28 | /* |
27 | * Default IO end handler for temporary BJ_IO buffer_heads. | 29 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -185,6 +187,27 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) | |||
185 | } | 187 | } |
186 | 188 | ||
187 | /* | 189 | /* |
190 | * write the filemap data using writepage() address_space_operations. | ||
191 | * We don't do block allocation here even for delalloc. We don't | ||
192 | * use writepages() because with delayed allocation we may be doing | ||
193 | * block allocation in writepages(). | ||
194 | */ | ||
195 | static int journal_submit_inode_data_buffers(struct address_space *mapping) | ||
196 | { | ||
197 | int ret; | ||
198 | struct writeback_control wbc = { | ||
199 | .sync_mode = WB_SYNC_ALL, | ||
200 | .nr_to_write = mapping->nrpages * 2, | ||
201 | .range_start = 0, | ||
202 | .range_end = i_size_read(mapping->host), | ||
203 | .for_writepages = 1, | ||
204 | }; | ||
205 | |||
206 | ret = generic_writepages(mapping, &wbc); | ||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | /* | ||
188 | * Submit all the data buffers of inode associated with the transaction to | 211 | * Submit all the data buffers of inode associated with the transaction to |
189 | * disk. | 212 | * disk. |
190 | * | 213 | * |
@@ -192,7 +215,7 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) | |||
192 | * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently | 215 | * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently |
193 | * operate on from being released while we write out pages. | 216 | * operate on from being released while we write out pages. |
194 | */ | 217 | */ |
195 | static int journal_submit_inode_data_buffers(journal_t *journal, | 218 | static int journal_submit_data_buffers(journal_t *journal, |
196 | transaction_t *commit_transaction) | 219 | transaction_t *commit_transaction) |
197 | { | 220 | { |
198 | struct jbd2_inode *jinode; | 221 | struct jbd2_inode *jinode; |
@@ -204,8 +227,13 @@ static int journal_submit_inode_data_buffers(journal_t *journal, | |||
204 | mapping = jinode->i_vfs_inode->i_mapping; | 227 | mapping = jinode->i_vfs_inode->i_mapping; |
205 | jinode->i_flags |= JI_COMMIT_RUNNING; | 228 | jinode->i_flags |= JI_COMMIT_RUNNING; |
206 | spin_unlock(&journal->j_list_lock); | 229 | spin_unlock(&journal->j_list_lock); |
207 | err = filemap_fdatawrite_range(mapping, 0, | 230 | /* |
208 | i_size_read(jinode->i_vfs_inode)); | 231 | * submit the inode data buffers. We use writepage |
232 | * instead of writepages. Because writepages can do | ||
233 | * block allocation with delalloc. We need to write | ||
234 | * only allocated blocks here. | ||
235 | */ | ||
236 | err = journal_submit_inode_data_buffers(mapping); | ||
209 | if (!ret) | 237 | if (!ret) |
210 | ret = err; | 238 | ret = err; |
211 | spin_lock(&journal->j_list_lock); | 239 | spin_lock(&journal->j_list_lock); |
@@ -228,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, | |||
228 | struct jbd2_inode *jinode, *next_i; | 256 | struct jbd2_inode *jinode, *next_i; |
229 | int err, ret = 0; | 257 | int err, ret = 0; |
230 | 258 | ||
231 | /* For locking, see the comment in journal_submit_inode_data_buffers() */ | 259 | /* For locking, see the comment in journal_submit_data_buffers() */ |
232 | spin_lock(&journal->j_list_lock); | 260 | spin_lock(&journal->j_list_lock); |
233 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | 261 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
234 | jinode->i_flags |= JI_COMMIT_RUNNING; | 262 | jinode->i_flags |= JI_COMMIT_RUNNING; |
@@ -431,7 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
431 | * Now start flushing things to disk, in the order they appear | 459 | * Now start flushing things to disk, in the order they appear |
432 | * on the transaction lists. Data blocks go first. | 460 | * on the transaction lists. Data blocks go first. |
433 | */ | 461 | */ |
434 | err = journal_submit_inode_data_buffers(journal, commit_transaction); | 462 | err = journal_submit_data_buffers(journal, commit_transaction); |
435 | if (err) | 463 | if (err) |
436 | jbd2_journal_abort(journal, err); | 464 | jbd2_journal_abort(journal, err); |
437 | 465 | ||