aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/commit.c
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2008-07-11 19:27:31 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-07-11 19:27:31 -0400
commitcd1aac32923a9c8adcc0ae85e33c1ca0c5855838 (patch)
tree3d55d9249ef960a7e345969404d537e36dbd9609 /fs/jbd2/commit.c
parent61628a3f3a37af2bf25daf8e26fd6b76a78c4f76 (diff)
ext4: Add ordered mode support for delalloc
This provides a new ordered mode implementation which gets rid of using buffer heads to enforce the ordering between metadata change with the related data chage. Instead, in the new ordering mode, it keeps track of all of the inodes touched by each transaction on a list, and when that transaction is committed, it flushes all of the dirty pages for those inodes. In addition, the new ordered mode reverses the lock ordering of the page lock and transaction lock, which provides easier support for delayed allocation. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r--fs/jbd2/commit.c38
1 files changed, 33 insertions, 5 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 483183d15ed5..f8b3be873226 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -22,6 +22,8 @@
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/jiffies.h> 23#include <linux/jiffies.h>
24#include <linux/crc32.h> 24#include <linux/crc32.h>
25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
25 27
26/* 28/*
27 * Default IO end handler for temporary BJ_IO buffer_heads. 29 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -185,6 +187,27 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
185} 187}
186 188
187/* 189/*
190 * write the filemap data using writepage() address_space_operations.
191 * We don't do block allocation here even for delalloc. We don't
192 * use writepages() because with dealyed allocation we may be doing
193 * block allocation in writepages().
194 */
195static int journal_submit_inode_data_buffers(struct address_space *mapping)
196{
197 int ret;
198 struct writeback_control wbc = {
199 .sync_mode = WB_SYNC_ALL,
200 .nr_to_write = mapping->nrpages * 2,
201 .range_start = 0,
202 .range_end = i_size_read(mapping->host),
203 .for_writepages = 1,
204 };
205
206 ret = generic_writepages(mapping, &wbc);
207 return ret;
208}
209
210/*
188 * Submit all the data buffers of inode associated with the transaction to 211 * Submit all the data buffers of inode associated with the transaction to
189 * disk. 212 * disk.
190 * 213 *
@@ -192,7 +215,7 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
192 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently 215 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
193 * operate on from being released while we write out pages. 216 * operate on from being released while we write out pages.
194 */ 217 */
195static int journal_submit_inode_data_buffers(journal_t *journal, 218static int journal_submit_data_buffers(journal_t *journal,
196 transaction_t *commit_transaction) 219 transaction_t *commit_transaction)
197{ 220{
198 struct jbd2_inode *jinode; 221 struct jbd2_inode *jinode;
@@ -204,8 +227,13 @@ static int journal_submit_inode_data_buffers(journal_t *journal,
204 mapping = jinode->i_vfs_inode->i_mapping; 227 mapping = jinode->i_vfs_inode->i_mapping;
205 jinode->i_flags |= JI_COMMIT_RUNNING; 228 jinode->i_flags |= JI_COMMIT_RUNNING;
206 spin_unlock(&journal->j_list_lock); 229 spin_unlock(&journal->j_list_lock);
207 err = filemap_fdatawrite_range(mapping, 0, 230 /*
208 i_size_read(jinode->i_vfs_inode)); 231 * submit the inode data buffers. We use writepage
232 * instead of writepages. Because writepages can do
233 * block allocation with delalloc. We need to write
234 * only allocated blocks here.
235 */
236 err = journal_submit_inode_data_buffers(mapping);
209 if (!ret) 237 if (!ret)
210 ret = err; 238 ret = err;
211 spin_lock(&journal->j_list_lock); 239 spin_lock(&journal->j_list_lock);
@@ -228,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
228 struct jbd2_inode *jinode, *next_i; 256 struct jbd2_inode *jinode, *next_i;
229 int err, ret = 0; 257 int err, ret = 0;
230 258
231 /* For locking, see the comment in journal_submit_inode_data_buffers() */ 259 /* For locking, see the comment in journal_submit_data_buffers() */
232 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
233 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 261 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
234 jinode->i_flags |= JI_COMMIT_RUNNING; 262 jinode->i_flags |= JI_COMMIT_RUNNING;
@@ -431,7 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
431 * Now start flushing things to disk, in the order they appear 459 * Now start flushing things to disk, in the order they appear
432 * on the transaction lists. Data blocks go first. 460 * on the transaction lists. Data blocks go first.
433 */ 461 */
434 err = journal_submit_inode_data_buffers(journal, commit_transaction); 462 err = journal_submit_data_buffers(journal, commit_transaction);
435 if (err) 463 if (err)
436 jbd2_journal_abort(journal, err); 464 jbd2_journal_abort(journal, err);
437 465