From fd98496f467b3d26d05ab1498f41718b5ef13de5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 5 Jan 2009 21:34:13 -0500 Subject: jbd2: Add barrier not supported test to journal_wait_on_commit_record Xen doesn't report that barriers are not supported until buffer I/O is reported as completed, instead of when the buffer I/O is submitted. Add a check and a fallback codepath to journal_wait_on_commit_record() to detect this case, so that attempts to mount ext4 filesystems on LVM/devicemapper devices on Xen guests don't blow up with an "Aborting journal on device XXX"; "Remounting filesystem read-only" error. Thanks to Andreas Sundstrom for reporting this issue. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/jbd2/commit.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index ebc667bc54a8..6393fd0d804e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -168,12 +169,34 @@ static int journal_submit_commit_record(journal_t *journal, * This function along with journal_submit_commit_record * allows to write the commit record asynchronously. 
*/ -static int journal_wait_on_commit_record(struct buffer_head *bh) +static int journal_wait_on_commit_record(journal_t *journal, + struct buffer_head *bh) { int ret = 0; +retry: clear_buffer_dirty(bh); wait_on_buffer(bh); + if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { + printk(KERN_WARNING + "JBD2: wait_on_commit_record: sync failed on %s - " + "disabling barriers\n", journal->j_devname); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + spin_unlock(&journal->j_state_lock); + + lock_buffer(bh); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + bh->b_end_io = journal_end_buffer_io_sync; + + ret = submit_bh(WRITE_SYNC, bh); + if (ret) { + unlock_buffer(bh); + return ret; + } + goto retry; + } if (unlikely(!buffer_uptodate(bh))) ret = -EIO; @@ -799,7 +822,7 @@ wait_for_iobuf: __jbd2_journal_abort_hard(journal); } if (!err && !is_journal_aborted(journal)) - err = journal_wait_on_commit_record(cbh); + err = journal_wait_on_commit_record(journal, cbh); if (err) jbd2_journal_abort(journal, err); -- cgit v1.2.2 From e07f7183a486cf9783d1f8c9d2997b5b39eeb2d4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 26 Nov 2008 01:14:26 -0500 Subject: jbd2: improve jbd2 fsync batching This patch removes the static sleep time in favor of a more self-optimizing approach where we measure the average amount of time it takes to commit a transaction to disk and the amount of time a transaction has been running. If somebody does a sync write or an fsync(), traditionally we would sleep for 1 jiffy, which depending on the value of HZ could be a significant amount of time compared to how long it takes to commit a transaction to the underlying storage. With this patch, instead of sleeping for a jiffy, we check to see if the amount of time this transaction has been running is less than the average commit time, and if it is we sleep for the delta using schedule_hrtimeout to give us a higher precision sleep time. 
This greatly benefits high end storage where you could end up sleeping for longer than it takes to commit the transaction and therefore sitting idle instead of allowing the transaction to be committed by keeping the sleep time to a minimum so you are sure to always be doing something. Signed-off-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6393fd0d804e..f22d1828ea85 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -355,6 +355,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) int flags; int err; unsigned long long blocknr; + ktime_t start_time; + u64 commit_time; char *tagp = NULL; journal_header_t *header; journal_block_tag_t *tag = NULL; @@ -481,6 +483,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; journal->j_running_transaction = NULL; + start_time = ktime_get(); commit_transaction->t_log_start = journal->j_head; wake_up(&journal->j_wait_transaction_locked); spin_unlock(&journal->j_state_lock); @@ -995,6 +998,17 @@ restart_loop: J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; journal->j_committing_transaction = NULL; + commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); + + /* + * weight the commit time higher than the average time so we don't + * react too strongly to vast changes in the commit time + */ + if (likely(journal->j_average_commit_time)) + journal->j_average_commit_time = (commit_time + + journal->j_average_commit_time*3) / 4; + else + journal->j_average_commit_time = commit_time; spin_unlock(&journal->j_state_lock); if (journal->j_commit_callback) -- cgit v1.2.2 From fb68407b0d9efba962c03f55009c797e22f024bc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 6 Nov 2008 
17:50:21 -0500 Subject: jbd2: Call journal commit callback without holding j_list_lock Avoid freeing the transaction in __jbd2_journal_drop_transaction() so the journal commit callback can run without holding j_list_lock, to avoid lock contention on this spinlock. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f22d1828ea85..0ad84162c425 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -363,7 +363,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int space_left = 0; int first_tag = 0; int tag_flag; - int i; + int i, to_free = 0; int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; @@ -1011,12 +1011,10 @@ restart_loop: journal->j_average_commit_time = commit_time; spin_unlock(&journal->j_state_lock); - if (journal->j_commit_callback) - journal->j_commit_callback(journal, commit_transaction); - if (commit_transaction->t_checkpoint_list == NULL && commit_transaction->t_checkpoint_io_list == NULL) { __jbd2_journal_drop_transaction(journal, commit_transaction); + to_free = 1; } else { if (journal->j_checkpoint_transactions == NULL) { journal->j_checkpoint_transactions = commit_transaction; @@ -1035,11 +1033,16 @@ restart_loop: } spin_unlock(&journal->j_list_lock); + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, journal->j_commit_sequence, + journal->j_devname, commit_transaction->t_tid, journal->j_tail_sequence); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); + if (to_free) + kfree(commit_transaction); wake_up(&journal->j_wait_done_commit); } -- cgit v1.2.2 From 40a1984d22294ab202f616e432bb8d3481897675 
Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 4 Jan 2009 19:55:57 -0500 Subject: jbd2: Submit writes to the journal using WRITE_SYNC Since we will be waiting for the write of the commit record to the journal to complete in journal_submit_commit_record(), submit it using WRITE_SYNC. Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0ad84162c425..073124a29b8c 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -138,7 +138,7 @@ static int journal_submit_commit_record(journal_t *journal, set_buffer_ordered(bh); barrier_done = 1; } - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); if (barrier_done) clear_buffer_ordered(bh); @@ -159,7 +159,7 @@ static int journal_submit_commit_record(journal_t *journal, lock_buffer(bh); set_buffer_uptodate(bh); clear_buffer_dirty(bh); - ret = submit_bh(WRITE, bh); + ret = submit_bh(WRITE_SYNC, bh); } *cbh = bh; return ret; -- cgit v1.2.2