aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd/commit.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-06-24 17:11:59 -0400
committerJan Kara <jack@suse.cz>2011-06-27 05:44:37 -0400
commitbb189247f35688a3353545902c56290fb7d7754a (patch)
tree02f93da7f642f3e59050d1c2837a7a8a8e61b3aa /fs/jbd/commit.c
parent2c2ea9451fc2a12ee57c8346f0da26969d07ee7f (diff)
jbd: Fix oops in journal_remove_journal_head()
journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 journal_commit_transaction() ... processing t_forget list __journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); journal_try_to_free_buffers() journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() journal_put_journal_head(jh) journal_remove_journal_head(bh); journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(), [__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd/commit.c')
-rw-r--r--fs/jbd/commit.c46
1 files changed, 21 insertions, 25 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index eedd201374a8..8799207df058 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -258,10 +258,6 @@ write_out_data:
258 jbd_unlock_bh_state(bh); 258 jbd_unlock_bh_state(bh);
259 if (locked) 259 if (locked)
260 unlock_buffer(bh); 260 unlock_buffer(bh);
261 journal_remove_journal_head(bh);
262 /* One for our safety reference, other for
263 * journal_remove_journal_head() */
264 put_bh(bh);
265 release_data_buffer(bh); 261 release_data_buffer(bh);
266 } 262 }
267 263
@@ -455,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
455 } 451 }
456 if (buffer_jbd(bh) && bh2jh(bh) == jh && 452 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
457 jh->b_transaction == commit_transaction && 453 jh->b_transaction == commit_transaction &&
458 jh->b_jlist == BJ_Locked) { 454 jh->b_jlist == BJ_Locked)
459 __journal_unfile_buffer(jh); 455 __journal_unfile_buffer(jh);
460 jbd_unlock_bh_state(bh); 456 jbd_unlock_bh_state(bh);
461 journal_remove_journal_head(bh);
462 put_bh(bh);
463 } else {
464 jbd_unlock_bh_state(bh);
465 }
466 release_data_buffer(bh); 457 release_data_buffer(bh);
467 cond_resched_lock(&journal->j_list_lock); 458 cond_resched_lock(&journal->j_list_lock);
468 } 459 }
@@ -807,10 +798,16 @@ restart_loop:
807 while (commit_transaction->t_forget) { 798 while (commit_transaction->t_forget) {
808 transaction_t *cp_transaction; 799 transaction_t *cp_transaction;
809 struct buffer_head *bh; 800 struct buffer_head *bh;
801 int try_to_free = 0;
810 802
811 jh = commit_transaction->t_forget; 803 jh = commit_transaction->t_forget;
812 spin_unlock(&journal->j_list_lock); 804 spin_unlock(&journal->j_list_lock);
813 bh = jh2bh(jh); 805 bh = jh2bh(jh);
806 /*
807 * Get a reference so that bh cannot be freed before we are
808 * done with it.
809 */
810 get_bh(bh);
814 jbd_lock_bh_state(bh); 811 jbd_lock_bh_state(bh);
815 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 812 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
816 jh->b_transaction == journal->j_running_transaction); 813 jh->b_transaction == journal->j_running_transaction);
@@ -868,28 +865,27 @@ restart_loop:
868 __journal_insert_checkpoint(jh, commit_transaction); 865 __journal_insert_checkpoint(jh, commit_transaction);
869 if (is_journal_aborted(journal)) 866 if (is_journal_aborted(journal))
870 clear_buffer_jbddirty(bh); 867 clear_buffer_jbddirty(bh);
871 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
872 __journal_refile_buffer(jh);
873 jbd_unlock_bh_state(bh);
874 } else { 868 } else {
875 J_ASSERT_BH(bh, !buffer_dirty(bh)); 869 J_ASSERT_BH(bh, !buffer_dirty(bh));
876 /* The buffer on BJ_Forget list and not jbddirty means 870 /*
871 * The buffer on BJ_Forget list and not jbddirty means
877 * it has been freed by this transaction and hence it 872 * it has been freed by this transaction and hence it
878 * could not have been reallocated until this 873 * could not have been reallocated until this
879 * transaction has committed. *BUT* it could be 874 * transaction has committed. *BUT* it could be
880 * reallocated once we have written all the data to 875 * reallocated once we have written all the data to
881 * disk and before we process the buffer on BJ_Forget 876 * disk and before we process the buffer on BJ_Forget
882 * list. */ 877 * list.
883 JBUFFER_TRACE(jh, "refile or unfile freed buffer"); 878 */
884 __journal_refile_buffer(jh); 879 if (!jh->b_next_transaction)
885 if (!jh->b_transaction) { 880 try_to_free = 1;
886 jbd_unlock_bh_state(bh);
887 /* needs a brelse */
888 journal_remove_journal_head(bh);
889 release_buffer_page(bh);
890 } else
891 jbd_unlock_bh_state(bh);
892 } 881 }
882 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
883 __journal_refile_buffer(jh);
884 jbd_unlock_bh_state(bh);
885 if (try_to_free)
886 release_buffer_page(bh);
887 else
888 __brelse(bh);
893 cond_resched_lock(&journal->j_list_lock); 889 cond_resched_lock(&journal->j_list_lock);
894 } 890 }
895 spin_unlock(&journal->j_list_lock); 891 spin_unlock(&journal->j_list_lock);