aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-06-24 17:11:59 -0400
committerJan Kara <jack@suse.cz>2011-06-27 05:44:37 -0400
commitbb189247f35688a3353545902c56290fb7d7754a (patch)
tree02f93da7f642f3e59050d1c2837a7a8a8e61b3aa
parent2c2ea9451fc2a12ee57c8346f0da26969d07ee7f (diff)
jbd: Fix oops in journal_remove_journal_head()
journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 journal_commit_transaction() ... processing t_forget list __journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); journal_try_to_free_buffers() journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() journal_put_journal_head(jh) journal_remove_journal_head(bh); journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(), [__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r--fs/jbd/checkpoint.c27
-rw-r--r--fs/jbd/commit.c46
-rw-r--r--fs/jbd/journal.c95
-rw-r--r--fs/jbd/transaction.c73
-rw-r--r--include/linux/jbd.h1
5 files changed, 104 insertions, 138 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index dea7503b47e8..61655a37c731 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -96,10 +96,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
96 96
97 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && 97 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
99 /*
100 * Get our reference so that bh cannot be freed before
101 * we unlock it
102 */
103 get_bh(bh);
99 JBUFFER_TRACE(jh, "remove from checkpoint list"); 104 JBUFFER_TRACE(jh, "remove from checkpoint list");
100 ret = __journal_remove_checkpoint(jh) + 1; 105 ret = __journal_remove_checkpoint(jh) + 1;
101 jbd_unlock_bh_state(bh); 106 jbd_unlock_bh_state(bh);
102 journal_remove_journal_head(bh);
103 BUFFER_TRACE(bh, "release"); 107 BUFFER_TRACE(bh, "release");
104 __brelse(bh); 108 __brelse(bh);
105 } else { 109 } else {
@@ -221,8 +225,8 @@ restart:
221 spin_lock(&journal->j_list_lock); 225 spin_lock(&journal->j_list_lock);
222 goto restart; 226 goto restart;
223 } 227 }
228 get_bh(bh);
224 if (buffer_locked(bh)) { 229 if (buffer_locked(bh)) {
225 get_bh(bh);
226 spin_unlock(&journal->j_list_lock); 230 spin_unlock(&journal->j_list_lock);
227 jbd_unlock_bh_state(bh); 231 jbd_unlock_bh_state(bh);
228 wait_on_buffer(bh); 232 wait_on_buffer(bh);
@@ -241,7 +245,6 @@ restart:
241 */ 245 */
242 released = __journal_remove_checkpoint(jh); 246 released = __journal_remove_checkpoint(jh);
243 jbd_unlock_bh_state(bh); 247 jbd_unlock_bh_state(bh);
244 journal_remove_journal_head(bh);
245 __brelse(bh); 248 __brelse(bh);
246 } 249 }
247 250
@@ -305,12 +308,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
305 ret = 1; 308 ret = 1;
306 if (unlikely(buffer_write_io_error(bh))) 309 if (unlikely(buffer_write_io_error(bh)))
307 ret = -EIO; 310 ret = -EIO;
311 get_bh(bh);
308 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 312 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
309 BUFFER_TRACE(bh, "remove from checkpoint"); 313 BUFFER_TRACE(bh, "remove from checkpoint");
310 __journal_remove_checkpoint(jh); 314 __journal_remove_checkpoint(jh);
311 spin_unlock(&journal->j_list_lock); 315 spin_unlock(&journal->j_list_lock);
312 jbd_unlock_bh_state(bh); 316 jbd_unlock_bh_state(bh);
313 journal_remove_journal_head(bh);
314 __brelse(bh); 317 __brelse(bh);
315 } else { 318 } else {
316 /* 319 /*
@@ -526,9 +529,9 @@ int cleanup_journal_tail(journal_t *journal)
526/* 529/*
527 * journal_clean_one_cp_list 530 * journal_clean_one_cp_list
528 * 531 *
529 * Find all the written-back checkpoint buffers in the given list and release them. 532 * Find all the written-back checkpoint buffers in the given list and release
533 * them.
530 * 534 *
531 * Called with the journal locked.
532 * Called with j_list_lock held. 535 * Called with j_list_lock held.
533 * Returns number of bufers reaped (for debug) 536 * Returns number of bufers reaped (for debug)
534 */ 537 */
@@ -635,8 +638,8 @@ out:
635 * checkpoint lists. 638 * checkpoint lists.
636 * 639 *
637 * The function returns 1 if it frees the transaction, 0 otherwise. 640 * The function returns 1 if it frees the transaction, 0 otherwise.
641 * The function can free jh and bh.
638 * 642 *
639 * This function is called with the journal locked.
640 * This function is called with j_list_lock held. 643 * This function is called with j_list_lock held.
641 * This function is called with jbd_lock_bh_state(jh2bh(jh)) 644 * This function is called with jbd_lock_bh_state(jh2bh(jh))
642 */ 645 */
@@ -655,13 +658,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
655 } 658 }
656 journal = transaction->t_journal; 659 journal = transaction->t_journal;
657 660
661 JBUFFER_TRACE(jh, "removing from transaction");
658 __buffer_unlink(jh); 662 __buffer_unlink(jh);
659 jh->b_cp_transaction = NULL; 663 jh->b_cp_transaction = NULL;
664 journal_put_journal_head(jh);
660 665
661 if (transaction->t_checkpoint_list != NULL || 666 if (transaction->t_checkpoint_list != NULL ||
662 transaction->t_checkpoint_io_list != NULL) 667 transaction->t_checkpoint_io_list != NULL)
663 goto out; 668 goto out;
664 JBUFFER_TRACE(jh, "transaction has no more buffers");
665 669
666 /* 670 /*
667 * There is one special case to worry about: if we have just pulled the 671 * There is one special case to worry about: if we have just pulled the
@@ -672,10 +676,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
672 * The locking here around t_state is a bit sleazy. 676 * The locking here around t_state is a bit sleazy.
673 * See the comment at the end of journal_commit_transaction(). 677 * See the comment at the end of journal_commit_transaction().
674 */ 678 */
675 if (transaction->t_state != T_FINISHED) { 679 if (transaction->t_state != T_FINISHED)
676 JBUFFER_TRACE(jh, "belongs to running/committing transaction");
677 goto out; 680 goto out;
678 }
679 681
680 /* OK, that was the last buffer for the transaction: we can now 682 /* OK, that was the last buffer for the transaction: we can now
681 safely remove this transaction from the log */ 683 safely remove this transaction from the log */
@@ -687,7 +689,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
687 wake_up(&journal->j_wait_logspace); 689 wake_up(&journal->j_wait_logspace);
688 ret = 1; 690 ret = 1;
689out: 691out:
690 JBUFFER_TRACE(jh, "exit");
691 return ret; 692 return ret;
692} 693}
693 694
@@ -706,6 +707,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
706 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); 707 J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
707 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 708 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
708 709
710 /* Get reference for checkpointing transaction */
711 journal_grab_journal_head(jh2bh(jh));
709 jh->b_cp_transaction = transaction; 712 jh->b_cp_transaction = transaction;
710 713
711 if (!transaction->t_checkpoint_list) { 714 if (!transaction->t_checkpoint_list) {
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index eedd201374a8..8799207df058 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -258,10 +258,6 @@ write_out_data:
258 jbd_unlock_bh_state(bh); 258 jbd_unlock_bh_state(bh);
259 if (locked) 259 if (locked)
260 unlock_buffer(bh); 260 unlock_buffer(bh);
261 journal_remove_journal_head(bh);
262 /* One for our safety reference, other for
263 * journal_remove_journal_head() */
264 put_bh(bh);
265 release_data_buffer(bh); 261 release_data_buffer(bh);
266 } 262 }
267 263
@@ -455,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
455 } 451 }
456 if (buffer_jbd(bh) && bh2jh(bh) == jh && 452 if (buffer_jbd(bh) && bh2jh(bh) == jh &&
457 jh->b_transaction == commit_transaction && 453 jh->b_transaction == commit_transaction &&
458 jh->b_jlist == BJ_Locked) { 454 jh->b_jlist == BJ_Locked)
459 __journal_unfile_buffer(jh); 455 __journal_unfile_buffer(jh);
460 jbd_unlock_bh_state(bh); 456 jbd_unlock_bh_state(bh);
461 journal_remove_journal_head(bh);
462 put_bh(bh);
463 } else {
464 jbd_unlock_bh_state(bh);
465 }
466 release_data_buffer(bh); 457 release_data_buffer(bh);
467 cond_resched_lock(&journal->j_list_lock); 458 cond_resched_lock(&journal->j_list_lock);
468 } 459 }
@@ -807,10 +798,16 @@ restart_loop:
807 while (commit_transaction->t_forget) { 798 while (commit_transaction->t_forget) {
808 transaction_t *cp_transaction; 799 transaction_t *cp_transaction;
809 struct buffer_head *bh; 800 struct buffer_head *bh;
801 int try_to_free = 0;
810 802
811 jh = commit_transaction->t_forget; 803 jh = commit_transaction->t_forget;
812 spin_unlock(&journal->j_list_lock); 804 spin_unlock(&journal->j_list_lock);
813 bh = jh2bh(jh); 805 bh = jh2bh(jh);
806 /*
807 * Get a reference so that bh cannot be freed before we are
808 * done with it.
809 */
810 get_bh(bh);
814 jbd_lock_bh_state(bh); 811 jbd_lock_bh_state(bh);
815 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 812 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
816 jh->b_transaction == journal->j_running_transaction); 813 jh->b_transaction == journal->j_running_transaction);
@@ -868,28 +865,27 @@ restart_loop:
868 __journal_insert_checkpoint(jh, commit_transaction); 865 __journal_insert_checkpoint(jh, commit_transaction);
869 if (is_journal_aborted(journal)) 866 if (is_journal_aborted(journal))
870 clear_buffer_jbddirty(bh); 867 clear_buffer_jbddirty(bh);
871 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
872 __journal_refile_buffer(jh);
873 jbd_unlock_bh_state(bh);
874 } else { 868 } else {
875 J_ASSERT_BH(bh, !buffer_dirty(bh)); 869 J_ASSERT_BH(bh, !buffer_dirty(bh));
876 /* The buffer on BJ_Forget list and not jbddirty means 870 /*
871 * The buffer on BJ_Forget list and not jbddirty means
877 * it has been freed by this transaction and hence it 872 * it has been freed by this transaction and hence it
878 * could not have been reallocated until this 873 * could not have been reallocated until this
879 * transaction has committed. *BUT* it could be 874 * transaction has committed. *BUT* it could be
880 * reallocated once we have written all the data to 875 * reallocated once we have written all the data to
881 * disk and before we process the buffer on BJ_Forget 876 * disk and before we process the buffer on BJ_Forget
882 * list. */ 877 * list.
883 JBUFFER_TRACE(jh, "refile or unfile freed buffer"); 878 */
884 __journal_refile_buffer(jh); 879 if (!jh->b_next_transaction)
885 if (!jh->b_transaction) { 880 try_to_free = 1;
886 jbd_unlock_bh_state(bh);
887 /* needs a brelse */
888 journal_remove_journal_head(bh);
889 release_buffer_page(bh);
890 } else
891 jbd_unlock_bh_state(bh);
892 } 881 }
882 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
883 __journal_refile_buffer(jh);
884 jbd_unlock_bh_state(bh);
885 if (try_to_free)
886 release_buffer_page(bh);
887 else
888 __brelse(bh);
893 cond_resched_lock(&journal->j_list_lock); 889 cond_resched_lock(&journal->j_list_lock);
894 } 890 }
895 spin_unlock(&journal->j_list_lock); 891 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index ab019ee77888..9fe061fb8779 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1803,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
1803 * When a buffer has its BH_JBD bit set it is immune from being released by 1803 * When a buffer has its BH_JBD bit set it is immune from being released by
1804 * core kernel code, mainly via ->b_count. 1804 * core kernel code, mainly via ->b_count.
1805 * 1805 *
1806 * A journal_head may be detached from its buffer_head when the journal_head's 1806 * A journal_head is detached from its buffer_head when the journal_head's
1807 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. 1807 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
1808 * Various places in JBD call journal_remove_journal_head() to indicate that the 1808 * transaction (b_cp_transaction) hold their references to b_jcount.
1809 * journal_head can be dropped if needed.
1810 * 1809 *
1811 * Various places in the kernel want to attach a journal_head to a buffer_head 1810 * Various places in the kernel want to attach a journal_head to a buffer_head
1812 * _before_ attaching the journal_head to a transaction. To protect the 1811 * _before_ attaching the journal_head to a transaction. To protect the
@@ -1819,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
1819 * (Attach a journal_head if needed. Increments b_jcount) 1818 * (Attach a journal_head if needed. Increments b_jcount)
1820 * struct journal_head *jh = journal_add_journal_head(bh); 1819 * struct journal_head *jh = journal_add_journal_head(bh);
1821 * ... 1820 * ...
1822 * jh->b_transaction = xxx; 1821 * (Get another reference for transaction)
1823 * journal_put_journal_head(jh); 1822 * journal_grab_journal_head(bh);
1824 * 1823 * jh->b_transaction = xxx;
1825 * Now, the journal_head's b_jcount is zero, but it is safe from being released 1824 * (Put original reference)
1826 * because it has a non-zero b_transaction. 1825 * journal_put_journal_head(jh);
1827 */ 1826 */
1828 1827
1829/* 1828/*
1830 * Give a buffer_head a journal_head. 1829 * Give a buffer_head a journal_head.
1831 * 1830 *
1832 * Doesn't need the journal lock.
1833 * May sleep. 1831 * May sleep.
1834 */ 1832 */
1835struct journal_head *journal_add_journal_head(struct buffer_head *bh) 1833struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1893,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1893 struct journal_head *jh = bh2jh(bh); 1891 struct journal_head *jh = bh2jh(bh);
1894 1892
1895 J_ASSERT_JH(jh, jh->b_jcount >= 0); 1893 J_ASSERT_JH(jh, jh->b_jcount >= 0);
1896 1894 J_ASSERT_JH(jh, jh->b_transaction == NULL);
1897 get_bh(bh); 1895 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1898 if (jh->b_jcount == 0) { 1896 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
1899 if (jh->b_transaction == NULL && 1897 J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
1900 jh->b_next_transaction == NULL && 1898 J_ASSERT_BH(bh, buffer_jbd(bh));
1901 jh->b_cp_transaction == NULL) { 1899 J_ASSERT_BH(bh, jh2bh(jh) == bh);
1902 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 1900 BUFFER_TRACE(bh, "remove journal_head");
1903 J_ASSERT_BH(bh, buffer_jbd(bh)); 1901 if (jh->b_frozen_data) {
1904 J_ASSERT_BH(bh, jh2bh(jh) == bh); 1902 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
1905 BUFFER_TRACE(bh, "remove journal_head"); 1903 jbd_free(jh->b_frozen_data, bh->b_size);
1906 if (jh->b_frozen_data) {
1907 printk(KERN_WARNING "%s: freeing "
1908 "b_frozen_data\n",
1909 __func__);
1910 jbd_free(jh->b_frozen_data, bh->b_size);
1911 }
1912 if (jh->b_committed_data) {
1913 printk(KERN_WARNING "%s: freeing "
1914 "b_committed_data\n",
1915 __func__);
1916 jbd_free(jh->b_committed_data, bh->b_size);
1917 }
1918 bh->b_private = NULL;
1919 jh->b_bh = NULL; /* debug, really */
1920 clear_buffer_jbd(bh);
1921 __brelse(bh);
1922 journal_free_journal_head(jh);
1923 } else {
1924 BUFFER_TRACE(bh, "journal_head was locked");
1925 }
1926 } 1904 }
1905 if (jh->b_committed_data) {
1906 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
1907 jbd_free(jh->b_committed_data, bh->b_size);
1908 }
1909 bh->b_private = NULL;
1910 jh->b_bh = NULL; /* debug, really */
1911 clear_buffer_jbd(bh);
1912 journal_free_journal_head(jh);
1927} 1913}
1928 1914
1929/* 1915/*
1930 * journal_remove_journal_head(): if the buffer isn't attached to a transaction 1916 * Drop a reference on the passed journal_head. If it fell to zero then
1931 * and has a zero b_jcount then remove and release its journal_head. If we did
1932 * see that the buffer is not used by any transaction we also "logically"
1933 * decrement ->b_count.
1934 *
1935 * We in fact take an additional increment on ->b_count as a convenience,
1936 * because the caller usually wants to do additional things with the bh
1937 * after calling here.
1938 * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
1939 * time. Once the caller has run __brelse(), the buffer is eligible for
1940 * reaping by try_to_free_buffers().
1941 */
1942void journal_remove_journal_head(struct buffer_head *bh)
1943{
1944 jbd_lock_bh_journal_head(bh);
1945 __journal_remove_journal_head(bh);
1946 jbd_unlock_bh_journal_head(bh);
1947}
1948
1949/*
1950 * Drop a reference on the passed journal_head. If it fell to zero then try to
1951 * release the journal_head from the buffer_head. 1917 * release the journal_head from the buffer_head.
1952 */ 1918 */
1953void journal_put_journal_head(struct journal_head *jh) 1919void journal_put_journal_head(struct journal_head *jh)
@@ -1957,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh)
1957 jbd_lock_bh_journal_head(bh); 1923 jbd_lock_bh_journal_head(bh);
1958 J_ASSERT_JH(jh, jh->b_jcount > 0); 1924 J_ASSERT_JH(jh, jh->b_jcount > 0);
1959 --jh->b_jcount; 1925 --jh->b_jcount;
1960 if (!jh->b_jcount && !jh->b_transaction) { 1926 if (!jh->b_jcount) {
1961 __journal_remove_journal_head(bh); 1927 __journal_remove_journal_head(bh);
1928 jbd_unlock_bh_journal_head(bh);
1962 __brelse(bh); 1929 __brelse(bh);
1963 } 1930 } else
1964 jbd_unlock_bh_journal_head(bh); 1931 jbd_unlock_bh_journal_head(bh);
1965} 1932}
1966 1933
1967/* 1934/*
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index dc39efd05d54..7e59c6e66f9b 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -696,7 +696,6 @@ repeat:
696 if (!jh->b_transaction) { 696 if (!jh->b_transaction) {
697 JBUFFER_TRACE(jh, "no transaction"); 697 JBUFFER_TRACE(jh, "no transaction");
698 J_ASSERT_JH(jh, !jh->b_next_transaction); 698 J_ASSERT_JH(jh, !jh->b_next_transaction);
699 jh->b_transaction = transaction;
700 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 699 JBUFFER_TRACE(jh, "file as BJ_Reserved");
701 spin_lock(&journal->j_list_lock); 700 spin_lock(&journal->j_list_lock);
702 __journal_file_buffer(jh, transaction, BJ_Reserved); 701 __journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
818 * committed and so it's safe to clear the dirty bit. 817 * committed and so it's safe to clear the dirty bit.
819 */ 818 */
820 clear_buffer_dirty(jh2bh(jh)); 819 clear_buffer_dirty(jh2bh(jh));
821 jh->b_transaction = transaction;
822 820
823 /* first access by this transaction */ 821 /* first access by this transaction */
824 jh->b_modified = 0; 822 jh->b_modified = 0;
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1069 ret = -EIO; 1067 ret = -EIO;
1070 goto no_journal; 1068 goto no_journal;
1071 } 1069 }
1072 1070 /* We might have slept so buffer could be refiled now */
1073 if (jh->b_transaction != NULL) { 1071 if (jh->b_transaction != NULL &&
1072 jh->b_transaction != handle->h_transaction) {
1074 JBUFFER_TRACE(jh, "unfile from commit"); 1073 JBUFFER_TRACE(jh, "unfile from commit");
1075 __journal_temp_unlink_buffer(jh); 1074 __journal_temp_unlink_buffer(jh);
1076 /* It still points to the committing 1075 /* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1091 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { 1090 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1092 JBUFFER_TRACE(jh, "not on correct data list: unfile"); 1091 JBUFFER_TRACE(jh, "not on correct data list: unfile");
1093 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); 1092 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1094 __journal_temp_unlink_buffer(jh);
1095 jh->b_transaction = handle->h_transaction;
1096 JBUFFER_TRACE(jh, "file as data"); 1093 JBUFFER_TRACE(jh, "file as data");
1097 __journal_file_buffer(jh, handle->h_transaction, 1094 __journal_file_buffer(jh, handle->h_transaction,
1098 BJ_SyncData); 1095 BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1300 __journal_file_buffer(jh, transaction, BJ_Forget); 1297 __journal_file_buffer(jh, transaction, BJ_Forget);
1301 } else { 1298 } else {
1302 __journal_unfile_buffer(jh); 1299 __journal_unfile_buffer(jh);
1303 journal_remove_journal_head(bh);
1304 __brelse(bh);
1305 if (!buffer_jbd(bh)) { 1300 if (!buffer_jbd(bh)) {
1306 spin_unlock(&journal->j_list_lock); 1301 spin_unlock(&journal->j_list_lock);
1307 jbd_unlock_bh_state(bh); 1302 jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
1622 mark_buffer_dirty(bh); /* Expose it to the VM */ 1617 mark_buffer_dirty(bh); /* Expose it to the VM */
1623} 1618}
1624 1619
1620/*
1621 * Remove buffer from all transactions.
1622 *
1623 * Called with bh_state lock and j_list_lock
1624 *
1625 * jh and bh may be already freed when this function returns.
1626 */
1625void __journal_unfile_buffer(struct journal_head *jh) 1627void __journal_unfile_buffer(struct journal_head *jh)
1626{ 1628{
1627 __journal_temp_unlink_buffer(jh); 1629 __journal_temp_unlink_buffer(jh);
1628 jh->b_transaction = NULL; 1630 jh->b_transaction = NULL;
1631 journal_put_journal_head(jh);
1629} 1632}
1630 1633
1631void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) 1634void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1632{ 1635{
1633 jbd_lock_bh_state(jh2bh(jh)); 1636 struct buffer_head *bh = jh2bh(jh);
1637
1638 /* Get reference so that buffer cannot be freed before we unlock it */
1639 get_bh(bh);
1640 jbd_lock_bh_state(bh);
1634 spin_lock(&journal->j_list_lock); 1641 spin_lock(&journal->j_list_lock);
1635 __journal_unfile_buffer(jh); 1642 __journal_unfile_buffer(jh);
1636 spin_unlock(&journal->j_list_lock); 1643 spin_unlock(&journal->j_list_lock);
1637 jbd_unlock_bh_state(jh2bh(jh)); 1644 jbd_unlock_bh_state(bh);
1645 __brelse(bh);
1638} 1646}
1639 1647
1640/* 1648/*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1661 /* A written-back ordered data buffer */ 1669 /* A written-back ordered data buffer */
1662 JBUFFER_TRACE(jh, "release data"); 1670 JBUFFER_TRACE(jh, "release data");
1663 __journal_unfile_buffer(jh); 1671 __journal_unfile_buffer(jh);
1664 journal_remove_journal_head(bh);
1665 __brelse(bh);
1666 } 1672 }
1667 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1673 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1668 /* written-back checkpointed metadata buffer */ 1674 /* written-back checkpointed metadata buffer */
1669 if (jh->b_jlist == BJ_None) { 1675 if (jh->b_jlist == BJ_None) {
1670 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1676 JBUFFER_TRACE(jh, "remove from checkpoint list");
1671 __journal_remove_checkpoint(jh); 1677 __journal_remove_checkpoint(jh);
1672 journal_remove_journal_head(bh);
1673 __brelse(bh);
1674 } 1678 }
1675 } 1679 }
1676 spin_unlock(&journal->j_list_lock); 1680 spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
1733 /* 1737 /*
1734 * We take our own ref against the journal_head here to avoid 1738 * We take our own ref against the journal_head here to avoid
1735 * having to add tons of locking around each instance of 1739 * having to add tons of locking around each instance of
1736 * journal_remove_journal_head() and journal_put_journal_head(). 1740 * journal_put_journal_head().
1737 */ 1741 */
1738 jh = journal_grab_journal_head(bh); 1742 jh = journal_grab_journal_head(bh);
1739 if (!jh) 1743 if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1770 int may_free = 1; 1774 int may_free = 1;
1771 struct buffer_head *bh = jh2bh(jh); 1775 struct buffer_head *bh = jh2bh(jh);
1772 1776
1773 __journal_unfile_buffer(jh);
1774
1775 if (jh->b_cp_transaction) { 1777 if (jh->b_cp_transaction) {
1776 JBUFFER_TRACE(jh, "on running+cp transaction"); 1778 JBUFFER_TRACE(jh, "on running+cp transaction");
1779 __journal_temp_unlink_buffer(jh);
1777 /* 1780 /*
1778 * We don't want to write the buffer anymore, clear the 1781 * We don't want to write the buffer anymore, clear the
1779 * bit so that we don't confuse checks in 1782 * bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1784 may_free = 0; 1787 may_free = 0;
1785 } else { 1788 } else {
1786 JBUFFER_TRACE(jh, "on running transaction"); 1789 JBUFFER_TRACE(jh, "on running transaction");
1787 journal_remove_journal_head(bh); 1790 __journal_unfile_buffer(jh);
1788 __brelse(bh);
1789 } 1791 }
1790 return may_free; 1792 return may_free;
1791} 1793}
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
2070 2072
2071 if (jh->b_transaction) 2073 if (jh->b_transaction)
2072 __journal_temp_unlink_buffer(jh); 2074 __journal_temp_unlink_buffer(jh);
2075 else
2076 journal_grab_journal_head(bh);
2073 jh->b_transaction = transaction; 2077 jh->b_transaction = transaction;
2074 2078
2075 switch (jlist) { 2079 switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
2127 * already started to be used by a subsequent transaction, refile the 2131 * already started to be used by a subsequent transaction, refile the
2128 * buffer on that transaction's metadata list. 2132 * buffer on that transaction's metadata list.
2129 * 2133 *
2130 * Called under journal->j_list_lock 2134 * Called under j_list_lock
2131 *
2132 * Called under jbd_lock_bh_state(jh2bh(jh)) 2135 * Called under jbd_lock_bh_state(jh2bh(jh))
2136 *
2137 * jh and bh may be already free when this function returns
2133 */ 2138 */
2134void __journal_refile_buffer(struct journal_head *jh) 2139void __journal_refile_buffer(struct journal_head *jh)
2135{ 2140{
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
2153 2158
2154 was_dirty = test_clear_buffer_jbddirty(bh); 2159 was_dirty = test_clear_buffer_jbddirty(bh);
2155 __journal_temp_unlink_buffer(jh); 2160 __journal_temp_unlink_buffer(jh);
2161 /*
2162 * We set b_transaction here because b_next_transaction will inherit
2163 * our jh reference and thus __journal_file_buffer() must not take a
2164 * new one.
2165 */
2156 jh->b_transaction = jh->b_next_transaction; 2166 jh->b_transaction = jh->b_next_transaction;
2157 jh->b_next_transaction = NULL; 2167 jh->b_next_transaction = NULL;
2158 if (buffer_freed(bh)) 2168 if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
2169} 2179}
2170 2180
2171/* 2181/*
2172 * For the unlocked version of this call, also make sure that any 2182 * __journal_refile_buffer() with necessary locking added. We take our bh
2173 * hanging journal_head is cleaned up if necessary. 2183 * reference so that we can safely unlock bh.
2174 * 2184 *
2175 * __journal_refile_buffer is usually called as part of a single locked 2185 * The jh and bh may be freed by this call.
2176 * operation on a buffer_head, in which the caller is probably going to
2177 * be hooking the journal_head onto other lists. In that case it is up
2178 * to the caller to remove the journal_head if necessary. For the
2179 * unlocked journal_refile_buffer call, the caller isn't going to be
2180 * doing anything else to the buffer so we need to do the cleanup
2181 * ourselves to avoid a jh leak.
2182 *
2183 * *** The journal_head may be freed by this call! ***
2184 */ 2186 */
2185void journal_refile_buffer(journal_t *journal, struct journal_head *jh) 2187void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2186{ 2188{
2187 struct buffer_head *bh = jh2bh(jh); 2189 struct buffer_head *bh = jh2bh(jh);
2188 2190
2191 /* Get reference so that buffer cannot be freed before we unlock it */
2192 get_bh(bh);
2189 jbd_lock_bh_state(bh); 2193 jbd_lock_bh_state(bh);
2190 spin_lock(&journal->j_list_lock); 2194 spin_lock(&journal->j_list_lock);
2191
2192 __journal_refile_buffer(jh); 2195 __journal_refile_buffer(jh);
2193 jbd_unlock_bh_state(bh); 2196 jbd_unlock_bh_state(bh);
2194 journal_remove_journal_head(bh);
2195
2196 spin_unlock(&journal->j_list_lock); 2197 spin_unlock(&journal->j_list_lock);
2197 __brelse(bh); 2198 __brelse(bh);
2198} 2199}
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index e06965081ba5..e6a5e34bed4f 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -940,7 +940,6 @@ extern int journal_force_commit(journal_t *);
940 */ 940 */
941struct journal_head *journal_add_journal_head(struct buffer_head *bh); 941struct journal_head *journal_add_journal_head(struct buffer_head *bh);
942struct journal_head *journal_grab_journal_head(struct buffer_head *bh); 942struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
943void journal_remove_journal_head(struct buffer_head *bh);
944void journal_put_journal_head(struct journal_head *jh); 943void journal_put_journal_head(struct journal_head *jh);
945 944
946/* 945/*