diff options
author | Jan Kara <jack@suse.cz> | 2011-06-24 17:11:59 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2011-06-27 05:44:37 -0400 |
commit | bb189247f35688a3353545902c56290fb7d7754a (patch) | |
tree | 02f93da7f642f3e59050d1c2837a7a8a8e61b3aa | |
parent | 2c2ea9451fc2a12ee57c8346f0da26969d07ee7f (diff) |
jbd: Fix oops in journal_remove_journal_head()
journal_remove_journal_head() can oops when trying to access journal_head
returned by bh2jh(). This is caused for example by the following race:
TASK1 TASK2
journal_commit_transaction()
...
processing t_forget list
__journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh);
journal_try_to_free_buffers()
journal_grab_journal_head(bh)
jbd_lock_bh_state(bh)
__journal_try_to_free_buffer()
journal_put_journal_head(jh)
journal_remove_journal_head(bh);
journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not
part of any transaction and thus frees journal_head before TASK1 gets to doing
so. Note that even buffer_head can be released by try_to_free_buffers() after
journal_put_journal_head() which adds even larger opportunity for oops (but I
didn't see this happen in reality).
Fix the problem by making transactions hold their own journal_head reference
(in b_jcount). That way we don't have to remove journal_head explicitely via
journal_remove_journal_head() and instead just remove journal_head when
b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(),
[__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free
journal_head which needs modification of a few callers. Also we have to be
careful because once journal_head is removed, buffer_head might be freed as
well. So we have to get our own buffer_head reference where it matters.
Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r-- | fs/jbd/checkpoint.c | 27 | ||||
-rw-r--r-- | fs/jbd/commit.c | 46 | ||||
-rw-r--r-- | fs/jbd/journal.c | 95 | ||||
-rw-r--r-- | fs/jbd/transaction.c | 73 | ||||
-rw-r--r-- | include/linux/jbd.h | 1 |
5 files changed, 104 insertions, 138 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index dea7503b47e8..61655a37c731 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c | |||
@@ -96,10 +96,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
96 | 96 | ||
97 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 97 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
99 | /* | ||
100 | * Get our reference so that bh cannot be freed before | ||
101 | * we unlock it | ||
102 | */ | ||
103 | get_bh(bh); | ||
99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
100 | ret = __journal_remove_checkpoint(jh) + 1; | 105 | ret = __journal_remove_checkpoint(jh) + 1; |
101 | jbd_unlock_bh_state(bh); | 106 | jbd_unlock_bh_state(bh); |
102 | journal_remove_journal_head(bh); | ||
103 | BUFFER_TRACE(bh, "release"); | 107 | BUFFER_TRACE(bh, "release"); |
104 | __brelse(bh); | 108 | __brelse(bh); |
105 | } else { | 109 | } else { |
@@ -221,8 +225,8 @@ restart: | |||
221 | spin_lock(&journal->j_list_lock); | 225 | spin_lock(&journal->j_list_lock); |
222 | goto restart; | 226 | goto restart; |
223 | } | 227 | } |
228 | get_bh(bh); | ||
224 | if (buffer_locked(bh)) { | 229 | if (buffer_locked(bh)) { |
225 | get_bh(bh); | ||
226 | spin_unlock(&journal->j_list_lock); | 230 | spin_unlock(&journal->j_list_lock); |
227 | jbd_unlock_bh_state(bh); | 231 | jbd_unlock_bh_state(bh); |
228 | wait_on_buffer(bh); | 232 | wait_on_buffer(bh); |
@@ -241,7 +245,6 @@ restart: | |||
241 | */ | 245 | */ |
242 | released = __journal_remove_checkpoint(jh); | 246 | released = __journal_remove_checkpoint(jh); |
243 | jbd_unlock_bh_state(bh); | 247 | jbd_unlock_bh_state(bh); |
244 | journal_remove_journal_head(bh); | ||
245 | __brelse(bh); | 248 | __brelse(bh); |
246 | } | 249 | } |
247 | 250 | ||
@@ -305,12 +308,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
305 | ret = 1; | 308 | ret = 1; |
306 | if (unlikely(buffer_write_io_error(bh))) | 309 | if (unlikely(buffer_write_io_error(bh))) |
307 | ret = -EIO; | 310 | ret = -EIO; |
311 | get_bh(bh); | ||
308 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 312 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
309 | BUFFER_TRACE(bh, "remove from checkpoint"); | 313 | BUFFER_TRACE(bh, "remove from checkpoint"); |
310 | __journal_remove_checkpoint(jh); | 314 | __journal_remove_checkpoint(jh); |
311 | spin_unlock(&journal->j_list_lock); | 315 | spin_unlock(&journal->j_list_lock); |
312 | jbd_unlock_bh_state(bh); | 316 | jbd_unlock_bh_state(bh); |
313 | journal_remove_journal_head(bh); | ||
314 | __brelse(bh); | 317 | __brelse(bh); |
315 | } else { | 318 | } else { |
316 | /* | 319 | /* |
@@ -526,9 +529,9 @@ int cleanup_journal_tail(journal_t *journal) | |||
526 | /* | 529 | /* |
527 | * journal_clean_one_cp_list | 530 | * journal_clean_one_cp_list |
528 | * | 531 | * |
529 | * Find all the written-back checkpoint buffers in the given list and release them. | 532 | * Find all the written-back checkpoint buffers in the given list and release |
533 | * them. | ||
530 | * | 534 | * |
531 | * Called with the journal locked. | ||
532 | * Called with j_list_lock held. | 535 | * Called with j_list_lock held. |
533 | * Returns number of bufers reaped (for debug) | 536 | * Returns number of bufers reaped (for debug) |
534 | */ | 537 | */ |
@@ -635,8 +638,8 @@ out: | |||
635 | * checkpoint lists. | 638 | * checkpoint lists. |
636 | * | 639 | * |
637 | * The function returns 1 if it frees the transaction, 0 otherwise. | 640 | * The function returns 1 if it frees the transaction, 0 otherwise. |
641 | * The function can free jh and bh. | ||
638 | * | 642 | * |
639 | * This function is called with the journal locked. | ||
640 | * This function is called with j_list_lock held. | 643 | * This function is called with j_list_lock held. |
641 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | 644 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) |
642 | */ | 645 | */ |
@@ -655,13 +658,14 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
655 | } | 658 | } |
656 | journal = transaction->t_journal; | 659 | journal = transaction->t_journal; |
657 | 660 | ||
661 | JBUFFER_TRACE(jh, "removing from transaction"); | ||
658 | __buffer_unlink(jh); | 662 | __buffer_unlink(jh); |
659 | jh->b_cp_transaction = NULL; | 663 | jh->b_cp_transaction = NULL; |
664 | journal_put_journal_head(jh); | ||
660 | 665 | ||
661 | if (transaction->t_checkpoint_list != NULL || | 666 | if (transaction->t_checkpoint_list != NULL || |
662 | transaction->t_checkpoint_io_list != NULL) | 667 | transaction->t_checkpoint_io_list != NULL) |
663 | goto out; | 668 | goto out; |
664 | JBUFFER_TRACE(jh, "transaction has no more buffers"); | ||
665 | 669 | ||
666 | /* | 670 | /* |
667 | * There is one special case to worry about: if we have just pulled the | 671 | * There is one special case to worry about: if we have just pulled the |
@@ -672,10 +676,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
672 | * The locking here around t_state is a bit sleazy. | 676 | * The locking here around t_state is a bit sleazy. |
673 | * See the comment at the end of journal_commit_transaction(). | 677 | * See the comment at the end of journal_commit_transaction(). |
674 | */ | 678 | */ |
675 | if (transaction->t_state != T_FINISHED) { | 679 | if (transaction->t_state != T_FINISHED) |
676 | JBUFFER_TRACE(jh, "belongs to running/committing transaction"); | ||
677 | goto out; | 680 | goto out; |
678 | } | ||
679 | 681 | ||
680 | /* OK, that was the last buffer for the transaction: we can now | 682 | /* OK, that was the last buffer for the transaction: we can now |
681 | safely remove this transaction from the log */ | 683 | safely remove this transaction from the log */ |
@@ -687,7 +689,6 @@ int __journal_remove_checkpoint(struct journal_head *jh) | |||
687 | wake_up(&journal->j_wait_logspace); | 689 | wake_up(&journal->j_wait_logspace); |
688 | ret = 1; | 690 | ret = 1; |
689 | out: | 691 | out: |
690 | JBUFFER_TRACE(jh, "exit"); | ||
691 | return ret; | 692 | return ret; |
692 | } | 693 | } |
693 | 694 | ||
@@ -706,6 +707,8 @@ void __journal_insert_checkpoint(struct journal_head *jh, | |||
706 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); | 707 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); |
707 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); | 708 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
708 | 709 | ||
710 | /* Get reference for checkpointing transaction */ | ||
711 | journal_grab_journal_head(jh2bh(jh)); | ||
709 | jh->b_cp_transaction = transaction; | 712 | jh->b_cp_transaction = transaction; |
710 | 713 | ||
711 | if (!transaction->t_checkpoint_list) { | 714 | if (!transaction->t_checkpoint_list) { |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index eedd201374a8..8799207df058 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -258,10 +258,6 @@ write_out_data: | |||
258 | jbd_unlock_bh_state(bh); | 258 | jbd_unlock_bh_state(bh); |
259 | if (locked) | 259 | if (locked) |
260 | unlock_buffer(bh); | 260 | unlock_buffer(bh); |
261 | journal_remove_journal_head(bh); | ||
262 | /* One for our safety reference, other for | ||
263 | * journal_remove_journal_head() */ | ||
264 | put_bh(bh); | ||
265 | release_data_buffer(bh); | 261 | release_data_buffer(bh); |
266 | } | 262 | } |
267 | 263 | ||
@@ -455,14 +451,9 @@ void journal_commit_transaction(journal_t *journal) | |||
455 | } | 451 | } |
456 | if (buffer_jbd(bh) && bh2jh(bh) == jh && | 452 | if (buffer_jbd(bh) && bh2jh(bh) == jh && |
457 | jh->b_transaction == commit_transaction && | 453 | jh->b_transaction == commit_transaction && |
458 | jh->b_jlist == BJ_Locked) { | 454 | jh->b_jlist == BJ_Locked) |
459 | __journal_unfile_buffer(jh); | 455 | __journal_unfile_buffer(jh); |
460 | jbd_unlock_bh_state(bh); | 456 | jbd_unlock_bh_state(bh); |
461 | journal_remove_journal_head(bh); | ||
462 | put_bh(bh); | ||
463 | } else { | ||
464 | jbd_unlock_bh_state(bh); | ||
465 | } | ||
466 | release_data_buffer(bh); | 457 | release_data_buffer(bh); |
467 | cond_resched_lock(&journal->j_list_lock); | 458 | cond_resched_lock(&journal->j_list_lock); |
468 | } | 459 | } |
@@ -807,10 +798,16 @@ restart_loop: | |||
807 | while (commit_transaction->t_forget) { | 798 | while (commit_transaction->t_forget) { |
808 | transaction_t *cp_transaction; | 799 | transaction_t *cp_transaction; |
809 | struct buffer_head *bh; | 800 | struct buffer_head *bh; |
801 | int try_to_free = 0; | ||
810 | 802 | ||
811 | jh = commit_transaction->t_forget; | 803 | jh = commit_transaction->t_forget; |
812 | spin_unlock(&journal->j_list_lock); | 804 | spin_unlock(&journal->j_list_lock); |
813 | bh = jh2bh(jh); | 805 | bh = jh2bh(jh); |
806 | /* | ||
807 | * Get a reference so that bh cannot be freed before we are | ||
808 | * done with it. | ||
809 | */ | ||
810 | get_bh(bh); | ||
814 | jbd_lock_bh_state(bh); | 811 | jbd_lock_bh_state(bh); |
815 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || | 812 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || |
816 | jh->b_transaction == journal->j_running_transaction); | 813 | jh->b_transaction == journal->j_running_transaction); |
@@ -868,28 +865,27 @@ restart_loop: | |||
868 | __journal_insert_checkpoint(jh, commit_transaction); | 865 | __journal_insert_checkpoint(jh, commit_transaction); |
869 | if (is_journal_aborted(journal)) | 866 | if (is_journal_aborted(journal)) |
870 | clear_buffer_jbddirty(bh); | 867 | clear_buffer_jbddirty(bh); |
871 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | ||
872 | __journal_refile_buffer(jh); | ||
873 | jbd_unlock_bh_state(bh); | ||
874 | } else { | 868 | } else { |
875 | J_ASSERT_BH(bh, !buffer_dirty(bh)); | 869 | J_ASSERT_BH(bh, !buffer_dirty(bh)); |
876 | /* The buffer on BJ_Forget list and not jbddirty means | 870 | /* |
871 | * The buffer on BJ_Forget list and not jbddirty means | ||
877 | * it has been freed by this transaction and hence it | 872 | * it has been freed by this transaction and hence it |
878 | * could not have been reallocated until this | 873 | * could not have been reallocated until this |
879 | * transaction has committed. *BUT* it could be | 874 | * transaction has committed. *BUT* it could be |
880 | * reallocated once we have written all the data to | 875 | * reallocated once we have written all the data to |
881 | * disk and before we process the buffer on BJ_Forget | 876 | * disk and before we process the buffer on BJ_Forget |
882 | * list. */ | 877 | * list. |
883 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | 878 | */ |
884 | __journal_refile_buffer(jh); | 879 | if (!jh->b_next_transaction) |
885 | if (!jh->b_transaction) { | 880 | try_to_free = 1; |
886 | jbd_unlock_bh_state(bh); | ||
887 | /* needs a brelse */ | ||
888 | journal_remove_journal_head(bh); | ||
889 | release_buffer_page(bh); | ||
890 | } else | ||
891 | jbd_unlock_bh_state(bh); | ||
892 | } | 881 | } |
882 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | ||
883 | __journal_refile_buffer(jh); | ||
884 | jbd_unlock_bh_state(bh); | ||
885 | if (try_to_free) | ||
886 | release_buffer_page(bh); | ||
887 | else | ||
888 | __brelse(bh); | ||
893 | cond_resched_lock(&journal->j_list_lock); | 889 | cond_resched_lock(&journal->j_list_lock); |
894 | } | 890 | } |
895 | spin_unlock(&journal->j_list_lock); | 891 | spin_unlock(&journal->j_list_lock); |
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index ab019ee77888..9fe061fb8779 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1803,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1803 | * When a buffer has its BH_JBD bit set it is immune from being released by | 1803 | * When a buffer has its BH_JBD bit set it is immune from being released by |
1804 | * core kernel code, mainly via ->b_count. | 1804 | * core kernel code, mainly via ->b_count. |
1805 | * | 1805 | * |
1806 | * A journal_head may be detached from its buffer_head when the journal_head's | 1806 | * A journal_head is detached from its buffer_head when the journal_head's |
1807 | * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. | 1807 | * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint |
1808 | * Various places in JBD call journal_remove_journal_head() to indicate that the | 1808 | * transaction (b_cp_transaction) hold their references to b_jcount. |
1809 | * journal_head can be dropped if needed. | ||
1810 | * | 1809 | * |
1811 | * Various places in the kernel want to attach a journal_head to a buffer_head | 1810 | * Various places in the kernel want to attach a journal_head to a buffer_head |
1812 | * _before_ attaching the journal_head to a transaction. To protect the | 1811 | * _before_ attaching the journal_head to a transaction. To protect the |
@@ -1819,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1819 | * (Attach a journal_head if needed. Increments b_jcount) | 1818 | * (Attach a journal_head if needed. Increments b_jcount) |
1820 | * struct journal_head *jh = journal_add_journal_head(bh); | 1819 | * struct journal_head *jh = journal_add_journal_head(bh); |
1821 | * ... | 1820 | * ... |
1822 | * jh->b_transaction = xxx; | 1821 | * (Get another reference for transaction) |
1823 | * journal_put_journal_head(jh); | 1822 | * journal_grab_journal_head(bh); |
1824 | * | 1823 | * jh->b_transaction = xxx; |
1825 | * Now, the journal_head's b_jcount is zero, but it is safe from being released | 1824 | * (Put original reference) |
1826 | * because it has a non-zero b_transaction. | 1825 | * journal_put_journal_head(jh); |
1827 | */ | 1826 | */ |
1828 | 1827 | ||
1829 | /* | 1828 | /* |
1830 | * Give a buffer_head a journal_head. | 1829 | * Give a buffer_head a journal_head. |
1831 | * | 1830 | * |
1832 | * Doesn't need the journal lock. | ||
1833 | * May sleep. | 1831 | * May sleep. |
1834 | */ | 1832 | */ |
1835 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) | 1833 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) |
@@ -1893,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
1893 | struct journal_head *jh = bh2jh(bh); | 1891 | struct journal_head *jh = bh2jh(bh); |
1894 | 1892 | ||
1895 | J_ASSERT_JH(jh, jh->b_jcount >= 0); | 1893 | J_ASSERT_JH(jh, jh->b_jcount >= 0); |
1896 | 1894 | J_ASSERT_JH(jh, jh->b_transaction == NULL); | |
1897 | get_bh(bh); | 1895 | J_ASSERT_JH(jh, jh->b_next_transaction == NULL); |
1898 | if (jh->b_jcount == 0) { | 1896 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
1899 | if (jh->b_transaction == NULL && | 1897 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); |
1900 | jh->b_next_transaction == NULL && | 1898 | J_ASSERT_BH(bh, buffer_jbd(bh)); |
1901 | jh->b_cp_transaction == NULL) { | 1899 | J_ASSERT_BH(bh, jh2bh(jh) == bh); |
1902 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); | 1900 | BUFFER_TRACE(bh, "remove journal_head"); |
1903 | J_ASSERT_BH(bh, buffer_jbd(bh)); | 1901 | if (jh->b_frozen_data) { |
1904 | J_ASSERT_BH(bh, jh2bh(jh) == bh); | 1902 | printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); |
1905 | BUFFER_TRACE(bh, "remove journal_head"); | 1903 | jbd_free(jh->b_frozen_data, bh->b_size); |
1906 | if (jh->b_frozen_data) { | ||
1907 | printk(KERN_WARNING "%s: freeing " | ||
1908 | "b_frozen_data\n", | ||
1909 | __func__); | ||
1910 | jbd_free(jh->b_frozen_data, bh->b_size); | ||
1911 | } | ||
1912 | if (jh->b_committed_data) { | ||
1913 | printk(KERN_WARNING "%s: freeing " | ||
1914 | "b_committed_data\n", | ||
1915 | __func__); | ||
1916 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1917 | } | ||
1918 | bh->b_private = NULL; | ||
1919 | jh->b_bh = NULL; /* debug, really */ | ||
1920 | clear_buffer_jbd(bh); | ||
1921 | __brelse(bh); | ||
1922 | journal_free_journal_head(jh); | ||
1923 | } else { | ||
1924 | BUFFER_TRACE(bh, "journal_head was locked"); | ||
1925 | } | ||
1926 | } | 1904 | } |
1905 | if (jh->b_committed_data) { | ||
1906 | printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); | ||
1907 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1908 | } | ||
1909 | bh->b_private = NULL; | ||
1910 | jh->b_bh = NULL; /* debug, really */ | ||
1911 | clear_buffer_jbd(bh); | ||
1912 | journal_free_journal_head(jh); | ||
1927 | } | 1913 | } |
1928 | 1914 | ||
1929 | /* | 1915 | /* |
1930 | * journal_remove_journal_head(): if the buffer isn't attached to a transaction | 1916 | * Drop a reference on the passed journal_head. If it fell to zero then |
1931 | * and has a zero b_jcount then remove and release its journal_head. If we did | ||
1932 | * see that the buffer is not used by any transaction we also "logically" | ||
1933 | * decrement ->b_count. | ||
1934 | * | ||
1935 | * We in fact take an additional increment on ->b_count as a convenience, | ||
1936 | * because the caller usually wants to do additional things with the bh | ||
1937 | * after calling here. | ||
1938 | * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some | ||
1939 | * time. Once the caller has run __brelse(), the buffer is eligible for | ||
1940 | * reaping by try_to_free_buffers(). | ||
1941 | */ | ||
1942 | void journal_remove_journal_head(struct buffer_head *bh) | ||
1943 | { | ||
1944 | jbd_lock_bh_journal_head(bh); | ||
1945 | __journal_remove_journal_head(bh); | ||
1946 | jbd_unlock_bh_journal_head(bh); | ||
1947 | } | ||
1948 | |||
1949 | /* | ||
1950 | * Drop a reference on the passed journal_head. If it fell to zero then try to | ||
1951 | * release the journal_head from the buffer_head. | 1917 | * release the journal_head from the buffer_head. |
1952 | */ | 1918 | */ |
1953 | void journal_put_journal_head(struct journal_head *jh) | 1919 | void journal_put_journal_head(struct journal_head *jh) |
@@ -1957,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh) | |||
1957 | jbd_lock_bh_journal_head(bh); | 1923 | jbd_lock_bh_journal_head(bh); |
1958 | J_ASSERT_JH(jh, jh->b_jcount > 0); | 1924 | J_ASSERT_JH(jh, jh->b_jcount > 0); |
1959 | --jh->b_jcount; | 1925 | --jh->b_jcount; |
1960 | if (!jh->b_jcount && !jh->b_transaction) { | 1926 | if (!jh->b_jcount) { |
1961 | __journal_remove_journal_head(bh); | 1927 | __journal_remove_journal_head(bh); |
1928 | jbd_unlock_bh_journal_head(bh); | ||
1962 | __brelse(bh); | 1929 | __brelse(bh); |
1963 | } | 1930 | } else |
1964 | jbd_unlock_bh_journal_head(bh); | 1931 | jbd_unlock_bh_journal_head(bh); |
1965 | } | 1932 | } |
1966 | 1933 | ||
1967 | /* | 1934 | /* |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index dc39efd05d54..7e59c6e66f9b 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -696,7 +696,6 @@ repeat: | |||
696 | if (!jh->b_transaction) { | 696 | if (!jh->b_transaction) { |
697 | JBUFFER_TRACE(jh, "no transaction"); | 697 | JBUFFER_TRACE(jh, "no transaction"); |
698 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 698 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
699 | jh->b_transaction = transaction; | ||
700 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 699 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
701 | spin_lock(&journal->j_list_lock); | 700 | spin_lock(&journal->j_list_lock); |
702 | __journal_file_buffer(jh, transaction, BJ_Reserved); | 701 | __journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
818 | * committed and so it's safe to clear the dirty bit. | 817 | * committed and so it's safe to clear the dirty bit. |
819 | */ | 818 | */ |
820 | clear_buffer_dirty(jh2bh(jh)); | 819 | clear_buffer_dirty(jh2bh(jh)); |
821 | jh->b_transaction = transaction; | ||
822 | 820 | ||
823 | /* first access by this transaction */ | 821 | /* first access by this transaction */ |
824 | jh->b_modified = 0; | 822 | jh->b_modified = 0; |
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1069 | ret = -EIO; | 1067 | ret = -EIO; |
1070 | goto no_journal; | 1068 | goto no_journal; |
1071 | } | 1069 | } |
1072 | 1070 | /* We might have slept so buffer could be refiled now */ | |
1073 | if (jh->b_transaction != NULL) { | 1071 | if (jh->b_transaction != NULL && |
1072 | jh->b_transaction != handle->h_transaction) { | ||
1074 | JBUFFER_TRACE(jh, "unfile from commit"); | 1073 | JBUFFER_TRACE(jh, "unfile from commit"); |
1075 | __journal_temp_unlink_buffer(jh); | 1074 | __journal_temp_unlink_buffer(jh); |
1076 | /* It still points to the committing | 1075 | /* It still points to the committing |
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1091 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | 1090 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { |
1092 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | 1091 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); |
1093 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | 1092 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); |
1094 | __journal_temp_unlink_buffer(jh); | ||
1095 | jh->b_transaction = handle->h_transaction; | ||
1096 | JBUFFER_TRACE(jh, "file as data"); | 1093 | JBUFFER_TRACE(jh, "file as data"); |
1097 | __journal_file_buffer(jh, handle->h_transaction, | 1094 | __journal_file_buffer(jh, handle->h_transaction, |
1098 | BJ_SyncData); | 1095 | BJ_SyncData); |
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1300 | __journal_file_buffer(jh, transaction, BJ_Forget); | 1297 | __journal_file_buffer(jh, transaction, BJ_Forget); |
1301 | } else { | 1298 | } else { |
1302 | __journal_unfile_buffer(jh); | 1299 | __journal_unfile_buffer(jh); |
1303 | journal_remove_journal_head(bh); | ||
1304 | __brelse(bh); | ||
1305 | if (!buffer_jbd(bh)) { | 1300 | if (!buffer_jbd(bh)) { |
1306 | spin_unlock(&journal->j_list_lock); | 1301 | spin_unlock(&journal->j_list_lock); |
1307 | jbd_unlock_bh_state(bh); | 1302 | jbd_unlock_bh_state(bh); |
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) | |||
1622 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1617 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1623 | } | 1618 | } |
1624 | 1619 | ||
1620 | /* | ||
1621 | * Remove buffer from all transactions. | ||
1622 | * | ||
1623 | * Called with bh_state lock and j_list_lock | ||
1624 | * | ||
1625 | * jh and bh may be already freed when this function returns. | ||
1626 | */ | ||
1625 | void __journal_unfile_buffer(struct journal_head *jh) | 1627 | void __journal_unfile_buffer(struct journal_head *jh) |
1626 | { | 1628 | { |
1627 | __journal_temp_unlink_buffer(jh); | 1629 | __journal_temp_unlink_buffer(jh); |
1628 | jh->b_transaction = NULL; | 1630 | jh->b_transaction = NULL; |
1631 | journal_put_journal_head(jh); | ||
1629 | } | 1632 | } |
1630 | 1633 | ||
1631 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1634 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1632 | { | 1635 | { |
1633 | jbd_lock_bh_state(jh2bh(jh)); | 1636 | struct buffer_head *bh = jh2bh(jh); |
1637 | |||
1638 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1639 | get_bh(bh); | ||
1640 | jbd_lock_bh_state(bh); | ||
1634 | spin_lock(&journal->j_list_lock); | 1641 | spin_lock(&journal->j_list_lock); |
1635 | __journal_unfile_buffer(jh); | 1642 | __journal_unfile_buffer(jh); |
1636 | spin_unlock(&journal->j_list_lock); | 1643 | spin_unlock(&journal->j_list_lock); |
1637 | jbd_unlock_bh_state(jh2bh(jh)); | 1644 | jbd_unlock_bh_state(bh); |
1645 | __brelse(bh); | ||
1638 | } | 1646 | } |
1639 | 1647 | ||
1640 | /* | 1648 | /* |
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1661 | /* A written-back ordered data buffer */ | 1669 | /* A written-back ordered data buffer */ |
1662 | JBUFFER_TRACE(jh, "release data"); | 1670 | JBUFFER_TRACE(jh, "release data"); |
1663 | __journal_unfile_buffer(jh); | 1671 | __journal_unfile_buffer(jh); |
1664 | journal_remove_journal_head(bh); | ||
1665 | __brelse(bh); | ||
1666 | } | 1672 | } |
1667 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1673 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1668 | /* written-back checkpointed metadata buffer */ | 1674 | /* written-back checkpointed metadata buffer */ |
1669 | if (jh->b_jlist == BJ_None) { | 1675 | if (jh->b_jlist == BJ_None) { |
1670 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1676 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1671 | __journal_remove_checkpoint(jh); | 1677 | __journal_remove_checkpoint(jh); |
1672 | journal_remove_journal_head(bh); | ||
1673 | __brelse(bh); | ||
1674 | } | 1678 | } |
1675 | } | 1679 | } |
1676 | spin_unlock(&journal->j_list_lock); | 1680 | spin_unlock(&journal->j_list_lock); |
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal, | |||
1733 | /* | 1737 | /* |
1734 | * We take our own ref against the journal_head here to avoid | 1738 | * We take our own ref against the journal_head here to avoid |
1735 | * having to add tons of locking around each instance of | 1739 | * having to add tons of locking around each instance of |
1736 | * journal_remove_journal_head() and journal_put_journal_head(). | 1740 | * journal_put_journal_head(). |
1737 | */ | 1741 | */ |
1738 | jh = journal_grab_journal_head(bh); | 1742 | jh = journal_grab_journal_head(bh); |
1739 | if (!jh) | 1743 | if (!jh) |
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1770 | int may_free = 1; | 1774 | int may_free = 1; |
1771 | struct buffer_head *bh = jh2bh(jh); | 1775 | struct buffer_head *bh = jh2bh(jh); |
1772 | 1776 | ||
1773 | __journal_unfile_buffer(jh); | ||
1774 | |||
1775 | if (jh->b_cp_transaction) { | 1777 | if (jh->b_cp_transaction) { |
1776 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1778 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1779 | __journal_temp_unlink_buffer(jh); | ||
1777 | /* | 1780 | /* |
1778 | * We don't want to write the buffer anymore, clear the | 1781 | * We don't want to write the buffer anymore, clear the |
1779 | * bit so that we don't confuse checks in | 1782 | * bit so that we don't confuse checks in |
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1784 | may_free = 0; | 1787 | may_free = 0; |
1785 | } else { | 1788 | } else { |
1786 | JBUFFER_TRACE(jh, "on running transaction"); | 1789 | JBUFFER_TRACE(jh, "on running transaction"); |
1787 | journal_remove_journal_head(bh); | 1790 | __journal_unfile_buffer(jh); |
1788 | __brelse(bh); | ||
1789 | } | 1791 | } |
1790 | return may_free; | 1792 | return may_free; |
1791 | } | 1793 | } |
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh, | |||
2070 | 2072 | ||
2071 | if (jh->b_transaction) | 2073 | if (jh->b_transaction) |
2072 | __journal_temp_unlink_buffer(jh); | 2074 | __journal_temp_unlink_buffer(jh); |
2075 | else | ||
2076 | journal_grab_journal_head(bh); | ||
2073 | jh->b_transaction = transaction; | 2077 | jh->b_transaction = transaction; |
2074 | 2078 | ||
2075 | switch (jlist) { | 2079 | switch (jlist) { |
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh, | |||
2127 | * already started to be used by a subsequent transaction, refile the | 2131 | * already started to be used by a subsequent transaction, refile the |
2128 | * buffer on that transaction's metadata list. | 2132 | * buffer on that transaction's metadata list. |
2129 | * | 2133 | * |
2130 | * Called under journal->j_list_lock | 2134 | * Called under j_list_lock |
2131 | * | ||
2132 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2135 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2136 | * | ||
2137 | * jh and bh may be already free when this function returns | ||
2133 | */ | 2138 | */ |
2134 | void __journal_refile_buffer(struct journal_head *jh) | 2139 | void __journal_refile_buffer(struct journal_head *jh) |
2135 | { | 2140 | { |
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2153 | 2158 | ||
2154 | was_dirty = test_clear_buffer_jbddirty(bh); | 2159 | was_dirty = test_clear_buffer_jbddirty(bh); |
2155 | __journal_temp_unlink_buffer(jh); | 2160 | __journal_temp_unlink_buffer(jh); |
2161 | /* | ||
2162 | * We set b_transaction here because b_next_transaction will inherit | ||
2163 | * our jh reference and thus __journal_file_buffer() must not take a | ||
2164 | * new one. | ||
2165 | */ | ||
2156 | jh->b_transaction = jh->b_next_transaction; | 2166 | jh->b_transaction = jh->b_next_transaction; |
2157 | jh->b_next_transaction = NULL; | 2167 | jh->b_next_transaction = NULL; |
2158 | if (buffer_freed(bh)) | 2168 | if (buffer_freed(bh)) |
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2169 | } | 2179 | } |
2170 | 2180 | ||
2171 | /* | 2181 | /* |
2172 | * For the unlocked version of this call, also make sure that any | 2182 | * __journal_refile_buffer() with necessary locking added. We take our bh |
2173 | * hanging journal_head is cleaned up if necessary. | 2183 | * reference so that we can safely unlock bh. |
2174 | * | 2184 | * |
2175 | * __journal_refile_buffer is usually called as part of a single locked | 2185 | * The jh and bh may be freed by this call. |
2176 | * operation on a buffer_head, in which the caller is probably going to | ||
2177 | * be hooking the journal_head onto other lists. In that case it is up | ||
2178 | * to the caller to remove the journal_head if necessary. For the | ||
2179 | * unlocked journal_refile_buffer call, the caller isn't going to be | ||
2180 | * doing anything else to the buffer so we need to do the cleanup | ||
2181 | * ourselves to avoid a jh leak. | ||
2182 | * | ||
2183 | * *** The journal_head may be freed by this call! *** | ||
2184 | */ | 2186 | */ |
2185 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2187 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2186 | { | 2188 | { |
2187 | struct buffer_head *bh = jh2bh(jh); | 2189 | struct buffer_head *bh = jh2bh(jh); |
2188 | 2190 | ||
2191 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2192 | get_bh(bh); | ||
2189 | jbd_lock_bh_state(bh); | 2193 | jbd_lock_bh_state(bh); |
2190 | spin_lock(&journal->j_list_lock); | 2194 | spin_lock(&journal->j_list_lock); |
2191 | |||
2192 | __journal_refile_buffer(jh); | 2195 | __journal_refile_buffer(jh); |
2193 | jbd_unlock_bh_state(bh); | 2196 | jbd_unlock_bh_state(bh); |
2194 | journal_remove_journal_head(bh); | ||
2195 | |||
2196 | spin_unlock(&journal->j_list_lock); | 2197 | spin_unlock(&journal->j_list_lock); |
2197 | __brelse(bh); | 2198 | __brelse(bh); |
2198 | } | 2199 | } |
diff --git a/include/linux/jbd.h b/include/linux/jbd.h index e06965081ba5..e6a5e34bed4f 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h | |||
@@ -940,7 +940,6 @@ extern int journal_force_commit(journal_t *); | |||
940 | */ | 940 | */ |
941 | struct journal_head *journal_add_journal_head(struct buffer_head *bh); | 941 | struct journal_head *journal_add_journal_head(struct buffer_head *bh); |
942 | struct journal_head *journal_grab_journal_head(struct buffer_head *bh); | 942 | struct journal_head *journal_grab_journal_head(struct buffer_head *bh); |
943 | void journal_remove_journal_head(struct buffer_head *bh); | ||
944 | void journal_put_journal_head(struct journal_head *jh); | 943 | void journal_put_journal_head(struct journal_head *jh); |
945 | 944 | ||
946 | /* | 945 | /* |