diff options
author | Jan Kara <jack@suse.cz> | 2011-06-13 15:38:22 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-06-13 15:38:22 -0400 |
commit | de1b794130b130e77ffa975bb58cb843744f9ae5 (patch) | |
tree | 8c4b37582128dc36c2b7385294fba58d017ce3e8 /fs/jbd2 | |
parent | 1fb74cda1b5e9c6207225fda5ef7504e815ce0e0 (diff) |
jbd2: Fix oops in jbd2_journal_remove_journal_head()
jbd2_journal_remove_journal_head() can oops when trying to access
journal_head returned by bh2jh(). This is caused for example by the
following race:
TASK1 TASK2
jbd2_journal_commit_transaction()
...
processing t_forget list
__jbd2_journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh);
jbd2_journal_try_to_free_buffers()
jbd2_journal_grab_journal_head(bh)
jbd_lock_bh_state(bh)
__journal_try_to_free_buffer()
jbd2_journal_put_journal_head(jh)
jbd2_journal_remove_journal_head(bh);
jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and
buffer is not part of any transaction and thus frees journal_head
before TASK1 gets to doing so. Note that even buffer_head can be
released by try_to_free_buffers() after
jbd2_journal_put_journal_head() which adds even larger opportunity for
oops (but I didn't see this happen in reality).
Fix the problem by making transactions hold their own journal_head
reference (in b_jcount). That way we don't have to remove journal_head
explicitely via jbd2_journal_remove_journal_head() and instead just
remove journal_head when b_jcount drops to zero. The result of this is
that [__]jbd2_journal_refile_buffer(),
[__]jbd2_journal_unfile_buffer(), and
__jdb2_journal_remove_checkpoint() can free journal_head which needs
modification of a few callers. Also we have to be careful because once
journal_head is removed, buffer_head might be freed as well. So we
have to get our own buffer_head reference where it matters.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/checkpoint.c | 28 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 33 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 91 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 67 |
4 files changed, 99 insertions, 120 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6a79fd0a1a32..2c62c5aae82f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
97 | 97 | ||
98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && | 98 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 99 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
100 | /* | ||
101 | * Get our reference so that bh cannot be freed before | ||
102 | * we unlock it | ||
103 | */ | ||
104 | get_bh(bh); | ||
100 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 105 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
101 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 106 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
102 | jbd_unlock_bh_state(bh); | 107 | jbd_unlock_bh_state(bh); |
103 | jbd2_journal_remove_journal_head(bh); | ||
104 | BUFFER_TRACE(bh, "release"); | 108 | BUFFER_TRACE(bh, "release"); |
105 | __brelse(bh); | 109 | __brelse(bh); |
106 | } else { | 110 | } else { |
@@ -223,8 +227,8 @@ restart: | |||
223 | spin_lock(&journal->j_list_lock); | 227 | spin_lock(&journal->j_list_lock); |
224 | goto restart; | 228 | goto restart; |
225 | } | 229 | } |
230 | get_bh(bh); | ||
226 | if (buffer_locked(bh)) { | 231 | if (buffer_locked(bh)) { |
227 | atomic_inc(&bh->b_count); | ||
228 | spin_unlock(&journal->j_list_lock); | 232 | spin_unlock(&journal->j_list_lock); |
229 | jbd_unlock_bh_state(bh); | 233 | jbd_unlock_bh_state(bh); |
230 | wait_on_buffer(bh); | 234 | wait_on_buffer(bh); |
@@ -243,7 +247,6 @@ restart: | |||
243 | */ | 247 | */ |
244 | released = __jbd2_journal_remove_checkpoint(jh); | 248 | released = __jbd2_journal_remove_checkpoint(jh); |
245 | jbd_unlock_bh_state(bh); | 249 | jbd_unlock_bh_state(bh); |
246 | jbd2_journal_remove_journal_head(bh); | ||
247 | __brelse(bh); | 250 | __brelse(bh); |
248 | } | 251 | } |
249 | 252 | ||
@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
284 | int ret = 0; | 287 | int ret = 0; |
285 | 288 | ||
286 | if (buffer_locked(bh)) { | 289 | if (buffer_locked(bh)) { |
287 | atomic_inc(&bh->b_count); | 290 | get_bh(bh); |
288 | spin_unlock(&journal->j_list_lock); | 291 | spin_unlock(&journal->j_list_lock); |
289 | jbd_unlock_bh_state(bh); | 292 | jbd_unlock_bh_state(bh); |
290 | wait_on_buffer(bh); | 293 | wait_on_buffer(bh); |
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
316 | ret = 1; | 319 | ret = 1; |
317 | if (unlikely(buffer_write_io_error(bh))) | 320 | if (unlikely(buffer_write_io_error(bh))) |
318 | ret = -EIO; | 321 | ret = -EIO; |
322 | get_bh(bh); | ||
319 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 323 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
320 | BUFFER_TRACE(bh, "remove from checkpoint"); | 324 | BUFFER_TRACE(bh, "remove from checkpoint"); |
321 | __jbd2_journal_remove_checkpoint(jh); | 325 | __jbd2_journal_remove_checkpoint(jh); |
322 | spin_unlock(&journal->j_list_lock); | 326 | spin_unlock(&journal->j_list_lock); |
323 | jbd_unlock_bh_state(bh); | 327 | jbd_unlock_bh_state(bh); |
324 | jbd2_journal_remove_journal_head(bh); | ||
325 | __brelse(bh); | 328 | __brelse(bh); |
326 | } else { | 329 | } else { |
327 | /* | 330 | /* |
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
554 | /* | 557 | /* |
555 | * journal_clean_one_cp_list | 558 | * journal_clean_one_cp_list |
556 | * | 559 | * |
557 | * Find all the written-back checkpoint buffers in the given list and release them. | 560 | * Find all the written-back checkpoint buffers in the given list and |
561 | * release them. | ||
558 | * | 562 | * |
559 | * Called with the journal locked. | 563 | * Called with the journal locked. |
560 | * Called with j_list_lock held. | 564 | * Called with j_list_lock held. |
@@ -663,8 +667,8 @@ out: | |||
663 | * checkpoint lists. | 667 | * checkpoint lists. |
664 | * | 668 | * |
665 | * The function returns 1 if it frees the transaction, 0 otherwise. | 669 | * The function returns 1 if it frees the transaction, 0 otherwise. |
670 | * The function can free jh and bh. | ||
666 | * | 671 | * |
667 | * This function is called with the journal locked. | ||
668 | * This function is called with j_list_lock held. | 672 | * This function is called with j_list_lock held. |
669 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) | 673 | * This function is called with jbd_lock_bh_state(jh2bh(jh)) |
670 | */ | 674 | */ |
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
684 | } | 688 | } |
685 | journal = transaction->t_journal; | 689 | journal = transaction->t_journal; |
686 | 690 | ||
691 | JBUFFER_TRACE(jh, "removing from transaction"); | ||
687 | __buffer_unlink(jh); | 692 | __buffer_unlink(jh); |
688 | jh->b_cp_transaction = NULL; | 693 | jh->b_cp_transaction = NULL; |
694 | jbd2_journal_put_journal_head(jh); | ||
689 | 695 | ||
690 | if (transaction->t_checkpoint_list != NULL || | 696 | if (transaction->t_checkpoint_list != NULL || |
691 | transaction->t_checkpoint_io_list != NULL) | 697 | transaction->t_checkpoint_io_list != NULL) |
692 | goto out; | 698 | goto out; |
693 | JBUFFER_TRACE(jh, "transaction has no more buffers"); | ||
694 | 699 | ||
695 | /* | 700 | /* |
696 | * There is one special case to worry about: if we have just pulled the | 701 | * There is one special case to worry about: if we have just pulled the |
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
701 | * The locking here around t_state is a bit sleazy. | 706 | * The locking here around t_state is a bit sleazy. |
702 | * See the comment at the end of jbd2_journal_commit_transaction(). | 707 | * See the comment at the end of jbd2_journal_commit_transaction(). |
703 | */ | 708 | */ |
704 | if (transaction->t_state != T_FINISHED) { | 709 | if (transaction->t_state != T_FINISHED) |
705 | JBUFFER_TRACE(jh, "belongs to running/committing transaction"); | ||
706 | goto out; | 710 | goto out; |
707 | } | ||
708 | 711 | ||
709 | /* OK, that was the last buffer for the transaction: we can now | 712 | /* OK, that was the last buffer for the transaction: we can now |
710 | safely remove this transaction from the log */ | 713 | safely remove this transaction from the log */ |
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
723 | wake_up(&journal->j_wait_logspace); | 726 | wake_up(&journal->j_wait_logspace); |
724 | ret = 1; | 727 | ret = 1; |
725 | out: | 728 | out: |
726 | JBUFFER_TRACE(jh, "exit"); | ||
727 | return ret; | 729 | return ret; |
728 | } | 730 | } |
729 | 731 | ||
@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh, | |||
742 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); | 744 | J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); |
743 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); | 745 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
744 | 746 | ||
747 | /* Get reference for checkpointing transaction */ | ||
748 | jbd2_journal_grab_journal_head(jh2bh(jh)); | ||
745 | jh->b_cp_transaction = transaction; | 749 | jh->b_cp_transaction = transaction; |
746 | 750 | ||
747 | if (!transaction->t_checkpoint_list) { | 751 | if (!transaction->t_checkpoint_list) { |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7f21cf3aaf92..eef6979821a4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -848,10 +848,16 @@ restart_loop: | |||
848 | while (commit_transaction->t_forget) { | 848 | while (commit_transaction->t_forget) { |
849 | transaction_t *cp_transaction; | 849 | transaction_t *cp_transaction; |
850 | struct buffer_head *bh; | 850 | struct buffer_head *bh; |
851 | int try_to_free = 0; | ||
851 | 852 | ||
852 | jh = commit_transaction->t_forget; | 853 | jh = commit_transaction->t_forget; |
853 | spin_unlock(&journal->j_list_lock); | 854 | spin_unlock(&journal->j_list_lock); |
854 | bh = jh2bh(jh); | 855 | bh = jh2bh(jh); |
856 | /* | ||
857 | * Get a reference so that bh cannot be freed before we are | ||
858 | * done with it. | ||
859 | */ | ||
860 | get_bh(bh); | ||
855 | jbd_lock_bh_state(bh); | 861 | jbd_lock_bh_state(bh); |
856 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); | 862 | J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); |
857 | 863 | ||
@@ -914,28 +920,27 @@ restart_loop: | |||
914 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 920 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
915 | if (is_journal_aborted(journal)) | 921 | if (is_journal_aborted(journal)) |
916 | clear_buffer_jbddirty(bh); | 922 | clear_buffer_jbddirty(bh); |
917 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | ||
918 | __jbd2_journal_refile_buffer(jh); | ||
919 | jbd_unlock_bh_state(bh); | ||
920 | } else { | 923 | } else { |
921 | J_ASSERT_BH(bh, !buffer_dirty(bh)); | 924 | J_ASSERT_BH(bh, !buffer_dirty(bh)); |
922 | /* The buffer on BJ_Forget list and not jbddirty means | 925 | /* |
926 | * The buffer on BJ_Forget list and not jbddirty means | ||
923 | * it has been freed by this transaction and hence it | 927 | * it has been freed by this transaction and hence it |
924 | * could not have been reallocated until this | 928 | * could not have been reallocated until this |
925 | * transaction has committed. *BUT* it could be | 929 | * transaction has committed. *BUT* it could be |
926 | * reallocated once we have written all the data to | 930 | * reallocated once we have written all the data to |
927 | * disk and before we process the buffer on BJ_Forget | 931 | * disk and before we process the buffer on BJ_Forget |
928 | * list. */ | 932 | * list. |
929 | JBUFFER_TRACE(jh, "refile or unfile freed buffer"); | 933 | */ |
930 | __jbd2_journal_refile_buffer(jh); | 934 | if (!jh->b_next_transaction) |
931 | if (!jh->b_transaction) { | 935 | try_to_free = 1; |
932 | jbd_unlock_bh_state(bh); | ||
933 | /* needs a brelse */ | ||
934 | jbd2_journal_remove_journal_head(bh); | ||
935 | release_buffer_page(bh); | ||
936 | } else | ||
937 | jbd_unlock_bh_state(bh); | ||
938 | } | 936 | } |
937 | JBUFFER_TRACE(jh, "refile or unfile buffer"); | ||
938 | __jbd2_journal_refile_buffer(jh); | ||
939 | jbd_unlock_bh_state(bh); | ||
940 | if (try_to_free) | ||
941 | release_buffer_page(bh); /* Drops bh reference */ | ||
942 | else | ||
943 | __brelse(bh); | ||
939 | cond_resched_lock(&journal->j_list_lock); | 944 | cond_resched_lock(&journal->j_list_lock); |
940 | } | 945 | } |
941 | spin_unlock(&journal->j_list_lock); | 946 | spin_unlock(&journal->j_list_lock); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9a7826990304..0dfa5b598e68 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -2078,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
2078 | * When a buffer has its BH_JBD bit set it is immune from being released by | 2078 | * When a buffer has its BH_JBD bit set it is immune from being released by |
2079 | * core kernel code, mainly via ->b_count. | 2079 | * core kernel code, mainly via ->b_count. |
2080 | * | 2080 | * |
2081 | * A journal_head may be detached from its buffer_head when the journal_head's | 2081 | * A journal_head is detached from its buffer_head when the journal_head's |
2082 | * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. | 2082 | * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint |
2083 | * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the | 2083 | * transaction (b_cp_transaction) hold their references to b_jcount. |
2084 | * journal_head can be dropped if needed. | ||
2085 | * | 2084 | * |
2086 | * Various places in the kernel want to attach a journal_head to a buffer_head | 2085 | * Various places in the kernel want to attach a journal_head to a buffer_head |
2087 | * _before_ attaching the journal_head to a transaction. To protect the | 2086 | * _before_ attaching the journal_head to a transaction. To protect the |
@@ -2094,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
2094 | * (Attach a journal_head if needed. Increments b_jcount) | 2093 | * (Attach a journal_head if needed. Increments b_jcount) |
2095 | * struct journal_head *jh = jbd2_journal_add_journal_head(bh); | 2094 | * struct journal_head *jh = jbd2_journal_add_journal_head(bh); |
2096 | * ... | 2095 | * ... |
2096 | * (Get another reference for transaction) | ||
2097 | * jbd2_journal_grab_journal_head(bh); | ||
2097 | * jh->b_transaction = xxx; | 2098 | * jh->b_transaction = xxx; |
2099 | * (Put original reference) | ||
2098 | * jbd2_journal_put_journal_head(jh); | 2100 | * jbd2_journal_put_journal_head(jh); |
2099 | * | ||
2100 | * Now, the journal_head's b_jcount is zero, but it is safe from being released | ||
2101 | * because it has a non-zero b_transaction. | ||
2102 | */ | 2101 | */ |
2103 | 2102 | ||
2104 | /* | 2103 | /* |
2105 | * Give a buffer_head a journal_head. | 2104 | * Give a buffer_head a journal_head. |
2106 | * | 2105 | * |
2107 | * Doesn't need the journal lock. | ||
2108 | * May sleep. | 2106 | * May sleep. |
2109 | */ | 2107 | */ |
2110 | struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) | 2108 | struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) |
@@ -2168,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
2168 | struct journal_head *jh = bh2jh(bh); | 2166 | struct journal_head *jh = bh2jh(bh); |
2169 | 2167 | ||
2170 | J_ASSERT_JH(jh, jh->b_jcount >= 0); | 2168 | J_ASSERT_JH(jh, jh->b_jcount >= 0); |
2171 | 2169 | J_ASSERT_JH(jh, jh->b_transaction == NULL); | |
2172 | get_bh(bh); | 2170 | J_ASSERT_JH(jh, jh->b_next_transaction == NULL); |
2173 | if (jh->b_jcount == 0) { | 2171 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
2174 | if (jh->b_transaction == NULL && | 2172 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); |
2175 | jh->b_next_transaction == NULL && | 2173 | J_ASSERT_BH(bh, buffer_jbd(bh)); |
2176 | jh->b_cp_transaction == NULL) { | 2174 | J_ASSERT_BH(bh, jh2bh(jh) == bh); |
2177 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); | 2175 | BUFFER_TRACE(bh, "remove journal_head"); |
2178 | J_ASSERT_BH(bh, buffer_jbd(bh)); | 2176 | if (jh->b_frozen_data) { |
2179 | J_ASSERT_BH(bh, jh2bh(jh) == bh); | 2177 | printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); |
2180 | BUFFER_TRACE(bh, "remove journal_head"); | 2178 | jbd2_free(jh->b_frozen_data, bh->b_size); |
2181 | if (jh->b_frozen_data) { | ||
2182 | printk(KERN_WARNING "%s: freeing " | ||
2183 | "b_frozen_data\n", | ||
2184 | __func__); | ||
2185 | jbd2_free(jh->b_frozen_data, bh->b_size); | ||
2186 | } | ||
2187 | if (jh->b_committed_data) { | ||
2188 | printk(KERN_WARNING "%s: freeing " | ||
2189 | "b_committed_data\n", | ||
2190 | __func__); | ||
2191 | jbd2_free(jh->b_committed_data, bh->b_size); | ||
2192 | } | ||
2193 | bh->b_private = NULL; | ||
2194 | jh->b_bh = NULL; /* debug, really */ | ||
2195 | clear_buffer_jbd(bh); | ||
2196 | __brelse(bh); | ||
2197 | journal_free_journal_head(jh); | ||
2198 | } else { | ||
2199 | BUFFER_TRACE(bh, "journal_head was locked"); | ||
2200 | } | ||
2201 | } | 2179 | } |
2180 | if (jh->b_committed_data) { | ||
2181 | printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); | ||
2182 | jbd2_free(jh->b_committed_data, bh->b_size); | ||
2183 | } | ||
2184 | bh->b_private = NULL; | ||
2185 | jh->b_bh = NULL; /* debug, really */ | ||
2186 | clear_buffer_jbd(bh); | ||
2187 | journal_free_journal_head(jh); | ||
2202 | } | 2188 | } |
2203 | 2189 | ||
2204 | /* | 2190 | /* |
2205 | * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction | 2191 | * Drop a reference on the passed journal_head. If it fell to zero then |
2206 | * and has a zero b_jcount then remove and release its journal_head. If we did | ||
2207 | * see that the buffer is not used by any transaction we also "logically" | ||
2208 | * decrement ->b_count. | ||
2209 | * | ||
2210 | * We in fact take an additional increment on ->b_count as a convenience, | ||
2211 | * because the caller usually wants to do additional things with the bh | ||
2212 | * after calling here. | ||
2213 | * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some | ||
2214 | * time. Once the caller has run __brelse(), the buffer is eligible for | ||
2215 | * reaping by try_to_free_buffers(). | ||
2216 | */ | ||
2217 | void jbd2_journal_remove_journal_head(struct buffer_head *bh) | ||
2218 | { | ||
2219 | jbd_lock_bh_journal_head(bh); | ||
2220 | __journal_remove_journal_head(bh); | ||
2221 | jbd_unlock_bh_journal_head(bh); | ||
2222 | } | ||
2223 | |||
2224 | /* | ||
2225 | * Drop a reference on the passed journal_head. If it fell to zero then try to | ||
2226 | * release the journal_head from the buffer_head. | 2192 | * release the journal_head from the buffer_head. |
2227 | */ | 2193 | */ |
2228 | void jbd2_journal_put_journal_head(struct journal_head *jh) | 2194 | void jbd2_journal_put_journal_head(struct journal_head *jh) |
@@ -2232,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) | |||
2232 | jbd_lock_bh_journal_head(bh); | 2198 | jbd_lock_bh_journal_head(bh); |
2233 | J_ASSERT_JH(jh, jh->b_jcount > 0); | 2199 | J_ASSERT_JH(jh, jh->b_jcount > 0); |
2234 | --jh->b_jcount; | 2200 | --jh->b_jcount; |
2235 | if (!jh->b_jcount && !jh->b_transaction) { | 2201 | if (!jh->b_jcount) { |
2236 | __journal_remove_journal_head(bh); | 2202 | __journal_remove_journal_head(bh); |
2203 | jbd_unlock_bh_journal_head(bh); | ||
2237 | __brelse(bh); | 2204 | __brelse(bh); |
2238 | } | 2205 | } else |
2239 | jbd_unlock_bh_journal_head(bh); | 2206 | jbd_unlock_bh_journal_head(bh); |
2240 | } | 2207 | } |
2241 | 2208 | ||
2242 | /* | 2209 | /* |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 547b101049e5..2d7109414cdd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | 31 | ||
32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
33 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * jbd2_get_transaction: obtain a new transaction_t object. | 36 | * jbd2_get_transaction: obtain a new transaction_t object. |
@@ -764,7 +765,6 @@ repeat: | |||
764 | if (!jh->b_transaction) { | 765 | if (!jh->b_transaction) { |
765 | JBUFFER_TRACE(jh, "no transaction"); | 766 | JBUFFER_TRACE(jh, "no transaction"); |
766 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 767 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
767 | jh->b_transaction = transaction; | ||
768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
769 | spin_lock(&journal->j_list_lock); | 769 | spin_lock(&journal->j_list_lock); |
770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); | 770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
895 | * committed and so it's safe to clear the dirty bit. | 895 | * committed and so it's safe to clear the dirty bit. |
896 | */ | 896 | */ |
897 | clear_buffer_dirty(jh2bh(jh)); | 897 | clear_buffer_dirty(jh2bh(jh)); |
898 | jh->b_transaction = transaction; | ||
899 | |||
900 | /* first access by this transaction */ | 898 | /* first access by this transaction */ |
901 | jh->b_modified = 0; | 899 | jh->b_modified = 0; |
902 | 900 | ||
@@ -1230,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1230 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); | 1228 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |
1231 | } else { | 1229 | } else { |
1232 | __jbd2_journal_unfile_buffer(jh); | 1230 | __jbd2_journal_unfile_buffer(jh); |
1233 | jbd2_journal_remove_journal_head(bh); | ||
1234 | __brelse(bh); | ||
1235 | if (!buffer_jbd(bh)) { | 1231 | if (!buffer_jbd(bh)) { |
1236 | spin_unlock(&journal->j_list_lock); | 1232 | spin_unlock(&journal->j_list_lock); |
1237 | jbd_unlock_bh_state(bh); | 1233 | jbd_unlock_bh_state(bh); |
@@ -1554,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1554 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1550 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1555 | } | 1551 | } |
1556 | 1552 | ||
1557 | void __jbd2_journal_unfile_buffer(struct journal_head *jh) | 1553 | /* |
1554 | * Remove buffer from all transactions. | ||
1555 | * | ||
1556 | * Called with bh_state lock and j_list_lock | ||
1557 | * | ||
1558 | * jh and bh may be already freed when this function returns. | ||
1559 | */ | ||
1560 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh) | ||
1558 | { | 1561 | { |
1559 | __jbd2_journal_temp_unlink_buffer(jh); | 1562 | __jbd2_journal_temp_unlink_buffer(jh); |
1560 | jh->b_transaction = NULL; | 1563 | jh->b_transaction = NULL; |
1564 | jbd2_journal_put_journal_head(jh); | ||
1561 | } | 1565 | } |
1562 | 1566 | ||
1563 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1567 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1564 | { | 1568 | { |
1565 | jbd_lock_bh_state(jh2bh(jh)); | 1569 | struct buffer_head *bh = jh2bh(jh); |
1570 | |||
1571 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1572 | get_bh(bh); | ||
1573 | jbd_lock_bh_state(bh); | ||
1566 | spin_lock(&journal->j_list_lock); | 1574 | spin_lock(&journal->j_list_lock); |
1567 | __jbd2_journal_unfile_buffer(jh); | 1575 | __jbd2_journal_unfile_buffer(jh); |
1568 | spin_unlock(&journal->j_list_lock); | 1576 | spin_unlock(&journal->j_list_lock); |
1569 | jbd_unlock_bh_state(jh2bh(jh)); | 1577 | jbd_unlock_bh_state(bh); |
1578 | __brelse(bh); | ||
1570 | } | 1579 | } |
1571 | 1580 | ||
1572 | /* | 1581 | /* |
@@ -1593,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1593 | if (jh->b_jlist == BJ_None) { | 1602 | if (jh->b_jlist == BJ_None) { |
1594 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1603 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1595 | __jbd2_journal_remove_checkpoint(jh); | 1604 | __jbd2_journal_remove_checkpoint(jh); |
1596 | jbd2_journal_remove_journal_head(bh); | ||
1597 | __brelse(bh); | ||
1598 | } | 1605 | } |
1599 | } | 1606 | } |
1600 | spin_unlock(&journal->j_list_lock); | 1607 | spin_unlock(&journal->j_list_lock); |
@@ -1657,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
1657 | /* | 1664 | /* |
1658 | * We take our own ref against the journal_head here to avoid | 1665 | * We take our own ref against the journal_head here to avoid |
1659 | * having to add tons of locking around each instance of | 1666 | * having to add tons of locking around each instance of |
1660 | * jbd2_journal_remove_journal_head() and | ||
1661 | * jbd2_journal_put_journal_head(). | 1667 | * jbd2_journal_put_journal_head(). |
1662 | */ | 1668 | */ |
1663 | jh = jbd2_journal_grab_journal_head(bh); | 1669 | jh = jbd2_journal_grab_journal_head(bh); |
@@ -1695,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1695 | int may_free = 1; | 1701 | int may_free = 1; |
1696 | struct buffer_head *bh = jh2bh(jh); | 1702 | struct buffer_head *bh = jh2bh(jh); |
1697 | 1703 | ||
1698 | __jbd2_journal_unfile_buffer(jh); | ||
1699 | |||
1700 | if (jh->b_cp_transaction) { | 1704 | if (jh->b_cp_transaction) { |
1701 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1705 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1706 | __jbd2_journal_temp_unlink_buffer(jh); | ||
1702 | /* | 1707 | /* |
1703 | * We don't want to write the buffer anymore, clear the | 1708 | * We don't want to write the buffer anymore, clear the |
1704 | * bit so that we don't confuse checks in | 1709 | * bit so that we don't confuse checks in |
@@ -1709,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1709 | may_free = 0; | 1714 | may_free = 0; |
1710 | } else { | 1715 | } else { |
1711 | JBUFFER_TRACE(jh, "on running transaction"); | 1716 | JBUFFER_TRACE(jh, "on running transaction"); |
1712 | jbd2_journal_remove_journal_head(bh); | 1717 | __jbd2_journal_unfile_buffer(jh); |
1713 | __brelse(bh); | ||
1714 | } | 1718 | } |
1715 | return may_free; | 1719 | return may_free; |
1716 | } | 1720 | } |
@@ -1988,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
1988 | 1992 | ||
1989 | if (jh->b_transaction) | 1993 | if (jh->b_transaction) |
1990 | __jbd2_journal_temp_unlink_buffer(jh); | 1994 | __jbd2_journal_temp_unlink_buffer(jh); |
1995 | else | ||
1996 | jbd2_journal_grab_journal_head(bh); | ||
1991 | jh->b_transaction = transaction; | 1997 | jh->b_transaction = transaction; |
1992 | 1998 | ||
1993 | switch (jlist) { | 1999 | switch (jlist) { |
@@ -2039,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh, | |||
2039 | * already started to be used by a subsequent transaction, refile the | 2045 | * already started to be used by a subsequent transaction, refile the |
2040 | * buffer on that transaction's metadata list. | 2046 | * buffer on that transaction's metadata list. |
2041 | * | 2047 | * |
2042 | * Called under journal->j_list_lock | 2048 | * Called under j_list_lock |
2043 | * | ||
2044 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2049 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2050 | * | ||
2051 | * jh and bh may be already free when this function returns | ||
2045 | */ | 2052 | */ |
2046 | void __jbd2_journal_refile_buffer(struct journal_head *jh) | 2053 | void __jbd2_journal_refile_buffer(struct journal_head *jh) |
2047 | { | 2054 | { |
@@ -2065,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2065 | 2072 | ||
2066 | was_dirty = test_clear_buffer_jbddirty(bh); | 2073 | was_dirty = test_clear_buffer_jbddirty(bh); |
2067 | __jbd2_journal_temp_unlink_buffer(jh); | 2074 | __jbd2_journal_temp_unlink_buffer(jh); |
2075 | /* | ||
2076 | * We set b_transaction here because b_next_transaction will inherit | ||
2077 | * our jh reference and thus __jbd2_journal_file_buffer() must not | ||
2078 | * take a new one. | ||
2079 | */ | ||
2068 | jh->b_transaction = jh->b_next_transaction; | 2080 | jh->b_transaction = jh->b_next_transaction; |
2069 | jh->b_next_transaction = NULL; | 2081 | jh->b_next_transaction = NULL; |
2070 | if (buffer_freed(bh)) | 2082 | if (buffer_freed(bh)) |
@@ -2081,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2081 | } | 2093 | } |
2082 | 2094 | ||
2083 | /* | 2095 | /* |
2084 | * For the unlocked version of this call, also make sure that any | 2096 | * __jbd2_journal_refile_buffer() with necessary locking added. We take our |
2085 | * hanging journal_head is cleaned up if necessary. | 2097 | * bh reference so that we can safely unlock bh. |
2086 | * | 2098 | * |
2087 | * __jbd2_journal_refile_buffer is usually called as part of a single locked | 2099 | * The jh and bh may be freed by this call. |
2088 | * operation on a buffer_head, in which the caller is probably going to | ||
2089 | * be hooking the journal_head onto other lists. In that case it is up | ||
2090 | * to the caller to remove the journal_head if necessary. For the | ||
2091 | * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be | ||
2092 | * doing anything else to the buffer so we need to do the cleanup | ||
2093 | * ourselves to avoid a jh leak. | ||
2094 | * | ||
2095 | * *** The journal_head may be freed by this call! *** | ||
2096 | */ | 2100 | */ |
2097 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2101 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2098 | { | 2102 | { |
2099 | struct buffer_head *bh = jh2bh(jh); | 2103 | struct buffer_head *bh = jh2bh(jh); |
2100 | 2104 | ||
2105 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2106 | get_bh(bh); | ||
2101 | jbd_lock_bh_state(bh); | 2107 | jbd_lock_bh_state(bh); |
2102 | spin_lock(&journal->j_list_lock); | 2108 | spin_lock(&journal->j_list_lock); |
2103 | |||
2104 | __jbd2_journal_refile_buffer(jh); | 2109 | __jbd2_journal_refile_buffer(jh); |
2105 | jbd_unlock_bh_state(bh); | 2110 | jbd_unlock_bh_state(bh); |
2106 | jbd2_journal_remove_journal_head(bh); | ||
2107 | |||
2108 | spin_unlock(&journal->j_list_lock); | 2111 | spin_unlock(&journal->j_list_lock); |
2109 | __brelse(bh); | 2112 | __brelse(bh); |
2110 | } | 2113 | } |