diff options
author | Jan Kara <jack@suse.cz> | 2011-06-24 17:11:59 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2011-06-27 05:44:37 -0400 |
commit | bb189247f35688a3353545902c56290fb7d7754a (patch) | |
tree | 02f93da7f642f3e59050d1c2837a7a8a8e61b3aa /fs/jbd/transaction.c | |
parent | 2c2ea9451fc2a12ee57c8346f0da26969d07ee7f (diff) |
jbd: Fix oops in journal_remove_journal_head()
journal_remove_journal_head() can oops when trying to access journal_head
returned by bh2jh(). This is caused for example by the following race:
TASK1 TASK2
journal_commit_transaction()
...
processing t_forget list
__journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh);
journal_try_to_free_buffers()
journal_grab_journal_head(bh)
jbd_lock_bh_state(bh)
__journal_try_to_free_buffer()
journal_put_journal_head(jh)
journal_remove_journal_head(bh);
journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not
part of any transaction and thus frees journal_head before TASK1 gets to doing
so. Note that even buffer_head can be released by try_to_free_buffers() after
journal_put_journal_head() which adds even larger opportunity for oops (but I
didn't see this happen in reality).
Fix the problem by making transactions hold their own journal_head reference
(in b_jcount). That way we don't have to remove journal_head explicitely via
journal_remove_journal_head() and instead just remove journal_head when
b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(),
[__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free
journal_head which needs modification of a few callers. Also we have to be
careful because once journal_head is removed, buffer_head might be freed as
well. So we have to get our own buffer_head reference where it matters.
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd/transaction.c')
-rw-r--r-- | fs/jbd/transaction.c | 73 |
1 files changed, 37 insertions, 36 deletions
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index dc39efd05d54..7e59c6e66f9b 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -696,7 +696,6 @@ repeat: | |||
696 | if (!jh->b_transaction) { | 696 | if (!jh->b_transaction) { |
697 | JBUFFER_TRACE(jh, "no transaction"); | 697 | JBUFFER_TRACE(jh, "no transaction"); |
698 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 698 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
699 | jh->b_transaction = transaction; | ||
700 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 699 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
701 | spin_lock(&journal->j_list_lock); | 700 | spin_lock(&journal->j_list_lock); |
702 | __journal_file_buffer(jh, transaction, BJ_Reserved); | 701 | __journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
818 | * committed and so it's safe to clear the dirty bit. | 817 | * committed and so it's safe to clear the dirty bit. |
819 | */ | 818 | */ |
820 | clear_buffer_dirty(jh2bh(jh)); | 819 | clear_buffer_dirty(jh2bh(jh)); |
821 | jh->b_transaction = transaction; | ||
822 | 820 | ||
823 | /* first access by this transaction */ | 821 | /* first access by this transaction */ |
824 | jh->b_modified = 0; | 822 | jh->b_modified = 0; |
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1069 | ret = -EIO; | 1067 | ret = -EIO; |
1070 | goto no_journal; | 1068 | goto no_journal; |
1071 | } | 1069 | } |
1072 | 1070 | /* We might have slept so buffer could be refiled now */ | |
1073 | if (jh->b_transaction != NULL) { | 1071 | if (jh->b_transaction != NULL && |
1072 | jh->b_transaction != handle->h_transaction) { | ||
1074 | JBUFFER_TRACE(jh, "unfile from commit"); | 1073 | JBUFFER_TRACE(jh, "unfile from commit"); |
1075 | __journal_temp_unlink_buffer(jh); | 1074 | __journal_temp_unlink_buffer(jh); |
1076 | /* It still points to the committing | 1075 | /* It still points to the committing |
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
1091 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | 1090 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { |
1092 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | 1091 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); |
1093 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | 1092 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); |
1094 | __journal_temp_unlink_buffer(jh); | ||
1095 | jh->b_transaction = handle->h_transaction; | ||
1096 | JBUFFER_TRACE(jh, "file as data"); | 1093 | JBUFFER_TRACE(jh, "file as data"); |
1097 | __journal_file_buffer(jh, handle->h_transaction, | 1094 | __journal_file_buffer(jh, handle->h_transaction, |
1098 | BJ_SyncData); | 1095 | BJ_SyncData); |
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1300 | __journal_file_buffer(jh, transaction, BJ_Forget); | 1297 | __journal_file_buffer(jh, transaction, BJ_Forget); |
1301 | } else { | 1298 | } else { |
1302 | __journal_unfile_buffer(jh); | 1299 | __journal_unfile_buffer(jh); |
1303 | journal_remove_journal_head(bh); | ||
1304 | __brelse(bh); | ||
1305 | if (!buffer_jbd(bh)) { | 1300 | if (!buffer_jbd(bh)) { |
1306 | spin_unlock(&journal->j_list_lock); | 1301 | spin_unlock(&journal->j_list_lock); |
1307 | jbd_unlock_bh_state(bh); | 1302 | jbd_unlock_bh_state(bh); |
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) | |||
1622 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1617 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1623 | } | 1618 | } |
1624 | 1619 | ||
1620 | /* | ||
1621 | * Remove buffer from all transactions. | ||
1622 | * | ||
1623 | * Called with bh_state lock and j_list_lock | ||
1624 | * | ||
1625 | * jh and bh may be already freed when this function returns. | ||
1626 | */ | ||
1625 | void __journal_unfile_buffer(struct journal_head *jh) | 1627 | void __journal_unfile_buffer(struct journal_head *jh) |
1626 | { | 1628 | { |
1627 | __journal_temp_unlink_buffer(jh); | 1629 | __journal_temp_unlink_buffer(jh); |
1628 | jh->b_transaction = NULL; | 1630 | jh->b_transaction = NULL; |
1631 | journal_put_journal_head(jh); | ||
1629 | } | 1632 | } |
1630 | 1633 | ||
1631 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1634 | void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1632 | { | 1635 | { |
1633 | jbd_lock_bh_state(jh2bh(jh)); | 1636 | struct buffer_head *bh = jh2bh(jh); |
1637 | |||
1638 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1639 | get_bh(bh); | ||
1640 | jbd_lock_bh_state(bh); | ||
1634 | spin_lock(&journal->j_list_lock); | 1641 | spin_lock(&journal->j_list_lock); |
1635 | __journal_unfile_buffer(jh); | 1642 | __journal_unfile_buffer(jh); |
1636 | spin_unlock(&journal->j_list_lock); | 1643 | spin_unlock(&journal->j_list_lock); |
1637 | jbd_unlock_bh_state(jh2bh(jh)); | 1644 | jbd_unlock_bh_state(bh); |
1645 | __brelse(bh); | ||
1638 | } | 1646 | } |
1639 | 1647 | ||
1640 | /* | 1648 | /* |
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1661 | /* A written-back ordered data buffer */ | 1669 | /* A written-back ordered data buffer */ |
1662 | JBUFFER_TRACE(jh, "release data"); | 1670 | JBUFFER_TRACE(jh, "release data"); |
1663 | __journal_unfile_buffer(jh); | 1671 | __journal_unfile_buffer(jh); |
1664 | journal_remove_journal_head(bh); | ||
1665 | __brelse(bh); | ||
1666 | } | 1672 | } |
1667 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1673 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
1668 | /* written-back checkpointed metadata buffer */ | 1674 | /* written-back checkpointed metadata buffer */ |
1669 | if (jh->b_jlist == BJ_None) { | 1675 | if (jh->b_jlist == BJ_None) { |
1670 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1676 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1671 | __journal_remove_checkpoint(jh); | 1677 | __journal_remove_checkpoint(jh); |
1672 | journal_remove_journal_head(bh); | ||
1673 | __brelse(bh); | ||
1674 | } | 1678 | } |
1675 | } | 1679 | } |
1676 | spin_unlock(&journal->j_list_lock); | 1680 | spin_unlock(&journal->j_list_lock); |
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal, | |||
1733 | /* | 1737 | /* |
1734 | * We take our own ref against the journal_head here to avoid | 1738 | * We take our own ref against the journal_head here to avoid |
1735 | * having to add tons of locking around each instance of | 1739 | * having to add tons of locking around each instance of |
1736 | * journal_remove_journal_head() and journal_put_journal_head(). | 1740 | * journal_put_journal_head(). |
1737 | */ | 1741 | */ |
1738 | jh = journal_grab_journal_head(bh); | 1742 | jh = journal_grab_journal_head(bh); |
1739 | if (!jh) | 1743 | if (!jh) |
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1770 | int may_free = 1; | 1774 | int may_free = 1; |
1771 | struct buffer_head *bh = jh2bh(jh); | 1775 | struct buffer_head *bh = jh2bh(jh); |
1772 | 1776 | ||
1773 | __journal_unfile_buffer(jh); | ||
1774 | |||
1775 | if (jh->b_cp_transaction) { | 1777 | if (jh->b_cp_transaction) { |
1776 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1778 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1779 | __journal_temp_unlink_buffer(jh); | ||
1777 | /* | 1780 | /* |
1778 | * We don't want to write the buffer anymore, clear the | 1781 | * We don't want to write the buffer anymore, clear the |
1779 | * bit so that we don't confuse checks in | 1782 | * bit so that we don't confuse checks in |
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1784 | may_free = 0; | 1787 | may_free = 0; |
1785 | } else { | 1788 | } else { |
1786 | JBUFFER_TRACE(jh, "on running transaction"); | 1789 | JBUFFER_TRACE(jh, "on running transaction"); |
1787 | journal_remove_journal_head(bh); | 1790 | __journal_unfile_buffer(jh); |
1788 | __brelse(bh); | ||
1789 | } | 1791 | } |
1790 | return may_free; | 1792 | return may_free; |
1791 | } | 1793 | } |
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh, | |||
2070 | 2072 | ||
2071 | if (jh->b_transaction) | 2073 | if (jh->b_transaction) |
2072 | __journal_temp_unlink_buffer(jh); | 2074 | __journal_temp_unlink_buffer(jh); |
2075 | else | ||
2076 | journal_grab_journal_head(bh); | ||
2073 | jh->b_transaction = transaction; | 2077 | jh->b_transaction = transaction; |
2074 | 2078 | ||
2075 | switch (jlist) { | 2079 | switch (jlist) { |
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh, | |||
2127 | * already started to be used by a subsequent transaction, refile the | 2131 | * already started to be used by a subsequent transaction, refile the |
2128 | * buffer on that transaction's metadata list. | 2132 | * buffer on that transaction's metadata list. |
2129 | * | 2133 | * |
2130 | * Called under journal->j_list_lock | 2134 | * Called under j_list_lock |
2131 | * | ||
2132 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2135 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2136 | * | ||
2137 | * jh and bh may be already free when this function returns | ||
2133 | */ | 2138 | */ |
2134 | void __journal_refile_buffer(struct journal_head *jh) | 2139 | void __journal_refile_buffer(struct journal_head *jh) |
2135 | { | 2140 | { |
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2153 | 2158 | ||
2154 | was_dirty = test_clear_buffer_jbddirty(bh); | 2159 | was_dirty = test_clear_buffer_jbddirty(bh); |
2155 | __journal_temp_unlink_buffer(jh); | 2160 | __journal_temp_unlink_buffer(jh); |
2161 | /* | ||
2162 | * We set b_transaction here because b_next_transaction will inherit | ||
2163 | * our jh reference and thus __journal_file_buffer() must not take a | ||
2164 | * new one. | ||
2165 | */ | ||
2156 | jh->b_transaction = jh->b_next_transaction; | 2166 | jh->b_transaction = jh->b_next_transaction; |
2157 | jh->b_next_transaction = NULL; | 2167 | jh->b_next_transaction = NULL; |
2158 | if (buffer_freed(bh)) | 2168 | if (buffer_freed(bh)) |
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh) | |||
2169 | } | 2179 | } |
2170 | 2180 | ||
2171 | /* | 2181 | /* |
2172 | * For the unlocked version of this call, also make sure that any | 2182 | * __journal_refile_buffer() with necessary locking added. We take our bh |
2173 | * hanging journal_head is cleaned up if necessary. | 2183 | * reference so that we can safely unlock bh. |
2174 | * | 2184 | * |
2175 | * __journal_refile_buffer is usually called as part of a single locked | 2185 | * The jh and bh may be freed by this call. |
2176 | * operation on a buffer_head, in which the caller is probably going to | ||
2177 | * be hooking the journal_head onto other lists. In that case it is up | ||
2178 | * to the caller to remove the journal_head if necessary. For the | ||
2179 | * unlocked journal_refile_buffer call, the caller isn't going to be | ||
2180 | * doing anything else to the buffer so we need to do the cleanup | ||
2181 | * ourselves to avoid a jh leak. | ||
2182 | * | ||
2183 | * *** The journal_head may be freed by this call! *** | ||
2184 | */ | 2186 | */ |
2185 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2187 | void journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2186 | { | 2188 | { |
2187 | struct buffer_head *bh = jh2bh(jh); | 2189 | struct buffer_head *bh = jh2bh(jh); |
2188 | 2190 | ||
2191 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2192 | get_bh(bh); | ||
2189 | jbd_lock_bh_state(bh); | 2193 | jbd_lock_bh_state(bh); |
2190 | spin_lock(&journal->j_list_lock); | 2194 | spin_lock(&journal->j_list_lock); |
2191 | |||
2192 | __journal_refile_buffer(jh); | 2195 | __journal_refile_buffer(jh); |
2193 | jbd_unlock_bh_state(bh); | 2196 | jbd_unlock_bh_state(bh); |
2194 | journal_remove_journal_head(bh); | ||
2195 | |||
2196 | spin_unlock(&journal->j_list_lock); | 2197 | spin_unlock(&journal->j_list_lock); |
2197 | __brelse(bh); | 2198 | __brelse(bh); |
2198 | } | 2199 | } |