aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd/transaction.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-06-24 17:11:59 -0400
committerJan Kara <jack@suse.cz>2011-06-27 05:44:37 -0400
commitbb189247f35688a3353545902c56290fb7d7754a (patch)
tree02f93da7f642f3e59050d1c2837a7a8a8e61b3aa /fs/jbd/transaction.c
parent2c2ea9451fc2a12ee57c8346f0da26969d07ee7f (diff)
jbd: Fix oops in journal_remove_journal_head()
journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 journal_commit_transaction() ... processing t_forget list __journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); journal_try_to_free_buffers() journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() journal_put_journal_head(jh) journal_remove_journal_head(bh); journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(), [__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd/transaction.c')
-rw-r--r--fs/jbd/transaction.c73
1 files changed, 37 insertions, 36 deletions
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index dc39efd05d54..7e59c6e66f9b 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -696,7 +696,6 @@ repeat:
696 if (!jh->b_transaction) { 696 if (!jh->b_transaction) {
697 JBUFFER_TRACE(jh, "no transaction"); 697 JBUFFER_TRACE(jh, "no transaction");
698 J_ASSERT_JH(jh, !jh->b_next_transaction); 698 J_ASSERT_JH(jh, !jh->b_next_transaction);
699 jh->b_transaction = transaction;
700 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 699 JBUFFER_TRACE(jh, "file as BJ_Reserved");
701 spin_lock(&journal->j_list_lock); 700 spin_lock(&journal->j_list_lock);
702 __journal_file_buffer(jh, transaction, BJ_Reserved); 701 __journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
818 * committed and so it's safe to clear the dirty bit. 817 * committed and so it's safe to clear the dirty bit.
819 */ 818 */
820 clear_buffer_dirty(jh2bh(jh)); 819 clear_buffer_dirty(jh2bh(jh));
821 jh->b_transaction = transaction;
822 820
823 /* first access by this transaction */ 821 /* first access by this transaction */
824 jh->b_modified = 0; 822 jh->b_modified = 0;
@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1069 ret = -EIO; 1067 ret = -EIO;
1070 goto no_journal; 1068 goto no_journal;
1071 } 1069 }
1072 1070 /* We might have slept so buffer could be refiled now */
1073 if (jh->b_transaction != NULL) { 1071 if (jh->b_transaction != NULL &&
1072 jh->b_transaction != handle->h_transaction) {
1074 JBUFFER_TRACE(jh, "unfile from commit"); 1073 JBUFFER_TRACE(jh, "unfile from commit");
1075 __journal_temp_unlink_buffer(jh); 1074 __journal_temp_unlink_buffer(jh);
1076 /* It still points to the committing 1075 /* It still points to the committing
@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1091 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { 1090 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1092 JBUFFER_TRACE(jh, "not on correct data list: unfile"); 1091 JBUFFER_TRACE(jh, "not on correct data list: unfile");
1093 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); 1092 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1094 __journal_temp_unlink_buffer(jh);
1095 jh->b_transaction = handle->h_transaction;
1096 JBUFFER_TRACE(jh, "file as data"); 1093 JBUFFER_TRACE(jh, "file as data");
1097 __journal_file_buffer(jh, handle->h_transaction, 1094 __journal_file_buffer(jh, handle->h_transaction,
1098 BJ_SyncData); 1095 BJ_SyncData);
@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
1300 __journal_file_buffer(jh, transaction, BJ_Forget); 1297 __journal_file_buffer(jh, transaction, BJ_Forget);
1301 } else { 1298 } else {
1302 __journal_unfile_buffer(jh); 1299 __journal_unfile_buffer(jh);
1303 journal_remove_journal_head(bh);
1304 __brelse(bh);
1305 if (!buffer_jbd(bh)) { 1300 if (!buffer_jbd(bh)) {
1306 spin_unlock(&journal->j_list_lock); 1301 spin_unlock(&journal->j_list_lock);
1307 jbd_unlock_bh_state(bh); 1302 jbd_unlock_bh_state(bh);
@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
1622 mark_buffer_dirty(bh); /* Expose it to the VM */ 1617 mark_buffer_dirty(bh); /* Expose it to the VM */
1623} 1618}
1624 1619
1620/*
1621 * Remove buffer from all transactions.
1622 *
1623 * Called with bh_state lock and j_list_lock
1624 *
1625 * jh and bh may be already freed when this function returns.
1626 */
1625void __journal_unfile_buffer(struct journal_head *jh) 1627void __journal_unfile_buffer(struct journal_head *jh)
1626{ 1628{
1627 __journal_temp_unlink_buffer(jh); 1629 __journal_temp_unlink_buffer(jh);
1628 jh->b_transaction = NULL; 1630 jh->b_transaction = NULL;
1631 journal_put_journal_head(jh);
1629} 1632}
1630 1633
1631void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) 1634void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1632{ 1635{
1633 jbd_lock_bh_state(jh2bh(jh)); 1636 struct buffer_head *bh = jh2bh(jh);
1637
1638 /* Get reference so that buffer cannot be freed before we unlock it */
1639 get_bh(bh);
1640 jbd_lock_bh_state(bh);
1634 spin_lock(&journal->j_list_lock); 1641 spin_lock(&journal->j_list_lock);
1635 __journal_unfile_buffer(jh); 1642 __journal_unfile_buffer(jh);
1636 spin_unlock(&journal->j_list_lock); 1643 spin_unlock(&journal->j_list_lock);
1637 jbd_unlock_bh_state(jh2bh(jh)); 1644 jbd_unlock_bh_state(bh);
1645 __brelse(bh);
1638} 1646}
1639 1647
1640/* 1648/*
@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1661 /* A written-back ordered data buffer */ 1669 /* A written-back ordered data buffer */
1662 JBUFFER_TRACE(jh, "release data"); 1670 JBUFFER_TRACE(jh, "release data");
1663 __journal_unfile_buffer(jh); 1671 __journal_unfile_buffer(jh);
1664 journal_remove_journal_head(bh);
1665 __brelse(bh);
1666 } 1672 }
1667 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { 1673 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1668 /* written-back checkpointed metadata buffer */ 1674 /* written-back checkpointed metadata buffer */
1669 if (jh->b_jlist == BJ_None) { 1675 if (jh->b_jlist == BJ_None) {
1670 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1676 JBUFFER_TRACE(jh, "remove from checkpoint list");
1671 __journal_remove_checkpoint(jh); 1677 __journal_remove_checkpoint(jh);
1672 journal_remove_journal_head(bh);
1673 __brelse(bh);
1674 } 1678 }
1675 } 1679 }
1676 spin_unlock(&journal->j_list_lock); 1680 spin_unlock(&journal->j_list_lock);
@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
1733 /* 1737 /*
1734 * We take our own ref against the journal_head here to avoid 1738 * We take our own ref against the journal_head here to avoid
1735 * having to add tons of locking around each instance of 1739 * having to add tons of locking around each instance of
1736 * journal_remove_journal_head() and journal_put_journal_head(). 1740 * journal_put_journal_head().
1737 */ 1741 */
1738 jh = journal_grab_journal_head(bh); 1742 jh = journal_grab_journal_head(bh);
1739 if (!jh) 1743 if (!jh)
@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1770 int may_free = 1; 1774 int may_free = 1;
1771 struct buffer_head *bh = jh2bh(jh); 1775 struct buffer_head *bh = jh2bh(jh);
1772 1776
1773 __journal_unfile_buffer(jh);
1774
1775 if (jh->b_cp_transaction) { 1777 if (jh->b_cp_transaction) {
1776 JBUFFER_TRACE(jh, "on running+cp transaction"); 1778 JBUFFER_TRACE(jh, "on running+cp transaction");
1779 __journal_temp_unlink_buffer(jh);
1777 /* 1780 /*
1778 * We don't want to write the buffer anymore, clear the 1781 * We don't want to write the buffer anymore, clear the
1779 * bit so that we don't confuse checks in 1782 * bit so that we don't confuse checks in
@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1784 may_free = 0; 1787 may_free = 0;
1785 } else { 1788 } else {
1786 JBUFFER_TRACE(jh, "on running transaction"); 1789 JBUFFER_TRACE(jh, "on running transaction");
1787 journal_remove_journal_head(bh); 1790 __journal_unfile_buffer(jh);
1788 __brelse(bh);
1789 } 1791 }
1790 return may_free; 1792 return may_free;
1791} 1793}
@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
2070 2072
2071 if (jh->b_transaction) 2073 if (jh->b_transaction)
2072 __journal_temp_unlink_buffer(jh); 2074 __journal_temp_unlink_buffer(jh);
2075 else
2076 journal_grab_journal_head(bh);
2073 jh->b_transaction = transaction; 2077 jh->b_transaction = transaction;
2074 2078
2075 switch (jlist) { 2079 switch (jlist) {
@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
2127 * already started to be used by a subsequent transaction, refile the 2131 * already started to be used by a subsequent transaction, refile the
2128 * buffer on that transaction's metadata list. 2132 * buffer on that transaction's metadata list.
2129 * 2133 *
2130 * Called under journal->j_list_lock 2134 * Called under j_list_lock
2131 *
2132 * Called under jbd_lock_bh_state(jh2bh(jh)) 2135 * Called under jbd_lock_bh_state(jh2bh(jh))
2136 *
2137 * jh and bh may be already free when this function returns
2133 */ 2138 */
2134void __journal_refile_buffer(struct journal_head *jh) 2139void __journal_refile_buffer(struct journal_head *jh)
2135{ 2140{
@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
2153 2158
2154 was_dirty = test_clear_buffer_jbddirty(bh); 2159 was_dirty = test_clear_buffer_jbddirty(bh);
2155 __journal_temp_unlink_buffer(jh); 2160 __journal_temp_unlink_buffer(jh);
2161 /*
2162 * We set b_transaction here because b_next_transaction will inherit
2163 * our jh reference and thus __journal_file_buffer() must not take a
2164 * new one.
2165 */
2156 jh->b_transaction = jh->b_next_transaction; 2166 jh->b_transaction = jh->b_next_transaction;
2157 jh->b_next_transaction = NULL; 2167 jh->b_next_transaction = NULL;
2158 if (buffer_freed(bh)) 2168 if (buffer_freed(bh))
@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
2169} 2179}
2170 2180
2171/* 2181/*
2172 * For the unlocked version of this call, also make sure that any 2182 * __journal_refile_buffer() with necessary locking added. We take our bh
2173 * hanging journal_head is cleaned up if necessary. 2183 * reference so that we can safely unlock bh.
2174 * 2184 *
2175 * __journal_refile_buffer is usually called as part of a single locked 2185 * The jh and bh may be freed by this call.
2176 * operation on a buffer_head, in which the caller is probably going to
2177 * be hooking the journal_head onto other lists. In that case it is up
2178 * to the caller to remove the journal_head if necessary. For the
2179 * unlocked journal_refile_buffer call, the caller isn't going to be
2180 * doing anything else to the buffer so we need to do the cleanup
2181 * ourselves to avoid a jh leak.
2182 *
2183 * *** The journal_head may be freed by this call! ***
2184 */ 2186 */
2185void journal_refile_buffer(journal_t *journal, struct journal_head *jh) 2187void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2186{ 2188{
2187 struct buffer_head *bh = jh2bh(jh); 2189 struct buffer_head *bh = jh2bh(jh);
2188 2190
2191 /* Get reference so that buffer cannot be freed before we unlock it */
2192 get_bh(bh);
2189 jbd_lock_bh_state(bh); 2193 jbd_lock_bh_state(bh);
2190 spin_lock(&journal->j_list_lock); 2194 spin_lock(&journal->j_list_lock);
2191
2192 __journal_refile_buffer(jh); 2195 __journal_refile_buffer(jh);
2193 jbd_unlock_bh_state(bh); 2196 jbd_unlock_bh_state(bh);
2194 journal_remove_journal_head(bh);
2195
2196 spin_unlock(&journal->j_list_lock); 2197 spin_unlock(&journal->j_list_lock);
2197 __brelse(bh); 2198 __brelse(bh);
2198} 2199}