aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/transaction.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-06-13 15:38:22 -0400
committerTheodore Ts'o <tytso@mit.edu>2011-06-13 15:38:22 -0400
commitde1b794130b130e77ffa975bb58cb843744f9ae5 (patch)
tree8c4b37582128dc36c2b7385294fba58d017ce3e8 /fs/jbd2/transaction.c
parent1fb74cda1b5e9c6207225fda5ef7504e815ce0e0 (diff)
jbd2: Fix oops in jbd2_journal_remove_journal_head()
jbd2_journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 jbd2_journal_commit_transaction() ... processing t_forget list __jbd2_journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); jbd2_journal_try_to_free_buffers() jbd2_journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() jbd2_journal_put_journal_head(jh) jbd2_journal_remove_journal_head(bh); jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after jbd2_journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via jbd2_journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]jbd2_journal_refile_buffer(), [__]jbd2_journal_unfile_buffer(), and __jdb2_journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r--fs/jbd2/transaction.c67
1 files changed, 35 insertions, 32 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 547b101049e5..2d7109414cdd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31 31
32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 32static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
33static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
33 34
34/* 35/*
35 * jbd2_get_transaction: obtain a new transaction_t object. 36 * jbd2_get_transaction: obtain a new transaction_t object.
@@ -764,7 +765,6 @@ repeat:
764 if (!jh->b_transaction) { 765 if (!jh->b_transaction) {
765 JBUFFER_TRACE(jh, "no transaction"); 766 JBUFFER_TRACE(jh, "no transaction");
766 J_ASSERT_JH(jh, !jh->b_next_transaction); 767 J_ASSERT_JH(jh, !jh->b_next_transaction);
767 jh->b_transaction = transaction;
768 JBUFFER_TRACE(jh, "file as BJ_Reserved"); 768 JBUFFER_TRACE(jh, "file as BJ_Reserved");
769 spin_lock(&journal->j_list_lock); 769 spin_lock(&journal->j_list_lock);
770 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 770 __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
895 * committed and so it's safe to clear the dirty bit. 895 * committed and so it's safe to clear the dirty bit.
896 */ 896 */
897 clear_buffer_dirty(jh2bh(jh)); 897 clear_buffer_dirty(jh2bh(jh));
898 jh->b_transaction = transaction;
899
900 /* first access by this transaction */ 898 /* first access by this transaction */
901 jh->b_modified = 0; 899 jh->b_modified = 0;
902 900
@@ -1230,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1230 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 1228 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1231 } else { 1229 } else {
1232 __jbd2_journal_unfile_buffer(jh); 1230 __jbd2_journal_unfile_buffer(jh);
1233 jbd2_journal_remove_journal_head(bh);
1234 __brelse(bh);
1235 if (!buffer_jbd(bh)) { 1231 if (!buffer_jbd(bh)) {
1236 spin_unlock(&journal->j_list_lock); 1232 spin_unlock(&journal->j_list_lock);
1237 jbd_unlock_bh_state(bh); 1233 jbd_unlock_bh_state(bh);
@@ -1554,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1554 mark_buffer_dirty(bh); /* Expose it to the VM */ 1550 mark_buffer_dirty(bh); /* Expose it to the VM */
1555} 1551}
1556 1552
1557void __jbd2_journal_unfile_buffer(struct journal_head *jh) 1553/*
1554 * Remove buffer from all transactions.
1555 *
1556 * Called with bh_state lock and j_list_lock
1557 *
1558 * jh and bh may be already freed when this function returns.
1559 */
1560static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
1558{ 1561{
1559 __jbd2_journal_temp_unlink_buffer(jh); 1562 __jbd2_journal_temp_unlink_buffer(jh);
1560 jh->b_transaction = NULL; 1563 jh->b_transaction = NULL;
1564 jbd2_journal_put_journal_head(jh);
1561} 1565}
1562 1566
1563void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) 1567void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
1564{ 1568{
1565 jbd_lock_bh_state(jh2bh(jh)); 1569 struct buffer_head *bh = jh2bh(jh);
1570
1571 /* Get reference so that buffer cannot be freed before we unlock it */
1572 get_bh(bh);
1573 jbd_lock_bh_state(bh);
1566 spin_lock(&journal->j_list_lock); 1574 spin_lock(&journal->j_list_lock);
1567 __jbd2_journal_unfile_buffer(jh); 1575 __jbd2_journal_unfile_buffer(jh);
1568 spin_unlock(&journal->j_list_lock); 1576 spin_unlock(&journal->j_list_lock);
1569 jbd_unlock_bh_state(jh2bh(jh)); 1577 jbd_unlock_bh_state(bh);
1578 __brelse(bh);
1570} 1579}
1571 1580
1572/* 1581/*
@@ -1593,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1593 if (jh->b_jlist == BJ_None) { 1602 if (jh->b_jlist == BJ_None) {
1594 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1603 JBUFFER_TRACE(jh, "remove from checkpoint list");
1595 __jbd2_journal_remove_checkpoint(jh); 1604 __jbd2_journal_remove_checkpoint(jh);
1596 jbd2_journal_remove_journal_head(bh);
1597 __brelse(bh);
1598 } 1605 }
1599 } 1606 }
1600 spin_unlock(&journal->j_list_lock); 1607 spin_unlock(&journal->j_list_lock);
@@ -1657,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1657 /* 1664 /*
1658 * We take our own ref against the journal_head here to avoid 1665 * We take our own ref against the journal_head here to avoid
1659 * having to add tons of locking around each instance of 1666 * having to add tons of locking around each instance of
1660 * jbd2_journal_remove_journal_head() and
1661 * jbd2_journal_put_journal_head(). 1667 * jbd2_journal_put_journal_head().
1662 */ 1668 */
1663 jh = jbd2_journal_grab_journal_head(bh); 1669 jh = jbd2_journal_grab_journal_head(bh);
@@ -1695,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1695 int may_free = 1; 1701 int may_free = 1;
1696 struct buffer_head *bh = jh2bh(jh); 1702 struct buffer_head *bh = jh2bh(jh);
1697 1703
1698 __jbd2_journal_unfile_buffer(jh);
1699
1700 if (jh->b_cp_transaction) { 1704 if (jh->b_cp_transaction) {
1701 JBUFFER_TRACE(jh, "on running+cp transaction"); 1705 JBUFFER_TRACE(jh, "on running+cp transaction");
1706 __jbd2_journal_temp_unlink_buffer(jh);
1702 /* 1707 /*
1703 * We don't want to write the buffer anymore, clear the 1708 * We don't want to write the buffer anymore, clear the
1704 * bit so that we don't confuse checks in 1709 * bit so that we don't confuse checks in
@@ -1709,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1709 may_free = 0; 1714 may_free = 0;
1710 } else { 1715 } else {
1711 JBUFFER_TRACE(jh, "on running transaction"); 1716 JBUFFER_TRACE(jh, "on running transaction");
1712 jbd2_journal_remove_journal_head(bh); 1717 __jbd2_journal_unfile_buffer(jh);
1713 __brelse(bh);
1714 } 1718 }
1715 return may_free; 1719 return may_free;
1716} 1720}
@@ -1988,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
1988 1992
1989 if (jh->b_transaction) 1993 if (jh->b_transaction)
1990 __jbd2_journal_temp_unlink_buffer(jh); 1994 __jbd2_journal_temp_unlink_buffer(jh);
1995 else
1996 jbd2_journal_grab_journal_head(bh);
1991 jh->b_transaction = transaction; 1997 jh->b_transaction = transaction;
1992 1998
1993 switch (jlist) { 1999 switch (jlist) {
@@ -2039,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
2039 * already started to be used by a subsequent transaction, refile the 2045 * already started to be used by a subsequent transaction, refile the
2040 * buffer on that transaction's metadata list. 2046 * buffer on that transaction's metadata list.
2041 * 2047 *
2042 * Called under journal->j_list_lock 2048 * Called under j_list_lock
2043 *
2044 * Called under jbd_lock_bh_state(jh2bh(jh)) 2049 * Called under jbd_lock_bh_state(jh2bh(jh))
2050 *
2051 * jh and bh may be already free when this function returns
2045 */ 2052 */
2046void __jbd2_journal_refile_buffer(struct journal_head *jh) 2053void __jbd2_journal_refile_buffer(struct journal_head *jh)
2047{ 2054{
@@ -2065,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2065 2072
2066 was_dirty = test_clear_buffer_jbddirty(bh); 2073 was_dirty = test_clear_buffer_jbddirty(bh);
2067 __jbd2_journal_temp_unlink_buffer(jh); 2074 __jbd2_journal_temp_unlink_buffer(jh);
2075 /*
2076 * We set b_transaction here because b_next_transaction will inherit
2077 * our jh reference and thus __jbd2_journal_file_buffer() must not
2078 * take a new one.
2079 */
2068 jh->b_transaction = jh->b_next_transaction; 2080 jh->b_transaction = jh->b_next_transaction;
2069 jh->b_next_transaction = NULL; 2081 jh->b_next_transaction = NULL;
2070 if (buffer_freed(bh)) 2082 if (buffer_freed(bh))
@@ -2081,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
2081} 2093}
2082 2094
2083/* 2095/*
2084 * For the unlocked version of this call, also make sure that any 2096 * __jbd2_journal_refile_buffer() with necessary locking added. We take our
2085 * hanging journal_head is cleaned up if necessary. 2097 * bh reference so that we can safely unlock bh.
2086 * 2098 *
2087 * __jbd2_journal_refile_buffer is usually called as part of a single locked 2099 * The jh and bh may be freed by this call.
2088 * operation on a buffer_head, in which the caller is probably going to
2089 * be hooking the journal_head onto other lists. In that case it is up
2090 * to the caller to remove the journal_head if necessary. For the
2091 * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
2092 * doing anything else to the buffer so we need to do the cleanup
2093 * ourselves to avoid a jh leak.
2094 *
2095 * *** The journal_head may be freed by this call! ***
2096 */ 2100 */
2097void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) 2101void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2098{ 2102{
2099 struct buffer_head *bh = jh2bh(jh); 2103 struct buffer_head *bh = jh2bh(jh);
2100 2104
2105 /* Get reference so that buffer cannot be freed before we unlock it */
2106 get_bh(bh);
2101 jbd_lock_bh_state(bh); 2107 jbd_lock_bh_state(bh);
2102 spin_lock(&journal->j_list_lock); 2108 spin_lock(&journal->j_list_lock);
2103
2104 __jbd2_journal_refile_buffer(jh); 2109 __jbd2_journal_refile_buffer(jh);
2105 jbd_unlock_bh_state(bh); 2110 jbd_unlock_bh_state(bh);
2106 jbd2_journal_remove_journal_head(bh);
2107
2108 spin_unlock(&journal->j_list_lock); 2111 spin_unlock(&journal->j_list_lock);
2109 __brelse(bh); 2112 __brelse(bh);
2110} 2113}