diff options
author | Jan Kara <jack@suse.cz> | 2011-06-13 15:38:22 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-06-13 15:38:22 -0400 |
commit | de1b794130b130e77ffa975bb58cb843744f9ae5 (patch) | |
tree | 8c4b37582128dc36c2b7385294fba58d017ce3e8 /fs/jbd2/transaction.c | |
parent | 1fb74cda1b5e9c6207225fda5ef7504e815ce0e0 (diff) |
jbd2: Fix oops in jbd2_journal_remove_journal_head()
jbd2_journal_remove_journal_head() can oops when trying to access
journal_head returned by bh2jh(). This is caused for example by the
following race:
TASK1 TASK2
jbd2_journal_commit_transaction()
...
processing t_forget list
__jbd2_journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh);
jbd2_journal_try_to_free_buffers()
jbd2_journal_grab_journal_head(bh)
jbd_lock_bh_state(bh)
__journal_try_to_free_buffer()
jbd2_journal_put_journal_head(jh)
jbd2_journal_remove_journal_head(bh);
jbd2_journal_put_journal_head() in TASK2 sees that b_jcount == 0 and
buffer is not part of any transaction and thus frees journal_head
before TASK1 gets to doing so. Note that even buffer_head can be
released by try_to_free_buffers() after
jbd2_journal_put_journal_head() which adds even larger opportunity for
oops (but I didn't see this happen in reality).
Fix the problem by making transactions hold their own journal_head
reference (in b_jcount). That way we don't have to remove journal_head
explicitely via jbd2_journal_remove_journal_head() and instead just
remove journal_head when b_jcount drops to zero. The result of this is
that [__]jbd2_journal_refile_buffer(),
[__]jbd2_journal_unfile_buffer(), and
__jdb2_journal_remove_checkpoint() can free journal_head which needs
modification of a few callers. Also we have to be careful because once
journal_head is removed, buffer_head might be freed as well. So we
have to get our own buffer_head reference where it matters.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 67 |
1 files changed, 35 insertions, 32 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 547b101049e5..2d7109414cdd 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | 31 | ||
32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); | 32 | static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); |
33 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh); | ||
33 | 34 | ||
34 | /* | 35 | /* |
35 | * jbd2_get_transaction: obtain a new transaction_t object. | 36 | * jbd2_get_transaction: obtain a new transaction_t object. |
@@ -764,7 +765,6 @@ repeat: | |||
764 | if (!jh->b_transaction) { | 765 | if (!jh->b_transaction) { |
765 | JBUFFER_TRACE(jh, "no transaction"); | 766 | JBUFFER_TRACE(jh, "no transaction"); |
766 | J_ASSERT_JH(jh, !jh->b_next_transaction); | 767 | J_ASSERT_JH(jh, !jh->b_next_transaction); |
767 | jh->b_transaction = transaction; | ||
768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 768 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
769 | spin_lock(&journal->j_list_lock); | 769 | spin_lock(&journal->j_list_lock); |
770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); | 770 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); |
@@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
895 | * committed and so it's safe to clear the dirty bit. | 895 | * committed and so it's safe to clear the dirty bit. |
896 | */ | 896 | */ |
897 | clear_buffer_dirty(jh2bh(jh)); | 897 | clear_buffer_dirty(jh2bh(jh)); |
898 | jh->b_transaction = transaction; | ||
899 | |||
900 | /* first access by this transaction */ | 898 | /* first access by this transaction */ |
901 | jh->b_modified = 0; | 899 | jh->b_modified = 0; |
902 | 900 | ||
@@ -1230,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1230 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); | 1228 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |
1231 | } else { | 1229 | } else { |
1232 | __jbd2_journal_unfile_buffer(jh); | 1230 | __jbd2_journal_unfile_buffer(jh); |
1233 | jbd2_journal_remove_journal_head(bh); | ||
1234 | __brelse(bh); | ||
1235 | if (!buffer_jbd(bh)) { | 1231 | if (!buffer_jbd(bh)) { |
1236 | spin_unlock(&journal->j_list_lock); | 1232 | spin_unlock(&journal->j_list_lock); |
1237 | jbd_unlock_bh_state(bh); | 1233 | jbd_unlock_bh_state(bh); |
@@ -1554,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1554 | mark_buffer_dirty(bh); /* Expose it to the VM */ | 1550 | mark_buffer_dirty(bh); /* Expose it to the VM */ |
1555 | } | 1551 | } |
1556 | 1552 | ||
1557 | void __jbd2_journal_unfile_buffer(struct journal_head *jh) | 1553 | /* |
1554 | * Remove buffer from all transactions. | ||
1555 | * | ||
1556 | * Called with bh_state lock and j_list_lock | ||
1557 | * | ||
1558 | * jh and bh may be already freed when this function returns. | ||
1559 | */ | ||
1560 | static void __jbd2_journal_unfile_buffer(struct journal_head *jh) | ||
1558 | { | 1561 | { |
1559 | __jbd2_journal_temp_unlink_buffer(jh); | 1562 | __jbd2_journal_temp_unlink_buffer(jh); |
1560 | jh->b_transaction = NULL; | 1563 | jh->b_transaction = NULL; |
1564 | jbd2_journal_put_journal_head(jh); | ||
1561 | } | 1565 | } |
1562 | 1566 | ||
1563 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) | 1567 | void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) |
1564 | { | 1568 | { |
1565 | jbd_lock_bh_state(jh2bh(jh)); | 1569 | struct buffer_head *bh = jh2bh(jh); |
1570 | |||
1571 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
1572 | get_bh(bh); | ||
1573 | jbd_lock_bh_state(bh); | ||
1566 | spin_lock(&journal->j_list_lock); | 1574 | spin_lock(&journal->j_list_lock); |
1567 | __jbd2_journal_unfile_buffer(jh); | 1575 | __jbd2_journal_unfile_buffer(jh); |
1568 | spin_unlock(&journal->j_list_lock); | 1576 | spin_unlock(&journal->j_list_lock); |
1569 | jbd_unlock_bh_state(jh2bh(jh)); | 1577 | jbd_unlock_bh_state(bh); |
1578 | __brelse(bh); | ||
1570 | } | 1579 | } |
1571 | 1580 | ||
1572 | /* | 1581 | /* |
@@ -1593,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1593 | if (jh->b_jlist == BJ_None) { | 1602 | if (jh->b_jlist == BJ_None) { |
1594 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1603 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1595 | __jbd2_journal_remove_checkpoint(jh); | 1604 | __jbd2_journal_remove_checkpoint(jh); |
1596 | jbd2_journal_remove_journal_head(bh); | ||
1597 | __brelse(bh); | ||
1598 | } | 1605 | } |
1599 | } | 1606 | } |
1600 | spin_unlock(&journal->j_list_lock); | 1607 | spin_unlock(&journal->j_list_lock); |
@@ -1657,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, | |||
1657 | /* | 1664 | /* |
1658 | * We take our own ref against the journal_head here to avoid | 1665 | * We take our own ref against the journal_head here to avoid |
1659 | * having to add tons of locking around each instance of | 1666 | * having to add tons of locking around each instance of |
1660 | * jbd2_journal_remove_journal_head() and | ||
1661 | * jbd2_journal_put_journal_head(). | 1667 | * jbd2_journal_put_journal_head(). |
1662 | */ | 1668 | */ |
1663 | jh = jbd2_journal_grab_journal_head(bh); | 1669 | jh = jbd2_journal_grab_journal_head(bh); |
@@ -1695,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1695 | int may_free = 1; | 1701 | int may_free = 1; |
1696 | struct buffer_head *bh = jh2bh(jh); | 1702 | struct buffer_head *bh = jh2bh(jh); |
1697 | 1703 | ||
1698 | __jbd2_journal_unfile_buffer(jh); | ||
1699 | |||
1700 | if (jh->b_cp_transaction) { | 1704 | if (jh->b_cp_transaction) { |
1701 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1705 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
1706 | __jbd2_journal_temp_unlink_buffer(jh); | ||
1702 | /* | 1707 | /* |
1703 | * We don't want to write the buffer anymore, clear the | 1708 | * We don't want to write the buffer anymore, clear the |
1704 | * bit so that we don't confuse checks in | 1709 | * bit so that we don't confuse checks in |
@@ -1709,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1709 | may_free = 0; | 1714 | may_free = 0; |
1710 | } else { | 1715 | } else { |
1711 | JBUFFER_TRACE(jh, "on running transaction"); | 1716 | JBUFFER_TRACE(jh, "on running transaction"); |
1712 | jbd2_journal_remove_journal_head(bh); | 1717 | __jbd2_journal_unfile_buffer(jh); |
1713 | __brelse(bh); | ||
1714 | } | 1718 | } |
1715 | return may_free; | 1719 | return may_free; |
1716 | } | 1720 | } |
@@ -1988,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
1988 | 1992 | ||
1989 | if (jh->b_transaction) | 1993 | if (jh->b_transaction) |
1990 | __jbd2_journal_temp_unlink_buffer(jh); | 1994 | __jbd2_journal_temp_unlink_buffer(jh); |
1995 | else | ||
1996 | jbd2_journal_grab_journal_head(bh); | ||
1991 | jh->b_transaction = transaction; | 1997 | jh->b_transaction = transaction; |
1992 | 1998 | ||
1993 | switch (jlist) { | 1999 | switch (jlist) { |
@@ -2039,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh, | |||
2039 | * already started to be used by a subsequent transaction, refile the | 2045 | * already started to be used by a subsequent transaction, refile the |
2040 | * buffer on that transaction's metadata list. | 2046 | * buffer on that transaction's metadata list. |
2041 | * | 2047 | * |
2042 | * Called under journal->j_list_lock | 2048 | * Called under j_list_lock |
2043 | * | ||
2044 | * Called under jbd_lock_bh_state(jh2bh(jh)) | 2049 | * Called under jbd_lock_bh_state(jh2bh(jh)) |
2050 | * | ||
2051 | * jh and bh may be already free when this function returns | ||
2045 | */ | 2052 | */ |
2046 | void __jbd2_journal_refile_buffer(struct journal_head *jh) | 2053 | void __jbd2_journal_refile_buffer(struct journal_head *jh) |
2047 | { | 2054 | { |
@@ -2065,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2065 | 2072 | ||
2066 | was_dirty = test_clear_buffer_jbddirty(bh); | 2073 | was_dirty = test_clear_buffer_jbddirty(bh); |
2067 | __jbd2_journal_temp_unlink_buffer(jh); | 2074 | __jbd2_journal_temp_unlink_buffer(jh); |
2075 | /* | ||
2076 | * We set b_transaction here because b_next_transaction will inherit | ||
2077 | * our jh reference and thus __jbd2_journal_file_buffer() must not | ||
2078 | * take a new one. | ||
2079 | */ | ||
2068 | jh->b_transaction = jh->b_next_transaction; | 2080 | jh->b_transaction = jh->b_next_transaction; |
2069 | jh->b_next_transaction = NULL; | 2081 | jh->b_next_transaction = NULL; |
2070 | if (buffer_freed(bh)) | 2082 | if (buffer_freed(bh)) |
@@ -2081,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) | |||
2081 | } | 2093 | } |
2082 | 2094 | ||
2083 | /* | 2095 | /* |
2084 | * For the unlocked version of this call, also make sure that any | 2096 | * __jbd2_journal_refile_buffer() with necessary locking added. We take our |
2085 | * hanging journal_head is cleaned up if necessary. | 2097 | * bh reference so that we can safely unlock bh. |
2086 | * | 2098 | * |
2087 | * __jbd2_journal_refile_buffer is usually called as part of a single locked | 2099 | * The jh and bh may be freed by this call. |
2088 | * operation on a buffer_head, in which the caller is probably going to | ||
2089 | * be hooking the journal_head onto other lists. In that case it is up | ||
2090 | * to the caller to remove the journal_head if necessary. For the | ||
2091 | * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be | ||
2092 | * doing anything else to the buffer so we need to do the cleanup | ||
2093 | * ourselves to avoid a jh leak. | ||
2094 | * | ||
2095 | * *** The journal_head may be freed by this call! *** | ||
2096 | */ | 2100 | */ |
2097 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | 2101 | void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) |
2098 | { | 2102 | { |
2099 | struct buffer_head *bh = jh2bh(jh); | 2103 | struct buffer_head *bh = jh2bh(jh); |
2100 | 2104 | ||
2105 | /* Get reference so that buffer cannot be freed before we unlock it */ | ||
2106 | get_bh(bh); | ||
2101 | jbd_lock_bh_state(bh); | 2107 | jbd_lock_bh_state(bh); |
2102 | spin_lock(&journal->j_list_lock); | 2108 | spin_lock(&journal->j_list_lock); |
2103 | |||
2104 | __jbd2_journal_refile_buffer(jh); | 2109 | __jbd2_journal_refile_buffer(jh); |
2105 | jbd_unlock_bh_state(bh); | 2110 | jbd_unlock_bh_state(bh); |
2106 | jbd2_journal_remove_journal_head(bh); | ||
2107 | |||
2108 | spin_unlock(&journal->j_list_lock); | 2111 | spin_unlock(&journal->j_list_lock); |
2109 | __brelse(bh); | 2112 | __brelse(bh); |
2110 | } | 2113 | } |