diff options
author | Jan Kara <jack@suse.cz> | 2011-06-24 17:11:59 -0400 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2011-06-27 05:44:37 -0400 |
commit | bb189247f35688a3353545902c56290fb7d7754a (patch) | |
tree | 02f93da7f642f3e59050d1c2837a7a8a8e61b3aa /fs/jbd/journal.c | |
parent | 2c2ea9451fc2a12ee57c8346f0da26969d07ee7f (diff) |
jbd: Fix oops in journal_remove_journal_head()
journal_remove_journal_head() can oops when trying to access journal_head
returned by bh2jh(). This is caused for example by the following race:
TASK1 TASK2
journal_commit_transaction()
...
processing t_forget list
__journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh);
journal_try_to_free_buffers()
journal_grab_journal_head(bh)
jbd_lock_bh_state(bh)
__journal_try_to_free_buffer()
journal_put_journal_head(jh)
journal_remove_journal_head(bh);
journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not
part of any transaction and thus frees journal_head before TASK1 gets to doing
so. Note that even buffer_head can be released by try_to_free_buffers() after
journal_put_journal_head() which adds even larger opportunity for oops (but I
didn't see this happen in reality).
Fix the problem by making transactions hold their own journal_head reference
(in b_jcount). That way we don't have to remove journal_head explicitely via
journal_remove_journal_head() and instead just remove journal_head when
b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(),
[__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free
journal_head which needs modification of a few callers. Also we have to be
careful because once journal_head is removed, buffer_head might be freed as
well. So we have to get our own buffer_head reference where it matters.
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd/journal.c')
-rw-r--r-- | fs/jbd/journal.c | 95 |
1 files changed, 31 insertions, 64 deletions
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index ab019ee77888..9fe061fb8779 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
@@ -1803,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1803 | * When a buffer has its BH_JBD bit set it is immune from being released by | 1803 | * When a buffer has its BH_JBD bit set it is immune from being released by |
1804 | * core kernel code, mainly via ->b_count. | 1804 | * core kernel code, mainly via ->b_count. |
1805 | * | 1805 | * |
1806 | * A journal_head may be detached from its buffer_head when the journal_head's | 1806 | * A journal_head is detached from its buffer_head when the journal_head's |
1807 | * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. | 1807 | * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint |
1808 | * Various places in JBD call journal_remove_journal_head() to indicate that the | 1808 | * transaction (b_cp_transaction) hold their references to b_jcount. |
1809 | * journal_head can be dropped if needed. | ||
1810 | * | 1809 | * |
1811 | * Various places in the kernel want to attach a journal_head to a buffer_head | 1810 | * Various places in the kernel want to attach a journal_head to a buffer_head |
1812 | * _before_ attaching the journal_head to a transaction. To protect the | 1811 | * _before_ attaching the journal_head to a transaction. To protect the |
@@ -1819,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh) | |||
1819 | * (Attach a journal_head if needed. Increments b_jcount) | 1818 | * (Attach a journal_head if needed. Increments b_jcount) |
1820 | * struct journal_head *jh = journal_add_journal_head(bh); | 1819 | * struct journal_head *jh = journal_add_journal_head(bh); |
1821 | * ... | 1820 | * ... |
1822 | * jh->b_transaction = xxx; | 1821 | * (Get another reference for transaction) |
1823 | * journal_put_journal_head(jh); | 1822 | * journal_grab_journal_head(bh); |
1824 | * | 1823 | * jh->b_transaction = xxx; |
1825 | * Now, the journal_head's b_jcount is zero, but it is safe from being released | 1824 | * (Put original reference) |
1826 | * because it has a non-zero b_transaction. | 1825 | * journal_put_journal_head(jh); |
1827 | */ | 1826 | */ |
1828 | 1827 | ||
1829 | /* | 1828 | /* |
1830 | * Give a buffer_head a journal_head. | 1829 | * Give a buffer_head a journal_head. |
1831 | * | 1830 | * |
1832 | * Doesn't need the journal lock. | ||
1833 | * May sleep. | 1831 | * May sleep. |
1834 | */ | 1832 | */ |
1835 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) | 1833 | struct journal_head *journal_add_journal_head(struct buffer_head *bh) |
@@ -1893,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) | |||
1893 | struct journal_head *jh = bh2jh(bh); | 1891 | struct journal_head *jh = bh2jh(bh); |
1894 | 1892 | ||
1895 | J_ASSERT_JH(jh, jh->b_jcount >= 0); | 1893 | J_ASSERT_JH(jh, jh->b_jcount >= 0); |
1896 | 1894 | J_ASSERT_JH(jh, jh->b_transaction == NULL); | |
1897 | get_bh(bh); | 1895 | J_ASSERT_JH(jh, jh->b_next_transaction == NULL); |
1898 | if (jh->b_jcount == 0) { | 1896 | J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); |
1899 | if (jh->b_transaction == NULL && | 1897 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); |
1900 | jh->b_next_transaction == NULL && | 1898 | J_ASSERT_BH(bh, buffer_jbd(bh)); |
1901 | jh->b_cp_transaction == NULL) { | 1899 | J_ASSERT_BH(bh, jh2bh(jh) == bh); |
1902 | J_ASSERT_JH(jh, jh->b_jlist == BJ_None); | 1900 | BUFFER_TRACE(bh, "remove journal_head"); |
1903 | J_ASSERT_BH(bh, buffer_jbd(bh)); | 1901 | if (jh->b_frozen_data) { |
1904 | J_ASSERT_BH(bh, jh2bh(jh) == bh); | 1902 | printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); |
1905 | BUFFER_TRACE(bh, "remove journal_head"); | 1903 | jbd_free(jh->b_frozen_data, bh->b_size); |
1906 | if (jh->b_frozen_data) { | ||
1907 | printk(KERN_WARNING "%s: freeing " | ||
1908 | "b_frozen_data\n", | ||
1909 | __func__); | ||
1910 | jbd_free(jh->b_frozen_data, bh->b_size); | ||
1911 | } | ||
1912 | if (jh->b_committed_data) { | ||
1913 | printk(KERN_WARNING "%s: freeing " | ||
1914 | "b_committed_data\n", | ||
1915 | __func__); | ||
1916 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1917 | } | ||
1918 | bh->b_private = NULL; | ||
1919 | jh->b_bh = NULL; /* debug, really */ | ||
1920 | clear_buffer_jbd(bh); | ||
1921 | __brelse(bh); | ||
1922 | journal_free_journal_head(jh); | ||
1923 | } else { | ||
1924 | BUFFER_TRACE(bh, "journal_head was locked"); | ||
1925 | } | ||
1926 | } | 1904 | } |
1905 | if (jh->b_committed_data) { | ||
1906 | printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); | ||
1907 | jbd_free(jh->b_committed_data, bh->b_size); | ||
1908 | } | ||
1909 | bh->b_private = NULL; | ||
1910 | jh->b_bh = NULL; /* debug, really */ | ||
1911 | clear_buffer_jbd(bh); | ||
1912 | journal_free_journal_head(jh); | ||
1927 | } | 1913 | } |
1928 | 1914 | ||
1929 | /* | 1915 | /* |
1930 | * journal_remove_journal_head(): if the buffer isn't attached to a transaction | 1916 | * Drop a reference on the passed journal_head. If it fell to zero then |
1931 | * and has a zero b_jcount then remove and release its journal_head. If we did | ||
1932 | * see that the buffer is not used by any transaction we also "logically" | ||
1933 | * decrement ->b_count. | ||
1934 | * | ||
1935 | * We in fact take an additional increment on ->b_count as a convenience, | ||
1936 | * because the caller usually wants to do additional things with the bh | ||
1937 | * after calling here. | ||
1938 | * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some | ||
1939 | * time. Once the caller has run __brelse(), the buffer is eligible for | ||
1940 | * reaping by try_to_free_buffers(). | ||
1941 | */ | ||
1942 | void journal_remove_journal_head(struct buffer_head *bh) | ||
1943 | { | ||
1944 | jbd_lock_bh_journal_head(bh); | ||
1945 | __journal_remove_journal_head(bh); | ||
1946 | jbd_unlock_bh_journal_head(bh); | ||
1947 | } | ||
1948 | |||
1949 | /* | ||
1950 | * Drop a reference on the passed journal_head. If it fell to zero then try to | ||
1951 | * release the journal_head from the buffer_head. | 1917 | * release the journal_head from the buffer_head. |
1952 | */ | 1918 | */ |
1953 | void journal_put_journal_head(struct journal_head *jh) | 1919 | void journal_put_journal_head(struct journal_head *jh) |
@@ -1957,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh) | |||
1957 | jbd_lock_bh_journal_head(bh); | 1923 | jbd_lock_bh_journal_head(bh); |
1958 | J_ASSERT_JH(jh, jh->b_jcount > 0); | 1924 | J_ASSERT_JH(jh, jh->b_jcount > 0); |
1959 | --jh->b_jcount; | 1925 | --jh->b_jcount; |
1960 | if (!jh->b_jcount && !jh->b_transaction) { | 1926 | if (!jh->b_jcount) { |
1961 | __journal_remove_journal_head(bh); | 1927 | __journal_remove_journal_head(bh); |
1928 | jbd_unlock_bh_journal_head(bh); | ||
1962 | __brelse(bh); | 1929 | __brelse(bh); |
1963 | } | 1930 | } else |
1964 | jbd_unlock_bh_journal_head(bh); | 1931 | jbd_unlock_bh_journal_head(bh); |
1965 | } | 1932 | } |
1966 | 1933 | ||
1967 | /* | 1934 | /* |