aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd/journal.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-06-24 17:11:59 -0400
committerJan Kara <jack@suse.cz>2011-06-27 05:44:37 -0400
commitbb189247f35688a3353545902c56290fb7d7754a (patch)
tree02f93da7f642f3e59050d1c2837a7a8a8e61b3aa /fs/jbd/journal.c
parent2c2ea9451fc2a12ee57c8346f0da26969d07ee7f (diff)
jbd: Fix oops in journal_remove_journal_head()
journal_remove_journal_head() can oops when trying to access journal_head returned by bh2jh(). This is caused for example by the following race: TASK1 TASK2 journal_commit_transaction() ... processing t_forget list __journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); journal_try_to_free_buffers() journal_grab_journal_head(bh) jbd_lock_bh_state(bh) __journal_try_to_free_buffer() journal_put_journal_head(jh) journal_remove_journal_head(bh); journal_put_journal_head() in TASK2 sees that b_jcount == 0 and buffer is not part of any transaction and thus frees journal_head before TASK1 gets to doing so. Note that even buffer_head can be released by try_to_free_buffers() after journal_put_journal_head() which adds even larger opportunity for oops (but I didn't see this happen in reality). Fix the problem by making transactions hold their own journal_head reference (in b_jcount). That way we don't have to remove journal_head explicitely via journal_remove_journal_head() and instead just remove journal_head when b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(), [__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free journal_head which needs modification of a few callers. Also we have to be careful because once journal_head is removed, buffer_head might be freed as well. So we have to get our own buffer_head reference where it matters. Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd/journal.c')
-rw-r--r--fs/jbd/journal.c95
1 files changed, 31 insertions, 64 deletions
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index ab019ee77888..9fe061fb8779 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1803,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
1803 * When a buffer has its BH_JBD bit set it is immune from being released by 1803 * When a buffer has its BH_JBD bit set it is immune from being released by
1804 * core kernel code, mainly via ->b_count. 1804 * core kernel code, mainly via ->b_count.
1805 * 1805 *
1806 * A journal_head may be detached from its buffer_head when the journal_head's 1806 * A journal_head is detached from its buffer_head when the journal_head's
1807 * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. 1807 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
1808 * Various places in JBD call journal_remove_journal_head() to indicate that the 1808 * transaction (b_cp_transaction) hold their references to b_jcount.
1809 * journal_head can be dropped if needed.
1810 * 1809 *
1811 * Various places in the kernel want to attach a journal_head to a buffer_head 1810 * Various places in the kernel want to attach a journal_head to a buffer_head
1812 * _before_ attaching the journal_head to a transaction. To protect the 1811 * _before_ attaching the journal_head to a transaction. To protect the
@@ -1819,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
1819 * (Attach a journal_head if needed. Increments b_jcount) 1818 * (Attach a journal_head if needed. Increments b_jcount)
1820 * struct journal_head *jh = journal_add_journal_head(bh); 1819 * struct journal_head *jh = journal_add_journal_head(bh);
1821 * ... 1820 * ...
1822 * jh->b_transaction = xxx; 1821 * (Get another reference for transaction)
1823 * journal_put_journal_head(jh); 1822 * journal_grab_journal_head(bh);
1824 * 1823 * jh->b_transaction = xxx;
1825 * Now, the journal_head's b_jcount is zero, but it is safe from being released 1824 * (Put original reference)
1826 * because it has a non-zero b_transaction. 1825 * journal_put_journal_head(jh);
1827 */ 1826 */
1828 1827
1829/* 1828/*
1830 * Give a buffer_head a journal_head. 1829 * Give a buffer_head a journal_head.
1831 * 1830 *
1832 * Doesn't need the journal lock.
1833 * May sleep. 1831 * May sleep.
1834 */ 1832 */
1835struct journal_head *journal_add_journal_head(struct buffer_head *bh) 1833struct journal_head *journal_add_journal_head(struct buffer_head *bh)
@@ -1893,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
1893 struct journal_head *jh = bh2jh(bh); 1891 struct journal_head *jh = bh2jh(bh);
1894 1892
1895 J_ASSERT_JH(jh, jh->b_jcount >= 0); 1893 J_ASSERT_JH(jh, jh->b_jcount >= 0);
1896 1894 J_ASSERT_JH(jh, jh->b_transaction == NULL);
1897 get_bh(bh); 1895 J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1898 if (jh->b_jcount == 0) { 1896 J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
1899 if (jh->b_transaction == NULL && 1897 J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
1900 jh->b_next_transaction == NULL && 1898 J_ASSERT_BH(bh, buffer_jbd(bh));
1901 jh->b_cp_transaction == NULL) { 1899 J_ASSERT_BH(bh, jh2bh(jh) == bh);
1902 J_ASSERT_JH(jh, jh->b_jlist == BJ_None); 1900 BUFFER_TRACE(bh, "remove journal_head");
1903 J_ASSERT_BH(bh, buffer_jbd(bh)); 1901 if (jh->b_frozen_data) {
1904 J_ASSERT_BH(bh, jh2bh(jh) == bh); 1902 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
1905 BUFFER_TRACE(bh, "remove journal_head"); 1903 jbd_free(jh->b_frozen_data, bh->b_size);
1906 if (jh->b_frozen_data) {
1907 printk(KERN_WARNING "%s: freeing "
1908 "b_frozen_data\n",
1909 __func__);
1910 jbd_free(jh->b_frozen_data, bh->b_size);
1911 }
1912 if (jh->b_committed_data) {
1913 printk(KERN_WARNING "%s: freeing "
1914 "b_committed_data\n",
1915 __func__);
1916 jbd_free(jh->b_committed_data, bh->b_size);
1917 }
1918 bh->b_private = NULL;
1919 jh->b_bh = NULL; /* debug, really */
1920 clear_buffer_jbd(bh);
1921 __brelse(bh);
1922 journal_free_journal_head(jh);
1923 } else {
1924 BUFFER_TRACE(bh, "journal_head was locked");
1925 }
1926 } 1904 }
1905 if (jh->b_committed_data) {
1906 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
1907 jbd_free(jh->b_committed_data, bh->b_size);
1908 }
1909 bh->b_private = NULL;
1910 jh->b_bh = NULL; /* debug, really */
1911 clear_buffer_jbd(bh);
1912 journal_free_journal_head(jh);
1927} 1913}
1928 1914
1929/* 1915/*
1930 * journal_remove_journal_head(): if the buffer isn't attached to a transaction 1916 * Drop a reference on the passed journal_head. If it fell to zero then
1931 * and has a zero b_jcount then remove and release its journal_head. If we did
1932 * see that the buffer is not used by any transaction we also "logically"
1933 * decrement ->b_count.
1934 *
1935 * We in fact take an additional increment on ->b_count as a convenience,
1936 * because the caller usually wants to do additional things with the bh
1937 * after calling here.
1938 * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
1939 * time. Once the caller has run __brelse(), the buffer is eligible for
1940 * reaping by try_to_free_buffers().
1941 */
1942void journal_remove_journal_head(struct buffer_head *bh)
1943{
1944 jbd_lock_bh_journal_head(bh);
1945 __journal_remove_journal_head(bh);
1946 jbd_unlock_bh_journal_head(bh);
1947}
1948
1949/*
1950 * Drop a reference on the passed journal_head. If it fell to zero then try to
1951 * release the journal_head from the buffer_head. 1917 * release the journal_head from the buffer_head.
1952 */ 1918 */
1953void journal_put_journal_head(struct journal_head *jh) 1919void journal_put_journal_head(struct journal_head *jh)
@@ -1957,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh)
1957 jbd_lock_bh_journal_head(bh); 1923 jbd_lock_bh_journal_head(bh);
1958 J_ASSERT_JH(jh, jh->b_jcount > 0); 1924 J_ASSERT_JH(jh, jh->b_jcount > 0);
1959 --jh->b_jcount; 1925 --jh->b_jcount;
1960 if (!jh->b_jcount && !jh->b_transaction) { 1926 if (!jh->b_jcount) {
1961 __journal_remove_journal_head(bh); 1927 __journal_remove_journal_head(bh);
1928 jbd_unlock_bh_journal_head(bh);
1962 __brelse(bh); 1929 __brelse(bh);
1963 } 1930 } else
1964 jbd_unlock_bh_journal_head(bh); 1931 jbd_unlock_bh_journal_head(bh);
1965} 1932}
1966 1933
1967/* 1934/*