diff options
author | Jan Kara <jack@suse.cz> | 2012-09-26 23:11:13 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-09-26 23:11:13 -0400 |
commit | b794e7a6ebfbddb819b0e75ab59ada6b08a285f2 (patch) | |
tree | 4ae2983a324f3788b831190c30cf92117e76e3d8 /fs/jbd2/transaction.c | |
parent | 9b68733273665a4c0d98041a657dabfb4fd6bd80 (diff) |
jbd2: fix assertion failure in commit code due to lacking transaction credits
ext4 users of data=journal mode with blocksize < pagesize were
occasionally hitting an assertion failure in
jbd2_journal_commit_transaction() checking whether the transaction has
at least as many credits reserved as buffers attached. The core of the
problem is that when a file gets truncated, buffers that still need
checkpointing or that are attached to the committing transaction are
left with buffer_mapped set. When this happens to buffers beyond i_size
attached to a page straddling i_size, a subsequent write extending the file
will see these buffers, and as they are mapped (but the underlying blocks
were freed), things go awry from here.
The assertion failure just coincidentally (and in this case luckily, as
we would otherwise start corrupting the filesystem) triggers due to the
journal_head not being properly cleaned up as well.
We fix the problem by unmapping buffers if possible (in lots of cases we
just need a buffer attached to a transaction as a placeholder but it
must not be written out anyway). And in one case, we just have to bite
the bullet and wait for transaction commit to finish.
CC: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- | fs/jbd2/transaction.c | 65 |
1 files changed, 45 insertions, 20 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index fb1ab9533b67..a74ba4659549 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
1841 | * We're outside-transaction here. Either or both of j_running_transaction | 1841 | * We're outside-transaction here. Either or both of j_running_transaction |
1842 | * and j_committing_transaction may be NULL. | 1842 | * and j_committing_transaction may be NULL. |
1843 | */ | 1843 | */ |
1844 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | 1844 | static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, |
1845 | int partial_page) | ||
1845 | { | 1846 | { |
1846 | transaction_t *transaction; | 1847 | transaction_t *transaction; |
1847 | struct journal_head *jh; | 1848 | struct journal_head *jh; |
1848 | int may_free = 1; | 1849 | int may_free = 1; |
1849 | int ret; | ||
1850 | 1850 | ||
1851 | BUFFER_TRACE(bh, "entry"); | 1851 | BUFFER_TRACE(bh, "entry"); |
1852 | 1852 | ||
1853 | retry: | ||
1853 | /* | 1854 | /* |
1854 | * It is safe to proceed here without the j_list_lock because the | 1855 | * It is safe to proceed here without the j_list_lock because the |
1855 | * buffers cannot be stolen by try_to_free_buffers as long as we are | 1856 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
@@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1878 | * clear the buffer dirty bit at latest at the moment when the | 1879 | * clear the buffer dirty bit at latest at the moment when the |
1879 | * transaction marking the buffer as freed in the filesystem | 1880 | * transaction marking the buffer as freed in the filesystem |
1880 | * structures is committed because from that moment on the | 1881 | * structures is committed because from that moment on the |
1881 | * buffer can be reallocated and used by a different page. | 1882 | * block can be reallocated and used by a different page. |
1882 | * Since the block hasn't been freed yet but the inode has | 1883 | * Since the block hasn't been freed yet but the inode has |
1883 | * already been added to orphan list, it is safe for us to add | 1884 | * already been added to orphan list, it is safe for us to add |
1884 | * the buffer to BJ_Forget list of the newest transaction. | 1885 | * the buffer to BJ_Forget list of the newest transaction. |
1886 | * | ||
1887 | * Also we have to clear buffer_mapped flag of a truncated buffer | ||
1888 | * because the buffer_head may be attached to the page straddling | ||
1889 | * i_size (can happen only when blocksize < pagesize) and thus the | ||
1890 | * buffer_head can be reused when the file is extended again. So we end | ||
1891 | * up keeping around invalidated buffers attached to transactions' | ||
1892 | * BJ_Forget list just to stop checkpointing code from cleaning up | ||
1893 | * the transaction this buffer was modified in. | ||
1885 | */ | 1894 | */ |
1886 | transaction = jh->b_transaction; | 1895 | transaction = jh->b_transaction; |
1887 | if (transaction == NULL) { | 1896 | if (transaction == NULL) { |
@@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1908 | * committed, the buffer won't be needed any | 1917 | * committed, the buffer won't be needed any |
1909 | * longer. */ | 1918 | * longer. */ |
1910 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); | 1919 | JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); |
1911 | ret = __dispose_buffer(jh, | 1920 | may_free = __dispose_buffer(jh, |
1912 | journal->j_running_transaction); | 1921 | journal->j_running_transaction); |
1913 | jbd2_journal_put_journal_head(jh); | 1922 | goto zap_buffer; |
1914 | spin_unlock(&journal->j_list_lock); | ||
1915 | jbd_unlock_bh_state(bh); | ||
1916 | write_unlock(&journal->j_state_lock); | ||
1917 | return ret; | ||
1918 | } else { | 1923 | } else { |
1919 | /* There is no currently-running transaction. So the | 1924 | /* There is no currently-running transaction. So the |
1920 | * orphan record which we wrote for this file must have | 1925 | * orphan record which we wrote for this file must have |
@@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1922 | * the committing transaction, if it exists. */ | 1927 | * the committing transaction, if it exists. */ |
1923 | if (journal->j_committing_transaction) { | 1928 | if (journal->j_committing_transaction) { |
1924 | JBUFFER_TRACE(jh, "give to committing trans"); | 1929 | JBUFFER_TRACE(jh, "give to committing trans"); |
1925 | ret = __dispose_buffer(jh, | 1930 | may_free = __dispose_buffer(jh, |
1926 | journal->j_committing_transaction); | 1931 | journal->j_committing_transaction); |
1927 | jbd2_journal_put_journal_head(jh); | 1932 | goto zap_buffer; |
1928 | spin_unlock(&journal->j_list_lock); | ||
1929 | jbd_unlock_bh_state(bh); | ||
1930 | write_unlock(&journal->j_state_lock); | ||
1931 | return ret; | ||
1932 | } else { | 1933 | } else { |
1933 | /* The orphan record's transaction has | 1934 | /* The orphan record's transaction has |
1934 | * committed. We can cleanse this buffer */ | 1935 | * committed. We can cleanse this buffer */ |
@@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1940 | JBUFFER_TRACE(jh, "on committing transaction"); | 1941 | JBUFFER_TRACE(jh, "on committing transaction"); |
1941 | /* | 1942 | /* |
1942 | * The buffer is committing, we simply cannot touch | 1943 | * The buffer is committing, we simply cannot touch |
1943 | * it. So we just set j_next_transaction to the | 1944 | * it. If the page is straddling i_size we have to wait |
1944 | * running transaction (if there is one) and mark | 1945 | * for commit and try again. |
1945 | * buffer as freed so that commit code knows it should | 1946 | */ |
1946 | * clear dirty bits when it is done with the buffer. | 1947 | if (partial_page) { |
1948 | tid_t tid = journal->j_committing_transaction->t_tid; | ||
1949 | |||
1950 | jbd2_journal_put_journal_head(jh); | ||
1951 | spin_unlock(&journal->j_list_lock); | ||
1952 | jbd_unlock_bh_state(bh); | ||
1953 | write_unlock(&journal->j_state_lock); | ||
1954 | jbd2_log_wait_commit(journal, tid); | ||
1955 | goto retry; | ||
1956 | } | ||
1957 | /* | ||
1958 | * OK, buffer won't be reachable after truncate. We just set | ||
1959 | * j_next_transaction to the running transaction (if there is | ||
1960 | * one) and mark buffer as freed so that commit code knows it | ||
1961 | * should clear dirty bits when it is done with the buffer. | ||
1947 | */ | 1962 | */ |
1948 | set_buffer_freed(bh); | 1963 | set_buffer_freed(bh); |
1949 | if (journal->j_running_transaction && buffer_jbddirty(bh)) | 1964 | if (journal->j_running_transaction && buffer_jbddirty(bh)) |
@@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
1966 | } | 1981 | } |
1967 | 1982 | ||
1968 | zap_buffer: | 1983 | zap_buffer: |
1984 | /* | ||
1985 | * This is tricky. Although the buffer is truncated, it may be reused | ||
1986 | * if blocksize < pagesize and it is attached to the page straddling | ||
1987 | * EOF. Since the buffer might have been added to BJ_Forget list of the | ||
1988 | * running transaction, journal_get_write_access() won't clear | ||
1989 | * b_modified and credit accounting gets confused. So clear b_modified | ||
1990 | * here. | ||
1991 | */ | ||
1992 | jh->b_modified = 0; | ||
1969 | jbd2_journal_put_journal_head(jh); | 1993 | jbd2_journal_put_journal_head(jh); |
1970 | zap_buffer_no_jh: | 1994 | zap_buffer_no_jh: |
1971 | spin_unlock(&journal->j_list_lock); | 1995 | spin_unlock(&journal->j_list_lock); |
@@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
2017 | if (offset <= curr_off) { | 2041 | if (offset <= curr_off) { |
2018 | /* This block is wholly outside the truncation point */ | 2042 | /* This block is wholly outside the truncation point */ |
2019 | lock_buffer(bh); | 2043 | lock_buffer(bh); |
2020 | may_free &= journal_unmap_buffer(journal, bh); | 2044 | may_free &= journal_unmap_buffer(journal, bh, |
2045 | offset > 0); | ||
2021 | unlock_buffer(bh); | 2046 | unlock_buffer(bh); |
2022 | } | 2047 | } |
2023 | curr_off = next_off; | 2048 | curr_off = next_off; |