aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/transaction.c
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2012-09-26 23:11:13 -0400
committer Theodore Ts'o <tytso@mit.edu> 2012-09-26 23:11:13 -0400
commit b794e7a6ebfbddb819b0e75ab59ada6b08a285f2 (patch)
tree 4ae2983a324f3788b831190c30cf92117e76e3d8 /fs/jbd2/transaction.c
parent 9b68733273665a4c0d98041a657dabfb4fd6bd80 (diff)
jbd2: fix assertion failure in commit code due to lacking transaction credits
ext4 users of data=journal mode with blocksize < pagesize were occasionally hitting an assertion failure in jbd2_journal_commit_transaction(), which checks whether the transaction has at least as many credits reserved as buffers attached. The core of the problem is that when a file gets truncated, buffers that still need checkpointing or that are attached to the committing transaction are left with buffer_mapped set. When this happens to buffers beyond i_size attached to a page straddling i_size, a subsequent write extending the file will see these buffers, and because they are still mapped (although the underlying blocks were freed) things go awry from there. The assertion failure triggers just coincidentally (and in this case luckily, as we would otherwise start corrupting the filesystem) because the journal_head is not properly cleaned up either. We fix the problem by unmapping buffers if possible (in lots of cases we just need a buffer attached to a transaction as a placeholder, but it must not be written out anyway). And in one case, we just have to bite the bullet and wait for the transaction commit to finish.
CC: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd2/transaction.c')
-rw-r--r-- fs/jbd2/transaction.c 65
1 files changed, 45 insertions, 20 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index fb1ab9533b67..a74ba4659549 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1841 * We're outside-transaction here. Either or both of j_running_transaction 1841 * We're outside-transaction here. Either or both of j_running_transaction
1842 * and j_committing_transaction may be NULL. 1842 * and j_committing_transaction may be NULL.
1843 */ 1843 */
1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) 1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1845 int partial_page)
1845{ 1846{
1846 transaction_t *transaction; 1847 transaction_t *transaction;
1847 struct journal_head *jh; 1848 struct journal_head *jh;
1848 int may_free = 1; 1849 int may_free = 1;
1849 int ret;
1850 1850
1851 BUFFER_TRACE(bh, "entry"); 1851 BUFFER_TRACE(bh, "entry");
1852 1852
1853retry:
1853 /* 1854 /*
1854 * It is safe to proceed here without the j_list_lock because the 1855 * It is safe to proceed here without the j_list_lock because the
1855 * buffers cannot be stolen by try_to_free_buffers as long as we are 1856 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1878 * clear the buffer dirty bit at latest at the moment when the 1879 * clear the buffer dirty bit at latest at the moment when the
1879 * transaction marking the buffer as freed in the filesystem 1880 * transaction marking the buffer as freed in the filesystem
1880 * structures is committed because from that moment on the 1881 * structures is committed because from that moment on the
1881 * buffer can be reallocated and used by a different page. 1882 * block can be reallocated and used by a different page.
1882 * Since the block hasn't been freed yet but the inode has 1883 * Since the block hasn't been freed yet but the inode has
1883 * already been added to orphan list, it is safe for us to add 1884 * already been added to orphan list, it is safe for us to add
1884 * the buffer to BJ_Forget list of the newest transaction. 1885 * the buffer to BJ_Forget list of the newest transaction.
1886 *
1887 * Also we have to clear buffer_mapped flag of a truncated buffer
1888 * because the buffer_head may be attached to the page straddling
1889 * i_size (can happen only when blocksize < pagesize) and thus the
1890 * buffer_head can be reused when the file is extended again. So we end
1891 * up keeping around invalidated buffers attached to transactions'
1892 * BJ_Forget list just to stop checkpointing code from cleaning up
1893 * the transaction this buffer was modified in.
1885 */ 1894 */
1886 transaction = jh->b_transaction; 1895 transaction = jh->b_transaction;
1887 if (transaction == NULL) { 1896 if (transaction == NULL) {
@@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1908 * committed, the buffer won't be needed any 1917 * committed, the buffer won't be needed any
1909 * longer. */ 1918 * longer. */
1910 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 1919 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1911 ret = __dispose_buffer(jh, 1920 may_free = __dispose_buffer(jh,
1912 journal->j_running_transaction); 1921 journal->j_running_transaction);
1913 jbd2_journal_put_journal_head(jh); 1922 goto zap_buffer;
1914 spin_unlock(&journal->j_list_lock);
1915 jbd_unlock_bh_state(bh);
1916 write_unlock(&journal->j_state_lock);
1917 return ret;
1918 } else { 1923 } else {
1919 /* There is no currently-running transaction. So the 1924 /* There is no currently-running transaction. So the
1920 * orphan record which we wrote for this file must have 1925 * orphan record which we wrote for this file must have
@@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1922 * the committing transaction, if it exists. */ 1927 * the committing transaction, if it exists. */
1923 if (journal->j_committing_transaction) { 1928 if (journal->j_committing_transaction) {
1924 JBUFFER_TRACE(jh, "give to committing trans"); 1929 JBUFFER_TRACE(jh, "give to committing trans");
1925 ret = __dispose_buffer(jh, 1930 may_free = __dispose_buffer(jh,
1926 journal->j_committing_transaction); 1931 journal->j_committing_transaction);
1927 jbd2_journal_put_journal_head(jh); 1932 goto zap_buffer;
1928 spin_unlock(&journal->j_list_lock);
1929 jbd_unlock_bh_state(bh);
1930 write_unlock(&journal->j_state_lock);
1931 return ret;
1932 } else { 1933 } else {
1933 /* The orphan record's transaction has 1934 /* The orphan record's transaction has
1934 * committed. We can cleanse this buffer */ 1935 * committed. We can cleanse this buffer */
@@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1940 JBUFFER_TRACE(jh, "on committing transaction"); 1941 JBUFFER_TRACE(jh, "on committing transaction");
1941 /* 1942 /*
1942 * The buffer is committing, we simply cannot touch 1943 * The buffer is committing, we simply cannot touch
1943 * it. So we just set j_next_transaction to the 1944 * it. If the page is straddling i_size we have to wait
1944 * running transaction (if there is one) and mark 1945 * for commit and try again.
1945 * buffer as freed so that commit code knows it should 1946 */
1946 * clear dirty bits when it is done with the buffer. 1947 if (partial_page) {
1948 tid_t tid = journal->j_committing_transaction->t_tid;
1949
1950 jbd2_journal_put_journal_head(jh);
1951 spin_unlock(&journal->j_list_lock);
1952 jbd_unlock_bh_state(bh);
1953 write_unlock(&journal->j_state_lock);
1954 jbd2_log_wait_commit(journal, tid);
1955 goto retry;
1956 }
1957 /*
1958 * OK, buffer won't be reachable after truncate. We just set
1959 * j_next_transaction to the running transaction (if there is
1960 * one) and mark buffer as freed so that commit code knows it
1961 * should clear dirty bits when it is done with the buffer.
1947 */ 1962 */
1948 set_buffer_freed(bh); 1963 set_buffer_freed(bh);
1949 if (journal->j_running_transaction && buffer_jbddirty(bh)) 1964 if (journal->j_running_transaction && buffer_jbddirty(bh))
@@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1966 } 1981 }
1967 1982
1968zap_buffer: 1983zap_buffer:
1984 /*
1985 * This is tricky. Although the buffer is truncated, it may be reused
1986 * if blocksize < pagesize and it is attached to the page straddling
1987 * EOF. Since the buffer might have been added to BJ_Forget list of the
1988 * running transaction, journal_get_write_access() won't clear
1989 * b_modified and credit accounting gets confused. So clear b_modified
1990 * here.
1991 */
1992 jh->b_modified = 0;
1969 jbd2_journal_put_journal_head(jh); 1993 jbd2_journal_put_journal_head(jh);
1970zap_buffer_no_jh: 1994zap_buffer_no_jh:
1971 spin_unlock(&journal->j_list_lock); 1995 spin_unlock(&journal->j_list_lock);
@@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2017 if (offset <= curr_off) { 2041 if (offset <= curr_off) {
2018 /* This block is wholly outside the truncation point */ 2042 /* This block is wholly outside the truncation point */
2019 lock_buffer(bh); 2043 lock_buffer(bh);
2020 may_free &= journal_unmap_buffer(journal, bh); 2044 may_free &= journal_unmap_buffer(journal, bh,
2045 offset > 0);
2021 unlock_buffer(bh); 2046 unlock_buffer(bh);
2022 } 2047 }
2023 curr_off = next_off; 2048 curr_off = next_off;