aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2012-09-26 23:11:13 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-09-26 23:11:13 -0400
commitb794e7a6ebfbddb819b0e75ab59ada6b08a285f2 (patch)
tree4ae2983a324f3788b831190c30cf92117e76e3d8
parent9b68733273665a4c0d98041a657dabfb4fd6bd80 (diff)
jbd2: fix assertion failure in commit code due to lacking transaction credits
ext4 users of data=journal mode with blocksize < pagesize were occasionally hitting assertion failure in jbd2_journal_commit_transaction() checking whether the transaction has at least as many credits reserved as buffers attached. The core of the problem is that when a file gets truncated, buffers that still need checkpointing or that are attached to the committing transaction are left with buffer_mapped set. When this happens to buffers beyond i_size attached to a page straddling i_size, subsequent write extending the file will see these buffers and as they are mapped (but underlying blocks were freed) things go awry from here. The assertion failure just coincidentally (and in this case luckily as we would start corrupting filesystem) triggers due to journal_head not being properly cleaned up as well. We fix the problem by unmapping buffers if possible (in lots of cases we just need a buffer attached to a transaction as a place holder but it must not be written out anyway). And in one case, we just have to bite the bullet and wait for transaction commit to finish. CC: Josef Bacik <jbacik@fusionio.com> Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r--fs/jbd2/commit.c40
-rw-r--r--fs/jbd2/transaction.c65
2 files changed, 74 insertions, 31 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index af5280fb579b..3091d42992f0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1014,17 +1014,35 @@ restart_loop:
1014 * there's no point in keeping a checkpoint record for 1014 * there's no point in keeping a checkpoint record for
1015 * it. */ 1015 * it. */
1016 1016
1017 /* A buffer which has been freed while still being 1017 /*
1018 * journaled by a previous transaction may end up still 1018 * A buffer which has been freed while still being journaled by
1019 * being dirty here, but we want to avoid writing back 1019 * a previous transaction.
1020 * that buffer in the future after the "add to orphan" 1020 */
1021 * operation been committed, That's not only a performance 1021 if (buffer_freed(bh)) {
1022 * gain, it also stops aliasing problems if the buffer is 1022 /*
1023 * left behind for writeback and gets reallocated for another 1023 * If the running transaction is the one containing
1024 * use in a different page. */ 1024 * "add to orphan" operation (b_next_transaction !=
1025 if (buffer_freed(bh) && !jh->b_next_transaction) { 1025 * NULL), we have to wait for that transaction to
1026 clear_buffer_freed(bh); 1026 * commit before we can really get rid of the buffer.
1027 clear_buffer_jbddirty(bh); 1027 * So just clear b_modified to not confuse transaction
1028 * credit accounting and refile the buffer to
1029 * BJ_Forget of the running transaction. If the just
1030 * committed transaction contains "add to orphan"
1031 * operation, we can completely invalidate the buffer
1032 * now. We are rather thorough in that since
1033 * buffer may be still accessible when blocksize <
1034 * pagesize and it is attached to the last partial
1035 * page.
1036 */
1037 jh->b_modified = 0;
1038 if (!jh->b_next_transaction) {
1039 clear_buffer_freed(bh);
1040 clear_buffer_jbddirty(bh);
1041 clear_buffer_mapped(bh);
1042 clear_buffer_new(bh);
1043 clear_buffer_req(bh);
1044 bh->b_bdev = NULL;
1045 }
1028 } 1046 }
1029 1047
1030 if (buffer_jbddirty(bh)) { 1048 if (buffer_jbddirty(bh)) {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index fb1ab9533b67..a74ba4659549 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1841 * We're outside-transaction here. Either or both of j_running_transaction 1841 * We're outside-transaction here. Either or both of j_running_transaction
1842 * and j_committing_transaction may be NULL. 1842 * and j_committing_transaction may be NULL.
1843 */ 1843 */
1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) 1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1845 int partial_page)
1845{ 1846{
1846 transaction_t *transaction; 1847 transaction_t *transaction;
1847 struct journal_head *jh; 1848 struct journal_head *jh;
1848 int may_free = 1; 1849 int may_free = 1;
1849 int ret;
1850 1850
1851 BUFFER_TRACE(bh, "entry"); 1851 BUFFER_TRACE(bh, "entry");
1852 1852
1853retry:
1853 /* 1854 /*
1854 * It is safe to proceed here without the j_list_lock because the 1855 * It is safe to proceed here without the j_list_lock because the
1855 * buffers cannot be stolen by try_to_free_buffers as long as we are 1856 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1878 * clear the buffer dirty bit at latest at the moment when the 1879 * clear the buffer dirty bit at latest at the moment when the
1879 * transaction marking the buffer as freed in the filesystem 1880 * transaction marking the buffer as freed in the filesystem
1880 * structures is committed because from that moment on the 1881 * structures is committed because from that moment on the
1881 * buffer can be reallocated and used by a different page. 1882 * block can be reallocated and used by a different page.
1882 * Since the block hasn't been freed yet but the inode has 1883 * Since the block hasn't been freed yet but the inode has
1883 * already been added to orphan list, it is safe for us to add 1884 * already been added to orphan list, it is safe for us to add
1884 * the buffer to BJ_Forget list of the newest transaction. 1885 * the buffer to BJ_Forget list of the newest transaction.
1886 *
1887 * Also we have to clear buffer_mapped flag of a truncated buffer
1888 * because the buffer_head may be attached to the page straddling
1889 * i_size (can happen only when blocksize < pagesize) and thus the
1890 * buffer_head can be reused when the file is extended again. So we end
1891 * up keeping around invalidated buffers attached to transactions'
1892 * BJ_Forget list just to stop checkpointing code from cleaning up
1893 * the transaction this buffer was modified in.
1885 */ 1894 */
1886 transaction = jh->b_transaction; 1895 transaction = jh->b_transaction;
1887 if (transaction == NULL) { 1896 if (transaction == NULL) {
@@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1908 * committed, the buffer won't be needed any 1917 * committed, the buffer won't be needed any
1909 * longer. */ 1918 * longer. */
1910 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 1919 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1911 ret = __dispose_buffer(jh, 1920 may_free = __dispose_buffer(jh,
1912 journal->j_running_transaction); 1921 journal->j_running_transaction);
1913 jbd2_journal_put_journal_head(jh); 1922 goto zap_buffer;
1914 spin_unlock(&journal->j_list_lock);
1915 jbd_unlock_bh_state(bh);
1916 write_unlock(&journal->j_state_lock);
1917 return ret;
1918 } else { 1923 } else {
1919 /* There is no currently-running transaction. So the 1924 /* There is no currently-running transaction. So the
1920 * orphan record which we wrote for this file must have 1925 * orphan record which we wrote for this file must have
@@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1922 * the committing transaction, if it exists. */ 1927 * the committing transaction, if it exists. */
1923 if (journal->j_committing_transaction) { 1928 if (journal->j_committing_transaction) {
1924 JBUFFER_TRACE(jh, "give to committing trans"); 1929 JBUFFER_TRACE(jh, "give to committing trans");
1925 ret = __dispose_buffer(jh, 1930 may_free = __dispose_buffer(jh,
1926 journal->j_committing_transaction); 1931 journal->j_committing_transaction);
1927 jbd2_journal_put_journal_head(jh); 1932 goto zap_buffer;
1928 spin_unlock(&journal->j_list_lock);
1929 jbd_unlock_bh_state(bh);
1930 write_unlock(&journal->j_state_lock);
1931 return ret;
1932 } else { 1933 } else {
1933 /* The orphan record's transaction has 1934 /* The orphan record's transaction has
1934 * committed. We can cleanse this buffer */ 1935 * committed. We can cleanse this buffer */
@@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1940 JBUFFER_TRACE(jh, "on committing transaction"); 1941 JBUFFER_TRACE(jh, "on committing transaction");
1941 /* 1942 /*
1942 * The buffer is committing, we simply cannot touch 1943 * The buffer is committing, we simply cannot touch
1943 * it. So we just set j_next_transaction to the 1944 * it. If the page is straddling i_size we have to wait
1944 * running transaction (if there is one) and mark 1945 * for commit and try again.
1945 * buffer as freed so that commit code knows it should 1946 */
1946 * clear dirty bits when it is done with the buffer. 1947 if (partial_page) {
1948 tid_t tid = journal->j_committing_transaction->t_tid;
1949
1950 jbd2_journal_put_journal_head(jh);
1951 spin_unlock(&journal->j_list_lock);
1952 jbd_unlock_bh_state(bh);
1953 write_unlock(&journal->j_state_lock);
1954 jbd2_log_wait_commit(journal, tid);
1955 goto retry;
1956 }
1957 /*
1958 * OK, buffer won't be reachable after truncate. We just set
1959 * b_next_transaction to the running transaction (if there is
1960 * one) and mark buffer as freed so that commit code knows it
1961 * should clear dirty bits when it is done with the buffer.
1947 */ 1962 */
1948 set_buffer_freed(bh); 1963 set_buffer_freed(bh);
1949 if (journal->j_running_transaction && buffer_jbddirty(bh)) 1964 if (journal->j_running_transaction && buffer_jbddirty(bh))
@@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1966 } 1981 }
1967 1982
1968zap_buffer: 1983zap_buffer:
1984 /*
1985 * This is tricky. Although the buffer is truncated, it may be reused
1986 * if blocksize < pagesize and it is attached to the page straddling
1987 * EOF. Since the buffer might have been added to BJ_Forget list of the
1988 * running transaction, journal_get_write_access() won't clear
1989 * b_modified and credit accounting gets confused. So clear b_modified
1990 * here.
1991 */
1992 jh->b_modified = 0;
1969 jbd2_journal_put_journal_head(jh); 1993 jbd2_journal_put_journal_head(jh);
1970zap_buffer_no_jh: 1994zap_buffer_no_jh:
1971 spin_unlock(&journal->j_list_lock); 1995 spin_unlock(&journal->j_list_lock);
@@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2017 if (offset <= curr_off) { 2041 if (offset <= curr_off) {
2018 /* This block is wholly outside the truncation point */ 2042 /* This block is wholly outside the truncation point */
2019 lock_buffer(bh); 2043 lock_buffer(bh);
2020 may_free &= journal_unmap_buffer(journal, bh); 2044 may_free &= journal_unmap_buffer(journal, bh,
2045 offset > 0);
2021 unlock_buffer(bh); 2046 unlock_buffer(bh);
2022 } 2047 }
2023 curr_off = next_off; 2048 curr_off = next_off;