aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2012-07-11 17:16:25 -0400
committer Jan Kara <jack@suse.cz> 2012-09-12 09:52:03 -0400
commit 09e05d4805e6c524c1af74e524e5d0528bb3fef3 (patch)
tree 8522776e5ad521026885806f5fb8276be0c7dc1b /fs
parent 5eec54fcde7e065eb3d8a6e70e61d90673ca706b (diff)
jbd: Fix assertion failure in commit code due to lacking transaction credits
ext3 users of data=journal mode with blocksize < pagesize were occasionally hitting an assertion failure in journal_commit_transaction(), which checks whether the transaction has at least as many credits reserved as buffers attached. The core of the problem is that when a file gets truncated, buffers that still need checkpointing or that are attached to the committing transaction are left with buffer_mapped set. When this happens to buffers beyond i_size attached to a page straddling i_size, a subsequent write extending the file will see these buffers and, as they are mapped (but the underlying blocks were freed), things go awry from there. The assertion failure just coincidentally (and in this case luckily, as we would otherwise start corrupting the filesystem) triggers due to journal_head not being properly cleaned up as well. Under some rare circumstances this bug could even hit data=ordered mode users. There the assertion won't trigger and we would end up corrupting the filesystem. We fix the problem by unmapping buffers if possible (in lots of cases we just need a buffer attached to a transaction as a placeholder but it must not be written out anyway). And in one case, we just have to bite the bullet and wait for the transaction commit to finish. Reviewed-by: Josef Bacik <jbacik@fusionio.com> Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs')
-rw-r--r-- fs/jbd/commit.c 45
-rw-r--r-- fs/jbd/transaction.c 64
2 files changed, 78 insertions, 31 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 52c15c776029..86b39b167c23 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -86,7 +86,12 @@ nope:
86static void release_data_buffer(struct buffer_head *bh) 86static void release_data_buffer(struct buffer_head *bh)
87{ 87{
88 if (buffer_freed(bh)) { 88 if (buffer_freed(bh)) {
89 WARN_ON_ONCE(buffer_dirty(bh));
89 clear_buffer_freed(bh); 90 clear_buffer_freed(bh);
91 clear_buffer_mapped(bh);
92 clear_buffer_new(bh);
93 clear_buffer_req(bh);
94 bh->b_bdev = NULL;
90 release_buffer_page(bh); 95 release_buffer_page(bh);
91 } else 96 } else
92 put_bh(bh); 97 put_bh(bh);
@@ -866,17 +871,35 @@ restart_loop:
866 * there's no point in keeping a checkpoint record for 871 * there's no point in keeping a checkpoint record for
867 * it. */ 872 * it. */
868 873
869 /* A buffer which has been freed while still being 874 /*
870 * journaled by a previous transaction may end up still 875 * A buffer which has been freed while still being journaled by
871 * being dirty here, but we want to avoid writing back 876 * a previous transaction.
872 * that buffer in the future after the "add to orphan" 877 */
873 * operation been committed, That's not only a performance 878 if (buffer_freed(bh)) {
874 * gain, it also stops aliasing problems if the buffer is 879 /*
875 * left behind for writeback and gets reallocated for another 880 * If the running transaction is the one containing
876 * use in a different page. */ 881 * "add to orphan" operation (b_next_transaction !=
877 if (buffer_freed(bh) && !jh->b_next_transaction) { 882 * NULL), we have to wait for that transaction to
878 clear_buffer_freed(bh); 883 * commit before we can really get rid of the buffer.
879 clear_buffer_jbddirty(bh); 884 * So just clear b_modified to not confuse transaction
885 * credit accounting and refile the buffer to
886 * BJ_Forget of the running transaction. If the just
887 * committed transaction contains "add to orphan"
888 * operation, we can completely invalidate the buffer
889 * now. We are rather throughout in that since the
890 * buffer may be still accessible when blocksize <
891 * pagesize and it is attached to the last partial
892 * page.
893 */
894 jh->b_modified = 0;
895 if (!jh->b_next_transaction) {
896 clear_buffer_freed(bh);
897 clear_buffer_jbddirty(bh);
898 clear_buffer_mapped(bh);
899 clear_buffer_new(bh);
900 clear_buffer_req(bh);
901 bh->b_bdev = NULL;
902 }
880 } 903 }
881 904
882 if (buffer_jbddirty(bh)) { 905 if (buffer_jbddirty(bh)) {
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index febc10db5ced..78b7f84241d4 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1843,15 +1843,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1843 * We're outside-transaction here. Either or both of j_running_transaction 1843 * We're outside-transaction here. Either or both of j_running_transaction
1844 * and j_committing_transaction may be NULL. 1844 * and j_committing_transaction may be NULL.
1845 */ 1845 */
1846static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) 1846static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1847 int partial_page)
1847{ 1848{
1848 transaction_t *transaction; 1849 transaction_t *transaction;
1849 struct journal_head *jh; 1850 struct journal_head *jh;
1850 int may_free = 1; 1851 int may_free = 1;
1851 int ret;
1852 1852
1853 BUFFER_TRACE(bh, "entry"); 1853 BUFFER_TRACE(bh, "entry");
1854 1854
1855retry:
1855 /* 1856 /*
1856 * It is safe to proceed here without the j_list_lock because the 1857 * It is safe to proceed here without the j_list_lock because the
1857 * buffers cannot be stolen by try_to_free_buffers as long as we are 1858 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1879,10 +1880,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1879 * clear the buffer dirty bit at latest at the moment when the 1880 * clear the buffer dirty bit at latest at the moment when the
1880 * transaction marking the buffer as freed in the filesystem 1881 * transaction marking the buffer as freed in the filesystem
1881 * structures is committed because from that moment on the 1882 * structures is committed because from that moment on the
1882 * buffer can be reallocated and used by a different page. 1883 * block can be reallocated and used by a different page.
1883 * Since the block hasn't been freed yet but the inode has 1884 * Since the block hasn't been freed yet but the inode has
1884 * already been added to orphan list, it is safe for us to add 1885 * already been added to orphan list, it is safe for us to add
1885 * the buffer to BJ_Forget list of the newest transaction. 1886 * the buffer to BJ_Forget list of the newest transaction.
1887 *
1888 * Also we have to clear buffer_mapped flag of a truncated buffer
1889 * because the buffer_head may be attached to the page straddling
1890 * i_size (can happen only when blocksize < pagesize) and thus the
1891 * buffer_head can be reused when the file is extended again. So we end
1892 * up keeping around invalidated buffers attached to transactions'
1893 * BJ_Forget list just to stop checkpointing code from cleaning up
1894 * the transaction this buffer was modified in.
1886 */ 1895 */
1887 transaction = jh->b_transaction; 1896 transaction = jh->b_transaction;
1888 if (transaction == NULL) { 1897 if (transaction == NULL) {
@@ -1909,13 +1918,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1909 * committed, the buffer won't be needed any 1918 * committed, the buffer won't be needed any
1910 * longer. */ 1919 * longer. */
1911 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 1920 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1912 ret = __dispose_buffer(jh, 1921 may_free = __dispose_buffer(jh,
1913 journal->j_running_transaction); 1922 journal->j_running_transaction);
1914 journal_put_journal_head(jh); 1923 goto zap_buffer;
1915 spin_unlock(&journal->j_list_lock);
1916 jbd_unlock_bh_state(bh);
1917 spin_unlock(&journal->j_state_lock);
1918 return ret;
1919 } else { 1924 } else {
1920 /* There is no currently-running transaction. So the 1925 /* There is no currently-running transaction. So the
1921 * orphan record which we wrote for this file must have 1926 * orphan record which we wrote for this file must have
@@ -1923,13 +1928,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1923 * the committing transaction, if it exists. */ 1928 * the committing transaction, if it exists. */
1924 if (journal->j_committing_transaction) { 1929 if (journal->j_committing_transaction) {
1925 JBUFFER_TRACE(jh, "give to committing trans"); 1930 JBUFFER_TRACE(jh, "give to committing trans");
1926 ret = __dispose_buffer(jh, 1931 may_free = __dispose_buffer(jh,
1927 journal->j_committing_transaction); 1932 journal->j_committing_transaction);
1928 journal_put_journal_head(jh); 1933 goto zap_buffer;
1929 spin_unlock(&journal->j_list_lock);
1930 jbd_unlock_bh_state(bh);
1931 spin_unlock(&journal->j_state_lock);
1932 return ret;
1933 } else { 1934 } else {
1934 /* The orphan record's transaction has 1935 /* The orphan record's transaction has
1935 * committed. We can cleanse this buffer */ 1936 * committed. We can cleanse this buffer */
@@ -1950,10 +1951,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1950 } 1951 }
1951 /* 1952 /*
1952 * The buffer is committing, we simply cannot touch 1953 * The buffer is committing, we simply cannot touch
1953 * it. So we just set j_next_transaction to the 1954 * it. If the page is straddling i_size we have to wait
1954 * running transaction (if there is one) and mark 1955 * for commit and try again.
1955 * buffer as freed so that commit code knows it should 1956 */
1956 * clear dirty bits when it is done with the buffer. 1957 if (partial_page) {
1958 tid_t tid = journal->j_committing_transaction->t_tid;
1959
1960 journal_put_journal_head(jh);
1961 spin_unlock(&journal->j_list_lock);
1962 jbd_unlock_bh_state(bh);
1963 spin_unlock(&journal->j_state_lock);
1964 log_wait_commit(journal, tid);
1965 goto retry;
1966 }
1967 /*
1968 * OK, buffer won't be reachable after truncate. We just set
1969 * j_next_transaction to the running transaction (if there is
1970 * one) and mark buffer as freed so that commit code knows it
1971 * should clear dirty bits when it is done with the buffer.
1957 */ 1972 */
1958 set_buffer_freed(bh); 1973 set_buffer_freed(bh);
1959 if (journal->j_running_transaction && buffer_jbddirty(bh)) 1974 if (journal->j_running_transaction && buffer_jbddirty(bh))
@@ -1976,6 +1991,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1976 } 1991 }
1977 1992
1978zap_buffer: 1993zap_buffer:
1994 /*
1995 * This is tricky. Although the buffer is truncated, it may be reused
1996 * if blocksize < pagesize and it is attached to the page straddling
1997 * EOF. Since the buffer might have been added to BJ_Forget list of the
1998 * running transaction, journal_get_write_access() won't clear
1999 * b_modified and credit accounting gets confused. So clear b_modified
2000 * here. */
2001 jh->b_modified = 0;
1979 journal_put_journal_head(jh); 2002 journal_put_journal_head(jh);
1980zap_buffer_no_jh: 2003zap_buffer_no_jh:
1981 spin_unlock(&journal->j_list_lock); 2004 spin_unlock(&journal->j_list_lock);
@@ -2024,7 +2047,8 @@ void journal_invalidatepage(journal_t *journal,
2024 if (offset <= curr_off) { 2047 if (offset <= curr_off) {
2025 /* This block is wholly outside the truncation point */ 2048 /* This block is wholly outside the truncation point */
2026 lock_buffer(bh); 2049 lock_buffer(bh);
2027 may_free &= journal_unmap_buffer(journal, bh); 2050 may_free &= journal_unmap_buffer(journal, bh,
2051 offset > 0);
2028 unlock_buffer(bh); 2052 unlock_buffer(bh);
2029 } 2053 }
2030 curr_off = next_off; 2054 curr_off = next_off;