aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2012-12-25 13:29:52 -0500
committerTheodore Ts'o <tytso@mit.edu>2012-12-25 13:29:52 -0500
commit53e872681fed6a43047e71bf927f77d06f467988 (patch)
tree8b5061acbaf222b3f25df54ddbcaa0b1123c471a
parent4520fb3c3690f2643006d85f09ecb74554c10e95 (diff)
ext4: fix deadlock in journal_unmap_buffer()
We cannot wait for transaction commit in journal_unmap_buffer() because we hold page lock which ranks below transaction start. We solve the issue by bailing out of journal_unmap_buffer() and jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible for waiting for transaction commit to finish and try invalidation again. Since the issue can happen only for page stradding i_size, it is simple enough to manually call jbd2_journal_invalidatepage() for such page from ext4_setattr(), check the return value and wait if necessary. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--fs/ext4/inode.c82
-rw-r--r--fs/jbd2/transaction.c27
-rw-r--r--include/linux/jbd2.h2
3 files changed, 86 insertions, 25 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 12d3fbcff59f..cbfe13bf5b2a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
2894 block_invalidatepage(page, offset); 2894 block_invalidatepage(page, offset);
2895} 2895}
2896 2896
2897static void ext4_journalled_invalidatepage(struct page *page, 2897static int __ext4_journalled_invalidatepage(struct page *page,
2898 unsigned long offset) 2898 unsigned long offset)
2899{ 2899{
2900 journal_t *journal = EXT4_JOURNAL(page->mapping->host); 2900 journal_t *journal = EXT4_JOURNAL(page->mapping->host);
2901 2901
@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page,
2907 if (offset == 0) 2907 if (offset == 0)
2908 ClearPageChecked(page); 2908 ClearPageChecked(page);
2909 2909
2910 jbd2_journal_invalidatepage(journal, page, offset); 2910 return jbd2_journal_invalidatepage(journal, page, offset);
2911}
2912
2913/* Wrapper for aops... */
2914static void ext4_journalled_invalidatepage(struct page *page,
2915 unsigned long offset)
2916{
2917 WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0);
2911} 2918}
2912 2919
2913static int ext4_releasepage(struct page *page, gfp_t wait) 2920static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -4314,6 +4321,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4314} 4321}
4315 4322
4316/* 4323/*
4324 * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
4325 * buffers that are attached to a page stradding i_size and are undergoing
4326 * commit. In that case we have to wait for commit to finish and try again.
4327 */
4328static void ext4_wait_for_tail_page_commit(struct inode *inode)
4329{
4330 struct page *page;
4331 unsigned offset;
4332 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
4333 tid_t commit_tid = 0;
4334 int ret;
4335
4336 offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
4337 /*
4338 * All buffers in the last page remain valid? Then there's nothing to
4339 * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
4340 * blocksize case
4341 */
4342 if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
4343 return;
4344 while (1) {
4345 page = find_lock_page(inode->i_mapping,
4346 inode->i_size >> PAGE_CACHE_SHIFT);
4347 if (!page)
4348 return;
4349 ret = __ext4_journalled_invalidatepage(page, offset);
4350 unlock_page(page);
4351 page_cache_release(page);
4352 if (ret != -EBUSY)
4353 return;
4354 commit_tid = 0;
4355 read_lock(&journal->j_state_lock);
4356 if (journal->j_committing_transaction)
4357 commit_tid = journal->j_committing_transaction->t_tid;
4358 read_unlock(&journal->j_state_lock);
4359 if (commit_tid)
4360 jbd2_log_wait_commit(journal, commit_tid);
4361 }
4362}
4363
4364/*
4317 * ext4_setattr() 4365 * ext4_setattr()
4318 * 4366 *
4319 * Called from notify_change. 4367 * Called from notify_change.
@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4426 } 4474 }
4427 4475
4428 if (attr->ia_valid & ATTR_SIZE) { 4476 if (attr->ia_valid & ATTR_SIZE) {
4429 if (attr->ia_size != i_size_read(inode)) { 4477 if (attr->ia_size != inode->i_size) {
4430 truncate_setsize(inode, attr->ia_size); 4478 loff_t oldsize = inode->i_size;
4431 /* Inode size will be reduced, wait for dio in flight. 4479
4432 * Temporarily disable dioread_nolock to prevent 4480 i_size_write(inode, attr->ia_size);
4433 * livelock. */ 4481 /*
4482 * Blocks are going to be removed from the inode. Wait
4483 * for dio in flight. Temporarily disable
4484 * dioread_nolock to prevent livelock.
4485 */
4434 if (orphan) { 4486 if (orphan) {
4435 ext4_inode_block_unlocked_dio(inode); 4487 if (!ext4_should_journal_data(inode)) {
4436 inode_dio_wait(inode); 4488 ext4_inode_block_unlocked_dio(inode);
4437 ext4_inode_resume_unlocked_dio(inode); 4489 inode_dio_wait(inode);
4490 ext4_inode_resume_unlocked_dio(inode);
4491 } else
4492 ext4_wait_for_tail_page_commit(inode);
4438 } 4493 }
4494 /*
4495 * Truncate pagecache after we've waited for commit
4496 * in data=journal mode to make pages freeable.
4497 */
4498 truncate_pagecache(inode, oldsize, inode->i_size);
4439 } 4499 }
4440 ext4_truncate(inode); 4500 ext4_truncate(inode);
4441 } 4501 }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index cd4485db42b3..ddc51a7f4508 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1840 1840
1841 BUFFER_TRACE(bh, "entry"); 1841 BUFFER_TRACE(bh, "entry");
1842 1842
1843retry:
1844 /* 1843 /*
1845 * It is safe to proceed here without the j_list_lock because the 1844 * It is safe to proceed here without the j_list_lock because the
1846 * buffers cannot be stolen by try_to_free_buffers as long as we are 1845 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1935,14 +1934,11 @@ retry:
1935 * for commit and try again. 1934 * for commit and try again.
1936 */ 1935 */
1937 if (partial_page) { 1936 if (partial_page) {
1938 tid_t tid = journal->j_committing_transaction->t_tid;
1939
1940 jbd2_journal_put_journal_head(jh); 1937 jbd2_journal_put_journal_head(jh);
1941 spin_unlock(&journal->j_list_lock); 1938 spin_unlock(&journal->j_list_lock);
1942 jbd_unlock_bh_state(bh); 1939 jbd_unlock_bh_state(bh);
1943 write_unlock(&journal->j_state_lock); 1940 write_unlock(&journal->j_state_lock);
1944 jbd2_log_wait_commit(journal, tid); 1941 return -EBUSY;
1945 goto retry;
1946 } 1942 }
1947 /* 1943 /*
1948 * OK, buffer won't be reachable after truncate. We just set 1944 * OK, buffer won't be reachable after truncate. We just set
@@ -2003,21 +1999,23 @@ zap_buffer_unlocked:
2003 * @page: page to flush 1999 * @page: page to flush
2004 * @offset: length of page to invalidate. 2000 * @offset: length of page to invalidate.
2005 * 2001 *
2006 * Reap page buffers containing data after offset in page. 2002 * Reap page buffers containing data after offset in page. Can return -EBUSY
2007 * 2003 * if buffers are part of the committing transaction and the page is straddling
2004 * i_size. Caller then has to wait for current commit and try again.
2008 */ 2005 */
2009void jbd2_journal_invalidatepage(journal_t *journal, 2006int jbd2_journal_invalidatepage(journal_t *journal,
2010 struct page *page, 2007 struct page *page,
2011 unsigned long offset) 2008 unsigned long offset)
2012{ 2009{
2013 struct buffer_head *head, *bh, *next; 2010 struct buffer_head *head, *bh, *next;
2014 unsigned int curr_off = 0; 2011 unsigned int curr_off = 0;
2015 int may_free = 1; 2012 int may_free = 1;
2013 int ret = 0;
2016 2014
2017 if (!PageLocked(page)) 2015 if (!PageLocked(page))
2018 BUG(); 2016 BUG();
2019 if (!page_has_buffers(page)) 2017 if (!page_has_buffers(page))
2020 return; 2018 return 0;
2021 2019
2022 /* We will potentially be playing with lists other than just the 2020 /* We will potentially be playing with lists other than just the
2023 * data lists (especially for journaled data mode), so be 2021 * data lists (especially for journaled data mode), so be
@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2031 if (offset <= curr_off) { 2029 if (offset <= curr_off) {
2032 /* This block is wholly outside the truncation point */ 2030 /* This block is wholly outside the truncation point */
2033 lock_buffer(bh); 2031 lock_buffer(bh);
2034 may_free &= journal_unmap_buffer(journal, bh, 2032 ret = journal_unmap_buffer(journal, bh, offset > 0);
2035 offset > 0);
2036 unlock_buffer(bh); 2033 unlock_buffer(bh);
2034 if (ret < 0)
2035 return ret;
2036 may_free &= ret;
2037 } 2037 }
2038 curr_off = next_off; 2038 curr_off = next_off;
2039 bh = next; 2039 bh = next;
@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2044 if (may_free && try_to_free_buffers(page)) 2044 if (may_free && try_to_free_buffers(page))
2045 J_ASSERT(!page_has_buffers(page)); 2045 J_ASSERT(!page_has_buffers(page));
2046 } 2046 }
2047 return 0;
2047} 2048}
2048 2049
2049/* 2050/*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 1be23d9fdacb..e30b66346942 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *,
1098extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); 1098extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
1099extern int jbd2_journal_forget (handle_t *, struct buffer_head *); 1099extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
1100extern void journal_sync_buffer (struct buffer_head *); 1100extern void journal_sync_buffer (struct buffer_head *);
1101extern void jbd2_journal_invalidatepage(journal_t *, 1101extern int jbd2_journal_invalidatepage(journal_t *,
1102 struct page *, unsigned long); 1102 struct page *, unsigned long);
1103extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); 1103extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
1104extern int jbd2_journal_stop(handle_t *); 1104extern int jbd2_journal_stop(handle_t *);