diff options
author | Jan Kara <jack@suse.cz> | 2012-12-25 13:29:52 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-12-25 13:29:52 -0500 |
commit | 53e872681fed6a43047e71bf927f77d06f467988 (patch) | |
tree | 8b5061acbaf222b3f25df54ddbcaa0b1123c471a | |
parent | 4520fb3c3690f2643006d85f09ecb74554c10e95 (diff) |
ext4: fix deadlock in journal_unmap_buffer()
We cannot wait for transaction commit in journal_unmap_buffer()
because we hold page lock which ranks below transaction start. We
solve the issue by bailing out of journal_unmap_buffer() and
jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible
for waiting for transaction commit to finish and try invalidation
again. Since the issue can happen only for page stradding i_size, it
is simple enough to manually call jbd2_journal_invalidatepage() for
such page from ext4_setattr(), check the return value and wait if
necessary.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/inode.c | 82 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 27 | ||||
-rw-r--r-- | include/linux/jbd2.h | 2 |
3 files changed, 86 insertions, 25 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 12d3fbcff59f..cbfe13bf5b2a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
2894 | block_invalidatepage(page, offset); | 2894 | block_invalidatepage(page, offset); |
2895 | } | 2895 | } |
2896 | 2896 | ||
2897 | static void ext4_journalled_invalidatepage(struct page *page, | 2897 | static int __ext4_journalled_invalidatepage(struct page *page, |
2898 | unsigned long offset) | 2898 | unsigned long offset) |
2899 | { | 2899 | { |
2900 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2900 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
2901 | 2901 | ||
@@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page, | |||
2907 | if (offset == 0) | 2907 | if (offset == 0) |
2908 | ClearPageChecked(page); | 2908 | ClearPageChecked(page); |
2909 | 2909 | ||
2910 | jbd2_journal_invalidatepage(journal, page, offset); | 2910 | return jbd2_journal_invalidatepage(journal, page, offset); |
2911 | } | ||
2912 | |||
2913 | /* Wrapper for aops... */ | ||
2914 | static void ext4_journalled_invalidatepage(struct page *page, | ||
2915 | unsigned long offset) | ||
2916 | { | ||
2917 | WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | ||
2911 | } | 2918 | } |
2912 | 2919 | ||
2913 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2920 | static int ext4_releasepage(struct page *page, gfp_t wait) |
@@ -4314,6 +4321,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4314 | } | 4321 | } |
4315 | 4322 | ||
4316 | /* | 4323 | /* |
4324 | * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate | ||
4325 | * buffers that are attached to a page stradding i_size and are undergoing | ||
4326 | * commit. In that case we have to wait for commit to finish and try again. | ||
4327 | */ | ||
4328 | static void ext4_wait_for_tail_page_commit(struct inode *inode) | ||
4329 | { | ||
4330 | struct page *page; | ||
4331 | unsigned offset; | ||
4332 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
4333 | tid_t commit_tid = 0; | ||
4334 | int ret; | ||
4335 | |||
4336 | offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | ||
4337 | /* | ||
4338 | * All buffers in the last page remain valid? Then there's nothing to | ||
4339 | * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == | ||
4340 | * blocksize case | ||
4341 | */ | ||
4342 | if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) | ||
4343 | return; | ||
4344 | while (1) { | ||
4345 | page = find_lock_page(inode->i_mapping, | ||
4346 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
4347 | if (!page) | ||
4348 | return; | ||
4349 | ret = __ext4_journalled_invalidatepage(page, offset); | ||
4350 | unlock_page(page); | ||
4351 | page_cache_release(page); | ||
4352 | if (ret != -EBUSY) | ||
4353 | return; | ||
4354 | commit_tid = 0; | ||
4355 | read_lock(&journal->j_state_lock); | ||
4356 | if (journal->j_committing_transaction) | ||
4357 | commit_tid = journal->j_committing_transaction->t_tid; | ||
4358 | read_unlock(&journal->j_state_lock); | ||
4359 | if (commit_tid) | ||
4360 | jbd2_log_wait_commit(journal, commit_tid); | ||
4361 | } | ||
4362 | } | ||
4363 | |||
4364 | /* | ||
4317 | * ext4_setattr() | 4365 | * ext4_setattr() |
4318 | * | 4366 | * |
4319 | * Called from notify_change. | 4367 | * Called from notify_change. |
@@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
4426 | } | 4474 | } |
4427 | 4475 | ||
4428 | if (attr->ia_valid & ATTR_SIZE) { | 4476 | if (attr->ia_valid & ATTR_SIZE) { |
4429 | if (attr->ia_size != i_size_read(inode)) { | 4477 | if (attr->ia_size != inode->i_size) { |
4430 | truncate_setsize(inode, attr->ia_size); | 4478 | loff_t oldsize = inode->i_size; |
4431 | /* Inode size will be reduced, wait for dio in flight. | 4479 | |
4432 | * Temporarily disable dioread_nolock to prevent | 4480 | i_size_write(inode, attr->ia_size); |
4433 | * livelock. */ | 4481 | /* |
4482 | * Blocks are going to be removed from the inode. Wait | ||
4483 | * for dio in flight. Temporarily disable | ||
4484 | * dioread_nolock to prevent livelock. | ||
4485 | */ | ||
4434 | if (orphan) { | 4486 | if (orphan) { |
4435 | ext4_inode_block_unlocked_dio(inode); | 4487 | if (!ext4_should_journal_data(inode)) { |
4436 | inode_dio_wait(inode); | 4488 | ext4_inode_block_unlocked_dio(inode); |
4437 | ext4_inode_resume_unlocked_dio(inode); | 4489 | inode_dio_wait(inode); |
4490 | ext4_inode_resume_unlocked_dio(inode); | ||
4491 | } else | ||
4492 | ext4_wait_for_tail_page_commit(inode); | ||
4438 | } | 4493 | } |
4494 | /* | ||
4495 | * Truncate pagecache after we've waited for commit | ||
4496 | * in data=journal mode to make pages freeable. | ||
4497 | */ | ||
4498 | truncate_pagecache(inode, oldsize, inode->i_size); | ||
4439 | } | 4499 | } |
4440 | ext4_truncate(inode); | 4500 | ext4_truncate(inode); |
4441 | } | 4501 | } |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index cd4485db42b3..ddc51a7f4508 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, | |||
1840 | 1840 | ||
1841 | BUFFER_TRACE(bh, "entry"); | 1841 | BUFFER_TRACE(bh, "entry"); |
1842 | 1842 | ||
1843 | retry: | ||
1844 | /* | 1843 | /* |
1845 | * It is safe to proceed here without the j_list_lock because the | 1844 | * It is safe to proceed here without the j_list_lock because the |
1846 | * buffers cannot be stolen by try_to_free_buffers as long as we are | 1845 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
@@ -1935,14 +1934,11 @@ retry: | |||
1935 | * for commit and try again. | 1934 | * for commit and try again. |
1936 | */ | 1935 | */ |
1937 | if (partial_page) { | 1936 | if (partial_page) { |
1938 | tid_t tid = journal->j_committing_transaction->t_tid; | ||
1939 | |||
1940 | jbd2_journal_put_journal_head(jh); | 1937 | jbd2_journal_put_journal_head(jh); |
1941 | spin_unlock(&journal->j_list_lock); | 1938 | spin_unlock(&journal->j_list_lock); |
1942 | jbd_unlock_bh_state(bh); | 1939 | jbd_unlock_bh_state(bh); |
1943 | write_unlock(&journal->j_state_lock); | 1940 | write_unlock(&journal->j_state_lock); |
1944 | jbd2_log_wait_commit(journal, tid); | 1941 | return -EBUSY; |
1945 | goto retry; | ||
1946 | } | 1942 | } |
1947 | /* | 1943 | /* |
1948 | * OK, buffer won't be reachable after truncate. We just set | 1944 | * OK, buffer won't be reachable after truncate. We just set |
@@ -2003,21 +1999,23 @@ zap_buffer_unlocked: | |||
2003 | * @page: page to flush | 1999 | * @page: page to flush |
2004 | * @offset: length of page to invalidate. | 2000 | * @offset: length of page to invalidate. |
2005 | * | 2001 | * |
2006 | * Reap page buffers containing data after offset in page. | 2002 | * Reap page buffers containing data after offset in page. Can return -EBUSY |
2007 | * | 2003 | * if buffers are part of the committing transaction and the page is straddling |
2004 | * i_size. Caller then has to wait for current commit and try again. | ||
2008 | */ | 2005 | */ |
2009 | void jbd2_journal_invalidatepage(journal_t *journal, | 2006 | int jbd2_journal_invalidatepage(journal_t *journal, |
2010 | struct page *page, | 2007 | struct page *page, |
2011 | unsigned long offset) | 2008 | unsigned long offset) |
2012 | { | 2009 | { |
2013 | struct buffer_head *head, *bh, *next; | 2010 | struct buffer_head *head, *bh, *next; |
2014 | unsigned int curr_off = 0; | 2011 | unsigned int curr_off = 0; |
2015 | int may_free = 1; | 2012 | int may_free = 1; |
2013 | int ret = 0; | ||
2016 | 2014 | ||
2017 | if (!PageLocked(page)) | 2015 | if (!PageLocked(page)) |
2018 | BUG(); | 2016 | BUG(); |
2019 | if (!page_has_buffers(page)) | 2017 | if (!page_has_buffers(page)) |
2020 | return; | 2018 | return 0; |
2021 | 2019 | ||
2022 | /* We will potentially be playing with lists other than just the | 2020 | /* We will potentially be playing with lists other than just the |
2023 | * data lists (especially for journaled data mode), so be | 2021 | * data lists (especially for journaled data mode), so be |
@@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
2031 | if (offset <= curr_off) { | 2029 | if (offset <= curr_off) { |
2032 | /* This block is wholly outside the truncation point */ | 2030 | /* This block is wholly outside the truncation point */ |
2033 | lock_buffer(bh); | 2031 | lock_buffer(bh); |
2034 | may_free &= journal_unmap_buffer(journal, bh, | 2032 | ret = journal_unmap_buffer(journal, bh, offset > 0); |
2035 | offset > 0); | ||
2036 | unlock_buffer(bh); | 2033 | unlock_buffer(bh); |
2034 | if (ret < 0) | ||
2035 | return ret; | ||
2036 | may_free &= ret; | ||
2037 | } | 2037 | } |
2038 | curr_off = next_off; | 2038 | curr_off = next_off; |
2039 | bh = next; | 2039 | bh = next; |
@@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
2044 | if (may_free && try_to_free_buffers(page)) | 2044 | if (may_free && try_to_free_buffers(page)) |
2045 | J_ASSERT(!page_has_buffers(page)); | 2045 | J_ASSERT(!page_has_buffers(page)); |
2046 | } | 2046 | } |
2047 | return 0; | ||
2047 | } | 2048 | } |
2048 | 2049 | ||
2049 | /* | 2050 | /* |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1be23d9fdacb..e30b66346942 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, | |||
1098 | extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); | 1098 | extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); |
1099 | extern int jbd2_journal_forget (handle_t *, struct buffer_head *); | 1099 | extern int jbd2_journal_forget (handle_t *, struct buffer_head *); |
1100 | extern void journal_sync_buffer (struct buffer_head *); | 1100 | extern void journal_sync_buffer (struct buffer_head *); |
1101 | extern void jbd2_journal_invalidatepage(journal_t *, | 1101 | extern int jbd2_journal_invalidatepage(journal_t *, |
1102 | struct page *, unsigned long); | 1102 | struct page *, unsigned long); |
1103 | extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); | 1103 | extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); |
1104 | extern int jbd2_journal_stop(handle_t *); | 1104 | extern int jbd2_journal_stop(handle_t *); |