diff options
| author | Jan Kara <jack@suse.cz> | 2012-12-25 13:29:52 -0500 |
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2012-12-25 13:29:52 -0500 |
| commit | 53e872681fed6a43047e71bf927f77d06f467988 (patch) | |
| tree | 8b5061acbaf222b3f25df54ddbcaa0b1123c471a | |
| parent | 4520fb3c3690f2643006d85f09ecb74554c10e95 (diff) | |
ext4: fix deadlock in journal_unmap_buffer()
We cannot wait for transaction commit in journal_unmap_buffer()
because we hold page lock which ranks below transaction start. We
solve the issue by bailing out of journal_unmap_buffer() and
jbd2_journal_invalidatepage() with -EBUSY. Caller is then responsible
for waiting for transaction commit to finish and try invalidation
again. Since the issue can happen only for page stradding i_size, it
is simple enough to manually call jbd2_journal_invalidatepage() for
such page from ext4_setattr(), check the return value and wait if
necessary.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
| -rw-r--r-- | fs/ext4/inode.c | 82 | ||||
| -rw-r--r-- | fs/jbd2/transaction.c | 27 | ||||
| -rw-r--r-- | include/linux/jbd2.h | 2 |
3 files changed, 86 insertions, 25 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 12d3fbcff59..cbfe13bf5b2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -2894,8 +2894,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
| 2894 | block_invalidatepage(page, offset); | 2894 | block_invalidatepage(page, offset); |
| 2895 | } | 2895 | } |
| 2896 | 2896 | ||
| 2897 | static void ext4_journalled_invalidatepage(struct page *page, | 2897 | static int __ext4_journalled_invalidatepage(struct page *page, |
| 2898 | unsigned long offset) | 2898 | unsigned long offset) |
| 2899 | { | 2899 | { |
| 2900 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2900 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
| 2901 | 2901 | ||
| @@ -2907,7 +2907,14 @@ static void ext4_journalled_invalidatepage(struct page *page, | |||
| 2907 | if (offset == 0) | 2907 | if (offset == 0) |
| 2908 | ClearPageChecked(page); | 2908 | ClearPageChecked(page); |
| 2909 | 2909 | ||
| 2910 | jbd2_journal_invalidatepage(journal, page, offset); | 2910 | return jbd2_journal_invalidatepage(journal, page, offset); |
| 2911 | } | ||
| 2912 | |||
| 2913 | /* Wrapper for aops... */ | ||
| 2914 | static void ext4_journalled_invalidatepage(struct page *page, | ||
| 2915 | unsigned long offset) | ||
| 2916 | { | ||
| 2917 | WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | ||
| 2911 | } | 2918 | } |
| 2912 | 2919 | ||
| 2913 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2920 | static int ext4_releasepage(struct page *page, gfp_t wait) |
| @@ -4314,6 +4321,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 4314 | } | 4321 | } |
| 4315 | 4322 | ||
| 4316 | /* | 4323 | /* |
| 4324 | * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate | ||
| 4325 | * buffers that are attached to a page stradding i_size and are undergoing | ||
| 4326 | * commit. In that case we have to wait for commit to finish and try again. | ||
| 4327 | */ | ||
| 4328 | static void ext4_wait_for_tail_page_commit(struct inode *inode) | ||
| 4329 | { | ||
| 4330 | struct page *page; | ||
| 4331 | unsigned offset; | ||
| 4332 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | ||
| 4333 | tid_t commit_tid = 0; | ||
| 4334 | int ret; | ||
| 4335 | |||
| 4336 | offset = inode->i_size & (PAGE_CACHE_SIZE - 1); | ||
| 4337 | /* | ||
| 4338 | * All buffers in the last page remain valid? Then there's nothing to | ||
| 4339 | * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == | ||
| 4340 | * blocksize case | ||
| 4341 | */ | ||
| 4342 | if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) | ||
| 4343 | return; | ||
| 4344 | while (1) { | ||
| 4345 | page = find_lock_page(inode->i_mapping, | ||
| 4346 | inode->i_size >> PAGE_CACHE_SHIFT); | ||
| 4347 | if (!page) | ||
| 4348 | return; | ||
| 4349 | ret = __ext4_journalled_invalidatepage(page, offset); | ||
| 4350 | unlock_page(page); | ||
| 4351 | page_cache_release(page); | ||
| 4352 | if (ret != -EBUSY) | ||
| 4353 | return; | ||
| 4354 | commit_tid = 0; | ||
| 4355 | read_lock(&journal->j_state_lock); | ||
| 4356 | if (journal->j_committing_transaction) | ||
| 4357 | commit_tid = journal->j_committing_transaction->t_tid; | ||
| 4358 | read_unlock(&journal->j_state_lock); | ||
| 4359 | if (commit_tid) | ||
| 4360 | jbd2_log_wait_commit(journal, commit_tid); | ||
| 4361 | } | ||
| 4362 | } | ||
| 4363 | |||
| 4364 | /* | ||
| 4317 | * ext4_setattr() | 4365 | * ext4_setattr() |
| 4318 | * | 4366 | * |
| 4319 | * Called from notify_change. | 4367 | * Called from notify_change. |
| @@ -4426,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 4426 | } | 4474 | } |
| 4427 | 4475 | ||
| 4428 | if (attr->ia_valid & ATTR_SIZE) { | 4476 | if (attr->ia_valid & ATTR_SIZE) { |
| 4429 | if (attr->ia_size != i_size_read(inode)) { | 4477 | if (attr->ia_size != inode->i_size) { |
| 4430 | truncate_setsize(inode, attr->ia_size); | 4478 | loff_t oldsize = inode->i_size; |
| 4431 | /* Inode size will be reduced, wait for dio in flight. | 4479 | |
| 4432 | * Temporarily disable dioread_nolock to prevent | 4480 | i_size_write(inode, attr->ia_size); |
| 4433 | * livelock. */ | 4481 | /* |
| 4482 | * Blocks are going to be removed from the inode. Wait | ||
| 4483 | * for dio in flight. Temporarily disable | ||
| 4484 | * dioread_nolock to prevent livelock. | ||
| 4485 | */ | ||
| 4434 | if (orphan) { | 4486 | if (orphan) { |
| 4435 | ext4_inode_block_unlocked_dio(inode); | 4487 | if (!ext4_should_journal_data(inode)) { |
| 4436 | inode_dio_wait(inode); | 4488 | ext4_inode_block_unlocked_dio(inode); |
| 4437 | ext4_inode_resume_unlocked_dio(inode); | 4489 | inode_dio_wait(inode); |
| 4490 | ext4_inode_resume_unlocked_dio(inode); | ||
| 4491 | } else | ||
| 4492 | ext4_wait_for_tail_page_commit(inode); | ||
| 4438 | } | 4493 | } |
| 4494 | /* | ||
| 4495 | * Truncate pagecache after we've waited for commit | ||
| 4496 | * in data=journal mode to make pages freeable. | ||
| 4497 | */ | ||
| 4498 | truncate_pagecache(inode, oldsize, inode->i_size); | ||
| 4439 | } | 4499 | } |
| 4440 | ext4_truncate(inode); | 4500 | ext4_truncate(inode); |
| 4441 | } | 4501 | } |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index cd4485db42b..ddc51a7f450 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -1840,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, | |||
| 1840 | 1840 | ||
| 1841 | BUFFER_TRACE(bh, "entry"); | 1841 | BUFFER_TRACE(bh, "entry"); |
| 1842 | 1842 | ||
| 1843 | retry: | ||
| 1844 | /* | 1843 | /* |
| 1845 | * It is safe to proceed here without the j_list_lock because the | 1844 | * It is safe to proceed here without the j_list_lock because the |
| 1846 | * buffers cannot be stolen by try_to_free_buffers as long as we are | 1845 | * buffers cannot be stolen by try_to_free_buffers as long as we are |
| @@ -1935,14 +1934,11 @@ retry: | |||
| 1935 | * for commit and try again. | 1934 | * for commit and try again. |
| 1936 | */ | 1935 | */ |
| 1937 | if (partial_page) { | 1936 | if (partial_page) { |
| 1938 | tid_t tid = journal->j_committing_transaction->t_tid; | ||
| 1939 | |||
| 1940 | jbd2_journal_put_journal_head(jh); | 1937 | jbd2_journal_put_journal_head(jh); |
| 1941 | spin_unlock(&journal->j_list_lock); | 1938 | spin_unlock(&journal->j_list_lock); |
| 1942 | jbd_unlock_bh_state(bh); | 1939 | jbd_unlock_bh_state(bh); |
| 1943 | write_unlock(&journal->j_state_lock); | 1940 | write_unlock(&journal->j_state_lock); |
| 1944 | jbd2_log_wait_commit(journal, tid); | 1941 | return -EBUSY; |
| 1945 | goto retry; | ||
| 1946 | } | 1942 | } |
| 1947 | /* | 1943 | /* |
| 1948 | * OK, buffer won't be reachable after truncate. We just set | 1944 | * OK, buffer won't be reachable after truncate. We just set |
| @@ -2003,21 +1999,23 @@ zap_buffer_unlocked: | |||
| 2003 | * @page: page to flush | 1999 | * @page: page to flush |
| 2004 | * @offset: length of page to invalidate. | 2000 | * @offset: length of page to invalidate. |
| 2005 | * | 2001 | * |
| 2006 | * Reap page buffers containing data after offset in page. | 2002 | * Reap page buffers containing data after offset in page. Can return -EBUSY |
| 2007 | * | 2003 | * if buffers are part of the committing transaction and the page is straddling |
| 2004 | * i_size. Caller then has to wait for current commit and try again. | ||
| 2008 | */ | 2005 | */ |
| 2009 | void jbd2_journal_invalidatepage(journal_t *journal, | 2006 | int jbd2_journal_invalidatepage(journal_t *journal, |
| 2010 | struct page *page, | 2007 | struct page *page, |
| 2011 | unsigned long offset) | 2008 | unsigned long offset) |
| 2012 | { | 2009 | { |
| 2013 | struct buffer_head *head, *bh, *next; | 2010 | struct buffer_head *head, *bh, *next; |
| 2014 | unsigned int curr_off = 0; | 2011 | unsigned int curr_off = 0; |
| 2015 | int may_free = 1; | 2012 | int may_free = 1; |
| 2013 | int ret = 0; | ||
| 2016 | 2014 | ||
| 2017 | if (!PageLocked(page)) | 2015 | if (!PageLocked(page)) |
| 2018 | BUG(); | 2016 | BUG(); |
| 2019 | if (!page_has_buffers(page)) | 2017 | if (!page_has_buffers(page)) |
| 2020 | return; | 2018 | return 0; |
| 2021 | 2019 | ||
| 2022 | /* We will potentially be playing with lists other than just the | 2020 | /* We will potentially be playing with lists other than just the |
| 2023 | * data lists (especially for journaled data mode), so be | 2021 | * data lists (especially for journaled data mode), so be |
| @@ -2031,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
| 2031 | if (offset <= curr_off) { | 2029 | if (offset <= curr_off) { |
| 2032 | /* This block is wholly outside the truncation point */ | 2030 | /* This block is wholly outside the truncation point */ |
| 2033 | lock_buffer(bh); | 2031 | lock_buffer(bh); |
| 2034 | may_free &= journal_unmap_buffer(journal, bh, | 2032 | ret = journal_unmap_buffer(journal, bh, offset > 0); |
| 2035 | offset > 0); | ||
| 2036 | unlock_buffer(bh); | 2033 | unlock_buffer(bh); |
| 2034 | if (ret < 0) | ||
| 2035 | return ret; | ||
| 2036 | may_free &= ret; | ||
| 2037 | } | 2037 | } |
| 2038 | curr_off = next_off; | 2038 | curr_off = next_off; |
| 2039 | bh = next; | 2039 | bh = next; |
| @@ -2044,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal, | |||
| 2044 | if (may_free && try_to_free_buffers(page)) | 2044 | if (may_free && try_to_free_buffers(page)) |
| 2045 | J_ASSERT(!page_has_buffers(page)); | 2045 | J_ASSERT(!page_has_buffers(page)); |
| 2046 | } | 2046 | } |
| 2047 | return 0; | ||
| 2047 | } | 2048 | } |
| 2048 | 2049 | ||
| 2049 | /* | 2050 | /* |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 1be23d9fdac..e30b6634694 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
| @@ -1098,7 +1098,7 @@ void jbd2_journal_set_triggers(struct buffer_head *, | |||
| 1098 | extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); | 1098 | extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); |
| 1099 | extern int jbd2_journal_forget (handle_t *, struct buffer_head *); | 1099 | extern int jbd2_journal_forget (handle_t *, struct buffer_head *); |
| 1100 | extern void journal_sync_buffer (struct buffer_head *); | 1100 | extern void journal_sync_buffer (struct buffer_head *); |
| 1101 | extern void jbd2_journal_invalidatepage(journal_t *, | 1101 | extern int jbd2_journal_invalidatepage(journal_t *, |
| 1102 | struct page *, unsigned long); | 1102 | struct page *, unsigned long); |
| 1103 | extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); | 1103 | extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); |
| 1104 | extern int jbd2_journal_stop(handle_t *); | 1104 | extern int jbd2_journal_stop(handle_t *); |
