aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMingming Cao <cmm@us.ibm.com>2008-07-25 04:46:22 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-25 13:53:32 -0400
commit3f31fddfa26b7594b44ff2b34f9a04ba409e0f91 (patch)
tree88994baf22f65dc4da0bef17ce61eda09c59db2a
parent9ebfbe9f926553eabc21b4400918d1216b27ed0c (diff)
jbd: fix race between free buffer and commit transaction
journal_try_to_free_buffers() could race with jbd commit transaction when the later is holding the buffer reference while waiting for the data buffer to flush to disk. If the caller of journal_try_to_free_buffers() request tries hard to release the buffers, it will treat the failure as error and return back to the caller. We have seen the directo IO failed due to this race. Some of the caller of releasepage() also expecting the buffer to be dropped when passed with GFP_KERNEL mask to the releasepage()->journal_try_to_free_buffers(). With this patch, if the caller is passing the __GFP_WAIT and __GFP_FS to indicating this call could wait, in case of try_to_free_buffers() failed, let's waiting for journal_commit_transaction() to finish commit the current committing transaction, then try to free those buffers again. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mingming Cao <cmm@us.ibm.com> Reviewed-by: Badari Pulavarty <pbadari@us.ibm.com> Acked-by: Jan Kara <jack@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/jbd/transaction.c57
-rw-r--r--mm/filemap.c3
2 files changed, 56 insertions, 4 deletions
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..8dee32007500 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
1648 return; 1648 return;
1649} 1649}
1650 1650
1651/*
1652 * journal_try_to_free_buffers() could race with journal_commit_transaction()
1653 * The latter might still hold the a count on buffers when inspecting
1654 * them on t_syncdata_list or t_locked_list.
1655 *
1656 * journal_try_to_free_buffers() will call this function to
1657 * wait for the current transaction to finish syncing data buffers, before
1658 * tryinf to free that buffer.
1659 *
1660 * Called with journal->j_state_lock held.
1661 */
1662static void journal_wait_for_transaction_sync_data(journal_t *journal)
1663{
1664 transaction_t *transaction = NULL;
1665 tid_t tid;
1666
1667 spin_lock(&journal->j_state_lock);
1668 transaction = journal->j_committing_transaction;
1669
1670 if (!transaction) {
1671 spin_unlock(&journal->j_state_lock);
1672 return;
1673 }
1674
1675 tid = transaction->t_tid;
1676 spin_unlock(&journal->j_state_lock);
1677 log_wait_commit(journal, tid);
1678}
1651 1679
1652/** 1680/**
1653 * int journal_try_to_free_buffers() - try to free page buffers. 1681 * int journal_try_to_free_buffers() - try to free page buffers.
1654 * @journal: journal for operation 1682 * @journal: journal for operation
1655 * @page: to try and free 1683 * @page: to try and free
1656 * @unused_gfp_mask: unused 1684 * @gfp_mask: we use the mask to detect how hard should we try to release
1685 * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
1686 * release the buffers.
1657 * 1687 *
1658 * 1688 *
1659 * For all the buffers on this page, 1689 * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
1682 * journal_try_to_free_buffer() is changing its state. But that 1712 * journal_try_to_free_buffer() is changing its state. But that
1683 * cannot happen because we never reallocate freed data as metadata 1713 * cannot happen because we never reallocate freed data as metadata
1684 * while the data is part of a transaction. Yes? 1714 * while the data is part of a transaction. Yes?
1715 *
1716 * Return 0 on failure, 1 on success
1685 */ 1717 */
1686int journal_try_to_free_buffers(journal_t *journal, 1718int journal_try_to_free_buffers(journal_t *journal,
1687 struct page *page, gfp_t unused_gfp_mask) 1719 struct page *page, gfp_t gfp_mask)
1688{ 1720{
1689 struct buffer_head *head; 1721 struct buffer_head *head;
1690 struct buffer_head *bh; 1722 struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
1713 if (buffer_jbd(bh)) 1745 if (buffer_jbd(bh))
1714 goto busy; 1746 goto busy;
1715 } while ((bh = bh->b_this_page) != head); 1747 } while ((bh = bh->b_this_page) != head);
1748
1716 ret = try_to_free_buffers(page); 1749 ret = try_to_free_buffers(page);
1750
1751 /*
1752 * There are a number of places where journal_try_to_free_buffers()
1753 * could race with journal_commit_transaction(), the later still
1754 * holds the reference to the buffers to free while processing them.
1755 * try_to_free_buffers() failed to free those buffers. Some of the
1756 * caller of releasepage() request page buffers to be dropped, otherwise
1757 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1758 *
1759 * So, if the caller of try_to_release_page() wants the synchronous
1760 * behaviour(i.e make sure buffers are dropped upon return),
1761 * let's wait for the current transaction to finish flush of
1762 * dirty data buffers, then try to free those buffers again,
1763 * with the journal locked.
1764 */
1765 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1766 journal_wait_for_transaction_sync_data(journal);
1767 ret = try_to_free_buffers(page);
1768 }
1769
1717busy: 1770busy:
1718 return ret; 1771 return ret;
1719} 1772}
diff --git a/mm/filemap.c b/mm/filemap.c
index 7675b91f4f63..5d4c880d7cd9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2563,9 +2563,8 @@ EXPORT_SYMBOL(generic_file_aio_write);
2563 * Otherwise return zero. 2563 * Otherwise return zero.
2564 * 2564 *
2565 * The @gfp_mask argument specifies whether I/O may be performed to release 2565 * The @gfp_mask argument specifies whether I/O may be performed to release
2566 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). 2566 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
2567 * 2567 *
2568 * NOTE: @gfp_mask may go away, and this function may become non-blocking.
2569 */ 2568 */
2570int try_to_release_page(struct page *page, gfp_t gfp_mask) 2569int try_to_release_page(struct page *page, gfp_t gfp_mask)
2571{ 2570{