aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMingming Cao <cmm@us.ibm.com>2008-07-13 21:06:39 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-07-13 21:06:39 -0400
commit530576bbf379fc45cfb34f246257d8526db44567 (patch)
treed4a89bf6f2736816e1b43abfdafcf2d61bbd19ef /fs
parent772cb7c83ba256a11c7bf99a11bef3858d23767c (diff)
jbd2: fix race between jbd2_journal_try_to_free_buffers() and jbd2 commit transaction
journal_try_to_free_buffers() could race with jbd commit transaction when the later is holding the buffer reference while waiting for the data buffer to flush to disk. If the caller of journal_try_to_free_buffers() request tries hard to release the buffers, it will treat the failure as error and return back to the caller. We have seen the directo IO failed due to this race. Some of the caller of releasepage() also expecting the buffer to be dropped when passed with GFP_KERNEL mask to the releasepage()->journal_try_to_free_buffers(). With this patch, if the caller is passing the GFP_KERNEL to indicating this call could wait, in case of try_to_free_buffers() failed, let's waiting for journal_commit_transaction() to finish commit the current committing transaction , then try to free those buffers again with journal locked. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Reviewed-by: Badari Pulavarty <pbadari@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/jbd2/transaction.c62
1 files changed, 58 insertions, 4 deletions
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d6e006e67804..ba620c4493d2 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -41,7 +41,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
41 * new transaction and we can't block without protecting against other 41 * new transaction and we can't block without protecting against other
42 * processes trying to touch the journal while it is in transition. 42 * processes trying to touch the journal while it is in transition.
43 * 43 *
44 * Called under j_state_lock
45 */ 44 */
46 45
47static transaction_t * 46static transaction_t *
@@ -1656,12 +1655,43 @@ out:
1656 return; 1655 return;
1657} 1656}
1658 1657
1658/*
1659 * jbd2_journal_try_to_free_buffers() could race with
1660 * jbd2_journal_commit_transaction(). The later might still hold the
1661 * reference count to the buffers when inspecting them on
1662 * t_syncdata_list or t_locked_list.
1663 *
1664 * jbd2_journal_try_to_free_buffers() will call this function to
1665 * wait for the current transaction to finish syncing data buffers, before
1666 * try to free that buffer.
1667 *
1668 * Called with journal->j_state_lock hold.
1669 */
1670static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1671{
1672 transaction_t *transaction;
1673 tid_t tid;
1674
1675 spin_lock(&journal->j_state_lock);
1676 transaction = journal->j_committing_transaction;
1677
1678 if (!transaction) {
1679 spin_unlock(&journal->j_state_lock);
1680 return;
1681 }
1682
1683 tid = transaction->t_tid;
1684 spin_unlock(&journal->j_state_lock);
1685 jbd2_log_wait_commit(journal, tid);
1686}
1659 1687
1660/** 1688/**
1661 * int jbd2_journal_try_to_free_buffers() - try to free page buffers. 1689 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1662 * @journal: journal for operation 1690 * @journal: journal for operation
1663 * @page: to try and free 1691 * @page: to try and free
1664 * @unused_gfp_mask: unused 1692 * @gfp_mask: we use the mask to detect how hard should we try to release
1693 * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
1694 * release the buffers.
1665 * 1695 *
1666 * 1696 *
1667 * For all the buffers on this page, 1697 * For all the buffers on this page,
@@ -1690,9 +1720,11 @@ out:
1690 * journal_try_to_free_buffer() is changing its state. But that 1720 * journal_try_to_free_buffer() is changing its state. But that
1691 * cannot happen because we never reallocate freed data as metadata 1721 * cannot happen because we never reallocate freed data as metadata
1692 * while the data is part of a transaction. Yes? 1722 * while the data is part of a transaction. Yes?
1723 *
1724 * Return 0 on failure, 1 on success
1693 */ 1725 */
1694int jbd2_journal_try_to_free_buffers(journal_t *journal, 1726int jbd2_journal_try_to_free_buffers(journal_t *journal,
1695 struct page *page, gfp_t unused_gfp_mask) 1727 struct page *page, gfp_t gfp_mask)
1696{ 1728{
1697 struct buffer_head *head; 1729 struct buffer_head *head;
1698 struct buffer_head *bh; 1730 struct buffer_head *bh;
@@ -1708,7 +1740,8 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1708 /* 1740 /*
1709 * We take our own ref against the journal_head here to avoid 1741 * We take our own ref against the journal_head here to avoid
1710 * having to add tons of locking around each instance of 1742 * having to add tons of locking around each instance of
1711 * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). 1743 * jbd2_journal_remove_journal_head() and
1744 * jbd2_journal_put_journal_head().
1712 */ 1745 */
1713 jh = jbd2_journal_grab_journal_head(bh); 1746 jh = jbd2_journal_grab_journal_head(bh);
1714 if (!jh) 1747 if (!jh)
@@ -1721,7 +1754,28 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1721 if (buffer_jbd(bh)) 1754 if (buffer_jbd(bh))
1722 goto busy; 1755 goto busy;
1723 } while ((bh = bh->b_this_page) != head); 1756 } while ((bh = bh->b_this_page) != head);
1757
1724 ret = try_to_free_buffers(page); 1758 ret = try_to_free_buffers(page);
1759
1760 /*
1761 * There are a number of places where jbd2_journal_try_to_free_buffers()
1762 * could race with jbd2_journal_commit_transaction(), the later still
1763 * holds the reference to the buffers to free while processing them.
1764 * try_to_free_buffers() failed to free those buffers. Some of the
1765 * caller of releasepage() request page buffers to be dropped, otherwise
1766 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1767 *
1768 * So, if the caller of try_to_release_page() wants the synchronous
1769 * behaviour(i.e make sure buffers are dropped upon return),
1770 * let's wait for the current transaction to finish flush of
1771 * dirty data buffers, then try to free those buffers again,
1772 * with the journal locked.
1773 */
1774 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1775 jbd2_journal_wait_for_transaction_sync_data(journal);
1776 ret = try_to_free_buffers(page);
1777 }
1778
1725busy: 1779busy:
1726 return ret; 1780 return ret;
1727} 1781}