author     David Chinner <dgc@sgi.com>  2007-02-10 02:32:29 -0500
committer  Tim Shimmin <tes@sgi.com>    2007-02-10 02:32:29 -0500
commit     585e6d8856526a846b90b485abf37ec40e5da1cf
tree       8ecae5c3e10a1753fc178877ab11deadcf9625f3
parent     dac61f521b1e4d2c6c48023f2f2743c6096b48ca
[XFS] Fix a synchronous buftarg flush deadlock when freezing.
At the last stage of a freeze, we flush the buftarg synchronously over and
over again until it succeeds twice without skipping any buffers.

The delwri list flush skips pinned buffers, but tries to flush all others.
It removes the buffers from the delwri list, then tries to lock them one at
a time as it traverses the list to issue the I/O. It holds them locked
until we issue all of the I/O and then unlocks them once we've waited for
it to complete.

The problem is that during a freeze, the filesystem may still be doing
stuff - like flushing delalloc data buffers - in the background and hence
we can be trying to lock buffers that were on the delwri list at the same
time. Hence we can get ABBA deadlocks between threads doing allocation and
the buftarg flush (freeze) thread.

Fix it by skipping locked (and pinned) buffers as we traverse the delwri
buffer list.

SGI-PV: 957195
SGI-Modid: xfs-linux-melb:xfs-kern:27535a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
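To make the locking pattern concrete, here is a minimal userspace sketch
(not the kernel implementation; it assumes POSIX threads, and the names
fake_buf, flush_pass and NBUFS are invented for illustration). It shows
the trylock-and-skip idea the patch applies: never block on a buffer lock
while walking the list, count the contended buffers instead, and let the
caller retry until a pass skips nothing, just as the freeze path retries
the buftarg flush.

	/* Illustrative userspace sketch of trylock-and-skip; build with -lpthread. */
	#include <pthread.h>
	#include <stdio.h>

	#define NBUFS	4

	struct fake_buf {
		pthread_mutex_t	lock;	/* stands in for the xfs_buf lock */
		int		dirty;	/* stands in for XBF_DELWRI */
	};

	static struct fake_buf bufs[NBUFS];

	/*
	 * One flush pass: flush every dirty buffer we can lock without
	 * blocking.  Blocking on the lock here is what allowed the ABBA
	 * deadlock - another thread can hold a buffer lock while waiting
	 * on something the flusher already holds.
	 */
	static int
	flush_pass(void)
	{
		int	i, skipped = 0;

		for (i = 0; i < NBUFS; i++) {
			if (!bufs[i].dirty)
				continue;
			if (pthread_mutex_trylock(&bufs[i].lock) != 0) {
				skipped++;	/* contended: skip, retry later */
				continue;
			}
			bufs[i].dirty = 0;	/* "issue the I/O" */
			pthread_mutex_unlock(&bufs[i].lock);
		}
		return skipped;
	}

	int
	main(void)
	{
		int	i, retries = 0;

		for (i = 0; i < NBUFS; i++) {
			pthread_mutex_init(&bufs[i].lock, NULL);
			bufs[i].dirty = 1;
		}
		/* Retry until a pass completes without skipping anything. */
		while (flush_pass() > 0)
			retries++;
		printf("clean after %d retries\n", retries);
		return 0;
	}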
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 117
1 file changed, 60 insertions(+), 57 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4fb01ffdfd1a..946b00bf3841 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1679,21 +1679,59 @@ xfsbufd_wakeup(
 	return 0;
 }
 
+/*
+ * Move as many buffers as specified to the supplied list
+ * idicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+	xfs_buftarg_t	*target,
+	struct list_head *list,
+	unsigned long	age,
+	int		flags)
+{
+	xfs_buf_t	*bp, *n;
+	struct list_head *dwq = &target->bt_delwrite_queue;
+	spinlock_t	*dwlk = &target->bt_delwrite_lock;
+	int		skipped = 0;
+
+	INIT_LIST_HEAD(list);
+	spin_lock(dwlk);
+	list_for_each_entry_safe(bp, n, dwq, b_list) {
+		XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
+		ASSERT(bp->b_flags & XBF_DELWRI);
+
+		if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
+			if (!(flags & XBT_FORCE_FLUSH) &&
+			    time_before(jiffies, bp->b_queuetime + age)) {
+				xfs_buf_unlock(bp);
+				break;
+			}
+
+			bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
+					 _XBF_RUN_QUEUES);
+			bp->b_flags |= XBF_WRITE;
+			list_move_tail(&bp->b_list, list);
+		} else
+			skipped++;
+	}
+	spin_unlock(dwlk);
+
+	return skipped;
+
+}
+
 STATIC int
 xfsbufd(
 	void		*data)
 {
 	struct list_head tmp;
-	unsigned long	age;
 	xfs_buftarg_t	*target = (xfs_buftarg_t *)data;
-	xfs_buf_t	*bp, *n;
-	struct list_head *dwq = &target->bt_delwrite_queue;
-	spinlock_t	*dwlk = &target->bt_delwrite_lock;
 	int		count;
+	xfs_buf_t	*bp;
 
 	current->flags |= PF_MEMALLOC;
 
-	INIT_LIST_HEAD(&tmp);
 	do {
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1705,37 +1743,19 @@ xfsbufd(
 		schedule_timeout_interruptible(
 			xfs_buf_timer_centisecs * msecs_to_jiffies(10));
 
-		count = 0;
-		age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-		spin_lock(dwlk);
-		list_for_each_entry_safe(bp, n, dwq, b_list) {
-			XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
-			ASSERT(bp->b_flags & XBF_DELWRI);
-
-			if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
-				if (!test_bit(XBT_FORCE_FLUSH,
-						&target->bt_flags) &&
-					time_before(jiffies,
-						bp->b_queuetime + age)) {
-					xfs_buf_unlock(bp);
-					break;
-				}
-
-				bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
-						 _XBF_RUN_QUEUES);
-				bp->b_flags |= XBF_WRITE;
-				list_move_tail(&bp->b_list, &tmp);
-				count++;
-			}
-		}
-		spin_unlock(dwlk);
+		xfs_buf_delwri_split(target, &tmp,
+				xfs_buf_age_centisecs * msecs_to_jiffies(10),
+				test_bit(XBT_FORCE_FLUSH, &target->bt_flags)
+					? XBT_FORCE_FLUSH : 0);
 
+		count = 0;
 		while (!list_empty(&tmp)) {
 			bp = list_entry(tmp.next, xfs_buf_t, b_list);
 			ASSERT(target == bp->b_target);
 
 			list_del_init(&bp->b_list);
 			xfs_buf_iostrategy(bp);
+			count++;
 		}
 
 		if (as_list_len > 0)
@@ -1756,40 +1776,23 @@ xfsbufd(
  */
 int
 xfs_flush_buftarg(
 	xfs_buftarg_t	*target,
 	int		wait)
 {
 	struct list_head tmp;
 	xfs_buf_t	*bp, *n;
 	int		pincount = 0;
-	struct list_head *dwq = &target->bt_delwrite_queue;
-	spinlock_t	*dwlk = &target->bt_delwrite_lock;
 
 	xfs_buf_runall_queues(xfsdatad_workqueue);
 	xfs_buf_runall_queues(xfslogd_workqueue);
 
-	INIT_LIST_HEAD(&tmp);
-	spin_lock(dwlk);
-	list_for_each_entry_safe(bp, n, dwq, b_list) {
-		ASSERT(bp->b_target == target);
-		ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
-		XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
-		if (xfs_buf_ispin(bp)) {
-			pincount++;
-			continue;
-		}
-
-		list_move_tail(&bp->b_list, &tmp);
-	}
-	spin_unlock(dwlk);
+	pincount = xfs_buf_delwri_split(target, &tmp, 0, XBT_FORCE_FLUSH);
 
 	/*
 	 * Dropped the delayed write list lock, now walk the temporary list
 	 */
 	list_for_each_entry_safe(bp, n, &tmp, b_list) {
-		xfs_buf_lock(bp);
-		bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
-		bp->b_flags |= XBF_WRITE;
+		ASSERT(target == bp->b_target);
 		if (wait)
 			bp->b_flags &= ~XBF_ASYNC;
 		else