diff options
author | David Chinner <dgc@sgi.com> | 2006-01-10 23:37:58 -0500 |
---|---|---|
committer | Nathan Scott <nathans@sgi.com> | 2006-01-10 23:37:58 -0500 |
commit | a6867a6815fa0241848d4620f2dbd2954f4405d7 (patch) | |
tree | 9565d18c86b935c3a099e4a817137372ce81dde1 /fs | |
parent | 216d3b2acba469a9bee98a09bb957e012ba7bc25 (diff) |
[XFS] Introduce per-filesystem delwri pagebuf flushing to reduce
contention between filesystems and prevent deadlocks between filesystems
when a flush dependency exists between them.
SGI-PV: 947098
SGI-Modid: xfs-linux-melb:xfs-kern:24844a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 136 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 9 |
2 files changed, 101 insertions, 44 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b884..2a8acd38fa1 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -33,6 +33,7 @@ | |||
33 | 33 | ||
34 | STATIC kmem_cache_t *pagebuf_zone; | 34 | STATIC kmem_cache_t *pagebuf_zone; |
35 | STATIC kmem_shaker_t pagebuf_shake; | 35 | STATIC kmem_shaker_t pagebuf_shake; |
36 | STATIC int xfsbufd(void *); | ||
36 | STATIC int xfsbufd_wakeup(int, gfp_t); | 37 | STATIC int xfsbufd_wakeup(int, gfp_t); |
37 | STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); | 38 | STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); |
38 | 39 | ||
@@ -1492,6 +1493,30 @@ xfs_free_bufhash( | |||
1492 | btp->bt_hash = NULL; | 1493 | btp->bt_hash = NULL; |
1493 | } | 1494 | } |
1494 | 1495 | ||
1496 | /* | ||
1497 | * buftarg list for delwrite queue processing | ||
1498 | */ | ||
1499 | STATIC LIST_HEAD(xfs_buftarg_list); | ||
1500 | STATIC DEFINE_SPINLOCK(xfs_buftarg_lock); | ||
1501 | |||
1502 | STATIC void | ||
1503 | xfs_register_buftarg( | ||
1504 | xfs_buftarg_t *btp) | ||
1505 | { | ||
1506 | spin_lock(&xfs_buftarg_lock); | ||
1507 | list_add(&btp->bt_list, &xfs_buftarg_list); | ||
1508 | spin_unlock(&xfs_buftarg_lock); | ||
1509 | } | ||
1510 | |||
1511 | STATIC void | ||
1512 | xfs_unregister_buftarg( | ||
1513 | xfs_buftarg_t *btp) | ||
1514 | { | ||
1515 | spin_lock(&xfs_buftarg_lock); | ||
1516 | list_del(&btp->bt_list); | ||
1517 | spin_unlock(&xfs_buftarg_lock); | ||
1518 | } | ||
1519 | |||
1495 | void | 1520 | void |
1496 | xfs_free_buftarg( | 1521 | xfs_free_buftarg( |
1497 | xfs_buftarg_t *btp, | 1522 | xfs_buftarg_t *btp, |
@@ -1502,6 +1527,12 @@ xfs_free_buftarg( | |||
1502 | xfs_blkdev_put(btp->pbr_bdev); | 1527 | xfs_blkdev_put(btp->pbr_bdev); |
1503 | xfs_free_bufhash(btp); | 1528 | xfs_free_bufhash(btp); |
1504 | iput(btp->pbr_mapping->host); | 1529 | iput(btp->pbr_mapping->host); |
1530 | |||
1531 | /* unregister the buftarg first so that we don't get a | ||
1532 | * wakeup finding a non-existent task */ | ||
1533 | xfs_unregister_buftarg(btp); | ||
1534 | kthread_stop(btp->bt_task); | ||
1535 | |||
1505 | kmem_free(btp, sizeof(*btp)); | 1536 | kmem_free(btp, sizeof(*btp)); |
1506 | } | 1537 | } |
1507 | 1538 | ||
@@ -1591,6 +1622,26 @@ xfs_mapping_buftarg( | |||
1591 | return 0; | 1622 | return 0; |
1592 | } | 1623 | } |
1593 | 1624 | ||
1625 | STATIC int | ||
1626 | xfs_alloc_delwrite_queue( | ||
1627 | xfs_buftarg_t *btp) | ||
1628 | { | ||
1629 | int error = 0; | ||
1630 | |||
1631 | INIT_LIST_HEAD(&btp->bt_list); | ||
1632 | INIT_LIST_HEAD(&btp->bt_delwrite_queue); | ||
1633 | spinlock_init(&btp->bt_delwrite_lock, "delwri_lock"); | ||
1634 | btp->bt_flags = 0; | ||
1635 | btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); | ||
1636 | if (IS_ERR(btp->bt_task)) { | ||
1637 | error = PTR_ERR(btp->bt_task); | ||
1638 | goto out_error; | ||
1639 | } | ||
1640 | xfs_register_buftarg(btp); | ||
1641 | out_error: | ||
1642 | return error; | ||
1643 | } | ||
1644 | |||
1594 | xfs_buftarg_t * | 1645 | xfs_buftarg_t * |
1595 | xfs_alloc_buftarg( | 1646 | xfs_alloc_buftarg( |
1596 | struct block_device *bdev, | 1647 | struct block_device *bdev, |
@@ -1606,6 +1657,8 @@ xfs_alloc_buftarg( | |||
1606 | goto error; | 1657 | goto error; |
1607 | if (xfs_mapping_buftarg(btp, bdev)) | 1658 | if (xfs_mapping_buftarg(btp, bdev)) |
1608 | goto error; | 1659 | goto error; |
1660 | if (xfs_alloc_delwrite_queue(btp)) | ||
1661 | goto error; | ||
1609 | xfs_alloc_bufhash(btp, external); | 1662 | xfs_alloc_bufhash(btp, external); |
1610 | return btp; | 1663 | return btp; |
1611 | 1664 | ||
@@ -1618,20 +1671,19 @@ error: | |||
1618 | /* | 1671 | /* |
1619 | * Pagebuf delayed write buffer handling | 1672 | * Pagebuf delayed write buffer handling |
1620 | */ | 1673 | */ |
1621 | |||
1622 | STATIC LIST_HEAD(pbd_delwrite_queue); | ||
1623 | STATIC DEFINE_SPINLOCK(pbd_delwrite_lock); | ||
1624 | |||
1625 | STATIC void | 1674 | STATIC void |
1626 | pagebuf_delwri_queue( | 1675 | pagebuf_delwri_queue( |
1627 | xfs_buf_t *pb, | 1676 | xfs_buf_t *pb, |
1628 | int unlock) | 1677 | int unlock) |
1629 | { | 1678 | { |
1679 | struct list_head *dwq = &pb->pb_target->bt_delwrite_queue; | ||
1680 | spinlock_t *dwlk = &pb->pb_target->bt_delwrite_lock; | ||
1681 | |||
1630 | PB_TRACE(pb, "delwri_q", (long)unlock); | 1682 | PB_TRACE(pb, "delwri_q", (long)unlock); |
1631 | ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == | 1683 | ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == |
1632 | (PBF_DELWRI|PBF_ASYNC)); | 1684 | (PBF_DELWRI|PBF_ASYNC)); |
1633 | 1685 | ||
1634 | spin_lock(&pbd_delwrite_lock); | 1686 | spin_lock(dwlk); |
1635 | /* If already in the queue, dequeue and place at tail */ | 1687 | /* If already in the queue, dequeue and place at tail */ |
1636 | if (!list_empty(&pb->pb_list)) { | 1688 | if (!list_empty(&pb->pb_list)) { |
1637 | ASSERT(pb->pb_flags & _PBF_DELWRI_Q); | 1689 | ASSERT(pb->pb_flags & _PBF_DELWRI_Q); |
@@ -1642,9 +1694,9 @@ pagebuf_delwri_queue( | |||
1642 | } | 1694 | } |
1643 | 1695 | ||
1644 | pb->pb_flags |= _PBF_DELWRI_Q; | 1696 | pb->pb_flags |= _PBF_DELWRI_Q; |
1645 | list_add_tail(&pb->pb_list, &pbd_delwrite_queue); | 1697 | list_add_tail(&pb->pb_list, dwq); |
1646 | pb->pb_queuetime = jiffies; | 1698 | pb->pb_queuetime = jiffies; |
1647 | spin_unlock(&pbd_delwrite_lock); | 1699 | spin_unlock(dwlk); |
1648 | 1700 | ||
1649 | if (unlock) | 1701 | if (unlock) |
1650 | pagebuf_unlock(pb); | 1702 | pagebuf_unlock(pb); |
@@ -1654,16 +1706,17 @@ void | |||
1654 | pagebuf_delwri_dequeue( | 1706 | pagebuf_delwri_dequeue( |
1655 | xfs_buf_t *pb) | 1707 | xfs_buf_t *pb) |
1656 | { | 1708 | { |
1709 | spinlock_t *dwlk = &pb->pb_target->bt_delwrite_lock; | ||
1657 | int dequeued = 0; | 1710 | int dequeued = 0; |
1658 | 1711 | ||
1659 | spin_lock(&pbd_delwrite_lock); | 1712 | spin_lock(dwlk); |
1660 | if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { | 1713 | if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { |
1661 | ASSERT(pb->pb_flags & _PBF_DELWRI_Q); | 1714 | ASSERT(pb->pb_flags & _PBF_DELWRI_Q); |
1662 | list_del_init(&pb->pb_list); | 1715 | list_del_init(&pb->pb_list); |
1663 | dequeued = 1; | 1716 | dequeued = 1; |
1664 | } | 1717 | } |
1665 | pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); | 1718 | pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); |
1666 | spin_unlock(&pbd_delwrite_lock); | 1719 | spin_unlock(dwlk); |
1667 | 1720 | ||
1668 | if (dequeued) | 1721 | if (dequeued) |
1669 | pagebuf_rele(pb); | 1722 | pagebuf_rele(pb); |
@@ -1678,21 +1731,22 @@ pagebuf_runall_queues( | |||
1678 | flush_workqueue(queue); | 1731 | flush_workqueue(queue); |
1679 | } | 1732 | } |
1680 | 1733 | ||
1681 | /* Defines for pagebuf daemon */ | ||
1682 | STATIC struct task_struct *xfsbufd_task; | ||
1683 | STATIC int xfsbufd_force_flush; | ||
1684 | STATIC int xfsbufd_force_sleep; | ||
1685 | |||
1686 | STATIC int | 1734 | STATIC int |
1687 | xfsbufd_wakeup( | 1735 | xfsbufd_wakeup( |
1688 | int priority, | 1736 | int priority, |
1689 | gfp_t mask) | 1737 | gfp_t mask) |
1690 | { | 1738 | { |
1691 | if (xfsbufd_force_sleep) | 1739 | xfs_buftarg_t *btp, *n; |
1692 | return 0; | 1740 | |
1693 | xfsbufd_force_flush = 1; | 1741 | spin_lock(&xfs_buftarg_lock); |
1694 | barrier(); | 1742 | list_for_each_entry_safe(btp, n, &xfs_buftarg_list, bt_list) { |
1695 | wake_up_process(xfsbufd_task); | 1743 | if (test_bit(BT_FORCE_SLEEP, &btp->bt_flags)) |
1744 | continue; | ||
1745 | set_bit(BT_FORCE_FLUSH, &btp->bt_flags); | ||
1746 | barrier(); | ||
1747 | wake_up_process(btp->bt_task); | ||
1748 | } | ||
1749 | spin_unlock(&xfs_buftarg_lock); | ||
1696 | return 0; | 1750 | return 0; |
1697 | } | 1751 | } |
1698 | 1752 | ||
@@ -1702,31 +1756,34 @@ xfsbufd( | |||
1702 | { | 1756 | { |
1703 | struct list_head tmp; | 1757 | struct list_head tmp; |
1704 | unsigned long age; | 1758 | unsigned long age; |
1705 | xfs_buftarg_t *target; | 1759 | xfs_buftarg_t *target = (xfs_buftarg_t *)data; |
1706 | xfs_buf_t *pb, *n; | 1760 | xfs_buf_t *pb, *n; |
1761 | struct list_head *dwq = &target->bt_delwrite_queue; | ||
1762 | spinlock_t *dwlk = &target->bt_delwrite_lock; | ||
1707 | 1763 | ||
1708 | current->flags |= PF_MEMALLOC; | 1764 | current->flags |= PF_MEMALLOC; |
1709 | 1765 | ||
1710 | INIT_LIST_HEAD(&tmp); | 1766 | INIT_LIST_HEAD(&tmp); |
1711 | do { | 1767 | do { |
1712 | if (unlikely(freezing(current))) { | 1768 | if (unlikely(freezing(current))) { |
1713 | xfsbufd_force_sleep = 1; | 1769 | set_bit(BT_FORCE_SLEEP, &target->bt_flags); |
1714 | refrigerator(); | 1770 | refrigerator(); |
1715 | } else { | 1771 | } else { |
1716 | xfsbufd_force_sleep = 0; | 1772 | clear_bit(BT_FORCE_SLEEP, &target->bt_flags); |
1717 | } | 1773 | } |
1718 | 1774 | ||
1719 | schedule_timeout_interruptible( | 1775 | schedule_timeout_interruptible( |
1720 | xfs_buf_timer_centisecs * msecs_to_jiffies(10)); | 1776 | xfs_buf_timer_centisecs * msecs_to_jiffies(10)); |
1721 | 1777 | ||
1722 | age = xfs_buf_age_centisecs * msecs_to_jiffies(10); | 1778 | age = xfs_buf_age_centisecs * msecs_to_jiffies(10); |
1723 | spin_lock(&pbd_delwrite_lock); | 1779 | spin_lock(dwlk); |
1724 | list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { | 1780 | list_for_each_entry_safe(pb, n, dwq, pb_list) { |
1725 | PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); | 1781 | PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); |
1726 | ASSERT(pb->pb_flags & PBF_DELWRI); | 1782 | ASSERT(pb->pb_flags & PBF_DELWRI); |
1727 | 1783 | ||
1728 | if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { | 1784 | if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { |
1729 | if (!xfsbufd_force_flush && | 1785 | if (!test_bit(BT_FORCE_FLUSH, |
1786 | &target->bt_flags) && | ||
1730 | time_before(jiffies, | 1787 | time_before(jiffies, |
1731 | pb->pb_queuetime + age)) { | 1788 | pb->pb_queuetime + age)) { |
1732 | pagebuf_unlock(pb); | 1789 | pagebuf_unlock(pb); |
@@ -1738,11 +1795,11 @@ xfsbufd( | |||
1738 | list_move(&pb->pb_list, &tmp); | 1795 | list_move(&pb->pb_list, &tmp); |
1739 | } | 1796 | } |
1740 | } | 1797 | } |
1741 | spin_unlock(&pbd_delwrite_lock); | 1798 | spin_unlock(dwlk); |
1742 | 1799 | ||
1743 | while (!list_empty(&tmp)) { | 1800 | while (!list_empty(&tmp)) { |
1744 | pb = list_entry(tmp.next, xfs_buf_t, pb_list); | 1801 | pb = list_entry(tmp.next, xfs_buf_t, pb_list); |
1745 | target = pb->pb_target; | 1802 | ASSERT(target == pb->pb_target); |
1746 | 1803 | ||
1747 | list_del_init(&pb->pb_list); | 1804 | list_del_init(&pb->pb_list); |
1748 | pagebuf_iostrategy(pb); | 1805 | pagebuf_iostrategy(pb); |
@@ -1753,7 +1810,7 @@ xfsbufd( | |||
1753 | if (as_list_len > 0) | 1810 | if (as_list_len > 0) |
1754 | purge_addresses(); | 1811 | purge_addresses(); |
1755 | 1812 | ||
1756 | xfsbufd_force_flush = 0; | 1813 | clear_bit(BT_FORCE_FLUSH, &target->bt_flags); |
1757 | } while (!kthread_should_stop()); | 1814 | } while (!kthread_should_stop()); |
1758 | 1815 | ||
1759 | return 0; | 1816 | return 0; |
@@ -1772,17 +1829,17 @@ xfs_flush_buftarg( | |||
1772 | struct list_head tmp; | 1829 | struct list_head tmp; |
1773 | xfs_buf_t *pb, *n; | 1830 | xfs_buf_t *pb, *n; |
1774 | int pincount = 0; | 1831 | int pincount = 0; |
1832 | struct list_head *dwq = &target->bt_delwrite_queue; | ||
1833 | spinlock_t *dwlk = &target->bt_delwrite_lock; | ||
1775 | 1834 | ||
1776 | pagebuf_runall_queues(xfsdatad_workqueue); | 1835 | pagebuf_runall_queues(xfsdatad_workqueue); |
1777 | pagebuf_runall_queues(xfslogd_workqueue); | 1836 | pagebuf_runall_queues(xfslogd_workqueue); |
1778 | 1837 | ||
1779 | INIT_LIST_HEAD(&tmp); | 1838 | INIT_LIST_HEAD(&tmp); |
1780 | spin_lock(&pbd_delwrite_lock); | 1839 | spin_lock(dwlk); |
1781 | list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { | 1840 | list_for_each_entry_safe(pb, n, dwq, pb_list) { |
1782 | |||
1783 | if (pb->pb_target != target) | ||
1784 | continue; | ||
1785 | 1841 | ||
1842 | ASSERT(pb->pb_target == target); | ||
1786 | ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); | 1843 | ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); |
1787 | PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); | 1844 | PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); |
1788 | if (pagebuf_ispin(pb)) { | 1845 | if (pagebuf_ispin(pb)) { |
@@ -1792,7 +1849,7 @@ xfs_flush_buftarg( | |||
1792 | 1849 | ||
1793 | list_move(&pb->pb_list, &tmp); | 1850 | list_move(&pb->pb_list, &tmp); |
1794 | } | 1851 | } |
1795 | spin_unlock(&pbd_delwrite_lock); | 1852 | spin_unlock(dwlk); |
1796 | 1853 | ||
1797 | /* | 1854 | /* |
1798 | * Dropped the delayed write list lock, now walk the temporary list | 1855 | * Dropped the delayed write list lock, now walk the temporary list |
@@ -1847,20 +1904,12 @@ pagebuf_init(void) | |||
1847 | if (!xfsdatad_workqueue) | 1904 | if (!xfsdatad_workqueue) |
1848 | goto out_destroy_xfslogd_workqueue; | 1905 | goto out_destroy_xfslogd_workqueue; |
1849 | 1906 | ||
1850 | xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd"); | ||
1851 | if (IS_ERR(xfsbufd_task)) { | ||
1852 | error = PTR_ERR(xfsbufd_task); | ||
1853 | goto out_destroy_xfsdatad_workqueue; | ||
1854 | } | ||
1855 | |||
1856 | pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); | 1907 | pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); |
1857 | if (!pagebuf_shake) | 1908 | if (!pagebuf_shake) |
1858 | goto out_stop_xfsbufd; | 1909 | goto out_destroy_xfsdatad_workqueue; |
1859 | 1910 | ||
1860 | return 0; | 1911 | return 0; |
1861 | 1912 | ||
1862 | out_stop_xfsbufd: | ||
1863 | kthread_stop(xfsbufd_task); | ||
1864 | out_destroy_xfsdatad_workqueue: | 1913 | out_destroy_xfsdatad_workqueue: |
1865 | destroy_workqueue(xfsdatad_workqueue); | 1914 | destroy_workqueue(xfsdatad_workqueue); |
1866 | out_destroy_xfslogd_workqueue: | 1915 | out_destroy_xfslogd_workqueue: |
@@ -1878,7 +1927,6 @@ void | |||
1878 | pagebuf_terminate(void) | 1927 | pagebuf_terminate(void) |
1879 | { | 1928 | { |
1880 | kmem_shake_deregister(pagebuf_shake); | 1929 | kmem_shake_deregister(pagebuf_shake); |
1881 | kthread_stop(xfsbufd_task); | ||
1882 | destroy_workqueue(xfsdatad_workqueue); | 1930 | destroy_workqueue(xfsdatad_workqueue); |
1883 | destroy_workqueue(xfslogd_workqueue); | 1931 | destroy_workqueue(xfslogd_workqueue); |
1884 | kmem_zone_destroy(pagebuf_zone); | 1932 | kmem_zone_destroy(pagebuf_zone); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 237a35b915d..f721d47ad4c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -88,6 +88,15 @@ typedef struct xfs_buftarg { | |||
88 | uint bt_hashmask; | 88 | uint bt_hashmask; |
89 | uint bt_hashshift; | 89 | uint bt_hashshift; |
90 | xfs_bufhash_t *bt_hash; | 90 | xfs_bufhash_t *bt_hash; |
91 | |||
92 | /* per device delwri queue */ | ||
93 | struct task_struct *bt_task; | ||
94 | struct list_head bt_list; | ||
95 | struct list_head bt_delwrite_queue; | ||
96 | spinlock_t bt_delwrite_lock; | ||
97 | uint bt_flags; | ||
98 | #define BT_FORCE_SLEEP 1 | ||
99 | #define BT_FORCE_FLUSH 2 | ||
91 | } xfs_buftarg_t; | 100 | } xfs_buftarg_t; |
92 | 101 | ||
93 | /* | 102 | /* |