aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
author David Chinner <dgc@sgi.com> 2006-01-10 23:37:58 -0500
committer Nathan Scott <nathans@sgi.com> 2006-01-10 23:37:58 -0500
commit a6867a6815fa0241848d4620f2dbd2954f4405d7 (patch)
tree 9565d18c86b935c3a099e4a817137372ce81dde1 /fs/xfs
parent 216d3b2acba469a9bee98a09bb957e012ba7bc25 (diff)
[XFS] Introduce per-filesystem delwri pagebuf flushing to reduce
contention between filesystems and prevent deadlocks between filesystems
when a flush dependency exists between them.

SGI-PV: 947098
SGI-Modid: xfs-linux-melb:xfs-kern:24844a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 136
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h 9
2 files changed, 101 insertions, 44 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6fe21d2b8847..2a8acd38fa1e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,6 +33,7 @@
33 33
34STATIC kmem_cache_t *pagebuf_zone; 34STATIC kmem_cache_t *pagebuf_zone;
35STATIC kmem_shaker_t pagebuf_shake; 35STATIC kmem_shaker_t pagebuf_shake;
36STATIC int xfsbufd(void *);
36STATIC int xfsbufd_wakeup(int, gfp_t); 37STATIC int xfsbufd_wakeup(int, gfp_t);
37STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); 38STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
38 39
@@ -1492,6 +1493,30 @@ xfs_free_bufhash(
1492 btp->bt_hash = NULL; 1493 btp->bt_hash = NULL;
1493} 1494}
1494 1495
1496/*
1497 * buftarg list for delwrite queue processing
1498 */
1499STATIC LIST_HEAD(xfs_buftarg_list);
1500STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
1501
/*
 * Add a buffer target to the global xfs_buftarg_list.
 *
 * The insertion is done with xfs_buftarg_lock held; xfsbufd_wakeup()
 * walks the same list under the same lock.
 *
 * btp: buffer target to add; its bt_list member is the list linkage.
 */
1502STATIC void
1503xfs_register_buftarg(
1504 xfs_buftarg_t *btp)
1505{
1506 spin_lock(&xfs_buftarg_lock);
1507 list_add(&btp->bt_list, &xfs_buftarg_list);
1508 spin_unlock(&xfs_buftarg_lock);
1509}
1510
/*
 * Remove a buffer target from the global xfs_buftarg_list.
 *
 * The removal is done with xfs_buftarg_lock held.  list_del() is used,
 * so btp->bt_list is not re-initialised afterwards; the caller shown in
 * this patch (xfs_free_buftarg) frees btp after unregistering it.
 *
 * btp: buffer target to remove from the list.
 */
1511STATIC void
1512xfs_unregister_buftarg(
1513 xfs_buftarg_t *btp)
1514{
1515 spin_lock(&xfs_buftarg_lock);
1516 list_del(&btp->bt_list);
1517 spin_unlock(&xfs_buftarg_lock);
1518}
1519
1495void 1520void
1496xfs_free_buftarg( 1521xfs_free_buftarg(
1497 xfs_buftarg_t *btp, 1522 xfs_buftarg_t *btp,
@@ -1502,6 +1527,12 @@ xfs_free_buftarg(
1502 xfs_blkdev_put(btp->pbr_bdev); 1527 xfs_blkdev_put(btp->pbr_bdev);
1503 xfs_free_bufhash(btp); 1528 xfs_free_bufhash(btp);
1504 iput(btp->pbr_mapping->host); 1529 iput(btp->pbr_mapping->host);
1530
1531 /* unregister the buftarg first so that we don't get a
1532 * wakeup finding a non-existent task */
1533 xfs_unregister_buftarg(btp);
1534 kthread_stop(btp->bt_task);
1535
1505 kmem_free(btp, sizeof(*btp)); 1536 kmem_free(btp, sizeof(*btp));
1506} 1537}
1507 1538
@@ -1591,6 +1622,26 @@ xfs_mapping_buftarg(
1591 return 0; 1622 return 0;
1592} 1623}
1593 1624
/*
 * Set up the per-buftarg delayed write state and start its flush thread.
 *
 * Initialises bt_list, bt_delwrite_queue and bt_delwrite_lock, clears
 * bt_flags, then starts an "xfsbufd" kthread running xfsbufd() with btp
 * as its argument.  On success the buftarg is added to xfs_buftarg_list.
 *
 * btp: buffer target being set up.
 *
 * Returns 0 on success, or the PTR_ERR() value of the kthread_run()
 * result if the thread could not be started; in that case the buftarg
 * is not registered.
 */
1625STATIC int
1626xfs_alloc_delwrite_queue(
1627 xfs_buftarg_t *btp)
1628{
1629 int error = 0;
1630
1631 INIT_LIST_HEAD(&btp->bt_list);
1632 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1633 spinlock_init(&btp->bt_delwrite_lock, "delwri_lock");
1634 btp->bt_flags = 0;
1635 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
1636 if (IS_ERR(btp->bt_task)) {
1637 error = PTR_ERR(btp->bt_task);
1638 goto out_error;
1639 }
/* Registered only once bt_task is valid: xfsbufd_wakeup() calls
 * wake_up_process(btp->bt_task) on every buftarg on the list. */
1640 xfs_register_buftarg(btp);
1641out_error:
1642 return error;
1643}
1644
1594xfs_buftarg_t * 1645xfs_buftarg_t *
1595xfs_alloc_buftarg( 1646xfs_alloc_buftarg(
1596 struct block_device *bdev, 1647 struct block_device *bdev,
@@ -1606,6 +1657,8 @@ xfs_alloc_buftarg(
1606 goto error; 1657 goto error;
1607 if (xfs_mapping_buftarg(btp, bdev)) 1658 if (xfs_mapping_buftarg(btp, bdev))
1608 goto error; 1659 goto error;
1660 if (xfs_alloc_delwrite_queue(btp))
1661 goto error;
1609 xfs_alloc_bufhash(btp, external); 1662 xfs_alloc_bufhash(btp, external);
1610 return btp; 1663 return btp;
1611 1664
@@ -1618,20 +1671,19 @@ error:
1618/* 1671/*
1619 * Pagebuf delayed write buffer handling 1672 * Pagebuf delayed write buffer handling
1620 */ 1673 */
1621
1622STATIC LIST_HEAD(pbd_delwrite_queue);
1623STATIC DEFINE_SPINLOCK(pbd_delwrite_lock);
1624
1625STATIC void 1674STATIC void
1626pagebuf_delwri_queue( 1675pagebuf_delwri_queue(
1627 xfs_buf_t *pb, 1676 xfs_buf_t *pb,
1628 int unlock) 1677 int unlock)
1629{ 1678{
1679 struct list_head *dwq = &pb->pb_target->bt_delwrite_queue;
1680 spinlock_t *dwlk = &pb->pb_target->bt_delwrite_lock;
1681
1630 PB_TRACE(pb, "delwri_q", (long)unlock); 1682 PB_TRACE(pb, "delwri_q", (long)unlock);
1631 ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) == 1683 ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
1632 (PBF_DELWRI|PBF_ASYNC)); 1684 (PBF_DELWRI|PBF_ASYNC));
1633 1685
1634 spin_lock(&pbd_delwrite_lock); 1686 spin_lock(dwlk);
1635 /* If already in the queue, dequeue and place at tail */ 1687 /* If already in the queue, dequeue and place at tail */
1636 if (!list_empty(&pb->pb_list)) { 1688 if (!list_empty(&pb->pb_list)) {
1637 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1689 ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
@@ -1642,9 +1694,9 @@ pagebuf_delwri_queue(
1642 } 1694 }
1643 1695
1644 pb->pb_flags |= _PBF_DELWRI_Q; 1696 pb->pb_flags |= _PBF_DELWRI_Q;
1645 list_add_tail(&pb->pb_list, &pbd_delwrite_queue); 1697 list_add_tail(&pb->pb_list, dwq);
1646 pb->pb_queuetime = jiffies; 1698 pb->pb_queuetime = jiffies;
1647 spin_unlock(&pbd_delwrite_lock); 1699 spin_unlock(dwlk);
1648 1700
1649 if (unlock) 1701 if (unlock)
1650 pagebuf_unlock(pb); 1702 pagebuf_unlock(pb);
@@ -1654,16 +1706,17 @@ void
1654pagebuf_delwri_dequeue( 1706pagebuf_delwri_dequeue(
1655 xfs_buf_t *pb) 1707 xfs_buf_t *pb)
1656{ 1708{
1709 spinlock_t *dwlk = &pb->pb_target->bt_delwrite_lock;
1657 int dequeued = 0; 1710 int dequeued = 0;
1658 1711
1659 spin_lock(&pbd_delwrite_lock); 1712 spin_lock(dwlk);
1660 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) { 1713 if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
1661 ASSERT(pb->pb_flags & _PBF_DELWRI_Q); 1714 ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
1662 list_del_init(&pb->pb_list); 1715 list_del_init(&pb->pb_list);
1663 dequeued = 1; 1716 dequeued = 1;
1664 } 1717 }
1665 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q); 1718 pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
1666 spin_unlock(&pbd_delwrite_lock); 1719 spin_unlock(dwlk);
1667 1720
1668 if (dequeued) 1721 if (dequeued)
1669 pagebuf_rele(pb); 1722 pagebuf_rele(pb);
@@ -1678,21 +1731,22 @@ pagebuf_runall_queues(
1678 flush_workqueue(queue); 1731 flush_workqueue(queue);
1679} 1732}
1680 1733
1681/* Defines for pagebuf daemon */
1682STATIC struct task_struct *xfsbufd_task;
1683STATIC int xfsbufd_force_flush;
1684STATIC int xfsbufd_force_sleep;
1685
/*
 * xfsbufd_wakeup - callback passed to kmem_shake_register() in
 * pagebuf_init().  Neither version shown here uses the priority or
 * mask arguments, and both always return 0.
 *
 * Old version (left-hand line numbers): returns early if the global
 * xfsbufd_force_sleep is set, otherwise sets xfsbufd_force_flush and
 * wakes the single xfsbufd_task.
 *
 * New version (right-hand line numbers): takes xfs_buftarg_lock and
 * walks xfs_buftarg_list.  Each buftarg whose BT_FORCE_SLEEP bit is set
 * is skipped; for the rest, BT_FORCE_FLUSH is set in bt_flags and the
 * buftarg's own bt_task is woken.
 *
 * NOTE(review): bt_flags is declared as "uint" in xfs_buf.h while
 * set_bit()/test_bit() are applied to it here; the kernel bitops are
 * normally defined on unsigned long -- confirm this is safe on 64-bit
 * (especially big-endian) targets.
 */
1686STATIC int 1734STATIC int
1687xfsbufd_wakeup( 1735xfsbufd_wakeup(
1688 int priority, 1736 int priority,
1689 gfp_t mask) 1737 gfp_t mask)
1690{ 1738{
1691 if (xfsbufd_force_sleep) 1739 xfs_buftarg_t *btp, *n;
1692 return 0; 1740
1693 xfsbufd_force_flush = 1; 1741 spin_lock(&xfs_buftarg_lock);
1694 barrier(); 1742 list_for_each_entry_safe(btp, n, &xfs_buftarg_list, bt_list) {
1695 wake_up_process(xfsbufd_task); 1743 if (test_bit(BT_FORCE_SLEEP, &btp->bt_flags))
1744 continue;
1745 set_bit(BT_FORCE_FLUSH, &btp->bt_flags);
1746 barrier();
1747 wake_up_process(btp->bt_task);
1748 }
1749 spin_unlock(&xfs_buftarg_lock);
1696 return 0; 1750 return 0;
1697} 1751}
1698 1752
@@ -1702,31 +1756,34 @@ xfsbufd(
1702{ 1756{
1703 struct list_head tmp; 1757 struct list_head tmp;
1704 unsigned long age; 1758 unsigned long age;
1705 xfs_buftarg_t *target; 1759 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1706 xfs_buf_t *pb, *n; 1760 xfs_buf_t *pb, *n;
1761 struct list_head *dwq = &target->bt_delwrite_queue;
1762 spinlock_t *dwlk = &target->bt_delwrite_lock;
1707 1763
1708 current->flags |= PF_MEMALLOC; 1764 current->flags |= PF_MEMALLOC;
1709 1765
1710 INIT_LIST_HEAD(&tmp); 1766 INIT_LIST_HEAD(&tmp);
1711 do { 1767 do {
1712 if (unlikely(freezing(current))) { 1768 if (unlikely(freezing(current))) {
1713 xfsbufd_force_sleep = 1; 1769 set_bit(BT_FORCE_SLEEP, &target->bt_flags);
1714 refrigerator(); 1770 refrigerator();
1715 } else { 1771 } else {
1716 xfsbufd_force_sleep = 0; 1772 clear_bit(BT_FORCE_SLEEP, &target->bt_flags);
1717 } 1773 }
1718 1774
1719 schedule_timeout_interruptible( 1775 schedule_timeout_interruptible(
1720 xfs_buf_timer_centisecs * msecs_to_jiffies(10)); 1776 xfs_buf_timer_centisecs * msecs_to_jiffies(10));
1721 1777
1722 age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1778 age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1723 spin_lock(&pbd_delwrite_lock); 1779 spin_lock(dwlk);
1724 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1780 list_for_each_entry_safe(pb, n, dwq, pb_list) {
1725 PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); 1781 PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
1726 ASSERT(pb->pb_flags & PBF_DELWRI); 1782 ASSERT(pb->pb_flags & PBF_DELWRI);
1727 1783
1728 if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { 1784 if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
1729 if (!xfsbufd_force_flush && 1785 if (!test_bit(BT_FORCE_FLUSH,
1786 &target->bt_flags) &&
1730 time_before(jiffies, 1787 time_before(jiffies,
1731 pb->pb_queuetime + age)) { 1788 pb->pb_queuetime + age)) {
1732 pagebuf_unlock(pb); 1789 pagebuf_unlock(pb);
@@ -1738,11 +1795,11 @@ xfsbufd(
1738 list_move(&pb->pb_list, &tmp); 1795 list_move(&pb->pb_list, &tmp);
1739 } 1796 }
1740 } 1797 }
1741 spin_unlock(&pbd_delwrite_lock); 1798 spin_unlock(dwlk);
1742 1799
1743 while (!list_empty(&tmp)) { 1800 while (!list_empty(&tmp)) {
1744 pb = list_entry(tmp.next, xfs_buf_t, pb_list); 1801 pb = list_entry(tmp.next, xfs_buf_t, pb_list);
1745 target = pb->pb_target; 1802 ASSERT(target == pb->pb_target);
1746 1803
1747 list_del_init(&pb->pb_list); 1804 list_del_init(&pb->pb_list);
1748 pagebuf_iostrategy(pb); 1805 pagebuf_iostrategy(pb);
@@ -1753,7 +1810,7 @@ xfsbufd(
1753 if (as_list_len > 0) 1810 if (as_list_len > 0)
1754 purge_addresses(); 1811 purge_addresses();
1755 1812
1756 xfsbufd_force_flush = 0; 1813 clear_bit(BT_FORCE_FLUSH, &target->bt_flags);
1757 } while (!kthread_should_stop()); 1814 } while (!kthread_should_stop());
1758 1815
1759 return 0; 1816 return 0;
@@ -1772,17 +1829,17 @@ xfs_flush_buftarg(
1772 struct list_head tmp; 1829 struct list_head tmp;
1773 xfs_buf_t *pb, *n; 1830 xfs_buf_t *pb, *n;
1774 int pincount = 0; 1831 int pincount = 0;
1832 struct list_head *dwq = &target->bt_delwrite_queue;
1833 spinlock_t *dwlk = &target->bt_delwrite_lock;
1775 1834
1776 pagebuf_runall_queues(xfsdatad_workqueue); 1835 pagebuf_runall_queues(xfsdatad_workqueue);
1777 pagebuf_runall_queues(xfslogd_workqueue); 1836 pagebuf_runall_queues(xfslogd_workqueue);
1778 1837
1779 INIT_LIST_HEAD(&tmp); 1838 INIT_LIST_HEAD(&tmp);
1780 spin_lock(&pbd_delwrite_lock); 1839 spin_lock(dwlk);
1781 list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { 1840 list_for_each_entry_safe(pb, n, dwq, pb_list) {
1782
1783 if (pb->pb_target != target)
1784 continue;
1785 1841
1842 ASSERT(pb->pb_target == target);
1786 ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)); 1843 ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
1787 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); 1844 PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
1788 if (pagebuf_ispin(pb)) { 1845 if (pagebuf_ispin(pb)) {
@@ -1792,7 +1849,7 @@ xfs_flush_buftarg(
1792 1849
1793 list_move(&pb->pb_list, &tmp); 1850 list_move(&pb->pb_list, &tmp);
1794 } 1851 }
1795 spin_unlock(&pbd_delwrite_lock); 1852 spin_unlock(dwlk);
1796 1853
1797 /* 1854 /*
1798 * Dropped the delayed write list lock, now walk the temporary list 1855 * Dropped the delayed write list lock, now walk the temporary list
@@ -1847,20 +1904,12 @@ pagebuf_init(void)
1847 if (!xfsdatad_workqueue) 1904 if (!xfsdatad_workqueue)
1848 goto out_destroy_xfslogd_workqueue; 1905 goto out_destroy_xfslogd_workqueue;
1849 1906
1850 xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
1851 if (IS_ERR(xfsbufd_task)) {
1852 error = PTR_ERR(xfsbufd_task);
1853 goto out_destroy_xfsdatad_workqueue;
1854 }
1855
1856 pagebuf_shake = kmem_shake_register(xfsbufd_wakeup); 1907 pagebuf_shake = kmem_shake_register(xfsbufd_wakeup);
1857 if (!pagebuf_shake) 1908 if (!pagebuf_shake)
1858 goto out_stop_xfsbufd; 1909 goto out_destroy_xfsdatad_workqueue;
1859 1910
1860 return 0; 1911 return 0;
1861 1912
1862 out_stop_xfsbufd:
1863 kthread_stop(xfsbufd_task);
1864 out_destroy_xfsdatad_workqueue: 1913 out_destroy_xfsdatad_workqueue:
1865 destroy_workqueue(xfsdatad_workqueue); 1914 destroy_workqueue(xfsdatad_workqueue);
1866 out_destroy_xfslogd_workqueue: 1915 out_destroy_xfslogd_workqueue:
@@ -1878,7 +1927,6 @@ void
1878pagebuf_terminate(void) 1927pagebuf_terminate(void)
1879{ 1928{
1880 kmem_shake_deregister(pagebuf_shake); 1929 kmem_shake_deregister(pagebuf_shake);
1881 kthread_stop(xfsbufd_task);
1882 destroy_workqueue(xfsdatad_workqueue); 1930 destroy_workqueue(xfsdatad_workqueue);
1883 destroy_workqueue(xfslogd_workqueue); 1931 destroy_workqueue(xfslogd_workqueue);
1884 kmem_zone_destroy(pagebuf_zone); 1932 kmem_zone_destroy(pagebuf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 237a35b915d1..f721d47ad4cc 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -88,6 +88,15 @@ typedef struct xfs_buftarg {
88 uint bt_hashmask; 88 uint bt_hashmask;
89 uint bt_hashshift; 89 uint bt_hashshift;
90 xfs_bufhash_t *bt_hash; 90 xfs_bufhash_t *bt_hash;
91
92 /* per device delwri queue */
93 struct task_struct *bt_task;
94 struct list_head bt_list;
95 struct list_head bt_delwrite_queue;
96 spinlock_t bt_delwrite_lock;
97 uint bt_flags;
98#define BT_FORCE_SLEEP 1
99#define BT_FORCE_FLUSH 2
91} xfs_buftarg_t; 100} xfs_buftarg_t;
92 101
93/* 102/*