aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2016-06-01 03:38:15 -0400
committerDave Chinner <david@fromorbit.com>2016-06-01 03:38:15 -0400
commit26f1fe858f2744edfc75e92d34a6be0af5e8b45d (patch)
tree8a1c5679ed28cfb5815f6fe3ecaa549ef1a26905 /fs/xfs
parent4478fb1f2db4b1473969ed24cf18264e3a4b1d79 (diff)
xfs: reduce lock hold times in buffer writeback
When we have a lot of metadata to flush from the AIL, the buffer list can get very long. The current submission code tries to batch submission to optimise IO order of the metadata (i.e. ascending block order) to maximise block layer merging of IO to adjacent metadata blocks. Unfortunately, the method used can result in long lock times occurring as buffers locked early on in the buffer list might not be dispatched until the end of the IO list processing. This is because sorting does not occur until after the buffer list has been processed and the buffers that are going to be submitted are locked. Hence when the buffer list is several thousand buffers long, the lock hold times before IO dispatch can be significant. To fix this, sort the buffer list before we start trying to lock and submit buffers. This means we can now submit buffers immediately after they are locked, allowing merging to occur immediately on the plug and dispatch to occur as quickly as possible. This means there is minimal delay between locking the buffer and IO submission occurring, hence reducing the worst case lock hold times seen during delayed write buffer IO submission significantly. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_buf.c60
1 files changed, 35 insertions, 25 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e71cfbd5acb3..efa2a734268f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1774,18 +1774,33 @@ xfs_buf_cmp(
1774 return 0; 1774 return 0;
1775} 1775}
1776 1776
1777/*
1778 * submit buffers for write.
1779 *
1780 * When we have a large buffer list, we do not want to hold all the buffers
1781 * locked while we block on the request queue waiting for IO dispatch. To avoid
1782 * this problem, we lock and submit buffers in groups of 50, thereby minimising
1783 * the lock hold times for lists which may contain thousands of objects.
1784 *
1785 * To do this, we sort the buffer list before we walk the list to lock and
1786 * submit buffers, and we plug and unplug around each group of buffers we
1787 * submit.
1788 */
1777static int 1789static int
1778__xfs_buf_delwri_submit( 1790xfs_buf_delwri_submit_buffers(
1779 struct list_head *buffer_list, 1791 struct list_head *buffer_list,
1780 struct list_head *io_list, 1792 struct list_head *wait_list)
1781 bool wait)
1782{ 1793{
1783 struct blk_plug plug;
1784 struct xfs_buf *bp, *n; 1794 struct xfs_buf *bp, *n;
1795 LIST_HEAD (submit_list);
1785 int pinned = 0; 1796 int pinned = 0;
1797 struct blk_plug plug;
1798
1799 list_sort(NULL, buffer_list, xfs_buf_cmp);
1786 1800
1801 blk_start_plug(&plug);
1787 list_for_each_entry_safe(bp, n, buffer_list, b_list) { 1802 list_for_each_entry_safe(bp, n, buffer_list, b_list) {
1788 if (!wait) { 1803 if (!wait_list) {
1789 if (xfs_buf_ispinned(bp)) { 1804 if (xfs_buf_ispinned(bp)) {
1790 pinned++; 1805 pinned++;
1791 continue; 1806 continue;
@@ -1808,25 +1823,21 @@ __xfs_buf_delwri_submit(
1808 continue; 1823 continue;
1809 } 1824 }
1810 1825
1811 list_move_tail(&bp->b_list, io_list);
1812 trace_xfs_buf_delwri_split(bp, _RET_IP_); 1826 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1813 }
1814
1815 list_sort(NULL, io_list, xfs_buf_cmp);
1816
1817 blk_start_plug(&plug);
1818 list_for_each_entry_safe(bp, n, io_list, b_list) {
1819 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
1820 bp->b_flags |= XBF_WRITE | XBF_ASYNC;
1821 1827
1822 /* 1828 /*
1823 * we do all Io submission async. This means if we need to wait 1829 * We do all IO submission async. This means if we need
1824 * for IO completion we need to take an extra reference so the 1830 * to wait for IO completion we need to take an extra
1825 * buffer is still valid on the other side. 1831 * reference so the buffer is still valid on the other
1832 * side. We need to move the buffer onto the io_list
1833 * at this point so the caller can still access it.
1826 */ 1834 */
1827 if (wait) 1835 bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
1836 bp->b_flags |= XBF_WRITE | XBF_ASYNC;
1837 if (wait_list) {
1828 xfs_buf_hold(bp); 1838 xfs_buf_hold(bp);
1829 else 1839 list_move_tail(&bp->b_list, wait_list);
1840 } else
1830 list_del_init(&bp->b_list); 1841 list_del_init(&bp->b_list);
1831 1842
1832 xfs_buf_submit(bp); 1843 xfs_buf_submit(bp);
@@ -1849,8 +1860,7 @@ int
1849xfs_buf_delwri_submit_nowait( 1860xfs_buf_delwri_submit_nowait(
1850 struct list_head *buffer_list) 1861 struct list_head *buffer_list)
1851{ 1862{
1852 LIST_HEAD (io_list); 1863 return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
1853 return __xfs_buf_delwri_submit(buffer_list, &io_list, false);
1854} 1864}
1855 1865
1856/* 1866/*
@@ -1865,15 +1875,15 @@ int
1865xfs_buf_delwri_submit( 1875xfs_buf_delwri_submit(
1866 struct list_head *buffer_list) 1876 struct list_head *buffer_list)
1867{ 1877{
1868 LIST_HEAD (io_list); 1878 LIST_HEAD (wait_list);
1869 int error = 0, error2; 1879 int error = 0, error2;
1870 struct xfs_buf *bp; 1880 struct xfs_buf *bp;
1871 1881
1872 __xfs_buf_delwri_submit(buffer_list, &io_list, true); 1882 xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);
1873 1883
1874 /* Wait for IO to complete. */ 1884 /* Wait for IO to complete. */
1875 while (!list_empty(&io_list)) { 1885 while (!list_empty(&wait_list)) {
1876 bp = list_first_entry(&io_list, struct xfs_buf, b_list); 1886 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1877 1887
1878 list_del_init(&bp->b_list); 1888 list_del_init(&bp->b_list);
1879 1889