aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2010-01-25 23:13:25 -0500
committerDave Chinner <david@fromorbit.com>2010-01-25 23:13:25 -0500
commit089716aa1480b7197bcd678b8477774c379a2768 (patch)
tree63d38d2a210dc103ac106ebf88eecf94005c31b2
parentd808f617ad00a413585b806de340feda5ad9a2da (diff)
xfs: Sort delayed write buffers before dispatch
Currently when the xfsbufd writes delayed write buffers, it pushes them to disk in the order they come off the delayed write list. If there are lots of buffers spread widely over the disk, this results in overwhelming the elevator sort queues in the block layer and we end up losing the possibility of merging adjacent buffers to minimise the number of IOs. Use the new generic list_sort function to sort the delwri dispatch queue before issue to ensure that the buffers are pushed in the most friendly order possible to the lower layers. Signed-off-by: Dave Chinner <david@fromorbit.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c87
1 files changed, 60 insertions, 27 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b306265caa33..4556a4c31e36 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,6 +33,7 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
36 37
37#include "xfs_sb.h" 38#include "xfs_sb.h"
38#include "xfs_inum.h" 39#include "xfs_inum.h"
@@ -1877,14 +1878,42 @@ xfs_buf_delwri_split(
1877 1878
1878} 1879}
1879 1880
1881/*
1882 * Comparator handed to list_sort(): returns -1, 0 or 1 from the sign of
1883 * ap->b_bn - bp->b_bn. It is more complex than it needs to be because the
1884 * return value is only 32 bits and we are comparing 64 bit values.
1885 */
1886static int
1887xfs_buf_cmp(
1888 void *priv, /* not referenced in the body */
1889 struct list_head *a,
1890 struct list_head *b)
1891{
1892 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
1893 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
1894 xfs_daddr_t diff;
1895
1896 diff = ap->b_bn - bp->b_bn;
1897 if (diff < 0)
1898 return -1;
1899 if (diff > 0)
1900 return 1;
1901 return 0;
1902}
1903
1904void
1905xfs_buf_delwri_sort(
1906 xfs_buftarg_t *target, /* not referenced in the body */
1907 struct list_head *list)
1908{
1909 list_sort(NULL, list, xfs_buf_cmp); /* order the list with xfs_buf_cmp; no priv data is passed */
1910}
1911
1880STATIC int 1912STATIC int
1881xfsbufd( 1913xfsbufd(
1882 void *data) 1914 void *data)
1883{ 1915{
1884 struct list_head tmp; 1916 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1885 xfs_buftarg_t *target = (xfs_buftarg_t *)data;
1886 int count;
1887 xfs_buf_t *bp;
1888 1917
1889 current->flags |= PF_MEMALLOC; 1918 current->flags |= PF_MEMALLOC;
1890 1919
@@ -1893,6 +1922,8 @@ xfsbufd(
1893 do { 1922 do {
1894 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); 1923 long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
1895 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); 1924 long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
1925 int count = 0;
1926 struct list_head tmp;
1896 1927
1897 if (unlikely(freezing(current))) { 1928 if (unlikely(freezing(current))) {
1898 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1929 set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1907,11 +1938,10 @@ xfsbufd(
1907 schedule_timeout_interruptible(tout); 1938 schedule_timeout_interruptible(tout);
1908 1939
1909 xfs_buf_delwri_split(target, &tmp, age); 1940 xfs_buf_delwri_split(target, &tmp, age);
1910 count = 0; 1941 list_sort(NULL, &tmp, xfs_buf_cmp);
1911 while (!list_empty(&tmp)) { 1942 while (!list_empty(&tmp)) {
1912 bp = list_entry(tmp.next, xfs_buf_t, b_list); 1943 struct xfs_buf *bp;
1913 ASSERT(target == bp->b_target); 1944 bp = list_first_entry(&tmp, struct xfs_buf, b_list);
1914
1915 list_del_init(&bp->b_list); 1945 list_del_init(&bp->b_list);
1916 xfs_buf_iostrategy(bp); 1946 xfs_buf_iostrategy(bp);
1917 count++; 1947 count++;
@@ -1937,42 +1967,45 @@ xfs_flush_buftarg(
1937 xfs_buftarg_t *target, 1967 xfs_buftarg_t *target,
1938 int wait) 1968 int wait)
1939{ 1969{
1940 struct list_head tmp; 1970 xfs_buf_t *bp;
1941 xfs_buf_t *bp, *n;
1942 int pincount = 0; 1971 int pincount = 0;
1972 LIST_HEAD(tmp_list);
1973 LIST_HEAD(wait_list);
1943 1974
1944 xfs_buf_runall_queues(xfsconvertd_workqueue); 1975 xfs_buf_runall_queues(xfsconvertd_workqueue);
1945 xfs_buf_runall_queues(xfsdatad_workqueue); 1976 xfs_buf_runall_queues(xfsdatad_workqueue);
1946 xfs_buf_runall_queues(xfslogd_workqueue); 1977 xfs_buf_runall_queues(xfslogd_workqueue);
1947 1978
1948 set_bit(XBT_FORCE_FLUSH, &target->bt_flags); 1979 set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
1949 pincount = xfs_buf_delwri_split(target, &tmp, 0); 1980 pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
1950 1981
1951 /* 1982 /*
1952 * Dropped the delayed write list lock, now walk the temporary list 1983 * Dropped the delayed write list lock, now walk the temporary list.
1984 * All I/O is issued async and then if we need to wait for completion
1985 * we do that after issuing all the IO.
1953 */ 1986 */
1954 list_for_each_entry_safe(bp, n, &tmp, b_list) { 1987 list_sort(NULL, &tmp_list, xfs_buf_cmp);
1988 while (!list_empty(&tmp_list)) {
1989 bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
1955 ASSERT(target == bp->b_target); 1990 ASSERT(target == bp->b_target);
1956 if (wait) 1991 list_del_init(&bp->b_list);
1992 if (wait) {
1957 bp->b_flags &= ~XBF_ASYNC; 1993 bp->b_flags &= ~XBF_ASYNC;
1958 else 1994 list_add(&bp->b_list, &wait_list);
1959 list_del_init(&bp->b_list); 1995 }
1960
1961 xfs_buf_iostrategy(bp); 1996 xfs_buf_iostrategy(bp);
1962 } 1997 }
1963 1998
1964 if (wait) 1999 if (wait) {
2000 /* Expedite and wait for IO to complete. */
1965 blk_run_address_space(target->bt_mapping); 2001 blk_run_address_space(target->bt_mapping);
2002 while (!list_empty(&wait_list)) {
2003 bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
1966 2004
1967 /* 2005 list_del_init(&bp->b_list);
1968 * Remaining list items must be flushed before returning 2006 xfs_iowait(bp);
1969 */ 2007 xfs_buf_relse(bp);
1970 while (!list_empty(&tmp)) { 2008 }
1971 bp = list_entry(tmp.next, xfs_buf_t, b_list);
1972
1973 list_del_init(&bp->b_list);
1974 xfs_iowait(bp);
1975 xfs_buf_relse(bp);
1976 } 2009 }
1977 2010
1978 return pincount; 2011 return pincount;