path: root/fs/xfs/linux-2.6/xfs_buf.c
author		Linus Torvalds <torvalds@linux-foundation.org>	2010-02-26 20:18:52 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-02-26 20:18:52 -0500
commit		b305956abc3c50c52598bbf39b7a5f4850058ba8 (patch)
tree		9046d97af63236dba36bc3be139c7e0a92e09d41	/fs/xfs/linux-2.6/xfs_buf.c
parent		41630959ed5ce694ec2e8c0f3c69743e011394c8 (diff)
parent		398007f863a4af2b4a5a07219c5a617f1a098115 (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (52 commits)
  fs/xfs: Correct NULL test
  xfs: optimize log flushing in xfs_fsync
  xfs: only clear the suid bit once in xfs_write
  xfs: kill xfs_bawrite
  xfs: log changed inodes instead of writing them synchronously
  xfs: remove invalid barrier optimization from xfs_fsync
  xfs: kill the unused XFS_QMOPT_* flush flags V2
  xfs: Use delay write promotion for dquot flushing
  xfs: Sort delayed write buffers before dispatch
  xfs: Don't issue buffer IO direct from AIL push V2
  xfs: Use delayed write for inodes rather than async V2
  xfs: Make inode reclaim states explicit
  xfs: more reserved blocks fixups
  xfs: turn off sign warnings
  xfs: don't hold onto reserved blocks on remount,ro
  xfs: quota limit statvfs available blocks
  xfs: replace KM_LARGE with explicit vmalloc use
  xfs: cleanup up xfs_log_force calling conventions
  xfs: kill XLOG_VEC_SET_TYPE
  xfs: remove duplicate buffer flags
  ...
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.c	290
1 files changed, 245 insertions, 45 deletions
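
Editor's note: among the changes merged below, xfsbufd now sorts its delayed-write buffers by disk block number before dispatch (see the xfs_buf_cmp()/list_sort() hunks in the diff). The in-tree comment observes that the compare callback "is more complex than it needs to be" because list_sort() expects a 32-bit int result while xfs_daddr_t block numbers are 64-bit. The following is a minimal standalone sketch of that pitfall, using made-up types and values rather than anything from the commit itself:

/* Illustrative only -- not part of this commit. */
#include <stdio.h>
#include <stdint.h>

typedef int64_t fake_daddr_t;			/* stand-in for xfs_daddr_t */

/* Compare, then collapse to -1/0/+1, as xfs_buf_cmp() does. */
static int cmp_safe(fake_daddr_t a, fake_daddr_t b)
{
	if (a < b)
		return -1;
	if (a > b)
		return 1;
	return 0;
}

/* Naive version: truncating the 64-bit difference loses information. */
static int cmp_naive(fake_daddr_t a, fake_daddr_t b)
{
	return (int)(a - b);			/* keeps only the low 32 bits */
}

int main(void)
{
	fake_daddr_t a = 0, b = (fake_daddr_t)1 << 32;	/* 2^32 blocks apart */

	printf("safe:  %d\n", cmp_safe(a, b));	/* -1: a sorts before b */
	printf("naive: %d\n", cmp_naive(a, b));	/*  0: ordering is lost */
	return 0;
}

This is why the callback below compares the two block numbers and returns a fixed -1/0/+1 instead of returning the difference directly.
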
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6f3ebb634b8b..6f76ba85f193 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,6 +33,7 @@
 #include <linux/migrate.h>
 #include <linux/backing-dev.h>
 #include <linux/freezer.h>
+#include <linux/list_sort.h>
 
 #include "xfs_sb.h"
 #include "xfs_inum.h"
@@ -1072,22 +1073,30 @@ xfs_buf_ioerror(
 }
 
 int
-xfs_bawrite(
-	void			*mp,
+xfs_bwrite(
+	struct xfs_mount	*mp,
 	struct xfs_buf		*bp)
 {
-	trace_xfs_buf_bawrite(bp, _RET_IP_);
+	int			iowait = (bp->b_flags & XBF_ASYNC) == 0;
+	int			error = 0;
 
-	ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+	bp->b_strat = xfs_bdstrat_cb;
+	bp->b_mount = mp;
+	bp->b_flags |= XBF_WRITE;
+	if (!iowait)
+		bp->b_flags |= _XBF_RUN_QUEUES;
 
 	xfs_buf_delwri_dequeue(bp);
+	xfs_buf_iostrategy(bp);
 
-	bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD);
-	bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES);
+	if (iowait) {
+		error = xfs_buf_iowait(bp);
+		if (error)
+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		xfs_buf_relse(bp);
+	}
 
-	bp->b_mount = mp;
-	bp->b_strat = xfs_bdstrat_cb;
-	return xfs_bdstrat_cb(bp);
+	return error;
 }
 
 void
@@ -1106,6 +1115,126 @@ xfs_bdwrite(
 	xfs_buf_delwri_queue(bp, 1);
 }
 
+/*
+ * Called when we want to stop a buffer from getting written or read.
+ * We attach the EIO error, muck with its flags, and call biodone
+ * so that the proper iodone callbacks get called.
+ */
+STATIC int
+xfs_bioerror(
+	xfs_buf_t *bp)
+{
+#ifdef XFSERRORDEBUG
+	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
+#endif
+
+	/*
+	 * No need to wait until the buffer is unpinned, we aren't flushing it.
+	 */
+	XFS_BUF_ERROR(bp, EIO);
+
+	/*
+	 * We're calling biodone, so delete XBF_DONE flag.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_UNDONE(bp);
+	XFS_BUF_STALE(bp);
+
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	xfs_biodone(bp);
+
+	return EIO;
+}
+
+/*
+ * Same as xfs_bioerror, except that we are releasing the buffer
+ * here ourselves, and avoiding the biodone call.
+ * This is meant for userdata errors; metadata bufs come with
+ * iodone functions attached, so that we can track down errors.
+ */
+STATIC int
+xfs_bioerror_relse(
+	struct xfs_buf	*bp)
+{
+	int64_t		fl = XFS_BUF_BFLAGS(bp);
+	/*
+	 * No need to wait until the buffer is unpinned.
+	 * We aren't flushing it.
+	 *
+	 * chunkhold expects B_DONE to be set, whether
+	 * we actually finish the I/O or not. We don't want to
+	 * change that interface.
+	 */
+	XFS_BUF_UNREAD(bp);
+	XFS_BUF_UNDELAYWRITE(bp);
+	XFS_BUF_DONE(bp);
+	XFS_BUF_STALE(bp);
+	XFS_BUF_CLR_IODONE_FUNC(bp);
+	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+	if (!(fl & XBF_ASYNC)) {
+		/*
+		 * Mark b_error and B_ERROR _both_.
+		 * Lot's of chunkcache code assumes that.
+		 * There's no reason to mark error for
+		 * ASYNC buffers.
+		 */
+		XFS_BUF_ERROR(bp, EIO);
+		XFS_BUF_FINISH_IOWAIT(bp);
+	} else {
+		xfs_buf_relse(bp);
+	}
+
+	return EIO;
+}
+
+
+/*
+ * All xfs metadata buffers except log state machine buffers
+ * get this attached as their b_bdstrat callback function.
+ * This is so that we can catch a buffer
+ * after prematurely unpinning it to forcibly shutdown the filesystem.
+ */
+int
+xfs_bdstrat_cb(
+	struct xfs_buf	*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		/*
+		 * Metadata write that didn't get logged but
+		 * written delayed anyway. These aren't associated
+		 * with a transaction, and can be ignored.
+		 */
+		if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
+			return xfs_bioerror_relse(bp);
+		else
+			return xfs_bioerror(bp);
+	}
+
+	xfs_buf_iorequest(bp);
+	return 0;
+}
+
+/*
+ * Wrapper around bdstrat so that we can stop data from going to disk in case
+ * we are shutting down the filesystem. Typically user data goes thru this
+ * path; one of the exceptions is the superblock.
+ */
+void
+xfsbdstrat(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	if (XFS_FORCED_SHUTDOWN(mp)) {
+		trace_xfs_bdstrat_shut(bp, _RET_IP_);
+		xfs_bioerror_relse(bp);
+		return;
+	}
+
+	xfs_buf_iorequest(bp);
+}
+
 STATIC void
 _xfs_buf_ioend(
 	xfs_buf_t		*bp,
@@ -1324,7 +1453,7 @@ xfs_buf_iomove(
 	xfs_buf_t		*bp,	/* buffer to process		*/
 	size_t			boff,	/* starting buffer offset	*/
 	size_t			bsize,	/* length to copy		*/
-	caddr_t			data,	/* data address			*/
+	void			*data,	/* data address			*/
 	xfs_buf_rw_t		mode)	/* read/write/zero flag		*/
 {
 	size_t			bend, cpoff, csize;
@@ -1406,8 +1535,8 @@ xfs_alloc_bufhash(
 
 	btp->bt_hashshift = external ? 3 : 8;	/* 8 or 256 buckets */
 	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
-	btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
-					sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
+	btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
+					 sizeof(xfs_bufhash_t));
 	for (i = 0; i < (1 << btp->bt_hashshift); i++) {
 		spin_lock_init(&btp->bt_hash[i].bh_lock);
 		INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
@@ -1418,7 +1547,7 @@ STATIC void
 xfs_free_bufhash(
 	xfs_buftarg_t		*btp)
 {
-	kmem_free(btp->bt_hash);
+	kmem_free_large(btp->bt_hash);
 	btp->bt_hash = NULL;
 }
 
@@ -1623,6 +1752,11 @@ xfs_buf_delwri_queue(
 		list_del(&bp->b_list);
 	}
 
+	if (list_empty(dwq)) {
+		/* start xfsbufd as it is about to have something to do */
+		wake_up_process(bp->b_target->bt_task);
+	}
+
 	bp->b_flags |= _XBF_DELWRI_Q;
 	list_add_tail(&bp->b_list, dwq);
 	bp->b_queuetime = jiffies;
@@ -1654,6 +1788,35 @@ xfs_buf_delwri_dequeue(
 	trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
 }
 
+/*
+ * If a delwri buffer needs to be pushed before it has aged out, then promote
+ * it to the head of the delwri queue so that it will be flushed on the next
+ * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
+ * than the age currently needed to flush the buffer. Hence the next time the
+ * xfsbufd sees it is guaranteed to be considered old enough to flush.
+ */
+void
+xfs_buf_delwri_promote(
+	struct xfs_buf	*bp)
+{
+	struct xfs_buftarg *btp = bp->b_target;
+	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
+
+	ASSERT(bp->b_flags & XBF_DELWRI);
+	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
+
+	/*
+	 * Check the buffer age before locking the delayed write queue as we
+	 * don't need to promote buffers that are already past the flush age.
+	 */
+	if (bp->b_queuetime < jiffies - age)
+		return;
+	bp->b_queuetime = jiffies - age;
+	spin_lock(&btp->bt_delwrite_lock);
+	list_move(&bp->b_list, &btp->bt_delwrite_queue);
+	spin_unlock(&btp->bt_delwrite_lock);
+}
+
 STATIC void
 xfs_buf_runall_queues(
 	struct workqueue_struct	*queue)
@@ -1672,6 +1835,8 @@ xfsbufd_wakeup(
 	list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
 		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
 			continue;
+		if (list_empty(&btp->bt_delwrite_queue))
+			continue;
 		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
 		wake_up_process(btp->bt_task);
 	}
@@ -1722,20 +1887,53 @@ xfs_buf_delwri_split(
 
 }
 
+/*
+ * Compare function is more complex than it needs to be because
+ * the return value is only 32 bits and we are doing comparisons
+ * on 64 bit values
+ */
+static int
+xfs_buf_cmp(
+	void		*priv,
+	struct list_head *a,
+	struct list_head *b)
+{
+	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
+	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
+	xfs_daddr_t		diff;
+
+	diff = ap->b_bn - bp->b_bn;
+	if (diff < 0)
+		return -1;
+	if (diff > 0)
+		return 1;
+	return 0;
+}
+
+void
+xfs_buf_delwri_sort(
+	xfs_buftarg_t	*target,
+	struct list_head *list)
+{
+	list_sort(NULL, list, xfs_buf_cmp);
+}
+
 STATIC int
 xfsbufd(
 	void			*data)
 {
-	struct list_head	tmp;
-	xfs_buftarg_t		*target = (xfs_buftarg_t *)data;
-	int			count;
-	xfs_buf_t		*bp;
+	xfs_buftarg_t		*target = (xfs_buftarg_t *)data;
 
 	current->flags |= PF_MEMALLOC;
 
 	set_freezable();
 
 	do {
+		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
+		int	count = 0;
+		struct list_head tmp;
+
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 			refrigerator();
@@ -1743,17 +1941,16 @@ xfsbufd(
 			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 		}
 
-		schedule_timeout_interruptible(
-			xfs_buf_timer_centisecs * msecs_to_jiffies(10));
+		/* sleep for a long time if there is nothing to do. */
+		if (list_empty(&target->bt_delwrite_queue))
+			tout = MAX_SCHEDULE_TIMEOUT;
+		schedule_timeout_interruptible(tout);
 
-		xfs_buf_delwri_split(target, &tmp,
-			xfs_buf_age_centisecs * msecs_to_jiffies(10));
-
-		count = 0;
+		xfs_buf_delwri_split(target, &tmp, age);
+		list_sort(NULL, &tmp, xfs_buf_cmp);
 		while (!list_empty(&tmp)) {
-			bp = list_entry(tmp.next, xfs_buf_t, b_list);
-			ASSERT(target == bp->b_target);
-
+			struct xfs_buf *bp;
+			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_buf_iostrategy(bp);
 			count++;
@@ -1779,42 +1976,45 @@ xfs_flush_buftarg(
 	xfs_buftarg_t	*target,
 	int		wait)
 {
-	struct list_head tmp;
-	xfs_buf_t	*bp, *n;
+	xfs_buf_t	*bp;
 	int		pincount = 0;
+	LIST_HEAD(tmp_list);
+	LIST_HEAD(wait_list);
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
 	xfs_buf_runall_queues(xfslogd_workqueue);
 
 	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp, 0);
+	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
 
 	/*
-	 * Dropped the delayed write list lock, now walk the temporary list
+	 * Dropped the delayed write list lock, now walk the temporary list.
+	 * All I/O is issued async and then if we need to wait for completion
+	 * we do that after issuing all the IO.
 	 */
-	list_for_each_entry_safe(bp, n, &tmp, b_list) {
+	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+	while (!list_empty(&tmp_list)) {
+		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
-		if (wait)
+		list_del_init(&bp->b_list);
+		if (wait) {
 			bp->b_flags &= ~XBF_ASYNC;
-		else
-			list_del_init(&bp->b_list);
-
+			list_add(&bp->b_list, &wait_list);
+		}
 		xfs_buf_iostrategy(bp);
 	}
 
-	if (wait)
+	if (wait) {
+		/* Expedite and wait for IO to complete. */
 		blk_run_address_space(target->bt_mapping);
+		while (!list_empty(&wait_list)) {
+			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
-	/*
-	 * Remaining list items must be flushed before returning
-	 */
-	while (!list_empty(&tmp)) {
-		bp = list_entry(tmp.next, xfs_buf_t, b_list);
-
-		list_del_init(&bp->b_list);
-		xfs_iowait(bp);
-		xfs_buf_relse(bp);
+			list_del_init(&bp->b_list);
+			xfs_iowait(bp);
+			xfs_buf_relse(bp);
+		}
 	}
 
 	return pincount;