aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2014-07-24 05:49:28 -0400
committerDave Chinner <david@fromorbit.com>2014-07-24 05:49:28 -0400
commitdc06f398f00059707236d456d954a3a9d2a829db (patch)
treeb7d0f9a2d9a109a32e6b72af3909b9465b406c23 /fs/xfs
parentf4526397928fff052f795713748f376a2bba1b5e (diff)
xfs: run an eofblocks scan on ENOSPC/EDQUOT
From: Brian Foster <bfoster@redhat.com> Speculative preallocation and and the associated throttling metrics assume we're working with large files on large filesystems. Users have reported inefficiencies in these mechanisms when we happen to be dealing with large files on smaller filesystems. This can occur because while prealloc throttling is aggressive under low free space conditions, it is not active until we reach 5% free space or less. For example, a 40GB filesystem has enough space for several files large enough to have multi-GB preallocations at any given time. If those files are slow growing, they might reserve preallocation for long periods of time as well as avoid the background scanner due to frequent modification. If a new file is written under these conditions, said file has no access to this already reserved space and premature ENOSPC is imminent. To handle this scenario, modify the buffered write ENOSPC handling and retry sequence to invoke an eofblocks scan. In the smaller filesystem scenario, the eofblocks scan resets the usage of preallocation such that when the 5% free space threshold is met, throttling effectively takes over to provide fair and efficient preallocation until legitimate ENOSPC. The eofblocks scan is selective based on the nature of the failure. For example, an EDQUOT failure in a particular quota will use a filtered scan for that quota. Because we don't know which quota might have caused an allocation failure at any given time, we include each applicable quota determined to be under low free space conditions in the scan. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_dquot.h15
-rw-r--r--fs/xfs/xfs_file.c23
-rw-r--r--fs/xfs/xfs_icache.c52
-rw-r--r--fs/xfs/xfs_icache.h1
4 files changed, 87 insertions, 4 deletions
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 68a68f704837..c24c67e22a2a 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -139,6 +139,21 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
139 } 139 }
140} 140}
141 141
142/*
143 * Check whether a dquot is under low free space conditions. We assume the quota
144 * is enabled and enforced.
145 */
146static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
147{
148 int64_t freesp;
149
150 freesp = be64_to_cpu(dqp->q_core.d_blk_hardlimit) - dqp->q_res_bcount;
151 if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
152 return true;
153
154 return false;
155}
156
142#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 157#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
143#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 158#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
144#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 159#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 181605da08e4..fcf91a22f5d8 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,6 +38,7 @@
38#include "xfs_trace.h" 38#include "xfs_trace.h"
39#include "xfs_log.h" 39#include "xfs_log.h"
40#include "xfs_dinode.h" 40#include "xfs_dinode.h"
41#include "xfs_icache.h"
41 42
42#include <linux/aio.h> 43#include <linux/aio.h>
43#include <linux/dcache.h> 44#include <linux/dcache.h>
@@ -689,14 +690,28 @@ write_retry:
689 ret = generic_perform_write(file, from, pos); 690 ret = generic_perform_write(file, from, pos);
690 if (likely(ret >= 0)) 691 if (likely(ret >= 0))
691 iocb->ki_pos = pos + ret; 692 iocb->ki_pos = pos + ret;
693
692 /* 694 /*
693 * If we just got an ENOSPC, try to write back all dirty inodes to 695 * If we hit a space limit, try to free up some lingering preallocated
694 * convert delalloc space to free up some of the excess reserved 696 * space before returning an error. In the case of ENOSPC, first try to
695 * metadata space. 697 * write back all dirty inodes to free up some of the excess reserved
698 * metadata space. This reduces the chances that the eofblocks scan
699 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
700 * also behaves as a filter to prevent too many eofblocks scans from
701 * running at the same time.
696 */ 702 */
697 if (ret == -ENOSPC && !enospc) { 703 if (ret == -EDQUOT && !enospc) {
704 enospc = xfs_inode_free_quota_eofblocks(ip);
705 if (enospc)
706 goto write_retry;
707 } else if (ret == -ENOSPC && !enospc) {
708 struct xfs_eofblocks eofb = {0};
709
698 enospc = 1; 710 enospc = 1;
699 xfs_flush_inodes(ip->i_mount); 711 xfs_flush_inodes(ip->i_mount);
712 eofb.eof_scan_owner = ip->i_ino; /* for locking */
713 eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
714 xfs_icache_free_eofblocks(ip->i_mount, &eofb);
700 goto write_retry; 715 goto write_retry;
701 } 716 }
702 717
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43452081b705..981b2cf51985 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,9 @@
33#include "xfs_trace.h" 33#include "xfs_trace.h"
34#include "xfs_icache.h" 34#include "xfs_icache.h"
35#include "xfs_bmap_util.h" 35#include "xfs_bmap_util.h"
36#include "xfs_quota.h"
37#include "xfs_dquot_item.h"
38#include "xfs_dquot.h"
36 39
37#include <linux/kthread.h> 40#include <linux/kthread.h>
38#include <linux/freezer.h> 41#include <linux/freezer.h>
@@ -1300,6 +1303,55 @@ xfs_icache_free_eofblocks(
1300 eofb, XFS_ICI_EOFBLOCKS_TAG); 1303 eofb, XFS_ICI_EOFBLOCKS_TAG);
1301} 1304}
1302 1305
1306/*
1307 * Run eofblocks scans on the quotas applicable to the inode. For inodes with
1308 * multiple quotas, we don't know exactly which quota caused an allocation
1309 * failure. We make a best effort by including each quota under low free space
1310 * conditions (less than 1% free space) in the scan.
1311 */
1312int
1313xfs_inode_free_quota_eofblocks(
1314 struct xfs_inode *ip)
1315{
1316 int scan = 0;
1317 struct xfs_eofblocks eofb = {0};
1318 struct xfs_dquot *dq;
1319
1320 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1321
1322 /*
1323 * Set the scan owner to avoid a potential livelock. Otherwise, the scan
1324 * can repeatedly trylock on the inode we're currently processing. We
1325 * run a sync scan to increase effectiveness and use the union filter to
1326 * cover all applicable quotas in a single scan.
1327 */
1328 eofb.eof_scan_owner = ip->i_ino;
1329 eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;
1330
1331 if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
1332 dq = xfs_inode_dquot(ip, XFS_DQ_USER);
1333 if (dq && xfs_dquot_lowsp(dq)) {
1334 eofb.eof_uid = VFS_I(ip)->i_uid;
1335 eofb.eof_flags |= XFS_EOF_FLAGS_UID;
1336 scan = 1;
1337 }
1338 }
1339
1340 if (XFS_IS_GQUOTA_ENFORCED(ip->i_mount)) {
1341 dq = xfs_inode_dquot(ip, XFS_DQ_GROUP);
1342 if (dq && xfs_dquot_lowsp(dq)) {
1343 eofb.eof_gid = VFS_I(ip)->i_gid;
1344 eofb.eof_flags |= XFS_EOF_FLAGS_GID;
1345 scan = 1;
1346 }
1347 }
1348
1349 if (scan)
1350 xfs_icache_free_eofblocks(ip->i_mount, &eofb);
1351
1352 return scan;
1353}
1354
1303void 1355void
1304xfs_inode_set_eofblocks_tag( 1356xfs_inode_set_eofblocks_tag(
1305 xfs_inode_t *ip) 1357 xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 98687af6a99d..46748b86b12f 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -58,6 +58,7 @@ void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
58void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); 58void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
59void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); 59void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
60int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *); 60int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);
61int xfs_inode_free_quota_eofblocks(struct xfs_inode *ip);
61void xfs_eofblocks_worker(struct work_struct *); 62void xfs_eofblocks_worker(struct work_struct *);
62 63
63int xfs_inode_ag_iterator(struct xfs_mount *mp, 64int xfs_inode_ag_iterator(struct xfs_mount *mp,