aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Darrick J. Wong <darrick.wong@oracle.com> 2018-01-19 20:47:36 -0500
committer: Darrick J. Wong <darrick.wong@oracle.com> 2018-01-29 10:27:24 -0500
commit: 6d8a45ce29c7d67cc4fc3016dc2a07660c62482a (patch)
tree: b957ead680fc5c439f74e262483d1db289682016
parent: 9f37bd11b442dc7c79d8979ecf627c059bc6bfe7 (diff)
xfs: don't screw up direct writes when freesp is fragmented
xfs_bmap_btalloc is given a range of file offset blocks that must be allocated to some data/attr/cow fork. If the fork has an extent size hint associated with it, the request will be enlarged on both ends to try to satisfy the alignment hint. If free space is fragmented, sometimes we can allocate some blocks but not enough to fulfill any of the requested range. Since bmapi_allocate always trims the new extent mapping to match the originally requested range, this results in bmapi_write returning zero and no mapping.

The consequences of this vary -- buffered writes will simply re-call bmapi_write until it can satisfy at least one block from the original request. Direct IO overwrites notice nmaps == 0 and return -ENOSPC through the dio mechanism out to userspace with the weird result that writes fail even when we have enough space because the ENOSPC return overrides any partial write status. For direct CoW writes the situation was disastrous because nobody notices us returning an invalid zero-length wrong-offset mapping to iomap and the write goes off into space.

Therefore, if free space is so fragmented that we managed to allocate some space but not enough to map into even a single block of the original allocation request range, we should break the alignment hint in order to guarantee at least some forward progress for the direct write. If we return a short allocation to iomap_apply it'll call back about the remaining blocks.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 20
1 files changed, 20 insertions, 0 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index cad21fd0c45d..daae00ed30c5 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3390,6 +3390,8 @@ xfs_bmap_btalloc(
3390 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 3390 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
3391 xfs_agnumber_t ag; 3391 xfs_agnumber_t ag;
3392 xfs_alloc_arg_t args; 3392 xfs_alloc_arg_t args;
3393 xfs_fileoff_t orig_offset;
3394 xfs_extlen_t orig_length;
3393 xfs_extlen_t blen; 3395 xfs_extlen_t blen;
3394 xfs_extlen_t nextminlen = 0; 3396 xfs_extlen_t nextminlen = 0;
3395 int nullfb; /* true if ap->firstblock isn't set */ 3397 int nullfb; /* true if ap->firstblock isn't set */
@@ -3399,6 +3401,8 @@ xfs_bmap_btalloc(
3399 int stripe_align; 3401 int stripe_align;
3400 3402
3401 ASSERT(ap->length); 3403 ASSERT(ap->length);
3404 orig_offset = ap->offset;
3405 orig_length = ap->length;
3402 3406
3403 mp = ap->ip->i_mount; 3407 mp = ap->ip->i_mount;
3404 3408
@@ -3614,6 +3618,22 @@ xfs_bmap_btalloc(
3614 *ap->firstblock = args.fsbno; 3618 *ap->firstblock = args.fsbno;
3615 ASSERT(nullfb || fb_agno <= args.agno); 3619 ASSERT(nullfb || fb_agno <= args.agno);
3616 ap->length = args.len; 3620 ap->length = args.len;
3621 /*
3622 * If the extent size hint is active, we tried to round the
3623 * caller's allocation request offset down to extsz and the
3624 * length up to another extsz boundary. If we found a free
3625 * extent we mapped it in starting at this new offset. If the
3626 * newly mapped space isn't long enough to cover any of the
3627 * range of offsets that was originally requested, move the
3628 * mapping up so that we can fill as much of the caller's
3629 * original request as possible. Free space is apparently
3630 * very fragmented so we're unlikely to be able to satisfy the
3631 * hints anyway.
3632 */
3633 if (ap->length <= orig_length)
3634 ap->offset = orig_offset;
3635 else if (ap->offset + ap->length < orig_offset + orig_length)
3636 ap->offset = orig_offset + orig_length - ap->length;
3617 xfs_bmap_btalloc_accounting(ap, &args); 3637 xfs_bmap_btalloc_accounting(ap, &args);
3618 } else { 3638 } else {
3619 ap->blkno = NULLFSBLOCK; 3639 ap->blkno = NULLFSBLOCK;