author		Dave Chinner <dchinner@redhat.com>	2013-02-11 00:05:01 -0500
committer	Ben Myers <bpm@sgi.com>			2013-02-14 18:21:32 -0500
commit		a1e16c26660b301cc8423185924cf1b0b16ea92b (patch)
tree		b0dfc8be0dd7a91d32b3280517ca64ca3f3c5f79 /fs/xfs/xfs_iomap.c
parent		311f08acde635e4e5ccea9b9d8c856cc2e0ced95 (diff)
xfs: limit speculative prealloc size on sparse files
Speculative preallocation based on the current file size works well for
contiguous files, but is sub-optimal for sparse files, where the EOF
preallocation can fill holes and result in large amounts of zeros being
written when it is not necessary.

The algorithm is modified to prevent EOF speculative preallocation from
triggering larger allocations on IO patterns of truncate-to-zero, seek,
write, seek, write, ..., which result in non-sparse files for large
files. This, unfortunately, is the way cp now behaves when copying
sparse files, and so it needs to be fixed.

What this code does is look at the existing extent adjacent to the
current EOF: if it is a hole, speculative preallocation is disabled
altogether. To avoid the next write doing a large prealloc, the size of
subsequent preallocations is taken from the current size of the extent
at EOF. IOWs, if you leave a hole in the file, preallocation behaviour
is reset to the same as for a zero-size file.

Example new behaviour:

$ xfs_io -f -c "pwrite 0 31m" \
            -c "pwrite 33m 1m" \
            -c "pwrite 128m 1m" \
            -c "fiemap -v" /mnt/scratch/blah
wrote 32505856/32505856 bytes at offset 0
31 MiB, 7936 ops; 0.0000 sec (1.608 GiB/sec and 421432.7439 ops/sec)
wrote 1048576/1048576 bytes at offset 34603008
1 MiB, 256 ops; 0.0000 sec (1.462 GiB/sec and 383233.5329 ops/sec)
wrote 1048576/1048576 bytes at offset 134217728
1 MiB, 256 ops; 0.0000 sec (1.719 GiB/sec and 450704.2254 ops/sec)
/mnt/scratch/blah:
 EXT: FILE-OFFSET        BLOCK-RANGE      TOTAL FLAGS
   0: [0..65535]:        96..65631        65536   0x0
   1: [65536..67583]:    hole              2048
   2: [67584..69631]:    67680..69727      2048   0x0
   3: [69632..262143]:   hole            192512
   4: [262144..264191]:  262240..264287    2048   0x1

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
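The heuristic described above is small enough to model outside the
kernel. The following standalone C sketch mirrors that decision: a hole
at EOF disables preallocation; otherwise the EOF extent size, or the
current offset once the extent exceeds half the maximum extent length,
becomes the basis. All names and the block-unit parameters here are
hypothetical simplifications for illustration only; the real code in
the diff below operates on struct xfs_bmbt_irec in filesystem-block
units.

    #include <stdio.h>
    #include <stdbool.h>

    /*
     * Hypothetical model of the prealloc sizing heuristic. Returns the
     * initial prealloc size basis in blocks; 0 means prealloc disabled.
     */
    static long prealloc_initial_size(bool hole_at_eof,
                                      long eof_extent_blocks,
                                      long offset_blocks,
                                      long max_extent_len)
    {
            if (hole_at_eof)
                    return 0;       /* sparse write: disable prealloc */
            if (eof_extent_blocks <= max_extent_len / 2)
                    return eof_extent_blocks; /* base on extent at EOF */
            return offset_blocks;   /* large file: grow towards max */
    }

    int main(void)
    {
            /* hole at EOF (write after a seek past EOF): disabled */
            printf("%ld\n", prealloc_initial_size(true, 0, 32768, 2097151));
            /* small data extent at EOF: sized from that extent */
            printf("%ld\n", prealloc_initial_size(false, 512, 32768, 2097151));
            /* extent bigger than half the max: use the current offset */
            printf("%ld\n", prealloc_initial_size(false, 1500000, 2000000,
                                                  2097151));
            return 0;
    }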
Diffstat (limited to 'fs/xfs/xfs_iomap.c')
-rw-r--r--	fs/xfs/xfs_iomap.c	77
1 file changed, 67 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 364818eef40e..912d83d8860a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,62 @@ xfs_iomap_eof_want_preallocate(
 }
 
 /*
+ * Determine the initial size of the preallocation. We are beyond the current
+ * EOF here, but we need to take into account whether this is a sparse write or
+ * an extending write when determining the preallocation size. Hence we need to
+ * look up the extent that ends at the current write offset and use the result
+ * to determine the preallocation size.
+ *
+ * If the extent is a hole, then preallocation is essentially disabled.
+ * Otherwise we take the size of the preceding data extent as the basis for the
+ * preallocation size. If the size of the extent is greater than half the
+ * maximum extent length, then use the current offset as the basis. This ensures
+ * that for large files the preallocation size always extends to MAXEXTLEN
+ * rather than falling short due to things like stripe unit/width alignment of
+ * real extents.
+ */
+STATIC int
+xfs_iomap_eof_prealloc_initial_size(
+	struct xfs_mount	*mp,
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_bmbt_irec_t		*imap,
+	int			nimaps)
+{
+	xfs_fileoff_t		start_fsb;
+	int			imaps = 1;
+	int			error;
+
+	ASSERT(nimaps >= imaps);
+
+	/* if we are using a specific prealloc size, return now */
+	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+		return 0;
+
+	/*
+	 * As we write multiple pages, the offset will always align to the
+	 * start of a page and hence point to a hole at EOF. i.e. if the size is
+	 * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
+	 * will return FSB 1. Hence if there are blocks in the file, we want to
+	 * point to the block prior to the EOF block and not the hole that maps
+	 * directly at @offset.
+	 */
+	start_fsb = XFS_B_TO_FSB(mp, offset);
+	if (start_fsb)
+		start_fsb--;
+	error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
+	if (error)
+		return 0;
+
+	ASSERT(imaps == 1);
+	if (imap[0].br_startblock == HOLESTARTBLOCK)
+		return 0;
+	if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
+		return imap[0].br_blockcount;
+	return XFS_B_TO_FSB(mp, offset);
+}
+
+/*
  * If we don't have a user specified preallocation size, dynamically increase
  * the preallocation size as the size of the file grows. Cap the maximum size
  * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +375,19 @@ xfs_iomap_eof_want_preallocate(
 STATIC xfs_fsblock_t
 xfs_iomap_prealloc_size(
 	struct xfs_mount	*mp,
-	struct xfs_inode	*ip)
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	struct xfs_bmbt_irec	*imap,
+	int			nimaps)
 {
 	xfs_fsblock_t		alloc_blocks = 0;
 
-	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+	alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
+							   imap, nimaps);
+	if (alloc_blocks > 0) {
 		int shift = 0;
 		int64_t freesp;
 
-		/*
-		 * rounddown_pow_of_two() returns an undefined result
-		 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
-		 * ensure we always pass in a non-zero value.
-		 */
-		alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
 		alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
 				rounddown_pow_of_two(alloc_blocks));
 
@@ -399,7 +454,6 @@ xfs_iomap_write_delay(
 	extsz = xfs_get_extsz_hint(ip);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-
 	error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
 				imap, XFS_WRITE_IMAPS, &prealloc);
 	if (error)
@@ -407,7 +461,10 @@ xfs_iomap_write_delay(
 
 retry:
 	if (prealloc) {
-		xfs_fsblock_t	alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+		xfs_fsblock_t	alloc_blocks;
+
+		alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
+						       XFS_WRITE_IMAPS);
 
 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
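For reference, the capping logic retained in xfs_iomap_prealloc_size()
rounds the initial estimate down to a power of two and clamps it to the
maximum extent length. The following is a minimal userspace sketch of
that step, assuming MAXEXTLEN's value of 2097151 blocks (0x001FFFFF)
and modelling the kernel's rounddown_pow_of_two() with a plain loop;
like the kernel helper, the rounding is undefined for zero, which is
why the new code only enters this path when alloc_blocks > 0 (the old
code needed the "+ 1" for the same reason).

    #include <stdio.h>

    /* Assumed value of MAXEXTLEN: 21 bits of blocks per extent. */
    #define MAX_EXTENT_LEN	2097151UL

    /*
     * Userspace stand-in for rounddown_pow_of_two(); like the kernel
     * helper, the result is undefined for n == 0 (here it returns 1).
     */
    static unsigned long rounddown_pow2(unsigned long n)
    {
            unsigned long p = 1;

            while (p * 2 <= n)	/* would overflow for huge n; fine here */
                    p *= 2;
            return p;
    }

    int main(void)
    {
            unsigned long alloc_blocks = 300000; /* hypothetical estimate */

            if (alloc_blocks > 0) {
                    alloc_blocks = rounddown_pow2(alloc_blocks);
                    if (alloc_blocks > MAX_EXTENT_LEN)
                            alloc_blocks = MAX_EXTENT_LEN;
            }
            printf("%lu\n", alloc_blocks);	/* prints 262144 */
            return 0;
    }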