author	Christoph Hellwig <hch@lst.de>	2016-09-18 21:10:21 -0400
committer	Dave Chinner <david@fromorbit.com>	2016-09-18 21:10:21 -0400
commit	51446f5ba44874db4d2a93a6eb61b133e5ec1b3e (patch)
tree	8c86e63cea2cab372dee653ba4fed3d4e8e68409
parent	85a6e764ff5485dfe1edf5e47290e4d32ea866d5 (diff)
xfs: rewrite and optimize the delalloc write path
Currently xfs_iomap_write_delay does multiple lookups in the inode
extent tree, which is rather costly, especially with the new iomap
based write path and small write sizes.

But it turns out that the low-level xfs_bmap_search_extents gives us
all the information we need in the regular delalloc buffered write
path:

 - it will return us an extent covering the block we are looking up
   if it exists.  In that case we can simply return that extent to
   the caller and are done.
 - it will tell us if we are beyond the last currently allocated
   block with an eof return parameter.  In that case we can create a
   delalloc reservation and use the also-returned information about
   the last extent in the file as the hint to size our delalloc
   reservation.
 - it can tell us that we are writing into a hole, but that there is
   an extent beyond this hole.  In this case we can create a delalloc
   reservation that covers the requested size (possibly capped to the
   next existing allocation).

All of that can be done in one single routine instead of bouncing up
and down a few layers.  This reduces the CPU overhead of the block
mapping routines and also simplifies the code a lot.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
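[Editor's illustration] To make the three cases above concrete, here is a
minimal standalone C sketch of the decision flow after the single extent
tree lookup.  It is illustration only: the types (struct extent, struct
lookup), the function map_delalloc_write, and the crude size-doubling
preallocation hint are simplified stand-ins invented for this example,
not the kernel's interfaces; the real logic lives in
xfs_file_iomap_begin_delay in the diff below.

	/*
	 * Sketch of the three-way dispatch: a single lookup returns the
	 * extent at/after the write offset ("got"), the extent before it
	 * ("prev"), and an eof flag.  All names here are hypothetical.
	 */
	#include <inttypes.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct extent {
		uint64_t	start;	/* first file block covered */
		uint64_t	count;	/* length in file blocks */
	};

	struct lookup {
		struct extent	got;	/* extent at or after offset (valid if !eof) */
		struct extent	prev;	/* extent preceding the offset */
		bool		eof;	/* offset is beyond the last allocation */
	};

	static void map_delalloc_write(uint64_t bno, uint64_t len,
				       const struct lookup *l)
	{
		if (!l->eof && l->got.start <= bno) {
			/* Case 1: block already mapped -- return the extent. */
			printf("found: [%" PRIu64 ", +%" PRIu64 ")\n",
			       l->got.start, l->got.count);
		} else if (l->eof) {
			/* Case 2: beyond EOF -- reserve delalloc, sizing the
			 * speculative preallocation from the last extent
			 * (a crude doubling stand-in for the real heuristic). */
			uint64_t prealloc = l->prev.count * 2;
			printf("reserve at eof: [%" PRIu64 ", +%" PRIu64 ")\n",
			       bno, len + prealloc);
		} else {
			/* Case 3: hole with an extent beyond it -- reserve the
			 * requested length, capped at the next allocation. */
			uint64_t max = l->got.start - bno;
			printf("reserve in hole: [%" PRIu64 ", +%" PRIu64 ")\n",
			       bno, len < max ? len : max);
		}
	}

	int main(void)
	{
		struct lookup hole = {
			.got  = { .start = 100, .count = 8 },
			.prev = { .start = 0,   .count = 4 },
			.eof  = false,
		};

		/* Write into the hole at block 50: capped at block 100. */
		map_delalloc_write(50, 200, &hole);
		return 0;
	}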
-rw-r--r--	fs/xfs/libxfs/xfs_bmap.c	89
-rw-r--r--	fs/xfs/libxfs/xfs_bmap.h	10
-rw-r--r--	fs/xfs/xfs_iomap.c	395
-rw-r--r--	fs/xfs/xfs_iomap.h	2
4 files changed, 181 insertions(+), 315 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index b060bca93402..614803bc8a9f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1388,7 +1388,7 @@ xfs_bmap_search_multi_extents(
  * Else, *lastxp will be set to the index of the found
  * entry; *gotp will contain the entry.
  */
-STATIC xfs_bmbt_rec_host_t *		/* pointer to found extent entry */
+xfs_bmbt_rec_host_t *			/* pointer to found extent entry */
 xfs_bmap_search_extents(
 	xfs_inode_t	*ip,		/* incore inode pointer */
 	xfs_fileoff_t	bno,		/* block number searched for */
@@ -4074,7 +4074,7 @@ xfs_bmapi_read(
 	return 0;
 }
 
-STATIC int
+int
 xfs_bmapi_reserve_delalloc(
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		aoff,
@@ -4170,91 +4170,6 @@ out_unreserve_quota:
 	return error;
 }
 
-/*
- * Map file blocks to filesystem blocks, adding delayed allocations as needed.
- */
-int
-xfs_bmapi_delay(
-	struct xfs_inode	*ip,	/* incore inode */
-	xfs_fileoff_t		bno,	/* starting file offs. mapped */
-	xfs_filblks_t		len,	/* length to map in file */
-	struct xfs_bmbt_irec	*mval,	/* output: map values */
-	int			*nmap,	/* i/o: mval size/count */
-	int			flags)	/* XFS_BMAPI_... */
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-	struct xfs_bmbt_irec	got;	/* current file extent record */
-	struct xfs_bmbt_irec	prev;	/* previous file extent record */
-	xfs_fileoff_t		obno;	/* old block number (offset) */
-	xfs_fileoff_t		end;	/* end of mapped file region */
-	xfs_extnum_t		lastx;	/* last useful extent number */
-	int			eof;	/* we've hit the end of extents */
-	int			n = 0;	/* current extent index */
-	int			error = 0;
-
-	ASSERT(*nmap >= 1);
-	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
-	ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	if (unlikely(XFS_TEST_ERROR(
-	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
-	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
-	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-		XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
-		return -EFSCORRUPTED;
-	}
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	XFS_STATS_INC(mp, xs_blk_mapw);
-
-	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
-		if (error)
-			return error;
-	}
-
-	xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
-	end = bno + len;
-	obno = bno;
-
-	while (bno < end && n < *nmap) {
-		if (eof || got.br_startoff > bno) {
-			error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
-							   &prev, &lastx, eof);
-			if (error) {
-				if (n == 0) {
-					*nmap = 0;
-					return error;
-				}
-				break;
-			}
-		}
-
-		/* set up the extent map to return. */
-		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
-		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
-
-		/* If we're done, stop now. */
-		if (bno >= end || n >= *nmap)
-			break;
-
-		/* Else go on to the next record. */
-		prev = got;
-		if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
-			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
-		else
-			eof = 1;
-	}
-
-	*nmap = n;
-	return 0;
-}
-
-
 static int
 xfs_bmapi_allocate(
 	struct xfs_bmalloca	*bma)
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 254034f96941..d66006960fbc 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -181,9 +181,6 @@ int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 int	xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
 		xfs_filblks_t len, struct xfs_bmbt_irec *mval,
 		int *nmap, int flags);
-int	xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
-		xfs_filblks_t len, struct xfs_bmbt_irec *mval,
-		int *nmap, int flags);
 int	xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t bno, xfs_filblks_t len, int flags,
 		xfs_fsblock_t *firstblock, xfs_extlen_t total,
@@ -202,5 +199,12 @@ int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		struct xfs_defer_ops *dfops, enum shift_direction direction,
 		int num_exts);
 int	xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
+struct xfs_bmbt_rec_host *
+	xfs_bmap_search_extents(struct xfs_inode *ip, xfs_fileoff_t bno,
+		int fork, int *eofp, xfs_extnum_t *lastxp,
+		struct xfs_bmbt_irec *gotp, struct xfs_bmbt_irec *prevp);
+int	xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, xfs_fileoff_t aoff,
+		xfs_filblks_t len, struct xfs_bmbt_irec *got,
+		struct xfs_bmbt_irec *prev, xfs_extnum_t *lastx, int eof);
 
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 918511ae115c..f96c8ffce5f4 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * Copyright (c) 2016 Christoph Hellwig.
  * All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or
@@ -42,7 +43,6 @@
 
 #define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> mp->m_writeio_log) \
 					<< mp->m_writeio_log)
-#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
 
 void
 xfs_bmbt_to_iomap(
@@ -311,130 +311,6 @@ out_trans_cancel:
 	goto out_unlock;
 }
 
-/*
- * If the caller is doing a write at the end of the file, then extend the
- * allocation out to the file system's write iosize. We clean up any extra
- * space left over when the file is closed in xfs_inactive().
- *
- * If we find we already have delalloc preallocation beyond EOF, don't do more
- * preallocation as it it not needed.
- */
-STATIC int
-xfs_iomap_eof_want_preallocate(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,
-	size_t		count,
-	xfs_bmbt_irec_t	*imap,
-	int		nimaps,
-	int		*prealloc)
-{
-	xfs_fileoff_t   start_fsb;
-	xfs_filblks_t   count_fsb;
-	int		n, error, imaps;
-	int		found_delalloc = 0;
-
-	*prealloc = 0;
-	if (offset + count <= XFS_ISIZE(ip))
-		return 0;
-
-	/*
-	 * If the file is smaller than the minimum prealloc and we are using
-	 * dynamic preallocation, don't do any preallocation at all as it is
-	 * likely this is the only write to the file that is going to be done.
-	 */
-	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
-	    XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks))
-		return 0;
-
-	/*
-	 * If there are any real blocks past eof, then don't
-	 * do any speculative allocation.
-	 */
-	start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
-	count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
-	while (count_fsb > 0) {
-		imaps = nimaps;
-		error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps,
-				       0);
-		if (error)
-			return error;
-		for (n = 0; n < imaps; n++) {
-			if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
-			    (imap[n].br_startblock != DELAYSTARTBLOCK))
-				return 0;
-			start_fsb += imap[n].br_blockcount;
-			count_fsb -= imap[n].br_blockcount;
-
-			if (imap[n].br_startblock == DELAYSTARTBLOCK)
-				found_delalloc = 1;
-		}
-	}
-	if (!found_delalloc)
-		*prealloc = 1;
-	return 0;
-}
-
-/*
- * Determine the initial size of the preallocation. We are beyond the current
- * EOF here, but we need to take into account whether this is a sparse write or
- * an extending write when determining the preallocation size. Hence we need to
- * look up the extent that ends at the current write offset and use the result
- * to determine the preallocation size.
- *
- * If the extent is a hole, then preallocation is essentially disabled.
- * Otherwise we take the size of the preceeding data extent as the basis for the
- * preallocation size. If the size of the extent is greater than half the
- * maximum extent length, then use the current offset as the basis. This ensures
- * that for large files the preallocation size always extends to MAXEXTLEN
- * rather than falling short due to things like stripe unit/width alignment of
- * real extents.
- */
-STATIC xfs_fsblock_t
-xfs_iomap_eof_prealloc_initial_size(
-	struct xfs_mount	*mp,
-	struct xfs_inode	*ip,
-	xfs_off_t		offset,
-	xfs_bmbt_irec_t		*imap,
-	int			nimaps)
-{
-	xfs_fileoff_t   start_fsb;
-	int		imaps = 1;
-	int		error;
-
-	ASSERT(nimaps >= imaps);
-
-	/* if we are using a specific prealloc size, return now */
-	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
-		return 0;
-
-	/* If the file is small, then use the minimum prealloc */
-	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign))
-		return 0;
-
-	/*
-	 * As we write multiple pages, the offset will always align to the
-	 * start of a page and hence point to a hole at EOF. i.e. if the size is
-	 * 4096 bytes, we only have one block at FSB 0, but XFS_B_TO_FSB(4096)
-	 * will return FSB 1. Hence if there are blocks in the file, we want to
-	 * point to the block prior to the EOF block and not the hole that maps
-	 * directly at @offset.
-	 */
-	start_fsb = XFS_B_TO_FSB(mp, offset);
-	if (start_fsb)
-		start_fsb--;
-	error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
-	if (error)
-		return 0;
-
-	ASSERT(imaps == 1);
-	if (imap[0].br_startblock == HOLESTARTBLOCK)
-		return 0;
-	if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
-		return imap[0].br_blockcount << 1;
-	return XFS_B_TO_FSB(mp, offset);
-}
-
 STATIC bool
 xfs_quota_need_throttle(
 	struct xfs_inode	*ip,
@@ -496,27 +372,76 @@ xfs_quota_calc_throttle(
 }
 
 /*
+ * If we are doing a write at the end of the file and there are no allocations
+ * past this one, then extend the allocation out to the file system's write
+ * iosize.
+ *
  * If we don't have a user specified preallocation size, dynamically increase
  * the preallocation size as the size of the file grows. Cap the maximum size
  * at a single extent or less if the filesystem is near full. The closer the
  * filesystem is to full, the smaller the maximum prealocation.
+ *
+ * As an exception we don't do any preallocation at all if the file is smaller
+ * than the minimum preallocation and we are using the default dynamic
+ * preallocation scheme, as it is likely this is the only write to the file that
+ * is going to be done.
+ *
+ * We clean up any extra space left over when the file is closed in
+ * xfs_inactive().
  */
 STATIC xfs_fsblock_t
 xfs_iomap_prealloc_size(
-	struct xfs_mount	*mp,
 	struct xfs_inode	*ip,
-	xfs_off_t		offset,
-	struct xfs_bmbt_irec	*imap,
-	int			nimaps)
+	loff_t			offset,
+	loff_t			count,
+	xfs_extnum_t		idx,
+	struct xfs_bmbt_irec	*prev)
 {
-	xfs_fsblock_t		alloc_blocks = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	int			shift = 0;
 	int64_t			freesp;
 	xfs_fsblock_t		qblocks;
 	int			qshift = 0;
+	xfs_fsblock_t		alloc_blocks = 0;
+
+	if (offset + count <= XFS_ISIZE(ip))
+		return 0;
+
+	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
+	    (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)))
+		return 0;
 
-	alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
-							   imap, nimaps);
+	/*
+	 * If an explicit allocsize is set, the file is small, or we
+	 * are writing behind a hole, then use the minimum prealloc:
+	 */
+	if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
+	    XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
+	    idx == 0 ||
+	    prev->br_startoff + prev->br_blockcount < offset_fsb)
+		return mp->m_writeio_blocks;
+
+	/*
+	 * Determine the initial size of the preallocation. We are beyond the
+	 * current EOF here, but we need to take into account whether this is
+	 * a sparse write or an extending write when determining the
+	 * preallocation size. Hence we need to look up the extent that ends
+	 * at the current write offset and use the result to determine the
+	 * preallocation size.
+	 *
+	 * If the extent is a hole, then preallocation is essentially disabled.
+	 * Otherwise we take the size of the preceding data extent as the basis
+	 * for the preallocation size. If the size of the extent is greater than
+	 * half the maximum extent length, then use the current offset as the
+	 * basis. This ensures that for large files the preallocation size
+	 * always extends to MAXEXTLEN rather than falling short due to things
+	 * like stripe unit/width alignment of real extents.
+	 */
+	if (prev->br_blockcount <= (MAXEXTLEN >> 1))
+		alloc_blocks = prev->br_blockcount << 1;
+	else
+		alloc_blocks = XFS_B_TO_FSB(mp, offset);
 	if (!alloc_blocks)
 		goto check_writeio;
 	qblocks = alloc_blocks;
@@ -587,120 +512,145 @@ xfs_iomap_prealloc_size(
 	 */
 	while (alloc_blocks && alloc_blocks >= freesp)
 		alloc_blocks >>= 4;
-
 check_writeio:
 	if (alloc_blocks < mp->m_writeio_blocks)
 		alloc_blocks = mp->m_writeio_blocks;
-
 	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
 				      mp->m_writeio_blocks);
-
 	return alloc_blocks;
 }
 
-int
-xfs_iomap_write_delay(
-	xfs_inode_t	*ip,
-	xfs_off_t	offset,
-	size_t		count,
-	xfs_bmbt_irec_t	*ret_imap)
+static int
+xfs_file_iomap_begin_delay(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			count,
+	unsigned		flags,
+	struct iomap		*iomap)
 {
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_fileoff_t	offset_fsb;
-	xfs_fileoff_t	last_fsb;
-	xfs_off_t	aligned_offset;
-	xfs_fileoff_t	ioalign;
-	xfs_extlen_t	extsz;
-	int		nimaps;
-	xfs_bmbt_irec_t	imap[XFS_WRITE_IMAPS];
-	int		prealloc;
-	int		error;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	/*
-	 * Make sure that the dquots are there. This doesn't hold
-	 * the ilock across a disk read.
-	 */
-	error = xfs_qm_dqattach_locked(ip, 0);
-	if (error)
-		return error;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	xfs_fileoff_t		maxbytes_fsb =
+		XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
+	xfs_fileoff_t		end_fsb, orig_end_fsb;
+	int			error = 0, eof = 0;
+	struct xfs_bmbt_irec	got;
+	struct xfs_bmbt_irec	prev;
+	xfs_extnum_t		idx;
 
-	extsz = xfs_get_extsz_hint(ip);
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	ASSERT(!XFS_IS_REALTIME_INODE(ip));
+	ASSERT(!xfs_get_extsz_hint(ip));
 
-	error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
-				imap, XFS_WRITE_IMAPS, &prealloc);
-	if (error)
-		return error;
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 
-retry:
-	if (prealloc) {
-		xfs_fsblock_t	alloc_blocks;
+	if (unlikely(XFS_TEST_ERROR(
+	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
+	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
+	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		error = -EFSCORRUPTED;
+		goto out_unlock;
+	}
 
-		alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
-						       XFS_WRITE_IMAPS);
+	XFS_STATS_INC(mp, xs_blk_mapw);
 
-		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
-		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-		last_fsb = ioalign + alloc_blocks;
-	} else {
-		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
+	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+		if (error)
+			goto out_unlock;
 	}
 
-	if (prealloc || extsz) {
-		error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb);
-		if (error)
-			return error;
+	xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
+			&got, &prev);
+	if (!eof && got.br_startoff <= offset_fsb) {
+		trace_xfs_iomap_found(ip, offset, count, 0, &got);
+		goto done;
 	}
 
+	error = xfs_qm_dqattach_locked(ip, 0);
+	if (error)
+		goto out_unlock;
+
 	/*
-	 * Make sure preallocation does not create extents beyond the range we
-	 * actually support in this filesystem.
+	 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages
+	 * to keep the chunks of work done where somewhat symmetric with the
+	 * work writeback does. This is a completely arbitrary number pulled
+	 * out of thin air as a best guess for initial testing.
+	 *
+	 * Note that the values needs to be less than 32-bits wide until
+	 * the lower level functions are updated.
 	 */
-	if (last_fsb > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes))
-		last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
+	count = min_t(loff_t, count, 1024 * PAGE_SIZE);
+	end_fsb = orig_end_fsb =
+		min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
 
-	ASSERT(last_fsb > offset_fsb);
+	if (eof) {
+		xfs_fsblock_t	prealloc_blocks;
 
-	nimaps = XFS_WRITE_IMAPS;
-	error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb,
-				imap, &nimaps, XFS_BMAPI_ENTIRE);
+		prealloc_blocks =
+			xfs_iomap_prealloc_size(ip, offset, count, idx, &prev);
+		if (prealloc_blocks) {
+			xfs_extlen_t	align;
+			xfs_off_t	end_offset;
+
+			end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
+			end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
+				prealloc_blocks;
+
+			align = xfs_eof_alignment(ip, 0);
+			if (align)
+				end_fsb = roundup_64(end_fsb, align);
+
+			end_fsb = min(end_fsb, maxbytes_fsb);
+			ASSERT(end_fsb > offset_fsb);
+		}
+	}
+
+retry:
+	error = xfs_bmapi_reserve_delalloc(ip, offset_fsb,
+			end_fsb - offset_fsb, &got,
+			&prev, &idx, eof);
 	switch (error) {
 	case 0:
+		break;
 	case -ENOSPC:
 	case -EDQUOT:
-		break;
-	default:
-		return error;
-	}
-
-	/*
-	 * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. Retry
-	 * without EOF preallocation.
-	 */
-	if (nimaps == 0) {
+		/* retry without any preallocation */
 		trace_xfs_delalloc_enospc(ip, offset, count);
-		if (prealloc) {
-			prealloc = 0;
-			error = 0;
+		if (end_fsb != orig_end_fsb) {
+			end_fsb = orig_end_fsb;
 			goto retry;
 		}
-		return error ? error : -ENOSPC;
+		/*FALLTHRU*/
+	default:
+		goto out_unlock;
 	}
 
-	if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
-		return xfs_alert_fsblock_zero(ip, &imap[0]);
-
 	/*
 	 * Tag the inode as speculatively preallocated so we can reclaim this
 	 * space on demand, if necessary.
 	 */
-	if (prealloc)
+	if (end_fsb != orig_end_fsb)
 		xfs_inode_set_eofblocks_tag(ip);
 
-	*ret_imap = imap[0];
-	return 0;
+	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
+done:
+	if (isnullstartblock(got.br_startblock))
+		got.br_startblock = DELAYSTARTBLOCK;
+
+	if (!got.br_startblock) {
+		error = xfs_alert_fsblock_zero(ip, &got);
+		if (error)
+			goto out_unlock;
+	}
+
+	xfs_bmbt_to_iomap(ip, iomap, &got);
+
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
 }
705 655
706/* 656/*
@@ -1008,6 +958,11 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
+	if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) {
+		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
+				iomap);
+	}
+
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -1035,19 +990,13 @@ xfs_file_iomap_begin(
 	 * the lower level functions are updated.
 	 */
 	length = min_t(loff_t, length, 1024 * PAGE_SIZE);
-	if (xfs_get_extsz_hint(ip)) {
-		/*
-		 * xfs_iomap_write_direct() expects the shared lock. It
-		 * is unlocked on return.
-		 */
-		xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
-		error = xfs_iomap_write_direct(ip, offset, length, &imap,
-				nimaps);
-	} else {
-		error = xfs_iomap_write_delay(ip, offset, length, &imap);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	}
-
+	/*
+	 * xfs_iomap_write_direct() expects the shared lock. It
+	 * is unlocked on return.
+	 */
+	xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+	error = xfs_iomap_write_direct(ip, offset, length, &imap,
+			nimaps);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index fb8aca3d69ab..6498be485932 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -25,8 +25,6 @@ struct xfs_bmbt_irec;
 
 int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
 			struct xfs_bmbt_irec *, int);
-int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,
-			struct xfs_bmbt_irec *);
 int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
 			struct xfs_bmbt_irec *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);