aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2019-02-18 12:38:47 -0500
committerDarrick J. Wong <darrick.wong@oracle.com>2019-02-21 10:55:07 -0500
commitdb46e604adf8c923214a63b46e87ca2411d3d580 (patch)
treeec4797241a36cc5dc9d2b0d8fb466d8bc98f1bc1
parent12df89f28fa92e54bfb2ae01f9ee059e74e1acc0 (diff)
xfs: merge COW handling into xfs_file_iomap_begin_delay
Besides simplifying the code a bit this allows us to actually implement the behavior of using COW preallocation for non-COW data mentioned in the current comments. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/xfs_iomap.c133
-rw-r--r--fs/xfs/xfs_reflink.c67
-rw-r--r--fs/xfs/xfs_reflink.h2
-rw-r--r--fs/xfs/xfs_trace.h3
4 files changed, 94 insertions, 111 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index be9d2a4b190a..08c4d1d8f90e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay(
534{ 534{
535 struct xfs_inode *ip = XFS_I(inode); 535 struct xfs_inode *ip = XFS_I(inode);
536 struct xfs_mount *mp = ip->i_mount; 536 struct xfs_mount *mp = ip->i_mount;
537 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
538 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 537 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
539 xfs_fileoff_t maxbytes_fsb = 538 xfs_fileoff_t maxbytes_fsb =
540 XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); 539 XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
541 xfs_fileoff_t end_fsb; 540 xfs_fileoff_t end_fsb;
542 int error = 0, eof = 0; 541 struct xfs_bmbt_irec imap, cmap;
543 struct xfs_bmbt_irec got; 542 struct xfs_iext_cursor icur, ccur;
544 struct xfs_iext_cursor icur;
545 xfs_fsblock_t prealloc_blocks = 0; 543 xfs_fsblock_t prealloc_blocks = 0;
544 bool eof = false, cow_eof = false, shared;
545 int whichfork = XFS_DATA_FORK;
546 int error = 0;
546 547
547 ASSERT(!XFS_IS_REALTIME_INODE(ip)); 548 ASSERT(!XFS_IS_REALTIME_INODE(ip));
548 ASSERT(!xfs_get_extsz_hint(ip)); 549 ASSERT(!xfs_get_extsz_hint(ip));
@@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay(
560 561
561 XFS_STATS_INC(mp, xs_blk_mapw); 562 XFS_STATS_INC(mp, xs_blk_mapw);
562 563
563 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 564 if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
564 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); 565 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
565 if (error) 566 if (error)
566 goto out_unlock; 567 goto out_unlock;
@@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay(
568 569
569 end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); 570 end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
570 571
571 eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); 572 /*
 573 * Search the data fork first to look up our source mapping. We
574 * always need the data fork map, as we have to return it to the
575 * iomap code so that the higher level write code can read data in to
576 * perform read-modify-write cycles for unaligned writes.
577 */
578 eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
572 if (eof) 579 if (eof)
573 got.br_startoff = end_fsb; /* fake hole until the end */ 580 imap.br_startoff = end_fsb; /* fake hole until the end */
581
582 /* We never need to allocate blocks for zeroing a hole. */
583 if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
584 xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
585 goto out_unlock;
586 }
587
588 /*
589 * Search the COW fork extent list even if we did not find a data fork
590 * extent. This serves two purposes: first this implements the
591 * speculative preallocation using cowextsize, so that we also unshare
592 * block adjacent to shared blocks instead of just the shared blocks
593 * themselves. Second the lookup in the extent list is generally faster
594 * than going out to the shared extent tree.
595 */
596 if (xfs_is_reflink_inode(ip)) {
597 cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
598 &ccur, &cmap);
599 if (!cow_eof && cmap.br_startoff <= offset_fsb) {
600 trace_xfs_reflink_cow_found(ip, &cmap);
601 whichfork = XFS_COW_FORK;
602 goto done;
603 }
604 }
574 605
575 if (got.br_startoff <= offset_fsb) { 606 if (imap.br_startoff <= offset_fsb) {
576 /* 607 /*
577 * For reflink files we may need a delalloc reservation when 608 * For reflink files we may need a delalloc reservation when
578 * overwriting shared extents. This includes zeroing of 609 * overwriting shared extents. This includes zeroing of
579 * existing extents that contain data. 610 * existing extents that contain data.
580 */ 611 */
581 if (xfs_is_reflink_inode(ip) && 612 if (!xfs_is_reflink_inode(ip) ||
582 ((flags & IOMAP_WRITE) || 613 ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
583 got.br_state != XFS_EXT_UNWRITTEN)) { 614 trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
584 xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); 615 &imap);
585 error = xfs_reflink_reserve_cow(ip, &got); 616 goto done;
586 if (error)
587 goto out_unlock;
588 } 617 }
589 618
590 trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got); 619 xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
591 goto done;
592 }
593 620
594 if (flags & IOMAP_ZERO) { 621 /* Trim the mapping to the nearest shared extent boundary. */
595 xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff); 622 error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
596 goto out_unlock; 623 if (error)
624 goto out_unlock;
625
626 /* Not shared? Just report the (potentially capped) extent. */
627 if (!shared) {
628 trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
629 &imap);
630 goto done;
631 }
632
633 /*
634 * Fork all the shared blocks from our write offset until the
635 * end of the extent.
636 */
637 whichfork = XFS_COW_FORK;
638 end_fsb = imap.br_startoff + imap.br_blockcount;
639 } else {
640 /*
641 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 642 * pages to keep the chunks of work done here somewhat
643 * symmetric with the work writeback does. This is a completely
644 * arbitrary number pulled out of thin air.
645 *
646 * Note that the values needs to be less than 32-bits wide until
647 * the lower level functions are updated.
648 */
649 count = min_t(loff_t, count, 1024 * PAGE_SIZE);
650 end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
597 } 651 }
598 652
599 error = xfs_qm_dqattach_locked(ip, false); 653 error = xfs_qm_dqattach_locked(ip, false);
600 if (error) 654 if (error)
601 goto out_unlock; 655 goto out_unlock;
602 656
603 /* 657 if (eof && whichfork == XFS_DATA_FORK) {
604 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages
605 * to keep the chunks of work done where somewhat symmetric with the
606 * work writeback does. This is a completely arbitrary number pulled
607 * out of thin air as a best guess for initial testing.
608 *
609 * Note that the values needs to be less than 32-bits wide until
610 * the lower level functions are updated.
611 */
612 count = min_t(loff_t, count, 1024 * PAGE_SIZE);
613 end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
614
615 if (eof) {
616 prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, 658 prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count,
617 &icur); 659 &icur);
618 if (prealloc_blocks) { 660 if (prealloc_blocks) {
@@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay(
635 } 677 }
636 678
637retry: 679retry:
638 error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, 680 error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb,
639 end_fsb - offset_fsb, prealloc_blocks, &got, &icur, 681 end_fsb - offset_fsb, prealloc_blocks,
640 eof); 682 whichfork == XFS_DATA_FORK ? &imap : &cmap,
683 whichfork == XFS_DATA_FORK ? &icur : &ccur,
684 whichfork == XFS_DATA_FORK ? eof : cow_eof);
641 switch (error) { 685 switch (error) {
642 case 0: 686 case 0:
643 break; 687 break;
@@ -659,9 +703,20 @@ retry:
659 * them out if the write happens to fail. 703 * them out if the write happens to fail.
660 */ 704 */
661 iomap->flags |= IOMAP_F_NEW; 705 iomap->flags |= IOMAP_F_NEW;
662 trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got); 706 trace_xfs_iomap_alloc(ip, offset, count, whichfork,
707 whichfork == XFS_DATA_FORK ? &imap : &cmap);
663done: 708done:
664 error = xfs_bmbt_to_iomap(ip, iomap, &got, false); 709 if (whichfork == XFS_COW_FORK) {
710 if (imap.br_startoff > offset_fsb) {
711 xfs_trim_extent(&cmap, offset_fsb,
712 imap.br_startoff - offset_fsb);
713 error = xfs_bmbt_to_iomap(ip, iomap, &cmap, false);
714 goto out_unlock;
715 }
716 /* ensure we only report blocks we have a reservation for */
717 xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount);
718 }
719 error = xfs_bmbt_to_iomap(ip, iomap, &imap, false);
665out_unlock: 720out_unlock:
666 xfs_iunlock(ip, XFS_ILOCK_EXCL); 721 xfs_iunlock(ip, XFS_ILOCK_EXCL);
667 return error; 722 return error;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 8a5353daf9ab..9ef1f79cb3ae 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -234,73 +234,6 @@ xfs_reflink_trim_around_shared(
234 } 234 }
235} 235}
236 236
237/*
238 * Trim the passed in imap to the next shared/unshared extent boundary, and
239 * if imap->br_startoff points to a shared extent reserve space for it in the
240 * COW fork.
241 *
242 * Note that imap will always contain the block numbers for the existing blocks
243 * in the data fork, as the upper layers need them for read-modify-write
244 * operations.
245 */
246int
247xfs_reflink_reserve_cow(
248 struct xfs_inode *ip,
249 struct xfs_bmbt_irec *imap)
250{
251 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
252 struct xfs_bmbt_irec got;
253 int error = 0;
254 bool eof = false;
255 struct xfs_iext_cursor icur;
256 bool shared;
257
258 /*
259 * Search the COW fork extent list first. This serves two purposes:
260 * first this implement the speculative preallocation using cowextisze,
261 * so that we also unshared block adjacent to shared blocks instead
262 * of just the shared blocks themselves. Second the lookup in the
263 * extent list is generally faster than going out to the shared extent
264 * tree.
265 */
266
267 if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got))
268 eof = true;
269 if (!eof && got.br_startoff <= imap->br_startoff) {
270 trace_xfs_reflink_cow_found(ip, imap);
271 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
272 return 0;
273 }
274
275 /* Trim the mapping to the nearest shared extent boundary. */
276 error = xfs_reflink_trim_around_shared(ip, imap, &shared);
277 if (error)
278 return error;
279
280 /* Not shared? Just report the (potentially capped) extent. */
281 if (!shared)
282 return 0;
283
284 /*
285 * Fork all the shared blocks from our write offset until the end of
286 * the extent.
287 */
288 error = xfs_qm_dqattach_locked(ip, false);
289 if (error)
290 return error;
291
292 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
293 imap->br_blockcount, 0, &got, &icur, eof);
294 if (error == -ENOSPC || error == -EDQUOT)
295 trace_xfs_reflink_cow_enospc(ip, imap);
296 if (error)
297 return error;
298
299 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
300 trace_xfs_reflink_cow_alloc(ip, &got);
301 return 0;
302}
303
304/* Convert part of an unwritten CoW extent to a real one. */ 237/* Convert part of an unwritten CoW extent to a real one. */
305STATIC int 238STATIC int
306xfs_reflink_convert_cow_extent( 239xfs_reflink_convert_cow_extent(
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 70d68a1a9b49..4a9e3cd4768a 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -12,8 +12,6 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
12extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, 12extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
13 struct xfs_bmbt_irec *irec, bool *shared); 13 struct xfs_bmbt_irec *irec, bool *shared);
14 14
15extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
16 struct xfs_bmbt_irec *imap);
17extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, 15extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
18 struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode, 16 struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode,
19 unsigned iomap_flags); 17 unsigned iomap_flags);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f1e18ae8a209..47fb07d86efd 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3196,13 +3196,10 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error);
3196 3196
3197/* copy on write */ 3197/* copy on write */
3198DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); 3198DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
3199DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
3200DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); 3199DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
3201DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); 3200DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
3202DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); 3201DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
3203 3202
3204DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
3205
3206DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); 3203DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
3207 3204
3208DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); 3205DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);