diff options
author | Christoph Hellwig <hch@lst.de> | 2019-02-18 12:38:47 -0500 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2019-02-21 10:55:07 -0500 |
commit | db46e604adf8c923214a63b46e87ca2411d3d580 (patch) | |
tree | ec4797241a36cc5dc9d2b0d8fb466d8bc98f1bc1 | |
parent | 12df89f28fa92e54bfb2ae01f9ee059e74e1acc0 (diff) |
xfs: merge COW handling into xfs_file_iomap_begin_delay
Besides simplifying the code a bit, this allows us to actually implement
the behavior of using COW preallocation for non-COW data that is mentioned
in the current comments.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/xfs_iomap.c | 133 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 67 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 3 |
4 files changed, 94 insertions, 111 deletions
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index be9d2a4b190a..08c4d1d8f90e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -534,15 +534,16 @@ xfs_file_iomap_begin_delay( | |||
534 | { | 534 | { |
535 | struct xfs_inode *ip = XFS_I(inode); | 535 | struct xfs_inode *ip = XFS_I(inode); |
536 | struct xfs_mount *mp = ip->i_mount; | 536 | struct xfs_mount *mp = ip->i_mount; |
537 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); | ||
538 | xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); | 537 | xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); |
539 | xfs_fileoff_t maxbytes_fsb = | 538 | xfs_fileoff_t maxbytes_fsb = |
540 | XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); | 539 | XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); |
541 | xfs_fileoff_t end_fsb; | 540 | xfs_fileoff_t end_fsb; |
542 | int error = 0, eof = 0; | 541 | struct xfs_bmbt_irec imap, cmap; |
543 | struct xfs_bmbt_irec got; | 542 | struct xfs_iext_cursor icur, ccur; |
544 | struct xfs_iext_cursor icur; | ||
545 | xfs_fsblock_t prealloc_blocks = 0; | 543 | xfs_fsblock_t prealloc_blocks = 0; |
544 | bool eof = false, cow_eof = false, shared; | ||
545 | int whichfork = XFS_DATA_FORK; | ||
546 | int error = 0; | ||
546 | 547 | ||
547 | ASSERT(!XFS_IS_REALTIME_INODE(ip)); | 548 | ASSERT(!XFS_IS_REALTIME_INODE(ip)); |
548 | ASSERT(!xfs_get_extsz_hint(ip)); | 549 | ASSERT(!xfs_get_extsz_hint(ip)); |
@@ -560,7 +561,7 @@ xfs_file_iomap_begin_delay( | |||
560 | 561 | ||
561 | XFS_STATS_INC(mp, xs_blk_mapw); | 562 | XFS_STATS_INC(mp, xs_blk_mapw); |
562 | 563 | ||
563 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | 564 | if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) { |
564 | error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); | 565 | error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); |
565 | if (error) | 566 | if (error) |
566 | goto out_unlock; | 567 | goto out_unlock; |
@@ -568,51 +569,92 @@ xfs_file_iomap_begin_delay( | |||
568 | 569 | ||
569 | end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); | 570 | end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); |
570 | 571 | ||
571 | eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); | 572 | /* |
573 | * Search the data fork fork first to look up our source mapping. We | ||
574 | * always need the data fork map, as we have to return it to the | ||
575 | * iomap code so that the higher level write code can read data in to | ||
576 | * perform read-modify-write cycles for unaligned writes. | ||
577 | */ | ||
578 | eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap); | ||
572 | if (eof) | 579 | if (eof) |
573 | got.br_startoff = end_fsb; /* fake hole until the end */ | 580 | imap.br_startoff = end_fsb; /* fake hole until the end */ |
581 | |||
582 | /* We never need to allocate blocks for zeroing a hole. */ | ||
583 | if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { | ||
584 | xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); | ||
585 | goto out_unlock; | ||
586 | } | ||
587 | |||
588 | /* | ||
589 | * Search the COW fork extent list even if we did not find a data fork | ||
590 | * extent. This serves two purposes: first this implements the | ||
591 | * speculative preallocation using cowextsize, so that we also unshare | ||
592 | * block adjacent to shared blocks instead of just the shared blocks | ||
593 | * themselves. Second the lookup in the extent list is generally faster | ||
594 | * than going out to the shared extent tree. | ||
595 | */ | ||
596 | if (xfs_is_reflink_inode(ip)) { | ||
597 | cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, | ||
598 | &ccur, &cmap); | ||
599 | if (!cow_eof && cmap.br_startoff <= offset_fsb) { | ||
600 | trace_xfs_reflink_cow_found(ip, &cmap); | ||
601 | whichfork = XFS_COW_FORK; | ||
602 | goto done; | ||
603 | } | ||
604 | } | ||
574 | 605 | ||
575 | if (got.br_startoff <= offset_fsb) { | 606 | if (imap.br_startoff <= offset_fsb) { |
576 | /* | 607 | /* |
577 | * For reflink files we may need a delalloc reservation when | 608 | * For reflink files we may need a delalloc reservation when |
578 | * overwriting shared extents. This includes zeroing of | 609 | * overwriting shared extents. This includes zeroing of |
579 | * existing extents that contain data. | 610 | * existing extents that contain data. |
580 | */ | 611 | */ |
581 | if (xfs_is_reflink_inode(ip) && | 612 | if (!xfs_is_reflink_inode(ip) || |
582 | ((flags & IOMAP_WRITE) || | 613 | ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) { |
583 | got.br_state != XFS_EXT_UNWRITTEN)) { | 614 | trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, |
584 | xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); | 615 | &imap); |
585 | error = xfs_reflink_reserve_cow(ip, &got); | 616 | goto done; |
586 | if (error) | ||
587 | goto out_unlock; | ||
588 | } | 617 | } |
589 | 618 | ||
590 | trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, &got); | 619 | xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); |
591 | goto done; | ||
592 | } | ||
593 | 620 | ||
594 | if (flags & IOMAP_ZERO) { | 621 | /* Trim the mapping to the nearest shared extent boundary. */ |
595 | xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff); | 622 | error = xfs_reflink_trim_around_shared(ip, &imap, &shared); |
596 | goto out_unlock; | 623 | if (error) |
624 | goto out_unlock; | ||
625 | |||
626 | /* Not shared? Just report the (potentially capped) extent. */ | ||
627 | if (!shared) { | ||
628 | trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK, | ||
629 | &imap); | ||
630 | goto done; | ||
631 | } | ||
632 | |||
633 | /* | ||
634 | * Fork all the shared blocks from our write offset until the | ||
635 | * end of the extent. | ||
636 | */ | ||
637 | whichfork = XFS_COW_FORK; | ||
638 | end_fsb = imap.br_startoff + imap.br_blockcount; | ||
639 | } else { | ||
640 | /* | ||
641 | * We cap the maximum length we map here to MAX_WRITEBACK_PAGES | ||
642 | * pages to keep the chunks of work done where somewhat | ||
643 | * symmetric with the work writeback does. This is a completely | ||
644 | * arbitrary number pulled out of thin air. | ||
645 | * | ||
646 | * Note that the values needs to be less than 32-bits wide until | ||
647 | * the lower level functions are updated. | ||
648 | */ | ||
649 | count = min_t(loff_t, count, 1024 * PAGE_SIZE); | ||
650 | end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); | ||
597 | } | 651 | } |
598 | 652 | ||
599 | error = xfs_qm_dqattach_locked(ip, false); | 653 | error = xfs_qm_dqattach_locked(ip, false); |
600 | if (error) | 654 | if (error) |
601 | goto out_unlock; | 655 | goto out_unlock; |
602 | 656 | ||
603 | /* | 657 | if (eof && whichfork == XFS_DATA_FORK) { |
604 | * We cap the maximum length we map here to MAX_WRITEBACK_PAGES pages | ||
605 | * to keep the chunks of work done where somewhat symmetric with the | ||
606 | * work writeback does. This is a completely arbitrary number pulled | ||
607 | * out of thin air as a best guess for initial testing. | ||
608 | * | ||
609 | * Note that the values needs to be less than 32-bits wide until | ||
610 | * the lower level functions are updated. | ||
611 | */ | ||
612 | count = min_t(loff_t, count, 1024 * PAGE_SIZE); | ||
613 | end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb); | ||
614 | |||
615 | if (eof) { | ||
616 | prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, | 658 | prealloc_blocks = xfs_iomap_prealloc_size(ip, offset, count, |
617 | &icur); | 659 | &icur); |
618 | if (prealloc_blocks) { | 660 | if (prealloc_blocks) { |
@@ -635,9 +677,11 @@ xfs_file_iomap_begin_delay( | |||
635 | } | 677 | } |
636 | 678 | ||
637 | retry: | 679 | retry: |
638 | error = xfs_bmapi_reserve_delalloc(ip, XFS_DATA_FORK, offset_fsb, | 680 | error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb, |
639 | end_fsb - offset_fsb, prealloc_blocks, &got, &icur, | 681 | end_fsb - offset_fsb, prealloc_blocks, |
640 | eof); | 682 | whichfork == XFS_DATA_FORK ? &imap : &cmap, |
683 | whichfork == XFS_DATA_FORK ? &icur : &ccur, | ||
684 | whichfork == XFS_DATA_FORK ? eof : cow_eof); | ||
641 | switch (error) { | 685 | switch (error) { |
642 | case 0: | 686 | case 0: |
643 | break; | 687 | break; |
@@ -659,9 +703,20 @@ retry: | |||
659 | * them out if the write happens to fail. | 703 | * them out if the write happens to fail. |
660 | */ | 704 | */ |
661 | iomap->flags |= IOMAP_F_NEW; | 705 | iomap->flags |= IOMAP_F_NEW; |
662 | trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &got); | 706 | trace_xfs_iomap_alloc(ip, offset, count, whichfork, |
707 | whichfork == XFS_DATA_FORK ? &imap : &cmap); | ||
663 | done: | 708 | done: |
664 | error = xfs_bmbt_to_iomap(ip, iomap, &got, false); | 709 | if (whichfork == XFS_COW_FORK) { |
710 | if (imap.br_startoff > offset_fsb) { | ||
711 | xfs_trim_extent(&cmap, offset_fsb, | ||
712 | imap.br_startoff - offset_fsb); | ||
713 | error = xfs_bmbt_to_iomap(ip, iomap, &cmap, false); | ||
714 | goto out_unlock; | ||
715 | } | ||
716 | /* ensure we only report blocks we have a reservation for */ | ||
717 | xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount); | ||
718 | } | ||
719 | error = xfs_bmbt_to_iomap(ip, iomap, &imap, false); | ||
665 | out_unlock: | 720 | out_unlock: |
666 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 721 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
667 | return error; | 722 | return error; |
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 8a5353daf9ab..9ef1f79cb3ae 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c | |||
@@ -234,73 +234,6 @@ xfs_reflink_trim_around_shared( | |||
234 | } | 234 | } |
235 | } | 235 | } |
236 | 236 | ||
237 | /* | ||
238 | * Trim the passed in imap to the next shared/unshared extent boundary, and | ||
239 | * if imap->br_startoff points to a shared extent reserve space for it in the | ||
240 | * COW fork. | ||
241 | * | ||
242 | * Note that imap will always contain the block numbers for the existing blocks | ||
243 | * in the data fork, as the upper layers need them for read-modify-write | ||
244 | * operations. | ||
245 | */ | ||
246 | int | ||
247 | xfs_reflink_reserve_cow( | ||
248 | struct xfs_inode *ip, | ||
249 | struct xfs_bmbt_irec *imap) | ||
250 | { | ||
251 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | ||
252 | struct xfs_bmbt_irec got; | ||
253 | int error = 0; | ||
254 | bool eof = false; | ||
255 | struct xfs_iext_cursor icur; | ||
256 | bool shared; | ||
257 | |||
258 | /* | ||
259 | * Search the COW fork extent list first. This serves two purposes: | ||
260 | * first this implement the speculative preallocation using cowextisze, | ||
261 | * so that we also unshared block adjacent to shared blocks instead | ||
262 | * of just the shared blocks themselves. Second the lookup in the | ||
263 | * extent list is generally faster than going out to the shared extent | ||
264 | * tree. | ||
265 | */ | ||
266 | |||
267 | if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &icur, &got)) | ||
268 | eof = true; | ||
269 | if (!eof && got.br_startoff <= imap->br_startoff) { | ||
270 | trace_xfs_reflink_cow_found(ip, imap); | ||
271 | xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | /* Trim the mapping to the nearest shared extent boundary. */ | ||
276 | error = xfs_reflink_trim_around_shared(ip, imap, &shared); | ||
277 | if (error) | ||
278 | return error; | ||
279 | |||
280 | /* Not shared? Just report the (potentially capped) extent. */ | ||
281 | if (!shared) | ||
282 | return 0; | ||
283 | |||
284 | /* | ||
285 | * Fork all the shared blocks from our write offset until the end of | ||
286 | * the extent. | ||
287 | */ | ||
288 | error = xfs_qm_dqattach_locked(ip, false); | ||
289 | if (error) | ||
290 | return error; | ||
291 | |||
292 | error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, | ||
293 | imap->br_blockcount, 0, &got, &icur, eof); | ||
294 | if (error == -ENOSPC || error == -EDQUOT) | ||
295 | trace_xfs_reflink_cow_enospc(ip, imap); | ||
296 | if (error) | ||
297 | return error; | ||
298 | |||
299 | xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); | ||
300 | trace_xfs_reflink_cow_alloc(ip, &got); | ||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | /* Convert part of an unwritten CoW extent to a real one. */ | 237 | /* Convert part of an unwritten CoW extent to a real one. */ |
305 | STATIC int | 238 | STATIC int |
306 | xfs_reflink_convert_cow_extent( | 239 | xfs_reflink_convert_cow_extent( |
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 70d68a1a9b49..4a9e3cd4768a 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h | |||
@@ -12,8 +12,6 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp, | |||
12 | extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, | 12 | extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, |
13 | struct xfs_bmbt_irec *irec, bool *shared); | 13 | struct xfs_bmbt_irec *irec, bool *shared); |
14 | 14 | ||
15 | extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, | ||
16 | struct xfs_bmbt_irec *imap); | ||
17 | extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, | 15 | extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, |
18 | struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode, | 16 | struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode, |
19 | unsigned iomap_flags); | 17 | unsigned iomap_flags); |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f1e18ae8a209..47fb07d86efd 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -3196,13 +3196,10 @@ DEFINE_INODE_ERROR_EVENT(xfs_reflink_unshare_error); | |||
3196 | 3196 | ||
3197 | /* copy on write */ | 3197 | /* copy on write */ |
3198 | DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); | 3198 | DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared); |
3199 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); | ||
3200 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); | 3199 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); |
3201 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); | 3200 | DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); |
3202 | DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); | 3201 | DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow); |
3203 | 3202 | ||
3204 | DEFINE_RW_EVENT(xfs_reflink_reserve_cow); | ||
3205 | |||
3206 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); | 3203 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write); |
3207 | 3204 | ||
3208 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); | 3205 | DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); |