diff options
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- | fs/xfs/xfs_reflink.c | 265 |
1 files changed, 185 insertions, 80 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 07593a362cd0..da6d08fb359c 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -82,11 +82,22 @@ | |||
82 | * mappings are a reservation against the free space in the filesystem; | 82 | * mappings are a reservation against the free space in the filesystem; |
83 | * adjacent mappings can also be combined into fewer larger mappings. | 83 | * adjacent mappings can also be combined into fewer larger mappings. |
84 | * | 84 | * |
85 | * As an optimization, the CoW extent size hint (cowextsz) creates | ||
86 | * outsized aligned delalloc reservations in the hope of landing out of | ||
87 | * order nearby CoW writes in a single extent on disk, thereby reducing | ||
88 | * fragmentation and improving future performance. | ||
89 | * | ||
90 | * D: --RRRRRRSSSRRRRRRRR--- (data fork) | ||
91 | * C: ------DDDDDDD--------- (CoW fork) | ||
92 | * | ||
85 | * When dirty pages are being written out (typically in writepage), the | 93 | * When dirty pages are being written out (typically in writepage), the |
86 | * delalloc reservations are converted into real mappings by allocating | 94 | * delalloc reservations are converted into unwritten mappings by |
87 | * blocks and replacing the delalloc mapping with real ones. A delalloc | 95 | * allocating blocks and replacing the delalloc mapping with real ones. |
88 | * mapping can be replaced by several real ones if the free space is | 96 | * A delalloc mapping can be replaced by several unwritten ones if the |
89 | * fragmented. | 97 | * free space is fragmented. |
98 | * | ||
99 | * D: --RRRRRRSSSRRRRRRRR--- | ||
100 | * C: ------UUUUUUU--------- | ||
90 | * | 101 | * |
91 | * We want to adapt the delalloc mechanism for copy-on-write, since the | 102 | * We want to adapt the delalloc mechanism for copy-on-write, since the |
92 | * write paths are similar. The first two steps (creating the reservation | 103 | * write paths are similar. The first two steps (creating the reservation |
@@ -101,13 +112,29 @@ | |||
101 | * Block-aligned directio writes will use the same mechanism as buffered | 112 | * Block-aligned directio writes will use the same mechanism as buffered |
102 | * writes. | 113 | * writes. |
103 | * | 114 | * |
115 | * Just prior to submitting the actual disk write requests, we convert | ||
116 | * the extents representing the range of the file actually being written | ||
117 | * (as opposed to extra pieces created for the cowextsize hint) to real | ||
118 | * extents. This will become important in the next step: | ||
119 | * | ||
120 | * D: --RRRRRRSSSRRRRRRRR--- | ||
121 | * C: ------UUrrUUU--------- | ||
122 | * | ||
104 | * CoW remapping must be done after the data block write completes, | 123 | * CoW remapping must be done after the data block write completes, |
105 | * because we don't want to destroy the old data fork map until we're sure | 124 | * because we don't want to destroy the old data fork map until we're sure |
106 | * the new block has been written. Since the new mappings are kept in a | 125 | * the new block has been written. Since the new mappings are kept in a |
107 | * separate fork, we can simply iterate these mappings to find the ones | 126 | * separate fork, we can simply iterate these mappings to find the ones |
108 | * that cover the file blocks that we just CoW'd. For each extent, simply | 127 | * that cover the file blocks that we just CoW'd. For each extent, simply |
109 | * unmap the corresponding range in the data fork, map the new range into | 128 | * unmap the corresponding range in the data fork, map the new range into |
110 | * the data fork, and remove the extent from the CoW fork. | 129 | * the data fork, and remove the extent from the CoW fork. Because of |
130 | * the presence of the cowextsize hint, however, we must be careful | ||
131 | * only to remap the blocks that we've actually written out -- we must | ||
132 | * never remap delalloc reservations nor CoW staging blocks that have | ||
133 | * yet to be written. This corresponds exactly to the real extents in | ||
134 | * the CoW fork: | ||
135 | * | ||
136 | * D: --RRRRRRrrSRRRRRRRR--- | ||
137 | * C: ------UU--UUU--------- | ||
111 | * | 138 | * |
112 | * Since the remapping operation can be applied to an arbitrary file | 139 | * Since the remapping operation can be applied to an arbitrary file |
113 | * range, we record the need for the remap step as a flag in the ioend | 140 | * range, we record the need for the remap step as a flag in the ioend |
@@ -296,103 +323,165 @@ xfs_reflink_reserve_cow( | |||
296 | return 0; | 323 | return 0; |
297 | } | 324 | } |
298 | 325 | ||
299 | /* Allocate all CoW reservations covering a range of blocks in a file. */ | 326 | /* Convert part of an unwritten CoW extent to a real one. */ |
300 | static int | 327 | STATIC int |
301 | __xfs_reflink_allocate_cow( | 328 | xfs_reflink_convert_cow_extent( |
302 | struct xfs_inode *ip, | 329 | struct xfs_inode *ip, |
303 | xfs_fileoff_t *offset_fsb, | 330 | struct xfs_bmbt_irec *imap, |
304 | xfs_fileoff_t end_fsb) | 331 | xfs_fileoff_t offset_fsb, |
332 | xfs_filblks_t count_fsb, | ||
333 | struct xfs_defer_ops *dfops) | ||
305 | { | 334 | { |
306 | struct xfs_mount *mp = ip->i_mount; | 335 | xfs_fsblock_t first_block; |
307 | struct xfs_bmbt_irec imap; | 336 | int nimaps = 1; |
308 | struct xfs_defer_ops dfops; | ||
309 | struct xfs_trans *tp; | ||
310 | xfs_fsblock_t first_block; | ||
311 | int nimaps = 1, error; | ||
312 | bool shared; | ||
313 | |||
314 | xfs_defer_init(&dfops, &first_block); | ||
315 | 337 | ||
316 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, | 338 | if (imap->br_state == XFS_EXT_NORM) |
317 | XFS_TRANS_RESERVE, &tp); | 339 | return 0; |
318 | if (error) | ||
319 | return error; | ||
320 | 340 | ||
321 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 341 | xfs_trim_extent(imap, offset_fsb, count_fsb); |
342 | trace_xfs_reflink_convert_cow(ip, imap); | ||
343 | if (imap->br_blockcount == 0) | ||
344 | return 0; | ||
345 | return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount, | ||
346 | XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block, | ||
347 | 0, imap, &nimaps, dfops); | ||
348 | } | ||
322 | 349 | ||
323 | /* Read extent from the source file. */ | 350 | /* Convert all of the unwritten CoW extents in a file's range to real ones. */ |
324 | nimaps = 1; | 351 | int |
325 | error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, | 352 | xfs_reflink_convert_cow( |
326 | &imap, &nimaps, 0); | 353 | struct xfs_inode *ip, |
327 | if (error) | 354 | xfs_off_t offset, |
328 | goto out_unlock; | 355 | xfs_off_t count) |
329 | ASSERT(nimaps == 1); | 356 | { |
357 | struct xfs_bmbt_irec got; | ||
358 | struct xfs_defer_ops dfops; | ||
359 | struct xfs_mount *mp = ip->i_mount; | ||
360 | struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); | ||
361 | xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); | ||
362 | xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); | ||
363 | xfs_extnum_t idx; | ||
364 | bool found; | ||
365 | int error = 0; | ||
330 | 366 | ||
331 | error = xfs_reflink_reserve_cow(ip, &imap, &shared); | 367 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
332 | if (error) | ||
333 | goto out_trans_cancel; | ||
334 | 368 | ||
335 | if (!shared) { | 369 | /* Convert all the extents to real from unwritten. */ |
336 | *offset_fsb = imap.br_startoff + imap.br_blockcount; | 370 | for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); |
337 | goto out_trans_cancel; | 371 | found && got.br_startoff < end_fsb; |
372 | found = xfs_iext_get_extent(ifp, ++idx, &got)) { | ||
373 | error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, | ||
374 | end_fsb - offset_fsb, &dfops); | ||
375 | if (error) | ||
376 | break; | ||
338 | } | 377 | } |
339 | 378 | ||
340 | xfs_trans_ijoin(tp, ip, 0); | 379 | /* Finish up. */ |
341 | error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, | ||
342 | XFS_BMAPI_COWFORK, &first_block, | ||
343 | XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), | ||
344 | &imap, &nimaps, &dfops); | ||
345 | if (error) | ||
346 | goto out_trans_cancel; | ||
347 | |||
348 | error = xfs_defer_finish(&tp, &dfops, NULL); | ||
349 | if (error) | ||
350 | goto out_trans_cancel; | ||
351 | |||
352 | error = xfs_trans_commit(tp); | ||
353 | |||
354 | *offset_fsb = imap.br_startoff + imap.br_blockcount; | ||
355 | out_unlock: | ||
356 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 380 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
357 | return error; | 381 | return error; |
358 | out_trans_cancel: | ||
359 | xfs_defer_cancel(&dfops); | ||
360 | xfs_trans_cancel(tp); | ||
361 | goto out_unlock; | ||
362 | } | 382 | } |
363 | 383 | ||
364 | /* Allocate all CoW reservations covering a part of a file. */ | 384 | /* Allocate all CoW reservations covering a range of blocks in a file. */ |
365 | int | 385 | int |
366 | xfs_reflink_allocate_cow_range( | 386 | xfs_reflink_allocate_cow( |
367 | struct xfs_inode *ip, | 387 | struct xfs_inode *ip, |
368 | xfs_off_t offset, | 388 | struct xfs_bmbt_irec *imap, |
369 | xfs_off_t count) | 389 | bool *shared, |
390 | uint *lockmode) | ||
370 | { | 391 | { |
371 | struct xfs_mount *mp = ip->i_mount; | 392 | struct xfs_mount *mp = ip->i_mount; |
372 | xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); | 393 | xfs_fileoff_t offset_fsb = imap->br_startoff; |
373 | xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); | 394 | xfs_filblks_t count_fsb = imap->br_blockcount; |
374 | int error; | 395 | struct xfs_bmbt_irec got; |
396 | struct xfs_defer_ops dfops; | ||
397 | struct xfs_trans *tp = NULL; | ||
398 | xfs_fsblock_t first_block; | ||
399 | int nimaps, error = 0; | ||
400 | bool trimmed; | ||
401 | xfs_filblks_t resaligned; | ||
402 | xfs_extlen_t resblks = 0; | ||
403 | xfs_extnum_t idx; | ||
375 | 404 | ||
405 | retry: | ||
376 | ASSERT(xfs_is_reflink_inode(ip)); | 406 | ASSERT(xfs_is_reflink_inode(ip)); |
377 | 407 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); | |
378 | trace_xfs_reflink_allocate_cow_range(ip, offset, count); | ||
379 | 408 | ||
380 | /* | 409 | /* |
381 | * Make sure that the dquots are there. | 410 | * Even if the extent is not shared we might have a preallocation for |
411 | * it in the COW fork. If so use it. | ||
382 | */ | 412 | */ |
383 | error = xfs_qm_dqattach(ip, 0); | 413 | if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) && |
384 | if (error) | 414 | got.br_startoff <= offset_fsb) { |
385 | return error; | 415 | *shared = true; |
386 | 416 | ||
387 | while (offset_fsb < end_fsb) { | 417 | /* If we have a real allocation in the COW fork we're done. */ |
388 | error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb); | 418 | if (!isnullstartblock(got.br_startblock)) { |
389 | if (error) { | 419 | xfs_trim_extent(&got, offset_fsb, count_fsb); |
390 | trace_xfs_reflink_allocate_cow_range_error(ip, error, | 420 | *imap = got; |
391 | _RET_IP_); | 421 | goto convert; |
392 | break; | ||
393 | } | 422 | } |
423 | |||
424 | xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); | ||
425 | } else { | ||
426 | error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); | ||
427 | if (error || !*shared) | ||
428 | goto out; | ||
429 | } | ||
430 | |||
431 | if (!tp) { | ||
432 | resaligned = xfs_aligned_fsb_count(imap->br_startoff, | ||
433 | imap->br_blockcount, xfs_get_cowextsz_hint(ip)); | ||
434 | resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); | ||
435 | |||
436 | xfs_iunlock(ip, *lockmode); | ||
437 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); | ||
438 | *lockmode = XFS_ILOCK_EXCL; | ||
439 | xfs_ilock(ip, *lockmode); | ||
440 | |||
441 | if (error) | ||
442 | return error; | ||
443 | |||
444 | error = xfs_qm_dqattach_locked(ip, 0); | ||
445 | if (error) | ||
446 | goto out; | ||
447 | goto retry; | ||
394 | } | 448 | } |
395 | 449 | ||
450 | error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, | ||
451 | XFS_QMOPT_RES_REGBLKS); | ||
452 | if (error) | ||
453 | goto out; | ||
454 | |||
455 | xfs_trans_ijoin(tp, ip, 0); | ||
456 | |||
457 | xfs_defer_init(&dfops, &first_block); | ||
458 | nimaps = 1; | ||
459 | |||
460 | /* Allocate the entire reservation as unwritten blocks. */ | ||
461 | error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, | ||
462 | XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block, | ||
463 | resblks, imap, &nimaps, &dfops); | ||
464 | if (error) | ||
465 | goto out_bmap_cancel; | ||
466 | |||
467 | /* Finish up. */ | ||
468 | error = xfs_defer_finish(&tp, &dfops, NULL); | ||
469 | if (error) | ||
470 | goto out_bmap_cancel; | ||
471 | |||
472 | error = xfs_trans_commit(tp); | ||
473 | if (error) | ||
474 | return error; | ||
475 | convert: | ||
476 | return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb, | ||
477 | &dfops); | ||
478 | out_bmap_cancel: | ||
479 | xfs_defer_cancel(&dfops); | ||
480 | xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, | ||
481 | XFS_QMOPT_RES_REGBLKS); | ||
482 | out: | ||
483 | if (tp) | ||
484 | xfs_trans_cancel(tp); | ||
396 | return error; | 485 | return error; |
397 | } | 486 | } |
398 | 487 | ||
@@ -641,6 +730,16 @@ xfs_reflink_end_cow( | |||
641 | 730 | ||
642 | ASSERT(!isnullstartblock(got.br_startblock)); | 731 | ASSERT(!isnullstartblock(got.br_startblock)); |
643 | 732 | ||
733 | /* | ||
734 | * Don't remap unwritten extents; these are | ||
735 | * speculatively preallocated CoW extents that have been | ||
736 | * allocated but have not yet been involved in a write. | ||
737 | */ | ||
738 | if (got.br_state == XFS_EXT_UNWRITTEN) { | ||
739 | idx--; | ||
740 | goto next_extent; | ||
741 | } | ||
742 | |||
644 | /* Unmap the old blocks in the data fork. */ | 743 | /* Unmap the old blocks in the data fork. */ |
645 | xfs_defer_init(&dfops, &firstfsb); | 744 | xfs_defer_init(&dfops, &firstfsb); |
646 | rlen = del.br_blockcount; | 745 | rlen = del.br_blockcount; |
@@ -855,13 +954,14 @@ STATIC int | |||
855 | xfs_reflink_update_dest( | 954 | xfs_reflink_update_dest( |
856 | struct xfs_inode *dest, | 955 | struct xfs_inode *dest, |
857 | xfs_off_t newlen, | 956 | xfs_off_t newlen, |
858 | xfs_extlen_t cowextsize) | 957 | xfs_extlen_t cowextsize, |
958 | bool is_dedupe) | ||
859 | { | 959 | { |
860 | struct xfs_mount *mp = dest->i_mount; | 960 | struct xfs_mount *mp = dest->i_mount; |
861 | struct xfs_trans *tp; | 961 | struct xfs_trans *tp; |
862 | int error; | 962 | int error; |
863 | 963 | ||
864 | if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) | 964 | if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) |
865 | return 0; | 965 | return 0; |
866 | 966 | ||
867 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); | 967 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); |
@@ -882,6 +982,10 @@ xfs_reflink_update_dest( | |||
882 | dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; | 982 | dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; |
883 | } | 983 | } |
884 | 984 | ||
985 | if (!is_dedupe) { | ||
986 | xfs_trans_ichgtime(tp, dest, | ||
987 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
988 | } | ||
885 | xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); | 989 | xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); |
886 | 990 | ||
887 | error = xfs_trans_commit(tp); | 991 | error = xfs_trans_commit(tp); |
@@ -1195,7 +1299,8 @@ xfs_reflink_remap_range( | |||
1195 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) | 1299 | !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) |
1196 | cowextsize = src->i_d.di_cowextsize; | 1300 | cowextsize = src->i_d.di_cowextsize; |
1197 | 1301 | ||
1198 | ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); | 1302 | ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, |
1303 | is_dedupe); | ||
1199 | 1304 | ||
1200 | out_unlock: | 1305 | out_unlock: |
1201 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); | 1306 | xfs_iunlock(src, XFS_MMAPLOCK_EXCL); |