about summary refs log tree commit diff stats
path: root/fs/xfs/xfs_reflink.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r-- fs/xfs/xfs_reflink.c 265
1 files changed, 185 insertions, 80 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 07593a362cd0..da6d08fb359c 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -82,11 +82,22 @@
82 * mappings are a reservation against the free space in the filesystem; 82 * mappings are a reservation against the free space in the filesystem;
83 * adjacent mappings can also be combined into fewer larger mappings. 83 * adjacent mappings can also be combined into fewer larger mappings.
84 * 84 *
85 * As an optimization, the CoW extent size hint (cowextsz) creates
86 * outsized aligned delalloc reservations in the hope of landing out of
87 * order nearby CoW writes in a single extent on disk, thereby reducing
88 * fragmentation and improving future performance.
89 *
90 * D: --RRRRRRSSSRRRRRRRR--- (data fork)
91 * C: ------DDDDDDD--------- (CoW fork)
92 *
85 * When dirty pages are being written out (typically in writepage), the 93 * When dirty pages are being written out (typically in writepage), the
86 * delalloc reservations are converted into real mappings by allocating 94 * delalloc reservations are converted into unwritten mappings by
87 * blocks and replacing the delalloc mapping with real ones. A delalloc 95 * allocating blocks and replacing the delalloc mapping with real ones.
88 * mapping can be replaced by several real ones if the free space is 96 * A delalloc mapping can be replaced by several unwritten ones if the
89 * fragmented. 97 * free space is fragmented.
98 *
99 * D: --RRRRRRSSSRRRRRRRR---
100 * C: ------UUUUUUU---------
90 * 101 *
91 * We want to adapt the delalloc mechanism for copy-on-write, since the 102 * We want to adapt the delalloc mechanism for copy-on-write, since the
92 * write paths are similar. The first two steps (creating the reservation 103 * write paths are similar. The first two steps (creating the reservation
@@ -101,13 +112,29 @@
101 * Block-aligned directio writes will use the same mechanism as buffered 112 * Block-aligned directio writes will use the same mechanism as buffered
102 * writes. 113 * writes.
103 * 114 *
115 * Just prior to submitting the actual disk write requests, we convert
116 * the extents representing the range of the file actually being written
117 * (as opposed to extra pieces created for the cowextsize hint) to real
118 * extents. This will become important in the next step:
119 *
120 * D: --RRRRRRSSSRRRRRRRR---
121 * C: ------UUrrUUU---------
122 *
104 * CoW remapping must be done after the data block write completes, 123 * CoW remapping must be done after the data block write completes,
105 * because we don't want to destroy the old data fork map until we're sure 124 * because we don't want to destroy the old data fork map until we're sure
106 * the new block has been written. Since the new mappings are kept in a 125 * the new block has been written. Since the new mappings are kept in a
107 * separate fork, we can simply iterate these mappings to find the ones 126 * separate fork, we can simply iterate these mappings to find the ones
108 * that cover the file blocks that we just CoW'd. For each extent, simply 127 * that cover the file blocks that we just CoW'd. For each extent, simply
109 * unmap the corresponding range in the data fork, map the new range into 128 * unmap the corresponding range in the data fork, map the new range into
110 * the data fork, and remove the extent from the CoW fork. 129 * the data fork, and remove the extent from the CoW fork. Because of
130 * the presence of the cowextsize hint, however, we must be careful
131 * only to remap the blocks that we've actually written out -- we must
132 * never remap delalloc reservations nor CoW staging blocks that have
133 * yet to be written. This corresponds exactly to the real extents in
134 * the CoW fork:
135 *
136 * D: --RRRRRRrrSRRRRRRRR---
137 * C: ------UU--UUU---------
111 * 138 *
112 * Since the remapping operation can be applied to an arbitrary file 139 * Since the remapping operation can be applied to an arbitrary file
113 * range, we record the need for the remap step as a flag in the ioend 140 * range, we record the need for the remap step as a flag in the ioend
@@ -296,103 +323,165 @@ xfs_reflink_reserve_cow(
296 return 0; 323 return 0;
297} 324}
298 325
299/* Allocate all CoW reservations covering a range of blocks in a file. */ 326/* Convert part of an unwritten CoW extent to a real one. */
300static int 327STATIC int
301__xfs_reflink_allocate_cow( 328xfs_reflink_convert_cow_extent(
302 struct xfs_inode *ip, 329 struct xfs_inode *ip,
303 xfs_fileoff_t *offset_fsb, 330 struct xfs_bmbt_irec *imap,
304 xfs_fileoff_t end_fsb) 331 xfs_fileoff_t offset_fsb,
332 xfs_filblks_t count_fsb,
333 struct xfs_defer_ops *dfops)
305{ 334{
306 struct xfs_mount *mp = ip->i_mount; 335 xfs_fsblock_t first_block;
307 struct xfs_bmbt_irec imap; 336 int nimaps = 1;
308 struct xfs_defer_ops dfops;
309 struct xfs_trans *tp;
310 xfs_fsblock_t first_block;
311 int nimaps = 1, error;
312 bool shared;
313
314 xfs_defer_init(&dfops, &first_block);
315 337
316 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 338 if (imap->br_state == XFS_EXT_NORM)
317 XFS_TRANS_RESERVE, &tp); 339 return 0;
318 if (error)
319 return error;
320 340
321 xfs_ilock(ip, XFS_ILOCK_EXCL); 341 xfs_trim_extent(imap, offset_fsb, count_fsb);
342 trace_xfs_reflink_convert_cow(ip, imap);
343 if (imap->br_blockcount == 0)
344 return 0;
345 return xfs_bmapi_write(NULL, ip, imap->br_startoff, imap->br_blockcount,
346 XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
347 0, imap, &nimaps, dfops);
348}
322 349
323 /* Read extent from the source file. */ 350/* Convert all of the unwritten CoW extents in a file's range to real ones. */
324 nimaps = 1; 351int
325 error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, 352xfs_reflink_convert_cow(
326 &imap, &nimaps, 0); 353 struct xfs_inode *ip,
327 if (error) 354 xfs_off_t offset,
328 goto out_unlock; 355 xfs_off_t count)
329 ASSERT(nimaps == 1); 356{
357 struct xfs_bmbt_irec got;
358 struct xfs_defer_ops dfops;
359 struct xfs_mount *mp = ip->i_mount;
360 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
361 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
362 xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
363 xfs_extnum_t idx;
364 bool found;
365 int error = 0;
330 366
331 error = xfs_reflink_reserve_cow(ip, &imap, &shared); 367 xfs_ilock(ip, XFS_ILOCK_EXCL);
332 if (error)
333 goto out_trans_cancel;
334 368
335 if (!shared) { 369 /* Convert all the extents to real from unwritten. */
336 *offset_fsb = imap.br_startoff + imap.br_blockcount; 370 for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
337 goto out_trans_cancel; 371 found && got.br_startoff < end_fsb;
372 found = xfs_iext_get_extent(ifp, ++idx, &got)) {
373 error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
374 end_fsb - offset_fsb, &dfops);
375 if (error)
376 break;
338 } 377 }
339 378
340 xfs_trans_ijoin(tp, ip, 0); 379 /* Finish up. */
341 error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
342 XFS_BMAPI_COWFORK, &first_block,
343 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
344 &imap, &nimaps, &dfops);
345 if (error)
346 goto out_trans_cancel;
347
348 error = xfs_defer_finish(&tp, &dfops, NULL);
349 if (error)
350 goto out_trans_cancel;
351
352 error = xfs_trans_commit(tp);
353
354 *offset_fsb = imap.br_startoff + imap.br_blockcount;
355out_unlock:
356 xfs_iunlock(ip, XFS_ILOCK_EXCL); 380 xfs_iunlock(ip, XFS_ILOCK_EXCL);
357 return error; 381 return error;
358out_trans_cancel:
359 xfs_defer_cancel(&dfops);
360 xfs_trans_cancel(tp);
361 goto out_unlock;
362} 382}
363 383
364/* Allocate all CoW reservations covering a part of a file. */ 384/* Allocate all CoW reservations covering a range of blocks in a file. */
365int 385int
366xfs_reflink_allocate_cow_range( 386xfs_reflink_allocate_cow(
367 struct xfs_inode *ip, 387 struct xfs_inode *ip,
368 xfs_off_t offset, 388 struct xfs_bmbt_irec *imap,
369 xfs_off_t count) 389 bool *shared,
390 uint *lockmode)
370{ 391{
371 struct xfs_mount *mp = ip->i_mount; 392 struct xfs_mount *mp = ip->i_mount;
372 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 393 xfs_fileoff_t offset_fsb = imap->br_startoff;
373 xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); 394 xfs_filblks_t count_fsb = imap->br_blockcount;
374 int error; 395 struct xfs_bmbt_irec got;
396 struct xfs_defer_ops dfops;
397 struct xfs_trans *tp = NULL;
398 xfs_fsblock_t first_block;
399 int nimaps, error = 0;
400 bool trimmed;
401 xfs_filblks_t resaligned;
402 xfs_extlen_t resblks = 0;
403 xfs_extnum_t idx;
375 404
405retry:
376 ASSERT(xfs_is_reflink_inode(ip)); 406 ASSERT(xfs_is_reflink_inode(ip));
377 407 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
378 trace_xfs_reflink_allocate_cow_range(ip, offset, count);
379 408
380 /* 409 /*
381 * Make sure that the dquots are there. 410 * Even if the extent is not shared we might have a preallocation for
411 * it in the COW fork. If so use it.
382 */ 412 */
383 error = xfs_qm_dqattach(ip, 0); 413 if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
384 if (error) 414 got.br_startoff <= offset_fsb) {
385 return error; 415 *shared = true;
386 416
387 while (offset_fsb < end_fsb) { 417 /* If we have a real allocation in the COW fork we're done. */
388 error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb); 418 if (!isnullstartblock(got.br_startblock)) {
389 if (error) { 419 xfs_trim_extent(&got, offset_fsb, count_fsb);
390 trace_xfs_reflink_allocate_cow_range_error(ip, error, 420 *imap = got;
391 _RET_IP_); 421 goto convert;
392 break;
393 } 422 }
423
424 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
425 } else {
426 error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
427 if (error || !*shared)
428 goto out;
429 }
430
431 if (!tp) {
432 resaligned = xfs_aligned_fsb_count(imap->br_startoff,
433 imap->br_blockcount, xfs_get_cowextsz_hint(ip));
434 resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
435
436 xfs_iunlock(ip, *lockmode);
437 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
438 *lockmode = XFS_ILOCK_EXCL;
439 xfs_ilock(ip, *lockmode);
440
441 if (error)
442 return error;
443
444 error = xfs_qm_dqattach_locked(ip, 0);
445 if (error)
446 goto out;
447 goto retry;
394 } 448 }
395 449
450 error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
451 XFS_QMOPT_RES_REGBLKS);
452 if (error)
453 goto out;
454
455 xfs_trans_ijoin(tp, ip, 0);
456
457 xfs_defer_init(&dfops, &first_block);
458 nimaps = 1;
459
460 /* Allocate the entire reservation as unwritten blocks. */
461 error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
462 XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
463 resblks, imap, &nimaps, &dfops);
464 if (error)
465 goto out_bmap_cancel;
466
467 /* Finish up. */
468 error = xfs_defer_finish(&tp, &dfops, NULL);
469 if (error)
470 goto out_bmap_cancel;
471
472 error = xfs_trans_commit(tp);
473 if (error)
474 return error;
475convert:
476 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
477 &dfops);
478out_bmap_cancel:
479 xfs_defer_cancel(&dfops);
480 xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
481 XFS_QMOPT_RES_REGBLKS);
482out:
483 if (tp)
484 xfs_trans_cancel(tp);
396 return error; 485 return error;
397} 486}
398 487
@@ -641,6 +730,16 @@ xfs_reflink_end_cow(
641 730
642 ASSERT(!isnullstartblock(got.br_startblock)); 731 ASSERT(!isnullstartblock(got.br_startblock));
643 732
733 /*
734 * Don't remap unwritten extents; these are
735 * speculatively preallocated CoW extents that have been
736 * allocated but have not yet been involved in a write.
737 */
738 if (got.br_state == XFS_EXT_UNWRITTEN) {
739 idx--;
740 goto next_extent;
741 }
742
644 /* Unmap the old blocks in the data fork. */ 743 /* Unmap the old blocks in the data fork. */
645 xfs_defer_init(&dfops, &firstfsb); 744 xfs_defer_init(&dfops, &firstfsb);
646 rlen = del.br_blockcount; 745 rlen = del.br_blockcount;
@@ -855,13 +954,14 @@ STATIC int
855xfs_reflink_update_dest( 954xfs_reflink_update_dest(
856 struct xfs_inode *dest, 955 struct xfs_inode *dest,
857 xfs_off_t newlen, 956 xfs_off_t newlen,
858 xfs_extlen_t cowextsize) 957 xfs_extlen_t cowextsize,
958 bool is_dedupe)
859{ 959{
860 struct xfs_mount *mp = dest->i_mount; 960 struct xfs_mount *mp = dest->i_mount;
861 struct xfs_trans *tp; 961 struct xfs_trans *tp;
862 int error; 962 int error;
863 963
864 if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0) 964 if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
865 return 0; 965 return 0;
866 966
867 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); 967 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -882,6 +982,10 @@ xfs_reflink_update_dest(
882 dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; 982 dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
883 } 983 }
884 984
985 if (!is_dedupe) {
986 xfs_trans_ichgtime(tp, dest,
987 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
988 }
885 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE); 989 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
886 990
887 error = xfs_trans_commit(tp); 991 error = xfs_trans_commit(tp);
@@ -1195,7 +1299,8 @@ xfs_reflink_remap_range(
1195 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) 1299 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
1196 cowextsize = src->i_d.di_cowextsize; 1300 cowextsize = src->i_d.di_cowextsize;
1197 1301
1198 ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); 1302 ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
1303 is_dedupe);
1199 1304
1200out_unlock: 1305out_unlock:
1201 xfs_iunlock(src, XFS_MMAPLOCK_EXCL); 1306 xfs_iunlock(src, XFS_MMAPLOCK_EXCL);