aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_reflink.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_reflink.c')
-rw-r--r--fs/xfs/xfs_reflink.c499
1 files changed, 272 insertions, 227 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5965e9455d91..a279b4e7f5fe 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
182 if (!xfs_is_reflink_inode(ip) || 182 if (!xfs_is_reflink_inode(ip) ||
183 ISUNWRITTEN(irec) || 183 ISUNWRITTEN(irec) ||
184 irec->br_startblock == HOLESTARTBLOCK || 184 irec->br_startblock == HOLESTARTBLOCK ||
185 irec->br_startblock == DELAYSTARTBLOCK) { 185 irec->br_startblock == DELAYSTARTBLOCK ||
186 isnullstartblock(irec->br_startblock)) {
186 *shared = false; 187 *shared = false;
187 return 0; 188 return 0;
188 } 189 }
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
227 } 228 }
228} 229}
229 230
230/* Create a CoW reservation for a range of blocks within a file. */ 231/*
231static int 232 * Trim the passed in imap to the next shared/unshared extent boundary, and
232__xfs_reflink_reserve_cow( 233 * if imap->br_startoff points to a shared extent reserve space for it in the
234 * COW fork. In this case *shared is set to true, else to false.
235 *
236 * Note that imap will always contain the block numbers for the existing blocks
237 * in the data fork, as the upper layers need them for read-modify-write
238 * operations.
239 */
240int
241xfs_reflink_reserve_cow(
233 struct xfs_inode *ip, 242 struct xfs_inode *ip,
234 xfs_fileoff_t *offset_fsb, 243 struct xfs_bmbt_irec *imap,
235 xfs_fileoff_t end_fsb, 244 bool *shared)
236 bool *skipped)
237{ 245{
238 struct xfs_bmbt_irec got, prev, imap; 246 struct xfs_bmbt_irec got, prev;
239 xfs_fileoff_t orig_end_fsb; 247 xfs_fileoff_t end_fsb, orig_end_fsb;
240 int nimaps, eof = 0, error = 0; 248 int eof = 0, error = 0;
241 bool shared = false, trimmed = false; 249 bool trimmed;
242 xfs_extnum_t idx; 250 xfs_extnum_t idx;
243 xfs_extlen_t align; 251 xfs_extlen_t align;
244 252
245 /* Already reserved? Skip the refcount btree access. */ 253 /*
246 xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, 254 * Search the COW fork extent list first. This serves two purposes:
255 * first this implements the speculative preallocation using cowextsize,
256 * so that we also unshare blocks adjacent to shared blocks instead
257 * of just the shared blocks themselves. Second the lookup in the
258 * extent list is generally faster than going out to the shared extent
259 * tree.
260 */
261 xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
247 &got, &prev); 262 &got, &prev);
248 if (!eof && got.br_startoff <= *offset_fsb) { 263 if (!eof && got.br_startoff <= imap->br_startoff) {
249 end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; 264 trace_xfs_reflink_cow_found(ip, imap);
250 trace_xfs_reflink_cow_found(ip, &got); 265 xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
251 goto done;
252 }
253 266
254 /* Read extent from the source file. */ 267 *shared = true;
255 nimaps = 1; 268 return 0;
256 error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, 269 }
257 &imap, &nimaps, 0);
258 if (error)
259 goto out_unlock;
260 ASSERT(nimaps == 1);
261 270
262 /* Trim the mapping to the nearest shared extent boundary. */ 271 /* Trim the mapping to the nearest shared extent boundary. */
263 error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); 272 error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
264 if (error) 273 if (error)
265 goto out_unlock; 274 return error;
266
267 end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
268 275
269 /* Not shared? Just report the (potentially capped) extent. */ 276 /* Not shared? Just report the (potentially capped) extent. */
270 if (!shared) { 277 if (!*shared)
271 *skipped = true; 278 return 0;
272 goto done;
273 }
274 279
275 /* 280 /*
276 * Fork all the shared blocks from our write offset until the end of 281 * Fork all the shared blocks from our write offset until the end of
@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow(
278 */ 283 */
279 error = xfs_qm_dqattach_locked(ip, 0); 284 error = xfs_qm_dqattach_locked(ip, 0);
280 if (error) 285 if (error)
281 goto out_unlock; 286 return error;
287
288 end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
282 289
283 align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); 290 align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
284 if (align) 291 if (align)
285 end_fsb = roundup_64(end_fsb, align); 292 end_fsb = roundup_64(end_fsb, align);
286 293
287retry: 294retry:
288 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, 295 error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
289 end_fsb - *offset_fsb, &got, 296 end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
290 &prev, &idx, eof);
291 switch (error) { 297 switch (error) {
292 case 0: 298 case 0:
293 break; 299 break;
294 case -ENOSPC: 300 case -ENOSPC:
295 case -EDQUOT: 301 case -EDQUOT:
296 /* retry without any preallocation */ 302 /* retry without any preallocation */
297 trace_xfs_reflink_cow_enospc(ip, &imap); 303 trace_xfs_reflink_cow_enospc(ip, imap);
298 if (end_fsb != orig_end_fsb) { 304 if (end_fsb != orig_end_fsb) {
299 end_fsb = orig_end_fsb; 305 end_fsb = orig_end_fsb;
300 goto retry; 306 goto retry;
301 } 307 }
302 /*FALLTHRU*/ 308 /*FALLTHRU*/
303 default: 309 default:
304 goto out_unlock; 310 return error;
305 } 311 }
306 312
307 if (end_fsb != orig_end_fsb) 313 if (end_fsb != orig_end_fsb)
308 xfs_inode_set_cowblocks_tag(ip); 314 xfs_inode_set_cowblocks_tag(ip);
309 315
310 trace_xfs_reflink_cow_alloc(ip, &got); 316 trace_xfs_reflink_cow_alloc(ip, &got);
311done: 317 return 0;
312 *offset_fsb = end_fsb;
313out_unlock:
314 return error;
315}
316
317/* Create a CoW reservation for part of a file. */
318int
319xfs_reflink_reserve_cow_range(
320 struct xfs_inode *ip,
321 xfs_off_t offset,
322 xfs_off_t count)
323{
324 struct xfs_mount *mp = ip->i_mount;
325 xfs_fileoff_t offset_fsb, end_fsb;
326 bool skipped = false;
327 int error;
328
329 trace_xfs_reflink_reserve_cow_range(ip, offset, count);
330
331 offset_fsb = XFS_B_TO_FSBT(mp, offset);
332 end_fsb = XFS_B_TO_FSB(mp, offset + count);
333
334 xfs_ilock(ip, XFS_ILOCK_EXCL);
335 while (offset_fsb < end_fsb) {
336 error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
337 &skipped);
338 if (error) {
339 trace_xfs_reflink_reserve_cow_range_error(ip, error,
340 _RET_IP_);
341 break;
342 }
343 }
344 xfs_iunlock(ip, XFS_ILOCK_EXCL);
345
346 return error;
347} 318}
348 319
349/* Allocate all CoW reservations covering a range of blocks in a file. */ 320/* Allocate all CoW reservations covering a range of blocks in a file. */
@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow(
358 struct xfs_defer_ops dfops; 329 struct xfs_defer_ops dfops;
359 struct xfs_trans *tp; 330 struct xfs_trans *tp;
360 xfs_fsblock_t first_block; 331 xfs_fsblock_t first_block;
361 xfs_fileoff_t next_fsb;
362 int nimaps = 1, error; 332 int nimaps = 1, error;
363 bool skipped = false; 333 bool shared;
364 334
365 xfs_defer_init(&dfops, &first_block); 335 xfs_defer_init(&dfops, &first_block);
366 336
@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow(
371 341
372 xfs_ilock(ip, XFS_ILOCK_EXCL); 342 xfs_ilock(ip, XFS_ILOCK_EXCL);
373 343
374 next_fsb = *offset_fsb; 344 /* Read extent from the source file. */
375 error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); 345 nimaps = 1;
346 error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
347 &imap, &nimaps, 0);
348 if (error)
349 goto out_unlock;
350 ASSERT(nimaps == 1);
351
352 error = xfs_reflink_reserve_cow(ip, &imap, &shared);
376 if (error) 353 if (error)
377 goto out_trans_cancel; 354 goto out_trans_cancel;
378 355
379 if (skipped) { 356 if (!shared) {
380 *offset_fsb = next_fsb; 357 *offset_fsb = imap.br_startoff + imap.br_blockcount;
381 goto out_trans_cancel; 358 goto out_trans_cancel;
382 } 359 }
383 360
384 xfs_trans_ijoin(tp, ip, 0); 361 xfs_trans_ijoin(tp, ip, 0);
385 error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, 362 error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
386 XFS_BMAPI_COWFORK, &first_block, 363 XFS_BMAPI_COWFORK, &first_block,
387 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 364 XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
388 &imap, &nimaps, &dfops); 365 &imap, &nimaps, &dfops);
389 if (error) 366 if (error)
390 goto out_trans_cancel; 367 goto out_trans_cancel;
391 368
392 /* We might not have been able to map the whole delalloc extent */
393 *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
394
395 error = xfs_defer_finish(&tp, &dfops, NULL); 369 error = xfs_defer_finish(&tp, &dfops, NULL);
396 if (error) 370 if (error)
397 goto out_trans_cancel; 371 goto out_trans_cancel;
398 372
399 error = xfs_trans_commit(tp); 373 error = xfs_trans_commit(tp);
400 374
375 *offset_fsb = imap.br_startoff + imap.br_blockcount;
401out_unlock: 376out_unlock:
402 xfs_iunlock(ip, XFS_ILOCK_EXCL); 377 xfs_iunlock(ip, XFS_ILOCK_EXCL);
403 return error; 378 return error;
@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks(
536 xfs_fileoff_t offset_fsb, 511 xfs_fileoff_t offset_fsb,
537 xfs_fileoff_t end_fsb) 512 xfs_fileoff_t end_fsb)
538{ 513{
539 struct xfs_bmbt_irec irec; 514 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
540 xfs_filblks_t count_fsb; 515 struct xfs_bmbt_irec got, prev, del;
516 xfs_extnum_t idx;
541 xfs_fsblock_t firstfsb; 517 xfs_fsblock_t firstfsb;
542 struct xfs_defer_ops dfops; 518 struct xfs_defer_ops dfops;
543 int error = 0; 519 int error = 0, eof = 0;
544 int nimaps;
545 520
546 if (!xfs_is_reflink_inode(ip)) 521 if (!xfs_is_reflink_inode(ip))
547 return 0; 522 return 0;
548 523
549 /* Go find the old extent in the CoW fork. */ 524 xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
550 while (offset_fsb < end_fsb) { 525 &got, &prev);
551 nimaps = 1; 526 if (eof)
552 count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); 527 return 0;
553 error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
554 &nimaps, XFS_BMAPI_COWFORK);
555 if (error)
556 break;
557 ASSERT(nimaps == 1);
558
559 trace_xfs_reflink_cancel_cow(ip, &irec);
560 528
561 if (irec.br_startblock == DELAYSTARTBLOCK) { 529 while (got.br_startoff < end_fsb) {
562 /* Free a delayed allocation. */ 530 del = got;
563 xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount, 531 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
564 false); 532 trace_xfs_reflink_cancel_cow(ip, &del);
565 ip->i_delayed_blks -= irec.br_blockcount;
566 533
567 /* Remove the mapping from the CoW fork. */ 534 if (isnullstartblock(del.br_startblock)) {
568 error = xfs_bunmapi_cow(ip, &irec); 535 error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
536 &idx, &got, &del);
569 if (error) 537 if (error)
570 break; 538 break;
571 } else if (irec.br_startblock == HOLESTARTBLOCK) {
572 /* empty */
573 } else { 539 } else {
574 xfs_trans_ijoin(*tpp, ip, 0); 540 xfs_trans_ijoin(*tpp, ip, 0);
575 xfs_defer_init(&dfops, &firstfsb); 541 xfs_defer_init(&dfops, &firstfsb);
576 542
577 /* Free the CoW orphan record. */ 543 /* Free the CoW orphan record. */
578 error = xfs_refcount_free_cow_extent(ip->i_mount, 544 error = xfs_refcount_free_cow_extent(ip->i_mount,
579 &dfops, irec.br_startblock, 545 &dfops, del.br_startblock,
580 irec.br_blockcount); 546 del.br_blockcount);
581 if (error) 547 if (error)
582 break; 548 break;
583 549
584 xfs_bmap_add_free(ip->i_mount, &dfops, 550 xfs_bmap_add_free(ip->i_mount, &dfops,
585 irec.br_startblock, irec.br_blockcount, 551 del.br_startblock, del.br_blockcount,
586 NULL); 552 NULL);
587 553
588 /* Update quota accounting */ 554 /* Update quota accounting */
589 xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, 555 xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
590 -(long)irec.br_blockcount); 556 -(long)del.br_blockcount);
591 557
592 /* Roll the transaction */ 558 /* Roll the transaction */
593 error = xfs_defer_finish(tpp, &dfops, ip); 559 error = xfs_defer_finish(tpp, &dfops, ip);
@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks(
597 } 563 }
598 564
599 /* Remove the mapping from the CoW fork. */ 565 /* Remove the mapping from the CoW fork. */
600 error = xfs_bunmapi_cow(ip, &irec); 566 xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
601 if (error)
602 break;
603 } 567 }
604 568
605 /* Roll on... */ 569 if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
606 offset_fsb = irec.br_startoff + irec.br_blockcount; 570 break;
571 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
607 } 572 }
608 573
574 /* clear tag if cow fork is emptied */
575 if (!ifp->if_bytes)
576 xfs_inode_clear_cowblocks_tag(ip);
577
609 return error; 578 return error;
610} 579}
611 580
@@ -668,25 +637,26 @@ xfs_reflink_end_cow(
668 xfs_off_t offset, 637 xfs_off_t offset,
669 xfs_off_t count) 638 xfs_off_t count)
670{ 639{
671 struct xfs_bmbt_irec irec; 640 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
672 struct xfs_bmbt_irec uirec; 641 struct xfs_bmbt_irec got, prev, del;
673 struct xfs_trans *tp; 642 struct xfs_trans *tp;
674 xfs_fileoff_t offset_fsb; 643 xfs_fileoff_t offset_fsb;
675 xfs_fileoff_t end_fsb; 644 xfs_fileoff_t end_fsb;
676 xfs_filblks_t count_fsb;
677 xfs_fsblock_t firstfsb; 645 xfs_fsblock_t firstfsb;
678 struct xfs_defer_ops dfops; 646 struct xfs_defer_ops dfops;
679 int error; 647 int error, eof = 0;
680 unsigned int resblks; 648 unsigned int resblks;
681 xfs_filblks_t ilen;
682 xfs_filblks_t rlen; 649 xfs_filblks_t rlen;
683 int nimaps; 650 xfs_extnum_t idx;
684 651
685 trace_xfs_reflink_end_cow(ip, offset, count); 652 trace_xfs_reflink_end_cow(ip, offset, count);
686 653
654 /* No COW extents? That's easy! */
655 if (ifp->if_bytes == 0)
656 return 0;
657
687 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 658 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
688 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); 659 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
689 count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
690 660
691 /* Start a rolling transaction to switch the mappings */ 661 /* Start a rolling transaction to switch the mappings */
692 resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); 662 resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
@@ -698,72 +668,65 @@ xfs_reflink_end_cow(
698 xfs_ilock(ip, XFS_ILOCK_EXCL); 668 xfs_ilock(ip, XFS_ILOCK_EXCL);
699 xfs_trans_ijoin(tp, ip, 0); 669 xfs_trans_ijoin(tp, ip, 0);
700 670
701 /* Go find the old extent in the CoW fork. */ 671 xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
702 while (offset_fsb < end_fsb) { 672 &got, &prev);
703 /* Read extent from the source file */
704 nimaps = 1;
705 count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
706 error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
707 &nimaps, XFS_BMAPI_COWFORK);
708 if (error)
709 goto out_cancel;
710 ASSERT(nimaps == 1);
711 673
712 ASSERT(irec.br_startblock != DELAYSTARTBLOCK); 674 /* If there is a hole at end_fsb - 1 go to the previous extent */
713 trace_xfs_reflink_cow_remap(ip, &irec); 675 if (eof || got.br_startoff > end_fsb) {
676 ASSERT(idx > 0);
677 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
678 }
714 679
715 /* 680 /* Walk backwards until we're out of the I/O range... */
716 * We can have a hole in the CoW fork if part of a directio 681 while (got.br_startoff + got.br_blockcount > offset_fsb) {
717 * write is CoW but part of it isn't. 682 del = got;
718 */ 683 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
719 rlen = ilen = irec.br_blockcount; 684
720 if (irec.br_startblock == HOLESTARTBLOCK) 685 /* Extent delete may have bumped idx forward */
686 if (!del.br_blockcount) {
687 idx--;
721 goto next_extent; 688 goto next_extent;
689 }
690
691 ASSERT(!isnullstartblock(got.br_startblock));
722 692
723 /* Unmap the old blocks in the data fork. */ 693 /* Unmap the old blocks in the data fork. */
724 while (rlen) { 694 xfs_defer_init(&dfops, &firstfsb);
725 xfs_defer_init(&dfops, &firstfsb); 695 rlen = del.br_blockcount;
726 error = __xfs_bunmapi(tp, ip, irec.br_startoff, 696 error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
727 &rlen, 0, 1, &firstfsb, &dfops); 697 &firstfsb, &dfops);
728 if (error) 698 if (error)
729 goto out_defer; 699 goto out_defer;
730
731 /*
732 * Trim the extent to whatever got unmapped.
733 * Remember, bunmapi works backwards.
734 */
735 uirec.br_startblock = irec.br_startblock + rlen;
736 uirec.br_startoff = irec.br_startoff + rlen;
737 uirec.br_blockcount = irec.br_blockcount - rlen;
738 irec.br_blockcount = rlen;
739 trace_xfs_reflink_cow_remap_piece(ip, &uirec);
740 700
741 /* Free the CoW orphan record. */ 701 /* Trim the extent to whatever got unmapped. */
742 error = xfs_refcount_free_cow_extent(tp->t_mountp, 702 if (rlen) {
743 &dfops, uirec.br_startblock, 703 xfs_trim_extent(&del, del.br_startoff + rlen,
744 uirec.br_blockcount); 704 del.br_blockcount - rlen);
745 if (error) 705 }
746 goto out_defer; 706 trace_xfs_reflink_cow_remap(ip, &del);
747 707
748 /* Map the new blocks into the data fork. */ 708 /* Free the CoW orphan record. */
749 error = xfs_bmap_map_extent(tp->t_mountp, &dfops, 709 error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
750 ip, &uirec); 710 del.br_startblock, del.br_blockcount);
751 if (error) 711 if (error)
752 goto out_defer; 712 goto out_defer;
753 713
754 /* Remove the mapping from the CoW fork. */ 714 /* Map the new blocks into the data fork. */
755 error = xfs_bunmapi_cow(ip, &uirec); 715 error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
756 if (error) 716 if (error)
757 goto out_defer; 717 goto out_defer;
758 718
759 error = xfs_defer_finish(&tp, &dfops, ip); 719 /* Remove the mapping from the CoW fork. */
760 if (error) 720 xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
761 goto out_defer; 721
762 } 722 error = xfs_defer_finish(&tp, &dfops, ip);
723 if (error)
724 goto out_defer;
763 725
764next_extent: 726next_extent:
765 /* Roll on... */ 727 if (idx < 0)
766 offset_fsb = irec.br_startoff + ilen; 728 break;
729 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
767 } 730 }
768 731
769 error = xfs_trans_commit(tp); 732 error = xfs_trans_commit(tp);
@@ -774,7 +737,6 @@ next_extent:
774 737
775out_defer: 738out_defer:
776 xfs_defer_cancel(&dfops); 739 xfs_defer_cancel(&dfops);
777out_cancel:
778 xfs_trans_cancel(tp); 740 xfs_trans_cancel(tp);
779 xfs_iunlock(ip, XFS_ILOCK_EXCL); 741 xfs_iunlock(ip, XFS_ILOCK_EXCL);
780out: 742out:
@@ -1312,19 +1274,26 @@ out_error:
1312 */ 1274 */
1313int 1275int
1314xfs_reflink_remap_range( 1276xfs_reflink_remap_range(
1315 struct xfs_inode *src, 1277 struct file *file_in,
1316 xfs_off_t srcoff, 1278 loff_t pos_in,
1317 struct xfs_inode *dest, 1279 struct file *file_out,
1318 xfs_off_t destoff, 1280 loff_t pos_out,
1319 xfs_off_t len, 1281 u64 len,
1320 unsigned int flags) 1282 bool is_dedupe)
1321{ 1283{
1284 struct inode *inode_in = file_inode(file_in);
1285 struct xfs_inode *src = XFS_I(inode_in);
1286 struct inode *inode_out = file_inode(file_out);
1287 struct xfs_inode *dest = XFS_I(inode_out);
1322 struct xfs_mount *mp = src->i_mount; 1288 struct xfs_mount *mp = src->i_mount;
1289 loff_t bs = inode_out->i_sb->s_blocksize;
1290 bool same_inode = (inode_in == inode_out);
1323 xfs_fileoff_t sfsbno, dfsbno; 1291 xfs_fileoff_t sfsbno, dfsbno;
1324 xfs_filblks_t fsblen; 1292 xfs_filblks_t fsblen;
1325 int error;
1326 xfs_extlen_t cowextsize; 1293 xfs_extlen_t cowextsize;
1327 bool is_same; 1294 loff_t isize;
1295 ssize_t ret;
1296 loff_t blen;
1328 1297
1329 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1298 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1330 return -EOPNOTSUPP; 1299 return -EOPNOTSUPP;
@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range(
1332 if (XFS_FORCED_SHUTDOWN(mp)) 1301 if (XFS_FORCED_SHUTDOWN(mp))
1333 return -EIO; 1302 return -EIO;
1334 1303
1335 /* Don't reflink realtime inodes */
1336 if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
1337 return -EINVAL;
1338
1339 if (flags & ~XFS_REFLINK_ALL)
1340 return -EINVAL;
1341
1342 trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
1343
1344 /* Lock both files against IO */ 1304 /* Lock both files against IO */
1345 if (src->i_ino == dest->i_ino) { 1305 if (same_inode) {
1346 xfs_ilock(src, XFS_IOLOCK_EXCL); 1306 xfs_ilock(src, XFS_IOLOCK_EXCL);
1347 xfs_ilock(src, XFS_MMAPLOCK_EXCL); 1307 xfs_ilock(src, XFS_MMAPLOCK_EXCL);
1348 } else { 1308 } else {
@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range(
1350 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); 1310 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
1351 } 1311 }
1352 1312
1313 /* Don't touch certain kinds of inodes */
1314 ret = -EPERM;
1315 if (IS_IMMUTABLE(inode_out))
1316 goto out_unlock;
1317
1318 ret = -ETXTBSY;
1319 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
1320 goto out_unlock;
1321
1322
1323 /* Don't reflink dirs, pipes, sockets... */
1324 ret = -EISDIR;
1325 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
1326 goto out_unlock;
1327 ret = -EINVAL;
1328 if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
1329 goto out_unlock;
1330 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
1331 goto out_unlock;
1332
1333 /* Don't reflink realtime inodes */
1334 if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
1335 goto out_unlock;
1336
1337 /* Don't share DAX file data for now. */
1338 if (IS_DAX(inode_in) || IS_DAX(inode_out))
1339 goto out_unlock;
1340
1341 /* Are we going all the way to the end? */
1342 isize = i_size_read(inode_in);
1343 if (isize == 0) {
1344 ret = 0;
1345 goto out_unlock;
1346 }
1347
1348 if (len == 0)
1349 len = isize - pos_in;
1350
1351 /* Ensure offsets don't wrap and the input is inside i_size */
1352 if (pos_in + len < pos_in || pos_out + len < pos_out ||
1353 pos_in + len > isize)
1354 goto out_unlock;
1355
1356 /* Don't allow dedupe past EOF in the dest file */
1357 if (is_dedupe) {
1358 loff_t disize;
1359
1360 disize = i_size_read(inode_out);
1361 if (pos_out >= disize || pos_out + len > disize)
1362 goto out_unlock;
1363 }
1364
1365 /* If we're linking to EOF, continue to the block boundary. */
1366 if (pos_in + len == isize)
1367 blen = ALIGN(isize, bs) - pos_in;
1368 else
1369 blen = len;
1370
1371 /* Only reflink if we're aligned to block boundaries */
1372 if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
1373 !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
1374 goto out_unlock;
1375
1376 /* Don't allow overlapped reflink within the same file */
1377 if (same_inode) {
1378 if (pos_out + blen > pos_in && pos_out < pos_in + blen)
1379 goto out_unlock;
1380 }
1381
1382 /* Wait for the completion of any pending IOs on both files */
1383 inode_dio_wait(inode_in);
1384 if (!same_inode)
1385 inode_dio_wait(inode_out);
1386
1387 ret = filemap_write_and_wait_range(inode_in->i_mapping,
1388 pos_in, pos_in + len - 1);
1389 if (ret)
1390 goto out_unlock;
1391
1392 ret = filemap_write_and_wait_range(inode_out->i_mapping,
1393 pos_out, pos_out + len - 1);
1394 if (ret)
1395 goto out_unlock;
1396
1397 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1398
1353 /* 1399 /*
1354 * Check that the extents are the same. 1400 * Check that the extents are the same.
1355 */ 1401 */
1356 if (flags & XFS_REFLINK_DEDUPE) { 1402 if (is_dedupe) {
1357 is_same = false; 1403 bool is_same = false;
1358 error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), 1404
1359 destoff, len, &is_same); 1405 ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
1360 if (error) 1406 len, &is_same);
1361 goto out_error; 1407 if (ret)
1408 goto out_unlock;
1362 if (!is_same) { 1409 if (!is_same) {
1363 error = -EBADE; 1410 ret = -EBADE;
1364 goto out_error; 1411 goto out_unlock;
1365 } 1412 }
1366 } 1413 }
1367 1414
1368 error = xfs_reflink_set_inode_flag(src, dest); 1415 ret = xfs_reflink_set_inode_flag(src, dest);
1369 if (error) 1416 if (ret)
1370 goto out_error; 1417 goto out_unlock;
1371 1418
1372 /* 1419 /*
1373 * Invalidate the page cache so that we can clear any CoW mappings 1420 * Invalidate the page cache so that we can clear any CoW mappings
1374 * in the destination file. 1421 * in the destination file.
1375 */ 1422 */
1376 truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, 1423 truncate_inode_pages_range(&inode_out->i_data, pos_out,
1377 PAGE_ALIGN(destoff + len) - 1); 1424 PAGE_ALIGN(pos_out + len) - 1);
1378 1425
1379 dfsbno = XFS_B_TO_FSBT(mp, destoff); 1426 dfsbno = XFS_B_TO_FSBT(mp, pos_out);
1380 sfsbno = XFS_B_TO_FSBT(mp, srcoff); 1427 sfsbno = XFS_B_TO_FSBT(mp, pos_in);
1381 fsblen = XFS_B_TO_FSB(mp, len); 1428 fsblen = XFS_B_TO_FSB(mp, len);
1382 error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, 1429 ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
1383 destoff + len); 1430 pos_out + len);
1384 if (error) 1431 if (ret)
1385 goto out_error; 1432 goto out_unlock;
1386 1433
1387 /* 1434 /*
1388 * Carry the cowextsize hint from src to dest if we're sharing the 1435 * Carry the cowextsize hint from src to dest if we're sharing the
@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range(
1390 * has a cowextsize hint, and the destination file does not. 1437 * has a cowextsize hint, and the destination file does not.
1391 */ 1438 */
1392 cowextsize = 0; 1439 cowextsize = 0;
1393 if (srcoff == 0 && len == i_size_read(VFS_I(src)) && 1440 if (pos_in == 0 && len == i_size_read(inode_in) &&
1394 (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && 1441 (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1395 destoff == 0 && len >= i_size_read(VFS_I(dest)) && 1442 pos_out == 0 && len >= i_size_read(inode_out) &&
1396 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) 1443 !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
1397 cowextsize = src->i_d.di_cowextsize; 1444 cowextsize = src->i_d.di_cowextsize;
1398 1445
1399 error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); 1446 ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
1400 if (error)
1401 goto out_error;
1402 1447
1403out_error: 1448out_unlock:
1404 xfs_iunlock(src, XFS_MMAPLOCK_EXCL); 1449 xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
1405 xfs_iunlock(src, XFS_IOLOCK_EXCL); 1450 xfs_iunlock(src, XFS_IOLOCK_EXCL);
1406 if (src->i_ino != dest->i_ino) { 1451 if (src->i_ino != dest->i_ino) {
1407 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); 1452 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
1408 xfs_iunlock(dest, XFS_IOLOCK_EXCL); 1453 xfs_iunlock(dest, XFS_IOLOCK_EXCL);
1409 } 1454 }
1410 if (error) 1455 if (ret)
1411 trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); 1456 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1412 return error; 1457 return ret;
1413} 1458}
1414 1459
1415/* 1460/*