aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 54
1 files changed, 54 insertions, 0 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6d650468d21a..1bbaace73383 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -722,12 +722,66 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
722 &ordered_sums, 0); 722 &ordered_sums, 0);
723 if (ret) 723 if (ret)
724 goto out; 724 goto out;
725 /*
726 * Now delete all existing csums in the csum root that
727 * cover our range. We do this because we can have an
728 * extent that is completely referenced by one file
729 * extent item and partially referenced by another
730 * file extent item (like after using the clone or
731 * extent_same ioctls). In this case if we end up doing
732 * the replay of the one that partially references the
733 * extent first, and we do not do the csum deletion
734 * below, we can get 2 csum items in the csum tree that
735 * overlap each other. For example, imagine our log has
736 * the two following file extent items:
737 *
738 * key (257 EXTENT_DATA 409600)
739 * extent data disk byte 12845056 nr 102400
740 * extent data offset 20480 nr 20480 ram 102400
741 *
742 * key (257 EXTENT_DATA 819200)
743 * extent data disk byte 12845056 nr 102400
744 * extent data offset 0 nr 102400 ram 102400
745 *
746 * Where the second one fully references the 100K extent
747 * that starts at disk byte 12845056, and the log tree
748 * has a single csum item that covers the entire range
749 * of the extent:
750 *
751 * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
752 *
753 * After the first file extent item is replayed, the
754 * csum tree gets the following csum item:
755 *
756 * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
757 *
758 * Which covers the 20K sub-range starting at offset 20K
759 * of our extent. Now when we replay the second file
760 * extent item, if we do not delete existing csum items
761 * that cover any of its blocks, we end up getting two
762 * csum items in our csum tree that overlap each other:
763 *
764 * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100
765 * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20
766 *
767 * Which is a problem, because after this anyone trying
768 * to look up the checksum of any block of our
769 * extent starting at an offset of 40K or higher, will
770 * end up looking at the second csum item only, which
771 * does not contain the checksum for any block starting
772 * at offset 40K or higher of our extent.
773 */
725 while (!list_empty(&ordered_sums)) { 774 while (!list_empty(&ordered_sums)) {
726 struct btrfs_ordered_sum *sums; 775 struct btrfs_ordered_sum *sums;
727 sums = list_entry(ordered_sums.next, 776 sums = list_entry(ordered_sums.next,
728 struct btrfs_ordered_sum, 777 struct btrfs_ordered_sum,
729 list); 778 list);
730 if (!ret) 779 if (!ret)
780 ret = btrfs_del_csums(trans,
781 root->fs_info->csum_root,
782 sums->bytenr,
783 sums->len);
784 if (!ret)
731 ret = btrfs_csum_file_blocks(trans, 785 ret = btrfs_csum_file_blocks(trans,
732 root->fs_info->csum_root, 786 root->fs_info->csum_root,
733 sums); 787 sums);