diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6d650468d21a..1bbaace73383 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -722,12 +722,66 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
722 | &ordered_sums, 0); | 722 | &ordered_sums, 0); |
723 | if (ret) | 723 | if (ret) |
724 | goto out; | 724 | goto out; |
725 | /* | ||
726 | * Now delete all existing csums in the csum root that | ||
727 | * cover our range. We do this because we can have an | ||
728 | * extent that is completely referenced by one file | ||
729 | * extent item and partially referenced by another | ||
730 | * file extent item (like after using the clone or | ||
731 | * extent_same ioctls). In this case if we end up doing | ||
732 | * the replay of the one that partially references the | ||
733 | * extent first, and we do not do the csum deletion | ||
734 | * below, we can get 2 csum items in the csum tree that | ||
735 | * overlap each other. For example, imagine our log has | ||
736 | * the two following file extent items: | ||
737 | * | ||
738 | * key (257 EXTENT_DATA 409600) | ||
739 | * extent data disk byte 12845056 nr 102400 | ||
740 | * extent data offset 20480 nr 20480 ram 102400 | ||
741 | * | ||
742 | * key (257 EXTENT_DATA 819200) | ||
743 | * extent data disk byte 12845056 nr 102400 | ||
744 | * extent data offset 0 nr 102400 ram 102400 | ||
745 | * | ||
746 | * Where the second one fully references the 100K extent | ||
747 | * that starts at disk byte 12845056, and the log tree | ||
748 | * has a single csum item that covers the entire range | ||
749 | * of the extent: | ||
750 | * | ||
751 | * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 | ||
752 | * | ||
753 | * After the first file extent item is replayed, the | ||
754 | * csum tree gets the following csum item: | ||
755 | * | ||
756 | * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 | ||
757 | * | ||
758 | * Which covers the 20K sub-range starting at offset 20K | ||
759 | * of our extent. Now when we replay the second file | ||
760 | * extent item, if we do not delete existing csum items | ||
761 | * that cover any of its blocks, we end up getting two | ||
762 | * csum items in our csum tree that overlap each other: | ||
763 | * | ||
764 | * key (EXTENT_CSUM EXTENT_CSUM 12845056) itemsize 100 | ||
765 | * key (EXTENT_CSUM EXTENT_CSUM 12865536) itemsize 20 | ||
766 | * | ||
767 | * Which is a problem, because after this anyone trying | ||
768 | * to look up the checksum of any block of our | ||
769 | * extent starting at an offset of 40K or higher, will | ||
770 | * end up looking at the second csum item only, which | ||
771 | * does not contain the checksum for any block starting | ||
772 | * at offset 40K or higher of our extent. | ||
773 | */ | ||
725 | while (!list_empty(&ordered_sums)) { | 774 | while (!list_empty(&ordered_sums)) { |
726 | struct btrfs_ordered_sum *sums; | 775 | struct btrfs_ordered_sum *sums; |
727 | sums = list_entry(ordered_sums.next, | 776 | sums = list_entry(ordered_sums.next, |
728 | struct btrfs_ordered_sum, | 777 | struct btrfs_ordered_sum, |
729 | list); | 778 | list); |
730 | if (!ret) | 779 | if (!ret) |
780 | ret = btrfs_del_csums(trans, | ||
781 | root->fs_info->csum_root, | ||
782 | sums->bytenr, | ||
783 | sums->len); | ||
784 | if (!ret) | ||
731 | ret = btrfs_csum_file_blocks(trans, | 785 | ret = btrfs_csum_file_blocks(trans, |
732 | root->fs_info->csum_root, | 786 | root->fs_info->csum_root, |
733 | sums); | 787 | sums); |