aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Miao Xie <miaox@cn.fujitsu.com> 2012-09-06 06:01:21 -0400
committer: Chris Mason <chris.mason@fusionio.com> 2012-10-01 15:19:10 -0400
commit: b9a8cc5bef963b76c5b6c3016b7e91988a3e758b (patch)
tree: ee3d69f930908f0de9e2e90f2bfb450fffeba018 /fs
parent: 361048f586f59d414421c6486dd846063a0cac98 (diff)
Btrfs: fix file extent discount problem in the snapshot
If a snapshot is created while we are writing some data into the file, the i_size of the corresponding file in the snapshot will be wrong: it will be beyond the end of the last file extent. And btrfsck will report:

    root 256 inode 257 errors 100

Steps to reproduce:

    # mkfs.btrfs <partition>
    # mount <partition> <mnt>
    # cd <mnt>
    # dd if=/dev/zero of=tmpfile bs=4M count=1024 &
    # for ((i=0; i<4; i++))
    > do
    > btrfs sub snap . $i
    > done

This is because the algorithm of the disk_i_size update is wrong. Though there are some ordered extents behind the current one which we use to update disk_i_size, it doesn't mean those extents will be dealt with in the same transaction. So we shouldn't use the offset of those extents to update disk_i_size, or we will get the wrong i_size in the snapshot.

We fix this problem by recording the max real i_size. If we find there is an ordered extent which is in front of the current one and hasn't completed, we will record the end of the current one into that ordered extent. Surely, if the current extent holds the end of another extent (it must be greater than the current one because it is behind the current one), we will record the number that the current extent holds. In this way, we can exclude the ordered extents that may not be dealt with in the same transaction, and it is easy to know the real disk_i_size.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/ordered-data.c  62
-rw-r--r--  fs/btrfs/ordered-data.h   7
2 files changed, 25 insertions, 44 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 051c7fe551dd..cd8ecb73c05c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -775,7 +775,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
775 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 775 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
776 u64 disk_i_size; 776 u64 disk_i_size;
777 u64 new_i_size; 777 u64 new_i_size;
778 u64 i_size_test;
779 u64 i_size = i_size_read(inode); 778 u64 i_size = i_size_read(inode);
780 struct rb_node *node; 779 struct rb_node *node;
781 struct rb_node *prev = NULL; 780 struct rb_node *prev = NULL;
@@ -835,55 +834,30 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
835 break; 834 break;
836 if (test->file_offset >= i_size) 835 if (test->file_offset >= i_size)
837 break; 836 break;
838 if (test->file_offset >= disk_i_size) 837 if (test->file_offset >= disk_i_size) {
838 /*
839 * we don't update disk_i_size now, so record this
840 * undealt i_size. Or we will not know the real
841 * i_size.
842 */
843 if (test->outstanding_isize < offset)
844 test->outstanding_isize = offset;
845 if (ordered &&
846 ordered->outstanding_isize >
847 test->outstanding_isize)
848 test->outstanding_isize =
849 ordered->outstanding_isize;
839 goto out; 850 goto out;
840 }
841 new_i_size = min_t(u64, offset, i_size);
842
843 /*
844 * at this point, we know we can safely update i_size to at least
845 * the offset from this ordered extent. But, we need to
846 * walk forward and see if ios from higher up in the file have
847 * finished.
848 */
849 if (ordered) {
850 node = rb_next(&ordered->rb_node);
851 } else {
852 if (prev)
853 node = rb_next(prev);
854 else
855 node = rb_first(&tree->tree);
856 }
857
858 /*
859 * We are looking for an area between our current extent and the next
860 * ordered extent to update the i_size to. There are 3 cases here
861 *
862 * 1) We don't actually have anything and we can update to i_size.
863 * 2) We have stuff but they already did their i_size update so again we
864 * can just update to i_size.
865 * 3) We have an outstanding ordered extent so the most we can update
866 * our disk_i_size to is the start of the next offset.
867 */
868 i_size_test = i_size;
869 for (; node; node = rb_next(node)) {
870 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
871
872 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
873 continue;
874 if (test->file_offset > offset) {
875 i_size_test = test->file_offset;
876 break;
877 } 851 }
878 } 852 }
853 new_i_size = min_t(u64, offset, i_size);
879 854
880 /* 855 /*
881 * i_size_test is the end of a region after this ordered 856 * Some ordered extents may completed before the current one, and
882 * extent where there are no ordered extents, we can safely set 857 * we hold the real i_size in ->outstanding_isize.
883 * disk_i_size to this.
884 */ 858 */
885 if (i_size_test > offset) 859 if (ordered && ordered->outstanding_isize > new_i_size)
886 new_i_size = min_t(u64, i_size_test, i_size); 860 new_i_size = min_t(u64, ordered->outstanding_isize, i_size);
887 BTRFS_I(inode)->disk_i_size = new_i_size; 861 BTRFS_I(inode)->disk_i_size = new_i_size;
888 ret = 0; 862 ret = 0;
889out: 863out:
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index e03c560d2997..c2443a431ca5 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -96,6 +96,13 @@ struct btrfs_ordered_extent {
96 /* number of bytes that still need writing */ 96 /* number of bytes that still need writing */
97 u64 bytes_left; 97 u64 bytes_left;
98 98
99 /*
100 * the end of the ordered extent which is behind it but
101 * didn't update disk_i_size. Please see the comment of
102 * btrfs_ordered_update_i_size();
103 */
104 u64 outstanding_isize;
105
99 /* flags (described above) */ 106 /* flags (described above) */
100 unsigned long flags; 107 unsigned long flags;
101 108