diff options
author | Yan Zheng <zheng.yan@oracle.com> | 2008-12-12 10:03:38 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-12-12 10:03:38 -0500 |
commit | 17d217fe970d34720f4f1633dca73a6aa2f3d9d1 (patch) | |
tree | 4e2e716400cc45a6697475629f4c046b96ff76e7 /fs/btrfs/inode.c | |
parent | e4404d6e8da678d852b7f767f665f8edf76c9e9f (diff) |
Btrfs: fix nodatasum handling in balancing code
Checksums on data can be disabled by mount option, so it's
possible some data extents don't have checksums or have
invalid checksums. This causes trouble for data relocation.
This patch contains the following changes to make data relocation
work.
1) make the nodatasum/nodatacow mount options only affect new
files. Checksums and COW on data are now controlled only by the
inode flags.
2) check for the existence of checksums in the nodatacow checker.
If checksums exist, force COW of the data extent. This ensures that
the checksum for a given block is either valid or does not exist.
3) update the data relocation code to properly handle the case
of missing checksums.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 74 |
1 files changed, 61 insertions, 13 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a28b7706314..e64a4fe19a60 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -771,6 +771,13 @@ static noinline int cow_file_range(struct inode *inode, | |||
771 | ram_size, cur_alloc_size, 0); | 771 | ram_size, cur_alloc_size, 0); |
772 | BUG_ON(ret); | 772 | BUG_ON(ret); |
773 | 773 | ||
774 | if (root->root_key.objectid == | ||
775 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
776 | ret = btrfs_reloc_clone_csums(inode, start, | ||
777 | cur_alloc_size); | ||
778 | BUG_ON(ret); | ||
779 | } | ||
780 | |||
774 | if (disk_num_bytes < cur_alloc_size) { | 781 | if (disk_num_bytes < cur_alloc_size) { |
775 | printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, | 782 | printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, |
776 | cur_alloc_size); | 783 | cur_alloc_size); |
@@ -910,6 +917,26 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
910 | return 0; | 917 | return 0; |
911 | } | 918 | } |
912 | 919 | ||
920 | static int noinline csum_exist_in_range(struct btrfs_root *root, | ||
921 | u64 bytenr, u64 num_bytes) | ||
922 | { | ||
923 | int ret; | ||
924 | struct btrfs_ordered_sum *sums; | ||
925 | LIST_HEAD(list); | ||
926 | |||
927 | ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1, | ||
928 | &list); | ||
929 | if (ret == 0 && list_empty(&list)) | ||
930 | return 0; | ||
931 | |||
932 | while (!list_empty(&list)) { | ||
933 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
934 | list_del(&sums->list); | ||
935 | kfree(sums); | ||
936 | } | ||
937 | return 1; | ||
938 | } | ||
939 | |||
913 | /* | 940 | /* |
914 | * when nowcow writeback call back. This checks for snapshots or COW copies | 941 | * when nowcow writeback call back. This checks for snapshots or COW copies |
915 | * of the extents that exist in the file, and COWs the file as required. | 942 | * of the extents that exist in the file, and COWs the file as required. |
@@ -971,6 +998,7 @@ next_slot: | |||
971 | 998 | ||
972 | nocow = 0; | 999 | nocow = 0; |
973 | disk_bytenr = 0; | 1000 | disk_bytenr = 0; |
1001 | num_bytes = 0; | ||
974 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 1002 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
975 | 1003 | ||
976 | if (found_key.objectid > inode->i_ino || | 1004 | if (found_key.objectid > inode->i_ino || |
@@ -996,19 +1024,29 @@ next_slot: | |||
996 | path->slots[0]++; | 1024 | path->slots[0]++; |
997 | goto next_slot; | 1025 | goto next_slot; |
998 | } | 1026 | } |
1027 | if (disk_bytenr == 0) | ||
1028 | goto out_check; | ||
999 | if (btrfs_file_extent_compression(leaf, fi) || | 1029 | if (btrfs_file_extent_compression(leaf, fi) || |
1000 | btrfs_file_extent_encryption(leaf, fi) || | 1030 | btrfs_file_extent_encryption(leaf, fi) || |
1001 | btrfs_file_extent_other_encoding(leaf, fi)) | 1031 | btrfs_file_extent_other_encoding(leaf, fi)) |
1002 | goto out_check; | 1032 | goto out_check; |
1003 | if (disk_bytenr == 0) | ||
1004 | goto out_check; | ||
1005 | if (extent_type == BTRFS_FILE_EXTENT_REG && !force) | 1033 | if (extent_type == BTRFS_FILE_EXTENT_REG && !force) |
1006 | goto out_check; | 1034 | goto out_check; |
1007 | if (btrfs_cross_ref_exist(trans, root, disk_bytenr)) | ||
1008 | goto out_check; | ||
1009 | if (btrfs_extent_readonly(root, disk_bytenr)) | 1035 | if (btrfs_extent_readonly(root, disk_bytenr)) |
1010 | goto out_check; | 1036 | goto out_check; |
1037 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | ||
1038 | disk_bytenr)) | ||
1039 | goto out_check; | ||
1011 | disk_bytenr += btrfs_file_extent_offset(leaf, fi); | 1040 | disk_bytenr += btrfs_file_extent_offset(leaf, fi); |
1041 | disk_bytenr += cur_offset - found_key.offset; | ||
1042 | num_bytes = min(end + 1, extent_end) - cur_offset; | ||
1043 | /* | ||
1044 | * force cow if csum exists in the range. | ||
1045 | * this ensure that csum for a given extent are | ||
1046 | * either valid or do not exist. | ||
1047 | */ | ||
1048 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | ||
1049 | goto out_check; | ||
1012 | nocow = 1; | 1050 | nocow = 1; |
1013 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | 1051 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
1014 | extent_end = found_key.offset + | 1052 | extent_end = found_key.offset + |
@@ -1041,8 +1079,6 @@ out_check: | |||
1041 | cow_start = (u64)-1; | 1079 | cow_start = (u64)-1; |
1042 | } | 1080 | } |
1043 | 1081 | ||
1044 | disk_bytenr += cur_offset - found_key.offset; | ||
1045 | num_bytes = min(end + 1, extent_end) - cur_offset; | ||
1046 | if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { | 1082 | if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { |
1047 | struct extent_map *em; | 1083 | struct extent_map *em; |
1048 | struct extent_map_tree *em_tree; | 1084 | struct extent_map_tree *em_tree; |
@@ -1105,11 +1141,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1105 | u64 start, u64 end, int *page_started, | 1141 | u64 start, u64 end, int *page_started, |
1106 | unsigned long *nr_written) | 1142 | unsigned long *nr_written) |
1107 | { | 1143 | { |
1108 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1109 | int ret; | 1144 | int ret; |
1110 | 1145 | ||
1111 | if (btrfs_test_opt(root, NODATACOW) || | 1146 | if (btrfs_test_flag(inode, NODATACOW)) |
1112 | btrfs_test_flag(inode, NODATACOW)) | ||
1113 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1147 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1114 | page_started, 1, nr_written); | 1148 | page_started, 1, nr_written); |
1115 | else if (btrfs_test_flag(inode, PREALLOC)) | 1149 | else if (btrfs_test_flag(inode, PREALLOC)) |
@@ -1252,8 +1286,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1252 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1286 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
1253 | BUG_ON(ret); | 1287 | BUG_ON(ret); |
1254 | 1288 | ||
1255 | skip_sum = btrfs_test_opt(root, NODATASUM) || | 1289 | skip_sum = btrfs_test_flag(inode, NODATASUM); |
1256 | btrfs_test_flag(inode, NODATASUM); | ||
1257 | 1290 | ||
1258 | if (!(rw & (1 << BIO_RW))) { | 1291 | if (!(rw & (1 << BIO_RW))) { |
1259 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1292 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
@@ -1263,6 +1296,9 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1263 | btrfs_lookup_bio_sums(root, inode, bio, NULL); | 1296 | btrfs_lookup_bio_sums(root, inode, bio, NULL); |
1264 | goto mapit; | 1297 | goto mapit; |
1265 | } else if (!skip_sum) { | 1298 | } else if (!skip_sum) { |
1299 | /* csum items have already been cloned */ | ||
1300 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
1301 | goto mapit; | ||
1266 | /* we're doing a write, do the async checksumming */ | 1302 | /* we're doing a write, do the async checksumming */ |
1267 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1303 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
1268 | inode, rw, bio, mirror_num, | 1304 | inode, rw, bio, mirror_num, |
@@ -1692,9 +1728,15 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
1692 | ClearPageChecked(page); | 1728 | ClearPageChecked(page); |
1693 | goto good; | 1729 | goto good; |
1694 | } | 1730 | } |
1695 | if (btrfs_test_opt(root, NODATASUM) || | 1731 | if (btrfs_test_flag(inode, NODATASUM)) |
1696 | btrfs_test_flag(inode, NODATASUM)) | 1732 | return 0; |
1733 | |||
1734 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | ||
1735 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { | ||
1736 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, | ||
1737 | GFP_NOFS); | ||
1697 | return 0; | 1738 | return 0; |
1739 | } | ||
1698 | 1740 | ||
1699 | if (state && state->start == start) { | 1741 | if (state && state->start == start) { |
1700 | private = state->private; | 1742 | private = state->private; |
@@ -3391,6 +3433,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3391 | owner = 1; | 3433 | owner = 1; |
3392 | BTRFS_I(inode)->block_group = | 3434 | BTRFS_I(inode)->block_group = |
3393 | btrfs_find_block_group(root, 0, alloc_hint, owner); | 3435 | btrfs_find_block_group(root, 0, alloc_hint, owner); |
3436 | if ((mode & S_IFREG)) { | ||
3437 | if (btrfs_test_opt(root, NODATASUM)) | ||
3438 | btrfs_set_flag(inode, NODATASUM); | ||
3439 | if (btrfs_test_opt(root, NODATACOW)) | ||
3440 | btrfs_set_flag(inode, NODATACOW); | ||
3441 | } | ||
3394 | 3442 | ||
3395 | key[0].objectid = objectid; | 3443 | key[0].objectid = objectid; |
3396 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 3444 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |