aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author Yan Zheng <zheng.yan@oracle.com> 2008-12-12 10:03:38 -0500
committer Chris Mason <chris.mason@oracle.com> 2008-12-12 10:03:38 -0500
commit 17d217fe970d34720f4f1633dca73a6aa2f3d9d1 (patch)
tree 4e2e716400cc45a6697475629f4c046b96ff76e7 /fs/btrfs/inode.c
parent e4404d6e8da678d852b7f767f665f8edf76c9e9f (diff)
Btrfs: fix nodatasum handling in balancing code
Checksums on data can be disabled by a mount option, so it is possible that some data extents have no checksums or have invalid checksums. This causes trouble for data relocation. This patch contains the following changes to make data relocation work. 1) Make the nodatasum/nodatacow mount options affect only new files. Checksums and COW on data are controlled only by the inode flags. 2) Check for the existence of checksums in the nodatacow checker. If checksums exist, force COW of the data extent. This ensures that the checksum for a given block is either valid or does not exist. 3) Update the data relocation code to properly handle missing checksums. Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 74
1 files changed, 61 insertions, 13 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0a28b7706314..e64a4fe19a60 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -771,6 +771,13 @@ static noinline int cow_file_range(struct inode *inode,
771 ram_size, cur_alloc_size, 0); 771 ram_size, cur_alloc_size, 0);
772 BUG_ON(ret); 772 BUG_ON(ret);
773 773
774 if (root->root_key.objectid ==
775 BTRFS_DATA_RELOC_TREE_OBJECTID) {
776 ret = btrfs_reloc_clone_csums(inode, start,
777 cur_alloc_size);
778 BUG_ON(ret);
779 }
780
774 if (disk_num_bytes < cur_alloc_size) { 781 if (disk_num_bytes < cur_alloc_size) {
775 printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes, 782 printk("num_bytes %Lu cur_alloc %Lu\n", disk_num_bytes,
776 cur_alloc_size); 783 cur_alloc_size);
@@ -910,6 +917,26 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
910 return 0; 917 return 0;
911} 918}
912 919
/*
 * Report whether the csum lookup finds anything for the disk byte range
 * [bytenr, bytenr + num_bytes - 1].
 *
 * Returns 0 only when btrfs_lookup_csums_range() returns 0 and leaves the
 * list empty.  In every other case, including a non-zero return from the
 * lookup, returns 1 after freeing whatever entries were put on the list.
 */
920static int noinline csum_exist_in_range(struct btrfs_root *root,
921 u64 bytenr, u64 num_bytes)
922{
923 int ret;
924 struct btrfs_ordered_sum *sums;
925 LIST_HEAD(list);
926
/* the end of the range passed to the lookup is inclusive, hence the "- 1" */
927 ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1,
928 &list);
/* lookup succeeded and produced no entries: nothing found in the range */
929 if (ret == 0 && list_empty(&list))
930 return 0;
931
/* the entries only served as an existence test; unlink and free each one */
932 while (!list_empty(&list)) {
933 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
934 list_del(&sums->list);
935 kfree(sums);
936 }
/* also reached when the lookup returned non-zero */
937 return 1;
938}
939
913/* 940/*
914 * when nowcow writeback call back. This checks for snapshots or COW copies 941 * when nowcow writeback call back. This checks for snapshots or COW copies
915 * of the extents that exist in the file, and COWs the file as required. 942 * of the extents that exist in the file, and COWs the file as required.
@@ -971,6 +998,7 @@ next_slot:
971 998
972 nocow = 0; 999 nocow = 0;
973 disk_bytenr = 0; 1000 disk_bytenr = 0;
1001 num_bytes = 0;
974 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1002 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
975 1003
976 if (found_key.objectid > inode->i_ino || 1004 if (found_key.objectid > inode->i_ino ||
@@ -996,19 +1024,29 @@ next_slot:
996 path->slots[0]++; 1024 path->slots[0]++;
997 goto next_slot; 1025 goto next_slot;
998 } 1026 }
1027 if (disk_bytenr == 0)
1028 goto out_check;
999 if (btrfs_file_extent_compression(leaf, fi) || 1029 if (btrfs_file_extent_compression(leaf, fi) ||
1000 btrfs_file_extent_encryption(leaf, fi) || 1030 btrfs_file_extent_encryption(leaf, fi) ||
1001 btrfs_file_extent_other_encoding(leaf, fi)) 1031 btrfs_file_extent_other_encoding(leaf, fi))
1002 goto out_check; 1032 goto out_check;
1003 if (disk_bytenr == 0)
1004 goto out_check;
1005 if (extent_type == BTRFS_FILE_EXTENT_REG && !force) 1033 if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
1006 goto out_check; 1034 goto out_check;
1007 if (btrfs_cross_ref_exist(trans, root, disk_bytenr))
1008 goto out_check;
1009 if (btrfs_extent_readonly(root, disk_bytenr)) 1035 if (btrfs_extent_readonly(root, disk_bytenr))
1010 goto out_check; 1036 goto out_check;
1037 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
1038 disk_bytenr))
1039 goto out_check;
1011 disk_bytenr += btrfs_file_extent_offset(leaf, fi); 1040 disk_bytenr += btrfs_file_extent_offset(leaf, fi);
1041 disk_bytenr += cur_offset - found_key.offset;
1042 num_bytes = min(end + 1, extent_end) - cur_offset;
1043 /*
1044 * Force COW if a csum exists in the range.
1045 * This ensures that the csums for a given extent
1046 * are either valid or do not exist.
1047 */
1048 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
1049 goto out_check;
1012 nocow = 1; 1050 nocow = 1;
1013 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 1051 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1014 extent_end = found_key.offset + 1052 extent_end = found_key.offset +
@@ -1041,8 +1079,6 @@ out_check:
1041 cow_start = (u64)-1; 1079 cow_start = (u64)-1;
1042 } 1080 }
1043 1081
1044 disk_bytenr += cur_offset - found_key.offset;
1045 num_bytes = min(end + 1, extent_end) - cur_offset;
1046 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 1082 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1047 struct extent_map *em; 1083 struct extent_map *em;
1048 struct extent_map_tree *em_tree; 1084 struct extent_map_tree *em_tree;
@@ -1105,11 +1141,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1105 u64 start, u64 end, int *page_started, 1141 u64 start, u64 end, int *page_started,
1106 unsigned long *nr_written) 1142 unsigned long *nr_written)
1107{ 1143{
1108 struct btrfs_root *root = BTRFS_I(inode)->root;
1109 int ret; 1144 int ret;
1110 1145
1111 if (btrfs_test_opt(root, NODATACOW) || 1146 if (btrfs_test_flag(inode, NODATACOW))
1112 btrfs_test_flag(inode, NODATACOW))
1113 ret = run_delalloc_nocow(inode, locked_page, start, end, 1147 ret = run_delalloc_nocow(inode, locked_page, start, end,
1114 page_started, 1, nr_written); 1148 page_started, 1, nr_written);
1115 else if (btrfs_test_flag(inode, PREALLOC)) 1149 else if (btrfs_test_flag(inode, PREALLOC))
@@ -1252,8 +1286,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1252 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1286 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1253 BUG_ON(ret); 1287 BUG_ON(ret);
1254 1288
1255 skip_sum = btrfs_test_opt(root, NODATASUM) || 1289 skip_sum = btrfs_test_flag(inode, NODATASUM);
1256 btrfs_test_flag(inode, NODATASUM);
1257 1290
1258 if (!(rw & (1 << BIO_RW))) { 1291 if (!(rw & (1 << BIO_RW))) {
1259 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1292 if (bio_flags & EXTENT_BIO_COMPRESSED) {
@@ -1263,6 +1296,9 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1263 btrfs_lookup_bio_sums(root, inode, bio, NULL); 1296 btrfs_lookup_bio_sums(root, inode, bio, NULL);
1264 goto mapit; 1297 goto mapit;
1265 } else if (!skip_sum) { 1298 } else if (!skip_sum) {
1299 /* csum items have already been cloned */
1300 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
1301 goto mapit;
1266 /* we're doing a write, do the async checksumming */ 1302 /* we're doing a write, do the async checksumming */
1267 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, 1303 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
1268 inode, rw, bio, mirror_num, 1304 inode, rw, bio, mirror_num,
@@ -1692,9 +1728,15 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1692 ClearPageChecked(page); 1728 ClearPageChecked(page);
1693 goto good; 1729 goto good;
1694 } 1730 }
1695 if (btrfs_test_opt(root, NODATASUM) || 1731 if (btrfs_test_flag(inode, NODATASUM))
1696 btrfs_test_flag(inode, NODATASUM)) 1732 return 0;
1733
1734 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1735 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
1736 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
1737 GFP_NOFS);
1697 return 0; 1738 return 0;
1739 }
1698 1740
1699 if (state && state->start == start) { 1741 if (state && state->start == start) {
1700 private = state->private; 1742 private = state->private;
@@ -3391,6 +3433,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3391 owner = 1; 3433 owner = 1;
3392 BTRFS_I(inode)->block_group = 3434 BTRFS_I(inode)->block_group =
3393 btrfs_find_block_group(root, 0, alloc_hint, owner); 3435 btrfs_find_block_group(root, 0, alloc_hint, owner);
3436 if ((mode & S_IFREG)) {
3437 if (btrfs_test_opt(root, NODATASUM))
3438 btrfs_set_flag(inode, NODATASUM);
3439 if (btrfs_test_opt(root, NODATACOW))
3440 btrfs_set_flag(inode, NODATACOW);
3441 }
3394 3442
3395 key[0].objectid = objectid; 3443 key[0].objectid = objectid;
3396 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 3444 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);