diff options
author | Yan Zheng <zheng.yan@oracle.com> | 2008-07-30 09:26:11 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:05 -0400 |
commit | f321e4910398cf7922265d269fb17fd26f312571 (patch) | |
tree | 8430f004991351e48a4b3f9441fe0cbbcf70eddb /fs | |
parent | 3bf10418675cb424724b5cb9d7725b234defe1fd (diff) |
Btrfs: Update and fix mount -o nodatacow
To check whether a given file extent is referenced by multiple snapshots, the
checker walks down the fs tree through the dead root and checks all tree blocks
in the path.
We can easily detect whether a given tree block is directly referenced by another
snapshot. We can also detect any indirect reference from another snapshot by
checking the reference's generation. The checker can always detect multiple
references, but can't reliably detect the case of a single reference. So btrfs may
still COW file data even if there is only one reference.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.h | 9 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 202 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 6 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 16 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 5 |
5 files changed, 142 insertions, 96 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5517dfc6f71c..83422088c629 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -617,7 +617,7 @@ struct btrfs_leaf_ref_tree { | |||
617 | * in ram representation of the tree. extent_root is used for all allocations | 617 | * in ram representation of the tree. extent_root is used for all allocations |
618 | * and for the extent tree extent_root root. | 618 | * and for the extent tree extent_root root. |
619 | */ | 619 | */ |
620 | struct dirty_root; | 620 | struct btrfs_dirty_root; |
621 | struct btrfs_root { | 621 | struct btrfs_root { |
622 | struct extent_buffer *node; | 622 | struct extent_buffer *node; |
623 | 623 | ||
@@ -627,7 +627,7 @@ struct btrfs_root { | |||
627 | struct extent_buffer *commit_root; | 627 | struct extent_buffer *commit_root; |
628 | struct btrfs_leaf_ref_tree *ref_tree; | 628 | struct btrfs_leaf_ref_tree *ref_tree; |
629 | struct btrfs_leaf_ref_tree ref_tree_struct; | 629 | struct btrfs_leaf_ref_tree ref_tree_struct; |
630 | struct dirty_root *dirty_root; | 630 | struct btrfs_dirty_root *dirty_root; |
631 | 631 | ||
632 | struct btrfs_root_item root_item; | 632 | struct btrfs_root_item root_item; |
633 | struct btrfs_key root_key; | 633 | struct btrfs_key root_key; |
@@ -1399,9 +1399,8 @@ static inline struct dentry *fdentry(struct file *file) { | |||
1399 | } | 1399 | } |
1400 | 1400 | ||
1401 | /* extent-tree.c */ | 1401 | /* extent-tree.c */ |
1402 | u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, | 1402 | int btrfs_cross_ref_exists(struct btrfs_root *root, |
1403 | struct btrfs_path *count_path, | 1403 | struct btrfs_key *key, u64 bytenr); |
1404 | u64 expected_owner, u64 first_extent); | ||
1405 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | 1404 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, |
1406 | struct btrfs_root *root); | 1405 | struct btrfs_root *root); |
1407 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | 1406 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6290cf41d647..fe1ddbd2bfd6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -802,70 +802,57 @@ out: | |||
802 | return 0; | 802 | return 0; |
803 | } | 803 | } |
804 | 804 | ||
805 | u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, | 805 | |
806 | struct btrfs_path *count_path, | 806 | static int get_reference_status(struct btrfs_root *root, u64 bytenr, |
807 | u64 expected_owner, | 807 | u64 parent_gen, u64 ref_objectid, |
808 | u64 first_extent) | 808 | u64 *min_generation, u32 *ref_count) |
809 | { | 809 | { |
810 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 810 | struct btrfs_root *extent_root = root->fs_info->extent_root; |
811 | struct btrfs_path *path; | 811 | struct btrfs_path *path; |
812 | u64 bytenr; | 812 | struct extent_buffer *leaf; |
813 | u64 found_objectid; | 813 | struct btrfs_extent_ref *ref_item; |
814 | u64 found_owner; | 814 | struct btrfs_key key; |
815 | struct btrfs_key found_key; | ||
815 | u64 root_objectid = root->root_key.objectid; | 816 | u64 root_objectid = root->root_key.objectid; |
816 | u32 total_count = 0; | 817 | u64 ref_generation; |
817 | u32 extent_refs; | ||
818 | u32 cur_count; | ||
819 | u32 nritems; | 818 | u32 nritems; |
820 | int ret; | 819 | int ret; |
821 | struct btrfs_key key; | ||
822 | struct btrfs_key found_key; | ||
823 | struct extent_buffer *l; | ||
824 | struct btrfs_extent_item *item; | ||
825 | struct btrfs_extent_ref *ref_item; | ||
826 | int level = -1; | ||
827 | 820 | ||
828 | /* FIXME, needs locking */ | ||
829 | BUG(); | ||
830 | |||
831 | mutex_lock(&root->fs_info->alloc_mutex); | ||
832 | path = btrfs_alloc_path(); | ||
833 | again: | ||
834 | if (level == -1) | ||
835 | bytenr = first_extent; | ||
836 | else | ||
837 | bytenr = count_path->nodes[level]->start; | ||
838 | |||
839 | cur_count = 0; | ||
840 | key.objectid = bytenr; | 821 | key.objectid = bytenr; |
841 | key.offset = 0; | 822 | key.offset = 0; |
823 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
842 | 824 | ||
843 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 825 | path = btrfs_alloc_path(); |
826 | mutex_lock(&root->fs_info->alloc_mutex); | ||
844 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); | 827 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
845 | if (ret < 0) | 828 | if (ret < 0) |
846 | goto out; | 829 | goto out; |
847 | BUG_ON(ret == 0); | 830 | BUG_ON(ret == 0); |
848 | 831 | ||
849 | l = path->nodes[0]; | 832 | leaf = path->nodes[0]; |
850 | btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); | 833 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
851 | 834 | ||
852 | if (found_key.objectid != bytenr || | 835 | if (found_key.objectid != bytenr || |
853 | found_key.type != BTRFS_EXTENT_ITEM_KEY) { | 836 | found_key.type != BTRFS_EXTENT_ITEM_KEY) { |
837 | ret = 1; | ||
854 | goto out; | 838 | goto out; |
855 | } | 839 | } |
856 | 840 | ||
857 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | 841 | *ref_count = 0; |
858 | extent_refs = btrfs_extent_refs(l, item); | 842 | *min_generation = (u64)-1; |
843 | |||
859 | while (1) { | 844 | while (1) { |
860 | l = path->nodes[0]; | 845 | leaf = path->nodes[0]; |
861 | nritems = btrfs_header_nritems(l); | 846 | nritems = btrfs_header_nritems(leaf); |
862 | if (path->slots[0] >= nritems) { | 847 | if (path->slots[0] >= nritems) { |
863 | ret = btrfs_next_leaf(extent_root, path); | 848 | ret = btrfs_next_leaf(extent_root, path); |
849 | if (ret < 0) | ||
850 | goto out; | ||
864 | if (ret == 0) | 851 | if (ret == 0) |
865 | continue; | 852 | continue; |
866 | break; | 853 | break; |
867 | } | 854 | } |
868 | btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); | 855 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
869 | if (found_key.objectid != bytenr) | 856 | if (found_key.objectid != bytenr) |
870 | break; | 857 | break; |
871 | 858 | ||
@@ -874,57 +861,120 @@ again: | |||
874 | continue; | 861 | continue; |
875 | } | 862 | } |
876 | 863 | ||
877 | cur_count++; | 864 | ref_item = btrfs_item_ptr(leaf, path->slots[0], |
878 | ref_item = btrfs_item_ptr(l, path->slots[0], | ||
879 | struct btrfs_extent_ref); | 865 | struct btrfs_extent_ref); |
880 | found_objectid = btrfs_ref_root(l, ref_item); | 866 | ref_generation = btrfs_ref_generation(leaf, ref_item); |
881 | 867 | /* | |
882 | if (found_objectid != root_objectid) { | 868 | * For (parent_gen > 0 && parent_gen > ref_gen): |
883 | total_count = 2; | 869 | * |
884 | goto out; | 870 | * we reach here through the oldest root, therefore |
885 | } | 871 | * all other reference from same snapshot should have |
886 | if (level == -1) { | 872 | * a larger generation. |
887 | found_owner = btrfs_ref_objectid(l, ref_item); | 873 | */ |
888 | if (found_owner != expected_owner) { | 874 | if ((root_objectid != btrfs_ref_root(leaf, ref_item)) || |
889 | total_count = 2; | 875 | (parent_gen > 0 && parent_gen > ref_generation) || |
890 | goto out; | 876 | (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID && |
891 | } | 877 | ref_objectid != btrfs_ref_objectid(leaf, ref_item))) { |
892 | /* | 878 | if (ref_count) |
893 | * nasty. we don't count a reference held by | 879 | *ref_count = 2; |
894 | * the running transaction. This allows nodatacow | 880 | break; |
895 | * to avoid cow most of the time | ||
896 | */ | ||
897 | if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && | ||
898 | btrfs_ref_generation(l, ref_item) == | ||
899 | root->fs_info->generation) { | ||
900 | extent_refs--; | ||
901 | } | ||
902 | } | 881 | } |
903 | total_count = 1; | 882 | |
883 | *ref_count = 1; | ||
884 | if (*min_generation > ref_generation) | ||
885 | *min_generation = ref_generation; | ||
886 | |||
904 | path->slots[0]++; | 887 | path->slots[0]++; |
905 | } | 888 | } |
906 | /* | 889 | ret = 0; |
907 | * if there is more than one reference against a data extent, | 890 | out: |
908 | * we have to assume the other ref is another snapshot | 891 | mutex_unlock(&root->fs_info->alloc_mutex); |
909 | */ | 892 | btrfs_free_path(path); |
910 | if (level == -1 && extent_refs > 1) { | 893 | return ret; |
911 | total_count = 2; | 894 | } |
895 | |||
896 | int btrfs_cross_ref_exists(struct btrfs_root *root, | ||
897 | struct btrfs_key *key, u64 bytenr) | ||
898 | { | ||
899 | struct btrfs_trans_handle *trans; | ||
900 | struct btrfs_root *old_root; | ||
901 | struct btrfs_path *path = NULL; | ||
902 | struct extent_buffer *eb; | ||
903 | struct btrfs_file_extent_item *item; | ||
904 | u64 ref_generation; | ||
905 | u64 min_generation; | ||
906 | u64 extent_start; | ||
907 | u32 ref_count; | ||
908 | int level; | ||
909 | int ret; | ||
910 | |||
911 | BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY); | ||
912 | ret = get_reference_status(root, bytenr, 0, key->objectid, | ||
913 | &min_generation, &ref_count); | ||
914 | if (ret) | ||
915 | return ret; | ||
916 | |||
917 | if (ref_count != 1) | ||
918 | return 1; | ||
919 | |||
920 | trans = btrfs_start_transaction(root, 0); | ||
921 | old_root = root->dirty_root->root; | ||
922 | ref_generation = old_root->root_key.offset; | ||
923 | |||
924 | /* all references are created in running transaction */ | ||
925 | if (min_generation > ref_generation) { | ||
926 | ret = 0; | ||
912 | goto out; | 927 | goto out; |
913 | } | 928 | } |
914 | if (cur_count == 0) { | 929 | |
915 | total_count = 0; | 930 | path = btrfs_alloc_path(); |
931 | if (!path) { | ||
932 | ret = -ENOMEM; | ||
916 | goto out; | 933 | goto out; |
917 | } | 934 | } |
918 | if (level >= 0 && root->node == count_path->nodes[level]) | 935 | |
936 | path->skip_locking = 1; | ||
937 | /* if no item found, the extent is referenced by other snapshot */ | ||
938 | ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0); | ||
939 | if (ret) | ||
919 | goto out; | 940 | goto out; |
920 | level++; | ||
921 | btrfs_release_path(root, path); | ||
922 | goto again; | ||
923 | 941 | ||
942 | eb = path->nodes[0]; | ||
943 | item = btrfs_item_ptr(eb, path->slots[0], | ||
944 | struct btrfs_file_extent_item); | ||
945 | if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG || | ||
946 | btrfs_file_extent_disk_bytenr(eb, item) != bytenr) { | ||
947 | ret = 1; | ||
948 | goto out; | ||
949 | } | ||
950 | |||
951 | for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) { | ||
952 | if (level >= 0) { | ||
953 | eb = path->nodes[level]; | ||
954 | if (!eb) | ||
955 | continue; | ||
956 | extent_start = eb->start; | ||
957 | } else | ||
958 | extent_start = bytenr; | ||
959 | |||
960 | ret = get_reference_status(root, extent_start, ref_generation, | ||
961 | 0, &min_generation, &ref_count); | ||
962 | if (ret) | ||
963 | goto out; | ||
964 | |||
965 | if (ref_count != 1) { | ||
966 | ret = 1; | ||
967 | goto out; | ||
968 | } | ||
969 | if (level >= 0) | ||
970 | ref_generation = btrfs_header_generation(eb); | ||
971 | } | ||
972 | ret = 0; | ||
924 | out: | 973 | out: |
925 | btrfs_free_path(path); | 974 | if (path) |
926 | mutex_unlock(&root->fs_info->alloc_mutex); | 975 | btrfs_free_path(path); |
927 | return total_count; | 976 | btrfs_end_transaction(trans, root); |
977 | return ret; | ||
928 | } | 978 | } |
929 | 979 | ||
930 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 980 | int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7c87f863d6f7..3aa82cec6bf7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -249,11 +249,8 @@ again: | |||
249 | if (bytenr == 0) | 249 | if (bytenr == 0) |
250 | goto not_found; | 250 | goto not_found; |
251 | 251 | ||
252 | if (btrfs_count_snapshots_in_path(root, path, inode->i_ino, | 252 | if (btrfs_cross_ref_exists(root, &found_key, bytenr)) |
253 | bytenr) != 1) { | ||
254 | goto not_found; | 253 | goto not_found; |
255 | } | ||
256 | |||
257 | /* | 254 | /* |
258 | * we may be called by the resizer, make sure we're inside | 255 | * we may be called by the resizer, make sure we're inside |
259 | * the limits of the FS | 256 | * the limits of the FS |
@@ -277,6 +274,7 @@ loop: | |||
277 | goto again; | 274 | goto again; |
278 | 275 | ||
279 | not_found: | 276 | not_found: |
277 | btrfs_release_path(root, path); | ||
280 | cow_file_range(inode, start, end); | 278 | cow_file_range(inode, start, end); |
281 | start = end + 1; | 279 | start = end + 1; |
282 | goto loop; | 280 | goto loop; |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b8be6703189a..216f31571620 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -32,12 +32,6 @@ extern struct kmem_cache *btrfs_transaction_cachep; | |||
32 | 32 | ||
33 | #define BTRFS_ROOT_TRANS_TAG 0 | 33 | #define BTRFS_ROOT_TRANS_TAG 0 |
34 | 34 | ||
35 | struct dirty_root { | ||
36 | struct list_head list; | ||
37 | struct btrfs_root *root; | ||
38 | struct btrfs_root *latest_root; | ||
39 | }; | ||
40 | |||
41 | static noinline void put_transaction(struct btrfs_transaction *transaction) | 35 | static noinline void put_transaction(struct btrfs_transaction *transaction) |
42 | { | 36 | { |
43 | WARN_ON(transaction->use_count == 0); | 37 | WARN_ON(transaction->use_count == 0); |
@@ -91,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
91 | 85 | ||
92 | static noinline int record_root_in_trans(struct btrfs_root *root) | 86 | static noinline int record_root_in_trans(struct btrfs_root *root) |
93 | { | 87 | { |
94 | struct dirty_root *dirty; | 88 | struct btrfs_dirty_root *dirty; |
95 | u64 running_trans_id = root->fs_info->running_transaction->transid; | 89 | u64 running_trans_id = root->fs_info->running_transaction->transid; |
96 | if (root->ref_cows && root->last_trans < running_trans_id) { | 90 | if (root->ref_cows && root->last_trans < running_trans_id) { |
97 | WARN_ON(root == root->fs_info->extent_root); | 91 | WARN_ON(root == root->fs_info->extent_root); |
@@ -372,7 +366,7 @@ int btrfs_add_dead_root(struct btrfs_root *root, | |||
372 | struct btrfs_root *latest, | 366 | struct btrfs_root *latest, |
373 | struct list_head *dead_list) | 367 | struct list_head *dead_list) |
374 | { | 368 | { |
375 | struct dirty_root *dirty; | 369 | struct btrfs_dirty_root *dirty; |
376 | 370 | ||
377 | dirty = kmalloc(sizeof(*dirty), GFP_NOFS); | 371 | dirty = kmalloc(sizeof(*dirty), GFP_NOFS); |
378 | if (!dirty) | 372 | if (!dirty) |
@@ -387,7 +381,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | |||
387 | struct radix_tree_root *radix, | 381 | struct radix_tree_root *radix, |
388 | struct list_head *list) | 382 | struct list_head *list) |
389 | { | 383 | { |
390 | struct dirty_root *dirty; | 384 | struct btrfs_dirty_root *dirty; |
391 | struct btrfs_root *gang[8]; | 385 | struct btrfs_root *gang[8]; |
392 | struct btrfs_root *root; | 386 | struct btrfs_root *root; |
393 | int i; | 387 | int i; |
@@ -498,7 +492,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
498 | static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | 492 | static noinline int drop_dirty_roots(struct btrfs_root *tree_root, |
499 | struct list_head *list) | 493 | struct list_head *list) |
500 | { | 494 | { |
501 | struct dirty_root *dirty; | 495 | struct btrfs_dirty_root *dirty; |
502 | struct btrfs_trans_handle *trans; | 496 | struct btrfs_trans_handle *trans; |
503 | unsigned long nr; | 497 | unsigned long nr; |
504 | u64 num_bytes; | 498 | u64 num_bytes; |
@@ -509,7 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
509 | while(!list_empty(list)) { | 503 | while(!list_empty(list)) { |
510 | struct btrfs_root *root; | 504 | struct btrfs_root *root; |
511 | 505 | ||
512 | dirty = list_entry(list->prev, struct dirty_root, list); | 506 | dirty = list_entry(list->prev, struct btrfs_dirty_root, list); |
513 | list_del_init(&dirty->list); | 507 | list_del_init(&dirty->list); |
514 | 508 | ||
515 | num_bytes = btrfs_root_used(&dirty->root->root_item); | 509 | num_bytes = btrfs_root_used(&dirty->root->root_item); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index df2ca2aad1c0..f5adb23151fb 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -52,6 +52,11 @@ struct btrfs_pending_snapshot { | |||
52 | struct list_head list; | 52 | struct list_head list; |
53 | }; | 53 | }; |
54 | 54 | ||
55 | struct btrfs_dirty_root { | ||
56 | struct list_head list; | ||
57 | struct btrfs_root *root; | ||
58 | struct btrfs_root *latest_root; | ||
59 | }; | ||
55 | 60 | ||
56 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, | 61 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, |
57 | struct inode *inode) | 62 | struct inode *inode) |