about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Yan Zheng <zheng.yan@oracle.com> 2008-07-30 09:26:11 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:05 -0400
commit: f321e4910398cf7922265d269fb17fd26f312571 (patch)
tree: 8430f004991351e48a4b3f9441fe0cbbcf70eddb
parent: 3bf10418675cb424724b5cb9d7725b234defe1fd (diff)
Btrfs: Update and fix mount -o nodatacow
To check whether a given file extent is referenced by multiple snapshots, the checker walks down the fs tree through the dead root and checks all tree blocks in the path. We can easily detect whether a given tree block is directly referenced by another snapshot. We can also detect any indirect reference from another snapshot by checking the reference's generation. The checker can always detect multiple references, but it can't reliably detect the single-reference case. So btrfs may do file data cow even when there is only one reference.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.h 9
-rw-r--r-- fs/btrfs/extent-tree.c 202
-rw-r--r-- fs/btrfs/inode.c 6
-rw-r--r-- fs/btrfs/transaction.c 16
-rw-r--r-- fs/btrfs/transaction.h 5
5 files changed, 142 insertions, 96 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5517dfc6f71c..83422088c629 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -617,7 +617,7 @@ struct btrfs_leaf_ref_tree {
617 * in ram representation of the tree. extent_root is used for all allocations 617 * in ram representation of the tree. extent_root is used for all allocations
618 * and for the extent tree extent_root root. 618 * and for the extent tree extent_root root.
619 */ 619 */
620struct dirty_root; 620struct btrfs_dirty_root;
621struct btrfs_root { 621struct btrfs_root {
622 struct extent_buffer *node; 622 struct extent_buffer *node;
623 623
@@ -627,7 +627,7 @@ struct btrfs_root {
627 struct extent_buffer *commit_root; 627 struct extent_buffer *commit_root;
628 struct btrfs_leaf_ref_tree *ref_tree; 628 struct btrfs_leaf_ref_tree *ref_tree;
629 struct btrfs_leaf_ref_tree ref_tree_struct; 629 struct btrfs_leaf_ref_tree ref_tree_struct;
630 struct dirty_root *dirty_root; 630 struct btrfs_dirty_root *dirty_root;
631 631
632 struct btrfs_root_item root_item; 632 struct btrfs_root_item root_item;
633 struct btrfs_key root_key; 633 struct btrfs_key root_key;
@@ -1399,9 +1399,8 @@ static inline struct dentry *fdentry(struct file *file) {
1399} 1399}
1400 1400
1401/* extent-tree.c */ 1401/* extent-tree.c */
1402u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, 1402int btrfs_cross_ref_exists(struct btrfs_root *root,
1403 struct btrfs_path *count_path, 1403 struct btrfs_key *key, u64 bytenr);
1404 u64 expected_owner, u64 first_extent);
1405int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 1404int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1406 struct btrfs_root *root); 1405 struct btrfs_root *root);
1407int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); 1406int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6290cf41d647..fe1ddbd2bfd6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -802,70 +802,57 @@ out:
802 return 0; 802 return 0;
803} 803}
804 804
805u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, 805
806 struct btrfs_path *count_path, 806static int get_reference_status(struct btrfs_root *root, u64 bytenr,
807 u64 expected_owner, 807 u64 parent_gen, u64 ref_objectid,
808 u64 first_extent) 808 u64 *min_generation, u32 *ref_count)
809{ 809{
810 struct btrfs_root *extent_root = root->fs_info->extent_root; 810 struct btrfs_root *extent_root = root->fs_info->extent_root;
811 struct btrfs_path *path; 811 struct btrfs_path *path;
812 u64 bytenr; 812 struct extent_buffer *leaf;
813 u64 found_objectid; 813 struct btrfs_extent_ref *ref_item;
814 u64 found_owner; 814 struct btrfs_key key;
815 struct btrfs_key found_key;
815 u64 root_objectid = root->root_key.objectid; 816 u64 root_objectid = root->root_key.objectid;
816 u32 total_count = 0; 817 u64 ref_generation;
817 u32 extent_refs;
818 u32 cur_count;
819 u32 nritems; 818 u32 nritems;
820 int ret; 819 int ret;
821 struct btrfs_key key;
822 struct btrfs_key found_key;
823 struct extent_buffer *l;
824 struct btrfs_extent_item *item;
825 struct btrfs_extent_ref *ref_item;
826 int level = -1;
827 820
828 /* FIXME, needs locking */
829 BUG();
830
831 mutex_lock(&root->fs_info->alloc_mutex);
832 path = btrfs_alloc_path();
833again:
834 if (level == -1)
835 bytenr = first_extent;
836 else
837 bytenr = count_path->nodes[level]->start;
838
839 cur_count = 0;
840 key.objectid = bytenr; 821 key.objectid = bytenr;
841 key.offset = 0; 822 key.offset = 0;
823 key.type = BTRFS_EXTENT_ITEM_KEY;
842 824
843 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 825 path = btrfs_alloc_path();
826 mutex_lock(&root->fs_info->alloc_mutex);
844 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 827 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
845 if (ret < 0) 828 if (ret < 0)
846 goto out; 829 goto out;
847 BUG_ON(ret == 0); 830 BUG_ON(ret == 0);
848 831
849 l = path->nodes[0]; 832 leaf = path->nodes[0];
850 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 833 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
851 834
852 if (found_key.objectid != bytenr || 835 if (found_key.objectid != bytenr ||
853 found_key.type != BTRFS_EXTENT_ITEM_KEY) { 836 found_key.type != BTRFS_EXTENT_ITEM_KEY) {
837 ret = 1;
854 goto out; 838 goto out;
855 } 839 }
856 840
857 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 841 *ref_count = 0;
858 extent_refs = btrfs_extent_refs(l, item); 842 *min_generation = (u64)-1;
843
859 while (1) { 844 while (1) {
860 l = path->nodes[0]; 845 leaf = path->nodes[0];
861 nritems = btrfs_header_nritems(l); 846 nritems = btrfs_header_nritems(leaf);
862 if (path->slots[0] >= nritems) { 847 if (path->slots[0] >= nritems) {
863 ret = btrfs_next_leaf(extent_root, path); 848 ret = btrfs_next_leaf(extent_root, path);
849 if (ret < 0)
850 goto out;
864 if (ret == 0) 851 if (ret == 0)
865 continue; 852 continue;
866 break; 853 break;
867 } 854 }
868 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 855 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
869 if (found_key.objectid != bytenr) 856 if (found_key.objectid != bytenr)
870 break; 857 break;
871 858
@@ -874,57 +861,120 @@ again:
874 continue; 861 continue;
875 } 862 }
876 863
877 cur_count++; 864 ref_item = btrfs_item_ptr(leaf, path->slots[0],
878 ref_item = btrfs_item_ptr(l, path->slots[0],
879 struct btrfs_extent_ref); 865 struct btrfs_extent_ref);
880 found_objectid = btrfs_ref_root(l, ref_item); 866 ref_generation = btrfs_ref_generation(leaf, ref_item);
881 867 /*
882 if (found_objectid != root_objectid) { 868 * For (parent_gen > 0 && parent_gen > ref_gen):
883 total_count = 2; 869 *
884 goto out; 870 * we reach here through the oldest root, therefore
885 } 871 * all other reference from same snapshot should have
886 if (level == -1) { 872 * a larger generation.
887 found_owner = btrfs_ref_objectid(l, ref_item); 873 */
888 if (found_owner != expected_owner) { 874 if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
889 total_count = 2; 875 (parent_gen > 0 && parent_gen > ref_generation) ||
890 goto out; 876 (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
891 } 877 ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
892 /* 878 if (ref_count)
893 * nasty. we don't count a reference held by 879 *ref_count = 2;
894 * the running transaction. This allows nodatacow 880 break;
895 * to avoid cow most of the time
896 */
897 if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
898 btrfs_ref_generation(l, ref_item) ==
899 root->fs_info->generation) {
900 extent_refs--;
901 }
902 } 881 }
903 total_count = 1; 882
883 *ref_count = 1;
884 if (*min_generation > ref_generation)
885 *min_generation = ref_generation;
886
904 path->slots[0]++; 887 path->slots[0]++;
905 } 888 }
906 /* 889 ret = 0;
907 * if there is more than one reference against a data extent, 890out:
908 * we have to assume the other ref is another snapshot 891 mutex_unlock(&root->fs_info->alloc_mutex);
909 */ 892 btrfs_free_path(path);
910 if (level == -1 && extent_refs > 1) { 893 return ret;
911 total_count = 2; 894}
895
896int btrfs_cross_ref_exists(struct btrfs_root *root,
897 struct btrfs_key *key, u64 bytenr)
898{
899 struct btrfs_trans_handle *trans;
900 struct btrfs_root *old_root;
901 struct btrfs_path *path = NULL;
902 struct extent_buffer *eb;
903 struct btrfs_file_extent_item *item;
904 u64 ref_generation;
905 u64 min_generation;
906 u64 extent_start;
907 u32 ref_count;
908 int level;
909 int ret;
910
911 BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
912 ret = get_reference_status(root, bytenr, 0, key->objectid,
913 &min_generation, &ref_count);
914 if (ret)
915 return ret;
916
917 if (ref_count != 1)
918 return 1;
919
920 trans = btrfs_start_transaction(root, 0);
921 old_root = root->dirty_root->root;
922 ref_generation = old_root->root_key.offset;
923
924 /* all references are created in running transaction */
925 if (min_generation > ref_generation) {
926 ret = 0;
912 goto out; 927 goto out;
913 } 928 }
914 if (cur_count == 0) { 929
915 total_count = 0; 930 path = btrfs_alloc_path();
931 if (!path) {
932 ret = -ENOMEM;
916 goto out; 933 goto out;
917 } 934 }
918 if (level >= 0 && root->node == count_path->nodes[level]) 935
936 path->skip_locking = 1;
937 /* if no item found, the extent is referenced by other snapshot */
938 ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
939 if (ret)
919 goto out; 940 goto out;
920 level++;
921 btrfs_release_path(root, path);
922 goto again;
923 941
942 eb = path->nodes[0];
943 item = btrfs_item_ptr(eb, path->slots[0],
944 struct btrfs_file_extent_item);
945 if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
946 btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
947 ret = 1;
948 goto out;
949 }
950
951 for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
952 if (level >= 0) {
953 eb = path->nodes[level];
954 if (!eb)
955 continue;
956 extent_start = eb->start;
957 } else
958 extent_start = bytenr;
959
960 ret = get_reference_status(root, extent_start, ref_generation,
961 0, &min_generation, &ref_count);
962 if (ret)
963 goto out;
964
965 if (ref_count != 1) {
966 ret = 1;
967 goto out;
968 }
969 if (level >= 0)
970 ref_generation = btrfs_header_generation(eb);
971 }
972 ret = 0;
924out: 973out:
925 btrfs_free_path(path); 974 if (path)
926 mutex_unlock(&root->fs_info->alloc_mutex); 975 btrfs_free_path(path);
927 return total_count; 976 btrfs_end_transaction(trans, root);
977 return ret;
928} 978}
929 979
930int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 980int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7c87f863d6f7..3aa82cec6bf7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -249,11 +249,8 @@ again:
249 if (bytenr == 0) 249 if (bytenr == 0)
250 goto not_found; 250 goto not_found;
251 251
252 if (btrfs_count_snapshots_in_path(root, path, inode->i_ino, 252 if (btrfs_cross_ref_exists(root, &found_key, bytenr))
253 bytenr) != 1) {
254 goto not_found; 253 goto not_found;
255 }
256
257 /* 254 /*
258 * we may be called by the resizer, make sure we're inside 255 * we may be called by the resizer, make sure we're inside
259 * the limits of the FS 256 * the limits of the FS
@@ -277,6 +274,7 @@ loop:
277 goto again; 274 goto again;
278 275
279not_found: 276not_found:
277 btrfs_release_path(root, path);
280 cow_file_range(inode, start, end); 278 cow_file_range(inode, start, end);
281 start = end + 1; 279 start = end + 1;
282 goto loop; 280 goto loop;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b8be6703189a..216f31571620 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,12 +32,6 @@ extern struct kmem_cache *btrfs_transaction_cachep;
32 32
33#define BTRFS_ROOT_TRANS_TAG 0 33#define BTRFS_ROOT_TRANS_TAG 0
34 34
35struct dirty_root {
36 struct list_head list;
37 struct btrfs_root *root;
38 struct btrfs_root *latest_root;
39};
40
41static noinline void put_transaction(struct btrfs_transaction *transaction) 35static noinline void put_transaction(struct btrfs_transaction *transaction)
42{ 36{
43 WARN_ON(transaction->use_count == 0); 37 WARN_ON(transaction->use_count == 0);
@@ -91,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root)
91 85
92static noinline int record_root_in_trans(struct btrfs_root *root) 86static noinline int record_root_in_trans(struct btrfs_root *root)
93{ 87{
94 struct dirty_root *dirty; 88 struct btrfs_dirty_root *dirty;
95 u64 running_trans_id = root->fs_info->running_transaction->transid; 89 u64 running_trans_id = root->fs_info->running_transaction->transid;
96 if (root->ref_cows && root->last_trans < running_trans_id) { 90 if (root->ref_cows && root->last_trans < running_trans_id) {
97 WARN_ON(root == root->fs_info->extent_root); 91 WARN_ON(root == root->fs_info->extent_root);
@@ -372,7 +366,7 @@ int btrfs_add_dead_root(struct btrfs_root *root,
372 struct btrfs_root *latest, 366 struct btrfs_root *latest,
373 struct list_head *dead_list) 367 struct list_head *dead_list)
374{ 368{
375 struct dirty_root *dirty; 369 struct btrfs_dirty_root *dirty;
376 370
377 dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 371 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
378 if (!dirty) 372 if (!dirty)
@@ -387,7 +381,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
387 struct radix_tree_root *radix, 381 struct radix_tree_root *radix,
388 struct list_head *list) 382 struct list_head *list)
389{ 383{
390 struct dirty_root *dirty; 384 struct btrfs_dirty_root *dirty;
391 struct btrfs_root *gang[8]; 385 struct btrfs_root *gang[8];
392 struct btrfs_root *root; 386 struct btrfs_root *root;
393 int i; 387 int i;
@@ -498,7 +492,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
498static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 492static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
499 struct list_head *list) 493 struct list_head *list)
500{ 494{
501 struct dirty_root *dirty; 495 struct btrfs_dirty_root *dirty;
502 struct btrfs_trans_handle *trans; 496 struct btrfs_trans_handle *trans;
503 unsigned long nr; 497 unsigned long nr;
504 u64 num_bytes; 498 u64 num_bytes;
@@ -509,7 +503,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
509 while(!list_empty(list)) { 503 while(!list_empty(list)) {
510 struct btrfs_root *root; 504 struct btrfs_root *root;
511 505
512 dirty = list_entry(list->prev, struct dirty_root, list); 506 dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
513 list_del_init(&dirty->list); 507 list_del_init(&dirty->list);
514 508
515 num_bytes = btrfs_root_used(&dirty->root->root_item); 509 num_bytes = btrfs_root_used(&dirty->root->root_item);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index df2ca2aad1c0..f5adb23151fb 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -52,6 +52,11 @@ struct btrfs_pending_snapshot {
52 struct list_head list; 52 struct list_head list;
53}; 53};
54 54
55struct btrfs_dirty_root {
56 struct list_head list;
57 struct btrfs_root *root;
58 struct btrfs_root *latest_root;
59};
55 60
56static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, 61static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
57 struct inode *inode) 62 struct inode *inode)