diff options
49 files changed, 3617 insertions, 1534 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index fbd76ded9a34..4dabeb893b7c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -74,6 +74,7 @@ BTRFS_WORK_HELPER(endio_helper); | |||
74 | BTRFS_WORK_HELPER(endio_meta_helper); | 74 | BTRFS_WORK_HELPER(endio_meta_helper); |
75 | BTRFS_WORK_HELPER(endio_meta_write_helper); | 75 | BTRFS_WORK_HELPER(endio_meta_write_helper); |
76 | BTRFS_WORK_HELPER(endio_raid56_helper); | 76 | BTRFS_WORK_HELPER(endio_raid56_helper); |
77 | BTRFS_WORK_HELPER(endio_repair_helper); | ||
77 | BTRFS_WORK_HELPER(rmw_helper); | 78 | BTRFS_WORK_HELPER(rmw_helper); |
78 | BTRFS_WORK_HELPER(endio_write_helper); | 79 | BTRFS_WORK_HELPER(endio_write_helper); |
79 | BTRFS_WORK_HELPER(freespace_write_helper); | 80 | BTRFS_WORK_HELPER(freespace_write_helper); |
@@ -91,7 +92,7 @@ __btrfs_alloc_workqueue(const char *name, int flags, int max_active, | |||
91 | { | 92 | { |
92 | struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | 93 | struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); |
93 | 94 | ||
94 | if (unlikely(!ret)) | 95 | if (!ret) |
95 | return NULL; | 96 | return NULL; |
96 | 97 | ||
97 | ret->max_active = max_active; | 98 | ret->max_active = max_active; |
@@ -115,7 +116,7 @@ __btrfs_alloc_workqueue(const char *name, int flags, int max_active, | |||
115 | ret->normal_wq = alloc_workqueue("%s-%s", flags, | 116 | ret->normal_wq = alloc_workqueue("%s-%s", flags, |
116 | ret->max_active, "btrfs", | 117 | ret->max_active, "btrfs", |
117 | name); | 118 | name); |
118 | if (unlikely(!ret->normal_wq)) { | 119 | if (!ret->normal_wq) { |
119 | kfree(ret); | 120 | kfree(ret); |
120 | return NULL; | 121 | return NULL; |
121 | } | 122 | } |
@@ -137,12 +138,12 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, | |||
137 | { | 138 | { |
138 | struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | 139 | struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); |
139 | 140 | ||
140 | if (unlikely(!ret)) | 141 | if (!ret) |
141 | return NULL; | 142 | return NULL; |
142 | 143 | ||
143 | ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, | 144 | ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, |
144 | max_active, thresh); | 145 | max_active, thresh); |
145 | if (unlikely(!ret->normal)) { | 146 | if (!ret->normal) { |
146 | kfree(ret); | 147 | kfree(ret); |
147 | return NULL; | 148 | return NULL; |
148 | } | 149 | } |
@@ -150,7 +151,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, | |||
150 | if (flags & WQ_HIGHPRI) { | 151 | if (flags & WQ_HIGHPRI) { |
151 | ret->high = __btrfs_alloc_workqueue(name, flags, max_active, | 152 | ret->high = __btrfs_alloc_workqueue(name, flags, max_active, |
152 | thresh); | 153 | thresh); |
153 | if (unlikely(!ret->high)) { | 154 | if (!ret->high) { |
154 | __btrfs_destroy_workqueue(ret->normal); | 155 | __btrfs_destroy_workqueue(ret->normal); |
155 | kfree(ret); | 156 | kfree(ret); |
156 | return NULL; | 157 | return NULL; |
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index e9e31c94758f..e386c29ef1f6 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -53,6 +53,7 @@ BTRFS_WORK_HELPER_PROTO(endio_helper); | |||
53 | BTRFS_WORK_HELPER_PROTO(endio_meta_helper); | 53 | BTRFS_WORK_HELPER_PROTO(endio_meta_helper); |
54 | BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); | 54 | BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); |
55 | BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); | 55 | BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); |
56 | BTRFS_WORK_HELPER_PROTO(endio_repair_helper); | ||
56 | BTRFS_WORK_HELPER_PROTO(rmw_helper); | 57 | BTRFS_WORK_HELPER_PROTO(rmw_helper); |
57 | BTRFS_WORK_HELPER_PROTO(endio_write_helper); | 58 | BTRFS_WORK_HELPER_PROTO(endio_write_helper); |
58 | BTRFS_WORK_HELPER_PROTO(freespace_write_helper); | 59 | BTRFS_WORK_HELPER_PROTO(freespace_write_helper); |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 54a201dac7f9..2d3e32ebfd15 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -25,6 +25,9 @@ | |||
25 | #include "delayed-ref.h" | 25 | #include "delayed-ref.h" |
26 | #include "locking.h" | 26 | #include "locking.h" |
27 | 27 | ||
28 | /* Just an arbitrary number so we can be sure this happened */ | ||
29 | #define BACKREF_FOUND_SHARED 6 | ||
30 | |||
28 | struct extent_inode_elem { | 31 | struct extent_inode_elem { |
29 | u64 inum; | 32 | u64 inum; |
30 | u64 offset; | 33 | u64 offset; |
@@ -377,7 +380,8 @@ out: | |||
377 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | 380 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, |
378 | struct btrfs_path *path, u64 time_seq, | 381 | struct btrfs_path *path, u64 time_seq, |
379 | struct list_head *head, | 382 | struct list_head *head, |
380 | const u64 *extent_item_pos, u64 total_refs) | 383 | const u64 *extent_item_pos, u64 total_refs, |
384 | u64 root_objectid) | ||
381 | { | 385 | { |
382 | int err; | 386 | int err; |
383 | int ret = 0; | 387 | int ret = 0; |
@@ -402,6 +406,10 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
402 | continue; | 406 | continue; |
403 | if (ref->count == 0) | 407 | if (ref->count == 0) |
404 | continue; | 408 | continue; |
409 | if (root_objectid && ref->root_id != root_objectid) { | ||
410 | ret = BACKREF_FOUND_SHARED; | ||
411 | goto out; | ||
412 | } | ||
405 | err = __resolve_indirect_ref(fs_info, path, time_seq, ref, | 413 | err = __resolve_indirect_ref(fs_info, path, time_seq, ref, |
406 | parents, extent_item_pos, | 414 | parents, extent_item_pos, |
407 | total_refs); | 415 | total_refs); |
@@ -482,7 +490,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, | |||
482 | continue; | 490 | continue; |
483 | BUG_ON(!ref->wanted_disk_byte); | 491 | BUG_ON(!ref->wanted_disk_byte); |
484 | eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, | 492 | eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, |
485 | fs_info->tree_root->leafsize, 0); | 493 | 0); |
486 | if (!eb || !extent_buffer_uptodate(eb)) { | 494 | if (!eb || !extent_buffer_uptodate(eb)) { |
487 | free_extent_buffer(eb); | 495 | free_extent_buffer(eb); |
488 | return -EIO; | 496 | return -EIO; |
@@ -561,7 +569,8 @@ static void __merge_refs(struct list_head *head, int mode) | |||
561 | * smaller or equal that seq to the list | 569 | * smaller or equal that seq to the list |
562 | */ | 570 | */ |
563 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | 571 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, |
564 | struct list_head *prefs, u64 *total_refs) | 572 | struct list_head *prefs, u64 *total_refs, |
573 | u64 inum) | ||
565 | { | 574 | { |
566 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | 575 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
567 | struct rb_node *n = &head->node.rb_node; | 576 | struct rb_node *n = &head->node.rb_node; |
@@ -625,6 +634,16 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
625 | key.objectid = ref->objectid; | 634 | key.objectid = ref->objectid; |
626 | key.type = BTRFS_EXTENT_DATA_KEY; | 635 | key.type = BTRFS_EXTENT_DATA_KEY; |
627 | key.offset = ref->offset; | 636 | key.offset = ref->offset; |
637 | |||
638 | /* | ||
639 | * Found an inum that doesn't match our known inum, so we | ||
640 | * know it's shared. | ||
641 | */ | ||
642 | if (inum && ref->objectid != inum) { | ||
643 | ret = BACKREF_FOUND_SHARED; | ||
644 | break; | ||
645 | } | ||
646 | |||
628 | ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, | 647 | ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, |
629 | node->bytenr, | 648 | node->bytenr, |
630 | node->ref_mod * sgn, GFP_ATOMIC); | 649 | node->ref_mod * sgn, GFP_ATOMIC); |
@@ -659,7 +678,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
659 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | 678 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, |
660 | struct btrfs_path *path, u64 bytenr, | 679 | struct btrfs_path *path, u64 bytenr, |
661 | int *info_level, struct list_head *prefs, | 680 | int *info_level, struct list_head *prefs, |
662 | u64 *total_refs) | 681 | u64 *total_refs, u64 inum) |
663 | { | 682 | { |
664 | int ret = 0; | 683 | int ret = 0; |
665 | int slot; | 684 | int slot; |
@@ -744,6 +763,12 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
744 | dref); | 763 | dref); |
745 | key.type = BTRFS_EXTENT_DATA_KEY; | 764 | key.type = BTRFS_EXTENT_DATA_KEY; |
746 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 765 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
766 | |||
767 | if (inum && key.objectid != inum) { | ||
768 | ret = BACKREF_FOUND_SHARED; | ||
769 | break; | ||
770 | } | ||
771 | |||
747 | root = btrfs_extent_data_ref_root(leaf, dref); | 772 | root = btrfs_extent_data_ref_root(leaf, dref); |
748 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, | 773 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
749 | bytenr, count, GFP_NOFS); | 774 | bytenr, count, GFP_NOFS); |
@@ -765,7 +790,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
765 | */ | 790 | */ |
766 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | 791 | static int __add_keyed_refs(struct btrfs_fs_info *fs_info, |
767 | struct btrfs_path *path, u64 bytenr, | 792 | struct btrfs_path *path, u64 bytenr, |
768 | int info_level, struct list_head *prefs) | 793 | int info_level, struct list_head *prefs, u64 inum) |
769 | { | 794 | { |
770 | struct btrfs_root *extent_root = fs_info->extent_root; | 795 | struct btrfs_root *extent_root = fs_info->extent_root; |
771 | int ret; | 796 | int ret; |
@@ -827,6 +852,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
827 | dref); | 852 | dref); |
828 | key.type = BTRFS_EXTENT_DATA_KEY; | 853 | key.type = BTRFS_EXTENT_DATA_KEY; |
829 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); | 854 | key.offset = btrfs_extent_data_ref_offset(leaf, dref); |
855 | |||
856 | if (inum && key.objectid != inum) { | ||
857 | ret = BACKREF_FOUND_SHARED; | ||
858 | break; | ||
859 | } | ||
860 | |||
830 | root = btrfs_extent_data_ref_root(leaf, dref); | 861 | root = btrfs_extent_data_ref_root(leaf, dref); |
831 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, | 862 | ret = __add_prelim_ref(prefs, root, &key, 0, 0, |
832 | bytenr, count, GFP_NOFS); | 863 | bytenr, count, GFP_NOFS); |
@@ -854,7 +885,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, | |||
854 | static int find_parent_nodes(struct btrfs_trans_handle *trans, | 885 | static int find_parent_nodes(struct btrfs_trans_handle *trans, |
855 | struct btrfs_fs_info *fs_info, u64 bytenr, | 886 | struct btrfs_fs_info *fs_info, u64 bytenr, |
856 | u64 time_seq, struct ulist *refs, | 887 | u64 time_seq, struct ulist *refs, |
857 | struct ulist *roots, const u64 *extent_item_pos) | 888 | struct ulist *roots, const u64 *extent_item_pos, |
889 | u64 root_objectid, u64 inum) | ||
858 | { | 890 | { |
859 | struct btrfs_key key; | 891 | struct btrfs_key key; |
860 | struct btrfs_path *path; | 892 | struct btrfs_path *path; |
@@ -929,7 +961,8 @@ again: | |||
929 | } | 961 | } |
930 | spin_unlock(&delayed_refs->lock); | 962 | spin_unlock(&delayed_refs->lock); |
931 | ret = __add_delayed_refs(head, time_seq, | 963 | ret = __add_delayed_refs(head, time_seq, |
932 | &prefs_delayed, &total_refs); | 964 | &prefs_delayed, &total_refs, |
965 | inum); | ||
933 | mutex_unlock(&head->mutex); | 966 | mutex_unlock(&head->mutex); |
934 | if (ret) | 967 | if (ret) |
935 | goto out; | 968 | goto out; |
@@ -951,11 +984,11 @@ again: | |||
951 | key.type == BTRFS_METADATA_ITEM_KEY)) { | 984 | key.type == BTRFS_METADATA_ITEM_KEY)) { |
952 | ret = __add_inline_refs(fs_info, path, bytenr, | 985 | ret = __add_inline_refs(fs_info, path, bytenr, |
953 | &info_level, &prefs, | 986 | &info_level, &prefs, |
954 | &total_refs); | 987 | &total_refs, inum); |
955 | if (ret) | 988 | if (ret) |
956 | goto out; | 989 | goto out; |
957 | ret = __add_keyed_refs(fs_info, path, bytenr, | 990 | ret = __add_keyed_refs(fs_info, path, bytenr, |
958 | info_level, &prefs); | 991 | info_level, &prefs, inum); |
959 | if (ret) | 992 | if (ret) |
960 | goto out; | 993 | goto out; |
961 | } | 994 | } |
@@ -971,7 +1004,8 @@ again: | |||
971 | __merge_refs(&prefs, 1); | 1004 | __merge_refs(&prefs, 1); |
972 | 1005 | ||
973 | ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, | 1006 | ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, |
974 | extent_item_pos, total_refs); | 1007 | extent_item_pos, total_refs, |
1008 | root_objectid); | ||
975 | if (ret) | 1009 | if (ret) |
976 | goto out; | 1010 | goto out; |
977 | 1011 | ||
@@ -981,6 +1015,11 @@ again: | |||
981 | ref = list_first_entry(&prefs, struct __prelim_ref, list); | 1015 | ref = list_first_entry(&prefs, struct __prelim_ref, list); |
982 | WARN_ON(ref->count < 0); | 1016 | WARN_ON(ref->count < 0); |
983 | if (roots && ref->count && ref->root_id && ref->parent == 0) { | 1017 | if (roots && ref->count && ref->root_id && ref->parent == 0) { |
1018 | if (root_objectid && ref->root_id != root_objectid) { | ||
1019 | ret = BACKREF_FOUND_SHARED; | ||
1020 | goto out; | ||
1021 | } | ||
1022 | |||
984 | /* no parent == root of tree */ | 1023 | /* no parent == root of tree */ |
985 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); | 1024 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); |
986 | if (ret < 0) | 1025 | if (ret < 0) |
@@ -989,12 +1028,10 @@ again: | |||
989 | if (ref->count && ref->parent) { | 1028 | if (ref->count && ref->parent) { |
990 | if (extent_item_pos && !ref->inode_list && | 1029 | if (extent_item_pos && !ref->inode_list && |
991 | ref->level == 0) { | 1030 | ref->level == 0) { |
992 | u32 bsz; | ||
993 | struct extent_buffer *eb; | 1031 | struct extent_buffer *eb; |
994 | bsz = btrfs_level_size(fs_info->extent_root, | 1032 | |
995 | ref->level); | ||
996 | eb = read_tree_block(fs_info->extent_root, | 1033 | eb = read_tree_block(fs_info->extent_root, |
997 | ref->parent, bsz, 0); | 1034 | ref->parent, 0); |
998 | if (!eb || !extent_buffer_uptodate(eb)) { | 1035 | if (!eb || !extent_buffer_uptodate(eb)) { |
999 | free_extent_buffer(eb); | 1036 | free_extent_buffer(eb); |
1000 | ret = -EIO; | 1037 | ret = -EIO; |
@@ -1087,7 +1124,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
1087 | return -ENOMEM; | 1124 | return -ENOMEM; |
1088 | 1125 | ||
1089 | ret = find_parent_nodes(trans, fs_info, bytenr, | 1126 | ret = find_parent_nodes(trans, fs_info, bytenr, |
1090 | time_seq, *leafs, NULL, extent_item_pos); | 1127 | time_seq, *leafs, NULL, extent_item_pos, 0, 0); |
1091 | if (ret < 0 && ret != -ENOENT) { | 1128 | if (ret < 0 && ret != -ENOENT) { |
1092 | free_leaf_list(*leafs); | 1129 | free_leaf_list(*leafs); |
1093 | return ret; | 1130 | return ret; |
@@ -1130,7 +1167,7 @@ static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
1130 | ULIST_ITER_INIT(&uiter); | 1167 | ULIST_ITER_INIT(&uiter); |
1131 | while (1) { | 1168 | while (1) { |
1132 | ret = find_parent_nodes(trans, fs_info, bytenr, | 1169 | ret = find_parent_nodes(trans, fs_info, bytenr, |
1133 | time_seq, tmp, *roots, NULL); | 1170 | time_seq, tmp, *roots, NULL, 0, 0); |
1134 | if (ret < 0 && ret != -ENOENT) { | 1171 | if (ret < 0 && ret != -ENOENT) { |
1135 | ulist_free(tmp); | 1172 | ulist_free(tmp); |
1136 | ulist_free(*roots); | 1173 | ulist_free(*roots); |
@@ -1161,6 +1198,54 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, | |||
1161 | return ret; | 1198 | return ret; |
1162 | } | 1199 | } |
1163 | 1200 | ||
1201 | int btrfs_check_shared(struct btrfs_trans_handle *trans, | ||
1202 | struct btrfs_fs_info *fs_info, u64 root_objectid, | ||
1203 | u64 inum, u64 bytenr) | ||
1204 | { | ||
1205 | struct ulist *tmp = NULL; | ||
1206 | struct ulist *roots = NULL; | ||
1207 | struct ulist_iterator uiter; | ||
1208 | struct ulist_node *node; | ||
1209 | struct seq_list elem = {}; | ||
1210 | int ret = 0; | ||
1211 | |||
1212 | tmp = ulist_alloc(GFP_NOFS); | ||
1213 | roots = ulist_alloc(GFP_NOFS); | ||
1214 | if (!tmp || !roots) { | ||
1215 | ulist_free(tmp); | ||
1216 | ulist_free(roots); | ||
1217 | return -ENOMEM; | ||
1218 | } | ||
1219 | |||
1220 | if (trans) | ||
1221 | btrfs_get_tree_mod_seq(fs_info, &elem); | ||
1222 | else | ||
1223 | down_read(&fs_info->commit_root_sem); | ||
1224 | ULIST_ITER_INIT(&uiter); | ||
1225 | while (1) { | ||
1226 | ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp, | ||
1227 | roots, NULL, root_objectid, inum); | ||
1228 | if (ret == BACKREF_FOUND_SHARED) { | ||
1229 | ret = 1; | ||
1230 | break; | ||
1231 | } | ||
1232 | if (ret < 0 && ret != -ENOENT) | ||
1233 | break; | ||
1234 | node = ulist_next(tmp, &uiter); | ||
1235 | if (!node) | ||
1236 | break; | ||
1237 | bytenr = node->val; | ||
1238 | cond_resched(); | ||
1239 | } | ||
1240 | if (trans) | ||
1241 | btrfs_put_tree_mod_seq(fs_info, &elem); | ||
1242 | else | ||
1243 | up_read(&fs_info->commit_root_sem); | ||
1244 | ulist_free(tmp); | ||
1245 | ulist_free(roots); | ||
1246 | return ret; | ||
1247 | } | ||
1248 | |||
1164 | /* | 1249 | /* |
1165 | * this makes the path point to (inum INODE_ITEM ioff) | 1250 | * this makes the path point to (inum INODE_ITEM ioff) |
1166 | */ | 1251 | */ |
@@ -1193,7 +1278,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, | |||
1193 | unsigned long ptr; | 1278 | unsigned long ptr; |
1194 | 1279 | ||
1195 | key.objectid = inode_objectid; | 1280 | key.objectid = inode_objectid; |
1196 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | 1281 | key.type = BTRFS_INODE_EXTREF_KEY; |
1197 | key.offset = start_off; | 1282 | key.offset = start_off; |
1198 | 1283 | ||
1199 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 1284 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
@@ -1233,7 +1318,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, | |||
1233 | ret = -ENOENT; | 1318 | ret = -ENOENT; |
1234 | if (found_key.objectid != inode_objectid) | 1319 | if (found_key.objectid != inode_objectid) |
1235 | break; | 1320 | break; |
1236 | if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY) | 1321 | if (found_key.type != BTRFS_INODE_EXTREF_KEY) |
1237 | break; | 1322 | break; |
1238 | 1323 | ||
1239 | ret = 0; | 1324 | ret = 0; |
@@ -1366,7 +1451,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
1366 | } | 1451 | } |
1367 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); | 1452 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); |
1368 | if (found_key->type == BTRFS_METADATA_ITEM_KEY) | 1453 | if (found_key->type == BTRFS_METADATA_ITEM_KEY) |
1369 | size = fs_info->extent_root->leafsize; | 1454 | size = fs_info->extent_root->nodesize; |
1370 | else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) | 1455 | else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) |
1371 | size = found_key->offset; | 1456 | size = found_key->offset; |
1372 | 1457 | ||
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 86fc20fec282..2a1ac6bfc724 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -71,6 +71,9 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, | |||
71 | u64 start_off, struct btrfs_path *path, | 71 | u64 start_off, struct btrfs_path *path, |
72 | struct btrfs_inode_extref **ret_extref, | 72 | struct btrfs_inode_extref **ret_extref, |
73 | u64 *found_off); | 73 | u64 *found_off); |
74 | int btrfs_check_shared(struct btrfs_trans_handle *trans, | ||
75 | struct btrfs_fs_info *fs_info, u64 root_objectid, | ||
76 | u64 inum, u64 bytenr); | ||
74 | 77 | ||
75 | int __init btrfs_prelim_ref_init(void); | 78 | int __init btrfs_prelim_ref_init(void); |
76 | void btrfs_prelim_ref_exit(void); | 79 | void btrfs_prelim_ref_exit(void); |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 56b8522d5767..4aadadcfab20 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -44,6 +44,17 @@ | |||
44 | #define BTRFS_INODE_IN_DELALLOC_LIST 9 | 44 | #define BTRFS_INODE_IN_DELALLOC_LIST 9 |
45 | #define BTRFS_INODE_READDIO_NEED_LOCK 10 | 45 | #define BTRFS_INODE_READDIO_NEED_LOCK 10 |
46 | #define BTRFS_INODE_HAS_PROPS 11 | 46 | #define BTRFS_INODE_HAS_PROPS 11 |
47 | /* | ||
48 | * The following 3 bits are meant only for the btree inode. | ||
49 | * When any of them is set, it means an error happened while writing an | ||
50 | * extent buffer belonging to: | ||
51 | * 1) a non-log btree | ||
52 | * 2) a log btree and first log sub-transaction | ||
53 | * 3) a log btree and second log sub-transaction | ||
54 | */ | ||
55 | #define BTRFS_INODE_BTREE_ERR 12 | ||
56 | #define BTRFS_INODE_BTREE_LOG1_ERR 13 | ||
57 | #define BTRFS_INODE_BTREE_LOG2_ERR 14 | ||
47 | 58 | ||
48 | /* in memory btrfs inode */ | 59 | /* in memory btrfs inode */ |
49 | struct btrfs_inode { | 60 | struct btrfs_inode { |
@@ -121,6 +132,12 @@ struct btrfs_inode { | |||
121 | u64 delalloc_bytes; | 132 | u64 delalloc_bytes; |
122 | 133 | ||
123 | /* | 134 | /* |
135 | * total number of bytes pending defrag, used by stat to check whether | ||
136 | * it needs COW. | ||
137 | */ | ||
138 | u64 defrag_bytes; | ||
139 | |||
140 | /* | ||
124 | * the size of the file stored in the metadata on disk. data=ordered | 141 | * the size of the file stored in the metadata on disk. data=ordered |
125 | * means the in-memory i_size might be larger than the size on disk | 142 | * means the in-memory i_size might be larger than the size on disk |
126 | * because not all the blocks are written yet. | 143 | * because not all the blocks are written yet. |
@@ -248,8 +265,11 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | |||
248 | return 0; | 265 | return 0; |
249 | } | 266 | } |
250 | 267 | ||
268 | #define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1 | ||
269 | |||
251 | struct btrfs_dio_private { | 270 | struct btrfs_dio_private { |
252 | struct inode *inode; | 271 | struct inode *inode; |
272 | unsigned long flags; | ||
253 | u64 logical_offset; | 273 | u64 logical_offset; |
254 | u64 disk_bytenr; | 274 | u64 disk_bytenr; |
255 | u64 bytes; | 275 | u64 bytes; |
@@ -266,7 +286,12 @@ struct btrfs_dio_private { | |||
266 | 286 | ||
267 | /* dio_bio came from fs/direct-io.c */ | 287 | /* dio_bio came from fs/direct-io.c */ |
268 | struct bio *dio_bio; | 288 | struct bio *dio_bio; |
269 | u8 csum[0]; | 289 | |
290 | /* | ||
291 | * The original bio may be split into several sub-bios; this is | ||
292 | * done during endio of sub-bios | ||
293 | */ | ||
294 | int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int); | ||
270 | }; | 295 | }; |
271 | 296 | ||
272 | /* | 297 | /* |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ce92ae30250f..cb7f3fe9c9f6 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -807,7 +807,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
807 | 807 | ||
808 | /* super block bytenr is always the unmapped device bytenr */ | 808 | /* super block bytenr is always the unmapped device bytenr */ |
809 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); | 809 | dev_bytenr = btrfs_sb_offset(superblock_mirror_num); |
810 | if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) | 810 | if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes) |
811 | return -1; | 811 | return -1; |
812 | bh = __bread(superblock_bdev, dev_bytenr / 4096, | 812 | bh = __bread(superblock_bdev, dev_bytenr / 4096, |
813 | BTRFS_SUPER_INFO_SIZE); | 813 | BTRFS_SUPER_INFO_SIZE); |
@@ -820,7 +820,6 @@ static int btrfsic_process_superblock_dev_mirror( | |||
820 | btrfs_super_magic(super_tmp) != BTRFS_MAGIC || | 820 | btrfs_super_magic(super_tmp) != BTRFS_MAGIC || |
821 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || | 821 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || |
822 | btrfs_super_nodesize(super_tmp) != state->metablock_size || | 822 | btrfs_super_nodesize(super_tmp) != state->metablock_size || |
823 | btrfs_super_leafsize(super_tmp) != state->metablock_size || | ||
824 | btrfs_super_sectorsize(super_tmp) != state->datablock_size) { | 823 | btrfs_super_sectorsize(super_tmp) != state->datablock_size) { |
825 | brelse(bh); | 824 | brelse(bh); |
826 | return 0; | 825 | return 0; |
@@ -1252,8 +1251,7 @@ static void btrfsic_read_from_block_data( | |||
1252 | 1251 | ||
1253 | while (len > 0) { | 1252 | while (len > 0) { |
1254 | cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); | 1253 | cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); |
1255 | BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >> | 1254 | BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE)); |
1256 | PAGE_CACHE_SHIFT); | ||
1257 | kaddr = block_ctx->datav[i]; | 1255 | kaddr = block_ctx->datav[i]; |
1258 | memcpy(dst, kaddr + offset_in_page, cur); | 1256 | memcpy(dst, kaddr + offset_in_page, cur); |
1259 | 1257 | ||
@@ -3120,24 +3118,12 @@ int btrfsic_mount(struct btrfs_root *root, | |||
3120 | struct list_head *dev_head = &fs_devices->devices; | 3118 | struct list_head *dev_head = &fs_devices->devices; |
3121 | struct btrfs_device *device; | 3119 | struct btrfs_device *device; |
3122 | 3120 | ||
3123 | if (root->nodesize != root->leafsize) { | ||
3124 | printk(KERN_INFO | ||
3125 | "btrfsic: cannot handle nodesize %d != leafsize %d!\n", | ||
3126 | root->nodesize, root->leafsize); | ||
3127 | return -1; | ||
3128 | } | ||
3129 | if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { | 3121 | if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { |
3130 | printk(KERN_INFO | 3122 | printk(KERN_INFO |
3131 | "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | 3123 | "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", |
3132 | root->nodesize, PAGE_CACHE_SIZE); | 3124 | root->nodesize, PAGE_CACHE_SIZE); |
3133 | return -1; | 3125 | return -1; |
3134 | } | 3126 | } |
3135 | if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { | ||
3136 | printk(KERN_INFO | ||
3137 | "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | ||
3138 | root->leafsize, PAGE_CACHE_SIZE); | ||
3139 | return -1; | ||
3140 | } | ||
3141 | if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { | 3127 | if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { |
3142 | printk(KERN_INFO | 3128 | printk(KERN_INFO |
3143 | "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", | 3129 | "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 1daea0b47187..d3220d31d3cb 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -91,8 +91,7 @@ static inline int compressed_bio_size(struct btrfs_root *root, | |||
91 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | 91 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
92 | 92 | ||
93 | return sizeof(struct compressed_bio) + | 93 | return sizeof(struct compressed_bio) + |
94 | ((disk_size + root->sectorsize - 1) / root->sectorsize) * | 94 | (DIV_ROUND_UP(disk_size, root->sectorsize)) * csum_size; |
95 | csum_size; | ||
96 | } | 95 | } |
97 | 96 | ||
98 | static struct bio *compressed_bio_alloc(struct block_device *bdev, | 97 | static struct bio *compressed_bio_alloc(struct block_device *bdev, |
@@ -389,7 +388,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
389 | * freed before we're done setting it up | 388 | * freed before we're done setting it up |
390 | */ | 389 | */ |
391 | atomic_inc(&cb->pending_bios); | 390 | atomic_inc(&cb->pending_bios); |
392 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 391 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, |
392 | BTRFS_WQ_ENDIO_DATA); | ||
393 | BUG_ON(ret); /* -ENOMEM */ | 393 | BUG_ON(ret); /* -ENOMEM */ |
394 | 394 | ||
395 | if (!skip_sum) { | 395 | if (!skip_sum) { |
@@ -420,7 +420,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
420 | } | 420 | } |
421 | bio_get(bio); | 421 | bio_get(bio); |
422 | 422 | ||
423 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 423 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, BTRFS_WQ_ENDIO_DATA); |
424 | BUG_ON(ret); /* -ENOMEM */ | 424 | BUG_ON(ret); /* -ENOMEM */ |
425 | 425 | ||
426 | if (!skip_sum) { | 426 | if (!skip_sum) { |
@@ -615,8 +615,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
615 | cb->compress_type = extent_compress_type(bio_flags); | 615 | cb->compress_type = extent_compress_type(bio_flags); |
616 | cb->orig_bio = bio; | 616 | cb->orig_bio = bio; |
617 | 617 | ||
618 | nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) / | 618 | nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE); |
619 | PAGE_CACHE_SIZE; | ||
620 | cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, | 619 | cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, |
621 | GFP_NOFS); | 620 | GFP_NOFS); |
622 | if (!cb->compressed_pages) | 621 | if (!cb->compressed_pages) |
@@ -670,7 +669,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
670 | PAGE_CACHE_SIZE) { | 669 | PAGE_CACHE_SIZE) { |
671 | bio_get(comp_bio); | 670 | bio_get(comp_bio); |
672 | 671 | ||
673 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | 672 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, |
673 | BTRFS_WQ_ENDIO_DATA); | ||
674 | BUG_ON(ret); /* -ENOMEM */ | 674 | BUG_ON(ret); /* -ENOMEM */ |
675 | 675 | ||
676 | /* | 676 | /* |
@@ -686,8 +686,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
686 | comp_bio, sums); | 686 | comp_bio, sums); |
687 | BUG_ON(ret); /* -ENOMEM */ | 687 | BUG_ON(ret); /* -ENOMEM */ |
688 | } | 688 | } |
689 | sums += (comp_bio->bi_iter.bi_size + | 689 | sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size, |
690 | root->sectorsize - 1) / root->sectorsize; | 690 | root->sectorsize); |
691 | 691 | ||
692 | ret = btrfs_map_bio(root, READ, comp_bio, | 692 | ret = btrfs_map_bio(root, READ, comp_bio, |
693 | mirror_num, 0); | 693 | mirror_num, 0); |
@@ -708,7 +708,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
708 | } | 708 | } |
709 | bio_get(comp_bio); | 709 | bio_get(comp_bio); |
710 | 710 | ||
711 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); | 711 | ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, |
712 | BTRFS_WQ_ENDIO_DATA); | ||
712 | BUG_ON(ret); /* -ENOMEM */ | 713 | BUG_ON(ret); /* -ENOMEM */ |
713 | 714 | ||
714 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | 715 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 44ee5d2e52a4..19bc6162fb8e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -258,9 +258,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
258 | else | 258 | else |
259 | btrfs_node_key(buf, &disk_key, 0); | 259 | btrfs_node_key(buf, &disk_key, 0); |
260 | 260 | ||
261 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 261 | cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid, |
262 | new_root_objectid, &disk_key, level, | 262 | &disk_key, level, buf->start, 0); |
263 | buf->start, 0); | ||
264 | if (IS_ERR(cow)) | 263 | if (IS_ERR(cow)) |
265 | return PTR_ERR(cow); | 264 | return PTR_ERR(cow); |
266 | 265 | ||
@@ -1133,9 +1132,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
1133 | } else | 1132 | } else |
1134 | parent_start = 0; | 1133 | parent_start = 0; |
1135 | 1134 | ||
1136 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 1135 | cow = btrfs_alloc_tree_block(trans, root, parent_start, |
1137 | root->root_key.objectid, &disk_key, | 1136 | root->root_key.objectid, &disk_key, level, |
1138 | level, search_start, empty_size); | 1137 | search_start, empty_size); |
1139 | if (IS_ERR(cow)) | 1138 | if (IS_ERR(cow)) |
1140 | return PTR_ERR(cow); | 1139 | return PTR_ERR(cow); |
1141 | 1140 | ||
@@ -1425,7 +1424,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
1425 | struct tree_mod_root *old_root = NULL; | 1424 | struct tree_mod_root *old_root = NULL; |
1426 | u64 old_generation = 0; | 1425 | u64 old_generation = 0; |
1427 | u64 logical; | 1426 | u64 logical; |
1428 | u32 blocksize; | ||
1429 | 1427 | ||
1430 | eb_root = btrfs_read_lock_root_node(root); | 1428 | eb_root = btrfs_read_lock_root_node(root); |
1431 | tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq); | 1429 | tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq); |
@@ -1444,8 +1442,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) | |||
1444 | if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | 1442 | if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { |
1445 | btrfs_tree_read_unlock(eb_root); | 1443 | btrfs_tree_read_unlock(eb_root); |
1446 | free_extent_buffer(eb_root); | 1444 | free_extent_buffer(eb_root); |
1447 | blocksize = btrfs_level_size(root, old_root->level); | 1445 | old = read_tree_block(root, logical, 0); |
1448 | old = read_tree_block(root, logical, blocksize, 0); | ||
1449 | if (WARN_ON(!old || !extent_buffer_uptodate(old))) { | 1446 | if (WARN_ON(!old || !extent_buffer_uptodate(old))) { |
1450 | free_extent_buffer(old); | 1447 | free_extent_buffer(old); |
1451 | btrfs_warn(root->fs_info, | 1448 | btrfs_warn(root->fs_info, |
@@ -1506,10 +1503,9 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans, | |||
1506 | struct btrfs_root *root, | 1503 | struct btrfs_root *root, |
1507 | struct extent_buffer *buf) | 1504 | struct extent_buffer *buf) |
1508 | { | 1505 | { |
1509 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1506 | if (btrfs_test_is_dummy_root(root)) |
1510 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
1511 | return 0; | 1507 | return 0; |
1512 | #endif | 1508 | |
1513 | /* ensure we can see the force_cow */ | 1509 | /* ensure we can see the force_cow */ |
1514 | smp_rmb(); | 1510 | smp_rmb(); |
1515 | 1511 | ||
@@ -1651,7 +1647,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1651 | WARN_ON(trans->transid != root->fs_info->generation); | 1647 | WARN_ON(trans->transid != root->fs_info->generation); |
1652 | 1648 | ||
1653 | parent_nritems = btrfs_header_nritems(parent); | 1649 | parent_nritems = btrfs_header_nritems(parent); |
1654 | blocksize = btrfs_level_size(root, parent_level - 1); | 1650 | blocksize = root->nodesize; |
1655 | end_slot = parent_nritems; | 1651 | end_slot = parent_nritems; |
1656 | 1652 | ||
1657 | if (parent_nritems == 1) | 1653 | if (parent_nritems == 1) |
@@ -1685,15 +1681,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1685 | continue; | 1681 | continue; |
1686 | } | 1682 | } |
1687 | 1683 | ||
1688 | cur = btrfs_find_tree_block(root, blocknr, blocksize); | 1684 | cur = btrfs_find_tree_block(root, blocknr); |
1689 | if (cur) | 1685 | if (cur) |
1690 | uptodate = btrfs_buffer_uptodate(cur, gen, 0); | 1686 | uptodate = btrfs_buffer_uptodate(cur, gen, 0); |
1691 | else | 1687 | else |
1692 | uptodate = 0; | 1688 | uptodate = 0; |
1693 | if (!cur || !uptodate) { | 1689 | if (!cur || !uptodate) { |
1694 | if (!cur) { | 1690 | if (!cur) { |
1695 | cur = read_tree_block(root, blocknr, | 1691 | cur = read_tree_block(root, blocknr, gen); |
1696 | blocksize, gen); | ||
1697 | if (!cur || !extent_buffer_uptodate(cur)) { | 1692 | if (!cur || !extent_buffer_uptodate(cur)) { |
1698 | free_extent_buffer(cur); | 1693 | free_extent_buffer(cur); |
1699 | return -EIO; | 1694 | return -EIO; |
@@ -1872,7 +1867,6 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, | |||
1872 | BUG_ON(level == 0); | 1867 | BUG_ON(level == 0); |
1873 | 1868 | ||
1874 | eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), | 1869 | eb = read_tree_block(root, btrfs_node_blockptr(parent, slot), |
1875 | btrfs_level_size(root, level - 1), | ||
1876 | btrfs_node_ptr_generation(parent, slot)); | 1870 | btrfs_node_ptr_generation(parent, slot)); |
1877 | if (eb && !extent_buffer_uptodate(eb)) { | 1871 | if (eb && !extent_buffer_uptodate(eb)) { |
1878 | free_extent_buffer(eb); | 1872 | free_extent_buffer(eb); |
@@ -2267,8 +2261,8 @@ static void reada_for_search(struct btrfs_root *root, | |||
2267 | node = path->nodes[level]; | 2261 | node = path->nodes[level]; |
2268 | 2262 | ||
2269 | search = btrfs_node_blockptr(node, slot); | 2263 | search = btrfs_node_blockptr(node, slot); |
2270 | blocksize = btrfs_level_size(root, level - 1); | 2264 | blocksize = root->nodesize; |
2271 | eb = btrfs_find_tree_block(root, search, blocksize); | 2265 | eb = btrfs_find_tree_block(root, search); |
2272 | if (eb) { | 2266 | if (eb) { |
2273 | free_extent_buffer(eb); | 2267 | free_extent_buffer(eb); |
2274 | return; | 2268 | return; |
@@ -2298,7 +2292,7 @@ static void reada_for_search(struct btrfs_root *root, | |||
2298 | if ((search <= target && target - search <= 65536) || | 2292 | if ((search <= target && target - search <= 65536) || |
2299 | (search > target && search - target <= 65536)) { | 2293 | (search > target && search - target <= 65536)) { |
2300 | gen = btrfs_node_ptr_generation(node, nr); | 2294 | gen = btrfs_node_ptr_generation(node, nr); |
2301 | readahead_tree_block(root, search, blocksize, gen); | 2295 | readahead_tree_block(root, search, blocksize); |
2302 | nread += blocksize; | 2296 | nread += blocksize; |
2303 | } | 2297 | } |
2304 | nscan++; | 2298 | nscan++; |
@@ -2325,12 +2319,12 @@ static noinline void reada_for_balance(struct btrfs_root *root, | |||
2325 | 2319 | ||
2326 | nritems = btrfs_header_nritems(parent); | 2320 | nritems = btrfs_header_nritems(parent); |
2327 | slot = path->slots[level + 1]; | 2321 | slot = path->slots[level + 1]; |
2328 | blocksize = btrfs_level_size(root, level); | 2322 | blocksize = root->nodesize; |
2329 | 2323 | ||
2330 | if (slot > 0) { | 2324 | if (slot > 0) { |
2331 | block1 = btrfs_node_blockptr(parent, slot - 1); | 2325 | block1 = btrfs_node_blockptr(parent, slot - 1); |
2332 | gen = btrfs_node_ptr_generation(parent, slot - 1); | 2326 | gen = btrfs_node_ptr_generation(parent, slot - 1); |
2333 | eb = btrfs_find_tree_block(root, block1, blocksize); | 2327 | eb = btrfs_find_tree_block(root, block1); |
2334 | /* | 2328 | /* |
2335 | * if we get -eagain from btrfs_buffer_uptodate, we | 2329 | * if we get -eagain from btrfs_buffer_uptodate, we |
2336 | * don't want to return eagain here. That will loop | 2330 | * don't want to return eagain here. That will loop |
@@ -2343,16 +2337,16 @@ static noinline void reada_for_balance(struct btrfs_root *root, | |||
2343 | if (slot + 1 < nritems) { | 2337 | if (slot + 1 < nritems) { |
2344 | block2 = btrfs_node_blockptr(parent, slot + 1); | 2338 | block2 = btrfs_node_blockptr(parent, slot + 1); |
2345 | gen = btrfs_node_ptr_generation(parent, slot + 1); | 2339 | gen = btrfs_node_ptr_generation(parent, slot + 1); |
2346 | eb = btrfs_find_tree_block(root, block2, blocksize); | 2340 | eb = btrfs_find_tree_block(root, block2); |
2347 | if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) | 2341 | if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) |
2348 | block2 = 0; | 2342 | block2 = 0; |
2349 | free_extent_buffer(eb); | 2343 | free_extent_buffer(eb); |
2350 | } | 2344 | } |
2351 | 2345 | ||
2352 | if (block1) | 2346 | if (block1) |
2353 | readahead_tree_block(root, block1, blocksize, 0); | 2347 | readahead_tree_block(root, block1, blocksize); |
2354 | if (block2) | 2348 | if (block2) |
2355 | readahead_tree_block(root, block2, blocksize, 0); | 2349 | readahead_tree_block(root, block2, blocksize); |
2356 | } | 2350 | } |
2357 | 2351 | ||
2358 | 2352 | ||
@@ -2454,16 +2448,14 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
2454 | { | 2448 | { |
2455 | u64 blocknr; | 2449 | u64 blocknr; |
2456 | u64 gen; | 2450 | u64 gen; |
2457 | u32 blocksize; | ||
2458 | struct extent_buffer *b = *eb_ret; | 2451 | struct extent_buffer *b = *eb_ret; |
2459 | struct extent_buffer *tmp; | 2452 | struct extent_buffer *tmp; |
2460 | int ret; | 2453 | int ret; |
2461 | 2454 | ||
2462 | blocknr = btrfs_node_blockptr(b, slot); | 2455 | blocknr = btrfs_node_blockptr(b, slot); |
2463 | gen = btrfs_node_ptr_generation(b, slot); | 2456 | gen = btrfs_node_ptr_generation(b, slot); |
2464 | blocksize = btrfs_level_size(root, level - 1); | ||
2465 | 2457 | ||
2466 | tmp = btrfs_find_tree_block(root, blocknr, blocksize); | 2458 | tmp = btrfs_find_tree_block(root, blocknr); |
2467 | if (tmp) { | 2459 | if (tmp) { |
2468 | /* first we do an atomic uptodate check */ | 2460 | /* first we do an atomic uptodate check */ |
2469 | if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { | 2461 | if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { |
@@ -2507,7 +2499,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
2507 | btrfs_release_path(p); | 2499 | btrfs_release_path(p); |
2508 | 2500 | ||
2509 | ret = -EAGAIN; | 2501 | ret = -EAGAIN; |
2510 | tmp = read_tree_block(root, blocknr, blocksize, 0); | 2502 | tmp = read_tree_block(root, blocknr, 0); |
2511 | if (tmp) { | 2503 | if (tmp) { |
2512 | /* | 2504 | /* |
2513 | * If the read above didn't mark this buffer up to date, | 2505 | * If the read above didn't mark this buffer up to date, |
@@ -2792,8 +2784,6 @@ again: | |||
2792 | if (!should_cow_block(trans, root, b)) | 2784 | if (!should_cow_block(trans, root, b)) |
2793 | goto cow_done; | 2785 | goto cow_done; |
2794 | 2786 | ||
2795 | btrfs_set_path_blocking(p); | ||
2796 | |||
2797 | /* | 2787 | /* |
2798 | * must have write locks on this node and the | 2788 | * must have write locks on this node and the |
2799 | * parent | 2789 | * parent |
@@ -2807,6 +2797,7 @@ again: | |||
2807 | goto again; | 2797 | goto again; |
2808 | } | 2798 | } |
2809 | 2799 | ||
2800 | btrfs_set_path_blocking(p); | ||
2810 | err = btrfs_cow_block(trans, root, b, | 2801 | err = btrfs_cow_block(trans, root, b, |
2811 | p->nodes[level + 1], | 2802 | p->nodes[level + 1], |
2812 | p->slots[level + 1], &b); | 2803 | p->slots[level + 1], &b); |
@@ -3362,9 +3353,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
3362 | else | 3353 | else |
3363 | btrfs_node_key(lower, &lower_key, 0); | 3354 | btrfs_node_key(lower, &lower_key, 0); |
3364 | 3355 | ||
3365 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3356 | c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, |
3366 | root->root_key.objectid, &lower_key, | 3357 | &lower_key, level, root->node->start, 0); |
3367 | level, root->node->start, 0); | ||
3368 | if (IS_ERR(c)) | 3358 | if (IS_ERR(c)) |
3369 | return PTR_ERR(c); | 3359 | return PTR_ERR(c); |
3370 | 3360 | ||
@@ -3502,9 +3492,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
3502 | mid = (c_nritems + 1) / 2; | 3492 | mid = (c_nritems + 1) / 2; |
3503 | btrfs_node_key(c, &disk_key, mid); | 3493 | btrfs_node_key(c, &disk_key, mid); |
3504 | 3494 | ||
3505 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3495 | split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, |
3506 | root->root_key.objectid, | 3496 | &disk_key, level, c->start, 0); |
3507 | &disk_key, level, c->start, 0); | ||
3508 | if (IS_ERR(split)) | 3497 | if (IS_ERR(split)) |
3509 | return PTR_ERR(split); | 3498 | return PTR_ERR(split); |
3510 | 3499 | ||
@@ -4282,13 +4271,12 @@ again: | |||
4282 | else | 4271 | else |
4283 | btrfs_item_key(l, &disk_key, mid); | 4272 | btrfs_item_key(l, &disk_key, mid); |
4284 | 4273 | ||
4285 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 4274 | right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, |
4286 | root->root_key.objectid, | 4275 | &disk_key, 0, l->start, 0); |
4287 | &disk_key, 0, l->start, 0); | ||
4288 | if (IS_ERR(right)) | 4276 | if (IS_ERR(right)) |
4289 | return PTR_ERR(right); | 4277 | return PTR_ERR(right); |
4290 | 4278 | ||
4291 | root_add_used(root, root->leafsize); | 4279 | root_add_used(root, root->nodesize); |
4292 | 4280 | ||
4293 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 4281 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
4294 | btrfs_set_header_bytenr(right, right->start); | 4282 | btrfs_set_header_bytenr(right, right->start); |
@@ -4626,8 +4614,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, | |||
4626 | ptr = btrfs_item_ptr_offset(leaf, slot); | 4614 | ptr = btrfs_item_ptr_offset(leaf, slot); |
4627 | memmove_extent_buffer(leaf, ptr, | 4615 | memmove_extent_buffer(leaf, ptr, |
4628 | (unsigned long)fi, | 4616 | (unsigned long)fi, |
4629 | offsetof(struct btrfs_file_extent_item, | 4617 | BTRFS_FILE_EXTENT_INLINE_DATA_START); |
4630 | disk_bytenr)); | ||
4631 | } | 4618 | } |
4632 | } | 4619 | } |
4633 | 4620 | ||
@@ -4738,6 +4725,12 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, | |||
4738 | int slot; | 4725 | int slot; |
4739 | struct btrfs_map_token token; | 4726 | struct btrfs_map_token token; |
4740 | 4727 | ||
4728 | if (path->slots[0] == 0) { | ||
4729 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
4730 | fixup_low_keys(root, path, &disk_key, 1); | ||
4731 | } | ||
4732 | btrfs_unlock_up_safe(path, 1); | ||
4733 | |||
4741 | btrfs_init_map_token(&token); | 4734 | btrfs_init_map_token(&token); |
4742 | 4735 | ||
4743 | leaf = path->nodes[0]; | 4736 | leaf = path->nodes[0]; |
@@ -4798,12 +4791,6 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, | |||
4798 | } | 4791 | } |
4799 | 4792 | ||
4800 | btrfs_set_header_nritems(leaf, nritems + nr); | 4793 | btrfs_set_header_nritems(leaf, nritems + nr); |
4801 | |||
4802 | if (slot == 0) { | ||
4803 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | ||
4804 | fixup_low_keys(root, path, &disk_key, 1); | ||
4805 | } | ||
4806 | btrfs_unlock_up_safe(path, 1); | ||
4807 | btrfs_mark_buffer_dirty(leaf); | 4794 | btrfs_mark_buffer_dirty(leaf); |
4808 | 4795 | ||
4809 | if (btrfs_leaf_free_space(root, leaf) < 0) { | 4796 | if (btrfs_leaf_free_space(root, leaf) < 0) { |
@@ -5145,8 +5132,9 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | |||
5145 | u32 nritems; | 5132 | u32 nritems; |
5146 | int level; | 5133 | int level; |
5147 | int ret = 1; | 5134 | int ret = 1; |
5135 | int keep_locks = path->keep_locks; | ||
5148 | 5136 | ||
5149 | WARN_ON(!path->keep_locks); | 5137 | path->keep_locks = 1; |
5150 | again: | 5138 | again: |
5151 | cur = btrfs_read_lock_root_node(root); | 5139 | cur = btrfs_read_lock_root_node(root); |
5152 | level = btrfs_header_level(cur); | 5140 | level = btrfs_header_level(cur); |
@@ -5210,7 +5198,6 @@ find_next_key: | |||
5210 | path->slots[level] = slot; | 5198 | path->slots[level] = slot; |
5211 | if (level == path->lowest_level) { | 5199 | if (level == path->lowest_level) { |
5212 | ret = 0; | 5200 | ret = 0; |
5213 | unlock_up(path, level, 1, 0, NULL); | ||
5214 | goto out; | 5201 | goto out; |
5215 | } | 5202 | } |
5216 | btrfs_set_path_blocking(path); | 5203 | btrfs_set_path_blocking(path); |
@@ -5225,9 +5212,12 @@ find_next_key: | |||
5225 | btrfs_clear_path_blocking(path, NULL, 0); | 5212 | btrfs_clear_path_blocking(path, NULL, 0); |
5226 | } | 5213 | } |
5227 | out: | 5214 | out: |
5228 | if (ret == 0) | 5215 | path->keep_locks = keep_locks; |
5216 | if (ret == 0) { | ||
5217 | btrfs_unlock_up_safe(path, path->lowest_level + 1); | ||
5218 | btrfs_set_path_blocking(path); | ||
5229 | memcpy(min_key, &found_key, sizeof(found_key)); | 5219 | memcpy(min_key, &found_key, sizeof(found_key)); |
5230 | btrfs_set_path_blocking(path); | 5220 | } |
5231 | return ret; | 5221 | return ret; |
5232 | } | 5222 | } |
5233 | 5223 | ||
@@ -5375,7 +5365,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5375 | goto out; | 5365 | goto out; |
5376 | } | 5366 | } |
5377 | 5367 | ||
5378 | tmp_buf = kmalloc(left_root->leafsize, GFP_NOFS); | 5368 | tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); |
5379 | if (!tmp_buf) { | 5369 | if (!tmp_buf) { |
5380 | ret = -ENOMEM; | 5370 | ret = -ENOMEM; |
5381 | goto out; | 5371 | goto out; |
@@ -5520,18 +5510,18 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5520 | goto out; | 5510 | goto out; |
5521 | advance_right = ADVANCE; | 5511 | advance_right = ADVANCE; |
5522 | } else { | 5512 | } else { |
5523 | enum btrfs_compare_tree_result cmp; | 5513 | enum btrfs_compare_tree_result result; |
5524 | 5514 | ||
5525 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); | 5515 | WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); |
5526 | ret = tree_compare_item(left_root, left_path, | 5516 | ret = tree_compare_item(left_root, left_path, |
5527 | right_path, tmp_buf); | 5517 | right_path, tmp_buf); |
5528 | if (ret) | 5518 | if (ret) |
5529 | cmp = BTRFS_COMPARE_TREE_CHANGED; | 5519 | result = BTRFS_COMPARE_TREE_CHANGED; |
5530 | else | 5520 | else |
5531 | cmp = BTRFS_COMPARE_TREE_SAME; | 5521 | result = BTRFS_COMPARE_TREE_SAME; |
5532 | ret = changed_cb(left_root, right_root, | 5522 | ret = changed_cb(left_root, right_root, |
5533 | left_path, right_path, | 5523 | left_path, right_path, |
5534 | &left_key, cmp, ctx); | 5524 | &left_key, result, ctx); |
5535 | if (ret < 0) | 5525 | if (ret < 0) |
5536 | goto out; | 5526 | goto out; |
5537 | advance_left = ADVANCE; | 5527 | advance_left = ADVANCE; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8e29b614fe93..d557264ee974 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
35 | #include <linux/btrfs.h> | 35 | #include <linux/btrfs.h> |
36 | #include <linux/workqueue.h> | 36 | #include <linux/workqueue.h> |
37 | #include <linux/security.h> | ||
37 | #include "extent_io.h" | 38 | #include "extent_io.h" |
38 | #include "extent_map.h" | 39 | #include "extent_map.h" |
39 | #include "async-thread.h" | 40 | #include "async-thread.h" |
@@ -62,13 +63,6 @@ struct btrfs_ordered_sum; | |||
62 | 63 | ||
63 | #define BTRFS_COMPAT_EXTENT_TREE_V0 | 64 | #define BTRFS_COMPAT_EXTENT_TREE_V0 |
64 | 65 | ||
65 | /* | ||
66 | * files bigger than this get some pre-flushing when they are added | ||
67 | * to the ordered operations list. That way we limit the total | ||
68 | * work done by the commit | ||
69 | */ | ||
70 | #define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024) | ||
71 | |||
72 | /* holds pointers to all of the tree roots */ | 66 | /* holds pointers to all of the tree roots */ |
73 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL | 67 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL |
74 | 68 | ||
@@ -391,10 +385,12 @@ struct btrfs_header { | |||
391 | sizeof(struct btrfs_header)) / \ | 385 | sizeof(struct btrfs_header)) / \ |
392 | sizeof(struct btrfs_key_ptr)) | 386 | sizeof(struct btrfs_key_ptr)) |
393 | #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) | 387 | #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) |
394 | #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) | 388 | #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->nodesize)) |
389 | #define BTRFS_FILE_EXTENT_INLINE_DATA_START \ | ||
390 | (offsetof(struct btrfs_file_extent_item, disk_bytenr)) | ||
395 | #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ | 391 | #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ |
396 | sizeof(struct btrfs_item) - \ | 392 | sizeof(struct btrfs_item) - \ |
397 | sizeof(struct btrfs_file_extent_item)) | 393 | BTRFS_FILE_EXTENT_INLINE_DATA_START) |
398 | #define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ | 394 | #define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ |
399 | sizeof(struct btrfs_item) -\ | 395 | sizeof(struct btrfs_item) -\ |
400 | sizeof(struct btrfs_dir_item)) | 396 | sizeof(struct btrfs_dir_item)) |
@@ -474,7 +470,7 @@ struct btrfs_super_block { | |||
474 | __le64 num_devices; | 470 | __le64 num_devices; |
475 | __le32 sectorsize; | 471 | __le32 sectorsize; |
476 | __le32 nodesize; | 472 | __le32 nodesize; |
477 | __le32 leafsize; | 473 | __le32 __unused_leafsize; |
478 | __le32 stripesize; | 474 | __le32 stripesize; |
479 | __le32 sys_chunk_array_size; | 475 | __le32 sys_chunk_array_size; |
480 | __le64 chunk_root_generation; | 476 | __le64 chunk_root_generation; |
@@ -903,6 +899,8 @@ struct btrfs_file_extent_item { | |||
903 | /* | 899 | /* |
904 | * disk space consumed by the extent, checksum blocks are included | 900 | * disk space consumed by the extent, checksum blocks are included |
905 | * in these numbers | 901 | * in these numbers |
902 | * | ||
903 | * At this offset in the structure, the inline extent data start. | ||
906 | */ | 904 | */ |
907 | __le64 disk_bytenr; | 905 | __le64 disk_bytenr; |
908 | __le64 disk_num_bytes; | 906 | __le64 disk_num_bytes; |
@@ -1305,8 +1303,8 @@ struct btrfs_block_group_cache { | |||
1305 | */ | 1303 | */ |
1306 | struct list_head cluster_list; | 1304 | struct list_head cluster_list; |
1307 | 1305 | ||
1308 | /* For delayed block group creation */ | 1306 | /* For delayed block group creation or deletion of empty block groups */ |
1309 | struct list_head new_bg_list; | 1307 | struct list_head bg_list; |
1310 | }; | 1308 | }; |
1311 | 1309 | ||
1312 | /* delayed seq elem */ | 1310 | /* delayed seq elem */ |
@@ -1545,6 +1543,7 @@ struct btrfs_fs_info { | |||
1545 | struct btrfs_workqueue *endio_workers; | 1543 | struct btrfs_workqueue *endio_workers; |
1546 | struct btrfs_workqueue *endio_meta_workers; | 1544 | struct btrfs_workqueue *endio_meta_workers; |
1547 | struct btrfs_workqueue *endio_raid56_workers; | 1545 | struct btrfs_workqueue *endio_raid56_workers; |
1546 | struct btrfs_workqueue *endio_repair_workers; | ||
1548 | struct btrfs_workqueue *rmw_workers; | 1547 | struct btrfs_workqueue *rmw_workers; |
1549 | struct btrfs_workqueue *endio_meta_write_workers; | 1548 | struct btrfs_workqueue *endio_meta_write_workers; |
1550 | struct btrfs_workqueue *endio_write_workers; | 1549 | struct btrfs_workqueue *endio_write_workers; |
@@ -1574,6 +1573,7 @@ struct btrfs_fs_info { | |||
1574 | int do_barriers; | 1573 | int do_barriers; |
1575 | int closing; | 1574 | int closing; |
1576 | int log_root_recovering; | 1575 | int log_root_recovering; |
1576 | int open; | ||
1577 | 1577 | ||
1578 | u64 total_pinned; | 1578 | u64 total_pinned; |
1579 | 1579 | ||
@@ -1723,6 +1723,12 @@ struct btrfs_fs_info { | |||
1723 | 1723 | ||
1724 | /* Used to reclaim the metadata space in the background. */ | 1724 | /* Used to reclaim the metadata space in the background. */ |
1725 | struct work_struct async_reclaim_work; | 1725 | struct work_struct async_reclaim_work; |
1726 | |||
1727 | spinlock_t unused_bgs_lock; | ||
1728 | struct list_head unused_bgs; | ||
1729 | |||
1730 | /* For btrfs to record security options */ | ||
1731 | struct security_mnt_opts security_opts; | ||
1726 | }; | 1732 | }; |
1727 | 1733 | ||
1728 | struct btrfs_subvolume_writers { | 1734 | struct btrfs_subvolume_writers { |
@@ -1776,12 +1782,12 @@ struct btrfs_root { | |||
1776 | 1782 | ||
1777 | /* free ino cache stuff */ | 1783 | /* free ino cache stuff */ |
1778 | struct btrfs_free_space_ctl *free_ino_ctl; | 1784 | struct btrfs_free_space_ctl *free_ino_ctl; |
1779 | enum btrfs_caching_type cached; | 1785 | enum btrfs_caching_type ino_cache_state; |
1780 | spinlock_t cache_lock; | 1786 | spinlock_t ino_cache_lock; |
1781 | wait_queue_head_t cache_wait; | 1787 | wait_queue_head_t ino_cache_wait; |
1782 | struct btrfs_free_space_ctl *free_ino_pinned; | 1788 | struct btrfs_free_space_ctl *free_ino_pinned; |
1783 | u64 cache_progress; | 1789 | u64 ino_cache_progress; |
1784 | struct inode *cache_inode; | 1790 | struct inode *ino_cache_inode; |
1785 | 1791 | ||
1786 | struct mutex log_mutex; | 1792 | struct mutex log_mutex; |
1787 | wait_queue_head_t log_writer_wait; | 1793 | wait_queue_head_t log_writer_wait; |
@@ -1806,18 +1812,14 @@ struct btrfs_root { | |||
1806 | /* node allocations are done in nodesize units */ | 1812 | /* node allocations are done in nodesize units */ |
1807 | u32 nodesize; | 1813 | u32 nodesize; |
1808 | 1814 | ||
1809 | /* leaf allocations are done in leafsize units */ | ||
1810 | u32 leafsize; | ||
1811 | |||
1812 | u32 stripesize; | 1815 | u32 stripesize; |
1813 | 1816 | ||
1814 | u32 type; | 1817 | u32 type; |
1815 | 1818 | ||
1816 | u64 highest_objectid; | 1819 | u64 highest_objectid; |
1817 | 1820 | ||
1818 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1821 | /* only used with CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled */ |
1819 | u64 alloc_bytenr; | 1822 | u64 alloc_bytenr; |
1820 | #endif | ||
1821 | 1823 | ||
1822 | u64 defrag_trans_start; | 1824 | u64 defrag_trans_start; |
1823 | struct btrfs_key defrag_progress; | 1825 | struct btrfs_key defrag_progress; |
@@ -2094,6 +2096,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
2094 | #define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24) | 2096 | #define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24) |
2095 | 2097 | ||
2096 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) | 2098 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) |
2099 | #define BTRFS_DEFAULT_MAX_INLINE (8192) | ||
2097 | 2100 | ||
2098 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 2101 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
2099 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 2102 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -2995,8 +2998,6 @@ BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, | |||
2995 | sectorsize, 32); | 2998 | sectorsize, 32); |
2996 | BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, | 2999 | BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, |
2997 | nodesize, 32); | 3000 | nodesize, 32); |
2998 | BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, | ||
2999 | leafsize, 32); | ||
3000 | BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, | 3001 | BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, |
3001 | stripesize, 32); | 3002 | stripesize, 32); |
3002 | BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, | 3003 | BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, |
@@ -3049,14 +3050,12 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, | |||
3049 | static inline unsigned long | 3050 | static inline unsigned long |
3050 | btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) | 3051 | btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) |
3051 | { | 3052 | { |
3052 | unsigned long offset = (unsigned long)e; | 3053 | return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; |
3053 | offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
3054 | return offset; | ||
3055 | } | 3054 | } |
3056 | 3055 | ||
3057 | static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) | 3056 | static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) |
3058 | { | 3057 | { |
3059 | return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; | 3058 | return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize; |
3060 | } | 3059 | } |
3061 | 3060 | ||
3062 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, | 3061 | BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, |
@@ -3086,9 +3085,7 @@ BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, | |||
3086 | static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, | 3085 | static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb, |
3087 | struct btrfs_item *e) | 3086 | struct btrfs_item *e) |
3088 | { | 3087 | { |
3089 | unsigned long offset; | 3088 | return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; |
3090 | offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); | ||
3091 | return btrfs_item_size(eb, e) - offset; | ||
3092 | } | 3089 | } |
3093 | 3090 | ||
3094 | /* this returns the number of file bytes represented by the inline item. | 3091 | /* this returns the number of file bytes represented by the inline item. |
@@ -3232,13 +3229,6 @@ static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | |||
3232 | return sb->s_fs_info; | 3229 | return sb->s_fs_info; |
3233 | } | 3230 | } |
3234 | 3231 | ||
3235 | static inline u32 btrfs_level_size(struct btrfs_root *root, int level) | ||
3236 | { | ||
3237 | if (level == 0) | ||
3238 | return root->leafsize; | ||
3239 | return root->nodesize; | ||
3240 | } | ||
3241 | |||
3242 | /* helper function to cast into the data area of the leaf. */ | 3232 | /* helper function to cast into the data area of the leaf. */ |
3243 | #define btrfs_item_ptr(leaf, slot, type) \ | 3233 | #define btrfs_item_ptr(leaf, slot, type) \ |
3244 | ((type *)(btrfs_leaf_data(leaf) + \ | 3234 | ((type *)(btrfs_leaf_data(leaf) + \ |
@@ -3263,7 +3253,7 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) | |||
3263 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | 3253 | static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, |
3264 | unsigned num_items) | 3254 | unsigned num_items) |
3265 | { | 3255 | { |
3266 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 3256 | return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * |
3267 | 2 * num_items; | 3257 | 2 * num_items; |
3268 | } | 3258 | } |
3269 | 3259 | ||
@@ -3274,8 +3264,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, | |||
3274 | static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, | 3264 | static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root, |
3275 | unsigned num_items) | 3265 | unsigned num_items) |
3276 | { | 3266 | { |
3277 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | 3267 | return root->nodesize * BTRFS_MAX_LEVEL * num_items; |
3278 | num_items; | ||
3279 | } | 3268 | } |
3280 | 3269 | ||
3281 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, | 3270 | int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, |
@@ -3305,9 +3294,9 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group( | |||
3305 | u64 bytenr); | 3294 | u64 bytenr); |
3306 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | 3295 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache); |
3307 | int get_block_group_index(struct btrfs_block_group_cache *cache); | 3296 | int get_block_group_index(struct btrfs_block_group_cache *cache); |
3308 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 3297 | struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, |
3309 | struct btrfs_root *root, u32 blocksize, | 3298 | struct btrfs_root *root, u64 parent, |
3310 | u64 parent, u64 root_objectid, | 3299 | u64 root_objectid, |
3311 | struct btrfs_disk_key *key, int level, | 3300 | struct btrfs_disk_key *key, int level, |
3312 | u64 hint, u64 empty_size); | 3301 | u64 hint, u64 empty_size); |
3313 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 3302 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
@@ -3363,6 +3352,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
3363 | u64 size); | 3352 | u64 size); |
3364 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 3353 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
3365 | struct btrfs_root *root, u64 group_start); | 3354 | struct btrfs_root *root, u64 group_start); |
3355 | void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); | ||
3366 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | 3356 | void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, |
3367 | struct btrfs_root *root); | 3357 | struct btrfs_root *root); |
3368 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 3358 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
@@ -3604,6 +3594,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) | |||
3604 | kfree(fs_info->uuid_root); | 3594 | kfree(fs_info->uuid_root); |
3605 | kfree(fs_info->super_copy); | 3595 | kfree(fs_info->super_copy); |
3606 | kfree(fs_info->super_for_commit); | 3596 | kfree(fs_info->super_for_commit); |
3597 | security_free_mnt_opts(&fs_info->security_opts); | ||
3607 | kfree(fs_info); | 3598 | kfree(fs_info); |
3608 | } | 3599 | } |
3609 | 3600 | ||
@@ -3739,8 +3730,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
3739 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 3730 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
3740 | struct bio *bio, u32 *dst); | 3731 | struct bio *bio, u32 *dst); |
3741 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | 3732 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, |
3742 | struct btrfs_dio_private *dip, struct bio *bio, | 3733 | struct bio *bio, u64 logical_offset); |
3743 | u64 logical_offset); | ||
3744 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 3734 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
3745 | struct btrfs_root *root, | 3735 | struct btrfs_root *root, |
3746 | u64 objectid, u64 pos, | 3736 | u64 objectid, u64 pos, |
@@ -4141,8 +4131,15 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) | |||
4141 | /* Sanity test specific functions */ | 4131 | /* Sanity test specific functions */ |
4142 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 4132 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
4143 | void btrfs_test_destroy_inode(struct inode *inode); | 4133 | void btrfs_test_destroy_inode(struct inode *inode); |
4144 | int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, | ||
4145 | u64 rfer, u64 excl); | ||
4146 | #endif | 4134 | #endif |
4147 | 4135 | ||
4136 | static inline int btrfs_test_is_dummy_root(struct btrfs_root *root) | ||
4137 | { | ||
4138 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | ||
4139 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
4140 | return 1; | ||
4141 | #endif | ||
4142 | return 0; | ||
4143 | } | ||
4144 | |||
4148 | #endif | 4145 | #endif |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index a2e90f855d7d..054577bddaf2 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -1042,7 +1042,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | |||
1042 | int ret; | 1042 | int ret; |
1043 | 1043 | ||
1044 | key.objectid = node->inode_id; | 1044 | key.objectid = node->inode_id; |
1045 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 1045 | key.type = BTRFS_INODE_ITEM_KEY; |
1046 | key.offset = 0; | 1046 | key.offset = 0; |
1047 | 1047 | ||
1048 | if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags)) | 1048 | if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags)) |
@@ -1099,7 +1099,7 @@ err_out: | |||
1099 | search: | 1099 | search: |
1100 | btrfs_release_path(path); | 1100 | btrfs_release_path(path); |
1101 | 1101 | ||
1102 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | 1102 | key.type = BTRFS_INODE_EXTREF_KEY; |
1103 | key.offset = -1; | 1103 | key.offset = -1; |
1104 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1104 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1105 | if (ret < 0) | 1105 | if (ret < 0) |
@@ -1473,7 +1473,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, | |||
1473 | } | 1473 | } |
1474 | 1474 | ||
1475 | delayed_item->key.objectid = btrfs_ino(dir); | 1475 | delayed_item->key.objectid = btrfs_ino(dir); |
1476 | btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY); | 1476 | delayed_item->key.type = BTRFS_DIR_INDEX_KEY; |
1477 | delayed_item->key.offset = index; | 1477 | delayed_item->key.offset = index; |
1478 | 1478 | ||
1479 | dir_item = (struct btrfs_dir_item *)delayed_item->data; | 1479 | dir_item = (struct btrfs_dir_item *)delayed_item->data; |
@@ -1542,7 +1542,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans, | |||
1542 | return PTR_ERR(node); | 1542 | return PTR_ERR(node); |
1543 | 1543 | ||
1544 | item_key.objectid = btrfs_ino(dir); | 1544 | item_key.objectid = btrfs_ino(dir); |
1545 | btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY); | 1545 | item_key.type = BTRFS_DIR_INDEX_KEY; |
1546 | item_key.offset = index; | 1546 | item_key.offset = index; |
1547 | 1547 | ||
1548 | ret = btrfs_delete_delayed_insertion_item(root, node, &item_key); | 1548 | ret = btrfs_delete_delayed_insertion_item(root, node, &item_key); |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index eea26e1b2fda..6f662b34ba0e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -168,8 +168,12 @@ no_valid_dev_replace_entry_found: | |||
168 | dev_replace->srcdev->total_bytes; | 168 | dev_replace->srcdev->total_bytes; |
169 | dev_replace->tgtdev->disk_total_bytes = | 169 | dev_replace->tgtdev->disk_total_bytes = |
170 | dev_replace->srcdev->disk_total_bytes; | 170 | dev_replace->srcdev->disk_total_bytes; |
171 | dev_replace->tgtdev->commit_total_bytes = | ||
172 | dev_replace->srcdev->commit_total_bytes; | ||
171 | dev_replace->tgtdev->bytes_used = | 173 | dev_replace->tgtdev->bytes_used = |
172 | dev_replace->srcdev->bytes_used; | 174 | dev_replace->srcdev->bytes_used; |
175 | dev_replace->tgtdev->commit_bytes_used = | ||
176 | dev_replace->srcdev->commit_bytes_used; | ||
173 | } | 177 | } |
174 | dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; | 178 | dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; |
175 | btrfs_init_dev_replace_tgtdev_for_resume(fs_info, | 179 | btrfs_init_dev_replace_tgtdev_for_resume(fs_info, |
@@ -329,30 +333,34 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
329 | args->start.tgtdev_name[0] == '\0') | 333 | args->start.tgtdev_name[0] == '\0') |
330 | return -EINVAL; | 334 | return -EINVAL; |
331 | 335 | ||
332 | mutex_lock(&fs_info->volume_mutex); | 336 | /* |
333 | ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, | 337 | * Here we commit the transaction to make sure commit_total_bytes |
334 | &tgt_device); | 338 | * of all the devices are updated. |
335 | if (ret) { | 339 | */ |
336 | btrfs_err(fs_info, "target device %s is invalid!", | 340 | trans = btrfs_attach_transaction(root); |
337 | args->start.tgtdev_name); | 341 | if (!IS_ERR(trans)) { |
338 | mutex_unlock(&fs_info->volume_mutex); | 342 | ret = btrfs_commit_transaction(trans, root); |
339 | return -EINVAL; | 343 | if (ret) |
344 | return ret; | ||
345 | } else if (PTR_ERR(trans) != -ENOENT) { | ||
346 | return PTR_ERR(trans); | ||
340 | } | 347 | } |
341 | 348 | ||
349 | /* the disk copy procedure reuses the scrub code */ | ||
350 | mutex_lock(&fs_info->volume_mutex); | ||
342 | ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, | 351 | ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, |
343 | args->start.srcdev_name, | 352 | args->start.srcdev_name, |
344 | &src_device); | 353 | &src_device); |
345 | mutex_unlock(&fs_info->volume_mutex); | ||
346 | if (ret) { | 354 | if (ret) { |
347 | ret = -EINVAL; | 355 | mutex_unlock(&fs_info->volume_mutex); |
348 | goto leave_no_lock; | 356 | return ret; |
349 | } | 357 | } |
350 | 358 | ||
351 | if (tgt_device->total_bytes < src_device->total_bytes) { | 359 | ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, |
352 | btrfs_err(fs_info, "target device is smaller than source device!"); | 360 | src_device, &tgt_device); |
353 | ret = -EINVAL; | 361 | mutex_unlock(&fs_info->volume_mutex); |
354 | goto leave_no_lock; | 362 | if (ret) |
355 | } | 363 | return ret; |
356 | 364 | ||
357 | btrfs_dev_replace_lock(dev_replace); | 365 | btrfs_dev_replace_lock(dev_replace); |
358 | switch (dev_replace->replace_state) { | 366 | switch (dev_replace->replace_state) { |
@@ -380,10 +388,6 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
380 | src_device->devid, | 388 | src_device->devid, |
381 | rcu_str_deref(tgt_device->name)); | 389 | rcu_str_deref(tgt_device->name)); |
382 | 390 | ||
383 | tgt_device->total_bytes = src_device->total_bytes; | ||
384 | tgt_device->disk_total_bytes = src_device->disk_total_bytes; | ||
385 | tgt_device->bytes_used = src_device->bytes_used; | ||
386 | |||
387 | /* | 391 | /* |
388 | * from now on, the writes to the srcdev are all duplicated to | 392 | * from now on, the writes to the srcdev are all duplicated to |
389 | * go to the tgtdev as well (refer to btrfs_map_block()). | 393 | * go to the tgtdev as well (refer to btrfs_map_block()). |
@@ -414,7 +418,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
414 | 418 | ||
415 | /* the disk copy procedure reuses the scrub code */ | 419 | /* the disk copy procedure reuses the scrub code */ |
416 | ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, | 420 | ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, |
417 | src_device->total_bytes, | 421 | btrfs_device_get_total_bytes(src_device), |
418 | &dev_replace->scrub_progress, 0, 1); | 422 | &dev_replace->scrub_progress, 0, 1); |
419 | 423 | ||
420 | ret = btrfs_dev_replace_finishing(root->fs_info, ret); | 424 | ret = btrfs_dev_replace_finishing(root->fs_info, ret); |
@@ -426,9 +430,7 @@ leave: | |||
426 | dev_replace->srcdev = NULL; | 430 | dev_replace->srcdev = NULL; |
427 | dev_replace->tgtdev = NULL; | 431 | dev_replace->tgtdev = NULL; |
428 | btrfs_dev_replace_unlock(dev_replace); | 432 | btrfs_dev_replace_unlock(dev_replace); |
429 | leave_no_lock: | 433 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
430 | if (tgt_device) | ||
431 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | ||
432 | return ret; | 434 | return ret; |
433 | } | 435 | } |
434 | 436 | ||
@@ -507,9 +509,10 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
507 | ret = btrfs_commit_transaction(trans, root); | 509 | ret = btrfs_commit_transaction(trans, root); |
508 | WARN_ON(ret); | 510 | WARN_ON(ret); |
509 | 511 | ||
512 | mutex_lock(&uuid_mutex); | ||
510 | /* keep away write_all_supers() during the finishing procedure */ | 513 | /* keep away write_all_supers() during the finishing procedure */ |
511 | mutex_lock(&root->fs_info->chunk_mutex); | ||
512 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 514 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
515 | mutex_lock(&root->fs_info->chunk_mutex); | ||
513 | btrfs_dev_replace_lock(dev_replace); | 516 | btrfs_dev_replace_lock(dev_replace); |
514 | dev_replace->replace_state = | 517 | dev_replace->replace_state = |
515 | scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED | 518 | scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED |
@@ -532,8 +535,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
532 | src_device->devid, | 535 | src_device->devid, |
533 | rcu_str_deref(tgt_device->name), scrub_ret); | 536 | rcu_str_deref(tgt_device->name), scrub_ret); |
534 | btrfs_dev_replace_unlock(dev_replace); | 537 | btrfs_dev_replace_unlock(dev_replace); |
535 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
536 | mutex_unlock(&root->fs_info->chunk_mutex); | 538 | mutex_unlock(&root->fs_info->chunk_mutex); |
539 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
540 | mutex_unlock(&uuid_mutex); | ||
537 | if (tgt_device) | 541 | if (tgt_device) |
538 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 542 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
539 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 543 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
@@ -542,7 +546,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
542 | } | 546 | } |
543 | 547 | ||
544 | printk_in_rcu(KERN_INFO | 548 | printk_in_rcu(KERN_INFO |
545 | "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n", | 549 | "BTRFS: dev_replace from %s (devid %llu) to %s finished\n", |
546 | src_device->missing ? "<missing disk>" : | 550 | src_device->missing ? "<missing disk>" : |
547 | rcu_str_deref(src_device->name), | 551 | rcu_str_deref(src_device->name), |
548 | src_device->devid, | 552 | src_device->devid, |
@@ -550,23 +554,29 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
550 | tgt_device->is_tgtdev_for_dev_replace = 0; | 554 | tgt_device->is_tgtdev_for_dev_replace = 0; |
551 | tgt_device->devid = src_device->devid; | 555 | tgt_device->devid = src_device->devid; |
552 | src_device->devid = BTRFS_DEV_REPLACE_DEVID; | 556 | src_device->devid = BTRFS_DEV_REPLACE_DEVID; |
553 | tgt_device->bytes_used = src_device->bytes_used; | ||
554 | memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); | 557 | memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp)); |
555 | memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid)); | 558 | memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid)); |
556 | memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); | 559 | memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid)); |
557 | tgt_device->total_bytes = src_device->total_bytes; | 560 | btrfs_device_set_total_bytes(tgt_device, src_device->total_bytes); |
558 | tgt_device->disk_total_bytes = src_device->disk_total_bytes; | 561 | btrfs_device_set_disk_total_bytes(tgt_device, |
559 | tgt_device->bytes_used = src_device->bytes_used; | 562 | src_device->disk_total_bytes); |
563 | btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used); | ||
564 | ASSERT(list_empty(&src_device->resized_list)); | ||
565 | tgt_device->commit_total_bytes = src_device->commit_total_bytes; | ||
566 | tgt_device->commit_bytes_used = src_device->bytes_used; | ||
560 | if (fs_info->sb->s_bdev == src_device->bdev) | 567 | if (fs_info->sb->s_bdev == src_device->bdev) |
561 | fs_info->sb->s_bdev = tgt_device->bdev; | 568 | fs_info->sb->s_bdev = tgt_device->bdev; |
562 | if (fs_info->fs_devices->latest_bdev == src_device->bdev) | 569 | if (fs_info->fs_devices->latest_bdev == src_device->bdev) |
563 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 570 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
564 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 571 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
572 | fs_info->fs_devices->rw_devices++; | ||
565 | 573 | ||
566 | /* replace the sysfs entry */ | 574 | /* replace the sysfs entry */ |
567 | btrfs_kobj_rm_device(fs_info, src_device); | 575 | btrfs_kobj_rm_device(fs_info, src_device); |
568 | btrfs_kobj_add_device(fs_info, tgt_device); | 576 | btrfs_kobj_add_device(fs_info, tgt_device); |
569 | 577 | ||
578 | btrfs_dev_replace_unlock(dev_replace); | ||
579 | |||
570 | btrfs_rm_dev_replace_blocked(fs_info); | 580 | btrfs_rm_dev_replace_blocked(fs_info); |
571 | 581 | ||
572 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 582 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
@@ -580,9 +590,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
580 | * superblock is scratched out so that it is no longer marked to | 590 | * superblock is scratched out so that it is no longer marked to |
581 | * belong to this filesystem. | 591 | * belong to this filesystem. |
582 | */ | 592 | */ |
583 | btrfs_dev_replace_unlock(dev_replace); | ||
584 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
585 | mutex_unlock(&root->fs_info->chunk_mutex); | 593 | mutex_unlock(&root->fs_info->chunk_mutex); |
594 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
595 | mutex_unlock(&uuid_mutex); | ||
586 | 596 | ||
587 | /* write back the superblocks */ | 597 | /* write back the superblocks */ |
588 | trans = btrfs_start_transaction(root, 0); | 598 | trans = btrfs_start_transaction(root, 0); |
@@ -643,6 +653,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | |||
643 | struct btrfs_ioctl_dev_replace_args *args) | 653 | struct btrfs_ioctl_dev_replace_args *args) |
644 | { | 654 | { |
645 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | 655 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; |
656 | struct btrfs_device *srcdev; | ||
646 | 657 | ||
647 | btrfs_dev_replace_lock(dev_replace); | 658 | btrfs_dev_replace_lock(dev_replace); |
648 | /* even if !dev_replace_is_valid, the values are good enough for | 659 | /* even if !dev_replace_is_valid, the values are good enough for |
@@ -665,8 +676,9 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | |||
665 | break; | 676 | break; |
666 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | 677 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: |
667 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | 678 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: |
679 | srcdev = dev_replace->srcdev; | ||
668 | args->status.progress_1000 = div64_u64(dev_replace->cursor_left, | 680 | args->status.progress_1000 = div64_u64(dev_replace->cursor_left, |
669 | div64_u64(dev_replace->srcdev->total_bytes, 1000)); | 681 | div64_u64(btrfs_device_get_total_bytes(srcdev), 1000)); |
670 | break; | 682 | break; |
671 | } | 683 | } |
672 | btrfs_dev_replace_unlock(dev_replace); | 684 | btrfs_dev_replace_unlock(dev_replace); |
@@ -825,7 +837,7 @@ static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info) | |||
825 | 837 | ||
826 | ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid, | 838 | ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid, |
827 | dev_replace->committed_cursor_left, | 839 | dev_replace->committed_cursor_left, |
828 | dev_replace->srcdev->total_bytes, | 840 | btrfs_device_get_total_bytes(dev_replace->srcdev), |
829 | &dev_replace->scrub_progress, 0, 1); | 841 | &dev_replace->scrub_progress, 0, 1); |
830 | ret = btrfs_dev_replace_finishing(fs_info, ret); | 842 | ret = btrfs_dev_replace_finishing(fs_info, ret); |
831 | WARN_ON(ret); | 843 | WARN_ON(ret); |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index a0691df5dcea..fc8df866e919 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -86,7 +86,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, | |||
86 | BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)); | 86 | BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)); |
87 | 87 | ||
88 | key.objectid = objectid; | 88 | key.objectid = objectid; |
89 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 89 | key.type = BTRFS_XATTR_ITEM_KEY; |
90 | key.offset = btrfs_name_hash(name, name_len); | 90 | key.offset = btrfs_name_hash(name, name_len); |
91 | 91 | ||
92 | data_size = sizeof(*dir_item) + name_len + data_len; | 92 | data_size = sizeof(*dir_item) + name_len + data_len; |
@@ -137,7 +137,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
137 | u32 data_size; | 137 | u32 data_size; |
138 | 138 | ||
139 | key.objectid = btrfs_ino(dir); | 139 | key.objectid = btrfs_ino(dir); |
140 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 140 | key.type = BTRFS_DIR_ITEM_KEY; |
141 | key.offset = btrfs_name_hash(name, name_len); | 141 | key.offset = btrfs_name_hash(name, name_len); |
142 | 142 | ||
143 | path = btrfs_alloc_path(); | 143 | path = btrfs_alloc_path(); |
@@ -204,7 +204,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
204 | int cow = mod != 0; | 204 | int cow = mod != 0; |
205 | 205 | ||
206 | key.objectid = dir; | 206 | key.objectid = dir; |
207 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 207 | key.type = BTRFS_DIR_ITEM_KEY; |
208 | 208 | ||
209 | key.offset = btrfs_name_hash(name, name_len); | 209 | key.offset = btrfs_name_hash(name, name_len); |
210 | 210 | ||
@@ -234,7 +234,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | |||
234 | return -ENOMEM; | 234 | return -ENOMEM; |
235 | 235 | ||
236 | key.objectid = dir; | 236 | key.objectid = dir; |
237 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 237 | key.type = BTRFS_DIR_ITEM_KEY; |
238 | key.offset = btrfs_name_hash(name, name_len); | 238 | key.offset = btrfs_name_hash(name, name_len); |
239 | 239 | ||
240 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 240 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
@@ -297,7 +297,7 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
297 | int cow = mod != 0; | 297 | int cow = mod != 0; |
298 | 298 | ||
299 | key.objectid = dir; | 299 | key.objectid = dir; |
300 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | 300 | key.type = BTRFS_DIR_INDEX_KEY; |
301 | key.offset = objectid; | 301 | key.offset = objectid; |
302 | 302 | ||
303 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 303 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
@@ -367,7 +367,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | |||
367 | int cow = mod != 0; | 367 | int cow = mod != 0; |
368 | 368 | ||
369 | key.objectid = dir; | 369 | key.objectid = dir; |
370 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 370 | key.type = BTRFS_XATTR_ITEM_KEY; |
371 | key.offset = btrfs_name_hash(name, name_len); | 371 | key.offset = btrfs_name_hash(name, name_len); |
372 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | 372 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); |
373 | if (ret < 0) | 373 | if (ret < 0) |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0d78dc07792..fa45e3cae40d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -72,21 +72,41 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root); | |||
72 | static void btrfs_error_commit_super(struct btrfs_root *root); | 72 | static void btrfs_error_commit_super(struct btrfs_root *root); |
73 | 73 | ||
74 | /* | 74 | /* |
75 | * end_io_wq structs are used to do processing in task context when an IO is | 75 | * btrfs_end_io_wq structs are used to do processing in task context when an IO |
76 | * complete. This is used during reads to verify checksums, and it is used | 76 | * is complete. This is used during reads to verify checksums, and it is used |
77 | * by writes to insert metadata for new file extents after IO is complete. | 77 | * by writes to insert metadata for new file extents after IO is complete. |
78 | */ | 78 | */ |
79 | struct end_io_wq { | 79 | struct btrfs_end_io_wq { |
80 | struct bio *bio; | 80 | struct bio *bio; |
81 | bio_end_io_t *end_io; | 81 | bio_end_io_t *end_io; |
82 | void *private; | 82 | void *private; |
83 | struct btrfs_fs_info *info; | 83 | struct btrfs_fs_info *info; |
84 | int error; | 84 | int error; |
85 | int metadata; | 85 | enum btrfs_wq_endio_type metadata; |
86 | struct list_head list; | 86 | struct list_head list; |
87 | struct btrfs_work work; | 87 | struct btrfs_work work; |
88 | }; | 88 | }; |
89 | 89 | ||
90 | static struct kmem_cache *btrfs_end_io_wq_cache; | ||
91 | |||
92 | int __init btrfs_end_io_wq_init(void) | ||
93 | { | ||
94 | btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq", | ||
95 | sizeof(struct btrfs_end_io_wq), | ||
96 | 0, | ||
97 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
98 | NULL); | ||
99 | if (!btrfs_end_io_wq_cache) | ||
100 | return -ENOMEM; | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | void btrfs_end_io_wq_exit(void) | ||
105 | { | ||
106 | if (btrfs_end_io_wq_cache) | ||
107 | kmem_cache_destroy(btrfs_end_io_wq_cache); | ||
108 | } | ||
109 | |||
90 | /* | 110 | /* |
91 | * async submit bios are used to offload expensive checksumming | 111 | * async submit bios are used to offload expensive checksumming |
92 | * onto the worker threads. They checksum file and metadata bios | 112 | * onto the worker threads. They checksum file and metadata bios |
@@ -327,8 +347,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
327 | { | 347 | { |
328 | struct extent_state *cached_state = NULL; | 348 | struct extent_state *cached_state = NULL; |
329 | int ret; | 349 | int ret; |
330 | bool need_lock = (current->journal_info == | 350 | bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB); |
331 | (void *)BTRFS_SEND_TRANS_STUB); | ||
332 | 351 | ||
333 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) | 352 | if (!parent_transid || btrfs_header_generation(eb) == parent_transid) |
334 | return 0; | 353 | return 0; |
@@ -348,9 +367,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
348 | ret = 0; | 367 | ret = 0; |
349 | goto out; | 368 | goto out; |
350 | } | 369 | } |
351 | printk_ratelimited("parent transid verify failed on %llu wanted %llu " | 370 | printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", |
352 | "found %llu\n", | 371 | eb->fs_info->sb->s_id, eb->start, |
353 | eb->start, parent_transid, btrfs_header_generation(eb)); | 372 | parent_transid, btrfs_header_generation(eb)); |
354 | ret = 1; | 373 | ret = 1; |
355 | 374 | ||
356 | /* | 375 | /* |
@@ -607,22 +626,22 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
607 | goto err; | 626 | goto err; |
608 | 627 | ||
609 | eb->read_mirror = mirror; | 628 | eb->read_mirror = mirror; |
610 | if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { | 629 | if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { |
611 | ret = -EIO; | 630 | ret = -EIO; |
612 | goto err; | 631 | goto err; |
613 | } | 632 | } |
614 | 633 | ||
615 | found_start = btrfs_header_bytenr(eb); | 634 | found_start = btrfs_header_bytenr(eb); |
616 | if (found_start != eb->start) { | 635 | if (found_start != eb->start) { |
617 | printk_ratelimited(KERN_INFO "BTRFS: bad tree block start " | 636 | printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start " |
618 | "%llu %llu\n", | 637 | "%llu %llu\n", |
619 | found_start, eb->start); | 638 | eb->fs_info->sb->s_id, found_start, eb->start); |
620 | ret = -EIO; | 639 | ret = -EIO; |
621 | goto err; | 640 | goto err; |
622 | } | 641 | } |
623 | if (check_tree_block_fsid(root, eb)) { | 642 | if (check_tree_block_fsid(root, eb)) { |
624 | printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n", | 643 | printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n", |
625 | eb->start); | 644 | eb->fs_info->sb->s_id, eb->start); |
626 | ret = -EIO; | 645 | ret = -EIO; |
627 | goto err; | 646 | goto err; |
628 | } | 647 | } |
@@ -680,7 +699,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) | |||
680 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 699 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
681 | 700 | ||
682 | eb = (struct extent_buffer *)page->private; | 701 | eb = (struct extent_buffer *)page->private; |
683 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 702 | set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); |
684 | eb->read_mirror = failed_mirror; | 703 | eb->read_mirror = failed_mirror; |
685 | atomic_dec(&eb->io_pages); | 704 | atomic_dec(&eb->io_pages); |
686 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) | 705 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) |
@@ -690,7 +709,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) | |||
690 | 709 | ||
691 | static void end_workqueue_bio(struct bio *bio, int err) | 710 | static void end_workqueue_bio(struct bio *bio, int err) |
692 | { | 711 | { |
693 | struct end_io_wq *end_io_wq = bio->bi_private; | 712 | struct btrfs_end_io_wq *end_io_wq = bio->bi_private; |
694 | struct btrfs_fs_info *fs_info; | 713 | struct btrfs_fs_info *fs_info; |
695 | struct btrfs_workqueue *wq; | 714 | struct btrfs_workqueue *wq; |
696 | btrfs_work_func_t func; | 715 | btrfs_work_func_t func; |
@@ -713,7 +732,11 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
713 | func = btrfs_endio_write_helper; | 732 | func = btrfs_endio_write_helper; |
714 | } | 733 | } |
715 | } else { | 734 | } else { |
716 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { | 735 | if (unlikely(end_io_wq->metadata == |
736 | BTRFS_WQ_ENDIO_DIO_REPAIR)) { | ||
737 | wq = fs_info->endio_repair_workers; | ||
738 | func = btrfs_endio_repair_helper; | ||
739 | } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { | ||
717 | wq = fs_info->endio_raid56_workers; | 740 | wq = fs_info->endio_raid56_workers; |
718 | func = btrfs_endio_raid56_helper; | 741 | func = btrfs_endio_raid56_helper; |
719 | } else if (end_io_wq->metadata) { | 742 | } else if (end_io_wq->metadata) { |
@@ -729,19 +752,12 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
729 | btrfs_queue_work(wq, &end_io_wq->work); | 752 | btrfs_queue_work(wq, &end_io_wq->work); |
730 | } | 753 | } |
731 | 754 | ||
732 | /* | ||
733 | * For the metadata arg you want | ||
734 | * | ||
735 | * 0 - if data | ||
736 | * 1 - if normal metadta | ||
737 | * 2 - if writing to the free space cache area | ||
738 | * 3 - raid parity work | ||
739 | */ | ||
740 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 755 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
741 | int metadata) | 756 | enum btrfs_wq_endio_type metadata) |
742 | { | 757 | { |
743 | struct end_io_wq *end_io_wq; | 758 | struct btrfs_end_io_wq *end_io_wq; |
744 | end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); | 759 | |
760 | end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); | ||
745 | if (!end_io_wq) | 761 | if (!end_io_wq) |
746 | return -ENOMEM; | 762 | return -ENOMEM; |
747 | 763 | ||
@@ -925,7 +941,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
925 | * can happen in the async kernel threads | 941 | * can happen in the async kernel threads |
926 | */ | 942 | */ |
927 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | 943 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, |
928 | bio, 1); | 944 | bio, BTRFS_WQ_ENDIO_METADATA); |
929 | if (ret) | 945 | if (ret) |
930 | goto out_w_error; | 946 | goto out_w_error; |
931 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 947 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
@@ -1057,20 +1073,17 @@ static const struct address_space_operations btree_aops = { | |||
1057 | .set_page_dirty = btree_set_page_dirty, | 1073 | .set_page_dirty = btree_set_page_dirty, |
1058 | }; | 1074 | }; |
1059 | 1075 | ||
1060 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 1076 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) |
1061 | u64 parent_transid) | ||
1062 | { | 1077 | { |
1063 | struct extent_buffer *buf = NULL; | 1078 | struct extent_buffer *buf = NULL; |
1064 | struct inode *btree_inode = root->fs_info->btree_inode; | 1079 | struct inode *btree_inode = root->fs_info->btree_inode; |
1065 | int ret = 0; | ||
1066 | 1080 | ||
1067 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1081 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1068 | if (!buf) | 1082 | if (!buf) |
1069 | return 0; | 1083 | return; |
1070 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, | 1084 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, |
1071 | buf, 0, WAIT_NONE, btree_get_extent, 0); | 1085 | buf, 0, WAIT_NONE, btree_get_extent, 0); |
1072 | free_extent_buffer(buf); | 1086 | free_extent_buffer(buf); |
1073 | return ret; | ||
1074 | } | 1087 | } |
1075 | 1088 | ||
1076 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 1089 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
@@ -1106,7 +1119,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | |||
1106 | } | 1119 | } |
1107 | 1120 | ||
1108 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 1121 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
1109 | u64 bytenr, u32 blocksize) | 1122 | u64 bytenr) |
1110 | { | 1123 | { |
1111 | return find_extent_buffer(root->fs_info, bytenr); | 1124 | return find_extent_buffer(root->fs_info, bytenr); |
1112 | } | 1125 | } |
@@ -1114,11 +1127,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | |||
1114 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 1127 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
1115 | u64 bytenr, u32 blocksize) | 1128 | u64 bytenr, u32 blocksize) |
1116 | { | 1129 | { |
1117 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 1130 | if (btrfs_test_is_dummy_root(root)) |
1118 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
1119 | return alloc_test_extent_buffer(root->fs_info, bytenr, | 1131 | return alloc_test_extent_buffer(root->fs_info, bytenr, |
1120 | blocksize); | 1132 | blocksize); |
1121 | #endif | ||
1122 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); | 1133 | return alloc_extent_buffer(root->fs_info, bytenr, blocksize); |
1123 | } | 1134 | } |
1124 | 1135 | ||
@@ -1136,12 +1147,12 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | |||
1136 | } | 1147 | } |
1137 | 1148 | ||
1138 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 1149 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
1139 | u32 blocksize, u64 parent_transid) | 1150 | u64 parent_transid) |
1140 | { | 1151 | { |
1141 | struct extent_buffer *buf = NULL; | 1152 | struct extent_buffer *buf = NULL; |
1142 | int ret; | 1153 | int ret; |
1143 | 1154 | ||
1144 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1155 | buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize); |
1145 | if (!buf) | 1156 | if (!buf) |
1146 | return NULL; | 1157 | return NULL; |
1147 | 1158 | ||
@@ -1200,16 +1211,14 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) | |||
1200 | kfree(writers); | 1211 | kfree(writers); |
1201 | } | 1212 | } |
1202 | 1213 | ||
1203 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1214 | static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, |
1204 | u32 stripesize, struct btrfs_root *root, | 1215 | struct btrfs_root *root, struct btrfs_fs_info *fs_info, |
1205 | struct btrfs_fs_info *fs_info, | ||
1206 | u64 objectid) | 1216 | u64 objectid) |
1207 | { | 1217 | { |
1208 | root->node = NULL; | 1218 | root->node = NULL; |
1209 | root->commit_root = NULL; | 1219 | root->commit_root = NULL; |
1210 | root->sectorsize = sectorsize; | 1220 | root->sectorsize = sectorsize; |
1211 | root->nodesize = nodesize; | 1221 | root->nodesize = nodesize; |
1212 | root->leafsize = leafsize; | ||
1213 | root->stripesize = stripesize; | 1222 | root->stripesize = stripesize; |
1214 | root->state = 0; | 1223 | root->state = 0; |
1215 | root->orphan_cleanup_state = 0; | 1224 | root->orphan_cleanup_state = 0; |
@@ -1295,7 +1304,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) | |||
1295 | root = btrfs_alloc_root(NULL); | 1304 | root = btrfs_alloc_root(NULL); |
1296 | if (!root) | 1305 | if (!root) |
1297 | return ERR_PTR(-ENOMEM); | 1306 | return ERR_PTR(-ENOMEM); |
1298 | __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); | 1307 | __setup_root(4096, 4096, 4096, root, NULL, 1); |
1299 | set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); | 1308 | set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); |
1300 | root->alloc_bytenr = 0; | 1309 | root->alloc_bytenr = 0; |
1301 | 1310 | ||
@@ -1318,15 +1327,13 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, | |||
1318 | if (!root) | 1327 | if (!root) |
1319 | return ERR_PTR(-ENOMEM); | 1328 | return ERR_PTR(-ENOMEM); |
1320 | 1329 | ||
1321 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1330 | __setup_root(tree_root->nodesize, tree_root->sectorsize, |
1322 | tree_root->sectorsize, tree_root->stripesize, | 1331 | tree_root->stripesize, root, fs_info, objectid); |
1323 | root, fs_info, objectid); | ||
1324 | root->root_key.objectid = objectid; | 1332 | root->root_key.objectid = objectid; |
1325 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | 1333 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; |
1326 | root->root_key.offset = 0; | 1334 | root->root_key.offset = 0; |
1327 | 1335 | ||
1328 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 1336 | leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); |
1329 | 0, objectid, NULL, 0, 0, 0); | ||
1330 | if (IS_ERR(leaf)) { | 1337 | if (IS_ERR(leaf)) { |
1331 | ret = PTR_ERR(leaf); | 1338 | ret = PTR_ERR(leaf); |
1332 | leaf = NULL; | 1339 | leaf = NULL; |
@@ -1396,9 +1403,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1396 | if (!root) | 1403 | if (!root) |
1397 | return ERR_PTR(-ENOMEM); | 1404 | return ERR_PTR(-ENOMEM); |
1398 | 1405 | ||
1399 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1406 | __setup_root(tree_root->nodesize, tree_root->sectorsize, |
1400 | tree_root->sectorsize, tree_root->stripesize, | 1407 | tree_root->stripesize, root, fs_info, |
1401 | root, fs_info, BTRFS_TREE_LOG_OBJECTID); | 1408 | BTRFS_TREE_LOG_OBJECTID); |
1402 | 1409 | ||
1403 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | 1410 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; |
1404 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | 1411 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -1413,9 +1420,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | |||
1413 | * updated (along with back refs to the log tree). | 1420 | * updated (along with back refs to the log tree). |
1414 | */ | 1421 | */ |
1415 | 1422 | ||
1416 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 1423 | leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID, |
1417 | BTRFS_TREE_LOG_OBJECTID, NULL, | 1424 | NULL, 0, 0, 0); |
1418 | 0, 0, 0); | ||
1419 | if (IS_ERR(leaf)) { | 1425 | if (IS_ERR(leaf)) { |
1420 | kfree(root); | 1426 | kfree(root); |
1421 | return ERR_CAST(leaf); | 1427 | return ERR_CAST(leaf); |
@@ -1465,7 +1471,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1465 | btrfs_set_stack_inode_generation(inode_item, 1); | 1471 | btrfs_set_stack_inode_generation(inode_item, 1); |
1466 | btrfs_set_stack_inode_size(inode_item, 3); | 1472 | btrfs_set_stack_inode_size(inode_item, 3); |
1467 | btrfs_set_stack_inode_nlink(inode_item, 1); | 1473 | btrfs_set_stack_inode_nlink(inode_item, 1); |
1468 | btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); | 1474 | btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); |
1469 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); | 1475 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); |
1470 | 1476 | ||
1471 | btrfs_set_root_node(&log_root->root_item, log_root->node); | 1477 | btrfs_set_root_node(&log_root->root_item, log_root->node); |
@@ -1485,7 +1491,6 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, | |||
1485 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1491 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1486 | struct btrfs_path *path; | 1492 | struct btrfs_path *path; |
1487 | u64 generation; | 1493 | u64 generation; |
1488 | u32 blocksize; | ||
1489 | int ret; | 1494 | int ret; |
1490 | 1495 | ||
1491 | path = btrfs_alloc_path(); | 1496 | path = btrfs_alloc_path(); |
@@ -1498,9 +1503,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, | |||
1498 | goto alloc_fail; | 1503 | goto alloc_fail; |
1499 | } | 1504 | } |
1500 | 1505 | ||
1501 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1506 | __setup_root(tree_root->nodesize, tree_root->sectorsize, |
1502 | tree_root->sectorsize, tree_root->stripesize, | 1507 | tree_root->stripesize, root, fs_info, key->objectid); |
1503 | root, fs_info, key->objectid); | ||
1504 | 1508 | ||
1505 | ret = btrfs_find_root(tree_root, key, path, | 1509 | ret = btrfs_find_root(tree_root, key, path, |
1506 | &root->root_item, &root->root_key); | 1510 | &root->root_item, &root->root_key); |
@@ -1511,9 +1515,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, | |||
1511 | } | 1515 | } |
1512 | 1516 | ||
1513 | generation = btrfs_root_generation(&root->root_item); | 1517 | generation = btrfs_root_generation(&root->root_item); |
1514 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | ||
1515 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1518 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1516 | blocksize, generation); | 1519 | generation); |
1517 | if (!root->node) { | 1520 | if (!root->node) { |
1518 | ret = -ENOMEM; | 1521 | ret = -ENOMEM; |
1519 | goto find_fail; | 1522 | goto find_fail; |
@@ -1573,8 +1576,8 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
1573 | root->subv_writers = writers; | 1576 | root->subv_writers = writers; |
1574 | 1577 | ||
1575 | btrfs_init_free_ino_ctl(root); | 1578 | btrfs_init_free_ino_ctl(root); |
1576 | spin_lock_init(&root->cache_lock); | 1579 | spin_lock_init(&root->ino_cache_lock); |
1577 | init_waitqueue_head(&root->cache_wait); | 1580 | init_waitqueue_head(&root->ino_cache_wait); |
1578 | 1581 | ||
1579 | ret = get_anon_bdev(&root->anon_dev); | 1582 | ret = get_anon_bdev(&root->anon_dev); |
1580 | if (ret) | 1583 | if (ret) |
@@ -1708,10 +1711,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1708 | return ret; | 1711 | return ret; |
1709 | } | 1712 | } |
1710 | 1713 | ||
1711 | /* | ||
1712 | * If this fails, caller must call bdi_destroy() to get rid of the | ||
1713 | * bdi again. | ||
1714 | */ | ||
1715 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | 1714 | static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) |
1716 | { | 1715 | { |
1717 | int err; | 1716 | int err; |
@@ -1734,16 +1733,16 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1734 | static void end_workqueue_fn(struct btrfs_work *work) | 1733 | static void end_workqueue_fn(struct btrfs_work *work) |
1735 | { | 1734 | { |
1736 | struct bio *bio; | 1735 | struct bio *bio; |
1737 | struct end_io_wq *end_io_wq; | 1736 | struct btrfs_end_io_wq *end_io_wq; |
1738 | int error; | 1737 | int error; |
1739 | 1738 | ||
1740 | end_io_wq = container_of(work, struct end_io_wq, work); | 1739 | end_io_wq = container_of(work, struct btrfs_end_io_wq, work); |
1741 | bio = end_io_wq->bio; | 1740 | bio = end_io_wq->bio; |
1742 | 1741 | ||
1743 | error = end_io_wq->error; | 1742 | error = end_io_wq->error; |
1744 | bio->bi_private = end_io_wq->private; | 1743 | bio->bi_private = end_io_wq->private; |
1745 | bio->bi_end_io = end_io_wq->end_io; | 1744 | bio->bi_end_io = end_io_wq->end_io; |
1746 | kfree(end_io_wq); | 1745 | kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); |
1747 | bio_endio_nodec(bio, error); | 1746 | bio_endio_nodec(bio, error); |
1748 | } | 1747 | } |
1749 | 1748 | ||
@@ -1772,6 +1771,7 @@ static int cleaner_kthread(void *arg) | |||
1772 | } | 1771 | } |
1773 | 1772 | ||
1774 | btrfs_run_delayed_iputs(root); | 1773 | btrfs_run_delayed_iputs(root); |
1774 | btrfs_delete_unused_bgs(root->fs_info); | ||
1775 | again = btrfs_clean_one_deleted_snapshot(root); | 1775 | again = btrfs_clean_one_deleted_snapshot(root); |
1776 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1776 | mutex_unlock(&root->fs_info->cleaner_mutex); |
1777 | 1777 | ||
@@ -2063,6 +2063,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | |||
2063 | btrfs_destroy_workqueue(fs_info->endio_workers); | 2063 | btrfs_destroy_workqueue(fs_info->endio_workers); |
2064 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); | 2064 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); |
2065 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); | 2065 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); |
2066 | btrfs_destroy_workqueue(fs_info->endio_repair_workers); | ||
2066 | btrfs_destroy_workqueue(fs_info->rmw_workers); | 2067 | btrfs_destroy_workqueue(fs_info->rmw_workers); |
2067 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); | 2068 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); |
2068 | btrfs_destroy_workqueue(fs_info->endio_write_workers); | 2069 | btrfs_destroy_workqueue(fs_info->endio_write_workers); |
@@ -2143,8 +2144,6 @@ int open_ctree(struct super_block *sb, | |||
2143 | { | 2144 | { |
2144 | u32 sectorsize; | 2145 | u32 sectorsize; |
2145 | u32 nodesize; | 2146 | u32 nodesize; |
2146 | u32 leafsize; | ||
2147 | u32 blocksize; | ||
2148 | u32 stripesize; | 2147 | u32 stripesize; |
2149 | u64 generation; | 2148 | u64 generation; |
2150 | u64 features; | 2149 | u64 features; |
@@ -2233,6 +2232,7 @@ int open_ctree(struct super_block *sb, | |||
2233 | spin_lock_init(&fs_info->super_lock); | 2232 | spin_lock_init(&fs_info->super_lock); |
2234 | spin_lock_init(&fs_info->qgroup_op_lock); | 2233 | spin_lock_init(&fs_info->qgroup_op_lock); |
2235 | spin_lock_init(&fs_info->buffer_lock); | 2234 | spin_lock_init(&fs_info->buffer_lock); |
2235 | spin_lock_init(&fs_info->unused_bgs_lock); | ||
2236 | rwlock_init(&fs_info->tree_mod_log_lock); | 2236 | rwlock_init(&fs_info->tree_mod_log_lock); |
2237 | mutex_init(&fs_info->reloc_mutex); | 2237 | mutex_init(&fs_info->reloc_mutex); |
2238 | mutex_init(&fs_info->delalloc_root_mutex); | 2238 | mutex_init(&fs_info->delalloc_root_mutex); |
@@ -2242,6 +2242,7 @@ int open_ctree(struct super_block *sb, | |||
2242 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 2242 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
2243 | INIT_LIST_HEAD(&fs_info->space_info); | 2243 | INIT_LIST_HEAD(&fs_info->space_info); |
2244 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); | 2244 | INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); |
2245 | INIT_LIST_HEAD(&fs_info->unused_bgs); | ||
2245 | btrfs_mapping_init(&fs_info->mapping_tree); | 2246 | btrfs_mapping_init(&fs_info->mapping_tree); |
2246 | btrfs_init_block_rsv(&fs_info->global_block_rsv, | 2247 | btrfs_init_block_rsv(&fs_info->global_block_rsv, |
2247 | BTRFS_BLOCK_RSV_GLOBAL); | 2248 | BTRFS_BLOCK_RSV_GLOBAL); |
@@ -2260,7 +2261,7 @@ int open_ctree(struct super_block *sb, | |||
2260 | atomic_set(&fs_info->qgroup_op_seq, 0); | 2261 | atomic_set(&fs_info->qgroup_op_seq, 0); |
2261 | atomic64_set(&fs_info->tree_mod_seq, 0); | 2262 | atomic64_set(&fs_info->tree_mod_seq, 0); |
2262 | fs_info->sb = sb; | 2263 | fs_info->sb = sb; |
2263 | fs_info->max_inline = 8192 * 1024; | 2264 | fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; |
2264 | fs_info->metadata_ratio = 0; | 2265 | fs_info->metadata_ratio = 0; |
2265 | fs_info->defrag_inodes = RB_ROOT; | 2266 | fs_info->defrag_inodes = RB_ROOT; |
2266 | fs_info->free_chunk_space = 0; | 2267 | fs_info->free_chunk_space = 0; |
@@ -2389,7 +2390,7 @@ int open_ctree(struct super_block *sb, | |||
2389 | goto fail_alloc; | 2390 | goto fail_alloc; |
2390 | } | 2391 | } |
2391 | 2392 | ||
2392 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 2393 | __setup_root(4096, 4096, 4096, tree_root, |
2393 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 2394 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
2394 | 2395 | ||
2395 | invalidate_bdev(fs_devices->latest_bdev); | 2396 | invalidate_bdev(fs_devices->latest_bdev); |
@@ -2469,19 +2470,22 @@ int open_ctree(struct super_block *sb, | |||
2469 | goto fail_alloc; | 2470 | goto fail_alloc; |
2470 | } | 2471 | } |
2471 | 2472 | ||
2472 | if (btrfs_super_leafsize(disk_super) != | 2473 | /* |
2474 | * Leafsize and nodesize were always equal, this is only a sanity check. | ||
2475 | */ | ||
2476 | if (le32_to_cpu(disk_super->__unused_leafsize) != | ||
2473 | btrfs_super_nodesize(disk_super)) { | 2477 | btrfs_super_nodesize(disk_super)) { |
2474 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " | 2478 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " |
2475 | "blocksizes don't match. node %d leaf %d\n", | 2479 | "blocksizes don't match. node %d leaf %d\n", |
2476 | btrfs_super_nodesize(disk_super), | 2480 | btrfs_super_nodesize(disk_super), |
2477 | btrfs_super_leafsize(disk_super)); | 2481 | le32_to_cpu(disk_super->__unused_leafsize)); |
2478 | err = -EINVAL; | 2482 | err = -EINVAL; |
2479 | goto fail_alloc; | 2483 | goto fail_alloc; |
2480 | } | 2484 | } |
2481 | if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { | 2485 | if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { |
2482 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " | 2486 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " |
2483 | "blocksize (%d) was too large\n", | 2487 | "blocksize (%d) was too large\n", |
2484 | btrfs_super_leafsize(disk_super)); | 2488 | btrfs_super_nodesize(disk_super)); |
2485 | err = -EINVAL; | 2489 | err = -EINVAL; |
2486 | goto fail_alloc; | 2490 | goto fail_alloc; |
2487 | } | 2491 | } |
@@ -2498,17 +2502,16 @@ int open_ctree(struct super_block *sb, | |||
2498 | * flag our filesystem as having big metadata blocks if | 2502 | * flag our filesystem as having big metadata blocks if |
2499 | * they are bigger than the page size | 2503 | * they are bigger than the page size |
2500 | */ | 2504 | */ |
2501 | if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { | 2505 | if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { |
2502 | if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) | 2506 | if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) |
2503 | printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); | 2507 | printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); |
2504 | features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; | 2508 | features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; |
2505 | } | 2509 | } |
2506 | 2510 | ||
2507 | nodesize = btrfs_super_nodesize(disk_super); | 2511 | nodesize = btrfs_super_nodesize(disk_super); |
2508 | leafsize = btrfs_super_leafsize(disk_super); | ||
2509 | sectorsize = btrfs_super_sectorsize(disk_super); | 2512 | sectorsize = btrfs_super_sectorsize(disk_super); |
2510 | stripesize = btrfs_super_stripesize(disk_super); | 2513 | stripesize = btrfs_super_stripesize(disk_super); |
2511 | fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); | 2514 | fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); |
2512 | fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); | 2515 | fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); |
2513 | 2516 | ||
2514 | /* | 2517 | /* |
@@ -2516,7 +2519,7 @@ int open_ctree(struct super_block *sb, | |||
2516 | * extent buffers for the same range. It leads to corruptions | 2519 | * extent buffers for the same range. It leads to corruptions |
2517 | */ | 2520 | */ |
2518 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && | 2521 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && |
2519 | (sectorsize != leafsize)) { | 2522 | (sectorsize != nodesize)) { |
2520 | printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " | 2523 | printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " |
2521 | "are not allowed for mixed block groups on %s\n", | 2524 | "are not allowed for mixed block groups on %s\n", |
2522 | sb->s_id); | 2525 | sb->s_id); |
@@ -2579,6 +2582,8 @@ int open_ctree(struct super_block *sb, | |||
2579 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); | 2582 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); |
2580 | fs_info->endio_raid56_workers = | 2583 | fs_info->endio_raid56_workers = |
2581 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); | 2584 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); |
2585 | fs_info->endio_repair_workers = | ||
2586 | btrfs_alloc_workqueue("endio-repair", flags, 1, 0); | ||
2582 | fs_info->rmw_workers = | 2587 | fs_info->rmw_workers = |
2583 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); | 2588 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); |
2584 | fs_info->endio_write_workers = | 2589 | fs_info->endio_write_workers = |
@@ -2600,11 +2605,12 @@ int open_ctree(struct super_block *sb, | |||
2600 | fs_info->submit_workers && fs_info->flush_workers && | 2605 | fs_info->submit_workers && fs_info->flush_workers && |
2601 | fs_info->endio_workers && fs_info->endio_meta_workers && | 2606 | fs_info->endio_workers && fs_info->endio_meta_workers && |
2602 | fs_info->endio_meta_write_workers && | 2607 | fs_info->endio_meta_write_workers && |
2608 | fs_info->endio_repair_workers && | ||
2603 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && | 2609 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && |
2604 | fs_info->endio_freespace_worker && fs_info->rmw_workers && | 2610 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
2605 | fs_info->caching_workers && fs_info->readahead_workers && | 2611 | fs_info->caching_workers && fs_info->readahead_workers && |
2606 | fs_info->fixup_workers && fs_info->delayed_workers && | 2612 | fs_info->fixup_workers && fs_info->delayed_workers && |
2607 | fs_info->fixup_workers && fs_info->extent_workers && | 2613 | fs_info->extent_workers && |
2608 | fs_info->qgroup_rescan_workers)) { | 2614 | fs_info->qgroup_rescan_workers)) { |
2609 | err = -ENOMEM; | 2615 | err = -ENOMEM; |
2610 | goto fail_sb_buffer; | 2616 | goto fail_sb_buffer; |
@@ -2615,7 +2621,6 @@ int open_ctree(struct super_block *sb, | |||
2615 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); | 2621 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); |
2616 | 2622 | ||
2617 | tree_root->nodesize = nodesize; | 2623 | tree_root->nodesize = nodesize; |
2618 | tree_root->leafsize = leafsize; | ||
2619 | tree_root->sectorsize = sectorsize; | 2624 | tree_root->sectorsize = sectorsize; |
2620 | tree_root->stripesize = stripesize; | 2625 | tree_root->stripesize = stripesize; |
2621 | 2626 | ||
@@ -2642,16 +2647,14 @@ int open_ctree(struct super_block *sb, | |||
2642 | goto fail_sb_buffer; | 2647 | goto fail_sb_buffer; |
2643 | } | 2648 | } |
2644 | 2649 | ||
2645 | blocksize = btrfs_level_size(tree_root, | ||
2646 | btrfs_super_chunk_root_level(disk_super)); | ||
2647 | generation = btrfs_super_chunk_root_generation(disk_super); | 2650 | generation = btrfs_super_chunk_root_generation(disk_super); |
2648 | 2651 | ||
2649 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 2652 | __setup_root(nodesize, sectorsize, stripesize, chunk_root, |
2650 | chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); | 2653 | fs_info, BTRFS_CHUNK_TREE_OBJECTID); |
2651 | 2654 | ||
2652 | chunk_root->node = read_tree_block(chunk_root, | 2655 | chunk_root->node = read_tree_block(chunk_root, |
2653 | btrfs_super_chunk_root(disk_super), | 2656 | btrfs_super_chunk_root(disk_super), |
2654 | blocksize, generation); | 2657 | generation); |
2655 | if (!chunk_root->node || | 2658 | if (!chunk_root->node || |
2656 | !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { | 2659 | !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { |
2657 | printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", | 2660 | printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", |
@@ -2684,13 +2687,11 @@ int open_ctree(struct super_block *sb, | |||
2684 | } | 2687 | } |
2685 | 2688 | ||
2686 | retry_root_backup: | 2689 | retry_root_backup: |
2687 | blocksize = btrfs_level_size(tree_root, | ||
2688 | btrfs_super_root_level(disk_super)); | ||
2689 | generation = btrfs_super_generation(disk_super); | 2690 | generation = btrfs_super_generation(disk_super); |
2690 | 2691 | ||
2691 | tree_root->node = read_tree_block(tree_root, | 2692 | tree_root->node = read_tree_block(tree_root, |
2692 | btrfs_super_root(disk_super), | 2693 | btrfs_super_root(disk_super), |
2693 | blocksize, generation); | 2694 | generation); |
2694 | if (!tree_root->node || | 2695 | if (!tree_root->node || |
2695 | !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { | 2696 | !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { |
2696 | printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", | 2697 | printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", |
@@ -2859,9 +2860,6 @@ retry_root_backup: | |||
2859 | err = -EIO; | 2860 | err = -EIO; |
2860 | goto fail_qgroup; | 2861 | goto fail_qgroup; |
2861 | } | 2862 | } |
2862 | blocksize = | ||
2863 | btrfs_level_size(tree_root, | ||
2864 | btrfs_super_log_root_level(disk_super)); | ||
2865 | 2863 | ||
2866 | log_tree_root = btrfs_alloc_root(fs_info); | 2864 | log_tree_root = btrfs_alloc_root(fs_info); |
2867 | if (!log_tree_root) { | 2865 | if (!log_tree_root) { |
@@ -2869,11 +2867,10 @@ retry_root_backup: | |||
2869 | goto fail_qgroup; | 2867 | goto fail_qgroup; |
2870 | } | 2868 | } |
2871 | 2869 | ||
2872 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 2870 | __setup_root(nodesize, sectorsize, stripesize, |
2873 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | 2871 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); |
2874 | 2872 | ||
2875 | log_tree_root->node = read_tree_block(tree_root, bytenr, | 2873 | log_tree_root->node = read_tree_block(tree_root, bytenr, |
2876 | blocksize, | ||
2877 | generation + 1); | 2874 | generation + 1); |
2878 | if (!log_tree_root->node || | 2875 | if (!log_tree_root->node || |
2879 | !extent_buffer_uptodate(log_tree_root->node)) { | 2876 | !extent_buffer_uptodate(log_tree_root->node)) { |
@@ -2980,6 +2977,8 @@ retry_root_backup: | |||
2980 | fs_info->update_uuid_tree_gen = 1; | 2977 | fs_info->update_uuid_tree_gen = 1; |
2981 | } | 2978 | } |
2982 | 2979 | ||
2980 | fs_info->open = 1; | ||
2981 | |||
2983 | return 0; | 2982 | return 0; |
2984 | 2983 | ||
2985 | fail_qgroup: | 2984 | fail_qgroup: |
@@ -3139,7 +3138,8 @@ static int write_dev_supers(struct btrfs_device *device, | |||
3139 | 3138 | ||
3140 | for (i = 0; i < max_mirrors; i++) { | 3139 | for (i = 0; i < max_mirrors; i++) { |
3141 | bytenr = btrfs_sb_offset(i); | 3140 | bytenr = btrfs_sb_offset(i); |
3142 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) | 3141 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= |
3142 | device->commit_total_bytes) | ||
3143 | break; | 3143 | break; |
3144 | 3144 | ||
3145 | if (wait) { | 3145 | if (wait) { |
@@ -3456,8 +3456,9 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
3456 | btrfs_set_stack_device_type(dev_item, dev->type); | 3456 | btrfs_set_stack_device_type(dev_item, dev->type); |
3457 | btrfs_set_stack_device_id(dev_item, dev->devid); | 3457 | btrfs_set_stack_device_id(dev_item, dev->devid); |
3458 | btrfs_set_stack_device_total_bytes(dev_item, | 3458 | btrfs_set_stack_device_total_bytes(dev_item, |
3459 | dev->disk_total_bytes); | 3459 | dev->commit_total_bytes); |
3460 | btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); | 3460 | btrfs_set_stack_device_bytes_used(dev_item, |
3461 | dev->commit_bytes_used); | ||
3461 | btrfs_set_stack_device_io_align(dev_item, dev->io_align); | 3462 | btrfs_set_stack_device_io_align(dev_item, dev->io_align); |
3462 | btrfs_set_stack_device_io_width(dev_item, dev->io_width); | 3463 | btrfs_set_stack_device_io_width(dev_item, dev->io_width); |
3463 | btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); | 3464 | btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); |
@@ -3532,7 +3533,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, | |||
3532 | 3533 | ||
3533 | static void free_fs_root(struct btrfs_root *root) | 3534 | static void free_fs_root(struct btrfs_root *root) |
3534 | { | 3535 | { |
3535 | iput(root->cache_inode); | 3536 | iput(root->ino_cache_inode); |
3536 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 3537 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); |
3537 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | 3538 | btrfs_free_block_rsv(root, root->orphan_block_rsv); |
3538 | root->orphan_block_rsv = NULL; | 3539 | root->orphan_block_rsv = NULL; |
@@ -3623,7 +3624,7 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
3623 | return btrfs_commit_transaction(trans, root); | 3624 | return btrfs_commit_transaction(trans, root); |
3624 | } | 3625 | } |
3625 | 3626 | ||
3626 | int close_ctree(struct btrfs_root *root) | 3627 | void close_ctree(struct btrfs_root *root) |
3627 | { | 3628 | { |
3628 | struct btrfs_fs_info *fs_info = root->fs_info; | 3629 | struct btrfs_fs_info *fs_info = root->fs_info; |
3629 | int ret; | 3630 | int ret; |
@@ -3689,6 +3690,7 @@ int close_ctree(struct btrfs_root *root) | |||
3689 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 3690 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
3690 | btrfs_stop_all_workers(fs_info); | 3691 | btrfs_stop_all_workers(fs_info); |
3691 | 3692 | ||
3693 | fs_info->open = 0; | ||
3692 | free_root_pointers(fs_info, 1); | 3694 | free_root_pointers(fs_info, 1); |
3693 | 3695 | ||
3694 | iput(fs_info->btree_inode); | 3696 | iput(fs_info->btree_inode); |
@@ -3711,8 +3713,6 @@ int close_ctree(struct btrfs_root *root) | |||
3711 | 3713 | ||
3712 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | 3714 | btrfs_free_block_rsv(root, root->orphan_block_rsv); |
3713 | root->orphan_block_rsv = NULL; | 3715 | root->orphan_block_rsv = NULL; |
3714 | |||
3715 | return 0; | ||
3716 | } | 3716 | } |
3717 | 3717 | ||
3718 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | 3718 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
@@ -3814,10 +3814,73 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
3814 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 3814 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
3815 | int read_only) | 3815 | int read_only) |
3816 | { | 3816 | { |
3817 | struct btrfs_super_block *sb = fs_info->super_copy; | ||
3818 | int ret = 0; | ||
3819 | |||
3820 | if (sb->root_level > BTRFS_MAX_LEVEL) { | ||
3821 | printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", | ||
3822 | sb->root_level, BTRFS_MAX_LEVEL); | ||
3823 | ret = -EINVAL; | ||
3824 | } | ||
3825 | if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { | ||
3826 | printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", | ||
3827 | sb->chunk_root_level, BTRFS_MAX_LEVEL); | ||
3828 | ret = -EINVAL; | ||
3829 | } | ||
3830 | if (sb->log_root_level > BTRFS_MAX_LEVEL) { | ||
3831 | printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", | ||
3832 | sb->log_root_level, BTRFS_MAX_LEVEL); | ||
3833 | ret = -EINVAL; | ||
3834 | } | ||
3835 | |||
3817 | /* | 3836 | /* |
3818 | * Placeholder for checks | 3837 | * The common minimum, we don't know if we can trust the nodesize/sectorsize |
3838 | * items yet, they'll be verified later. Issue just a warning. | ||
3819 | */ | 3839 | */ |
3820 | return 0; | 3840 | if (!IS_ALIGNED(sb->root, 4096)) |
3841 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | ||
3842 | sb->root); | ||
3843 | if (!IS_ALIGNED(sb->chunk_root, 4096)) | ||
3844 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | ||
3845 | sb->chunk_root); | ||
3846 | if (!IS_ALIGNED(sb->log_root, 4096)) | ||
3847 | printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", | ||
3848 | sb->log_root); | ||
3849 | |||
3850 | if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { | ||
3851 | printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", | ||
3852 | fs_info->fsid, sb->dev_item.fsid); | ||
3853 | ret = -EINVAL; | ||
3854 | } | ||
3855 | |||
3856 | /* | ||
3857 | * Hint to catch really bogus numbers, bitflips or so, more exact checks are | ||
3858 | * done later | ||
3859 | */ | ||
3860 | if (sb->num_devices > (1UL << 31)) | ||
3861 | printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", | ||
3862 | sb->num_devices); | ||
3863 | |||
3864 | if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { | ||
3865 | printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", | ||
3866 | sb->bytenr, BTRFS_SUPER_INFO_OFFSET); | ||
3867 | ret = -EINVAL; | ||
3868 | } | ||
3869 | |||
3870 | /* | ||
3871 | * The generation is a global counter, we'll trust it more than the others | ||
3872 | * but it's still possible that it's the one that's wrong. | ||
3873 | */ | ||
3874 | if (sb->generation < sb->chunk_root_generation) | ||
3875 | printk(KERN_WARNING | ||
3876 | "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", | ||
3877 | sb->generation, sb->chunk_root_generation); | ||
3878 | if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) | ||
3879 | printk(KERN_WARNING | ||
3880 | "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", | ||
3881 | sb->generation, sb->cache_generation); | ||
3882 | |||
3883 | return ret; | ||
3821 | } | 3884 | } |
3822 | 3885 | ||
3823 | static void btrfs_error_commit_super(struct btrfs_root *root) | 3886 | static void btrfs_error_commit_super(struct btrfs_root *root) |
@@ -4009,9 +4072,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, | |||
4009 | 4072 | ||
4010 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); | 4073 | clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); |
4011 | while (start <= end) { | 4074 | while (start <= end) { |
4012 | eb = btrfs_find_tree_block(root, start, | 4075 | eb = btrfs_find_tree_block(root, start); |
4013 | root->leafsize); | 4076 | start += root->nodesize; |
4014 | start += root->leafsize; | ||
4015 | if (!eb) | 4077 | if (!eb) |
4016 | continue; | 4078 | continue; |
4017 | wait_on_extent_buffer_writeback(eb); | 4079 | wait_on_extent_buffer_writeback(eb); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 23ce3ceba0a9..414651821fb3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -25,11 +25,12 @@ | |||
25 | #define BTRFS_SUPER_MIRROR_MAX 3 | 25 | #define BTRFS_SUPER_MIRROR_MAX 3 |
26 | #define BTRFS_SUPER_MIRROR_SHIFT 12 | 26 | #define BTRFS_SUPER_MIRROR_SHIFT 12 |
27 | 27 | ||
28 | enum { | 28 | enum btrfs_wq_endio_type { |
29 | BTRFS_WQ_ENDIO_DATA = 0, | 29 | BTRFS_WQ_ENDIO_DATA = 0, |
30 | BTRFS_WQ_ENDIO_METADATA = 1, | 30 | BTRFS_WQ_ENDIO_METADATA = 1, |
31 | BTRFS_WQ_ENDIO_FREE_SPACE = 2, | 31 | BTRFS_WQ_ENDIO_FREE_SPACE = 2, |
32 | BTRFS_WQ_ENDIO_RAID56 = 3, | 32 | BTRFS_WQ_ENDIO_RAID56 = 3, |
33 | BTRFS_WQ_ENDIO_DIO_REPAIR = 4, | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | static inline u64 btrfs_sb_offset(int mirror) | 36 | static inline u64 btrfs_sb_offset(int mirror) |
@@ -44,9 +45,8 @@ struct btrfs_device; | |||
44 | struct btrfs_fs_devices; | 45 | struct btrfs_fs_devices; |
45 | 46 | ||
46 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 47 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
47 | u32 blocksize, u64 parent_transid); | 48 | u64 parent_transid); |
48 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 49 | void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); |
49 | u64 parent_transid); | ||
50 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 50 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
51 | int mirror_num, struct extent_buffer **eb); | 51 | int mirror_num, struct extent_buffer **eb); |
52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | 52 | struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, |
@@ -56,13 +56,13 @@ void clean_tree_block(struct btrfs_trans_handle *trans, | |||
56 | int open_ctree(struct super_block *sb, | 56 | int open_ctree(struct super_block *sb, |
57 | struct btrfs_fs_devices *fs_devices, | 57 | struct btrfs_fs_devices *fs_devices, |
58 | char *options); | 58 | char *options); |
59 | int close_ctree(struct btrfs_root *root); | 59 | void close_ctree(struct btrfs_root *root); |
60 | int write_ctree_super(struct btrfs_trans_handle *trans, | 60 | int write_ctree_super(struct btrfs_trans_handle *trans, |
61 | struct btrfs_root *root, int max_mirrors); | 61 | struct btrfs_root *root, int max_mirrors); |
62 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); | 62 | struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); |
63 | int btrfs_commit_super(struct btrfs_root *root); | 63 | int btrfs_commit_super(struct btrfs_root *root); |
64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 64 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
65 | u64 bytenr, u32 blocksize); | 65 | u64 bytenr); |
66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | 66 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, |
67 | struct btrfs_key *location); | 67 | struct btrfs_key *location); |
68 | int btrfs_init_fs_root(struct btrfs_root *root); | 68 | int btrfs_init_fs_root(struct btrfs_root *root); |
@@ -119,7 +119,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); | |||
119 | u32 btrfs_csum_data(char *data, u32 seed, size_t len); | 119 | u32 btrfs_csum_data(char *data, u32 seed, size_t len); |
120 | void btrfs_csum_final(u32 crc, char *result); | 120 | void btrfs_csum_final(u32 crc, char *result); |
121 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 121 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
122 | int metadata); | 122 | enum btrfs_wq_endio_type metadata); |
123 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 123 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
124 | int rw, struct bio *bio, int mirror_num, | 124 | int rw, struct bio *bio, int mirror_num, |
125 | unsigned long bio_flags, u64 bio_offset, | 125 | unsigned long bio_flags, u64 bio_offset, |
@@ -141,6 +141,8 @@ int btree_lock_page_hook(struct page *page, void *data, | |||
141 | void (*flush_fn)(void *)); | 141 | void (*flush_fn)(void *)); |
142 | int btrfs_calc_num_tolerated_disk_barrier_failures( | 142 | int btrfs_calc_num_tolerated_disk_barrier_failures( |
143 | struct btrfs_fs_info *fs_info); | 143 | struct btrfs_fs_info *fs_info); |
144 | int __init btrfs_end_io_wq_init(void); | ||
145 | void btrfs_end_io_wq_exit(void); | ||
144 | 146 | ||
145 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 147 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
146 | void btrfs_init_lockdep(void); | 148 | void btrfs_init_lockdep(void); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 41422a3de8ed..37d164540c3a 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -70,7 +70,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
70 | return ERR_PTR(-ESTALE); | 70 | return ERR_PTR(-ESTALE); |
71 | 71 | ||
72 | key.objectid = root_objectid; | 72 | key.objectid = root_objectid; |
73 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 73 | key.type = BTRFS_ROOT_ITEM_KEY; |
74 | key.offset = (u64)-1; | 74 | key.offset = (u64)-1; |
75 | 75 | ||
76 | index = srcu_read_lock(&fs_info->subvol_srcu); | 76 | index = srcu_read_lock(&fs_info->subvol_srcu); |
@@ -82,7 +82,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | |||
82 | } | 82 | } |
83 | 83 | ||
84 | key.objectid = objectid; | 84 | key.objectid = objectid; |
85 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 85 | key.type = BTRFS_INODE_ITEM_KEY; |
86 | key.offset = 0; | 86 | key.offset = 0; |
87 | 87 | ||
88 | inode = btrfs_iget(sb, &key, root, NULL); | 88 | inode = btrfs_iget(sb, &key, root, NULL); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index caaf015d6e4b..d56589571012 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -491,7 +491,7 @@ next: | |||
491 | key.objectid); | 491 | key.objectid); |
492 | if (key.type == BTRFS_METADATA_ITEM_KEY) | 492 | if (key.type == BTRFS_METADATA_ITEM_KEY) |
493 | last = key.objectid + | 493 | last = key.objectid + |
494 | fs_info->tree_root->leafsize; | 494 | fs_info->tree_root->nodesize; |
495 | else | 495 | else |
496 | last = key.objectid + key.offset; | 496 | last = key.objectid + key.offset; |
497 | 497 | ||
@@ -765,7 +765,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | |||
765 | * different | 765 | * different |
766 | */ | 766 | */ |
767 | if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { | 767 | if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) { |
768 | offset = root->leafsize; | 768 | offset = root->nodesize; |
769 | metadata = 0; | 769 | metadata = 0; |
770 | } | 770 | } |
771 | 771 | ||
@@ -799,13 +799,13 @@ again: | |||
799 | path->slots[0]); | 799 | path->slots[0]); |
800 | if (key.objectid == bytenr && | 800 | if (key.objectid == bytenr && |
801 | key.type == BTRFS_EXTENT_ITEM_KEY && | 801 | key.type == BTRFS_EXTENT_ITEM_KEY && |
802 | key.offset == root->leafsize) | 802 | key.offset == root->nodesize) |
803 | ret = 0; | 803 | ret = 0; |
804 | } | 804 | } |
805 | if (ret) { | 805 | if (ret) { |
806 | key.objectid = bytenr; | 806 | key.objectid = bytenr; |
807 | key.type = BTRFS_EXTENT_ITEM_KEY; | 807 | key.type = BTRFS_EXTENT_ITEM_KEY; |
808 | key.offset = root->leafsize; | 808 | key.offset = root->nodesize; |
809 | btrfs_release_path(path); | 809 | btrfs_release_path(path); |
810 | goto again; | 810 | goto again; |
811 | } | 811 | } |
@@ -2651,7 +2651,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, | |||
2651 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); | 2651 | num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
2652 | num_heads = heads_to_leaves(root, num_heads); | 2652 | num_heads = heads_to_leaves(root, num_heads); |
2653 | if (num_heads > 1) | 2653 | if (num_heads > 1) |
2654 | num_bytes += (num_heads - 1) * root->leafsize; | 2654 | num_bytes += (num_heads - 1) * root->nodesize; |
2655 | num_bytes <<= 1; | 2655 | num_bytes <<= 1; |
2656 | global_rsv = &root->fs_info->global_block_rsv; | 2656 | global_rsv = &root->fs_info->global_block_rsv; |
2657 | 2657 | ||
@@ -3073,10 +3073,10 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3073 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 3073 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
3074 | u64, u64, u64, u64, u64, u64, int); | 3074 | u64, u64, u64, u64, u64, u64, int); |
3075 | 3075 | ||
3076 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 3076 | |
3077 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | 3077 | if (btrfs_test_is_dummy_root(root)) |
3078 | return 0; | 3078 | return 0; |
3079 | #endif | 3079 | |
3080 | ref_root = btrfs_header_owner(buf); | 3080 | ref_root = btrfs_header_owner(buf); |
3081 | nritems = btrfs_header_nritems(buf); | 3081 | nritems = btrfs_header_nritems(buf); |
3082 | level = btrfs_header_level(buf); | 3082 | level = btrfs_header_level(buf); |
@@ -3097,7 +3097,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3097 | for (i = 0; i < nritems; i++) { | 3097 | for (i = 0; i < nritems; i++) { |
3098 | if (level == 0) { | 3098 | if (level == 0) { |
3099 | btrfs_item_key_to_cpu(buf, &key, i); | 3099 | btrfs_item_key_to_cpu(buf, &key, i); |
3100 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | 3100 | if (key.type != BTRFS_EXTENT_DATA_KEY) |
3101 | continue; | 3101 | continue; |
3102 | fi = btrfs_item_ptr(buf, i, | 3102 | fi = btrfs_item_ptr(buf, i, |
3103 | struct btrfs_file_extent_item); | 3103 | struct btrfs_file_extent_item); |
@@ -3117,7 +3117,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3117 | goto fail; | 3117 | goto fail; |
3118 | } else { | 3118 | } else { |
3119 | bytenr = btrfs_node_blockptr(buf, i); | 3119 | bytenr = btrfs_node_blockptr(buf, i); |
3120 | num_bytes = btrfs_level_size(root, level - 1); | 3120 | num_bytes = root->nodesize; |
3121 | ret = process_func(trans, root, bytenr, num_bytes, | 3121 | ret = process_func(trans, root, bytenr, num_bytes, |
3122 | parent, ref_root, level - 1, 0, | 3122 | parent, ref_root, level - 1, 0, |
3123 | 1); | 3123 | 1); |
@@ -4343,11 +4343,21 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, | |||
4343 | } | 4343 | } |
4344 | 4344 | ||
4345 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, | 4345 | static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info, |
4346 | struct btrfs_fs_info *fs_info) | 4346 | struct btrfs_fs_info *fs_info, |
4347 | int flush_state) | ||
4347 | { | 4348 | { |
4348 | u64 used; | 4349 | u64 used; |
4349 | 4350 | ||
4350 | spin_lock(&space_info->lock); | 4351 | spin_lock(&space_info->lock); |
4352 | /* | ||
4353 | * We run out of space and have not got any free space via flush_space, | ||
4354 | * so don't bother doing async reclaim. | ||
4355 | */ | ||
4356 | if (flush_state > COMMIT_TRANS && space_info->full) { | ||
4357 | spin_unlock(&space_info->lock); | ||
4358 | return 0; | ||
4359 | } | ||
4360 | |||
4351 | used = space_info->bytes_used + space_info->bytes_reserved + | 4361 | used = space_info->bytes_used + space_info->bytes_reserved + |
4352 | space_info->bytes_pinned + space_info->bytes_readonly + | 4362 | space_info->bytes_pinned + space_info->bytes_readonly + |
4353 | space_info->bytes_may_use; | 4363 | space_info->bytes_may_use; |
@@ -4380,11 +4390,12 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) | |||
4380 | flush_space(fs_info->fs_root, space_info, to_reclaim, | 4390 | flush_space(fs_info->fs_root, space_info, to_reclaim, |
4381 | to_reclaim, flush_state); | 4391 | to_reclaim, flush_state); |
4382 | flush_state++; | 4392 | flush_state++; |
4383 | if (!btrfs_need_do_async_reclaim(space_info, fs_info)) | 4393 | if (!btrfs_need_do_async_reclaim(space_info, fs_info, |
4394 | flush_state)) | ||
4384 | return; | 4395 | return; |
4385 | } while (flush_state <= COMMIT_TRANS); | 4396 | } while (flush_state <= COMMIT_TRANS); |
4386 | 4397 | ||
4387 | if (btrfs_need_do_async_reclaim(space_info, fs_info)) | 4398 | if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state)) |
4388 | queue_work(system_unbound_wq, work); | 4399 | queue_work(system_unbound_wq, work); |
4389 | } | 4400 | } |
4390 | 4401 | ||
@@ -4502,7 +4513,13 @@ again: | |||
4502 | space_info->flush = 1; | 4513 | space_info->flush = 1; |
4503 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { | 4514 | } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { |
4504 | used += orig_bytes; | 4515 | used += orig_bytes; |
4505 | if (need_do_async_reclaim(space_info, root->fs_info, used) && | 4516 | /* |
4517 | * We will do the space reservation dance during log replay, | ||
4518 | * which means we won't have fs_info->fs_root set, so don't do | ||
4519 | * the async reclaim as we will panic. | ||
4520 | */ | ||
4521 | if (!root->fs_info->log_root_recovering && | ||
4522 | need_do_async_reclaim(space_info, root->fs_info, used) && | ||
4506 | !work_busy(&root->fs_info->async_reclaim_work)) | 4523 | !work_busy(&root->fs_info->async_reclaim_work)) |
4507 | queue_work(system_unbound_wq, | 4524 | queue_work(system_unbound_wq, |
4508 | &root->fs_info->async_reclaim_work); | 4525 | &root->fs_info->async_reclaim_work); |
@@ -4839,7 +4856,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | |||
4839 | if (num_bytes * 3 > meta_used) | 4856 | if (num_bytes * 3 > meta_used) |
4840 | num_bytes = div64_u64(meta_used, 3); | 4857 | num_bytes = div64_u64(meta_used, 3); |
4841 | 4858 | ||
4842 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); | 4859 | return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); |
4843 | } | 4860 | } |
4844 | 4861 | ||
4845 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | 4862 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) |
@@ -4988,7 +5005,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, | |||
4988 | 5005 | ||
4989 | if (root->fs_info->quota_enabled) { | 5006 | if (root->fs_info->quota_enabled) { |
4990 | /* One for parent inode, two for dir entries */ | 5007 | /* One for parent inode, two for dir entries */ |
4991 | num_bytes = 3 * root->leafsize; | 5008 | num_bytes = 3 * root->nodesize; |
4992 | ret = btrfs_qgroup_reserve(root, num_bytes); | 5009 | ret = btrfs_qgroup_reserve(root, num_bytes); |
4993 | if (ret) | 5010 | if (ret) |
4994 | return ret; | 5011 | return ret; |
@@ -5176,7 +5193,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5176 | 5193 | ||
5177 | if (root->fs_info->quota_enabled) { | 5194 | if (root->fs_info->quota_enabled) { |
5178 | ret = btrfs_qgroup_reserve(root, num_bytes + | 5195 | ret = btrfs_qgroup_reserve(root, num_bytes + |
5179 | nr_extents * root->leafsize); | 5196 | nr_extents * root->nodesize); |
5180 | if (ret) | 5197 | if (ret) |
5181 | goto out_fail; | 5198 | goto out_fail; |
5182 | } | 5199 | } |
@@ -5185,7 +5202,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
5185 | if (unlikely(ret)) { | 5202 | if (unlikely(ret)) { |
5186 | if (root->fs_info->quota_enabled) | 5203 | if (root->fs_info->quota_enabled) |
5187 | btrfs_qgroup_free(root, num_bytes + | 5204 | btrfs_qgroup_free(root, num_bytes + |
5188 | nr_extents * root->leafsize); | 5205 | nr_extents * root->nodesize); |
5189 | goto out_fail; | 5206 | goto out_fail; |
5190 | } | 5207 | } |
5191 | 5208 | ||
@@ -5301,7 +5318,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
5301 | btrfs_ino(inode), to_free, 0); | 5318 | btrfs_ino(inode), to_free, 0); |
5302 | if (root->fs_info->quota_enabled) { | 5319 | if (root->fs_info->quota_enabled) { |
5303 | btrfs_qgroup_free(root, num_bytes + | 5320 | btrfs_qgroup_free(root, num_bytes + |
5304 | dropped * root->leafsize); | 5321 | dropped * root->nodesize); |
5305 | } | 5322 | } |
5306 | 5323 | ||
5307 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 5324 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
@@ -5422,6 +5439,20 @@ static int update_block_group(struct btrfs_root *root, | |||
5422 | spin_unlock(&cache->space_info->lock); | 5439 | spin_unlock(&cache->space_info->lock); |
5423 | } else { | 5440 | } else { |
5424 | old_val -= num_bytes; | 5441 | old_val -= num_bytes; |
5442 | |||
5443 | /* | ||
5444 | * No longer have used bytes in this block group, queue | ||
5445 | * it for deletion. | ||
5446 | */ | ||
5447 | if (old_val == 0) { | ||
5448 | spin_lock(&info->unused_bgs_lock); | ||
5449 | if (list_empty(&cache->bg_list)) { | ||
5450 | btrfs_get_block_group(cache); | ||
5451 | list_add_tail(&cache->bg_list, | ||
5452 | &info->unused_bgs); | ||
5453 | } | ||
5454 | spin_unlock(&info->unused_bgs_lock); | ||
5455 | } | ||
5425 | btrfs_set_block_group_used(&cache->item, old_val); | 5456 | btrfs_set_block_group_used(&cache->item, old_val); |
5426 | cache->pinned += num_bytes; | 5457 | cache->pinned += num_bytes; |
5427 | cache->space_info->bytes_pinned += num_bytes; | 5458 | cache->space_info->bytes_pinned += num_bytes; |
@@ -6233,10 +6264,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
6233 | int ret; | 6264 | int ret; |
6234 | struct btrfs_fs_info *fs_info = root->fs_info; | 6265 | struct btrfs_fs_info *fs_info = root->fs_info; |
6235 | 6266 | ||
6236 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 6267 | if (btrfs_test_is_dummy_root(root)) |
6237 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
6238 | return 0; | 6268 | return 0; |
6239 | #endif | 6269 | |
6240 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); | 6270 | add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid); |
6241 | 6271 | ||
6242 | /* | 6272 | /* |
@@ -6263,14 +6293,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
6263 | return ret; | 6293 | return ret; |
6264 | } | 6294 | } |
6265 | 6295 | ||
6266 | static u64 stripe_align(struct btrfs_root *root, | ||
6267 | struct btrfs_block_group_cache *cache, | ||
6268 | u64 val, u64 num_bytes) | ||
6269 | { | ||
6270 | u64 ret = ALIGN(val, root->stripesize); | ||
6271 | return ret; | ||
6272 | } | ||
6273 | |||
6274 | /* | 6296 | /* |
6275 | * when we wait for progress in the block group caching, its because | 6297 | * when we wait for progress in the block group caching, its because |
6276 | * our allocation attempt failed at least once. So, we must sleep | 6298 | * our allocation attempt failed at least once. So, we must sleep |
@@ -6464,7 +6486,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, | |||
6464 | bool have_caching_bg = false; | 6486 | bool have_caching_bg = false; |
6465 | 6487 | ||
6466 | WARN_ON(num_bytes < root->sectorsize); | 6488 | WARN_ON(num_bytes < root->sectorsize); |
6467 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 6489 | ins->type = BTRFS_EXTENT_ITEM_KEY; |
6468 | ins->objectid = 0; | 6490 | ins->objectid = 0; |
6469 | ins->offset = 0; | 6491 | ins->offset = 0; |
6470 | 6492 | ||
@@ -6751,8 +6773,7 @@ unclustered_alloc: | |||
6751 | goto loop; | 6773 | goto loop; |
6752 | } | 6774 | } |
6753 | checks: | 6775 | checks: |
6754 | search_start = stripe_align(root, block_group, | 6776 | search_start = ALIGN(offset, root->stripesize); |
6755 | offset, num_bytes); | ||
6756 | 6777 | ||
6757 | /* move on to the next group */ | 6778 | /* move on to the next group */ |
6758 | if (search_start + num_bytes > | 6779 | if (search_start + num_bytes > |
@@ -7077,7 +7098,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7077 | path = btrfs_alloc_path(); | 7098 | path = btrfs_alloc_path(); |
7078 | if (!path) { | 7099 | if (!path) { |
7079 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, | 7100 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, |
7080 | root->leafsize); | 7101 | root->nodesize); |
7081 | return -ENOMEM; | 7102 | return -ENOMEM; |
7082 | } | 7103 | } |
7083 | 7104 | ||
@@ -7086,7 +7107,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7086 | ins, size); | 7107 | ins, size); |
7087 | if (ret) { | 7108 | if (ret) { |
7088 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, | 7109 | btrfs_free_and_pin_reserved_extent(root, ins->objectid, |
7089 | root->leafsize); | 7110 | root->nodesize); |
7090 | btrfs_free_path(path); | 7111 | btrfs_free_path(path); |
7091 | return ret; | 7112 | return ret; |
7092 | } | 7113 | } |
@@ -7101,7 +7122,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7101 | 7122 | ||
7102 | if (skinny_metadata) { | 7123 | if (skinny_metadata) { |
7103 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); | 7124 | iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); |
7104 | num_bytes = root->leafsize; | 7125 | num_bytes = root->nodesize; |
7105 | } else { | 7126 | } else { |
7106 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); | 7127 | block_info = (struct btrfs_tree_block_info *)(extent_item + 1); |
7107 | btrfs_set_tree_block_key(leaf, block_info, key); | 7128 | btrfs_set_tree_block_key(leaf, block_info, key); |
@@ -7131,14 +7152,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
7131 | return ret; | 7152 | return ret; |
7132 | } | 7153 | } |
7133 | 7154 | ||
7134 | ret = update_block_group(root, ins->objectid, root->leafsize, 1); | 7155 | ret = update_block_group(root, ins->objectid, root->nodesize, 1); |
7135 | if (ret) { /* -ENOENT, logic error */ | 7156 | if (ret) { /* -ENOENT, logic error */ |
7136 | btrfs_err(fs_info, "update block group failed for %llu %llu", | 7157 | btrfs_err(fs_info, "update block group failed for %llu %llu", |
7137 | ins->objectid, ins->offset); | 7158 | ins->objectid, ins->offset); |
7138 | BUG(); | 7159 | BUG(); |
7139 | } | 7160 | } |
7140 | 7161 | ||
7141 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize); | 7162 | trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->nodesize); |
7142 | return ret; | 7163 | return ret; |
7143 | } | 7164 | } |
7144 | 7165 | ||
@@ -7213,17 +7234,19 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
7213 | btrfs_set_buffer_uptodate(buf); | 7234 | btrfs_set_buffer_uptodate(buf); |
7214 | 7235 | ||
7215 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | 7236 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { |
7237 | buf->log_index = root->log_transid % 2; | ||
7216 | /* | 7238 | /* |
7217 | * we allow two log transactions at a time, use different | 7239 | * we allow two log transactions at a time, use different |
7218 | * EXENT bit to differentiate dirty pages. | 7240 | * EXENT bit to differentiate dirty pages. |
7219 | */ | 7241 | */ |
7220 | if (root->log_transid % 2 == 0) | 7242 | if (buf->log_index == 0) |
7221 | set_extent_dirty(&root->dirty_log_pages, buf->start, | 7243 | set_extent_dirty(&root->dirty_log_pages, buf->start, |
7222 | buf->start + buf->len - 1, GFP_NOFS); | 7244 | buf->start + buf->len - 1, GFP_NOFS); |
7223 | else | 7245 | else |
7224 | set_extent_new(&root->dirty_log_pages, buf->start, | 7246 | set_extent_new(&root->dirty_log_pages, buf->start, |
7225 | buf->start + buf->len - 1, GFP_NOFS); | 7247 | buf->start + buf->len - 1, GFP_NOFS); |
7226 | } else { | 7248 | } else { |
7249 | buf->log_index = -1; | ||
7227 | set_extent_dirty(&trans->transaction->dirty_pages, buf->start, | 7250 | set_extent_dirty(&trans->transaction->dirty_pages, buf->start, |
7228 | buf->start + buf->len - 1, GFP_NOFS); | 7251 | buf->start + buf->len - 1, GFP_NOFS); |
7229 | } | 7252 | } |
@@ -7300,8 +7323,8 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info, | |||
7300 | * | 7323 | * |
7301 | * returns the tree buffer or NULL. | 7324 | * returns the tree buffer or NULL. |
7302 | */ | 7325 | */ |
7303 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 7326 | struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, |
7304 | struct btrfs_root *root, u32 blocksize, | 7327 | struct btrfs_root *root, |
7305 | u64 parent, u64 root_objectid, | 7328 | u64 parent, u64 root_objectid, |
7306 | struct btrfs_disk_key *key, int level, | 7329 | struct btrfs_disk_key *key, int level, |
7307 | u64 hint, u64 empty_size) | 7330 | u64 hint, u64 empty_size) |
@@ -7311,18 +7334,18 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
7311 | struct extent_buffer *buf; | 7334 | struct extent_buffer *buf; |
7312 | u64 flags = 0; | 7335 | u64 flags = 0; |
7313 | int ret; | 7336 | int ret; |
7337 | u32 blocksize = root->nodesize; | ||
7314 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, | 7338 | bool skinny_metadata = btrfs_fs_incompat(root->fs_info, |
7315 | SKINNY_METADATA); | 7339 | SKINNY_METADATA); |
7316 | 7340 | ||
7317 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 7341 | if (btrfs_test_is_dummy_root(root)) { |
7318 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) { | ||
7319 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, | 7342 | buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, |
7320 | blocksize, level); | 7343 | blocksize, level); |
7321 | if (!IS_ERR(buf)) | 7344 | if (!IS_ERR(buf)) |
7322 | root->alloc_bytenr += blocksize; | 7345 | root->alloc_bytenr += blocksize; |
7323 | return buf; | 7346 | return buf; |
7324 | } | 7347 | } |
7325 | #endif | 7348 | |
7326 | block_rsv = use_block_rsv(trans, root, blocksize); | 7349 | block_rsv = use_block_rsv(trans, root, blocksize); |
7327 | if (IS_ERR(block_rsv)) | 7350 | if (IS_ERR(block_rsv)) |
7328 | return ERR_CAST(block_rsv); | 7351 | return ERR_CAST(block_rsv); |
@@ -7417,7 +7440,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
7417 | 7440 | ||
7418 | eb = path->nodes[wc->level]; | 7441 | eb = path->nodes[wc->level]; |
7419 | nritems = btrfs_header_nritems(eb); | 7442 | nritems = btrfs_header_nritems(eb); |
7420 | blocksize = btrfs_level_size(root, wc->level - 1); | 7443 | blocksize = root->nodesize; |
7421 | 7444 | ||
7422 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { | 7445 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
7423 | if (nread >= wc->reada_count) | 7446 | if (nread >= wc->reada_count) |
@@ -7464,10 +7487,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
7464 | continue; | 7487 | continue; |
7465 | } | 7488 | } |
7466 | reada: | 7489 | reada: |
7467 | ret = readahead_tree_block(root, bytenr, blocksize, | 7490 | readahead_tree_block(root, bytenr, blocksize); |
7468 | generation); | ||
7469 | if (ret) | ||
7470 | break; | ||
7471 | nread++; | 7491 | nread++; |
7472 | } | 7492 | } |
7473 | wc->reada_slot = slot; | 7493 | wc->reada_slot = slot; |
@@ -7626,7 +7646,6 @@ walk_down: | |||
7626 | level = root_level; | 7646 | level = root_level; |
7627 | while (level >= 0) { | 7647 | while (level >= 0) { |
7628 | if (path->nodes[level] == NULL) { | 7648 | if (path->nodes[level] == NULL) { |
7629 | int child_bsize = root->nodesize; | ||
7630 | int parent_slot; | 7649 | int parent_slot; |
7631 | u64 child_gen; | 7650 | u64 child_gen; |
7632 | u64 child_bytenr; | 7651 | u64 child_bytenr; |
@@ -7638,8 +7657,7 @@ walk_down: | |||
7638 | child_bytenr = btrfs_node_blockptr(eb, parent_slot); | 7657 | child_bytenr = btrfs_node_blockptr(eb, parent_slot); |
7639 | child_gen = btrfs_node_ptr_generation(eb, parent_slot); | 7658 | child_gen = btrfs_node_ptr_generation(eb, parent_slot); |
7640 | 7659 | ||
7641 | eb = read_tree_block(root, child_bytenr, child_bsize, | 7660 | eb = read_tree_block(root, child_bytenr, child_gen); |
7642 | child_gen); | ||
7643 | if (!eb || !extent_buffer_uptodate(eb)) { | 7661 | if (!eb || !extent_buffer_uptodate(eb)) { |
7644 | ret = -EIO; | 7662 | ret = -EIO; |
7645 | goto out; | 7663 | goto out; |
@@ -7655,7 +7673,7 @@ walk_down: | |||
7655 | ret = btrfs_qgroup_record_ref(trans, root->fs_info, | 7673 | ret = btrfs_qgroup_record_ref(trans, root->fs_info, |
7656 | root->objectid, | 7674 | root->objectid, |
7657 | child_bytenr, | 7675 | child_bytenr, |
7658 | child_bsize, | 7676 | root->nodesize, |
7659 | BTRFS_QGROUP_OPER_SUB_SUBTREE, | 7677 | BTRFS_QGROUP_OPER_SUB_SUBTREE, |
7660 | 0); | 7678 | 0); |
7661 | if (ret) | 7679 | if (ret) |
@@ -7806,9 +7824,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
7806 | } | 7824 | } |
7807 | 7825 | ||
7808 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | 7826 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); |
7809 | blocksize = btrfs_level_size(root, level - 1); | 7827 | blocksize = root->nodesize; |
7810 | 7828 | ||
7811 | next = btrfs_find_tree_block(root, bytenr, blocksize); | 7829 | next = btrfs_find_tree_block(root, bytenr); |
7812 | if (!next) { | 7830 | if (!next) { |
7813 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 7831 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
7814 | if (!next) | 7832 | if (!next) |
@@ -7870,7 +7888,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
7870 | if (!next) { | 7888 | if (!next) { |
7871 | if (reada && level == 1) | 7889 | if (reada && level == 1) |
7872 | reada_walk_down(trans, root, wc, path); | 7890 | reada_walk_down(trans, root, wc, path); |
7873 | next = read_tree_block(root, bytenr, blocksize, generation); | 7891 | next = read_tree_block(root, bytenr, generation); |
7874 | if (!next || !extent_buffer_uptodate(next)) { | 7892 | if (!next || !extent_buffer_uptodate(next)) { |
7875 | free_extent_buffer(next); | 7893 | free_extent_buffer(next); |
7876 | return -EIO; | 7894 | return -EIO; |
@@ -8853,6 +8871,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
8853 | } | 8871 | } |
8854 | up_write(&info->commit_root_sem); | 8872 | up_write(&info->commit_root_sem); |
8855 | 8873 | ||
8874 | spin_lock(&info->unused_bgs_lock); | ||
8875 | while (!list_empty(&info->unused_bgs)) { | ||
8876 | block_group = list_first_entry(&info->unused_bgs, | ||
8877 | struct btrfs_block_group_cache, | ||
8878 | bg_list); | ||
8879 | list_del_init(&block_group->bg_list); | ||
8880 | btrfs_put_block_group(block_group); | ||
8881 | } | ||
8882 | spin_unlock(&info->unused_bgs_lock); | ||
8883 | |||
8856 | spin_lock(&info->block_group_cache_lock); | 8884 | spin_lock(&info->block_group_cache_lock); |
8857 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 8885 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
8858 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 8886 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
@@ -8987,7 +9015,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) | |||
8987 | init_rwsem(&cache->data_rwsem); | 9015 | init_rwsem(&cache->data_rwsem); |
8988 | INIT_LIST_HEAD(&cache->list); | 9016 | INIT_LIST_HEAD(&cache->list); |
8989 | INIT_LIST_HEAD(&cache->cluster_list); | 9017 | INIT_LIST_HEAD(&cache->cluster_list); |
8990 | INIT_LIST_HEAD(&cache->new_bg_list); | 9018 | INIT_LIST_HEAD(&cache->bg_list); |
8991 | btrfs_init_free_space_ctl(cache); | 9019 | btrfs_init_free_space_ctl(cache); |
8992 | 9020 | ||
8993 | return cache; | 9021 | return cache; |
@@ -9009,7 +9037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
9009 | root = info->extent_root; | 9037 | root = info->extent_root; |
9010 | key.objectid = 0; | 9038 | key.objectid = 0; |
9011 | key.offset = 0; | 9039 | key.offset = 0; |
9012 | btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); | 9040 | key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
9013 | path = btrfs_alloc_path(); | 9041 | path = btrfs_alloc_path(); |
9014 | if (!path) | 9042 | if (!path) |
9015 | return -ENOMEM; | 9043 | return -ENOMEM; |
@@ -9128,8 +9156,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
9128 | __link_block_group(space_info, cache); | 9156 | __link_block_group(space_info, cache); |
9129 | 9157 | ||
9130 | set_avail_alloc_bits(root->fs_info, cache->flags); | 9158 | set_avail_alloc_bits(root->fs_info, cache->flags); |
9131 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 9159 | if (btrfs_chunk_readonly(root, cache->key.objectid)) { |
9132 | set_block_group_ro(cache, 1); | 9160 | set_block_group_ro(cache, 1); |
9161 | } else if (btrfs_block_group_used(&cache->item) == 0) { | ||
9162 | spin_lock(&info->unused_bgs_lock); | ||
9163 | /* Should always be true but just in case. */ | ||
9164 | if (list_empty(&cache->bg_list)) { | ||
9165 | btrfs_get_block_group(cache); | ||
9166 | list_add_tail(&cache->bg_list, | ||
9167 | &info->unused_bgs); | ||
9168 | } | ||
9169 | spin_unlock(&info->unused_bgs_lock); | ||
9170 | } | ||
9133 | } | 9171 | } |
9134 | 9172 | ||
9135 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | 9173 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { |
@@ -9170,10 +9208,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
9170 | struct btrfs_key key; | 9208 | struct btrfs_key key; |
9171 | int ret = 0; | 9209 | int ret = 0; |
9172 | 9210 | ||
9173 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, | 9211 | list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { |
9174 | new_bg_list) { | 9212 | list_del_init(&block_group->bg_list); |
9175 | list_del_init(&block_group->new_bg_list); | ||
9176 | |||
9177 | if (ret) | 9213 | if (ret) |
9178 | continue; | 9214 | continue; |
9179 | 9215 | ||
@@ -9259,7 +9295,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
9259 | 9295 | ||
9260 | __link_block_group(cache->space_info, cache); | 9296 | __link_block_group(cache->space_info, cache); |
9261 | 9297 | ||
9262 | list_add_tail(&cache->new_bg_list, &trans->new_bgs); | 9298 | list_add_tail(&cache->bg_list, &trans->new_bgs); |
9263 | 9299 | ||
9264 | set_avail_alloc_bits(extent_root->fs_info, type); | 9300 | set_avail_alloc_bits(extent_root->fs_info, type); |
9265 | 9301 | ||
@@ -9413,8 +9449,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
9413 | 9449 | ||
9414 | memcpy(&key, &block_group->key, sizeof(key)); | 9450 | memcpy(&key, &block_group->key, sizeof(key)); |
9415 | 9451 | ||
9416 | btrfs_clear_space_info_full(root->fs_info); | ||
9417 | |||
9418 | btrfs_put_block_group(block_group); | 9452 | btrfs_put_block_group(block_group); |
9419 | btrfs_put_block_group(block_group); | 9453 | btrfs_put_block_group(block_group); |
9420 | 9454 | ||
@@ -9430,6 +9464,101 @@ out: | |||
9430 | return ret; | 9464 | return ret; |
9431 | } | 9465 | } |
9432 | 9466 | ||
9467 | /* | ||
9468 | * Process the unused_bgs list and remove any that don't have any allocated | ||
9469 | * space inside of them. | ||
9470 | */ | ||
9471 | void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) | ||
9472 | { | ||
9473 | struct btrfs_block_group_cache *block_group; | ||
9474 | struct btrfs_space_info *space_info; | ||
9475 | struct btrfs_root *root = fs_info->extent_root; | ||
9476 | struct btrfs_trans_handle *trans; | ||
9477 | int ret = 0; | ||
9478 | |||
9479 | if (!fs_info->open) | ||
9480 | return; | ||
9481 | |||
9482 | spin_lock(&fs_info->unused_bgs_lock); | ||
9483 | while (!list_empty(&fs_info->unused_bgs)) { | ||
9484 | u64 start, end; | ||
9485 | |||
9486 | block_group = list_first_entry(&fs_info->unused_bgs, | ||
9487 | struct btrfs_block_group_cache, | ||
9488 | bg_list); | ||
9489 | space_info = block_group->space_info; | ||
9490 | list_del_init(&block_group->bg_list); | ||
9491 | if (ret || btrfs_mixed_space_info(space_info)) { | ||
9492 | btrfs_put_block_group(block_group); | ||
9493 | continue; | ||
9494 | } | ||
9495 | spin_unlock(&fs_info->unused_bgs_lock); | ||
9496 | |||
9497 | /* Don't want to race with allocators so take the groups_sem */ | ||
9498 | down_write(&space_info->groups_sem); | ||
9499 | spin_lock(&block_group->lock); | ||
9500 | if (block_group->reserved || | ||
9501 | btrfs_block_group_used(&block_group->item) || | ||
9502 | block_group->ro) { | ||
9503 | /* | ||
9504 | * We want to bail if we made new allocations or have | ||
9505 | * outstanding allocations in this block group. We do | ||
9506 | * the ro check in case balance is currently acting on | ||
9507 | * this block group. | ||
9508 | */ | ||
9509 | spin_unlock(&block_group->lock); | ||
9510 | up_write(&space_info->groups_sem); | ||
9511 | goto next; | ||
9512 | } | ||
9513 | spin_unlock(&block_group->lock); | ||
9514 | |||
9515 | /* We don't want to force the issue, only flip if it's ok. */ | ||
9516 | ret = set_block_group_ro(block_group, 0); | ||
9517 | up_write(&space_info->groups_sem); | ||
9518 | if (ret < 0) { | ||
9519 | ret = 0; | ||
9520 | goto next; | ||
9521 | } | ||
9522 | |||
9523 | /* | ||
9524 | * Want to do this before we do anything else so we can recover | ||
9525 | * properly if we fail to join the transaction. | ||
9526 | */ | ||
9527 | trans = btrfs_join_transaction(root); | ||
9528 | if (IS_ERR(trans)) { | ||
9529 | btrfs_set_block_group_rw(root, block_group); | ||
9530 | ret = PTR_ERR(trans); | ||
9531 | goto next; | ||
9532 | } | ||
9533 | |||
9534 | /* | ||
9535 | * We could have pending pinned extents for this block group, | ||
9536 | * just delete them, we don't care about them anymore. | ||
9537 | */ | ||
9538 | start = block_group->key.objectid; | ||
9539 | end = start + block_group->key.offset - 1; | ||
9540 | clear_extent_bits(&fs_info->freed_extents[0], start, end, | ||
9541 | EXTENT_DIRTY, GFP_NOFS); | ||
9542 | clear_extent_bits(&fs_info->freed_extents[1], start, end, | ||
9543 | EXTENT_DIRTY, GFP_NOFS); | ||
9544 | |||
9545 | /* Reset pinned so btrfs_put_block_group doesn't complain */ | ||
9546 | block_group->pinned = 0; | ||
9547 | |||
9548 | /* | ||
9549 | * Btrfs_remove_chunk will abort the transaction if things go | ||
9550 | * horribly wrong. | ||
9551 | */ | ||
9552 | ret = btrfs_remove_chunk(trans, root, | ||
9553 | block_group->key.objectid); | ||
9554 | btrfs_end_transaction(trans, root); | ||
9555 | next: | ||
9556 | btrfs_put_block_group(block_group); | ||
9557 | spin_lock(&fs_info->unused_bgs_lock); | ||
9558 | } | ||
9559 | spin_unlock(&fs_info->unused_bgs_lock); | ||
9560 | } | ||
9561 | |||
9433 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | 9562 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) |
9434 | { | 9563 | { |
9435 | struct btrfs_space_info *space_info; | 9564 | struct btrfs_space_info *space_info; |
@@ -9561,7 +9690,7 @@ void btrfs_end_nocow_write(struct btrfs_root *root) | |||
9561 | 9690 | ||
9562 | int btrfs_start_nocow_write(struct btrfs_root *root) | 9691 | int btrfs_start_nocow_write(struct btrfs_root *root) |
9563 | { | 9692 | { |
9564 | if (unlikely(atomic_read(&root->will_be_snapshoted))) | 9693 | if (atomic_read(&root->will_be_snapshoted)) |
9565 | return 0; | 9694 | return 0; |
9566 | 9695 | ||
9567 | percpu_counter_inc(&root->subv_writers->counter); | 9696 | percpu_counter_inc(&root->subv_writers->counter); |
@@ -9569,7 +9698,7 @@ int btrfs_start_nocow_write(struct btrfs_root *root) | |||
9569 | * Make sure counter is updated before we check for snapshot creation. | 9698 | * Make sure counter is updated before we check for snapshot creation. |
9570 | */ | 9699 | */ |
9571 | smp_mb(); | 9700 | smp_mb(); |
9572 | if (unlikely(atomic_read(&root->will_be_snapshoted))) { | 9701 | if (atomic_read(&root->will_be_snapshoted)) { |
9573 | btrfs_end_nocow_write(root); | 9702 | btrfs_end_nocow_write(root); |
9574 | return 0; | 9703 | return 0; |
9575 | } | 9704 | } |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index af0359dcf337..bf3f424e0013 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -25,6 +25,11 @@ static struct kmem_cache *extent_state_cache; | |||
25 | static struct kmem_cache *extent_buffer_cache; | 25 | static struct kmem_cache *extent_buffer_cache; |
26 | static struct bio_set *btrfs_bioset; | 26 | static struct bio_set *btrfs_bioset; |
27 | 27 | ||
28 | static inline bool extent_state_in_tree(const struct extent_state *state) | ||
29 | { | ||
30 | return !RB_EMPTY_NODE(&state->rb_node); | ||
31 | } | ||
32 | |||
28 | #ifdef CONFIG_BTRFS_DEBUG | 33 | #ifdef CONFIG_BTRFS_DEBUG |
29 | static LIST_HEAD(buffers); | 34 | static LIST_HEAD(buffers); |
30 | static LIST_HEAD(states); | 35 | static LIST_HEAD(states); |
@@ -59,9 +64,9 @@ void btrfs_leak_debug_check(void) | |||
59 | 64 | ||
60 | while (!list_empty(&states)) { | 65 | while (!list_empty(&states)) { |
61 | state = list_entry(states.next, struct extent_state, leak_list); | 66 | state = list_entry(states.next, struct extent_state, leak_list); |
62 | printk(KERN_ERR "BTRFS: state leak: start %llu end %llu " | 67 | pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n", |
63 | "state %lu in tree %p refs %d\n", | 68 | state->start, state->end, state->state, |
64 | state->start, state->end, state->state, state->tree, | 69 | extent_state_in_tree(state), |
65 | atomic_read(&state->refs)); | 70 | atomic_read(&state->refs)); |
66 | list_del(&state->leak_list); | 71 | list_del(&state->leak_list); |
67 | kmem_cache_free(extent_state_cache, state); | 72 | kmem_cache_free(extent_state_cache, state); |
@@ -209,7 +214,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
209 | return state; | 214 | return state; |
210 | state->state = 0; | 215 | state->state = 0; |
211 | state->private = 0; | 216 | state->private = 0; |
212 | state->tree = NULL; | 217 | RB_CLEAR_NODE(&state->rb_node); |
213 | btrfs_leak_debug_add(&state->leak_list, &states); | 218 | btrfs_leak_debug_add(&state->leak_list, &states); |
214 | atomic_set(&state->refs, 1); | 219 | atomic_set(&state->refs, 1); |
215 | init_waitqueue_head(&state->wq); | 220 | init_waitqueue_head(&state->wq); |
@@ -222,7 +227,7 @@ void free_extent_state(struct extent_state *state) | |||
222 | if (!state) | 227 | if (!state) |
223 | return; | 228 | return; |
224 | if (atomic_dec_and_test(&state->refs)) { | 229 | if (atomic_dec_and_test(&state->refs)) { |
225 | WARN_ON(state->tree); | 230 | WARN_ON(extent_state_in_tree(state)); |
226 | btrfs_leak_debug_del(&state->leak_list); | 231 | btrfs_leak_debug_del(&state->leak_list); |
227 | trace_free_extent_state(state, _RET_IP_); | 232 | trace_free_extent_state(state, _RET_IP_); |
228 | kmem_cache_free(extent_state_cache, state); | 233 | kmem_cache_free(extent_state_cache, state); |
@@ -371,8 +376,8 @@ static void merge_state(struct extent_io_tree *tree, | |||
371 | other->state == state->state) { | 376 | other->state == state->state) { |
372 | merge_cb(tree, state, other); | 377 | merge_cb(tree, state, other); |
373 | state->start = other->start; | 378 | state->start = other->start; |
374 | other->tree = NULL; | ||
375 | rb_erase(&other->rb_node, &tree->state); | 379 | rb_erase(&other->rb_node, &tree->state); |
380 | RB_CLEAR_NODE(&other->rb_node); | ||
376 | free_extent_state(other); | 381 | free_extent_state(other); |
377 | } | 382 | } |
378 | } | 383 | } |
@@ -383,8 +388,8 @@ static void merge_state(struct extent_io_tree *tree, | |||
383 | other->state == state->state) { | 388 | other->state == state->state) { |
384 | merge_cb(tree, state, other); | 389 | merge_cb(tree, state, other); |
385 | state->end = other->end; | 390 | state->end = other->end; |
386 | other->tree = NULL; | ||
387 | rb_erase(&other->rb_node, &tree->state); | 391 | rb_erase(&other->rb_node, &tree->state); |
392 | RB_CLEAR_NODE(&other->rb_node); | ||
388 | free_extent_state(other); | 393 | free_extent_state(other); |
389 | } | 394 | } |
390 | } | 395 | } |
@@ -442,7 +447,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
442 | found->start, found->end, start, end); | 447 | found->start, found->end, start, end); |
443 | return -EEXIST; | 448 | return -EEXIST; |
444 | } | 449 | } |
445 | state->tree = tree; | ||
446 | merge_state(tree, state); | 450 | merge_state(tree, state); |
447 | return 0; | 451 | return 0; |
448 | } | 452 | } |
@@ -486,7 +490,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
486 | free_extent_state(prealloc); | 490 | free_extent_state(prealloc); |
487 | return -EEXIST; | 491 | return -EEXIST; |
488 | } | 492 | } |
489 | prealloc->tree = tree; | ||
490 | return 0; | 493 | return 0; |
491 | } | 494 | } |
492 | 495 | ||
@@ -524,9 +527,9 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree, | |||
524 | wake_up(&state->wq); | 527 | wake_up(&state->wq); |
525 | if (state->state == 0) { | 528 | if (state->state == 0) { |
526 | next = next_state(state); | 529 | next = next_state(state); |
527 | if (state->tree) { | 530 | if (extent_state_in_tree(state)) { |
528 | rb_erase(&state->rb_node, &tree->state); | 531 | rb_erase(&state->rb_node, &tree->state); |
529 | state->tree = NULL; | 532 | RB_CLEAR_NODE(&state->rb_node); |
530 | free_extent_state(state); | 533 | free_extent_state(state); |
531 | } else { | 534 | } else { |
532 | WARN_ON(1); | 535 | WARN_ON(1); |
@@ -606,8 +609,8 @@ again: | |||
606 | cached_state = NULL; | 609 | cached_state = NULL; |
607 | } | 610 | } |
608 | 611 | ||
609 | if (cached && cached->tree && cached->start <= start && | 612 | if (cached && extent_state_in_tree(cached) && |
610 | cached->end > start) { | 613 | cached->start <= start && cached->end > start) { |
611 | if (clear) | 614 | if (clear) |
612 | atomic_dec(&cached->refs); | 615 | atomic_dec(&cached->refs); |
613 | state = cached; | 616 | state = cached; |
@@ -843,7 +846,7 @@ again: | |||
843 | if (cached_state && *cached_state) { | 846 | if (cached_state && *cached_state) { |
844 | state = *cached_state; | 847 | state = *cached_state; |
845 | if (state->start <= start && state->end > start && | 848 | if (state->start <= start && state->end > start && |
846 | state->tree) { | 849 | extent_state_in_tree(state)) { |
847 | node = &state->rb_node; | 850 | node = &state->rb_node; |
848 | goto hit_next; | 851 | goto hit_next; |
849 | } | 852 | } |
@@ -1069,7 +1072,7 @@ again: | |||
1069 | if (cached_state && *cached_state) { | 1072 | if (cached_state && *cached_state) { |
1070 | state = *cached_state; | 1073 | state = *cached_state; |
1071 | if (state->start <= start && state->end > start && | 1074 | if (state->start <= start && state->end > start && |
1072 | state->tree) { | 1075 | extent_state_in_tree(state)) { |
1073 | node = &state->rb_node; | 1076 | node = &state->rb_node; |
1074 | goto hit_next; | 1077 | goto hit_next; |
1075 | } | 1078 | } |
@@ -1459,7 +1462,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, | |||
1459 | spin_lock(&tree->lock); | 1462 | spin_lock(&tree->lock); |
1460 | if (cached_state && *cached_state) { | 1463 | if (cached_state && *cached_state) { |
1461 | state = *cached_state; | 1464 | state = *cached_state; |
1462 | if (state->end == start - 1 && state->tree) { | 1465 | if (state->end == start - 1 && extent_state_in_tree(state)) { |
1463 | n = rb_next(&state->rb_node); | 1466 | n = rb_next(&state->rb_node); |
1464 | while (n) { | 1467 | while (n) { |
1465 | state = rb_entry(n, struct extent_state, | 1468 | state = rb_entry(n, struct extent_state, |
@@ -1905,7 +1908,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1905 | int bitset = 0; | 1908 | int bitset = 0; |
1906 | 1909 | ||
1907 | spin_lock(&tree->lock); | 1910 | spin_lock(&tree->lock); |
1908 | if (cached && cached->tree && cached->start <= start && | 1911 | if (cached && extent_state_in_tree(cached) && cached->start <= start && |
1909 | cached->end > start) | 1912 | cached->end > start) |
1910 | node = &cached->rb_node; | 1913 | node = &cached->rb_node; |
1911 | else | 1914 | else |
@@ -1959,27 +1962,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) | |||
1959 | SetPageUptodate(page); | 1962 | SetPageUptodate(page); |
1960 | } | 1963 | } |
1961 | 1964 | ||
1962 | /* | 1965 | int free_io_failure(struct inode *inode, struct io_failure_record *rec) |
1963 | * When IO fails, either with EIO or csum verification fails, we | ||
1964 | * try other mirrors that might have a good copy of the data. This | ||
1965 | * io_failure_record is used to record state as we go through all the | ||
1966 | * mirrors. If another mirror has good data, the page is set up to date | ||
1967 | * and things continue. If a good mirror can't be found, the original | ||
1968 | * bio end_io callback is called to indicate things have failed. | ||
1969 | */ | ||
1970 | struct io_failure_record { | ||
1971 | struct page *page; | ||
1972 | u64 start; | ||
1973 | u64 len; | ||
1974 | u64 logical; | ||
1975 | unsigned long bio_flags; | ||
1976 | int this_mirror; | ||
1977 | int failed_mirror; | ||
1978 | int in_validation; | ||
1979 | }; | ||
1980 | |||
1981 | static int free_io_failure(struct inode *inode, struct io_failure_record *rec, | ||
1982 | int did_repair) | ||
1983 | { | 1966 | { |
1984 | int ret; | 1967 | int ret; |
1985 | int err = 0; | 1968 | int err = 0; |
@@ -2012,10 +1995,10 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, | |||
2012 | * currently, there can be no more than two copies of every data bit. thus, | 1995 | * currently, there can be no more than two copies of every data bit. thus, |
2013 | * exactly one rewrite is required. | 1996 | * exactly one rewrite is required. |
2014 | */ | 1997 | */ |
2015 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | 1998 | int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, |
2016 | u64 length, u64 logical, struct page *page, | 1999 | struct page *page, unsigned int pg_offset, int mirror_num) |
2017 | int mirror_num) | ||
2018 | { | 2000 | { |
2001 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||
2019 | struct bio *bio; | 2002 | struct bio *bio; |
2020 | struct btrfs_device *dev; | 2003 | struct btrfs_device *dev; |
2021 | u64 map_length = 0; | 2004 | u64 map_length = 0; |
@@ -2053,7 +2036,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
2053 | return -EIO; | 2036 | return -EIO; |
2054 | } | 2037 | } |
2055 | bio->bi_bdev = dev->bdev; | 2038 | bio->bi_bdev = dev->bdev; |
2056 | bio_add_page(bio, page, length, start - page_offset(page)); | 2039 | bio_add_page(bio, page, length, pg_offset); |
2057 | 2040 | ||
2058 | if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { | 2041 | if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { |
2059 | /* try to remap that extent elsewhere? */ | 2042 | /* try to remap that extent elsewhere? */ |
@@ -2063,10 +2046,9 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
2063 | } | 2046 | } |
2064 | 2047 | ||
2065 | printk_ratelimited_in_rcu(KERN_INFO | 2048 | printk_ratelimited_in_rcu(KERN_INFO |
2066 | "BTRFS: read error corrected: ino %lu off %llu " | 2049 | "BTRFS: read error corrected: ino %llu off %llu (dev %s sector %llu)\n", |
2067 | "(dev %s sector %llu)\n", page->mapping->host->i_ino, | 2050 | btrfs_ino(inode), start, |
2068 | start, rcu_str_deref(dev->name), sector); | 2051 | rcu_str_deref(dev->name), sector); |
2069 | |||
2070 | bio_put(bio); | 2052 | bio_put(bio); |
2071 | return 0; | 2053 | return 0; |
2072 | } | 2054 | } |
@@ -2082,9 +2064,11 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | |||
2082 | return -EROFS; | 2064 | return -EROFS; |
2083 | 2065 | ||
2084 | for (i = 0; i < num_pages; i++) { | 2066 | for (i = 0; i < num_pages; i++) { |
2085 | struct page *p = extent_buffer_page(eb, i); | 2067 | struct page *p = eb->pages[i]; |
2086 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, | 2068 | |
2087 | start, p, mirror_num); | 2069 | ret = repair_io_failure(root->fs_info->btree_inode, start, |
2070 | PAGE_CACHE_SIZE, start, p, | ||
2071 | start - page_offset(p), mirror_num); | ||
2088 | if (ret) | 2072 | if (ret) |
2089 | break; | 2073 | break; |
2090 | start += PAGE_CACHE_SIZE; | 2074 | start += PAGE_CACHE_SIZE; |
@@ -2097,16 +2081,15 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | |||
2097 | * each time an IO finishes, we do a fast check in the IO failure tree | 2081 | * each time an IO finishes, we do a fast check in the IO failure tree |
2098 | * to see if we need to process or clean up an io_failure_record | 2082 | * to see if we need to process or clean up an io_failure_record |
2099 | */ | 2083 | */ |
2100 | static int clean_io_failure(u64 start, struct page *page) | 2084 | int clean_io_failure(struct inode *inode, u64 start, struct page *page, |
2085 | unsigned int pg_offset) | ||
2101 | { | 2086 | { |
2102 | u64 private; | 2087 | u64 private; |
2103 | u64 private_failure; | 2088 | u64 private_failure; |
2104 | struct io_failure_record *failrec; | 2089 | struct io_failure_record *failrec; |
2105 | struct inode *inode = page->mapping->host; | ||
2106 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | 2090 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; |
2107 | struct extent_state *state; | 2091 | struct extent_state *state; |
2108 | int num_copies; | 2092 | int num_copies; |
2109 | int did_repair = 0; | ||
2110 | int ret; | 2093 | int ret; |
2111 | 2094 | ||
2112 | private = 0; | 2095 | private = 0; |
@@ -2127,7 +2110,6 @@ static int clean_io_failure(u64 start, struct page *page) | |||
2127 | /* there was no real error, just free the record */ | 2110 | /* there was no real error, just free the record */ |
2128 | pr_debug("clean_io_failure: freeing dummy error at %llu\n", | 2111 | pr_debug("clean_io_failure: freeing dummy error at %llu\n", |
2129 | failrec->start); | 2112 | failrec->start); |
2130 | did_repair = 1; | ||
2131 | goto out; | 2113 | goto out; |
2132 | } | 2114 | } |
2133 | if (fs_info->sb->s_flags & MS_RDONLY) | 2115 | if (fs_info->sb->s_flags & MS_RDONLY) |
@@ -2144,55 +2126,70 @@ static int clean_io_failure(u64 start, struct page *page) | |||
2144 | num_copies = btrfs_num_copies(fs_info, failrec->logical, | 2126 | num_copies = btrfs_num_copies(fs_info, failrec->logical, |
2145 | failrec->len); | 2127 | failrec->len); |
2146 | if (num_copies > 1) { | 2128 | if (num_copies > 1) { |
2147 | ret = repair_io_failure(fs_info, start, failrec->len, | 2129 | repair_io_failure(inode, start, failrec->len, |
2148 | failrec->logical, page, | 2130 | failrec->logical, page, |
2149 | failrec->failed_mirror); | 2131 | pg_offset, failrec->failed_mirror); |
2150 | did_repair = !ret; | ||
2151 | } | 2132 | } |
2152 | ret = 0; | ||
2153 | } | 2133 | } |
2154 | 2134 | ||
2155 | out: | 2135 | out: |
2156 | if (!ret) | 2136 | free_io_failure(inode, failrec); |
2157 | ret = free_io_failure(inode, failrec, did_repair); | ||
2158 | 2137 | ||
2159 | return ret; | 2138 | return 0; |
2160 | } | 2139 | } |
2161 | 2140 | ||
2162 | /* | 2141 | /* |
2163 | * this is a generic handler for readpage errors (default | 2142 | * Can be called when |
2164 | * readpage_io_failed_hook). if other copies exist, read those and write back | 2143 | * - hold extent lock |
2165 | * good data to the failed position. does not investigate in remapping the | 2144 | * - under ordered extent |
2166 | * failed extent elsewhere, hoping the device will be smart enough to do this as | 2145 | * - the inode is freeing |
2167 | * needed | ||
2168 | */ | 2146 | */ |
2147 | void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) | ||
2148 | { | ||
2149 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | ||
2150 | struct io_failure_record *failrec; | ||
2151 | struct extent_state *state, *next; | ||
2169 | 2152 | ||
2170 | static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | 2153 | if (RB_EMPTY_ROOT(&failure_tree->state)) |
2171 | struct page *page, u64 start, u64 end, | 2154 | return; |
2172 | int failed_mirror) | 2155 | |
2156 | spin_lock(&failure_tree->lock); | ||
2157 | state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY); | ||
2158 | while (state) { | ||
2159 | if (state->start > end) | ||
2160 | break; | ||
2161 | |||
2162 | ASSERT(state->end <= end); | ||
2163 | |||
2164 | next = next_state(state); | ||
2165 | |||
2166 | failrec = (struct io_failure_record *)state->private; | ||
2167 | free_extent_state(state); | ||
2168 | kfree(failrec); | ||
2169 | |||
2170 | state = next; | ||
2171 | } | ||
2172 | spin_unlock(&failure_tree->lock); | ||
2173 | } | ||
2174 | |||
2175 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | ||
2176 | struct io_failure_record **failrec_ret) | ||
2173 | { | 2177 | { |
2174 | struct io_failure_record *failrec = NULL; | 2178 | struct io_failure_record *failrec; |
2175 | u64 private; | 2179 | u64 private; |
2176 | struct extent_map *em; | 2180 | struct extent_map *em; |
2177 | struct inode *inode = page->mapping->host; | ||
2178 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | 2181 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
2179 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | 2182 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
2180 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 2183 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
2181 | struct bio *bio; | ||
2182 | struct btrfs_io_bio *btrfs_failed_bio; | ||
2183 | struct btrfs_io_bio *btrfs_bio; | ||
2184 | int num_copies; | ||
2185 | int ret; | 2184 | int ret; |
2186 | int read_mode; | ||
2187 | u64 logical; | 2185 | u64 logical; |
2188 | 2186 | ||
2189 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
2190 | |||
2191 | ret = get_state_private(failure_tree, start, &private); | 2187 | ret = get_state_private(failure_tree, start, &private); |
2192 | if (ret) { | 2188 | if (ret) { |
2193 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); | 2189 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); |
2194 | if (!failrec) | 2190 | if (!failrec) |
2195 | return -ENOMEM; | 2191 | return -ENOMEM; |
2192 | |||
2196 | failrec->start = start; | 2193 | failrec->start = start; |
2197 | failrec->len = end - start + 1; | 2194 | failrec->len = end - start + 1; |
2198 | failrec->this_mirror = 0; | 2195 | failrec->this_mirror = 0; |
@@ -2212,11 +2209,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2212 | em = NULL; | 2209 | em = NULL; |
2213 | } | 2210 | } |
2214 | read_unlock(&em_tree->lock); | 2211 | read_unlock(&em_tree->lock); |
2215 | |||
2216 | if (!em) { | 2212 | if (!em) { |
2217 | kfree(failrec); | 2213 | kfree(failrec); |
2218 | return -EIO; | 2214 | return -EIO; |
2219 | } | 2215 | } |
2216 | |||
2220 | logical = start - em->start; | 2217 | logical = start - em->start; |
2221 | logical = em->block_start + logical; | 2218 | logical = em->block_start + logical; |
2222 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 2219 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
@@ -2225,8 +2222,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2225 | extent_set_compress_type(&failrec->bio_flags, | 2222 | extent_set_compress_type(&failrec->bio_flags, |
2226 | em->compress_type); | 2223 | em->compress_type); |
2227 | } | 2224 | } |
2228 | pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, " | 2225 | |
2229 | "len=%llu\n", logical, start, failrec->len); | 2226 | pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n", |
2227 | logical, start, failrec->len); | ||
2228 | |||
2230 | failrec->logical = logical; | 2229 | failrec->logical = logical; |
2231 | free_extent_map(em); | 2230 | free_extent_map(em); |
2232 | 2231 | ||
@@ -2246,8 +2245,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2246 | } | 2245 | } |
2247 | } else { | 2246 | } else { |
2248 | failrec = (struct io_failure_record *)(unsigned long)private; | 2247 | failrec = (struct io_failure_record *)(unsigned long)private; |
2249 | pr_debug("bio_readpage_error: (found) logical=%llu, " | 2248 | pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", |
2250 | "start=%llu, len=%llu, validation=%d\n", | ||
2251 | failrec->logical, failrec->start, failrec->len, | 2249 | failrec->logical, failrec->start, failrec->len, |
2252 | failrec->in_validation); | 2250 | failrec->in_validation); |
2253 | /* | 2251 | /* |
@@ -2256,6 +2254,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2256 | * clean_io_failure() clean all those errors at once. | 2254 | * clean_io_failure() clean all those errors at once. |
2257 | */ | 2255 | */ |
2258 | } | 2256 | } |
2257 | |||
2258 | *failrec_ret = failrec; | ||
2259 | |||
2260 | return 0; | ||
2261 | } | ||
2262 | |||
2263 | int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, | ||
2264 | struct io_failure_record *failrec, int failed_mirror) | ||
2265 | { | ||
2266 | int num_copies; | ||
2267 | |||
2259 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, | 2268 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, |
2260 | failrec->logical, failrec->len); | 2269 | failrec->logical, failrec->len); |
2261 | if (num_copies == 1) { | 2270 | if (num_copies == 1) { |
@@ -2264,10 +2273,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2264 | * all the retry and error correction code that follows. no | 2273 | * all the retry and error correction code that follows. no |
2265 | * matter what the error is, it is very likely to persist. | 2274 | * matter what the error is, it is very likely to persist. |
2266 | */ | 2275 | */ |
2267 | pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", | 2276 | pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", |
2268 | num_copies, failrec->this_mirror, failed_mirror); | 2277 | num_copies, failrec->this_mirror, failed_mirror); |
2269 | free_io_failure(inode, failrec, 0); | 2278 | return 0; |
2270 | return -EIO; | ||
2271 | } | 2279 | } |
2272 | 2280 | ||
2273 | /* | 2281 | /* |
@@ -2287,7 +2295,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2287 | BUG_ON(failrec->in_validation); | 2295 | BUG_ON(failrec->in_validation); |
2288 | failrec->in_validation = 1; | 2296 | failrec->in_validation = 1; |
2289 | failrec->this_mirror = failed_mirror; | 2297 | failrec->this_mirror = failed_mirror; |
2290 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
2291 | } else { | 2298 | } else { |
2292 | /* | 2299 | /* |
2293 | * we're ready to fulfill a) and b) alongside. get a good copy | 2300 | * we're ready to fulfill a) and b) alongside. get a good copy |
@@ -2303,25 +2310,36 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2303 | failrec->this_mirror++; | 2310 | failrec->this_mirror++; |
2304 | if (failrec->this_mirror == failed_mirror) | 2311 | if (failrec->this_mirror == failed_mirror) |
2305 | failrec->this_mirror++; | 2312 | failrec->this_mirror++; |
2306 | read_mode = READ_SYNC; | ||
2307 | } | 2313 | } |
2308 | 2314 | ||
2309 | if (failrec->this_mirror > num_copies) { | 2315 | if (failrec->this_mirror > num_copies) { |
2310 | pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", | 2316 | pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", |
2311 | num_copies, failrec->this_mirror, failed_mirror); | 2317 | num_copies, failrec->this_mirror, failed_mirror); |
2312 | free_io_failure(inode, failrec, 0); | 2318 | return 0; |
2313 | return -EIO; | ||
2314 | } | 2319 | } |
2315 | 2320 | ||
2321 | return 1; | ||
2322 | } | ||
2323 | |||
2324 | |||
2325 | struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | ||
2326 | struct io_failure_record *failrec, | ||
2327 | struct page *page, int pg_offset, int icsum, | ||
2328 | bio_end_io_t *endio_func, void *data) | ||
2329 | { | ||
2330 | struct bio *bio; | ||
2331 | struct btrfs_io_bio *btrfs_failed_bio; | ||
2332 | struct btrfs_io_bio *btrfs_bio; | ||
2333 | |||
2316 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 2334 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); |
2317 | if (!bio) { | 2335 | if (!bio) |
2318 | free_io_failure(inode, failrec, 0); | 2336 | return NULL; |
2319 | return -EIO; | 2337 | |
2320 | } | 2338 | bio->bi_end_io = endio_func; |
2321 | bio->bi_end_io = failed_bio->bi_end_io; | ||
2322 | bio->bi_iter.bi_sector = failrec->logical >> 9; | 2339 | bio->bi_iter.bi_sector = failrec->logical >> 9; |
2323 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 2340 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
2324 | bio->bi_iter.bi_size = 0; | 2341 | bio->bi_iter.bi_size = 0; |
2342 | bio->bi_private = data; | ||
2325 | 2343 | ||
2326 | btrfs_failed_bio = btrfs_io_bio(failed_bio); | 2344 | btrfs_failed_bio = btrfs_io_bio(failed_bio); |
2327 | if (btrfs_failed_bio->csum) { | 2345 | if (btrfs_failed_bio->csum) { |
@@ -2330,21 +2348,73 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2330 | 2348 | ||
2331 | btrfs_bio = btrfs_io_bio(bio); | 2349 | btrfs_bio = btrfs_io_bio(bio); |
2332 | btrfs_bio->csum = btrfs_bio->csum_inline; | 2350 | btrfs_bio->csum = btrfs_bio->csum_inline; |
2333 | phy_offset >>= inode->i_sb->s_blocksize_bits; | 2351 | icsum *= csum_size; |
2334 | phy_offset *= csum_size; | 2352 | memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum, |
2335 | memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset, | ||
2336 | csum_size); | 2353 | csum_size); |
2337 | } | 2354 | } |
2338 | 2355 | ||
2339 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | 2356 | bio_add_page(bio, page, failrec->len, pg_offset); |
2357 | |||
2358 | return bio; | ||
2359 | } | ||
2360 | |||
2361 | /* | ||
2362 | * this is a generic handler for readpage errors (default | ||
2363 | * readpage_io_failed_hook). if other copies exist, read those and write back | ||
2364 | * good data to the failed position. does not investigate in remapping the | ||
2365 | * failed extent elsewhere, hoping the device will be smart enough to do this as | ||
2366 | * needed | ||
2367 | */ | ||
2368 | |||
2369 | static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | ||
2370 | struct page *page, u64 start, u64 end, | ||
2371 | int failed_mirror) | ||
2372 | { | ||
2373 | struct io_failure_record *failrec; | ||
2374 | struct inode *inode = page->mapping->host; | ||
2375 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
2376 | struct bio *bio; | ||
2377 | int read_mode; | ||
2378 | int ret; | ||
2379 | |||
2380 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
2381 | |||
2382 | ret = btrfs_get_io_failure_record(inode, start, end, &failrec); | ||
2383 | if (ret) | ||
2384 | return ret; | ||
2385 | |||
2386 | ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); | ||
2387 | if (!ret) { | ||
2388 | free_io_failure(inode, failrec); | ||
2389 | return -EIO; | ||
2390 | } | ||
2391 | |||
2392 | if (failed_bio->bi_vcnt > 1) | ||
2393 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
2394 | else | ||
2395 | read_mode = READ_SYNC; | ||
2396 | |||
2397 | phy_offset >>= inode->i_sb->s_blocksize_bits; | ||
2398 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, | ||
2399 | start - page_offset(page), | ||
2400 | (int)phy_offset, failed_bio->bi_end_io, | ||
2401 | NULL); | ||
2402 | if (!bio) { | ||
2403 | free_io_failure(inode, failrec); | ||
2404 | return -EIO; | ||
2405 | } | ||
2340 | 2406 | ||
2341 | pr_debug("bio_readpage_error: submitting new read[%#x] to " | 2407 | pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n", |
2342 | "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, | 2408 | read_mode, failrec->this_mirror, failrec->in_validation); |
2343 | failrec->this_mirror, num_copies, failrec->in_validation); | ||
2344 | 2409 | ||
2345 | ret = tree->ops->submit_bio_hook(inode, read_mode, bio, | 2410 | ret = tree->ops->submit_bio_hook(inode, read_mode, bio, |
2346 | failrec->this_mirror, | 2411 | failrec->this_mirror, |
2347 | failrec->bio_flags, 0); | 2412 | failrec->bio_flags, 0); |
2413 | if (ret) { | ||
2414 | free_io_failure(inode, failrec); | ||
2415 | bio_put(bio); | ||
2416 | } | ||
2417 | |||
2348 | return ret; | 2418 | return ret; |
2349 | } | 2419 | } |
2350 | 2420 | ||
@@ -2469,7 +2539,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2469 | struct inode *inode = page->mapping->host; | 2539 | struct inode *inode = page->mapping->host; |
2470 | 2540 | ||
2471 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " | 2541 | pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " |
2472 | "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err, | 2542 | "mirror=%u\n", (u64)bio->bi_iter.bi_sector, err, |
2473 | io_bio->mirror_num); | 2543 | io_bio->mirror_num); |
2474 | tree = &BTRFS_I(inode)->io_tree; | 2544 | tree = &BTRFS_I(inode)->io_tree; |
2475 | 2545 | ||
@@ -2503,7 +2573,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2503 | if (ret) | 2573 | if (ret) |
2504 | uptodate = 0; | 2574 | uptodate = 0; |
2505 | else | 2575 | else |
2506 | clean_io_failure(start, page); | 2576 | clean_io_failure(inode, start, page, 0); |
2507 | } | 2577 | } |
2508 | 2578 | ||
2509 | if (likely(uptodate)) | 2579 | if (likely(uptodate)) |
@@ -2540,12 +2610,12 @@ readpage_ok: | |||
2540 | if (likely(uptodate)) { | 2610 | if (likely(uptodate)) { |
2541 | loff_t i_size = i_size_read(inode); | 2611 | loff_t i_size = i_size_read(inode); |
2542 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 2612 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
2543 | unsigned offset; | 2613 | unsigned off; |
2544 | 2614 | ||
2545 | /* Zero out the end if this page straddles i_size */ | 2615 | /* Zero out the end if this page straddles i_size */ |
2546 | offset = i_size & (PAGE_CACHE_SIZE-1); | 2616 | off = i_size & (PAGE_CACHE_SIZE-1); |
2547 | if (page->index == end_index && offset) | 2617 | if (page->index == end_index && off) |
2548 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | 2618 | zero_user_segment(page, off, PAGE_CACHE_SIZE); |
2549 | SetPageUptodate(page); | 2619 | SetPageUptodate(page); |
2550 | } else { | 2620 | } else { |
2551 | ClearPageUptodate(page); | 2621 | ClearPageUptodate(page); |
@@ -2618,9 +2688,18 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | |||
2618 | 2688 | ||
2619 | struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) | 2689 | struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask) |
2620 | { | 2690 | { |
2621 | return bio_clone_bioset(bio, gfp_mask, btrfs_bioset); | 2691 | struct btrfs_io_bio *btrfs_bio; |
2622 | } | 2692 | struct bio *new; |
2623 | 2693 | ||
2694 | new = bio_clone_bioset(bio, gfp_mask, btrfs_bioset); | ||
2695 | if (new) { | ||
2696 | btrfs_bio = btrfs_io_bio(new); | ||
2697 | btrfs_bio->csum = NULL; | ||
2698 | btrfs_bio->csum_allocated = NULL; | ||
2699 | btrfs_bio->end_io = NULL; | ||
2700 | } | ||
2701 | return new; | ||
2702 | } | ||
2624 | 2703 | ||
2625 | /* this also allocates from the btrfs_bioset */ | 2704 | /* this also allocates from the btrfs_bioset */ |
2626 | struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | 2705 | struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) |
@@ -3501,7 +3580,7 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, | |||
3501 | 3580 | ||
3502 | num_pages = num_extent_pages(eb->start, eb->len); | 3581 | num_pages = num_extent_pages(eb->start, eb->len); |
3503 | for (i = 0; i < num_pages; i++) { | 3582 | for (i = 0; i < num_pages; i++) { |
3504 | struct page *p = extent_buffer_page(eb, i); | 3583 | struct page *p = eb->pages[i]; |
3505 | 3584 | ||
3506 | if (!trylock_page(p)) { | 3585 | if (!trylock_page(p)) { |
3507 | if (!flush) { | 3586 | if (!flush) { |
@@ -3522,6 +3601,68 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb) | |||
3522 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); | 3601 | wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); |
3523 | } | 3602 | } |
3524 | 3603 | ||
3604 | static void set_btree_ioerr(struct page *page) | ||
3605 | { | ||
3606 | struct extent_buffer *eb = (struct extent_buffer *)page->private; | ||
3607 | struct btrfs_inode *btree_ino = BTRFS_I(eb->fs_info->btree_inode); | ||
3608 | |||
3609 | SetPageError(page); | ||
3610 | if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) | ||
3611 | return; | ||
3612 | |||
3613 | /* | ||
3614 | * If writeback for a btree extent that doesn't belong to a log tree | ||
3615 | * failed, increment the counter transaction->eb_write_errors. | ||
3616 | * We do this because while the transaction is running and before it's | ||
3617 | * committing (when we call filemap_fdata[write|wait]_range against | ||
3618 | * the btree inode), we might have | ||
3619 | * btree_inode->i_mapping->a_ops->writepages() called by the VM - if it | ||
3620 | * returns an error or an error happens during writeback, when we're | ||
3621 | * committing the transaction we wouldn't know about it, since the pages | ||
3622 | * can be no longer dirty nor marked anymore for writeback (if a | ||
3623 | * subsequent modification to the extent buffer didn't happen before the | ||
3624 | * transaction commit), which makes filemap_fdata[write|wait]_range not | ||
3625 | * able to find the pages tagged with SetPageError at transaction | ||
3626 | * commit time. So if this happens we must abort the transaction, | ||
3627 | * otherwise we commit a super block with btree roots that point to | ||
3628 | * btree nodes/leafs whose content on disk is invalid - either garbage | ||
3629 | * or the content of some node/leaf from a past generation that got | ||
3630 | * cowed or deleted and is no longer valid. | ||
3631 | * | ||
3632 | * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would | ||
3633 | * not be enough - we need to distinguish between log tree extents vs | ||
3634 | * non-log tree extents, and the next filemap_fdatawait_range() call | ||
3635 | * will catch and clear such errors in the mapping - and that call might | ||
3636 | * be from a log sync and not from a transaction commit. Also, checking | ||
3637 | * for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction commit time is | ||
3638 | * not done and would not be reliable - the eb might have been released | ||
3639 | * from memory and reading it back again means that flag would not be | ||
3640 | * set (since it's a runtime flag, not persisted on disk). | ||
3641 | * | ||
3642 | * Using the flags below in the btree inode also makes us achieve the | ||
3643 | * goal of AS_EIO/AS_ENOSPC when writepages() returns success, started | ||
3644 | * writeback for all dirty pages and before filemap_fdatawait_range() | ||
3645 | * is called, the writeback for all dirty pages had already finished | ||
3646 | * with errors - because we were not using AS_EIO/AS_ENOSPC, | ||
3647 | * filemap_fdatawait_range() would return success, as it could not know | ||
3648 | * that writeback errors happened (the pages were no longer tagged for | ||
3649 | * writeback). | ||
3650 | */ | ||
3651 | switch (eb->log_index) { | ||
3652 | case -1: | ||
3653 | set_bit(BTRFS_INODE_BTREE_ERR, &btree_ino->runtime_flags); | ||
3654 | break; | ||
3655 | case 0: | ||
3656 | set_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); | ||
3657 | break; | ||
3658 | case 1: | ||
3659 | set_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); | ||
3660 | break; | ||
3661 | default: | ||
3662 | BUG(); /* unexpected, logic error */ | ||
3663 | } | ||
3664 | } | ||
3665 | |||
3525 | static void end_bio_extent_buffer_writepage(struct bio *bio, int err) | 3666 | static void end_bio_extent_buffer_writepage(struct bio *bio, int err) |
3526 | { | 3667 | { |
3527 | struct bio_vec *bvec; | 3668 | struct bio_vec *bvec; |
@@ -3535,10 +3676,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err) | |||
3535 | BUG_ON(!eb); | 3676 | BUG_ON(!eb); |
3536 | done = atomic_dec_and_test(&eb->io_pages); | 3677 | done = atomic_dec_and_test(&eb->io_pages); |
3537 | 3678 | ||
3538 | if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { | 3679 | if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { |
3539 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | ||
3540 | ClearPageUptodate(page); | 3680 | ClearPageUptodate(page); |
3541 | SetPageError(page); | 3681 | set_btree_ioerr(page); |
3542 | } | 3682 | } |
3543 | 3683 | ||
3544 | end_page_writeback(page); | 3684 | end_page_writeback(page); |
@@ -3565,14 +3705,14 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3565 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; | 3705 | int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META; |
3566 | int ret = 0; | 3706 | int ret = 0; |
3567 | 3707 | ||
3568 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3708 | clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); |
3569 | num_pages = num_extent_pages(eb->start, eb->len); | 3709 | num_pages = num_extent_pages(eb->start, eb->len); |
3570 | atomic_set(&eb->io_pages, num_pages); | 3710 | atomic_set(&eb->io_pages, num_pages); |
3571 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) | 3711 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) |
3572 | bio_flags = EXTENT_BIO_TREE_LOG; | 3712 | bio_flags = EXTENT_BIO_TREE_LOG; |
3573 | 3713 | ||
3574 | for (i = 0; i < num_pages; i++) { | 3714 | for (i = 0; i < num_pages; i++) { |
3575 | struct page *p = extent_buffer_page(eb, i); | 3715 | struct page *p = eb->pages[i]; |
3576 | 3716 | ||
3577 | clear_page_dirty_for_io(p); | 3717 | clear_page_dirty_for_io(p); |
3578 | set_page_writeback(p); | 3718 | set_page_writeback(p); |
@@ -3582,8 +3722,8 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3582 | 0, epd->bio_flags, bio_flags); | 3722 | 0, epd->bio_flags, bio_flags); |
3583 | epd->bio_flags = bio_flags; | 3723 | epd->bio_flags = bio_flags; |
3584 | if (ret) { | 3724 | if (ret) { |
3585 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 3725 | set_btree_ioerr(p); |
3586 | SetPageError(p); | 3726 | end_page_writeback(p); |
3587 | if (atomic_sub_and_test(num_pages - i, &eb->io_pages)) | 3727 | if (atomic_sub_and_test(num_pages - i, &eb->io_pages)) |
3588 | end_extent_buffer_writeback(eb); | 3728 | end_extent_buffer_writeback(eb); |
3589 | ret = -EIO; | 3729 | ret = -EIO; |
@@ -3596,7 +3736,8 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3596 | 3736 | ||
3597 | if (unlikely(ret)) { | 3737 | if (unlikely(ret)) { |
3598 | for (; i < num_pages; i++) { | 3738 | for (; i < num_pages; i++) { |
3599 | struct page *p = extent_buffer_page(eb, i); | 3739 | struct page *p = eb->pages[i]; |
3740 | clear_page_dirty_for_io(p); | ||
3600 | unlock_page(p); | 3741 | unlock_page(p); |
3601 | } | 3742 | } |
3602 | } | 3743 | } |
@@ -4166,19 +4307,6 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, | |||
4166 | return NULL; | 4307 | return NULL; |
4167 | } | 4308 | } |
4168 | 4309 | ||
4169 | static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx) | ||
4170 | { | ||
4171 | unsigned long cnt = *((unsigned long *)ctx); | ||
4172 | |||
4173 | cnt++; | ||
4174 | *((unsigned long *)ctx) = cnt; | ||
4175 | |||
4176 | /* Now we're sure that the extent is shared. */ | ||
4177 | if (cnt > 1) | ||
4178 | return 1; | ||
4179 | return 0; | ||
4180 | } | ||
4181 | |||
4182 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 4310 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
4183 | __u64 start, __u64 len, get_extent_t *get_extent) | 4311 | __u64 start, __u64 len, get_extent_t *get_extent) |
4184 | { | 4312 | { |
@@ -4195,6 +4323,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4195 | struct extent_map *em = NULL; | 4323 | struct extent_map *em = NULL; |
4196 | struct extent_state *cached_state = NULL; | 4324 | struct extent_state *cached_state = NULL; |
4197 | struct btrfs_path *path; | 4325 | struct btrfs_path *path; |
4326 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4198 | int end = 0; | 4327 | int end = 0; |
4199 | u64 em_start = 0; | 4328 | u64 em_start = 0; |
4200 | u64 em_len = 0; | 4329 | u64 em_len = 0; |
@@ -4215,8 +4344,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4215 | * lookup the last file extent. We're not using i_size here | 4344 | * lookup the last file extent. We're not using i_size here |
4216 | * because there might be preallocation past i_size | 4345 | * because there might be preallocation past i_size |
4217 | */ | 4346 | */ |
4218 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | 4347 | ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1, |
4219 | path, btrfs_ino(inode), -1, 0); | 4348 | 0); |
4220 | if (ret < 0) { | 4349 | if (ret < 0) { |
4221 | btrfs_free_path(path); | 4350 | btrfs_free_path(path); |
4222 | return ret; | 4351 | return ret; |
@@ -4224,7 +4353,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4224 | WARN_ON(!ret); | 4353 | WARN_ON(!ret); |
4225 | path->slots[0]--; | 4354 | path->slots[0]--; |
4226 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | 4355 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); |
4227 | found_type = btrfs_key_type(&found_key); | 4356 | found_type = found_key.type; |
4228 | 4357 | ||
4229 | /* No extents, but there might be delalloc bits */ | 4358 | /* No extents, but there might be delalloc bits */ |
4230 | if (found_key.objectid != btrfs_ino(inode) || | 4359 | if (found_key.objectid != btrfs_ino(inode) || |
@@ -4309,25 +4438,27 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4309 | } else if (em->block_start == EXTENT_MAP_DELALLOC) { | 4438 | } else if (em->block_start == EXTENT_MAP_DELALLOC) { |
4310 | flags |= (FIEMAP_EXTENT_DELALLOC | | 4439 | flags |= (FIEMAP_EXTENT_DELALLOC | |
4311 | FIEMAP_EXTENT_UNKNOWN); | 4440 | FIEMAP_EXTENT_UNKNOWN); |
4312 | } else { | 4441 | } else if (fieinfo->fi_extents_max) { |
4313 | unsigned long ref_cnt = 0; | 4442 | u64 bytenr = em->block_start - |
4443 | (em->start - em->orig_start); | ||
4314 | 4444 | ||
4315 | disko = em->block_start + offset_in_extent; | 4445 | disko = em->block_start + offset_in_extent; |
4316 | 4446 | ||
4317 | /* | 4447 | /* |
4318 | * As btrfs supports shared space, this information | 4448 | * As btrfs supports shared space, this information |
4319 | * can be exported to userspace tools via | 4449 | * can be exported to userspace tools via |
4320 | * flag FIEMAP_EXTENT_SHARED. | 4450 | * flag FIEMAP_EXTENT_SHARED. If fi_extents_max == 0 |
4451 | * then we're just getting a count and we can skip the | ||
4452 | * lookup stuff. | ||
4321 | */ | 4453 | */ |
4322 | ret = iterate_inodes_from_logical( | 4454 | ret = btrfs_check_shared(NULL, root->fs_info, |
4323 | em->block_start, | 4455 | root->objectid, |
4324 | BTRFS_I(inode)->root->fs_info, | 4456 | btrfs_ino(inode), bytenr); |
4325 | path, count_ext_ref, &ref_cnt); | 4457 | if (ret < 0) |
4326 | if (ret < 0 && ret != -ENOENT) | ||
4327 | goto out_free; | 4458 | goto out_free; |
4328 | 4459 | if (ret) | |
4329 | if (ref_cnt > 1) | ||
4330 | flags |= FIEMAP_EXTENT_SHARED; | 4460 | flags |= FIEMAP_EXTENT_SHARED; |
4461 | ret = 0; | ||
4331 | } | 4462 | } |
4332 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 4463 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
4333 | flags |= FIEMAP_EXTENT_ENCODED; | 4464 | flags |= FIEMAP_EXTENT_ENCODED; |
@@ -4381,24 +4512,21 @@ int extent_buffer_under_io(struct extent_buffer *eb) | |||
4381 | /* | 4512 | /* |
4382 | * Helper for releasing extent buffer page. | 4513 | * Helper for releasing extent buffer page. |
4383 | */ | 4514 | */ |
4384 | static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | 4515 | static void btrfs_release_extent_buffer_page(struct extent_buffer *eb) |
4385 | unsigned long start_idx) | ||
4386 | { | 4516 | { |
4387 | unsigned long index; | 4517 | unsigned long index; |
4388 | unsigned long num_pages; | ||
4389 | struct page *page; | 4518 | struct page *page; |
4390 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); | 4519 | int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags); |
4391 | 4520 | ||
4392 | BUG_ON(extent_buffer_under_io(eb)); | 4521 | BUG_ON(extent_buffer_under_io(eb)); |
4393 | 4522 | ||
4394 | num_pages = num_extent_pages(eb->start, eb->len); | 4523 | index = num_extent_pages(eb->start, eb->len); |
4395 | index = start_idx + num_pages; | 4524 | if (index == 0) |
4396 | if (start_idx >= index) | ||
4397 | return; | 4525 | return; |
4398 | 4526 | ||
4399 | do { | 4527 | do { |
4400 | index--; | 4528 | index--; |
4401 | page = extent_buffer_page(eb, index); | 4529 | page = eb->pages[index]; |
4402 | if (page && mapped) { | 4530 | if (page && mapped) { |
4403 | spin_lock(&page->mapping->private_lock); | 4531 | spin_lock(&page->mapping->private_lock); |
4404 | /* | 4532 | /* |
@@ -4429,7 +4557,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
4429 | /* One for when we alloced the page */ | 4557 | /* One for when we alloced the page */ |
4430 | page_cache_release(page); | 4558 | page_cache_release(page); |
4431 | } | 4559 | } |
4432 | } while (index != start_idx); | 4560 | } while (index != 0); |
4433 | } | 4561 | } |
4434 | 4562 | ||
4435 | /* | 4563 | /* |
@@ -4437,7 +4565,7 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, | |||
4437 | */ | 4565 | */ |
4438 | static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) | 4566 | static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) |
4439 | { | 4567 | { |
4440 | btrfs_release_extent_buffer_page(eb, 0); | 4568 | btrfs_release_extent_buffer_page(eb); |
4441 | __free_extent_buffer(eb); | 4569 | __free_extent_buffer(eb); |
4442 | } | 4570 | } |
4443 | 4571 | ||
@@ -4580,7 +4708,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb, | |||
4580 | 4708 | ||
4581 | num_pages = num_extent_pages(eb->start, eb->len); | 4709 | num_pages = num_extent_pages(eb->start, eb->len); |
4582 | for (i = 0; i < num_pages; i++) { | 4710 | for (i = 0; i < num_pages; i++) { |
4583 | struct page *p = extent_buffer_page(eb, i); | 4711 | struct page *p = eb->pages[i]; |
4712 | |||
4584 | if (p != accessed) | 4713 | if (p != accessed) |
4585 | mark_page_accessed(p); | 4714 | mark_page_accessed(p); |
4586 | } | 4715 | } |
@@ -4749,7 +4878,7 @@ again: | |||
4749 | */ | 4878 | */ |
4750 | SetPageChecked(eb->pages[0]); | 4879 | SetPageChecked(eb->pages[0]); |
4751 | for (i = 1; i < num_pages; i++) { | 4880 | for (i = 1; i < num_pages; i++) { |
4752 | p = extent_buffer_page(eb, i); | 4881 | p = eb->pages[i]; |
4753 | ClearPageChecked(p); | 4882 | ClearPageChecked(p); |
4754 | unlock_page(p); | 4883 | unlock_page(p); |
4755 | } | 4884 | } |
@@ -4794,7 +4923,7 @@ static int release_extent_buffer(struct extent_buffer *eb) | |||
4794 | } | 4923 | } |
4795 | 4924 | ||
4796 | /* Should be safe to release our pages at this point */ | 4925 | /* Should be safe to release our pages at this point */ |
4797 | btrfs_release_extent_buffer_page(eb, 0); | 4926 | btrfs_release_extent_buffer_page(eb); |
4798 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); | 4927 | call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); |
4799 | return 1; | 4928 | return 1; |
4800 | } | 4929 | } |
@@ -4860,7 +4989,7 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb) | |||
4860 | num_pages = num_extent_pages(eb->start, eb->len); | 4989 | num_pages = num_extent_pages(eb->start, eb->len); |
4861 | 4990 | ||
4862 | for (i = 0; i < num_pages; i++) { | 4991 | for (i = 0; i < num_pages; i++) { |
4863 | page = extent_buffer_page(eb, i); | 4992 | page = eb->pages[i]; |
4864 | if (!PageDirty(page)) | 4993 | if (!PageDirty(page)) |
4865 | continue; | 4994 | continue; |
4866 | 4995 | ||
@@ -4896,7 +5025,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb) | |||
4896 | WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); | 5025 | WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)); |
4897 | 5026 | ||
4898 | for (i = 0; i < num_pages; i++) | 5027 | for (i = 0; i < num_pages; i++) |
4899 | set_page_dirty(extent_buffer_page(eb, i)); | 5028 | set_page_dirty(eb->pages[i]); |
4900 | return was_dirty; | 5029 | return was_dirty; |
4901 | } | 5030 | } |
4902 | 5031 | ||
@@ -4909,7 +5038,7 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb) | |||
4909 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 5038 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
4910 | num_pages = num_extent_pages(eb->start, eb->len); | 5039 | num_pages = num_extent_pages(eb->start, eb->len); |
4911 | for (i = 0; i < num_pages; i++) { | 5040 | for (i = 0; i < num_pages; i++) { |
4912 | page = extent_buffer_page(eb, i); | 5041 | page = eb->pages[i]; |
4913 | if (page) | 5042 | if (page) |
4914 | ClearPageUptodate(page); | 5043 | ClearPageUptodate(page); |
4915 | } | 5044 | } |
@@ -4925,7 +5054,7 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb) | |||
4925 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 5054 | set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
4926 | num_pages = num_extent_pages(eb->start, eb->len); | 5055 | num_pages = num_extent_pages(eb->start, eb->len); |
4927 | for (i = 0; i < num_pages; i++) { | 5056 | for (i = 0; i < num_pages; i++) { |
4928 | page = extent_buffer_page(eb, i); | 5057 | page = eb->pages[i]; |
4929 | SetPageUptodate(page); | 5058 | SetPageUptodate(page); |
4930 | } | 5059 | } |
4931 | return 0; | 5060 | return 0; |
@@ -4965,7 +5094,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
4965 | 5094 | ||
4966 | num_pages = num_extent_pages(eb->start, eb->len); | 5095 | num_pages = num_extent_pages(eb->start, eb->len); |
4967 | for (i = start_i; i < num_pages; i++) { | 5096 | for (i = start_i; i < num_pages; i++) { |
4968 | page = extent_buffer_page(eb, i); | 5097 | page = eb->pages[i]; |
4969 | if (wait == WAIT_NONE) { | 5098 | if (wait == WAIT_NONE) { |
4970 | if (!trylock_page(page)) | 5099 | if (!trylock_page(page)) |
4971 | goto unlock_exit; | 5100 | goto unlock_exit; |
@@ -4984,11 +5113,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
4984 | goto unlock_exit; | 5113 | goto unlock_exit; |
4985 | } | 5114 | } |
4986 | 5115 | ||
4987 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 5116 | clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); |
4988 | eb->read_mirror = 0; | 5117 | eb->read_mirror = 0; |
4989 | atomic_set(&eb->io_pages, num_reads); | 5118 | atomic_set(&eb->io_pages, num_reads); |
4990 | for (i = start_i; i < num_pages; i++) { | 5119 | for (i = start_i; i < num_pages; i++) { |
4991 | page = extent_buffer_page(eb, i); | 5120 | page = eb->pages[i]; |
4992 | if (!PageUptodate(page)) { | 5121 | if (!PageUptodate(page)) { |
4993 | ClearPageError(page); | 5122 | ClearPageError(page); |
4994 | err = __extent_read_full_page(tree, page, | 5123 | err = __extent_read_full_page(tree, page, |
@@ -5013,7 +5142,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
5013 | return ret; | 5142 | return ret; |
5014 | 5143 | ||
5015 | for (i = start_i; i < num_pages; i++) { | 5144 | for (i = start_i; i < num_pages; i++) { |
5016 | page = extent_buffer_page(eb, i); | 5145 | page = eb->pages[i]; |
5017 | wait_on_page_locked(page); | 5146 | wait_on_page_locked(page); |
5018 | if (!PageUptodate(page)) | 5147 | if (!PageUptodate(page)) |
5019 | ret = -EIO; | 5148 | ret = -EIO; |
@@ -5024,7 +5153,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
5024 | unlock_exit: | 5153 | unlock_exit: |
5025 | i = start_i; | 5154 | i = start_i; |
5026 | while (locked_pages > 0) { | 5155 | while (locked_pages > 0) { |
5027 | page = extent_buffer_page(eb, i); | 5156 | page = eb->pages[i]; |
5028 | i++; | 5157 | i++; |
5029 | unlock_page(page); | 5158 | unlock_page(page); |
5030 | locked_pages--; | 5159 | locked_pages--; |
@@ -5050,7 +5179,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
5050 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5179 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5051 | 5180 | ||
5052 | while (len > 0) { | 5181 | while (len > 0) { |
5053 | page = extent_buffer_page(eb, i); | 5182 | page = eb->pages[i]; |
5054 | 5183 | ||
5055 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 5184 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
5056 | kaddr = page_address(page); | 5185 | kaddr = page_address(page); |
@@ -5082,7 +5211,7 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, | |||
5082 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5211 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5083 | 5212 | ||
5084 | while (len > 0) { | 5213 | while (len > 0) { |
5085 | page = extent_buffer_page(eb, i); | 5214 | page = eb->pages[i]; |
5086 | 5215 | ||
5087 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 5216 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
5088 | kaddr = page_address(page); | 5217 | kaddr = page_address(page); |
@@ -5131,7 +5260,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
5131 | return -EINVAL; | 5260 | return -EINVAL; |
5132 | } | 5261 | } |
5133 | 5262 | ||
5134 | p = extent_buffer_page(eb, i); | 5263 | p = eb->pages[i]; |
5135 | kaddr = page_address(p); | 5264 | kaddr = page_address(p); |
5136 | *map = kaddr + offset; | 5265 | *map = kaddr + offset; |
5137 | *map_len = PAGE_CACHE_SIZE - offset; | 5266 | *map_len = PAGE_CACHE_SIZE - offset; |
@@ -5157,7 +5286,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
5157 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5286 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5158 | 5287 | ||
5159 | while (len > 0) { | 5288 | while (len > 0) { |
5160 | page = extent_buffer_page(eb, i); | 5289 | page = eb->pages[i]; |
5161 | 5290 | ||
5162 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 5291 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
5163 | 5292 | ||
@@ -5191,7 +5320,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | |||
5191 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5320 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5192 | 5321 | ||
5193 | while (len > 0) { | 5322 | while (len > 0) { |
5194 | page = extent_buffer_page(eb, i); | 5323 | page = eb->pages[i]; |
5195 | WARN_ON(!PageUptodate(page)); | 5324 | WARN_ON(!PageUptodate(page)); |
5196 | 5325 | ||
5197 | cur = min(len, PAGE_CACHE_SIZE - offset); | 5326 | cur = min(len, PAGE_CACHE_SIZE - offset); |
@@ -5221,7 +5350,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, | |||
5221 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); | 5350 | offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); |
5222 | 5351 | ||
5223 | while (len > 0) { | 5352 | while (len > 0) { |
5224 | page = extent_buffer_page(eb, i); | 5353 | page = eb->pages[i]; |
5225 | WARN_ON(!PageUptodate(page)); | 5354 | WARN_ON(!PageUptodate(page)); |
5226 | 5355 | ||
5227 | cur = min(len, PAGE_CACHE_SIZE - offset); | 5356 | cur = min(len, PAGE_CACHE_SIZE - offset); |
@@ -5252,7 +5381,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | |||
5252 | (PAGE_CACHE_SIZE - 1); | 5381 | (PAGE_CACHE_SIZE - 1); |
5253 | 5382 | ||
5254 | while (len > 0) { | 5383 | while (len > 0) { |
5255 | page = extent_buffer_page(dst, i); | 5384 | page = dst->pages[i]; |
5256 | WARN_ON(!PageUptodate(page)); | 5385 | WARN_ON(!PageUptodate(page)); |
5257 | 5386 | ||
5258 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | 5387 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); |
@@ -5330,8 +5459,7 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
5330 | cur = min_t(unsigned long, cur, | 5459 | cur = min_t(unsigned long, cur, |
5331 | (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); | 5460 | (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); |
5332 | 5461 | ||
5333 | copy_pages(extent_buffer_page(dst, dst_i), | 5462 | copy_pages(dst->pages[dst_i], dst->pages[src_i], |
5334 | extent_buffer_page(dst, src_i), | ||
5335 | dst_off_in_page, src_off_in_page, cur); | 5463 | dst_off_in_page, src_off_in_page, cur); |
5336 | 5464 | ||
5337 | src_offset += cur; | 5465 | src_offset += cur; |
@@ -5377,8 +5505,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | |||
5377 | 5505 | ||
5378 | cur = min_t(unsigned long, len, src_off_in_page + 1); | 5506 | cur = min_t(unsigned long, len, src_off_in_page + 1); |
5379 | cur = min(cur, dst_off_in_page + 1); | 5507 | cur = min(cur, dst_off_in_page + 1); |
5380 | copy_pages(extent_buffer_page(dst, dst_i), | 5508 | copy_pages(dst->pages[dst_i], dst->pages[src_i], |
5381 | extent_buffer_page(dst, src_i), | ||
5382 | dst_off_in_page - cur + 1, | 5509 | dst_off_in_page - cur + 1, |
5383 | src_off_in_page - cur + 1, cur); | 5510 | src_off_in_page - cur + 1, cur); |
5384 | 5511 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index ccc264e7bde1..6d4b938be986 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -11,8 +11,6 @@ | |||
11 | #define EXTENT_NEW (1 << 4) | 11 | #define EXTENT_NEW (1 << 4) |
12 | #define EXTENT_DELALLOC (1 << 5) | 12 | #define EXTENT_DELALLOC (1 << 5) |
13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1 << 6) |
14 | #define EXTENT_DEFRAG_DONE (1 << 7) | ||
15 | #define EXTENT_BUFFER_FILLED (1 << 8) | ||
16 | #define EXTENT_BOUNDARY (1 << 9) | 14 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_NODATASUM (1 << 10) | 15 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 16 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
@@ -34,16 +32,16 @@ | |||
34 | 32 | ||
35 | /* these are bit numbers for test/set bit */ | 33 | /* these are bit numbers for test/set bit */ |
36 | #define EXTENT_BUFFER_UPTODATE 0 | 34 | #define EXTENT_BUFFER_UPTODATE 0 |
37 | #define EXTENT_BUFFER_BLOCKING 1 | ||
38 | #define EXTENT_BUFFER_DIRTY 2 | 35 | #define EXTENT_BUFFER_DIRTY 2 |
39 | #define EXTENT_BUFFER_CORRUPT 3 | 36 | #define EXTENT_BUFFER_CORRUPT 3 |
40 | #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ | 37 | #define EXTENT_BUFFER_READAHEAD 4 /* this got triggered by readahead */ |
41 | #define EXTENT_BUFFER_TREE_REF 5 | 38 | #define EXTENT_BUFFER_TREE_REF 5 |
42 | #define EXTENT_BUFFER_STALE 6 | 39 | #define EXTENT_BUFFER_STALE 6 |
43 | #define EXTENT_BUFFER_WRITEBACK 7 | 40 | #define EXTENT_BUFFER_WRITEBACK 7 |
44 | #define EXTENT_BUFFER_IOERR 8 | 41 | #define EXTENT_BUFFER_READ_ERR 8 /* read IO error */ |
45 | #define EXTENT_BUFFER_DUMMY 9 | 42 | #define EXTENT_BUFFER_DUMMY 9 |
46 | #define EXTENT_BUFFER_IN_TREE 10 | 43 | #define EXTENT_BUFFER_IN_TREE 10 |
44 | #define EXTENT_BUFFER_WRITE_ERR 11 /* write IO error */ | ||
47 | 45 | ||
48 | /* these are flags for extent_clear_unlock_delalloc */ | 46 | /* these are flags for extent_clear_unlock_delalloc */ |
49 | #define PAGE_UNLOCK (1 << 0) | 47 | #define PAGE_UNLOCK (1 << 0) |
@@ -57,7 +55,6 @@ | |||
57 | * map has page->private set to one. | 55 | * map has page->private set to one. |
58 | */ | 56 | */ |
59 | #define EXTENT_PAGE_PRIVATE 1 | 57 | #define EXTENT_PAGE_PRIVATE 1 |
60 | #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 | ||
61 | 58 | ||
62 | struct extent_state; | 59 | struct extent_state; |
63 | struct btrfs_root; | 60 | struct btrfs_root; |
@@ -108,7 +105,6 @@ struct extent_state { | |||
108 | struct rb_node rb_node; | 105 | struct rb_node rb_node; |
109 | 106 | ||
110 | /* ADD NEW ELEMENTS AFTER THIS */ | 107 | /* ADD NEW ELEMENTS AFTER THIS */ |
111 | struct extent_io_tree *tree; | ||
112 | wait_queue_head_t wq; | 108 | wait_queue_head_t wq; |
113 | atomic_t refs; | 109 | atomic_t refs; |
114 | unsigned long state; | 110 | unsigned long state; |
@@ -126,8 +122,6 @@ struct extent_state { | |||
126 | struct extent_buffer { | 122 | struct extent_buffer { |
127 | u64 start; | 123 | u64 start; |
128 | unsigned long len; | 124 | unsigned long len; |
129 | unsigned long map_start; | ||
130 | unsigned long map_len; | ||
131 | unsigned long bflags; | 125 | unsigned long bflags; |
132 | struct btrfs_fs_info *fs_info; | 126 | struct btrfs_fs_info *fs_info; |
133 | spinlock_t refs_lock; | 127 | spinlock_t refs_lock; |
@@ -144,7 +138,9 @@ struct extent_buffer { | |||
144 | atomic_t blocking_readers; | 138 | atomic_t blocking_readers; |
145 | atomic_t spinning_readers; | 139 | atomic_t spinning_readers; |
146 | atomic_t spinning_writers; | 140 | atomic_t spinning_writers; |
147 | int lock_nested; | 141 | short lock_nested; |
142 | /* >= 0 if eb belongs to a log tree, -1 otherwise */ | ||
143 | short log_index; | ||
148 | 144 | ||
149 | /* protects write locks */ | 145 | /* protects write locks */ |
150 | rwlock_t lock; | 146 | rwlock_t lock; |
@@ -286,12 +282,6 @@ static inline unsigned long num_extent_pages(u64 start, u64 len) | |||
286 | (start >> PAGE_CACHE_SHIFT); | 282 | (start >> PAGE_CACHE_SHIFT); |
287 | } | 283 | } |
288 | 284 | ||
289 | static inline struct page *extent_buffer_page(struct extent_buffer *eb, | ||
290 | unsigned long i) | ||
291 | { | ||
292 | return eb->pages[i]; | ||
293 | } | ||
294 | |||
295 | static inline void extent_buffer_get(struct extent_buffer *eb) | 285 | static inline void extent_buffer_get(struct extent_buffer *eb) |
296 | { | 286 | { |
297 | atomic_inc(&eb->refs); | 287 | atomic_inc(&eb->refs); |
@@ -341,18 +331,50 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask); | |||
341 | 331 | ||
342 | struct btrfs_fs_info; | 332 | struct btrfs_fs_info; |
343 | 333 | ||
344 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | 334 | int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, |
345 | u64 length, u64 logical, struct page *page, | 335 | struct page *page, unsigned int pg_offset, |
346 | int mirror_num); | 336 | int mirror_num); |
337 | int clean_io_failure(struct inode *inode, u64 start, struct page *page, | ||
338 | unsigned int pg_offset); | ||
347 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); | 339 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
348 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | 340 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, |
349 | int mirror_num); | 341 | int mirror_num); |
342 | |||
343 | /* | ||
344 | * When IO fails, either with EIO or csum verification fails, we | ||
345 | * try other mirrors that might have a good copy of the data. This | ||
346 | * io_failure_record is used to record state as we go through all the | ||
347 | * mirrors. If another mirror has good data, the page is set up to date | ||
348 | * and things continue. If a good mirror can't be found, the original | ||
349 | * bio end_io callback is called to indicate things have failed. | ||
350 | */ | ||
351 | struct io_failure_record { | ||
352 | struct page *page; | ||
353 | u64 start; | ||
354 | u64 len; | ||
355 | u64 logical; | ||
356 | unsigned long bio_flags; | ||
357 | int this_mirror; | ||
358 | int failed_mirror; | ||
359 | int in_validation; | ||
360 | }; | ||
361 | |||
362 | void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end); | ||
363 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | ||
364 | struct io_failure_record **failrec_ret); | ||
365 | int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, | ||
366 | struct io_failure_record *failrec, int fail_mirror); | ||
367 | struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | ||
368 | struct io_failure_record *failrec, | ||
369 | struct page *page, int pg_offset, int icsum, | ||
370 | bio_end_io_t *endio_func, void *data); | ||
371 | int free_io_failure(struct inode *inode, struct io_failure_record *rec); | ||
350 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 372 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
351 | noinline u64 find_lock_delalloc_range(struct inode *inode, | 373 | noinline u64 find_lock_delalloc_range(struct inode *inode, |
352 | struct extent_io_tree *tree, | 374 | struct extent_io_tree *tree, |
353 | struct page *locked_page, u64 *start, | 375 | struct page *locked_page, u64 *start, |
354 | u64 *end, u64 max_bytes); | 376 | u64 *end, u64 max_bytes); |
377 | #endif | ||
355 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, | 378 | struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, |
356 | u64 start, unsigned long len); | 379 | u64 start, unsigned long len); |
357 | #endif | 380 | #endif |
358 | #endif | ||
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54c84daec9b5..783a94355efd 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -55,7 +55,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
55 | return -ENOMEM; | 55 | return -ENOMEM; |
56 | file_key.objectid = objectid; | 56 | file_key.objectid = objectid; |
57 | file_key.offset = pos; | 57 | file_key.offset = pos; |
58 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 58 | file_key.type = BTRFS_EXTENT_DATA_KEY; |
59 | 59 | ||
60 | path->leave_spinning = 1; | 60 | path->leave_spinning = 1; |
61 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 61 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
@@ -100,7 +100,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
100 | 100 | ||
101 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 101 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
102 | file_key.offset = bytenr; | 102 | file_key.offset = bytenr; |
103 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); | 103 | file_key.type = BTRFS_EXTENT_CSUM_KEY; |
104 | ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); | 104 | ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); |
105 | if (ret < 0) | 105 | if (ret < 0) |
106 | goto fail; | 106 | goto fail; |
@@ -111,7 +111,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans, | |||
111 | goto fail; | 111 | goto fail; |
112 | path->slots[0]--; | 112 | path->slots[0]--; |
113 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 113 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
114 | if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY) | 114 | if (found_key.type != BTRFS_EXTENT_CSUM_KEY) |
115 | goto fail; | 115 | goto fail; |
116 | 116 | ||
117 | csum_offset = (bytenr - found_key.offset) >> | 117 | csum_offset = (bytenr - found_key.offset) >> |
@@ -148,7 +148,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
148 | 148 | ||
149 | file_key.objectid = objectid; | 149 | file_key.objectid = objectid; |
150 | file_key.offset = offset; | 150 | file_key.offset = offset; |
151 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 151 | file_key.type = BTRFS_EXTENT_DATA_KEY; |
152 | ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); | 152 | ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); |
153 | return ret; | 153 | return ret; |
154 | } | 154 | } |
@@ -299,19 +299,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
299 | } | 299 | } |
300 | 300 | ||
301 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | 301 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, |
302 | struct btrfs_dio_private *dip, struct bio *bio, | 302 | struct bio *bio, u64 offset) |
303 | u64 offset) | ||
304 | { | 303 | { |
305 | int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr; | 304 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); |
306 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | ||
307 | int ret; | ||
308 | |||
309 | len >>= inode->i_sb->s_blocksize_bits; | ||
310 | len *= csum_size; | ||
311 | |||
312 | ret = __btrfs_lookup_bio_sums(root, inode, bio, offset, | ||
313 | (u32 *)(dip->csum + len), 1); | ||
314 | return ret; | ||
315 | } | 305 | } |
316 | 306 | ||
317 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 307 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
@@ -329,8 +319,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | |||
329 | u64 csum_end; | 319 | u64 csum_end; |
330 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | 320 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
331 | 321 | ||
332 | ASSERT(start == ALIGN(start, root->sectorsize) && | 322 | ASSERT(IS_ALIGNED(start, root->sectorsize) && |
333 | (end + 1) == ALIGN(end + 1, root->sectorsize)); | 323 | IS_ALIGNED(end + 1, root->sectorsize)); |
334 | 324 | ||
335 | path = btrfs_alloc_path(); | 325 | path = btrfs_alloc_path(); |
336 | if (!path) | 326 | if (!path) |
@@ -720,7 +710,7 @@ again: | |||
720 | bytenr = sums->bytenr + total_bytes; | 710 | bytenr = sums->bytenr + total_bytes; |
721 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; | 711 | file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; |
722 | file_key.offset = bytenr; | 712 | file_key.offset = bytenr; |
723 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY); | 713 | file_key.type = BTRFS_EXTENT_CSUM_KEY; |
724 | 714 | ||
725 | item = btrfs_lookup_csum(trans, root, path, bytenr, 1); | 715 | item = btrfs_lookup_csum(trans, root, path, bytenr, 1); |
726 | if (!IS_ERR(item)) { | 716 | if (!IS_ERR(item)) { |
@@ -790,7 +780,7 @@ again: | |||
790 | csum_offset = (bytenr - found_key.offset) >> | 780 | csum_offset = (bytenr - found_key.offset) >> |
791 | root->fs_info->sb->s_blocksize_bits; | 781 | root->fs_info->sb->s_blocksize_bits; |
792 | 782 | ||
793 | if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY || | 783 | if (found_key.type != BTRFS_EXTENT_CSUM_KEY || |
794 | found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || | 784 | found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || |
795 | csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { | 785 | csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { |
796 | goto insert; | 786 | goto insert; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ff1cc0399b9a..a18ceabd99a8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -299,7 +299,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
299 | 299 | ||
300 | /* get the inode */ | 300 | /* get the inode */ |
301 | key.objectid = defrag->root; | 301 | key.objectid = defrag->root; |
302 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 302 | key.type = BTRFS_ROOT_ITEM_KEY; |
303 | key.offset = (u64)-1; | 303 | key.offset = (u64)-1; |
304 | 304 | ||
305 | index = srcu_read_lock(&fs_info->subvol_srcu); | 305 | index = srcu_read_lock(&fs_info->subvol_srcu); |
@@ -311,7 +311,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | |||
311 | } | 311 | } |
312 | 312 | ||
313 | key.objectid = defrag->ino; | 313 | key.objectid = defrag->ino; |
314 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 314 | key.type = BTRFS_INODE_ITEM_KEY; |
315 | key.offset = 0; | 315 | key.offset = 0; |
316 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | 316 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); |
317 | if (IS_ERR(inode)) { | 317 | if (IS_ERR(inode)) { |
@@ -452,7 +452,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
452 | if (unlikely(copied == 0)) | 452 | if (unlikely(copied == 0)) |
453 | break; | 453 | break; |
454 | 454 | ||
455 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 455 | if (copied < PAGE_CACHE_SIZE - offset) { |
456 | offset += copied; | 456 | offset += copied; |
457 | } else { | 457 | } else { |
458 | pg++; | 458 | pg++; |
@@ -1481,9 +1481,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1481 | bool force_page_uptodate = false; | 1481 | bool force_page_uptodate = false; |
1482 | bool need_unlock; | 1482 | bool need_unlock; |
1483 | 1483 | ||
1484 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1484 | nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE), |
1485 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 1485 | PAGE_CACHE_SIZE / (sizeof(struct page *))); |
1486 | (sizeof(struct page *))); | ||
1487 | nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); | 1486 | nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); |
1488 | nrptrs = max(nrptrs, 8); | 1487 | nrptrs = max(nrptrs, 8); |
1489 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 1488 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
@@ -1497,8 +1496,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1497 | size_t write_bytes = min(iov_iter_count(i), | 1496 | size_t write_bytes = min(iov_iter_count(i), |
1498 | nrptrs * (size_t)PAGE_CACHE_SIZE - | 1497 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
1499 | offset); | 1498 | offset); |
1500 | size_t num_pages = (write_bytes + offset + | 1499 | size_t num_pages = DIV_ROUND_UP(write_bytes + offset, |
1501 | PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1500 | PAGE_CACHE_SIZE); |
1502 | size_t reserve_bytes; | 1501 | size_t reserve_bytes; |
1503 | size_t dirty_pages; | 1502 | size_t dirty_pages; |
1504 | size_t copied; | 1503 | size_t copied; |
@@ -1526,9 +1525,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1526 | * our prealloc extent may be smaller than | 1525 | * our prealloc extent may be smaller than |
1527 | * write_bytes, so scale down. | 1526 | * write_bytes, so scale down. |
1528 | */ | 1527 | */ |
1529 | num_pages = (write_bytes + offset + | 1528 | num_pages = DIV_ROUND_UP(write_bytes + offset, |
1530 | PAGE_CACHE_SIZE - 1) >> | 1529 | PAGE_CACHE_SIZE); |
1531 | PAGE_CACHE_SHIFT; | ||
1532 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; | 1530 | reserve_bytes = num_pages << PAGE_CACHE_SHIFT; |
1533 | ret = 0; | 1531 | ret = 0; |
1534 | } else { | 1532 | } else { |
@@ -1590,9 +1588,8 @@ again: | |||
1590 | dirty_pages = 0; | 1588 | dirty_pages = 0; |
1591 | } else { | 1589 | } else { |
1592 | force_page_uptodate = false; | 1590 | force_page_uptodate = false; |
1593 | dirty_pages = (copied + offset + | 1591 | dirty_pages = DIV_ROUND_UP(copied + offset, |
1594 | PAGE_CACHE_SIZE - 1) >> | 1592 | PAGE_CACHE_SIZE); |
1595 | PAGE_CACHE_SHIFT; | ||
1596 | } | 1593 | } |
1597 | 1594 | ||
1598 | /* | 1595 | /* |
@@ -1653,7 +1650,7 @@ again: | |||
1653 | cond_resched(); | 1650 | cond_resched(); |
1654 | 1651 | ||
1655 | balance_dirty_pages_ratelimited(inode->i_mapping); | 1652 | balance_dirty_pages_ratelimited(inode->i_mapping); |
1656 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1653 | if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1) |
1657 | btrfs_btree_balance_dirty(root); | 1654 | btrfs_btree_balance_dirty(root); |
1658 | 1655 | ||
1659 | pos += copied; | 1656 | pos += copied; |
@@ -1795,7 +1792,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1795 | if (sync) | 1792 | if (sync) |
1796 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1793 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
1797 | 1794 | ||
1798 | if (unlikely(file->f_flags & O_DIRECT)) { | 1795 | if (file->f_flags & O_DIRECT) { |
1799 | num_written = __btrfs_direct_write(iocb, from, pos); | 1796 | num_written = __btrfs_direct_write(iocb, from, pos); |
1800 | } else { | 1797 | } else { |
1801 | num_written = __btrfs_buffered_write(file, from, pos); | 1798 | num_written = __btrfs_buffered_write(file, from, pos); |
@@ -1852,6 +1849,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1852 | return 0; | 1849 | return 0; |
1853 | } | 1850 | } |
1854 | 1851 | ||
1852 | static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) | ||
1853 | { | ||
1854 | int ret; | ||
1855 | |||
1856 | atomic_inc(&BTRFS_I(inode)->sync_writers); | ||
1857 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1858 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1859 | &BTRFS_I(inode)->runtime_flags)) | ||
1860 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1861 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
1862 | |||
1863 | return ret; | ||
1864 | } | ||
1865 | |||
1855 | /* | 1866 | /* |
1856 | * fsync call for both files and directories. This logs the inode into | 1867 | * fsync call for both files and directories. This logs the inode into |
1857 | * the tree log instead of forcing full commits whenever possible. | 1868 | * the tree log instead of forcing full commits whenever possible. |
@@ -1881,30 +1892,64 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1881 | * multi-task, and make the performance up. See | 1892 | * multi-task, and make the performance up. See |
1882 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. | 1893 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. |
1883 | */ | 1894 | */ |
1884 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1895 | ret = start_ordered_ops(inode, start, end); |
1885 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1886 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1887 | &BTRFS_I(inode)->runtime_flags)) | ||
1888 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1889 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
1890 | if (ret) | 1896 | if (ret) |
1891 | return ret; | 1897 | return ret; |
1892 | 1898 | ||
1893 | mutex_lock(&inode->i_mutex); | 1899 | mutex_lock(&inode->i_mutex); |
1894 | |||
1895 | /* | ||
1896 | * We flush the dirty pages again to avoid some dirty pages in the | ||
1897 | * range being left. | ||
1898 | */ | ||
1899 | atomic_inc(&root->log_batch); | 1900 | atomic_inc(&root->log_batch); |
1900 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 1901 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
1901 | &BTRFS_I(inode)->runtime_flags); | 1902 | &BTRFS_I(inode)->runtime_flags); |
1903 | /* | ||
1904 | * We might have had more pages made dirty after calling | ||
1905 | * start_ordered_ops and before acquiring the inode's i_mutex. | ||
1906 | */ | ||
1902 | if (full_sync) { | 1907 | if (full_sync) { |
1908 | /* | ||
1909 | * For a full sync, we need to make sure any ordered operations | ||
1910 | * start and finish before we start logging the inode, so that | ||
1911 | * all extents are persisted and the respective file extent | ||
1912 | * items are in the fs/subvol btree. | ||
1913 | */ | ||
1903 | ret = btrfs_wait_ordered_range(inode, start, end - start + 1); | 1914 | ret = btrfs_wait_ordered_range(inode, start, end - start + 1); |
1904 | if (ret) { | 1915 | } else { |
1905 | mutex_unlock(&inode->i_mutex); | 1916 | /* |
1906 | goto out; | 1917 | * Start any new ordered operations before starting to log the |
1907 | } | 1918 | * inode. We will wait for them to finish in btrfs_sync_log(). |
1919 | * | ||
1920 | * Right before acquiring the inode's mutex, we might have new | ||
1921 | * writes dirtying pages, which won't immediately start the | ||
1922 | * respective ordered operations - that is done through the | ||
1923 | * fill_delalloc callbacks invoked from the writepage and | ||
1924 | * writepages address space operations. So make sure we start | ||
1925 | * all ordered operations before starting to log our inode. Not | ||
1926 | * doing this means that while logging the inode, writeback | ||
1927 | * could start and invoke writepage/writepages, which would call | ||
1928 | * the fill_delalloc callbacks (cow_file_range, | ||
1929 | * submit_compressed_extents). These callbacks add first an | ||
1930 | * extent map to the modified list of extents and then create | ||
1931 | * the respective ordered operation, which means in | ||
1932 | * tree-log.c:btrfs_log_inode() we might capture all existing | ||
1933 | * ordered operations (with btrfs_get_logged_extents()) before | ||
1934 | * the fill_delalloc callback adds its ordered operation, and by | ||
1935 | * the time we visit the modified list of extent maps (with | ||
1936 | * btrfs_log_changed_extents()), we see and process the extent | ||
1937 | * map they created. We then use the extent map to construct a | ||
1938 | * file extent item for logging without waiting for the | ||
1939 | * respective ordered operation to finish - this file extent | ||
1940 | * item points to a disk location that might not have yet been | ||
1941 | * written to, containing random data - so after a crash a log | ||
1942 | * replay will make our inode have file extent items that point | ||
1943 | * to disk locations containing invalid data, as we returned | ||
1944 | * success to userspace without waiting for the respective | ||
1945 | * ordered operation to finish, because it wasn't captured by | ||
1946 | * btrfs_get_logged_extents(). | ||
1947 | */ | ||
1948 | ret = start_ordered_ops(inode, start, end); | ||
1949 | } | ||
1950 | if (ret) { | ||
1951 | mutex_unlock(&inode->i_mutex); | ||
1952 | goto out; | ||
1908 | } | 1953 | } |
1909 | atomic_inc(&root->log_batch); | 1954 | atomic_inc(&root->log_batch); |
1910 | 1955 | ||
@@ -1984,6 +2029,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1984 | */ | 2029 | */ |
1985 | mutex_unlock(&inode->i_mutex); | 2030 | mutex_unlock(&inode->i_mutex); |
1986 | 2031 | ||
2032 | /* | ||
2033 | * If any of the ordered extents had an error, just return it to user | ||
2034 | * space, so that the application knows some writes didn't succeed and | ||
2035 | * can take proper action (e.g. retry). Blindly committing the | ||
2036 | * transaction in this case would fool userspace into thinking everything was | ||
2037 | * successful. And we also want to make sure our log doesn't contain | ||
2038 | * file extent items pointing to extents that weren't fully written to - | ||
2039 | * just like in the non fast fsync path, where we check for the ordered | ||
2040 | * operation's error flag before writing to the log tree and return -EIO | ||
2041 | * if any of them had this flag set (btrfs_wait_ordered_range) - | ||
2042 | * therefore we need to check for errors in the ordered operations, | ||
2043 | * which are indicated by ctx.io_err. | ||
2044 | */ | ||
2045 | if (ctx.io_err) { | ||
2046 | btrfs_end_transaction(trans, root); | ||
2047 | ret = ctx.io_err; | ||
2048 | goto out; | ||
2049 | } | ||
2050 | |||
1987 | if (ret != BTRFS_NO_LOG_SYNC) { | 2051 | if (ret != BTRFS_NO_LOG_SYNC) { |
1988 | if (!ret) { | 2052 | if (!ret) { |
1989 | ret = btrfs_sync_log(trans, root, &ctx); | 2053 | ret = btrfs_sync_log(trans, root, &ctx); |
@@ -2621,23 +2685,28 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) | |||
2621 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2685 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2622 | struct extent_map *em = NULL; | 2686 | struct extent_map *em = NULL; |
2623 | struct extent_state *cached_state = NULL; | 2687 | struct extent_state *cached_state = NULL; |
2624 | u64 lockstart = *offset; | 2688 | u64 lockstart; |
2625 | u64 lockend = i_size_read(inode); | 2689 | u64 lockend; |
2626 | u64 start = *offset; | 2690 | u64 start; |
2627 | u64 len = i_size_read(inode); | 2691 | u64 len; |
2628 | int ret = 0; | 2692 | int ret = 0; |
2629 | 2693 | ||
2630 | lockend = max_t(u64, root->sectorsize, lockend); | 2694 | if (inode->i_size == 0) |
2695 | return -ENXIO; | ||
2696 | |||
2697 | /* | ||
2698 | * *offset can be negative, in this case we start finding DATA/HOLE from | ||
2699 | * the very start of the file. | ||
2700 | */ | ||
2701 | start = max_t(loff_t, 0, *offset); | ||
2702 | |||
2703 | lockstart = round_down(start, root->sectorsize); | ||
2704 | lockend = round_up(i_size_read(inode), root->sectorsize); | ||
2631 | if (lockend <= lockstart) | 2705 | if (lockend <= lockstart) |
2632 | lockend = lockstart + root->sectorsize; | 2706 | lockend = lockstart + root->sectorsize; |
2633 | |||
2634 | lockend--; | 2707 | lockend--; |
2635 | len = lockend - lockstart + 1; | 2708 | len = lockend - lockstart + 1; |
2636 | 2709 | ||
2637 | len = max_t(u64, len, root->sectorsize); | ||
2638 | if (inode->i_size == 0) | ||
2639 | return -ENXIO; | ||
2640 | |||
2641 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, | 2710 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, |
2642 | &cached_state); | 2711 | &cached_state); |
2643 | 2712 | ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2b0a627cb5f9..33848196550e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -279,8 +279,7 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, | |||
279 | int num_pages; | 279 | int num_pages; |
280 | int check_crcs = 0; | 280 | int check_crcs = 0; |
281 | 281 | ||
282 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 282 | num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); |
283 | PAGE_CACHE_SHIFT; | ||
284 | 283 | ||
285 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) | 284 | if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) |
286 | check_crcs = 1; | 285 | check_crcs = 1; |
@@ -1998,6 +1997,128 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, | |||
1998 | return merged; | 1997 | return merged; |
1999 | } | 1998 | } |
2000 | 1999 | ||
2000 | static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, | ||
2001 | struct btrfs_free_space *info, | ||
2002 | bool update_stat) | ||
2003 | { | ||
2004 | struct btrfs_free_space *bitmap; | ||
2005 | unsigned long i; | ||
2006 | unsigned long j; | ||
2007 | const u64 end = info->offset + info->bytes; | ||
2008 | const u64 bitmap_offset = offset_to_bitmap(ctl, end); | ||
2009 | u64 bytes; | ||
2010 | |||
2011 | bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); | ||
2012 | if (!bitmap) | ||
2013 | return false; | ||
2014 | |||
2015 | i = offset_to_bit(bitmap->offset, ctl->unit, end); | ||
2016 | j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); | ||
2017 | if (j == i) | ||
2018 | return false; | ||
2019 | bytes = (j - i) * ctl->unit; | ||
2020 | info->bytes += bytes; | ||
2021 | |||
2022 | if (update_stat) | ||
2023 | bitmap_clear_bits(ctl, bitmap, end, bytes); | ||
2024 | else | ||
2025 | __bitmap_clear_bits(ctl, bitmap, end, bytes); | ||
2026 | |||
2027 | if (!bitmap->bytes) | ||
2028 | free_bitmap(ctl, bitmap); | ||
2029 | |||
2030 | return true; | ||
2031 | } | ||
2032 | |||
2033 | static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, | ||
2034 | struct btrfs_free_space *info, | ||
2035 | bool update_stat) | ||
2036 | { | ||
2037 | struct btrfs_free_space *bitmap; | ||
2038 | u64 bitmap_offset; | ||
2039 | unsigned long i; | ||
2040 | unsigned long j; | ||
2041 | unsigned long prev_j; | ||
2042 | u64 bytes; | ||
2043 | |||
2044 | bitmap_offset = offset_to_bitmap(ctl, info->offset); | ||
2045 | /* If we're on a boundary, try the previous logical bitmap. */ | ||
2046 | if (bitmap_offset == info->offset) { | ||
2047 | if (info->offset == 0) | ||
2048 | return false; | ||
2049 | bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); | ||
2050 | } | ||
2051 | |||
2052 | bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); | ||
2053 | if (!bitmap) | ||
2054 | return false; | ||
2055 | |||
2056 | i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; | ||
2057 | j = 0; | ||
2058 | prev_j = (unsigned long)-1; | ||
2059 | for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { | ||
2060 | if (j > i) | ||
2061 | break; | ||
2062 | prev_j = j; | ||
2063 | } | ||
2064 | if (prev_j == i) | ||
2065 | return false; | ||
2066 | |||
2067 | if (prev_j == (unsigned long)-1) | ||
2068 | bytes = (i + 1) * ctl->unit; | ||
2069 | else | ||
2070 | bytes = (i - prev_j) * ctl->unit; | ||
2071 | |||
2072 | info->offset -= bytes; | ||
2073 | info->bytes += bytes; | ||
2074 | |||
2075 | if (update_stat) | ||
2076 | bitmap_clear_bits(ctl, bitmap, info->offset, bytes); | ||
2077 | else | ||
2078 | __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); | ||
2079 | |||
2080 | if (!bitmap->bytes) | ||
2081 | free_bitmap(ctl, bitmap); | ||
2082 | |||
2083 | return true; | ||
2084 | } | ||
2085 | |||
2086 | /* | ||
2087 | * We prefer always to allocate from extent entries, both for clustered and | ||
2088 | * non-clustered allocation requests. So when attempting to add a new extent | ||
2089 | * entry, try to see if there's adjacent free space in bitmap entries, and if | ||
2090 | * there is, migrate that space from the bitmaps to the extent. | ||
2091 | * Like this we get better chances of satisfying space allocation requests | ||
2092 | * because we attempt to satisfy them based on a single cache entry, and never | ||
2093 | * on 2 or more entries - even if the entries represent a contiguous free space | ||
2094 | * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry | ||
2095 | * ends). | ||
2096 | */ | ||
2097 | static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, | ||
2098 | struct btrfs_free_space *info, | ||
2099 | bool update_stat) | ||
2100 | { | ||
2101 | /* | ||
2102 | * Only work with disconnected entries, as we can change their offset, | ||
2103 | * and must be extent entries. | ||
2104 | */ | ||
2105 | ASSERT(!info->bitmap); | ||
2106 | ASSERT(RB_EMPTY_NODE(&info->offset_index)); | ||
2107 | |||
2108 | if (ctl->total_bitmaps > 0) { | ||
2109 | bool stole_end; | ||
2110 | bool stole_front = false; | ||
2111 | |||
2112 | stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); | ||
2113 | if (ctl->total_bitmaps > 0) | ||
2114 | stole_front = steal_from_bitmap_to_front(ctl, info, | ||
2115 | update_stat); | ||
2116 | |||
2117 | if (stole_end || stole_front) | ||
2118 | try_merge_free_space(ctl, info, update_stat); | ||
2119 | } | ||
2120 | } | ||
2121 | |||
2001 | int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | 2122 | int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, |
2002 | u64 offset, u64 bytes) | 2123 | u64 offset, u64 bytes) |
2003 | { | 2124 | { |
@@ -2010,6 +2131,7 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | |||
2010 | 2131 | ||
2011 | info->offset = offset; | 2132 | info->offset = offset; |
2012 | info->bytes = bytes; | 2133 | info->bytes = bytes; |
2134 | RB_CLEAR_NODE(&info->offset_index); | ||
2013 | 2135 | ||
2014 | spin_lock(&ctl->tree_lock); | 2136 | spin_lock(&ctl->tree_lock); |
2015 | 2137 | ||
@@ -2029,6 +2151,14 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, | |||
2029 | goto out; | 2151 | goto out; |
2030 | } | 2152 | } |
2031 | link: | 2153 | link: |
2154 | /* | ||
2155 | * Only steal free space from adjacent bitmaps if we're sure we're not | ||
2156 | * going to add the new free space to existing bitmap entries - because | ||
2157 | * that would mean unnecessary work that would be reverted. Therefore | ||
2158 | * attempt to steal space from bitmaps if we're adding an extent entry. | ||
2159 | */ | ||
2160 | steal_from_bitmap(ctl, info, true); | ||
2161 | |||
2032 | ret = link_free_space(ctl, info); | 2162 | ret = link_free_space(ctl, info); |
2033 | if (ret) | 2163 | if (ret) |
2034 | kmem_cache_free(btrfs_free_space_cachep, info); | 2164 | kmem_cache_free(btrfs_free_space_cachep, info); |
@@ -2205,10 +2335,13 @@ __btrfs_return_cluster_to_free_space( | |||
2205 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | 2335 | entry = rb_entry(node, struct btrfs_free_space, offset_index); |
2206 | node = rb_next(&entry->offset_index); | 2336 | node = rb_next(&entry->offset_index); |
2207 | rb_erase(&entry->offset_index, &cluster->root); | 2337 | rb_erase(&entry->offset_index, &cluster->root); |
2338 | RB_CLEAR_NODE(&entry->offset_index); | ||
2208 | 2339 | ||
2209 | bitmap = (entry->bitmap != NULL); | 2340 | bitmap = (entry->bitmap != NULL); |
2210 | if (!bitmap) | 2341 | if (!bitmap) { |
2211 | try_merge_free_space(ctl, entry, false); | 2342 | try_merge_free_space(ctl, entry, false); |
2343 | steal_from_bitmap(ctl, entry, false); | ||
2344 | } | ||
2212 | tree_insert_offset(&ctl->free_space_offset, | 2345 | tree_insert_offset(&ctl->free_space_offset, |
2213 | entry->offset, &entry->offset_index, bitmap); | 2346 | entry->offset, &entry->offset_index, bitmap); |
2214 | } | 2347 | } |
@@ -3033,10 +3166,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, | |||
3033 | { | 3166 | { |
3034 | struct inode *inode = NULL; | 3167 | struct inode *inode = NULL; |
3035 | 3168 | ||
3036 | spin_lock(&root->cache_lock); | 3169 | spin_lock(&root->ino_cache_lock); |
3037 | if (root->cache_inode) | 3170 | if (root->ino_cache_inode) |
3038 | inode = igrab(root->cache_inode); | 3171 | inode = igrab(root->ino_cache_inode); |
3039 | spin_unlock(&root->cache_lock); | 3172 | spin_unlock(&root->ino_cache_lock); |
3040 | if (inode) | 3173 | if (inode) |
3041 | return inode; | 3174 | return inode; |
3042 | 3175 | ||
@@ -3044,10 +3177,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, | |||
3044 | if (IS_ERR(inode)) | 3177 | if (IS_ERR(inode)) |
3045 | return inode; | 3178 | return inode; |
3046 | 3179 | ||
3047 | spin_lock(&root->cache_lock); | 3180 | spin_lock(&root->ino_cache_lock); |
3048 | if (!btrfs_fs_closing(root->fs_info)) | 3181 | if (!btrfs_fs_closing(root->fs_info)) |
3049 | root->cache_inode = igrab(inode); | 3182 | root->ino_cache_inode = igrab(inode); |
3050 | spin_unlock(&root->cache_lock); | 3183 | spin_unlock(&root->ino_cache_lock); |
3051 | 3184 | ||
3052 | return inode; | 3185 | return inode; |
3053 | } | 3186 | } |
@@ -3176,6 +3309,7 @@ again: | |||
3176 | map = NULL; | 3309 | map = NULL; |
3177 | add_new_bitmap(ctl, info, offset); | 3310 | add_new_bitmap(ctl, info, offset); |
3178 | bitmap_info = info; | 3311 | bitmap_info = info; |
3312 | info = NULL; | ||
3179 | } | 3313 | } |
3180 | 3314 | ||
3181 | bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); | 3315 | bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); |
@@ -3186,6 +3320,8 @@ again: | |||
3186 | if (bytes) | 3320 | if (bytes) |
3187 | goto again; | 3321 | goto again; |
3188 | 3322 | ||
3323 | if (info) | ||
3324 | kmem_cache_free(btrfs_free_space_cachep, info); | ||
3189 | if (map) | 3325 | if (map) |
3190 | kfree(map); | 3326 | kfree(map); |
3191 | return 0; | 3327 | return 0; |
@@ -3260,6 +3396,7 @@ have_info: | |||
3260 | goto have_info; | 3396 | goto have_info; |
3261 | } | 3397 | } |
3262 | 3398 | ||
3399 | ret = 0; | ||
3263 | goto out; | 3400 | goto out; |
3264 | } | 3401 | } |
3265 | 3402 | ||
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 85889aa82c62..64f15bb30a81 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c | |||
@@ -20,10 +20,8 @@ static struct crypto_shash *tfm; | |||
20 | int __init btrfs_hash_init(void) | 20 | int __init btrfs_hash_init(void) |
21 | { | 21 | { |
22 | tfm = crypto_alloc_shash("crc32c", 0, 0); | 22 | tfm = crypto_alloc_shash("crc32c", 0, 0); |
23 | if (IS_ERR(tfm)) | ||
24 | return PTR_ERR(tfm); | ||
25 | 23 | ||
26 | return 0; | 24 | return PTR_ERR_OR_ZERO(tfm); |
27 | } | 25 | } |
28 | 26 | ||
29 | void btrfs_hash_exit(void) | 27 | void btrfs_hash_exit(void) |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 2be38df703c9..8ffa4783cbf4 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -135,7 +135,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, | |||
135 | u32 item_size; | 135 | u32 item_size; |
136 | 136 | ||
137 | key.objectid = inode_objectid; | 137 | key.objectid = inode_objectid; |
138 | btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY); | 138 | key.type = BTRFS_INODE_EXTREF_KEY; |
139 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); | 139 | key.offset = btrfs_extref_hash(ref_objectid, name, name_len); |
140 | 140 | ||
141 | path = btrfs_alloc_path(); | 141 | path = btrfs_alloc_path(); |
@@ -209,7 +209,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
209 | 209 | ||
210 | key.objectid = inode_objectid; | 210 | key.objectid = inode_objectid; |
211 | key.offset = ref_objectid; | 211 | key.offset = ref_objectid; |
212 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 212 | key.type = BTRFS_INODE_REF_KEY; |
213 | 213 | ||
214 | path = btrfs_alloc_path(); | 214 | path = btrfs_alloc_path(); |
215 | if (!path) | 215 | if (!path) |
@@ -337,7 +337,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
337 | 337 | ||
338 | key.objectid = inode_objectid; | 338 | key.objectid = inode_objectid; |
339 | key.offset = ref_objectid; | 339 | key.offset = ref_objectid; |
340 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 340 | key.type = BTRFS_INODE_REF_KEY; |
341 | 341 | ||
342 | path = btrfs_alloc_path(); | 342 | path = btrfs_alloc_path(); |
343 | if (!path) | 343 | if (!path) |
@@ -400,7 +400,7 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | |||
400 | struct btrfs_key key; | 400 | struct btrfs_key key; |
401 | int ret; | 401 | int ret; |
402 | key.objectid = objectid; | 402 | key.objectid = objectid; |
403 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 403 | key.type = BTRFS_INODE_ITEM_KEY; |
404 | key.offset = 0; | 404 | key.offset = 0; |
405 | 405 | ||
406 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 406 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
@@ -420,13 +420,13 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root | |||
420 | struct btrfs_key found_key; | 420 | struct btrfs_key found_key; |
421 | 421 | ||
422 | ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); | 422 | ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); |
423 | if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && | 423 | if (ret > 0 && location->type == BTRFS_ROOT_ITEM_KEY && |
424 | location->offset == (u64)-1 && path->slots[0] != 0) { | 424 | location->offset == (u64)-1 && path->slots[0] != 0) { |
425 | slot = path->slots[0] - 1; | 425 | slot = path->slots[0] - 1; |
426 | leaf = path->nodes[0]; | 426 | leaf = path->nodes[0]; |
427 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 427 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
428 | if (found_key.objectid == location->objectid && | 428 | if (found_key.objectid == location->objectid && |
429 | btrfs_key_type(&found_key) == btrfs_key_type(location)) { | 429 | found_key.type == location->type) { |
430 | path->slots[0]--; | 430 | path->slots[0]--; |
431 | return 0; | 431 | return 0; |
432 | } | 432 | } |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 888fbe19079f..83d646bd2e4b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -87,7 +87,7 @@ again: | |||
87 | */ | 87 | */ |
88 | btrfs_item_key_to_cpu(leaf, &key, 0); | 88 | btrfs_item_key_to_cpu(leaf, &key, 0); |
89 | btrfs_release_path(path); | 89 | btrfs_release_path(path); |
90 | root->cache_progress = last; | 90 | root->ino_cache_progress = last; |
91 | up_read(&fs_info->commit_root_sem); | 91 | up_read(&fs_info->commit_root_sem); |
92 | schedule_timeout(1); | 92 | schedule_timeout(1); |
93 | goto again; | 93 | goto again; |
@@ -106,7 +106,7 @@ again: | |||
106 | if (last != (u64)-1 && last + 1 != key.objectid) { | 106 | if (last != (u64)-1 && last + 1 != key.objectid) { |
107 | __btrfs_add_free_space(ctl, last + 1, | 107 | __btrfs_add_free_space(ctl, last + 1, |
108 | key.objectid - last - 1); | 108 | key.objectid - last - 1); |
109 | wake_up(&root->cache_wait); | 109 | wake_up(&root->ino_cache_wait); |
110 | } | 110 | } |
111 | 111 | ||
112 | last = key.objectid; | 112 | last = key.objectid; |
@@ -119,14 +119,14 @@ next: | |||
119 | root->highest_objectid - last - 1); | 119 | root->highest_objectid - last - 1); |
120 | } | 120 | } |
121 | 121 | ||
122 | spin_lock(&root->cache_lock); | 122 | spin_lock(&root->ino_cache_lock); |
123 | root->cached = BTRFS_CACHE_FINISHED; | 123 | root->ino_cache_state = BTRFS_CACHE_FINISHED; |
124 | spin_unlock(&root->cache_lock); | 124 | spin_unlock(&root->ino_cache_lock); |
125 | 125 | ||
126 | root->cache_progress = (u64)-1; | 126 | root->ino_cache_progress = (u64)-1; |
127 | btrfs_unpin_free_ino(root); | 127 | btrfs_unpin_free_ino(root); |
128 | out: | 128 | out: |
129 | wake_up(&root->cache_wait); | 129 | wake_up(&root->ino_cache_wait); |
130 | up_read(&fs_info->commit_root_sem); | 130 | up_read(&fs_info->commit_root_sem); |
131 | 131 | ||
132 | btrfs_free_path(path); | 132 | btrfs_free_path(path); |
@@ -144,20 +144,20 @@ static void start_caching(struct btrfs_root *root) | |||
144 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | 144 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
145 | return; | 145 | return; |
146 | 146 | ||
147 | spin_lock(&root->cache_lock); | 147 | spin_lock(&root->ino_cache_lock); |
148 | if (root->cached != BTRFS_CACHE_NO) { | 148 | if (root->ino_cache_state != BTRFS_CACHE_NO) { |
149 | spin_unlock(&root->cache_lock); | 149 | spin_unlock(&root->ino_cache_lock); |
150 | return; | 150 | return; |
151 | } | 151 | } |
152 | 152 | ||
153 | root->cached = BTRFS_CACHE_STARTED; | 153 | root->ino_cache_state = BTRFS_CACHE_STARTED; |
154 | spin_unlock(&root->cache_lock); | 154 | spin_unlock(&root->ino_cache_lock); |
155 | 155 | ||
156 | ret = load_free_ino_cache(root->fs_info, root); | 156 | ret = load_free_ino_cache(root->fs_info, root); |
157 | if (ret == 1) { | 157 | if (ret == 1) { |
158 | spin_lock(&root->cache_lock); | 158 | spin_lock(&root->ino_cache_lock); |
159 | root->cached = BTRFS_CACHE_FINISHED; | 159 | root->ino_cache_state = BTRFS_CACHE_FINISHED; |
160 | spin_unlock(&root->cache_lock); | 160 | spin_unlock(&root->ino_cache_lock); |
161 | return; | 161 | return; |
162 | } | 162 | } |
163 | 163 | ||
@@ -196,11 +196,11 @@ again: | |||
196 | 196 | ||
197 | start_caching(root); | 197 | start_caching(root); |
198 | 198 | ||
199 | wait_event(root->cache_wait, | 199 | wait_event(root->ino_cache_wait, |
200 | root->cached == BTRFS_CACHE_FINISHED || | 200 | root->ino_cache_state == BTRFS_CACHE_FINISHED || |
201 | root->free_ino_ctl->free_space > 0); | 201 | root->free_ino_ctl->free_space > 0); |
202 | 202 | ||
203 | if (root->cached == BTRFS_CACHE_FINISHED && | 203 | if (root->ino_cache_state == BTRFS_CACHE_FINISHED && |
204 | root->free_ino_ctl->free_space == 0) | 204 | root->free_ino_ctl->free_space == 0) |
205 | return -ENOSPC; | 205 | return -ENOSPC; |
206 | else | 206 | else |
@@ -214,17 +214,17 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid) | |||
214 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | 214 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) |
215 | return; | 215 | return; |
216 | again: | 216 | again: |
217 | if (root->cached == BTRFS_CACHE_FINISHED) { | 217 | if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { |
218 | __btrfs_add_free_space(pinned, objectid, 1); | 218 | __btrfs_add_free_space(pinned, objectid, 1); |
219 | } else { | 219 | } else { |
220 | down_write(&root->fs_info->commit_root_sem); | 220 | down_write(&root->fs_info->commit_root_sem); |
221 | spin_lock(&root->cache_lock); | 221 | spin_lock(&root->ino_cache_lock); |
222 | if (root->cached == BTRFS_CACHE_FINISHED) { | 222 | if (root->ino_cache_state == BTRFS_CACHE_FINISHED) { |
223 | spin_unlock(&root->cache_lock); | 223 | spin_unlock(&root->ino_cache_lock); |
224 | up_write(&root->fs_info->commit_root_sem); | 224 | up_write(&root->fs_info->commit_root_sem); |
225 | goto again; | 225 | goto again; |
226 | } | 226 | } |
227 | spin_unlock(&root->cache_lock); | 227 | spin_unlock(&root->ino_cache_lock); |
228 | 228 | ||
229 | start_caching(root); | 229 | start_caching(root); |
230 | 230 | ||
@@ -235,10 +235,10 @@ again: | |||
235 | } | 235 | } |
236 | 236 | ||
237 | /* | 237 | /* |
238 | * When a transaction is committed, we'll move those inode numbers which | 238 | * When a transaction is committed, we'll move those inode numbers which are |
239 | * are smaller than root->cache_progress from pinned tree to free_ino tree, | 239 | * smaller than root->ino_cache_progress from pinned tree to free_ino tree, and |
240 | * and others will just be dropped, because the commit root we were | 240 | * others will just be dropped, because the commit root we were searching has |
241 | * searching has changed. | 241 | * changed. |
242 | * | 242 | * |
243 | * Must be called with root->fs_info->commit_root_sem held | 243 | * Must be called with root->fs_info->commit_root_sem held |
244 | */ | 244 | */ |
@@ -261,10 +261,10 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) | |||
261 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 261 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
262 | BUG_ON(info->bitmap); /* Logic error */ | 262 | BUG_ON(info->bitmap); /* Logic error */ |
263 | 263 | ||
264 | if (info->offset > root->cache_progress) | 264 | if (info->offset > root->ino_cache_progress) |
265 | goto free; | 265 | goto free; |
266 | else if (info->offset + info->bytes > root->cache_progress) | 266 | else if (info->offset + info->bytes > root->ino_cache_progress) |
267 | count = root->cache_progress - info->offset + 1; | 267 | count = root->ino_cache_progress - info->offset + 1; |
268 | else | 268 | else |
269 | count = info->bytes; | 269 | count = info->bytes; |
270 | 270 | ||
@@ -462,13 +462,13 @@ again: | |||
462 | } | 462 | } |
463 | } | 463 | } |
464 | 464 | ||
465 | spin_lock(&root->cache_lock); | 465 | spin_lock(&root->ino_cache_lock); |
466 | if (root->cached != BTRFS_CACHE_FINISHED) { | 466 | if (root->ino_cache_state != BTRFS_CACHE_FINISHED) { |
467 | ret = -1; | 467 | ret = -1; |
468 | spin_unlock(&root->cache_lock); | 468 | spin_unlock(&root->ino_cache_lock); |
469 | goto out_put; | 469 | goto out_put; |
470 | } | 470 | } |
471 | spin_unlock(&root->cache_lock); | 471 | spin_unlock(&root->ino_cache_lock); |
472 | 472 | ||
473 | spin_lock(&ctl->tree_lock); | 473 | spin_lock(&ctl->tree_lock); |
474 | prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; | 474 | prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 016c403bfe7e..fc9c0439caa3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -153,7 +153,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
153 | 153 | ||
154 | key.objectid = btrfs_ino(inode); | 154 | key.objectid = btrfs_ino(inode); |
155 | key.offset = start; | 155 | key.offset = start; |
156 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 156 | key.type = BTRFS_EXTENT_DATA_KEY; |
157 | 157 | ||
158 | datasize = btrfs_file_extent_calc_inline_size(cur_size); | 158 | datasize = btrfs_file_extent_calc_inline_size(cur_size); |
159 | path->leave_spinning = 1; | 159 | path->leave_spinning = 1; |
@@ -249,8 +249,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, | |||
249 | data_len = compressed_size; | 249 | data_len = compressed_size; |
250 | 250 | ||
251 | if (start > 0 || | 251 | if (start > 0 || |
252 | actual_end >= PAGE_CACHE_SIZE || | 252 | actual_end > PAGE_CACHE_SIZE || |
253 | data_len >= BTRFS_MAX_INLINE_DATA_SIZE(root) || | 253 | data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) || |
254 | (!compressed_size && | 254 | (!compressed_size && |
255 | (actual_end & (root->sectorsize - 1)) == 0) || | 255 | (actual_end & (root->sectorsize - 1)) == 0) || |
256 | end + 1 < isize || | 256 | end + 1 < isize || |
@@ -348,6 +348,23 @@ static noinline int add_async_extent(struct async_cow *cow, | |||
348 | return 0; | 348 | return 0; |
349 | } | 349 | } |
350 | 350 | ||
351 | static inline int inode_need_compress(struct inode *inode) | ||
352 | { | ||
353 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
354 | |||
355 | /* force compress */ | ||
356 | if (btrfs_test_opt(root, FORCE_COMPRESS)) | ||
357 | return 1; | ||
358 | /* bad compression ratios */ | ||
359 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) | ||
360 | return 0; | ||
361 | if (btrfs_test_opt(root, COMPRESS) || | ||
362 | BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || | ||
363 | BTRFS_I(inode)->force_compress) | ||
364 | return 1; | ||
365 | return 0; | ||
366 | } | ||
367 | |||
351 | /* | 368 | /* |
352 | * we create compressed extents in two phases. The first | 369 | * we create compressed extents in two phases. The first |
353 | * phase compresses a range of pages that have already been | 370 | * phase compresses a range of pages that have already been |
@@ -444,10 +461,7 @@ again: | |||
444 | * inode has not been flagged as nocompress. This flag can | 461 | * inode has not been flagged as nocompress. This flag can |
445 | * change at any time if we discover bad compression ratios. | 462 | * change at any time if we discover bad compression ratios. |
446 | */ | 463 | */ |
447 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && | 464 | if (inode_need_compress(inode)) { |
448 | (btrfs_test_opt(root, COMPRESS) || | ||
449 | (BTRFS_I(inode)->force_compress) || | ||
450 | (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { | ||
451 | WARN_ON(pages); | 465 | WARN_ON(pages); |
452 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); | 466 | pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); |
453 | if (!pages) { | 467 | if (!pages) { |
@@ -1094,7 +1108,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
1094 | async_cow->locked_page = locked_page; | 1108 | async_cow->locked_page = locked_page; |
1095 | async_cow->start = start; | 1109 | async_cow->start = start; |
1096 | 1110 | ||
1097 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) | 1111 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && |
1112 | !btrfs_test_opt(root, FORCE_COMPRESS)) | ||
1098 | cur_end = end; | 1113 | cur_end = end; |
1099 | else | 1114 | else |
1100 | cur_end = min(end, start + 512 * 1024 - 1); | 1115 | cur_end = min(end, start + 512 * 1024 - 1); |
@@ -1445,6 +1460,26 @@ error: | |||
1445 | return ret; | 1460 | return ret; |
1446 | } | 1461 | } |
1447 | 1462 | ||
1463 | static inline int need_force_cow(struct inode *inode, u64 start, u64 end) | ||
1464 | { | ||
1465 | |||
1466 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
1467 | !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) | ||
1468 | return 0; | ||
1469 | |||
1470 | /* | ||
1471 | * @defrag_bytes is a hint value, no spinlock held here, | ||
1472 | * if is not zero, it means the file is defragging. | ||
1473 | * Force cow if given extent needs to be defragged. | ||
1474 | */ | ||
1475 | if (BTRFS_I(inode)->defrag_bytes && | ||
1476 | test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | ||
1477 | EXTENT_DEFRAG, 0, NULL)) | ||
1478 | return 1; | ||
1479 | |||
1480 | return 0; | ||
1481 | } | ||
1482 | |||
1448 | /* | 1483 | /* |
1449 | * extent_io.c call back to do delayed allocation processing | 1484 | * extent_io.c call back to do delayed allocation processing |
1450 | */ | 1485 | */ |
@@ -1453,17 +1488,15 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1453 | unsigned long *nr_written) | 1488 | unsigned long *nr_written) |
1454 | { | 1489 | { |
1455 | int ret; | 1490 | int ret; |
1456 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1491 | int force_cow = need_force_cow(inode, start, end); |
1457 | 1492 | ||
1458 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) { | 1493 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { |
1459 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1494 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1460 | page_started, 1, nr_written); | 1495 | page_started, 1, nr_written); |
1461 | } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) { | 1496 | } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { |
1462 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1497 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1463 | page_started, 0, nr_written); | 1498 | page_started, 0, nr_written); |
1464 | } else if (!btrfs_test_opt(root, COMPRESS) && | 1499 | } else if (!inode_need_compress(inode)) { |
1465 | !(BTRFS_I(inode)->force_compress) && | ||
1466 | !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) { | ||
1467 | ret = cow_file_range(inode, locked_page, start, end, | 1500 | ret = cow_file_range(inode, locked_page, start, end, |
1468 | page_started, nr_written, 1); | 1501 | page_started, nr_written, 1); |
1469 | } else { | 1502 | } else { |
@@ -1555,6 +1588,8 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1555 | struct extent_state *state, unsigned long *bits) | 1588 | struct extent_state *state, unsigned long *bits) |
1556 | { | 1589 | { |
1557 | 1590 | ||
1591 | if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) | ||
1592 | WARN_ON(1); | ||
1558 | /* | 1593 | /* |
1559 | * set_bit and clear bit hooks normally require _irqsave/restore | 1594 | * set_bit and clear bit hooks normally require _irqsave/restore |
1560 | * but in this case, we are only testing for the DELALLOC | 1595 | * but in this case, we are only testing for the DELALLOC |
@@ -1577,6 +1612,8 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1577 | root->fs_info->delalloc_batch); | 1612 | root->fs_info->delalloc_batch); |
1578 | spin_lock(&BTRFS_I(inode)->lock); | 1613 | spin_lock(&BTRFS_I(inode)->lock); |
1579 | BTRFS_I(inode)->delalloc_bytes += len; | 1614 | BTRFS_I(inode)->delalloc_bytes += len; |
1615 | if (*bits & EXTENT_DEFRAG) | ||
1616 | BTRFS_I(inode)->defrag_bytes += len; | ||
1580 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, | 1617 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
1581 | &BTRFS_I(inode)->runtime_flags)) | 1618 | &BTRFS_I(inode)->runtime_flags)) |
1582 | btrfs_add_delalloc_inodes(root, inode); | 1619 | btrfs_add_delalloc_inodes(root, inode); |
@@ -1591,6 +1628,13 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1591 | struct extent_state *state, | 1628 | struct extent_state *state, |
1592 | unsigned long *bits) | 1629 | unsigned long *bits) |
1593 | { | 1630 | { |
1631 | u64 len = state->end + 1 - state->start; | ||
1632 | |||
1633 | spin_lock(&BTRFS_I(inode)->lock); | ||
1634 | if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) | ||
1635 | BTRFS_I(inode)->defrag_bytes -= len; | ||
1636 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1637 | |||
1594 | /* | 1638 | /* |
1595 | * set_bit and clear bit hooks normally require _irqsave/restore | 1639 | * set_bit and clear bit hooks normally require _irqsave/restore |
1596 | * but in this case, we are only testing for the DELALLOC | 1640 | * but in this case, we are only testing for the DELALLOC |
@@ -1598,7 +1642,6 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1598 | */ | 1642 | */ |
1599 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1643 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1600 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1644 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1601 | u64 len = state->end + 1 - state->start; | ||
1602 | bool do_list = !btrfs_is_free_space_inode(inode); | 1645 | bool do_list = !btrfs_is_free_space_inode(inode); |
1603 | 1646 | ||
1604 | if (*bits & EXTENT_FIRST_DELALLOC) { | 1647 | if (*bits & EXTENT_FIRST_DELALLOC) { |
@@ -2660,6 +2703,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
2660 | goto out; | 2703 | goto out; |
2661 | } | 2704 | } |
2662 | 2705 | ||
2706 | btrfs_free_io_failure_record(inode, ordered_extent->file_offset, | ||
2707 | ordered_extent->file_offset + | ||
2708 | ordered_extent->len - 1); | ||
2709 | |||
2663 | if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { | 2710 | if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { |
2664 | truncated = true; | 2711 | truncated = true; |
2665 | logical_len = ordered_extent->truncated_len; | 2712 | logical_len = ordered_extent->truncated_len; |
@@ -2856,6 +2903,40 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2856 | return 0; | 2903 | return 0; |
2857 | } | 2904 | } |
2858 | 2905 | ||
2906 | static int __readpage_endio_check(struct inode *inode, | ||
2907 | struct btrfs_io_bio *io_bio, | ||
2908 | int icsum, struct page *page, | ||
2909 | int pgoff, u64 start, size_t len) | ||
2910 | { | ||
2911 | char *kaddr; | ||
2912 | u32 csum_expected; | ||
2913 | u32 csum = ~(u32)0; | ||
2914 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | ||
2915 | DEFAULT_RATELIMIT_BURST); | ||
2916 | |||
2917 | csum_expected = *(((u32 *)io_bio->csum) + icsum); | ||
2918 | |||
2919 | kaddr = kmap_atomic(page); | ||
2920 | csum = btrfs_csum_data(kaddr + pgoff, csum, len); | ||
2921 | btrfs_csum_final(csum, (char *)&csum); | ||
2922 | if (csum != csum_expected) | ||
2923 | goto zeroit; | ||
2924 | |||
2925 | kunmap_atomic(kaddr); | ||
2926 | return 0; | ||
2927 | zeroit: | ||
2928 | if (__ratelimit(&_rs)) | ||
2929 | btrfs_info(BTRFS_I(inode)->root->fs_info, | ||
2930 | "csum failed ino %llu off %llu csum %u expected csum %u", | ||
2931 | btrfs_ino(inode), start, csum, csum_expected); | ||
2932 | memset(kaddr + pgoff, 1, len); | ||
2933 | flush_dcache_page(page); | ||
2934 | kunmap_atomic(kaddr); | ||
2935 | if (csum_expected == 0) | ||
2936 | return 0; | ||
2937 | return -EIO; | ||
2938 | } | ||
2939 | |||
2859 | /* | 2940 | /* |
2860 | * when reads are done, we need to check csums to verify the data is correct | 2941 | * when reads are done, we need to check csums to verify the data is correct |
2861 | * if there's a match, we allow the bio to finish. If not, the code in | 2942 | * if there's a match, we allow the bio to finish. If not, the code in |
@@ -2868,20 +2949,15 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
2868 | size_t offset = start - page_offset(page); | 2949 | size_t offset = start - page_offset(page); |
2869 | struct inode *inode = page->mapping->host; | 2950 | struct inode *inode = page->mapping->host; |
2870 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2951 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2871 | char *kaddr; | ||
2872 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2952 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2873 | u32 csum_expected; | ||
2874 | u32 csum = ~(u32)0; | ||
2875 | static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, | ||
2876 | DEFAULT_RATELIMIT_BURST); | ||
2877 | 2953 | ||
2878 | if (PageChecked(page)) { | 2954 | if (PageChecked(page)) { |
2879 | ClearPageChecked(page); | 2955 | ClearPageChecked(page); |
2880 | goto good; | 2956 | return 0; |
2881 | } | 2957 | } |
2882 | 2958 | ||
2883 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) | 2959 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) |
2884 | goto good; | 2960 | return 0; |
2885 | 2961 | ||
2886 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 2962 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
2887 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { | 2963 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
@@ -2891,28 +2967,8 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
2891 | } | 2967 | } |
2892 | 2968 | ||
2893 | phy_offset >>= inode->i_sb->s_blocksize_bits; | 2969 | phy_offset >>= inode->i_sb->s_blocksize_bits; |
2894 | csum_expected = *(((u32 *)io_bio->csum) + phy_offset); | 2970 | return __readpage_endio_check(inode, io_bio, phy_offset, page, offset, |
2895 | 2971 | start, (size_t)(end - start + 1)); | |
2896 | kaddr = kmap_atomic(page); | ||
2897 | csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); | ||
2898 | btrfs_csum_final(csum, (char *)&csum); | ||
2899 | if (csum != csum_expected) | ||
2900 | goto zeroit; | ||
2901 | |||
2902 | kunmap_atomic(kaddr); | ||
2903 | good: | ||
2904 | return 0; | ||
2905 | |||
2906 | zeroit: | ||
2907 | if (__ratelimit(&_rs)) | ||
2908 | btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", | ||
2909 | btrfs_ino(page->mapping->host), start, csum, csum_expected); | ||
2910 | memset(kaddr + offset, 1, end - start + 1); | ||
2911 | flush_dcache_page(page); | ||
2912 | kunmap_atomic(kaddr); | ||
2913 | if (csum_expected == 0) | ||
2914 | return 0; | ||
2915 | return -EIO; | ||
2916 | } | 2972 | } |
2917 | 2973 | ||
2918 | struct delayed_iput { | 2974 | struct delayed_iput { |
@@ -3159,7 +3215,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3159 | path->reada = -1; | 3215 | path->reada = -1; |
3160 | 3216 | ||
3161 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 3217 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
3162 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 3218 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
3163 | key.offset = (u64)-1; | 3219 | key.offset = (u64)-1; |
3164 | 3220 | ||
3165 | while (1) { | 3221 | while (1) { |
@@ -3186,7 +3242,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
3186 | /* make sure the item matches what we want */ | 3242 | /* make sure the item matches what we want */ |
3187 | if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) | 3243 | if (found_key.objectid != BTRFS_ORPHAN_OBJECTID) |
3188 | break; | 3244 | break; |
3189 | if (btrfs_key_type(&found_key) != BTRFS_ORPHAN_ITEM_KEY) | 3245 | if (found_key.type != BTRFS_ORPHAN_ITEM_KEY) |
3190 | break; | 3246 | break; |
3191 | 3247 | ||
3192 | /* release the path since we're done with it */ | 3248 | /* release the path since we're done with it */ |
@@ -3662,7 +3718,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
3662 | * without delay | 3718 | * without delay |
3663 | */ | 3719 | */ |
3664 | if (!btrfs_is_free_space_inode(inode) | 3720 | if (!btrfs_is_free_space_inode(inode) |
3665 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | 3721 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID |
3722 | && !root->fs_info->log_root_recovering) { | ||
3666 | btrfs_update_root_times(trans, root); | 3723 | btrfs_update_root_times(trans, root); |
3667 | 3724 | ||
3668 | ret = btrfs_delayed_update_inode(trans, root, inode); | 3725 | ret = btrfs_delayed_update_inode(trans, root, inode); |
@@ -4085,7 +4142,7 @@ search_again: | |||
4085 | fi = NULL; | 4142 | fi = NULL; |
4086 | leaf = path->nodes[0]; | 4143 | leaf = path->nodes[0]; |
4087 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 4144 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
4088 | found_type = btrfs_key_type(&found_key); | 4145 | found_type = found_key.type; |
4089 | 4146 | ||
4090 | if (found_key.objectid != ino) | 4147 | if (found_key.objectid != ino) |
4091 | break; | 4148 | break; |
@@ -4747,6 +4804,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
4747 | /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ | 4804 | /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ |
4748 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 4805 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
4749 | 4806 | ||
4807 | btrfs_free_io_failure_record(inode, 0, (u64)-1); | ||
4808 | |||
4750 | if (root->fs_info->log_root_recovering) { | 4809 | if (root->fs_info->log_root_recovering) { |
4751 | BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | 4810 | BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
4752 | &BTRFS_I(inode)->runtime_flags)); | 4811 | &BTRFS_I(inode)->runtime_flags)); |
@@ -5331,7 +5390,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5331 | btrfs_get_delayed_items(inode, &ins_list, &del_list); | 5390 | btrfs_get_delayed_items(inode, &ins_list, &del_list); |
5332 | } | 5391 | } |
5333 | 5392 | ||
5334 | btrfs_set_key_type(&key, key_type); | 5393 | key.type = key_type; |
5335 | key.offset = ctx->pos; | 5394 | key.offset = ctx->pos; |
5336 | key.objectid = btrfs_ino(inode); | 5395 | key.objectid = btrfs_ino(inode); |
5337 | 5396 | ||
@@ -5356,7 +5415,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5356 | 5415 | ||
5357 | if (found_key.objectid != key.objectid) | 5416 | if (found_key.objectid != key.objectid) |
5358 | break; | 5417 | break; |
5359 | if (btrfs_key_type(&found_key) != key_type) | 5418 | if (found_key.type != key_type) |
5360 | break; | 5419 | break; |
5361 | if (found_key.offset < ctx->pos) | 5420 | if (found_key.offset < ctx->pos) |
5362 | goto next; | 5421 | goto next; |
@@ -5568,7 +5627,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) | |||
5568 | int ret; | 5627 | int ret; |
5569 | 5628 | ||
5570 | key.objectid = btrfs_ino(inode); | 5629 | key.objectid = btrfs_ino(inode); |
5571 | btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); | 5630 | key.type = BTRFS_DIR_INDEX_KEY; |
5572 | key.offset = (u64)-1; | 5631 | key.offset = (u64)-1; |
5573 | 5632 | ||
5574 | path = btrfs_alloc_path(); | 5633 | path = btrfs_alloc_path(); |
@@ -5600,7 +5659,7 @@ static int btrfs_set_inode_index_count(struct inode *inode) | |||
5600 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 5659 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
5601 | 5660 | ||
5602 | if (found_key.objectid != btrfs_ino(inode) || | 5661 | if (found_key.objectid != btrfs_ino(inode) || |
5603 | btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) { | 5662 | found_key.type != BTRFS_DIR_INDEX_KEY) { |
5604 | BTRFS_I(inode)->index_cnt = 2; | 5663 | BTRFS_I(inode)->index_cnt = 2; |
5605 | goto out; | 5664 | goto out; |
5606 | } | 5665 | } |
@@ -5718,7 +5777,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5718 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); | 5777 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
5719 | 5778 | ||
5720 | key[0].objectid = objectid; | 5779 | key[0].objectid = objectid; |
5721 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 5780 | key[0].type = BTRFS_INODE_ITEM_KEY; |
5722 | key[0].offset = 0; | 5781 | key[0].offset = 0; |
5723 | 5782 | ||
5724 | sizes[0] = sizeof(struct btrfs_inode_item); | 5783 | sizes[0] = sizeof(struct btrfs_inode_item); |
@@ -5731,7 +5790,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5731 | * add more hard links than can fit in the ref item. | 5790 | * add more hard links than can fit in the ref item. |
5732 | */ | 5791 | */ |
5733 | key[1].objectid = objectid; | 5792 | key[1].objectid = objectid; |
5734 | btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); | 5793 | key[1].type = BTRFS_INODE_REF_KEY; |
5735 | key[1].offset = ref_objectid; | 5794 | key[1].offset = ref_objectid; |
5736 | 5795 | ||
5737 | sizes[1] = name_len + sizeof(*ref); | 5796 | sizes[1] = name_len + sizeof(*ref); |
@@ -5740,7 +5799,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
5740 | location = &BTRFS_I(inode)->location; | 5799 | location = &BTRFS_I(inode)->location; |
5741 | location->objectid = objectid; | 5800 | location->objectid = objectid; |
5742 | location->offset = 0; | 5801 | location->offset = 0; |
5743 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | 5802 | location->type = BTRFS_INODE_ITEM_KEY; |
5744 | 5803 | ||
5745 | ret = btrfs_insert_inode_locked(inode); | 5804 | ret = btrfs_insert_inode_locked(inode); |
5746 | if (ret < 0) | 5805 | if (ret < 0) |
@@ -5832,7 +5891,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
5832 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); | 5891 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); |
5833 | } else { | 5892 | } else { |
5834 | key.objectid = ino; | 5893 | key.objectid = ino; |
5835 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 5894 | key.type = BTRFS_INODE_ITEM_KEY; |
5836 | key.offset = 0; | 5895 | key.offset = 0; |
5837 | } | 5896 | } |
5838 | 5897 | ||
@@ -6191,21 +6250,60 @@ out_fail_inode: | |||
6191 | goto out_fail; | 6250 | goto out_fail; |
6192 | } | 6251 | } |
6193 | 6252 | ||
6253 | /* Find next extent map of a given extent map, caller needs to ensure locks */ | ||
6254 | static struct extent_map *next_extent_map(struct extent_map *em) | ||
6255 | { | ||
6256 | struct rb_node *next; | ||
6257 | |||
6258 | next = rb_next(&em->rb_node); | ||
6259 | if (!next) | ||
6260 | return NULL; | ||
6261 | return container_of(next, struct extent_map, rb_node); | ||
6262 | } | ||
6263 | |||
6264 | static struct extent_map *prev_extent_map(struct extent_map *em) | ||
6265 | { | ||
6266 | struct rb_node *prev; | ||
6267 | |||
6268 | prev = rb_prev(&em->rb_node); | ||
6269 | if (!prev) | ||
6270 | return NULL; | ||
6271 | return container_of(prev, struct extent_map, rb_node); | ||
6272 | } | ||
6273 | |||
6194 | /* helper for btfs_get_extent. Given an existing extent in the tree, | 6274 | /* helper for btfs_get_extent. Given an existing extent in the tree, |
6275 | * the existing extent is the nearest extent to map_start, | ||
6195 | * and an extent that you want to insert, deal with overlap and insert | 6276 | * and an extent that you want to insert, deal with overlap and insert |
6196 | * the new extent into the tree. | 6277 | * the best fitted new extent into the tree. |
6197 | */ | 6278 | */ |
6198 | static int merge_extent_mapping(struct extent_map_tree *em_tree, | 6279 | static int merge_extent_mapping(struct extent_map_tree *em_tree, |
6199 | struct extent_map *existing, | 6280 | struct extent_map *existing, |
6200 | struct extent_map *em, | 6281 | struct extent_map *em, |
6201 | u64 map_start) | 6282 | u64 map_start) |
6202 | { | 6283 | { |
6284 | struct extent_map *prev; | ||
6285 | struct extent_map *next; | ||
6286 | u64 start; | ||
6287 | u64 end; | ||
6203 | u64 start_diff; | 6288 | u64 start_diff; |
6204 | 6289 | ||
6205 | BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); | 6290 | BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); |
6206 | start_diff = map_start - em->start; | 6291 | |
6207 | em->start = map_start; | 6292 | if (existing->start > map_start) { |
6208 | em->len = existing->start - em->start; | 6293 | next = existing; |
6294 | prev = prev_extent_map(next); | ||
6295 | } else { | ||
6296 | prev = existing; | ||
6297 | next = next_extent_map(prev); | ||
6298 | } | ||
6299 | |||
6300 | start = prev ? extent_map_end(prev) : em->start; | ||
6301 | start = max_t(u64, start, em->start); | ||
6302 | end = next ? next->start : extent_map_end(em); | ||
6303 | end = min_t(u64, end, extent_map_end(em)); | ||
6304 | start_diff = start - em->start; | ||
6305 | em->start = start; | ||
6306 | em->len = end - start; | ||
6209 | if (em->block_start < EXTENT_MAP_LAST_BYTE && | 6307 | if (em->block_start < EXTENT_MAP_LAST_BYTE && |
6210 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 6308 | !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
6211 | em->block_start += start_diff; | 6309 | em->block_start += start_diff; |
@@ -6333,7 +6431,7 @@ again: | |||
6333 | struct btrfs_file_extent_item); | 6431 | struct btrfs_file_extent_item); |
6334 | /* are we inside the extent that was found? */ | 6432 | /* are we inside the extent that was found? */ |
6335 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 6433 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
6336 | found_type = btrfs_key_type(&found_key); | 6434 | found_type = found_key.type; |
6337 | if (found_key.objectid != objectid || | 6435 | if (found_key.objectid != objectid || |
6338 | found_type != BTRFS_EXTENT_DATA_KEY) { | 6436 | found_type != BTRFS_EXTENT_DATA_KEY) { |
6339 | /* | 6437 | /* |
@@ -6482,25 +6580,21 @@ insert: | |||
6482 | 6580 | ||
6483 | ret = 0; | 6581 | ret = 0; |
6484 | 6582 | ||
6485 | existing = lookup_extent_mapping(em_tree, start, len); | 6583 | existing = search_extent_mapping(em_tree, start, len); |
6486 | if (existing && (existing->start > start || | 6584 | /* |
6487 | existing->start + existing->len <= start)) { | 6585 | * existing will always be non-NULL, since there must be |
6586 | * extent causing the -EEXIST. | ||
6587 | */ | ||
6588 | if (start >= extent_map_end(existing) || | ||
6589 | start <= existing->start) { | ||
6590 | /* | ||
6591 | * The existing extent map is the one nearest to | ||
6592 | * the [start, start + len) range which overlaps | ||
6593 | */ | ||
6594 | err = merge_extent_mapping(em_tree, existing, | ||
6595 | em, start); | ||
6488 | free_extent_map(existing); | 6596 | free_extent_map(existing); |
6489 | existing = NULL; | 6597 | if (err) { |
6490 | } | ||
6491 | if (!existing) { | ||
6492 | existing = lookup_extent_mapping(em_tree, em->start, | ||
6493 | em->len); | ||
6494 | if (existing) { | ||
6495 | err = merge_extent_mapping(em_tree, existing, | ||
6496 | em, start); | ||
6497 | free_extent_map(existing); | ||
6498 | if (err) { | ||
6499 | free_extent_map(em); | ||
6500 | em = NULL; | ||
6501 | } | ||
6502 | } else { | ||
6503 | err = -EIO; | ||
6504 | free_extent_map(em); | 6598 | free_extent_map(em); |
6505 | em = NULL; | 6599 | em = NULL; |
6506 | } | 6600 | } |
@@ -7112,8 +7206,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7112 | block_start, len, | 7206 | block_start, len, |
7113 | orig_block_len, | 7207 | orig_block_len, |
7114 | ram_bytes, type); | 7208 | ram_bytes, type); |
7115 | if (IS_ERR(em)) | 7209 | if (IS_ERR(em)) { |
7210 | ret = PTR_ERR(em); | ||
7116 | goto unlock_err; | 7211 | goto unlock_err; |
7212 | } | ||
7117 | } | 7213 | } |
7118 | 7214 | ||
7119 | ret = btrfs_add_ordered_extent_dio(inode, start, | 7215 | ret = btrfs_add_ordered_extent_dio(inode, start, |
@@ -7188,45 +7284,277 @@ unlock_err: | |||
7188 | return ret; | 7284 | return ret; |
7189 | } | 7285 | } |
7190 | 7286 | ||
7191 | static void btrfs_endio_direct_read(struct bio *bio, int err) | 7287 | static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio, |
7288 | int rw, int mirror_num) | ||
7192 | { | 7289 | { |
7193 | struct btrfs_dio_private *dip = bio->bi_private; | ||
7194 | struct bio_vec *bvec; | ||
7195 | struct inode *inode = dip->inode; | ||
7196 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7290 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7197 | struct bio *dio_bio; | 7291 | int ret; |
7198 | u32 *csums = (u32 *)dip->csum; | 7292 | |
7293 | BUG_ON(rw & REQ_WRITE); | ||
7294 | |||
7295 | bio_get(bio); | ||
7296 | |||
7297 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, | ||
7298 | BTRFS_WQ_ENDIO_DIO_REPAIR); | ||
7299 | if (ret) | ||
7300 | goto err; | ||
7301 | |||
7302 | ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); | ||
7303 | err: | ||
7304 | bio_put(bio); | ||
7305 | return ret; | ||
7306 | } | ||
7307 | |||
7308 | static int btrfs_check_dio_repairable(struct inode *inode, | ||
7309 | struct bio *failed_bio, | ||
7310 | struct io_failure_record *failrec, | ||
7311 | int failed_mirror) | ||
7312 | { | ||
7313 | int num_copies; | ||
7314 | |||
7315 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, | ||
7316 | failrec->logical, failrec->len); | ||
7317 | if (num_copies == 1) { | ||
7318 | /* | ||
7319 | * we only have a single copy of the data, so don't bother with | ||
7320 | * all the retry and error correction code that follows. no | ||
7321 | * matter what the error is, it is very likely to persist. | ||
7322 | */ | ||
7323 | pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", | ||
7324 | num_copies, failrec->this_mirror, failed_mirror); | ||
7325 | return 0; | ||
7326 | } | ||
7327 | |||
7328 | failrec->failed_mirror = failed_mirror; | ||
7329 | failrec->this_mirror++; | ||
7330 | if (failrec->this_mirror == failed_mirror) | ||
7331 | failrec->this_mirror++; | ||
7332 | |||
7333 | if (failrec->this_mirror > num_copies) { | ||
7334 | pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", | ||
7335 | num_copies, failrec->this_mirror, failed_mirror); | ||
7336 | return 0; | ||
7337 | } | ||
7338 | |||
7339 | return 1; | ||
7340 | } | ||
7341 | |||
7342 | static int dio_read_error(struct inode *inode, struct bio *failed_bio, | ||
7343 | struct page *page, u64 start, u64 end, | ||
7344 | int failed_mirror, bio_end_io_t *repair_endio, | ||
7345 | void *repair_arg) | ||
7346 | { | ||
7347 | struct io_failure_record *failrec; | ||
7348 | struct bio *bio; | ||
7349 | int isector; | ||
7350 | int read_mode; | ||
7351 | int ret; | ||
7352 | |||
7353 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
7354 | |||
7355 | ret = btrfs_get_io_failure_record(inode, start, end, &failrec); | ||
7356 | if (ret) | ||
7357 | return ret; | ||
7358 | |||
7359 | ret = btrfs_check_dio_repairable(inode, failed_bio, failrec, | ||
7360 | failed_mirror); | ||
7361 | if (!ret) { | ||
7362 | free_io_failure(inode, failrec); | ||
7363 | return -EIO; | ||
7364 | } | ||
7365 | |||
7366 | if (failed_bio->bi_vcnt > 1) | ||
7367 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
7368 | else | ||
7369 | read_mode = READ_SYNC; | ||
7370 | |||
7371 | isector = start - btrfs_io_bio(failed_bio)->logical; | ||
7372 | isector >>= inode->i_sb->s_blocksize_bits; | ||
7373 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, | ||
7374 | 0, isector, repair_endio, repair_arg); | ||
7375 | if (!bio) { | ||
7376 | free_io_failure(inode, failrec); | ||
7377 | return -EIO; | ||
7378 | } | ||
7379 | |||
7380 | btrfs_debug(BTRFS_I(inode)->root->fs_info, | ||
7381 | "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n", | ||
7382 | read_mode, failrec->this_mirror, failrec->in_validation); | ||
7383 | |||
7384 | ret = submit_dio_repair_bio(inode, bio, read_mode, | ||
7385 | failrec->this_mirror); | ||
7386 | if (ret) { | ||
7387 | free_io_failure(inode, failrec); | ||
7388 | bio_put(bio); | ||
7389 | } | ||
7390 | |||
7391 | return ret; | ||
7392 | } | ||
7393 | |||
7394 | struct btrfs_retry_complete { | ||
7395 | struct completion done; | ||
7396 | struct inode *inode; | ||
7397 | u64 start; | ||
7398 | int uptodate; | ||
7399 | }; | ||
7400 | |||
7401 | static void btrfs_retry_endio_nocsum(struct bio *bio, int err) | ||
7402 | { | ||
7403 | struct btrfs_retry_complete *done = bio->bi_private; | ||
7404 | struct bio_vec *bvec; | ||
7405 | int i; | ||
7406 | |||
7407 | if (err) | ||
7408 | goto end; | ||
7409 | |||
7410 | done->uptodate = 1; | ||
7411 | bio_for_each_segment_all(bvec, bio, i) | ||
7412 | clean_io_failure(done->inode, done->start, bvec->bv_page, 0); | ||
7413 | end: | ||
7414 | complete(&done->done); | ||
7415 | bio_put(bio); | ||
7416 | } | ||
7417 | |||
7418 | static int __btrfs_correct_data_nocsum(struct inode *inode, | ||
7419 | struct btrfs_io_bio *io_bio) | ||
7420 | { | ||
7421 | struct bio_vec *bvec; | ||
7422 | struct btrfs_retry_complete done; | ||
7199 | u64 start; | 7423 | u64 start; |
7200 | int i; | 7424 | int i; |
7425 | int ret; | ||
7426 | |||
7427 | start = io_bio->logical; | ||
7428 | done.inode = inode; | ||
7429 | |||
7430 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | ||
7431 | try_again: | ||
7432 | done.uptodate = 0; | ||
7433 | done.start = start; | ||
7434 | init_completion(&done.done); | ||
7435 | |||
7436 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, | ||
7437 | start + bvec->bv_len - 1, | ||
7438 | io_bio->mirror_num, | ||
7439 | btrfs_retry_endio_nocsum, &done); | ||
7440 | if (ret) | ||
7441 | return ret; | ||
7442 | |||
7443 | wait_for_completion(&done.done); | ||
7444 | |||
7445 | if (!done.uptodate) { | ||
7446 | /* We might have another mirror, so try again */ | ||
7447 | goto try_again; | ||
7448 | } | ||
7449 | |||
7450 | start += bvec->bv_len; | ||
7451 | } | ||
7452 | |||
7453 | return 0; | ||
7454 | } | ||
7455 | |||
7456 | static void btrfs_retry_endio(struct bio *bio, int err) | ||
7457 | { | ||
7458 | struct btrfs_retry_complete *done = bio->bi_private; | ||
7459 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | ||
7460 | struct bio_vec *bvec; | ||
7461 | int uptodate; | ||
7462 | int ret; | ||
7463 | int i; | ||
7464 | |||
7465 | if (err) | ||
7466 | goto end; | ||
7201 | 7467 | ||
7202 | start = dip->logical_offset; | 7468 | uptodate = 1; |
7203 | bio_for_each_segment_all(bvec, bio, i) { | 7469 | bio_for_each_segment_all(bvec, bio, i) { |
7204 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | 7470 | ret = __readpage_endio_check(done->inode, io_bio, i, |
7205 | struct page *page = bvec->bv_page; | 7471 | bvec->bv_page, 0, |
7206 | char *kaddr; | 7472 | done->start, bvec->bv_len); |
7207 | u32 csum = ~(u32)0; | 7473 | if (!ret) |
7208 | unsigned long flags; | 7474 | clean_io_failure(done->inode, done->start, |
7209 | 7475 | bvec->bv_page, 0); | |
7210 | local_irq_save(flags); | 7476 | else |
7211 | kaddr = kmap_atomic(page); | 7477 | uptodate = 0; |
7212 | csum = btrfs_csum_data(kaddr + bvec->bv_offset, | 7478 | } |
7213 | csum, bvec->bv_len); | 7479 | |
7214 | btrfs_csum_final(csum, (char *)&csum); | 7480 | done->uptodate = uptodate; |
7215 | kunmap_atomic(kaddr); | 7481 | end: |
7216 | local_irq_restore(flags); | 7482 | complete(&done->done); |
7217 | 7483 | bio_put(bio); | |
7218 | flush_dcache_page(bvec->bv_page); | 7484 | } |
7219 | if (csum != csums[i]) { | 7485 | |
7220 | btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", | 7486 | static int __btrfs_subio_endio_read(struct inode *inode, |
7221 | btrfs_ino(inode), start, csum, | 7487 | struct btrfs_io_bio *io_bio, int err) |
7222 | csums[i]); | 7488 | { |
7223 | err = -EIO; | 7489 | struct bio_vec *bvec; |
7224 | } | 7490 | struct btrfs_retry_complete done; |
7491 | u64 start; | ||
7492 | u64 offset = 0; | ||
7493 | int i; | ||
7494 | int ret; | ||
7495 | |||
7496 | err = 0; | ||
7497 | start = io_bio->logical; | ||
7498 | done.inode = inode; | ||
7499 | |||
7500 | bio_for_each_segment_all(bvec, &io_bio->bio, i) { | ||
7501 | ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, | ||
7502 | 0, start, bvec->bv_len); | ||
7503 | if (likely(!ret)) | ||
7504 | goto next; | ||
7505 | try_again: | ||
7506 | done.uptodate = 0; | ||
7507 | done.start = start; | ||
7508 | init_completion(&done.done); | ||
7509 | |||
7510 | ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start, | ||
7511 | start + bvec->bv_len - 1, | ||
7512 | io_bio->mirror_num, | ||
7513 | btrfs_retry_endio, &done); | ||
7514 | if (ret) { | ||
7515 | err = ret; | ||
7516 | goto next; | ||
7225 | } | 7517 | } |
7226 | 7518 | ||
7519 | wait_for_completion(&done.done); | ||
7520 | |||
7521 | if (!done.uptodate) { | ||
7522 | /* We might have another mirror, so try again */ | ||
7523 | goto try_again; | ||
7524 | } | ||
7525 | next: | ||
7526 | offset += bvec->bv_len; | ||
7227 | start += bvec->bv_len; | 7527 | start += bvec->bv_len; |
7228 | } | 7528 | } |
7229 | 7529 | ||
7530 | return err; | ||
7531 | } | ||
7532 | |||
7533 | static int btrfs_subio_endio_read(struct inode *inode, | ||
7534 | struct btrfs_io_bio *io_bio, int err) | ||
7535 | { | ||
7536 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
7537 | |||
7538 | if (skip_csum) { | ||
7539 | if (unlikely(err)) | ||
7540 | return __btrfs_correct_data_nocsum(inode, io_bio); | ||
7541 | else | ||
7542 | return 0; | ||
7543 | } else { | ||
7544 | return __btrfs_subio_endio_read(inode, io_bio, err); | ||
7545 | } | ||
7546 | } | ||
7547 | |||
7548 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
7549 | { | ||
7550 | struct btrfs_dio_private *dip = bio->bi_private; | ||
7551 | struct inode *inode = dip->inode; | ||
7552 | struct bio *dio_bio; | ||
7553 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | ||
7554 | |||
7555 | if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) | ||
7556 | err = btrfs_subio_endio_read(inode, io_bio, err); | ||
7557 | |||
7230 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | 7558 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, |
7231 | dip->logical_offset + dip->bytes - 1); | 7559 | dip->logical_offset + dip->bytes - 1); |
7232 | dio_bio = dip->dio_bio; | 7560 | dio_bio = dip->dio_bio; |
@@ -7237,6 +7565,9 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) | |||
7237 | if (err) | 7565 | if (err) |
7238 | clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); | 7566 | clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); |
7239 | dio_end_io(dio_bio, err); | 7567 | dio_end_io(dio_bio, err); |
7568 | |||
7569 | if (io_bio->end_io) | ||
7570 | io_bio->end_io(io_bio, err); | ||
7240 | bio_put(bio); | 7571 | bio_put(bio); |
7241 | } | 7572 | } |
7242 | 7573 | ||
@@ -7302,12 +7633,17 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) | |||
7302 | { | 7633 | { |
7303 | struct btrfs_dio_private *dip = bio->bi_private; | 7634 | struct btrfs_dio_private *dip = bio->bi_private; |
7304 | 7635 | ||
7636 | if (err) | ||
7637 | btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, | ||
7638 | "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", | ||
7639 | btrfs_ino(dip->inode), bio->bi_rw, | ||
7640 | (unsigned long long)bio->bi_iter.bi_sector, | ||
7641 | bio->bi_iter.bi_size, err); | ||
7642 | |||
7643 | if (dip->subio_endio) | ||
7644 | err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err); | ||
7645 | |||
7305 | if (err) { | 7646 | if (err) { |
7306 | btrfs_err(BTRFS_I(dip->inode)->root->fs_info, | ||
7307 | "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", | ||
7308 | btrfs_ino(dip->inode), bio->bi_rw, | ||
7309 | (unsigned long long)bio->bi_iter.bi_sector, | ||
7310 | bio->bi_iter.bi_size, err); | ||
7311 | dip->errors = 1; | 7647 | dip->errors = 1; |
7312 | 7648 | ||
7313 | /* | 7649 | /* |
@@ -7338,6 +7674,38 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | |||
7338 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); | 7674 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); |
7339 | } | 7675 | } |
7340 | 7676 | ||
7677 | static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root, | ||
7678 | struct inode *inode, | ||
7679 | struct btrfs_dio_private *dip, | ||
7680 | struct bio *bio, | ||
7681 | u64 file_offset) | ||
7682 | { | ||
7683 | struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); | ||
7684 | struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); | ||
7685 | int ret; | ||
7686 | |||
7687 | /* | ||
7688 | * We load all the csum data we need when we submit | ||
7689 | * the first bio to reduce the csum tree search and | ||
7690 | * contention. | ||
7691 | */ | ||
7692 | if (dip->logical_offset == file_offset) { | ||
7693 | ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio, | ||
7694 | file_offset); | ||
7695 | if (ret) | ||
7696 | return ret; | ||
7697 | } | ||
7698 | |||
7699 | if (bio == dip->orig_bio) | ||
7700 | return 0; | ||
7701 | |||
7702 | file_offset -= dip->logical_offset; | ||
7703 | file_offset >>= inode->i_sb->s_blocksize_bits; | ||
7704 | io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); | ||
7705 | |||
7706 | return 0; | ||
7707 | } | ||
7708 | |||
7341 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | 7709 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, |
7342 | int rw, u64 file_offset, int skip_sum, | 7710 | int rw, u64 file_offset, int skip_sum, |
7343 | int async_submit) | 7711 | int async_submit) |
@@ -7353,7 +7721,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
7353 | bio_get(bio); | 7721 | bio_get(bio); |
7354 | 7722 | ||
7355 | if (!write) { | 7723 | if (!write) { |
7356 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 7724 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, |
7725 | BTRFS_WQ_ENDIO_DATA); | ||
7357 | if (ret) | 7726 | if (ret) |
7358 | goto err; | 7727 | goto err; |
7359 | } | 7728 | } |
@@ -7376,13 +7745,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
7376 | ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); | 7745 | ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1); |
7377 | if (ret) | 7746 | if (ret) |
7378 | goto err; | 7747 | goto err; |
7379 | } else if (!skip_sum) { | 7748 | } else { |
7380 | ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio, | 7749 | ret = btrfs_lookup_and_bind_dio_csum(root, inode, dip, bio, |
7381 | file_offset); | 7750 | file_offset); |
7382 | if (ret) | 7751 | if (ret) |
7383 | goto err; | 7752 | goto err; |
7384 | } | 7753 | } |
7385 | |||
7386 | map: | 7754 | map: |
7387 | ret = btrfs_map_bio(root, rw, bio, 0, async_submit); | 7755 | ret = btrfs_map_bio(root, rw, bio, 0, async_submit); |
7388 | err: | 7756 | err: |
@@ -7403,7 +7771,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7403 | u64 submit_len = 0; | 7771 | u64 submit_len = 0; |
7404 | u64 map_length; | 7772 | u64 map_length; |
7405 | int nr_pages = 0; | 7773 | int nr_pages = 0; |
7406 | int ret = 0; | 7774 | int ret; |
7407 | int async_submit = 0; | 7775 | int async_submit = 0; |
7408 | 7776 | ||
7409 | map_length = orig_bio->bi_iter.bi_size; | 7777 | map_length = orig_bio->bi_iter.bi_size; |
@@ -7414,6 +7782,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7414 | 7782 | ||
7415 | if (map_length >= orig_bio->bi_iter.bi_size) { | 7783 | if (map_length >= orig_bio->bi_iter.bi_size) { |
7416 | bio = orig_bio; | 7784 | bio = orig_bio; |
7785 | dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED; | ||
7417 | goto submit; | 7786 | goto submit; |
7418 | } | 7787 | } |
7419 | 7788 | ||
@@ -7430,12 +7799,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7430 | 7799 | ||
7431 | bio->bi_private = dip; | 7800 | bio->bi_private = dip; |
7432 | bio->bi_end_io = btrfs_end_dio_bio; | 7801 | bio->bi_end_io = btrfs_end_dio_bio; |
7802 | btrfs_io_bio(bio)->logical = file_offset; | ||
7433 | atomic_inc(&dip->pending_bios); | 7803 | atomic_inc(&dip->pending_bios); |
7434 | 7804 | ||
7435 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | 7805 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { |
7436 | if (unlikely(map_length < submit_len + bvec->bv_len || | 7806 | if (map_length < submit_len + bvec->bv_len || |
7437 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | 7807 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, |
7438 | bvec->bv_offset) < bvec->bv_len)) { | 7808 | bvec->bv_offset) < bvec->bv_len) { |
7439 | /* | 7809 | /* |
7440 | * inc the count before we submit the bio so | 7810 | * inc the count before we submit the bio so |
7441 | * we know the end IO handler won't happen before | 7811 | * we know the end IO handler won't happen before |
@@ -7464,6 +7834,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
7464 | goto out_err; | 7834 | goto out_err; |
7465 | bio->bi_private = dip; | 7835 | bio->bi_private = dip; |
7466 | bio->bi_end_io = btrfs_end_dio_bio; | 7836 | bio->bi_end_io = btrfs_end_dio_bio; |
7837 | btrfs_io_bio(bio)->logical = file_offset; | ||
7467 | 7838 | ||
7468 | map_length = orig_bio->bi_iter.bi_size; | 7839 | map_length = orig_bio->bi_iter.bi_size; |
7469 | ret = btrfs_map_block(root->fs_info, rw, | 7840 | ret = btrfs_map_block(root->fs_info, rw, |
@@ -7507,11 +7878,10 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7507 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7878 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7508 | struct btrfs_dio_private *dip; | 7879 | struct btrfs_dio_private *dip; |
7509 | struct bio *io_bio; | 7880 | struct bio *io_bio; |
7881 | struct btrfs_io_bio *btrfs_bio; | ||
7510 | int skip_sum; | 7882 | int skip_sum; |
7511 | int sum_len; | ||
7512 | int write = rw & REQ_WRITE; | 7883 | int write = rw & REQ_WRITE; |
7513 | int ret = 0; | 7884 | int ret = 0; |
7514 | u16 csum_size; | ||
7515 | 7885 | ||
7516 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 7886 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
7517 | 7887 | ||
@@ -7521,16 +7891,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7521 | goto free_ordered; | 7891 | goto free_ordered; |
7522 | } | 7892 | } |
7523 | 7893 | ||
7524 | if (!skip_sum && !write) { | 7894 | dip = kzalloc(sizeof(*dip), GFP_NOFS); |
7525 | csum_size = btrfs_super_csum_size(root->fs_info->super_copy); | ||
7526 | sum_len = dio_bio->bi_iter.bi_size >> | ||
7527 | inode->i_sb->s_blocksize_bits; | ||
7528 | sum_len *= csum_size; | ||
7529 | } else { | ||
7530 | sum_len = 0; | ||
7531 | } | ||
7532 | |||
7533 | dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS); | ||
7534 | if (!dip) { | 7895 | if (!dip) { |
7535 | ret = -ENOMEM; | 7896 | ret = -ENOMEM; |
7536 | goto free_io_bio; | 7897 | goto free_io_bio; |
@@ -7542,20 +7903,25 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, | |||
7542 | dip->bytes = dio_bio->bi_iter.bi_size; | 7903 | dip->bytes = dio_bio->bi_iter.bi_size; |
7543 | dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; | 7904 | dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9; |
7544 | io_bio->bi_private = dip; | 7905 | io_bio->bi_private = dip; |
7545 | dip->errors = 0; | ||
7546 | dip->orig_bio = io_bio; | 7906 | dip->orig_bio = io_bio; |
7547 | dip->dio_bio = dio_bio; | 7907 | dip->dio_bio = dio_bio; |
7548 | atomic_set(&dip->pending_bios, 0); | 7908 | atomic_set(&dip->pending_bios, 0); |
7909 | btrfs_bio = btrfs_io_bio(io_bio); | ||
7910 | btrfs_bio->logical = file_offset; | ||
7549 | 7911 | ||
7550 | if (write) | 7912 | if (write) { |
7551 | io_bio->bi_end_io = btrfs_endio_direct_write; | 7913 | io_bio->bi_end_io = btrfs_endio_direct_write; |
7552 | else | 7914 | } else { |
7553 | io_bio->bi_end_io = btrfs_endio_direct_read; | 7915 | io_bio->bi_end_io = btrfs_endio_direct_read; |
7916 | dip->subio_endio = btrfs_subio_endio_read; | ||
7917 | } | ||
7554 | 7918 | ||
7555 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); | 7919 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); |
7556 | if (!ret) | 7920 | if (!ret) |
7557 | return; | 7921 | return; |
7558 | 7922 | ||
7923 | if (btrfs_bio->end_io) | ||
7924 | btrfs_bio->end_io(btrfs_bio, ret); | ||
7559 | free_io_bio: | 7925 | free_io_bio: |
7560 | bio_put(io_bio); | 7926 | bio_put(io_bio); |
7561 | 7927 | ||
@@ -7652,8 +8018,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
7652 | ret = btrfs_delalloc_reserve_space(inode, count); | 8018 | ret = btrfs_delalloc_reserve_space(inode, count); |
7653 | if (ret) | 8019 | if (ret) |
7654 | goto out; | 8020 | goto out; |
7655 | } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, | 8021 | } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, |
7656 | &BTRFS_I(inode)->runtime_flags))) { | 8022 | &BTRFS_I(inode)->runtime_flags)) { |
7657 | inode_dio_done(inode); | 8023 | inode_dio_done(inode); |
7658 | flags = DIO_LOCKING | DIO_SKIP_HOLES; | 8024 | flags = DIO_LOCKING | DIO_SKIP_HOLES; |
7659 | wakeup = false; | 8025 | wakeup = false; |
@@ -8173,6 +8539,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
8173 | ei->last_sub_trans = 0; | 8539 | ei->last_sub_trans = 0; |
8174 | ei->logged_trans = 0; | 8540 | ei->logged_trans = 0; |
8175 | ei->delalloc_bytes = 0; | 8541 | ei->delalloc_bytes = 0; |
8542 | ei->defrag_bytes = 0; | ||
8176 | ei->disk_i_size = 0; | 8543 | ei->disk_i_size = 0; |
8177 | ei->flags = 0; | 8544 | ei->flags = 0; |
8178 | ei->csum_bytes = 0; | 8545 | ei->csum_bytes = 0; |
@@ -8231,6 +8598,7 @@ void btrfs_destroy_inode(struct inode *inode) | |||
8231 | WARN_ON(BTRFS_I(inode)->reserved_extents); | 8598 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
8232 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); | 8599 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); |
8233 | WARN_ON(BTRFS_I(inode)->csum_bytes); | 8600 | WARN_ON(BTRFS_I(inode)->csum_bytes); |
8601 | WARN_ON(BTRFS_I(inode)->defrag_bytes); | ||
8234 | 8602 | ||
8235 | /* | 8603 | /* |
8236 | * This can happen where we create an inode, but somebody else also | 8604 | * This can happen where we create an inode, but somebody else also |
@@ -8646,7 +9014,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, | |||
8646 | spin_unlock(&root->delalloc_lock); | 9014 | spin_unlock(&root->delalloc_lock); |
8647 | 9015 | ||
8648 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); | 9016 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); |
8649 | if (unlikely(!work)) { | 9017 | if (!work) { |
8650 | if (delay_iput) | 9018 | if (delay_iput) |
8651 | btrfs_add_delayed_iput(inode); | 9019 | btrfs_add_delayed_iput(inode); |
8652 | else | 9020 | else |
@@ -8832,7 +9200,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
8832 | } | 9200 | } |
8833 | key.objectid = btrfs_ino(inode); | 9201 | key.objectid = btrfs_ino(inode); |
8834 | key.offset = 0; | 9202 | key.offset = 0; |
8835 | btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); | 9203 | key.type = BTRFS_EXTENT_DATA_KEY; |
8836 | datasize = btrfs_file_extent_calc_inline_size(name_len); | 9204 | datasize = btrfs_file_extent_calc_inline_size(name_len); |
8837 | err = btrfs_insert_empty_item(trans, root, path, &key, | 9205 | err = btrfs_insert_empty_item(trans, root, path, &key, |
8838 | datasize); | 9206 | datasize); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8a8e29878c34..e732274f1afd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -332,6 +332,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
332 | goto out_drop; | 332 | goto out_drop; |
333 | 333 | ||
334 | } else { | 334 | } else { |
335 | ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); | ||
336 | if (ret && ret != -ENODATA) | ||
337 | goto out_drop; | ||
335 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); | 338 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); |
336 | } | 339 | } |
337 | 340 | ||
@@ -477,8 +480,7 @@ static noinline int create_subvol(struct inode *dir, | |||
477 | if (ret) | 480 | if (ret) |
478 | goto fail; | 481 | goto fail; |
479 | 482 | ||
480 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 483 | leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); |
481 | 0, objectid, NULL, 0, 0, 0); | ||
482 | if (IS_ERR(leaf)) { | 484 | if (IS_ERR(leaf)) { |
483 | ret = PTR_ERR(leaf); | 485 | ret = PTR_ERR(leaf); |
484 | goto fail; | 486 | goto fail; |
@@ -503,7 +505,7 @@ static noinline int create_subvol(struct inode *dir, | |||
503 | btrfs_set_stack_inode_generation(inode_item, 1); | 505 | btrfs_set_stack_inode_generation(inode_item, 1); |
504 | btrfs_set_stack_inode_size(inode_item, 3); | 506 | btrfs_set_stack_inode_size(inode_item, 3); |
505 | btrfs_set_stack_inode_nlink(inode_item, 1); | 507 | btrfs_set_stack_inode_nlink(inode_item, 1); |
506 | btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); | 508 | btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); |
507 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); | 509 | btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); |
508 | 510 | ||
509 | btrfs_set_root_flags(&root_item, 0); | 511 | btrfs_set_root_flags(&root_item, 0); |
@@ -535,7 +537,7 @@ static noinline int create_subvol(struct inode *dir, | |||
535 | 537 | ||
536 | key.objectid = objectid; | 538 | key.objectid = objectid; |
537 | key.offset = 0; | 539 | key.offset = 0; |
538 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 540 | key.type = BTRFS_ROOT_ITEM_KEY; |
539 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 541 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, |
540 | &root_item); | 542 | &root_item); |
541 | if (ret) | 543 | if (ret) |
@@ -882,7 +884,7 @@ out_unlock: | |||
882 | * file you want to defrag, we return 0 to let you know to skip this | 884 | * file you want to defrag, we return 0 to let you know to skip this |
883 | * part of the file | 885 | * part of the file |
884 | */ | 886 | */ |
885 | static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) | 887 | static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh) |
886 | { | 888 | { |
887 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 889 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
888 | struct extent_map *em = NULL; | 890 | struct extent_map *em = NULL; |
@@ -917,7 +919,7 @@ static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) | |||
917 | */ | 919 | */ |
918 | static int find_new_extents(struct btrfs_root *root, | 920 | static int find_new_extents(struct btrfs_root *root, |
919 | struct inode *inode, u64 newer_than, | 921 | struct inode *inode, u64 newer_than, |
920 | u64 *off, int thresh) | 922 | u64 *off, u32 thresh) |
921 | { | 923 | { |
922 | struct btrfs_path *path; | 924 | struct btrfs_path *path; |
923 | struct btrfs_key min_key; | 925 | struct btrfs_key min_key; |
@@ -936,12 +938,9 @@ static int find_new_extents(struct btrfs_root *root, | |||
936 | min_key.offset = *off; | 938 | min_key.offset = *off; |
937 | 939 | ||
938 | while (1) { | 940 | while (1) { |
939 | path->keep_locks = 1; | ||
940 | ret = btrfs_search_forward(root, &min_key, path, newer_than); | 941 | ret = btrfs_search_forward(root, &min_key, path, newer_than); |
941 | if (ret != 0) | 942 | if (ret != 0) |
942 | goto none; | 943 | goto none; |
943 | path->keep_locks = 0; | ||
944 | btrfs_unlock_up_safe(path, 1); | ||
945 | process_slot: | 944 | process_slot: |
946 | if (min_key.objectid != ino) | 945 | if (min_key.objectid != ino) |
947 | goto none; | 946 | goto none; |
@@ -1029,7 +1028,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
1029 | return ret; | 1028 | return ret; |
1030 | } | 1029 | } |
1031 | 1030 | ||
1032 | static int should_defrag_range(struct inode *inode, u64 start, int thresh, | 1031 | static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, |
1033 | u64 *last_len, u64 *skip, u64 *defrag_end, | 1032 | u64 *last_len, u64 *skip, u64 *defrag_end, |
1034 | int compress) | 1033 | int compress) |
1035 | { | 1034 | { |
@@ -1259,7 +1258,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1259 | int ret; | 1258 | int ret; |
1260 | int defrag_count = 0; | 1259 | int defrag_count = 0; |
1261 | int compress_type = BTRFS_COMPRESS_ZLIB; | 1260 | int compress_type = BTRFS_COMPRESS_ZLIB; |
1262 | int extent_thresh = range->extent_thresh; | 1261 | u32 extent_thresh = range->extent_thresh; |
1263 | unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; | 1262 | unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; |
1264 | unsigned long cluster = max_cluster; | 1263 | unsigned long cluster = max_cluster; |
1265 | u64 new_align = ~((u64)128 * 1024 - 1); | 1264 | u64 new_align = ~((u64)128 * 1024 - 1); |
@@ -1335,8 +1334,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1335 | inode->i_mapping->writeback_index = i; | 1334 | inode->i_mapping->writeback_index = i; |
1336 | 1335 | ||
1337 | while (i <= last_index && defrag_count < max_to_defrag && | 1336 | while (i <= last_index && defrag_count < max_to_defrag && |
1338 | (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 1337 | (i < DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE))) { |
1339 | PAGE_CACHE_SHIFT)) { | ||
1340 | /* | 1338 | /* |
1341 | * make sure we stop running if someone unmounts | 1339 | * make sure we stop running if someone unmounts |
1342 | * the FS | 1340 | * the FS |
@@ -1359,7 +1357,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1359 | * the should_defrag function tells us how much to skip | 1357 | * the should_defrag function tells us how much to skip |
1360 | * bump our counter by the suggested amount | 1358 | * bump our counter by the suggested amount |
1361 | */ | 1359 | */ |
1362 | next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1360 | next = DIV_ROUND_UP(skip, PAGE_CACHE_SIZE); |
1363 | i = max(i + 1, next); | 1361 | i = max(i + 1, next); |
1364 | continue; | 1362 | continue; |
1365 | } | 1363 | } |
@@ -1554,7 +1552,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1554 | goto out_free; | 1552 | goto out_free; |
1555 | } | 1553 | } |
1556 | 1554 | ||
1557 | old_size = device->total_bytes; | 1555 | old_size = btrfs_device_get_total_bytes(device); |
1558 | 1556 | ||
1559 | if (mod < 0) { | 1557 | if (mod < 0) { |
1560 | if (new_size > old_size) { | 1558 | if (new_size > old_size) { |
@@ -2089,8 +2087,6 @@ static noinline int search_ioctl(struct inode *inode, | |||
2089 | key.type = sk->min_type; | 2087 | key.type = sk->min_type; |
2090 | key.offset = sk->min_offset; | 2088 | key.offset = sk->min_offset; |
2091 | 2089 | ||
2092 | path->keep_locks = 1; | ||
2093 | |||
2094 | while (1) { | 2090 | while (1) { |
2095 | ret = btrfs_search_forward(root, &key, path, sk->min_transid); | 2091 | ret = btrfs_search_forward(root, &key, path, sk->min_transid); |
2096 | if (ret != 0) { | 2092 | if (ret != 0) { |
@@ -2526,9 +2522,9 @@ out_unlock: | |||
2526 | ASSERT(dest->send_in_progress == 0); | 2522 | ASSERT(dest->send_in_progress == 0); |
2527 | 2523 | ||
2528 | /* the last ref */ | 2524 | /* the last ref */ |
2529 | if (dest->cache_inode) { | 2525 | if (dest->ino_cache_inode) { |
2530 | iput(dest->cache_inode); | 2526 | iput(dest->ino_cache_inode); |
2531 | dest->cache_inode = NULL; | 2527 | dest->ino_cache_inode = NULL; |
2532 | } | 2528 | } |
2533 | } | 2529 | } |
2534 | out_dput: | 2530 | out_dput: |
@@ -2634,6 +2630,9 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
2634 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 2630 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
2635 | ret = btrfs_init_new_device(root, vol_args->name); | 2631 | ret = btrfs_init_new_device(root, vol_args->name); |
2636 | 2632 | ||
2633 | if (!ret) | ||
2634 | btrfs_info(root->fs_info, "disk added %s",vol_args->name); | ||
2635 | |||
2637 | kfree(vol_args); | 2636 | kfree(vol_args); |
2638 | out: | 2637 | out: |
2639 | mutex_unlock(&root->fs_info->volume_mutex); | 2638 | mutex_unlock(&root->fs_info->volume_mutex); |
@@ -2673,6 +2672,9 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) | |||
2673 | mutex_unlock(&root->fs_info->volume_mutex); | 2672 | mutex_unlock(&root->fs_info->volume_mutex); |
2674 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | 2673 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); |
2675 | 2674 | ||
2675 | if (!ret) | ||
2676 | btrfs_info(root->fs_info, "disk deleted %s",vol_args->name); | ||
2677 | |||
2676 | out: | 2678 | out: |
2677 | kfree(vol_args); | 2679 | kfree(vol_args); |
2678 | err_drop: | 2680 | err_drop: |
@@ -2737,8 +2739,8 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
2737 | } | 2739 | } |
2738 | 2740 | ||
2739 | di_args->devid = dev->devid; | 2741 | di_args->devid = dev->devid; |
2740 | di_args->bytes_used = dev->bytes_used; | 2742 | di_args->bytes_used = btrfs_device_get_bytes_used(dev); |
2741 | di_args->total_bytes = dev->total_bytes; | 2743 | di_args->total_bytes = btrfs_device_get_total_bytes(dev); |
2742 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); | 2744 | memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); |
2743 | if (dev->name) { | 2745 | if (dev->name) { |
2744 | struct rcu_string *name; | 2746 | struct rcu_string *name; |
@@ -3164,7 +3166,7 @@ static void clone_update_extent_map(struct inode *inode, | |||
3164 | em->start + em->len - 1, 0); | 3166 | em->start + em->len - 1, 0); |
3165 | } | 3167 | } |
3166 | 3168 | ||
3167 | if (unlikely(ret)) | 3169 | if (ret) |
3168 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 3170 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
3169 | &BTRFS_I(inode)->runtime_flags); | 3171 | &BTRFS_I(inode)->runtime_flags); |
3170 | } | 3172 | } |
@@ -3199,7 +3201,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, | |||
3199 | u64 last_dest_end = destoff; | 3201 | u64 last_dest_end = destoff; |
3200 | 3202 | ||
3201 | ret = -ENOMEM; | 3203 | ret = -ENOMEM; |
3202 | buf = vmalloc(btrfs_level_size(root, 0)); | 3204 | buf = vmalloc(root->nodesize); |
3203 | if (!buf) | 3205 | if (!buf) |
3204 | return ret; | 3206 | return ret; |
3205 | 3207 | ||
@@ -3252,11 +3254,11 @@ process_slot: | |||
3252 | slot = path->slots[0]; | 3254 | slot = path->slots[0]; |
3253 | 3255 | ||
3254 | btrfs_item_key_to_cpu(leaf, &key, slot); | 3256 | btrfs_item_key_to_cpu(leaf, &key, slot); |
3255 | if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || | 3257 | if (key.type > BTRFS_EXTENT_DATA_KEY || |
3256 | key.objectid != btrfs_ino(src)) | 3258 | key.objectid != btrfs_ino(src)) |
3257 | break; | 3259 | break; |
3258 | 3260 | ||
3259 | if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { | 3261 | if (key.type == BTRFS_EXTENT_DATA_KEY) { |
3260 | struct btrfs_file_extent_item *extent; | 3262 | struct btrfs_file_extent_item *extent; |
3261 | int type; | 3263 | int type; |
3262 | u32 size; | 3264 | u32 size; |
@@ -5283,6 +5285,12 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
5283 | if (ret) | 5285 | if (ret) |
5284 | return ret; | 5286 | return ret; |
5285 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); | 5287 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); |
5288 | /* | ||
5289 | * The transaction thread may want to do more work, | ||
5290 | * namely it pokes the cleaner kthread that will start | ||
5291 | * processing uncleaned subvols. | ||
5292 | */ | ||
5293 | wake_up_process(root->fs_info->transaction_kthread); | ||
5286 | return ret; | 5294 | return ret; |
5287 | } | 5295 | } |
5288 | case BTRFS_IOC_START_SYNC: | 5296 | case BTRFS_IOC_START_SYNC: |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index dfad8514f0da..78285f30909e 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
@@ -266,8 +266,7 @@ static int lzo_decompress_biovec(struct list_head *ws, | |||
266 | char *data_in; | 266 | char *data_in; |
267 | unsigned long page_in_index = 0; | 267 | unsigned long page_in_index = 0; |
268 | unsigned long page_out_index = 0; | 268 | unsigned long page_out_index = 0; |
269 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 269 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); |
270 | PAGE_CACHE_SIZE; | ||
271 | unsigned long buf_start; | 270 | unsigned long buf_start; |
272 | unsigned long buf_offset = 0; | 271 | unsigned long buf_offset = 0; |
273 | unsigned long bytes; | 272 | unsigned long bytes; |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 65793edb38ca..47767d5b8f0b 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -27,7 +27,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | |||
27 | int ret = 0; | 27 | int ret = 0; |
28 | 28 | ||
29 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 29 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
30 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 30 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
31 | key.offset = offset; | 31 | key.offset = offset; |
32 | 32 | ||
33 | path = btrfs_alloc_path(); | 33 | path = btrfs_alloc_path(); |
@@ -48,7 +48,7 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | |||
48 | int ret = 0; | 48 | int ret = 0; |
49 | 49 | ||
50 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 50 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
51 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 51 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
52 | key.offset = offset; | 52 | key.offset = offset; |
53 | 53 | ||
54 | path = btrfs_alloc_path(); | 54 | path = btrfs_alloc_path(); |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 9626b4ad3b9a..647ab12fdf5d 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -195,7 +195,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
195 | for (i = 0 ; i < nr ; i++) { | 195 | for (i = 0 ; i < nr ; i++) { |
196 | item = btrfs_item_nr(i); | 196 | item = btrfs_item_nr(i); |
197 | btrfs_item_key_to_cpu(l, &key, i); | 197 | btrfs_item_key_to_cpu(l, &key, i); |
198 | type = btrfs_key_type(&key); | 198 | type = key.type; |
199 | printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " | 199 | printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " |
200 | "itemsize %d\n", | 200 | "itemsize %d\n", |
201 | i, key.objectid, type, key.offset, | 201 | i, key.objectid, type, key.offset, |
@@ -336,7 +336,6 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) | |||
336 | for (i = 0; i < nr; i++) { | 336 | for (i = 0; i < nr; i++) { |
337 | struct extent_buffer *next = read_tree_block(root, | 337 | struct extent_buffer *next = read_tree_block(root, |
338 | btrfs_node_blockptr(c, i), | 338 | btrfs_node_blockptr(c, i), |
339 | btrfs_level_size(root, level - 1), | ||
340 | btrfs_node_ptr_generation(c, i)); | 339 | btrfs_node_ptr_generation(c, i)); |
341 | if (btrfs_is_leaf(next) && | 340 | if (btrfs_is_leaf(next) && |
342 | level != 1) | 341 | level != 1) |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index ded5c601d916..48b60dbf807f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -539,10 +539,9 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
539 | struct extent_buffer *leaf; | 539 | struct extent_buffer *leaf; |
540 | struct btrfs_key key; | 540 | struct btrfs_key key; |
541 | 541 | ||
542 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 542 | if (btrfs_test_is_dummy_root(quota_root)) |
543 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &quota_root->state))) | ||
544 | return 0; | 543 | return 0; |
545 | #endif | 544 | |
546 | path = btrfs_alloc_path(); | 545 | path = btrfs_alloc_path(); |
547 | if (!path) | 546 | if (!path) |
548 | return -ENOMEM; | 547 | return -ENOMEM; |
@@ -551,9 +550,15 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
551 | key.type = BTRFS_QGROUP_INFO_KEY; | 550 | key.type = BTRFS_QGROUP_INFO_KEY; |
552 | key.offset = qgroupid; | 551 | key.offset = qgroupid; |
553 | 552 | ||
553 | /* | ||
554 | * Avoid a transaction abort by catching -EEXIST here. In that | ||
555 | * case, we proceed by re-initializing the existing structure | ||
556 | * on disk. | ||
557 | */ | ||
558 | |||
554 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | 559 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, |
555 | sizeof(*qgroup_info)); | 560 | sizeof(*qgroup_info)); |
556 | if (ret) | 561 | if (ret && ret != -EEXIST) |
557 | goto out; | 562 | goto out; |
558 | 563 | ||
559 | leaf = path->nodes[0]; | 564 | leaf = path->nodes[0]; |
@@ -572,7 +577,7 @@ static int add_qgroup_item(struct btrfs_trans_handle *trans, | |||
572 | key.type = BTRFS_QGROUP_LIMIT_KEY; | 577 | key.type = BTRFS_QGROUP_LIMIT_KEY; |
573 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, | 578 | ret = btrfs_insert_empty_item(trans, quota_root, path, &key, |
574 | sizeof(*qgroup_limit)); | 579 | sizeof(*qgroup_limit)); |
575 | if (ret) | 580 | if (ret && ret != -EEXIST) |
576 | goto out; | 581 | goto out; |
577 | 582 | ||
578 | leaf = path->nodes[0]; | 583 | leaf = path->nodes[0]; |
@@ -692,10 +697,9 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans, | |||
692 | int ret; | 697 | int ret; |
693 | int slot; | 698 | int slot; |
694 | 699 | ||
695 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 700 | if (btrfs_test_is_dummy_root(root)) |
696 | if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) | ||
697 | return 0; | 701 | return 0; |
698 | #endif | 702 | |
699 | key.objectid = 0; | 703 | key.objectid = 0; |
700 | key.type = BTRFS_QGROUP_INFO_KEY; | 704 | key.type = BTRFS_QGROUP_INFO_KEY; |
701 | key.offset = qgroup->qgroupid; | 705 | key.offset = qgroup->qgroupid; |
@@ -1335,6 +1339,8 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, | |||
1335 | INIT_LIST_HEAD(&oper->elem.list); | 1339 | INIT_LIST_HEAD(&oper->elem.list); |
1336 | oper->elem.seq = 0; | 1340 | oper->elem.seq = 0; |
1337 | 1341 | ||
1342 | trace_btrfs_qgroup_record_ref(oper); | ||
1343 | |||
1338 | if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { | 1344 | if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { |
1339 | /* | 1345 | /* |
1340 | * If any operation for this bytenr/ref_root combo | 1346 | * If any operation for this bytenr/ref_root combo |
@@ -2077,6 +2083,8 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, | |||
2077 | 2083 | ||
2078 | ASSERT(is_fstree(oper->ref_root)); | 2084 | ASSERT(is_fstree(oper->ref_root)); |
2079 | 2085 | ||
2086 | trace_btrfs_qgroup_account(oper); | ||
2087 | |||
2080 | switch (oper->type) { | 2088 | switch (oper->type) { |
2081 | case BTRFS_QGROUP_OPER_ADD_EXCL: | 2089 | case BTRFS_QGROUP_OPER_ADD_EXCL: |
2082 | case BTRFS_QGROUP_OPER_SUB_EXCL: | 2090 | case BTRFS_QGROUP_OPER_SUB_EXCL: |
@@ -2237,7 +2245,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2237 | if (srcid) { | 2245 | if (srcid) { |
2238 | struct btrfs_root *srcroot; | 2246 | struct btrfs_root *srcroot; |
2239 | struct btrfs_key srckey; | 2247 | struct btrfs_key srckey; |
2240 | int srcroot_level; | ||
2241 | 2248 | ||
2242 | srckey.objectid = srcid; | 2249 | srckey.objectid = srcid; |
2243 | srckey.type = BTRFS_ROOT_ITEM_KEY; | 2250 | srckey.type = BTRFS_ROOT_ITEM_KEY; |
@@ -2249,8 +2256,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, | |||
2249 | } | 2256 | } |
2250 | 2257 | ||
2251 | rcu_read_lock(); | 2258 | rcu_read_lock(); |
2252 | srcroot_level = btrfs_header_level(srcroot->node); | 2259 | level_size = srcroot->nodesize; |
2253 | level_size = btrfs_level_size(srcroot, srcroot_level); | ||
2254 | rcu_read_unlock(); | 2260 | rcu_read_unlock(); |
2255 | } | 2261 | } |
2256 | 2262 | ||
@@ -2566,7 +2572,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
2566 | found.type != BTRFS_METADATA_ITEM_KEY) | 2572 | found.type != BTRFS_METADATA_ITEM_KEY) |
2567 | continue; | 2573 | continue; |
2568 | if (found.type == BTRFS_METADATA_ITEM_KEY) | 2574 | if (found.type == BTRFS_METADATA_ITEM_KEY) |
2569 | num_bytes = fs_info->extent_root->leafsize; | 2575 | num_bytes = fs_info->extent_root->nodesize; |
2570 | else | 2576 | else |
2571 | num_bytes = found.offset; | 2577 | num_bytes = found.offset; |
2572 | 2578 | ||
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0a6b6e4bcbb9..6a41631cb959 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -912,7 +912,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio, | |||
912 | static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) | 912 | static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) |
913 | { | 913 | { |
914 | unsigned long nr = stripe_len * nr_stripes; | 914 | unsigned long nr = stripe_len * nr_stripes; |
915 | return (nr + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 915 | return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE); |
916 | } | 916 | } |
917 | 917 | ||
918 | /* | 918 | /* |
@@ -1442,7 +1442,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1442 | struct btrfs_bio *bbio = rbio->bbio; | 1442 | struct btrfs_bio *bbio = rbio->bbio; |
1443 | struct bio_list bio_list; | 1443 | struct bio_list bio_list; |
1444 | int ret; | 1444 | int ret; |
1445 | int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1445 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
1446 | int pagenr; | 1446 | int pagenr; |
1447 | int stripe; | 1447 | int stripe; |
1448 | struct bio *bio; | 1448 | struct bio *bio; |
@@ -1725,7 +1725,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) | |||
1725 | int pagenr, stripe; | 1725 | int pagenr, stripe; |
1726 | void **pointers; | 1726 | void **pointers; |
1727 | int faila = -1, failb = -1; | 1727 | int faila = -1, failb = -1; |
1728 | int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1728 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
1729 | struct page *page; | 1729 | struct page *page; |
1730 | int err; | 1730 | int err; |
1731 | int i; | 1731 | int i; |
@@ -1940,7 +1940,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) | |||
1940 | struct btrfs_bio *bbio = rbio->bbio; | 1940 | struct btrfs_bio *bbio = rbio->bbio; |
1941 | struct bio_list bio_list; | 1941 | struct bio_list bio_list; |
1942 | int ret; | 1942 | int ret; |
1943 | int nr_pages = (rbio->stripe_len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 1943 | int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE); |
1944 | int pagenr; | 1944 | int pagenr; |
1945 | int stripe; | 1945 | int stripe; |
1946 | struct bio *bio; | 1946 | struct bio *bio; |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 20408c6b665a..b63ae20618fb 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -347,7 +347,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
347 | if (!re) | 347 | if (!re) |
348 | return NULL; | 348 | return NULL; |
349 | 349 | ||
350 | blocksize = btrfs_level_size(root, level); | 350 | blocksize = root->nodesize; |
351 | re->logical = logical; | 351 | re->logical = logical; |
352 | re->blocksize = blocksize; | 352 | re->blocksize = blocksize; |
353 | re->top = *top; | 353 | re->top = *top; |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 65245a07275b..74257d6436ad 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -736,7 +736,8 @@ again: | |||
736 | err = ret; | 736 | err = ret; |
737 | goto out; | 737 | goto out; |
738 | } | 738 | } |
739 | BUG_ON(!ret || !path1->slots[0]); | 739 | ASSERT(ret); |
740 | ASSERT(path1->slots[0]); | ||
740 | 741 | ||
741 | path1->slots[0]--; | 742 | path1->slots[0]--; |
742 | 743 | ||
@@ -746,10 +747,10 @@ again: | |||
746 | * the backref was added previously when processing | 747 | * the backref was added previously when processing |
747 | * backref of type BTRFS_TREE_BLOCK_REF_KEY | 748 | * backref of type BTRFS_TREE_BLOCK_REF_KEY |
748 | */ | 749 | */ |
749 | BUG_ON(!list_is_singular(&cur->upper)); | 750 | ASSERT(list_is_singular(&cur->upper)); |
750 | edge = list_entry(cur->upper.next, struct backref_edge, | 751 | edge = list_entry(cur->upper.next, struct backref_edge, |
751 | list[LOWER]); | 752 | list[LOWER]); |
752 | BUG_ON(!list_empty(&edge->list[UPPER])); | 753 | ASSERT(list_empty(&edge->list[UPPER])); |
753 | exist = edge->node[UPPER]; | 754 | exist = edge->node[UPPER]; |
754 | /* | 755 | /* |
755 | * add the upper level block to pending list if we need | 756 | * add the upper level block to pending list if we need |
@@ -831,7 +832,7 @@ again: | |||
831 | cur->cowonly = 1; | 832 | cur->cowonly = 1; |
832 | } | 833 | } |
833 | #else | 834 | #else |
834 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 835 | ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); |
835 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { | 836 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { |
836 | #endif | 837 | #endif |
837 | if (key.objectid == key.offset) { | 838 | if (key.objectid == key.offset) { |
@@ -840,7 +841,7 @@ again: | |||
840 | * backref of this type. | 841 | * backref of this type. |
841 | */ | 842 | */ |
842 | root = find_reloc_root(rc, cur->bytenr); | 843 | root = find_reloc_root(rc, cur->bytenr); |
843 | BUG_ON(!root); | 844 | ASSERT(root); |
844 | cur->root = root; | 845 | cur->root = root; |
845 | break; | 846 | break; |
846 | } | 847 | } |
@@ -868,7 +869,7 @@ again: | |||
868 | } else { | 869 | } else { |
869 | upper = rb_entry(rb_node, struct backref_node, | 870 | upper = rb_entry(rb_node, struct backref_node, |
870 | rb_node); | 871 | rb_node); |
871 | BUG_ON(!upper->checked); | 872 | ASSERT(upper->checked); |
872 | INIT_LIST_HEAD(&edge->list[UPPER]); | 873 | INIT_LIST_HEAD(&edge->list[UPPER]); |
873 | } | 874 | } |
874 | list_add_tail(&edge->list[LOWER], &cur->upper); | 875 | list_add_tail(&edge->list[LOWER], &cur->upper); |
@@ -892,7 +893,7 @@ again: | |||
892 | 893 | ||
893 | if (btrfs_root_level(&root->root_item) == cur->level) { | 894 | if (btrfs_root_level(&root->root_item) == cur->level) { |
894 | /* tree root */ | 895 | /* tree root */ |
895 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 896 | ASSERT(btrfs_root_bytenr(&root->root_item) == |
896 | cur->bytenr); | 897 | cur->bytenr); |
897 | if (should_ignore_root(root)) | 898 | if (should_ignore_root(root)) |
898 | list_add(&cur->list, &useless); | 899 | list_add(&cur->list, &useless); |
@@ -927,7 +928,7 @@ again: | |||
927 | need_check = true; | 928 | need_check = true; |
928 | for (; level < BTRFS_MAX_LEVEL; level++) { | 929 | for (; level < BTRFS_MAX_LEVEL; level++) { |
929 | if (!path2->nodes[level]) { | 930 | if (!path2->nodes[level]) { |
930 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 931 | ASSERT(btrfs_root_bytenr(&root->root_item) == |
931 | lower->bytenr); | 932 | lower->bytenr); |
932 | if (should_ignore_root(root)) | 933 | if (should_ignore_root(root)) |
933 | list_add(&lower->list, &useless); | 934 | list_add(&lower->list, &useless); |
@@ -977,12 +978,15 @@ again: | |||
977 | need_check = false; | 978 | need_check = false; |
978 | list_add_tail(&edge->list[UPPER], | 979 | list_add_tail(&edge->list[UPPER], |
979 | &list); | 980 | &list); |
980 | } else | 981 | } else { |
982 | if (upper->checked) | ||
983 | need_check = true; | ||
981 | INIT_LIST_HEAD(&edge->list[UPPER]); | 984 | INIT_LIST_HEAD(&edge->list[UPPER]); |
985 | } | ||
982 | } else { | 986 | } else { |
983 | upper = rb_entry(rb_node, struct backref_node, | 987 | upper = rb_entry(rb_node, struct backref_node, |
984 | rb_node); | 988 | rb_node); |
985 | BUG_ON(!upper->checked); | 989 | ASSERT(upper->checked); |
986 | INIT_LIST_HEAD(&edge->list[UPPER]); | 990 | INIT_LIST_HEAD(&edge->list[UPPER]); |
987 | if (!upper->owner) | 991 | if (!upper->owner) |
988 | upper->owner = btrfs_header_owner(eb); | 992 | upper->owner = btrfs_header_owner(eb); |
@@ -1026,7 +1030,7 @@ next: | |||
1026 | * everything goes well, connect backref nodes and insert backref nodes | 1030 | * everything goes well, connect backref nodes and insert backref nodes |
1027 | * into the cache. | 1031 | * into the cache. |
1028 | */ | 1032 | */ |
1029 | BUG_ON(!node->checked); | 1033 | ASSERT(node->checked); |
1030 | cowonly = node->cowonly; | 1034 | cowonly = node->cowonly; |
1031 | if (!cowonly) { | 1035 | if (!cowonly) { |
1032 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | 1036 | rb_node = tree_insert(&cache->rb_root, node->bytenr, |
@@ -1062,8 +1066,21 @@ next: | |||
1062 | continue; | 1066 | continue; |
1063 | } | 1067 | } |
1064 | 1068 | ||
1065 | BUG_ON(!upper->checked); | 1069 | if (!upper->checked) { |
1066 | BUG_ON(cowonly != upper->cowonly); | 1070 | /* |
1071 | * Still want to blow up for developers since this is a | ||
1072 | * logic bug. | ||
1073 | */ | ||
1074 | ASSERT(0); | ||
1075 | err = -EINVAL; | ||
1076 | goto out; | ||
1077 | } | ||
1078 | if (cowonly != upper->cowonly) { | ||
1079 | ASSERT(0); | ||
1080 | err = -EINVAL; | ||
1081 | goto out; | ||
1082 | } | ||
1083 | |||
1067 | if (!cowonly) { | 1084 | if (!cowonly) { |
1068 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1085 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
1069 | &upper->rb_node); | 1086 | &upper->rb_node); |
@@ -1086,7 +1103,7 @@ next: | |||
1086 | while (!list_empty(&useless)) { | 1103 | while (!list_empty(&useless)) { |
1087 | upper = list_entry(useless.next, struct backref_node, list); | 1104 | upper = list_entry(useless.next, struct backref_node, list); |
1088 | list_del_init(&upper->list); | 1105 | list_del_init(&upper->list); |
1089 | BUG_ON(!list_empty(&upper->upper)); | 1106 | ASSERT(list_empty(&upper->upper)); |
1090 | if (upper == node) | 1107 | if (upper == node) |
1091 | node = NULL; | 1108 | node = NULL; |
1092 | if (upper->lowest) { | 1109 | if (upper->lowest) { |
@@ -1119,29 +1136,45 @@ out: | |||
1119 | if (err) { | 1136 | if (err) { |
1120 | while (!list_empty(&useless)) { | 1137 | while (!list_empty(&useless)) { |
1121 | lower = list_entry(useless.next, | 1138 | lower = list_entry(useless.next, |
1122 | struct backref_node, upper); | 1139 | struct backref_node, list); |
1123 | list_del_init(&lower->upper); | 1140 | list_del_init(&lower->list); |
1124 | } | 1141 | } |
1125 | upper = node; | 1142 | while (!list_empty(&list)) { |
1126 | INIT_LIST_HEAD(&list); | 1143 | edge = list_first_entry(&list, struct backref_edge, |
1127 | while (upper) { | 1144 | list[UPPER]); |
1128 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1145 | list_del(&edge->list[UPPER]); |
1129 | list_splice_tail(&upper->upper, &list); | ||
1130 | free_backref_node(cache, upper); | ||
1131 | } | ||
1132 | |||
1133 | if (list_empty(&list)) | ||
1134 | break; | ||
1135 | |||
1136 | edge = list_entry(list.next, struct backref_edge, | ||
1137 | list[LOWER]); | ||
1138 | list_del(&edge->list[LOWER]); | 1146 | list_del(&edge->list[LOWER]); |
1147 | lower = edge->node[LOWER]; | ||
1139 | upper = edge->node[UPPER]; | 1148 | upper = edge->node[UPPER]; |
1140 | free_backref_edge(cache, edge); | 1149 | free_backref_edge(cache, edge); |
1150 | |||
1151 | /* | ||
1152 | * Lower is no longer linked to any upper backref nodes | ||
1153 | * and isn't in the cache, we can free it ourselves. | ||
1154 | */ | ||
1155 | if (list_empty(&lower->upper) && | ||
1156 | RB_EMPTY_NODE(&lower->rb_node)) | ||
1157 | list_add(&lower->list, &useless); | ||
1158 | |||
1159 | if (!RB_EMPTY_NODE(&upper->rb_node)) | ||
1160 | continue; | ||
1161 | |||
1162 | /* Add this guy's upper edges to the list to process */ | ||
1163 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | ||
1164 | list_add_tail(&edge->list[UPPER], &list); | ||
1165 | if (list_empty(&upper->upper)) | ||
1166 | list_add(&upper->list, &useless); | ||
1167 | } | ||
1168 | |||
1169 | while (!list_empty(&useless)) { | ||
1170 | lower = list_entry(useless.next, | ||
1171 | struct backref_node, list); | ||
1172 | list_del_init(&lower->list); | ||
1173 | free_backref_node(cache, lower); | ||
1141 | } | 1174 | } |
1142 | return ERR_PTR(err); | 1175 | return ERR_PTR(err); |
1143 | } | 1176 | } |
1144 | BUG_ON(node && node->detached); | 1177 | ASSERT(!node || !node->detached); |
1145 | return node; | 1178 | return node; |
1146 | } | 1179 | } |
1147 | 1180 | ||
@@ -1787,7 +1820,7 @@ again: | |||
1787 | btrfs_node_key_to_cpu(parent, next_key, slot + 1); | 1820 | btrfs_node_key_to_cpu(parent, next_key, slot + 1); |
1788 | 1821 | ||
1789 | old_bytenr = btrfs_node_blockptr(parent, slot); | 1822 | old_bytenr = btrfs_node_blockptr(parent, slot); |
1790 | blocksize = btrfs_level_size(dest, level - 1); | 1823 | blocksize = dest->nodesize; |
1791 | old_ptr_gen = btrfs_node_ptr_generation(parent, slot); | 1824 | old_ptr_gen = btrfs_node_ptr_generation(parent, slot); |
1792 | 1825 | ||
1793 | if (level <= max_level) { | 1826 | if (level <= max_level) { |
@@ -1813,8 +1846,7 @@ again: | |||
1813 | break; | 1846 | break; |
1814 | } | 1847 | } |
1815 | 1848 | ||
1816 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1849 | eb = read_tree_block(dest, old_bytenr, old_ptr_gen); |
1817 | old_ptr_gen); | ||
1818 | if (!eb || !extent_buffer_uptodate(eb)) { | 1850 | if (!eb || !extent_buffer_uptodate(eb)) { |
1819 | ret = (!eb) ? -ENOMEM : -EIO; | 1851 | ret = (!eb) ? -ENOMEM : -EIO; |
1820 | free_extent_buffer(eb); | 1852 | free_extent_buffer(eb); |
@@ -1944,7 +1976,6 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, | |||
1944 | u64 bytenr; | 1976 | u64 bytenr; |
1945 | u64 ptr_gen = 0; | 1977 | u64 ptr_gen = 0; |
1946 | u64 last_snapshot; | 1978 | u64 last_snapshot; |
1947 | u32 blocksize; | ||
1948 | u32 nritems; | 1979 | u32 nritems; |
1949 | 1980 | ||
1950 | last_snapshot = btrfs_root_last_snapshot(&root->root_item); | 1981 | last_snapshot = btrfs_root_last_snapshot(&root->root_item); |
@@ -1970,8 +2001,7 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, | |||
1970 | } | 2001 | } |
1971 | 2002 | ||
1972 | bytenr = btrfs_node_blockptr(eb, path->slots[i]); | 2003 | bytenr = btrfs_node_blockptr(eb, path->slots[i]); |
1973 | blocksize = btrfs_level_size(root, i - 1); | 2004 | eb = read_tree_block(root, bytenr, ptr_gen); |
1974 | eb = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
1975 | if (!eb || !extent_buffer_uptodate(eb)) { | 2005 | if (!eb || !extent_buffer_uptodate(eb)) { |
1976 | free_extent_buffer(eb); | 2006 | free_extent_buffer(eb); |
1977 | return -EIO; | 2007 | return -EIO; |
@@ -2316,7 +2346,7 @@ void free_reloc_roots(struct list_head *list) | |||
2316 | } | 2346 | } |
2317 | 2347 | ||
2318 | static noinline_for_stack | 2348 | static noinline_for_stack |
2319 | int merge_reloc_roots(struct reloc_control *rc) | 2349 | void merge_reloc_roots(struct reloc_control *rc) |
2320 | { | 2350 | { |
2321 | struct btrfs_root *root; | 2351 | struct btrfs_root *root; |
2322 | struct btrfs_root *reloc_root; | 2352 | struct btrfs_root *reloc_root; |
@@ -2397,7 +2427,6 @@ out: | |||
2397 | } | 2427 | } |
2398 | 2428 | ||
2399 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2429 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
2400 | return ret; | ||
2401 | } | 2430 | } |
2402 | 2431 | ||
2403 | static void free_block_list(struct rb_root *blocks) | 2432 | static void free_block_list(struct rb_root *blocks) |
@@ -2544,8 +2573,7 @@ u64 calcu_metadata_size(struct reloc_control *rc, | |||
2544 | if (next->processed && (reserve || next != node)) | 2573 | if (next->processed && (reserve || next != node)) |
2545 | break; | 2574 | break; |
2546 | 2575 | ||
2547 | num_bytes += btrfs_level_size(rc->extent_root, | 2576 | num_bytes += rc->extent_root->nodesize; |
2548 | next->level); | ||
2549 | 2577 | ||
2550 | if (list_empty(&next->upper)) | 2578 | if (list_empty(&next->upper)) |
2551 | break; | 2579 | break; |
@@ -2679,9 +2707,9 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2679 | goto next; | 2707 | goto next; |
2680 | } | 2708 | } |
2681 | 2709 | ||
2682 | blocksize = btrfs_level_size(root, node->level); | 2710 | blocksize = root->nodesize; |
2683 | generation = btrfs_node_ptr_generation(upper->eb, slot); | 2711 | generation = btrfs_node_ptr_generation(upper->eb, slot); |
2684 | eb = read_tree_block(root, bytenr, blocksize, generation); | 2712 | eb = read_tree_block(root, bytenr, generation); |
2685 | if (!eb || !extent_buffer_uptodate(eb)) { | 2713 | if (!eb || !extent_buffer_uptodate(eb)) { |
2686 | free_extent_buffer(eb); | 2714 | free_extent_buffer(eb); |
2687 | err = -EIO; | 2715 | err = -EIO; |
@@ -2789,7 +2817,7 @@ static void __mark_block_processed(struct reloc_control *rc, | |||
2789 | u32 blocksize; | 2817 | u32 blocksize; |
2790 | if (node->level == 0 || | 2818 | if (node->level == 0 || |
2791 | in_block_group(node->bytenr, rc->block_group)) { | 2819 | in_block_group(node->bytenr, rc->block_group)) { |
2792 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2820 | blocksize = rc->extent_root->nodesize; |
2793 | mark_block_processed(rc, node->bytenr, blocksize); | 2821 | mark_block_processed(rc, node->bytenr, blocksize); |
2794 | } | 2822 | } |
2795 | node->processed = 1; | 2823 | node->processed = 1; |
@@ -2843,7 +2871,7 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2843 | 2871 | ||
2844 | BUG_ON(block->key_ready); | 2872 | BUG_ON(block->key_ready); |
2845 | eb = read_tree_block(rc->extent_root, block->bytenr, | 2873 | eb = read_tree_block(rc->extent_root, block->bytenr, |
2846 | block->key.objectid, block->key.offset); | 2874 | block->key.offset); |
2847 | if (!eb || !extent_buffer_uptodate(eb)) { | 2875 | if (!eb || !extent_buffer_uptodate(eb)) { |
2848 | free_extent_buffer(eb); | 2876 | free_extent_buffer(eb); |
2849 | return -EIO; | 2877 | return -EIO; |
@@ -2858,20 +2886,6 @@ static int get_tree_block_key(struct reloc_control *rc, | |||
2858 | return 0; | 2886 | return 0; |
2859 | } | 2887 | } |
2860 | 2888 | ||
2861 | static int reada_tree_block(struct reloc_control *rc, | ||
2862 | struct tree_block *block) | ||
2863 | { | ||
2864 | BUG_ON(block->key_ready); | ||
2865 | if (block->key.type == BTRFS_METADATA_ITEM_KEY) | ||
2866 | readahead_tree_block(rc->extent_root, block->bytenr, | ||
2867 | block->key.objectid, | ||
2868 | rc->extent_root->leafsize); | ||
2869 | else | ||
2870 | readahead_tree_block(rc->extent_root, block->bytenr, | ||
2871 | block->key.objectid, block->key.offset); | ||
2872 | return 0; | ||
2873 | } | ||
2874 | |||
2875 | /* | 2889 | /* |
2876 | * helper function to relocate a tree block | 2890 | * helper function to relocate a tree block |
2877 | */ | 2891 | */ |
@@ -2951,7 +2965,8 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2951 | while (rb_node) { | 2965 | while (rb_node) { |
2952 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2966 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2953 | if (!block->key_ready) | 2967 | if (!block->key_ready) |
2954 | reada_tree_block(rc, block); | 2968 | readahead_tree_block(rc->extent_root, block->bytenr, |
2969 | block->key.objectid); | ||
2955 | rb_node = rb_next(rb_node); | 2970 | rb_node = rb_next(rb_node); |
2956 | } | 2971 | } |
2957 | 2972 | ||
@@ -3313,7 +3328,7 @@ static int add_tree_block(struct reloc_control *rc, | |||
3313 | return -ENOMEM; | 3328 | return -ENOMEM; |
3314 | 3329 | ||
3315 | block->bytenr = extent_key->objectid; | 3330 | block->bytenr = extent_key->objectid; |
3316 | block->key.objectid = rc->extent_root->leafsize; | 3331 | block->key.objectid = rc->extent_root->nodesize; |
3317 | block->key.offset = generation; | 3332 | block->key.offset = generation; |
3318 | block->level = level; | 3333 | block->level = level; |
3319 | block->key_ready = 0; | 3334 | block->key_ready = 0; |
@@ -3640,7 +3655,7 @@ int add_data_references(struct reloc_control *rc, | |||
3640 | struct btrfs_extent_inline_ref *iref; | 3655 | struct btrfs_extent_inline_ref *iref; |
3641 | unsigned long ptr; | 3656 | unsigned long ptr; |
3642 | unsigned long end; | 3657 | unsigned long end; |
3643 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); | 3658 | u32 blocksize = rc->extent_root->nodesize; |
3644 | int ret = 0; | 3659 | int ret = 0; |
3645 | int err = 0; | 3660 | int err = 0; |
3646 | 3661 | ||
@@ -3783,7 +3798,7 @@ next: | |||
3783 | } | 3798 | } |
3784 | 3799 | ||
3785 | if (key.type == BTRFS_METADATA_ITEM_KEY && | 3800 | if (key.type == BTRFS_METADATA_ITEM_KEY && |
3786 | key.objectid + rc->extent_root->leafsize <= | 3801 | key.objectid + rc->extent_root->nodesize <= |
3787 | rc->search_start) { | 3802 | rc->search_start) { |
3788 | path->slots[0]++; | 3803 | path->slots[0]++; |
3789 | goto next; | 3804 | goto next; |
@@ -3801,7 +3816,7 @@ next: | |||
3801 | rc->search_start = key.objectid + key.offset; | 3816 | rc->search_start = key.objectid + key.offset; |
3802 | else | 3817 | else |
3803 | rc->search_start = key.objectid + | 3818 | rc->search_start = key.objectid + |
3804 | rc->extent_root->leafsize; | 3819 | rc->extent_root->nodesize; |
3805 | memcpy(extent_key, &key, sizeof(key)); | 3820 | memcpy(extent_key, &key, sizeof(key)); |
3806 | return 0; | 3821 | return 0; |
3807 | } | 3822 | } |
@@ -4096,7 +4111,6 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
4096 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | | 4111 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
4097 | BTRFS_INODE_PREALLOC); | 4112 | BTRFS_INODE_PREALLOC); |
4098 | btrfs_mark_buffer_dirty(leaf); | 4113 | btrfs_mark_buffer_dirty(leaf); |
4099 | btrfs_release_path(path); | ||
4100 | out: | 4114 | out: |
4101 | btrfs_free_path(path); | 4115 | btrfs_free_path(path); |
4102 | return ret; | 4116 | return ret; |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f4a41f37be22..efa083113827 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -137,7 +137,6 @@ struct scrub_ctx { | |||
137 | int pages_per_rd_bio; | 137 | int pages_per_rd_bio; |
138 | u32 sectorsize; | 138 | u32 sectorsize; |
139 | u32 nodesize; | 139 | u32 nodesize; |
140 | u32 leafsize; | ||
141 | 140 | ||
142 | int is_dev_replace; | 141 | int is_dev_replace; |
143 | struct scrub_wr_ctx wr_ctx; | 142 | struct scrub_wr_ctx wr_ctx; |
@@ -178,17 +177,12 @@ struct scrub_copy_nocow_ctx { | |||
178 | struct scrub_warning { | 177 | struct scrub_warning { |
179 | struct btrfs_path *path; | 178 | struct btrfs_path *path; |
180 | u64 extent_item_size; | 179 | u64 extent_item_size; |
181 | char *scratch_buf; | ||
182 | char *msg_buf; | ||
183 | const char *errstr; | 180 | const char *errstr; |
184 | sector_t sector; | 181 | sector_t sector; |
185 | u64 logical; | 182 | u64 logical; |
186 | struct btrfs_device *dev; | 183 | struct btrfs_device *dev; |
187 | int msg_bufsize; | ||
188 | int scratch_bufsize; | ||
189 | }; | 184 | }; |
190 | 185 | ||
191 | |||
192 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx); | 186 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx); |
193 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx); | 187 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx); |
194 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); | 188 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); |
@@ -438,7 +432,6 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
438 | } | 432 | } |
439 | sctx->first_free = 0; | 433 | sctx->first_free = 0; |
440 | sctx->nodesize = dev->dev_root->nodesize; | 434 | sctx->nodesize = dev->dev_root->nodesize; |
441 | sctx->leafsize = dev->dev_root->leafsize; | ||
442 | sctx->sectorsize = dev->dev_root->sectorsize; | 435 | sctx->sectorsize = dev->dev_root->sectorsize; |
443 | atomic_set(&sctx->bios_in_flight, 0); | 436 | atomic_set(&sctx->bios_in_flight, 0); |
444 | atomic_set(&sctx->workers_pending, 0); | 437 | atomic_set(&sctx->workers_pending, 0); |
@@ -553,7 +546,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
553 | u64 ref_root; | 546 | u64 ref_root; |
554 | u32 item_size; | 547 | u32 item_size; |
555 | u8 ref_level; | 548 | u8 ref_level; |
556 | const int bufsize = 4096; | ||
557 | int ret; | 549 | int ret; |
558 | 550 | ||
559 | WARN_ON(sblock->page_count < 1); | 551 | WARN_ON(sblock->page_count < 1); |
@@ -561,18 +553,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
561 | fs_info = sblock->sctx->dev_root->fs_info; | 553 | fs_info = sblock->sctx->dev_root->fs_info; |
562 | 554 | ||
563 | path = btrfs_alloc_path(); | 555 | path = btrfs_alloc_path(); |
556 | if (!path) | ||
557 | return; | ||
564 | 558 | ||
565 | swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); | ||
566 | swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); | ||
567 | swarn.sector = (sblock->pagev[0]->physical) >> 9; | 559 | swarn.sector = (sblock->pagev[0]->physical) >> 9; |
568 | swarn.logical = sblock->pagev[0]->logical; | 560 | swarn.logical = sblock->pagev[0]->logical; |
569 | swarn.errstr = errstr; | 561 | swarn.errstr = errstr; |
570 | swarn.dev = NULL; | 562 | swarn.dev = NULL; |
571 | swarn.msg_bufsize = bufsize; | ||
572 | swarn.scratch_bufsize = bufsize; | ||
573 | |||
574 | if (!path || !swarn.scratch_buf || !swarn.msg_buf) | ||
575 | goto out; | ||
576 | 563 | ||
577 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, | 564 | ret = extent_from_logical(fs_info, swarn.logical, path, &found_key, |
578 | &flags); | 565 | &flags); |
@@ -613,8 +600,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
613 | 600 | ||
614 | out: | 601 | out: |
615 | btrfs_free_path(path); | 602 | btrfs_free_path(path); |
616 | kfree(swarn.scratch_buf); | ||
617 | kfree(swarn.msg_buf); | ||
618 | } | 603 | } |
619 | 604 | ||
620 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) | 605 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) |
@@ -681,9 +666,9 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) | |||
681 | ret = -EIO; | 666 | ret = -EIO; |
682 | goto out; | 667 | goto out; |
683 | } | 668 | } |
684 | fs_info = BTRFS_I(inode)->root->fs_info; | 669 | ret = repair_io_failure(inode, offset, PAGE_SIZE, |
685 | ret = repair_io_failure(fs_info, offset, PAGE_SIZE, | ||
686 | fixup->logical, page, | 670 | fixup->logical, page, |
671 | offset - page_offset(page), | ||
687 | fixup->mirror_num); | 672 | fixup->mirror_num); |
688 | unlock_page(page); | 673 | unlock_page(page); |
689 | corrected = !ret; | 674 | corrected = !ret; |
@@ -1361,6 +1346,16 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
1361 | return; | 1346 | return; |
1362 | } | 1347 | } |
1363 | 1348 | ||
1349 | static inline int scrub_check_fsid(u8 fsid[], | ||
1350 | struct scrub_page *spage) | ||
1351 | { | ||
1352 | struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices; | ||
1353 | int ret; | ||
1354 | |||
1355 | ret = memcmp(fsid, fs_devices->fsid, BTRFS_UUID_SIZE); | ||
1356 | return !ret; | ||
1357 | } | ||
1358 | |||
1364 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 1359 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
1365 | struct scrub_block *sblock, | 1360 | struct scrub_block *sblock, |
1366 | int is_metadata, int have_csum, | 1361 | int is_metadata, int have_csum, |
@@ -1380,7 +1375,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
1380 | h = (struct btrfs_header *)mapped_buffer; | 1375 | h = (struct btrfs_header *)mapped_buffer; |
1381 | 1376 | ||
1382 | if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || | 1377 | if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) || |
1383 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || | 1378 | !scrub_check_fsid(h->fsid, sblock->pagev[0]) || |
1384 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1379 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
1385 | BTRFS_UUID_SIZE)) { | 1380 | BTRFS_UUID_SIZE)) { |
1386 | sblock->header_error = 1; | 1381 | sblock->header_error = 1; |
@@ -1751,14 +1746,13 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
1751 | if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) | 1746 | if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) |
1752 | ++fail; | 1747 | ++fail; |
1753 | 1748 | ||
1754 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1749 | if (!scrub_check_fsid(h->fsid, sblock->pagev[0])) |
1755 | ++fail; | 1750 | ++fail; |
1756 | 1751 | ||
1757 | if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1752 | if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
1758 | BTRFS_UUID_SIZE)) | 1753 | BTRFS_UUID_SIZE)) |
1759 | ++fail; | 1754 | ++fail; |
1760 | 1755 | ||
1761 | WARN_ON(sctx->nodesize != sctx->leafsize); | ||
1762 | len = sctx->nodesize - BTRFS_CSUM_SIZE; | 1756 | len = sctx->nodesize - BTRFS_CSUM_SIZE; |
1763 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1757 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
1764 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; | 1758 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
@@ -1791,8 +1785,6 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1791 | { | 1785 | { |
1792 | struct btrfs_super_block *s; | 1786 | struct btrfs_super_block *s; |
1793 | struct scrub_ctx *sctx = sblock->sctx; | 1787 | struct scrub_ctx *sctx = sblock->sctx; |
1794 | struct btrfs_root *root = sctx->dev_root; | ||
1795 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1796 | u8 calculated_csum[BTRFS_CSUM_SIZE]; | 1788 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
1797 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; | 1789 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
1798 | struct page *page; | 1790 | struct page *page; |
@@ -1817,7 +1809,7 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
1817 | if (sblock->pagev[0]->generation != btrfs_super_generation(s)) | 1809 | if (sblock->pagev[0]->generation != btrfs_super_generation(s)) |
1818 | ++fail_gen; | 1810 | ++fail_gen; |
1819 | 1811 | ||
1820 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1812 | if (!scrub_check_fsid(s->fsid, sblock->pagev[0])) |
1821 | ++fail_cor; | 1813 | ++fail_cor; |
1822 | 1814 | ||
1823 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; | 1815 | len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE; |
@@ -2196,7 +2188,6 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
2196 | sctx->stat.data_bytes_scrubbed += len; | 2188 | sctx->stat.data_bytes_scrubbed += len; |
2197 | spin_unlock(&sctx->stat_lock); | 2189 | spin_unlock(&sctx->stat_lock); |
2198 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 2190 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
2199 | WARN_ON(sctx->nodesize != sctx->leafsize); | ||
2200 | blocksize = sctx->nodesize; | 2191 | blocksize = sctx->nodesize; |
2201 | spin_lock(&sctx->stat_lock); | 2192 | spin_lock(&sctx->stat_lock); |
2202 | sctx->stat.tree_extents_scrubbed++; | 2193 | sctx->stat.tree_extents_scrubbed++; |
@@ -2487,7 +2478,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2487 | btrfs_item_key_to_cpu(l, &key, slot); | 2478 | btrfs_item_key_to_cpu(l, &key, slot); |
2488 | 2479 | ||
2489 | if (key.type == BTRFS_METADATA_ITEM_KEY) | 2480 | if (key.type == BTRFS_METADATA_ITEM_KEY) |
2490 | bytes = root->leafsize; | 2481 | bytes = root->nodesize; |
2491 | else | 2482 | else |
2492 | bytes = key.offset; | 2483 | bytes = key.offset; |
2493 | 2484 | ||
@@ -2714,7 +2705,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
2714 | if (found_key.objectid != scrub_dev->devid) | 2705 | if (found_key.objectid != scrub_dev->devid) |
2715 | break; | 2706 | break; |
2716 | 2707 | ||
2717 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) | 2708 | if (found_key.type != BTRFS_DEV_EXTENT_KEY) |
2718 | break; | 2709 | break; |
2719 | 2710 | ||
2720 | if (found_key.offset >= end) | 2711 | if (found_key.offset >= end) |
@@ -2828,11 +2819,16 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, | |||
2828 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 2819 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
2829 | return -EIO; | 2820 | return -EIO; |
2830 | 2821 | ||
2831 | gen = root->fs_info->last_trans_committed; | 2822 | /* Seed devices of a new filesystem have their own generation. */ |
2823 | if (scrub_dev->fs_devices != root->fs_info->fs_devices) | ||
2824 | gen = scrub_dev->generation; | ||
2825 | else | ||
2826 | gen = root->fs_info->last_trans_committed; | ||
2832 | 2827 | ||
2833 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | 2828 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { |
2834 | bytenr = btrfs_sb_offset(i); | 2829 | bytenr = btrfs_sb_offset(i); |
2835 | if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes) | 2830 | if (bytenr + BTRFS_SUPER_INFO_SIZE > |
2831 | scrub_dev->commit_total_bytes) | ||
2836 | break; | 2832 | break; |
2837 | 2833 | ||
2838 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, | 2834 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, |
@@ -2910,17 +2906,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
2910 | if (btrfs_fs_closing(fs_info)) | 2906 | if (btrfs_fs_closing(fs_info)) |
2911 | return -EINVAL; | 2907 | return -EINVAL; |
2912 | 2908 | ||
2913 | /* | ||
2914 | * check some assumptions | ||
2915 | */ | ||
2916 | if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { | ||
2917 | btrfs_err(fs_info, | ||
2918 | "scrub: size assumption nodesize == leafsize (%d == %d) fails", | ||
2919 | fs_info->chunk_root->nodesize, | ||
2920 | fs_info->chunk_root->leafsize); | ||
2921 | return -EINVAL; | ||
2922 | } | ||
2923 | |||
2924 | if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { | 2909 | if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { |
2925 | /* | 2910 | /* |
2926 | * in this case scrub is unable to calculate the checksum | 2911 | * in this case scrub is unable to calculate the checksum |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6528aa662181..874828dd0a86 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -515,7 +515,8 @@ static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) | |||
515 | set_fs(KERNEL_DS); | 515 | set_fs(KERNEL_DS); |
516 | 516 | ||
517 | while (pos < len) { | 517 | while (pos < len) { |
518 | ret = vfs_write(filp, (char *)buf + pos, len - pos, off); | 518 | ret = vfs_write(filp, (__force const char __user *)buf + pos, |
519 | len - pos, off); | ||
519 | /* TODO handle that correctly */ | 520 | /* TODO handle that correctly */ |
520 | /*if (ret == -ERESTARTSYS) { | 521 | /*if (ret == -ERESTARTSYS) { |
521 | continue; | 522 | continue; |
@@ -985,11 +986,13 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
985 | int num; | 986 | int num; |
986 | u8 type; | 987 | u8 type; |
987 | 988 | ||
988 | if (found_key->type == BTRFS_XATTR_ITEM_KEY) | 989 | /* |
989 | buf_len = BTRFS_MAX_XATTR_SIZE(root); | 990 | * Start with a small buffer (1 page). If later we end up needing more |
990 | else | 991 | * space, which can happen for xattrs on a fs with a leaf size greater |
991 | buf_len = PATH_MAX; | 992 | * then the page size, attempt to increase the buffer. Typically xattr |
992 | 993 | * values are small. | |
994 | */ | ||
995 | buf_len = PATH_MAX; | ||
993 | buf = kmalloc(buf_len, GFP_NOFS); | 996 | buf = kmalloc(buf_len, GFP_NOFS); |
994 | if (!buf) { | 997 | if (!buf) { |
995 | ret = -ENOMEM; | 998 | ret = -ENOMEM; |
@@ -1016,7 +1019,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1016 | ret = -ENAMETOOLONG; | 1019 | ret = -ENAMETOOLONG; |
1017 | goto out; | 1020 | goto out; |
1018 | } | 1021 | } |
1019 | if (name_len + data_len > buf_len) { | 1022 | if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) { |
1020 | ret = -E2BIG; | 1023 | ret = -E2BIG; |
1021 | goto out; | 1024 | goto out; |
1022 | } | 1025 | } |
@@ -1024,12 +1027,34 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1024 | /* | 1027 | /* |
1025 | * Path too long | 1028 | * Path too long |
1026 | */ | 1029 | */ |
1027 | if (name_len + data_len > buf_len) { | 1030 | if (name_len + data_len > PATH_MAX) { |
1028 | ret = -ENAMETOOLONG; | 1031 | ret = -ENAMETOOLONG; |
1029 | goto out; | 1032 | goto out; |
1030 | } | 1033 | } |
1031 | } | 1034 | } |
1032 | 1035 | ||
1036 | if (name_len + data_len > buf_len) { | ||
1037 | buf_len = name_len + data_len; | ||
1038 | if (is_vmalloc_addr(buf)) { | ||
1039 | vfree(buf); | ||
1040 | buf = NULL; | ||
1041 | } else { | ||
1042 | char *tmp = krealloc(buf, buf_len, | ||
1043 | GFP_NOFS | __GFP_NOWARN); | ||
1044 | |||
1045 | if (!tmp) | ||
1046 | kfree(buf); | ||
1047 | buf = tmp; | ||
1048 | } | ||
1049 | if (!buf) { | ||
1050 | buf = vmalloc(buf_len); | ||
1051 | if (!buf) { | ||
1052 | ret = -ENOMEM; | ||
1053 | goto out; | ||
1054 | } | ||
1055 | } | ||
1056 | } | ||
1057 | |||
1033 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1058 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
1034 | name_len + data_len); | 1059 | name_len + data_len); |
1035 | 1060 | ||
@@ -1050,7 +1075,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
1050 | } | 1075 | } |
1051 | 1076 | ||
1052 | out: | 1077 | out: |
1053 | kfree(buf); | 1078 | kvfree(buf); |
1054 | return ret; | 1079 | return ret; |
1055 | } | 1080 | } |
1056 | 1081 | ||
@@ -3302,7 +3327,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3302 | if (ret < 0 && ret != -ENOENT) { | 3327 | if (ret < 0 && ret != -ENOENT) { |
3303 | goto out; | 3328 | goto out; |
3304 | } else if (ret == -ENOENT) { | 3329 | } else if (ret == -ENOENT) { |
3305 | ret = 1; | 3330 | ret = 0; |
3306 | break; | 3331 | break; |
3307 | } | 3332 | } |
3308 | 3333 | ||
@@ -5703,7 +5728,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5703 | NULL); | 5728 | NULL); |
5704 | sort_clone_roots = 1; | 5729 | sort_clone_roots = 1; |
5705 | 5730 | ||
5706 | current->journal_info = (void *)BTRFS_SEND_TRANS_STUB; | 5731 | current->journal_info = BTRFS_SEND_TRANS_STUB; |
5707 | ret = send_subvol(sctx); | 5732 | ret = send_subvol(sctx); |
5708 | current->journal_info = NULL; | 5733 | current->journal_info = NULL; |
5709 | if (ret < 0) | 5734 | if (ret < 0) |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c4124de4435b..a2b97ef10317 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "backref.h" | 60 | #include "backref.h" |
61 | #include "tests/btrfs-tests.h" | 61 | #include "tests/btrfs-tests.h" |
62 | 62 | ||
63 | #include "qgroup.h" | ||
63 | #define CREATE_TRACE_POINTS | 64 | #define CREATE_TRACE_POINTS |
64 | #include <trace/events/btrfs.h> | 65 | #include <trace/events/btrfs.h> |
65 | 66 | ||
@@ -307,13 +308,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, | |||
307 | 308 | ||
308 | static void btrfs_put_super(struct super_block *sb) | 309 | static void btrfs_put_super(struct super_block *sb) |
309 | { | 310 | { |
310 | (void)close_ctree(btrfs_sb(sb)->tree_root); | 311 | close_ctree(btrfs_sb(sb)->tree_root); |
311 | /* FIXME: need to fix VFS to return error? */ | ||
312 | /* AV: return it _where_? ->put_super() can be triggered by any number | ||
313 | * of async events, up to and including delivery of SIGKILL to the | ||
314 | * last process that kept it busy. Or segfault in the aforementioned | ||
315 | * process... Whom would you report that to? | ||
316 | */ | ||
317 | } | 312 | } |
318 | 313 | ||
319 | enum { | 314 | enum { |
@@ -400,7 +395,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
400 | int ret = 0; | 395 | int ret = 0; |
401 | char *compress_type; | 396 | char *compress_type; |
402 | bool compress_force = false; | 397 | bool compress_force = false; |
403 | bool compress = false; | ||
404 | 398 | ||
405 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); | 399 | cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); |
406 | if (cache_gen) | 400 | if (cache_gen) |
@@ -478,7 +472,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
478 | /* Fallthrough */ | 472 | /* Fallthrough */ |
479 | case Opt_compress: | 473 | case Opt_compress: |
480 | case Opt_compress_type: | 474 | case Opt_compress_type: |
481 | compress = true; | ||
482 | if (token == Opt_compress || | 475 | if (token == Opt_compress || |
483 | token == Opt_compress_force || | 476 | token == Opt_compress_force || |
484 | strcmp(args[0].from, "zlib") == 0) { | 477 | strcmp(args[0].from, "zlib") == 0) { |
@@ -508,11 +501,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
508 | btrfs_set_and_info(root, FORCE_COMPRESS, | 501 | btrfs_set_and_info(root, FORCE_COMPRESS, |
509 | "force %s compression", | 502 | "force %s compression", |
510 | compress_type); | 503 | compress_type); |
511 | } else if (compress) { | 504 | } else { |
512 | if (!btrfs_test_opt(root, COMPRESS)) | 505 | if (!btrfs_test_opt(root, COMPRESS)) |
513 | btrfs_info(root->fs_info, | 506 | btrfs_info(root->fs_info, |
514 | "btrfs: use %s compression", | 507 | "btrfs: use %s compression", |
515 | compress_type); | 508 | compress_type); |
509 | /* | ||
510 | * If we remount from compress-force=xxx to | ||
511 | * compress=xxx, we need clear FORCE_COMPRESS | ||
512 | * flag, otherwise, there is no way for users | ||
513 | * to disable forcible compression separately. | ||
514 | */ | ||
515 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | ||
516 | } | 516 | } |
517 | break; | 517 | break; |
518 | case Opt_ssd: | 518 | case Opt_ssd: |
@@ -1014,7 +1014,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1014 | seq_puts(seq, ",nodatacow"); | 1014 | seq_puts(seq, ",nodatacow"); |
1015 | if (btrfs_test_opt(root, NOBARRIER)) | 1015 | if (btrfs_test_opt(root, NOBARRIER)) |
1016 | seq_puts(seq, ",nobarrier"); | 1016 | seq_puts(seq, ",nobarrier"); |
1017 | if (info->max_inline != 8192 * 1024) | 1017 | if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) |
1018 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | 1018 | seq_printf(seq, ",max_inline=%llu", info->max_inline); |
1019 | if (info->alloc_start != 0) | 1019 | if (info->alloc_start != 0) |
1020 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | 1020 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); |
@@ -1215,6 +1215,56 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, | |||
1215 | return root; | 1215 | return root; |
1216 | } | 1216 | } |
1217 | 1217 | ||
1218 | static int parse_security_options(char *orig_opts, | ||
1219 | struct security_mnt_opts *sec_opts) | ||
1220 | { | ||
1221 | char *secdata = NULL; | ||
1222 | int ret = 0; | ||
1223 | |||
1224 | secdata = alloc_secdata(); | ||
1225 | if (!secdata) | ||
1226 | return -ENOMEM; | ||
1227 | ret = security_sb_copy_data(orig_opts, secdata); | ||
1228 | if (ret) { | ||
1229 | free_secdata(secdata); | ||
1230 | return ret; | ||
1231 | } | ||
1232 | ret = security_sb_parse_opts_str(secdata, sec_opts); | ||
1233 | free_secdata(secdata); | ||
1234 | return ret; | ||
1235 | } | ||
1236 | |||
1237 | static int setup_security_options(struct btrfs_fs_info *fs_info, | ||
1238 | struct super_block *sb, | ||
1239 | struct security_mnt_opts *sec_opts) | ||
1240 | { | ||
1241 | int ret = 0; | ||
1242 | |||
1243 | /* | ||
1244 | * Call security_sb_set_mnt_opts() to check whether new sec_opts | ||
1245 | * is valid. | ||
1246 | */ | ||
1247 | ret = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL); | ||
1248 | if (ret) | ||
1249 | return ret; | ||
1250 | |||
1251 | #ifdef CONFIG_SECURITY | ||
1252 | if (!fs_info->security_opts.num_mnt_opts) { | ||
1253 | /* first time security setup, copy sec_opts to fs_info */ | ||
1254 | memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts)); | ||
1255 | } else { | ||
1256 | /* | ||
1257 | * Since SELinux(the only one supports security_mnt_opts) does | ||
1258 | * NOT support changing context during remount/mount same sb, | ||
1259 | * This must be the same or part of the same security options, | ||
1260 | * just free it. | ||
1261 | */ | ||
1262 | security_free_mnt_opts(sec_opts); | ||
1263 | } | ||
1264 | #endif | ||
1265 | return ret; | ||
1266 | } | ||
1267 | |||
1218 | /* | 1268 | /* |
1219 | * Find a superblock for the given device / mount point. | 1269 | * Find a superblock for the given device / mount point. |
1220 | * | 1270 | * |
@@ -1229,6 +1279,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1229 | struct dentry *root; | 1279 | struct dentry *root; |
1230 | struct btrfs_fs_devices *fs_devices = NULL; | 1280 | struct btrfs_fs_devices *fs_devices = NULL; |
1231 | struct btrfs_fs_info *fs_info = NULL; | 1281 | struct btrfs_fs_info *fs_info = NULL; |
1282 | struct security_mnt_opts new_sec_opts; | ||
1232 | fmode_t mode = FMODE_READ; | 1283 | fmode_t mode = FMODE_READ; |
1233 | char *subvol_name = NULL; | 1284 | char *subvol_name = NULL; |
1234 | u64 subvol_objectid = 0; | 1285 | u64 subvol_objectid = 0; |
@@ -1251,9 +1302,16 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1251 | return root; | 1302 | return root; |
1252 | } | 1303 | } |
1253 | 1304 | ||
1305 | security_init_mnt_opts(&new_sec_opts); | ||
1306 | if (data) { | ||
1307 | error = parse_security_options(data, &new_sec_opts); | ||
1308 | if (error) | ||
1309 | return ERR_PTR(error); | ||
1310 | } | ||
1311 | |||
1254 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); | 1312 | error = btrfs_scan_one_device(device_name, mode, fs_type, &fs_devices); |
1255 | if (error) | 1313 | if (error) |
1256 | return ERR_PTR(error); | 1314 | goto error_sec_opts; |
1257 | 1315 | ||
1258 | /* | 1316 | /* |
1259 | * Setup a dummy root and fs_info for test/set super. This is because | 1317 | * Setup a dummy root and fs_info for test/set super. This is because |
@@ -1262,13 +1320,16 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1262 | * then open_ctree will properly initialize everything later. | 1320 | * then open_ctree will properly initialize everything later. |
1263 | */ | 1321 | */ |
1264 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); | 1322 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); |
1265 | if (!fs_info) | 1323 | if (!fs_info) { |
1266 | return ERR_PTR(-ENOMEM); | 1324 | error = -ENOMEM; |
1325 | goto error_sec_opts; | ||
1326 | } | ||
1267 | 1327 | ||
1268 | fs_info->fs_devices = fs_devices; | 1328 | fs_info->fs_devices = fs_devices; |
1269 | 1329 | ||
1270 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | 1330 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); |
1271 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | 1331 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); |
1332 | security_init_mnt_opts(&fs_info->security_opts); | ||
1272 | if (!fs_info->super_copy || !fs_info->super_for_commit) { | 1333 | if (!fs_info->super_copy || !fs_info->super_for_commit) { |
1273 | error = -ENOMEM; | 1334 | error = -ENOMEM; |
1274 | goto error_fs_info; | 1335 | goto error_fs_info; |
@@ -1306,8 +1367,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
1306 | } | 1367 | } |
1307 | 1368 | ||
1308 | root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); | 1369 | root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error); |
1309 | if (IS_ERR(root)) | 1370 | if (IS_ERR(root)) { |
1371 | deactivate_locked_super(s); | ||
1372 | error = PTR_ERR(root); | ||
1373 | goto error_sec_opts; | ||
1374 | } | ||
1375 | |||
1376 | fs_info = btrfs_sb(s); | ||
1377 | error = setup_security_options(fs_info, s, &new_sec_opts); | ||
1378 | if (error) { | ||
1379 | dput(root); | ||
1310 | deactivate_locked_super(s); | 1380 | deactivate_locked_super(s); |
1381 | goto error_sec_opts; | ||
1382 | } | ||
1311 | 1383 | ||
1312 | return root; | 1384 | return root; |
1313 | 1385 | ||
@@ -1315,6 +1387,8 @@ error_close_devices: | |||
1315 | btrfs_close_devices(fs_devices); | 1387 | btrfs_close_devices(fs_devices); |
1316 | error_fs_info: | 1388 | error_fs_info: |
1317 | free_fs_info(fs_info); | 1389 | free_fs_info(fs_info); |
1390 | error_sec_opts: | ||
1391 | security_free_mnt_opts(&new_sec_opts); | ||
1318 | return ERR_PTR(error); | 1392 | return ERR_PTR(error); |
1319 | } | 1393 | } |
1320 | 1394 | ||
@@ -1396,6 +1470,21 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1396 | sync_filesystem(sb); | 1470 | sync_filesystem(sb); |
1397 | btrfs_remount_prepare(fs_info); | 1471 | btrfs_remount_prepare(fs_info); |
1398 | 1472 | ||
1473 | if (data) { | ||
1474 | struct security_mnt_opts new_sec_opts; | ||
1475 | |||
1476 | security_init_mnt_opts(&new_sec_opts); | ||
1477 | ret = parse_security_options(data, &new_sec_opts); | ||
1478 | if (ret) | ||
1479 | goto restore; | ||
1480 | ret = setup_security_options(fs_info, sb, | ||
1481 | &new_sec_opts); | ||
1482 | if (ret) { | ||
1483 | security_free_mnt_opts(&new_sec_opts); | ||
1484 | goto restore; | ||
1485 | } | ||
1486 | } | ||
1487 | |||
1399 | ret = btrfs_parse_options(root, data); | 1488 | ret = btrfs_parse_options(root, data); |
1400 | if (ret) { | 1489 | if (ret) { |
1401 | ret = -EINVAL; | 1490 | ret = -EINVAL; |
@@ -1694,7 +1783,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1694 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 1783 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
1695 | int ret; | 1784 | int ret; |
1696 | 1785 | ||
1697 | /* holding chunk_muext to avoid allocating new chunks */ | 1786 | /* |
1787 | * holding chunk_muext to avoid allocating new chunks, holding | ||
1788 | * device_list_mutex to avoid the device being removed | ||
1789 | */ | ||
1790 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
1698 | mutex_lock(&fs_info->chunk_mutex); | 1791 | mutex_lock(&fs_info->chunk_mutex); |
1699 | rcu_read_lock(); | 1792 | rcu_read_lock(); |
1700 | list_for_each_entry_rcu(found, head, list) { | 1793 | list_for_each_entry_rcu(found, head, list) { |
@@ -1735,11 +1828,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1735 | ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); | 1828 | ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); |
1736 | if (ret) { | 1829 | if (ret) { |
1737 | mutex_unlock(&fs_info->chunk_mutex); | 1830 | mutex_unlock(&fs_info->chunk_mutex); |
1831 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1738 | return ret; | 1832 | return ret; |
1739 | } | 1833 | } |
1740 | buf->f_bavail += div_u64(total_free_data, factor); | 1834 | buf->f_bavail += div_u64(total_free_data, factor); |
1741 | buf->f_bavail = buf->f_bavail >> bits; | 1835 | buf->f_bavail = buf->f_bavail >> bits; |
1742 | mutex_unlock(&fs_info->chunk_mutex); | 1836 | mutex_unlock(&fs_info->chunk_mutex); |
1837 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1743 | 1838 | ||
1744 | buf->f_type = BTRFS_SUPER_MAGIC; | 1839 | buf->f_type = BTRFS_SUPER_MAGIC; |
1745 | buf->f_bsize = dentry->d_sb->s_blocksize; | 1840 | buf->f_bsize = dentry->d_sb->s_blocksize; |
@@ -1769,7 +1864,7 @@ static struct file_system_type btrfs_fs_type = { | |||
1769 | .name = "btrfs", | 1864 | .name = "btrfs", |
1770 | .mount = btrfs_mount, | 1865 | .mount = btrfs_mount, |
1771 | .kill_sb = btrfs_kill_super, | 1866 | .kill_sb = btrfs_kill_super, |
1772 | .fs_flags = FS_REQUIRES_DEV, | 1867 | .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, |
1773 | }; | 1868 | }; |
1774 | MODULE_ALIAS_FS("btrfs"); | 1869 | MODULE_ALIAS_FS("btrfs"); |
1775 | 1870 | ||
@@ -1993,11 +2088,15 @@ static int __init init_btrfs_fs(void) | |||
1993 | 2088 | ||
1994 | err = btrfs_prelim_ref_init(); | 2089 | err = btrfs_prelim_ref_init(); |
1995 | if (err) | 2090 | if (err) |
2091 | goto free_delayed_ref; | ||
2092 | |||
2093 | err = btrfs_end_io_wq_init(); | ||
2094 | if (err) | ||
1996 | goto free_prelim_ref; | 2095 | goto free_prelim_ref; |
1997 | 2096 | ||
1998 | err = btrfs_interface_init(); | 2097 | err = btrfs_interface_init(); |
1999 | if (err) | 2098 | if (err) |
2000 | goto free_delayed_ref; | 2099 | goto free_end_io_wq; |
2001 | 2100 | ||
2002 | btrfs_init_lockdep(); | 2101 | btrfs_init_lockdep(); |
2003 | 2102 | ||
@@ -2015,6 +2114,8 @@ static int __init init_btrfs_fs(void) | |||
2015 | 2114 | ||
2016 | unregister_ioctl: | 2115 | unregister_ioctl: |
2017 | btrfs_interface_exit(); | 2116 | btrfs_interface_exit(); |
2117 | free_end_io_wq: | ||
2118 | btrfs_end_io_wq_exit(); | ||
2018 | free_prelim_ref: | 2119 | free_prelim_ref: |
2019 | btrfs_prelim_ref_exit(); | 2120 | btrfs_prelim_ref_exit(); |
2020 | free_delayed_ref: | 2121 | free_delayed_ref: |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 12e53556e214..b2e7bb4393f6 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -242,7 +242,7 @@ static ssize_t global_rsv_size_show(struct kobject *kobj, | |||
242 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 242 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
243 | return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf); | 243 | return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf); |
244 | } | 244 | } |
245 | BTRFS_ATTR(global_rsv_size, 0444, global_rsv_size_show); | 245 | BTRFS_ATTR(global_rsv_size, global_rsv_size_show); |
246 | 246 | ||
247 | static ssize_t global_rsv_reserved_show(struct kobject *kobj, | 247 | static ssize_t global_rsv_reserved_show(struct kobject *kobj, |
248 | struct kobj_attribute *a, char *buf) | 248 | struct kobj_attribute *a, char *buf) |
@@ -251,7 +251,7 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj, | |||
251 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 251 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
252 | return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf); | 252 | return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf); |
253 | } | 253 | } |
254 | BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show); | 254 | BTRFS_ATTR(global_rsv_reserved, global_rsv_reserved_show); |
255 | 255 | ||
256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) | 256 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) |
257 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) | 257 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) |
@@ -306,7 +306,7 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \ | |||
306 | struct btrfs_space_info *sinfo = to_space_info(kobj); \ | 306 | struct btrfs_space_info *sinfo = to_space_info(kobj); \ |
307 | return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \ | 307 | return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \ |
308 | } \ | 308 | } \ |
309 | BTRFS_ATTR(field, 0444, btrfs_space_info_show_##field) | 309 | BTRFS_ATTR(field, btrfs_space_info_show_##field) |
310 | 310 | ||
311 | static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj, | 311 | static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj, |
312 | struct kobj_attribute *a, | 312 | struct kobj_attribute *a, |
@@ -325,7 +325,7 @@ SPACE_INFO_ATTR(bytes_reserved); | |||
325 | SPACE_INFO_ATTR(bytes_may_use); | 325 | SPACE_INFO_ATTR(bytes_may_use); |
326 | SPACE_INFO_ATTR(disk_used); | 326 | SPACE_INFO_ATTR(disk_used); |
327 | SPACE_INFO_ATTR(disk_total); | 327 | SPACE_INFO_ATTR(disk_total); |
328 | BTRFS_ATTR(total_bytes_pinned, 0444, btrfs_space_info_show_total_bytes_pinned); | 328 | BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned); |
329 | 329 | ||
330 | static struct attribute *space_info_attrs[] = { | 330 | static struct attribute *space_info_attrs[] = { |
331 | BTRFS_ATTR_PTR(flags), | 331 | BTRFS_ATTR_PTR(flags), |
@@ -363,7 +363,8 @@ static ssize_t btrfs_label_show(struct kobject *kobj, | |||
363 | struct kobj_attribute *a, char *buf) | 363 | struct kobj_attribute *a, char *buf) |
364 | { | 364 | { |
365 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); | 365 | struct btrfs_fs_info *fs_info = to_fs_info(kobj); |
366 | return snprintf(buf, PAGE_SIZE, "%s\n", fs_info->super_copy->label); | 366 | char *label = fs_info->super_copy->label; |
367 | return snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label); | ||
367 | } | 368 | } |
368 | 369 | ||
369 | static ssize_t btrfs_label_store(struct kobject *kobj, | 370 | static ssize_t btrfs_label_store(struct kobject *kobj, |
@@ -374,8 +375,18 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
374 | struct btrfs_trans_handle *trans; | 375 | struct btrfs_trans_handle *trans; |
375 | struct btrfs_root *root = fs_info->fs_root; | 376 | struct btrfs_root *root = fs_info->fs_root; |
376 | int ret; | 377 | int ret; |
378 | size_t p_len; | ||
377 | 379 | ||
378 | if (len >= BTRFS_LABEL_SIZE) | 380 | if (fs_info->sb->s_flags & MS_RDONLY) |
381 | return -EROFS; | ||
382 | |||
383 | /* | ||
384 | * p_len is the len until the first occurrence of either | ||
385 | * '\n' or '\0' | ||
386 | */ | ||
387 | p_len = strcspn(buf, "\n"); | ||
388 | |||
389 | if (p_len >= BTRFS_LABEL_SIZE) | ||
379 | return -EINVAL; | 390 | return -EINVAL; |
380 | 391 | ||
381 | trans = btrfs_start_transaction(root, 0); | 392 | trans = btrfs_start_transaction(root, 0); |
@@ -383,7 +394,8 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
383 | return PTR_ERR(trans); | 394 | return PTR_ERR(trans); |
384 | 395 | ||
385 | spin_lock(&root->fs_info->super_lock); | 396 | spin_lock(&root->fs_info->super_lock); |
386 | strcpy(fs_info->super_copy->label, buf); | 397 | memset(fs_info->super_copy->label, 0, BTRFS_LABEL_SIZE); |
398 | memcpy(fs_info->super_copy->label, buf, p_len); | ||
387 | spin_unlock(&root->fs_info->super_lock); | 399 | spin_unlock(&root->fs_info->super_lock); |
388 | ret = btrfs_commit_transaction(trans, root); | 400 | ret = btrfs_commit_transaction(trans, root); |
389 | 401 | ||
@@ -392,14 +404,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
392 | 404 | ||
393 | return ret; | 405 | return ret; |
394 | } | 406 | } |
395 | BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store); | 407 | BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store); |
396 | |||
397 | static ssize_t btrfs_no_store(struct kobject *kobj, | ||
398 | struct kobj_attribute *a, | ||
399 | const char *buf, size_t len) | ||
400 | { | ||
401 | return -EPERM; | ||
402 | } | ||
403 | 408 | ||
404 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, | 409 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, |
405 | struct kobj_attribute *a, char *buf) | 410 | struct kobj_attribute *a, char *buf) |
@@ -409,7 +414,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj, | |||
409 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); | 414 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); |
410 | } | 415 | } |
411 | 416 | ||
412 | BTRFS_ATTR_RW(nodesize, 0444, btrfs_nodesize_show, btrfs_no_store); | 417 | BTRFS_ATTR(nodesize, btrfs_nodesize_show); |
413 | 418 | ||
414 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | 419 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, |
415 | struct kobj_attribute *a, char *buf) | 420 | struct kobj_attribute *a, char *buf) |
@@ -419,7 +424,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | |||
419 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | 424 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); |
420 | } | 425 | } |
421 | 426 | ||
422 | BTRFS_ATTR_RW(sectorsize, 0444, btrfs_sectorsize_show, btrfs_no_store); | 427 | BTRFS_ATTR(sectorsize, btrfs_sectorsize_show); |
423 | 428 | ||
424 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | 429 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, |
425 | struct kobj_attribute *a, char *buf) | 430 | struct kobj_attribute *a, char *buf) |
@@ -429,7 +434,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | |||
429 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); | 434 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize); |
430 | } | 435 | } |
431 | 436 | ||
432 | BTRFS_ATTR_RW(clone_alignment, 0444, btrfs_clone_alignment_show, btrfs_no_store); | 437 | BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); |
433 | 438 | ||
434 | static struct attribute *btrfs_attrs[] = { | 439 | static struct attribute *btrfs_attrs[] = { |
435 | BTRFS_ATTR_PTR(label), | 440 | BTRFS_ATTR_PTR(label), |
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index ac46df37504c..f7dd298b3cf6 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
@@ -20,16 +20,20 @@ enum btrfs_feature_set { | |||
20 | .store = _store, \ | 20 | .store = _store, \ |
21 | } | 21 | } |
22 | 22 | ||
23 | #define BTRFS_ATTR_RW(_name, _mode, _show, _store) \ | 23 | #define BTRFS_ATTR_RW(_name, _show, _store) \ |
24 | static struct kobj_attribute btrfs_attr_##_name = \ | 24 | static struct kobj_attribute btrfs_attr_##_name = \ |
25 | __INIT_KOBJ_ATTR(_name, _mode, _show, _store) | 25 | __INIT_KOBJ_ATTR(_name, 0644, _show, _store) |
26 | #define BTRFS_ATTR(_name, _mode, _show) \ | 26 | |
27 | BTRFS_ATTR_RW(_name, _mode, _show, NULL) | 27 | #define BTRFS_ATTR(_name, _show) \ |
28 | static struct kobj_attribute btrfs_attr_##_name = \ | ||
29 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) | ||
30 | |||
28 | #define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr) | 31 | #define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr) |
29 | 32 | ||
30 | #define BTRFS_RAID_ATTR(_name, _show) \ | 33 | #define BTRFS_RAID_ATTR(_name, _show) \ |
31 | static struct kobj_attribute btrfs_raid_attr_##_name = \ | 34 | static struct kobj_attribute btrfs_raid_attr_##_name = \ |
32 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) | 35 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) |
36 | |||
33 | #define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr) | 37 | #define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr) |
34 | 38 | ||
35 | 39 | ||
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c8d9ddf84c69..2299bfde39ee 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c | |||
@@ -40,11 +40,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void) | |||
40 | cache->key.offset = 1024 * 1024 * 1024; | 40 | cache->key.offset = 1024 * 1024 * 1024; |
41 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; | 41 | cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; |
42 | cache->sectorsize = 4096; | 42 | cache->sectorsize = 4096; |
43 | cache->full_stripe_len = 4096; | ||
43 | 44 | ||
44 | spin_lock_init(&cache->lock); | 45 | spin_lock_init(&cache->lock); |
45 | INIT_LIST_HEAD(&cache->list); | 46 | INIT_LIST_HEAD(&cache->list); |
46 | INIT_LIST_HEAD(&cache->cluster_list); | 47 | INIT_LIST_HEAD(&cache->cluster_list); |
47 | INIT_LIST_HEAD(&cache->new_bg_list); | 48 | INIT_LIST_HEAD(&cache->bg_list); |
48 | 49 | ||
49 | btrfs_init_free_space_ctl(cache); | 50 | btrfs_init_free_space_ctl(cache); |
50 | 51 | ||
@@ -364,6 +365,517 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) | |||
364 | return 0; | 365 | return 0; |
365 | } | 366 | } |
366 | 367 | ||
368 | /* Used by test_steal_space_from_bitmap_to_extent(). */ | ||
369 | static bool test_use_bitmap(struct btrfs_free_space_ctl *ctl, | ||
370 | struct btrfs_free_space *info) | ||
371 | { | ||
372 | return ctl->free_extents > 0; | ||
373 | } | ||
374 | |||
375 | /* Used by test_steal_space_from_bitmap_to_extent(). */ | ||
376 | static int | ||
377 | check_num_extents_and_bitmaps(const struct btrfs_block_group_cache *cache, | ||
378 | const int num_extents, | ||
379 | const int num_bitmaps) | ||
380 | { | ||
381 | if (cache->free_space_ctl->free_extents != num_extents) { | ||
382 | test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n", | ||
383 | cache->free_space_ctl->free_extents, num_extents); | ||
384 | return -EINVAL; | ||
385 | } | ||
386 | if (cache->free_space_ctl->total_bitmaps != num_bitmaps) { | ||
387 | test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n", | ||
388 | cache->free_space_ctl->total_bitmaps, num_bitmaps); | ||
389 | return -EINVAL; | ||
390 | } | ||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | /* Used by test_steal_space_from_bitmap_to_extent(). */ | ||
395 | static int check_cache_empty(struct btrfs_block_group_cache *cache) | ||
396 | { | ||
397 | u64 offset; | ||
398 | u64 max_extent_size; | ||
399 | |||
400 | /* | ||
401 | * Now lets confirm that there's absolutely no free space left to | ||
402 | * allocate. | ||
403 | */ | ||
404 | if (cache->free_space_ctl->free_space != 0) { | ||
405 | test_msg("Cache free space is not 0\n"); | ||
406 | return -EINVAL; | ||
407 | } | ||
408 | |||
409 | /* And any allocation request, no matter how small, should fail now. */ | ||
410 | offset = btrfs_find_space_for_alloc(cache, 0, 4096, 0, | ||
411 | &max_extent_size); | ||
412 | if (offset != 0) { | ||
413 | test_msg("Space allocation did not fail, returned offset: %llu", | ||
414 | offset); | ||
415 | return -EINVAL; | ||
416 | } | ||
417 | |||
418 | /* And no extent nor bitmap entries in the cache anymore. */ | ||
419 | return check_num_extents_and_bitmaps(cache, 0, 0); | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * Before we were able to steal free space from a bitmap entry to an extent | ||
424 | * entry, we could end up with 2 entries representing a contiguous free space. | ||
425 | * One would be an extent entry and the other a bitmap entry. Since in order | ||
426 | * to allocate space to a caller we use only 1 entry, we couldn't return that | ||
427 | * whole range to the caller if it was requested. This forced the caller to | ||
428 | * either assume ENOSPC or perform several smaller space allocations, which | ||
429 | * wasn't optimal as they could be spread all over the block group while under | ||
430 | * concurrency (extra overhead and fragmentation). | ||
431 | * | ||
432 | * This stealing approach is beneficial, since we always prefer to allocate from | ||
433 | * extent entries, both for clustered and non-clustered allocation requests. | ||
434 | */ | ||
435 | static int | ||
436 | test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) | ||
437 | { | ||
438 | int ret; | ||
439 | u64 offset; | ||
440 | u64 max_extent_size; | ||
441 | |||
442 | bool (*use_bitmap_op)(struct btrfs_free_space_ctl *, | ||
443 | struct btrfs_free_space *); | ||
444 | |||
445 | test_msg("Running space stealing from bitmap to extent\n"); | ||
446 | |||
447 | /* | ||
448 | * For this test, we want to ensure we end up with an extent entry | ||
449 | * immediately adjacent to a bitmap entry, where the bitmap starts | ||
450 | * at an offset where the extent entry ends. We keep adding and | ||
451 | * removing free space to reach into this state, but to get there | ||
452 | * we need to reach a point where marking new free space doesn't | ||
453 | * result in adding new extent entries or merging the new space | ||
454 | * with existing extent entries - the space ends up being marked | ||
455 | * in an existing bitmap that covers the new free space range. | ||
456 | * | ||
457 | * To get there, we need to reach the threshold defined at | ||
458 | * cache->free_space_ctl->extents_thresh, which currently is | ||
459 | * 256 extents on a x86_64 system at least, and a few other | ||
460 | * conditions (check free_space_cache.c). Instead of making the | ||
461 | * test much longer and complicated, use a "use_bitmap" operation | ||
462 | * that forces use of bitmaps as soon as we have at least 1 | ||
463 | * extent entry. | ||
464 | */ | ||
465 | use_bitmap_op = cache->free_space_ctl->op->use_bitmap; | ||
466 | cache->free_space_ctl->op->use_bitmap = test_use_bitmap; | ||
467 | |||
468 | /* | ||
469 | * Extent entry covering free space range [128Mb - 256Kb, 128Mb - 128Kb[ | ||
470 | */ | ||
471 | ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 - 256 * 1024, | ||
472 | 128 * 1024, 0); | ||
473 | if (ret) { | ||
474 | test_msg("Couldn't add extent entry %d\n", ret); | ||
475 | return ret; | ||
476 | } | ||
477 | |||
478 | /* Bitmap entry covering free space range [128Mb + 512Kb, 256Mb[ */ | ||
479 | ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 512 * 1024, | ||
480 | 128 * 1024 * 1024 - 512 * 1024, 1); | ||
481 | if (ret) { | ||
482 | test_msg("Couldn't add bitmap entry %d\n", ret); | ||
483 | return ret; | ||
484 | } | ||
485 | |||
486 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
487 | if (ret) | ||
488 | return ret; | ||
489 | |||
490 | /* | ||
491 | * Now make only the first 256Kb of the bitmap marked as free, so that | ||
492 | * we end up with only the following ranges marked as free space: | ||
493 | * | ||
494 | * [128Mb - 256Kb, 128Mb - 128Kb[ | ||
495 | * [128Mb + 512Kb, 128Mb + 768Kb[ | ||
496 | */ | ||
497 | ret = btrfs_remove_free_space(cache, | ||
498 | 128 * 1024 * 1024 + 768 * 1024, | ||
499 | 128 * 1024 * 1024 - 768 * 1024); | ||
500 | if (ret) { | ||
501 | test_msg("Failed to free part of bitmap space %d\n", ret); | ||
502 | return ret; | ||
503 | } | ||
504 | |||
505 | /* Confirm that only those 2 ranges are marked as free. */ | ||
506 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024, | ||
507 | 128 * 1024)) { | ||
508 | test_msg("Free space range missing\n"); | ||
509 | return -ENOENT; | ||
510 | } | ||
511 | if (!test_check_exists(cache, 128 * 1024 * 1024 + 512 * 1024, | ||
512 | 256 * 1024)) { | ||
513 | test_msg("Free space range missing\n"); | ||
514 | return -ENOENT; | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Confirm that the bitmap range [128Mb + 768Kb, 256Mb[ isn't marked | ||
519 | * as free anymore. | ||
520 | */ | ||
521 | if (test_check_exists(cache, 128 * 1024 * 1024 + 768 * 1024, | ||
522 | 128 * 1024 * 1024 - 768 * 1024)) { | ||
523 | test_msg("Bitmap region not removed from space cache\n"); | ||
524 | return -EINVAL; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * Confirm that the region [128Mb + 256Kb, 128Mb + 512Kb[, which is | ||
529 | * covered by the bitmap, isn't marked as free. | ||
530 | */ | ||
531 | if (test_check_exists(cache, 128 * 1024 * 1024 + 256 * 1024, | ||
532 | 256 * 1024)) { | ||
533 | test_msg("Invalid bitmap region marked as free\n"); | ||
534 | return -EINVAL; | ||
535 | } | ||
536 | |||
537 | /* | ||
538 | * Confirm that the region [128Mb, 128Mb + 256Kb[, which is covered | ||
539 | * by the bitmap too, isn't marked as free either. | ||
540 | */ | ||
541 | if (test_check_exists(cache, 128 * 1024 * 1024, | ||
542 | 256 * 1024)) { | ||
543 | test_msg("Invalid bitmap region marked as free\n"); | ||
544 | return -EINVAL; | ||
545 | } | ||
546 | |||
547 | /* | ||
548 | * Now lets mark the region [128Mb, 128Mb + 512Kb[ as free too. But, | ||
549 | * lets make sure the free space cache marks it as free in the bitmap, | ||
550 | * and doesn't insert a new extent entry to represent this region. | ||
551 | */ | ||
552 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 512 * 1024); | ||
553 | if (ret) { | ||
554 | test_msg("Error adding free space: %d\n", ret); | ||
555 | return ret; | ||
556 | } | ||
557 | /* Confirm the region is marked as free. */ | ||
558 | if (!test_check_exists(cache, 128 * 1024 * 1024, 512 * 1024)) { | ||
559 | test_msg("Bitmap region not marked as free\n"); | ||
560 | return -ENOENT; | ||
561 | } | ||
562 | |||
563 | /* | ||
564 | * Confirm that no new extent entries or bitmap entries were added to | ||
565 | * the cache after adding that free space region. | ||
566 | */ | ||
567 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
568 | if (ret) | ||
569 | return ret; | ||
570 | |||
571 | /* | ||
572 | * Now lets add a small free space region to the right of the previous | ||
573 | * one, which is not contiguous with it and is part of the bitmap too. | ||
574 | * The goal is to test that the bitmap entry space stealing doesn't | ||
575 | * steal this space region. | ||
576 | */ | ||
577 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 + 16 * 1024 * 1024, | ||
578 | 4096); | ||
579 | if (ret) { | ||
580 | test_msg("Error adding free space: %d\n", ret); | ||
581 | return ret; | ||
582 | } | ||
583 | |||
584 | /* | ||
585 | * Confirm that no new extent entries or bitmap entries were added to | ||
586 | * the cache after adding that free space region. | ||
587 | */ | ||
588 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
589 | if (ret) | ||
590 | return ret; | ||
591 | |||
592 | /* | ||
593 | * Now mark the region [128Mb - 128Kb, 128Mb[ as free too. This will | ||
594 | * expand the range covered by the existing extent entry that represents | ||
595 | * the free space [128Mb - 256Kb, 128Mb - 128Kb[. | ||
596 | */ | ||
597 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 128 * 1024, | ||
598 | 128 * 1024); | ||
599 | if (ret) { | ||
600 | test_msg("Error adding free space: %d\n", ret); | ||
601 | return ret; | ||
602 | } | ||
603 | /* Confirm the region is marked as free. */ | ||
604 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 128 * 1024, | ||
605 | 128 * 1024)) { | ||
606 | test_msg("Extent region not marked as free\n"); | ||
607 | return -ENOENT; | ||
608 | } | ||
609 | |||
610 | /* | ||
611 | * Confirm that our extent entry didn't steal all free space from the | ||
612 | * bitmap, because of the small 4Kb free space region. | ||
613 | */ | ||
614 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
615 | if (ret) | ||
616 | return ret; | ||
617 | |||
618 | /* | ||
619 | * So now we have the range [128Mb - 256Kb, 128Mb + 768Kb[ as free | ||
620 | * space. Without stealing bitmap free space into extent entry space, | ||
621 | * we would have all this free space represented by 2 entries in the | ||
622 | * cache: | ||
623 | * | ||
624 | * extent entry covering range: [128Mb - 256Kb, 128Mb[ | ||
625 | * bitmap entry covering range: [128Mb, 128Mb + 768Kb[ | ||
626 | * | ||
627 | * Attempting to allocate the whole free space (1Mb) would fail, because | ||
628 | * we can't allocate from multiple entries. | ||
629 | * With the bitmap free space stealing, we get a single extent entry | ||
630 | * that represents the 1Mb free space, and therefore we're able to | ||
631 | * allocate the whole free space at once. | ||
632 | */ | ||
633 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024, | ||
634 | 1 * 1024 * 1024)) { | ||
635 | test_msg("Expected region not marked as free\n"); | ||
636 | return -ENOENT; | ||
637 | } | ||
638 | |||
639 | if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 4096)) { | ||
640 | test_msg("Cache free space is not 1Mb + 4Kb\n"); | ||
641 | return -EINVAL; | ||
642 | } | ||
643 | |||
644 | offset = btrfs_find_space_for_alloc(cache, | ||
645 | 0, 1 * 1024 * 1024, 0, | ||
646 | &max_extent_size); | ||
647 | if (offset != (128 * 1024 * 1024 - 256 * 1024)) { | ||
648 | test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n", | ||
649 | offset); | ||
650 | return -EINVAL; | ||
651 | } | ||
652 | |||
653 | /* All that remains is a 4Kb free space region in a bitmap. Confirm. */ | ||
654 | ret = check_num_extents_and_bitmaps(cache, 1, 1); | ||
655 | if (ret) | ||
656 | return ret; | ||
657 | |||
658 | if (cache->free_space_ctl->free_space != 4096) { | ||
659 | test_msg("Cache free space is not 4Kb\n"); | ||
660 | return -EINVAL; | ||
661 | } | ||
662 | |||
663 | offset = btrfs_find_space_for_alloc(cache, | ||
664 | 0, 4096, 0, | ||
665 | &max_extent_size); | ||
666 | if (offset != (128 * 1024 * 1024 + 16 * 1024 * 1024)) { | ||
667 | test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n", | ||
668 | offset); | ||
669 | return -EINVAL; | ||
670 | } | ||
671 | |||
672 | ret = check_cache_empty(cache); | ||
673 | if (ret) | ||
674 | return ret; | ||
675 | |||
676 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | ||
677 | |||
678 | /* | ||
679 | * Now test a similar scenario, but where our extent entry is located | ||
680 | * to the right of the bitmap entry, so that we can check that stealing | ||
681 | * space from a bitmap to the front of an extent entry works. | ||
682 | */ | ||
683 | |||
684 | /* | ||
685 | * Extent entry covering free space range [128Mb + 128Kb, 128Mb + 256Kb[ | ||
686 | */ | ||
687 | ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 128 * 1024, | ||
688 | 128 * 1024, 0); | ||
689 | if (ret) { | ||
690 | test_msg("Couldn't add extent entry %d\n", ret); | ||
691 | return ret; | ||
692 | } | ||
693 | |||
694 | /* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */ | ||
695 | ret = test_add_free_space_entry(cache, 0, | ||
696 | 128 * 1024 * 1024 - 512 * 1024, 1); | ||
697 | if (ret) { | ||
698 | test_msg("Couldn't add bitmap entry %d\n", ret); | ||
699 | return ret; | ||
700 | } | ||
701 | |||
702 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
703 | if (ret) | ||
704 | return ret; | ||
705 | |||
706 | /* | ||
707 | * Now make only the last 256Kb of the bitmap marked as free, so that | ||
708 | * we end up with only the following ranges marked as free space: | ||
709 | * | ||
710 | * [128Mb + 128Kb, 128Mb + 256Kb[ | ||
711 | * [128Mb - 768Kb, 128Mb - 512Kb[ | ||
712 | */ | ||
713 | ret = btrfs_remove_free_space(cache, | ||
714 | 0, | ||
715 | 128 * 1024 * 1024 - 768 * 1024); | ||
716 | if (ret) { | ||
717 | test_msg("Failed to free part of bitmap space %d\n", ret); | ||
718 | return ret; | ||
719 | } | ||
720 | |||
721 | /* Confirm that only those 2 ranges are marked as free. */ | ||
722 | if (!test_check_exists(cache, 128 * 1024 * 1024 + 128 * 1024, | ||
723 | 128 * 1024)) { | ||
724 | test_msg("Free space range missing\n"); | ||
725 | return -ENOENT; | ||
726 | } | ||
727 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024, | ||
728 | 256 * 1024)) { | ||
729 | test_msg("Free space range missing\n"); | ||
730 | return -ENOENT; | ||
731 | } | ||
732 | |||
733 | /* | ||
734 | * Confirm that the bitmap range [0, 128Mb - 768Kb[ isn't marked | ||
735 | * as free anymore. | ||
736 | */ | ||
737 | if (test_check_exists(cache, 0, | ||
738 | 128 * 1024 * 1024 - 768 * 1024)) { | ||
739 | test_msg("Bitmap region not removed from space cache\n"); | ||
740 | return -EINVAL; | ||
741 | } | ||
742 | |||
743 | /* | ||
744 | * Confirm that the region [128Mb - 512Kb, 128Mb[, which is | ||
745 | * covered by the bitmap, isn't marked as free. | ||
746 | */ | ||
747 | if (test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024, | ||
748 | 512 * 1024)) { | ||
749 | test_msg("Invalid bitmap region marked as free\n"); | ||
750 | return -EINVAL; | ||
751 | } | ||
752 | |||
753 | /* | ||
754 | * Now lets mark the region [128Mb - 512Kb, 128Mb[ as free too. But, | ||
755 | * lets make sure the free space cache marks it as free in the bitmap, | ||
756 | * and doesn't insert a new extent entry to represent this region. | ||
757 | */ | ||
758 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 512 * 1024, | ||
759 | 512 * 1024); | ||
760 | if (ret) { | ||
761 | test_msg("Error adding free space: %d\n", ret); | ||
762 | return ret; | ||
763 | } | ||
764 | /* Confirm the region is marked as free. */ | ||
765 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024, | ||
766 | 512 * 1024)) { | ||
767 | test_msg("Bitmap region not marked as free\n"); | ||
768 | return -ENOENT; | ||
769 | } | ||
770 | |||
771 | /* | ||
772 | * Confirm that no new extent entries or bitmap entries were added to | ||
773 | * the cache after adding that free space region. | ||
774 | */ | ||
775 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
776 | if (ret) | ||
777 | return ret; | ||
778 | |||
779 | /* | ||
780 | * Now lets add a small free space region to the left of the previous | ||
781 | * one, which is not contiguous with it and is part of the bitmap too. | ||
782 | * The goal is to test that the bitmap entry space stealing doesn't | ||
783 | * steal this space region. | ||
784 | */ | ||
785 | ret = btrfs_add_free_space(cache, 32 * 1024 * 1024, 8192); | ||
786 | if (ret) { | ||
787 | test_msg("Error adding free space: %d\n", ret); | ||
788 | return ret; | ||
789 | } | ||
790 | |||
791 | /* | ||
792 | * Now mark the region [128Mb, 128Mb + 128Kb[ as free too. This will | ||
793 | * expand the range covered by the existing extent entry that represents | ||
794 | * the free space [128Mb + 128Kb, 128Mb + 256Kb[. | ||
795 | */ | ||
796 | ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 128 * 1024); | ||
797 | if (ret) { | ||
798 | test_msg("Error adding free space: %d\n", ret); | ||
799 | return ret; | ||
800 | } | ||
801 | /* Confirm the region is marked as free. */ | ||
802 | if (!test_check_exists(cache, 128 * 1024 * 1024, 128 * 1024)) { | ||
803 | test_msg("Extent region not marked as free\n"); | ||
804 | return -ENOENT; | ||
805 | } | ||
806 | |||
807 | /* | ||
808 | * Confirm that our extent entry didn't steal all free space from the | ||
809 | * bitmap, because of the small 8Kb free space region. | ||
810 | */ | ||
811 | ret = check_num_extents_and_bitmaps(cache, 2, 1); | ||
812 | if (ret) | ||
813 | return ret; | ||
814 | |||
815 | /* | ||
816 | * So now we have the range [128Mb - 768Kb, 128Mb + 256Kb[ as free | ||
817 | * space. Without stealing bitmap free space into extent entry space, | ||
818 | * we would have all this free space represented by 2 entries in the | ||
819 | * cache: | ||
820 | * | ||
821 | * extent entry covering range: [128Mb, 128Mb + 256Kb[ | ||
822 | * bitmap entry covering range: [128Mb - 768Kb, 128Mb[ | ||
823 | * | ||
824 | * Attempting to allocate the whole free space (1Mb) would fail, because | ||
825 | * we can't allocate from multiple entries. | ||
826 | * With the bitmap free space stealing, we get a single extent entry | ||
827 | * that represents the 1Mb free space, and therefore we're able to | ||
828 | * allocate the whole free space at once. | ||
829 | */ | ||
830 | if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024, | ||
831 | 1 * 1024 * 1024)) { | ||
832 | test_msg("Expected region not marked as free\n"); | ||
833 | return -ENOENT; | ||
834 | } | ||
835 | |||
836 | if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 8192)) { | ||
837 | test_msg("Cache free space is not 1Mb + 8Kb\n"); | ||
838 | return -EINVAL; | ||
839 | } | ||
840 | |||
841 | offset = btrfs_find_space_for_alloc(cache, | ||
842 | 0, 1 * 1024 * 1024, 0, | ||
843 | &max_extent_size); | ||
844 | if (offset != (128 * 1024 * 1024 - 768 * 1024)) { | ||
845 | test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n", | ||
846 | offset); | ||
847 | return -EINVAL; | ||
848 | } | ||
849 | |||
850 | /* All that remains is an 8Kb free space region in a bitmap. Confirm. */ | ||
851 | ret = check_num_extents_and_bitmaps(cache, 1, 1); | ||
852 | if (ret) | ||
853 | return ret; | ||
854 | |||
855 | if (cache->free_space_ctl->free_space != 8192) { | ||
856 | test_msg("Cache free space is not 8Kb\n"); | ||
857 | return -EINVAL; | ||
858 | } | ||
859 | |||
860 | offset = btrfs_find_space_for_alloc(cache, | ||
861 | 0, 8192, 0, | ||
862 | &max_extent_size); | ||
863 | if (offset != (32 * 1024 * 1024)) { | ||
864 | test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n", | ||
865 | offset); | ||
866 | return -EINVAL; | ||
867 | } | ||
868 | |||
869 | ret = check_cache_empty(cache); | ||
870 | if (ret) | ||
871 | return ret; | ||
872 | |||
873 | cache->free_space_ctl->op->use_bitmap = use_bitmap_op; | ||
874 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | ||
875 | |||
876 | return 0; | ||
877 | } | ||
878 | |||
367 | int btrfs_test_free_space_cache(void) | 879 | int btrfs_test_free_space_cache(void) |
368 | { | 880 | { |
369 | struct btrfs_block_group_cache *cache; | 881 | struct btrfs_block_group_cache *cache; |
@@ -386,6 +898,8 @@ int btrfs_test_free_space_cache(void) | |||
386 | ret = test_bitmaps_and_extents(cache); | 898 | ret = test_bitmaps_and_extents(cache); |
387 | if (ret) | 899 | if (ret) |
388 | goto out; | 900 | goto out; |
901 | |||
902 | ret = test_steal_space_from_bitmap_to_extent(cache); | ||
389 | out: | 903 | out: |
390 | __btrfs_remove_free_space_cache(cache->free_space_ctl); | 904 | __btrfs_remove_free_space_cache(cache->free_space_ctl); |
391 | kfree(cache->free_space_ctl); | 905 | kfree(cache->free_space_ctl); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d89c6d3542ca..dcaae3616728 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -386,7 +386,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
386 | int ret; | 386 | int ret; |
387 | 387 | ||
388 | /* Send isn't supposed to start transactions. */ | 388 | /* Send isn't supposed to start transactions. */ |
389 | ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB); | 389 | ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB); |
390 | 390 | ||
391 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 391 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
392 | return ERR_PTR(-EROFS); | 392 | return ERR_PTR(-EROFS); |
@@ -408,7 +408,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
408 | if (num_items > 0 && root != root->fs_info->chunk_root) { | 408 | if (num_items > 0 && root != root->fs_info->chunk_root) { |
409 | if (root->fs_info->quota_enabled && | 409 | if (root->fs_info->quota_enabled && |
410 | is_fstree(root->root_key.objectid)) { | 410 | is_fstree(root->root_key.objectid)) { |
411 | qgroup_reserved = num_items * root->leafsize; | 411 | qgroup_reserved = num_items * root->nodesize; |
412 | ret = btrfs_qgroup_reserve(root, qgroup_reserved); | 412 | ret = btrfs_qgroup_reserve(root, qgroup_reserved); |
413 | if (ret) | 413 | if (ret) |
414 | return ERR_PTR(ret); | 414 | return ERR_PTR(ret); |
@@ -418,7 +418,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, | |||
418 | /* | 418 | /* |
419 | * Do the reservation for the relocation root creation | 419 | * Do the reservation for the relocation root creation |
420 | */ | 420 | */ |
421 | if (unlikely(need_reserve_reloc_root(root))) { | 421 | if (need_reserve_reloc_root(root)) { |
422 | num_bytes += root->nodesize; | 422 | num_bytes += root->nodesize; |
423 | reloc_reserved = true; | 423 | reloc_reserved = true; |
424 | } | 424 | } |
@@ -609,7 +609,6 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
609 | if (transid <= root->fs_info->last_trans_committed) | 609 | if (transid <= root->fs_info->last_trans_committed) |
610 | goto out; | 610 | goto out; |
611 | 611 | ||
612 | ret = -EINVAL; | ||
613 | /* find specified transaction */ | 612 | /* find specified transaction */ |
614 | spin_lock(&root->fs_info->trans_lock); | 613 | spin_lock(&root->fs_info->trans_lock); |
615 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | 614 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
@@ -625,9 +624,16 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
625 | } | 624 | } |
626 | } | 625 | } |
627 | spin_unlock(&root->fs_info->trans_lock); | 626 | spin_unlock(&root->fs_info->trans_lock); |
628 | /* The specified transaction doesn't exist */ | 627 | |
629 | if (!cur_trans) | 628 | /* |
629 | * The specified transaction doesn't exist, or we | ||
630 | * raced with btrfs_commit_transaction | ||
631 | */ | ||
632 | if (!cur_trans) { | ||
633 | if (transid > root->fs_info->last_trans_committed) | ||
634 | ret = -EINVAL; | ||
630 | goto out; | 635 | goto out; |
636 | } | ||
631 | } else { | 637 | } else { |
632 | /* find newest transaction that is committing | committed */ | 638 | /* find newest transaction that is committing | committed */ |
633 | spin_lock(&root->fs_info->trans_lock); | 639 | spin_lock(&root->fs_info->trans_lock); |
@@ -851,6 +857,8 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
851 | struct extent_state *cached_state = NULL; | 857 | struct extent_state *cached_state = NULL; |
852 | u64 start = 0; | 858 | u64 start = 0; |
853 | u64 end; | 859 | u64 end; |
860 | struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); | ||
861 | bool errors = false; | ||
854 | 862 | ||
855 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 863 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
856 | EXTENT_NEED_WAIT, &cached_state)) { | 864 | EXTENT_NEED_WAIT, &cached_state)) { |
@@ -864,6 +872,26 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, | |||
864 | } | 872 | } |
865 | if (err) | 873 | if (err) |
866 | werr = err; | 874 | werr = err; |
875 | |||
876 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
877 | if ((mark & EXTENT_DIRTY) && | ||
878 | test_and_clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, | ||
879 | &btree_ino->runtime_flags)) | ||
880 | errors = true; | ||
881 | |||
882 | if ((mark & EXTENT_NEW) && | ||
883 | test_and_clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, | ||
884 | &btree_ino->runtime_flags)) | ||
885 | errors = true; | ||
886 | } else { | ||
887 | if (test_and_clear_bit(BTRFS_INODE_BTREE_ERR, | ||
888 | &btree_ino->runtime_flags)) | ||
889 | errors = true; | ||
890 | } | ||
891 | |||
892 | if (errors && !werr) | ||
893 | werr = -EIO; | ||
894 | |||
867 | return werr; | 895 | return werr; |
868 | } | 896 | } |
869 | 897 | ||
@@ -1629,6 +1657,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1629 | { | 1657 | { |
1630 | struct btrfs_transaction *cur_trans = trans->transaction; | 1658 | struct btrfs_transaction *cur_trans = trans->transaction; |
1631 | struct btrfs_transaction *prev_trans = NULL; | 1659 | struct btrfs_transaction *prev_trans = NULL; |
1660 | struct btrfs_inode *btree_ino = BTRFS_I(root->fs_info->btree_inode); | ||
1632 | int ret; | 1661 | int ret; |
1633 | 1662 | ||
1634 | /* Stop the commit early if ->aborted is set */ | 1663 | /* Stop the commit early if ->aborted is set */ |
@@ -1868,6 +1897,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1868 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, | 1897 | memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy, |
1869 | sizeof(*root->fs_info->super_copy)); | 1898 | sizeof(*root->fs_info->super_copy)); |
1870 | 1899 | ||
1900 | btrfs_update_commit_device_size(root->fs_info); | ||
1901 | btrfs_update_commit_device_bytes_used(root, cur_trans); | ||
1902 | |||
1903 | clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags); | ||
1904 | clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags); | ||
1905 | |||
1871 | spin_lock(&root->fs_info->trans_lock); | 1906 | spin_lock(&root->fs_info->trans_lock); |
1872 | cur_trans->state = TRANS_STATE_UNBLOCKED; | 1907 | cur_trans->state = TRANS_STATE_UNBLOCKED; |
1873 | root->fs_info->running_transaction = NULL; | 1908 | root->fs_info->running_transaction = NULL; |
@@ -1981,9 +2016,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) | |||
1981 | ret = btrfs_drop_snapshot(root, NULL, 0, 0); | 2016 | ret = btrfs_drop_snapshot(root, NULL, 0, 0); |
1982 | else | 2017 | else |
1983 | ret = btrfs_drop_snapshot(root, NULL, 1, 0); | 2018 | ret = btrfs_drop_snapshot(root, NULL, 1, 0); |
1984 | /* | 2019 | |
1985 | * If we encounter a transaction abort during snapshot cleaning, we | ||
1986 | * don't want to crash here | ||
1987 | */ | ||
1988 | return (ret < 0) ? 0 : 1; | 2020 | return (ret < 0) ? 0 : 1; |
1989 | } | 2021 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 579be51b27e5..d8f40e1a5d2d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -79,7 +79,7 @@ struct btrfs_transaction { | |||
79 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ | 79 | #define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ |
80 | __TRANS_ATTACH) | 80 | __TRANS_ATTACH) |
81 | 81 | ||
82 | #define BTRFS_SEND_TRANS_STUB 1 | 82 | #define BTRFS_SEND_TRANS_STUB ((void *)1) |
83 | 83 | ||
84 | struct btrfs_trans_handle { | 84 | struct btrfs_trans_handle { |
85 | u64 transid; | 85 | u64 transid; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d0262ceb85e1..1475979e5718 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -97,7 +97,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
97 | struct btrfs_root *root, struct inode *inode, | 97 | struct btrfs_root *root, struct inode *inode, |
98 | int inode_only, | 98 | int inode_only, |
99 | const loff_t start, | 99 | const loff_t start, |
100 | const loff_t end); | 100 | const loff_t end, |
101 | struct btrfs_log_ctx *ctx); | ||
101 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, | 102 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, |
102 | struct btrfs_root *root, | 103 | struct btrfs_root *root, |
103 | struct btrfs_path *path, u64 objectid); | 104 | struct btrfs_path *path, u64 objectid); |
@@ -1498,7 +1499,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, | |||
1498 | return -EIO; | 1499 | return -EIO; |
1499 | 1500 | ||
1500 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; | 1501 | key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; |
1501 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 1502 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
1502 | key.offset = objectid; | 1503 | key.offset = objectid; |
1503 | 1504 | ||
1504 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); | 1505 | ret = btrfs_insert_empty_item(trans, root, path, &key, 0); |
@@ -1637,6 +1638,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1637 | found_key.type == log_key.type && | 1638 | found_key.type == log_key.type && |
1638 | found_key.offset == log_key.offset && | 1639 | found_key.offset == log_key.offset && |
1639 | btrfs_dir_type(path->nodes[0], dst_di) == log_type) { | 1640 | btrfs_dir_type(path->nodes[0], dst_di) == log_type) { |
1641 | update_size = false; | ||
1640 | goto out; | 1642 | goto out; |
1641 | } | 1643 | } |
1642 | 1644 | ||
@@ -2157,7 +2159,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
2157 | 2159 | ||
2158 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | 2160 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); |
2159 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 2161 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); |
2160 | blocksize = btrfs_level_size(root, *level - 1); | 2162 | blocksize = root->nodesize; |
2161 | 2163 | ||
2162 | parent = path->nodes[*level]; | 2164 | parent = path->nodes[*level]; |
2163 | root_owner = btrfs_header_owner(parent); | 2165 | root_owner = btrfs_header_owner(parent); |
@@ -2983,8 +2985,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2983 | min_key.type = key_type; | 2985 | min_key.type = key_type; |
2984 | min_key.offset = min_offset; | 2986 | min_key.offset = min_offset; |
2985 | 2987 | ||
2986 | path->keep_locks = 1; | ||
2987 | |||
2988 | ret = btrfs_search_forward(root, &min_key, path, trans->transid); | 2988 | ret = btrfs_search_forward(root, &min_key, path, trans->transid); |
2989 | 2989 | ||
2990 | /* | 2990 | /* |
@@ -3364,7 +3364,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3364 | * or deletes of this inode don't have to relog the inode | 3364 | * or deletes of this inode don't have to relog the inode |
3365 | * again | 3365 | * again |
3366 | */ | 3366 | */ |
3367 | if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY && | 3367 | if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY && |
3368 | !skip_csum) { | 3368 | !skip_csum) { |
3369 | int found_type; | 3369 | int found_type; |
3370 | extent = btrfs_item_ptr(src, start_slot + i, | 3370 | extent = btrfs_item_ptr(src, start_slot + i, |
@@ -3573,107 +3573,33 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
3573 | return 0; | 3573 | return 0; |
3574 | } | 3574 | } |
3575 | 3575 | ||
3576 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3576 | static int wait_ordered_extents(struct btrfs_trans_handle *trans, |
3577 | struct inode *inode, struct btrfs_root *root, | 3577 | struct inode *inode, |
3578 | struct extent_map *em, struct btrfs_path *path, | 3578 | struct btrfs_root *root, |
3579 | struct list_head *logged_list) | 3579 | const struct extent_map *em, |
3580 | const struct list_head *logged_list, | ||
3581 | bool *ordered_io_error) | ||
3580 | { | 3582 | { |
3581 | struct btrfs_root *log = root->log_root; | ||
3582 | struct btrfs_file_extent_item *fi; | ||
3583 | struct extent_buffer *leaf; | ||
3584 | struct btrfs_ordered_extent *ordered; | 3583 | struct btrfs_ordered_extent *ordered; |
3585 | struct list_head ordered_sums; | 3584 | struct btrfs_root *log = root->log_root; |
3586 | struct btrfs_map_token token; | ||
3587 | struct btrfs_key key; | ||
3588 | u64 mod_start = em->mod_start; | 3585 | u64 mod_start = em->mod_start; |
3589 | u64 mod_len = em->mod_len; | 3586 | u64 mod_len = em->mod_len; |
3587 | const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
3590 | u64 csum_offset; | 3588 | u64 csum_offset; |
3591 | u64 csum_len; | 3589 | u64 csum_len; |
3592 | u64 extent_offset = em->start - em->orig_start; | 3590 | LIST_HEAD(ordered_sums); |
3593 | u64 block_len; | 3591 | int ret = 0; |
3594 | int ret; | ||
3595 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
3596 | int extent_inserted = 0; | ||
3597 | |||
3598 | INIT_LIST_HEAD(&ordered_sums); | ||
3599 | btrfs_init_map_token(&token); | ||
3600 | |||
3601 | ret = __btrfs_drop_extents(trans, log, inode, path, em->start, | ||
3602 | em->start + em->len, NULL, 0, 1, | ||
3603 | sizeof(*fi), &extent_inserted); | ||
3604 | if (ret) | ||
3605 | return ret; | ||
3606 | |||
3607 | if (!extent_inserted) { | ||
3608 | key.objectid = btrfs_ino(inode); | ||
3609 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3610 | key.offset = em->start; | ||
3611 | |||
3612 | ret = btrfs_insert_empty_item(trans, log, path, &key, | ||
3613 | sizeof(*fi)); | ||
3614 | if (ret) | ||
3615 | return ret; | ||
3616 | } | ||
3617 | leaf = path->nodes[0]; | ||
3618 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
3619 | struct btrfs_file_extent_item); | ||
3620 | |||
3621 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | ||
3622 | &token); | ||
3623 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | ||
3624 | skip_csum = true; | ||
3625 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3626 | BTRFS_FILE_EXTENT_PREALLOC, | ||
3627 | &token); | ||
3628 | } else { | ||
3629 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3630 | BTRFS_FILE_EXTENT_REG, | ||
3631 | &token); | ||
3632 | if (em->block_start == EXTENT_MAP_HOLE) | ||
3633 | skip_csum = true; | ||
3634 | } | ||
3635 | |||
3636 | block_len = max(em->block_len, em->orig_block_len); | ||
3637 | if (em->compress_type != BTRFS_COMPRESS_NONE) { | ||
3638 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3639 | em->block_start, | ||
3640 | &token); | ||
3641 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3642 | &token); | ||
3643 | } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { | ||
3644 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3645 | em->block_start - | ||
3646 | extent_offset, &token); | ||
3647 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3648 | &token); | ||
3649 | } else { | ||
3650 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); | ||
3651 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, | ||
3652 | &token); | ||
3653 | } | ||
3654 | |||
3655 | btrfs_set_token_file_extent_offset(leaf, fi, | ||
3656 | em->start - em->orig_start, | ||
3657 | &token); | ||
3658 | btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); | ||
3659 | btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); | ||
3660 | btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, | ||
3661 | &token); | ||
3662 | btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); | ||
3663 | btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); | ||
3664 | btrfs_mark_buffer_dirty(leaf); | ||
3665 | 3592 | ||
3666 | btrfs_release_path(path); | 3593 | *ordered_io_error = false; |
3667 | if (ret) { | ||
3668 | return ret; | ||
3669 | } | ||
3670 | 3594 | ||
3671 | if (skip_csum) | 3595 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || |
3596 | em->block_start == EXTENT_MAP_HOLE) | ||
3672 | return 0; | 3597 | return 0; |
3673 | 3598 | ||
3674 | /* | 3599 | /* |
3675 | * First check and see if our csums are on our outstanding ordered | 3600 | * Wait for any ordered extent that covers our extent map. If it |
3676 | * extents. | 3601 | * finishes without an error, first check and see if our csums are on |
3602 | * our outstanding ordered extents. | ||
3677 | */ | 3603 | */ |
3678 | list_for_each_entry(ordered, logged_list, log_list) { | 3604 | list_for_each_entry(ordered, logged_list, log_list) { |
3679 | struct btrfs_ordered_sum *sum; | 3605 | struct btrfs_ordered_sum *sum; |
@@ -3685,6 +3611,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3685 | mod_start + mod_len <= ordered->file_offset) | 3611 | mod_start + mod_len <= ordered->file_offset) |
3686 | continue; | 3612 | continue; |
3687 | 3613 | ||
3614 | if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && | ||
3615 | !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) && | ||
3616 | !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { | ||
3617 | const u64 start = ordered->file_offset; | ||
3618 | const u64 end = ordered->file_offset + ordered->len - 1; | ||
3619 | |||
3620 | WARN_ON(ordered->inode != inode); | ||
3621 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
3622 | } | ||
3623 | |||
3624 | wait_event(ordered->wait, | ||
3625 | (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) || | ||
3626 | test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))); | ||
3627 | |||
3628 | if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) { | ||
3629 | *ordered_io_error = true; | ||
3630 | break; | ||
3631 | } | ||
3688 | /* | 3632 | /* |
3689 | * We are going to copy all the csums on this ordered extent, so | 3633 | * We are going to copy all the csums on this ordered extent, so |
3690 | * go ahead and adjust mod_start and mod_len in case this | 3634 | * go ahead and adjust mod_start and mod_len in case this |
@@ -3716,6 +3660,9 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3716 | } | 3660 | } |
3717 | } | 3661 | } |
3718 | 3662 | ||
3663 | if (skip_csum) | ||
3664 | continue; | ||
3665 | |||
3719 | /* | 3666 | /* |
3720 | * To keep us from looping for the above case of an ordered | 3667 | * To keep us from looping for the above case of an ordered |
3721 | * extent that falls inside of the logged extent. | 3668 | * extent that falls inside of the logged extent. |
@@ -3733,18 +3680,16 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3733 | list_for_each_entry(sum, &ordered->list, list) { | 3680 | list_for_each_entry(sum, &ordered->list, list) { |
3734 | ret = btrfs_csum_file_blocks(trans, log, sum); | 3681 | ret = btrfs_csum_file_blocks(trans, log, sum); |
3735 | if (ret) | 3682 | if (ret) |
3736 | goto unlocked; | 3683 | break; |
3737 | } | 3684 | } |
3738 | |||
3739 | } | 3685 | } |
3740 | unlocked: | ||
3741 | 3686 | ||
3742 | if (!mod_len || ret) | 3687 | if (*ordered_io_error || !mod_len || ret || skip_csum) |
3743 | return ret; | 3688 | return ret; |
3744 | 3689 | ||
3745 | if (em->compress_type) { | 3690 | if (em->compress_type) { |
3746 | csum_offset = 0; | 3691 | csum_offset = 0; |
3747 | csum_len = block_len; | 3692 | csum_len = max(em->block_len, em->orig_block_len); |
3748 | } else { | 3693 | } else { |
3749 | csum_offset = mod_start - em->start; | 3694 | csum_offset = mod_start - em->start; |
3750 | csum_len = mod_len; | 3695 | csum_len = mod_len; |
@@ -3771,11 +3716,106 @@ unlocked: | |||
3771 | return ret; | 3716 | return ret; |
3772 | } | 3717 | } |
3773 | 3718 | ||
3719 | static int log_one_extent(struct btrfs_trans_handle *trans, | ||
3720 | struct inode *inode, struct btrfs_root *root, | ||
3721 | const struct extent_map *em, | ||
3722 | struct btrfs_path *path, | ||
3723 | const struct list_head *logged_list, | ||
3724 | struct btrfs_log_ctx *ctx) | ||
3725 | { | ||
3726 | struct btrfs_root *log = root->log_root; | ||
3727 | struct btrfs_file_extent_item *fi; | ||
3728 | struct extent_buffer *leaf; | ||
3729 | struct btrfs_map_token token; | ||
3730 | struct btrfs_key key; | ||
3731 | u64 extent_offset = em->start - em->orig_start; | ||
3732 | u64 block_len; | ||
3733 | int ret; | ||
3734 | int extent_inserted = 0; | ||
3735 | bool ordered_io_err = false; | ||
3736 | |||
3737 | ret = wait_ordered_extents(trans, inode, root, em, logged_list, | ||
3738 | &ordered_io_err); | ||
3739 | if (ret) | ||
3740 | return ret; | ||
3741 | |||
3742 | if (ordered_io_err) { | ||
3743 | ctx->io_err = -EIO; | ||
3744 | return 0; | ||
3745 | } | ||
3746 | |||
3747 | btrfs_init_map_token(&token); | ||
3748 | |||
3749 | ret = __btrfs_drop_extents(trans, log, inode, path, em->start, | ||
3750 | em->start + em->len, NULL, 0, 1, | ||
3751 | sizeof(*fi), &extent_inserted); | ||
3752 | if (ret) | ||
3753 | return ret; | ||
3754 | |||
3755 | if (!extent_inserted) { | ||
3756 | key.objectid = btrfs_ino(inode); | ||
3757 | key.type = BTRFS_EXTENT_DATA_KEY; | ||
3758 | key.offset = em->start; | ||
3759 | |||
3760 | ret = btrfs_insert_empty_item(trans, log, path, &key, | ||
3761 | sizeof(*fi)); | ||
3762 | if (ret) | ||
3763 | return ret; | ||
3764 | } | ||
3765 | leaf = path->nodes[0]; | ||
3766 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
3767 | struct btrfs_file_extent_item); | ||
3768 | |||
3769 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | ||
3770 | &token); | ||
3771 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
3772 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3773 | BTRFS_FILE_EXTENT_PREALLOC, | ||
3774 | &token); | ||
3775 | else | ||
3776 | btrfs_set_token_file_extent_type(leaf, fi, | ||
3777 | BTRFS_FILE_EXTENT_REG, | ||
3778 | &token); | ||
3779 | |||
3780 | block_len = max(em->block_len, em->orig_block_len); | ||
3781 | if (em->compress_type != BTRFS_COMPRESS_NONE) { | ||
3782 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3783 | em->block_start, | ||
3784 | &token); | ||
3785 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3786 | &token); | ||
3787 | } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { | ||
3788 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
3789 | em->block_start - | ||
3790 | extent_offset, &token); | ||
3791 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
3792 | &token); | ||
3793 | } else { | ||
3794 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); | ||
3795 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, | ||
3796 | &token); | ||
3797 | } | ||
3798 | |||
3799 | btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, &token); | ||
3800 | btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); | ||
3801 | btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token); | ||
3802 | btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, | ||
3803 | &token); | ||
3804 | btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); | ||
3805 | btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); | ||
3806 | btrfs_mark_buffer_dirty(leaf); | ||
3807 | |||
3808 | btrfs_release_path(path); | ||
3809 | |||
3810 | return ret; | ||
3811 | } | ||
3812 | |||
3774 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3813 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
3775 | struct btrfs_root *root, | 3814 | struct btrfs_root *root, |
3776 | struct inode *inode, | 3815 | struct inode *inode, |
3777 | struct btrfs_path *path, | 3816 | struct btrfs_path *path, |
3778 | struct list_head *logged_list) | 3817 | struct list_head *logged_list, |
3818 | struct btrfs_log_ctx *ctx) | ||
3779 | { | 3819 | { |
3780 | struct extent_map *em, *n; | 3820 | struct extent_map *em, *n; |
3781 | struct list_head extents; | 3821 | struct list_head extents; |
@@ -3833,7 +3873,8 @@ process: | |||
3833 | 3873 | ||
3834 | write_unlock(&tree->lock); | 3874 | write_unlock(&tree->lock); |
3835 | 3875 | ||
3836 | ret = log_one_extent(trans, inode, root, em, path, logged_list); | 3876 | ret = log_one_extent(trans, inode, root, em, path, logged_list, |
3877 | ctx); | ||
3837 | write_lock(&tree->lock); | 3878 | write_lock(&tree->lock); |
3838 | clear_em_logging(tree, em); | 3879 | clear_em_logging(tree, em); |
3839 | free_extent_map(em); | 3880 | free_extent_map(em); |
@@ -3863,7 +3904,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3863 | struct btrfs_root *root, struct inode *inode, | 3904 | struct btrfs_root *root, struct inode *inode, |
3864 | int inode_only, | 3905 | int inode_only, |
3865 | const loff_t start, | 3906 | const loff_t start, |
3866 | const loff_t end) | 3907 | const loff_t end, |
3908 | struct btrfs_log_ctx *ctx) | ||
3867 | { | 3909 | { |
3868 | struct btrfs_path *path; | 3910 | struct btrfs_path *path; |
3869 | struct btrfs_path *dst_path; | 3911 | struct btrfs_path *dst_path; |
@@ -3964,7 +4006,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3964 | err = ret; | 4006 | err = ret; |
3965 | goto out_unlock; | 4007 | goto out_unlock; |
3966 | } | 4008 | } |
3967 | path->keep_locks = 1; | ||
3968 | 4009 | ||
3969 | while (1) { | 4010 | while (1) { |
3970 | ins_nr = 0; | 4011 | ins_nr = 0; |
@@ -4049,7 +4090,7 @@ log_extents: | |||
4049 | btrfs_release_path(dst_path); | 4090 | btrfs_release_path(dst_path); |
4050 | if (fast_search) { | 4091 | if (fast_search) { |
4051 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, | 4092 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
4052 | &logged_list); | 4093 | &logged_list, ctx); |
4053 | if (ret) { | 4094 | if (ret) { |
4054 | err = ret; | 4095 | err = ret; |
4055 | goto out_unlock; | 4096 | goto out_unlock; |
@@ -4239,7 +4280,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4239 | if (ret) | 4280 | if (ret) |
4240 | goto end_no_trans; | 4281 | goto end_no_trans; |
4241 | 4282 | ||
4242 | ret = btrfs_log_inode(trans, root, inode, inode_only, start, end); | 4283 | ret = btrfs_log_inode(trans, root, inode, inode_only, start, end, ctx); |
4243 | if (ret) | 4284 | if (ret) |
4244 | goto end_trans; | 4285 | goto end_trans; |
4245 | 4286 | ||
@@ -4268,7 +4309,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4268 | if (BTRFS_I(inode)->generation > | 4309 | if (BTRFS_I(inode)->generation > |
4269 | root->fs_info->last_trans_committed) { | 4310 | root->fs_info->last_trans_committed) { |
4270 | ret = btrfs_log_inode(trans, root, inode, inode_only, | 4311 | ret = btrfs_log_inode(trans, root, inode, inode_only, |
4271 | 0, LLONG_MAX); | 4312 | 0, LLONG_MAX, ctx); |
4272 | if (ret) | 4313 | if (ret) |
4273 | goto end_trans; | 4314 | goto end_trans; |
4274 | } | 4315 | } |
@@ -4360,7 +4401,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
4360 | again: | 4401 | again: |
4361 | key.objectid = BTRFS_TREE_LOG_OBJECTID; | 4402 | key.objectid = BTRFS_TREE_LOG_OBJECTID; |
4362 | key.offset = (u64)-1; | 4403 | key.offset = (u64)-1; |
4363 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 4404 | key.type = BTRFS_ROOT_ITEM_KEY; |
4364 | 4405 | ||
4365 | while (1) { | 4406 | while (1) { |
4366 | ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); | 4407 | ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index e2e798ae7cd7..154990c26dcb 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -28,6 +28,7 @@ | |||
28 | struct btrfs_log_ctx { | 28 | struct btrfs_log_ctx { |
29 | int log_ret; | 29 | int log_ret; |
30 | int log_transid; | 30 | int log_transid; |
31 | int io_err; | ||
31 | struct list_head list; | 32 | struct list_head list; |
32 | }; | 33 | }; |
33 | 34 | ||
@@ -35,6 +36,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | |||
35 | { | 36 | { |
36 | ctx->log_ret = 0; | 37 | ctx->log_ret = 0; |
37 | ctx->log_transid = 0; | 38 | ctx->log_transid = 0; |
39 | ctx->io_err = 0; | ||
38 | INIT_LIST_HEAD(&ctx->list); | 40 | INIT_LIST_HEAD(&ctx->list); |
39 | } | 41 | } |
40 | 42 | ||
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index f6a4c03ee7d8..778282944530 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c | |||
@@ -279,7 +279,6 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, | |||
279 | key.offset = 0; | 279 | key.offset = 0; |
280 | 280 | ||
281 | again_search_slot: | 281 | again_search_slot: |
282 | path->keep_locks = 1; | ||
283 | ret = btrfs_search_forward(root, &key, path, 0); | 282 | ret = btrfs_search_forward(root, &key, path, 0); |
284 | if (ret) { | 283 | if (ret) { |
285 | if (ret > 0) | 284 | if (ret > 0) |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2c2d6d1d8eee..d47289c715c8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -50,7 +50,7 @@ static void __btrfs_reset_dev_stats(struct btrfs_device *dev); | |||
50 | static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); | 50 | static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); |
51 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); | 51 | static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); |
52 | 52 | ||
53 | static DEFINE_MUTEX(uuid_mutex); | 53 | DEFINE_MUTEX(uuid_mutex); |
54 | static LIST_HEAD(fs_uuids); | 54 | static LIST_HEAD(fs_uuids); |
55 | 55 | ||
56 | static void lock_chunks(struct btrfs_root *root) | 56 | static void lock_chunks(struct btrfs_root *root) |
@@ -74,6 +74,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void) | |||
74 | mutex_init(&fs_devs->device_list_mutex); | 74 | mutex_init(&fs_devs->device_list_mutex); |
75 | 75 | ||
76 | INIT_LIST_HEAD(&fs_devs->devices); | 76 | INIT_LIST_HEAD(&fs_devs->devices); |
77 | INIT_LIST_HEAD(&fs_devs->resized_devices); | ||
77 | INIT_LIST_HEAD(&fs_devs->alloc_list); | 78 | INIT_LIST_HEAD(&fs_devs->alloc_list); |
78 | INIT_LIST_HEAD(&fs_devs->list); | 79 | INIT_LIST_HEAD(&fs_devs->list); |
79 | 80 | ||
@@ -154,11 +155,13 @@ static struct btrfs_device *__alloc_device(void) | |||
154 | 155 | ||
155 | INIT_LIST_HEAD(&dev->dev_list); | 156 | INIT_LIST_HEAD(&dev->dev_list); |
156 | INIT_LIST_HEAD(&dev->dev_alloc_list); | 157 | INIT_LIST_HEAD(&dev->dev_alloc_list); |
158 | INIT_LIST_HEAD(&dev->resized_list); | ||
157 | 159 | ||
158 | spin_lock_init(&dev->io_lock); | 160 | spin_lock_init(&dev->io_lock); |
159 | 161 | ||
160 | spin_lock_init(&dev->reada_lock); | 162 | spin_lock_init(&dev->reada_lock); |
161 | atomic_set(&dev->reada_in_flight, 0); | 163 | atomic_set(&dev->reada_in_flight, 0); |
164 | atomic_set(&dev->dev_stats_ccnt, 0); | ||
162 | INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); | 165 | INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); |
163 | INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); | 166 | INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); |
164 | 167 | ||
@@ -474,14 +477,13 @@ static noinline int device_list_add(const char *path, | |||
474 | return PTR_ERR(fs_devices); | 477 | return PTR_ERR(fs_devices); |
475 | 478 | ||
476 | list_add(&fs_devices->list, &fs_uuids); | 479 | list_add(&fs_devices->list, &fs_uuids); |
477 | fs_devices->latest_devid = devid; | ||
478 | fs_devices->latest_trans = found_transid; | ||
479 | 480 | ||
480 | device = NULL; | 481 | device = NULL; |
481 | } else { | 482 | } else { |
482 | device = __find_device(&fs_devices->devices, devid, | 483 | device = __find_device(&fs_devices->devices, devid, |
483 | disk_super->dev_item.uuid); | 484 | disk_super->dev_item.uuid); |
484 | } | 485 | } |
486 | |||
485 | if (!device) { | 487 | if (!device) { |
486 | if (fs_devices->opened) | 488 | if (fs_devices->opened) |
487 | return -EBUSY; | 489 | return -EBUSY; |
@@ -565,10 +567,6 @@ static noinline int device_list_add(const char *path, | |||
565 | if (!fs_devices->opened) | 567 | if (!fs_devices->opened) |
566 | device->generation = found_transid; | 568 | device->generation = found_transid; |
567 | 569 | ||
568 | if (found_transid > fs_devices->latest_trans) { | ||
569 | fs_devices->latest_devid = devid; | ||
570 | fs_devices->latest_trans = found_transid; | ||
571 | } | ||
572 | *fs_devices_ret = fs_devices; | 570 | *fs_devices_ret = fs_devices; |
573 | 571 | ||
574 | return ret; | 572 | return ret; |
@@ -584,8 +582,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
584 | if (IS_ERR(fs_devices)) | 582 | if (IS_ERR(fs_devices)) |
585 | return fs_devices; | 583 | return fs_devices; |
586 | 584 | ||
587 | fs_devices->latest_devid = orig->latest_devid; | 585 | mutex_lock(&orig->device_list_mutex); |
588 | fs_devices->latest_trans = orig->latest_trans; | ||
589 | fs_devices->total_devices = orig->total_devices; | 586 | fs_devices->total_devices = orig->total_devices; |
590 | 587 | ||
591 | /* We have held the volume lock, it is safe to get the devices. */ | 588 | /* We have held the volume lock, it is safe to get the devices. */ |
@@ -614,8 +611,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
614 | device->fs_devices = fs_devices; | 611 | device->fs_devices = fs_devices; |
615 | fs_devices->num_devices++; | 612 | fs_devices->num_devices++; |
616 | } | 613 | } |
614 | mutex_unlock(&orig->device_list_mutex); | ||
617 | return fs_devices; | 615 | return fs_devices; |
618 | error: | 616 | error: |
617 | mutex_unlock(&orig->device_list_mutex); | ||
619 | free_fs_devices(fs_devices); | 618 | free_fs_devices(fs_devices); |
620 | return ERR_PTR(-ENOMEM); | 619 | return ERR_PTR(-ENOMEM); |
621 | } | 620 | } |
@@ -624,10 +623,7 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, | |||
624 | struct btrfs_fs_devices *fs_devices, int step) | 623 | struct btrfs_fs_devices *fs_devices, int step) |
625 | { | 624 | { |
626 | struct btrfs_device *device, *next; | 625 | struct btrfs_device *device, *next; |
627 | 626 | struct btrfs_device *latest_dev = NULL; | |
628 | struct block_device *latest_bdev = NULL; | ||
629 | u64 latest_devid = 0; | ||
630 | u64 latest_transid = 0; | ||
631 | 627 | ||
632 | mutex_lock(&uuid_mutex); | 628 | mutex_lock(&uuid_mutex); |
633 | again: | 629 | again: |
@@ -635,11 +631,9 @@ again: | |||
635 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 631 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
636 | if (device->in_fs_metadata) { | 632 | if (device->in_fs_metadata) { |
637 | if (!device->is_tgtdev_for_dev_replace && | 633 | if (!device->is_tgtdev_for_dev_replace && |
638 | (!latest_transid || | 634 | (!latest_dev || |
639 | device->generation > latest_transid)) { | 635 | device->generation > latest_dev->generation)) { |
640 | latest_devid = device->devid; | 636 | latest_dev = device; |
641 | latest_transid = device->generation; | ||
642 | latest_bdev = device->bdev; | ||
643 | } | 637 | } |
644 | continue; | 638 | continue; |
645 | } | 639 | } |
@@ -681,9 +675,7 @@ again: | |||
681 | goto again; | 675 | goto again; |
682 | } | 676 | } |
683 | 677 | ||
684 | fs_devices->latest_bdev = latest_bdev; | 678 | fs_devices->latest_bdev = latest_dev->bdev; |
685 | fs_devices->latest_devid = latest_devid; | ||
686 | fs_devices->latest_trans = latest_transid; | ||
687 | 679 | ||
688 | mutex_unlock(&uuid_mutex); | 680 | mutex_unlock(&uuid_mutex); |
689 | } | 681 | } |
@@ -732,8 +724,6 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
732 | fs_devices->rw_devices--; | 724 | fs_devices->rw_devices--; |
733 | } | 725 | } |
734 | 726 | ||
735 | if (device->can_discard) | ||
736 | fs_devices->num_can_discard--; | ||
737 | if (device->missing) | 727 | if (device->missing) |
738 | fs_devices->missing_devices--; | 728 | fs_devices->missing_devices--; |
739 | 729 | ||
@@ -798,11 +788,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
798 | struct block_device *bdev; | 788 | struct block_device *bdev; |
799 | struct list_head *head = &fs_devices->devices; | 789 | struct list_head *head = &fs_devices->devices; |
800 | struct btrfs_device *device; | 790 | struct btrfs_device *device; |
801 | struct block_device *latest_bdev = NULL; | 791 | struct btrfs_device *latest_dev = NULL; |
802 | struct buffer_head *bh; | 792 | struct buffer_head *bh; |
803 | struct btrfs_super_block *disk_super; | 793 | struct btrfs_super_block *disk_super; |
804 | u64 latest_devid = 0; | ||
805 | u64 latest_transid = 0; | ||
806 | u64 devid; | 794 | u64 devid; |
807 | int seeding = 1; | 795 | int seeding = 1; |
808 | int ret = 0; | 796 | int ret = 0; |
@@ -830,11 +818,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
830 | goto error_brelse; | 818 | goto error_brelse; |
831 | 819 | ||
832 | device->generation = btrfs_super_generation(disk_super); | 820 | device->generation = btrfs_super_generation(disk_super); |
833 | if (!latest_transid || device->generation > latest_transid) { | 821 | if (!latest_dev || |
834 | latest_devid = devid; | 822 | device->generation > latest_dev->generation) |
835 | latest_transid = device->generation; | 823 | latest_dev = device; |
836 | latest_bdev = bdev; | ||
837 | } | ||
838 | 824 | ||
839 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { | 825 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { |
840 | device->writeable = 0; | 826 | device->writeable = 0; |
@@ -844,10 +830,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
844 | } | 830 | } |
845 | 831 | ||
846 | q = bdev_get_queue(bdev); | 832 | q = bdev_get_queue(bdev); |
847 | if (blk_queue_discard(q)) { | 833 | if (blk_queue_discard(q)) |
848 | device->can_discard = 1; | 834 | device->can_discard = 1; |
849 | fs_devices->num_can_discard++; | ||
850 | } | ||
851 | 835 | ||
852 | device->bdev = bdev; | 836 | device->bdev = bdev; |
853 | device->in_fs_metadata = 0; | 837 | device->in_fs_metadata = 0; |
@@ -877,9 +861,7 @@ error_brelse: | |||
877 | } | 861 | } |
878 | fs_devices->seeding = seeding; | 862 | fs_devices->seeding = seeding; |
879 | fs_devices->opened = 1; | 863 | fs_devices->opened = 1; |
880 | fs_devices->latest_bdev = latest_bdev; | 864 | fs_devices->latest_bdev = latest_dev->bdev; |
881 | fs_devices->latest_devid = latest_devid; | ||
882 | fs_devices->latest_trans = latest_transid; | ||
883 | fs_devices->total_rw_bytes = 0; | 865 | fs_devices->total_rw_bytes = 0; |
884 | out: | 866 | out: |
885 | return ret; | 867 | return ret; |
@@ -1053,7 +1035,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | |||
1053 | if (key.objectid > device->devid) | 1035 | if (key.objectid > device->devid) |
1054 | break; | 1036 | break; |
1055 | 1037 | ||
1056 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | 1038 | if (key.type != BTRFS_DEV_EXTENT_KEY) |
1057 | goto next; | 1039 | goto next; |
1058 | 1040 | ||
1059 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 1041 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
@@ -1205,7 +1187,7 @@ again: | |||
1205 | if (key.objectid > device->devid) | 1187 | if (key.objectid > device->devid) |
1206 | break; | 1188 | break; |
1207 | 1189 | ||
1208 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | 1190 | if (key.type != BTRFS_DEV_EXTENT_KEY) |
1209 | goto next; | 1191 | goto next; |
1210 | 1192 | ||
1211 | if (key.offset > search_start) { | 1193 | if (key.offset > search_start) { |
@@ -1284,7 +1266,7 @@ out: | |||
1284 | 1266 | ||
1285 | static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | 1267 | static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, |
1286 | struct btrfs_device *device, | 1268 | struct btrfs_device *device, |
1287 | u64 start) | 1269 | u64 start, u64 *dev_extent_len) |
1288 | { | 1270 | { |
1289 | int ret; | 1271 | int ret; |
1290 | struct btrfs_path *path; | 1272 | struct btrfs_path *path; |
@@ -1326,13 +1308,8 @@ again: | |||
1326 | goto out; | 1308 | goto out; |
1327 | } | 1309 | } |
1328 | 1310 | ||
1329 | if (device->bytes_used > 0) { | 1311 | *dev_extent_len = btrfs_dev_extent_length(leaf, extent); |
1330 | u64 len = btrfs_dev_extent_length(leaf, extent); | 1312 | |
1331 | device->bytes_used -= len; | ||
1332 | spin_lock(&root->fs_info->free_chunk_lock); | ||
1333 | root->fs_info->free_chunk_space += len; | ||
1334 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
1335 | } | ||
1336 | ret = btrfs_del_item(trans, root, path); | 1313 | ret = btrfs_del_item(trans, root, path); |
1337 | if (ret) { | 1314 | if (ret) { |
1338 | btrfs_error(root->fs_info, ret, | 1315 | btrfs_error(root->fs_info, ret, |
@@ -1482,8 +1459,10 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, | |||
1482 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | 1459 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); |
1483 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | 1460 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); |
1484 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | 1461 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); |
1485 | btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); | 1462 | btrfs_set_device_total_bytes(leaf, dev_item, |
1486 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | 1463 | btrfs_device_get_disk_total_bytes(device)); |
1464 | btrfs_set_device_bytes_used(leaf, dev_item, | ||
1465 | btrfs_device_get_bytes_used(device)); | ||
1487 | btrfs_set_device_group(leaf, dev_item, 0); | 1466 | btrfs_set_device_group(leaf, dev_item, 0); |
1488 | btrfs_set_device_seek_speed(leaf, dev_item, 0); | 1467 | btrfs_set_device_seek_speed(leaf, dev_item, 0); |
1489 | btrfs_set_device_bandwidth(leaf, dev_item, 0); | 1468 | btrfs_set_device_bandwidth(leaf, dev_item, 0); |
@@ -1539,7 +1518,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1539 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1518 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1540 | key.type = BTRFS_DEV_ITEM_KEY; | 1519 | key.type = BTRFS_DEV_ITEM_KEY; |
1541 | key.offset = device->devid; | 1520 | key.offset = device->devid; |
1542 | lock_chunks(root); | ||
1543 | 1521 | ||
1544 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1522 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1545 | if (ret < 0) | 1523 | if (ret < 0) |
@@ -1555,7 +1533,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1555 | goto out; | 1533 | goto out; |
1556 | out: | 1534 | out: |
1557 | btrfs_free_path(path); | 1535 | btrfs_free_path(path); |
1558 | unlock_chunks(root); | ||
1559 | btrfs_commit_transaction(trans, root); | 1536 | btrfs_commit_transaction(trans, root); |
1560 | return ret; | 1537 | return ret; |
1561 | } | 1538 | } |
@@ -1671,8 +1648,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1671 | if (device->writeable) { | 1648 | if (device->writeable) { |
1672 | lock_chunks(root); | 1649 | lock_chunks(root); |
1673 | list_del_init(&device->dev_alloc_list); | 1650 | list_del_init(&device->dev_alloc_list); |
1651 | device->fs_devices->rw_devices--; | ||
1674 | unlock_chunks(root); | 1652 | unlock_chunks(root); |
1675 | root->fs_info->fs_devices->rw_devices--; | ||
1676 | clear_super = true; | 1653 | clear_super = true; |
1677 | } | 1654 | } |
1678 | 1655 | ||
@@ -1691,11 +1668,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1691 | if (ret) | 1668 | if (ret) |
1692 | goto error_undo; | 1669 | goto error_undo; |
1693 | 1670 | ||
1694 | spin_lock(&root->fs_info->free_chunk_lock); | ||
1695 | root->fs_info->free_chunk_space = device->total_bytes - | ||
1696 | device->bytes_used; | ||
1697 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
1698 | |||
1699 | device->in_fs_metadata = 0; | 1671 | device->in_fs_metadata = 0; |
1700 | btrfs_scrub_cancel_dev(root->fs_info, device); | 1672 | btrfs_scrub_cancel_dev(root->fs_info, device); |
1701 | 1673 | ||
@@ -1749,9 +1721,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1749 | fs_devices = fs_devices->seed; | 1721 | fs_devices = fs_devices->seed; |
1750 | } | 1722 | } |
1751 | cur_devices->seed = NULL; | 1723 | cur_devices->seed = NULL; |
1752 | lock_chunks(root); | ||
1753 | __btrfs_close_devices(cur_devices); | 1724 | __btrfs_close_devices(cur_devices); |
1754 | unlock_chunks(root); | ||
1755 | free_fs_devices(cur_devices); | 1725 | free_fs_devices(cur_devices); |
1756 | } | 1726 | } |
1757 | 1727 | ||
@@ -1824,8 +1794,8 @@ error_undo: | |||
1824 | lock_chunks(root); | 1794 | lock_chunks(root); |
1825 | list_add(&device->dev_alloc_list, | 1795 | list_add(&device->dev_alloc_list, |
1826 | &root->fs_info->fs_devices->alloc_list); | 1796 | &root->fs_info->fs_devices->alloc_list); |
1797 | device->fs_devices->rw_devices++; | ||
1827 | unlock_chunks(root); | 1798 | unlock_chunks(root); |
1828 | root->fs_info->fs_devices->rw_devices++; | ||
1829 | } | 1799 | } |
1830 | goto error_brelse; | 1800 | goto error_brelse; |
1831 | } | 1801 | } |
@@ -1833,29 +1803,57 @@ error_undo: | |||
1833 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | 1803 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, |
1834 | struct btrfs_device *srcdev) | 1804 | struct btrfs_device *srcdev) |
1835 | { | 1805 | { |
1806 | struct btrfs_fs_devices *fs_devices; | ||
1807 | |||
1836 | WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); | 1808 | WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); |
1837 | 1809 | ||
1810 | /* | ||
1811 | * in case of fs with no seed, srcdev->fs_devices will point | ||
1812 | * to fs_devices of fs_info. However when the dev being replaced is | ||
1813 | * a seed dev it will point to the seed's local fs_devices. In short | ||
1814 | * srcdev will have its correct fs_devices in both the cases. | ||
1815 | */ | ||
1816 | fs_devices = srcdev->fs_devices; | ||
1817 | |||
1838 | list_del_rcu(&srcdev->dev_list); | 1818 | list_del_rcu(&srcdev->dev_list); |
1839 | list_del_rcu(&srcdev->dev_alloc_list); | 1819 | list_del_rcu(&srcdev->dev_alloc_list); |
1840 | fs_info->fs_devices->num_devices--; | 1820 | fs_devices->num_devices--; |
1841 | if (srcdev->missing) { | 1821 | if (srcdev->missing) |
1842 | fs_info->fs_devices->missing_devices--; | 1822 | fs_devices->missing_devices--; |
1843 | fs_info->fs_devices->rw_devices++; | ||
1844 | } | ||
1845 | if (srcdev->can_discard) | ||
1846 | fs_info->fs_devices->num_can_discard--; | ||
1847 | if (srcdev->bdev) { | ||
1848 | fs_info->fs_devices->open_devices--; | ||
1849 | 1823 | ||
1850 | /* | 1824 | if (srcdev->writeable) { |
1851 | * zero out the old super if it is not writable | 1825 | fs_devices->rw_devices--; |
1852 | * (e.g. seed device) | 1826 | /* zero out the old super if it is writable */ |
1853 | */ | 1827 | btrfs_scratch_superblock(srcdev); |
1854 | if (srcdev->writeable) | ||
1855 | btrfs_scratch_superblock(srcdev); | ||
1856 | } | 1828 | } |
1857 | 1829 | ||
1830 | if (srcdev->bdev) | ||
1831 | fs_devices->open_devices--; | ||
1832 | |||
1858 | call_rcu(&srcdev->rcu, free_device); | 1833 | call_rcu(&srcdev->rcu, free_device); |
1834 | |||
1835 | /* | ||
1836 | * unless fs_devices is a seed fs, num_devices shouldn't go | ||
1837 | * to zero | ||
1838 | */ | ||
1839 | BUG_ON(!fs_devices->num_devices && !fs_devices->seeding); | ||
1840 | |||
1841 | /* if there are no devs left, we'd rather delete the fs_devices */ | ||
1842 | if (!fs_devices->num_devices) { | ||
1843 | struct btrfs_fs_devices *tmp_fs_devices; | ||
1844 | |||
1845 | tmp_fs_devices = fs_info->fs_devices; | ||
1846 | while (tmp_fs_devices) { | ||
1847 | if (tmp_fs_devices->seed == fs_devices) { | ||
1848 | tmp_fs_devices->seed = fs_devices->seed; | ||
1849 | break; | ||
1850 | } | ||
1851 | tmp_fs_devices = tmp_fs_devices->seed; | ||
1852 | } | ||
1853 | fs_devices->seed = NULL; | ||
1854 | __btrfs_close_devices(fs_devices); | ||
1855 | free_fs_devices(fs_devices); | ||
1856 | } | ||
1859 | } | 1857 | } |
1860 | 1858 | ||
1861 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | 1859 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, |
@@ -1863,6 +1861,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
1863 | { | 1861 | { |
1864 | struct btrfs_device *next_device; | 1862 | struct btrfs_device *next_device; |
1865 | 1863 | ||
1864 | mutex_lock(&uuid_mutex); | ||
1866 | WARN_ON(!tgtdev); | 1865 | WARN_ON(!tgtdev); |
1867 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | 1866 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
1868 | if (tgtdev->bdev) { | 1867 | if (tgtdev->bdev) { |
@@ -1870,8 +1869,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
1870 | fs_info->fs_devices->open_devices--; | 1869 | fs_info->fs_devices->open_devices--; |
1871 | } | 1870 | } |
1872 | fs_info->fs_devices->num_devices--; | 1871 | fs_info->fs_devices->num_devices--; |
1873 | if (tgtdev->can_discard) | ||
1874 | fs_info->fs_devices->num_can_discard++; | ||
1875 | 1872 | ||
1876 | next_device = list_entry(fs_info->fs_devices->devices.next, | 1873 | next_device = list_entry(fs_info->fs_devices->devices.next, |
1877 | struct btrfs_device, dev_list); | 1874 | struct btrfs_device, dev_list); |
@@ -1884,6 +1881,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | |||
1884 | call_rcu(&tgtdev->rcu, free_device); | 1881 | call_rcu(&tgtdev->rcu, free_device); |
1885 | 1882 | ||
1886 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 1883 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
1884 | mutex_unlock(&uuid_mutex); | ||
1887 | } | 1885 | } |
1888 | 1886 | ||
1889 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | 1887 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, |
@@ -1982,17 +1980,17 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) | |||
1982 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 1980 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
1983 | list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, | 1981 | list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, |
1984 | synchronize_rcu); | 1982 | synchronize_rcu); |
1983 | list_for_each_entry(device, &seed_devices->devices, dev_list) | ||
1984 | device->fs_devices = seed_devices; | ||
1985 | 1985 | ||
1986 | lock_chunks(root); | ||
1986 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); | 1987 | list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); |
1987 | list_for_each_entry(device, &seed_devices->devices, dev_list) { | 1988 | unlock_chunks(root); |
1988 | device->fs_devices = seed_devices; | ||
1989 | } | ||
1990 | 1989 | ||
1991 | fs_devices->seeding = 0; | 1990 | fs_devices->seeding = 0; |
1992 | fs_devices->num_devices = 0; | 1991 | fs_devices->num_devices = 0; |
1993 | fs_devices->open_devices = 0; | 1992 | fs_devices->open_devices = 0; |
1994 | fs_devices->missing_devices = 0; | 1993 | fs_devices->missing_devices = 0; |
1995 | fs_devices->num_can_discard = 0; | ||
1996 | fs_devices->rotating = 0; | 1994 | fs_devices->rotating = 0; |
1997 | fs_devices->seed = seed_devices; | 1995 | fs_devices->seed = seed_devices; |
1998 | 1996 | ||
@@ -2092,7 +2090,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2092 | struct list_head *devices; | 2090 | struct list_head *devices; |
2093 | struct super_block *sb = root->fs_info->sb; | 2091 | struct super_block *sb = root->fs_info->sb; |
2094 | struct rcu_string *name; | 2092 | struct rcu_string *name; |
2095 | u64 total_bytes; | 2093 | u64 tmp; |
2096 | int seeding_dev = 0; | 2094 | int seeding_dev = 0; |
2097 | int ret = 0; | 2095 | int ret = 0; |
2098 | 2096 | ||
@@ -2148,8 +2146,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2148 | goto error; | 2146 | goto error; |
2149 | } | 2147 | } |
2150 | 2148 | ||
2151 | lock_chunks(root); | ||
2152 | |||
2153 | q = bdev_get_queue(bdev); | 2149 | q = bdev_get_queue(bdev); |
2154 | if (blk_queue_discard(q)) | 2150 | if (blk_queue_discard(q)) |
2155 | device->can_discard = 1; | 2151 | device->can_discard = 1; |
@@ -2160,6 +2156,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2160 | device->sector_size = root->sectorsize; | 2156 | device->sector_size = root->sectorsize; |
2161 | device->total_bytes = i_size_read(bdev->bd_inode); | 2157 | device->total_bytes = i_size_read(bdev->bd_inode); |
2162 | device->disk_total_bytes = device->total_bytes; | 2158 | device->disk_total_bytes = device->total_bytes; |
2159 | device->commit_total_bytes = device->total_bytes; | ||
2163 | device->dev_root = root->fs_info->dev_root; | 2160 | device->dev_root = root->fs_info->dev_root; |
2164 | device->bdev = bdev; | 2161 | device->bdev = bdev; |
2165 | device->in_fs_metadata = 1; | 2162 | device->in_fs_metadata = 1; |
@@ -2177,6 +2174,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2177 | device->fs_devices = root->fs_info->fs_devices; | 2174 | device->fs_devices = root->fs_info->fs_devices; |
2178 | 2175 | ||
2179 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2176 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
2177 | lock_chunks(root); | ||
2180 | list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); | 2178 | list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices); |
2181 | list_add(&device->dev_alloc_list, | 2179 | list_add(&device->dev_alloc_list, |
2182 | &root->fs_info->fs_devices->alloc_list); | 2180 | &root->fs_info->fs_devices->alloc_list); |
@@ -2184,8 +2182,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2184 | root->fs_info->fs_devices->open_devices++; | 2182 | root->fs_info->fs_devices->open_devices++; |
2185 | root->fs_info->fs_devices->rw_devices++; | 2183 | root->fs_info->fs_devices->rw_devices++; |
2186 | root->fs_info->fs_devices->total_devices++; | 2184 | root->fs_info->fs_devices->total_devices++; |
2187 | if (device->can_discard) | ||
2188 | root->fs_info->fs_devices->num_can_discard++; | ||
2189 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; | 2185 | root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; |
2190 | 2186 | ||
2191 | spin_lock(&root->fs_info->free_chunk_lock); | 2187 | spin_lock(&root->fs_info->free_chunk_lock); |
@@ -2195,26 +2191,45 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2195 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) | 2191 | if (!blk_queue_nonrot(bdev_get_queue(bdev))) |
2196 | root->fs_info->fs_devices->rotating = 1; | 2192 | root->fs_info->fs_devices->rotating = 1; |
2197 | 2193 | ||
2198 | total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy); | 2194 | tmp = btrfs_super_total_bytes(root->fs_info->super_copy); |
2199 | btrfs_set_super_total_bytes(root->fs_info->super_copy, | 2195 | btrfs_set_super_total_bytes(root->fs_info->super_copy, |
2200 | total_bytes + device->total_bytes); | 2196 | tmp + device->total_bytes); |
2201 | 2197 | ||
2202 | total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); | 2198 | tmp = btrfs_super_num_devices(root->fs_info->super_copy); |
2203 | btrfs_set_super_num_devices(root->fs_info->super_copy, | 2199 | btrfs_set_super_num_devices(root->fs_info->super_copy, |
2204 | total_bytes + 1); | 2200 | tmp + 1); |
2205 | 2201 | ||
2206 | /* add sysfs device entry */ | 2202 | /* add sysfs device entry */ |
2207 | btrfs_kobj_add_device(root->fs_info, device); | 2203 | btrfs_kobj_add_device(root->fs_info, device); |
2208 | 2204 | ||
2205 | /* | ||
2206 | * we've got more storage, clear any full flags on the space | ||
2207 | * infos | ||
2208 | */ | ||
2209 | btrfs_clear_space_info_full(root->fs_info); | ||
2210 | |||
2211 | unlock_chunks(root); | ||
2209 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2212 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
2210 | 2213 | ||
2211 | if (seeding_dev) { | 2214 | if (seeding_dev) { |
2212 | char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; | 2215 | lock_chunks(root); |
2213 | ret = init_first_rw_device(trans, root, device); | 2216 | ret = init_first_rw_device(trans, root, device); |
2217 | unlock_chunks(root); | ||
2214 | if (ret) { | 2218 | if (ret) { |
2215 | btrfs_abort_transaction(trans, root, ret); | 2219 | btrfs_abort_transaction(trans, root, ret); |
2216 | goto error_trans; | 2220 | goto error_trans; |
2217 | } | 2221 | } |
2222 | } | ||
2223 | |||
2224 | ret = btrfs_add_device(trans, root, device); | ||
2225 | if (ret) { | ||
2226 | btrfs_abort_transaction(trans, root, ret); | ||
2227 | goto error_trans; | ||
2228 | } | ||
2229 | |||
2230 | if (seeding_dev) { | ||
2231 | char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; | ||
2232 | |||
2218 | ret = btrfs_finish_sprout(trans, root); | 2233 | ret = btrfs_finish_sprout(trans, root); |
2219 | if (ret) { | 2234 | if (ret) { |
2220 | btrfs_abort_transaction(trans, root, ret); | 2235 | btrfs_abort_transaction(trans, root, ret); |
@@ -2228,21 +2243,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2228 | root->fs_info->fsid); | 2243 | root->fs_info->fsid); |
2229 | if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) | 2244 | if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) |
2230 | goto error_trans; | 2245 | goto error_trans; |
2231 | } else { | ||
2232 | ret = btrfs_add_device(trans, root, device); | ||
2233 | if (ret) { | ||
2234 | btrfs_abort_transaction(trans, root, ret); | ||
2235 | goto error_trans; | ||
2236 | } | ||
2237 | } | 2246 | } |
2238 | 2247 | ||
2239 | /* | ||
2240 | * we've got more storage, clear any full flags on the space | ||
2241 | * infos | ||
2242 | */ | ||
2243 | btrfs_clear_space_info_full(root->fs_info); | ||
2244 | |||
2245 | unlock_chunks(root); | ||
2246 | root->fs_info->num_tolerated_disk_barrier_failures = | 2248 | root->fs_info->num_tolerated_disk_barrier_failures = |
2247 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); | 2249 | btrfs_calc_num_tolerated_disk_barrier_failures(root->fs_info); |
2248 | ret = btrfs_commit_transaction(trans, root); | 2250 | ret = btrfs_commit_transaction(trans, root); |
@@ -2274,7 +2276,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2274 | return ret; | 2276 | return ret; |
2275 | 2277 | ||
2276 | error_trans: | 2278 | error_trans: |
2277 | unlock_chunks(root); | ||
2278 | btrfs_end_transaction(trans, root); | 2279 | btrfs_end_transaction(trans, root); |
2279 | rcu_string_free(device->name); | 2280 | rcu_string_free(device->name); |
2280 | btrfs_kobj_rm_device(root->fs_info, device); | 2281 | btrfs_kobj_rm_device(root->fs_info, device); |
@@ -2289,6 +2290,7 @@ error: | |||
2289 | } | 2290 | } |
2290 | 2291 | ||
2291 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | 2292 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, |
2293 | struct btrfs_device *srcdev, | ||
2292 | struct btrfs_device **device_out) | 2294 | struct btrfs_device **device_out) |
2293 | { | 2295 | { |
2294 | struct request_queue *q; | 2296 | struct request_queue *q; |
@@ -2301,24 +2303,38 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | |||
2301 | int ret = 0; | 2303 | int ret = 0; |
2302 | 2304 | ||
2303 | *device_out = NULL; | 2305 | *device_out = NULL; |
2304 | if (fs_info->fs_devices->seeding) | 2306 | if (fs_info->fs_devices->seeding) { |
2307 | btrfs_err(fs_info, "the filesystem is a seed filesystem!"); | ||
2305 | return -EINVAL; | 2308 | return -EINVAL; |
2309 | } | ||
2306 | 2310 | ||
2307 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, | 2311 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, |
2308 | fs_info->bdev_holder); | 2312 | fs_info->bdev_holder); |
2309 | if (IS_ERR(bdev)) | 2313 | if (IS_ERR(bdev)) { |
2314 | btrfs_err(fs_info, "target device %s is invalid!", device_path); | ||
2310 | return PTR_ERR(bdev); | 2315 | return PTR_ERR(bdev); |
2316 | } | ||
2311 | 2317 | ||
2312 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | 2318 | filemap_write_and_wait(bdev->bd_inode->i_mapping); |
2313 | 2319 | ||
2314 | devices = &fs_info->fs_devices->devices; | 2320 | devices = &fs_info->fs_devices->devices; |
2315 | list_for_each_entry(device, devices, dev_list) { | 2321 | list_for_each_entry(device, devices, dev_list) { |
2316 | if (device->bdev == bdev) { | 2322 | if (device->bdev == bdev) { |
2323 | btrfs_err(fs_info, "target device is in the filesystem!"); | ||
2317 | ret = -EEXIST; | 2324 | ret = -EEXIST; |
2318 | goto error; | 2325 | goto error; |
2319 | } | 2326 | } |
2320 | } | 2327 | } |
2321 | 2328 | ||
2329 | |||
2330 | if (i_size_read(bdev->bd_inode) < | ||
2331 | btrfs_device_get_total_bytes(srcdev)) { | ||
2332 | btrfs_err(fs_info, "target device is smaller than source device!"); | ||
2333 | ret = -EINVAL; | ||
2334 | goto error; | ||
2335 | } | ||
2336 | |||
2337 | |||
2322 | device = btrfs_alloc_device(NULL, &devid, NULL); | 2338 | device = btrfs_alloc_device(NULL, &devid, NULL); |
2323 | if (IS_ERR(device)) { | 2339 | if (IS_ERR(device)) { |
2324 | ret = PTR_ERR(device); | 2340 | ret = PTR_ERR(device); |
@@ -2342,8 +2358,12 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | |||
2342 | device->io_width = root->sectorsize; | 2358 | device->io_width = root->sectorsize; |
2343 | device->io_align = root->sectorsize; | 2359 | device->io_align = root->sectorsize; |
2344 | device->sector_size = root->sectorsize; | 2360 | device->sector_size = root->sectorsize; |
2345 | device->total_bytes = i_size_read(bdev->bd_inode); | 2361 | device->total_bytes = btrfs_device_get_total_bytes(srcdev); |
2346 | device->disk_total_bytes = device->total_bytes; | 2362 | device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev); |
2363 | device->bytes_used = btrfs_device_get_bytes_used(srcdev); | ||
2364 | ASSERT(list_empty(&srcdev->resized_list)); | ||
2365 | device->commit_total_bytes = srcdev->commit_total_bytes; | ||
2366 | device->commit_bytes_used = device->bytes_used; | ||
2347 | device->dev_root = fs_info->dev_root; | 2367 | device->dev_root = fs_info->dev_root; |
2348 | device->bdev = bdev; | 2368 | device->bdev = bdev; |
2349 | device->in_fs_metadata = 1; | 2369 | device->in_fs_metadata = 1; |
@@ -2355,8 +2375,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | |||
2355 | list_add(&device->dev_list, &fs_info->fs_devices->devices); | 2375 | list_add(&device->dev_list, &fs_info->fs_devices->devices); |
2356 | fs_info->fs_devices->num_devices++; | 2376 | fs_info->fs_devices->num_devices++; |
2357 | fs_info->fs_devices->open_devices++; | 2377 | fs_info->fs_devices->open_devices++; |
2358 | if (device->can_discard) | ||
2359 | fs_info->fs_devices->num_can_discard++; | ||
2360 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2378 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
2361 | 2379 | ||
2362 | *device_out = device; | 2380 | *device_out = device; |
@@ -2415,8 +2433,10 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | |||
2415 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | 2433 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); |
2416 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | 2434 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); |
2417 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | 2435 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); |
2418 | btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); | 2436 | btrfs_set_device_total_bytes(leaf, dev_item, |
2419 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | 2437 | btrfs_device_get_disk_total_bytes(device)); |
2438 | btrfs_set_device_bytes_used(leaf, dev_item, | ||
2439 | btrfs_device_get_bytes_used(device)); | ||
2420 | btrfs_mark_buffer_dirty(leaf); | 2440 | btrfs_mark_buffer_dirty(leaf); |
2421 | 2441 | ||
2422 | out: | 2442 | out: |
@@ -2424,40 +2444,44 @@ out: | |||
2424 | return ret; | 2444 | return ret; |
2425 | } | 2445 | } |
2426 | 2446 | ||
2427 | static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | 2447 | int btrfs_grow_device(struct btrfs_trans_handle *trans, |
2428 | struct btrfs_device *device, u64 new_size) | 2448 | struct btrfs_device *device, u64 new_size) |
2429 | { | 2449 | { |
2430 | struct btrfs_super_block *super_copy = | 2450 | struct btrfs_super_block *super_copy = |
2431 | device->dev_root->fs_info->super_copy; | 2451 | device->dev_root->fs_info->super_copy; |
2432 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2452 | struct btrfs_fs_devices *fs_devices; |
2433 | u64 diff = new_size - device->total_bytes; | 2453 | u64 old_total; |
2454 | u64 diff; | ||
2434 | 2455 | ||
2435 | if (!device->writeable) | 2456 | if (!device->writeable) |
2436 | return -EACCES; | 2457 | return -EACCES; |
2458 | |||
2459 | lock_chunks(device->dev_root); | ||
2460 | old_total = btrfs_super_total_bytes(super_copy); | ||
2461 | diff = new_size - device->total_bytes; | ||
2462 | |||
2437 | if (new_size <= device->total_bytes || | 2463 | if (new_size <= device->total_bytes || |
2438 | device->is_tgtdev_for_dev_replace) | 2464 | device->is_tgtdev_for_dev_replace) { |
2465 | unlock_chunks(device->dev_root); | ||
2439 | return -EINVAL; | 2466 | return -EINVAL; |
2467 | } | ||
2468 | |||
2469 | fs_devices = device->dev_root->fs_info->fs_devices; | ||
2440 | 2470 | ||
2441 | btrfs_set_super_total_bytes(super_copy, old_total + diff); | 2471 | btrfs_set_super_total_bytes(super_copy, old_total + diff); |
2442 | device->fs_devices->total_rw_bytes += diff; | 2472 | device->fs_devices->total_rw_bytes += diff; |
2443 | 2473 | ||
2444 | device->total_bytes = new_size; | 2474 | btrfs_device_set_total_bytes(device, new_size); |
2445 | device->disk_total_bytes = new_size; | 2475 | btrfs_device_set_disk_total_bytes(device, new_size); |
2446 | btrfs_clear_space_info_full(device->dev_root->fs_info); | 2476 | btrfs_clear_space_info_full(device->dev_root->fs_info); |
2477 | if (list_empty(&device->resized_list)) | ||
2478 | list_add_tail(&device->resized_list, | ||
2479 | &fs_devices->resized_devices); | ||
2480 | unlock_chunks(device->dev_root); | ||
2447 | 2481 | ||
2448 | return btrfs_update_device(trans, device); | 2482 | return btrfs_update_device(trans, device); |
2449 | } | 2483 | } |
2450 | 2484 | ||
2451 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | ||
2452 | struct btrfs_device *device, u64 new_size) | ||
2453 | { | ||
2454 | int ret; | ||
2455 | lock_chunks(device->dev_root); | ||
2456 | ret = __btrfs_grow_device(trans, device, new_size); | ||
2457 | unlock_chunks(device->dev_root); | ||
2458 | return ret; | ||
2459 | } | ||
2460 | |||
2461 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | 2485 | static int btrfs_free_chunk(struct btrfs_trans_handle *trans, |
2462 | struct btrfs_root *root, | 2486 | struct btrfs_root *root, |
2463 | u64 chunk_tree, u64 chunk_objectid, | 2487 | u64 chunk_tree, u64 chunk_objectid, |
@@ -2509,6 +2533,7 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | |||
2509 | u32 cur; | 2533 | u32 cur; |
2510 | struct btrfs_key key; | 2534 | struct btrfs_key key; |
2511 | 2535 | ||
2536 | lock_chunks(root); | ||
2512 | array_size = btrfs_super_sys_array_size(super_copy); | 2537 | array_size = btrfs_super_sys_array_size(super_copy); |
2513 | 2538 | ||
2514 | ptr = super_copy->sys_chunk_array; | 2539 | ptr = super_copy->sys_chunk_array; |
@@ -2538,79 +2563,95 @@ static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 | |||
2538 | cur += len; | 2563 | cur += len; |
2539 | } | 2564 | } |
2540 | } | 2565 | } |
2566 | unlock_chunks(root); | ||
2541 | return ret; | 2567 | return ret; |
2542 | } | 2568 | } |
2543 | 2569 | ||
2544 | static int btrfs_relocate_chunk(struct btrfs_root *root, | 2570 | int btrfs_remove_chunk(struct btrfs_trans_handle *trans, |
2545 | u64 chunk_tree, u64 chunk_objectid, | 2571 | struct btrfs_root *root, u64 chunk_offset) |
2546 | u64 chunk_offset) | ||
2547 | { | 2572 | { |
2548 | struct extent_map_tree *em_tree; | 2573 | struct extent_map_tree *em_tree; |
2549 | struct btrfs_root *extent_root; | ||
2550 | struct btrfs_trans_handle *trans; | ||
2551 | struct extent_map *em; | 2574 | struct extent_map *em; |
2575 | struct btrfs_root *extent_root = root->fs_info->extent_root; | ||
2552 | struct map_lookup *map; | 2576 | struct map_lookup *map; |
2553 | int ret; | 2577 | u64 dev_extent_len = 0; |
2554 | int i; | 2578 | u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
2579 | u64 chunk_tree = root->fs_info->chunk_root->objectid; | ||
2580 | int i, ret = 0; | ||
2555 | 2581 | ||
2582 | /* Just in case */ | ||
2556 | root = root->fs_info->chunk_root; | 2583 | root = root->fs_info->chunk_root; |
2557 | extent_root = root->fs_info->extent_root; | ||
2558 | em_tree = &root->fs_info->mapping_tree.map_tree; | 2584 | em_tree = &root->fs_info->mapping_tree.map_tree; |
2559 | 2585 | ||
2560 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
2561 | if (ret) | ||
2562 | return -ENOSPC; | ||
2563 | |||
2564 | /* step one, relocate all the extents inside this chunk */ | ||
2565 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | ||
2566 | if (ret) | ||
2567 | return ret; | ||
2568 | |||
2569 | trans = btrfs_start_transaction(root, 0); | ||
2570 | if (IS_ERR(trans)) { | ||
2571 | ret = PTR_ERR(trans); | ||
2572 | btrfs_std_error(root->fs_info, ret); | ||
2573 | return ret; | ||
2574 | } | ||
2575 | |||
2576 | lock_chunks(root); | ||
2577 | |||
2578 | /* | ||
2579 | * step two, delete the device extents and the | ||
2580 | * chunk tree entries | ||
2581 | */ | ||
2582 | read_lock(&em_tree->lock); | 2586 | read_lock(&em_tree->lock); |
2583 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | 2587 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
2584 | read_unlock(&em_tree->lock); | 2588 | read_unlock(&em_tree->lock); |
2585 | 2589 | ||
2586 | BUG_ON(!em || em->start > chunk_offset || | 2590 | if (!em || em->start > chunk_offset || |
2587 | em->start + em->len < chunk_offset); | 2591 | em->start + em->len < chunk_offset) { |
2592 | /* | ||
2593 | * This is a logic error, but we don't want to just rely on the | ||
2594 | * user having built with ASSERT enabled, so if ASSERT doesn't | ||
2595 | * do anything we still error out. | ||
2596 | */ | ||
2597 | ASSERT(0); | ||
2598 | if (em) | ||
2599 | free_extent_map(em); | ||
2600 | return -EINVAL; | ||
2601 | } | ||
2588 | map = (struct map_lookup *)em->bdev; | 2602 | map = (struct map_lookup *)em->bdev; |
2589 | 2603 | ||
2590 | for (i = 0; i < map->num_stripes; i++) { | 2604 | for (i = 0; i < map->num_stripes; i++) { |
2591 | ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, | 2605 | struct btrfs_device *device = map->stripes[i].dev; |
2592 | map->stripes[i].physical); | 2606 | ret = btrfs_free_dev_extent(trans, device, |
2593 | BUG_ON(ret); | 2607 | map->stripes[i].physical, |
2608 | &dev_extent_len); | ||
2609 | if (ret) { | ||
2610 | btrfs_abort_transaction(trans, root, ret); | ||
2611 | goto out; | ||
2612 | } | ||
2613 | |||
2614 | if (device->bytes_used > 0) { | ||
2615 | lock_chunks(root); | ||
2616 | btrfs_device_set_bytes_used(device, | ||
2617 | device->bytes_used - dev_extent_len); | ||
2618 | spin_lock(&root->fs_info->free_chunk_lock); | ||
2619 | root->fs_info->free_chunk_space += dev_extent_len; | ||
2620 | spin_unlock(&root->fs_info->free_chunk_lock); | ||
2621 | btrfs_clear_space_info_full(root->fs_info); | ||
2622 | unlock_chunks(root); | ||
2623 | } | ||
2594 | 2624 | ||
2595 | if (map->stripes[i].dev) { | 2625 | if (map->stripes[i].dev) { |
2596 | ret = btrfs_update_device(trans, map->stripes[i].dev); | 2626 | ret = btrfs_update_device(trans, map->stripes[i].dev); |
2597 | BUG_ON(ret); | 2627 | if (ret) { |
2628 | btrfs_abort_transaction(trans, root, ret); | ||
2629 | goto out; | ||
2630 | } | ||
2598 | } | 2631 | } |
2599 | } | 2632 | } |
2600 | ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, | 2633 | ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, |
2601 | chunk_offset); | 2634 | chunk_offset); |
2602 | 2635 | if (ret) { | |
2603 | BUG_ON(ret); | 2636 | btrfs_abort_transaction(trans, root, ret); |
2637 | goto out; | ||
2638 | } | ||
2604 | 2639 | ||
2605 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); | 2640 | trace_btrfs_chunk_free(root, map, chunk_offset, em->len); |
2606 | 2641 | ||
2607 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { | 2642 | if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { |
2608 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); | 2643 | ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); |
2609 | BUG_ON(ret); | 2644 | if (ret) { |
2645 | btrfs_abort_transaction(trans, root, ret); | ||
2646 | goto out; | ||
2647 | } | ||
2610 | } | 2648 | } |
2611 | 2649 | ||
2612 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 2650 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); |
2613 | BUG_ON(ret); | 2651 | if (ret) { |
2652 | btrfs_abort_transaction(trans, extent_root, ret); | ||
2653 | goto out; | ||
2654 | } | ||
2614 | 2655 | ||
2615 | write_lock(&em_tree->lock); | 2656 | write_lock(&em_tree->lock); |
2616 | remove_extent_mapping(em_tree, em); | 2657 | remove_extent_mapping(em_tree, em); |
@@ -2618,12 +2659,46 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
2618 | 2659 | ||
2619 | /* once for the tree */ | 2660 | /* once for the tree */ |
2620 | free_extent_map(em); | 2661 | free_extent_map(em); |
2662 | out: | ||
2621 | /* once for us */ | 2663 | /* once for us */ |
2622 | free_extent_map(em); | 2664 | free_extent_map(em); |
2665 | return ret; | ||
2666 | } | ||
2623 | 2667 | ||
2624 | unlock_chunks(root); | 2668 | static int btrfs_relocate_chunk(struct btrfs_root *root, |
2669 | u64 chunk_tree, u64 chunk_objectid, | ||
2670 | u64 chunk_offset) | ||
2671 | { | ||
2672 | struct btrfs_root *extent_root; | ||
2673 | struct btrfs_trans_handle *trans; | ||
2674 | int ret; | ||
2675 | |||
2676 | root = root->fs_info->chunk_root; | ||
2677 | extent_root = root->fs_info->extent_root; | ||
2678 | |||
2679 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
2680 | if (ret) | ||
2681 | return -ENOSPC; | ||
2682 | |||
2683 | /* step one, relocate all the extents inside this chunk */ | ||
2684 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | ||
2685 | if (ret) | ||
2686 | return ret; | ||
2687 | |||
2688 | trans = btrfs_start_transaction(root, 0); | ||
2689 | if (IS_ERR(trans)) { | ||
2690 | ret = PTR_ERR(trans); | ||
2691 | btrfs_std_error(root->fs_info, ret); | ||
2692 | return ret; | ||
2693 | } | ||
2694 | |||
2695 | /* | ||
2696 | * step two, delete the device extents and the | ||
2697 | * chunk tree entries | ||
2698 | */ | ||
2699 | ret = btrfs_remove_chunk(trans, root, chunk_offset); | ||
2625 | btrfs_end_transaction(trans, root); | 2700 | btrfs_end_transaction(trans, root); |
2626 | return 0; | 2701 | return ret; |
2627 | } | 2702 | } |
2628 | 2703 | ||
2629 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | 2704 | static int btrfs_relocate_sys_chunks(struct btrfs_root *root) |
@@ -2676,8 +2751,8 @@ again: | |||
2676 | found_key.offset); | 2751 | found_key.offset); |
2677 | if (ret == -ENOSPC) | 2752 | if (ret == -ENOSPC) |
2678 | failed++; | 2753 | failed++; |
2679 | else if (ret) | 2754 | else |
2680 | BUG(); | 2755 | BUG_ON(ret); |
2681 | } | 2756 | } |
2682 | 2757 | ||
2683 | if (found_key.offset == 0) | 2758 | if (found_key.offset == 0) |
@@ -3084,11 +3159,12 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
3084 | /* step one make some room on all the devices */ | 3159 | /* step one make some room on all the devices */ |
3085 | devices = &fs_info->fs_devices->devices; | 3160 | devices = &fs_info->fs_devices->devices; |
3086 | list_for_each_entry(device, devices, dev_list) { | 3161 | list_for_each_entry(device, devices, dev_list) { |
3087 | old_size = device->total_bytes; | 3162 | old_size = btrfs_device_get_total_bytes(device); |
3088 | size_to_free = div_factor(old_size, 1); | 3163 | size_to_free = div_factor(old_size, 1); |
3089 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); | 3164 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); |
3090 | if (!device->writeable || | 3165 | if (!device->writeable || |
3091 | device->total_bytes - device->bytes_used > size_to_free || | 3166 | btrfs_device_get_total_bytes(device) - |
3167 | btrfs_device_get_bytes_used(device) > size_to_free || | ||
3092 | device->is_tgtdev_for_dev_replace) | 3168 | device->is_tgtdev_for_dev_replace) |
3093 | continue; | 3169 | continue; |
3094 | 3170 | ||
@@ -3643,8 +3719,6 @@ static int btrfs_uuid_scan_kthread(void *data) | |||
3643 | max_key.type = BTRFS_ROOT_ITEM_KEY; | 3719 | max_key.type = BTRFS_ROOT_ITEM_KEY; |
3644 | max_key.offset = (u64)-1; | 3720 | max_key.offset = (u64)-1; |
3645 | 3721 | ||
3646 | path->keep_locks = 1; | ||
3647 | |||
3648 | while (1) { | 3722 | while (1) { |
3649 | ret = btrfs_search_forward(root, &key, path, 0); | 3723 | ret = btrfs_search_forward(root, &key, path, 0); |
3650 | if (ret) { | 3724 | if (ret) { |
@@ -3896,8 +3970,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
3896 | struct btrfs_key key; | 3970 | struct btrfs_key key; |
3897 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; | 3971 | struct btrfs_super_block *super_copy = root->fs_info->super_copy; |
3898 | u64 old_total = btrfs_super_total_bytes(super_copy); | 3972 | u64 old_total = btrfs_super_total_bytes(super_copy); |
3899 | u64 old_size = device->total_bytes; | 3973 | u64 old_size = btrfs_device_get_total_bytes(device); |
3900 | u64 diff = device->total_bytes - new_size; | 3974 | u64 diff = old_size - new_size; |
3901 | 3975 | ||
3902 | if (device->is_tgtdev_for_dev_replace) | 3976 | if (device->is_tgtdev_for_dev_replace) |
3903 | return -EINVAL; | 3977 | return -EINVAL; |
@@ -3910,7 +3984,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
3910 | 3984 | ||
3911 | lock_chunks(root); | 3985 | lock_chunks(root); |
3912 | 3986 | ||
3913 | device->total_bytes = new_size; | 3987 | btrfs_device_set_total_bytes(device, new_size); |
3914 | if (device->writeable) { | 3988 | if (device->writeable) { |
3915 | device->fs_devices->total_rw_bytes -= diff; | 3989 | device->fs_devices->total_rw_bytes -= diff; |
3916 | spin_lock(&root->fs_info->free_chunk_lock); | 3990 | spin_lock(&root->fs_info->free_chunk_lock); |
@@ -3976,7 +4050,7 @@ again: | |||
3976 | ret = -ENOSPC; | 4050 | ret = -ENOSPC; |
3977 | lock_chunks(root); | 4051 | lock_chunks(root); |
3978 | 4052 | ||
3979 | device->total_bytes = old_size; | 4053 | btrfs_device_set_total_bytes(device, old_size); |
3980 | if (device->writeable) | 4054 | if (device->writeable) |
3981 | device->fs_devices->total_rw_bytes += diff; | 4055 | device->fs_devices->total_rw_bytes += diff; |
3982 | spin_lock(&root->fs_info->free_chunk_lock); | 4056 | spin_lock(&root->fs_info->free_chunk_lock); |
@@ -3994,18 +4068,17 @@ again: | |||
3994 | } | 4068 | } |
3995 | 4069 | ||
3996 | lock_chunks(root); | 4070 | lock_chunks(root); |
4071 | btrfs_device_set_disk_total_bytes(device, new_size); | ||
4072 | if (list_empty(&device->resized_list)) | ||
4073 | list_add_tail(&device->resized_list, | ||
4074 | &root->fs_info->fs_devices->resized_devices); | ||
3997 | 4075 | ||
3998 | device->disk_total_bytes = new_size; | ||
3999 | /* Now btrfs_update_device() will change the on-disk size. */ | ||
4000 | ret = btrfs_update_device(trans, device); | ||
4001 | if (ret) { | ||
4002 | unlock_chunks(root); | ||
4003 | btrfs_end_transaction(trans, root); | ||
4004 | goto done; | ||
4005 | } | ||
4006 | WARN_ON(diff > old_total); | 4076 | WARN_ON(diff > old_total); |
4007 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | 4077 | btrfs_set_super_total_bytes(super_copy, old_total - diff); |
4008 | unlock_chunks(root); | 4078 | unlock_chunks(root); |
4079 | |||
4080 | /* Now btrfs_update_device() will change the on-disk size. */ | ||
4081 | ret = btrfs_update_device(trans, device); | ||
4009 | btrfs_end_transaction(trans, root); | 4082 | btrfs_end_transaction(trans, root); |
4010 | done: | 4083 | done: |
4011 | btrfs_free_path(path); | 4084 | btrfs_free_path(path); |
@@ -4021,10 +4094,13 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, | |||
4021 | u32 array_size; | 4094 | u32 array_size; |
4022 | u8 *ptr; | 4095 | u8 *ptr; |
4023 | 4096 | ||
4097 | lock_chunks(root); | ||
4024 | array_size = btrfs_super_sys_array_size(super_copy); | 4098 | array_size = btrfs_super_sys_array_size(super_copy); |
4025 | if (array_size + item_size + sizeof(disk_key) | 4099 | if (array_size + item_size + sizeof(disk_key) |
4026 | > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) | 4100 | > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { |
4101 | unlock_chunks(root); | ||
4027 | return -EFBIG; | 4102 | return -EFBIG; |
4103 | } | ||
4028 | 4104 | ||
4029 | ptr = super_copy->sys_chunk_array + array_size; | 4105 | ptr = super_copy->sys_chunk_array + array_size; |
4030 | btrfs_cpu_key_to_disk(&disk_key, key); | 4106 | btrfs_cpu_key_to_disk(&disk_key, key); |
@@ -4033,6 +4109,8 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, | |||
4033 | memcpy(ptr, chunk, item_size); | 4109 | memcpy(ptr, chunk, item_size); |
4034 | item_size += sizeof(disk_key); | 4110 | item_size += sizeof(disk_key); |
4035 | btrfs_set_super_sys_array_size(super_copy, array_size + item_size); | 4111 | btrfs_set_super_sys_array_size(super_copy, array_size + item_size); |
4112 | unlock_chunks(root); | ||
4113 | |||
4036 | return 0; | 4114 | return 0; |
4037 | } | 4115 | } |
4038 | 4116 | ||
@@ -4402,6 +4480,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4402 | if (ret) | 4480 | if (ret) |
4403 | goto error_del_extent; | 4481 | goto error_del_extent; |
4404 | 4482 | ||
4483 | for (i = 0; i < map->num_stripes; i++) { | ||
4484 | num_bytes = map->stripes[i].dev->bytes_used + stripe_size; | ||
4485 | btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes); | ||
4486 | } | ||
4487 | |||
4488 | spin_lock(&extent_root->fs_info->free_chunk_lock); | ||
4489 | extent_root->fs_info->free_chunk_space -= (stripe_size * | ||
4490 | map->num_stripes); | ||
4491 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | ||
4492 | |||
4405 | free_extent_map(em); | 4493 | free_extent_map(em); |
4406 | check_raid56_incompat_flag(extent_root->fs_info, type); | 4494 | check_raid56_incompat_flag(extent_root->fs_info, type); |
4407 | 4495 | ||
@@ -4473,7 +4561,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4473 | device = map->stripes[i].dev; | 4561 | device = map->stripes[i].dev; |
4474 | dev_offset = map->stripes[i].physical; | 4562 | dev_offset = map->stripes[i].physical; |
4475 | 4563 | ||
4476 | device->bytes_used += stripe_size; | ||
4477 | ret = btrfs_update_device(trans, device); | 4564 | ret = btrfs_update_device(trans, device); |
4478 | if (ret) | 4565 | if (ret) |
4479 | goto out; | 4566 | goto out; |
@@ -4486,11 +4573,6 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | |||
4486 | goto out; | 4573 | goto out; |
4487 | } | 4574 | } |
4488 | 4575 | ||
4489 | spin_lock(&extent_root->fs_info->free_chunk_lock); | ||
4490 | extent_root->fs_info->free_chunk_space -= (stripe_size * | ||
4491 | map->num_stripes); | ||
4492 | spin_unlock(&extent_root->fs_info->free_chunk_lock); | ||
4493 | |||
4494 | stripe = &chunk->stripe; | 4576 | stripe = &chunk->stripe; |
4495 | for (i = 0; i < map->num_stripes; i++) { | 4577 | for (i = 0; i < map->num_stripes; i++) { |
4496 | device = map->stripes[i].dev; | 4578 | device = map->stripes[i].dev; |
@@ -4570,16 +4652,25 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
4570 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); | 4652 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); |
4571 | ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, | 4653 | ret = __btrfs_alloc_chunk(trans, extent_root, sys_chunk_offset, |
4572 | alloc_profile); | 4654 | alloc_profile); |
4573 | if (ret) { | 4655 | return ret; |
4574 | btrfs_abort_transaction(trans, root, ret); | 4656 | } |
4575 | goto out; | 4657 | |
4658 | static inline int btrfs_chunk_max_errors(struct map_lookup *map) | ||
4659 | { | ||
4660 | int max_errors; | ||
4661 | |||
4662 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | ||
4663 | BTRFS_BLOCK_GROUP_RAID10 | | ||
4664 | BTRFS_BLOCK_GROUP_RAID5 | | ||
4665 | BTRFS_BLOCK_GROUP_DUP)) { | ||
4666 | max_errors = 1; | ||
4667 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) { | ||
4668 | max_errors = 2; | ||
4669 | } else { | ||
4670 | max_errors = 0; | ||
4576 | } | 4671 | } |
4577 | 4672 | ||
4578 | ret = btrfs_add_device(trans, fs_info->chunk_root, device); | 4673 | return max_errors; |
4579 | if (ret) | ||
4580 | btrfs_abort_transaction(trans, root, ret); | ||
4581 | out: | ||
4582 | return ret; | ||
4583 | } | 4674 | } |
4584 | 4675 | ||
4585 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | 4676 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) |
@@ -4588,6 +4679,7 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
4588 | struct map_lookup *map; | 4679 | struct map_lookup *map; |
4589 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | 4680 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; |
4590 | int readonly = 0; | 4681 | int readonly = 0; |
4682 | int miss_ndevs = 0; | ||
4591 | int i; | 4683 | int i; |
4592 | 4684 | ||
4593 | read_lock(&map_tree->map_tree.lock); | 4685 | read_lock(&map_tree->map_tree.lock); |
@@ -4596,18 +4688,27 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
4596 | if (!em) | 4688 | if (!em) |
4597 | return 1; | 4689 | return 1; |
4598 | 4690 | ||
4599 | if (btrfs_test_opt(root, DEGRADED)) { | ||
4600 | free_extent_map(em); | ||
4601 | return 0; | ||
4602 | } | ||
4603 | |||
4604 | map = (struct map_lookup *)em->bdev; | 4691 | map = (struct map_lookup *)em->bdev; |
4605 | for (i = 0; i < map->num_stripes; i++) { | 4692 | for (i = 0; i < map->num_stripes; i++) { |
4693 | if (map->stripes[i].dev->missing) { | ||
4694 | miss_ndevs++; | ||
4695 | continue; | ||
4696 | } | ||
4697 | |||
4606 | if (!map->stripes[i].dev->writeable) { | 4698 | if (!map->stripes[i].dev->writeable) { |
4607 | readonly = 1; | 4699 | readonly = 1; |
4608 | break; | 4700 | goto end; |
4609 | } | 4701 | } |
4610 | } | 4702 | } |
4703 | |||
4704 | /* | ||
4705 | * If the number of missing devices is larger than max errors, | ||
4706 | * we can not write the data into that chunk successfully, so | ||
4707 | * set it readonly. | ||
4708 | */ | ||
4709 | if (miss_ndevs > btrfs_chunk_max_errors(map)) | ||
4710 | readonly = 1; | ||
4711 | end: | ||
4611 | free_extent_map(em); | 4712 | free_extent_map(em); |
4612 | return readonly; | 4713 | return readonly; |
4613 | } | 4714 | } |
@@ -5008,6 +5109,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5008 | num_stripes = min_t(u64, map->num_stripes, | 5109 | num_stripes = min_t(u64, map->num_stripes, |
5009 | stripe_nr_end - stripe_nr_orig); | 5110 | stripe_nr_end - stripe_nr_orig); |
5010 | stripe_index = do_div(stripe_nr, map->num_stripes); | 5111 | stripe_index = do_div(stripe_nr, map->num_stripes); |
5112 | if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) | ||
5113 | mirror_num = 1; | ||
5011 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 5114 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
5012 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) | 5115 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) |
5013 | num_stripes = map->num_stripes; | 5116 | num_stripes = map->num_stripes; |
@@ -5111,6 +5214,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5111 | /* We distribute the parity blocks across stripes */ | 5214 | /* We distribute the parity blocks across stripes */ |
5112 | tmp = stripe_nr + stripe_index; | 5215 | tmp = stripe_nr + stripe_index; |
5113 | stripe_index = do_div(tmp, map->num_stripes); | 5216 | stripe_index = do_div(tmp, map->num_stripes); |
5217 | if (!(rw & (REQ_WRITE | REQ_DISCARD | | ||
5218 | REQ_GET_READ_MIRRORS)) && mirror_num <= 1) | ||
5219 | mirror_num = 1; | ||
5114 | } | 5220 | } |
5115 | } else { | 5221 | } else { |
5116 | /* | 5222 | /* |
@@ -5218,16 +5324,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5218 | } | 5324 | } |
5219 | } | 5325 | } |
5220 | 5326 | ||
5221 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) { | 5327 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
5222 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 5328 | max_errors = btrfs_chunk_max_errors(map); |
5223 | BTRFS_BLOCK_GROUP_RAID10 | | ||
5224 | BTRFS_BLOCK_GROUP_RAID5 | | ||
5225 | BTRFS_BLOCK_GROUP_DUP)) { | ||
5226 | max_errors = 1; | ||
5227 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) { | ||
5228 | max_errors = 2; | ||
5229 | } | ||
5230 | } | ||
5231 | 5329 | ||
5232 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | 5330 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && |
5233 | dev_replace->tgtdev != NULL) { | 5331 | dev_replace->tgtdev != NULL) { |
@@ -5610,8 +5708,8 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | |||
5610 | name = rcu_dereference(dev->name); | 5708 | name = rcu_dereference(dev->name); |
5611 | pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " | 5709 | pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " |
5612 | "(%s id %llu), size=%u\n", rw, | 5710 | "(%s id %llu), size=%u\n", rw, |
5613 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | 5711 | (u64)bio->bi_iter.bi_sector, (u_long)dev->bdev->bd_dev, |
5614 | name->str, dev->devid, bio->bi_size); | 5712 | name->str, dev->devid, bio->bi_iter.bi_size); |
5615 | rcu_read_unlock(); | 5713 | rcu_read_unlock(); |
5616 | } | 5714 | } |
5617 | #endif | 5715 | #endif |
@@ -5789,10 +5887,10 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, | |||
5789 | } | 5887 | } |
5790 | 5888 | ||
5791 | static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | 5889 | static struct btrfs_device *add_missing_dev(struct btrfs_root *root, |
5890 | struct btrfs_fs_devices *fs_devices, | ||
5792 | u64 devid, u8 *dev_uuid) | 5891 | u64 devid, u8 *dev_uuid) |
5793 | { | 5892 | { |
5794 | struct btrfs_device *device; | 5893 | struct btrfs_device *device; |
5795 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
5796 | 5894 | ||
5797 | device = btrfs_alloc_device(NULL, &devid, dev_uuid); | 5895 | device = btrfs_alloc_device(NULL, &devid, dev_uuid); |
5798 | if (IS_ERR(device)) | 5896 | if (IS_ERR(device)) |
@@ -5929,7 +6027,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
5929 | } | 6027 | } |
5930 | if (!map->stripes[i].dev) { | 6028 | if (!map->stripes[i].dev) { |
5931 | map->stripes[i].dev = | 6029 | map->stripes[i].dev = |
5932 | add_missing_dev(root, devid, uuid); | 6030 | add_missing_dev(root, root->fs_info->fs_devices, |
6031 | devid, uuid); | ||
5933 | if (!map->stripes[i].dev) { | 6032 | if (!map->stripes[i].dev) { |
5934 | free_extent_map(em); | 6033 | free_extent_map(em); |
5935 | return -EIO; | 6034 | return -EIO; |
@@ -5956,7 +6055,9 @@ static void fill_device_from_item(struct extent_buffer *leaf, | |||
5956 | device->devid = btrfs_device_id(leaf, dev_item); | 6055 | device->devid = btrfs_device_id(leaf, dev_item); |
5957 | device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); | 6056 | device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); |
5958 | device->total_bytes = device->disk_total_bytes; | 6057 | device->total_bytes = device->disk_total_bytes; |
6058 | device->commit_total_bytes = device->disk_total_bytes; | ||
5959 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); | 6059 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); |
6060 | device->commit_bytes_used = device->bytes_used; | ||
5960 | device->type = btrfs_device_type(leaf, dev_item); | 6061 | device->type = btrfs_device_type(leaf, dev_item); |
5961 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 6062 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
5962 | device->io_width = btrfs_device_io_width(leaf, dev_item); | 6063 | device->io_width = btrfs_device_io_width(leaf, dev_item); |
@@ -5968,7 +6069,8 @@ static void fill_device_from_item(struct extent_buffer *leaf, | |||
5968 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | 6069 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); |
5969 | } | 6070 | } |
5970 | 6071 | ||
5971 | static int open_seed_devices(struct btrfs_root *root, u8 *fsid) | 6072 | static struct btrfs_fs_devices *open_seed_devices(struct btrfs_root *root, |
6073 | u8 *fsid) | ||
5972 | { | 6074 | { |
5973 | struct btrfs_fs_devices *fs_devices; | 6075 | struct btrfs_fs_devices *fs_devices; |
5974 | int ret; | 6076 | int ret; |
@@ -5977,49 +6079,56 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) | |||
5977 | 6079 | ||
5978 | fs_devices = root->fs_info->fs_devices->seed; | 6080 | fs_devices = root->fs_info->fs_devices->seed; |
5979 | while (fs_devices) { | 6081 | while (fs_devices) { |
5980 | if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) { | 6082 | if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) |
5981 | ret = 0; | 6083 | return fs_devices; |
5982 | goto out; | 6084 | |
5983 | } | ||
5984 | fs_devices = fs_devices->seed; | 6085 | fs_devices = fs_devices->seed; |
5985 | } | 6086 | } |
5986 | 6087 | ||
5987 | fs_devices = find_fsid(fsid); | 6088 | fs_devices = find_fsid(fsid); |
5988 | if (!fs_devices) { | 6089 | if (!fs_devices) { |
5989 | ret = -ENOENT; | 6090 | if (!btrfs_test_opt(root, DEGRADED)) |
5990 | goto out; | 6091 | return ERR_PTR(-ENOENT); |
6092 | |||
6093 | fs_devices = alloc_fs_devices(fsid); | ||
6094 | if (IS_ERR(fs_devices)) | ||
6095 | return fs_devices; | ||
6096 | |||
6097 | fs_devices->seeding = 1; | ||
6098 | fs_devices->opened = 1; | ||
6099 | return fs_devices; | ||
5991 | } | 6100 | } |
5992 | 6101 | ||
5993 | fs_devices = clone_fs_devices(fs_devices); | 6102 | fs_devices = clone_fs_devices(fs_devices); |
5994 | if (IS_ERR(fs_devices)) { | 6103 | if (IS_ERR(fs_devices)) |
5995 | ret = PTR_ERR(fs_devices); | 6104 | return fs_devices; |
5996 | goto out; | ||
5997 | } | ||
5998 | 6105 | ||
5999 | ret = __btrfs_open_devices(fs_devices, FMODE_READ, | 6106 | ret = __btrfs_open_devices(fs_devices, FMODE_READ, |
6000 | root->fs_info->bdev_holder); | 6107 | root->fs_info->bdev_holder); |
6001 | if (ret) { | 6108 | if (ret) { |
6002 | free_fs_devices(fs_devices); | 6109 | free_fs_devices(fs_devices); |
6110 | fs_devices = ERR_PTR(ret); | ||
6003 | goto out; | 6111 | goto out; |
6004 | } | 6112 | } |
6005 | 6113 | ||
6006 | if (!fs_devices->seeding) { | 6114 | if (!fs_devices->seeding) { |
6007 | __btrfs_close_devices(fs_devices); | 6115 | __btrfs_close_devices(fs_devices); |
6008 | free_fs_devices(fs_devices); | 6116 | free_fs_devices(fs_devices); |
6009 | ret = -EINVAL; | 6117 | fs_devices = ERR_PTR(-EINVAL); |
6010 | goto out; | 6118 | goto out; |
6011 | } | 6119 | } |
6012 | 6120 | ||
6013 | fs_devices->seed = root->fs_info->fs_devices->seed; | 6121 | fs_devices->seed = root->fs_info->fs_devices->seed; |
6014 | root->fs_info->fs_devices->seed = fs_devices; | 6122 | root->fs_info->fs_devices->seed = fs_devices; |
6015 | out: | 6123 | out: |
6016 | return ret; | 6124 | return fs_devices; |
6017 | } | 6125 | } |
6018 | 6126 | ||
6019 | static int read_one_dev(struct btrfs_root *root, | 6127 | static int read_one_dev(struct btrfs_root *root, |
6020 | struct extent_buffer *leaf, | 6128 | struct extent_buffer *leaf, |
6021 | struct btrfs_dev_item *dev_item) | 6129 | struct btrfs_dev_item *dev_item) |
6022 | { | 6130 | { |
6131 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; | ||
6023 | struct btrfs_device *device; | 6132 | struct btrfs_device *device; |
6024 | u64 devid; | 6133 | u64 devid; |
6025 | int ret; | 6134 | int ret; |
@@ -6033,31 +6142,48 @@ static int read_one_dev(struct btrfs_root *root, | |||
6033 | BTRFS_UUID_SIZE); | 6142 | BTRFS_UUID_SIZE); |
6034 | 6143 | ||
6035 | if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { | 6144 | if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { |
6036 | ret = open_seed_devices(root, fs_uuid); | 6145 | fs_devices = open_seed_devices(root, fs_uuid); |
6037 | if (ret && !btrfs_test_opt(root, DEGRADED)) | 6146 | if (IS_ERR(fs_devices)) |
6038 | return ret; | 6147 | return PTR_ERR(fs_devices); |
6039 | } | 6148 | } |
6040 | 6149 | ||
6041 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); | 6150 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); |
6042 | if (!device || !device->bdev) { | 6151 | if (!device) { |
6043 | if (!btrfs_test_opt(root, DEGRADED)) | 6152 | if (!btrfs_test_opt(root, DEGRADED)) |
6044 | return -EIO; | 6153 | return -EIO; |
6045 | 6154 | ||
6046 | if (!device) { | 6155 | btrfs_warn(root->fs_info, "devid %llu missing", devid); |
6047 | btrfs_warn(root->fs_info, "devid %llu missing", devid); | 6156 | device = add_missing_dev(root, fs_devices, devid, dev_uuid); |
6048 | device = add_missing_dev(root, devid, dev_uuid); | 6157 | if (!device) |
6049 | if (!device) | 6158 | return -ENOMEM; |
6050 | return -ENOMEM; | 6159 | } else { |
6051 | } else if (!device->missing) { | 6160 | if (!device->bdev && !btrfs_test_opt(root, DEGRADED)) |
6161 | return -EIO; | ||
6162 | |||
6163 | if(!device->bdev && !device->missing) { | ||
6052 | /* | 6164 | /* |
6053 | * this happens when a device that was properly setup | 6165 | * this happens when a device that was properly setup |
6054 | * in the device info lists suddenly goes bad. | 6166 | * in the device info lists suddenly goes bad. |
6055 | * device->bdev is NULL, and so we have to set | 6167 | * device->bdev is NULL, and so we have to set |
6056 | * device->missing to one here | 6168 | * device->missing to one here |
6057 | */ | 6169 | */ |
6058 | root->fs_info->fs_devices->missing_devices++; | 6170 | device->fs_devices->missing_devices++; |
6059 | device->missing = 1; | 6171 | device->missing = 1; |
6060 | } | 6172 | } |
6173 | |||
6174 | /* Move the device to its own fs_devices */ | ||
6175 | if (device->fs_devices != fs_devices) { | ||
6176 | ASSERT(device->missing); | ||
6177 | |||
6178 | list_move(&device->dev_list, &fs_devices->devices); | ||
6179 | device->fs_devices->num_devices--; | ||
6180 | fs_devices->num_devices++; | ||
6181 | |||
6182 | device->fs_devices->missing_devices--; | ||
6183 | fs_devices->missing_devices++; | ||
6184 | |||
6185 | device->fs_devices = fs_devices; | ||
6186 | } | ||
6061 | } | 6187 | } |
6062 | 6188 | ||
6063 | if (device->fs_devices != root->fs_info->fs_devices) { | 6189 | if (device->fs_devices != root->fs_info->fs_devices) { |
@@ -6373,16 +6499,18 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | |||
6373 | struct btrfs_root *dev_root = fs_info->dev_root; | 6499 | struct btrfs_root *dev_root = fs_info->dev_root; |
6374 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | 6500 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; |
6375 | struct btrfs_device *device; | 6501 | struct btrfs_device *device; |
6502 | int stats_cnt; | ||
6376 | int ret = 0; | 6503 | int ret = 0; |
6377 | 6504 | ||
6378 | mutex_lock(&fs_devices->device_list_mutex); | 6505 | mutex_lock(&fs_devices->device_list_mutex); |
6379 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 6506 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
6380 | if (!device->dev_stats_valid || !device->dev_stats_dirty) | 6507 | if (!device->dev_stats_valid || !btrfs_dev_stats_dirty(device)) |
6381 | continue; | 6508 | continue; |
6382 | 6509 | ||
6510 | stats_cnt = atomic_read(&device->dev_stats_ccnt); | ||
6383 | ret = update_dev_stat_item(trans, dev_root, device); | 6511 | ret = update_dev_stat_item(trans, dev_root, device); |
6384 | if (!ret) | 6512 | if (!ret) |
6385 | device->dev_stats_dirty = 0; | 6513 | atomic_sub(stats_cnt, &device->dev_stats_ccnt); |
6386 | } | 6514 | } |
6387 | mutex_unlock(&fs_devices->device_list_mutex); | 6515 | mutex_unlock(&fs_devices->device_list_mutex); |
6388 | 6516 | ||
@@ -6481,3 +6609,51 @@ int btrfs_scratch_superblock(struct btrfs_device *device) | |||
6481 | 6609 | ||
6482 | return 0; | 6610 | return 0; |
6483 | } | 6611 | } |
6612 | |||
6613 | /* | ||
6614 | * Update the size of all devices, which is used for writing out the | ||
6615 | * super blocks. | ||
6616 | */ | ||
6617 | void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info) | ||
6618 | { | ||
6619 | struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; | ||
6620 | struct btrfs_device *curr, *next; | ||
6621 | |||
6622 | if (list_empty(&fs_devices->resized_devices)) | ||
6623 | return; | ||
6624 | |||
6625 | mutex_lock(&fs_devices->device_list_mutex); | ||
6626 | lock_chunks(fs_info->dev_root); | ||
6627 | list_for_each_entry_safe(curr, next, &fs_devices->resized_devices, | ||
6628 | resized_list) { | ||
6629 | list_del_init(&curr->resized_list); | ||
6630 | curr->commit_total_bytes = curr->disk_total_bytes; | ||
6631 | } | ||
6632 | unlock_chunks(fs_info->dev_root); | ||
6633 | mutex_unlock(&fs_devices->device_list_mutex); | ||
6634 | } | ||
6635 | |||
6636 | /* Must be invoked during the transaction commit */ | ||
6637 | void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, | ||
6638 | struct btrfs_transaction *transaction) | ||
6639 | { | ||
6640 | struct extent_map *em; | ||
6641 | struct map_lookup *map; | ||
6642 | struct btrfs_device *dev; | ||
6643 | int i; | ||
6644 | |||
6645 | if (list_empty(&transaction->pending_chunks)) | ||
6646 | return; | ||
6647 | |||
6648 | /* In order to kick the device replace finish process */ | ||
6649 | lock_chunks(root); | ||
6650 | list_for_each_entry(em, &transaction->pending_chunks, list) { | ||
6651 | map = (struct map_lookup *)em->bdev; | ||
6652 | |||
6653 | for (i = 0; i < map->num_stripes; i++) { | ||
6654 | dev = map->stripes[i].dev; | ||
6655 | dev->commit_bytes_used = dev->bytes_used; | ||
6656 | } | ||
6657 | } | ||
6658 | unlock_chunks(root); | ||
6659 | } | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2aaa00c47816..08980fa23039 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/btrfs.h> | 24 | #include <linux/btrfs.h> |
25 | #include "async-thread.h" | 25 | #include "async-thread.h" |
26 | 26 | ||
27 | extern struct mutex uuid_mutex; | ||
28 | |||
27 | #define BTRFS_STRIPE_LEN (64 * 1024) | 29 | #define BTRFS_STRIPE_LEN (64 * 1024) |
28 | 30 | ||
29 | struct buffer_head; | 31 | struct buffer_head; |
@@ -32,41 +34,59 @@ struct btrfs_pending_bios { | |||
32 | struct bio *tail; | 34 | struct bio *tail; |
33 | }; | 35 | }; |
34 | 36 | ||
37 | /* | ||
38 | * Use sequence counter to get consistent device stat data on | ||
39 | * 32-bit processors. | ||
40 | */ | ||
41 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | ||
42 | #include <linux/seqlock.h> | ||
43 | #define __BTRFS_NEED_DEVICE_DATA_ORDERED | ||
44 | #define btrfs_device_data_ordered_init(device) \ | ||
45 | seqcount_init(&device->data_seqcount) | ||
46 | #else | ||
47 | #define btrfs_device_data_ordered_init(device) do { } while (0) | ||
48 | #endif | ||
49 | |||
35 | struct btrfs_device { | 50 | struct btrfs_device { |
36 | struct list_head dev_list; | 51 | struct list_head dev_list; |
37 | struct list_head dev_alloc_list; | 52 | struct list_head dev_alloc_list; |
38 | struct btrfs_fs_devices *fs_devices; | 53 | struct btrfs_fs_devices *fs_devices; |
54 | |||
39 | struct btrfs_root *dev_root; | 55 | struct btrfs_root *dev_root; |
40 | 56 | ||
57 | struct rcu_string *name; | ||
58 | |||
59 | u64 generation; | ||
60 | |||
61 | spinlock_t io_lock ____cacheline_aligned; | ||
62 | int running_pending; | ||
41 | /* regular prio bios */ | 63 | /* regular prio bios */ |
42 | struct btrfs_pending_bios pending_bios; | 64 | struct btrfs_pending_bios pending_bios; |
43 | /* WRITE_SYNC bios */ | 65 | /* WRITE_SYNC bios */ |
44 | struct btrfs_pending_bios pending_sync_bios; | 66 | struct btrfs_pending_bios pending_sync_bios; |
45 | 67 | ||
46 | u64 generation; | 68 | struct block_device *bdev; |
47 | int running_pending; | 69 | |
70 | /* the mode sent to blkdev_get */ | ||
71 | fmode_t mode; | ||
72 | |||
48 | int writeable; | 73 | int writeable; |
49 | int in_fs_metadata; | 74 | int in_fs_metadata; |
50 | int missing; | 75 | int missing; |
51 | int can_discard; | 76 | int can_discard; |
52 | int is_tgtdev_for_dev_replace; | 77 | int is_tgtdev_for_dev_replace; |
53 | 78 | ||
54 | spinlock_t io_lock; | 79 | #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED |
55 | /* the mode sent to blkdev_get */ | 80 | seqcount_t data_seqcount; |
56 | fmode_t mode; | 81 | #endif |
57 | |||
58 | struct block_device *bdev; | ||
59 | |||
60 | |||
61 | struct rcu_string *name; | ||
62 | 82 | ||
63 | /* the internal btrfs device id */ | 83 | /* the internal btrfs device id */ |
64 | u64 devid; | 84 | u64 devid; |
65 | 85 | ||
66 | /* size of the device */ | 86 | /* size of the device in memory */ |
67 | u64 total_bytes; | 87 | u64 total_bytes; |
68 | 88 | ||
69 | /* size of the disk */ | 89 | /* size of the device on disk */ |
70 | u64 disk_total_bytes; | 90 | u64 disk_total_bytes; |
71 | 91 | ||
72 | /* bytes used */ | 92 | /* bytes used */ |
@@ -83,10 +103,26 @@ struct btrfs_device { | |||
83 | /* minimal io size for this device */ | 103 | /* minimal io size for this device */ |
84 | u32 sector_size; | 104 | u32 sector_size; |
85 | 105 | ||
86 | |||
87 | /* physical drive uuid (or lvm uuid) */ | 106 | /* physical drive uuid (or lvm uuid) */ |
88 | u8 uuid[BTRFS_UUID_SIZE]; | 107 | u8 uuid[BTRFS_UUID_SIZE]; |
89 | 108 | ||
109 | /* | ||
110 | * size of the device on the current transaction | ||
111 | * | ||
112 | * This variant is update when committing the transaction, | ||
113 | * and protected by device_list_mutex | ||
114 | */ | ||
115 | u64 commit_total_bytes; | ||
116 | |||
117 | /* bytes used on the current transaction */ | ||
118 | u64 commit_bytes_used; | ||
119 | /* | ||
120 | * used to manage the device which is resized | ||
121 | * | ||
122 | * It is protected by chunk_lock. | ||
123 | */ | ||
124 | struct list_head resized_list; | ||
125 | |||
90 | /* for sending down flush barriers */ | 126 | /* for sending down flush barriers */ |
91 | int nobarriers; | 127 | int nobarriers; |
92 | struct bio *flush_bio; | 128 | struct bio *flush_bio; |
@@ -107,26 +143,90 @@ struct btrfs_device { | |||
107 | struct radix_tree_root reada_zones; | 143 | struct radix_tree_root reada_zones; |
108 | struct radix_tree_root reada_extents; | 144 | struct radix_tree_root reada_extents; |
109 | 145 | ||
110 | |||
111 | /* disk I/O failure stats. For detailed description refer to | 146 | /* disk I/O failure stats. For detailed description refer to |
112 | * enum btrfs_dev_stat_values in ioctl.h */ | 147 | * enum btrfs_dev_stat_values in ioctl.h */ |
113 | int dev_stats_valid; | 148 | int dev_stats_valid; |
114 | int dev_stats_dirty; /* counters need to be written to disk */ | 149 | |
150 | /* Counter to record the change of device stats */ | ||
151 | atomic_t dev_stats_ccnt; | ||
115 | atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; | 152 | atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX]; |
116 | }; | 153 | }; |
117 | 154 | ||
155 | /* | ||
156 | * If we read those variants at the context of their own lock, we needn't | ||
157 | * use the following helpers, reading them directly is safe. | ||
158 | */ | ||
159 | #if BITS_PER_LONG==32 && defined(CONFIG_SMP) | ||
160 | #define BTRFS_DEVICE_GETSET_FUNCS(name) \ | ||
161 | static inline u64 \ | ||
162 | btrfs_device_get_##name(const struct btrfs_device *dev) \ | ||
163 | { \ | ||
164 | u64 size; \ | ||
165 | unsigned int seq; \ | ||
166 | \ | ||
167 | do { \ | ||
168 | seq = read_seqcount_begin(&dev->data_seqcount); \ | ||
169 | size = dev->name; \ | ||
170 | } while (read_seqcount_retry(&dev->data_seqcount, seq)); \ | ||
171 | return size; \ | ||
172 | } \ | ||
173 | \ | ||
174 | static inline void \ | ||
175 | btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ | ||
176 | { \ | ||
177 | preempt_disable(); \ | ||
178 | write_seqcount_begin(&dev->data_seqcount); \ | ||
179 | dev->name = size; \ | ||
180 | write_seqcount_end(&dev->data_seqcount); \ | ||
181 | preempt_enable(); \ | ||
182 | } | ||
183 | #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) | ||
184 | #define BTRFS_DEVICE_GETSET_FUNCS(name) \ | ||
185 | static inline u64 \ | ||
186 | btrfs_device_get_##name(const struct btrfs_device *dev) \ | ||
187 | { \ | ||
188 | u64 size; \ | ||
189 | \ | ||
190 | preempt_disable(); \ | ||
191 | size = dev->name; \ | ||
192 | preempt_enable(); \ | ||
193 | return size; \ | ||
194 | } \ | ||
195 | \ | ||
196 | static inline void \ | ||
197 | btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ | ||
198 | { \ | ||
199 | preempt_disable(); \ | ||
200 | dev->name = size; \ | ||
201 | preempt_enable(); \ | ||
202 | } | ||
203 | #else | ||
204 | #define BTRFS_DEVICE_GETSET_FUNCS(name) \ | ||
205 | static inline u64 \ | ||
206 | btrfs_device_get_##name(const struct btrfs_device *dev) \ | ||
207 | { \ | ||
208 | return dev->name; \ | ||
209 | } \ | ||
210 | \ | ||
211 | static inline void \ | ||
212 | btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ | ||
213 | { \ | ||
214 | dev->name = size; \ | ||
215 | } | ||
216 | #endif | ||
217 | |||
218 | BTRFS_DEVICE_GETSET_FUNCS(total_bytes); | ||
219 | BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes); | ||
220 | BTRFS_DEVICE_GETSET_FUNCS(bytes_used); | ||
221 | |||
118 | struct btrfs_fs_devices { | 222 | struct btrfs_fs_devices { |
119 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ | 223 | u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ |
120 | 224 | ||
121 | /* the device with this id has the most recent copy of the super */ | ||
122 | u64 latest_devid; | ||
123 | u64 latest_trans; | ||
124 | u64 num_devices; | 225 | u64 num_devices; |
125 | u64 open_devices; | 226 | u64 open_devices; |
126 | u64 rw_devices; | 227 | u64 rw_devices; |
127 | u64 missing_devices; | 228 | u64 missing_devices; |
128 | u64 total_rw_bytes; | 229 | u64 total_rw_bytes; |
129 | u64 num_can_discard; | ||
130 | u64 total_devices; | 230 | u64 total_devices; |
131 | struct block_device *latest_bdev; | 231 | struct block_device *latest_bdev; |
132 | 232 | ||
@@ -139,6 +239,7 @@ struct btrfs_fs_devices { | |||
139 | struct mutex device_list_mutex; | 239 | struct mutex device_list_mutex; |
140 | struct list_head devices; | 240 | struct list_head devices; |
141 | 241 | ||
242 | struct list_head resized_devices; | ||
142 | /* devices not currently being allocated */ | 243 | /* devices not currently being allocated */ |
143 | struct list_head alloc_list; | 244 | struct list_head alloc_list; |
144 | struct list_head list; | 245 | struct list_head list; |
@@ -167,8 +268,9 @@ struct btrfs_fs_devices { | |||
167 | */ | 268 | */ |
168 | typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); | 269 | typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err); |
169 | struct btrfs_io_bio { | 270 | struct btrfs_io_bio { |
170 | unsigned long mirror_num; | 271 | unsigned int mirror_num; |
171 | unsigned long stripe_index; | 272 | unsigned int stripe_index; |
273 | u64 logical; | ||
172 | u8 *csum; | 274 | u8 *csum; |
173 | u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; | 275 | u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE]; |
174 | u8 *csum_allocated; | 276 | u8 *csum_allocated; |
@@ -325,6 +427,7 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, | |||
325 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); | 427 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); |
326 | int btrfs_init_new_device(struct btrfs_root *root, char *path); | 428 | int btrfs_init_new_device(struct btrfs_root *root, char *path); |
327 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | 429 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, |
430 | struct btrfs_device *srcdev, | ||
328 | struct btrfs_device **device_out); | 431 | struct btrfs_device **device_out); |
329 | int btrfs_balance(struct btrfs_balance_control *bctl, | 432 | int btrfs_balance(struct btrfs_balance_control *bctl, |
330 | struct btrfs_ioctl_balance_args *bargs); | 433 | struct btrfs_ioctl_balance_args *bargs); |
@@ -360,11 +463,20 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root, | |||
360 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, | 463 | int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, |
361 | struct btrfs_root *extent_root, | 464 | struct btrfs_root *extent_root, |
362 | u64 chunk_offset, u64 chunk_size); | 465 | u64 chunk_offset, u64 chunk_size); |
466 | int btrfs_remove_chunk(struct btrfs_trans_handle *trans, | ||
467 | struct btrfs_root *root, u64 chunk_offset); | ||
468 | |||
469 | static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev) | ||
470 | { | ||
471 | return atomic_read(&dev->dev_stats_ccnt); | ||
472 | } | ||
473 | |||
363 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | 474 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, |
364 | int index) | 475 | int index) |
365 | { | 476 | { |
366 | atomic_inc(dev->dev_stat_values + index); | 477 | atomic_inc(dev->dev_stat_values + index); |
367 | dev->dev_stats_dirty = 1; | 478 | smp_mb__before_atomic(); |
479 | atomic_inc(&dev->dev_stats_ccnt); | ||
368 | } | 480 | } |
369 | 481 | ||
370 | static inline int btrfs_dev_stat_read(struct btrfs_device *dev, | 482 | static inline int btrfs_dev_stat_read(struct btrfs_device *dev, |
@@ -379,7 +491,8 @@ static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev, | |||
379 | int ret; | 491 | int ret; |
380 | 492 | ||
381 | ret = atomic_xchg(dev->dev_stat_values + index, 0); | 493 | ret = atomic_xchg(dev->dev_stat_values + index, 0); |
382 | dev->dev_stats_dirty = 1; | 494 | smp_mb__before_atomic(); |
495 | atomic_inc(&dev->dev_stats_ccnt); | ||
383 | return ret; | 496 | return ret; |
384 | } | 497 | } |
385 | 498 | ||
@@ -387,7 +500,8 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev, | |||
387 | int index, unsigned long val) | 500 | int index, unsigned long val) |
388 | { | 501 | { |
389 | atomic_set(dev->dev_stat_values + index, val); | 502 | atomic_set(dev->dev_stat_values + index, val); |
390 | dev->dev_stats_dirty = 1; | 503 | smp_mb__before_atomic(); |
504 | atomic_inc(&dev->dev_stats_ccnt); | ||
391 | } | 505 | } |
392 | 506 | ||
393 | static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, | 507 | static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, |
@@ -395,4 +509,8 @@ static inline void btrfs_dev_stat_reset(struct btrfs_device *dev, | |||
395 | { | 509 | { |
396 | btrfs_dev_stat_set(dev, index, 0); | 510 | btrfs_dev_stat_set(dev, index, 0); |
397 | } | 511 | } |
512 | |||
513 | void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info); | ||
514 | void btrfs_update_commit_device_bytes_used(struct btrfs_root *root, | ||
515 | struct btrfs_transaction *transaction); | ||
398 | #endif | 516 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index ad8328d797ea..dcf20131fbe4 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -237,7 +237,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
237 | * first xattr that we find and walk forward | 237 | * first xattr that we find and walk forward |
238 | */ | 238 | */ |
239 | key.objectid = btrfs_ino(inode); | 239 | key.objectid = btrfs_ino(inode); |
240 | btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); | 240 | key.type = BTRFS_XATTR_ITEM_KEY; |
241 | key.offset = 0; | 241 | key.offset = 0; |
242 | 242 | ||
243 | path = btrfs_alloc_path(); | 243 | path = btrfs_alloc_path(); |
@@ -273,7 +273,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
273 | /* check to make sure this item is what we want */ | 273 | /* check to make sure this item is what we want */ |
274 | if (found_key.objectid != key.objectid) | 274 | if (found_key.objectid != key.objectid) |
275 | break; | 275 | break; |
276 | if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) | 276 | if (found_key.type != BTRFS_XATTR_ITEM_KEY) |
277 | break; | 277 | break; |
278 | 278 | ||
279 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 279 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index b67d8fc81277..759fa4e2de8f 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -33,8 +33,7 @@ | |||
33 | #include "compression.h" | 33 | #include "compression.h" |
34 | 34 | ||
35 | struct workspace { | 35 | struct workspace { |
36 | z_stream inf_strm; | 36 | z_stream strm; |
37 | z_stream def_strm; | ||
38 | char *buf; | 37 | char *buf; |
39 | struct list_head list; | 38 | struct list_head list; |
40 | }; | 39 | }; |
@@ -43,8 +42,7 @@ static void zlib_free_workspace(struct list_head *ws) | |||
43 | { | 42 | { |
44 | struct workspace *workspace = list_entry(ws, struct workspace, list); | 43 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
45 | 44 | ||
46 | vfree(workspace->def_strm.workspace); | 45 | vfree(workspace->strm.workspace); |
47 | vfree(workspace->inf_strm.workspace); | ||
48 | kfree(workspace->buf); | 46 | kfree(workspace->buf); |
49 | kfree(workspace); | 47 | kfree(workspace); |
50 | } | 48 | } |
@@ -52,17 +50,17 @@ static void zlib_free_workspace(struct list_head *ws) | |||
52 | static struct list_head *zlib_alloc_workspace(void) | 50 | static struct list_head *zlib_alloc_workspace(void) |
53 | { | 51 | { |
54 | struct workspace *workspace; | 52 | struct workspace *workspace; |
53 | int workspacesize; | ||
55 | 54 | ||
56 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); | 55 | workspace = kzalloc(sizeof(*workspace), GFP_NOFS); |
57 | if (!workspace) | 56 | if (!workspace) |
58 | return ERR_PTR(-ENOMEM); | 57 | return ERR_PTR(-ENOMEM); |
59 | 58 | ||
60 | workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize( | 59 | workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), |
61 | MAX_WBITS, MAX_MEM_LEVEL)); | 60 | zlib_inflate_workspacesize()); |
62 | workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); | 61 | workspace->strm.workspace = vmalloc(workspacesize); |
63 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 62 | workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
64 | if (!workspace->def_strm.workspace || | 63 | if (!workspace->strm.workspace || !workspace->buf) |
65 | !workspace->inf_strm.workspace || !workspace->buf) | ||
66 | goto fail; | 64 | goto fail; |
67 | 65 | ||
68 | INIT_LIST_HEAD(&workspace->list); | 66 | INIT_LIST_HEAD(&workspace->list); |
@@ -96,14 +94,14 @@ static int zlib_compress_pages(struct list_head *ws, | |||
96 | *total_out = 0; | 94 | *total_out = 0; |
97 | *total_in = 0; | 95 | *total_in = 0; |
98 | 96 | ||
99 | if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { | 97 | if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) { |
100 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); | 98 | printk(KERN_WARNING "BTRFS: deflateInit failed\n"); |
101 | ret = -EIO; | 99 | ret = -EIO; |
102 | goto out; | 100 | goto out; |
103 | } | 101 | } |
104 | 102 | ||
105 | workspace->def_strm.total_in = 0; | 103 | workspace->strm.total_in = 0; |
106 | workspace->def_strm.total_out = 0; | 104 | workspace->strm.total_out = 0; |
107 | 105 | ||
108 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); | 106 | in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); |
109 | data_in = kmap(in_page); | 107 | data_in = kmap(in_page); |
@@ -117,25 +115,25 @@ static int zlib_compress_pages(struct list_head *ws, | |||
117 | pages[0] = out_page; | 115 | pages[0] = out_page; |
118 | nr_pages = 1; | 116 | nr_pages = 1; |
119 | 117 | ||
120 | workspace->def_strm.next_in = data_in; | 118 | workspace->strm.next_in = data_in; |
121 | workspace->def_strm.next_out = cpage_out; | 119 | workspace->strm.next_out = cpage_out; |
122 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | 120 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
123 | workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); | 121 | workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE); |
124 | 122 | ||
125 | while (workspace->def_strm.total_in < len) { | 123 | while (workspace->strm.total_in < len) { |
126 | ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); | 124 | ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); |
127 | if (ret != Z_OK) { | 125 | if (ret != Z_OK) { |
128 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", | 126 | printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n", |
129 | ret); | 127 | ret); |
130 | zlib_deflateEnd(&workspace->def_strm); | 128 | zlib_deflateEnd(&workspace->strm); |
131 | ret = -EIO; | 129 | ret = -EIO; |
132 | goto out; | 130 | goto out; |
133 | } | 131 | } |
134 | 132 | ||
135 | /* we're making it bigger, give up */ | 133 | /* we're making it bigger, give up */ |
136 | if (workspace->def_strm.total_in > 8192 && | 134 | if (workspace->strm.total_in > 8192 && |
137 | workspace->def_strm.total_in < | 135 | workspace->strm.total_in < |
138 | workspace->def_strm.total_out) { | 136 | workspace->strm.total_out) { |
139 | ret = -E2BIG; | 137 | ret = -E2BIG; |
140 | goto out; | 138 | goto out; |
141 | } | 139 | } |
@@ -143,7 +141,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
143 | * before the total_in so we will pull in a new page for | 141 | * before the total_in so we will pull in a new page for |
144 | * the stream end if required | 142 | * the stream end if required |
145 | */ | 143 | */ |
146 | if (workspace->def_strm.avail_out == 0) { | 144 | if (workspace->strm.avail_out == 0) { |
147 | kunmap(out_page); | 145 | kunmap(out_page); |
148 | if (nr_pages == nr_dest_pages) { | 146 | if (nr_pages == nr_dest_pages) { |
149 | out_page = NULL; | 147 | out_page = NULL; |
@@ -158,19 +156,19 @@ static int zlib_compress_pages(struct list_head *ws, | |||
158 | cpage_out = kmap(out_page); | 156 | cpage_out = kmap(out_page); |
159 | pages[nr_pages] = out_page; | 157 | pages[nr_pages] = out_page; |
160 | nr_pages++; | 158 | nr_pages++; |
161 | workspace->def_strm.avail_out = PAGE_CACHE_SIZE; | 159 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
162 | workspace->def_strm.next_out = cpage_out; | 160 | workspace->strm.next_out = cpage_out; |
163 | } | 161 | } |
164 | /* we're all done */ | 162 | /* we're all done */ |
165 | if (workspace->def_strm.total_in >= len) | 163 | if (workspace->strm.total_in >= len) |
166 | break; | 164 | break; |
167 | 165 | ||
168 | /* we've read in a full page, get a new one */ | 166 | /* we've read in a full page, get a new one */ |
169 | if (workspace->def_strm.avail_in == 0) { | 167 | if (workspace->strm.avail_in == 0) { |
170 | if (workspace->def_strm.total_out > max_out) | 168 | if (workspace->strm.total_out > max_out) |
171 | break; | 169 | break; |
172 | 170 | ||
173 | bytes_left = len - workspace->def_strm.total_in; | 171 | bytes_left = len - workspace->strm.total_in; |
174 | kunmap(in_page); | 172 | kunmap(in_page); |
175 | page_cache_release(in_page); | 173 | page_cache_release(in_page); |
176 | 174 | ||
@@ -178,28 +176,28 @@ static int zlib_compress_pages(struct list_head *ws, | |||
178 | in_page = find_get_page(mapping, | 176 | in_page = find_get_page(mapping, |
179 | start >> PAGE_CACHE_SHIFT); | 177 | start >> PAGE_CACHE_SHIFT); |
180 | data_in = kmap(in_page); | 178 | data_in = kmap(in_page); |
181 | workspace->def_strm.avail_in = min(bytes_left, | 179 | workspace->strm.avail_in = min(bytes_left, |
182 | PAGE_CACHE_SIZE); | 180 | PAGE_CACHE_SIZE); |
183 | workspace->def_strm.next_in = data_in; | 181 | workspace->strm.next_in = data_in; |
184 | } | 182 | } |
185 | } | 183 | } |
186 | workspace->def_strm.avail_in = 0; | 184 | workspace->strm.avail_in = 0; |
187 | ret = zlib_deflate(&workspace->def_strm, Z_FINISH); | 185 | ret = zlib_deflate(&workspace->strm, Z_FINISH); |
188 | zlib_deflateEnd(&workspace->def_strm); | 186 | zlib_deflateEnd(&workspace->strm); |
189 | 187 | ||
190 | if (ret != Z_STREAM_END) { | 188 | if (ret != Z_STREAM_END) { |
191 | ret = -EIO; | 189 | ret = -EIO; |
192 | goto out; | 190 | goto out; |
193 | } | 191 | } |
194 | 192 | ||
195 | if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { | 193 | if (workspace->strm.total_out >= workspace->strm.total_in) { |
196 | ret = -E2BIG; | 194 | ret = -E2BIG; |
197 | goto out; | 195 | goto out; |
198 | } | 196 | } |
199 | 197 | ||
200 | ret = 0; | 198 | ret = 0; |
201 | *total_out = workspace->def_strm.total_out; | 199 | *total_out = workspace->strm.total_out; |
202 | *total_in = workspace->def_strm.total_in; | 200 | *total_in = workspace->strm.total_in; |
203 | out: | 201 | out: |
204 | *out_pages = nr_pages; | 202 | *out_pages = nr_pages; |
205 | if (out_page) | 203 | if (out_page) |
@@ -225,19 +223,18 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
225 | size_t total_out = 0; | 223 | size_t total_out = 0; |
226 | unsigned long page_in_index = 0; | 224 | unsigned long page_in_index = 0; |
227 | unsigned long page_out_index = 0; | 225 | unsigned long page_out_index = 0; |
228 | unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / | 226 | unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE); |
229 | PAGE_CACHE_SIZE; | ||
230 | unsigned long buf_start; | 227 | unsigned long buf_start; |
231 | unsigned long pg_offset; | 228 | unsigned long pg_offset; |
232 | 229 | ||
233 | data_in = kmap(pages_in[page_in_index]); | 230 | data_in = kmap(pages_in[page_in_index]); |
234 | workspace->inf_strm.next_in = data_in; | 231 | workspace->strm.next_in = data_in; |
235 | workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); | 232 | workspace->strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); |
236 | workspace->inf_strm.total_in = 0; | 233 | workspace->strm.total_in = 0; |
237 | 234 | ||
238 | workspace->inf_strm.total_out = 0; | 235 | workspace->strm.total_out = 0; |
239 | workspace->inf_strm.next_out = workspace->buf; | 236 | workspace->strm.next_out = workspace->buf; |
240 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 237 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
241 | pg_offset = 0; | 238 | pg_offset = 0; |
242 | 239 | ||
243 | /* If it's deflate, and it's got no preset dictionary, then | 240 | /* If it's deflate, and it's got no preset dictionary, then |
@@ -247,21 +244,21 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
247 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | 244 | !(((data_in[0]<<8) + data_in[1]) % 31)) { |
248 | 245 | ||
249 | wbits = -((data_in[0] >> 4) + 8); | 246 | wbits = -((data_in[0] >> 4) + 8); |
250 | workspace->inf_strm.next_in += 2; | 247 | workspace->strm.next_in += 2; |
251 | workspace->inf_strm.avail_in -= 2; | 248 | workspace->strm.avail_in -= 2; |
252 | } | 249 | } |
253 | 250 | ||
254 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 251 | if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { |
255 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 252 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
256 | return -EIO; | 253 | return -EIO; |
257 | } | 254 | } |
258 | while (workspace->inf_strm.total_in < srclen) { | 255 | while (workspace->strm.total_in < srclen) { |
259 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 256 | ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); |
260 | if (ret != Z_OK && ret != Z_STREAM_END) | 257 | if (ret != Z_OK && ret != Z_STREAM_END) |
261 | break; | 258 | break; |
262 | 259 | ||
263 | buf_start = total_out; | 260 | buf_start = total_out; |
264 | total_out = workspace->inf_strm.total_out; | 261 | total_out = workspace->strm.total_out; |
265 | 262 | ||
266 | /* we didn't make progress in this inflate call, we're done */ | 263 | /* we didn't make progress in this inflate call, we're done */ |
267 | if (buf_start == total_out) | 264 | if (buf_start == total_out) |
@@ -276,10 +273,10 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
276 | goto done; | 273 | goto done; |
277 | } | 274 | } |
278 | 275 | ||
279 | workspace->inf_strm.next_out = workspace->buf; | 276 | workspace->strm.next_out = workspace->buf; |
280 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 277 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
281 | 278 | ||
282 | if (workspace->inf_strm.avail_in == 0) { | 279 | if (workspace->strm.avail_in == 0) { |
283 | unsigned long tmp; | 280 | unsigned long tmp; |
284 | kunmap(pages_in[page_in_index]); | 281 | kunmap(pages_in[page_in_index]); |
285 | page_in_index++; | 282 | page_in_index++; |
@@ -288,9 +285,9 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
288 | break; | 285 | break; |
289 | } | 286 | } |
290 | data_in = kmap(pages_in[page_in_index]); | 287 | data_in = kmap(pages_in[page_in_index]); |
291 | workspace->inf_strm.next_in = data_in; | 288 | workspace->strm.next_in = data_in; |
292 | tmp = srclen - workspace->inf_strm.total_in; | 289 | tmp = srclen - workspace->strm.total_in; |
293 | workspace->inf_strm.avail_in = min(tmp, | 290 | workspace->strm.avail_in = min(tmp, |
294 | PAGE_CACHE_SIZE); | 291 | PAGE_CACHE_SIZE); |
295 | } | 292 | } |
296 | } | 293 | } |
@@ -299,7 +296,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in, | |||
299 | else | 296 | else |
300 | ret = 0; | 297 | ret = 0; |
301 | done: | 298 | done: |
302 | zlib_inflateEnd(&workspace->inf_strm); | 299 | zlib_inflateEnd(&workspace->strm); |
303 | if (data_in) | 300 | if (data_in) |
304 | kunmap(pages_in[page_in_index]); | 301 | kunmap(pages_in[page_in_index]); |
305 | return ret; | 302 | return ret; |
@@ -317,13 +314,13 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
317 | unsigned long total_out = 0; | 314 | unsigned long total_out = 0; |
318 | char *kaddr; | 315 | char *kaddr; |
319 | 316 | ||
320 | workspace->inf_strm.next_in = data_in; | 317 | workspace->strm.next_in = data_in; |
321 | workspace->inf_strm.avail_in = srclen; | 318 | workspace->strm.avail_in = srclen; |
322 | workspace->inf_strm.total_in = 0; | 319 | workspace->strm.total_in = 0; |
323 | 320 | ||
324 | workspace->inf_strm.next_out = workspace->buf; | 321 | workspace->strm.next_out = workspace->buf; |
325 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 322 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
326 | workspace->inf_strm.total_out = 0; | 323 | workspace->strm.total_out = 0; |
327 | /* If it's deflate, and it's got no preset dictionary, then | 324 | /* If it's deflate, and it's got no preset dictionary, then |
328 | we can tell zlib to skip the adler32 check. */ | 325 | we can tell zlib to skip the adler32 check. */ |
329 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && | 326 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && |
@@ -331,11 +328,11 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
331 | !(((data_in[0]<<8) + data_in[1]) % 31)) { | 328 | !(((data_in[0]<<8) + data_in[1]) % 31)) { |
332 | 329 | ||
333 | wbits = -((data_in[0] >> 4) + 8); | 330 | wbits = -((data_in[0] >> 4) + 8); |
334 | workspace->inf_strm.next_in += 2; | 331 | workspace->strm.next_in += 2; |
335 | workspace->inf_strm.avail_in -= 2; | 332 | workspace->strm.avail_in -= 2; |
336 | } | 333 | } |
337 | 334 | ||
338 | if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { | 335 | if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) { |
339 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); | 336 | printk(KERN_WARNING "BTRFS: inflateInit failed\n"); |
340 | return -EIO; | 337 | return -EIO; |
341 | } | 338 | } |
@@ -346,12 +343,12 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
346 | unsigned long bytes; | 343 | unsigned long bytes; |
347 | unsigned long pg_offset = 0; | 344 | unsigned long pg_offset = 0; |
348 | 345 | ||
349 | ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); | 346 | ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); |
350 | if (ret != Z_OK && ret != Z_STREAM_END) | 347 | if (ret != Z_OK && ret != Z_STREAM_END) |
351 | break; | 348 | break; |
352 | 349 | ||
353 | buf_start = total_out; | 350 | buf_start = total_out; |
354 | total_out = workspace->inf_strm.total_out; | 351 | total_out = workspace->strm.total_out; |
355 | 352 | ||
356 | if (total_out == buf_start) { | 353 | if (total_out == buf_start) { |
357 | ret = -EIO; | 354 | ret = -EIO; |
@@ -377,8 +374,8 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, | |||
377 | pg_offset += bytes; | 374 | pg_offset += bytes; |
378 | bytes_left -= bytes; | 375 | bytes_left -= bytes; |
379 | next: | 376 | next: |
380 | workspace->inf_strm.next_out = workspace->buf; | 377 | workspace->strm.next_out = workspace->buf; |
381 | workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; | 378 | workspace->strm.avail_out = PAGE_CACHE_SIZE; |
382 | } | 379 | } |
383 | 380 | ||
384 | if (ret != Z_STREAM_END && bytes_left != 0) | 381 | if (ret != Z_STREAM_END && bytes_left != 0) |
@@ -386,7 +383,7 @@ next: | |||
386 | else | 383 | else |
387 | ret = 0; | 384 | ret = 0; |
388 | 385 | ||
389 | zlib_inflateEnd(&workspace->inf_strm); | 386 | zlib_inflateEnd(&workspace->strm); |
390 | return ret; | 387 | return ret; |
391 | } | 388 | } |
392 | 389 | ||
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 4ee4e30d26d9..1faecea101f3 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h | |||
@@ -23,6 +23,7 @@ struct map_lookup; | |||
23 | struct extent_buffer; | 23 | struct extent_buffer; |
24 | struct btrfs_work; | 24 | struct btrfs_work; |
25 | struct __btrfs_workqueue; | 25 | struct __btrfs_workqueue; |
26 | struct btrfs_qgroup_operation; | ||
26 | 27 | ||
27 | #define show_ref_type(type) \ | 28 | #define show_ref_type(type) \ |
28 | __print_symbolic(type, \ | 29 | __print_symbolic(type, \ |
@@ -157,12 +158,13 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, | |||
157 | 158 | ||
158 | #define show_map_flags(flag) \ | 159 | #define show_map_flags(flag) \ |
159 | __print_flags(flag, "|", \ | 160 | __print_flags(flag, "|", \ |
160 | { EXTENT_FLAG_PINNED, "PINNED" }, \ | 161 | { (1 << EXTENT_FLAG_PINNED), "PINNED" },\ |
161 | { EXTENT_FLAG_COMPRESSED, "COMPRESSED" }, \ | 162 | { (1 << EXTENT_FLAG_COMPRESSED), "COMPRESSED" },\ |
162 | { EXTENT_FLAG_VACANCY, "VACANCY" }, \ | 163 | { (1 << EXTENT_FLAG_VACANCY), "VACANCY" },\ |
163 | { EXTENT_FLAG_PREALLOC, "PREALLOC" }, \ | 164 | { (1 << EXTENT_FLAG_PREALLOC), "PREALLOC" },\ |
164 | { EXTENT_FLAG_LOGGING, "LOGGING" }, \ | 165 | { (1 << EXTENT_FLAG_LOGGING), "LOGGING" },\ |
165 | { EXTENT_FLAG_FILLING, "FILLING" }) | 166 | { (1 << EXTENT_FLAG_FILLING), "FILLING" },\ |
167 | { (1 << EXTENT_FLAG_FS_MAPPING), "FS_MAPPING" }) | ||
166 | 168 | ||
167 | TRACE_EVENT_CONDITION(btrfs_get_extent, | 169 | TRACE_EVENT_CONDITION(btrfs_get_extent, |
168 | 170 | ||
@@ -996,6 +998,7 @@ DECLARE_EVENT_CLASS(btrfs__work, | |||
996 | __field( void *, func ) | 998 | __field( void *, func ) |
997 | __field( void *, ordered_func ) | 999 | __field( void *, ordered_func ) |
998 | __field( void *, ordered_free ) | 1000 | __field( void *, ordered_free ) |
1001 | __field( void *, normal_work ) | ||
999 | ), | 1002 | ), |
1000 | 1003 | ||
1001 | TP_fast_assign( | 1004 | TP_fast_assign( |
@@ -1004,11 +1007,13 @@ DECLARE_EVENT_CLASS(btrfs__work, | |||
1004 | __entry->func = work->func; | 1007 | __entry->func = work->func; |
1005 | __entry->ordered_func = work->ordered_func; | 1008 | __entry->ordered_func = work->ordered_func; |
1006 | __entry->ordered_free = work->ordered_free; | 1009 | __entry->ordered_free = work->ordered_free; |
1010 | __entry->normal_work = &work->normal_work; | ||
1007 | ), | 1011 | ), |
1008 | 1012 | ||
1009 | TP_printk("work=%p, wq=%p, func=%p, ordered_func=%p, ordered_free=%p", | 1013 | TP_printk("work=%p (normal_work=%p), wq=%p, func=%pf, ordered_func=%p," |
1010 | __entry->work, __entry->wq, __entry->func, | 1014 | " ordered_free=%p", |
1011 | __entry->ordered_func, __entry->ordered_free) | 1015 | __entry->work, __entry->normal_work, __entry->wq, |
1016 | __entry->func, __entry->ordered_func, __entry->ordered_free) | ||
1012 | ); | 1017 | ); |
1013 | 1018 | ||
1014 | /* For situiations that the work is freed */ | 1019 | /* For situiations that the work is freed */ |
@@ -1043,13 +1048,6 @@ DEFINE_EVENT(btrfs__work, btrfs_work_sched, | |||
1043 | TP_ARGS(work) | 1048 | TP_ARGS(work) |
1044 | ); | 1049 | ); |
1045 | 1050 | ||
1046 | DEFINE_EVENT(btrfs__work, btrfs_normal_work_done, | ||
1047 | |||
1048 | TP_PROTO(struct btrfs_work *work), | ||
1049 | |||
1050 | TP_ARGS(work) | ||
1051 | ); | ||
1052 | |||
1053 | DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done, | 1051 | DEFINE_EVENT(btrfs__work__done, btrfs_all_work_done, |
1054 | 1052 | ||
1055 | TP_PROTO(struct btrfs_work *work), | 1053 | TP_PROTO(struct btrfs_work *work), |
@@ -1119,6 +1117,61 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy, | |||
1119 | TP_ARGS(wq) | 1117 | TP_ARGS(wq) |
1120 | ); | 1118 | ); |
1121 | 1119 | ||
1120 | #define show_oper_type(type) \ | ||
1121 | __print_symbolic(type, \ | ||
1122 | { BTRFS_QGROUP_OPER_ADD_EXCL, "OPER_ADD_EXCL" }, \ | ||
1123 | { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" }, \ | ||
1124 | { BTRFS_QGROUP_OPER_SUB_EXCL, "OPER_SUB_EXCL" }, \ | ||
1125 | { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" }) | ||
1126 | |||
1127 | DECLARE_EVENT_CLASS(btrfs_qgroup_oper, | ||
1128 | |||
1129 | TP_PROTO(struct btrfs_qgroup_operation *oper), | ||
1130 | |||
1131 | TP_ARGS(oper), | ||
1132 | |||
1133 | TP_STRUCT__entry( | ||
1134 | __field( u64, ref_root ) | ||
1135 | __field( u64, bytenr ) | ||
1136 | __field( u64, num_bytes ) | ||
1137 | __field( u64, seq ) | ||
1138 | __field( int, type ) | ||
1139 | __field( u64, elem_seq ) | ||
1140 | ), | ||
1141 | |||
1142 | TP_fast_assign( | ||
1143 | __entry->ref_root = oper->ref_root; | ||
1144 | __entry->bytenr = oper->bytenr; | ||
1145 | __entry->num_bytes = oper->num_bytes; | ||
1146 | __entry->seq = oper->seq; | ||
1147 | __entry->type = oper->type; | ||
1148 | __entry->elem_seq = oper->elem.seq; | ||
1149 | ), | ||
1150 | |||
1151 | TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, " | ||
1152 | "seq = %llu, elem.seq = %llu, type = %s", | ||
1153 | (unsigned long long)__entry->ref_root, | ||
1154 | (unsigned long long)__entry->bytenr, | ||
1155 | (unsigned long long)__entry->num_bytes, | ||
1156 | (unsigned long long)__entry->seq, | ||
1157 | (unsigned long long)__entry->elem_seq, | ||
1158 | show_oper_type(__entry->type)) | ||
1159 | ); | ||
1160 | |||
1161 | DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account, | ||
1162 | |||
1163 | TP_PROTO(struct btrfs_qgroup_operation *oper), | ||
1164 | |||
1165 | TP_ARGS(oper) | ||
1166 | ); | ||
1167 | |||
1168 | DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref, | ||
1169 | |||
1170 | TP_PROTO(struct btrfs_qgroup_operation *oper), | ||
1171 | |||
1172 | TP_ARGS(oper) | ||
1173 | ); | ||
1174 | |||
1122 | #endif /* _TRACE_BTRFS_H */ | 1175 | #endif /* _TRACE_BTRFS_H */ |
1123 | 1176 | ||
1124 | /* This part must be outside protection */ | 1177 | /* This part must be outside protection */ |