diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 146 |
1 files changed, 98 insertions, 48 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d7a96cfdc50a..8206b3900587 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | |||
467 | return 0; | 467 | return 0; |
468 | } | 468 | } |
469 | 469 | ||
470 | /* | ||
471 | * This allocates memory and gets a tree modification sequence number when | ||
472 | * needed. | ||
473 | * | ||
474 | * Returns 0 when no sequence number is needed, < 0 on error. | ||
475 | * Returns 1 when a sequence number was added. In this case, | ||
476 | * fs_info->tree_mod_seq_lock was acquired and must be released by the caller | ||
477 | * after inserting into the rb tree. | ||
478 | */ | ||
470 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | 479 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, |
471 | struct tree_mod_elem **tm_ret) | 480 | struct tree_mod_elem **tm_ret) |
472 | { | 481 | { |
@@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | |||
491 | */ | 500 | */ |
492 | kfree(tm); | 501 | kfree(tm); |
493 | seq = 0; | 502 | seq = 0; |
503 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
494 | } else { | 504 | } else { |
495 | __get_tree_mod_seq(fs_info, &tm->elem); | 505 | __get_tree_mod_seq(fs_info, &tm->elem); |
496 | seq = tm->elem.seq; | 506 | seq = tm->elem.seq; |
497 | } | 507 | } |
498 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
499 | 508 | ||
500 | return seq; | 509 | return seq; |
501 | } | 510 | } |
@@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | |||
521 | tm->slot = slot; | 530 | tm->slot = slot; |
522 | tm->generation = btrfs_node_ptr_generation(eb, slot); | 531 | tm->generation = btrfs_node_ptr_generation(eb, slot); |
523 | 532 | ||
524 | return __tree_mod_log_insert(fs_info, tm); | 533 | ret = __tree_mod_log_insert(fs_info, tm); |
534 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
535 | return ret; | ||
525 | } | 536 | } |
526 | 537 | ||
527 | static noinline int | 538 | static noinline int |
@@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | |||
559 | tm->move.nr_items = nr_items; | 570 | tm->move.nr_items = nr_items; |
560 | tm->op = MOD_LOG_MOVE_KEYS; | 571 | tm->op = MOD_LOG_MOVE_KEYS; |
561 | 572 | ||
562 | return __tree_mod_log_insert(fs_info, tm); | 573 | ret = __tree_mod_log_insert(fs_info, tm); |
574 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
575 | return ret; | ||
563 | } | 576 | } |
564 | 577 | ||
565 | static noinline int | 578 | static noinline int |
@@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | |||
580 | tm->generation = btrfs_header_generation(old_root); | 593 | tm->generation = btrfs_header_generation(old_root); |
581 | tm->op = MOD_LOG_ROOT_REPLACE; | 594 | tm->op = MOD_LOG_ROOT_REPLACE; |
582 | 595 | ||
583 | return __tree_mod_log_insert(fs_info, tm); | 596 | ret = __tree_mod_log_insert(fs_info, tm); |
597 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
598 | return ret; | ||
584 | } | 599 | } |
585 | 600 | ||
586 | static struct tree_mod_elem * | 601 | static struct tree_mod_elem * |
@@ -1009,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | |||
1009 | if (!looped && !tm) | 1024 | if (!looped && !tm) |
1010 | return 0; | 1025 | return 0; |
1011 | /* | 1026 | /* |
1012 | * we must have key remove operations in the log before the | 1027 | * if there are no tree operation for the oldest root, we simply |
1013 | * replace operation. | 1028 | * return it. this should only happen if that (old) root is at |
1029 | * level 0. | ||
1014 | */ | 1030 | */ |
1015 | BUG_ON(!tm); | 1031 | if (!tm) |
1032 | break; | ||
1016 | 1033 | ||
1034 | /* | ||
1035 | * if there's an operation that's not a root replacement, we | ||
1036 | * found the oldest version of our root. normally, we'll find a | ||
1037 | * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. | ||
1038 | */ | ||
1017 | if (tm->op != MOD_LOG_ROOT_REPLACE) | 1039 | if (tm->op != MOD_LOG_ROOT_REPLACE) |
1018 | break; | 1040 | break; |
1019 | 1041 | ||
@@ -1023,6 +1045,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | |||
1023 | looped = 1; | 1045 | looped = 1; |
1024 | } | 1046 | } |
1025 | 1047 | ||
1048 | /* if there's no old root to return, return what we found instead */ | ||
1049 | if (!found) | ||
1050 | found = tm; | ||
1051 | |||
1026 | return found; | 1052 | return found; |
1027 | } | 1053 | } |
1028 | 1054 | ||
@@ -1068,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | |||
1068 | tm->generation); | 1094 | tm->generation); |
1069 | break; | 1095 | break; |
1070 | case MOD_LOG_KEY_ADD: | 1096 | case MOD_LOG_KEY_ADD: |
1071 | if (tm->slot != n - 1) { | 1097 | /* if a move operation is needed it's in the log */ |
1072 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1073 | o_src = btrfs_node_key_ptr_offset(tm->slot + 1); | ||
1074 | memmove_extent_buffer(eb, o_dst, o_src, p_size); | ||
1075 | } | ||
1076 | n--; | 1098 | n--; |
1077 | break; | 1099 | break; |
1078 | case MOD_LOG_MOVE_KEYS: | 1100 | case MOD_LOG_MOVE_KEYS: |
@@ -1143,45 +1165,57 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | |||
1143 | return eb_rewin; | 1165 | return eb_rewin; |
1144 | } | 1166 | } |
1145 | 1167 | ||
1168 | /* | ||
1169 | * get_old_root() rewinds the state of @root's root node to the given @time_seq | ||
1170 | * value. If there are no changes, the current root->root_node is returned. If | ||
1171 | * anything changed in between, there's a fresh buffer allocated on which the | ||
1172 | * rewind operations are done. In any case, the returned buffer is read locked. | ||
1173 | * Returns NULL on error (with no locks held). | ||
1174 | */ | ||
1146 | static inline struct extent_buffer * | 1175 | static inline struct extent_buffer * |
1147 | get_old_root(struct btrfs_root *root, u64 time_seq) | 1176 | get_old_root(struct btrfs_root *root, u64 time_seq) |
1148 | { | 1177 | { |
1149 | struct tree_mod_elem *tm; | 1178 | struct tree_mod_elem *tm; |
1150 | struct extent_buffer *eb; | 1179 | struct extent_buffer *eb; |
1151 | struct tree_mod_root *old_root; | 1180 | struct tree_mod_root *old_root = NULL; |
1152 | u64 old_generation; | 1181 | u64 old_generation = 0; |
1182 | u64 logical; | ||
1153 | 1183 | ||
1184 | eb = btrfs_read_lock_root_node(root); | ||
1154 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | 1185 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); |
1155 | if (!tm) | 1186 | if (!tm) |
1156 | return root->node; | 1187 | return root->node; |
1157 | 1188 | ||
1158 | old_root = &tm->old_root; | 1189 | if (tm->op == MOD_LOG_ROOT_REPLACE) { |
1159 | old_generation = tm->generation; | 1190 | old_root = &tm->old_root; |
1160 | 1191 | old_generation = tm->generation; | |
1161 | tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); | 1192 | logical = old_root->logical; |
1162 | /* | 1193 | } else { |
1163 | * there was an item in the log when __tree_mod_log_oldest_root | 1194 | logical = root->node->start; |
1164 | * returned. this one must not go away, because the time_seq passed to | 1195 | } |
1165 | * us must be blocking its removal. | ||
1166 | */ | ||
1167 | BUG_ON(!tm); | ||
1168 | 1196 | ||
1169 | if (old_root->logical == root->node->start) { | 1197 | tm = tree_mod_log_search(root->fs_info, logical, time_seq); |
1170 | /* there are logged operations for the current root */ | 1198 | if (old_root) |
1199 | eb = alloc_dummy_extent_buffer(logical, root->nodesize); | ||
1200 | else | ||
1171 | eb = btrfs_clone_extent_buffer(root->node); | 1201 | eb = btrfs_clone_extent_buffer(root->node); |
1172 | } else { | 1202 | btrfs_tree_read_unlock(root->node); |
1173 | /* there's a root replace operation for the current root */ | 1203 | free_extent_buffer(root->node); |
1174 | eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, | 1204 | if (!eb) |
1175 | root->nodesize); | 1205 | return NULL; |
1206 | btrfs_tree_read_lock(eb); | ||
1207 | if (old_root) { | ||
1176 | btrfs_set_header_bytenr(eb, eb->start); | 1208 | btrfs_set_header_bytenr(eb, eb->start); |
1177 | btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); | 1209 | btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); |
1178 | btrfs_set_header_owner(eb, root->root_key.objectid); | 1210 | btrfs_set_header_owner(eb, root->root_key.objectid); |
1211 | btrfs_set_header_level(eb, old_root->level); | ||
1212 | btrfs_set_header_generation(eb, old_generation); | ||
1179 | } | 1213 | } |
1180 | if (!eb) | 1214 | if (tm) |
1181 | return NULL; | 1215 | __tree_mod_log_rewind(eb, time_seq, tm); |
1182 | btrfs_set_header_level(eb, old_root->level); | 1216 | else |
1183 | btrfs_set_header_generation(eb, old_generation); | 1217 | WARN_ON(btrfs_header_level(eb) != 0); |
1184 | __tree_mod_log_rewind(eb, time_seq, tm); | 1218 | extent_buffer_get(eb); |
1185 | 1219 | ||
1186 | return eb; | 1220 | return eb; |
1187 | } | 1221 | } |
@@ -1650,8 +1684,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1650 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 1684 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
1651 | return 0; | 1685 | return 0; |
1652 | 1686 | ||
1653 | btrfs_header_nritems(mid); | ||
1654 | |||
1655 | left = read_node_slot(root, parent, pslot - 1); | 1687 | left = read_node_slot(root, parent, pslot - 1); |
1656 | if (left) { | 1688 | if (left) { |
1657 | btrfs_tree_lock(left); | 1689 | btrfs_tree_lock(left); |
@@ -1681,7 +1713,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1681 | wret = push_node_left(trans, root, left, mid, 1); | 1713 | wret = push_node_left(trans, root, left, mid, 1); |
1682 | if (wret < 0) | 1714 | if (wret < 0) |
1683 | ret = wret; | 1715 | ret = wret; |
1684 | btrfs_header_nritems(mid); | ||
1685 | } | 1716 | } |
1686 | 1717 | ||
1687 | /* | 1718 | /* |
@@ -2615,9 +2646,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | |||
2615 | 2646 | ||
2616 | again: | 2647 | again: |
2617 | b = get_old_root(root, time_seq); | 2648 | b = get_old_root(root, time_seq); |
2618 | extent_buffer_get(b); | ||
2619 | level = btrfs_header_level(b); | 2649 | level = btrfs_header_level(b); |
2620 | btrfs_tree_read_lock(b); | ||
2621 | p->locks[level] = BTRFS_READ_LOCK; | 2650 | p->locks[level] = BTRFS_READ_LOCK; |
2622 | 2651 | ||
2623 | while (b) { | 2652 | while (b) { |
@@ -2964,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2964 | static void insert_ptr(struct btrfs_trans_handle *trans, | 2993 | static void insert_ptr(struct btrfs_trans_handle *trans, |
2965 | struct btrfs_root *root, struct btrfs_path *path, | 2994 | struct btrfs_root *root, struct btrfs_path *path, |
2966 | struct btrfs_disk_key *key, u64 bytenr, | 2995 | struct btrfs_disk_key *key, u64 bytenr, |
2967 | int slot, int level, int tree_mod_log) | 2996 | int slot, int level) |
2968 | { | 2997 | { |
2969 | struct extent_buffer *lower; | 2998 | struct extent_buffer *lower; |
2970 | int nritems; | 2999 | int nritems; |
@@ -2977,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
2977 | BUG_ON(slot > nritems); | 3006 | BUG_ON(slot > nritems); |
2978 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); | 3007 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); |
2979 | if (slot != nritems) { | 3008 | if (slot != nritems) { |
2980 | if (tree_mod_log && level) | 3009 | if (level) |
2981 | tree_mod_log_eb_move(root->fs_info, lower, slot + 1, | 3010 | tree_mod_log_eb_move(root->fs_info, lower, slot + 1, |
2982 | slot, nritems - slot); | 3011 | slot, nritems - slot); |
2983 | memmove_extent_buffer(lower, | 3012 | memmove_extent_buffer(lower, |
@@ -2985,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
2985 | btrfs_node_key_ptr_offset(slot), | 3014 | btrfs_node_key_ptr_offset(slot), |
2986 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); | 3015 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); |
2987 | } | 3016 | } |
2988 | if (tree_mod_log && level) { | 3017 | if (level) { |
2989 | ret = tree_mod_log_insert_key(root->fs_info, lower, slot, | 3018 | ret = tree_mod_log_insert_key(root->fs_info, lower, slot, |
2990 | MOD_LOG_KEY_ADD); | 3019 | MOD_LOG_KEY_ADD); |
2991 | BUG_ON(ret < 0); | 3020 | BUG_ON(ret < 0); |
@@ -3073,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
3073 | btrfs_mark_buffer_dirty(split); | 3102 | btrfs_mark_buffer_dirty(split); |
3074 | 3103 | ||
3075 | insert_ptr(trans, root, path, &disk_key, split->start, | 3104 | insert_ptr(trans, root, path, &disk_key, split->start, |
3076 | path->slots[level + 1] + 1, level + 1, 1); | 3105 | path->slots[level + 1] + 1, level + 1); |
3077 | 3106 | ||
3078 | if (path->slots[level] >= mid) { | 3107 | if (path->slots[level] >= mid) { |
3079 | path->slots[level] -= mid; | 3108 | path->slots[level] -= mid; |
@@ -3610,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, | |||
3610 | btrfs_set_header_nritems(l, mid); | 3639 | btrfs_set_header_nritems(l, mid); |
3611 | btrfs_item_key(right, &disk_key, 0); | 3640 | btrfs_item_key(right, &disk_key, 0); |
3612 | insert_ptr(trans, root, path, &disk_key, right->start, | 3641 | insert_ptr(trans, root, path, &disk_key, right->start, |
3613 | path->slots[1] + 1, 1, 0); | 3642 | path->slots[1] + 1, 1); |
3614 | 3643 | ||
3615 | btrfs_mark_buffer_dirty(right); | 3644 | btrfs_mark_buffer_dirty(right); |
3616 | btrfs_mark_buffer_dirty(l); | 3645 | btrfs_mark_buffer_dirty(l); |
@@ -3817,7 +3846,7 @@ again: | |||
3817 | if (mid <= slot) { | 3846 | if (mid <= slot) { |
3818 | btrfs_set_header_nritems(right, 0); | 3847 | btrfs_set_header_nritems(right, 0); |
3819 | insert_ptr(trans, root, path, &disk_key, right->start, | 3848 | insert_ptr(trans, root, path, &disk_key, right->start, |
3820 | path->slots[1] + 1, 1, 0); | 3849 | path->slots[1] + 1, 1); |
3821 | btrfs_tree_unlock(path->nodes[0]); | 3850 | btrfs_tree_unlock(path->nodes[0]); |
3822 | free_extent_buffer(path->nodes[0]); | 3851 | free_extent_buffer(path->nodes[0]); |
3823 | path->nodes[0] = right; | 3852 | path->nodes[0] = right; |
@@ -3826,7 +3855,7 @@ again: | |||
3826 | } else { | 3855 | } else { |
3827 | btrfs_set_header_nritems(right, 0); | 3856 | btrfs_set_header_nritems(right, 0); |
3828 | insert_ptr(trans, root, path, &disk_key, right->start, | 3857 | insert_ptr(trans, root, path, &disk_key, right->start, |
3829 | path->slots[1], 1, 0); | 3858 | path->slots[1], 1); |
3830 | btrfs_tree_unlock(path->nodes[0]); | 3859 | btrfs_tree_unlock(path->nodes[0]); |
3831 | free_extent_buffer(path->nodes[0]); | 3860 | free_extent_buffer(path->nodes[0]); |
3832 | path->nodes[0] = right; | 3861 | path->nodes[0] = right; |
@@ -5001,6 +5030,12 @@ next: | |||
5001 | */ | 5030 | */ |
5002 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | 5031 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) |
5003 | { | 5032 | { |
5033 | return btrfs_next_old_leaf(root, path, 0); | ||
5034 | } | ||
5035 | |||
5036 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, | ||
5037 | u64 time_seq) | ||
5038 | { | ||
5004 | int slot; | 5039 | int slot; |
5005 | int level; | 5040 | int level; |
5006 | struct extent_buffer *c; | 5041 | struct extent_buffer *c; |
@@ -5025,7 +5060,10 @@ again: | |||
5025 | path->keep_locks = 1; | 5060 | path->keep_locks = 1; |
5026 | path->leave_spinning = 1; | 5061 | path->leave_spinning = 1; |
5027 | 5062 | ||
5028 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 5063 | if (time_seq) |
5064 | ret = btrfs_search_old_slot(root, &key, path, time_seq); | ||
5065 | else | ||
5066 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
5029 | path->keep_locks = 0; | 5067 | path->keep_locks = 0; |
5030 | 5068 | ||
5031 | if (ret < 0) | 5069 | if (ret < 0) |
@@ -5081,6 +5119,18 @@ again: | |||
5081 | 5119 | ||
5082 | if (!path->skip_locking) { | 5120 | if (!path->skip_locking) { |
5083 | ret = btrfs_try_tree_read_lock(next); | 5121 | ret = btrfs_try_tree_read_lock(next); |
5122 | if (!ret && time_seq) { | ||
5123 | /* | ||
5124 | * If we don't get the lock, we may be racing | ||
5125 | * with push_leaf_left, holding that lock while | ||
5126 | * itself waiting for the leaf we've currently | ||
5127 | * locked. To solve this situation, we give up | ||
5128 | * on our lock and cycle. | ||
5129 | */ | ||
5130 | btrfs_release_path(path); | ||
5131 | cond_resched(); | ||
5132 | goto again; | ||
5133 | } | ||
5084 | if (!ret) { | 5134 | if (!ret) { |
5085 | btrfs_set_path_blocking(path); | 5135 | btrfs_set_path_blocking(path); |
5086 | btrfs_tree_read_lock(next); | 5136 | btrfs_tree_read_lock(next); |