aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c146
1 files changed, 98 insertions, 48 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d7a96cfdc50a..8206b3900587 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
467 return 0; 467 return 0;
468} 468}
469 469
470/*
471 * This allocates memory and gets a tree modification sequence number when
472 * needed.
473 *
474 * Returns 0 when no sequence number is needed, < 0 on error.
475 * Returns 1 when a sequence number was added. In this case,
476 * fs_info->tree_mod_seq_lock was acquired and must be released by the caller
477 * after inserting into the rb tree.
478 */
470static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, 479static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
471 struct tree_mod_elem **tm_ret) 480 struct tree_mod_elem **tm_ret)
472{ 481{
@@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
491 */ 500 */
492 kfree(tm); 501 kfree(tm);
493 seq = 0; 502 seq = 0;
503 spin_unlock(&fs_info->tree_mod_seq_lock);
494 } else { 504 } else {
495 __get_tree_mod_seq(fs_info, &tm->elem); 505 __get_tree_mod_seq(fs_info, &tm->elem);
496 seq = tm->elem.seq; 506 seq = tm->elem.seq;
497 } 507 }
498 spin_unlock(&fs_info->tree_mod_seq_lock);
499 508
500 return seq; 509 return seq;
501} 510}
@@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
521 tm->slot = slot; 530 tm->slot = slot;
522 tm->generation = btrfs_node_ptr_generation(eb, slot); 531 tm->generation = btrfs_node_ptr_generation(eb, slot);
523 532
524 return __tree_mod_log_insert(fs_info, tm); 533 ret = __tree_mod_log_insert(fs_info, tm);
534 spin_unlock(&fs_info->tree_mod_seq_lock);
535 return ret;
525} 536}
526 537
527static noinline int 538static noinline int
@@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
559 tm->move.nr_items = nr_items; 570 tm->move.nr_items = nr_items;
560 tm->op = MOD_LOG_MOVE_KEYS; 571 tm->op = MOD_LOG_MOVE_KEYS;
561 572
562 return __tree_mod_log_insert(fs_info, tm); 573 ret = __tree_mod_log_insert(fs_info, tm);
574 spin_unlock(&fs_info->tree_mod_seq_lock);
575 return ret;
563} 576}
564 577
565static noinline int 578static noinline int
@@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
580 tm->generation = btrfs_header_generation(old_root); 593 tm->generation = btrfs_header_generation(old_root);
581 tm->op = MOD_LOG_ROOT_REPLACE; 594 tm->op = MOD_LOG_ROOT_REPLACE;
582 595
583 return __tree_mod_log_insert(fs_info, tm); 596 ret = __tree_mod_log_insert(fs_info, tm);
597 spin_unlock(&fs_info->tree_mod_seq_lock);
598 return ret;
584} 599}
585 600
586static struct tree_mod_elem * 601static struct tree_mod_elem *
@@ -1009,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1009 if (!looped && !tm) 1024 if (!looped && !tm)
1010 return 0; 1025 return 0;
1011 /* 1026 /*
1012 * we must have key remove operations in the log before the 1027 * if there are no tree operation for the oldest root, we simply
1013 * replace operation. 1028 * return it. this should only happen if that (old) root is at
1029 * level 0.
1014 */ 1030 */
1015 BUG_ON(!tm); 1031 if (!tm)
1032 break;
1016 1033
1034 /*
1035 * if there's an operation that's not a root replacement, we
1036 * found the oldest version of our root. normally, we'll find a
1037 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
1038 */
1017 if (tm->op != MOD_LOG_ROOT_REPLACE) 1039 if (tm->op != MOD_LOG_ROOT_REPLACE)
1018 break; 1040 break;
1019 1041
@@ -1023,6 +1045,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1023 looped = 1; 1045 looped = 1;
1024 } 1046 }
1025 1047
1048 /* if there's no old root to return, return what we found instead */
1049 if (!found)
1050 found = tm;
1051
1026 return found; 1052 return found;
1027} 1053}
1028 1054
@@ -1068,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1068 tm->generation); 1094 tm->generation);
1069 break; 1095 break;
1070 case MOD_LOG_KEY_ADD: 1096 case MOD_LOG_KEY_ADD:
1071 if (tm->slot != n - 1) { 1097 /* if a move operation is needed it's in the log */
1072 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1073 o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
1074 memmove_extent_buffer(eb, o_dst, o_src, p_size);
1075 }
1076 n--; 1098 n--;
1077 break; 1099 break;
1078 case MOD_LOG_MOVE_KEYS: 1100 case MOD_LOG_MOVE_KEYS:
@@ -1143,45 +1165,57 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1143 return eb_rewin; 1165 return eb_rewin;
1144} 1166}
1145 1167
1168/*
1169 * get_old_root() rewinds the state of @root's root node to the given @time_seq
1170 * value. If there are no changes, the current root->root_node is returned. If
1171 * anything changed in between, there's a fresh buffer allocated on which the
1172 * rewind operations are done. In any case, the returned buffer is read locked.
1173 * Returns NULL on error (with no locks held).
1174 */
1146static inline struct extent_buffer * 1175static inline struct extent_buffer *
1147get_old_root(struct btrfs_root *root, u64 time_seq) 1176get_old_root(struct btrfs_root *root, u64 time_seq)
1148{ 1177{
1149 struct tree_mod_elem *tm; 1178 struct tree_mod_elem *tm;
1150 struct extent_buffer *eb; 1179 struct extent_buffer *eb;
1151 struct tree_mod_root *old_root; 1180 struct tree_mod_root *old_root = NULL;
1152 u64 old_generation; 1181 u64 old_generation = 0;
1182 u64 logical;
1153 1183
1184 eb = btrfs_read_lock_root_node(root);
1154 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1185 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1155 if (!tm) 1186 if (!tm)
1156 return root->node; 1187 return root->node;
1157 1188
1158 old_root = &tm->old_root; 1189 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1159 old_generation = tm->generation; 1190 old_root = &tm->old_root;
1160 1191 old_generation = tm->generation;
1161 tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); 1192 logical = old_root->logical;
1162 /* 1193 } else {
1163 * there was an item in the log when __tree_mod_log_oldest_root 1194 logical = root->node->start;
1164 * returned. this one must not go away, because the time_seq passed to 1195 }
1165 * us must be blocking its removal.
1166 */
1167 BUG_ON(!tm);
1168 1196
1169 if (old_root->logical == root->node->start) { 1197 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1170 /* there are logged operations for the current root */ 1198 if (old_root)
1199 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1200 else
1171 eb = btrfs_clone_extent_buffer(root->node); 1201 eb = btrfs_clone_extent_buffer(root->node);
1172 } else { 1202 btrfs_tree_read_unlock(root->node);
1173 /* there's a root replace operation for the current root */ 1203 free_extent_buffer(root->node);
1174 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, 1204 if (!eb)
1175 root->nodesize); 1205 return NULL;
1206 btrfs_tree_read_lock(eb);
1207 if (old_root) {
1176 btrfs_set_header_bytenr(eb, eb->start); 1208 btrfs_set_header_bytenr(eb, eb->start);
1177 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); 1209 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1178 btrfs_set_header_owner(eb, root->root_key.objectid); 1210 btrfs_set_header_owner(eb, root->root_key.objectid);
1211 btrfs_set_header_level(eb, old_root->level);
1212 btrfs_set_header_generation(eb, old_generation);
1179 } 1213 }
1180 if (!eb) 1214 if (tm)
1181 return NULL; 1215 __tree_mod_log_rewind(eb, time_seq, tm);
1182 btrfs_set_header_level(eb, old_root->level); 1216 else
1183 btrfs_set_header_generation(eb, old_generation); 1217 WARN_ON(btrfs_header_level(eb) != 0);
1184 __tree_mod_log_rewind(eb, time_seq, tm); 1218 extent_buffer_get(eb);
1185 1219
1186 return eb; 1220 return eb;
1187} 1221}
@@ -1650,8 +1684,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1650 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1684 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1651 return 0; 1685 return 0;
1652 1686
1653 btrfs_header_nritems(mid);
1654
1655 left = read_node_slot(root, parent, pslot - 1); 1687 left = read_node_slot(root, parent, pslot - 1);
1656 if (left) { 1688 if (left) {
1657 btrfs_tree_lock(left); 1689 btrfs_tree_lock(left);
@@ -1681,7 +1713,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1681 wret = push_node_left(trans, root, left, mid, 1); 1713 wret = push_node_left(trans, root, left, mid, 1);
1682 if (wret < 0) 1714 if (wret < 0)
1683 ret = wret; 1715 ret = wret;
1684 btrfs_header_nritems(mid);
1685 } 1716 }
1686 1717
1687 /* 1718 /*
@@ -2615,9 +2646,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2615 2646
2616again: 2647again:
2617 b = get_old_root(root, time_seq); 2648 b = get_old_root(root, time_seq);
2618 extent_buffer_get(b);
2619 level = btrfs_header_level(b); 2649 level = btrfs_header_level(b);
2620 btrfs_tree_read_lock(b);
2621 p->locks[level] = BTRFS_READ_LOCK; 2650 p->locks[level] = BTRFS_READ_LOCK;
2622 2651
2623 while (b) { 2652 while (b) {
@@ -2964,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2964static void insert_ptr(struct btrfs_trans_handle *trans, 2993static void insert_ptr(struct btrfs_trans_handle *trans,
2965 struct btrfs_root *root, struct btrfs_path *path, 2994 struct btrfs_root *root, struct btrfs_path *path,
2966 struct btrfs_disk_key *key, u64 bytenr, 2995 struct btrfs_disk_key *key, u64 bytenr,
2967 int slot, int level, int tree_mod_log) 2996 int slot, int level)
2968{ 2997{
2969 struct extent_buffer *lower; 2998 struct extent_buffer *lower;
2970 int nritems; 2999 int nritems;
@@ -2977,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
2977 BUG_ON(slot > nritems); 3006 BUG_ON(slot > nritems);
2978 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); 3007 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
2979 if (slot != nritems) { 3008 if (slot != nritems) {
2980 if (tree_mod_log && level) 3009 if (level)
2981 tree_mod_log_eb_move(root->fs_info, lower, slot + 1, 3010 tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
2982 slot, nritems - slot); 3011 slot, nritems - slot);
2983 memmove_extent_buffer(lower, 3012 memmove_extent_buffer(lower,
@@ -2985,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
2985 btrfs_node_key_ptr_offset(slot), 3014 btrfs_node_key_ptr_offset(slot),
2986 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 3015 (nritems - slot) * sizeof(struct btrfs_key_ptr));
2987 } 3016 }
2988 if (tree_mod_log && level) { 3017 if (level) {
2989 ret = tree_mod_log_insert_key(root->fs_info, lower, slot, 3018 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
2990 MOD_LOG_KEY_ADD); 3019 MOD_LOG_KEY_ADD);
2991 BUG_ON(ret < 0); 3020 BUG_ON(ret < 0);
@@ -3073,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3073 btrfs_mark_buffer_dirty(split); 3102 btrfs_mark_buffer_dirty(split);
3074 3103
3075 insert_ptr(trans, root, path, &disk_key, split->start, 3104 insert_ptr(trans, root, path, &disk_key, split->start,
3076 path->slots[level + 1] + 1, level + 1, 1); 3105 path->slots[level + 1] + 1, level + 1);
3077 3106
3078 if (path->slots[level] >= mid) { 3107 if (path->slots[level] >= mid) {
3079 path->slots[level] -= mid; 3108 path->slots[level] -= mid;
@@ -3610,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
3610 btrfs_set_header_nritems(l, mid); 3639 btrfs_set_header_nritems(l, mid);
3611 btrfs_item_key(right, &disk_key, 0); 3640 btrfs_item_key(right, &disk_key, 0);
3612 insert_ptr(trans, root, path, &disk_key, right->start, 3641 insert_ptr(trans, root, path, &disk_key, right->start,
3613 path->slots[1] + 1, 1, 0); 3642 path->slots[1] + 1, 1);
3614 3643
3615 btrfs_mark_buffer_dirty(right); 3644 btrfs_mark_buffer_dirty(right);
3616 btrfs_mark_buffer_dirty(l); 3645 btrfs_mark_buffer_dirty(l);
@@ -3817,7 +3846,7 @@ again:
3817 if (mid <= slot) { 3846 if (mid <= slot) {
3818 btrfs_set_header_nritems(right, 0); 3847 btrfs_set_header_nritems(right, 0);
3819 insert_ptr(trans, root, path, &disk_key, right->start, 3848 insert_ptr(trans, root, path, &disk_key, right->start,
3820 path->slots[1] + 1, 1, 0); 3849 path->slots[1] + 1, 1);
3821 btrfs_tree_unlock(path->nodes[0]); 3850 btrfs_tree_unlock(path->nodes[0]);
3822 free_extent_buffer(path->nodes[0]); 3851 free_extent_buffer(path->nodes[0]);
3823 path->nodes[0] = right; 3852 path->nodes[0] = right;
@@ -3826,7 +3855,7 @@ again:
3826 } else { 3855 } else {
3827 btrfs_set_header_nritems(right, 0); 3856 btrfs_set_header_nritems(right, 0);
3828 insert_ptr(trans, root, path, &disk_key, right->start, 3857 insert_ptr(trans, root, path, &disk_key, right->start,
3829 path->slots[1], 1, 0); 3858 path->slots[1], 1);
3830 btrfs_tree_unlock(path->nodes[0]); 3859 btrfs_tree_unlock(path->nodes[0]);
3831 free_extent_buffer(path->nodes[0]); 3860 free_extent_buffer(path->nodes[0]);
3832 path->nodes[0] = right; 3861 path->nodes[0] = right;
@@ -5001,6 +5030,12 @@ next:
5001 */ 5030 */
5002int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 5031int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
5003{ 5032{
5033 return btrfs_next_old_leaf(root, path, 0);
5034}
5035
5036int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5037 u64 time_seq)
5038{
5004 int slot; 5039 int slot;
5005 int level; 5040 int level;
5006 struct extent_buffer *c; 5041 struct extent_buffer *c;
@@ -5025,7 +5060,10 @@ again:
5025 path->keep_locks = 1; 5060 path->keep_locks = 1;
5026 path->leave_spinning = 1; 5061 path->leave_spinning = 1;
5027 5062
5028 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5063 if (time_seq)
5064 ret = btrfs_search_old_slot(root, &key, path, time_seq);
5065 else
5066 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5029 path->keep_locks = 0; 5067 path->keep_locks = 0;
5030 5068
5031 if (ret < 0) 5069 if (ret < 0)
@@ -5081,6 +5119,18 @@ again:
5081 5119
5082 if (!path->skip_locking) { 5120 if (!path->skip_locking) {
5083 ret = btrfs_try_tree_read_lock(next); 5121 ret = btrfs_try_tree_read_lock(next);
5122 if (!ret && time_seq) {
5123 /*
5124 * If we don't get the lock, we may be racing
5125 * with push_leaf_left, holding that lock while
5126 * itself waiting for the leaf we've currently
5127 * locked. To solve this situation, we give up
5128 * on our lock and cycle.
5129 */
5130 btrfs_release_path(path);
5131 cond_resched();
5132 goto again;
5133 }
5084 if (!ret) { 5134 if (!ret) {
5085 btrfs_set_path_blocking(path); 5135 btrfs_set_path_blocking(path);
5086 btrfs_tree_read_lock(next); 5136 btrfs_tree_read_lock(next);