aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJan Schmidt <list.btrfs@jan-o-sch.net>2012-05-16 12:25:47 -0400
committerJan Schmidt <list.btrfs@jan-o-sch.net>2012-05-30 09:17:33 -0400
commit5d9e75c41d11ca79b1d1ff6ed17c88c9047d1fea (patch)
treec7e6e1011997dcba5fd9522cd4172b34dec15860 /fs/btrfs
parentf3ea38da3e76455fbd6d405cdca4d050ed085458 (diff)
Btrfs: add btrfs_search_old_slot
The tree modification log together with the current state of the tree gives a consistent, old version of the tree. btrfs_search_old_slot is used to search through this old version and return old (dummy!) extent buffers. Naturally, this function cannot do any tree modifications. Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.c319
-rw-r--r--fs/btrfs/ctree.h2
2 files changed, 317 insertions, 4 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 69ce3f7deeef..0954f1770fd0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -960,6 +960,207 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
960 return 0; 960 return 0;
961} 961}
962 962
963/*
964 * returns the logical address of the oldest predecessor of the given root.
965 * entries older than time_seq are ignored.
966 */
967static struct tree_mod_elem *
968__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
969 struct btrfs_root *root, u64 time_seq)
970{
971 struct tree_mod_elem *tm;
972 struct tree_mod_elem *found = NULL;
973 u64 root_logical = root->node->start;
974 int looped = 0;
975
976 if (!time_seq)
977 return 0;
978
979 /*
980 * the very last operation that's logged for a root is the replacement
981 * operation (if it is replaced at all). this has the index of the *new*
982 * root, making it the very first operation that's logged for this root.
983 */
984 while (1) {
985 tm = tree_mod_log_search_oldest(fs_info, root_logical,
986 time_seq);
987 if (!looped && !tm)
988 return 0;
989 /*
990 * we must have key remove operations in the log before the
991 * replace operation.
992 */
993 BUG_ON(!tm);
994
995 if (tm->op != MOD_LOG_ROOT_REPLACE)
996 break;
997
998 found = tm;
999 root_logical = tm->old_root.logical;
1000 BUG_ON(root_logical == root->node->start);
1001 looped = 1;
1002 }
1003
1004 return found;
1005}
1006
1007/*
1008 * tm is a pointer to the first operation to rewind within eb. then, all
1009 * previous operations will be rewinded (until we reach something older than
1010 * time_seq).
1011 */
1012static void
1013__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1014 struct tree_mod_elem *first_tm)
1015{
1016 u32 n;
1017 struct rb_node *next;
1018 struct tree_mod_elem *tm = first_tm;
1019 unsigned long o_dst;
1020 unsigned long o_src;
1021 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1022
1023 n = btrfs_header_nritems(eb);
1024 while (tm && tm->elem.seq >= time_seq) {
1025 /*
1026 * all the operations are recorded with the operator used for
1027 * the modification. as we're going backwards, we do the
1028 * opposite of each operation here.
1029 */
1030 switch (tm->op) {
1031 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1032 BUG_ON(tm->slot < n);
1033 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
1034 case MOD_LOG_KEY_REMOVE:
1035 btrfs_set_node_key(eb, &tm->key, tm->slot);
1036 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1037 btrfs_set_node_ptr_generation(eb, tm->slot,
1038 tm->generation);
1039 n++;
1040 break;
1041 case MOD_LOG_KEY_REPLACE:
1042 BUG_ON(tm->slot >= n);
1043 btrfs_set_node_key(eb, &tm->key, tm->slot);
1044 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1045 btrfs_set_node_ptr_generation(eb, tm->slot,
1046 tm->generation);
1047 break;
1048 case MOD_LOG_KEY_ADD:
1049 if (tm->slot != n - 1) {
1050 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1051 o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
1052 memmove_extent_buffer(eb, o_dst, o_src, p_size);
1053 }
1054 n--;
1055 break;
1056 case MOD_LOG_MOVE_KEYS:
1057 memmove_extent_buffer(eb, tm->slot, tm->move.dst_slot,
1058 tm->move.nr_items * p_size);
1059 break;
1060 case MOD_LOG_ROOT_REPLACE:
1061 /*
1062 * this operation is special. for roots, this must be
1063 * handled explicitly before rewinding.
1064 * for non-roots, this operation may exist if the node
1065 * was a root: root A -> child B; then A gets empty and
1066 * B is promoted to the new root. in the mod log, we'll
1067 * have a root-replace operation for B, a tree block
1068 * that is no root. we simply ignore that operation.
1069 */
1070 break;
1071 }
1072 next = rb_next(&tm->node);
1073 if (!next)
1074 break;
1075 tm = container_of(next, struct tree_mod_elem, node);
1076 if (tm->index != first_tm->index)
1077 break;
1078 }
1079 btrfs_set_header_nritems(eb, n);
1080}
1081
1082static struct extent_buffer *
1083tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1084 u64 time_seq)
1085{
1086 struct extent_buffer *eb_rewin;
1087 struct tree_mod_elem *tm;
1088
1089 if (!time_seq)
1090 return eb;
1091
1092 if (btrfs_header_level(eb) == 0)
1093 return eb;
1094
1095 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1096 if (!tm)
1097 return eb;
1098
1099 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1100 BUG_ON(tm->slot != 0);
1101 eb_rewin = alloc_dummy_extent_buffer(eb->start,
1102 fs_info->tree_root->nodesize);
1103 BUG_ON(!eb_rewin);
1104 btrfs_set_header_bytenr(eb_rewin, eb->start);
1105 btrfs_set_header_backref_rev(eb_rewin,
1106 btrfs_header_backref_rev(eb));
1107 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
1108 } else {
1109 eb_rewin = btrfs_clone_extent_buffer(eb);
1110 BUG_ON(!eb_rewin);
1111 }
1112
1113 extent_buffer_get(eb_rewin);
1114 free_extent_buffer(eb);
1115
1116 __tree_mod_log_rewind(eb_rewin, time_seq, tm);
1117
1118 return eb_rewin;
1119}
1120
1121static inline struct extent_buffer *
1122get_old_root(struct btrfs_root *root, u64 time_seq)
1123{
1124 struct tree_mod_elem *tm;
1125 struct extent_buffer *eb;
1126 struct tree_mod_root *old_root;
1127 u64 old_generation;
1128
1129 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1130 if (!tm)
1131 return root->node;
1132
1133 old_root = &tm->old_root;
1134 old_generation = tm->generation;
1135
1136 tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq);
1137 /*
1138 * there was an item in the log when __tree_mod_log_oldest_root
1139 * returned. this one must not go away, because the time_seq passed to
1140 * us must be blocking its removal.
1141 */
1142 BUG_ON(!tm);
1143
1144 if (old_root->logical == root->node->start) {
1145 /* there are logged operations for the current root */
1146 eb = btrfs_clone_extent_buffer(root->node);
1147 } else {
1148 /* there's a root replace operation for the current root */
1149 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
1150 root->nodesize);
1151 btrfs_set_header_bytenr(eb, eb->start);
1152 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1153 btrfs_set_header_owner(eb, root->root_key.objectid);
1154 }
1155 if (!eb)
1156 return NULL;
1157 btrfs_set_header_level(eb, old_root->level);
1158 btrfs_set_header_generation(eb, old_generation);
1159 __tree_mod_log_rewind(eb, time_seq, tm);
1160
1161 return eb;
1162}
1163
963static inline int should_cow_block(struct btrfs_trans_handle *trans, 1164static inline int should_cow_block(struct btrfs_trans_handle *trans,
964 struct btrfs_root *root, 1165 struct btrfs_root *root,
965 struct extent_buffer *buf) 1166 struct extent_buffer *buf)
@@ -1930,7 +2131,7 @@ static int
1930read_block_for_search(struct btrfs_trans_handle *trans, 2131read_block_for_search(struct btrfs_trans_handle *trans,
1931 struct btrfs_root *root, struct btrfs_path *p, 2132 struct btrfs_root *root, struct btrfs_path *p,
1932 struct extent_buffer **eb_ret, int level, int slot, 2133 struct extent_buffer **eb_ret, int level, int slot,
1933 struct btrfs_key *key) 2134 struct btrfs_key *key, u64 time_seq)
1934{ 2135{
1935 u64 blocknr; 2136 u64 blocknr;
1936 u64 gen; 2137 u64 gen;
@@ -2284,7 +2485,7 @@ cow_done:
2284 } 2485 }
2285 2486
2286 err = read_block_for_search(trans, root, p, 2487 err = read_block_for_search(trans, root, p,
2287 &b, level, slot, key); 2488 &b, level, slot, key, 0);
2288 if (err == -EAGAIN) 2489 if (err == -EAGAIN)
2289 goto again; 2490 goto again;
2290 if (err) { 2491 if (err) {
@@ -2356,6 +2557,116 @@ done:
2356} 2557}
2357 2558
2358/* 2559/*
2560 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
2561 * current state of the tree together with the operations recorded in the tree
2562 * modification log to search for the key in a previous version of this tree, as
2563 * denoted by the time_seq parameter.
2564 *
2565 * Naturally, there is no support for insert, delete or cow operations.
2566 *
2567 * The resulting path and return value will be set up as if we called
2568 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
2569 */
2570int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2571 struct btrfs_path *p, u64 time_seq)
2572{
2573 struct extent_buffer *b;
2574 int slot;
2575 int ret;
2576 int err;
2577 int level;
2578 int lowest_unlock = 1;
2579 u8 lowest_level = 0;
2580
2581 lowest_level = p->lowest_level;
2582 WARN_ON(p->nodes[0] != NULL);
2583
2584 if (p->search_commit_root) {
2585 BUG_ON(time_seq);
2586 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2587 }
2588
2589again:
2590 level = 0;
2591 b = get_old_root(root, time_seq);
2592 extent_buffer_get(b);
2593 level = btrfs_header_level(b);
2594 btrfs_tree_read_lock(b);
2595 p->locks[level] = BTRFS_READ_LOCK;
2596
2597 while (b) {
2598 level = btrfs_header_level(b);
2599 p->nodes[level] = b;
2600 btrfs_clear_path_blocking(p, NULL, 0);
2601
2602 /*
2603 * we have a lock on b and as long as we aren't changing
2604 * the tree, there is no way to for the items in b to change.
2605 * It is safe to drop the lock on our parent before we
2606 * go through the expensive btree search on b.
2607 */
2608 btrfs_unlock_up_safe(p, level + 1);
2609
2610 ret = bin_search(b, key, level, &slot);
2611
2612 if (level != 0) {
2613 int dec = 0;
2614 if (ret && slot > 0) {
2615 dec = 1;
2616 slot -= 1;
2617 }
2618 p->slots[level] = slot;
2619 unlock_up(p, level, lowest_unlock, 0, NULL);
2620
2621 if (level == lowest_level) {
2622 if (dec)
2623 p->slots[level]++;
2624 goto done;
2625 }
2626
2627 err = read_block_for_search(NULL, root, p, &b, level,
2628 slot, key, time_seq);
2629 if (err == -EAGAIN)
2630 goto again;
2631 if (err) {
2632 ret = err;
2633 goto done;
2634 }
2635
2636 level = btrfs_header_level(b);
2637 err = btrfs_try_tree_read_lock(b);
2638 if (!err) {
2639 btrfs_set_path_blocking(p);
2640 btrfs_tree_read_lock(b);
2641 btrfs_clear_path_blocking(p, b,
2642 BTRFS_READ_LOCK);
2643 }
2644 p->locks[level] = BTRFS_READ_LOCK;
2645 p->nodes[level] = b;
2646 b = tree_mod_log_rewind(root->fs_info, b, time_seq);
2647 if (b != p->nodes[level]) {
2648 btrfs_tree_unlock_rw(p->nodes[level],
2649 p->locks[level]);
2650 p->locks[level] = 0;
2651 p->nodes[level] = b;
2652 }
2653 } else {
2654 p->slots[level] = slot;
2655 unlock_up(p, level, lowest_unlock, 0, NULL);
2656 goto done;
2657 }
2658 }
2659 ret = 1;
2660done:
2661 if (!p->leave_spinning)
2662 btrfs_set_path_blocking(p);
2663 if (ret < 0)
2664 btrfs_release_path(p);
2665
2666 return ret;
2667}
2668
2669/*
2359 * adjust the pointers going up the tree, starting at level 2670 * adjust the pointers going up the tree, starting at level
2360 * making sure the right key of each node is points to 'key'. 2671 * making sure the right key of each node is points to 'key'.
2361 * This is used after shifting pointers to the left, so it stops 2672 * This is used after shifting pointers to the left, so it stops
@@ -4735,7 +5046,7 @@ again:
4735 next = c; 5046 next = c;
4736 next_rw_lock = path->locks[level]; 5047 next_rw_lock = path->locks[level];
4737 ret = read_block_for_search(NULL, root, path, &next, level, 5048 ret = read_block_for_search(NULL, root, path, &next, level,
4738 slot, &key); 5049 slot, &key, 0);
4739 if (ret == -EAGAIN) 5050 if (ret == -EAGAIN)
4740 goto again; 5051 goto again;
4741 5052
@@ -4772,7 +5083,7 @@ again:
4772 break; 5083 break;
4773 5084
4774 ret = read_block_for_search(NULL, root, path, &next, level, 5085 ret = read_block_for_search(NULL, root, path, &next, level,
4775 0, &key); 5086 0, &key, 0);
4776 if (ret == -EAGAIN) 5087 if (ret == -EAGAIN)
4777 goto again; 5088 goto again;
4778 5089
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e53bfb9b3915..6ba21b12cec4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2668,6 +2668,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
2668int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 2668int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2669 *root, struct btrfs_key *key, struct btrfs_path *p, int 2669 *root, struct btrfs_key *key, struct btrfs_path *p, int
2670 ins_len, int cow); 2670 ins_len, int cow);
2671int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2672 struct btrfs_path *p, u64 time_seq);
2671int btrfs_realloc_node(struct btrfs_trans_handle *trans, 2673int btrfs_realloc_node(struct btrfs_trans_handle *trans,
2672 struct btrfs_root *root, struct extent_buffer *parent, 2674 struct btrfs_root *root, struct extent_buffer *parent,
2673 int start_slot, int cache_only, u64 *last_ret, 2675 int start_slot, int cache_only, u64 *last_ret,