aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-06-25 16:01:31 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:04 -0400
commit3f157a2fd2ad731e1ed9964fecdc5f459f04a4a4 (patch)
treedf9421e7b1d0c06d5efb8659f4317438d3d511d7 /fs/btrfs/ctree.c
parent1b1e2135dc1e4efbcf25ac9ac9979316d4e1193e (diff)
Btrfs: Online btree defragmentation fixes
The btree defragger wasn't making forward progress because the new key wasn't being saved by the btrfs_search_forward function. This also disables the automatic btree defrag, it wasn't scaling well to huge filesystems. The auto-defrag needs to be done differently. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c170
1 files changed, 161 insertions, 9 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7f4cc2b88d09..0cb80f32a9c7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -63,10 +63,9 @@ void btrfs_free_path(struct btrfs_path *p)
63void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 63void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
64{ 64{
65 int i; 65 int i;
66 int keep = p->keep_locks;
67 int skip = p->skip_locking;
68 66
69 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 67 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
68 p->slots[i] = 0;
70 if (!p->nodes[i]) 69 if (!p->nodes[i])
71 continue; 70 continue;
72 if (p->locks[i]) { 71 if (p->locks[i]) {
@@ -74,10 +73,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
74 p->locks[i] = 0; 73 p->locks[i] = 0;
75 } 74 }
76 free_extent_buffer(p->nodes[i]); 75 free_extent_buffer(p->nodes[i]);
76 p->nodes[i] = NULL;
77 } 77 }
78 memset(p, 0, sizeof(*p));
79 p->keep_locks = keep;
80 p->skip_locking = skip;
81} 78}
82 79
83struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 80struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
@@ -463,8 +460,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
463 search_start = cur->start; 460 search_start = cur->start;
464 last_block = cur->start; 461 last_block = cur->start;
465 *last_ret = search_start; 462 *last_ret = search_start;
466 if (parent_level == 1)
467 btrfs_clear_buffer_defrag(cur);
468 btrfs_tree_unlock(cur); 463 btrfs_tree_unlock(cur);
469 free_extent_buffer(cur); 464 free_extent_buffer(cur);
470 } 465 }
@@ -2969,8 +2964,138 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2969 return 1; 2964 return 1;
2970} 2965}
2971 2966
2967/*
2968 * A helper function to walk down the tree starting at min_key, and looking
2969 * for nodes or leaves that are either in cache or have a minimum
2970 * transaction id. This is used by the btree defrag code, but could
2971 * also be used to search for blocks that have changed since a given
2972 * transaction id.
2973 *
2974 * This does not cow, but it does stuff the starting key it finds back
2975 * into min_key, so you can call btrfs_search_slot with cow=1 on the
2976 * key and get a writable path.
2977 *
2978 * This does lock as it descends, and path->keep_locks should be set
2979 * to 1 by the caller.
2980 *
2981 * This honors path->lowest_level to prevent descent past a given level
2982 * of the tree.
2983 *
2984 * returns zero if something useful was found, < 0 on error and 1 if there
2985 * was nothing in the tree that matched the search criteria.
2986 */
2987int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
2988 struct btrfs_path *path, int cache_only,
2989 u64 min_trans)
2990{
2991 struct extent_buffer *cur;
2992 struct btrfs_key found_key;
2993 int slot;
2994 u32 nritems;
2995 int level;
2996 int ret = 1;
2997
2998again:
2999 cur = btrfs_lock_root_node(root);
3000 level = btrfs_header_level(cur);
3001 path->nodes[level] = cur;
3002 path->locks[level] = 1;
3003
3004 if (btrfs_header_generation(cur) < min_trans) {
3005 ret = 1;
3006 goto out;
3007 }
3008 while(1) {
3009 nritems = btrfs_header_nritems(cur);
3010 level = btrfs_header_level(cur);
3011 bin_search(cur, min_key, level, &slot);
3012
3013 /* at level = 0, we're done, setup the path and exit */
3014 if (level == 0) {
3015 ret = 0;
3016 path->slots[level] = slot;
3017 btrfs_item_key_to_cpu(cur, &found_key, slot);
3018 goto out;
3019 }
3020 /*
3021 * check this node pointer against the cache_only and
3022 * min_trans parameters. If it isn't in cache or is too
3023 * old, skip to the next one.
3024 */
3025 while(slot < nritems) {
3026 u64 blockptr;
3027 u64 gen;
3028 struct extent_buffer *tmp;
3029 blockptr = btrfs_node_blockptr(cur, slot);
3030 gen = btrfs_node_ptr_generation(cur, slot);
3031 if (gen < min_trans) {
3032 slot++;
3033 continue;
3034 }
3035 if (!cache_only)
3036 break;
3037
3038 tmp = btrfs_find_tree_block(root, blockptr,
3039 btrfs_level_size(root, level - 1));
3040
3041 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3042 free_extent_buffer(tmp);
3043 break;
3044 }
3045 if (tmp)
3046 free_extent_buffer(tmp);
3047 slot++;
3048 }
3049 /*
3050 * we didn't find a candidate key in this node, walk forward
3051 * and find another one
3052 */
3053 if (slot >= nritems) {
3054 ret = btrfs_find_next_key(root, path, min_key, level,
3055 cache_only, min_trans);
3056 if (ret == 0) {
3057 btrfs_release_path(root, path);
3058 goto again;
3059 } else {
3060 goto out;
3061 }
3062 }
3063 /* save our key for returning back */
3064 btrfs_node_key_to_cpu(cur, &found_key, slot);
3065 path->slots[level] = slot;
3066 if (level == path->lowest_level) {
3067 ret = 0;
3068 unlock_up(path, level, 1);
3069 goto out;
3070 }
3071 cur = read_node_slot(root, cur, slot);
3072
3073 btrfs_tree_lock(cur);
3074 path->locks[level - 1] = 1;
3075 path->nodes[level - 1] = cur;
3076 unlock_up(path, level, 1);
3077 }
3078out:
3079 if (ret == 0)
3080 memcpy(min_key, &found_key, sizeof(found_key));
3081 return ret;
3082}
3083
3084/*
3085 * this is similar to btrfs_next_leaf, but does not try to preserve
3086 * and fixup the path. It looks for and returns the next key in the
3087 * tree based on the current path and the cache_only and min_trans
3088 * parameters.
3089 *
3090 * 0 is returned if another key is found, < 0 if there are any errors
3091 * and 1 is returned if there are no higher keys in the tree
3092 *
3093 * path->keep_locks should be set to 1 on the search made before
3094 * calling this function.
3095 */
2972int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 3096int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
2973 struct btrfs_key *key, int lowest_level) 3097 struct btrfs_key *key, int lowest_level,
3098 int cache_only, u64 min_trans)
2974{ 3099{
2975 int level = lowest_level; 3100 int level = lowest_level;
2976 int slot; 3101 int slot;
@@ -2982,6 +3107,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
2982 3107
2983 slot = path->slots[level] + 1; 3108 slot = path->slots[level] + 1;
2984 c = path->nodes[level]; 3109 c = path->nodes[level];
3110next:
2985 if (slot >= btrfs_header_nritems(c)) { 3111 if (slot >= btrfs_header_nritems(c)) {
2986 level++; 3112 level++;
2987 if (level == BTRFS_MAX_LEVEL) { 3113 if (level == BTRFS_MAX_LEVEL) {
@@ -2991,8 +3117,28 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
2991 } 3117 }
2992 if (level == 0) 3118 if (level == 0)
2993 btrfs_item_key_to_cpu(c, key, slot); 3119 btrfs_item_key_to_cpu(c, key, slot);
2994 else 3120 else {
3121 u64 blockptr = btrfs_node_blockptr(c, slot);
3122 u64 gen = btrfs_node_ptr_generation(c, slot);
3123
3124 if (cache_only) {
3125 struct extent_buffer *cur;
3126 cur = btrfs_find_tree_block(root, blockptr,
3127 btrfs_level_size(root, level - 1));
3128 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
3129 slot++;
3130 if (cur)
3131 free_extent_buffer(cur);
3132 goto next;
3133 }
3134 free_extent_buffer(cur);
3135 }
3136 if (gen < min_trans) {
3137 slot++;
3138 goto next;
3139 }
2995 btrfs_node_key_to_cpu(c, key, slot); 3140 btrfs_node_key_to_cpu(c, key, slot);
3141 }
2996 return 0; 3142 return 0;
2997 } 3143 }
2998 return 1; 3144 return 1;
@@ -3095,6 +3241,12 @@ done:
3095 return 0; 3241 return 0;
3096} 3242}
3097 3243
3244/*
3245 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
3246 * searching until it gets past min_objectid or finds an item of 'type'
3247 *
3248 * returns 0 if something is found, 1 if nothing was found and < 0 on error
3249 */
3098int btrfs_previous_item(struct btrfs_root *root, 3250int btrfs_previous_item(struct btrfs_root *root,
3099 struct btrfs_path *path, u64 min_objectid, 3251 struct btrfs_path *path, u64 min_objectid,
3100 int type) 3252 int type)