diff options
author | Chris Mason <chris.mason@oracle.com> | 2007-08-07 16:15:09 -0400 |
---|---|---|
committer | David Woodhouse <dwmw2@hera.kernel.org> | 2007-08-07 16:15:09 -0400 |
commit | 6702ed490ca0bb44e17131818a5a18b773957c5a (patch) | |
tree | f6bc4b77e58506a68f1ab539ca6c96854f39f92d /fs/btrfs/ctree.c | |
parent | 3c69faecb8d83cb2ef085a98b196a3fecea67725 (diff) |
Btrfs: Add run time btree defrag, and an ioctl to force btree defrag
This adds two types of btree defrag, a run time form that tries to
defrag recently allocated blocks in the btree when they are still in ram,
and an ioctl that forces defrag of all btree blocks.
File data blocks are not defragged yet, but this can make a huge difference
in sequential btree reads.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 195 |
1 files changed, 153 insertions, 42 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7a08491e208e..c7e47e77723f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) | |||
65 | memset(p, 0, sizeof(*p)); | 65 | memset(p, 0, sizeof(*p)); |
66 | } | 66 | } |
67 | 67 | ||
68 | static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root | 68 | static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root |
69 | *root, struct buffer_head *buf, struct buffer_head | 69 | *root, struct buffer_head *buf, struct buffer_head |
70 | *parent, int parent_slot, struct buffer_head | 70 | *parent, int parent_slot, struct buffer_head |
71 | **cow_ret) | 71 | **cow_ret, u64 search_start, u64 empty_size) |
72 | { | 72 | { |
73 | struct buffer_head *cow; | 73 | struct buffer_head *cow; |
74 | struct btrfs_node *cow_node; | 74 | struct btrfs_node *cow_node; |
75 | int ret; | 75 | int ret = 0; |
76 | int different_trans = 0; | ||
76 | 77 | ||
78 | WARN_ON(root->ref_cows && trans->transid != root->last_trans); | ||
77 | WARN_ON(!buffer_uptodate(buf)); | 79 | WARN_ON(!buffer_uptodate(buf)); |
78 | if (trans->transaction != root->fs_info->running_transaction) { | 80 | cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); |
79 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
80 | root->fs_info->running_transaction->transid); | ||
81 | WARN_ON(1); | ||
82 | } | ||
83 | if (trans->transid != root->fs_info->generation) { | ||
84 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
85 | root->fs_info->generation); | ||
86 | WARN_ON(1); | ||
87 | } | ||
88 | if (btrfs_header_generation(btrfs_buffer_header(buf)) == | ||
89 | trans->transid) { | ||
90 | *cow_ret = buf; | ||
91 | return 0; | ||
92 | } | ||
93 | cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); | ||
94 | if (IS_ERR(cow)) | 81 | if (IS_ERR(cow)) |
95 | return PTR_ERR(cow); | 82 | return PTR_ERR(cow); |
83 | |||
96 | cow_node = btrfs_buffer_node(cow); | 84 | cow_node = btrfs_buffer_node(cow); |
97 | if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) | 85 | if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) |
98 | WARN_ON(1); | 86 | WARN_ON(1); |
87 | |||
99 | memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); | 88 | memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); |
100 | btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); | 89 | btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); |
101 | btrfs_set_header_generation(&cow_node->header, trans->transid); | 90 | btrfs_set_header_generation(&cow_node->header, trans->transid); |
102 | btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); | 91 | btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); |
103 | ret = btrfs_inc_ref(trans, root, buf); | 92 | |
104 | if (ret) | 93 | WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > |
105 | return ret; | 94 | trans->transid); |
95 | if (btrfs_header_generation(btrfs_buffer_header(buf)) != | ||
96 | trans->transid) { | ||
97 | different_trans = 1; | ||
98 | ret = btrfs_inc_ref(trans, root, buf); | ||
99 | if (ret) | ||
100 | return ret; | ||
101 | } else { | ||
102 | WARN_ON(!root->ref_cows); | ||
103 | clean_tree_block(trans, root, buf); | ||
104 | } | ||
105 | |||
106 | if (buf == root->node) { | 106 | if (buf == root->node) { |
107 | root->node = cow; | 107 | root->node = cow; |
108 | get_bh(cow); | 108 | get_bh(cow); |
@@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root | |||
114 | btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, | 114 | btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, |
115 | bh_blocknr(cow)); | 115 | bh_blocknr(cow)); |
116 | btrfs_mark_buffer_dirty(parent); | 116 | btrfs_mark_buffer_dirty(parent); |
117 | WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != | ||
118 | trans->transid); | ||
117 | btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); | 119 | btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); |
118 | } | 120 | } |
119 | btrfs_block_release(root, buf); | 121 | btrfs_block_release(root, buf); |
@@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root | |||
122 | return 0; | 124 | return 0; |
123 | } | 125 | } |
124 | 126 | ||
127 | int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root | ||
128 | *root, struct buffer_head *buf, struct buffer_head | ||
129 | *parent, int parent_slot, struct buffer_head | ||
130 | **cow_ret) | ||
131 | { | ||
132 | u64 search_start; | ||
133 | if (trans->transaction != root->fs_info->running_transaction) { | ||
134 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
135 | root->fs_info->running_transaction->transid); | ||
136 | WARN_ON(1); | ||
137 | } | ||
138 | if (trans->transid != root->fs_info->generation) { | ||
139 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
140 | root->fs_info->generation); | ||
141 | WARN_ON(1); | ||
142 | } | ||
143 | if (btrfs_header_generation(btrfs_buffer_header(buf)) == | ||
144 | trans->transid) { | ||
145 | *cow_ret = buf; | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | search_start = bh_blocknr(buf) & ~((u64)65535); | ||
150 | return __btrfs_cow_block(trans, root, buf, parent, | ||
151 | parent_slot, cow_ret, search_start, 0); | ||
152 | } | ||
153 | |||
154 | static int close_blocks(u64 blocknr, u64 other) | ||
155 | { | ||
156 | if (blocknr < other && other - blocknr < 8) | ||
157 | return 1; | ||
158 | if (blocknr > other && blocknr - other < 8) | ||
159 | return 1; | ||
160 | return 0; | ||
161 | } | ||
162 | |||
163 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | ||
164 | struct btrfs_root *root, struct buffer_head *parent, | ||
165 | int cache_only) | ||
166 | { | ||
167 | struct btrfs_node *parent_node; | ||
168 | struct buffer_head *cur_bh; | ||
169 | struct buffer_head *tmp_bh; | ||
170 | u64 blocknr; | ||
171 | u64 search_start = 0; | ||
172 | u64 other; | ||
173 | u32 parent_nritems; | ||
174 | int start_slot; | ||
175 | int end_slot; | ||
176 | int i; | ||
177 | int err = 0; | ||
178 | |||
179 | if (trans->transaction != root->fs_info->running_transaction) { | ||
180 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
181 | root->fs_info->running_transaction->transid); | ||
182 | WARN_ON(1); | ||
183 | } | ||
184 | if (trans->transid != root->fs_info->generation) { | ||
185 | printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, | ||
186 | root->fs_info->generation); | ||
187 | WARN_ON(1); | ||
188 | } | ||
189 | parent_node = btrfs_buffer_node(parent); | ||
190 | parent_nritems = btrfs_header_nritems(&parent_node->header); | ||
191 | |||
192 | start_slot = 0; | ||
193 | end_slot = parent_nritems; | ||
194 | |||
195 | if (parent_nritems == 1) | ||
196 | return 0; | ||
197 | |||
198 | for (i = start_slot; i < end_slot; i++) { | ||
199 | int close = 1; | ||
200 | blocknr = btrfs_node_blockptr(parent_node, i); | ||
201 | if (i > 0) { | ||
202 | other = btrfs_node_blockptr(parent_node, i - 1); | ||
203 | close = close_blocks(blocknr, other); | ||
204 | } | ||
205 | if (close && i < end_slot - 1) { | ||
206 | other = btrfs_node_blockptr(parent_node, i + 1); | ||
207 | close = close_blocks(blocknr, other); | ||
208 | } | ||
209 | if (close) | ||
210 | continue; | ||
211 | |||
212 | cur_bh = btrfs_find_tree_block(root, blocknr); | ||
213 | if (!cur_bh || !buffer_uptodate(cur_bh) || | ||
214 | buffer_locked(cur_bh)) { | ||
215 | if (cache_only) { | ||
216 | brelse(cur_bh); | ||
217 | continue; | ||
218 | } | ||
219 | brelse(cur_bh); | ||
220 | cur_bh = read_tree_block(root, blocknr); | ||
221 | } | ||
222 | if (search_start == 0) { | ||
223 | search_start = bh_blocknr(cur_bh) & ~((u64)65535); | ||
224 | } | ||
225 | err = __btrfs_cow_block(trans, root, cur_bh, parent, i, | ||
226 | &tmp_bh, search_start, | ||
227 | min(8, end_slot - i)); | ||
228 | if (err) | ||
229 | break; | ||
230 | search_start = bh_blocknr(tmp_bh); | ||
231 | brelse(tmp_bh); | ||
232 | } | ||
233 | return err; | ||
234 | } | ||
235 | |||
125 | /* | 236 | /* |
126 | * The leaf data grows from end-to-front in the node. | 237 | * The leaf data grows from end-to-front in the node. |
127 | * this returns the address of the start of the last item, | 238 | * this returns the address of the start of the last item, |
@@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, | |||
221 | 332 | ||
222 | parent_slot = path->slots[level + 1]; | 333 | parent_slot = path->slots[level + 1]; |
223 | parent_key = &parent->ptrs[parent_slot].key; | 334 | parent_key = &parent->ptrs[parent_slot].key; |
335 | |||
224 | BUG_ON(memcmp(parent_key, &leaf->items[0].key, | 336 | BUG_ON(memcmp(parent_key, &leaf->items[0].key, |
225 | sizeof(struct btrfs_disk_key))); | 337 | sizeof(struct btrfs_disk_key))); |
226 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != | 338 | BUG_ON(btrfs_node_blockptr(parent, parent_slot) != |
@@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
643 | * readahead one full node of leaves | 755 | * readahead one full node of leaves |
644 | */ | 756 | */ |
645 | static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, | 757 | static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, |
646 | int slot) | 758 | int level, int slot) |
647 | { | 759 | { |
648 | struct btrfs_node *node; | 760 | struct btrfs_node *node; |
649 | int i; | 761 | int i; |
@@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, | |||
659 | unsigned long gang[8]; | 771 | unsigned long gang[8]; |
660 | struct buffer_head *bh; | 772 | struct buffer_head *bh; |
661 | 773 | ||
662 | if (!path->nodes[1]) | 774 | if (level == 0) |
775 | return; | ||
776 | |||
777 | if (!path->nodes[level]) | ||
663 | return; | 778 | return; |
664 | 779 | ||
665 | node = btrfs_buffer_node(path->nodes[1]); | 780 | node = btrfs_buffer_node(path->nodes[level]); |
666 | search = btrfs_node_blockptr(node, slot); | 781 | search = btrfs_node_blockptr(node, slot); |
667 | bh = btrfs_find_tree_block(root, search); | 782 | bh = btrfs_find_tree_block(root, search); |
668 | if (bh) { | 783 | if (bh) { |
@@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, | |||
690 | for (i = 0; i < ret; i++) { | 805 | for (i = 0; i < ret; i++) { |
691 | blocknr = gang[i]; | 806 | blocknr = gang[i]; |
692 | clear_radix_bit(&found, blocknr); | 807 | clear_radix_bit(&found, blocknr); |
693 | if (nread > 64) | 808 | if (nread > 32) |
694 | continue; | 809 | continue; |
695 | if (direction > 0 && cluster_start <= blocknr && | 810 | if (direction > 0 && cluster_start <= blocknr && |
696 | cluster_start + 8 > blocknr) { | 811 | cluster_start + 8 > blocknr) { |
@@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
726 | struct buffer_head *b; | 841 | struct buffer_head *b; |
727 | struct buffer_head *cow_buf; | 842 | struct buffer_head *cow_buf; |
728 | struct btrfs_node *c; | 843 | struct btrfs_node *c; |
729 | struct btrfs_root_item *root_item = &root->root_item; | ||
730 | u64 blocknr; | 844 | u64 blocknr; |
731 | int slot; | 845 | int slot; |
732 | int ret; | 846 | int ret; |
@@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
734 | int should_reada = p->reada; | 848 | int should_reada = p->reada; |
735 | u8 lowest_level = 0; | 849 | u8 lowest_level = 0; |
736 | 850 | ||
737 | if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { | 851 | lowest_level = p->lowest_level; |
738 | lowest_level = root_item->drop_level; | 852 | WARN_ON(lowest_level && ins_len); |
739 | WARN_ON(ins_len || cow); | ||
740 | } | ||
741 | |||
742 | WARN_ON(p->nodes[0] != NULL); | 853 | WARN_ON(p->nodes[0] != NULL); |
743 | WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); | 854 | WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); |
744 | again: | 855 | again: |
@@ -798,8 +909,8 @@ again: | |||
798 | if (level == lowest_level) | 909 | if (level == lowest_level) |
799 | break; | 910 | break; |
800 | blocknr = btrfs_node_blockptr(c, slot); | 911 | blocknr = btrfs_node_blockptr(c, slot); |
801 | if (level == 1 && should_reada) | 912 | if (should_reada) |
802 | reada_for_search(root, p, slot); | 913 | reada_for_search(root, p, level, slot); |
803 | b = read_tree_block(root, btrfs_node_blockptr(c, slot)); | 914 | b = read_tree_block(root, btrfs_node_blockptr(c, slot)); |
804 | 915 | ||
805 | } else { | 916 | } else { |
@@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
960 | BUG_ON(path->nodes[level]); | 1071 | BUG_ON(path->nodes[level]); |
961 | BUG_ON(path->nodes[level-1] != root->node); | 1072 | BUG_ON(path->nodes[level-1] != root->node); |
962 | 1073 | ||
963 | t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); | 1074 | t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); |
964 | if (IS_ERR(t)) | 1075 | if (IS_ERR(t)) |
965 | return PTR_ERR(t); | 1076 | return PTR_ERR(t); |
966 | c = btrfs_buffer_node(t); | 1077 | c = btrfs_buffer_node(t); |
@@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1070 | } | 1181 | } |
1071 | 1182 | ||
1072 | c_nritems = btrfs_header_nritems(&c->header); | 1183 | c_nritems = btrfs_header_nritems(&c->header); |
1073 | split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); | 1184 | split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); |
1074 | if (IS_ERR(split_buffer)) | 1185 | if (IS_ERR(split_buffer)) |
1075 | return PTR_ERR(split_buffer); | 1186 | return PTR_ERR(split_buffer); |
1076 | 1187 | ||
@@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1461 | nritems = btrfs_header_nritems(&l->header); | 1572 | nritems = btrfs_header_nritems(&l->header); |
1462 | mid = (nritems + 1)/ 2; | 1573 | mid = (nritems + 1)/ 2; |
1463 | 1574 | ||
1464 | right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); | 1575 | right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); |
1465 | if (IS_ERR(right_buffer)) | 1576 | if (IS_ERR(right_buffer)) |
1466 | return PTR_ERR(right_buffer); | 1577 | return PTR_ERR(right_buffer); |
1467 | 1578 | ||
@@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1560 | 1671 | ||
1561 | if (!double_split) | 1672 | if (!double_split) |
1562 | return ret; | 1673 | return ret; |
1563 | right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); | 1674 | right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); |
1564 | if (IS_ERR(right_buffer)) | 1675 | if (IS_ERR(right_buffer)) |
1565 | return PTR_ERR(right_buffer); | 1676 | return PTR_ERR(right_buffer); |
1566 | 1677 | ||
@@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
1988 | blocknr = btrfs_node_blockptr(c_node, slot); | 2099 | blocknr = btrfs_node_blockptr(c_node, slot); |
1989 | if (next) | 2100 | if (next) |
1990 | btrfs_block_release(root, next); | 2101 | btrfs_block_release(root, next); |
1991 | if (level == 1 && path->reada) | 2102 | if (path->reada) |
1992 | reada_for_search(root, path, slot); | 2103 | reada_for_search(root, path, level, slot); |
1993 | next = read_tree_block(root, blocknr); | 2104 | next = read_tree_block(root, blocknr); |
1994 | break; | 2105 | break; |
1995 | } | 2106 | } |
@@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
2002 | path->slots[level] = 0; | 2113 | path->slots[level] = 0; |
2003 | if (!level) | 2114 | if (!level) |
2004 | break; | 2115 | break; |
2005 | if (level == 1 && path->reada) | 2116 | if (path->reada) |
2006 | reada_for_search(root, path, slot); | 2117 | reada_for_search(root, path, level, slot); |
2007 | next = read_tree_block(root, | 2118 | next = read_tree_block(root, |
2008 | btrfs_node_blockptr(btrfs_buffer_node(next), 0)); | 2119 | btrfs_node_blockptr(btrfs_buffer_node(next), 0)); |
2009 | } | 2120 | } |