aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2007-08-07 16:15:09 -0400
committerDavid Woodhouse <dwmw2@hera.kernel.org>2007-08-07 16:15:09 -0400
commit6702ed490ca0bb44e17131818a5a18b773957c5a (patch)
treef6bc4b77e58506a68f1ab539ca6c96854f39f92d /fs/btrfs/ctree.c
parent3c69faecb8d83cb2ef085a98b196a3fecea67725 (diff)
Btrfs: Add run time btree defrag, and an ioctl to force btree defrag
This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c195
1 files changed, 153 insertions, 42 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7a08491e208..c7e47e77723 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
65 memset(p, 0, sizeof(*p)); 65 memset(p, 0, sizeof(*p));
66} 66}
67 67
68static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root 68static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
69 *root, struct buffer_head *buf, struct buffer_head 69 *root, struct buffer_head *buf, struct buffer_head
70 *parent, int parent_slot, struct buffer_head 70 *parent, int parent_slot, struct buffer_head
71 **cow_ret) 71 **cow_ret, u64 search_start, u64 empty_size)
72{ 72{
73 struct buffer_head *cow; 73 struct buffer_head *cow;
74 struct btrfs_node *cow_node; 74 struct btrfs_node *cow_node;
75 int ret; 75 int ret = 0;
76 int different_trans = 0;
76 77
78 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
77 WARN_ON(!buffer_uptodate(buf)); 79 WARN_ON(!buffer_uptodate(buf));
78 if (trans->transaction != root->fs_info->running_transaction) { 80 cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
79 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
80 root->fs_info->running_transaction->transid);
81 WARN_ON(1);
82 }
83 if (trans->transid != root->fs_info->generation) {
84 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
85 root->fs_info->generation);
86 WARN_ON(1);
87 }
88 if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
89 trans->transid) {
90 *cow_ret = buf;
91 return 0;
92 }
93 cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
94 if (IS_ERR(cow)) 81 if (IS_ERR(cow))
95 return PTR_ERR(cow); 82 return PTR_ERR(cow);
83
96 cow_node = btrfs_buffer_node(cow); 84 cow_node = btrfs_buffer_node(cow);
97 if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) 85 if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
98 WARN_ON(1); 86 WARN_ON(1);
87
99 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); 88 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
100 btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); 89 btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
101 btrfs_set_header_generation(&cow_node->header, trans->transid); 90 btrfs_set_header_generation(&cow_node->header, trans->transid);
102 btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); 91 btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
103 ret = btrfs_inc_ref(trans, root, buf); 92
104 if (ret) 93 WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) >
105 return ret; 94 trans->transid);
95 if (btrfs_header_generation(btrfs_buffer_header(buf)) !=
96 trans->transid) {
97 different_trans = 1;
98 ret = btrfs_inc_ref(trans, root, buf);
99 if (ret)
100 return ret;
101 } else {
102 WARN_ON(!root->ref_cows);
103 clean_tree_block(trans, root, buf);
104 }
105
106 if (buf == root->node) { 106 if (buf == root->node) {
107 root->node = cow; 107 root->node = cow;
108 get_bh(cow); 108 get_bh(cow);
@@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
114 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, 114 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
115 bh_blocknr(cow)); 115 bh_blocknr(cow));
116 btrfs_mark_buffer_dirty(parent); 116 btrfs_mark_buffer_dirty(parent);
117 WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) !=
118 trans->transid);
117 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); 119 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
118 } 120 }
119 btrfs_block_release(root, buf); 121 btrfs_block_release(root, buf);
@@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
122 return 0; 124 return 0;
123} 125}
124 126
127int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
128 *root, struct buffer_head *buf, struct buffer_head
129 *parent, int parent_slot, struct buffer_head
130 **cow_ret)
131{
132 u64 search_start;
133 if (trans->transaction != root->fs_info->running_transaction) {
134 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
135 root->fs_info->running_transaction->transid);
136 WARN_ON(1);
137 }
138 if (trans->transid != root->fs_info->generation) {
139 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
140 root->fs_info->generation);
141 WARN_ON(1);
142 }
143 if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
144 trans->transid) {
145 *cow_ret = buf;
146 return 0;
147 }
148
149 search_start = bh_blocknr(buf) & ~((u64)65535);
150 return __btrfs_cow_block(trans, root, buf, parent,
151 parent_slot, cow_ret, search_start, 0);
152}
153
154static int close_blocks(u64 blocknr, u64 other)
155{
156 if (blocknr < other && other - blocknr < 8)
157 return 1;
158 if (blocknr > other && blocknr - other < 8)
159 return 1;
160 return 0;
161}
162
163int btrfs_realloc_node(struct btrfs_trans_handle *trans,
164 struct btrfs_root *root, struct buffer_head *parent,
165 int cache_only)
166{
167 struct btrfs_node *parent_node;
168 struct buffer_head *cur_bh;
169 struct buffer_head *tmp_bh;
170 u64 blocknr;
171 u64 search_start = 0;
172 u64 other;
173 u32 parent_nritems;
174 int start_slot;
175 int end_slot;
176 int i;
177 int err = 0;
178
179 if (trans->transaction != root->fs_info->running_transaction) {
180 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
181 root->fs_info->running_transaction->transid);
182 WARN_ON(1);
183 }
184 if (trans->transid != root->fs_info->generation) {
185 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
186 root->fs_info->generation);
187 WARN_ON(1);
188 }
189 parent_node = btrfs_buffer_node(parent);
190 parent_nritems = btrfs_header_nritems(&parent_node->header);
191
192 start_slot = 0;
193 end_slot = parent_nritems;
194
195 if (parent_nritems == 1)
196 return 0;
197
198 for (i = start_slot; i < end_slot; i++) {
199 int close = 1;
200 blocknr = btrfs_node_blockptr(parent_node, i);
201 if (i > 0) {
202 other = btrfs_node_blockptr(parent_node, i - 1);
203 close = close_blocks(blocknr, other);
204 }
205 if (close && i < end_slot - 1) {
206 other = btrfs_node_blockptr(parent_node, i + 1);
207 close = close_blocks(blocknr, other);
208 }
209 if (close)
210 continue;
211
212 cur_bh = btrfs_find_tree_block(root, blocknr);
213 if (!cur_bh || !buffer_uptodate(cur_bh) ||
214 buffer_locked(cur_bh)) {
215 if (cache_only) {
216 brelse(cur_bh);
217 continue;
218 }
219 brelse(cur_bh);
220 cur_bh = read_tree_block(root, blocknr);
221 }
222 if (search_start == 0) {
223 search_start = bh_blocknr(cur_bh) & ~((u64)65535);
224 }
225 err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
226 &tmp_bh, search_start,
227 min(8, end_slot - i));
228 if (err)
229 break;
230 search_start = bh_blocknr(tmp_bh);
231 brelse(tmp_bh);
232 }
233 return err;
234}
235
125/* 236/*
126 * The leaf data grows from end-to-front in the node. 237 * The leaf data grows from end-to-front in the node.
127 * this returns the address of the start of the last item, 238 * this returns the address of the start of the last item,
@@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
221 332
222 parent_slot = path->slots[level + 1]; 333 parent_slot = path->slots[level + 1];
223 parent_key = &parent->ptrs[parent_slot].key; 334 parent_key = &parent->ptrs[parent_slot].key;
335
224 BUG_ON(memcmp(parent_key, &leaf->items[0].key, 336 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
225 sizeof(struct btrfs_disk_key))); 337 sizeof(struct btrfs_disk_key)));
226 BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 338 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
@@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
643 * readahead one full node of leaves 755 * readahead one full node of leaves
644 */ 756 */
645static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, 757static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
646 int slot) 758 int level, int slot)
647{ 759{
648 struct btrfs_node *node; 760 struct btrfs_node *node;
649 int i; 761 int i;
@@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
659 unsigned long gang[8]; 771 unsigned long gang[8];
660 struct buffer_head *bh; 772 struct buffer_head *bh;
661 773
662 if (!path->nodes[1]) 774 if (level == 0)
775 return;
776
777 if (!path->nodes[level])
663 return; 778 return;
664 779
665 node = btrfs_buffer_node(path->nodes[1]); 780 node = btrfs_buffer_node(path->nodes[level]);
666 search = btrfs_node_blockptr(node, slot); 781 search = btrfs_node_blockptr(node, slot);
667 bh = btrfs_find_tree_block(root, search); 782 bh = btrfs_find_tree_block(root, search);
668 if (bh) { 783 if (bh) {
@@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
690 for (i = 0; i < ret; i++) { 805 for (i = 0; i < ret; i++) {
691 blocknr = gang[i]; 806 blocknr = gang[i];
692 clear_radix_bit(&found, blocknr); 807 clear_radix_bit(&found, blocknr);
693 if (nread > 64) 808 if (nread > 32)
694 continue; 809 continue;
695 if (direction > 0 && cluster_start <= blocknr && 810 if (direction > 0 && cluster_start <= blocknr &&
696 cluster_start + 8 > blocknr) { 811 cluster_start + 8 > blocknr) {
@@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
726 struct buffer_head *b; 841 struct buffer_head *b;
727 struct buffer_head *cow_buf; 842 struct buffer_head *cow_buf;
728 struct btrfs_node *c; 843 struct btrfs_node *c;
729 struct btrfs_root_item *root_item = &root->root_item;
730 u64 blocknr; 844 u64 blocknr;
731 int slot; 845 int slot;
732 int ret; 846 int ret;
@@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
734 int should_reada = p->reada; 848 int should_reada = p->reada;
735 u8 lowest_level = 0; 849 u8 lowest_level = 0;
736 850
737 if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { 851 lowest_level = p->lowest_level;
738 lowest_level = root_item->drop_level; 852 WARN_ON(lowest_level && ins_len);
739 WARN_ON(ins_len || cow);
740 }
741
742 WARN_ON(p->nodes[0] != NULL); 853 WARN_ON(p->nodes[0] != NULL);
743 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); 854 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
744again: 855again:
@@ -798,8 +909,8 @@ again:
798 if (level == lowest_level) 909 if (level == lowest_level)
799 break; 910 break;
800 blocknr = btrfs_node_blockptr(c, slot); 911 blocknr = btrfs_node_blockptr(c, slot);
801 if (level == 1 && should_reada) 912 if (should_reada)
802 reada_for_search(root, p, slot); 913 reada_for_search(root, p, level, slot);
803 b = read_tree_block(root, btrfs_node_blockptr(c, slot)); 914 b = read_tree_block(root, btrfs_node_blockptr(c, slot));
804 915
805 } else { 916 } else {
@@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
960 BUG_ON(path->nodes[level]); 1071 BUG_ON(path->nodes[level]);
961 BUG_ON(path->nodes[level-1] != root->node); 1072 BUG_ON(path->nodes[level-1] != root->node);
962 1073
963 t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); 1074 t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0);
964 if (IS_ERR(t)) 1075 if (IS_ERR(t))
965 return PTR_ERR(t); 1076 return PTR_ERR(t);
966 c = btrfs_buffer_node(t); 1077 c = btrfs_buffer_node(t);
@@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1070 } 1181 }
1071 1182
1072 c_nritems = btrfs_header_nritems(&c->header); 1183 c_nritems = btrfs_header_nritems(&c->header);
1073 split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); 1184 split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0);
1074 if (IS_ERR(split_buffer)) 1185 if (IS_ERR(split_buffer))
1075 return PTR_ERR(split_buffer); 1186 return PTR_ERR(split_buffer);
1076 1187
@@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1461 nritems = btrfs_header_nritems(&l->header); 1572 nritems = btrfs_header_nritems(&l->header);
1462 mid = (nritems + 1)/ 2; 1573 mid = (nritems + 1)/ 2;
1463 1574
1464 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); 1575 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
1465 if (IS_ERR(right_buffer)) 1576 if (IS_ERR(right_buffer))
1466 return PTR_ERR(right_buffer); 1577 return PTR_ERR(right_buffer);
1467 1578
@@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1560 1671
1561 if (!double_split) 1672 if (!double_split)
1562 return ret; 1673 return ret;
1563 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); 1674 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
1564 if (IS_ERR(right_buffer)) 1675 if (IS_ERR(right_buffer))
1565 return PTR_ERR(right_buffer); 1676 return PTR_ERR(right_buffer);
1566 1677
@@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1988 blocknr = btrfs_node_blockptr(c_node, slot); 2099 blocknr = btrfs_node_blockptr(c_node, slot);
1989 if (next) 2100 if (next)
1990 btrfs_block_release(root, next); 2101 btrfs_block_release(root, next);
1991 if (level == 1 && path->reada) 2102 if (path->reada)
1992 reada_for_search(root, path, slot); 2103 reada_for_search(root, path, level, slot);
1993 next = read_tree_block(root, blocknr); 2104 next = read_tree_block(root, blocknr);
1994 break; 2105 break;
1995 } 2106 }
@@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2002 path->slots[level] = 0; 2113 path->slots[level] = 0;
2003 if (!level) 2114 if (!level)
2004 break; 2115 break;
2005 if (level == 1 && path->reada) 2116 if (path->reada)
2006 reada_for_search(root, path, slot); 2117 reada_for_search(root, path, level, slot);
2007 next = read_tree_block(root, 2118 next = read_tree_block(root,
2008 btrfs_node_blockptr(btrfs_buffer_node(next), 0)); 2119 btrfs_node_blockptr(btrfs_buffer_node(next), 0));
2009 } 2120 }