aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2007-08-07 16:15:09 -0400
committerDavid Woodhouse <dwmw2@hera.kernel.org>2007-08-07 16:15:09 -0400
commit6702ed490ca0bb44e17131818a5a18b773957c5a (patch)
treef6bc4b77e58506a68f1ab539ca6c96854f39f92d
parent3c69faecb8d83cb2ef085a98b196a3fecea67725 (diff)
Btrfs: Add run time btree defrag, and an ioctl to force btree defrag
This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c195
-rw-r--r--fs/btrfs/ctree.h19
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c53
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c33
-rw-r--r--fs/btrfs/ioctl.h2
-rw-r--r--fs/btrfs/transaction.c105
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/tree-defrag.c222
11 files changed, 553 insertions, 89 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a4e2df66bdbb..9321438c5eef 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
4obj-m := btrfs.o 4obj-m := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \ 6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o bit-radix.o inode.o file.o 7 transaction.o bit-radix.o inode.o file.o tree-defrag.o
8 8
9#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ 9#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
10# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ 10# root-tree.o dir-item.o hash.o file-item.o inode-item.o \
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7a08491e208e..c7e47e77723f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
65 memset(p, 0, sizeof(*p)); 65 memset(p, 0, sizeof(*p));
66} 66}
67 67
68static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root 68static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
69 *root, struct buffer_head *buf, struct buffer_head 69 *root, struct buffer_head *buf, struct buffer_head
70 *parent, int parent_slot, struct buffer_head 70 *parent, int parent_slot, struct buffer_head
71 **cow_ret) 71 **cow_ret, u64 search_start, u64 empty_size)
72{ 72{
73 struct buffer_head *cow; 73 struct buffer_head *cow;
74 struct btrfs_node *cow_node; 74 struct btrfs_node *cow_node;
75 int ret; 75 int ret = 0;
76 int different_trans = 0;
76 77
78 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
77 WARN_ON(!buffer_uptodate(buf)); 79 WARN_ON(!buffer_uptodate(buf));
78 if (trans->transaction != root->fs_info->running_transaction) { 80 cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
79 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
80 root->fs_info->running_transaction->transid);
81 WARN_ON(1);
82 }
83 if (trans->transid != root->fs_info->generation) {
84 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
85 root->fs_info->generation);
86 WARN_ON(1);
87 }
88 if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
89 trans->transid) {
90 *cow_ret = buf;
91 return 0;
92 }
93 cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
94 if (IS_ERR(cow)) 81 if (IS_ERR(cow))
95 return PTR_ERR(cow); 82 return PTR_ERR(cow);
83
96 cow_node = btrfs_buffer_node(cow); 84 cow_node = btrfs_buffer_node(cow);
97 if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) 85 if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
98 WARN_ON(1); 86 WARN_ON(1);
87
99 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); 88 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
100 btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); 89 btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
101 btrfs_set_header_generation(&cow_node->header, trans->transid); 90 btrfs_set_header_generation(&cow_node->header, trans->transid);
102 btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); 91 btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
103 ret = btrfs_inc_ref(trans, root, buf); 92
104 if (ret) 93 WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) >
105 return ret; 94 trans->transid);
95 if (btrfs_header_generation(btrfs_buffer_header(buf)) !=
96 trans->transid) {
97 different_trans = 1;
98 ret = btrfs_inc_ref(trans, root, buf);
99 if (ret)
100 return ret;
101 } else {
102 WARN_ON(!root->ref_cows);
103 clean_tree_block(trans, root, buf);
104 }
105
106 if (buf == root->node) { 106 if (buf == root->node) {
107 root->node = cow; 107 root->node = cow;
108 get_bh(cow); 108 get_bh(cow);
@@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
114 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, 114 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
115 bh_blocknr(cow)); 115 bh_blocknr(cow));
116 btrfs_mark_buffer_dirty(parent); 116 btrfs_mark_buffer_dirty(parent);
117 WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) !=
118 trans->transid);
117 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); 119 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
118 } 120 }
119 btrfs_block_release(root, buf); 121 btrfs_block_release(root, buf);
@@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
122 return 0; 124 return 0;
123} 125}
124 126
127int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
128 *root, struct buffer_head *buf, struct buffer_head
129 *parent, int parent_slot, struct buffer_head
130 **cow_ret)
131{
132 u64 search_start;
133 if (trans->transaction != root->fs_info->running_transaction) {
134 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
135 root->fs_info->running_transaction->transid);
136 WARN_ON(1);
137 }
138 if (trans->transid != root->fs_info->generation) {
139 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
140 root->fs_info->generation);
141 WARN_ON(1);
142 }
143 if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
144 trans->transid) {
145 *cow_ret = buf;
146 return 0;
147 }
148
149 search_start = bh_blocknr(buf) & ~((u64)65535);
150 return __btrfs_cow_block(trans, root, buf, parent,
151 parent_slot, cow_ret, search_start, 0);
152}
153
154static int close_blocks(u64 blocknr, u64 other)
155{
156 if (blocknr < other && other - blocknr < 8)
157 return 1;
158 if (blocknr > other && blocknr - other < 8)
159 return 1;
160 return 0;
161}
162
163int btrfs_realloc_node(struct btrfs_trans_handle *trans,
164 struct btrfs_root *root, struct buffer_head *parent,
165 int cache_only)
166{
167 struct btrfs_node *parent_node;
168 struct buffer_head *cur_bh;
169 struct buffer_head *tmp_bh;
170 u64 blocknr;
171 u64 search_start = 0;
172 u64 other;
173 u32 parent_nritems;
174 int start_slot;
175 int end_slot;
176 int i;
177 int err = 0;
178
179 if (trans->transaction != root->fs_info->running_transaction) {
180 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
181 root->fs_info->running_transaction->transid);
182 WARN_ON(1);
183 }
184 if (trans->transid != root->fs_info->generation) {
185 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
186 root->fs_info->generation);
187 WARN_ON(1);
188 }
189 parent_node = btrfs_buffer_node(parent);
190 parent_nritems = btrfs_header_nritems(&parent_node->header);
191
192 start_slot = 0;
193 end_slot = parent_nritems;
194
195 if (parent_nritems == 1)
196 return 0;
197
198 for (i = start_slot; i < end_slot; i++) {
199 int close = 1;
200 blocknr = btrfs_node_blockptr(parent_node, i);
201 if (i > 0) {
202 other = btrfs_node_blockptr(parent_node, i - 1);
203 close = close_blocks(blocknr, other);
204 }
205 if (close && i < end_slot - 1) {
206 other = btrfs_node_blockptr(parent_node, i + 1);
207 close = close_blocks(blocknr, other);
208 }
209 if (close)
210 continue;
211
212 cur_bh = btrfs_find_tree_block(root, blocknr);
213 if (!cur_bh || !buffer_uptodate(cur_bh) ||
214 buffer_locked(cur_bh)) {
215 if (cache_only) {
216 brelse(cur_bh);
217 continue;
218 }
219 brelse(cur_bh);
220 cur_bh = read_tree_block(root, blocknr);
221 }
222 if (search_start == 0) {
223 search_start = bh_blocknr(cur_bh) & ~((u64)65535);
224 }
225 err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
226 &tmp_bh, search_start,
227 min(8, end_slot - i));
228 if (err)
229 break;
230 search_start = bh_blocknr(tmp_bh);
231 brelse(tmp_bh);
232 }
233 return err;
234}
235
125/* 236/*
126 * The leaf data grows from end-to-front in the node. 237 * The leaf data grows from end-to-front in the node.
127 * this returns the address of the start of the last item, 238 * this returns the address of the start of the last item,
@@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
221 332
222 parent_slot = path->slots[level + 1]; 333 parent_slot = path->slots[level + 1];
223 parent_key = &parent->ptrs[parent_slot].key; 334 parent_key = &parent->ptrs[parent_slot].key;
335
224 BUG_ON(memcmp(parent_key, &leaf->items[0].key, 336 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
225 sizeof(struct btrfs_disk_key))); 337 sizeof(struct btrfs_disk_key)));
226 BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 338 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
@@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
643 * readahead one full node of leaves 755 * readahead one full node of leaves
644 */ 756 */
645static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, 757static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
646 int slot) 758 int level, int slot)
647{ 759{
648 struct btrfs_node *node; 760 struct btrfs_node *node;
649 int i; 761 int i;
@@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
659 unsigned long gang[8]; 771 unsigned long gang[8];
660 struct buffer_head *bh; 772 struct buffer_head *bh;
661 773
662 if (!path->nodes[1]) 774 if (level == 0)
775 return;
776
777 if (!path->nodes[level])
663 return; 778 return;
664 779
665 node = btrfs_buffer_node(path->nodes[1]); 780 node = btrfs_buffer_node(path->nodes[level]);
666 search = btrfs_node_blockptr(node, slot); 781 search = btrfs_node_blockptr(node, slot);
667 bh = btrfs_find_tree_block(root, search); 782 bh = btrfs_find_tree_block(root, search);
668 if (bh) { 783 if (bh) {
@@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
690 for (i = 0; i < ret; i++) { 805 for (i = 0; i < ret; i++) {
691 blocknr = gang[i]; 806 blocknr = gang[i];
692 clear_radix_bit(&found, blocknr); 807 clear_radix_bit(&found, blocknr);
693 if (nread > 64) 808 if (nread > 32)
694 continue; 809 continue;
695 if (direction > 0 && cluster_start <= blocknr && 810 if (direction > 0 && cluster_start <= blocknr &&
696 cluster_start + 8 > blocknr) { 811 cluster_start + 8 > blocknr) {
@@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
726 struct buffer_head *b; 841 struct buffer_head *b;
727 struct buffer_head *cow_buf; 842 struct buffer_head *cow_buf;
728 struct btrfs_node *c; 843 struct btrfs_node *c;
729 struct btrfs_root_item *root_item = &root->root_item;
730 u64 blocknr; 844 u64 blocknr;
731 int slot; 845 int slot;
732 int ret; 846 int ret;
@@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
734 int should_reada = p->reada; 848 int should_reada = p->reada;
735 u8 lowest_level = 0; 849 u8 lowest_level = 0;
736 850
737 if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { 851 lowest_level = p->lowest_level;
738 lowest_level = root_item->drop_level; 852 WARN_ON(lowest_level && ins_len);
739 WARN_ON(ins_len || cow);
740 }
741
742 WARN_ON(p->nodes[0] != NULL); 853 WARN_ON(p->nodes[0] != NULL);
743 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); 854 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
744again: 855again:
@@ -798,8 +909,8 @@ again:
798 if (level == lowest_level) 909 if (level == lowest_level)
799 break; 910 break;
800 blocknr = btrfs_node_blockptr(c, slot); 911 blocknr = btrfs_node_blockptr(c, slot);
801 if (level == 1 && should_reada) 912 if (should_reada)
802 reada_for_search(root, p, slot); 913 reada_for_search(root, p, level, slot);
803 b = read_tree_block(root, btrfs_node_blockptr(c, slot)); 914 b = read_tree_block(root, btrfs_node_blockptr(c, slot));
804 915
805 } else { 916 } else {
@@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
960 BUG_ON(path->nodes[level]); 1071 BUG_ON(path->nodes[level]);
961 BUG_ON(path->nodes[level-1] != root->node); 1072 BUG_ON(path->nodes[level-1] != root->node);
962 1073
963 t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); 1074 t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0);
964 if (IS_ERR(t)) 1075 if (IS_ERR(t))
965 return PTR_ERR(t); 1076 return PTR_ERR(t);
966 c = btrfs_buffer_node(t); 1077 c = btrfs_buffer_node(t);
@@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1070 } 1181 }
1071 1182
1072 c_nritems = btrfs_header_nritems(&c->header); 1183 c_nritems = btrfs_header_nritems(&c->header);
1073 split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); 1184 split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0);
1074 if (IS_ERR(split_buffer)) 1185 if (IS_ERR(split_buffer))
1075 return PTR_ERR(split_buffer); 1186 return PTR_ERR(split_buffer);
1076 1187
@@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1461 nritems = btrfs_header_nritems(&l->header); 1572 nritems = btrfs_header_nritems(&l->header);
1462 mid = (nritems + 1)/ 2; 1573 mid = (nritems + 1)/ 2;
1463 1574
1464 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); 1575 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
1465 if (IS_ERR(right_buffer)) 1576 if (IS_ERR(right_buffer))
1466 return PTR_ERR(right_buffer); 1577 return PTR_ERR(right_buffer);
1467 1578
@@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1560 1671
1561 if (!double_split) 1672 if (!double_split)
1562 return ret; 1673 return ret;
1563 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); 1674 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
1564 if (IS_ERR(right_buffer)) 1675 if (IS_ERR(right_buffer))
1565 return PTR_ERR(right_buffer); 1676 return PTR_ERR(right_buffer);
1566 1677
@@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1988 blocknr = btrfs_node_blockptr(c_node, slot); 2099 blocknr = btrfs_node_blockptr(c_node, slot);
1989 if (next) 2100 if (next)
1990 btrfs_block_release(root, next); 2101 btrfs_block_release(root, next);
1991 if (level == 1 && path->reada) 2102 if (path->reada)
1992 reada_for_search(root, path, slot); 2103 reada_for_search(root, path, level, slot);
1993 next = read_tree_block(root, blocknr); 2104 next = read_tree_block(root, blocknr);
1994 break; 2105 break;
1995 } 2106 }
@@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2002 path->slots[level] = 0; 2113 path->slots[level] = 0;
2003 if (!level) 2114 if (!level)
2004 break; 2115 break;
2005 if (level == 1 && path->reada) 2116 if (path->reada)
2006 reada_for_search(root, path, slot); 2117 reada_for_search(root, path, level, slot);
2007 next = read_tree_block(root, 2118 next = read_tree_block(root,
2008 btrfs_node_blockptr(btrfs_buffer_node(next), 0)); 2119 btrfs_node_blockptr(btrfs_buffer_node(next), 0));
2009 } 2120 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c5a18d5d7f7c..42aa20301bc9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -178,6 +178,7 @@ struct btrfs_path {
178 struct buffer_head *nodes[BTRFS_MAX_LEVEL]; 178 struct buffer_head *nodes[BTRFS_MAX_LEVEL];
179 int slots[BTRFS_MAX_LEVEL]; 179 int slots[BTRFS_MAX_LEVEL];
180 int reada; 180 int reada;
181 int lowest_level;
181}; 182};
182 183
183/* 184/*
@@ -338,6 +339,9 @@ struct btrfs_root {
338 u64 highest_inode; 339 u64 highest_inode;
339 u64 last_inode_alloc; 340 u64 last_inode_alloc;
340 int ref_cows; 341 int ref_cows;
342 struct btrfs_key defrag_progress;
343 int defrag_running;
344 int defrag_level;
341}; 345};
342 346
343/* the lower bits in the key flags defines the item type */ 347/* the lower bits in the key flags defines the item type */
@@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
1031int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, 1035int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
1032 struct btrfs_root *root); 1036 struct btrfs_root *root);
1033struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1037struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1034 struct btrfs_root *root, u64 hint); 1038 struct btrfs_root *root, u64 hint,
1039 u64 empty_size);
1035int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1040int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1036 struct btrfs_root *root, u64 owner, 1041 struct btrfs_root *root, u64 owner,
1037 u64 num_blocks, u64 search_start, 1042 u64 num_blocks, u64 empty_size, u64 search_start,
1038 u64 search_end, struct btrfs_key *ins, int data); 1043 u64 search_end, struct btrfs_key *ins, int data);
1039int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1044int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1040 struct buffer_head *buf); 1045 struct buffer_head *buf);
@@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1051int btrfs_free_block_groups(struct btrfs_fs_info *info); 1056int btrfs_free_block_groups(struct btrfs_fs_info *info);
1052int btrfs_read_block_groups(struct btrfs_root *root); 1057int btrfs_read_block_groups(struct btrfs_root *root);
1053/* ctree.c */ 1058/* ctree.c */
1059int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
1060 *root, struct buffer_head *buf, struct buffer_head
1061 *parent, int parent_slot, struct buffer_head
1062 **cow_ret);
1054int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 1063int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1055 *root, struct btrfs_path *path, u32 data_size); 1064 *root, struct btrfs_path *path, u32 data_size);
1056int btrfs_truncate_item(struct btrfs_trans_handle *trans, 1065int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1060int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 1069int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1061 *root, struct btrfs_key *key, struct btrfs_path *p, int 1070 *root, struct btrfs_key *key, struct btrfs_path *p, int
1062 ins_len, int cow); 1071 ins_len, int cow);
1072int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1073 struct btrfs_root *root, struct buffer_head *parent,
1074 int cache_only);
1063void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); 1075void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
1064struct btrfs_path *btrfs_alloc_path(void); 1076struct btrfs_path *btrfs_alloc_path(void);
1065void btrfs_free_path(struct btrfs_path *p); 1077void btrfs_free_path(struct btrfs_path *p);
@@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations;
1171int btrfs_drop_extents(struct btrfs_trans_handle *trans, 1183int btrfs_drop_extents(struct btrfs_trans_handle *trans,
1172 struct btrfs_root *root, struct inode *inode, 1184 struct btrfs_root *root, struct inode *inode,
1173 u64 start, u64 end, u64 *hint_block); 1185 u64 start, u64 end, u64 *hint_block);
1186/* tree-defrag.c */
1187int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
1188 struct btrfs_root *root, int cache_only);
1174#endif 1189#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 60db85bc4ba8..c948416fea3a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
273 struct buffer_head *buf) 273 struct buffer_head *buf)
274{ 274{
275 WARN_ON(atomic_read(&buf->b_count) == 0); 275 WARN_ON(atomic_read(&buf->b_count) == 0);
276 lock_buffer(buf);
276 clear_buffer_dirty(buf); 277 clear_buffer_dirty(buf);
278 unlock_buffer(buf);
277 return 0; 279 return 0;
278} 280}
279 281
@@ -294,6 +296,9 @@ static int __setup_root(int blocksize,
294 root->last_inode_alloc = 0; 296 root->last_inode_alloc = 0;
295 memset(&root->root_key, 0, sizeof(root->root_key)); 297 memset(&root->root_key, 0, sizeof(root->root_key));
296 memset(&root->root_item, 0, sizeof(root->root_item)); 298 memset(&root->root_item, 0, sizeof(root->root_item));
299 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
300 root->defrag_running = 0;
301 root->defrag_level = 0;
297 root->root_key.objectid = objectid; 302 root->root_key.objectid = objectid;
298 return 0; 303 return 0;
299} 304}
@@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root)
585 fs_info->closing = 1; 590 fs_info->closing = 1;
586 btrfs_transaction_flush_work(root); 591 btrfs_transaction_flush_work(root);
587 mutex_lock(&fs_info->fs_mutex); 592 mutex_lock(&fs_info->fs_mutex);
593 btrfs_defrag_dirty_roots(root->fs_info);
588 trans = btrfs_start_transaction(root, 1); 594 trans = btrfs_start_transaction(root, 1);
589 ret = btrfs_commit_transaction(trans, root); 595 ret = btrfs_commit_transaction(trans, root);
590 /* run commit again to drop the original snapshot */ 596 /* run commit again to drop the original snapshot */
@@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh)
616{ 622{
617 struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; 623 struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root;
618 u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); 624 u64 transid = btrfs_header_generation(btrfs_buffer_header(bh));
625
619 WARN_ON(!atomic_read(&bh->b_count)); 626 WARN_ON(!atomic_read(&bh->b_count));
627
620 if (transid != root->fs_info->generation) { 628 if (transid != root->fs_info->generation) {
621 printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", 629 printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
622 (unsigned long long)bh->b_blocknr, 630 (unsigned long long)bh->b_blocknr,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5d4d5d8db8ef..26b8d3406491 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,7 +23,8 @@
23#include "transaction.h" 23#include "transaction.h"
24 24
25static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 25static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26 *orig_root, u64 num_blocks, u64 search_start, 26 *orig_root, u64 num_blocks, u64 empty_size,
27 u64 search_start,
27 u64 search_end, u64 hint_block, 28 u64 search_end, u64 hint_block,
28 struct btrfs_key *ins, u64 exclude_start, 29 struct btrfs_key *ins, u64 exclude_start,
29 u64 exclude_nr, int data); 30 u64 exclude_nr, int data);
@@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
379 path = btrfs_alloc_path(); 380 path = btrfs_alloc_path();
380 if (!path) 381 if (!path)
381 return -ENOMEM; 382 return -ENOMEM;
382 ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 383 ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0,
383 (u64)-1, 0, &ins, 0, 0, 0); 384 (u64)-1, 0, &ins, 0, 0, 0);
384 if (ret) { 385 if (ret) {
385 btrfs_free_path(path); 386 btrfs_free_path(path);
@@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
533 struct btrfs_block_group_item *bi; 534 struct btrfs_block_group_item *bi;
534 struct btrfs_key ins; 535 struct btrfs_key ins;
535 536
536 ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 537 ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins,
537 0, 0, 0); 538 0, 0, 0);
538 /* FIXME, set bit to recalc cache groups on next mount */ 539 /* FIXME, set bit to recalc cache groups on next mount */
539 if (ret) 540 if (ret)
@@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
708static int try_remove_page(struct address_space *mapping, unsigned long index) 709static int try_remove_page(struct address_space *mapping, unsigned long index)
709{ 710{
710 int ret; 711 int ret;
712 return 0;
711 ret = invalidate_mapping_pages(mapping, index, index); 713 ret = invalidate_mapping_pages(mapping, index, index);
712 return ret; 714 return ret;
713} 715}
@@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
866 if (!path) 868 if (!path)
867 return -ENOMEM; 869 return -ENOMEM;
868 870
869 ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); 871 ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0);
870 if (ret) { 872 if (ret) {
871 btrfs_free_path(path); 873 btrfs_free_path(path);
872 return ret; 874 return ret;
@@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
983 * Any available blocks before search_start are skipped. 985 * Any available blocks before search_start are skipped.
984 */ 986 */
985static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 987static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
986 *orig_root, u64 num_blocks, u64 search_start, u64 988 *orig_root, u64 num_blocks, u64 empty_size,
987 search_end, u64 hint_block, 989 u64 search_start, u64 search_end, u64 hint_block,
988 struct btrfs_key *ins, u64 exclude_start, 990 struct btrfs_key *ins, u64 exclude_start,
989 u64 exclude_nr, int data) 991 u64 exclude_nr, int data)
990{ 992{
@@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1042 data, 1); 1044 data, 1);
1043 } 1045 }
1044 1046
1047 total_needed += empty_size;
1045 path = btrfs_alloc_path(); 1048 path = btrfs_alloc_path();
1046 1049
1047check_failed: 1050check_failed:
@@ -1157,9 +1160,11 @@ check_pending:
1157 goto error; 1160 goto error;
1158 } 1161 }
1159 search_start = orig_search_start; 1162 search_start = orig_search_start;
1160 if (wrapped) 1163 if (wrapped) {
1164 if (!full_scan)
1165 total_needed -= empty_size;
1161 full_scan = 1; 1166 full_scan = 1;
1162 else 1167 } else
1163 wrapped = 1; 1168 wrapped = 1;
1164 goto new_group; 1169 goto new_group;
1165 } 1170 }
@@ -1238,9 +1243,11 @@ new_group:
1238 ret = -ENOSPC; 1243 ret = -ENOSPC;
1239 goto error; 1244 goto error;
1240 } 1245 }
1241 if (wrapped) 1246 if (wrapped) {
1247 if (!full_scan)
1248 total_needed -= empty_size;
1242 full_scan = 1; 1249 full_scan = 1;
1243 else 1250 } else
1244 wrapped = 1; 1251 wrapped = 1;
1245 } 1252 }
1246 block_group = btrfs_lookup_block_group(info, search_start); 1253 block_group = btrfs_lookup_block_group(info, search_start);
@@ -1264,7 +1271,7 @@ error:
1264 */ 1271 */
1265int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1272int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1266 struct btrfs_root *root, u64 owner, 1273 struct btrfs_root *root, u64 owner,
1267 u64 num_blocks, u64 hint_block, 1274 u64 num_blocks, u64 empty_size, u64 hint_block,
1268 u64 search_end, struct btrfs_key *ins, int data) 1275 u64 search_end, struct btrfs_key *ins, int data)
1269{ 1276{
1270 int ret; 1277 int ret;
@@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1303 * in the correct block group. 1310 * in the correct block group.
1304 */ 1311 */
1305 if (data) { 1312 if (data) {
1306 ret = find_free_extent(trans, root, 0, 0, 1313 ret = find_free_extent(trans, root, 0, 0, 0,
1307 search_end, 0, &prealloc_key, 0, 0, 0); 1314 search_end, 0, &prealloc_key, 0, 0, 0);
1308 BUG_ON(ret); 1315 BUG_ON(ret);
1309 if (ret) 1316 if (ret)
@@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1313 } 1320 }
1314 1321
1315 /* do the real allocation */ 1322 /* do the real allocation */
1316 ret = find_free_extent(trans, root, num_blocks, search_start, 1323 ret = find_free_extent(trans, root, num_blocks, empty_size,
1317 search_end, hint_block, ins, 1324 search_start, search_end, hint_block, ins,
1318 exclude_start, exclude_nr, data); 1325 exclude_start, exclude_nr, data);
1319 BUG_ON(ret); 1326 BUG_ON(ret);
1320 if (ret) 1327 if (ret)
@@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1333 exclude_start = ins->objectid; 1340 exclude_start = ins->objectid;
1334 exclude_nr = ins->offset; 1341 exclude_nr = ins->offset;
1335 hint_block = exclude_start + exclude_nr; 1342 hint_block = exclude_start + exclude_nr;
1336 ret = find_free_extent(trans, root, 0, search_start, 1343 ret = find_free_extent(trans, root, 0, 0, search_start,
1337 search_end, hint_block, 1344 search_end, hint_block,
1338 &prealloc_key, exclude_start, 1345 &prealloc_key, exclude_start,
1339 exclude_nr, 0); 1346 exclude_nr, 0);
@@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1368 * returns the tree buffer or NULL. 1375 * returns the tree buffer or NULL.
1369 */ 1376 */
1370struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1377struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1371 struct btrfs_root *root, u64 hint) 1378 struct btrfs_root *root, u64 hint,
1379 u64 empty_size)
1372{ 1380{
1373 struct btrfs_key ins; 1381 struct btrfs_key ins;
1374 int ret; 1382 int ret;
1375 struct buffer_head *buf; 1383 struct buffer_head *buf;
1376 1384
1377 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1385 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1378 1, hint, (unsigned long)-1, &ins, 0); 1386 1, empty_size, hint,
1387 (unsigned long)-1, &ins, 0);
1379 if (ret) { 1388 if (ret) {
1380 BUG_ON(ret > 0); 1389 BUG_ON(ret > 0);
1381 return ERR_PTR(ret); 1390 return ERR_PTR(ret);
@@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1385 btrfs_free_extent(trans, root, ins.objectid, 1, 0); 1394 btrfs_free_extent(trans, root, ins.objectid, 1, 0);
1386 return ERR_PTR(-ENOMEM); 1395 return ERR_PTR(-ENOMEM);
1387 } 1396 }
1397 WARN_ON(buffer_dirty(buf));
1388 set_buffer_uptodate(buf); 1398 set_buffer_uptodate(buf);
1389 set_buffer_checked(buf); 1399 set_buffer_checked(buf);
1390 set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); 1400 set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
@@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1591 struct btrfs_key key; 1601 struct btrfs_key key;
1592 struct btrfs_disk_key *found_key; 1602 struct btrfs_disk_key *found_key;
1593 struct btrfs_node *node; 1603 struct btrfs_node *node;
1604
1594 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 1605 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1606 level = root_item->drop_level;
1607 path->lowest_level = level;
1595 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1608 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1596 if (ret < 0) { 1609 if (wret < 0) {
1597 ret = wret; 1610 ret = wret;
1598 goto out; 1611 goto out;
1599 } 1612 }
1600 level = root_item->drop_level;
1601 node = btrfs_buffer_node(path->nodes[level]); 1613 node = btrfs_buffer_node(path->nodes[level]);
1602 found_key = &node->ptrs[path->slots[level]].key; 1614 found_key = &node->ptrs[path->slots[level]].key;
1603 WARN_ON(memcmp(found_key, &root_item->drop_progress, 1615 WARN_ON(memcmp(found_key, &root_item->drop_progress,
@@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1617 ret = wret; 1629 ret = wret;
1618 num_walks++; 1630 num_walks++;
1619 if (num_walks > 10) { 1631 if (num_walks > 10) {
1620 struct btrfs_key key;
1621 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1622 ret = -EAGAIN; 1632 ret = -EAGAIN;
1623 get_bh(root->node); 1633 get_bh(root->node);
1624 break; 1634 break;
@@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1627 for (i = 0; i <= orig_level; i++) { 1637 for (i = 0; i <= orig_level; i++) {
1628 if (path->nodes[i]) { 1638 if (path->nodes[i]) {
1629 btrfs_block_release(root, path->nodes[i]); 1639 btrfs_block_release(root, path->nodes[i]);
1640 path->nodes[i] = 0;
1630 } 1641 }
1631 } 1642 }
1632out: 1643out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1fe38fe84150..00b118a2db69 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root,
512 if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || 512 if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
513 pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { 513 pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
514 err = btrfs_alloc_extent(trans, root, inode->i_ino, 514 err = btrfs_alloc_extent(trans, root, inode->i_ino,
515 num_blocks, hint_block, (u64)-1, 515 num_blocks, 0, hint_block, (u64)-1,
516 &ins, 1); 516 &ins, 1);
517 if (err) 517 if (err)
518 goto failed_truncate; 518 goto failed_truncate;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3889032fc449..12aa043b9f6f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
554 &alloc_hint); 554 &alloc_hint);
555 if (ret) 555 if (ret)
556 goto out; 556 goto out;
557 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 557 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
558 alloc_hint, (u64)-1, &ins, 1); 558 alloc_hint, (u64)-1, &ins, 1);
559 if (ret) 559 if (ret)
560 goto out; 560 goto out;
@@ -1360,7 +1360,7 @@ not_found:
1360 if (create & BTRFS_GET_BLOCK_CREATE) { 1360 if (create & BTRFS_GET_BLOCK_CREATE) {
1361 struct btrfs_key ins; 1361 struct btrfs_key ins;
1362 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1362 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1363 1, alloc_hint, (u64)-1, 1363 1, 0, alloc_hint, (u64)-1,
1364 &ins, 1); 1364 &ins, 1);
1365 if (ret) { 1365 if (ret) {
1366 err = ret; 1366 err = ret;
@@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1998 trans = btrfs_start_transaction(root, 1); 1998 trans = btrfs_start_transaction(root, 1);
1999 BUG_ON(!trans); 1999 BUG_ON(!trans);
2000 2000
2001 subvol = btrfs_alloc_free_block(trans, root, 0); 2001 subvol = btrfs_alloc_free_block(trans, root, 0, 0);
2002 if (IS_ERR(subvol)) 2002 if (IS_ERR(subvol))
2003 return PTR_ERR(subvol); 2003 return PTR_ERR(subvol);
2004 leaf = btrfs_buffer_leaf(subvol); 2004 leaf = btrfs_buffer_leaf(subvol);
@@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2159{ 2159{
2160 struct btrfs_root *root = BTRFS_I(inode)->root; 2160 struct btrfs_root *root = BTRFS_I(inode)->root;
2161 struct btrfs_ioctl_vol_args vol_args; 2161 struct btrfs_ioctl_vol_args vol_args;
2162 struct btrfs_trans_handle *trans;
2162 int ret = 0; 2163 int ret = 0;
2164 int err;
2163 struct btrfs_dir_item *di; 2165 struct btrfs_dir_item *di;
2164 int namelen; 2166 int namelen;
2165 struct btrfs_path *path; 2167 struct btrfs_path *path;
@@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2196 else 2198 else
2197 ret = create_snapshot(root, vol_args.name, namelen); 2199 ret = create_snapshot(root, vol_args.name, namelen);
2198 break; 2200 break;
2201
2202 case BTRFS_IOC_DEFRAG:
2203 mutex_lock(&root->fs_info->fs_mutex);
2204 trans = btrfs_start_transaction(root, 1);
2205 memset(&root->defrag_progress, 0,
2206 sizeof(root->defrag_progress));
2207 while (1) {
2208 root->defrag_running = 1;
2209 err = btrfs_defrag_leaves(trans, root, 0);
2210
2211 btrfs_end_transaction(trans, root);
2212 mutex_unlock(&root->fs_info->fs_mutex);
2213
2214 btrfs_btree_balance_dirty(root);
2215
2216 mutex_lock(&root->fs_info->fs_mutex);
2217 trans = btrfs_start_transaction(root, 1);
2218 if (err != -EAGAIN)
2219 break;
2220 }
2221 root->defrag_running = 0;
2222 btrfs_end_transaction(trans, root);
2223 mutex_unlock(&root->fs_info->fs_mutex);
2224 ret = 0;
2225 break;
2199 default: 2226 default:
2200 return -ENOTTY; 2227 return -ENOTTY;
2201 } 2228 }
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 23bed48c51a7..8bc47dec2865 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args {
28 28
29#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ 29#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
30 struct btrfs_ioctl_vol_args) 30 struct btrfs_ioctl_vol_args)
31#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ 31#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
32 struct btrfs_ioctl_vol_args) 32 struct btrfs_ioctl_vol_args)
33#endif 33#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 498626470a04..338a7199363b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep;
29static struct workqueue_struct *trans_wq; 29static struct workqueue_struct *trans_wq;
30 30
31#define BTRFS_ROOT_TRANS_TAG 0 31#define BTRFS_ROOT_TRANS_TAG 0
32#define BTRFS_ROOT_DEFRAG_TAG 1
32 33
33static void put_transaction(struct btrfs_transaction *transaction) 34static void put_transaction(struct btrfs_transaction *transaction)
34{ 35{
@@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root)
69 return 0; 70 return 0;
70} 71}
71 72
73static int record_root_in_trans(struct btrfs_root *root)
74{
75 u64 running_trans_id = root->fs_info->running_transaction->transid;
76 if (root->ref_cows && root->last_trans < running_trans_id) {
77 WARN_ON(root == root->fs_info->extent_root);
78 if (root->root_item.refs != 0) {
79 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
80 (unsigned long)root->root_key.objectid,
81 BTRFS_ROOT_TRANS_TAG);
82 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
83 (unsigned long)root->root_key.objectid,
84 BTRFS_ROOT_DEFRAG_TAG);
85 root->commit_root = root->node;
86 get_bh(root->node);
87 } else {
88 WARN_ON(1);
89 }
90 root->last_trans = running_trans_id;
91 }
92 return 0;
93}
94
72struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 95struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
73 int num_blocks) 96 int num_blocks)
74{ 97{
75 struct btrfs_trans_handle *h = 98 struct btrfs_trans_handle *h =
76 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 99 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
77 int ret; 100 int ret;
78 u64 running_trans_id;
79 101
80 mutex_lock(&root->fs_info->trans_mutex); 102 mutex_lock(&root->fs_info->trans_mutex);
81 ret = join_transaction(root); 103 ret = join_transaction(root);
82 BUG_ON(ret); 104 BUG_ON(ret);
83 running_trans_id = root->fs_info->running_transaction->transid;
84 105
85 if (root != root->fs_info->tree_root && root->last_trans < 106 record_root_in_trans(root);
86 running_trans_id) { 107 h->transid = root->fs_info->running_transaction->transid;
87 WARN_ON(root == root->fs_info->extent_root);
88 WARN_ON(root->ref_cows != 1);
89 if (root->root_item.refs != 0) {
90 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
91 (unsigned long)root->root_key.objectid,
92 BTRFS_ROOT_TRANS_TAG);
93 root->commit_root = root->node;
94 get_bh(root->node);
95 } else {
96 WARN_ON(1);
97 }
98 }
99 root->last_trans = running_trans_id;
100 h->transid = running_trans_id;
101 h->transaction = root->fs_info->running_transaction; 108 h->transaction = root->fs_info->running_transaction;
102 h->blocks_reserved = num_blocks; 109 h->blocks_reserved = num_blocks;
103 h->blocks_used = 0; 110 h->blocks_used = 0;
@@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
155 gang[i]); 162 gang[i]);
156 if (!page) 163 if (!page)
157 continue; 164 continue;
165 if (PageWriteback(page)) {
166 if (PageDirty(page))
167 wait_on_page_writeback(page);
168 else {
169 unlock_page(page);
170 page_cache_release(page);
171 continue;
172 }
173 }
158 err = write_one_page(page, 0); 174 err = write_one_page(page, 0);
159 if (err) 175 if (err)
160 werr = err; 176 werr = err;
@@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
299 return err; 315 return err;
300} 316}
301 317
318int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
319{
320 struct btrfs_root *gang[1];
321 struct btrfs_root *root;
322 struct btrfs_root *tree_root = info->tree_root;
323 struct btrfs_trans_handle *trans;
324 int i;
325 int ret;
326 int err = 0;
327 u64 last = 0;
328
329 trans = btrfs_start_transaction(tree_root, 1);
330 while(1) {
331 ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
332 (void **)gang, last,
333 ARRAY_SIZE(gang),
334 BTRFS_ROOT_DEFRAG_TAG);
335 if (ret == 0)
336 break;
337 for (i = 0; i < ret; i++) {
338 root = gang[i];
339 last = root->root_key.objectid + 1;
340 radix_tree_tag_clear(&info->fs_roots_radix,
341 (unsigned long)root->root_key.objectid,
342 BTRFS_ROOT_DEFRAG_TAG);
343 if (root->defrag_running)
344 continue;
345
346 while (1) {
347 mutex_lock(&root->fs_info->trans_mutex);
348 record_root_in_trans(root);
349 mutex_unlock(&root->fs_info->trans_mutex);
350
351 root->defrag_running = 1;
352 err = btrfs_defrag_leaves(trans, root, 1);
353 btrfs_end_transaction(trans, tree_root);
354 mutex_unlock(&info->fs_mutex);
355
356 btrfs_btree_balance_dirty(root);
357
358 mutex_lock(&info->fs_mutex);
359 trans = btrfs_start_transaction(tree_root, 1);
360 if (err != -EAGAIN)
361 break;
362 }
363 root->defrag_running = 0;
364 }
365 }
366 btrfs_end_transaction(trans, tree_root);
367 return err;
368}
369
302static int drop_dirty_roots(struct btrfs_root *tree_root, 370static int drop_dirty_roots(struct btrfs_root *tree_root,
303 struct list_head *list) 371 struct list_head *list)
304{ 372{
@@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work)
475 goto out; 543 goto out;
476 } 544 }
477 mutex_unlock(&root->fs_info->trans_mutex); 545 mutex_unlock(&root->fs_info->trans_mutex);
546 btrfs_defrag_dirty_roots(root->fs_info);
478 trans = btrfs_start_transaction(root, 1); 547 trans = btrfs_start_transaction(root, 1);
479 ret = btrfs_commit_transaction(trans, root); 548 ret = btrfs_commit_transaction(trans, root);
480out: 549out:
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ebf44f3e1118..8b2714e65562 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay);
70void btrfs_init_transaction_sys(void); 70void btrfs_init_transaction_sys(void);
71void btrfs_exit_transaction_sys(void); 71void btrfs_exit_transaction_sys(void);
72int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); 72int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list);
73int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info);
73 74
74#endif 75#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
new file mode 100644
index 000000000000..15d0a486fb59
--- /dev/null
+++ b/fs/btrfs/tree-defrag.c
@@ -0,0 +1,222 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "transaction.h"
24
25static void reada_defrag(struct btrfs_root *root,
26 struct btrfs_node *node)
27{
28 int i;
29 u32 nritems;
30 u64 blocknr;
31 int ret;
32
33 nritems = btrfs_header_nritems(&node->header);
34 for (i = 0; i < nritems; i++) {
35 blocknr = btrfs_node_blockptr(node, i);
36 ret = readahead_tree_block(root, blocknr);
37 if (ret)
38 break;
39 }
40}
41
42static int defrag_walk_down(struct btrfs_trans_handle *trans,
43 struct btrfs_root *root,
44 struct btrfs_path *path, int *level,
45 int cache_only)
46{
47 struct buffer_head *next;
48 struct buffer_head *cur;
49 u64 blocknr;
50 int ret = 0;
51
52 WARN_ON(*level < 0);
53 WARN_ON(*level >= BTRFS_MAX_LEVEL);
54
55 while(*level > 0) {
56 WARN_ON(*level < 0);
57 WARN_ON(*level >= BTRFS_MAX_LEVEL);
58 cur = path->nodes[*level];
59
60 if (!cache_only && *level > 1 && path->slots[*level] == 0)
61 reada_defrag(root, btrfs_buffer_node(cur));
62
63 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
64 WARN_ON(1);
65
66 if (path->slots[*level] >=
67 btrfs_header_nritems(btrfs_buffer_header(cur)))
68 break;
69
70 if (*level == 1) {
71 ret = btrfs_realloc_node(trans, root,
72 path->nodes[*level],
73 cache_only);
74 break;
75 }
76 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
77 path->slots[*level]);
78
79 if (cache_only) {
80 next = btrfs_find_tree_block(root, blocknr);
81 if (!next || !buffer_uptodate(next) ||
82 buffer_locked(next)) {
83 brelse(next);
84 path->slots[*level]++;
85 continue;
86 }
87 } else {
88 next = read_tree_block(root, blocknr);
89 }
90 ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
91 path->slots[*level], &next);
92 BUG_ON(ret);
93 ret = btrfs_realloc_node(trans, root, next, cache_only);
94 BUG_ON(ret);
95 WARN_ON(*level <= 0);
96 if (path->nodes[*level-1])
97 btrfs_block_release(root, path->nodes[*level-1]);
98 path->nodes[*level-1] = next;
99 *level = btrfs_header_level(btrfs_buffer_header(next));
100 path->slots[*level] = 0;
101 }
102 WARN_ON(*level < 0);
103 WARN_ON(*level >= BTRFS_MAX_LEVEL);
104 btrfs_block_release(root, path->nodes[*level]);
105 path->nodes[*level] = NULL;
106 *level += 1;
107 WARN_ON(ret);
108 return 0;
109}
110
111static int defrag_walk_up(struct btrfs_trans_handle *trans,
112 struct btrfs_root *root,
113 struct btrfs_path *path, int *level,
114 int cache_only)
115{
116 int i;
117 int slot;
118 struct btrfs_node *node;
119
120 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
121 slot = path->slots[i];
122 if (slot < btrfs_header_nritems(
123 btrfs_buffer_header(path->nodes[i])) - 1) {
124 path->slots[i]++;
125 *level = i;
126 node = btrfs_buffer_node(path->nodes[i]);
127 WARN_ON(i == 0);
128 btrfs_disk_key_to_cpu(&root->defrag_progress,
129 &node->ptrs[path->slots[i]].key);
130 root->defrag_level = i;
131 return 0;
132 } else {
133 btrfs_block_release(root, path->nodes[*level]);
134 path->nodes[*level] = NULL;
135 *level = i + 1;
136 }
137 }
138 return 1;
139}
140
141int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
142 struct btrfs_root *root, int cache_only)
143{
144 struct btrfs_path *path = NULL;
145 struct buffer_head *tmp;
146 int ret = 0;
147 int wret;
148 int level;
149 int orig_level;
150 int i;
151 int num_runs = 0;
152
153 if (root->ref_cows == 0) {
154 goto out;
155 }
156 path = btrfs_alloc_path();
157 if (!path)
158 return -ENOMEM;
159
160 level = btrfs_header_level(btrfs_buffer_header(root->node));
161 orig_level = level;
162 if (level == 0) {
163 goto out;
164 }
165 if (root->defrag_progress.objectid == 0) {
166 get_bh(root->node);
167 ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
168 BUG_ON(ret);
169 ret = btrfs_realloc_node(trans, root, root->node, cache_only);
170 BUG_ON(ret);
171 path->nodes[level] = root->node;
172 path->slots[level] = 0;
173 } else {
174 level = root->defrag_level;
175 path->lowest_level = level;
176 wret = btrfs_search_slot(trans, root, &root->defrag_progress,
177 path, 0, 1);
178
179 if (wret < 0) {
180 ret = wret;
181 goto out;
182 }
183 while(level > 0 && !path->nodes[level])
184 level--;
185 if (!path->nodes[level]) {
186 ret = 0;
187 goto out;
188 }
189 }
190
191 while(1) {
192 wret = defrag_walk_down(trans, root, path, &level, cache_only);
193 if (wret > 0)
194 break;
195 if (wret < 0)
196 ret = wret;
197
198 wret = defrag_walk_up(trans, root, path, &level, cache_only);
199 if (wret > 0)
200 break;
201 if (wret < 0)
202 ret = wret;
203 if (num_runs++ > 8) {
204 ret = -EAGAIN;
205 break;
206 }
207 }
208 for (i = 0; i <= orig_level; i++) {
209 if (path->nodes[i]) {
210 btrfs_block_release(root, path->nodes[i]);
211 path->nodes[i] = 0;
212 }
213 }
214out:
215 if (path)
216 btrfs_free_path(path);
217 if (ret != -EAGAIN) {
218 memset(&root->defrag_progress, 0,
219 sizeof(root->defrag_progress));
220 }
221 return ret;
222}