aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/ctree.c195
-rw-r--r--fs/btrfs/ctree.h19
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c53
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c33
-rw-r--r--fs/btrfs/ioctl.h2
-rw-r--r--fs/btrfs/transaction.c105
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/tree-defrag.c222
11 files changed, 553 insertions, 89 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a4e2df66bdbb..9321438c5eef 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
4obj-m := btrfs.o 4obj-m := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \ 6 hash.o file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o bit-radix.o inode.o file.o 7 transaction.o bit-radix.o inode.o file.o tree-defrag.o
8 8
9#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ 9#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
10# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ 10# root-tree.o dir-item.o hash.o file-item.o inode-item.o \
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7a08491e208e..c7e47e77723f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
65 memset(p, 0, sizeof(*p)); 65 memset(p, 0, sizeof(*p));
66} 66}
67 67
68static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root 68static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
69 *root, struct buffer_head *buf, struct buffer_head 69 *root, struct buffer_head *buf, struct buffer_head
70 *parent, int parent_slot, struct buffer_head 70 *parent, int parent_slot, struct buffer_head
71 **cow_ret) 71 **cow_ret, u64 search_start, u64 empty_size)
72{ 72{
73 struct buffer_head *cow; 73 struct buffer_head *cow;
74 struct btrfs_node *cow_node; 74 struct btrfs_node *cow_node;
75 int ret; 75 int ret = 0;
76 int different_trans = 0;
76 77
78 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
77 WARN_ON(!buffer_uptodate(buf)); 79 WARN_ON(!buffer_uptodate(buf));
78 if (trans->transaction != root->fs_info->running_transaction) { 80 cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
79 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
80 root->fs_info->running_transaction->transid);
81 WARN_ON(1);
82 }
83 if (trans->transid != root->fs_info->generation) {
84 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
85 root->fs_info->generation);
86 WARN_ON(1);
87 }
88 if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
89 trans->transid) {
90 *cow_ret = buf;
91 return 0;
92 }
93 cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
94 if (IS_ERR(cow)) 81 if (IS_ERR(cow))
95 return PTR_ERR(cow); 82 return PTR_ERR(cow);
83
96 cow_node = btrfs_buffer_node(cow); 84 cow_node = btrfs_buffer_node(cow);
97 if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) 85 if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
98 WARN_ON(1); 86 WARN_ON(1);
87
99 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); 88 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
100 btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); 89 btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
101 btrfs_set_header_generation(&cow_node->header, trans->transid); 90 btrfs_set_header_generation(&cow_node->header, trans->transid);
102 btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); 91 btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
103 ret = btrfs_inc_ref(trans, root, buf); 92
104 if (ret) 93 WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) >
105 return ret; 94 trans->transid);
95 if (btrfs_header_generation(btrfs_buffer_header(buf)) !=
96 trans->transid) {
97 different_trans = 1;
98 ret = btrfs_inc_ref(trans, root, buf);
99 if (ret)
100 return ret;
101 } else {
102 WARN_ON(!root->ref_cows);
103 clean_tree_block(trans, root, buf);
104 }
105
106 if (buf == root->node) { 106 if (buf == root->node) {
107 root->node = cow; 107 root->node = cow;
108 get_bh(cow); 108 get_bh(cow);
@@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
114 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, 114 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
115 bh_blocknr(cow)); 115 bh_blocknr(cow));
116 btrfs_mark_buffer_dirty(parent); 116 btrfs_mark_buffer_dirty(parent);
117 WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) !=
118 trans->transid);
117 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); 119 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
118 } 120 }
119 btrfs_block_release(root, buf); 121 btrfs_block_release(root, buf);
@@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
122 return 0; 124 return 0;
123} 125}
124 126
127int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
128 *root, struct buffer_head *buf, struct buffer_head
129 *parent, int parent_slot, struct buffer_head
130 **cow_ret)
131{
132 u64 search_start;
133 if (trans->transaction != root->fs_info->running_transaction) {
134 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
135 root->fs_info->running_transaction->transid);
136 WARN_ON(1);
137 }
138 if (trans->transid != root->fs_info->generation) {
139 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
140 root->fs_info->generation);
141 WARN_ON(1);
142 }
143 if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
144 trans->transid) {
145 *cow_ret = buf;
146 return 0;
147 }
148
149 search_start = bh_blocknr(buf) & ~((u64)65535);
150 return __btrfs_cow_block(trans, root, buf, parent,
151 parent_slot, cow_ret, search_start, 0);
152}
153
154static int close_blocks(u64 blocknr, u64 other)
155{
156 if (blocknr < other && other - blocknr < 8)
157 return 1;
158 if (blocknr > other && blocknr - other < 8)
159 return 1;
160 return 0;
161}
162
163int btrfs_realloc_node(struct btrfs_trans_handle *trans,
164 struct btrfs_root *root, struct buffer_head *parent,
165 int cache_only)
166{
167 struct btrfs_node *parent_node;
168 struct buffer_head *cur_bh;
169 struct buffer_head *tmp_bh;
170 u64 blocknr;
171 u64 search_start = 0;
172 u64 other;
173 u32 parent_nritems;
174 int start_slot;
175 int end_slot;
176 int i;
177 int err = 0;
178
179 if (trans->transaction != root->fs_info->running_transaction) {
180 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
181 root->fs_info->running_transaction->transid);
182 WARN_ON(1);
183 }
184 if (trans->transid != root->fs_info->generation) {
185 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
186 root->fs_info->generation);
187 WARN_ON(1);
188 }
189 parent_node = btrfs_buffer_node(parent);
190 parent_nritems = btrfs_header_nritems(&parent_node->header);
191
192 start_slot = 0;
193 end_slot = parent_nritems;
194
195 if (parent_nritems == 1)
196 return 0;
197
198 for (i = start_slot; i < end_slot; i++) {
199 int close = 1;
200 blocknr = btrfs_node_blockptr(parent_node, i);
201 if (i > 0) {
202 other = btrfs_node_blockptr(parent_node, i - 1);
203 close = close_blocks(blocknr, other);
204 }
205 if (close && i < end_slot - 1) {
206 other = btrfs_node_blockptr(parent_node, i + 1);
207 close = close_blocks(blocknr, other);
208 }
209 if (close)
210 continue;
211
212 cur_bh = btrfs_find_tree_block(root, blocknr);
213 if (!cur_bh || !buffer_uptodate(cur_bh) ||
214 buffer_locked(cur_bh)) {
215 if (cache_only) {
216 brelse(cur_bh);
217 continue;
218 }
219 brelse(cur_bh);
220 cur_bh = read_tree_block(root, blocknr);
221 }
222 if (search_start == 0) {
223 search_start = bh_blocknr(cur_bh) & ~((u64)65535);
224 }
225 err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
226 &tmp_bh, search_start,
227 min(8, end_slot - i));
228 if (err)
229 break;
230 search_start = bh_blocknr(tmp_bh);
231 brelse(tmp_bh);
232 }
233 return err;
234}
235
125/* 236/*
126 * The leaf data grows from end-to-front in the node. 237 * The leaf data grows from end-to-front in the node.
127 * this returns the address of the start of the last item, 238 * this returns the address of the start of the last item,
@@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
221 332
222 parent_slot = path->slots[level + 1]; 333 parent_slot = path->slots[level + 1];
223 parent_key = &parent->ptrs[parent_slot].key; 334 parent_key = &parent->ptrs[parent_slot].key;
335
224 BUG_ON(memcmp(parent_key, &leaf->items[0].key, 336 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
225 sizeof(struct btrfs_disk_key))); 337 sizeof(struct btrfs_disk_key)));
226 BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 338 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
@@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
643 * readahead one full node of leaves 755 * readahead one full node of leaves
644 */ 756 */
645static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, 757static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
646 int slot) 758 int level, int slot)
647{ 759{
648 struct btrfs_node *node; 760 struct btrfs_node *node;
649 int i; 761 int i;
@@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
659 unsigned long gang[8]; 771 unsigned long gang[8];
660 struct buffer_head *bh; 772 struct buffer_head *bh;
661 773
662 if (!path->nodes[1]) 774 if (level == 0)
775 return;
776
777 if (!path->nodes[level])
663 return; 778 return;
664 779
665 node = btrfs_buffer_node(path->nodes[1]); 780 node = btrfs_buffer_node(path->nodes[level]);
666 search = btrfs_node_blockptr(node, slot); 781 search = btrfs_node_blockptr(node, slot);
667 bh = btrfs_find_tree_block(root, search); 782 bh = btrfs_find_tree_block(root, search);
668 if (bh) { 783 if (bh) {
@@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
690 for (i = 0; i < ret; i++) { 805 for (i = 0; i < ret; i++) {
691 blocknr = gang[i]; 806 blocknr = gang[i];
692 clear_radix_bit(&found, blocknr); 807 clear_radix_bit(&found, blocknr);
693 if (nread > 64) 808 if (nread > 32)
694 continue; 809 continue;
695 if (direction > 0 && cluster_start <= blocknr && 810 if (direction > 0 && cluster_start <= blocknr &&
696 cluster_start + 8 > blocknr) { 811 cluster_start + 8 > blocknr) {
@@ -726,7 +841,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
726 struct buffer_head *b; 841 struct buffer_head *b;
727 struct buffer_head *cow_buf; 842 struct buffer_head *cow_buf;
728 struct btrfs_node *c; 843 struct btrfs_node *c;
729 struct btrfs_root_item *root_item = &root->root_item;
730 u64 blocknr; 844 u64 blocknr;
731 int slot; 845 int slot;
732 int ret; 846 int ret;
@@ -734,11 +848,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
734 int should_reada = p->reada; 848 int should_reada = p->reada;
735 u8 lowest_level = 0; 849 u8 lowest_level = 0;
736 850
737 if (btrfs_root_refs(root_item) == 0 && root->ref_cows) { 851 lowest_level = p->lowest_level;
738 lowest_level = root_item->drop_level; 852 WARN_ON(lowest_level && ins_len);
739 WARN_ON(ins_len || cow);
740 }
741
742 WARN_ON(p->nodes[0] != NULL); 853 WARN_ON(p->nodes[0] != NULL);
743 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); 854 WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
744again: 855again:
@@ -798,8 +909,8 @@ again:
798 if (level == lowest_level) 909 if (level == lowest_level)
799 break; 910 break;
800 blocknr = btrfs_node_blockptr(c, slot); 911 blocknr = btrfs_node_blockptr(c, slot);
801 if (level == 1 && should_reada) 912 if (should_reada)
802 reada_for_search(root, p, slot); 913 reada_for_search(root, p, level, slot);
803 b = read_tree_block(root, btrfs_node_blockptr(c, slot)); 914 b = read_tree_block(root, btrfs_node_blockptr(c, slot));
804 915
805 } else { 916 } else {
@@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
960 BUG_ON(path->nodes[level]); 1071 BUG_ON(path->nodes[level]);
961 BUG_ON(path->nodes[level-1] != root->node); 1072 BUG_ON(path->nodes[level-1] != root->node);
962 1073
963 t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); 1074 t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0);
964 if (IS_ERR(t)) 1075 if (IS_ERR(t))
965 return PTR_ERR(t); 1076 return PTR_ERR(t);
966 c = btrfs_buffer_node(t); 1077 c = btrfs_buffer_node(t);
@@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1070 } 1181 }
1071 1182
1072 c_nritems = btrfs_header_nritems(&c->header); 1183 c_nritems = btrfs_header_nritems(&c->header);
1073 split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); 1184 split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0);
1074 if (IS_ERR(split_buffer)) 1185 if (IS_ERR(split_buffer))
1075 return PTR_ERR(split_buffer); 1186 return PTR_ERR(split_buffer);
1076 1187
@@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1461 nritems = btrfs_header_nritems(&l->header); 1572 nritems = btrfs_header_nritems(&l->header);
1462 mid = (nritems + 1)/ 2; 1573 mid = (nritems + 1)/ 2;
1463 1574
1464 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); 1575 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
1465 if (IS_ERR(right_buffer)) 1576 if (IS_ERR(right_buffer))
1466 return PTR_ERR(right_buffer); 1577 return PTR_ERR(right_buffer);
1467 1578
@@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1560 1671
1561 if (!double_split) 1672 if (!double_split)
1562 return ret; 1673 return ret;
1563 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); 1674 right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
1564 if (IS_ERR(right_buffer)) 1675 if (IS_ERR(right_buffer))
1565 return PTR_ERR(right_buffer); 1676 return PTR_ERR(right_buffer);
1566 1677
@@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1988 blocknr = btrfs_node_blockptr(c_node, slot); 2099 blocknr = btrfs_node_blockptr(c_node, slot);
1989 if (next) 2100 if (next)
1990 btrfs_block_release(root, next); 2101 btrfs_block_release(root, next);
1991 if (level == 1 && path->reada) 2102 if (path->reada)
1992 reada_for_search(root, path, slot); 2103 reada_for_search(root, path, level, slot);
1993 next = read_tree_block(root, blocknr); 2104 next = read_tree_block(root, blocknr);
1994 break; 2105 break;
1995 } 2106 }
@@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2002 path->slots[level] = 0; 2113 path->slots[level] = 0;
2003 if (!level) 2114 if (!level)
2004 break; 2115 break;
2005 if (level == 1 && path->reada) 2116 if (path->reada)
2006 reada_for_search(root, path, slot); 2117 reada_for_search(root, path, level, slot);
2007 next = read_tree_block(root, 2118 next = read_tree_block(root,
2008 btrfs_node_blockptr(btrfs_buffer_node(next), 0)); 2119 btrfs_node_blockptr(btrfs_buffer_node(next), 0));
2009 } 2120 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c5a18d5d7f7c..42aa20301bc9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -178,6 +178,7 @@ struct btrfs_path {
178 struct buffer_head *nodes[BTRFS_MAX_LEVEL]; 178 struct buffer_head *nodes[BTRFS_MAX_LEVEL];
179 int slots[BTRFS_MAX_LEVEL]; 179 int slots[BTRFS_MAX_LEVEL];
180 int reada; 180 int reada;
181 int lowest_level;
181}; 182};
182 183
183/* 184/*
@@ -338,6 +339,9 @@ struct btrfs_root {
338 u64 highest_inode; 339 u64 highest_inode;
339 u64 last_inode_alloc; 340 u64 last_inode_alloc;
340 int ref_cows; 341 int ref_cows;
342 struct btrfs_key defrag_progress;
343 int defrag_running;
344 int defrag_level;
341}; 345};
342 346
343/* the lower bits in the key flags defines the item type */ 347/* the lower bits in the key flags defines the item type */
@@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
1031int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, 1035int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
1032 struct btrfs_root *root); 1036 struct btrfs_root *root);
1033struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1037struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1034 struct btrfs_root *root, u64 hint); 1038 struct btrfs_root *root, u64 hint,
1039 u64 empty_size);
1035int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1040int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1036 struct btrfs_root *root, u64 owner, 1041 struct btrfs_root *root, u64 owner,
1037 u64 num_blocks, u64 search_start, 1042 u64 num_blocks, u64 empty_size, u64 search_start,
1038 u64 search_end, struct btrfs_key *ins, int data); 1043 u64 search_end, struct btrfs_key *ins, int data);
1039int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1044int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1040 struct buffer_head *buf); 1045 struct buffer_head *buf);
@@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1051int btrfs_free_block_groups(struct btrfs_fs_info *info); 1056int btrfs_free_block_groups(struct btrfs_fs_info *info);
1052int btrfs_read_block_groups(struct btrfs_root *root); 1057int btrfs_read_block_groups(struct btrfs_root *root);
1053/* ctree.c */ 1058/* ctree.c */
1059int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
1060 *root, struct buffer_head *buf, struct buffer_head
1061 *parent, int parent_slot, struct buffer_head
1062 **cow_ret);
1054int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 1063int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1055 *root, struct btrfs_path *path, u32 data_size); 1064 *root, struct btrfs_path *path, u32 data_size);
1056int btrfs_truncate_item(struct btrfs_trans_handle *trans, 1065int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1060int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 1069int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1061 *root, struct btrfs_key *key, struct btrfs_path *p, int 1070 *root, struct btrfs_key *key, struct btrfs_path *p, int
1062 ins_len, int cow); 1071 ins_len, int cow);
1072int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1073 struct btrfs_root *root, struct buffer_head *parent,
1074 int cache_only);
1063void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); 1075void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
1064struct btrfs_path *btrfs_alloc_path(void); 1076struct btrfs_path *btrfs_alloc_path(void);
1065void btrfs_free_path(struct btrfs_path *p); 1077void btrfs_free_path(struct btrfs_path *p);
@@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations;
1171int btrfs_drop_extents(struct btrfs_trans_handle *trans, 1183int btrfs_drop_extents(struct btrfs_trans_handle *trans,
1172 struct btrfs_root *root, struct inode *inode, 1184 struct btrfs_root *root, struct inode *inode,
1173 u64 start, u64 end, u64 *hint_block); 1185 u64 start, u64 end, u64 *hint_block);
1186/* tree-defrag.c */
1187int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
1188 struct btrfs_root *root, int cache_only);
1174#endif 1189#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 60db85bc4ba8..c948416fea3a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
273 struct buffer_head *buf) 273 struct buffer_head *buf)
274{ 274{
275 WARN_ON(atomic_read(&buf->b_count) == 0); 275 WARN_ON(atomic_read(&buf->b_count) == 0);
276 lock_buffer(buf);
276 clear_buffer_dirty(buf); 277 clear_buffer_dirty(buf);
278 unlock_buffer(buf);
277 return 0; 279 return 0;
278} 280}
279 281
@@ -294,6 +296,9 @@ static int __setup_root(int blocksize,
294 root->last_inode_alloc = 0; 296 root->last_inode_alloc = 0;
295 memset(&root->root_key, 0, sizeof(root->root_key)); 297 memset(&root->root_key, 0, sizeof(root->root_key));
296 memset(&root->root_item, 0, sizeof(root->root_item)); 298 memset(&root->root_item, 0, sizeof(root->root_item));
299 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
300 root->defrag_running = 0;
301 root->defrag_level = 0;
297 root->root_key.objectid = objectid; 302 root->root_key.objectid = objectid;
298 return 0; 303 return 0;
299} 304}
@@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root)
585 fs_info->closing = 1; 590 fs_info->closing = 1;
586 btrfs_transaction_flush_work(root); 591 btrfs_transaction_flush_work(root);
587 mutex_lock(&fs_info->fs_mutex); 592 mutex_lock(&fs_info->fs_mutex);
593 btrfs_defrag_dirty_roots(root->fs_info);
588 trans = btrfs_start_transaction(root, 1); 594 trans = btrfs_start_transaction(root, 1);
589 ret = btrfs_commit_transaction(trans, root); 595 ret = btrfs_commit_transaction(trans, root);
590 /* run commit again to drop the original snapshot */ 596 /* run commit again to drop the original snapshot */
@@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh)
616{ 622{
617 struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; 623 struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root;
618 u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); 624 u64 transid = btrfs_header_generation(btrfs_buffer_header(bh));
625
619 WARN_ON(!atomic_read(&bh->b_count)); 626 WARN_ON(!atomic_read(&bh->b_count));
627
620 if (transid != root->fs_info->generation) { 628 if (transid != root->fs_info->generation) {
621 printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", 629 printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
622 (unsigned long long)bh->b_blocknr, 630 (unsigned long long)bh->b_blocknr,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5d4d5d8db8ef..26b8d3406491 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -23,7 +23,8 @@
23#include "transaction.h" 23#include "transaction.h"
24 24
25static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 25static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
26 *orig_root, u64 num_blocks, u64 search_start, 26 *orig_root, u64 num_blocks, u64 empty_size,
27 u64 search_start,
27 u64 search_end, u64 hint_block, 28 u64 search_end, u64 hint_block,
28 struct btrfs_key *ins, u64 exclude_start, 29 struct btrfs_key *ins, u64 exclude_start,
29 u64 exclude_nr, int data); 30 u64 exclude_nr, int data);
@@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
379 path = btrfs_alloc_path(); 380 path = btrfs_alloc_path();
380 if (!path) 381 if (!path)
381 return -ENOMEM; 382 return -ENOMEM;
382 ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 383 ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0,
383 (u64)-1, 0, &ins, 0, 0, 0); 384 (u64)-1, 0, &ins, 0, 0, 0);
384 if (ret) { 385 if (ret) {
385 btrfs_free_path(path); 386 btrfs_free_path(path);
@@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
533 struct btrfs_block_group_item *bi; 534 struct btrfs_block_group_item *bi;
534 struct btrfs_key ins; 535 struct btrfs_key ins;
535 536
536 ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 537 ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins,
537 0, 0, 0); 538 0, 0, 0);
538 /* FIXME, set bit to recalc cache groups on next mount */ 539 /* FIXME, set bit to recalc cache groups on next mount */
539 if (ret) 540 if (ret)
@@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
708static int try_remove_page(struct address_space *mapping, unsigned long index) 709static int try_remove_page(struct address_space *mapping, unsigned long index)
709{ 710{
710 int ret; 711 int ret;
712 return 0;
711 ret = invalidate_mapping_pages(mapping, index, index); 713 ret = invalidate_mapping_pages(mapping, index, index);
712 return ret; 714 return ret;
713} 715}
@@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
866 if (!path) 868 if (!path)
867 return -ENOMEM; 869 return -ENOMEM;
868 870
869 ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); 871 ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0);
870 if (ret) { 872 if (ret) {
871 btrfs_free_path(path); 873 btrfs_free_path(path);
872 return ret; 874 return ret;
@@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
983 * Any available blocks before search_start are skipped. 985 * Any available blocks before search_start are skipped.
984 */ 986 */
985static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 987static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
986 *orig_root, u64 num_blocks, u64 search_start, u64 988 *orig_root, u64 num_blocks, u64 empty_size,
987 search_end, u64 hint_block, 989 u64 search_start, u64 search_end, u64 hint_block,
988 struct btrfs_key *ins, u64 exclude_start, 990 struct btrfs_key *ins, u64 exclude_start,
989 u64 exclude_nr, int data) 991 u64 exclude_nr, int data)
990{ 992{
@@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1042 data, 1); 1044 data, 1);
1043 } 1045 }
1044 1046
1047 total_needed += empty_size;
1045 path = btrfs_alloc_path(); 1048 path = btrfs_alloc_path();
1046 1049
1047check_failed: 1050check_failed:
@@ -1157,9 +1160,11 @@ check_pending:
1157 goto error; 1160 goto error;
1158 } 1161 }
1159 search_start = orig_search_start; 1162 search_start = orig_search_start;
1160 if (wrapped) 1163 if (wrapped) {
1164 if (!full_scan)
1165 total_needed -= empty_size;
1161 full_scan = 1; 1166 full_scan = 1;
1162 else 1167 } else
1163 wrapped = 1; 1168 wrapped = 1;
1164 goto new_group; 1169 goto new_group;
1165 } 1170 }
@@ -1238,9 +1243,11 @@ new_group:
1238 ret = -ENOSPC; 1243 ret = -ENOSPC;
1239 goto error; 1244 goto error;
1240 } 1245 }
1241 if (wrapped) 1246 if (wrapped) {
1247 if (!full_scan)
1248 total_needed -= empty_size;
1242 full_scan = 1; 1249 full_scan = 1;
1243 else 1250 } else
1244 wrapped = 1; 1251 wrapped = 1;
1245 } 1252 }
1246 block_group = btrfs_lookup_block_group(info, search_start); 1253 block_group = btrfs_lookup_block_group(info, search_start);
@@ -1264,7 +1271,7 @@ error:
1264 */ 1271 */
1265int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1272int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1266 struct btrfs_root *root, u64 owner, 1273 struct btrfs_root *root, u64 owner,
1267 u64 num_blocks, u64 hint_block, 1274 u64 num_blocks, u64 empty_size, u64 hint_block,
1268 u64 search_end, struct btrfs_key *ins, int data) 1275 u64 search_end, struct btrfs_key *ins, int data)
1269{ 1276{
1270 int ret; 1277 int ret;
@@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1303 * in the correct block group. 1310 * in the correct block group.
1304 */ 1311 */
1305 if (data) { 1312 if (data) {
1306 ret = find_free_extent(trans, root, 0, 0, 1313 ret = find_free_extent(trans, root, 0, 0, 0,
1307 search_end, 0, &prealloc_key, 0, 0, 0); 1314 search_end, 0, &prealloc_key, 0, 0, 0);
1308 BUG_ON(ret); 1315 BUG_ON(ret);
1309 if (ret) 1316 if (ret)
@@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1313 } 1320 }
1314 1321
1315 /* do the real allocation */ 1322 /* do the real allocation */
1316 ret = find_free_extent(trans, root, num_blocks, search_start, 1323 ret = find_free_extent(trans, root, num_blocks, empty_size,
1317 search_end, hint_block, ins, 1324 search_start, search_end, hint_block, ins,
1318 exclude_start, exclude_nr, data); 1325 exclude_start, exclude_nr, data);
1319 BUG_ON(ret); 1326 BUG_ON(ret);
1320 if (ret) 1327 if (ret)
@@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1333 exclude_start = ins->objectid; 1340 exclude_start = ins->objectid;
1334 exclude_nr = ins->offset; 1341 exclude_nr = ins->offset;
1335 hint_block = exclude_start + exclude_nr; 1342 hint_block = exclude_start + exclude_nr;
1336 ret = find_free_extent(trans, root, 0, search_start, 1343 ret = find_free_extent(trans, root, 0, 0, search_start,
1337 search_end, hint_block, 1344 search_end, hint_block,
1338 &prealloc_key, exclude_start, 1345 &prealloc_key, exclude_start,
1339 exclude_nr, 0); 1346 exclude_nr, 0);
@@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1368 * returns the tree buffer or NULL. 1375 * returns the tree buffer or NULL.
1369 */ 1376 */
1370struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1377struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1371 struct btrfs_root *root, u64 hint) 1378 struct btrfs_root *root, u64 hint,
1379 u64 empty_size)
1372{ 1380{
1373 struct btrfs_key ins; 1381 struct btrfs_key ins;
1374 int ret; 1382 int ret;
1375 struct buffer_head *buf; 1383 struct buffer_head *buf;
1376 1384
1377 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1385 ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1378 1, hint, (unsigned long)-1, &ins, 0); 1386 1, empty_size, hint,
1387 (unsigned long)-1, &ins, 0);
1379 if (ret) { 1388 if (ret) {
1380 BUG_ON(ret > 0); 1389 BUG_ON(ret > 0);
1381 return ERR_PTR(ret); 1390 return ERR_PTR(ret);
@@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1385 btrfs_free_extent(trans, root, ins.objectid, 1, 0); 1394 btrfs_free_extent(trans, root, ins.objectid, 1, 0);
1386 return ERR_PTR(-ENOMEM); 1395 return ERR_PTR(-ENOMEM);
1387 } 1396 }
1397 WARN_ON(buffer_dirty(buf));
1388 set_buffer_uptodate(buf); 1398 set_buffer_uptodate(buf);
1389 set_buffer_checked(buf); 1399 set_buffer_checked(buf);
1390 set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); 1400 set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
@@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1591 struct btrfs_key key; 1601 struct btrfs_key key;
1592 struct btrfs_disk_key *found_key; 1602 struct btrfs_disk_key *found_key;
1593 struct btrfs_node *node; 1603 struct btrfs_node *node;
1604
1594 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 1605 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1606 level = root_item->drop_level;
1607 path->lowest_level = level;
1595 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1608 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1596 if (ret < 0) { 1609 if (wret < 0) {
1597 ret = wret; 1610 ret = wret;
1598 goto out; 1611 goto out;
1599 } 1612 }
1600 level = root_item->drop_level;
1601 node = btrfs_buffer_node(path->nodes[level]); 1613 node = btrfs_buffer_node(path->nodes[level]);
1602 found_key = &node->ptrs[path->slots[level]].key; 1614 found_key = &node->ptrs[path->slots[level]].key;
1603 WARN_ON(memcmp(found_key, &root_item->drop_progress, 1615 WARN_ON(memcmp(found_key, &root_item->drop_progress,
@@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1617 ret = wret; 1629 ret = wret;
1618 num_walks++; 1630 num_walks++;
1619 if (num_walks > 10) { 1631 if (num_walks > 10) {
1620 struct btrfs_key key;
1621 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1622 ret = -EAGAIN; 1632 ret = -EAGAIN;
1623 get_bh(root->node); 1633 get_bh(root->node);
1624 break; 1634 break;
@@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1627 for (i = 0; i <= orig_level; i++) { 1637 for (i = 0; i <= orig_level; i++) {
1628 if (path->nodes[i]) { 1638 if (path->nodes[i]) {
1629 btrfs_block_release(root, path->nodes[i]); 1639 btrfs_block_release(root, path->nodes[i]);
1640 path->nodes[i] = 0;
1630 } 1641 }
1631 } 1642 }
1632out: 1643out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1fe38fe84150..00b118a2db69 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root,
512 if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || 512 if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
513 pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { 513 pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
514 err = btrfs_alloc_extent(trans, root, inode->i_ino, 514 err = btrfs_alloc_extent(trans, root, inode->i_ino,
515 num_blocks, hint_block, (u64)-1, 515 num_blocks, 0, hint_block, (u64)-1,
516 &ins, 1); 516 &ins, 1);
517 if (err) 517 if (err)
518 goto failed_truncate; 518 goto failed_truncate;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3889032fc449..12aa043b9f6f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
554 &alloc_hint); 554 &alloc_hint);
555 if (ret) 555 if (ret)
556 goto out; 556 goto out;
557 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 557 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
558 alloc_hint, (u64)-1, &ins, 1); 558 alloc_hint, (u64)-1, &ins, 1);
559 if (ret) 559 if (ret)
560 goto out; 560 goto out;
@@ -1360,7 +1360,7 @@ not_found:
1360 if (create & BTRFS_GET_BLOCK_CREATE) { 1360 if (create & BTRFS_GET_BLOCK_CREATE) {
1361 struct btrfs_key ins; 1361 struct btrfs_key ins;
1362 ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1362 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1363 1, alloc_hint, (u64)-1, 1363 1, 0, alloc_hint, (u64)-1,
1364 &ins, 1); 1364 &ins, 1);
1365 if (ret) { 1365 if (ret) {
1366 err = ret; 1366 err = ret;
@@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1998 trans = btrfs_start_transaction(root, 1); 1998 trans = btrfs_start_transaction(root, 1);
1999 BUG_ON(!trans); 1999 BUG_ON(!trans);
2000 2000
2001 subvol = btrfs_alloc_free_block(trans, root, 0); 2001 subvol = btrfs_alloc_free_block(trans, root, 0, 0);
2002 if (IS_ERR(subvol)) 2002 if (IS_ERR(subvol))
2003 return PTR_ERR(subvol); 2003 return PTR_ERR(subvol);
2004 leaf = btrfs_buffer_leaf(subvol); 2004 leaf = btrfs_buffer_leaf(subvol);
@@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2159{ 2159{
2160 struct btrfs_root *root = BTRFS_I(inode)->root; 2160 struct btrfs_root *root = BTRFS_I(inode)->root;
2161 struct btrfs_ioctl_vol_args vol_args; 2161 struct btrfs_ioctl_vol_args vol_args;
2162 struct btrfs_trans_handle *trans;
2162 int ret = 0; 2163 int ret = 0;
2164 int err;
2163 struct btrfs_dir_item *di; 2165 struct btrfs_dir_item *di;
2164 int namelen; 2166 int namelen;
2165 struct btrfs_path *path; 2167 struct btrfs_path *path;
@@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2196 else 2198 else
2197 ret = create_snapshot(root, vol_args.name, namelen); 2199 ret = create_snapshot(root, vol_args.name, namelen);
2198 break; 2200 break;
2201
2202 case BTRFS_IOC_DEFRAG:
2203 mutex_lock(&root->fs_info->fs_mutex);
2204 trans = btrfs_start_transaction(root, 1);
2205 memset(&root->defrag_progress, 0,
2206 sizeof(root->defrag_progress));
2207 while (1) {
2208 root->defrag_running = 1;
2209 err = btrfs_defrag_leaves(trans, root, 0);
2210
2211 btrfs_end_transaction(trans, root);
2212 mutex_unlock(&root->fs_info->fs_mutex);
2213
2214 btrfs_btree_balance_dirty(root);
2215
2216 mutex_lock(&root->fs_info->fs_mutex);
2217 trans = btrfs_start_transaction(root, 1);
2218 if (err != -EAGAIN)
2219 break;
2220 }
2221 root->defrag_running = 0;
2222 btrfs_end_transaction(trans, root);
2223 mutex_unlock(&root->fs_info->fs_mutex);
2224 ret = 0;
2225 break;
2199 default: 2226 default:
2200 return -ENOTTY; 2227 return -ENOTTY;
2201 } 2228 }
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 23bed48c51a7..8bc47dec2865 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args {
28 28
29#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ 29#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
30 struct btrfs_ioctl_vol_args) 30 struct btrfs_ioctl_vol_args)
31#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ 31#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
32 struct btrfs_ioctl_vol_args) 32 struct btrfs_ioctl_vol_args)
33#endif 33#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 498626470a04..338a7199363b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep;
29static struct workqueue_struct *trans_wq; 29static struct workqueue_struct *trans_wq;
30 30
31#define BTRFS_ROOT_TRANS_TAG 0 31#define BTRFS_ROOT_TRANS_TAG 0
32#define BTRFS_ROOT_DEFRAG_TAG 1
32 33
33static void put_transaction(struct btrfs_transaction *transaction) 34static void put_transaction(struct btrfs_transaction *transaction)
34{ 35{
@@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root)
69 return 0; 70 return 0;
70} 71}
71 72
73static int record_root_in_trans(struct btrfs_root *root)
74{
75 u64 running_trans_id = root->fs_info->running_transaction->transid;
76 if (root->ref_cows && root->last_trans < running_trans_id) {
77 WARN_ON(root == root->fs_info->extent_root);
78 if (root->root_item.refs != 0) {
79 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
80 (unsigned long)root->root_key.objectid,
81 BTRFS_ROOT_TRANS_TAG);
82 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
83 (unsigned long)root->root_key.objectid,
84 BTRFS_ROOT_DEFRAG_TAG);
85 root->commit_root = root->node;
86 get_bh(root->node);
87 } else {
88 WARN_ON(1);
89 }
90 root->last_trans = running_trans_id;
91 }
92 return 0;
93}
94
72struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 95struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
73 int num_blocks) 96 int num_blocks)
74{ 97{
75 struct btrfs_trans_handle *h = 98 struct btrfs_trans_handle *h =
76 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 99 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
77 int ret; 100 int ret;
78 u64 running_trans_id;
79 101
80 mutex_lock(&root->fs_info->trans_mutex); 102 mutex_lock(&root->fs_info->trans_mutex);
81 ret = join_transaction(root); 103 ret = join_transaction(root);
82 BUG_ON(ret); 104 BUG_ON(ret);
83 running_trans_id = root->fs_info->running_transaction->transid;
84 105
85 if (root != root->fs_info->tree_root && root->last_trans < 106 record_root_in_trans(root);
86 running_trans_id) { 107 h->transid = root->fs_info->running_transaction->transid;
87 WARN_ON(root == root->fs_info->extent_root);
88 WARN_ON(root->ref_cows != 1);
89 if (root->root_item.refs != 0) {
90 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
91 (unsigned long)root->root_key.objectid,
92 BTRFS_ROOT_TRANS_TAG);
93 root->commit_root = root->node;
94 get_bh(root->node);
95 } else {
96 WARN_ON(1);
97 }
98 }
99 root->last_trans = running_trans_id;
100 h->transid = running_trans_id;
101 h->transaction = root->fs_info->running_transaction; 108 h->transaction = root->fs_info->running_transaction;
102 h->blocks_reserved = num_blocks; 109 h->blocks_reserved = num_blocks;
103 h->blocks_used = 0; 110 h->blocks_used = 0;
@@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
155 gang[i]); 162 gang[i]);
156 if (!page) 163 if (!page)
157 continue; 164 continue;
165 if (PageWriteback(page)) {
166 if (PageDirty(page))
167 wait_on_page_writeback(page);
168 else {
169 unlock_page(page);
170 page_cache_release(page);
171 continue;
172 }
173 }
158 err = write_one_page(page, 0); 174 err = write_one_page(page, 0);
159 if (err) 175 if (err)
160 werr = err; 176 werr = err;
@@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
299 return err; 315 return err;
300} 316}
301 317
318int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
319{
320 struct btrfs_root *gang[1];
321 struct btrfs_root *root;
322 struct btrfs_root *tree_root = info->tree_root;
323 struct btrfs_trans_handle *trans;
324 int i;
325 int ret;
326 int err = 0;
327 u64 last = 0;
328
329 trans = btrfs_start_transaction(tree_root, 1);
330 while(1) {
331 ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
332 (void **)gang, last,
333 ARRAY_SIZE(gang),
334 BTRFS_ROOT_DEFRAG_TAG);
335 if (ret == 0)
336 break;
337 for (i = 0; i < ret; i++) {
338 root = gang[i];
339 last = root->root_key.objectid + 1;
340 radix_tree_tag_clear(&info->fs_roots_radix,
341 (unsigned long)root->root_key.objectid,
342 BTRFS_ROOT_DEFRAG_TAG);
343 if (root->defrag_running)
344 continue;
345
346 while (1) {
347 mutex_lock(&root->fs_info->trans_mutex);
348 record_root_in_trans(root);
349 mutex_unlock(&root->fs_info->trans_mutex);
350
351 root->defrag_running = 1;
352 err = btrfs_defrag_leaves(trans, root, 1);
353 btrfs_end_transaction(trans, tree_root);
354 mutex_unlock(&info->fs_mutex);
355
356 btrfs_btree_balance_dirty(root);
357
358 mutex_lock(&info->fs_mutex);
359 trans = btrfs_start_transaction(tree_root, 1);
360 if (err != -EAGAIN)
361 break;
362 }
363 root->defrag_running = 0;
364 }
365 }
366 btrfs_end_transaction(trans, tree_root);
367 return err;
368}
369
302static int drop_dirty_roots(struct btrfs_root *tree_root, 370static int drop_dirty_roots(struct btrfs_root *tree_root,
303 struct list_head *list) 371 struct list_head *list)
304{ 372{
@@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work)
475 goto out; 543 goto out;
476 } 544 }
477 mutex_unlock(&root->fs_info->trans_mutex); 545 mutex_unlock(&root->fs_info->trans_mutex);
546 btrfs_defrag_dirty_roots(root->fs_info);
478 trans = btrfs_start_transaction(root, 1); 547 trans = btrfs_start_transaction(root, 1);
479 ret = btrfs_commit_transaction(trans, root); 548 ret = btrfs_commit_transaction(trans, root);
480out: 549out:
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index ebf44f3e1118..8b2714e65562 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay);
70void btrfs_init_transaction_sys(void); 70void btrfs_init_transaction_sys(void);
71void btrfs_exit_transaction_sys(void); 71void btrfs_exit_transaction_sys(void);
72int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); 72int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list);
73int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info);
73 74
74#endif 75#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
new file mode 100644
index 000000000000..15d0a486fb59
--- /dev/null
+++ b/fs/btrfs/tree-defrag.c
@@ -0,0 +1,222 @@
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include "ctree.h"
21#include "disk-io.h"
22#include "print-tree.h"
23#include "transaction.h"
24
25static void reada_defrag(struct btrfs_root *root,
26 struct btrfs_node *node)
27{
28 int i;
29 u32 nritems;
30 u64 blocknr;
31 int ret;
32
33 nritems = btrfs_header_nritems(&node->header);
34 for (i = 0; i < nritems; i++) {
35 blocknr = btrfs_node_blockptr(node, i);
36 ret = readahead_tree_block(root, blocknr);
37 if (ret)
38 break;
39 }
40}
41
42static int defrag_walk_down(struct btrfs_trans_handle *trans,
43 struct btrfs_root *root,
44 struct btrfs_path *path, int *level,
45 int cache_only)
46{
47 struct buffer_head *next;
48 struct buffer_head *cur;
49 u64 blocknr;
50 int ret = 0;
51
52 WARN_ON(*level < 0);
53 WARN_ON(*level >= BTRFS_MAX_LEVEL);
54
55 while(*level > 0) {
56 WARN_ON(*level < 0);
57 WARN_ON(*level >= BTRFS_MAX_LEVEL);
58 cur = path->nodes[*level];
59
60 if (!cache_only && *level > 1 && path->slots[*level] == 0)
61 reada_defrag(root, btrfs_buffer_node(cur));
62
63 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
64 WARN_ON(1);
65
66 if (path->slots[*level] >=
67 btrfs_header_nritems(btrfs_buffer_header(cur)))
68 break;
69
70 if (*level == 1) {
71 ret = btrfs_realloc_node(trans, root,
72 path->nodes[*level],
73 cache_only);
74 break;
75 }
76 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
77 path->slots[*level]);
78
79 if (cache_only) {
80 next = btrfs_find_tree_block(root, blocknr);
81 if (!next || !buffer_uptodate(next) ||
82 buffer_locked(next)) {
83 brelse(next);
84 path->slots[*level]++;
85 continue;
86 }
87 } else {
88 next = read_tree_block(root, blocknr);
89 }
90 ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
91 path->slots[*level], &next);
92 BUG_ON(ret);
93 ret = btrfs_realloc_node(trans, root, next, cache_only);
94 BUG_ON(ret);
95 WARN_ON(*level <= 0);
96 if (path->nodes[*level-1])
97 btrfs_block_release(root, path->nodes[*level-1]);
98 path->nodes[*level-1] = next;
99 *level = btrfs_header_level(btrfs_buffer_header(next));
100 path->slots[*level] = 0;
101 }
102 WARN_ON(*level < 0);
103 WARN_ON(*level >= BTRFS_MAX_LEVEL);
104 btrfs_block_release(root, path->nodes[*level]);
105 path->nodes[*level] = NULL;
106 *level += 1;
107 WARN_ON(ret);
108 return 0;
109}
110
111static int defrag_walk_up(struct btrfs_trans_handle *trans,
112 struct btrfs_root *root,
113 struct btrfs_path *path, int *level,
114 int cache_only)
115{
116 int i;
117 int slot;
118 struct btrfs_node *node;
119
120 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
121 slot = path->slots[i];
122 if (slot < btrfs_header_nritems(
123 btrfs_buffer_header(path->nodes[i])) - 1) {
124 path->slots[i]++;
125 *level = i;
126 node = btrfs_buffer_node(path->nodes[i]);
127 WARN_ON(i == 0);
128 btrfs_disk_key_to_cpu(&root->defrag_progress,
129 &node->ptrs[path->slots[i]].key);
130 root->defrag_level = i;
131 return 0;
132 } else {
133 btrfs_block_release(root, path->nodes[*level]);
134 path->nodes[*level] = NULL;
135 *level = i + 1;
136 }
137 }
138 return 1;
139}
140
141int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
142 struct btrfs_root *root, int cache_only)
143{
144 struct btrfs_path *path = NULL;
145 struct buffer_head *tmp;
146 int ret = 0;
147 int wret;
148 int level;
149 int orig_level;
150 int i;
151 int num_runs = 0;
152
153 if (root->ref_cows == 0) {
154 goto out;
155 }
156 path = btrfs_alloc_path();
157 if (!path)
158 return -ENOMEM;
159
160 level = btrfs_header_level(btrfs_buffer_header(root->node));
161 orig_level = level;
162 if (level == 0) {
163 goto out;
164 }
165 if (root->defrag_progress.objectid == 0) {
166 get_bh(root->node);
167 ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
168 BUG_ON(ret);
169 ret = btrfs_realloc_node(trans, root, root->node, cache_only);
170 BUG_ON(ret);
171 path->nodes[level] = root->node;
172 path->slots[level] = 0;
173 } else {
174 level = root->defrag_level;
175 path->lowest_level = level;
176 wret = btrfs_search_slot(trans, root, &root->defrag_progress,
177 path, 0, 1);
178
179 if (wret < 0) {
180 ret = wret;
181 goto out;
182 }
183 while(level > 0 && !path->nodes[level])
184 level--;
185 if (!path->nodes[level]) {
186 ret = 0;
187 goto out;
188 }
189 }
190
191 while(1) {
192 wret = defrag_walk_down(trans, root, path, &level, cache_only);
193 if (wret > 0)
194 break;
195 if (wret < 0)
196 ret = wret;
197
198 wret = defrag_walk_up(trans, root, path, &level, cache_only);
199 if (wret > 0)
200 break;
201 if (wret < 0)
202 ret = wret;
203 if (num_runs++ > 8) {
204 ret = -EAGAIN;
205 break;
206 }
207 }
208 for (i = 0; i <= orig_level; i++) {
209 if (path->nodes[i]) {
210 btrfs_block_release(root, path->nodes[i]);
211 path->nodes[i] = 0;
212 }
213 }
214out:
215 if (path)
216 btrfs_free_path(path);
217 if (ret != -EAGAIN) {
218 memset(&root->defrag_progress, 0,
219 sizeof(root->defrag_progress));
220 }
221 return ret;
222}