aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorYan Zheng <zheng.yan@oracle.com>2009-09-11 16:11:19 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-17 15:47:36 -0400
commit11833d66be94b514652466802100378046c16b72 (patch)
tree2b00b36d0aa42e9e10cecf3bf723eb70a607afec /fs
parent6e74057c4686dc12ea767b4bdc50a63876056e1c (diff)
Btrfs: improve async block group caching
This patch gets rid of two limitations of async block group caching. The old code delays handling pinned extents when a block group is in caching. To allocate logged file extents, the old code needs to wait until the block group is fully cached. To get rid of these limitations, this patch introduces a data structure to track the progress of caching. Based on the caching progress, we know which extents should be added to the free space cache when handling the pinned extents. The logged file extents are also handled in a similar way. This patch also changes how pinned extents are tracked. The old code uses one tree to track pinned extents, and copies the pinned extents tree at transaction commit time. This patch makes it use two trees to track pinned extents: one tree for extents that are pinned in the running transaction, one tree for extents that can be unpinned. At transaction commit time, we swap the two trees. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h29
-rw-r--r--fs/btrfs/disk-io.c7
-rw-r--r--fs/btrfs/extent-tree.c586
-rw-r--r--fs/btrfs/transaction.c15
-rw-r--r--fs/btrfs/tree-log.c4
5 files changed, 382 insertions, 259 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 732d5b884aa7..3b6df7140575 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -726,6 +726,15 @@ enum btrfs_caching_type {
726 BTRFS_CACHE_FINISHED = 2, 726 BTRFS_CACHE_FINISHED = 2,
727}; 727};
728 728
729struct btrfs_caching_control {
730 struct list_head list;
731 struct mutex mutex;
732 wait_queue_head_t wait;
733 struct btrfs_block_group_cache *block_group;
734 u64 progress;
735 atomic_t count;
736};
737
729struct btrfs_block_group_cache { 738struct btrfs_block_group_cache {
730 struct btrfs_key key; 739 struct btrfs_key key;
731 struct btrfs_block_group_item item; 740 struct btrfs_block_group_item item;
@@ -742,8 +751,9 @@ struct btrfs_block_group_cache {
742 int dirty; 751 int dirty;
743 752
744 /* cache tracking stuff */ 753 /* cache tracking stuff */
745 wait_queue_head_t caching_q;
746 int cached; 754 int cached;
755 struct btrfs_caching_control *caching_ctl;
756 u64 last_byte_to_unpin;
747 757
748 struct btrfs_space_info *space_info; 758 struct btrfs_space_info *space_info;
749 759
@@ -788,7 +798,8 @@ struct btrfs_fs_info {
788 spinlock_t block_group_cache_lock; 798 spinlock_t block_group_cache_lock;
789 struct rb_root block_group_cache_tree; 799 struct rb_root block_group_cache_tree;
790 800
791 struct extent_io_tree pinned_extents; 801 struct extent_io_tree freed_extents[2];
802 struct extent_io_tree *pinned_extents;
792 803
793 /* logical->physical extent mapping */ 804 /* logical->physical extent mapping */
794 struct btrfs_mapping_tree mapping_tree; 805 struct btrfs_mapping_tree mapping_tree;
@@ -825,8 +836,6 @@ struct btrfs_fs_info {
825 struct mutex drop_mutex; 836 struct mutex drop_mutex;
826 struct mutex volume_mutex; 837 struct mutex volume_mutex;
827 struct mutex tree_reloc_mutex; 838 struct mutex tree_reloc_mutex;
828 struct rw_semaphore extent_commit_sem;
829
830 /* 839 /*
831 * this protects the ordered operations list only while we are 840 * this protects the ordered operations list only while we are
832 * processing all of the entries on it. This way we make 841 * processing all of the entries on it. This way we make
@@ -835,10 +844,12 @@ struct btrfs_fs_info {
835 * before jumping into the main commit. 844 * before jumping into the main commit.
836 */ 845 */
837 struct mutex ordered_operations_mutex; 846 struct mutex ordered_operations_mutex;
847 struct rw_semaphore extent_commit_sem;
838 848
839 struct list_head trans_list; 849 struct list_head trans_list;
840 struct list_head hashers; 850 struct list_head hashers;
841 struct list_head dead_roots; 851 struct list_head dead_roots;
852 struct list_head caching_block_groups;
842 853
843 atomic_t nr_async_submits; 854 atomic_t nr_async_submits;
844 atomic_t async_submit_draining; 855 atomic_t async_submit_draining;
@@ -1920,8 +1931,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1920int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 1931int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1921 struct btrfs_root *root, unsigned long count); 1932 struct btrfs_root *root, unsigned long count);
1922int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 1933int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
1923int btrfs_update_pinned_extents(struct btrfs_root *root, 1934int btrfs_pin_extent(struct btrfs_root *root,
1924 u64 bytenr, u64 num, int pin); 1935 u64 bytenr, u64 num, int reserved);
1925int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1936int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1926 struct btrfs_root *root, struct extent_buffer *leaf); 1937 struct btrfs_root *root, struct extent_buffer *leaf);
1927int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1938int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -1971,9 +1982,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
1971 u64 root_objectid, u64 owner, u64 offset); 1982 u64 root_objectid, u64 owner, u64 offset);
1972 1983
1973int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 1984int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
1985int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
1986 struct btrfs_root *root);
1974int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 1987int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1975 struct btrfs_root *root, 1988 struct btrfs_root *root);
1976 struct extent_io_tree *unpin);
1977int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1989int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1978 struct btrfs_root *root, 1990 struct btrfs_root *root,
1979 u64 bytenr, u64 num_bytes, u64 parent, 1991 u64 bytenr, u64 num_bytes, u64 parent,
@@ -2006,7 +2018,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
2006 u64 bytes); 2018 u64 bytes);
2007void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2019void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
2008 u64 bytes); 2020 u64 bytes);
2009void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
2010/* ctree.c */ 2021/* ctree.c */
2011int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2022int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2012 int level, int *slot); 2023 int level, int *slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 253da7e01ab3..16dae122dda4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1563,6 +1563,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1563 INIT_LIST_HEAD(&fs_info->hashers); 1563 INIT_LIST_HEAD(&fs_info->hashers);
1564 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1564 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1565 INIT_LIST_HEAD(&fs_info->ordered_operations); 1565 INIT_LIST_HEAD(&fs_info->ordered_operations);
1566 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1566 spin_lock_init(&fs_info->delalloc_lock); 1567 spin_lock_init(&fs_info->delalloc_lock);
1567 spin_lock_init(&fs_info->new_trans_lock); 1568 spin_lock_init(&fs_info->new_trans_lock);
1568 spin_lock_init(&fs_info->ref_cache_lock); 1569 spin_lock_init(&fs_info->ref_cache_lock);
@@ -1621,8 +1622,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1621 spin_lock_init(&fs_info->block_group_cache_lock); 1622 spin_lock_init(&fs_info->block_group_cache_lock);
1622 fs_info->block_group_cache_tree.rb_node = NULL; 1623 fs_info->block_group_cache_tree.rb_node = NULL;
1623 1624
1624 extent_io_tree_init(&fs_info->pinned_extents, 1625 extent_io_tree_init(&fs_info->freed_extents[0],
1625 fs_info->btree_inode->i_mapping, GFP_NOFS); 1626 fs_info->btree_inode->i_mapping, GFP_NOFS);
1627 extent_io_tree_init(&fs_info->freed_extents[1],
1628 fs_info->btree_inode->i_mapping, GFP_NOFS);
1629 fs_info->pinned_extents = &fs_info->freed_extents[0];
1626 fs_info->do_barriers = 1; 1630 fs_info->do_barriers = 1;
1627 1631
1628 BTRFS_I(fs_info->btree_inode)->root = tree_root; 1632 BTRFS_I(fs_info->btree_inode)->root = tree_root;
@@ -2359,7 +2363,6 @@ int close_ctree(struct btrfs_root *root)
2359 free_extent_buffer(root->fs_info->csum_root->commit_root); 2363 free_extent_buffer(root->fs_info->csum_root->commit_root);
2360 2364
2361 btrfs_free_block_groups(root->fs_info); 2365 btrfs_free_block_groups(root->fs_info);
2362 btrfs_free_pinned_extents(root->fs_info);
2363 2366
2364 del_fs_roots(fs_info); 2367 del_fs_roots(fs_info);
2365 2368
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edd86ae9e149..9bcb9c09c3b8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -32,12 +32,12 @@
32#include "locking.h" 32#include "locking.h"
33#include "free-space-cache.h" 33#include "free-space-cache.h"
34 34
35static int update_reserved_extents(struct btrfs_root *root,
36 u64 bytenr, u64 num, int reserve);
37static int update_block_group(struct btrfs_trans_handle *trans, 35static int update_block_group(struct btrfs_trans_handle *trans,
38 struct btrfs_root *root, 36 struct btrfs_root *root,
39 u64 bytenr, u64 num_bytes, int alloc, 37 u64 bytenr, u64 num_bytes, int alloc,
40 int mark_free); 38 int mark_free);
39static int update_reserved_extents(struct btrfs_block_group_cache *cache,
40 u64 num_bytes, int reserve);
41static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
42 struct btrfs_root *root, 42 struct btrfs_root *root,
43 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
57 u64 parent, u64 root_objectid, 57 u64 parent, u64 root_objectid,
58 u64 flags, struct btrfs_disk_key *key, 58 u64 flags, struct btrfs_disk_key *key,
59 int level, struct btrfs_key *ins); 59 int level, struct btrfs_key *ins);
60
61static int do_chunk_alloc(struct btrfs_trans_handle *trans, 60static int do_chunk_alloc(struct btrfs_trans_handle *trans,
62 struct btrfs_root *extent_root, u64 alloc_bytes, 61 struct btrfs_root *extent_root, u64 alloc_bytes,
63 u64 flags, int force); 62 u64 flags, int force);
63static int pin_down_bytes(struct btrfs_trans_handle *trans,
64 struct btrfs_root *root,
65 struct btrfs_path *path,
66 u64 bytenr, u64 num_bytes,
67 int is_data, int reserved,
68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key);
64 71
65static noinline int 72static noinline int
66block_group_cache_done(struct btrfs_block_group_cache *cache) 73block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
153 return ret; 160 return ret;
154} 161}
155 162
156/* 163static int add_excluded_extent(struct btrfs_root *root,
157 * We always set EXTENT_LOCKED for the super mirror extents so we don't 164 u64 start, u64 num_bytes)
158 * overwrite them, so those bits need to be unset. Also, if we are unmounting
159 * with pinned extents still sitting there because we had a block group caching,
160 * we need to clear those now, since we are done.
161 */
162void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
163{ 165{
164 u64 start, end, last = 0; 166 u64 end = start + num_bytes - 1;
165 int ret; 167 set_extent_bits(&root->fs_info->freed_extents[0],
168 start, end, EXTENT_UPTODATE, GFP_NOFS);
169 set_extent_bits(&root->fs_info->freed_extents[1],
170 start, end, EXTENT_UPTODATE, GFP_NOFS);
171 return 0;
172}
166 173
167 while (1) { 174static void free_excluded_extents(struct btrfs_root *root,
168 ret = find_first_extent_bit(&info->pinned_extents, last, 175 struct btrfs_block_group_cache *cache)
169 &start, &end, 176{
170 EXTENT_LOCKED|EXTENT_DIRTY); 177 u64 start, end;
171 if (ret)
172 break;
173 178
174 clear_extent_bits(&info->pinned_extents, start, end, 179 start = cache->key.objectid;
175 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); 180 end = start + cache->key.offset - 1;
176 last = end+1; 181
177 } 182 clear_extent_bits(&root->fs_info->freed_extents[0],
183 start, end, EXTENT_UPTODATE, GFP_NOFS);
184 clear_extent_bits(&root->fs_info->freed_extents[1],
185 start, end, EXTENT_UPTODATE, GFP_NOFS);
178} 186}
179 187
180static int remove_sb_from_cache(struct btrfs_root *root, 188static int exclude_super_stripes(struct btrfs_root *root,
181 struct btrfs_block_group_cache *cache) 189 struct btrfs_block_group_cache *cache)
182{ 190{
183 struct btrfs_fs_info *fs_info = root->fs_info;
184 u64 bytenr; 191 u64 bytenr;
185 u64 *logical; 192 u64 *logical;
186 int stripe_len; 193 int stripe_len;
@@ -192,17 +199,41 @@ static int remove_sb_from_cache(struct btrfs_root *root,
192 cache->key.objectid, bytenr, 199 cache->key.objectid, bytenr,
193 0, &logical, &nr, &stripe_len); 200 0, &logical, &nr, &stripe_len);
194 BUG_ON(ret); 201 BUG_ON(ret);
202
195 while (nr--) { 203 while (nr--) {
196 try_lock_extent(&fs_info->pinned_extents, 204 ret = add_excluded_extent(root, logical[nr],
197 logical[nr], 205 stripe_len);
198 logical[nr] + stripe_len - 1, GFP_NOFS); 206 BUG_ON(ret);
199 } 207 }
208
200 kfree(logical); 209 kfree(logical);
201 } 210 }
202
203 return 0; 211 return 0;
204} 212}
205 213
214static struct btrfs_caching_control *
215get_caching_control(struct btrfs_block_group_cache *cache)
216{
217 struct btrfs_caching_control *ctl;
218
219 spin_lock(&cache->lock);
220 if (cache->cached != BTRFS_CACHE_STARTED) {
221 spin_unlock(&cache->lock);
222 return NULL;
223 }
224
225 ctl = cache->caching_ctl;
226 atomic_inc(&ctl->count);
227 spin_unlock(&cache->lock);
228 return ctl;
229}
230
231static void put_caching_control(struct btrfs_caching_control *ctl)
232{
233 if (atomic_dec_and_test(&ctl->count))
234 kfree(ctl);
235}
236
206/* 237/*
207 * this is only called by cache_block_group, since we could have freed extents 238 * this is only called by cache_block_group, since we could have freed extents
208 * we need to check the pinned_extents for any extents that can't be used yet 239 * we need to check the pinned_extents for any extents that can't be used yet
@@ -215,9 +246,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
215 int ret; 246 int ret;
216 247
217 while (start < end) { 248 while (start < end) {
218 ret = find_first_extent_bit(&info->pinned_extents, start, 249 ret = find_first_extent_bit(info->pinned_extents, start,
219 &extent_start, &extent_end, 250 &extent_start, &extent_end,
220 EXTENT_DIRTY|EXTENT_LOCKED); 251 EXTENT_DIRTY | EXTENT_UPTODATE);
221 if (ret) 252 if (ret)
222 break; 253 break;
223 254
@@ -249,22 +280,24 @@ static int caching_kthread(void *data)
249{ 280{
250 struct btrfs_block_group_cache *block_group = data; 281 struct btrfs_block_group_cache *block_group = data;
251 struct btrfs_fs_info *fs_info = block_group->fs_info; 282 struct btrfs_fs_info *fs_info = block_group->fs_info;
252 u64 last = 0; 283 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
284 struct btrfs_root *extent_root = fs_info->extent_root;
253 struct btrfs_path *path; 285 struct btrfs_path *path;
254 int ret = 0;
255 struct btrfs_key key;
256 struct extent_buffer *leaf; 286 struct extent_buffer *leaf;
257 int slot; 287 struct btrfs_key key;
258 u64 total_found = 0; 288 u64 total_found = 0;
259 289 u64 last = 0;
260 BUG_ON(!fs_info); 290 u32 nritems;
291 int ret = 0;
261 292
262 path = btrfs_alloc_path(); 293 path = btrfs_alloc_path();
263 if (!path) 294 if (!path)
264 return -ENOMEM; 295 return -ENOMEM;
265 296
266 atomic_inc(&block_group->space_info->caching_threads); 297 exclude_super_stripes(extent_root, block_group);
298
267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 299 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
300
268 /* 301 /*
269 * We don't want to deadlock with somebody trying to allocate a new 302 * We don't want to deadlock with somebody trying to allocate a new
270 * extent for the extent root while also trying to search the extent 303 * extent for the extent root while also trying to search the extent
@@ -277,74 +310,64 @@ static int caching_kthread(void *data)
277 310
278 key.objectid = last; 311 key.objectid = last;
279 key.offset = 0; 312 key.offset = 0;
280 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 313 key.type = BTRFS_EXTENT_ITEM_KEY;
281again: 314again:
315 mutex_lock(&caching_ctl->mutex);
282 /* need to make sure the commit_root doesn't disappear */ 316 /* need to make sure the commit_root doesn't disappear */
283 down_read(&fs_info->extent_commit_sem); 317 down_read(&fs_info->extent_commit_sem);
284 318
285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); 319 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
286 if (ret < 0) 320 if (ret < 0)
287 goto err; 321 goto err;
288 322
323 leaf = path->nodes[0];
324 nritems = btrfs_header_nritems(leaf);
325
289 while (1) { 326 while (1) {
290 smp_mb(); 327 smp_mb();
291 if (block_group->fs_info->closing > 1) { 328 if (fs_info->closing > 1) {
292 last = (u64)-1; 329 last = (u64)-1;
293 break; 330 break;
294 } 331 }
295 332
296 leaf = path->nodes[0]; 333 if (path->slots[0] < nritems) {
297 slot = path->slots[0]; 334 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
298 if (slot >= btrfs_header_nritems(leaf)) { 335 } else {
299 ret = btrfs_next_leaf(fs_info->extent_root, path); 336 ret = find_next_key(path, 0, &key);
300 if (ret < 0) 337 if (ret)
301 goto err;
302 else if (ret)
303 break; 338 break;
304 339
305 if (need_resched() || 340 caching_ctl->progress = last;
306 btrfs_transaction_in_commit(fs_info)) { 341 btrfs_release_path(extent_root, path);
307 leaf = path->nodes[0]; 342 up_read(&fs_info->extent_commit_sem);
308 343 mutex_unlock(&caching_ctl->mutex);
309 /* this shouldn't happen, but if the 344 if (btrfs_transaction_in_commit(fs_info))
310 * leaf is empty just move on.
311 */
312 if (btrfs_header_nritems(leaf) == 0)
313 break;
314 /*
315 * we need to copy the key out so that
316 * we are sure the next search advances
317 * us forward in the btree.
318 */
319 btrfs_item_key_to_cpu(leaf, &key, 0);
320 btrfs_release_path(fs_info->extent_root, path);
321 up_read(&fs_info->extent_commit_sem);
322 schedule_timeout(1); 345 schedule_timeout(1);
323 goto again; 346 else
324 } 347 cond_resched();
348 goto again;
349 }
325 350
351 if (key.objectid < block_group->key.objectid) {
352 path->slots[0]++;
326 continue; 353 continue;
327 } 354 }
328 btrfs_item_key_to_cpu(leaf, &key, slot);
329 if (key.objectid < block_group->key.objectid)
330 goto next;
331 355
332 if (key.objectid >= block_group->key.objectid + 356 if (key.objectid >= block_group->key.objectid +
333 block_group->key.offset) 357 block_group->key.offset)
334 break; 358 break;
335 359
336 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 360 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
337 total_found += add_new_free_space(block_group, 361 total_found += add_new_free_space(block_group,
338 fs_info, last, 362 fs_info, last,
339 key.objectid); 363 key.objectid);
340 last = key.objectid + key.offset; 364 last = key.objectid + key.offset;
341 }
342 365
343 if (total_found > (1024 * 1024 * 2)) { 366 if (total_found > (1024 * 1024 * 2)) {
344 total_found = 0; 367 total_found = 0;
345 wake_up(&block_group->caching_q); 368 wake_up(&caching_ctl->wait);
369 }
346 } 370 }
347next:
348 path->slots[0]++; 371 path->slots[0]++;
349 } 372 }
350 ret = 0; 373 ret = 0;
@@ -352,33 +375,65 @@ next:
352 total_found += add_new_free_space(block_group, fs_info, last, 375 total_found += add_new_free_space(block_group, fs_info, last,
353 block_group->key.objectid + 376 block_group->key.objectid +
354 block_group->key.offset); 377 block_group->key.offset);
378 caching_ctl->progress = (u64)-1;
355 379
356 spin_lock(&block_group->lock); 380 spin_lock(&block_group->lock);
381 block_group->caching_ctl = NULL;
357 block_group->cached = BTRFS_CACHE_FINISHED; 382 block_group->cached = BTRFS_CACHE_FINISHED;
358 spin_unlock(&block_group->lock); 383 spin_unlock(&block_group->lock);
359 384
360err: 385err:
361 btrfs_free_path(path); 386 btrfs_free_path(path);
362 up_read(&fs_info->extent_commit_sem); 387 up_read(&fs_info->extent_commit_sem);
363 atomic_dec(&block_group->space_info->caching_threads);
364 wake_up(&block_group->caching_q);
365 388
389 free_excluded_extents(extent_root, block_group);
390
391 mutex_unlock(&caching_ctl->mutex);
392 wake_up(&caching_ctl->wait);
393
394 put_caching_control(caching_ctl);
395 atomic_dec(&block_group->space_info->caching_threads);
366 return 0; 396 return 0;
367} 397}
368 398
369static int cache_block_group(struct btrfs_block_group_cache *cache) 399static int cache_block_group(struct btrfs_block_group_cache *cache)
370{ 400{
401 struct btrfs_fs_info *fs_info = cache->fs_info;
402 struct btrfs_caching_control *caching_ctl;
371 struct task_struct *tsk; 403 struct task_struct *tsk;
372 int ret = 0; 404 int ret = 0;
373 405
406 smp_mb();
407 if (cache->cached != BTRFS_CACHE_NO)
408 return 0;
409
410 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
411 BUG_ON(!caching_ctl);
412
413 INIT_LIST_HEAD(&caching_ctl->list);
414 mutex_init(&caching_ctl->mutex);
415 init_waitqueue_head(&caching_ctl->wait);
416 caching_ctl->block_group = cache;
417 caching_ctl->progress = cache->key.objectid;
418 /* one for caching kthread, one for caching block group list */
419 atomic_set(&caching_ctl->count, 2);
420
374 spin_lock(&cache->lock); 421 spin_lock(&cache->lock);
375 if (cache->cached != BTRFS_CACHE_NO) { 422 if (cache->cached != BTRFS_CACHE_NO) {
376 spin_unlock(&cache->lock); 423 spin_unlock(&cache->lock);
377 return ret; 424 kfree(caching_ctl);
425 return 0;
378 } 426 }
427 cache->caching_ctl = caching_ctl;
379 cache->cached = BTRFS_CACHE_STARTED; 428 cache->cached = BTRFS_CACHE_STARTED;
380 spin_unlock(&cache->lock); 429 spin_unlock(&cache->lock);
381 430
431 down_write(&fs_info->extent_commit_sem);
432 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
433 up_write(&fs_info->extent_commit_sem);
434
435 atomic_inc(&cache->space_info->caching_threads);
436
382 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 437 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
383 cache->key.objectid); 438 cache->key.objectid);
384 if (IS_ERR(tsk)) { 439 if (IS_ERR(tsk)) {
@@ -1656,7 +1711,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1656 parent, ref_root, flags, 1711 parent, ref_root, flags,
1657 ref->objectid, ref->offset, 1712 ref->objectid, ref->offset,
1658 &ins, node->ref_mod); 1713 &ins, node->ref_mod);
1659 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1660 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1714 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1661 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1715 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1662 node->num_bytes, parent, 1716 node->num_bytes, parent,
@@ -1782,7 +1836,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1782 extent_op->flags_to_set, 1836 extent_op->flags_to_set,
1783 &extent_op->key, 1837 &extent_op->key,
1784 ref->level, &ins); 1838 ref->level, &ins);
1785 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1786 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1839 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1787 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1840 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1788 node->num_bytes, parent, ref_root, 1841 node->num_bytes, parent, ref_root,
@@ -1817,16 +1870,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1817 BUG_ON(extent_op); 1870 BUG_ON(extent_op);
1818 head = btrfs_delayed_node_to_head(node); 1871 head = btrfs_delayed_node_to_head(node);
1819 if (insert_reserved) { 1872 if (insert_reserved) {
1873 int mark_free = 0;
1874 struct extent_buffer *must_clean = NULL;
1875
1876 ret = pin_down_bytes(trans, root, NULL,
1877 node->bytenr, node->num_bytes,
1878 head->is_data, 1, &must_clean);
1879 if (ret > 0)
1880 mark_free = 1;
1881
1882 if (must_clean) {
1883 clean_tree_block(NULL, root, must_clean);
1884 btrfs_tree_unlock(must_clean);
1885 free_extent_buffer(must_clean);
1886 }
1820 if (head->is_data) { 1887 if (head->is_data) {
1821 ret = btrfs_del_csums(trans, root, 1888 ret = btrfs_del_csums(trans, root,
1822 node->bytenr, 1889 node->bytenr,
1823 node->num_bytes); 1890 node->num_bytes);
1824 BUG_ON(ret); 1891 BUG_ON(ret);
1825 } 1892 }
1826 btrfs_update_pinned_extents(root, node->bytenr, 1893 if (mark_free) {
1827 node->num_bytes, 1); 1894 ret = btrfs_free_reserved_extent(root,
1828 update_reserved_extents(root, node->bytenr, 1895 node->bytenr,
1829 node->num_bytes, 0); 1896 node->num_bytes);
1897 BUG_ON(ret);
1898 }
1830 } 1899 }
1831 mutex_unlock(&head->mutex); 1900 mutex_unlock(&head->mutex);
1832 return 0; 1901 return 0;
@@ -3008,10 +3077,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3008 num_bytes = min(total, cache->key.offset - byte_in_group); 3077 num_bytes = min(total, cache->key.offset - byte_in_group);
3009 if (alloc) { 3078 if (alloc) {
3010 old_val += num_bytes; 3079 old_val += num_bytes;
3080 btrfs_set_block_group_used(&cache->item, old_val);
3081 cache->reserved -= num_bytes;
3011 cache->space_info->bytes_used += num_bytes; 3082 cache->space_info->bytes_used += num_bytes;
3083 cache->space_info->bytes_reserved -= num_bytes;
3012 if (cache->ro) 3084 if (cache->ro)
3013 cache->space_info->bytes_readonly -= num_bytes; 3085 cache->space_info->bytes_readonly -= num_bytes;
3014 btrfs_set_block_group_used(&cache->item, old_val);
3015 spin_unlock(&cache->lock); 3086 spin_unlock(&cache->lock);
3016 spin_unlock(&cache->space_info->lock); 3087 spin_unlock(&cache->space_info->lock);
3017 } else { 3088 } else {
@@ -3056,127 +3127,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
3056 return bytenr; 3127 return bytenr;
3057} 3128}
3058 3129
3059int btrfs_update_pinned_extents(struct btrfs_root *root, 3130/*
3060 u64 bytenr, u64 num, int pin) 3131 * this function must be called within transaction
3132 */
3133int btrfs_pin_extent(struct btrfs_root *root,
3134 u64 bytenr, u64 num_bytes, int reserved)
3061{ 3135{
3062 u64 len;
3063 struct btrfs_block_group_cache *cache;
3064 struct btrfs_fs_info *fs_info = root->fs_info; 3136 struct btrfs_fs_info *fs_info = root->fs_info;
3137 struct btrfs_block_group_cache *cache;
3065 3138
3066 if (pin) 3139 cache = btrfs_lookup_block_group(fs_info, bytenr);
3067 set_extent_dirty(&fs_info->pinned_extents, 3140 BUG_ON(!cache);
3068 bytenr, bytenr + num - 1, GFP_NOFS);
3069
3070 while (num > 0) {
3071 cache = btrfs_lookup_block_group(fs_info, bytenr);
3072 BUG_ON(!cache);
3073 len = min(num, cache->key.offset -
3074 (bytenr - cache->key.objectid));
3075 if (pin) {
3076 spin_lock(&cache->space_info->lock);
3077 spin_lock(&cache->lock);
3078 cache->pinned += len;
3079 cache->space_info->bytes_pinned += len;
3080 spin_unlock(&cache->lock);
3081 spin_unlock(&cache->space_info->lock);
3082 fs_info->total_pinned += len;
3083 } else {
3084 int unpin = 0;
3085 3141
3086 /* 3142 spin_lock(&cache->space_info->lock);
3087 * in order to not race with the block group caching, we 3143 spin_lock(&cache->lock);
3088 * only want to unpin the extent if we are cached. If 3144 cache->pinned += num_bytes;
3089 * we aren't cached, we want to start async caching this 3145 cache->space_info->bytes_pinned += num_bytes;
3090 * block group so we can free the extent the next time 3146 if (reserved) {
3091 * around. 3147 cache->reserved -= num_bytes;
3092 */ 3148 cache->space_info->bytes_reserved -= num_bytes;
3093 spin_lock(&cache->space_info->lock); 3149 }
3094 spin_lock(&cache->lock); 3150 spin_unlock(&cache->lock);
3095 unpin = (cache->cached == BTRFS_CACHE_FINISHED); 3151 spin_unlock(&cache->space_info->lock);
3096 if (likely(unpin)) {
3097 cache->pinned -= len;
3098 cache->space_info->bytes_pinned -= len;
3099 fs_info->total_pinned -= len;
3100 }
3101 spin_unlock(&cache->lock);
3102 spin_unlock(&cache->space_info->lock);
3103 3152
3104 if (likely(unpin)) 3153 btrfs_put_block_group(cache);
3105 clear_extent_dirty(&fs_info->pinned_extents,
3106 bytenr, bytenr + len -1,
3107 GFP_NOFS);
3108 else
3109 cache_block_group(cache);
3110 3154
3111 if (unpin) 3155 set_extent_dirty(fs_info->pinned_extents,
3112 btrfs_add_free_space(cache, bytenr, len); 3156 bytenr, bytenr + num_bytes - 1, GFP_NOFS);
3113 } 3157 return 0;
3114 btrfs_put_block_group(cache); 3158}
3115 bytenr += len; 3159
3116 num -= len; 3160static int update_reserved_extents(struct btrfs_block_group_cache *cache,
3161 u64 num_bytes, int reserve)
3162{
3163 spin_lock(&cache->space_info->lock);
3164 spin_lock(&cache->lock);
3165 if (reserve) {
3166 cache->reserved += num_bytes;
3167 cache->space_info->bytes_reserved += num_bytes;
3168 } else {
3169 cache->reserved -= num_bytes;
3170 cache->space_info->bytes_reserved -= num_bytes;
3117 } 3171 }
3172 spin_unlock(&cache->lock);
3173 spin_unlock(&cache->space_info->lock);
3118 return 0; 3174 return 0;
3119} 3175}
3120 3176
3121static int update_reserved_extents(struct btrfs_root *root, 3177int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
3122 u64 bytenr, u64 num, int reserve) 3178 struct btrfs_root *root)
3123{ 3179{
3124 u64 len;
3125 struct btrfs_block_group_cache *cache;
3126 struct btrfs_fs_info *fs_info = root->fs_info; 3180 struct btrfs_fs_info *fs_info = root->fs_info;
3181 struct btrfs_caching_control *next;
3182 struct btrfs_caching_control *caching_ctl;
3183 struct btrfs_block_group_cache *cache;
3127 3184
3128 while (num > 0) { 3185 down_write(&fs_info->extent_commit_sem);
3129 cache = btrfs_lookup_block_group(fs_info, bytenr);
3130 BUG_ON(!cache);
3131 len = min(num, cache->key.offset -
3132 (bytenr - cache->key.objectid));
3133 3186
3134 spin_lock(&cache->space_info->lock); 3187 list_for_each_entry_safe(caching_ctl, next,
3135 spin_lock(&cache->lock); 3188 &fs_info->caching_block_groups, list) {
3136 if (reserve) { 3189 cache = caching_ctl->block_group;
3137 cache->reserved += len; 3190 if (block_group_cache_done(cache)) {
3138 cache->space_info->bytes_reserved += len; 3191 cache->last_byte_to_unpin = (u64)-1;
3192 list_del_init(&caching_ctl->list);
3193 put_caching_control(caching_ctl);
3139 } else { 3194 } else {
3140 cache->reserved -= len; 3195 cache->last_byte_to_unpin = caching_ctl->progress;
3141 cache->space_info->bytes_reserved -= len;
3142 } 3196 }
3143 spin_unlock(&cache->lock);
3144 spin_unlock(&cache->space_info->lock);
3145 btrfs_put_block_group(cache);
3146 bytenr += len;
3147 num -= len;
3148 } 3197 }
3198
3199 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3200 fs_info->pinned_extents = &fs_info->freed_extents[1];
3201 else
3202 fs_info->pinned_extents = &fs_info->freed_extents[0];
3203
3204 up_write(&fs_info->extent_commit_sem);
3149 return 0; 3205 return 0;
3150} 3206}
3151 3207
3152int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3208static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
3153{ 3209{
3154 u64 last = 0; 3210 struct btrfs_fs_info *fs_info = root->fs_info;
3155 u64 start; 3211 struct btrfs_block_group_cache *cache = NULL;
3156 u64 end; 3212 u64 len;
3157 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
3158 int ret;
3159 3213
3160 while (1) { 3214 while (start <= end) {
3161 ret = find_first_extent_bit(pinned_extents, last, 3215 if (!cache ||
3162 &start, &end, EXTENT_DIRTY); 3216 start >= cache->key.objectid + cache->key.offset) {
3163 if (ret) 3217 if (cache)
3164 break; 3218 btrfs_put_block_group(cache);
3219 cache = btrfs_lookup_block_group(fs_info, start);
3220 BUG_ON(!cache);
3221 }
3222
3223 len = cache->key.objectid + cache->key.offset - start;
3224 len = min(len, end + 1 - start);
3225
3226 if (start < cache->last_byte_to_unpin) {
3227 len = min(len, cache->last_byte_to_unpin - start);
3228 btrfs_add_free_space(cache, start, len);
3229 }
3230
3231 spin_lock(&cache->space_info->lock);
3232 spin_lock(&cache->lock);
3233 cache->pinned -= len;
3234 cache->space_info->bytes_pinned -= len;
3235 spin_unlock(&cache->lock);
3236 spin_unlock(&cache->space_info->lock);
3165 3237
3166 set_extent_dirty(copy, start, end, GFP_NOFS); 3238 start += len;
3167 last = end + 1;
3168 } 3239 }
3240
3241 if (cache)
3242 btrfs_put_block_group(cache);
3169 return 0; 3243 return 0;
3170} 3244}
3171 3245
3172int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3246int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3173 struct btrfs_root *root, 3247 struct btrfs_root *root)
3174 struct extent_io_tree *unpin)
3175{ 3248{
3249 struct btrfs_fs_info *fs_info = root->fs_info;
3250 struct extent_io_tree *unpin;
3176 u64 start; 3251 u64 start;
3177 u64 end; 3252 u64 end;
3178 int ret; 3253 int ret;
3179 3254
3255 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
3256 unpin = &fs_info->freed_extents[1];
3257 else
3258 unpin = &fs_info->freed_extents[0];
3259
3180 while (1) { 3260 while (1) {
3181 ret = find_first_extent_bit(unpin, 0, &start, &end, 3261 ret = find_first_extent_bit(unpin, 0, &start, &end,
3182 EXTENT_DIRTY); 3262 EXTENT_DIRTY);
@@ -3185,10 +3265,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3185 3265
3186 ret = btrfs_discard_extent(root, start, end + 1 - start); 3266 ret = btrfs_discard_extent(root, start, end + 1 - start);
3187 3267
3188 /* unlocks the pinned mutex */
3189 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
3190 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3268 clear_extent_dirty(unpin, start, end, GFP_NOFS);
3191 3269 unpin_extent_range(root, start, end);
3192 cond_resched(); 3270 cond_resched();
3193 } 3271 }
3194 3272
@@ -3198,7 +3276,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3198static int pin_down_bytes(struct btrfs_trans_handle *trans, 3276static int pin_down_bytes(struct btrfs_trans_handle *trans,
3199 struct btrfs_root *root, 3277 struct btrfs_root *root,
3200 struct btrfs_path *path, 3278 struct btrfs_path *path,
3201 u64 bytenr, u64 num_bytes, int is_data, 3279 u64 bytenr, u64 num_bytes,
3280 int is_data, int reserved,
3202 struct extent_buffer **must_clean) 3281 struct extent_buffer **must_clean)
3203{ 3282{
3204 int err = 0; 3283 int err = 0;
@@ -3230,15 +3309,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
3230 } 3309 }
3231 free_extent_buffer(buf); 3310 free_extent_buffer(buf);
3232pinit: 3311pinit:
3233 btrfs_set_path_blocking(path); 3312 if (path)
3313 btrfs_set_path_blocking(path);
3234 /* unlocks the pinned mutex */ 3314 /* unlocks the pinned mutex */
3235 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3315 btrfs_pin_extent(root, bytenr, num_bytes, reserved);
3236 3316
3237 BUG_ON(err < 0); 3317 BUG_ON(err < 0);
3238 return 0; 3318 return 0;
3239} 3319}
3240 3320
3241
3242static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3321static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3243 struct btrfs_root *root, 3322 struct btrfs_root *root,
3244 u64 bytenr, u64 num_bytes, u64 parent, 3323 u64 bytenr, u64 num_bytes, u64 parent,
@@ -3412,7 +3491,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3412 } 3491 }
3413 3492
3414 ret = pin_down_bytes(trans, root, path, bytenr, 3493 ret = pin_down_bytes(trans, root, path, bytenr,
3415 num_bytes, is_data, &must_clean); 3494 num_bytes, is_data, 0, &must_clean);
3416 if (ret > 0) 3495 if (ret > 0)
3417 mark_free = 1; 3496 mark_free = 1;
3418 BUG_ON(ret < 0); 3497 BUG_ON(ret < 0);
@@ -3543,8 +3622,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3543 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3622 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
3544 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3623 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
3545 /* unlocks the pinned mutex */ 3624 /* unlocks the pinned mutex */
3546 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3625 btrfs_pin_extent(root, bytenr, num_bytes, 1);
3547 update_reserved_extents(root, bytenr, num_bytes, 0);
3548 ret = 0; 3626 ret = 0;
3549 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3627 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
3550 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3628 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
@@ -3584,19 +3662,33 @@ static noinline int
3584wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3662wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
3585 u64 num_bytes) 3663 u64 num_bytes)
3586{ 3664{
3665 struct btrfs_caching_control *caching_ctl;
3587 DEFINE_WAIT(wait); 3666 DEFINE_WAIT(wait);
3588 3667
3589 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); 3668 caching_ctl = get_caching_control(cache);
3590 3669 if (!caching_ctl)
3591 if (block_group_cache_done(cache)) {
3592 finish_wait(&cache->caching_q, &wait);
3593 return 0; 3670 return 0;
3594 }
3595 schedule();
3596 finish_wait(&cache->caching_q, &wait);
3597 3671
3598 wait_event(cache->caching_q, block_group_cache_done(cache) || 3672 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
3599 (cache->free_space >= num_bytes)); 3673 (cache->free_space >= num_bytes));
3674
3675 put_caching_control(caching_ctl);
3676 return 0;
3677}
3678
3679static noinline int
3680wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
3681{
3682 struct btrfs_caching_control *caching_ctl;
3683 DEFINE_WAIT(wait);
3684
3685 caching_ctl = get_caching_control(cache);
3686 if (!caching_ctl)
3687 return 0;
3688
3689 wait_event(caching_ctl->wait, block_group_cache_done(cache));
3690
3691 put_caching_control(caching_ctl);
3600 return 0; 3692 return 0;
3601} 3693}
3602 3694
@@ -3880,6 +3972,8 @@ checks:
3880 search_start - offset); 3972 search_start - offset);
3881 BUG_ON(offset > search_start); 3973 BUG_ON(offset > search_start);
3882 3974
3975 update_reserved_extents(block_group, num_bytes, 1);
3976
3883 /* we are all good, lets return */ 3977 /* we are all good, lets return */
3884 break; 3978 break;
3885loop: 3979loop:
@@ -3972,12 +4066,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
3972 up_read(&info->groups_sem); 4066 up_read(&info->groups_sem);
3973} 4067}
3974 4068
3975static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4069int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3976 struct btrfs_root *root, 4070 struct btrfs_root *root,
3977 u64 num_bytes, u64 min_alloc_size, 4071 u64 num_bytes, u64 min_alloc_size,
3978 u64 empty_size, u64 hint_byte, 4072 u64 empty_size, u64 hint_byte,
3979 u64 search_end, struct btrfs_key *ins, 4073 u64 search_end, struct btrfs_key *ins,
3980 u64 data) 4074 u64 data)
3981{ 4075{
3982 int ret; 4076 int ret;
3983 u64 search_start = 0; 4077 u64 search_start = 0;
@@ -4043,25 +4137,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
4043 ret = btrfs_discard_extent(root, start, len); 4137 ret = btrfs_discard_extent(root, start, len);
4044 4138
4045 btrfs_add_free_space(cache, start, len); 4139 btrfs_add_free_space(cache, start, len);
4140 update_reserved_extents(cache, len, 0);
4046 btrfs_put_block_group(cache); 4141 btrfs_put_block_group(cache);
4047 update_reserved_extents(root, start, len, 0);
4048
4049 return ret;
4050}
4051
4052int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
4053 struct btrfs_root *root,
4054 u64 num_bytes, u64 min_alloc_size,
4055 u64 empty_size, u64 hint_byte,
4056 u64 search_end, struct btrfs_key *ins,
4057 u64 data)
4058{
4059 int ret;
4060 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
4061 empty_size, hint_byte, search_end, ins,
4062 data);
4063 if (!ret)
4064 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4065 4142
4066 return ret; 4143 return ret;
4067} 4144}
@@ -4222,15 +4299,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4222{ 4299{
4223 int ret; 4300 int ret;
4224 struct btrfs_block_group_cache *block_group; 4301 struct btrfs_block_group_cache *block_group;
4302 struct btrfs_caching_control *caching_ctl;
4303 u64 start = ins->objectid;
4304 u64 num_bytes = ins->offset;
4225 4305
4226 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4306 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4227 cache_block_group(block_group); 4307 cache_block_group(block_group);
4228 wait_event(block_group->caching_q, 4308 caching_ctl = get_caching_control(block_group);
4229 block_group_cache_done(block_group));
4230 4309
4231 ret = btrfs_remove_free_space(block_group, ins->objectid, 4310 if (!caching_ctl) {
4232 ins->offset); 4311 BUG_ON(!block_group_cache_done(block_group));
4233 BUG_ON(ret); 4312 ret = btrfs_remove_free_space(block_group, start, num_bytes);
4313 BUG_ON(ret);
4314 } else {
4315 mutex_lock(&caching_ctl->mutex);
4316
4317 if (start >= caching_ctl->progress) {
4318 ret = add_excluded_extent(root, start, num_bytes);
4319 BUG_ON(ret);
4320 } else if (start + num_bytes <= caching_ctl->progress) {
4321 ret = btrfs_remove_free_space(block_group,
4322 start, num_bytes);
4323 BUG_ON(ret);
4324 } else {
4325 num_bytes = caching_ctl->progress - start;
4326 ret = btrfs_remove_free_space(block_group,
4327 start, num_bytes);
4328 BUG_ON(ret);
4329
4330 start = caching_ctl->progress;
4331 num_bytes = ins->objectid + ins->offset -
4332 caching_ctl->progress;
4333 ret = add_excluded_extent(root, start, num_bytes);
4334 BUG_ON(ret);
4335 }
4336
4337 mutex_unlock(&caching_ctl->mutex);
4338 put_caching_control(caching_ctl);
4339 }
4340
4341 update_reserved_extents(block_group, ins->offset, 1);
4234 btrfs_put_block_group(block_group); 4342 btrfs_put_block_group(block_group);
4235 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4343 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
4236 0, owner, offset, ins, 1); 4344 0, owner, offset, ins, 1);
@@ -4254,9 +4362,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4254 int ret; 4362 int ret;
4255 u64 flags = 0; 4363 u64 flags = 0;
4256 4364
4257 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4365 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4258 empty_size, hint_byte, search_end, 4366 empty_size, hint_byte, search_end,
4259 ins, 0); 4367 ins, 0);
4260 if (ret) 4368 if (ret)
4261 return ret; 4369 return ret;
4262 4370
@@ -4267,7 +4375,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4267 } else 4375 } else
4268 BUG_ON(parent > 0); 4376 BUG_ON(parent > 0);
4269 4377
4270 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4271 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4378 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4272 struct btrfs_delayed_extent_op *extent_op; 4379 struct btrfs_delayed_extent_op *extent_op;
4273 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4380 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
@@ -7164,8 +7271,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7164{ 7271{
7165 struct btrfs_block_group_cache *block_group; 7272 struct btrfs_block_group_cache *block_group;
7166 struct btrfs_space_info *space_info; 7273 struct btrfs_space_info *space_info;
7274 struct btrfs_caching_control *caching_ctl;
7167 struct rb_node *n; 7275 struct rb_node *n;
7168 7276
7277 down_write(&info->extent_commit_sem);
7278 while (!list_empty(&info->caching_block_groups)) {
7279 caching_ctl = list_entry(info->caching_block_groups.next,
7280 struct btrfs_caching_control, list);
7281 list_del(&caching_ctl->list);
7282 put_caching_control(caching_ctl);
7283 }
7284 up_write(&info->extent_commit_sem);
7285
7169 spin_lock(&info->block_group_cache_lock); 7286 spin_lock(&info->block_group_cache_lock);
7170 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 7287 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
7171 block_group = rb_entry(n, struct btrfs_block_group_cache, 7288 block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -7179,8 +7296,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7179 up_write(&block_group->space_info->groups_sem); 7296 up_write(&block_group->space_info->groups_sem);
7180 7297
7181 if (block_group->cached == BTRFS_CACHE_STARTED) 7298 if (block_group->cached == BTRFS_CACHE_STARTED)
7182 wait_event(block_group->caching_q, 7299 wait_block_group_cache_done(block_group);
7183 block_group_cache_done(block_group));
7184 7300
7185 btrfs_remove_free_space_cache(block_group); 7301 btrfs_remove_free_space_cache(block_group);
7186 7302
@@ -7250,7 +7366,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7250 spin_lock_init(&cache->lock); 7366 spin_lock_init(&cache->lock);
7251 spin_lock_init(&cache->tree_lock); 7367 spin_lock_init(&cache->tree_lock);
7252 cache->fs_info = info; 7368 cache->fs_info = info;
7253 init_waitqueue_head(&cache->caching_q);
7254 INIT_LIST_HEAD(&cache->list); 7369 INIT_LIST_HEAD(&cache->list);
7255 INIT_LIST_HEAD(&cache->cluster_list); 7370 INIT_LIST_HEAD(&cache->cluster_list);
7256 7371
@@ -7272,8 +7387,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7272 cache->flags = btrfs_block_group_flags(&cache->item); 7387 cache->flags = btrfs_block_group_flags(&cache->item);
7273 cache->sectorsize = root->sectorsize; 7388 cache->sectorsize = root->sectorsize;
7274 7389
7275 remove_sb_from_cache(root, cache);
7276
7277 /* 7390 /*
7278 * check for two cases, either we are full, and therefore 7391 * check for two cases, either we are full, and therefore
7279 * don't need to bother with the caching work since we won't 7392 * don't need to bother with the caching work since we won't
@@ -7282,13 +7395,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7282 * time, particularly in the full case. 7395 * time, particularly in the full case.
7283 */ 7396 */
7284 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7397 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
7398 cache->last_byte_to_unpin = (u64)-1;
7285 cache->cached = BTRFS_CACHE_FINISHED; 7399 cache->cached = BTRFS_CACHE_FINISHED;
7286 } else if (btrfs_block_group_used(&cache->item) == 0) { 7400 } else if (btrfs_block_group_used(&cache->item) == 0) {
7401 exclude_super_stripes(root, cache);
7402 cache->last_byte_to_unpin = (u64)-1;
7287 cache->cached = BTRFS_CACHE_FINISHED; 7403 cache->cached = BTRFS_CACHE_FINISHED;
7288 add_new_free_space(cache, root->fs_info, 7404 add_new_free_space(cache, root->fs_info,
7289 found_key.objectid, 7405 found_key.objectid,
7290 found_key.objectid + 7406 found_key.objectid +
7291 found_key.offset); 7407 found_key.offset);
7408 free_excluded_extents(root, cache);
7292 } 7409 }
7293 7410
7294 ret = update_space_info(info, cache->flags, found_key.offset, 7411 ret = update_space_info(info, cache->flags, found_key.offset,
@@ -7345,7 +7462,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7345 atomic_set(&cache->count, 1); 7462 atomic_set(&cache->count, 1);
7346 spin_lock_init(&cache->lock); 7463 spin_lock_init(&cache->lock);
7347 spin_lock_init(&cache->tree_lock); 7464 spin_lock_init(&cache->tree_lock);
7348 init_waitqueue_head(&cache->caching_q);
7349 INIT_LIST_HEAD(&cache->list); 7465 INIT_LIST_HEAD(&cache->list);
7350 INIT_LIST_HEAD(&cache->cluster_list); 7466 INIT_LIST_HEAD(&cache->cluster_list);
7351 7467
@@ -7354,12 +7470,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7354 cache->flags = type; 7470 cache->flags = type;
7355 btrfs_set_block_group_flags(&cache->item, type); 7471 btrfs_set_block_group_flags(&cache->item, type);
7356 7472
7473 cache->last_byte_to_unpin = (u64)-1;
7357 cache->cached = BTRFS_CACHE_FINISHED; 7474 cache->cached = BTRFS_CACHE_FINISHED;
7358 remove_sb_from_cache(root, cache); 7475 exclude_super_stripes(root, cache);
7359 7476
7360 add_new_free_space(cache, root->fs_info, chunk_offset, 7477 add_new_free_space(cache, root->fs_info, chunk_offset,
7361 chunk_offset + size); 7478 chunk_offset + size);
7362 7479
7480 free_excluded_extents(root, cache);
7481
7363 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7482 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7364 &cache->space_info); 7483 &cache->space_info);
7365 BUG_ON(ret); 7484 BUG_ON(ret);
@@ -7428,8 +7547,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7428 up_write(&block_group->space_info->groups_sem); 7547 up_write(&block_group->space_info->groups_sem);
7429 7548
7430 if (block_group->cached == BTRFS_CACHE_STARTED) 7549 if (block_group->cached == BTRFS_CACHE_STARTED)
7431 wait_event(block_group->caching_q, 7550 wait_block_group_cache_done(block_group);
7432 block_group_cache_done(block_group));
7433 7551
7434 btrfs_remove_free_space_cache(block_group); 7552 btrfs_remove_free_space_cache(block_group);
7435 7553
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cdbb5022da52..6ed6186f51cd 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -874,7 +874,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
874 unsigned long timeout = 1; 874 unsigned long timeout = 1;
875 struct btrfs_transaction *cur_trans; 875 struct btrfs_transaction *cur_trans;
876 struct btrfs_transaction *prev_trans = NULL; 876 struct btrfs_transaction *prev_trans = NULL;
877 struct extent_io_tree *pinned_copy;
878 DEFINE_WAIT(wait); 877 DEFINE_WAIT(wait);
879 int ret; 878 int ret;
880 int should_grow = 0; 879 int should_grow = 0;
@@ -915,13 +914,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
915 return 0; 914 return 0;
916 } 915 }
917 916
918 pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
919 if (!pinned_copy)
920 return -ENOMEM;
921
922 extent_io_tree_init(pinned_copy,
923 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
924
925 trans->transaction->in_commit = 1; 917 trans->transaction->in_commit = 1;
926 trans->transaction->blocked = 1; 918 trans->transaction->blocked = 1;
927 if (cur_trans->list.prev != &root->fs_info->trans_list) { 919 if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -1019,6 +1011,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1019 ret = commit_cowonly_roots(trans, root); 1011 ret = commit_cowonly_roots(trans, root);
1020 BUG_ON(ret); 1012 BUG_ON(ret);
1021 1013
1014 btrfs_prepare_extent_commit(trans, root);
1015
1022 cur_trans = root->fs_info->running_transaction; 1016 cur_trans = root->fs_info->running_transaction;
1023 spin_lock(&root->fs_info->new_trans_lock); 1017 spin_lock(&root->fs_info->new_trans_lock);
1024 root->fs_info->running_transaction = NULL; 1018 root->fs_info->running_transaction = NULL;
@@ -1042,8 +1036,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1042 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, 1036 memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
1043 sizeof(root->fs_info->super_copy)); 1037 sizeof(root->fs_info->super_copy));
1044 1038
1045 btrfs_copy_pinned(root, pinned_copy);
1046
1047 trans->transaction->blocked = 0; 1039 trans->transaction->blocked = 0;
1048 1040
1049 wake_up(&root->fs_info->transaction_wait); 1041 wake_up(&root->fs_info->transaction_wait);
@@ -1059,8 +1051,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1059 */ 1051 */
1060 mutex_unlock(&root->fs_info->tree_log_mutex); 1052 mutex_unlock(&root->fs_info->tree_log_mutex);
1061 1053
1062 btrfs_finish_extent_commit(trans, root, pinned_copy); 1054 btrfs_finish_extent_commit(trans, root);
1063 kfree(pinned_copy);
1064 1055
1065 /* do the directory inserts of any pending snapshot creations */ 1056 /* do the directory inserts of any pending snapshot creations */
1066 finish_pending_snapshots(trans, root->fs_info); 1057 finish_pending_snapshots(trans, root->fs_info);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8661a7381b39..f4a7b62f9bea 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log,
263 struct walk_control *wc, u64 gen) 263 struct walk_control *wc, u64 gen)
264{ 264{
265 if (wc->pin) 265 if (wc->pin)
266 btrfs_update_pinned_extents(log->fs_info->extent_root, 266 btrfs_pin_extent(log->fs_info->extent_root,
267 eb->start, eb->len, 1); 267 eb->start, eb->len, 0);
268 268
269 if (btrfs_buffer_uptodate(eb, gen)) { 269 if (btrfs_buffer_uptodate(eb, gen)) {
270 if (wc->write) 270 if (wc->write)