aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/ctree.h22
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c471
-rw-r--r--fs/btrfs/free-space-cache.c42
-rw-r--r--fs/btrfs/transaction.c23
-rw-r--r--fs/btrfs/tree-log.c2
6 files changed, 439 insertions, 124 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0cbf3491bb7c..42b03c4ee494 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -691,6 +691,7 @@ struct btrfs_space_info {
691 struct list_head block_groups; 691 struct list_head block_groups;
692 spinlock_t lock; 692 spinlock_t lock;
693 struct rw_semaphore groups_sem; 693 struct rw_semaphore groups_sem;
694 atomic_t caching_threads;
694}; 695};
695 696
696/* 697/*
@@ -721,11 +722,17 @@ struct btrfs_free_cluster {
721 struct list_head block_group_list; 722 struct list_head block_group_list;
722}; 723};
723 724
725enum btrfs_caching_type {
726 BTRFS_CACHE_NO = 0,
727 BTRFS_CACHE_STARTED = 1,
728 BTRFS_CACHE_FINISHED = 2,
729};
730
724struct btrfs_block_group_cache { 731struct btrfs_block_group_cache {
725 struct btrfs_key key; 732 struct btrfs_key key;
726 struct btrfs_block_group_item item; 733 struct btrfs_block_group_item item;
734 struct btrfs_fs_info *fs_info;
727 spinlock_t lock; 735 spinlock_t lock;
728 struct mutex cache_mutex;
729 u64 pinned; 736 u64 pinned;
730 u64 reserved; 737 u64 reserved;
731 u64 flags; 738 u64 flags;
@@ -733,15 +740,19 @@ struct btrfs_block_group_cache {
733 int extents_thresh; 740 int extents_thresh;
734 int free_extents; 741 int free_extents;
735 int total_bitmaps; 742 int total_bitmaps;
736 int cached;
737 int ro; 743 int ro;
738 int dirty; 744 int dirty;
739 745
746 /* cache tracking stuff */
747 wait_queue_head_t caching_q;
748 int cached;
749
740 struct btrfs_space_info *space_info; 750 struct btrfs_space_info *space_info;
741 751
742 /* free space cache stuff */ 752 /* free space cache stuff */
743 spinlock_t tree_lock; 753 spinlock_t tree_lock;
744 struct rb_root free_space_offset; 754 struct rb_root free_space_offset;
755 u64 free_space;
745 756
746 /* block group cache stuff */ 757 /* block group cache stuff */
747 struct rb_node cache_node; 758 struct rb_node cache_node;
@@ -834,6 +845,7 @@ struct btrfs_fs_info {
834 atomic_t async_submit_draining; 845 atomic_t async_submit_draining;
835 atomic_t nr_async_bios; 846 atomic_t nr_async_bios;
836 atomic_t async_delalloc_pages; 847 atomic_t async_delalloc_pages;
848 atomic_t async_caching_threads;
837 849
838 /* 850 /*
839 * this is used by the balancing code to wait for all the pending 851 * this is used by the balancing code to wait for all the pending
@@ -950,6 +962,9 @@ struct btrfs_root {
950 /* the node lock is held while changing the node pointer */ 962 /* the node lock is held while changing the node pointer */
951 spinlock_t node_lock; 963 spinlock_t node_lock;
952 964
965 /* taken when updating the commit root */
966 struct rw_semaphore commit_root_sem;
967
953 struct extent_buffer *commit_root; 968 struct extent_buffer *commit_root;
954 struct btrfs_root *log_root; 969 struct btrfs_root *log_root;
955 struct btrfs_root *reloc_root; 970 struct btrfs_root *reloc_root;
@@ -1911,7 +1926,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1911 struct btrfs_root *root, unsigned long count); 1926 struct btrfs_root *root, unsigned long count);
1912int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 1927int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
1913int btrfs_update_pinned_extents(struct btrfs_root *root, 1928int btrfs_update_pinned_extents(struct btrfs_root *root,
1914 u64 bytenr, u64 num, int pin); 1929 u64 bytenr, u64 num, int pin, int mark_free);
1915int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1930int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1916 struct btrfs_root *root, struct extent_buffer *leaf); 1931 struct btrfs_root *root, struct extent_buffer *leaf);
1917int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1932int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -1996,6 +2011,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
1996 u64 bytes); 2011 u64 bytes);
1997void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2012void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1998 u64 bytes); 2013 u64 bytes);
2014void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info);
1999/* ctree.c */ 2015/* ctree.c */
2000int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2016int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2001 int level, int *slot); 2017 int level, int *slot);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 55d9d188e693..ec2c915f7f4a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -907,6 +907,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
907 spin_lock_init(&root->inode_lock); 907 spin_lock_init(&root->inode_lock);
908 mutex_init(&root->objectid_mutex); 908 mutex_init(&root->objectid_mutex);
909 mutex_init(&root->log_mutex); 909 mutex_init(&root->log_mutex);
910 init_rwsem(&root->commit_root_sem);
910 init_waitqueue_head(&root->log_writer_wait); 911 init_waitqueue_head(&root->log_writer_wait);
911 init_waitqueue_head(&root->log_commit_wait[0]); 912 init_waitqueue_head(&root->log_commit_wait[0]);
912 init_waitqueue_head(&root->log_commit_wait[1]); 913 init_waitqueue_head(&root->log_commit_wait[1]);
@@ -1566,6 +1567,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1566 atomic_set(&fs_info->async_delalloc_pages, 0); 1567 atomic_set(&fs_info->async_delalloc_pages, 0);
1567 atomic_set(&fs_info->async_submit_draining, 0); 1568 atomic_set(&fs_info->async_submit_draining, 0);
1568 atomic_set(&fs_info->nr_async_bios, 0); 1569 atomic_set(&fs_info->nr_async_bios, 0);
1570 atomic_set(&fs_info->async_caching_threads, 0);
1569 fs_info->sb = sb; 1571 fs_info->sb = sb;
1570 fs_info->max_extent = (u64)-1; 1572 fs_info->max_extent = (u64)-1;
1571 fs_info->max_inline = 8192 * 1024; 1573 fs_info->max_inline = 8192 * 1024;
@@ -2337,6 +2339,7 @@ int close_ctree(struct btrfs_root *root)
2337 free_extent_buffer(root->fs_info->csum_root->commit_root); 2339 free_extent_buffer(root->fs_info->csum_root->commit_root);
2338 2340
2339 btrfs_free_block_groups(root->fs_info); 2341 btrfs_free_block_groups(root->fs_info);
2342 btrfs_free_super_mirror_extents(root->fs_info);
2340 2343
2341 del_fs_roots(fs_info); 2344 del_fs_roots(fs_info);
2342 2345
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 98697be6bdde..9a489cc89fd3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/sort.h> 22#include <linux/sort.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/kthread.h>
24#include "compat.h" 25#include "compat.h"
25#include "hash.h" 26#include "hash.h"
26#include "ctree.h" 27#include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
61 struct btrfs_root *extent_root, u64 alloc_bytes, 62 struct btrfs_root *extent_root, u64 alloc_bytes,
62 u64 flags, int force); 63 u64 flags, int force);
63 64
65static noinline int
66block_group_cache_done(struct btrfs_block_group_cache *cache)
67{
68 smp_mb();
69 return cache->cached == BTRFS_CACHE_FINISHED;
70}
71
64static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 72static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
65{ 73{
66 return (cache->flags & bits) == bits; 74 return (cache->flags & bits) == bits;
@@ -145,21 +153,64 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
145 return ret; 153 return ret;
146} 154}
147 155
156void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info)
157{
158 u64 start, end, last = 0;
159 int ret;
160
161 while (1) {
162 ret = find_first_extent_bit(&info->pinned_extents, last,
163 &start, &end, EXTENT_LOCKED);
164 if (ret)
165 break;
166
167 unlock_extent(&info->pinned_extents, start, end, GFP_NOFS);
168 last = end+1;
169 }
170}
171
172static int remove_sb_from_cache(struct btrfs_root *root,
173 struct btrfs_block_group_cache *cache)
174{
175 struct btrfs_fs_info *fs_info = root->fs_info;
176 u64 bytenr;
177 u64 *logical;
178 int stripe_len;
179 int i, nr, ret;
180
181 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
182 bytenr = btrfs_sb_offset(i);
183 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
184 cache->key.objectid, bytenr,
185 0, &logical, &nr, &stripe_len);
186 BUG_ON(ret);
187 while (nr--) {
188 try_lock_extent(&fs_info->pinned_extents,
189 logical[nr],
190 logical[nr] + stripe_len - 1, GFP_NOFS);
191 }
192 kfree(logical);
193 }
194
195 return 0;
196}
197
148/* 198/*
149 * this is only called by cache_block_group, since we could have freed extents 199 * this is only called by cache_block_group, since we could have freed extents
150 * we need to check the pinned_extents for any extents that can't be used yet 200 * we need to check the pinned_extents for any extents that can't be used yet
151 * since their free space will be released as soon as the transaction commits. 201 * since their free space will be released as soon as the transaction commits.
152 */ 202 */
153static int add_new_free_space(struct btrfs_block_group_cache *block_group, 203static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
154 struct btrfs_fs_info *info, u64 start, u64 end) 204 struct btrfs_fs_info *info, u64 start, u64 end)
155{ 205{
156 u64 extent_start, extent_end, size; 206 u64 extent_start, extent_end, size, total_added = 0;
157 int ret; 207 int ret;
158 208
159 while (start < end) { 209 while (start < end) {
160 ret = find_first_extent_bit(&info->pinned_extents, start, 210 ret = find_first_extent_bit(&info->pinned_extents, start,
161 &extent_start, &extent_end, 211 &extent_start, &extent_end,
162 EXTENT_DIRTY); 212 EXTENT_DIRTY|EXTENT_LOCKED|
213 EXTENT_DELALLOC);
163 if (ret) 214 if (ret)
164 break; 215 break;
165 216
@@ -167,6 +218,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
167 start = extent_end + 1; 218 start = extent_end + 1;
168 } else if (extent_start > start && extent_start < end) { 219 } else if (extent_start > start && extent_start < end) {
169 size = extent_start - start; 220 size = extent_start - start;
221 total_added += size;
170 ret = btrfs_add_free_space(block_group, start, 222 ret = btrfs_add_free_space(block_group, start,
171 size); 223 size);
172 BUG_ON(ret); 224 BUG_ON(ret);
@@ -178,84 +230,139 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
178 230
179 if (start < end) { 231 if (start < end) {
180 size = end - start; 232 size = end - start;
233 total_added += size;
181 ret = btrfs_add_free_space(block_group, start, size); 234 ret = btrfs_add_free_space(block_group, start, size);
182 BUG_ON(ret); 235 BUG_ON(ret);
183 } 236 }
184 237
185 return 0; 238 return total_added;
186} 239}
187 240
188static int remove_sb_from_cache(struct btrfs_root *root, 241DEFINE_MUTEX(discard_mutex);
189 struct btrfs_block_group_cache *cache) 242
243/*
244 * if async kthreads are running when we cross transactions, we mark any pinned
245 * extents with EXTENT_DELALLOC and then let the caching kthreads clean up those
246 * extents when they are done. Also we run this from btrfs_finish_extent_commit
247 * in case there were some pinned extents that were missed because we had
248 * already cached that block group.
249 */
250static void btrfs_discard_pinned_extents(struct btrfs_fs_info *fs_info,
251 struct btrfs_block_group_cache *cache)
190{ 252{
191 u64 bytenr; 253 u64 start, end, last;
192 u64 *logical; 254 int ret;
193 int stripe_len;
194 int i, nr, ret;
195 255
196 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 256 if (!cache)
197 bytenr = btrfs_sb_offset(i); 257 last = 0;
198 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 258 else
199 cache->key.objectid, bytenr, 0, 259 last = cache->key.objectid;
200 &logical, &nr, &stripe_len); 260
201 BUG_ON(ret); 261 mutex_lock(&discard_mutex);
202 while (nr--) { 262 while (1) {
203 btrfs_remove_free_space(cache, logical[nr], 263 ret = find_first_extent_bit(&fs_info->pinned_extents, last,
204 stripe_len); 264 &start, &end, EXTENT_DELALLOC);
265 if (ret)
266 break;
267
268 if (cache && start >= cache->key.objectid + cache->key.offset)
269 break;
270
271
272 if (!cache) {
273 cache = btrfs_lookup_block_group(fs_info, start);
274 BUG_ON(!cache);
275
276 start = max(start, cache->key.objectid);
277 end = min(end, cache->key.objectid + cache->key.offset - 1);
278
279 if (block_group_cache_done(cache))
280 btrfs_add_free_space(cache, start,
281 end - start + 1);
282 cache = NULL;
283 } else {
284 start = max(start, cache->key.objectid);
285 end = min(end, cache->key.objectid + cache->key.offset - 1);
286 btrfs_add_free_space(cache, start, end - start + 1);
287 }
288
289 clear_extent_bits(&fs_info->pinned_extents, start, end,
290 EXTENT_DELALLOC, GFP_NOFS);
291 last = end + 1;
292
293 if (need_resched()) {
294 mutex_unlock(&discard_mutex);
295 cond_resched();
296 mutex_lock(&discard_mutex);
205 } 297 }
206 kfree(logical);
207 } 298 }
208 return 0; 299 mutex_unlock(&discard_mutex);
209} 300}
210 301
211static int cache_block_group(struct btrfs_root *root, 302static int caching_kthread(void *data)
212 struct btrfs_block_group_cache *block_group)
213{ 303{
304 struct btrfs_block_group_cache *block_group = data;
305 struct btrfs_fs_info *fs_info = block_group->fs_info;
306 u64 last = 0;
214 struct btrfs_path *path; 307 struct btrfs_path *path;
215 int ret = 0; 308 int ret = 0;
216 struct btrfs_key key; 309 struct btrfs_key key;
217 struct extent_buffer *leaf; 310 struct extent_buffer *leaf;
218 int slot; 311 int slot;
219 u64 last; 312 u64 total_found = 0;
220
221 if (!block_group)
222 return 0;
223 313
224 root = root->fs_info->extent_root; 314 BUG_ON(!fs_info);
225
226 if (block_group->cached)
227 return 0;
228 315
229 path = btrfs_alloc_path(); 316 path = btrfs_alloc_path();
230 if (!path) 317 if (!path)
231 return -ENOMEM; 318 return -ENOMEM;
232 319
233 path->reada = 2; 320 atomic_inc(&fs_info->async_caching_threads);
321 atomic_inc(&block_group->space_info->caching_threads);
322 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
323again:
324 /* need to make sure the commit_root doesn't disappear */
325 down_read(&fs_info->extent_root->commit_root_sem);
326
234 /* 327 /*
235 * we get into deadlocks with paths held by callers of this function. 328 * We don't want to deadlock with somebody trying to allocate a new
236 * since the alloc_mutex is protecting things right now, just 329 * extent for the extent root while also trying to search the extent
237 * skip the locking here 330 * root to add free space. So we skip locking and search the commit
331 * root, since its read-only
238 */ 332 */
239 path->skip_locking = 1; 333 path->skip_locking = 1;
240 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 334 path->search_commit_root = 1;
335 path->reada = 2;
336
241 key.objectid = last; 337 key.objectid = last;
242 key.offset = 0; 338 key.offset = 0;
243 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 339 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
244 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 340 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
245 if (ret < 0) 341 if (ret < 0)
246 goto err; 342 goto err;
247 343
248 while (1) { 344 while (1) {
345 smp_mb();
346 if (block_group->fs_info->closing)
347 break;
348
249 leaf = path->nodes[0]; 349 leaf = path->nodes[0];
250 slot = path->slots[0]; 350 slot = path->slots[0];
251 if (slot >= btrfs_header_nritems(leaf)) { 351 if (slot >= btrfs_header_nritems(leaf)) {
252 ret = btrfs_next_leaf(root, path); 352 ret = btrfs_next_leaf(fs_info->extent_root, path);
253 if (ret < 0) 353 if (ret < 0)
254 goto err; 354 goto err;
255 if (ret == 0) 355 else if (ret)
256 continue;
257 else
258 break; 356 break;
357
358 if (need_resched()) {
359 btrfs_release_path(fs_info->extent_root, path);
360 up_read(&fs_info->extent_root->commit_root_sem);
361 cond_resched();
362 goto again;
363 }
364
365 continue;
259 } 366 }
260 btrfs_item_key_to_cpu(leaf, &key, slot); 367 btrfs_item_key_to_cpu(leaf, &key, slot);
261 if (key.objectid < block_group->key.objectid) 368 if (key.objectid < block_group->key.objectid)
@@ -266,24 +373,63 @@ static int cache_block_group(struct btrfs_root *root,
266 break; 373 break;
267 374
268 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 375 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
269 add_new_free_space(block_group, root->fs_info, last, 376 total_found += add_new_free_space(block_group,
270 key.objectid); 377 fs_info, last,
271 378 key.objectid);
272 last = key.objectid + key.offset; 379 last = key.objectid + key.offset;
273 } 380 }
381
382 if (total_found > (1024 * 1024 * 2)) {
383 total_found = 0;
384 wake_up(&block_group->caching_q);
385 }
274next: 386next:
275 path->slots[0]++; 387 path->slots[0]++;
276 } 388 }
389 ret = 0;
277 390
278 add_new_free_space(block_group, root->fs_info, last, 391 total_found += add_new_free_space(block_group, fs_info, last,
279 block_group->key.objectid + 392 block_group->key.objectid +
280 block_group->key.offset); 393 block_group->key.offset);
394
395 spin_lock(&block_group->lock);
396 block_group->cached = BTRFS_CACHE_FINISHED;
397 spin_unlock(&block_group->lock);
281 398
282 block_group->cached = 1;
283 remove_sb_from_cache(root, block_group);
284 ret = 0;
285err: 399err:
286 btrfs_free_path(path); 400 btrfs_free_path(path);
401 up_read(&fs_info->extent_root->commit_root_sem);
402 atomic_dec(&fs_info->async_caching_threads);
403 atomic_dec(&block_group->space_info->caching_threads);
404 wake_up(&block_group->caching_q);
405
406 if (!ret)
407 btrfs_discard_pinned_extents(fs_info, block_group);
408
409 return 0;
410}
411
412static int cache_block_group(struct btrfs_block_group_cache *cache)
413{
414 struct task_struct *tsk;
415 int ret = 0;
416
417 spin_lock(&cache->lock);
418 if (cache->cached != BTRFS_CACHE_NO) {
419 spin_unlock(&cache->lock);
420 return ret;
421 }
422 cache->cached = BTRFS_CACHE_STARTED;
423 spin_unlock(&cache->lock);
424
425 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
426 cache->key.objectid);
427 if (IS_ERR(tsk)) {
428 ret = PTR_ERR(tsk);
429 printk(KERN_ERR "error running thread %d\n", ret);
430 BUG();
431 }
432
287 return ret; 433 return ret;
288} 434}
289 435
@@ -1721,7 +1867,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
1721 BUG_ON(ret); 1867 BUG_ON(ret);
1722 } 1868 }
1723 btrfs_update_pinned_extents(root, node->bytenr, 1869 btrfs_update_pinned_extents(root, node->bytenr,
1724 node->num_bytes, 1); 1870 node->num_bytes, 1, 0);
1725 update_reserved_extents(root, node->bytenr, 1871 update_reserved_extents(root, node->bytenr,
1726 node->num_bytes, 0); 1872 node->num_bytes, 0);
1727 } 1873 }
@@ -2496,6 +2642,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2496 found->force_alloc = 0; 2642 found->force_alloc = 0;
2497 *space_info = found; 2643 *space_info = found;
2498 list_add_rcu(&found->list, &info->space_info); 2644 list_add_rcu(&found->list, &info->space_info);
2645 atomic_set(&found->caching_threads, 0);
2499 return 0; 2646 return 0;
2500} 2647}
2501 2648
@@ -2953,7 +3100,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
2953} 3100}
2954 3101
2955int btrfs_update_pinned_extents(struct btrfs_root *root, 3102int btrfs_update_pinned_extents(struct btrfs_root *root,
2956 u64 bytenr, u64 num, int pin) 3103 u64 bytenr, u64 num, int pin, int mark_free)
2957{ 3104{
2958 u64 len; 3105 u64 len;
2959 struct btrfs_block_group_cache *cache; 3106 struct btrfs_block_group_cache *cache;
@@ -2988,7 +3135,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
2988 spin_unlock(&cache->lock); 3135 spin_unlock(&cache->lock);
2989 spin_unlock(&cache->space_info->lock); 3136 spin_unlock(&cache->space_info->lock);
2990 fs_info->total_pinned -= len; 3137 fs_info->total_pinned -= len;
2991 if (cache->cached) 3138 if (block_group_cache_done(cache) && mark_free)
2992 btrfs_add_free_space(cache, bytenr, len); 3139 btrfs_add_free_space(cache, bytenr, len);
2993 } 3140 }
2994 btrfs_put_block_group(cache); 3141 btrfs_put_block_group(cache);
@@ -3034,14 +3181,27 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
3034 u64 last = 0; 3181 u64 last = 0;
3035 u64 start; 3182 u64 start;
3036 u64 end; 3183 u64 end;
3184 bool caching_kthreads = false;
3037 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 3185 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
3038 int ret; 3186 int ret;
3039 3187
3188 if (atomic_read(&root->fs_info->async_caching_threads))
3189 caching_kthreads = true;
3190
3040 while (1) { 3191 while (1) {
3041 ret = find_first_extent_bit(pinned_extents, last, 3192 ret = find_first_extent_bit(pinned_extents, last,
3042 &start, &end, EXTENT_DIRTY); 3193 &start, &end, EXTENT_DIRTY);
3043 if (ret) 3194 if (ret)
3044 break; 3195 break;
3196
3197 /*
3198 * we need to make sure that the pinned extents don't go away
3199 * while we are caching block groups
3200 */
3201 if (unlikely(caching_kthreads))
3202 set_extent_delalloc(pinned_extents, start, end,
3203 GFP_NOFS);
3204
3045 set_extent_dirty(copy, start, end, GFP_NOFS); 3205 set_extent_dirty(copy, start, end, GFP_NOFS);
3046 last = end + 1; 3206 last = end + 1;
3047 } 3207 }
@@ -3055,6 +3215,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3055 u64 start; 3215 u64 start;
3056 u64 end; 3216 u64 end;
3057 int ret; 3217 int ret;
3218 int mark_free = 1;
3219
3220 ret = find_first_extent_bit(&root->fs_info->pinned_extents, 0,
3221 &start, &end, EXTENT_DELALLOC);
3222 if (!ret)
3223 mark_free = 0;
3058 3224
3059 while (1) { 3225 while (1) {
3060 ret = find_first_extent_bit(unpin, 0, &start, &end, 3226 ret = find_first_extent_bit(unpin, 0, &start, &end,
@@ -3065,11 +3231,16 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3065 ret = btrfs_discard_extent(root, start, end + 1 - start); 3231 ret = btrfs_discard_extent(root, start, end + 1 - start);
3066 3232
3067 /* unlocks the pinned mutex */ 3233 /* unlocks the pinned mutex */
3068 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 3234 btrfs_update_pinned_extents(root, start, end + 1 - start, 0,
3235 mark_free);
3069 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3236 clear_extent_dirty(unpin, start, end, GFP_NOFS);
3070 3237
3071 cond_resched(); 3238 cond_resched();
3072 } 3239 }
3240
3241 if (unlikely(!mark_free))
3242 btrfs_discard_pinned_extents(root->fs_info, NULL);
3243
3073 return ret; 3244 return ret;
3074} 3245}
3075 3246
@@ -3110,7 +3281,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
3110pinit: 3281pinit:
3111 btrfs_set_path_blocking(path); 3282 btrfs_set_path_blocking(path);
3112 /* unlocks the pinned mutex */ 3283 /* unlocks the pinned mutex */
3113 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3284 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0);
3114 3285
3115 BUG_ON(err < 0); 3286 BUG_ON(err < 0);
3116 return 0; 3287 return 0;
@@ -3421,7 +3592,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
3421 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3592 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
3422 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3593 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
3423 /* unlocks the pinned mutex */ 3594 /* unlocks the pinned mutex */
3424 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3595 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0);
3425 update_reserved_extents(root, bytenr, num_bytes, 0); 3596 update_reserved_extents(root, bytenr, num_bytes, 0);
3426 ret = 0; 3597 ret = 0;
3427 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3598 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
@@ -3448,6 +3619,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
3448} 3619}
3449 3620
3450/* 3621/*
3622 * when we wait for progress in the block group caching, its because
3623 * our allocation attempt failed at least once. So, we must sleep
3624 * and let some progress happen before we try again.
3625 *
3626 * This function will sleep at least once waiting for new free space to
3627 * show up, and then it will check the block group free space numbers
3628 * for our min num_bytes. Another option is to have it go ahead
3629 * and look in the rbtree for a free extent of a given size, but this
3630 * is a good start.
3631 */
3632static noinline int
3633wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
3634 u64 num_bytes)
3635{
3636 DEFINE_WAIT(wait);
3637
3638 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
3639
3640 if (block_group_cache_done(cache)) {
3641 finish_wait(&cache->caching_q, &wait);
3642 return 0;
3643 }
3644 schedule();
3645 finish_wait(&cache->caching_q, &wait);
3646
3647 wait_event(cache->caching_q, block_group_cache_done(cache) ||
3648 (cache->free_space >= num_bytes));
3649 return 0;
3650}
3651
3652enum btrfs_loop_type {
3653 LOOP_CACHED_ONLY = 0,
3654 LOOP_CACHING_NOWAIT = 1,
3655 LOOP_CACHING_WAIT = 2,
3656 LOOP_ALLOC_CHUNK = 3,
3657 LOOP_NO_EMPTY_SIZE = 4,
3658};
3659
3660/*
3451 * walks the btree of allocated extents and find a hole of a given size. 3661 * walks the btree of allocated extents and find a hole of a given size.
3452 * The key ins is changed to record the hole: 3662 * The key ins is changed to record the hole:
3453 * ins->objectid == block start 3663 * ins->objectid == block start
@@ -3472,6 +3682,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3472 struct btrfs_space_info *space_info; 3682 struct btrfs_space_info *space_info;
3473 int last_ptr_loop = 0; 3683 int last_ptr_loop = 0;
3474 int loop = 0; 3684 int loop = 0;
3685 bool found_uncached_bg = false;
3475 3686
3476 WARN_ON(num_bytes < root->sectorsize); 3687 WARN_ON(num_bytes < root->sectorsize);
3477 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3688 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3503,15 +3714,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3503 search_start = max(search_start, first_logical_byte(root, 0)); 3714 search_start = max(search_start, first_logical_byte(root, 0));
3504 search_start = max(search_start, hint_byte); 3715 search_start = max(search_start, hint_byte);
3505 3716
3506 if (!last_ptr) { 3717 if (!last_ptr)
3507 empty_cluster = 0; 3718 empty_cluster = 0;
3508 loop = 1;
3509 }
3510 3719
3511 if (search_start == hint_byte) { 3720 if (search_start == hint_byte) {
3512 block_group = btrfs_lookup_block_group(root->fs_info, 3721 block_group = btrfs_lookup_block_group(root->fs_info,
3513 search_start); 3722 search_start);
3514 if (block_group && block_group_bits(block_group, data)) { 3723 /*
3724 * we don't want to use the block group if it doesn't match our
3725 * allocation bits, or if its not cached.
3726 */
3727 if (block_group && block_group_bits(block_group, data) &&
3728 block_group_cache_done(block_group)) {
3515 down_read(&space_info->groups_sem); 3729 down_read(&space_info->groups_sem);
3516 if (list_empty(&block_group->list) || 3730 if (list_empty(&block_group->list) ||
3517 block_group->ro) { 3731 block_group->ro) {
@@ -3534,21 +3748,35 @@ search:
3534 down_read(&space_info->groups_sem); 3748 down_read(&space_info->groups_sem);
3535 list_for_each_entry(block_group, &space_info->block_groups, list) { 3749 list_for_each_entry(block_group, &space_info->block_groups, list) {
3536 u64 offset; 3750 u64 offset;
3751 int cached;
3537 3752
3538 atomic_inc(&block_group->count); 3753 atomic_inc(&block_group->count);
3539 search_start = block_group->key.objectid; 3754 search_start = block_group->key.objectid;
3540 3755
3541have_block_group: 3756have_block_group:
3542 if (unlikely(!block_group->cached)) { 3757 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
3543 mutex_lock(&block_group->cache_mutex); 3758 /*
3544 ret = cache_block_group(root, block_group); 3759 * we want to start caching kthreads, but not too many
3545 mutex_unlock(&block_group->cache_mutex); 3760 * right off the bat so we don't overwhelm the system,
3546 if (ret) { 3761 * so only start them if there are less than 2 and we're
3547 btrfs_put_block_group(block_group); 3762 * in the initial allocation phase.
3548 break; 3763 */
3764 if (loop > LOOP_CACHING_NOWAIT ||
3765 atomic_read(&space_info->caching_threads) < 2) {
3766 ret = cache_block_group(block_group);
3767 BUG_ON(ret);
3549 } 3768 }
3550 } 3769 }
3551 3770
3771 cached = block_group_cache_done(block_group);
3772 if (unlikely(!cached)) {
3773 found_uncached_bg = true;
3774
3775 /* if we only want cached bgs, loop */
3776 if (loop == LOOP_CACHED_ONLY)
3777 goto loop;
3778 }
3779
3552 if (unlikely(block_group->ro)) 3780 if (unlikely(block_group->ro))
3553 goto loop; 3781 goto loop;
3554 3782
@@ -3627,14 +3855,21 @@ refill_cluster:
3627 spin_unlock(&last_ptr->refill_lock); 3855 spin_unlock(&last_ptr->refill_lock);
3628 goto checks; 3856 goto checks;
3629 } 3857 }
3858 } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
3859 spin_unlock(&last_ptr->refill_lock);
3860
3861 wait_block_group_cache_progress(block_group,
3862 num_bytes + empty_cluster + empty_size);
3863 goto have_block_group;
3630 } 3864 }
3865
3631 /* 3866 /*
3632 * at this point we either didn't find a cluster 3867 * at this point we either didn't find a cluster
3633 * or we weren't able to allocate a block from our 3868 * or we weren't able to allocate a block from our
3634 * cluster. Free the cluster we've been trying 3869 * cluster. Free the cluster we've been trying
3635 * to use, and go to the next block group 3870 * to use, and go to the next block group
3636 */ 3871 */
3637 if (loop < 2) { 3872 if (loop < LOOP_NO_EMPTY_SIZE) {
3638 btrfs_return_cluster_to_free_space(NULL, 3873 btrfs_return_cluster_to_free_space(NULL,
3639 last_ptr); 3874 last_ptr);
3640 spin_unlock(&last_ptr->refill_lock); 3875 spin_unlock(&last_ptr->refill_lock);
@@ -3645,8 +3880,15 @@ refill_cluster:
3645 3880
3646 offset = btrfs_find_space_for_alloc(block_group, search_start, 3881 offset = btrfs_find_space_for_alloc(block_group, search_start,
3647 num_bytes, empty_size); 3882 num_bytes, empty_size);
3648 if (!offset) 3883 if (!offset && (cached || (!cached &&
3884 loop == LOOP_CACHING_NOWAIT))) {
3649 goto loop; 3885 goto loop;
3886 } else if (!offset && (!cached &&
3887 loop > LOOP_CACHING_NOWAIT)) {
3888 wait_block_group_cache_progress(block_group,
3889 num_bytes + empty_size);
3890 goto have_block_group;
3891 }
3650checks: 3892checks:
3651 search_start = stripe_align(root, offset); 3893 search_start = stripe_align(root, offset);
3652 /* move on to the next group */ 3894 /* move on to the next group */
@@ -3694,13 +3936,26 @@ loop:
3694 } 3936 }
3695 up_read(&space_info->groups_sem); 3937 up_read(&space_info->groups_sem);
3696 3938
3697 /* loop == 0, try to find a clustered alloc in every block group 3939 /* LOOP_CACHED_ONLY, only search fully cached block groups
3698 * loop == 1, try again after forcing a chunk allocation 3940 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
3699 * loop == 2, set empty_size and empty_cluster to 0 and try again 3941 * dont wait for them to finish caching
3942 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
3943 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
3944 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
3945 * again
3700 */ 3946 */
3701 if (!ins->objectid && loop < 3 && 3947 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
3702 (empty_size || empty_cluster || allowed_chunk_alloc)) { 3948 (found_uncached_bg || empty_size || empty_cluster ||
3703 if (loop >= 2) { 3949 allowed_chunk_alloc)) {
3950 if (found_uncached_bg) {
3951 found_uncached_bg = false;
3952 if (loop < LOOP_CACHING_WAIT) {
3953 loop++;
3954 goto search;
3955 }
3956 }
3957
3958 if (loop == LOOP_ALLOC_CHUNK) {
3704 empty_size = 0; 3959 empty_size = 0;
3705 empty_cluster = 0; 3960 empty_cluster = 0;
3706 } 3961 }
@@ -3713,7 +3968,7 @@ loop:
3713 space_info->force_alloc = 1; 3968 space_info->force_alloc = 1;
3714 } 3969 }
3715 3970
3716 if (loop < 3) { 3971 if (loop < LOOP_NO_EMPTY_SIZE) {
3717 loop++; 3972 loop++;
3718 goto search; 3973 goto search;
3719 } 3974 }
@@ -3809,7 +4064,7 @@ again:
3809 num_bytes, data, 1); 4064 num_bytes, data, 1);
3810 goto again; 4065 goto again;
3811 } 4066 }
3812 if (ret) { 4067 if (ret == -ENOSPC) {
3813 struct btrfs_space_info *sinfo; 4068 struct btrfs_space_info *sinfo;
3814 4069
3815 sinfo = __find_space_info(root->fs_info, data); 4070 sinfo = __find_space_info(root->fs_info, data);
@@ -3817,7 +4072,6 @@ again:
3817 "wanted %llu\n", (unsigned long long)data, 4072 "wanted %llu\n", (unsigned long long)data,
3818 (unsigned long long)num_bytes); 4073 (unsigned long long)num_bytes);
3819 dump_space_info(sinfo, num_bytes); 4074 dump_space_info(sinfo, num_bytes);
3820 BUG();
3821 } 4075 }
3822 4076
3823 return ret; 4077 return ret;
@@ -3855,7 +4109,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3855 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 4109 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
3856 empty_size, hint_byte, search_end, ins, 4110 empty_size, hint_byte, search_end, ins,
3857 data); 4111 data);
3858 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4112 if (!ret)
4113 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4114
3859 return ret; 4115 return ret;
3860} 4116}
3861 4117
@@ -4017,9 +4273,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4017 struct btrfs_block_group_cache *block_group; 4273 struct btrfs_block_group_cache *block_group;
4018 4274
4019 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4275 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4020 mutex_lock(&block_group->cache_mutex); 4276 cache_block_group(block_group);
4021 cache_block_group(root, block_group); 4277 wait_event(block_group->caching_q,
4022 mutex_unlock(&block_group->cache_mutex); 4278 block_group_cache_done(block_group));
4023 4279
4024 ret = btrfs_remove_free_space(block_group, ins->objectid, 4280 ret = btrfs_remove_free_space(block_group, ins->objectid,
4025 ins->offset); 4281 ins->offset);
@@ -4050,7 +4306,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4050 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4306 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4051 empty_size, hint_byte, search_end, 4307 empty_size, hint_byte, search_end,
4052 ins, 0); 4308 ins, 0);
4053 BUG_ON(ret); 4309 if (ret)
4310 return ret;
4054 4311
4055 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4312 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4056 if (parent == 0) 4313 if (parent == 0)
@@ -6966,11 +7223,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
6966 &info->block_group_cache_tree); 7223 &info->block_group_cache_tree);
6967 spin_unlock(&info->block_group_cache_lock); 7224 spin_unlock(&info->block_group_cache_lock);
6968 7225
6969 btrfs_remove_free_space_cache(block_group);
6970 down_write(&block_group->space_info->groups_sem); 7226 down_write(&block_group->space_info->groups_sem);
6971 list_del(&block_group->list); 7227 list_del(&block_group->list);
6972 up_write(&block_group->space_info->groups_sem); 7228 up_write(&block_group->space_info->groups_sem);
6973 7229
7230 if (block_group->cached == BTRFS_CACHE_STARTED)
7231 wait_event(block_group->caching_q,
7232 block_group_cache_done(block_group));
7233
7234 btrfs_remove_free_space_cache(block_group);
7235
6974 WARN_ON(atomic_read(&block_group->count) != 1); 7236 WARN_ON(atomic_read(&block_group->count) != 1);
6975 kfree(block_group); 7237 kfree(block_group);
6976 7238
@@ -7036,10 +7298,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7036 atomic_set(&cache->count, 1); 7298 atomic_set(&cache->count, 1);
7037 spin_lock_init(&cache->lock); 7299 spin_lock_init(&cache->lock);
7038 spin_lock_init(&cache->tree_lock); 7300 spin_lock_init(&cache->tree_lock);
7039 mutex_init(&cache->cache_mutex); 7301 cache->fs_info = info;
7302 init_waitqueue_head(&cache->caching_q);
7040 INIT_LIST_HEAD(&cache->list); 7303 INIT_LIST_HEAD(&cache->list);
7041 INIT_LIST_HEAD(&cache->cluster_list); 7304 INIT_LIST_HEAD(&cache->cluster_list);
7042 cache->sectorsize = root->sectorsize;
7043 7305
7044 /* 7306 /*
7045 * we only want to have 32k of ram per block group for keeping 7307 * we only want to have 32k of ram per block group for keeping
@@ -7057,6 +7319,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7057 key.objectid = found_key.objectid + found_key.offset; 7319 key.objectid = found_key.objectid + found_key.offset;
7058 btrfs_release_path(root, path); 7320 btrfs_release_path(root, path);
7059 cache->flags = btrfs_block_group_flags(&cache->item); 7321 cache->flags = btrfs_block_group_flags(&cache->item);
7322 cache->sectorsize = root->sectorsize;
7323
7324 remove_sb_from_cache(root, cache);
7325
7326 /*
7327 * check for two cases, either we are full, and therefore
7328 * don't need to bother with the caching work since we won't
7329 * find any space, or we are empty, and we can just add all
7330 * the space in and be done with it. This saves us _alot_ of
7331 * time, particularly in the full case.
7332 */
7333 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
7334 cache->cached = BTRFS_CACHE_FINISHED;
7335 } else if (btrfs_block_group_used(&cache->item) == 0) {
7336 cache->cached = BTRFS_CACHE_FINISHED;
7337 add_new_free_space(cache, root->fs_info,
7338 found_key.objectid,
7339 found_key.objectid +
7340 found_key.offset);
7341 }
7060 7342
7061 ret = update_space_info(info, cache->flags, found_key.offset, 7343 ret = update_space_info(info, cache->flags, found_key.offset,
7062 btrfs_block_group_used(&cache->item), 7344 btrfs_block_group_used(&cache->item),
@@ -7112,7 +7394,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7112 atomic_set(&cache->count, 1); 7394 atomic_set(&cache->count, 1);
7113 spin_lock_init(&cache->lock); 7395 spin_lock_init(&cache->lock);
7114 spin_lock_init(&cache->tree_lock); 7396 spin_lock_init(&cache->tree_lock);
7115 mutex_init(&cache->cache_mutex); 7397 init_waitqueue_head(&cache->caching_q);
7116 INIT_LIST_HEAD(&cache->list); 7398 INIT_LIST_HEAD(&cache->list);
7117 INIT_LIST_HEAD(&cache->cluster_list); 7399 INIT_LIST_HEAD(&cache->cluster_list);
7118 7400
@@ -7121,11 +7403,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7121 cache->flags = type; 7403 cache->flags = type;
7122 btrfs_set_block_group_flags(&cache->item, type); 7404 btrfs_set_block_group_flags(&cache->item, type);
7123 7405
7124 cache->cached = 1; 7406 cache->cached = BTRFS_CACHE_FINISHED;
7125 ret = btrfs_add_free_space(cache, chunk_offset, size);
7126 BUG_ON(ret);
7127 remove_sb_from_cache(root, cache); 7407 remove_sb_from_cache(root, cache);
7128 7408
7409 add_new_free_space(cache, root->fs_info, chunk_offset,
7410 chunk_offset + size);
7411
7129 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7412 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
7130 &cache->space_info); 7413 &cache->space_info);
7131 BUG_ON(ret); 7414 BUG_ON(ret);
@@ -7184,7 +7467,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7184 rb_erase(&block_group->cache_node, 7467 rb_erase(&block_group->cache_node,
7185 &root->fs_info->block_group_cache_tree); 7468 &root->fs_info->block_group_cache_tree);
7186 spin_unlock(&root->fs_info->block_group_cache_lock); 7469 spin_unlock(&root->fs_info->block_group_cache_lock);
7187 btrfs_remove_free_space_cache(block_group); 7470
7188 down_write(&block_group->space_info->groups_sem); 7471 down_write(&block_group->space_info->groups_sem);
7189 /* 7472 /*
7190 * we must use list_del_init so people can check to see if they 7473 * we must use list_del_init so people can check to see if they
@@ -7193,6 +7476,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7193 list_del_init(&block_group->list); 7476 list_del_init(&block_group->list);
7194 up_write(&block_group->space_info->groups_sem); 7477 up_write(&block_group->space_info->groups_sem);
7195 7478
7479 if (block_group->cached == BTRFS_CACHE_STARTED)
7480 wait_event(block_group->caching_q,
7481 block_group_cache_done(block_group));
7482
7483 btrfs_remove_free_space_cache(block_group);
7484
7196 spin_lock(&block_group->space_info->lock); 7485 spin_lock(&block_group->space_info->lock);
7197 block_group->space_info->total_bytes -= block_group->key.offset; 7486 block_group->space_info->total_bytes -= block_group->key.offset;
7198 block_group->space_info->bytes_readonly -= block_group->key.offset; 7487 block_group->space_info->bytes_readonly -= block_group->key.offset;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ab8cad8b46c9..af99b78b288e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -238,6 +238,7 @@ static void unlink_free_space(struct btrfs_block_group_cache *block_group,
238{ 238{
239 rb_erase(&info->offset_index, &block_group->free_space_offset); 239 rb_erase(&info->offset_index, &block_group->free_space_offset);
240 block_group->free_extents--; 240 block_group->free_extents--;
241 block_group->free_space -= info->bytes;
241} 242}
242 243
243static int link_free_space(struct btrfs_block_group_cache *block_group, 244static int link_free_space(struct btrfs_block_group_cache *block_group,
@@ -251,6 +252,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
251 if (ret) 252 if (ret)
252 return ret; 253 return ret;
253 254
255 block_group->free_space += info->bytes;
254 block_group->free_extents++; 256 block_group->free_extents++;
255 return ret; 257 return ret;
256} 258}
@@ -285,36 +287,40 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
285 } 287 }
286} 288}
287 289
288static void bitmap_clear_bits(struct btrfs_free_space *info, u64 offset, u64 bytes, 290static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
289 u64 sectorsize) 291 struct btrfs_free_space *info, u64 offset,
292 u64 bytes)
290{ 293{
291 unsigned long start, end; 294 unsigned long start, end;
292 unsigned long i; 295 unsigned long i;
293 296
294 start = offset_to_bit(info->offset, sectorsize, offset); 297 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
295 end = start + bytes_to_bits(bytes, sectorsize); 298 end = start + bytes_to_bits(bytes, block_group->sectorsize);
296 BUG_ON(end > BITS_PER_BITMAP); 299 BUG_ON(end > BITS_PER_BITMAP);
297 300
298 for (i = start; i < end; i++) 301 for (i = start; i < end; i++)
299 clear_bit(i, info->bitmap); 302 clear_bit(i, info->bitmap);
300 303
301 info->bytes -= bytes; 304 info->bytes -= bytes;
305 block_group->free_space -= bytes;
302} 306}
303 307
304static void bitmap_set_bits(struct btrfs_free_space *info, u64 offset, u64 bytes, 308static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
305 u64 sectorsize) 309 struct btrfs_free_space *info, u64 offset,
310 u64 bytes)
306{ 311{
307 unsigned long start, end; 312 unsigned long start, end;
308 unsigned long i; 313 unsigned long i;
309 314
310 start = offset_to_bit(info->offset, sectorsize, offset); 315 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
311 end = start + bytes_to_bits(bytes, sectorsize); 316 end = start + bytes_to_bits(bytes, block_group->sectorsize);
312 BUG_ON(end > BITS_PER_BITMAP); 317 BUG_ON(end > BITS_PER_BITMAP);
313 318
314 for (i = start; i < end; i++) 319 for (i = start; i < end; i++)
315 set_bit(i, info->bitmap); 320 set_bit(i, info->bitmap);
316 321
317 info->bytes += bytes; 322 info->bytes += bytes;
323 block_group->free_space += bytes;
318} 324}
319 325
320static int search_bitmap(struct btrfs_block_group_cache *block_group, 326static int search_bitmap(struct btrfs_block_group_cache *block_group,
@@ -414,13 +420,12 @@ again:
414 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; 420 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
415 421
416 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 422 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
417 bitmap_clear_bits(bitmap_info, *offset, 423 bitmap_clear_bits(block_group, bitmap_info, *offset,
418 end - *offset + 1, block_group->sectorsize); 424 end - *offset + 1);
419 *bytes -= end - *offset + 1; 425 *bytes -= end - *offset + 1;
420 *offset = end + 1; 426 *offset = end + 1;
421 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 427 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
422 bitmap_clear_bits(bitmap_info, *offset, 428 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
423 *bytes, block_group->sectorsize);
424 *bytes = 0; 429 *bytes = 0;
425 } 430 }
426 431
@@ -495,14 +500,13 @@ again:
495 (u64)(BITS_PER_BITMAP * block_group->sectorsize); 500 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
496 501
497 if (offset >= bitmap_info->offset && offset + bytes > end) { 502 if (offset >= bitmap_info->offset && offset + bytes > end) {
498 bitmap_set_bits(bitmap_info, offset, end - offset, 503 bitmap_set_bits(block_group, bitmap_info, offset,
499 block_group->sectorsize); 504 end - offset);
500 bytes -= end - offset; 505 bytes -= end - offset;
501 offset = end; 506 offset = end;
502 added = 0; 507 added = 0;
503 } else if (offset >= bitmap_info->offset && offset + bytes <= end) { 508 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
504 bitmap_set_bits(bitmap_info, offset, bytes, 509 bitmap_set_bits(block_group, bitmap_info, offset, bytes);
505 block_group->sectorsize);
506 bytes = 0; 510 bytes = 0;
507 } else { 511 } else {
508 BUG(); 512 BUG();
@@ -870,8 +874,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
870 874
871 ret = offset; 875 ret = offset;
872 if (entry->bitmap) { 876 if (entry->bitmap) {
873 bitmap_clear_bits(entry, offset, bytes, 877 bitmap_clear_bits(block_group, entry, offset, bytes);
874 block_group->sectorsize);
875 if (!entry->bytes) { 878 if (!entry->bytes) {
876 unlink_free_space(block_group, entry); 879 unlink_free_space(block_group, entry);
877 kfree(entry->bitmap); 880 kfree(entry->bitmap);
@@ -891,6 +894,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
891 894
892out: 895out:
893 spin_unlock(&block_group->tree_lock); 896 spin_unlock(&block_group->tree_lock);
897
894 return ret; 898 return ret;
895} 899}
896 900
@@ -967,7 +971,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
967 goto out; 971 goto out;
968 972
969 ret = search_start; 973 ret = search_start;
970 bitmap_clear_bits(entry, ret, bytes, block_group->sectorsize); 974 bitmap_clear_bits(block_group, entry, ret, bytes);
971out: 975out:
972 spin_unlock(&cluster->lock); 976 spin_unlock(&cluster->lock);
973 spin_unlock(&block_group->tree_lock); 977 spin_unlock(&block_group->tree_lock);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 81f7124c3051..32454d1c566f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -40,6 +40,14 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
40 } 40 }
41} 41}
42 42
43static noinline void switch_commit_root(struct btrfs_root *root)
44{
45 down_write(&root->commit_root_sem);
46 free_extent_buffer(root->commit_root);
47 root->commit_root = btrfs_root_node(root);
48 up_write(&root->commit_root_sem);
49}
50
43/* 51/*
44 * either allocate a new transaction or hop into the existing one 52 * either allocate a new transaction or hop into the existing one
45 */ 53 */
@@ -458,8 +466,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
458 ret = btrfs_write_dirty_block_groups(trans, root); 466 ret = btrfs_write_dirty_block_groups(trans, root);
459 BUG_ON(ret); 467 BUG_ON(ret);
460 } 468 }
461 free_extent_buffer(root->commit_root); 469 switch_commit_root(root);
462 root->commit_root = btrfs_root_node(root);
463 return 0; 470 return 0;
464} 471}
465 472
@@ -537,8 +544,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
537 btrfs_update_reloc_root(trans, root); 544 btrfs_update_reloc_root(trans, root);
538 545
539 if (root->commit_root != root->node) { 546 if (root->commit_root != root->node) {
540 free_extent_buffer(root->commit_root); 547 switch_commit_root(root);
541 root->commit_root = btrfs_root_node(root);
542 btrfs_set_root_node(&root->root_item, 548 btrfs_set_root_node(&root->root_item,
543 root->node); 549 root->node);
544 } 550 }
@@ -1002,15 +1008,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1002 1008
1003 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1009 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1004 root->fs_info->tree_root->node); 1010 root->fs_info->tree_root->node);
1005 free_extent_buffer(root->fs_info->tree_root->commit_root); 1011 switch_commit_root(root->fs_info->tree_root);
1006 root->fs_info->tree_root->commit_root =
1007 btrfs_root_node(root->fs_info->tree_root);
1008 1012
1009 btrfs_set_root_node(&root->fs_info->chunk_root->root_item, 1013 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1010 root->fs_info->chunk_root->node); 1014 root->fs_info->chunk_root->node);
1011 free_extent_buffer(root->fs_info->chunk_root->commit_root); 1015 switch_commit_root(root->fs_info->chunk_root);
1012 root->fs_info->chunk_root->commit_root =
1013 btrfs_root_node(root->fs_info->chunk_root);
1014 1016
1015 update_super_roots(root); 1017 update_super_roots(root);
1016 1018
@@ -1050,6 +1052,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1050 cur_trans->commit_done = 1; 1052 cur_trans->commit_done = 1;
1051 1053
1052 root->fs_info->last_trans_committed = cur_trans->transid; 1054 root->fs_info->last_trans_committed = cur_trans->transid;
1055
1053 wake_up(&cur_trans->commit_wait); 1056 wake_up(&cur_trans->commit_wait);
1054 1057
1055 put_transaction(cur_trans); 1058 put_transaction(cur_trans);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c13922206d1b..195606862618 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -264,7 +264,7 @@ static int process_one_buffer(struct btrfs_root *log,
264{ 264{
265 if (wc->pin) 265 if (wc->pin)
266 btrfs_update_pinned_extents(log->fs_info->extent_root, 266 btrfs_update_pinned_extents(log->fs_info->extent_root,
267 eb->start, eb->len, 1); 267 eb->start, eb->len, 1, 0);
268 268
269 if (btrfs_buffer_uptodate(eb, gen)) { 269 if (btrfs_buffer_uptodate(eb, gen)) {
270 if (wc->write) 270 if (wc->write)