Diffstat (limited to 'fs/btrfs/extent-tree.c')
 fs/btrfs/extent-tree.c | 358 ++++++++++++++++++++++++++++++---------------
 1 file changed, 240 insertions(+), 118 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6e1d36702ff7..4e1b153b7c47 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -34,6 +34,8 @@
 #include "locking.h"
 #include "free-space-cache.h"
 
+#undef SCRAMBLE_DELAYED_REFS
+
 /*
  * control flags for do_chunk_alloc's force field
  * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
@@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
 	struct btrfs_delayed_extent_op *extent_op;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
 	int count = 0;
 	int must_insert_reserved = 0;
@@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref = select_delayed_ref(locked_ref);
 
 		if (ref && ref->seq &&
-		    btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
+		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
 			/*
 			 * there are still refs with lower seq numbers in the
 			 * process of being added. Don't run this ref yet.
@@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		}
 
 next:
-		do_chunk_alloc(trans, root->fs_info->extent_root,
+		do_chunk_alloc(trans, fs_info->extent_root,
 			       2 * 1024 * 1024,
 			       btrfs_get_alloc_profile(root, 0),
 			       CHUNK_ALLOC_NO_FORCE);
@@ -2347,21 +2350,99 @@ next:
 	return count;
 }
 
-static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
+static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
+			       struct btrfs_delayed_ref_root *delayed_refs,
 			       unsigned long num_refs,
 			       struct list_head *first_seq)
 {
 	spin_unlock(&delayed_refs->lock);
 	pr_debug("waiting for more refs (num %ld, first %p)\n",
 		 num_refs, first_seq);
-	wait_event(delayed_refs->seq_wait,
+	wait_event(fs_info->tree_mod_seq_wait,
 		   num_refs != delayed_refs->num_entries ||
-		   delayed_refs->seq_head.next != first_seq);
+		   fs_info->tree_mod_seq_list.next != first_seq);
 	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, delayed_refs->seq_head.next);
+		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
 	spin_lock(&delayed_refs->lock);
 }
 
+#ifdef SCRAMBLE_DELAYED_REFS
+/*
+ * Normally delayed refs get processed in ascending bytenr order. This
+ * correlates in most cases to the order added. To expose dependencies on this
+ * order, we start to process the tree in the middle instead of the beginning
+ */
+static u64 find_middle(struct rb_root *root)
+{
+	struct rb_node *n = root->rb_node;
+	struct btrfs_delayed_ref_node *entry;
+	int alt = 1;
+	u64 middle;
+	u64 first = 0, last = 0;
+
+	n = rb_first(root);
+	if (n) {
+		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
+		first = entry->bytenr;
+	}
+	n = rb_last(root);
+	if (n) {
+		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
+		last = entry->bytenr;
+	}
+	n = root->rb_node;
+
+	while (n) {
+		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
+		WARN_ON(!entry->in_tree);
+
+		middle = entry->bytenr;
+
+		if (alt)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+
+		alt = 1 - alt;
+	}
+	return middle;
+}
+#endif
+
+int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
+					 struct btrfs_fs_info *fs_info)
+{
+	struct qgroup_update *qgroup_update;
+	int ret = 0;
+
+	if (list_empty(&trans->qgroup_ref_list) !=
+	    !trans->delayed_ref_elem.seq) {
+		/* list without seq or seq without list */
+		printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n",
+		       list_empty(&trans->qgroup_ref_list) ? "" : " not",
+		       trans->delayed_ref_elem.seq);
+		BUG();
+	}
+
+	if (!trans->delayed_ref_elem.seq)
+		return 0;
+
+	while (!list_empty(&trans->qgroup_ref_list)) {
+		qgroup_update = list_first_entry(&trans->qgroup_ref_list,
+						 struct qgroup_update, list);
+		list_del(&qgroup_update->list);
+		if (!ret)
+			ret = btrfs_qgroup_account_ref(
+					trans, fs_info, qgroup_update->node,
+					qgroup_update->extent_op);
+		kfree(qgroup_update);
+	}
+
+	btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
+
+	return ret;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far. count can be
@@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			       2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
 			       CHUNK_ALLOC_NO_FORCE);
 
+	btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
 again:
 	consider_waiting = 0;
 	spin_lock(&delayed_refs->lock);
+
+#ifdef SCRAMBLE_DELAYED_REFS
+	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
+#endif
+
 	if (count == 0) {
 		count = delayed_refs->num_entries * 2;
 		run_most = 1;
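
Note: SCRAMBLE_DELAYED_REFS is a compile-time debug knob, not a mount or
Kconfig option. A minimal sketch of how a developer would exercise the
scrambled ordering locally (flip the guard added at the top of this file
and rebuild):

	/* extent-tree.c, debug builds only; illustrative, not part of
	 * this patch */
	#define SCRAMBLE_DELAYED_REFS

With the guard defined, each pass over the delayed refs starts from
find_middle(), i.e. from a bytenr near the middle of the rb-tree rather
than the smallest one, which exposes any hidden dependency on ascending
processing order.
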
@@ -2437,7 +2525,7 @@ again:
 			num_refs = delayed_refs->num_entries;
 			first_seq = root->fs_info->tree_mod_seq_list.next;
 		} else {
-			wait_for_more_refs(delayed_refs,
+			wait_for_more_refs(root->fs_info, delayed_refs,
 					   num_refs, first_seq);
 			/*
 			 * after waiting, things have changed. we
@@ -2502,6 +2590,7 @@ again:
 	}
 out:
 	spin_unlock(&delayed_refs->lock);
+	assert_qgroups_uptodate(trans);
 	return 0;
 }
 
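
Note: assert_qgroups_uptodate() presumably enforces, at the end of the
run, the same invariant that btrfs_delayed_refs_qgroup_accounting() BUGs
on at entry: the queued qgroup updates and the held tree_mod_seq element
must stay consistent, and both must be cleaned up once the refs have run.
A sketch of the check (illustrative; the real helper lives outside this
hunk):

	/* after btrfs_run_delayed_refs(), nothing may be left queued */
	WARN_ON(!list_empty(&trans->qgroup_ref_list));
	WARN_ON(trans->delayed_ref_elem.seq != 0);
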
@@ -2581,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
 
 	node = rb_prev(node);
 	if (node) {
+		int seq = ref->seq;
+
 		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-		if (ref->bytenr == bytenr)
+		if (ref->bytenr == bytenr && ref->seq == seq)
 			goto out_unlock;
 	}
 
@@ -2903,8 +2994,13 @@ again:
 	}
 
 	spin_lock(&block_group->lock);
-	if (block_group->cached != BTRFS_CACHE_FINISHED) {
-		/* We're not cached, don't bother trying to write stuff out */
+	if (block_group->cached != BTRFS_CACHE_FINISHED ||
+	    !btrfs_test_opt(root, SPACE_CACHE)) {
+		/*
+		 * don't bother trying to write stuff out _if_
+		 * a) we're not cached,
+		 * b) we're with nospace_cache mount option.
+		 */
 		dcs = BTRFS_DC_WRITTEN;
 		spin_unlock(&block_group->lock);
 		goto out_put;
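
Note: the btrfs_test_opt(root, SPACE_CACHE) test makes the nospace_cache
case explicit. Assuming the standard mount option, the newly skipped path
is reached with e.g.:

	mount -o nospace_cache /dev/sdb /mnt/btrfs

With the free space cache disabled there is nothing useful to write back,
so the block group is marked BTRFS_DC_WRITTEN immediately.
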
@@ -3134,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	init_waitqueue_head(&found->wait);
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
+	if (flags & BTRFS_BLOCK_GROUP_DATA)
+		info->data_sinfo = found;
 	return 0;
 }
 
@@ -3263,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 	return get_alloc_profile(root, flags);
 }
 
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
-{
-	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
-						       BTRFS_BLOCK_GROUP_DATA);
-}
-
 /*
  * This will check the space that the inode allocates from to make sure we have
  * enough space for bytes.
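
Note: a filesystem has exactly one BTRFS_BLOCK_GROUP_DATA space_info, so
the per-inode pointer initialized by the removed btrfs_set_inode_space_info()
was redundant. The lookup collapses to the fs-wide pointer cached by
update_space_info() above:

	/* before: set per inode at inode-init time */
	data_sinfo = BTRFS_I(inode)->space_info;
	/* after: one cached pointer for the whole fs */
	data_sinfo = root->fs_info->data_sinfo;
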
@@ -3277,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 {
 	struct btrfs_space_info *data_sinfo;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 used;
 	int ret = 0, committed = 0, alloc_chunk = 1;
 
@@ -3289,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 		committed = 1;
 	}
 
-	data_sinfo = BTRFS_I(inode)->space_info;
+	data_sinfo = fs_info->data_sinfo;
 	if (!data_sinfo)
 		goto alloc;
 
@@ -3330,10 +3423,9 @@ alloc:
 			goto commit_trans;
 		}
 
-		if (!data_sinfo) {
-			btrfs_set_inode_space_info(root, inode);
-			data_sinfo = BTRFS_I(inode)->space_info;
-		}
+		if (!data_sinfo)
+			data_sinfo = fs_info->data_sinfo;
+
 		goto again;
 	}
 
@@ -3380,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
-	data_sinfo = BTRFS_I(inode)->space_info;
+	data_sinfo = root->fs_info->data_sinfo;
 	spin_lock(&data_sinfo->lock);
 	data_sinfo->bytes_may_use -= bytes;
 	trace_btrfs_space_reservation(root->fs_info, "space_info",
@@ -3586,89 +3678,58 @@ out:
 /*
  * shrink metadata reservation for delalloc
  */
-static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
-			   bool wait_ordered)
+static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
+			    bool wait_ordered)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_space_info *space_info;
 	struct btrfs_trans_handle *trans;
-	u64 reserved;
+	u64 delalloc_bytes;
 	u64 max_reclaim;
-	u64 reclaimed = 0;
 	long time_left;
 	unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 	int loops = 0;
-	unsigned long progress;
 
 	trans = (struct btrfs_trans_handle *)current->journal_info;
 	block_rsv = &root->fs_info->delalloc_block_rsv;
 	space_info = block_rsv->space_info;
 
 	smp_mb();
-	reserved = space_info->bytes_may_use;
-	progress = space_info->reservation_progress;
-
-	if (reserved == 0)
-		return 0;
-
-	smp_mb();
-	if (root->fs_info->delalloc_bytes == 0) {
+	delalloc_bytes = root->fs_info->delalloc_bytes;
+	if (delalloc_bytes == 0) {
 		if (trans)
-			return 0;
+			return;
 		btrfs_wait_ordered_extents(root, 0, 0);
-		return 0;
+		return;
 	}
 
-	max_reclaim = min(reserved, to_reclaim);
-	nr_pages = max_t(unsigned long, nr_pages,
-			 max_reclaim >> PAGE_CACHE_SHIFT);
-	while (loops < 1024) {
-		/* have the flusher threads jump in and do some IO */
-		smp_mb();
-		nr_pages = min_t(unsigned long, nr_pages,
-		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+	while (delalloc_bytes && loops < 3) {
+		max_reclaim = min(delalloc_bytes, to_reclaim);
+		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
 		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
 					       WB_REASON_FS_FREE_SPACE);
 
 		spin_lock(&space_info->lock);
-		if (reserved > space_info->bytes_may_use)
-			reclaimed += reserved - space_info->bytes_may_use;
-		reserved = space_info->bytes_may_use;
+		if (space_info->bytes_used + space_info->bytes_reserved +
+		    space_info->bytes_pinned + space_info->bytes_readonly +
+		    space_info->bytes_may_use + orig <=
+		    space_info->total_bytes) {
+			spin_unlock(&space_info->lock);
+			break;
+		}
 		spin_unlock(&space_info->lock);
 
 		loops++;
-
-		if (reserved == 0 || reclaimed >= max_reclaim)
-			break;
-
-		if (trans && trans->transaction->blocked)
-			return -EAGAIN;
-
 		if (wait_ordered && !trans) {
 			btrfs_wait_ordered_extents(root, 0, 0);
 		} else {
-			time_left = schedule_timeout_interruptible(1);
-
-			/* We were interrupted, exit */
+			time_left = schedule_timeout_killable(1);
 			if (time_left)
 				break;
 		}
-
-		/* we've kicked the IO a few times, if anything has been freed,
-		 * exit. There is no sense in looping here for a long time
-		 * when we really need to commit the transaction, or there are
-		 * just too many writers without enough free space
-		 */
-
-		if (loops > 3) {
-			smp_mb();
-			if (progress != space_info->reservation_progress)
-				break;
-		}
-
+		smp_mb();
+		delalloc_bytes = root->fs_info->delalloc_bytes;
 	}
-
-	return reclaimed >= to_reclaim;
 }
 
 /**
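
Note: the rewritten loop no longer counts reclaimed bytes toward a target.
Its exit test asks, in effect, whether a reservation of 'orig' bytes would
now fit; this restates the break condition above, it is not new logic:

	/* illustrative restatement of the break condition */
	u64 accounted = space_info->bytes_used +
			space_info->bytes_reserved +
			space_info->bytes_pinned +
			space_info->bytes_readonly +
			space_info->bytes_may_use;
	if (accounted + orig <= space_info->total_bytes)
		break;	/* enough space has been freed */

The loop also caps itself at three passes instead of 1024, since the
caller can now escalate to heavier flushing (see flush_space() below).
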
@@ -3728,6 +3789,58 @@ commit:
 	return btrfs_commit_transaction(trans, root);
 }
 
+enum flush_state {
+	FLUSH_DELALLOC		= 1,
+	FLUSH_DELALLOC_WAIT	= 2,
+	FLUSH_DELAYED_ITEMS_NR	= 3,
+	FLUSH_DELAYED_ITEMS	= 4,
+	COMMIT_TRANS		= 5,
+};
+
+static int flush_space(struct btrfs_root *root,
+		       struct btrfs_space_info *space_info, u64 num_bytes,
+		       u64 orig_bytes, int state)
+{
+	struct btrfs_trans_handle *trans;
+	int nr;
+	int ret = 0;
+
+	switch (state) {
+	case FLUSH_DELALLOC:
+	case FLUSH_DELALLOC_WAIT:
+		shrink_delalloc(root, num_bytes, orig_bytes,
+				state == FLUSH_DELALLOC_WAIT);
+		break;
+	case FLUSH_DELAYED_ITEMS_NR:
+	case FLUSH_DELAYED_ITEMS:
+		if (state == FLUSH_DELAYED_ITEMS_NR) {
+			u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+			nr = (int)div64_u64(num_bytes, bytes);
+			if (!nr)
+				nr = 1;
+			nr *= 2;
+		} else {
+			nr = -1;
+		}
+		trans = btrfs_join_transaction(root);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			break;
+		}
+		ret = btrfs_run_delayed_items_nr(trans, root, nr);
+		btrfs_end_transaction(trans, root);
+		break;
+	case COMMIT_TRANS:
+		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+		break;
+	default:
+		ret = -ENOSPC;
+		break;
+	}
+
+	return ret;
+}
 /**
  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
  * @root - the root we're allocating for
@@ -3749,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
 	struct btrfs_space_info *space_info = block_rsv->space_info;
 	u64 used;
 	u64 num_bytes = orig_bytes;
-	int retries = 0;
+	int flush_state = FLUSH_DELALLOC;
 	int ret = 0;
-	bool committed = false;
 	bool flushing = false;
-	bool wait_ordered = false;
+	bool committed = false;
 
 again:
 	ret = 0;
@@ -3812,9 +3924,8 @@ again:
 		 * amount plus the amount of bytes that we need for this
 		 * reservation.
 		 */
-		wait_ordered = true;
 		num_bytes = used - space_info->total_bytes +
-			(orig_bytes * (retries + 1));
+			(orig_bytes * 2);
 	}
 
 	if (ret) {
@@ -3867,8 +3978,6 @@ again:
 			trace_btrfs_space_reservation(root->fs_info,
 				"space_info", space_info->flags, orig_bytes, 1);
 			ret = 0;
-		} else {
-			wait_ordered = true;
 		}
 	}
 
@@ -3887,36 +3996,13 @@ again:
 	if (!ret || !flush)
 		goto out;
 
-	/*
-	 * We do synchronous shrinking since we don't actually unreserve
-	 * metadata until after the IO is completed.
-	 */
-	ret = shrink_delalloc(root, num_bytes, wait_ordered);
-	if (ret < 0)
-		goto out;
-
-	ret = 0;
-
-	/*
-	 * So if we were overcommitted it's possible that somebody else flushed
-	 * out enough space and we simply didn't have enough space to reclaim,
-	 * so go back around and try again.
-	 */
-	if (retries < 2) {
-		wait_ordered = true;
-		retries++;
+	ret = flush_space(root, space_info, num_bytes, orig_bytes,
+			  flush_state);
+	flush_state++;
+	if (!ret)
 		goto again;
-	}
-
-	ret = -ENOSPC;
-	if (committed)
-		goto out;
-
-	ret = may_commit_transaction(root, space_info, orig_bytes, 0);
-	if (!ret) {
-		committed = true;
+	else if (flush_state <= COMMIT_TRANS)
 		goto again;
-	}
 
 out:
 	if (flushing) {
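
Note: a condensed sketch of the new escalation, assuming the surrounding
control flow (the real code jumps back to the again: label to retry the
reservation after every step, carrying flush_state across retries):

	int state;

	for (state = FLUSH_DELALLOC; state <= COMMIT_TRANS; state++) {
		/* each state is heavier than the last: flush delalloc,
		 * flush and wait on ordered extents, run some delayed
		 * items, run all delayed items, commit the transaction */
		flush_space(root, space_info, num_bytes, orig_bytes, state);
		/* retry the reservation here; stop once it succeeds */
	}
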
@@ -3934,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv(
 {
 	struct btrfs_block_rsv *block_rsv = NULL;
 
-	if (root->ref_cows || root == root->fs_info->csum_root)
+	if (root->ref_cows)
+		block_rsv = trans->block_rsv;
+
+	if (root == root->fs_info->csum_root && trans->adding_csums)
 		block_rsv = trans->block_rsv;
 
 	if (!block_rsv)
@@ -4286,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root)
 {
+	if (!trans->block_rsv)
+		return;
+
 	if (!trans->bytes_reserved)
 		return;
 
@@ -4444,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	int ret;
 
 	/* Need to be holding the i_mutex here if we aren't free space cache */
-	if (btrfs_is_free_space_inode(root, inode))
+	if (btrfs_is_free_space_inode(inode))
 		flush = 0;
 
 	if (flush && btrfs_transaction_in_commit(root->fs_info))
@@ -4476,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	csum_bytes = BTRFS_I(inode)->csum_bytes;
 	spin_unlock(&BTRFS_I(inode)->lock);
 
+	if (root->fs_info->quota_enabled) {
+		ret = btrfs_qgroup_reserve(root, num_bytes +
+					   nr_extents * root->leafsize);
+		if (ret)
+			return ret;
+	}
+
 	ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
 	if (ret) {
 		u64 to_free = 0;
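
Note: the qgroup charge mirrors the worst case the write may consume:
num_bytes of reservation plus one leaf-sized tree block per extent item
the write might add. Illustrative numbers only (leafsize depends on mkfs
options):

	/* nr_extents = 2, leafsize = 4096, num_bytes = 8192:
	 * qgroup reservation = 8192 + 2 * 4096 = 16384 bytes */

The release path below frees symmetrically (num_bytes plus dropped *
root->leafsize), keeping the qgroup counters balanced.
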
@@ -4554,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 
 	trace_btrfs_space_reservation(root->fs_info, "delalloc",
 				      btrfs_ino(inode), to_free, 0);
+	if (root->fs_info->quota_enabled) {
+		btrfs_qgroup_free(root, num_bytes +
+					dropped * root->leafsize);
+	}
+
 	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
 				to_free);
 }
@@ -5190,8 +5294,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
 
 	delayed_refs->num_entries--;
-	if (waitqueue_active(&delayed_refs->seq_wait))
-		wake_up(&delayed_refs->seq_wait);
+	smp_mb();
+	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
+		wake_up(&root->fs_info->tree_mod_seq_wait);
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
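
Note: the added smp_mb() before waitqueue_active() is the standard kernel
pairing for lockless wakeups: the num_entries decrement must be visible
before we test for sleepers, or a concurrent wait_event() on
tree_mod_seq_wait could evaluate its condition against the stale value
and sleep through the wakeup. The generic shape of the idiom:

	/* waker side */
	update_shared_condition();
	smp_mb();		/* publish the update before the check */
	if (waitqueue_active(&wq))
		wake_up(&wq);

The wait_event() side rechecks its condition under the waitqueue's own
barriers, which closes the race window.
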
@@ -5748,7 +5853,11 @@ loop:
 			ret = do_chunk_alloc(trans, root, num_bytes +
 					     2 * 1024 * 1024, data,
 					     CHUNK_ALLOC_LIMITED);
-			if (ret < 0) {
+			/*
+			 * Do not bail out on ENOSPC since we
+			 * can do more things.
+			 */
+			if (ret < 0 && ret != -ENOSPC) {
 				btrfs_abort_transaction(trans,
 							root, ret);
 				goto out;
@@ -5816,13 +5925,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 again:
 	list_for_each_entry(cache, &info->block_groups[index], list) {
 		spin_lock(&cache->lock);
-		printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
-		       "%llu pinned %llu reserved\n",
+		printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
 		       (unsigned long long)cache->key.objectid,
 		       (unsigned long long)cache->key.offset,
 		       (unsigned long long)btrfs_block_group_used(&cache->item),
 		       (unsigned long long)cache->pinned,
-		       (unsigned long long)cache->reserved);
+		       (unsigned long long)cache->reserved,
+		       cache->ro ? "[readonly]" : "");
 		btrfs_dump_free_space(cache, bytes);
 		spin_unlock(&cache->lock);
 	}
@@ -7610,8 +7719,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->cluster_list);
 
-		if (need_clear)
+		if (need_clear) {
+			/*
+			 * When we mount with old space cache, we need to
+			 * set BTRFS_DC_CLEAR and set dirty flag.
+			 *
+			 * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
+			 *    truncate the old free space cache inode and
+			 *    setup a new one.
+			 * b) Setting 'dirty flag' makes sure that we flush
+			 *    the new space cache info onto disk.
+			 */
 			cache->disk_cache_state = BTRFS_DC_CLEAR;
+			if (btrfs_test_opt(root, SPACE_CACHE))
+				cache->dirty = 1;
+		}
 
 		read_extent_buffer(leaf, &cache->item,
 				   btrfs_item_ptr_offset(leaf, path->slots[0]),