author		Chris Mason <chris.mason@fusionio.com>	2013-02-20 14:05:45 -0500
committer	Chris Mason <chris.mason@fusionio.com>	2013-02-20 14:05:45 -0500
commit		b2c6b3e0611c58fbeb6b9c0892b6249f7bdfaf6b (patch)
tree		de7cf0825605aa6acf33a8d107003efd7aedbe72 /fs/btrfs/extent-tree.c
parent		19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff)
parent		272d26d0ad8c0e326689f2fa3cdc6a5fcc8e74e0 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next into for-linus-3.9
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

Conflicts:
	fs/btrfs/disk-io.c
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	284
1 file changed, 186 insertions(+), 98 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5a3327b8f90d..5cd44e239595 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -72,8 +72,7 @@ enum {
 	RESERVE_ALLOC_NO_ACCOUNT = 2,
 };
 
-static int update_block_group(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root,
+static int update_block_group(struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
@@ -103,6 +102,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 			    int dump_block_groups);
 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
 				       u64 num_bytes, int reserve);
+static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
+			       u64 num_bytes);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -162,6 +163,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
 	rb_link_node(&block_group->cache_node, parent, p);
 	rb_insert_color(&block_group->cache_node,
 			&info->block_group_cache_tree);
+
+	if (info->first_logical_byte > block_group->key.objectid)
+		info->first_logical_byte = block_group->key.objectid;
+
 	spin_unlock(&info->block_group_cache_lock);
 
 	return 0;
@@ -203,8 +208,11 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 			break;
 		}
 	}
-	if (ret)
+	if (ret) {
 		btrfs_get_block_group(ret);
+		if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
+			info->first_logical_byte = ret->key.objectid;
+	}
 	spin_unlock(&info->block_group_cache_lock);
 
 	return ret;
@@ -468,8 +476,6 @@ out:
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
-			     struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root,
 			     int load_cache_only)
 {
 	DEFINE_WAIT(wait);
@@ -527,12 +533,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	cache->cached = BTRFS_CACHE_FAST;
 	spin_unlock(&cache->lock);
 
-	/*
-	 * We can't do the read from on-disk cache during a commit since we need
-	 * to have the normal tree locking. Also if we are currently trying to
-	 * allocate blocks for the tree root we can't do the fast caching since
-	 * we likely hold important locks.
-	 */
 	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
 		ret = load_free_space_cache(fs_info, cache);
 
@@ -2143,7 +2143,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 						 node->num_bytes);
 		}
 	}
-	mutex_unlock(&head->mutex);
 	return ret;
 }
 
@@ -2258,7 +2257,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 			 * process of being added. Don't run this ref yet.
 			 */
 			list_del_init(&locked_ref->cluster);
-			mutex_unlock(&locked_ref->mutex);
+			btrfs_delayed_ref_unlock(locked_ref);
 			locked_ref = NULL;
 			delayed_refs->num_heads_ready++;
 			spin_unlock(&delayed_refs->lock);
@@ -2285,7 +2284,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 			ref = &locked_ref->node;
 
 			if (extent_op && must_insert_reserved) {
-				kfree(extent_op);
+				btrfs_free_delayed_extent_op(extent_op);
 				extent_op = NULL;
 			}
 
@@ -2294,28 +2293,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
 				ret = run_delayed_extent_op(trans, root,
 							    ref, extent_op);
-				kfree(extent_op);
+				btrfs_free_delayed_extent_op(extent_op);
 
 				if (ret) {
-					list_del_init(&locked_ref->cluster);
-					mutex_unlock(&locked_ref->mutex);
-
-					printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
+					printk(KERN_DEBUG
+					       "btrfs: run_delayed_extent_op "
+					       "returned %d\n", ret);
 					spin_lock(&delayed_refs->lock);
+					btrfs_delayed_ref_unlock(locked_ref);
 					return ret;
 				}
 
 				goto next;
 			}
-
-			list_del_init(&locked_ref->cluster);
-			locked_ref = NULL;
 		}
 
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
-		if (locked_ref) {
+		if (!btrfs_delayed_ref_is_head(ref)) {
 			/*
 			 * when we play the delayed ref, also correct the
 			 * ref_mod on head
@@ -2337,20 +2333,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
 					  must_insert_reserved);
 
-		btrfs_put_delayed_ref(ref);
-		kfree(extent_op);
-		count++;
-
+		btrfs_free_delayed_extent_op(extent_op);
 		if (ret) {
-			if (locked_ref) {
-				list_del_init(&locked_ref->cluster);
-				mutex_unlock(&locked_ref->mutex);
-			}
-			printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
+			btrfs_delayed_ref_unlock(locked_ref);
+			btrfs_put_delayed_ref(ref);
+			printk(KERN_DEBUG
+			       "btrfs: run_one_delayed_ref returned %d\n", ret);
 			spin_lock(&delayed_refs->lock);
 			return ret;
 		}
 
+		/*
+		 * If this node is a head, that means all the refs in this head
+		 * have been dealt with, and we will pick the next head to deal
+		 * with, so we must unlock the head and drop it from the cluster
+		 * list before we release it.
+		 */
+		if (btrfs_delayed_ref_is_head(ref)) {
+			list_del_init(&locked_ref->cluster);
+			btrfs_delayed_ref_unlock(locked_ref);
+			locked_ref = NULL;
+		}
+		btrfs_put_delayed_ref(ref);
+		count++;
 next:
 		cond_resched();
 		spin_lock(&delayed_refs->lock);
@@ -2500,6 +2505,7 @@ again:
 
 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
+			btrfs_release_ref_cluster(&cluster);
 			spin_unlock(&delayed_refs->lock);
 			btrfs_abort_transaction(trans, root, ret);
 			return ret;
@@ -2586,7 +2592,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_extent_op *extent_op;
 	int ret;
 
-	extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+	extent_op = btrfs_alloc_delayed_extent_op();
 	if (!extent_op)
 		return -ENOMEM;
 
@@ -2598,7 +2604,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 	ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
 					  num_bytes, extent_op);
 	if (ret)
-		kfree(extent_op);
+		btrfs_free_delayed_extent_op(extent_op);
 	return ret;
 }
 
@@ -3223,12 +3229,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	u64 extra_flags = chunk_to_extended(flags) &
 				BTRFS_EXTENDED_PROFILE_MASK;
 
+	write_seqlock(&fs_info->profiles_lock);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		fs_info->avail_data_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_METADATA)
 		fs_info->avail_metadata_alloc_bits |= extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		fs_info->avail_system_alloc_bits |= extra_flags;
+	write_sequnlock(&fs_info->profiles_lock);
 }
 
 /*
@@ -3320,12 +3328,18 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 
 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-	if (flags & BTRFS_BLOCK_GROUP_DATA)
-		flags |= root->fs_info->avail_data_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-		flags |= root->fs_info->avail_system_alloc_bits;
-	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
-		flags |= root->fs_info->avail_metadata_alloc_bits;
+	unsigned seq;
+
+	do {
+		seq = read_seqbegin(&root->fs_info->profiles_lock);
+
+		if (flags & BTRFS_BLOCK_GROUP_DATA)
+			flags |= root->fs_info->avail_data_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+			flags |= root->fs_info->avail_system_alloc_bits;
+		else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+			flags |= root->fs_info->avail_metadata_alloc_bits;
+	} while (read_seqretry(&root->fs_info->profiles_lock, seq));
 
 	return btrfs_reduce_alloc_profile(root, flags);
 }
@@ -3564,6 +3578,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	int wait_for_alloc = 0;
 	int ret = 0;
 
+	/* Don't re-enter if we're already allocating a chunk */
+	if (trans->allocating_chunk)
+		return -ENOSPC;
+
 	space_info = __find_space_info(extent_root->fs_info, flags);
 	if (!space_info) {
 		ret = update_space_info(extent_root->fs_info, flags,
@@ -3606,6 +3624,8 @@ again:
 		goto again;
 	}
 
+	trans->allocating_chunk = true;
+
 	/*
 	 * If we have mixed data/metadata chunks we want to make sure we keep
 	 * allocating mixed chunks instead of individual chunks.
@@ -3632,6 +3652,7 @@ again:
 	check_system_chunk(trans, extent_root, flags);
 
 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
+	trans->allocating_chunk = false;
 	if (ret < 0 && ret != -ENOSPC)
 		goto out;
 
@@ -3653,13 +3674,31 @@ static int can_overcommit(struct btrfs_root *root,
 			  struct btrfs_space_info *space_info, u64 bytes,
 			  enum btrfs_reserve_flush_enum flush)
 {
+	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
 	u64 profile = btrfs_get_alloc_profile(root, 0);
+	u64 rsv_size = 0;
 	u64 avail;
 	u64 used;
+	u64 to_add;
 
 	used = space_info->bytes_used + space_info->bytes_reserved +
-		space_info->bytes_pinned + space_info->bytes_readonly +
-		space_info->bytes_may_use;
+		space_info->bytes_pinned + space_info->bytes_readonly;
+
+	spin_lock(&global_rsv->lock);
+	rsv_size = global_rsv->size;
+	spin_unlock(&global_rsv->lock);
+
+	/*
+	 * We only want to allow over committing if we have lots of actual space
+	 * free, but if we don't have enough space to handle the global reserve
+	 * space then we could end up having a real enospc problem when trying
+	 * to allocate a chunk or some other such important allocation.
+	 */
+	rsv_size <<= 1;
+	if (used + rsv_size >= space_info->total_bytes)
+		return 0;
+
+	used += space_info->bytes_may_use;
 
 	spin_lock(&root->fs_info->free_chunk_lock);
 	avail = root->fs_info->free_chunk_space;
@@ -3674,27 +3713,38 @@ static int can_overcommit(struct btrfs_root *root,
 		       BTRFS_BLOCK_GROUP_RAID10))
 		avail >>= 1;
 
+	to_add = space_info->total_bytes;
+
 	/*
 	 * If we aren't flushing all things, let us overcommit up to
 	 * 1/2th of the space. If we can flush, don't let us overcommit
 	 * too much, let it overcommit up to 1/8 of the space.
 	 */
 	if (flush == BTRFS_RESERVE_FLUSH_ALL)
-		avail >>= 3;
+		to_add >>= 3;
 	else
-		avail >>= 1;
+		to_add >>= 1;
 
-	if (used + bytes < space_info->total_bytes + avail)
+	/*
+	 * Limit the overcommit to the amount of free space we could possibly
+	 * allocate for chunks.
+	 */
+	to_add = min(avail, to_add);
+
+	if (used + bytes < space_info->total_bytes + to_add)
 		return 1;
 	return 0;
 }
 
-static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
+static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
 					       unsigned long nr_pages,
 					       enum wb_reason reason)
 {
-	if (!writeback_in_progress(sb->s_bdi) &&
-	    down_read_trylock(&sb->s_umount)) {
+	/* the flusher is dealing with the dirty inodes now. */
+	if (writeback_in_progress(sb->s_bdi))
+		return 1;
+
+	if (down_read_trylock(&sb->s_umount)) {
 		writeback_inodes_sb_nr(sb, nr_pages, reason);
 		up_read(&sb->s_umount);
 		return 1;
@@ -3703,6 +3753,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
 	return 0;
 }
 
+void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+				  unsigned long nr_pages)
+{
+	struct super_block *sb = root->fs_info->sb;
+	int started;
+
+	/* If we can not start writeback, just sync all the delalloc file. */
+	started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
+						      WB_REASON_FS_FREE_SPACE);
+	if (!started) {
+		/*
+		 * We needn't worry the filesystem going from r/w to r/o though
+		 * we don't acquire ->s_umount mutex, because the filesystem
+		 * should guarantee the delalloc inodes list be empty after
+		 * the filesystem is readonly(all dirty pages are written to
+		 * the disk).
+		 */
+		btrfs_start_delalloc_inodes(root, 0);
+		btrfs_wait_ordered_extents(root, 0);
+	}
+}
+
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3724,7 +3796,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	space_info = block_rsv->space_info;
 
 	smp_mb();
-	delalloc_bytes = root->fs_info->delalloc_bytes;
+	delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	if (delalloc_bytes == 0) {
 		if (trans)
 			return;
@@ -3735,10 +3808,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	while (delalloc_bytes && loops < 3) {
 		max_reclaim = min(delalloc_bytes, to_reclaim);
 		nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
-		writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
-						    nr_pages,
-						    WB_REASON_FS_FREE_SPACE);
-
+		btrfs_writeback_inodes_sb_nr(root, nr_pages);
 		/*
 		 * We need to wait for the async pages to actually start before
 		 * we do anything.
@@ -3766,7 +3836,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 			break;
 		}
 		smp_mb();
-		delalloc_bytes = root->fs_info->delalloc_bytes;
+		delalloc_bytes = percpu_counter_sum_positive(
+						&root->fs_info->delalloc_bytes);
 	}
 }
 
@@ -4030,6 +4101,15 @@ again:
 		goto again;
 
 out:
+	if (ret == -ENOSPC &&
+	    unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
+		struct btrfs_block_rsv *global_rsv =
+			&root->fs_info->global_block_rsv;
+
+		if (block_rsv != global_rsv &&
+		    !block_rsv_use_bytes(global_rsv, orig_bytes))
+			ret = 0;
+	}
 	if (flushing) {
 		spin_lock(&space_info->lock);
 		space_info->flush = 0;
@@ -4668,7 +4748,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 	spin_lock(&BTRFS_I(inode)->lock);
 	dropped = drop_outstanding_extent(inode);
 
-	to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+	if (num_bytes)
+		to_free = calc_csum_metadata_size(inode, num_bytes, 0);
 	spin_unlock(&BTRFS_I(inode)->lock);
 	if (dropped > 0)
 		to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -4735,8 +4816,7 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
 	btrfs_free_reserved_data_space(inode, num_bytes);
 }
 
-static int update_block_group(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root,
+static int update_block_group(struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc)
 {
 	struct btrfs_block_group_cache *cache = NULL;
@@ -4773,7 +4853,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 	 * space back to the block group, otherwise we will leak space.
 	 */
 	if (!alloc && cache->cached == BTRFS_CACHE_NO)
-		cache_block_group(cache, trans, NULL, 1);
+		cache_block_group(cache, 1);
 
 	byte_in_group = bytenr - cache->key.objectid;
 	WARN_ON(byte_in_group > cache->key.offset);
@@ -4823,6 +4903,13 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
 	struct btrfs_block_group_cache *cache;
 	u64 bytenr;
 
+	spin_lock(&root->fs_info->block_group_cache_lock);
+	bytenr = root->fs_info->first_logical_byte;
+	spin_unlock(&root->fs_info->block_group_cache_lock);
+
+	if (bytenr < (u64)-1)
+		return bytenr;
+
 	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
 	if (!cache)
 		return 0;
@@ -4873,8 +4960,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
 /*
  * this function must be called within transaction
  */
-int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
+int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 				    u64 bytenr, u64 num_bytes)
 {
 	struct btrfs_block_group_cache *cache;
@@ -4888,7 +4974,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
 	 * to one because the slow code to read in the free extents does check
 	 * the pinned extents.
 	 */
-	cache_block_group(cache, trans, root, 1);
+	cache_block_group(cache, 1);
 
 	pin_down_extent(root, cache, bytenr, num_bytes, 0);
 
@@ -5285,7 +5371,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	ret = update_block_group(trans, root, bytenr, num_bytes, 0);
+	ret = update_block_group(root, bytenr, num_bytes, 0);
 	if (ret) {
 		btrfs_abort_transaction(trans, extent_root, ret);
 		goto out;
@@ -5330,7 +5416,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	if (head->extent_op) {
 		if (!head->must_insert_reserved)
 			goto out;
-		kfree(head->extent_op);
+		btrfs_free_delayed_extent_op(head->extent_op);
 		head->extent_op = NULL;
 	}
 
@@ -5476,7 +5562,6 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
 				u64 num_bytes)
 {
 	struct btrfs_caching_control *caching_ctl;
-	DEFINE_WAIT(wait);
 
 	caching_ctl = get_caching_control(cache);
 	if (!caching_ctl)
@@ -5493,7 +5578,6 @@ static noinline int
 wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_caching_control *caching_ctl;
-	DEFINE_WAIT(wait);
 
 	caching_ctl = get_caching_control(cache);
 	if (!caching_ctl)
@@ -5507,20 +5591,16 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 
 int __get_raid_index(u64 flags)
 {
-	int index;
-
 	if (flags & BTRFS_BLOCK_GROUP_RAID10)
-		index = 0;
+		return BTRFS_RAID_RAID10;
 	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
-		index = 1;
+		return BTRFS_RAID_RAID1;
 	else if (flags & BTRFS_BLOCK_GROUP_DUP)
-		index = 2;
+		return BTRFS_RAID_DUP;
 	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
-		index = 3;
+		return BTRFS_RAID_RAID0;
 	else
-		index = 4;
-
-	return index;
+		return BTRFS_RAID_SINGLE;
 }
 
 static int get_block_group_index(struct btrfs_block_group_cache *cache)
@@ -5678,8 +5758,7 @@ have_block_group:
 		cached = block_group_cache_done(block_group);
 		if (unlikely(!cached)) {
 			found_uncached_bg = true;
-			ret = cache_block_group(block_group, trans,
-						orig_root, 0);
+			ret = cache_block_group(block_group, 0);
 			BUG_ON(ret < 0);
 			ret = 0;
 		}
@@ -6108,7 +6187,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);
 
-	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
@@ -6172,7 +6251,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);
 
-	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
+	ret = update_block_group(root, ins->objectid, ins->offset, 1);
 	if (ret) { /* -ENOENT, logic error */
 		printk(KERN_ERR "btrfs update block group failed for %llu "
 		       "%llu\n", (unsigned long long)ins->objectid,
@@ -6215,7 +6294,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group, trans, NULL, 0);
+	cache_block_group(block_group, 0);
 	caching_ctl = get_caching_control(block_group);
 
 	if (!caching_ctl) {
@@ -6329,12 +6408,14 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	if (!ret)
 		return block_rsv;
 	if (ret && !block_rsv->failfast) {
-		static DEFINE_RATELIMIT_STATE(_rs,
-					      DEFAULT_RATELIMIT_INTERVAL,
-					      /*DEFAULT_RATELIMIT_BURST*/ 2);
-		if (__ratelimit(&_rs))
-			WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n",
-			     ret);
+		if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+			static DEFINE_RATELIMIT_STATE(_rs,
+					DEFAULT_RATELIMIT_INTERVAL * 10,
+					/*DEFAULT_RATELIMIT_BURST*/ 1);
+			if (__ratelimit(&_rs))
+				WARN(1, KERN_DEBUG
+					"btrfs: block rsv returned %d\n", ret);
+		}
 		ret = reserve_metadata_bytes(root, block_rsv, blocksize,
 					     BTRFS_RESERVE_NO_FLUSH);
 		if (!ret) {
@@ -6400,7 +6481,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 
 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
 		struct btrfs_delayed_extent_op *extent_op;
-		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
+		extent_op = btrfs_alloc_delayed_extent_op();
 		BUG_ON(!extent_op); /* -ENOMEM */
 		if (key)
 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
@@ -7481,16 +7562,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 		index = get_block_group_index(block_group);
 	}
 
-	if (index == 0) {
+	if (index == BTRFS_RAID_RAID10) {
 		dev_min = 4;
 		/* Divide by 2 */
 		min_free >>= 1;
-	} else if (index == 1) {
+	} else if (index == BTRFS_RAID_RAID1) {
 		dev_min = 2;
-	} else if (index == 2) {
+	} else if (index == BTRFS_RAID_DUP) {
 		/* Multiply by 2 */
 		min_free <<= 1;
-	} else if (index == 3) {
+	} else if (index == BTRFS_RAID_RAID0) {
 		dev_min = fs_devices->rw_devices;
 		do_div(min_free, dev_min);
 	}
@@ -7651,11 +7732,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 		space_info = list_entry(info->space_info.next,
 					struct btrfs_space_info,
 					list);
-		if (space_info->bytes_pinned > 0 ||
-		    space_info->bytes_reserved > 0 ||
-		    space_info->bytes_may_use > 0) {
-			WARN_ON(1);
-			dump_space_info(space_info, 0, 0);
+		if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
+			if (space_info->bytes_pinned > 0 ||
+			    space_info->bytes_reserved > 0 ||
+			    space_info->bytes_may_use > 0) {
+				WARN_ON(1);
+				dump_space_info(space_info, 0, 0);
+			}
 		}
 		list_del(&space_info->list);
 		kfree(space_info);
@@ -7932,12 +8015,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	u64 extra_flags = chunk_to_extended(flags) &
 				BTRFS_EXTENDED_PROFILE_MASK;
 
+	write_seqlock(&fs_info->profiles_lock);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		fs_info->avail_data_alloc_bits &= ~extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_METADATA)
 		fs_info->avail_metadata_alloc_bits &= ~extra_flags;
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		fs_info->avail_system_alloc_bits &= ~extra_flags;
+	write_sequnlock(&fs_info->profiles_lock);
 }
 
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
@@ -8036,6 +8121,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_lock(&root->fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
+
+	if (root->fs_info->first_logical_byte == block_group->key.objectid)
+		root->fs_info->first_logical_byte = (u64)-1;
 	spin_unlock(&root->fs_info->block_group_cache_lock);
 
 	down_write(&block_group->space_info->groups_sem);
@@ -8158,7 +8246,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
 
 		if (end - start >= range->minlen) {
 			if (!block_group_cache_done(cache)) {
-				ret = cache_block_group(cache, 0);
+				ret = cache_block_group(cache, 0);
 				if (!ret)
 					wait_block_group_cache_done(cache);
 			}