Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c  285
1 file changed, 153 insertions(+), 132 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 71cd456fdb6..4d08ed79405 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 	return total_added;
 }
 
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
 {
-	struct btrfs_block_group_cache *block_group = data;
-	struct btrfs_fs_info *fs_info = block_group->fs_info;
-	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
-	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_block_group_cache *block_group;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_caching_control *caching_ctl;
+	struct btrfs_root *extent_root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
 	u32 nritems;
 	int ret = 0;
 
+	caching_ctl = container_of(work, struct btrfs_caching_control, work);
+	block_group = caching_ctl->block_group;
+	fs_info = block_group->fs_info;
+	extent_root = fs_info->extent_root;
+
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOMEM;
+		goto out;
 
 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
@@ -433,13 +438,11 @@ err:
 	free_excluded_extents(extent_root, block_group);
 
 	mutex_unlock(&caching_ctl->mutex);
+out:
 	wake_up(&caching_ctl->wait);
 
 	put_caching_control(caching_ctl);
-	atomic_dec(&block_group->space_info->caching_threads);
 	btrfs_put_block_group(block_group);
-
-	return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
-	struct task_struct *tsk;
 	int ret = 0;
 
 	smp_mb();
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	caching_ctl->progress = cache->key.objectid;
 	/* one for caching kthread, one for caching block group list */
 	atomic_set(&caching_ctl->count, 2);
+	caching_ctl->work.func = caching_thread;
 
 	spin_lock(&cache->lock);
 	if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
 	up_write(&fs_info->extent_commit_sem);
 
-	atomic_inc(&cache->space_info->caching_threads);
 	btrfs_get_block_group(cache);
 
-	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
-			  cache->key.objectid);
-	if (IS_ERR(tsk)) {
-		ret = PTR_ERR(tsk);
-		printk(KERN_ERR "error running thread %d\n", ret);
-		BUG();
-	}
+	btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
 
 	return ret;
 }
@@ -2932,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->full = 0;
 	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
 	found->chunk_alloc = 0;
+	found->flush = 0;
+	init_waitqueue_head(&found->wait);
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
-	atomic_set(&found->caching_threads, 0);
 	return 0;
 }
 
@@ -3314,6 +3311,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	if (reserved == 0)
 		return 0;
 
+	smp_mb();
+	if (root->fs_info->delalloc_bytes == 0) {
+		if (trans)
+			return 0;
+		btrfs_wait_ordered_extents(root, 0, 0);
+		return 0;
+	}
+
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (loops < 1024) {
@@ -3356,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		}
 
 	}
+	if (reclaimed >= to_reclaim && !trans)
+		btrfs_wait_ordered_extents(root, 0, 0);
 	return reclaimed >= to_reclaim;
 }
 
@@ -3380,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 	u64 num_bytes = orig_bytes;
 	int retries = 0;
 	int ret = 0;
-	bool reserved = false;
 	bool committed = false;
+	bool flushing = false;
 
 again:
-	ret = -ENOSPC;
-	if (reserved)
-		num_bytes = 0;
-
+	ret = 0;
 	spin_lock(&space_info->lock);
+	/*
+	 * We only want to wait if somebody other than us is flushing and we
+	 * are actually allowed to flush.
+	 */
+	while (flush && !flushing && space_info->flush) {
+		spin_unlock(&space_info->lock);
+		/*
+		 * If we have a trans handle we can't wait because the flusher
+		 * may have to commit the transaction, which would mean we would
+		 * deadlock since we are waiting for the flusher to finish, but
+		 * hold the current transaction open.
+		 */
+		if (trans)
+			return -EAGAIN;
+		ret = wait_event_interruptible(space_info->wait,
+					       !space_info->flush);
+		/* Must have been interrupted, return */
+		if (ret)
+			return -EINTR;
+
+		spin_lock(&space_info->lock);
+	}
+
+	ret = -ENOSPC;
 	unused = space_info->bytes_used + space_info->bytes_reserved +
 		 space_info->bytes_pinned + space_info->bytes_readonly +
 		 space_info->bytes_may_use;
@@ -3403,8 +3431,7 @@ again:
 	if (unused <= space_info->total_bytes) {
 		unused = space_info->total_bytes - unused;
 		if (unused >= num_bytes) {
-			if (!reserved)
-				space_info->bytes_reserved += orig_bytes;
+			space_info->bytes_reserved += orig_bytes;
 			ret = 0;
 		} else {
 			/*
@@ -3429,17 +3456,14 @@ again:
 	 * to reclaim space we can actually use it instead of somebody else
 	 * stealing it from us.
 	 */
-	if (ret && !reserved) {
-		space_info->bytes_reserved += orig_bytes;
-		reserved = true;
+	if (ret && flush) {
+		flushing = true;
+		space_info->flush = 1;
 	}
 
 	spin_unlock(&space_info->lock);
 
-	if (!ret)
-		return 0;
-
-	if (!flush)
+	if (!ret || !flush)
 		goto out;
 
 	/*
@@ -3447,11 +3471,11 @@ again:
 	 * metadata until after the IO is completed.
 	 */
 	ret = shrink_delalloc(trans, root, num_bytes, 1);
-	if (ret > 0)
-		return 0;
-	else if (ret < 0)
+	if (ret < 0)
 		goto out;
 
+	ret = 0;
+
 	/*
 	 * So if we were overcommitted it's possible that somebody else flushed
 	 * out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3486,11 @@ again:
 		goto again;
 	}
 
-	spin_lock(&space_info->lock);
 	/*
 	 * Not enough space to be reclaimed, don't bother committing the
 	 * transaction.
 	 */
+	spin_lock(&space_info->lock);
 	if (space_info->bytes_pinned < orig_bytes)
 		ret = -ENOSPC;
 	spin_unlock(&space_info->lock);
@@ -3474,10 +3498,13 @@ again:
 		goto out;
 
 	ret = -EAGAIN;
-	if (trans || committed)
+	if (trans)
 		goto out;
 
 	ret = -ENOSPC;
+	if (committed)
+		goto out;
+
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		goto out;
@@ -3489,12 +3516,12 @@ again:
 	}
 
 out:
-	if (reserved) {
+	if (flushing) {
 		spin_lock(&space_info->lock);
-		space_info->bytes_reserved -= orig_bytes;
+		space_info->flush = 0;
+		wake_up_all(&space_info->wait);
 		spin_unlock(&space_info->lock);
 	}
-
 	return ret;
 }
 
@@ -3704,7 +3731,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 	if (commit_trans) {
 		if (trans)
 			return -EAGAIN;
-
 		trans = btrfs_join_transaction(root);
 		BUG_ON(IS_ERR(trans));
 		ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3900,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 int num_items)
-{
-	u64 num_bytes;
-	int ret;
-
-	if (num_items == 0 || root->fs_info->chunk_root == root)
-		return 0;
-
-	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes);
-	if (!ret) {
-		trans->bytes_reserved += num_bytes;
-		trans->block_rsv = &root->fs_info->trans_block_rsv;
-	}
-	return ret;
-}
-
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root)
 {
@@ -3944,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+	unsigned dropped_extents = 0;
+
+	spin_lock(&BTRFS_I(inode)->lock);
+	BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+	BTRFS_I(inode)->outstanding_extents--;
+
+	/*
+	 * If we have more or the same number of outstanding extents as we have
+	 * reserved then we need to leave the reserved extents count alone.
+	 */
+	if (BTRFS_I(inode)->outstanding_extents >=
+	    BTRFS_I(inode)->reserved_extents)
+		goto out;
+
+	dropped_extents = BTRFS_I(inode)->reserved_extents -
+		BTRFS_I(inode)->outstanding_extents;
+	BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+	spin_unlock(&BTRFS_I(inode)->lock);
+	return dropped_extents;
+}
+
 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
 	return num_bytes >>= 3;
@@ -3953,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
-	u64 to_reserve;
-	int nr_extents;
-	int reserved_extents;
+	u64 to_reserve = 0;
+	unsigned nr_extents = 0;
 	int ret;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
 
-	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+	spin_lock(&BTRFS_I(inode)->lock);
+	BTRFS_I(inode)->outstanding_extents++;
+
+	if (BTRFS_I(inode)->outstanding_extents >
+	    BTRFS_I(inode)->reserved_extents) {
+		nr_extents = BTRFS_I(inode)->outstanding_extents -
+			BTRFS_I(inode)->reserved_extents;
+		BTRFS_I(inode)->reserved_extents += nr_extents;
 
-	if (nr_extents > reserved_extents) {
-		nr_extents -= reserved_extents;
 		to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-	} else {
-		nr_extents = 0;
-		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
 	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-	if (ret)
+	if (ret) {
+		unsigned dropped;
+		/*
+		 * We don't need the return value since our reservation failed,
+		 * we just need to clean up our counter.
+		 */
+		dropped = drop_outstanding_extent(inode);
+		WARN_ON(dropped > 1);
 		return ret;
-
-	atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
-	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+	}
 
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
-	if (block_rsv->size > 512 * 1024 * 1024)
-		shrink_delalloc(NULL, root, to_reserve, 0);
-
 	return 0;
 }
 
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 to_free;
-	int nr_extents;
-	int reserved_extents;
+	u64 to_free = 0;
+	unsigned dropped;
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
-	WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
-	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
-	do {
-		int old, new;
-
-		nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-		if (nr_extents >= reserved_extents) {
-			nr_extents = 0;
-			break;
-		}
-		old = reserved_extents;
-		nr_extents = reserved_extents - nr_extents;
-		new = reserved_extents - nr_extents;
-		old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
-				     reserved_extents, new);
-		if (likely(old == reserved_extents))
-			break;
-		reserved_extents = old;
-	} while (1);
+	dropped = drop_outstanding_extent(inode);
 
 	to_free = calc_csum_metadata_size(inode, num_bytes);
-	if (nr_extents > 0)
-		to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+	if (dropped > 0)
+		to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
 	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
 				to_free);
@@ -4990,14 +5002,10 @@ have_block_group:
 			}
 
 			/*
-			 * We only want to start kthread caching if we are at
-			 * the point where we will wait for caching to make
-			 * progress, or if our ideal search is over and we've
-			 * found somebody to start caching.
+			 * The caching workers are limited to 2 threads, so we
+			 * can queue as much work as we care to.
 			 */
-			if (loop > LOOP_CACHING_NOWAIT ||
-			    (loop > LOOP_FIND_IDEAL &&
-			     atomic_read(&space_info->caching_threads) < 2)) {
+			if (loop > LOOP_FIND_IDEAL) {
 				ret = cache_block_group(block_group, trans,
 							orig_root, 0);
 				BUG_ON(ret);
@@ -5219,8 +5227,7 @@ loop:
 	if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
 		found_uncached_bg = false;
 		loop++;
-		if (!ideal_cache_percent &&
-		    atomic_read(&space_info->caching_threads))
+		if (!ideal_cache_percent)
 			goto search;
 
 		/*
@@ -5623,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 	btrfs_set_header_generation(buf, trans->transid);
-	btrfs_set_buffer_lockdep_class(buf, level);
+	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
 	clean_tree_block(trans, root, buf);
 
@@ -5910,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 		return 1;
 
 	if (path->locks[level] && !wc->keep_locks) {
-		btrfs_tree_unlock(eb);
+		btrfs_tree_unlock_rw(eb, path->locks[level]);
 		path->locks[level] = 0;
 	}
 	return 0;
@@ -5934,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 	 * keep the tree lock
 	 */
 	if (path->locks[level] && level > 0) {
-		btrfs_tree_unlock(eb);
+		btrfs_tree_unlock_rw(eb, path->locks[level]);
 		path->locks[level] = 0;
 	}
 	return 0;
@@ -6047,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 	BUG_ON(level != btrfs_header_level(next));
 	path->nodes[level] = next;
 	path->slots[level] = 0;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 	wc->level = level;
 	if (wc->level == 1)
 		wc->reada_slot = 0;
@@ -6118,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 		BUG_ON(level == 0);
 		btrfs_tree_lock(eb);
 		btrfs_set_lock_blocking(eb);
-		path->locks[level] = 1;
+		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
 		ret = btrfs_lookup_extent_info(trans, root,
 					       eb->start, eb->len,
@@ -6127,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 		BUG_ON(ret);
 		BUG_ON(wc->refs[level] == 0);
 		if (wc->refs[level] == 1) {
-			btrfs_tree_unlock(eb);
-			path->locks[level] = 0;
+			btrfs_tree_unlock_rw(eb, path->locks[level]);
 			return 1;
 		}
 	}
@@ -6150,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 		    btrfs_header_generation(eb) == trans->transid) {
 			btrfs_tree_lock(eb);
 			btrfs_set_lock_blocking(eb);
-			path->locks[level] = 1;
+			path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 		}
 		clean_tree_block(trans, root, eb);
 	}
@@ -6229,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 				return 0;
 
 			if (path->locks[level]) {
-				btrfs_tree_unlock(path->nodes[level]);
+				btrfs_tree_unlock_rw(path->nodes[level],
+						     path->locks[level]);
 				path->locks[level] = 0;
 			}
 			free_extent_buffer(path->nodes[level]);
@@ -6281,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 		path->nodes[level] = btrfs_lock_root_node(root);
 		btrfs_set_lock_blocking(path->nodes[level]);
 		path->slots[level] = 0;
-		path->locks[level] = 1;
+		path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 		memset(&wc->update_progress, 0,
 		       sizeof(wc->update_progress));
 	} else {
@@ -6449,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 	level = btrfs_header_level(node);
 	path->nodes[level] = node;
 	path->slots[level] = 0;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
 	wc->refs[parent_level] = 1;
 	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6524,15 +6531,28 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 	return flags;
 }
 
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
 	struct btrfs_space_info *sinfo = cache->space_info;
 	u64 num_bytes;
+	u64 min_allocable_bytes;
 	int ret = -ENOSPC;
 
 	if (cache->ro)
 		return 0;
 
+	/*
+	 * We need some metadata space and system metadata space for
+	 * allocating chunks in some corner cases, unless we are forced
+	 * to set the group read-only.
+	 */
+	if ((sinfo->flags &
+	     (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+	    !force)
+		min_allocable_bytes = 1 * 1024 * 1024;
+	else
+		min_allocable_bytes = 0;
+
 	spin_lock(&sinfo->lock);
 	spin_lock(&cache->lock);
 	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
@@ -6540,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache)
 
 	if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
 	    sinfo->bytes_may_use + sinfo->bytes_readonly +
-	    cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+	    cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+	    sinfo->total_bytes) {
 		sinfo->bytes_readonly += num_bytes;
 		sinfo->bytes_reserved += cache->reserved_pinned;
 		cache->reserved_pinned = 0;
@@ -6571,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 	do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 		       CHUNK_ALLOC_FORCE);
 
-	ret = set_block_group_ro(cache);
+	ret = set_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 			     CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;
-	ret = set_block_group_ro(cache);
+	ret = set_block_group_ro(cache, 0);
 out:
 	btrfs_end_transaction(trans, root);
 	return ret;
@@ -7016,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
 		set_avail_alloc_bits(root->fs_info, cache->flags);
 		if (btrfs_chunk_readonly(root, cache->key.objectid))
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 	}
 
 	list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		 * mirrored block groups.
 		 */
 		list_for_each_entry(cache, &space_info->block_groups[3], list)
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 		list_for_each_entry(cache, &space_info->block_groups[4], list)
-			set_block_group_ro(cache);
+			set_block_group_ro(cache, 1);
 	}
 
 	init_global_block_rsv(info);