diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 285 |
1 files changed, 153 insertions, 132 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456fdb6..4d08ed79405 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
320 | return total_added; | 320 | return total_added; |
321 | } | 321 | } |
322 | 322 | ||
323 | static int caching_kthread(void *data) | 323 | static noinline void caching_thread(struct btrfs_work *work) |
324 | { | 324 | { |
325 | struct btrfs_block_group_cache *block_group = data; | 325 | struct btrfs_block_group_cache *block_group; |
326 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 326 | struct btrfs_fs_info *fs_info; |
327 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; | 327 | struct btrfs_caching_control *caching_ctl; |
328 | struct btrfs_root *extent_root = fs_info->extent_root; | 328 | struct btrfs_root *extent_root; |
329 | struct btrfs_path *path; | 329 | struct btrfs_path *path; |
330 | struct extent_buffer *leaf; | 330 | struct extent_buffer *leaf; |
331 | struct btrfs_key key; | 331 | struct btrfs_key key; |
@@ -334,9 +334,14 @@ static int caching_kthread(void *data) | |||
334 | u32 nritems; | 334 | u32 nritems; |
335 | int ret = 0; | 335 | int ret = 0; |
336 | 336 | ||
337 | caching_ctl = container_of(work, struct btrfs_caching_control, work); | ||
338 | block_group = caching_ctl->block_group; | ||
339 | fs_info = block_group->fs_info; | ||
340 | extent_root = fs_info->extent_root; | ||
341 | |||
337 | path = btrfs_alloc_path(); | 342 | path = btrfs_alloc_path(); |
338 | if (!path) | 343 | if (!path) |
339 | return -ENOMEM; | 344 | goto out; |
340 | 345 | ||
341 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 346 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
342 | 347 | ||
@@ -433,13 +438,11 @@ err: | |||
433 | free_excluded_extents(extent_root, block_group); | 438 | free_excluded_extents(extent_root, block_group); |
434 | 439 | ||
435 | mutex_unlock(&caching_ctl->mutex); | 440 | mutex_unlock(&caching_ctl->mutex); |
441 | out: | ||
436 | wake_up(&caching_ctl->wait); | 442 | wake_up(&caching_ctl->wait); |
437 | 443 | ||
438 | put_caching_control(caching_ctl); | 444 | put_caching_control(caching_ctl); |
439 | atomic_dec(&block_group->space_info->caching_threads); | ||
440 | btrfs_put_block_group(block_group); | 445 | btrfs_put_block_group(block_group); |
441 | |||
442 | return 0; | ||
443 | } | 446 | } |
444 | 447 | ||
445 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 448 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
449 | { | 452 | { |
450 | struct btrfs_fs_info *fs_info = cache->fs_info; | 453 | struct btrfs_fs_info *fs_info = cache->fs_info; |
451 | struct btrfs_caching_control *caching_ctl; | 454 | struct btrfs_caching_control *caching_ctl; |
452 | struct task_struct *tsk; | ||
453 | int ret = 0; | 455 | int ret = 0; |
454 | 456 | ||
455 | smp_mb(); | 457 | smp_mb(); |
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
501 | caching_ctl->progress = cache->key.objectid; | 503 | caching_ctl->progress = cache->key.objectid; |
502 | /* one for caching kthread, one for caching block group list */ | 504 | /* one for caching kthread, one for caching block group list */ |
503 | atomic_set(&caching_ctl->count, 2); | 505 | atomic_set(&caching_ctl->count, 2); |
506 | caching_ctl->work.func = caching_thread; | ||
504 | 507 | ||
505 | spin_lock(&cache->lock); | 508 | spin_lock(&cache->lock); |
506 | if (cache->cached != BTRFS_CACHE_NO) { | 509 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
516 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 519 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
517 | up_write(&fs_info->extent_commit_sem); | 520 | up_write(&fs_info->extent_commit_sem); |
518 | 521 | ||
519 | atomic_inc(&cache->space_info->caching_threads); | ||
520 | btrfs_get_block_group(cache); | 522 | btrfs_get_block_group(cache); |
521 | 523 | ||
522 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 524 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); |
523 | cache->key.objectid); | ||
524 | if (IS_ERR(tsk)) { | ||
525 | ret = PTR_ERR(tsk); | ||
526 | printk(KERN_ERR "error running thread %d\n", ret); | ||
527 | BUG(); | ||
528 | } | ||
529 | 525 | ||
530 | return ret; | 526 | return ret; |
531 | } | 527 | } |
@@ -2932,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2932 | found->full = 0; | 2928 | found->full = 0; |
2933 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; | 2929 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
2934 | found->chunk_alloc = 0; | 2930 | found->chunk_alloc = 0; |
2931 | found->flush = 0; | ||
2932 | init_waitqueue_head(&found->wait); | ||
2935 | *space_info = found; | 2933 | *space_info = found; |
2936 | list_add_rcu(&found->list, &info->space_info); | 2934 | list_add_rcu(&found->list, &info->space_info); |
2937 | atomic_set(&found->caching_threads, 0); | ||
2938 | return 0; | 2935 | return 0; |
2939 | } | 2936 | } |
2940 | 2937 | ||
@@ -3314,6 +3311,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3314 | if (reserved == 0) | 3311 | if (reserved == 0) |
3315 | return 0; | 3312 | return 0; |
3316 | 3313 | ||
3314 | smp_mb(); | ||
3315 | if (root->fs_info->delalloc_bytes == 0) { | ||
3316 | if (trans) | ||
3317 | return 0; | ||
3318 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3319 | return 0; | ||
3320 | } | ||
3321 | |||
3317 | max_reclaim = min(reserved, to_reclaim); | 3322 | max_reclaim = min(reserved, to_reclaim); |
3318 | 3323 | ||
3319 | while (loops < 1024) { | 3324 | while (loops < 1024) { |
@@ -3356,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3356 | } | 3361 | } |
3357 | 3362 | ||
3358 | } | 3363 | } |
3364 | if (reclaimed >= to_reclaim && !trans) | ||
3365 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3359 | return reclaimed >= to_reclaim; | 3366 | return reclaimed >= to_reclaim; |
3360 | } | 3367 | } |
3361 | 3368 | ||
@@ -3380,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | |||
3380 | u64 num_bytes = orig_bytes; | 3387 | u64 num_bytes = orig_bytes; |
3381 | int retries = 0; | 3388 | int retries = 0; |
3382 | int ret = 0; | 3389 | int ret = 0; |
3383 | bool reserved = false; | ||
3384 | bool committed = false; | 3390 | bool committed = false; |
3391 | bool flushing = false; | ||
3385 | 3392 | ||
3386 | again: | 3393 | again: |
3387 | ret = -ENOSPC; | 3394 | ret = 0; |
3388 | if (reserved) | ||
3389 | num_bytes = 0; | ||
3390 | |||
3391 | spin_lock(&space_info->lock); | 3395 | spin_lock(&space_info->lock); |
3396 | /* | ||
3397 | * We only want to wait if somebody other than us is flushing and we are | ||
3398 | * actually allowed to flush. | ||
3399 | */ | ||
3400 | while (flush && !flushing && space_info->flush) { | ||
3401 | spin_unlock(&space_info->lock); | ||
3402 | /* | ||
3403 | * If we have a trans handle we can't wait because the flusher | ||
3404 | * may have to commit the transaction, which would mean we would | ||
3405 | * deadlock since we are waiting for the flusher to finish, but | ||
3406 | * hold the current transaction open. | ||
3407 | */ | ||
3408 | if (trans) | ||
3409 | return -EAGAIN; | ||
3410 | ret = wait_event_interruptible(space_info->wait, | ||
3411 | !space_info->flush); | ||
3412 | /* Must have been interrupted, return */ | ||
3413 | if (ret) | ||
3414 | return -EINTR; | ||
3415 | |||
3416 | spin_lock(&space_info->lock); | ||
3417 | } | ||
3418 | |||
3419 | ret = -ENOSPC; | ||
3392 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3420 | unused = space_info->bytes_used + space_info->bytes_reserved + |
3393 | space_info->bytes_pinned + space_info->bytes_readonly + | 3421 | space_info->bytes_pinned + space_info->bytes_readonly + |
3394 | space_info->bytes_may_use; | 3422 | space_info->bytes_may_use; |
@@ -3403,8 +3431,7 @@ again: | |||
3403 | if (unused <= space_info->total_bytes) { | 3431 | if (unused <= space_info->total_bytes) { |
3404 | unused = space_info->total_bytes - unused; | 3432 | unused = space_info->total_bytes - unused; |
3405 | if (unused >= num_bytes) { | 3433 | if (unused >= num_bytes) { |
3406 | if (!reserved) | 3434 | space_info->bytes_reserved += orig_bytes; |
3407 | space_info->bytes_reserved += orig_bytes; | ||
3408 | ret = 0; | 3435 | ret = 0; |
3409 | } else { | 3436 | } else { |
3410 | /* | 3437 | /* |
@@ -3429,17 +3456,14 @@ again: | |||
3429 | * to reclaim space we can actually use it instead of somebody else | 3456 | * to reclaim space we can actually use it instead of somebody else |
3430 | * stealing it from us. | 3457 | * stealing it from us. |
3431 | */ | 3458 | */ |
3432 | if (ret && !reserved) { | 3459 | if (ret && flush) { |
3433 | space_info->bytes_reserved += orig_bytes; | 3460 | flushing = true; |
3434 | reserved = true; | 3461 | space_info->flush = 1; |
3435 | } | 3462 | } |
3436 | 3463 | ||
3437 | spin_unlock(&space_info->lock); | 3464 | spin_unlock(&space_info->lock); |
3438 | 3465 | ||
3439 | if (!ret) | 3466 | if (!ret || !flush) |
3440 | return 0; | ||
3441 | |||
3442 | if (!flush) | ||
3443 | goto out; | 3467 | goto out; |
3444 | 3468 | ||
3445 | /* | 3469 | /* |
@@ -3447,11 +3471,11 @@ again: | |||
3447 | * metadata until after the IO is completed. | 3471 | * metadata until after the IO is completed. |
3448 | */ | 3472 | */ |
3449 | ret = shrink_delalloc(trans, root, num_bytes, 1); | 3473 | ret = shrink_delalloc(trans, root, num_bytes, 1); |
3450 | if (ret > 0) | 3474 | if (ret < 0) |
3451 | return 0; | ||
3452 | else if (ret < 0) | ||
3453 | goto out; | 3475 | goto out; |
3454 | 3476 | ||
3477 | ret = 0; | ||
3478 | |||
3455 | /* | 3479 | /* |
3456 | * So if we were overcommitted it's possible that somebody else flushed | 3480 | * So if we were overcommitted it's possible that somebody else flushed |
3457 | * out enough space and we simply didn't have enough space to reclaim, | 3481 | * out enough space and we simply didn't have enough space to reclaim, |
@@ -3462,11 +3486,11 @@ again: | |||
3462 | goto again; | 3486 | goto again; |
3463 | } | 3487 | } |
3464 | 3488 | ||
3465 | spin_lock(&space_info->lock); | ||
3466 | /* | 3489 | /* |
3467 | * Not enough space to be reclaimed, don't bother committing the | 3490 | * Not enough space to be reclaimed, don't bother committing the |
3468 | * transaction. | 3491 | * transaction. |
3469 | */ | 3492 | */ |
3493 | spin_lock(&space_info->lock); | ||
3470 | if (space_info->bytes_pinned < orig_bytes) | 3494 | if (space_info->bytes_pinned < orig_bytes) |
3471 | ret = -ENOSPC; | 3495 | ret = -ENOSPC; |
3472 | spin_unlock(&space_info->lock); | 3496 | spin_unlock(&space_info->lock); |
@@ -3474,10 +3498,13 @@ again: | |||
3474 | goto out; | 3498 | goto out; |
3475 | 3499 | ||
3476 | ret = -EAGAIN; | 3500 | ret = -EAGAIN; |
3477 | if (trans || committed) | 3501 | if (trans) |
3478 | goto out; | 3502 | goto out; |
3479 | 3503 | ||
3480 | ret = -ENOSPC; | 3504 | ret = -ENOSPC; |
3505 | if (committed) | ||
3506 | goto out; | ||
3507 | |||
3481 | trans = btrfs_join_transaction(root); | 3508 | trans = btrfs_join_transaction(root); |
3482 | if (IS_ERR(trans)) | 3509 | if (IS_ERR(trans)) |
3483 | goto out; | 3510 | goto out; |
@@ -3489,12 +3516,12 @@ again: | |||
3489 | } | 3516 | } |
3490 | 3517 | ||
3491 | out: | 3518 | out: |
3492 | if (reserved) { | 3519 | if (flushing) { |
3493 | spin_lock(&space_info->lock); | 3520 | spin_lock(&space_info->lock); |
3494 | space_info->bytes_reserved -= orig_bytes; | 3521 | space_info->flush = 0; |
3522 | wake_up_all(&space_info->wait); | ||
3495 | spin_unlock(&space_info->lock); | 3523 | spin_unlock(&space_info->lock); |
3496 | } | 3524 | } |
3497 | |||
3498 | return ret; | 3525 | return ret; |
3499 | } | 3526 | } |
3500 | 3527 | ||
@@ -3704,7 +3731,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3704 | if (commit_trans) { | 3731 | if (commit_trans) { |
3705 | if (trans) | 3732 | if (trans) |
3706 | return -EAGAIN; | 3733 | return -EAGAIN; |
3707 | |||
3708 | trans = btrfs_join_transaction(root); | 3734 | trans = btrfs_join_transaction(root); |
3709 | BUG_ON(IS_ERR(trans)); | 3735 | BUG_ON(IS_ERR(trans)); |
3710 | ret = btrfs_commit_transaction(trans, root); | 3736 | ret = btrfs_commit_transaction(trans, root); |
@@ -3874,26 +3900,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3874 | return 0; | 3900 | return 0; |
3875 | } | 3901 | } |
3876 | 3902 | ||
3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3878 | struct btrfs_root *root, | ||
3879 | int num_items) | ||
3880 | { | ||
3881 | u64 num_bytes; | ||
3882 | int ret; | ||
3883 | |||
3884 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3885 | return 0; | ||
3886 | |||
3887 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
3888 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3889 | num_bytes); | ||
3890 | if (!ret) { | ||
3891 | trans->bytes_reserved += num_bytes; | ||
3892 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3893 | } | ||
3894 | return ret; | ||
3895 | } | ||
3896 | |||
3897 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 3903 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
3898 | struct btrfs_root *root) | 3904 | struct btrfs_root *root) |
3899 | { | 3905 | { |
@@ -3944,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3944 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3950 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3945 | } | 3951 | } |
3946 | 3952 | ||
3953 | static unsigned drop_outstanding_extent(struct inode *inode) | ||
3954 | { | ||
3955 | unsigned dropped_extents = 0; | ||
3956 | |||
3957 | spin_lock(&BTRFS_I(inode)->lock); | ||
3958 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | ||
3959 | BTRFS_I(inode)->outstanding_extents--; | ||
3960 | |||
3961 | /* | ||
3962 | * If we have more or the same amount of outstanding extents than we have | ||
3963 | * reserved then we need to leave the reserved extents count alone. | ||
3964 | */ | ||
3965 | if (BTRFS_I(inode)->outstanding_extents >= | ||
3966 | BTRFS_I(inode)->reserved_extents) | ||
3967 | goto out; | ||
3968 | |||
3969 | dropped_extents = BTRFS_I(inode)->reserved_extents - | ||
3970 | BTRFS_I(inode)->outstanding_extents; | ||
3971 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | ||
3972 | out: | ||
3973 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3974 | return dropped_extents; | ||
3975 | } | ||
3976 | |||
3947 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 3977 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) |
3948 | { | 3978 | { |
3949 | return num_bytes >>= 3; | 3979 | return num_bytes >>= 3; |
@@ -3953,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3953 | { | 3983 | { |
3954 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3984 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3955 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3985 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3956 | u64 to_reserve; | 3986 | u64 to_reserve = 0; |
3957 | int nr_extents; | 3987 | unsigned nr_extents = 0; |
3958 | int reserved_extents; | ||
3959 | int ret; | 3988 | int ret; |
3960 | 3989 | ||
3961 | if (btrfs_transaction_in_commit(root->fs_info)) | 3990 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -3963,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3963 | 3992 | ||
3964 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 3993 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3965 | 3994 | ||
3966 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 3995 | spin_lock(&BTRFS_I(inode)->lock); |
3967 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | 3996 | BTRFS_I(inode)->outstanding_extents++; |
3997 | |||
3998 | if (BTRFS_I(inode)->outstanding_extents > | ||
3999 | BTRFS_I(inode)->reserved_extents) { | ||
4000 | nr_extents = BTRFS_I(inode)->outstanding_extents - | ||
4001 | BTRFS_I(inode)->reserved_extents; | ||
4002 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3968 | 4003 | ||
3969 | if (nr_extents > reserved_extents) { | ||
3970 | nr_extents -= reserved_extents; | ||
3971 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4004 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
3972 | } else { | ||
3973 | nr_extents = 0; | ||
3974 | to_reserve = 0; | ||
3975 | } | 4005 | } |
4006 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3976 | 4007 | ||
3977 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4008 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3978 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4009 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3979 | if (ret) | 4010 | if (ret) { |
4011 | unsigned dropped; | ||
4012 | /* | ||
4013 | * We don't need the return value since our reservation failed, | ||
4014 | * we just need to clean up our counter. | ||
4015 | */ | ||
4016 | dropped = drop_outstanding_extent(inode); | ||
4017 | WARN_ON(dropped > 1); | ||
3980 | return ret; | 4018 | return ret; |
3981 | 4019 | } | |
3982 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); | ||
3983 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3984 | 4020 | ||
3985 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4021 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3986 | 4022 | ||
3987 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3988 | shrink_delalloc(NULL, root, to_reserve, 0); | ||
3989 | |||
3990 | return 0; | 4023 | return 0; |
3991 | } | 4024 | } |
3992 | 4025 | ||
3993 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4026 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
3994 | { | 4027 | { |
3995 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4028 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3996 | u64 to_free; | 4029 | u64 to_free = 0; |
3997 | int nr_extents; | 4030 | unsigned dropped; |
3998 | int reserved_extents; | ||
3999 | 4031 | ||
4000 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4032 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4001 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4033 | dropped = drop_outstanding_extent(inode); |
4002 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4003 | |||
4004 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | ||
4005 | do { | ||
4006 | int old, new; | ||
4007 | |||
4008 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
4009 | if (nr_extents >= reserved_extents) { | ||
4010 | nr_extents = 0; | ||
4011 | break; | ||
4012 | } | ||
4013 | old = reserved_extents; | ||
4014 | nr_extents = reserved_extents - nr_extents; | ||
4015 | new = reserved_extents - nr_extents; | ||
4016 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4017 | reserved_extents, new); | ||
4018 | if (likely(old == reserved_extents)) | ||
4019 | break; | ||
4020 | reserved_extents = old; | ||
4021 | } while (1); | ||
4022 | 4034 | ||
4023 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4035 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4024 | if (nr_extents > 0) | 4036 | if (dropped > 0) |
4025 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); | 4037 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4026 | 4038 | ||
4027 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4039 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4028 | to_free); | 4040 | to_free); |
@@ -4990,14 +5002,10 @@ have_block_group: | |||
4990 | } | 5002 | } |
4991 | 5003 | ||
4992 | /* | 5004 | /* |
4993 | * We only want to start kthread caching if we are at | 5005 | * The caching workers are limited to 2 threads, so we |
4994 | * the point where we will wait for caching to make | 5006 | * can queue as much work as we care to. |
4995 | * progress, or if our ideal search is over and we've | ||
4996 | * found somebody to start caching. | ||
4997 | */ | 5007 | */ |
4998 | if (loop > LOOP_CACHING_NOWAIT || | 5008 | if (loop > LOOP_FIND_IDEAL) { |
4999 | (loop > LOOP_FIND_IDEAL && | ||
5000 | atomic_read(&space_info->caching_threads) < 2)) { | ||
5001 | ret = cache_block_group(block_group, trans, | 5009 | ret = cache_block_group(block_group, trans, |
5002 | orig_root, 0); | 5010 | orig_root, 0); |
5003 | BUG_ON(ret); | 5011 | BUG_ON(ret); |
@@ -5219,8 +5227,7 @@ loop: | |||
5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5227 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
5220 | found_uncached_bg = false; | 5228 | found_uncached_bg = false; |
5221 | loop++; | 5229 | loop++; |
5222 | if (!ideal_cache_percent && | 5230 | if (!ideal_cache_percent) |
5223 | atomic_read(&space_info->caching_threads)) | ||
5224 | goto search; | 5231 | goto search; |
5225 | 5232 | ||
5226 | /* | 5233 | /* |
@@ -5623,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
5623 | if (!buf) | 5630 | if (!buf) |
5624 | return ERR_PTR(-ENOMEM); | 5631 | return ERR_PTR(-ENOMEM); |
5625 | btrfs_set_header_generation(buf, trans->transid); | 5632 | btrfs_set_header_generation(buf, trans->transid); |
5626 | btrfs_set_buffer_lockdep_class(buf, level); | 5633 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); |
5627 | btrfs_tree_lock(buf); | 5634 | btrfs_tree_lock(buf); |
5628 | clean_tree_block(trans, root, buf); | 5635 | clean_tree_block(trans, root, buf); |
5629 | 5636 | ||
@@ -5910,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5910 | return 1; | 5917 | return 1; |
5911 | 5918 | ||
5912 | if (path->locks[level] && !wc->keep_locks) { | 5919 | if (path->locks[level] && !wc->keep_locks) { |
5913 | btrfs_tree_unlock(eb); | 5920 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5914 | path->locks[level] = 0; | 5921 | path->locks[level] = 0; |
5915 | } | 5922 | } |
5916 | return 0; | 5923 | return 0; |
@@ -5934,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5934 | * keep the tree lock | 5941 | * keep the tree lock |
5935 | */ | 5942 | */ |
5936 | if (path->locks[level] && level > 0) { | 5943 | if (path->locks[level] && level > 0) { |
5937 | btrfs_tree_unlock(eb); | 5944 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5938 | path->locks[level] = 0; | 5945 | path->locks[level] = 0; |
5939 | } | 5946 | } |
5940 | return 0; | 5947 | return 0; |
@@ -6047,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6047 | BUG_ON(level != btrfs_header_level(next)); | 6054 | BUG_ON(level != btrfs_header_level(next)); |
6048 | path->nodes[level] = next; | 6055 | path->nodes[level] = next; |
6049 | path->slots[level] = 0; | 6056 | path->slots[level] = 0; |
6050 | path->locks[level] = 1; | 6057 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6051 | wc->level = level; | 6058 | wc->level = level; |
6052 | if (wc->level == 1) | 6059 | if (wc->level == 1) |
6053 | wc->reada_slot = 0; | 6060 | wc->reada_slot = 0; |
@@ -6118,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6118 | BUG_ON(level == 0); | 6125 | BUG_ON(level == 0); |
6119 | btrfs_tree_lock(eb); | 6126 | btrfs_tree_lock(eb); |
6120 | btrfs_set_lock_blocking(eb); | 6127 | btrfs_set_lock_blocking(eb); |
6121 | path->locks[level] = 1; | 6128 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6122 | 6129 | ||
6123 | ret = btrfs_lookup_extent_info(trans, root, | 6130 | ret = btrfs_lookup_extent_info(trans, root, |
6124 | eb->start, eb->len, | 6131 | eb->start, eb->len, |
@@ -6127,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6127 | BUG_ON(ret); | 6134 | BUG_ON(ret); |
6128 | BUG_ON(wc->refs[level] == 0); | 6135 | BUG_ON(wc->refs[level] == 0); |
6129 | if (wc->refs[level] == 1) { | 6136 | if (wc->refs[level] == 1) { |
6130 | btrfs_tree_unlock(eb); | 6137 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
6131 | path->locks[level] = 0; | ||
6132 | return 1; | 6138 | return 1; |
6133 | } | 6139 | } |
6134 | } | 6140 | } |
@@ -6150,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6150 | btrfs_header_generation(eb) == trans->transid) { | 6156 | btrfs_header_generation(eb) == trans->transid) { |
6151 | btrfs_tree_lock(eb); | 6157 | btrfs_tree_lock(eb); |
6152 | btrfs_set_lock_blocking(eb); | 6158 | btrfs_set_lock_blocking(eb); |
6153 | path->locks[level] = 1; | 6159 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6154 | } | 6160 | } |
6155 | clean_tree_block(trans, root, eb); | 6161 | clean_tree_block(trans, root, eb); |
6156 | } | 6162 | } |
@@ -6229,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6229 | return 0; | 6235 | return 0; |
6230 | 6236 | ||
6231 | if (path->locks[level]) { | 6237 | if (path->locks[level]) { |
6232 | btrfs_tree_unlock(path->nodes[level]); | 6238 | btrfs_tree_unlock_rw(path->nodes[level], |
6239 | path->locks[level]); | ||
6233 | path->locks[level] = 0; | 6240 | path->locks[level] = 0; |
6234 | } | 6241 | } |
6235 | free_extent_buffer(path->nodes[level]); | 6242 | free_extent_buffer(path->nodes[level]); |
@@ -6281,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6281 | path->nodes[level] = btrfs_lock_root_node(root); | 6288 | path->nodes[level] = btrfs_lock_root_node(root); |
6282 | btrfs_set_lock_blocking(path->nodes[level]); | 6289 | btrfs_set_lock_blocking(path->nodes[level]); |
6283 | path->slots[level] = 0; | 6290 | path->slots[level] = 0; |
6284 | path->locks[level] = 1; | 6291 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6285 | memset(&wc->update_progress, 0, | 6292 | memset(&wc->update_progress, 0, |
6286 | sizeof(wc->update_progress)); | 6293 | sizeof(wc->update_progress)); |
6287 | } else { | 6294 | } else { |
@@ -6449,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6449 | level = btrfs_header_level(node); | 6456 | level = btrfs_header_level(node); |
6450 | path->nodes[level] = node; | 6457 | path->nodes[level] = node; |
6451 | path->slots[level] = 0; | 6458 | path->slots[level] = 0; |
6452 | path->locks[level] = 1; | 6459 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6453 | 6460 | ||
6454 | wc->refs[parent_level] = 1; | 6461 | wc->refs[parent_level] = 1; |
6455 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 6462 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
@@ -6524,15 +6531,28 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
6524 | return flags; | 6531 | return flags; |
6525 | } | 6532 | } |
6526 | 6533 | ||
6527 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) | 6534 | static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) |
6528 | { | 6535 | { |
6529 | struct btrfs_space_info *sinfo = cache->space_info; | 6536 | struct btrfs_space_info *sinfo = cache->space_info; |
6530 | u64 num_bytes; | 6537 | u64 num_bytes; |
6538 | u64 min_allocable_bytes; | ||
6531 | int ret = -ENOSPC; | 6539 | int ret = -ENOSPC; |
6532 | 6540 | ||
6533 | if (cache->ro) | 6541 | if (cache->ro) |
6534 | return 0; | 6542 | return 0; |
6535 | 6543 | ||
6544 | /* | ||
6545 | * We need some metadata space and system metadata space for | ||
6546 | * allocating chunks in some corner cases until we force to set | ||
6547 | * it to be readonly. | ||
6548 | */ | ||
6549 | if ((sinfo->flags & | ||
6550 | (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && | ||
6551 | !force) | ||
6552 | min_allocable_bytes = 1 * 1024 * 1024; | ||
6553 | else | ||
6554 | min_allocable_bytes = 0; | ||
6555 | |||
6536 | spin_lock(&sinfo->lock); | 6556 | spin_lock(&sinfo->lock); |
6537 | spin_lock(&cache->lock); | 6557 | spin_lock(&cache->lock); |
6538 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | 6558 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - |
@@ -6540,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
6540 | 6560 | ||
6541 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6561 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
6542 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6562 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
6543 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { | 6563 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= |
6564 | sinfo->total_bytes) { | ||
6544 | sinfo->bytes_readonly += num_bytes; | 6565 | sinfo->bytes_readonly += num_bytes; |
6545 | sinfo->bytes_reserved += cache->reserved_pinned; | 6566 | sinfo->bytes_reserved += cache->reserved_pinned; |
6546 | cache->reserved_pinned = 0; | 6567 | cache->reserved_pinned = 0; |
@@ -6571,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6571 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 6592 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6572 | CHUNK_ALLOC_FORCE); | 6593 | CHUNK_ALLOC_FORCE); |
6573 | 6594 | ||
6574 | ret = set_block_group_ro(cache); | 6595 | ret = set_block_group_ro(cache, 0); |
6575 | if (!ret) | 6596 | if (!ret) |
6576 | goto out; | 6597 | goto out; |
6577 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 6598 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
@@ -6579,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6579 | CHUNK_ALLOC_FORCE); | 6600 | CHUNK_ALLOC_FORCE); |
6580 | if (ret < 0) | 6601 | if (ret < 0) |
6581 | goto out; | 6602 | goto out; |
6582 | ret = set_block_group_ro(cache); | 6603 | ret = set_block_group_ro(cache, 0); |
6583 | out: | 6604 | out: |
6584 | btrfs_end_transaction(trans, root); | 6605 | btrfs_end_transaction(trans, root); |
6585 | return ret; | 6606 | return ret; |
@@ -7016,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7016 | 7037 | ||
7017 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7038 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7018 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7039 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7019 | set_block_group_ro(cache); | 7040 | set_block_group_ro(cache, 1); |
7020 | } | 7041 | } |
7021 | 7042 | ||
7022 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | 7043 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { |
@@ -7030,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7030 | * mirrored block groups. | 7051 | * mirrored block groups. |
7031 | */ | 7052 | */ |
7032 | list_for_each_entry(cache, &space_info->block_groups[3], list) | 7053 | list_for_each_entry(cache, &space_info->block_groups[3], list) |
7033 | set_block_group_ro(cache); | 7054 | set_block_group_ro(cache, 1); |
7034 | list_for_each_entry(cache, &space_info->block_groups[4], list) | 7055 | list_for_each_entry(cache, &space_info->block_groups[4], list) |
7035 | set_block_group_ro(cache); | 7056 | set_block_group_ro(cache, 1); |
7036 | } | 7057 | } |
7037 | 7058 | ||
7038 | init_global_block_rsv(info); | 7059 | init_global_block_rsv(info); |