Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c  241
1 files changed, 216 insertions, 25 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 40c95135d037..02c2b29a0840 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3298,7 +3298,7 @@ again:
 		if (ret)
 			goto out_put;
 
-		ret = btrfs_truncate_free_space_cache(root, trans, inode);
+		ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
 		if (ret)
 			goto out_put;
 	}
@@ -3382,20 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+/*
+ * transaction commit does final block group cache writeback during a
+ * critical section where nothing is allowed to change the FS. This is
+ * required in order for the cache to actually match the block group,
+ * but can introduce a lot of latency into the commit.
+ *
+ * So, btrfs_start_dirty_block_groups is here to kick off block group
+ * cache IO. There's a chance we'll have to redo some of it if the
+ * block group changes again during the commit, but it greatly reduces
+ * the commit latency by getting rid of the easy block groups while
+ * we're still allowing others to join the commit.
+ */
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	int ret = 0;
 	int should_put;
-	struct btrfs_path *path;
-	LIST_HEAD(io);
+	struct btrfs_path *path = NULL;
+	LIST_HEAD(dirty);
+	struct list_head *io = &cur_trans->io_bgs;
 	int num_started = 0;
-	int num_waited = 0;
+	int loops = 0;
+
+	spin_lock(&cur_trans->dirty_bgs_lock);
+	if (!list_empty(&cur_trans->dirty_bgs)) {
+		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+	}
+	spin_unlock(&cur_trans->dirty_bgs_lock);
 
-	if (list_empty(&cur_trans->dirty_bgs))
+again:
+	if (list_empty(&dirty)) {
+		btrfs_free_path(path);
 		return 0;
+	}
+
+	/*
+	 * make sure all the block groups on our dirty list actually
+	 * exist
+	 */
+	btrfs_create_pending_block_groups(trans, root);
+
+	if (!path) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	}
+
+	while (!list_empty(&dirty)) {
+		cache = list_first_entry(&dirty,
+					 struct btrfs_block_group_cache,
+					 dirty_list);
+
+		/*
+		 * cache_write_mutex is here only to save us from balance
+		 * deleting this block group while we are writing out the
+		 * cache
+		 */
+		mutex_lock(&trans->transaction->cache_write_mutex);
+
+		/*
+		 * this can happen if something re-dirties a block
+		 * group that is already under IO. Just wait for it to
+		 * finish and then do it all again
+		 */
+		if (!list_empty(&cache->io_list)) {
+			list_del_init(&cache->io_list);
+			btrfs_wait_cache_io(root, trans, cache,
+					    &cache->io_ctl, path,
+					    cache->key.objectid);
+			btrfs_put_block_group(cache);
+		}
+
+
+		/*
+		 * btrfs_wait_cache_io uses the cache->dirty_list to decide
+		 * if it should update the cache_state. Don't delete
+		 * until after we wait.
+		 *
+		 * Since we're not running in the commit critical section
+		 * we need the dirty_bgs_lock to protect from update_block_group
+		 */
+		spin_lock(&cur_trans->dirty_bgs_lock);
+		list_del_init(&cache->dirty_list);
+		spin_unlock(&cur_trans->dirty_bgs_lock);
+
+		should_put = 1;
+
+		cache_save_setup(cache, trans, path);
+
+		if (cache->disk_cache_state == BTRFS_DC_SETUP) {
+			cache->io_ctl.inode = NULL;
+			ret = btrfs_write_out_cache(root, trans, cache, path);
+			if (ret == 0 && cache->io_ctl.inode) {
+				num_started++;
+				should_put = 0;
+
+				/*
+				 * the cache_write_mutex is protecting
+				 * the io_list
+				 */
+				list_add_tail(&cache->io_list, io);
+			} else {
+				/*
+				 * if we failed to write the cache, the
+				 * generation will be bad and life goes on
+				 */
+				ret = 0;
+			}
+		}
+		if (!ret)
+			ret = write_one_cache_group(trans, root, path, cache);
+		mutex_unlock(&trans->transaction->cache_write_mutex);
+
+		/* if its not on the io list, we need to put the block group */
+		if (should_put)
+			btrfs_put_block_group(cache);
+
+		if (ret)
+			break;
+	}
+
+	/*
+	 * go through delayed refs for all the stuff we've just kicked off
+	 * and then loop back (just once)
+	 */
+	ret = btrfs_run_delayed_refs(trans, root, 0);
+	if (!ret && loops == 0) {
+		loops++;
+		spin_lock(&cur_trans->dirty_bgs_lock);
+		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+		spin_unlock(&cur_trans->dirty_bgs_lock);
+		goto again;
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root)
+{
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_transaction *cur_trans = trans->transaction;
+	int ret = 0;
+	int should_put;
+	struct btrfs_path *path;
+	struct list_head *io = &cur_trans->io_bgs;
+	int num_started = 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
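The hunk above is the heart of the change: btrfs_start_dirty_block_groups splices cur_trans->dirty_bgs onto a private list and writes those block groups out before the commit critical section, looping back exactly once (the loops counter) to pick up anything re-dirtied in the meantime; the slimmed-down btrfs_write_dirty_block_groups that follows only has to handle the leftovers inside the commit. A minimal userspace sketch of that splice-and-flush flow, with invented names (struct bg, mark_dirty, flush_dirty) rather than btrfs code:

#include <stdio.h>

struct bg {
	int id;
	int dirty;
	struct bg *next;
};

static struct bg *dirty_list;	/* stand-in for cur_trans->dirty_bgs */

static void mark_dirty(struct bg *b)
{
	if (!b->dirty) {	/* a block group is queued at most once */
		b->dirty = 1;
		b->next = dirty_list;
		dirty_list = b;
	}
}

/* grab the whole list, like list_splice_init(), and write every entry */
static void flush_dirty(const char *phase)
{
	struct bg *b = dirty_list;

	dirty_list = NULL;
	while (b) {
		struct bg *next = b->next;

		printf("%s: block group %d\n", phase, b->id);
		b->dirty = 0;
		b->next = NULL;
		b = next;
	}
}

/* phase one: runs before the commit, loops back just once (loops == 0) */
static void start_dirty_block_groups(void)
{
	flush_dirty("start");
	flush_dirty("start, second pass");
}

int main(void)
{
	struct bg a = { .id = 1 }, b = { .id = 2 };

	mark_dirty(&a);
	mark_dirty(&b);
	start_dirty_block_groups();	/* bulk of the cache IO happens here */

	mark_dirty(&a);			/* re-dirtied after phase one */
	flush_dirty("commit");		/* short pass in the critical section */
	return 0;
}

Compiled with a plain C compiler it prints one line per writeout: both groups go out in the "start" phase, and only the group re-dirtied afterwards is written again in the short "commit" pass.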
@@ -3423,14 +3559,16 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 					    &cache->io_ctl, path,
 					    cache->key.objectid);
 			btrfs_put_block_group(cache);
-			num_waited++;
 		}
 
+		/*
+		 * don't remove from the dirty list until after we've waited
+		 * on any pending IO
+		 */
 		list_del_init(&cache->dirty_list);
 		should_put = 1;
 
-		if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-			cache_save_setup(cache, trans, path);
+		cache_save_setup(cache, trans, path);
 
 		if (!ret)
 			ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
@@ -3441,7 +3579,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		if (ret == 0 && cache->io_ctl.inode) {
 			num_started++;
 			should_put = 0;
-			list_add_tail(&cache->io_list, &io);
+			list_add_tail(&cache->io_list, io);
 		} else {
 			/*
 			 * if we failed to write the cache, the
@@ -3458,11 +3596,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		btrfs_put_block_group(cache);
 	}
 
-	while (!list_empty(&io)) {
-		cache = list_first_entry(&io, struct btrfs_block_group_cache,
+	while (!list_empty(io)) {
+		cache = list_first_entry(io, struct btrfs_block_group_cache,
 					 io_list);
 		list_del_init(&cache->io_list);
-		num_waited++;
 		btrfs_wait_cache_io(root, trans, cache,
 				    &cache->io_ctl, path, cache->key.objectid);
 		btrfs_put_block_group(cache);
@@ -5459,15 +5596,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		if (!alloc && cache->cached == BTRFS_CACHE_NO)
 			cache_block_group(cache, 1);
 
-		spin_lock(&trans->transaction->dirty_bgs_lock);
-		if (list_empty(&cache->dirty_list)) {
-			list_add_tail(&cache->dirty_list,
-				      &trans->transaction->dirty_bgs);
-			trans->transaction->num_dirty_bgs++;
-			btrfs_get_block_group(cache);
-		}
-		spin_unlock(&trans->transaction->dirty_bgs_lock);
-
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
 
@@ -5516,6 +5644,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 				spin_unlock(&info->unused_bgs_lock);
 			}
 		}
+
+		spin_lock(&trans->transaction->dirty_bgs_lock);
+		if (list_empty(&cache->dirty_list)) {
+			list_add_tail(&cache->dirty_list,
+				      &trans->transaction->dirty_bgs);
+			trans->transaction->num_dirty_bgs++;
+			btrfs_get_block_group(cache);
+		}
+		spin_unlock(&trans->transaction->dirty_bgs_lock);
+
 		btrfs_put_block_group(cache);
 		total -= num_bytes;
 		bytenr += num_bytes;
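Taken together, the two update_block_group hunks above move the dirty-list insertion from the top of the function to after the space accounting, but the idiom is unchanged: a block group goes onto trans->transaction->dirty_bgs at most once, and the list itself holds a reference (btrfs_get_block_group on insert) that whichever writeout path later removes it must drop with btrfs_put_block_group. A small sketch of that insert-once, one-reference-per-membership idiom, using invented toy types (struct group, mark_group_dirty) instead of the btrfs structures:

#include <stdio.h>
#include <stdbool.h>

/* toy types: one extra reference is held per dirty-list membership */
struct group {
	int refs;
	bool on_dirty_list;
};

static void get_group(struct group *g) { g->refs++; }
static void put_group(struct group *g) { g->refs--; }

/* called whenever accounting changes; queues the group only once */
static void mark_group_dirty(struct group *g)
{
	if (!g->on_dirty_list) {
		g->on_dirty_list = true;
		get_group(g);		/* reference owned by the dirty list */
	}
}

/* the writeout path takes the group off the list and drops that reference */
static void write_group(struct group *g)
{
	if (g->on_dirty_list) {
		g->on_dirty_list = false;
		put_group(g);
	}
}

int main(void)
{
	struct group g = { .refs = 1 };	/* caller's own reference */

	mark_group_dirty(&g);
	mark_group_dirty(&g);		/* second call is a no-op */
	printf("refs while dirty: %d\n", g.refs);	/* 2 */
	write_group(&g);
	printf("refs after writeout: %d\n", g.refs);	/* 1 */
	return 0;
}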
@@ -8602,10 +8740,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
 	BUG_ON(cache->ro);
 
+again:
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
 
+	/*
+	 * we're not allowed to set block groups readonly after the dirty
+	 * block groups cache has started writing. If it already started,
+	 * back off and let this transaction commit
+	 */
+	mutex_lock(&root->fs_info->ro_block_group_mutex);
+	if (trans->transaction->dirty_bg_run) {
+		u64 transid = trans->transid;
+
+		mutex_unlock(&root->fs_info->ro_block_group_mutex);
+		btrfs_end_transaction(trans, root);
+
+		ret = btrfs_wait_for_commit(root, transid);
+		if (ret)
+			return ret;
+		goto again;
+	}
+
+
 	ret = set_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
@@ -8620,6 +8778,7 @@ out:
 		alloc_flags = update_block_group_flags(root, cache->flags);
 		check_system_chunk(trans, root, alloc_flags);
 	}
+	mutex_unlock(&root->fs_info->ro_block_group_mutex);
 
 	btrfs_end_transaction(trans, root);
 	return ret;
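The btrfs_set_block_group_ro hunks above add a guard: once this transaction's dirty block group writeout has started (dirty_bg_run), it is too late to flip a group read-only, so the caller ends its transaction handle, waits for that commit to finish, and retries, all serialized by the new ro_block_group_mutex. A single-threaded toy model of that back-off-and-retry loop, with invented names (struct txn, join_transaction, wait_for_commit) standing in for the real transaction API:

#include <stdio.h>
#include <stdbool.h>

/* toy transaction state; all names here are invented for illustration */
struct txn {
	unsigned long long id;
	bool dirty_bg_run;	/* has cache writeout already started? */
};

static struct txn current_txn = { .id = 1, .dirty_bg_run = true };

static struct txn *join_transaction(void)
{
	return &current_txn;
}

static void end_transaction(struct txn *t)
{
	(void)t;		/* nothing to release in the toy model */
}

/* pretend the old transaction committed and a fresh one was opened */
static int wait_for_commit(unsigned long long id)
{
	printf("waited for commit of transaction %llu\n", id);
	current_txn.id = id + 1;
	current_txn.dirty_bg_run = false;
	return 0;
}

static int make_block_group_ro(void)
{
	struct txn *t;

again:
	t = join_transaction();
	if (t->dirty_bg_run) {
		/* too late to go read-only here; back off and retry */
		unsigned long long id = t->id;

		end_transaction(t);
		if (wait_for_commit(id))
			return -1;
		goto again;
	}
	printf("block group marked read-only in transaction %llu\n", t->id);
	end_transaction(t);
	return 0;
}

int main(void)
{
	return make_block_group_ro();
}

The sketch only models the retry control flow; in the kernel the ro_block_group_mutex is what keeps dirty_bg_run from changing between the check and the decision.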
@@ -9425,7 +9584,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
+	/*
+	 * get the inode first so any iput calls done for the io_list
+	 * aren't the final iput (no unlinks allowed now)
+	 */
 	inode = lookup_free_space_inode(tree_root, block_group, path);
+
+	mutex_lock(&trans->transaction->cache_write_mutex);
+	/*
+	 * make sure our free spache cache IO is done before remove the
+	 * free space inode
+	 */
+	spin_lock(&trans->transaction->dirty_bgs_lock);
+	if (!list_empty(&block_group->io_list)) {
+		list_del_init(&block_group->io_list);
+
+		WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
+
+		spin_unlock(&trans->transaction->dirty_bgs_lock);
+		btrfs_wait_cache_io(root, trans, block_group,
+				    &block_group->io_ctl, path,
+				    block_group->key.objectid);
+		btrfs_put_block_group(block_group);
+		spin_lock(&trans->transaction->dirty_bgs_lock);
+	}
+
+	if (!list_empty(&block_group->dirty_list)) {
+		list_del_init(&block_group->dirty_list);
+		btrfs_put_block_group(block_group);
+	}
+	spin_unlock(&trans->transaction->dirty_bgs_lock);
+	mutex_unlock(&trans->transaction->cache_write_mutex);
+
 	if (!IS_ERR(inode)) {
 		ret = btrfs_orphan_add(trans, inode);
 		if (ret) {
@@ -9518,11 +9708,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
 	spin_lock(&trans->transaction->dirty_bgs_lock);
 	if (!list_empty(&block_group->dirty_list)) {
-		list_del_init(&block_group->dirty_list);
-		btrfs_put_block_group(block_group);
+		WARN_ON(1);
+	}
+	if (!list_empty(&block_group->io_list)) {
+		WARN_ON(1);
 	}
 	spin_unlock(&trans->transaction->dirty_bgs_lock);
-
 	btrfs_remove_free_space_cache(block_group);
 
 	spin_lock(&block_group->space_info->lock);