Diffstat (limited to 'fs/btrfs/extent-tree.c'):

 fs/btrfs/extent-tree.c | 241 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 216 insertions(+), 25 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 40c95135d037..02c2b29a0840 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3298,7 +3298,7 @@ again:
 		if (ret)
 			goto out_put;
 
-		ret = btrfs_truncate_free_space_cache(root, trans, inode);
+		ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
 		if (ret)
 			goto out_put;
 	}
@@ -3382,20 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
 	return 0;
 }
 
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+/*
+ * transaction commit does final block group cache writeback during a
+ * critical section where nothing is allowed to change the FS.  This is
+ * required in order for the cache to actually match the block group,
+ * but can introduce a lot of latency into the commit.
+ *
+ * So, btrfs_start_dirty_block_groups is here to kick off block group
+ * cache IO.  There's a chance we'll have to redo some of it if the
+ * block group changes again during the commit, but it greatly reduces
+ * the commit latency by getting rid of the easy block groups while
+ * we're still allowing others to join the commit.
+ */
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	int ret = 0;
 	int should_put;
-	struct btrfs_path *path;
-	LIST_HEAD(io);
+	struct btrfs_path *path = NULL;
+	LIST_HEAD(dirty);
+	struct list_head *io = &cur_trans->io_bgs;
 	int num_started = 0;
-	int num_waited = 0;
+	int loops = 0;
+
+	spin_lock(&cur_trans->dirty_bgs_lock);
+	if (!list_empty(&cur_trans->dirty_bgs)) {
+		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+	}
+	spin_unlock(&cur_trans->dirty_bgs_lock);
 
-	if (list_empty(&cur_trans->dirty_bgs))
+again:
+	if (list_empty(&dirty)) {
+		btrfs_free_path(path);
 		return 0;
+	}
+
+	/*
+	 * make sure all the block groups on our dirty list actually
+	 * exist
+	 */
+	btrfs_create_pending_block_groups(trans, root);
+
+	if (!path) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	}
+
+	while (!list_empty(&dirty)) {
+		cache = list_first_entry(&dirty,
+					 struct btrfs_block_group_cache,
+					 dirty_list);
+
+		/*
+		 * cache_write_mutex is here only to save us from balance
+		 * deleting this block group while we are writing out the
+		 * cache
+		 */
+		mutex_lock(&trans->transaction->cache_write_mutex);
+
+		/*
+		 * this can happen if something re-dirties a block
+		 * group that is already under IO.  Just wait for it to
+		 * finish and then do it all again
+		 */
+		if (!list_empty(&cache->io_list)) {
+			list_del_init(&cache->io_list);
+			btrfs_wait_cache_io(root, trans, cache,
+					    &cache->io_ctl, path,
+					    cache->key.objectid);
+			btrfs_put_block_group(cache);
+		}
+
+
+		/*
+		 * btrfs_wait_cache_io uses the cache->dirty_list to decide
+		 * if it should update the cache_state.  Don't delete
+		 * until after we wait.
+		 *
+		 * Since we're not running in the commit critical section
+		 * we need the dirty_bgs_lock to protect from update_block_group
+		 */
+		spin_lock(&cur_trans->dirty_bgs_lock);
+		list_del_init(&cache->dirty_list);
+		spin_unlock(&cur_trans->dirty_bgs_lock);
+
+		should_put = 1;
+
+		cache_save_setup(cache, trans, path);
+
+		if (cache->disk_cache_state == BTRFS_DC_SETUP) {
+			cache->io_ctl.inode = NULL;
+			ret = btrfs_write_out_cache(root, trans, cache, path);
+			if (ret == 0 && cache->io_ctl.inode) {
+				num_started++;
+				should_put = 0;
+
+				/*
+				 * the cache_write_mutex is protecting
+				 * the io_list
+				 */
+				list_add_tail(&cache->io_list, io);
+			} else {
+				/*
+				 * if we failed to write the cache, the
+				 * generation will be bad and life goes on
+				 */
+				ret = 0;
+			}
+		}
+		if (!ret)
+			ret = write_one_cache_group(trans, root, path, cache);
+		mutex_unlock(&trans->transaction->cache_write_mutex);
+
+		/* if it's not on the io list, we need to put the block group */
+		if (should_put)
+			btrfs_put_block_group(cache);
+
+		if (ret)
+			break;
+	}
+
+	/*
+	 * go through delayed refs for all the stuff we've just kicked off
+	 * and then loop back (just once)
+	 */
+	ret = btrfs_run_delayed_refs(trans, root, 0);
+	if (!ret && loops == 0) {
+		loops++;
+		spin_lock(&cur_trans->dirty_bgs_lock);
+		list_splice_init(&cur_trans->dirty_bgs, &dirty);
+		spin_unlock(&cur_trans->dirty_bgs_lock);
+		goto again;
+	}
+
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root)
+{
+	struct btrfs_block_group_cache *cache;
+	struct btrfs_transaction *cur_trans = trans->transaction;
+	int ret = 0;
+	int should_put;
+	struct btrfs_path *path;
+	struct list_head *io = &cur_trans->io_bgs;
+	int num_started = 0;
 
 	path = btrfs_alloc_path();
 	if (!path)
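The shape of btrfs_start_dirty_block_groups above is a splice-and-retry loop: steal the shared dirty list under the spinlock, do the slow writeout with the lock dropped, then loop back exactly once for anything re-dirtied in the meantime. A minimal userspace sketch of just that control flow (the list, lock, and write_one() below are stand-ins, not btrfs APIs):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int id;
};

static pthread_mutex_t dirty_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *dirty_head;		/* shared list; others may append */

static void write_one(struct node *n)
{
	printf("writeback for group %d\n", n->id);	/* stand-in for cache IO */
	free(n);
}

static void start_dirty_groups(void)
{
	struct node *mine;
	int loops = 0;

again:
	/* splice: take the whole shared list private, then drop the lock */
	pthread_mutex_lock(&dirty_lock);
	mine = dirty_head;
	dirty_head = NULL;
	pthread_mutex_unlock(&dirty_lock);

	/* the slow part runs unlocked, so writers can keep dirtying groups */
	while (mine) {
		struct node *n = mine;

		mine = n->next;
		write_one(n);
	}

	/* one extra pass picks up anything re-dirtied during writeout */
	if (loops++ == 0)
		goto again;
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		n->id = i;
		n->next = dirty_head;
		dirty_head = n;
	}
	start_dirty_groups();
	return 0;
}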
@@ -3423,14 +3559,16 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 					    &cache->io_ctl, path,
 					    cache->key.objectid);
 			btrfs_put_block_group(cache);
-			num_waited++;
 		}
 
+		/*
+		 * don't remove from the dirty list until after we've waited
+		 * on any pending IO
+		 */
 		list_del_init(&cache->dirty_list);
 		should_put = 1;
 
-		if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-			cache_save_setup(cache, trans, path);
+		cache_save_setup(cache, trans, path);
 
 		if (!ret)
 			ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
@@ -3441,7 +3579,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		if (ret == 0 && cache->io_ctl.inode) {
 			num_started++;
 			should_put = 0;
-			list_add_tail(&cache->io_list, &io);
+			list_add_tail(&cache->io_list, io);
 		} else {
 			/*
 			 * if we failed to write the cache, the
@@ -3458,11 +3596,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		btrfs_put_block_group(cache);
 	}
 
-	while (!list_empty(&io)) {
-		cache = list_first_entry(&io, struct btrfs_block_group_cache,
+	while (!list_empty(io)) {
+		cache = list_first_entry(io, struct btrfs_block_group_cache,
 					 io_list);
 		list_del_init(&cache->io_list);
-		num_waited++;
 		btrfs_wait_cache_io(root, trans, cache,
 				    &cache->io_ctl, path, cache->key.objectid);
 		btrfs_put_block_group(cache);
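Both writeout paths now funnel in-flight caches through the shared cur_trans->io_bgs list: IO is started for every group first, and only afterwards does the loop at the bottom wait on each one, so the writes overlap instead of completing one at a time. A compressed sketch of that start-then-wait bookkeeping (toy types; io_start()/io_wait() are placeholders, not the io_ctl machinery):

#include <stdio.h>

struct group {
	int id;
	int io_started;
};

/* kick off cache IO; returns nonzero if there is now IO to wait on */
static int io_start(struct group *g)
{
	g->io_started = 1;
	return 1;
}

static void io_wait(struct group *g)
{
	g->io_started = 0;
	printf("group %d flushed\n", g->id);
}

static void write_dirty_groups(struct group *groups, int n)
{
	struct group *io_list[16];	/* stands in for cur_trans->io_bgs */
	int io_count = 0;

	/* phase 1: start IO for every dirty group, don't wait yet */
	for (int i = 0; i < n; i++)
		if (io_start(&groups[i]))
			io_list[io_count++] = &groups[i];

	/* phase 2: all writes are in flight; now wait on each in turn */
	while (io_count > 0)
		io_wait(io_list[--io_count]);
}

int main(void)
{
	struct group groups[3] = { {0, 0}, {1, 0}, {2, 0} };

	write_dirty_groups(groups, 3);
	return 0;
}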
@@ -5459,15 +5596,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		if (!alloc && cache->cached == BTRFS_CACHE_NO)
 			cache_block_group(cache, 1);
 
-		spin_lock(&trans->transaction->dirty_bgs_lock);
-		if (list_empty(&cache->dirty_list)) {
-			list_add_tail(&cache->dirty_list,
-				      &trans->transaction->dirty_bgs);
-			trans->transaction->num_dirty_bgs++;
-			btrfs_get_block_group(cache);
-		}
-		spin_unlock(&trans->transaction->dirty_bgs_lock);
-
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
 
@@ -5516,6 +5644,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 				spin_unlock(&info->unused_bgs_lock);
 			}
 		}
+
+		spin_lock(&trans->transaction->dirty_bgs_lock);
+		if (list_empty(&cache->dirty_list)) {
+			list_add_tail(&cache->dirty_list,
+				      &trans->transaction->dirty_bgs);
+			trans->transaction->num_dirty_bgs++;
+			btrfs_get_block_group(cache);
+		}
+		spin_unlock(&trans->transaction->dirty_bgs_lock);
+
 		btrfs_put_block_group(cache);
 		total -= num_bytes;
 		bytenr += num_bytes;
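The relocated block in update_block_group is the usual "mark dirty at most once per transaction" idiom: membership is tested via list_empty() on the group's own list head, and the dirty list takes its own reference so the group cannot vanish while queued. Roughly, in stand-in form (a flag plays the role of the empty list head, and get/put are plain counters):

#include <pthread.h>

struct group {
	int refs;
	int queued;			/* plays the role of list_empty(&dirty_list) */
	struct group *dirty_next;
};

static pthread_mutex_t dirty_bgs_lock = PTHREAD_MUTEX_INITIALIZER;
static struct group *dirty_bgs;
static unsigned long num_dirty_bgs;

/* safe to call on every extent update; only the first call queues */
static void mark_group_dirty(struct group *cache)
{
	pthread_mutex_lock(&dirty_bgs_lock);
	if (!cache->queued) {
		cache->queued = 1;
		cache->dirty_next = dirty_bgs;
		dirty_bgs = cache;
		num_dirty_bgs++;
		cache->refs++;		/* the dirty list owns a reference */
	}
	pthread_mutex_unlock(&dirty_bgs_lock);
}

int main(void)
{
	struct group g = { .refs = 1 };

	mark_group_dirty(&g);
	mark_group_dirty(&g);		/* second call is a no-op: already queued */
	return g.refs == 2 ? 0 : 1;
}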
@@ -8602,10 +8740,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
 	BUG_ON(cache->ro);
 
+again:
 	trans = btrfs_join_transaction(root);
 	if (IS_ERR(trans))
 		return PTR_ERR(trans);
 
+	/*
+	 * we're not allowed to set block groups readonly after the dirty
+	 * block groups cache has started writing.  If it already started,
+	 * back off and let this transaction commit
+	 */
+	mutex_lock(&root->fs_info->ro_block_group_mutex);
+	if (trans->transaction->dirty_bg_run) {
+		u64 transid = trans->transid;
+
+		mutex_unlock(&root->fs_info->ro_block_group_mutex);
+		btrfs_end_transaction(trans, root);
+
+		ret = btrfs_wait_for_commit(root, transid);
+		if (ret)
+			return ret;
+		goto again;
+	}
+
+
 	ret = set_block_group_ro(cache, 0);
 	if (!ret)
 		goto out;
@@ -8620,6 +8778,7 @@ out:
 		alloc_flags = update_block_group_flags(root, cache->flags);
 		check_system_chunk(trans, root, alloc_flags);
 	}
+	mutex_unlock(&root->fs_info->ro_block_group_mutex);
 
 	btrfs_end_transaction(trans, root);
 	return ret;
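The read-only path above amounts to: join the running transaction, and if its dirty block group writeout has already begun, give up the handle, wait for that very transaction to commit, and retry from scratch. As a self-contained sketch of that back-off loop (the transaction struct and helpers are simplified stand-ins, not the btrfs ones):

#include <stdbool.h>

struct txn {
	long id;
	bool dirty_bg_run;	/* set once cache writeout has started */
};

static struct txn current_txn = { .id = 1, .dirty_bg_run = true };

static struct txn *join_transaction(void)
{
	return &current_txn;
}

static void end_transaction(struct txn *t)
{
	(void)t;
}

/* pretend the commit finished; the next join sees a fresh transaction */
static int wait_for_commit(long transid)
{
	if (current_txn.id == transid) {
		current_txn.id++;
		current_txn.dirty_bg_run = false;
	}
	return 0;
}

static int set_group_ro(void)
{
	struct txn *trans;
	int ret;

again:
	trans = join_transaction();

	/* too late to go read-only in this transaction: back off and retry */
	if (trans->dirty_bg_run) {
		long transid = trans->id;

		end_transaction(trans);
		ret = wait_for_commit(transid);
		if (ret)
			return ret;
		goto again;
	}

	/* ...flip the block group read-only here... */
	end_transaction(trans);
	return 0;
}

int main(void)
{
	return set_group_ro();
}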
@@ -9425,7 +9584,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
+	/*
+	 * get the inode first so any iput calls done for the io_list
+	 * aren't the final iput (no unlinks allowed now)
+	 */
 	inode = lookup_free_space_inode(tree_root, block_group, path);
+
+	mutex_lock(&trans->transaction->cache_write_mutex);
+	/*
+	 * make sure our free space cache IO is done before removing the
+	 * free space inode
+	 */
+	spin_lock(&trans->transaction->dirty_bgs_lock);
+	if (!list_empty(&block_group->io_list)) {
+		list_del_init(&block_group->io_list);
+
+		WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
+
+		spin_unlock(&trans->transaction->dirty_bgs_lock);
+		btrfs_wait_cache_io(root, trans, block_group,
+				    &block_group->io_ctl, path,
+				    block_group->key.objectid);
+		btrfs_put_block_group(block_group);
+		spin_lock(&trans->transaction->dirty_bgs_lock);
+	}
+
+	if (!list_empty(&block_group->dirty_list)) {
+		list_del_init(&block_group->dirty_list);
+		btrfs_put_block_group(block_group);
+	}
+	spin_unlock(&trans->transaction->dirty_bgs_lock);
+	mutex_unlock(&trans->transaction->cache_write_mutex);
+
 	if (!IS_ERR(inode)) {
 		ret = btrfs_orphan_add(trans, inode);
 		if (ret) {
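Removal now drains both lists under cache_write_mutex, so it cannot race the writeout loops above: in-flight cache IO is waited on first, dropping the spinlock across the blocking wait, then the dirty-list reference is released. The locking shape, reduced to stand-ins (pthread mutexes in place of the kernel mutex/spinlock pair):

#include <pthread.h>

struct group {
	int refs;
	int on_io_list;
	int on_dirty_list;
};

static pthread_mutex_t cache_write_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dirty_bgs_lock = PTHREAD_MUTEX_INITIALIZER;

static void wait_cache_io(struct group *g) { (void)g; /* pretend: IO done */ }
static void put_group(struct group *g) { g->refs--; }

static void drain_for_removal(struct group *g)
{
	/* writers take cache_write_mutex too, so no new IO can start on g */
	pthread_mutex_lock(&cache_write_mutex);
	pthread_mutex_lock(&dirty_bgs_lock);

	if (g->on_io_list) {
		g->on_io_list = 0;	/* delist first, like list_del_init */
		/* drop the inner lock across the blocking wait, then retake */
		pthread_mutex_unlock(&dirty_bgs_lock);
		wait_cache_io(g);
		put_group(g);		/* reference held by the io list */
		pthread_mutex_lock(&dirty_bgs_lock);
	}

	if (g->on_dirty_list) {
		g->on_dirty_list = 0;
		put_group(g);		/* reference held by the dirty list */
	}

	pthread_mutex_unlock(&dirty_bgs_lock);
	pthread_mutex_unlock(&cache_write_mutex);
}

int main(void)
{
	struct group g = { .refs = 3, .on_io_list = 1, .on_dirty_list = 1 };

	drain_for_removal(&g);
	return g.refs == 1 ? 0 : 1;
}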
@@ -9518,11 +9708,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
 	spin_lock(&trans->transaction->dirty_bgs_lock);
 	if (!list_empty(&block_group->dirty_list)) {
-		list_del_init(&block_group->dirty_list);
-		btrfs_put_block_group(block_group);
+		WARN_ON(1);
+	}
+	if (!list_empty(&block_group->io_list)) {
+		WARN_ON(1);
 	}
 	spin_unlock(&trans->transaction->dirty_bgs_lock);
-
 	btrfs_remove_free_space_cache(block_group);
 
 	spin_lock(&block_group->space_info->lock);