author     Chris Mason <clm@fb.com>    2015-12-23 16:28:35 -0500
committer  Chris Mason <clm@fb.com>    2015-12-23 16:28:35 -0500
commit     a53fe2576955171449711933242d8fb1c13a7d5c (patch)
tree       6fd4a583f2e9606c6c61226b40a060e4c1b2dc60
parent     bb9d687618695e8291f1e6209eb3211d231f97bb (diff)
parent     e44081ef611832b47a86abf4e36dc0ed2e950884 (diff)
Merge branch 'for-chris-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/fdmanana/linux into for-linus-4.5
-rw-r--r--  fs/btrfs/extent-tree.c  |  19
-rw-r--r--  fs/btrfs/inode.c        | 127
-rw-r--r--  fs/btrfs/transaction.c  |  17
-rw-r--r--  fs/btrfs/tree-defrag.c  |  27
-rw-r--r--  fs/btrfs/volumes.c      |  17
5 files changed, 151 insertions(+), 56 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 065055342881..d436117e20dd 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3684,11 +3684,21 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	/*
-	 * We don't need the lock here since we are protected by the transaction
-	 * commit. We want to do the cache_save_setup first and then run the
+	 * Even though we are in the critical section of the transaction commit,
+	 * we can still have concurrent tasks adding elements to this
+	 * transaction's list of dirty block groups. These tasks correspond to
+	 * endio free space workers started when writeback finishes for a
+	 * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
+	 * allocate new block groups as a result of COWing nodes of the root
+	 * tree when updating the free space inode. The writeback for the space
+	 * caches is triggered by an earlier call to
+	 * btrfs_start_dirty_block_groups() and iterations of the following
+	 * loop.
+	 * Also we want to do the cache_save_setup first and then run the
 	 * delayed refs to make sure we have the best chance at doing this all
 	 * in one shot.
 	 */
+	spin_lock(&cur_trans->dirty_bgs_lock);
 	while (!list_empty(&cur_trans->dirty_bgs)) {
 		cache = list_first_entry(&cur_trans->dirty_bgs,
 					 struct btrfs_block_group_cache,
@@ -3700,11 +3710,13 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		 * finish and then do it all again
 		 */
 		if (!list_empty(&cache->io_list)) {
+			spin_unlock(&cur_trans->dirty_bgs_lock);
 			list_del_init(&cache->io_list);
 			btrfs_wait_cache_io(root, trans, cache,
 					    &cache->io_ctl, path,
 					    cache->key.objectid);
 			btrfs_put_block_group(cache);
+			spin_lock(&cur_trans->dirty_bgs_lock);
 		}
 
 		/*
@@ -3712,6 +3724,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		 * on any pending IO
 		 */
 		list_del_init(&cache->dirty_list);
+		spin_unlock(&cur_trans->dirty_bgs_lock);
 		should_put = 1;
 
 		cache_save_setup(cache, trans, path);
@@ -3743,7 +3756,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		/* if its not on the io list, we need to put the block group */
 		if (should_put)
 			btrfs_put_block_group(cache);
+		spin_lock(&cur_trans->dirty_bgs_lock);
 	}
+	spin_unlock(&cur_trans->dirty_bgs_lock);
 
 	while (!list_empty(io)) {
 		cache = list_first_entry(io, struct btrfs_block_group_cache,
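The four hunks above close a race on cur_trans->dirty_bgs: the list is now walked with dirty_bgs_lock held, and the lock is dropped around the calls that can sleep (btrfs_wait_cache_io() and the block group writeout) and re-taken before the loop re-tests the list head, since endio workers may append new entries in the meantime. As a rough illustration of that pattern, here is a minimal userspace sketch (compile with -pthread); work_lock, work_list and do_blocking_io are hypothetical stand-ins, not btrfs code:

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
	struct work_item *next;
	int id;
};

static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;
static struct work_item *work_list;	/* producers may push concurrently */

static void do_blocking_io(struct work_item *item)
{
	/* stands in for btrfs_wait_cache_io() / the block group writeout */
	printf("flushing item %d\n", item->id);
}

static void drain_work(void)
{
	pthread_mutex_lock(&work_lock);
	while (work_list) {
		struct work_item *item = work_list;

		work_list = item->next;
		/*
		 * Drop the lock before sleeping; re-take it before the loop
		 * re-tests the list head, exactly as the hunks above do.
		 */
		pthread_mutex_unlock(&work_lock);
		do_blocking_io(item);
		free(item);
		pthread_mutex_lock(&work_lock);
	}
	pthread_mutex_unlock(&work_lock);
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct work_item *item = malloc(sizeof(*item));

		item->id = i;
		item->next = work_list;
		work_list = item;
	}
	drain_work();
	return 0;
}
```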
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dc616329c9a1..bdb0008712c8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -66,6 +66,13 @@ struct btrfs_iget_args {
 	struct btrfs_root *root;
 };
 
+struct btrfs_dio_data {
+	u64 outstanding_extents;
+	u64 reserve;
+	u64 unsubmitted_oe_range_start;
+	u64 unsubmitted_oe_range_end;
+};
+
 static const struct inode_operations btrfs_dir_inode_operations;
 static const struct inode_operations btrfs_symlink_inode_operations;
 static const struct inode_operations btrfs_dir_ro_inode_operations;
@@ -7408,25 +7415,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 			btrfs_start_ordered_extent(inode, ordered, 1);
 			btrfs_put_ordered_extent(ordered);
 		} else {
-			/* Screw you mmap */
-			ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
-			if (ret)
-				break;
-			ret = filemap_fdatawait_range(inode->i_mapping,
-						      lockstart,
-						      lockend);
-			if (ret)
-				break;
-
 			/*
-			 * If we found a page that couldn't be invalidated just
-			 * fall back to buffered.
+			 * We could trigger writeback for this range (and wait
+			 * for it to complete) and then invalidate the pages for
+			 * this range (through invalidate_inode_pages2_range()),
+			 * but that can lead us to a deadlock with a concurrent
+			 * call to readpages() (a buffered read or a defrag call
+			 * triggered a readahead) on a page lock due to an
+			 * ordered dio extent we created before but did not have
+			 * yet a corresponding bio submitted (whence it can not
+			 * complete), which makes readpages() wait for that
+			 * ordered extent to complete while holding a lock on
+			 * that page.
 			 */
-			ret = invalidate_inode_pages2_range(inode->i_mapping,
-						lockstart >> PAGE_CACHE_SHIFT,
-						lockend >> PAGE_CACHE_SHIFT);
-			if (ret)
-				break;
+			ret = -ENOTBLK;
+			break;
 		}
 
 		cond_resched();
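With this change, lock_extent_direct() no longer tries to write back and invalidate a conflicting page cache range itself; it returns -ENOTBLK, which the generic Linux direct IO paths treat as "redo this write through the page cache". A toy sketch of that fallback contract, with hypothetical names:

```c
#include <errno.h>
#include <stdio.h>

/* Pretend the DIO range still has cached pages we must not invalidate. */
static int lock_range_direct(int range_has_pages)
{
	if (range_has_pages)
		return -ENOTBLK;	/* as in the hunk: bail, don't invalidate */
	return 0;
}

static int do_write(int range_has_pages)
{
	int ret = lock_range_direct(range_has_pages);

	if (ret == -ENOTBLK) {
		/* the generic DIO code reacts by redoing the write buffered */
		printf("falling back to buffered write\n");
		return 0;
	}
	return ret;
}

int main(void)
{
	return do_write(1);
}
```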
@@ -7482,11 +7485,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 	return em;
 }
 
-struct btrfs_dio_data {
-	u64 outstanding_extents;
-	u64 reserve;
-};
-
 static void adjust_dio_outstanding_extents(struct inode *inode,
 					   struct btrfs_dio_data *dio_data,
 					   const u64 len)
@@ -7670,6 +7668,7 @@ unlock:
 		btrfs_free_reserved_data_space(inode, start, len);
 		WARN_ON(dio_data->reserve < len);
 		dio_data->reserve -= len;
+		dio_data->unsubmitted_oe_range_end = start + len;
 		current->journal_info = dio_data;
 	}
 
@@ -7992,22 +7991,22 @@ static void btrfs_endio_direct_read(struct bio *bio)
 	bio_put(bio);
 }
 
-static void btrfs_endio_direct_write(struct bio *bio)
+static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
+						    const u64 offset,
+						    const u64 bytes,
+						    const int uptodate)
 {
-	struct btrfs_dio_private *dip = bio->bi_private;
-	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_ordered_extent *ordered = NULL;
-	u64 ordered_offset = dip->logical_offset;
-	u64 ordered_bytes = dip->bytes;
-	struct bio *dio_bio;
+	u64 ordered_offset = offset;
+	u64 ordered_bytes = bytes;
 	int ret;
 
 again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
 						   ordered_bytes,
-						   !bio->bi_error);
+						   uptodate);
 	if (!ret)
 		goto out_test;
 
@@ -8020,13 +8019,22 @@ out_test:
 	 * our bio might span multiple ordered extents. If we haven't
 	 * completed the accounting for the whole dio, go back and try again
 	 */
-	if (ordered_offset < dip->logical_offset + dip->bytes) {
-		ordered_bytes = dip->logical_offset + dip->bytes -
-			ordered_offset;
+	if (ordered_offset < offset + bytes) {
+		ordered_bytes = offset + bytes - ordered_offset;
 		ordered = NULL;
 		goto again;
 	}
-	dio_bio = dip->dio_bio;
+}
+
+static void btrfs_endio_direct_write(struct bio *bio)
+{
+	struct btrfs_dio_private *dip = bio->bi_private;
+	struct bio *dio_bio = dip->dio_bio;
+
+	btrfs_endio_direct_write_update_ordered(dip->inode,
+						dip->logical_offset,
+						dip->bytes,
+						!bio->bi_error);
 
 	kfree(dip);
 
@@ -8334,6 +8342,21 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 		dip->subio_endio = btrfs_subio_endio_read;
 	}
 
+	/*
+	 * Reset the range for unsubmitted ordered extents (to a 0 length range)
+	 * even if we fail to submit a bio, because in such case we do the
+	 * corresponding error handling below and it must not be done a second
+	 * time by btrfs_direct_IO().
+	 */
+	if (write) {
+		struct btrfs_dio_data *dio_data = current->journal_info;
+
+		dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+			dip->bytes;
+		dio_data->unsubmitted_oe_range_start =
+			dio_data->unsubmitted_oe_range_end;
+	}
+
 	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
 	if (!ret)
 		return;
@@ -8362,24 +8385,15 @@ free_ordered:
 		dip = NULL;
 		io_bio = NULL;
 	} else {
-		if (write) {
-			struct btrfs_ordered_extent *ordered;
-
-			ordered = btrfs_lookup_ordered_extent(inode,
-							      file_offset);
-			set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
-			/*
-			 * Decrements our ref on the ordered extent and removes
-			 * the ordered extent from the inode's ordered tree,
-			 * doing all the proper resource cleanup such as for the
-			 * reserved space and waking up any waiters for this
-			 * ordered extent (through btrfs_remove_ordered_extent).
-			 */
-			btrfs_finish_ordered_io(ordered);
-		} else {
+		if (write)
+			btrfs_endio_direct_write_update_ordered(inode,
+						file_offset,
+						dio_bio->bi_iter.bi_size,
+						0);
+		else
 			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
 				      file_offset + dio_bio->bi_iter.bi_size - 1);
-		}
+
 		dio_bio->bi_error = -EIO;
 		/*
 		 * Releases and cleans up our dio_bio, no need to bio_put()
@@ -8479,6 +8493,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		 * originally calculated. Abuse current->journal_info for this.
 		 */
 		dio_data.reserve = round_up(count, root->sectorsize);
+		dio_data.unsubmitted_oe_range_start = (u64)offset;
+		dio_data.unsubmitted_oe_range_end = (u64)offset;
 		current->journal_info = &dio_data;
 	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
 			    &BTRFS_I(inode)->runtime_flags)) {
@@ -8497,6 +8513,19 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		if (dio_data.reserve)
 			btrfs_delalloc_release_space(inode, offset,
 						     dio_data.reserve);
+		/*
+		 * On error we might have left some ordered extents
+		 * without submitting corresponding bios for them, so
+		 * cleanup them up to avoid other tasks getting them
+		 * and waiting for them to complete forever.
+		 */
+		if (dio_data.unsubmitted_oe_range_start <
+		    dio_data.unsubmitted_oe_range_end)
+			btrfs_endio_direct_write_update_ordered(inode,
+				dio_data.unsubmitted_oe_range_start,
+				dio_data.unsubmitted_oe_range_end -
+					dio_data.unsubmitted_oe_range_start,
+				0);
 	} else if (ret >= 0 && (size_t)ret < count)
 		btrfs_delalloc_release_space(inode, offset,
 					     count - (size_t)ret);
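Taken together, the inode.c hunks track, in btrfs_dio_data, a byte range [unsubmitted_oe_range_start, unsubmitted_oe_range_end) of ordered extents that were created but whose bios were never submitted: btrfs_get_blocks_direct() grows the end, btrfs_submit_direct() collapses the range to zero length once it owns the cleanup, and btrfs_direct_IO() cleans up whatever is left non-empty on error. A minimal userspace sketch of that bookkeeping, with hypothetical names mirroring the fields above:

```c
#include <stdint.h>
#include <stdio.h>

struct dio_state {
	uint64_t unsubmitted_start;
	uint64_t unsubmitted_end;
};

/* mirrors btrfs_get_blocks_direct(): an ordered extent now exists */
static void create_ordered_extent(struct dio_state *s, uint64_t start,
				  uint64_t len)
{
	s->unsubmitted_end = start + len;
}

/* mirrors btrfs_submit_direct(): bios now own the cleanup, empty the range */
static void bios_submitted(struct dio_state *s)
{
	s->unsubmitted_start = s->unsubmitted_end;
}

/* mirrors the btrfs_direct_IO() error path: clean up only what is left */
static void cleanup_on_error(const struct dio_state *s)
{
	if (s->unsubmitted_start < s->unsubmitted_end)
		printf("cleaning ordered extents in [%llu, %llu)\n",
		       (unsigned long long)s->unsubmitted_start,
		       (unsigned long long)s->unsubmitted_end);
	else
		printf("nothing left to clean\n");
}

int main(void)
{
	struct dio_state s = { 4096, 4096 };	/* zero-length range at offset */

	create_ordered_extent(&s, 4096, 8192);
	cleanup_on_error(&s);	/* bios never ran: cleans [4096, 12288) */

	bios_submitted(&s);
	cleanup_on_error(&s);	/* range is empty: nothing to do */
	return 0;
}
```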
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index be8eae80ff65..f85ccf634ca1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -75,6 +75,23 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
 			list_del_init(&em->list);
 			free_extent_map(em);
 		}
+		/*
+		 * If any block groups are found in ->deleted_bgs then it's
+		 * because the transaction was aborted and a commit did not
+		 * happen (things failed before writing the new superblock
+		 * and calling btrfs_finish_extent_commit()), so we can not
+		 * discard the physical locations of the block groups.
+		 */
+		while (!list_empty(&transaction->deleted_bgs)) {
+			struct btrfs_block_group_cache *cache;
+
+			cache = list_first_entry(&transaction->deleted_bgs,
+						 struct btrfs_block_group_cache,
+						 bg_list);
+			list_del_init(&cache->bg_list);
+			btrfs_put_block_group_trimming(cache);
+			btrfs_put_block_group(cache);
+		}
 		kmem_cache_free(btrfs_transaction_cachep, transaction);
 	}
 }
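The rule encoded here is that whoever drops the last reference to the transaction must release anything still parked on its lists, because an aborted transaction never reaches btrfs_finish_extent_commit(). A hedged userspace sketch of that teardown idiom (hypothetical names, plain pointers instead of struct list_head):

```c
#include <stdio.h>
#include <stdlib.h>

struct bg {
	struct bg *next;
	int id;
};

struct transaction {
	int refs;
	struct bg *deleted_bgs;	/* parked here until the commit finishes */
};

static void put_transaction(struct transaction *t)
{
	if (--t->refs)
		return;
	/*
	 * Last reference and the commit never completed: leftover block
	 * groups must be released here (and, per the comment above, their
	 * physical ranges must not be discarded).
	 */
	while (t->deleted_bgs) {
		struct bg *cache = t->deleted_bgs;

		t->deleted_bgs = cache->next;
		printf("releasing leftover block group %d\n", cache->id);
		free(cache);
	}
	free(t);
}

int main(void)
{
	struct transaction *t = calloc(1, sizeof(*t));
	struct bg *cache = calloc(1, sizeof(*cache));

	t->refs = 1;
	cache->id = 42;
	t->deleted_bgs = cache;
	put_transaction(t);	/* drains the list before freeing t */
	return 0;
}
```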
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f31db4325339..cb65089127cc 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -89,6 +89,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 	btrfs_release_path(path);
+	/*
+	 * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
+	 * leafs from path->nodes[1], so set lowest_level to 1 to avoid later
+	 * a deadlock (attempting to write lock an already write locked leaf).
+	 */
+	path->lowest_level = 1;
 	wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 
 	if (wret < 0) {
@@ -99,9 +105,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 		ret = 0;
 		goto out;
 	}
-	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
-	next_key_ret = btrfs_find_next_key(root, path, &key, 1,
-					   min_trans);
+	/*
+	 * The node at level 1 must always be locked when our path has
+	 * keep_locks set and lowest_level is 1, regardless of the value of
+	 * path->slots[1].
+	 */
+	BUG_ON(path->locks[1] == 0);
 	ret = btrfs_realloc_node(trans, root,
 				 path->nodes[1], 0,
 				 &last_ret,
@@ -110,6 +119,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 		WARN_ON(ret == -EAGAIN);
 		goto out;
 	}
+	/*
+	 * Now that we reallocated the node we can find the next key. Note that
+	 * btrfs_find_next_key() can release our path and do another search
+	 * without COWing, this is because even with path->keep_locks = 1,
+	 * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a
+	 * node when path->slots[node_level - 1] does not point to the last
+	 * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
+	 * we search for the next key after reallocating our node.
+	 */
+	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+	next_key_ret = btrfs_find_next_key(root, path, &key, 1,
+					   min_trans);
 	if (next_key_ret == 0) {
 		memcpy(&root->defrag_progress, &key, sizeof(key));
 		ret = -EAGAIN;
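The deadlock this avoids is the classic self-deadlock on a non-recursive lock: a search descending all the way to level 0 would write lock a leaf that btrfs_realloc_node() later tries to write lock again, so the search now stops at level 1. A tiny sketch of the failure mode with a pthread mutex (which, like the tree locks here, is not recursive by default):

```c
#include <pthread.h>
#include <stdio.h>

int main(void)
{
	pthread_mutex_t leaf_lock = PTHREAD_MUTEX_INITIALIZER;

	/* The search path write locks the leaf... */
	pthread_mutex_lock(&leaf_lock);

	/*
	 * ...and if the realloc step then tried to take the same lock,
	 * as in: pthread_mutex_lock(&leaf_lock);
	 * a default (non-recursive) mutex would block forever. Stopping
	 * the search at level 1 means the leaf lock is simply never held
	 * here, so the second acquisition cannot happen.
	 */
	pthread_mutex_unlock(&leaf_lock);
	printf("no self-deadlock: leaf lock taken once\n");
	return 0;
}
```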
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 54d2d2cc2c92..a37cc0478bb2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4825,20 +4825,32 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
+	/*
+	 * Take the device list mutex to prevent races with the final phase of
+	 * a device replace operation that replaces the device object associated
+	 * with the map's stripes, because the device object's id can change
+	 * at any time during that final phase of the device replace operation
+	 * (dev-replace.c:btrfs_dev_replace_finishing()).
+	 */
+	mutex_lock(&chunk_root->fs_info->fs_devices->device_list_mutex);
 	for (i = 0; i < map->num_stripes; i++) {
 		device = map->stripes[i].dev;
 		dev_offset = map->stripes[i].physical;
 
 		ret = btrfs_update_device(trans, device);
 		if (ret)
-			goto out;
+			break;
 		ret = btrfs_alloc_dev_extent(trans, device,
 					     chunk_root->root_key.objectid,
 					     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 					     chunk_offset, dev_offset,
 					     stripe_size);
 		if (ret)
-			goto out;
+			break;
+	}
+	if (ret) {
+		mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
+		goto out;
 	}
 
 	stripe = &chunk->stripe;
@@ -4851,6 +4863,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
 		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
 		stripe++;
 	}
+	mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
 
 	btrfs_set_stack_chunk_length(chunk, chunk_size);
 	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
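The error handling rewrite in the first hunk exists because the loop body now runs under device_list_mutex: a goto out from inside the loop would skip the unlock, so errors break out instead and the unlock happens exactly once on each exit path. A small userspace sketch of that pattern (compile with -pthread; names are hypothetical):

```c
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t device_list_lock = PTHREAD_MUTEX_INITIALIZER;

static int update_device(int i)
{
	return i == 2 ? -EIO : 0;	/* pretend the third update fails */
}

static int finish_chunk_alloc(int num_stripes)
{
	int ret = 0;

	pthread_mutex_lock(&device_list_lock);
	for (int i = 0; i < num_stripes; i++) {
		ret = update_device(i);
		if (ret)
			break;	/* not goto: the lock must be dropped first */
	}
	if (ret) {
		pthread_mutex_unlock(&device_list_lock);
		return ret;
	}
	/* ... fill in stripe info while still holding the lock ... */
	pthread_mutex_unlock(&device_list_lock);
	return 0;
}

int main(void)
{
	printf("finish_chunk_alloc: %d\n", finish_chunk_alloc(4));
	return 0;
}
```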