diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 13:49:22 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 13:49:22 -0400 |
commit | 07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch) | |
tree | e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs/extent-tree.c | |
parent | 63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff) | |
parent | c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff) |
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This has our merge window series of cleanups and fixes. These target
a wide range of issues, but do include some important fixes for
qgroups, O_DIRECT, and fsync handling. Jeff Mahoney moved around a
few definitions to make them easier for userland to consume.
Also whiteout support is included now that issues with overlayfs have
been cleared up.
I have one more fix pending for page faults during btrfs_copy_from_user,
but I wanted to get this bulk out the door first"
* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
btrfs: fix memory leak during RAID 5/6 device replacement
Btrfs: add semaphore to synchronize direct IO writes with fsync
Btrfs: fix race between block group relocation and nocow writes
Btrfs: fix race between fsync and direct IO writes for prealloc extents
Btrfs: fix number of transaction units for renames with whiteout
Btrfs: pin logs earlier when doing a rename exchange operation
Btrfs: unpin logs if rename exchange operation fails
Btrfs: fix inode leak on failure to setup whiteout inode in rename
btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
Btrfs: pin log earlier when renaming
Btrfs: unpin log if rename operation fails
Btrfs: don't do unnecessary delalloc flushes when relocating
Btrfs: don't wait for unrelated IO to finish before relocation
Btrfs: fix empty symlink after creating symlink and fsync parent dir
Btrfs: fix for incorrect directory entries after fsync log replay
btrfs: build fixup for qgroup_account_snapshot
btrfs: qgroup: Fix qgroup accounting when creating snapshot
Btrfs: fix fspath error deallocation
btrfs: make find_workspace warn if there are no workspaces
btrfs: make find_workspace always succeed
...
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 167 |
1 files changed, 138 insertions, 29 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 84e060eb0de8..9424864fd01a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3824,6 +3824,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) | |||
3824 | return readonly; | 3824 | return readonly; |
3825 | } | 3825 | } |
3826 | 3826 | ||
3827 | bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) | ||
3828 | { | ||
3829 | struct btrfs_block_group_cache *bg; | ||
3830 | bool ret = true; | ||
3831 | |||
3832 | bg = btrfs_lookup_block_group(fs_info, bytenr); | ||
3833 | if (!bg) | ||
3834 | return false; | ||
3835 | |||
3836 | spin_lock(&bg->lock); | ||
3837 | if (bg->ro) | ||
3838 | ret = false; | ||
3839 | else | ||
3840 | atomic_inc(&bg->nocow_writers); | ||
3841 | spin_unlock(&bg->lock); | ||
3842 | |||
3843 | /* no put on block group, done by btrfs_dec_nocow_writers */ | ||
3844 | if (!ret) | ||
3845 | btrfs_put_block_group(bg); | ||
3846 | |||
3847 | return ret; | ||
3848 | |||
3849 | } | ||
3850 | |||
3851 | void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) | ||
3852 | { | ||
3853 | struct btrfs_block_group_cache *bg; | ||
3854 | |||
3855 | bg = btrfs_lookup_block_group(fs_info, bytenr); | ||
3856 | ASSERT(bg); | ||
3857 | if (atomic_dec_and_test(&bg->nocow_writers)) | ||
3858 | wake_up_atomic_t(&bg->nocow_writers); | ||
3859 | /* | ||
3860 | * Once for our lookup and once for the lookup done by a previous call | ||
3861 | * to btrfs_inc_nocow_writers() | ||
3862 | */ | ||
3863 | btrfs_put_block_group(bg); | ||
3864 | btrfs_put_block_group(bg); | ||
3865 | } | ||
3866 | |||
3867 | static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a) | ||
3868 | { | ||
3869 | schedule(); | ||
3870 | return 0; | ||
3871 | } | ||
3872 | |||
3873 | void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) | ||
3874 | { | ||
3875 | wait_on_atomic_t(&bg->nocow_writers, | ||
3876 | btrfs_wait_nocow_writers_atomic_t, | ||
3877 | TASK_UNINTERRUPTIBLE); | ||
3878 | } | ||
3879 | |||
3827 | static const char *alloc_name(u64 flags) | 3880 | static const char *alloc_name(u64 flags) |
3828 | { | 3881 | { |
3829 | switch (flags) { | 3882 | switch (flags) { |
@@ -4141,7 +4194,7 @@ commit_trans: | |||
4141 | 4194 | ||
4142 | if (need_commit > 0) { | 4195 | if (need_commit > 0) { |
4143 | btrfs_start_delalloc_roots(fs_info, 0, -1); | 4196 | btrfs_start_delalloc_roots(fs_info, 0, -1); |
4144 | btrfs_wait_ordered_roots(fs_info, -1); | 4197 | btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1); |
4145 | } | 4198 | } |
4146 | 4199 | ||
4147 | trans = btrfs_join_transaction(root); | 4200 | trans = btrfs_join_transaction(root); |
@@ -4583,7 +4636,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
4583 | */ | 4636 | */ |
4584 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); | 4637 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); |
4585 | if (!current->journal_info) | 4638 | if (!current->journal_info) |
4586 | btrfs_wait_ordered_roots(root->fs_info, nr_items); | 4639 | btrfs_wait_ordered_roots(root->fs_info, nr_items, |
4640 | 0, (u64)-1); | ||
4587 | } | 4641 | } |
4588 | } | 4642 | } |
4589 | 4643 | ||
@@ -4620,7 +4674,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
4620 | 4674 | ||
4621 | /* Calc the number of the pages we need flush for space reservation */ | 4675 | /* Calc the number of the pages we need flush for space reservation */ |
4622 | items = calc_reclaim_items_nr(root, to_reclaim); | 4676 | items = calc_reclaim_items_nr(root, to_reclaim); |
4623 | to_reclaim = items * EXTENT_SIZE_PER_ITEM; | 4677 | to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM; |
4624 | 4678 | ||
4625 | trans = (struct btrfs_trans_handle *)current->journal_info; | 4679 | trans = (struct btrfs_trans_handle *)current->journal_info; |
4626 | block_rsv = &root->fs_info->delalloc_block_rsv; | 4680 | block_rsv = &root->fs_info->delalloc_block_rsv; |
@@ -4632,7 +4686,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
4632 | if (trans) | 4686 | if (trans) |
4633 | return; | 4687 | return; |
4634 | if (wait_ordered) | 4688 | if (wait_ordered) |
4635 | btrfs_wait_ordered_roots(root->fs_info, items); | 4689 | btrfs_wait_ordered_roots(root->fs_info, items, |
4690 | 0, (u64)-1); | ||
4636 | return; | 4691 | return; |
4637 | } | 4692 | } |
4638 | 4693 | ||
@@ -4671,7 +4726,8 @@ skip_async: | |||
4671 | 4726 | ||
4672 | loops++; | 4727 | loops++; |
4673 | if (wait_ordered && !trans) { | 4728 | if (wait_ordered && !trans) { |
4674 | btrfs_wait_ordered_roots(root->fs_info, items); | 4729 | btrfs_wait_ordered_roots(root->fs_info, items, |
4730 | 0, (u64)-1); | ||
4675 | } else { | 4731 | } else { |
4676 | time_left = schedule_timeout_killable(1); | 4732 | time_left = schedule_timeout_killable(1); |
4677 | if (time_left) | 4733 | if (time_left) |
@@ -6172,6 +6228,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, | |||
6172 | return 0; | 6228 | return 0; |
6173 | } | 6229 | } |
6174 | 6230 | ||
6231 | static void | ||
6232 | btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg) | ||
6233 | { | ||
6234 | atomic_inc(&bg->reservations); | ||
6235 | } | ||
6236 | |||
6237 | void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, | ||
6238 | const u64 start) | ||
6239 | { | ||
6240 | struct btrfs_block_group_cache *bg; | ||
6241 | |||
6242 | bg = btrfs_lookup_block_group(fs_info, start); | ||
6243 | ASSERT(bg); | ||
6244 | if (atomic_dec_and_test(&bg->reservations)) | ||
6245 | wake_up_atomic_t(&bg->reservations); | ||
6246 | btrfs_put_block_group(bg); | ||
6247 | } | ||
6248 | |||
6249 | static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a) | ||
6250 | { | ||
6251 | schedule(); | ||
6252 | return 0; | ||
6253 | } | ||
6254 | |||
6255 | void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) | ||
6256 | { | ||
6257 | struct btrfs_space_info *space_info = bg->space_info; | ||
6258 | |||
6259 | ASSERT(bg->ro); | ||
6260 | |||
6261 | if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA)) | ||
6262 | return; | ||
6263 | |||
6264 | /* | ||
6265 | * Our block group is read only but before we set it to read only, | ||
6266 | * some task might have had allocated an extent from it already, but it | ||
6267 | * has not yet created a respective ordered extent (and added it to a | ||
6268 | * root's list of ordered extents). | ||
6269 | * Therefore wait for any task currently allocating extents, since the | ||
6270 | * block group's reservations counter is incremented while a read lock | ||
6271 | * on the groups' semaphore is held and decremented after releasing | ||
6272 | * the read access on that semaphore and creating the ordered extent. | ||
6273 | */ | ||
6274 | down_write(&space_info->groups_sem); | ||
6275 | up_write(&space_info->groups_sem); | ||
6276 | |||
6277 | wait_on_atomic_t(&bg->reservations, | ||
6278 | btrfs_wait_bg_reservations_atomic_t, | ||
6279 | TASK_UNINTERRUPTIBLE); | ||
6280 | } | ||
6281 | |||
6175 | /** | 6282 | /** |
6176 | * btrfs_update_reserved_bytes - update the block_group and space info counters | 6283 | * btrfs_update_reserved_bytes - update the block_group and space info counters |
6177 | * @cache: The cache we are manipulating | 6284 | * @cache: The cache we are manipulating |
@@ -7025,36 +7132,35 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, | |||
7025 | int delalloc) | 7132 | int delalloc) |
7026 | { | 7133 | { |
7027 | struct btrfs_block_group_cache *used_bg = NULL; | 7134 | struct btrfs_block_group_cache *used_bg = NULL; |
7028 | bool locked = false; | 7135 | |
7029 | again: | ||
7030 | spin_lock(&cluster->refill_lock); | 7136 | spin_lock(&cluster->refill_lock); |
7031 | if (locked) { | 7137 | while (1) { |
7032 | if (used_bg == cluster->block_group) | 7138 | used_bg = cluster->block_group; |
7139 | if (!used_bg) | ||
7140 | return NULL; | ||
7141 | |||
7142 | if (used_bg == block_group) | ||
7033 | return used_bg; | 7143 | return used_bg; |
7034 | 7144 | ||
7035 | up_read(&used_bg->data_rwsem); | 7145 | btrfs_get_block_group(used_bg); |
7036 | btrfs_put_block_group(used_bg); | ||
7037 | } | ||
7038 | 7146 | ||
7039 | used_bg = cluster->block_group; | 7147 | if (!delalloc) |
7040 | if (!used_bg) | 7148 | return used_bg; |
7041 | return NULL; | ||
7042 | 7149 | ||
7043 | if (used_bg == block_group) | 7150 | if (down_read_trylock(&used_bg->data_rwsem)) |
7044 | return used_bg; | 7151 | return used_bg; |
7045 | 7152 | ||
7046 | btrfs_get_block_group(used_bg); | 7153 | spin_unlock(&cluster->refill_lock); |
7047 | 7154 | ||
7048 | if (!delalloc) | 7155 | down_read(&used_bg->data_rwsem); |
7049 | return used_bg; | ||
7050 | 7156 | ||
7051 | if (down_read_trylock(&used_bg->data_rwsem)) | 7157 | spin_lock(&cluster->refill_lock); |
7052 | return used_bg; | 7158 | if (used_bg == cluster->block_group) |
7159 | return used_bg; | ||
7053 | 7160 | ||
7054 | spin_unlock(&cluster->refill_lock); | 7161 | up_read(&used_bg->data_rwsem); |
7055 | down_read(&used_bg->data_rwsem); | 7162 | btrfs_put_block_group(used_bg); |
7056 | locked = true; | 7163 | } |
7057 | goto again; | ||
7058 | } | 7164 | } |
7059 | 7165 | ||
7060 | static inline void | 7166 | static inline void |
@@ -7431,6 +7537,7 @@ checks: | |||
7431 | btrfs_add_free_space(block_group, offset, num_bytes); | 7537 | btrfs_add_free_space(block_group, offset, num_bytes); |
7432 | goto loop; | 7538 | goto loop; |
7433 | } | 7539 | } |
7540 | btrfs_inc_block_group_reservations(block_group); | ||
7434 | 7541 | ||
7435 | /* we are all good, lets return */ | 7542 | /* we are all good, lets return */ |
7436 | ins->objectid = search_start; | 7543 | ins->objectid = search_start; |
@@ -7612,8 +7719,10 @@ again: | |||
7612 | WARN_ON(num_bytes < root->sectorsize); | 7719 | WARN_ON(num_bytes < root->sectorsize); |
7613 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, | 7720 | ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, |
7614 | flags, delalloc); | 7721 | flags, delalloc); |
7615 | 7722 | if (!ret && !is_data) { | |
7616 | if (ret == -ENOSPC) { | 7723 | btrfs_dec_block_group_reservations(root->fs_info, |
7724 | ins->objectid); | ||
7725 | } else if (ret == -ENOSPC) { | ||
7617 | if (!final_tried && ins->offset) { | 7726 | if (!final_tried && ins->offset) { |
7618 | num_bytes = min(num_bytes >> 1, ins->offset); | 7727 | num_bytes = min(num_bytes >> 1, ins->offset); |
7619 | num_bytes = round_down(num_bytes, root->sectorsize); | 7728 | num_bytes = round_down(num_bytes, root->sectorsize); |
@@ -9058,7 +9167,7 @@ out: | |||
9058 | if (!for_reloc && root_dropped == false) | 9167 | if (!for_reloc && root_dropped == false) |
9059 | btrfs_add_dead_root(root); | 9168 | btrfs_add_dead_root(root); |
9060 | if (err && err != -EAGAIN) | 9169 | if (err && err != -EAGAIN) |
9061 | btrfs_std_error(root->fs_info, err, NULL); | 9170 | btrfs_handle_fs_error(root->fs_info, err, NULL); |
9062 | return err; | 9171 | return err; |
9063 | } | 9172 | } |
9064 | 9173 | ||