about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/ctree.h 3
-rw-r--r-- fs/btrfs/disk-io.c 63
-rw-r--r-- fs/btrfs/free-space-cache.c 22
-rw-r--r-- fs/btrfs/inode.c 37
-rw-r--r-- fs/btrfs/ioctl.c 14
-rw-r--r-- fs/btrfs/super.c 6
-rw-r--r-- fs/btrfs/tree-checker.c 2
-rw-r--r-- fs/btrfs/tree-log.c 17
8 files changed, 107 insertions, 57 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80953528572d..68f322f600a0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
3163int btrfs_drop_inode(struct inode *inode); 3163int btrfs_drop_inode(struct inode *inode);
3164int __init btrfs_init_cachep(void); 3164int __init btrfs_init_cachep(void);
3165void __cold btrfs_destroy_cachep(void); 3165void __cold btrfs_destroy_cachep(void);
3166struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
3167 struct btrfs_root *root, int *new,
3168 struct btrfs_path *path);
3166struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3169struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3167 struct btrfs_root *root, int *was_new); 3170 struct btrfs_root *root, int *was_new);
3168struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, 3171struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b0ab41da91d1..3f0b6d1936e8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg)
1664 struct btrfs_root *root = arg; 1664 struct btrfs_root *root = arg;
1665 struct btrfs_fs_info *fs_info = root->fs_info; 1665 struct btrfs_fs_info *fs_info = root->fs_info;
1666 int again; 1666 int again;
1667 struct btrfs_trans_handle *trans;
1668 1667
1669 do { 1668 while (1) {
1670 again = 0; 1669 again = 0;
1671 1670
1672 /* Make the cleaner go to sleep early. */ 1671 /* Make the cleaner go to sleep early. */
@@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg)
1715 */ 1714 */
1716 btrfs_delete_unused_bgs(fs_info); 1715 btrfs_delete_unused_bgs(fs_info);
1717sleep: 1716sleep:
1717 if (kthread_should_park())
1718 kthread_parkme();
1719 if (kthread_should_stop())
1720 return 0;
1718 if (!again) { 1721 if (!again) {
1719 set_current_state(TASK_INTERRUPTIBLE); 1722 set_current_state(TASK_INTERRUPTIBLE);
1720 if (!kthread_should_stop()) 1723 schedule();
1721 schedule();
1722 __set_current_state(TASK_RUNNING); 1724 __set_current_state(TASK_RUNNING);
1723 } 1725 }
1724 } while (!kthread_should_stop());
1725
1726 /*
1727 * Transaction kthread is stopped before us and wakes us up.
1728 * However we might have started a new transaction and COWed some
1729 * tree blocks when deleting unused block groups for example. So
1730 * make sure we commit the transaction we started to have a clean
1731 * shutdown when evicting the btree inode - if it has dirty pages
1732 * when we do the final iput() on it, eviction will trigger a
1733 * writeback for it which will fail with null pointer dereferences
1734 * since work queues and other resources were already released and
1735 * destroyed by the time the iput/eviction/writeback is made.
1736 */
1737 trans = btrfs_attach_transaction(root);
1738 if (IS_ERR(trans)) {
1739 if (PTR_ERR(trans) != -ENOENT)
1740 btrfs_err(fs_info,
1741 "cleaner transaction attach returned %ld",
1742 PTR_ERR(trans));
1743 } else {
1744 int ret;
1745
1746 ret = btrfs_commit_transaction(trans);
1747 if (ret)
1748 btrfs_err(fs_info,
1749 "cleaner open transaction commit returned %d",
1750 ret);
1751 } 1726 }
1752
1753 return 0;
1754} 1727}
1755 1728
1756static int transaction_kthread(void *arg) 1729static int transaction_kthread(void *arg)
@@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
3931 int ret; 3904 int ret;
3932 3905
3933 set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); 3906 set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
3907 /*
3908 * We don't want the cleaner to start new transactions, add more delayed
3909 * iputs, etc. while we're closing. We can't use kthread_stop() yet
3910 * because that frees the task_struct, and the transaction kthread might
3911 * still try to wake up the cleaner.
3912 */
3913 kthread_park(fs_info->cleaner_kthread);
3934 3914
3935 /* wait for the qgroup rescan worker to stop */ 3915 /* wait for the qgroup rescan worker to stop */
3936 btrfs_qgroup_wait_for_completion(fs_info, false); 3916 btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
3958 3938
3959 if (!sb_rdonly(fs_info->sb)) { 3939 if (!sb_rdonly(fs_info->sb)) {
3960 /* 3940 /*
3961 * If the cleaner thread is stopped and there are 3941 * The cleaner kthread is stopped, so do one final pass over
3962 * block groups queued for removal, the deletion will be 3942 * unused block groups.
3963 * skipped when we quit the cleaner thread.
3964 */ 3943 */
3965 btrfs_delete_unused_bgs(fs_info); 3944 btrfs_delete_unused_bgs(fs_info);
3966 3945
@@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
4359 unpin = pinned_extents; 4338 unpin = pinned_extents;
4360again: 4339again:
4361 while (1) { 4340 while (1) {
4341 /*
4342 * btrfs_finish_extent_commit() may get the same range as
4343 * ours between find_first_extent_bit and clear_extent_dirty.
4344 * Hence, hold the unused_bg_unpin_mutex to avoid unpinning
4345 * the same extent range twice.
4346 */
4347 mutex_lock(&fs_info->unused_bg_unpin_mutex);
4362 ret = find_first_extent_bit(unpin, 0, &start, &end, 4348 ret = find_first_extent_bit(unpin, 0, &start, &end,
4363 EXTENT_DIRTY, NULL); 4349 EXTENT_DIRTY, NULL);
4364 if (ret) 4350 if (ret) {
4351 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
4365 break; 4352 break;
4353 }
4366 4354
4367 clear_extent_dirty(unpin, start, end); 4355 clear_extent_dirty(unpin, start, end);
4368 btrfs_error_unpin_extent_range(fs_info, start, end); 4356 btrfs_error_unpin_extent_range(fs_info, start, end);
4357 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
4369 cond_resched(); 4358 cond_resched();
4370 } 4359 }
4371 4360
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4ba0aedc878b..74aa552f4793 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
75 * sure NOFS is set to keep us from deadlocking. 75 * sure NOFS is set to keep us from deadlocking.
76 */ 76 */
77 nofs_flag = memalloc_nofs_save(); 77 nofs_flag = memalloc_nofs_save();
78 inode = btrfs_iget(fs_info->sb, &location, root, NULL); 78 inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
79 btrfs_release_path(path);
79 memalloc_nofs_restore(nofs_flag); 80 memalloc_nofs_restore(nofs_flag);
80 if (IS_ERR(inode)) 81 if (IS_ERR(inode))
81 return inode; 82 return inode;
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
838 path->search_commit_root = 1; 839 path->search_commit_root = 1;
839 path->skip_locking = 1; 840 path->skip_locking = 1;
840 841
842 /*
843 * We must pass a path with search_commit_root set to btrfs_iget in
844 * order to avoid a deadlock when allocating extents for the tree root.
845 *
846 * When we are COWing an extent buffer from the tree root, when looking
847 * for a free extent, at extent-tree.c:find_free_extent(), we can find
848 * a block group without its free space cache loaded. When we find one
849 * we must load its space cache which requires reading its free space
850 * cache's inode item from the root tree. If this inode item is located
851 * in the same leaf that we started COWing before, then we end up in
852 * a deadlock on the extent buffer (trying to read lock it when we
853 * previously write locked it).
854 *
855 * It's safe to read the inode item using the commit root because
856 * block groups, once loaded, stay in memory forever (until they are
857 * removed) as well as their space caches once loaded. New block groups
858 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
859 * we will never try to read their inode item while the fs is mounted.
860 */
841 inode = lookup_free_space_inode(fs_info, block_group, path); 861 inode = lookup_free_space_inode(fs_info, block_group, path);
842 if (IS_ERR(inode)) { 862 if (IS_ERR(inode)) {
843 btrfs_free_path(path); 863 btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d3df5b52278c..9ea4c6f0352f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1531,12 +1531,11 @@ out_check:
1531 } 1531 }
1532 btrfs_release_path(path); 1532 btrfs_release_path(path);
1533 1533
1534 if (cur_offset <= end && cow_start == (u64)-1) { 1534 if (cur_offset <= end && cow_start == (u64)-1)
1535 cow_start = cur_offset; 1535 cow_start = cur_offset;
1536 cur_offset = end;
1537 }
1538 1536
1539 if (cow_start != (u64)-1) { 1537 if (cow_start != (u64)-1) {
1538 cur_offset = end;
1540 ret = cow_file_range(inode, locked_page, cow_start, end, end, 1539 ret = cow_file_range(inode, locked_page, cow_start, end, end,
1541 page_started, nr_written, 1, NULL); 1540 page_started, nr_written, 1, NULL);
1542 if (ret) 1541 if (ret)
@@ -3570,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3570/* 3569/*
3571 * read an inode from the btree into the in-memory inode 3570 * read an inode from the btree into the in-memory inode
3572 */ 3571 */
3573static int btrfs_read_locked_inode(struct inode *inode) 3572static int btrfs_read_locked_inode(struct inode *inode,
3573 struct btrfs_path *in_path)
3574{ 3574{
3575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3576 struct btrfs_path *path; 3576 struct btrfs_path *path = in_path;
3577 struct extent_buffer *leaf; 3577 struct extent_buffer *leaf;
3578 struct btrfs_inode_item *inode_item; 3578 struct btrfs_inode_item *inode_item;
3579 struct btrfs_root *root = BTRFS_I(inode)->root; 3579 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3589,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
3589 if (!ret) 3589 if (!ret)
3590 filled = true; 3590 filled = true;
3591 3591
3592 path = btrfs_alloc_path(); 3592 if (!path) {
3593 if (!path) 3593 path = btrfs_alloc_path();
3594 return -ENOMEM; 3594 if (!path)
3595 return -ENOMEM;
3596 }
3595 3597
3596 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3598 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3597 3599
3598 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3600 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3599 if (ret) { 3601 if (ret) {
3600 btrfs_free_path(path); 3602 if (path != in_path)
3603 btrfs_free_path(path);
3601 return ret; 3604 return ret;
3602 } 3605 }
3603 3606
@@ -3722,7 +3725,8 @@ cache_acl:
3722 btrfs_ino(BTRFS_I(inode)), 3725 btrfs_ino(BTRFS_I(inode)),
3723 root->root_key.objectid, ret); 3726 root->root_key.objectid, ret);
3724 } 3727 }
3725 btrfs_free_path(path); 3728 if (path != in_path)
3729 btrfs_free_path(path);
3726 3730
3727 if (!maybe_acls) 3731 if (!maybe_acls)
3728 cache_no_acl(inode); 3732 cache_no_acl(inode);
@@ -5644,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
5644/* Get an inode object given its location and corresponding root. 5648/* Get an inode object given its location and corresponding root.
5645 * Returns in *is_new if the inode was read from disk 5649 * Returns in *is_new if the inode was read from disk
5646 */ 5650 */
5647struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 5651struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
5648 struct btrfs_root *root, int *new) 5652 struct btrfs_root *root, int *new,
5653 struct btrfs_path *path)
5649{ 5654{
5650 struct inode *inode; 5655 struct inode *inode;
5651 5656
@@ -5656,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5656 if (inode->i_state & I_NEW) { 5661 if (inode->i_state & I_NEW) {
5657 int ret; 5662 int ret;
5658 5663
5659 ret = btrfs_read_locked_inode(inode); 5664 ret = btrfs_read_locked_inode(inode, path);
5660 if (!ret) { 5665 if (!ret) {
5661 inode_tree_add(inode); 5666 inode_tree_add(inode);
5662 unlock_new_inode(inode); 5667 unlock_new_inode(inode);
@@ -5678,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5678 return inode; 5683 return inode;
5679} 5684}
5680 5685
5686struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5687 struct btrfs_root *root, int *new)
5688{
5689 return btrfs_iget_path(s, location, root, new, NULL);
5690}
5691
5681static struct inode *new_simple_dir(struct super_block *s, 5692static struct inode *new_simple_dir(struct super_block *s,
5682 struct btrfs_key *key, 5693 struct btrfs_key *key,
5683 struct btrfs_root *root) 5694 struct btrfs_root *root)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3ca6943827ef..802a628e9f7d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
3488 const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize; 3488 const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
3489 3489
3490 len = round_down(i_size_read(src), sz) - loff; 3490 len = round_down(i_size_read(src), sz) - loff;
3491 if (len == 0)
3492 return 0;
3491 olen = len; 3493 olen = len;
3492 } 3494 }
3493 } 3495 }
@@ -4257,9 +4259,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
4257 goto out_unlock; 4259 goto out_unlock;
4258 if (len == 0) 4260 if (len == 0)
4259 olen = len = src->i_size - off; 4261 olen = len = src->i_size - off;
4260 /* if we extend to eof, continue to block boundary */ 4262 /*
4261 if (off + len == src->i_size) 4263 * If we extend to eof, continue to block boundary if and only if the
4264 * destination end offset matches the destination file's size, otherwise
4265 * we would be corrupting data by placing the eof block into the middle
4266 * of a file.
4267 */
4268 if (off + len == src->i_size) {
4269 if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
4270 goto out_unlock;
4262 len = ALIGN(src->i_size, bs) - off; 4271 len = ALIGN(src->i_size, bs) - off;
4272 }
4263 4273
4264 if (len == 0) { 4274 if (len == 0) {
4265 ret = 0; 4275 ret = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b362b45dd757..cbc9d0d2c12d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1916,7 +1916,7 @@ restore:
1916} 1916}
1917 1917
1918/* Used to sort the devices by max_avail(descending sort) */ 1918/* Used to sort the devices by max_avail(descending sort) */
1919static int btrfs_cmp_device_free_bytes(const void *dev_info1, 1919static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
1920 const void *dev_info2) 1920 const void *dev_info2)
1921{ 1921{
1922 if (((struct btrfs_device_info *)dev_info1)->max_avail > 1922 if (((struct btrfs_device_info *)dev_info1)->max_avail >
@@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices(
1945 * The helper to calc the free space on the devices that can be used to store 1945 * The helper to calc the free space on the devices that can be used to store
1946 * file data. 1946 * file data.
1947 */ 1947 */
1948static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, 1948static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
1949 u64 *free_bytes) 1949 u64 *free_bytes)
1950{ 1950{
1951 struct btrfs_device_info *devices_info; 1951 struct btrfs_device_info *devices_info;
1952 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 1952 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index cab0b1f1f741..efcf89a8ba44 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
440 type != (BTRFS_BLOCK_GROUP_METADATA | 440 type != (BTRFS_BLOCK_GROUP_METADATA |
441 BTRFS_BLOCK_GROUP_DATA)) { 441 BTRFS_BLOCK_GROUP_DATA)) {
442 block_group_err(fs_info, leaf, slot, 442 block_group_err(fs_info, leaf, slot,
443"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx", 443"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
444 type, hweight64(type), 444 type, hweight64(type),
445 BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA, 445 BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
446 BTRFS_BLOCK_GROUP_SYSTEM, 446 BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e07f3376b7df..a5ce99a6c936 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4396 logged_end = end; 4396 logged_end = end;
4397 4397
4398 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 4398 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
4399 /*
4400 * Skip extents outside our logging range. It's important to do
4401 * it for correctness because if we don't ignore them, we may
4402 * log them before their ordered extent completes, and therefore
4403 * we could log them without logging their respective checksums
4404 * (the checksum items are added to the csum tree at the very
4405 * end of btrfs_finish_ordered_io()). Also leave such extents
4406 * outside of our range in the list, since we may have another
4407 * ranged fsync in the near future that needs them. If an extent
4408 * outside our range corresponds to a hole, log it to avoid
4409 * leaving gaps between extents (fsck will complain when we are
4410 * not using the NO_HOLES feature).
4411 */
4412 if ((em->start > end || em->start + em->len <= start) &&
4413 em->block_start != EXTENT_MAP_HOLE)
4414 continue;
4415
4399 list_del_init(&em->list); 4416 list_del_init(&em->list);
4400 /* 4417 /*
4401 * Just an arbitrary number, this can be really CPU intensive 4418 * Just an arbitrary number, this can be really CPU intensive