From 8bb8ab2e93f9c3c9453e13be0f37d344a32a3a6d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Oct 2010 16:52:49 -0400 Subject: Btrfs: rework how we reserve metadata bytes With multi-threaded writes we were getting ENOSPC early because somebody would come in, start flushing delalloc because they couldn't make their reservation, and in the meantime other threads would come in and use the space that was getting freed up, so when the original thread went to check to see if they had space they didn't and they'd return ENOSPC. So instead if we have some free space but not enough for our reservation, take the reservation and then start doing the flushing. The only time we don't take reservations is when we've already overcommitted our space, that way we don't have people who come late to the party way overcommitting ourselves. This also moves all of the retrying and flushing code into reserve_metdata_bytes so it's all uniform. This keeps my fs_mark test from returning -ENOSPC as soon as it starts and actually lets me fill up the disk. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b37d723b9d4a..39adb68a653f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -178,8 +178,6 @@ struct reloc_control { u64 search_start; u64 extents_found; - int block_rsv_retries; - unsigned int stage:8; unsigned int create_reloc_tree:1; unsigned int merge_reloc_tree:1; @@ -2133,7 +2131,6 @@ int prepare_to_merge(struct reloc_control *rc, int err) LIST_HEAD(reloc_roots); u64 num_bytes = 0; int ret; - int retries = 0; mutex_lock(&root->fs_info->trans_mutex); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; @@ -2143,7 +2140,7 @@ again: if (!err) { num_bytes = rc->merging_rsv_size; ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, - num_bytes, &retries); + num_bytes); if (ret) err = ret; } @@ -2155,7 +2152,6 @@ again: btrfs_end_transaction(trans, rc->extent_root); btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); - retries = 0; goto again; } } @@ -2405,15 +2401,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, num_bytes = calcu_metadata_size(rc, node, 1) * 2; trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, - &rc->block_rsv_retries); + ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); if (ret) { if (ret == -EAGAIN) rc->commit_transaction = 1; return ret; } - rc->block_rsv_retries = 0; return 0; } @@ -3554,8 +3548,7 @@ int prepare_to_relocate(struct reloc_control *rc) * is no reservation in transaction handle. */ ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, - rc->extent_root->nodesize * 256, - &rc->block_rsv_retries); + rc->extent_root->nodesize * 256); if (ret) return ret; @@ -3567,7 +3560,6 @@ int prepare_to_relocate(struct reloc_control *rc) rc->extents_found = 0; rc->nodes_relocated = 0; rc->merging_rsv_size = 0; - rc->block_rsv_retries = 0; rc->create_reloc_tree = 1; set_reloc_control(rc); -- cgit v1.2.2 From 0af3d00bad38d3bb9912a60928ad0669f17bdb76 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 21 Jun 2010 14:48:16 -0400 Subject: Btrfs: create special free space cache inode In order to save free space cache, we need an inode to hold the data, and we need a special item to point at the right inode for the right block group. So first, create a special item that will point to the right inode, and the number of extent entries we will have and the number of bitmaps we will have. We truncate and pre-allocate space everytime to make sure it's uptodate. This feature will be turned on as soon as you mount with -o space_cache, however it is safe to boot into old kernels, they will just generate the cache the old fashion way. When you boot back into a newer kernel we will notice that we modified and not the cache and automatically discard the cache. Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b37d723b9d4a..af339eee55b8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -29,6 +29,7 @@ #include "locking.h" #include "btrfs_inode.h" #include "async-thread.h" +#include "free-space-cache.h" /* * backref_node, mapping_node and tree_block start with this @@ -3191,6 +3192,54 @@ static int block_use_full_backref(struct reloc_control *rc, return ret; } +static int delete_block_group_cache(struct btrfs_fs_info *fs_info, + struct inode *inode, u64 ino) +{ + struct btrfs_key key; + struct btrfs_path *path; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_trans_handle *trans; + unsigned long nr; + int ret = 0; + + if (inode) + goto truncate; + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + inode = btrfs_iget(fs_info->sb, &key, root, NULL); + if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { + if (inode && !IS_ERR(inode)) + iput(inode); + return -ENOENT; + } + +truncate: + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + trans = btrfs_join_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_free_path(path); + goto out; + } + + ret = btrfs_truncate_free_space_cache(root, trans, path, inode); + + btrfs_free_path(path); + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); +out: + iput(inode); + return ret; +} + /* * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY * this function scans fs tree to find blocks reference the data extent @@ -3217,15 +3266,27 @@ static int find_data_references(struct reloc_control *rc, int counted; int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - ref_root = btrfs_extent_data_ref_root(leaf, ref); ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); ref_offset = btrfs_extent_data_ref_offset(leaf, ref); ref_count = btrfs_extent_data_ref_count(leaf, ref); + /* + * This is an extent belonging to the free space cache, lets just delete + * it and redo the search. + */ + if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { + ret = delete_block_group_cache(rc->extent_root->fs_info, + NULL, ref_objectid); + if (ret != -ENOENT) + return ret; + ret = 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { err = PTR_ERR(root); @@ -3860,6 +3921,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) { struct btrfs_fs_info *fs_info = extent_root->fs_info; struct reloc_control *rc; + struct inode *inode; + struct btrfs_path *path; int ret; int rw = 0; int err = 0; @@ -3882,6 +3945,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) rw = 1; } + path = btrfs_alloc_path(); + if (!path) { + err = -ENOMEM; + goto out; + } + + inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, + path); + btrfs_free_path(path); + + if (!IS_ERR(inode)) + ret = delete_block_group_cache(fs_info, inode, 0); + else + ret = PTR_ERR(inode); + + if (ret && ret != -ENOENT) { + err = ret; + goto out; + } + rc->data_inode = create_reloc_inode(fs_info, rc->block_group); if (IS_ERR(rc->data_inode)) { err = PTR_ERR(rc->data_inode); -- cgit v1.2.2 From 411fc6bcef54f828a5458f4730c68abdf13c6bf0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 29 Oct 2010 15:14:31 -0400 Subject: Btrfs: Fix variables set but not read (bugs found by gcc 4.6) These are all the cases where a variable is set, but not read which are really bugs. - Couple of incorrect error handling fixed. - One incorrect use of a allocation policy - Some other things Still needs more review. Found by gcc 4.6's new warnings. [akpm@linux-foundation.org: fix build. Might have been bitrot] Signed-off-by: Andi Kleen Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index fd0714475db7..045c9c2b2d7e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3094,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc, BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); ret = get_ref_objectid_v0(rc, path, extent_key, &ref_owner, NULL); + if (ret < 0) + return ret; BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); level = (int)ref_owner; /* FIXME: get real generation */ @@ -4218,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) btrfs_add_ordered_sum(inode, ordered, sums); } btrfs_put_ordered_extent(ordered); - return 0; + return ret; } void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, -- cgit v1.2.2 From 3612b49598c303cfb22a4b609427f829828e2427 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 25 Jan 2011 02:51:38 +0000 Subject: btrfs: fix return value check of btrfs_join_transaction() The error check of btrfs_join_transaction()/btrfs_join_transaction_nolock() is added, and the mistake of the error check in several places is corrected. For more stable Btrfs, I think that we should reduce BUG_ON(). But, I think that long time is necessary for this. So, I propose this patch as a short-term solution. With this patch: - To more stable Btrfs, the part that should be corrected is clarified. - The panic isn't done by the NULL pointer reference etc. (even if BUG_ON() is increased temporarily) - The error code is returned in the place where the error can be easily returned. As a long-term plan: - BUG_ON() is reduced by using the forced-readonly framework, etc. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 045c9c2b2d7e..ea9965430241 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2147,6 +2147,12 @@ again: } trans = btrfs_join_transaction(rc->extent_root, 1); + if (IS_ERR(trans)) { + if (!err) + btrfs_block_rsv_release(rc->extent_root, + rc->block_rsv, num_bytes); + return PTR_ERR(trans); + } if (!err) { if (num_bytes != rc->merging_rsv_size) { @@ -3222,6 +3228,7 @@ truncate: trans = btrfs_join_transaction(root, 0); if (IS_ERR(trans)) { btrfs_free_path(path); + ret = PTR_ERR(trans); goto out; } @@ -3628,6 +3635,7 @@ int prepare_to_relocate(struct reloc_control *rc) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); + BUG_ON(IS_ERR(trans)); btrfs_commit_transaction(trans, rc->extent_root); return 0; } @@ -3804,7 +3812,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + btrfs_commit_transaction(trans, rc->extent_root); out_free: btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); btrfs_free_path(path); @@ -4125,6 +4136,11 @@ int btrfs_recover_relocation(struct btrfs_root *root) set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + err = PTR_ERR(trans); + goto out_free; + } rc->merge_reloc_tree = 1; @@ -4154,9 +4170,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); -out: + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + btrfs_commit_transaction(trans, rc->extent_root); +out_free: kfree(rc); +out: while (!list_empty(&reloc_roots)) { reloc_root = list_entry(reloc_roots.next, struct btrfs_root, root_list); -- cgit v1.2.2 From 98d5dc13e7e74b77ca3b4c3cbded9f48d2dbbbb7 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 20 Jan 2011 06:19:37 +0000 Subject: btrfs: fix return value check of btrfs_start_transaction() The error check of btrfs_start_transaction() is added, and the mistake of the error check on several places is corrected. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ea9965430241..1f5556acb530 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2028,6 +2028,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, while (1) { trans = btrfs_start_transaction(root, 0); + BUG_ON(IS_ERR(trans)); trans->block_rsv = rc->block_rsv; ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, @@ -3665,6 +3666,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) while (1) { trans = btrfs_start_transaction(rc->extent_root, 0); + BUG_ON(IS_ERR(trans)); if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans, rc->extent_root); @@ -4033,6 +4035,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) int ret; trans = btrfs_start_transaction(root->fs_info->tree_root, 0); + BUG_ON(IS_ERR(trans)); memset(&root->root_item.drop_progress, 0, sizeof(root->root_item.drop_progress)); -- cgit v1.2.2 From 6848ad6461e551849ba3c32d945d4f45e96453a6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 14 Feb 2011 16:00:03 -0500 Subject: Btrfs: Fix balance panic Mark the cloned backref_node as checked in clone_backref_node() Signed-off-by: Yan, Zheng Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 1f5556acb530..0825e4ed9447 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, new_node->bytenr = dest->node->start; new_node->level = node->level; new_node->lowest = node->lowest; + new_node->checked = 1; new_node->root = dest; if (!node->lowest) { -- cgit v1.2.2 From c87f08ca44e83b2c8d28f63f9c33f3a270a04bbe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Feb 2011 13:57:04 -0500 Subject: Btrfs: allow balance to explicitly allocate chunks as it relocates Btrfs device shrinking and balancing ends up reallocating all the blocks in order to allow COW to move them to new destinations. It is somewhat awkward in terms of ENOSPC because most of the enospc code is built around the idea that some operation on a reference counted tree triggers allocations in the non-reference counted trees. This commit changes the balancing code to deal with enospc by trying to allocate a new chunk. If that allocation succeeds, we go ahead and retry whatever failed due to enospc. Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0825e4ed9447..31ade5802ae8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) u32 item_size; int ret; int err = 0; + int progress = 0; path = btrfs_alloc_path(); if (!path) @@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) } while (1) { + progress++; trans = btrfs_start_transaction(rc->extent_root, 0); BUG_ON(IS_ERR(trans)); - +restart: if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans, rc->extent_root); continue; @@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) } } } + if (trans && progress && err == -ENOSPC) { + ret = btrfs_force_chunk_alloc(trans, rc->extent_root, + rc->block_group->flags); + if (ret == 0) { + err = 0; + progress = 0; + goto restart; + } + } btrfs_release_path(rc->extent_root, path); clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, -- cgit v1.2.2 From 66b4ffd110f9b48b8d8c1319ee446b53b8d073bf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Jan 2011 16:22:42 -0500 Subject: Btrfs: handle errors in btrfs_orphan_cleanup If we cannot truncate an inode for some reason we will never delete the orphan item associated with that inode, which means that we will loop forever in btrfs_orphan_cleanup. Instead of doing this just return error so we fail to mount. It sucks, but hey it's better than hanging. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 31ade5802ae8..c863c8447015 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4209,7 +4209,7 @@ out: if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); else - btrfs_orphan_cleanup(fs_root); + err = btrfs_orphan_cleanup(fs_root); } return err; } -- cgit v1.2.2 From 97d9a8a420444eb5b5c071d4b3b9c4100a7ae015 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 24 Mar 2011 06:33:21 +0000 Subject: Btrfs: check return value of read_tree_block() This patch is checking return value of read_tree_block(), and if it is NULL, error processing. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c863c8447015..58250e09eb05 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1724,6 +1724,7 @@ again: eb = read_tree_block(dest, old_bytenr, blocksize, old_ptr_gen); + BUG_ON(!eb); btrfs_tree_lock(eb); if (cow) { ret = btrfs_cow_block(trans, dest, eb, parent, @@ -2513,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans, blocksize = btrfs_level_size(root, node->level); generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, blocksize, generation); + if (!eb) { + err = -EIO; + goto next; + } btrfs_tree_lock(eb); btrfs_set_lock_blocking(eb); @@ -2670,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc, BUG_ON(block->key_ready); eb = read_tree_block(rc->extent_root, block->bytenr, block->key.objectid, block->key.offset); + BUG_ON(!eb); WARN_ON(btrfs_header_level(eb) != block->level); if (block->level == 0) btrfs_item_key_to_cpu(eb, &block->key, 0); -- cgit v1.2.2 From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 30 Mar 2011 22:57:33 -0300 Subject: Fix common misspellings Fixes generated by 'codespell' and manually reviewed. Signed-off-by: Lucas De Marchi --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 58250e09eb05..199a80134312 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2346,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, root = next->root; BUG_ON(!root); - /* no other choice for non-refernce counted tree */ + /* no other choice for non-references counted tree */ if (!root->ref_cows) return root; -- cgit v1.2.2 From 581bb050941b4f220f84d3e5ed6dace3d42dd382 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:06:11 +0800 Subject: Btrfs: Cache free inode numbers in memory Currently btrfs stores the highest objectid of the fs tree, and it always returns (highest+1) inode number when we create a file, so inode numbers won't be reclaimed when we delete files, so we'll run out of inode numbers as we keep create/delete files in 32bits machines. This fixes it, and it works similarly to how we cache free space in block cgroups. We start a kernel thread to read the file tree. By scanning inode items, we know which chunks of inode numbers are free, and we cache them in an rb-tree. Because we are searching the commit root, we have to carefully handle the cross-transaction case. The rb-tree is a hybrid extent+bitmap tree, so if we have too many small chunks of inode numbers, we'll use bitmaps. Initially we allow 16K ram of extents, and a bitmap will be used if we exceed this threshold. The extents threshold is adjusted in runtime. Signed-off-by: Li Zefan --- fs/btrfs/relocation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 58250e09eb05..e6cb89357256 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -30,6 +30,7 @@ #include "btrfs_inode.h" #include "async-thread.h" #include "free-space-cache.h" +#include "inode-map.h" /* * backref_node, mapping_node and tree_block start with this @@ -3897,7 +3898,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, if (IS_ERR(trans)) return ERR_CAST(trans); - err = btrfs_find_free_objectid(trans, root, objectid, &objectid); + err = btrfs_find_free_objectid(root, &objectid); if (err) goto out; -- cgit v1.2.2 From 33345d01522f8152f99dc84a3e7a1a45707f387f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:31:50 +0800 Subject: Btrfs: Always use 64bit inode number There's a potential problem in 32bit system when we exhaust 32bit inode numbers and start to allocate big inode numbers, because btrfs uses inode->i_ino in many places. So here we always use BTRFS_I(inode)->location.objectid, which is an u64 variable. There are 2 exceptions that BTRFS_I(inode)->location.objectid != inode->i_ino: the btree inode (0 vs 1) and empty subvol dirs (256 vs 2), and inode->i_ino will be used in those cases. Another reason to make this change is I'm going to use a special inode to save free ino cache, and the inode number must be > (u64)-256. Signed-off-by: Li Zefan --- fs/btrfs/relocation.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e6cb89357256..7b75e0c8ef8d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1410,9 +1410,9 @@ again: prev = node; entry = rb_entry(node, struct btrfs_inode, rb_node); - if (objectid < entry->vfs_inode.i_ino) + if (objectid < btrfs_ino(&entry->vfs_inode)) node = node->rb_left; - else if (objectid > entry->vfs_inode.i_ino) + else if (objectid > btrfs_ino(&entry->vfs_inode)) node = node->rb_right; else break; @@ -1420,7 +1420,7 @@ again: if (!node) { while (prev) { entry = rb_entry(prev, struct btrfs_inode, rb_node); - if (objectid <= entry->vfs_inode.i_ino) { + if (objectid <= btrfs_ino(&entry->vfs_inode)) { node = prev; break; } @@ -1435,7 +1435,7 @@ again: return inode; } - objectid = entry->vfs_inode.i_ino + 1; + objectid = btrfs_ino(&entry->vfs_inode) + 1; if (cond_resched_lock(&root->inode_lock)) goto again; @@ -1471,7 +1471,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, return -ENOMEM; bytenr -= BTRFS_I(reloc_inode)->index_cnt; - ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode), bytenr, 0); if (ret < 0) goto out; @@ -1559,11 +1559,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans, if (first) { inode = find_next_inode(root, key.objectid); first = 0; - } else if (inode && inode->i_ino < key.objectid) { + } else if (inode && btrfs_ino(inode) < key.objectid) { btrfs_add_delayed_iput(inode); inode = find_next_inode(root, key.objectid); } - if (inode && inode->i_ino == key.objectid) { + if (inode && btrfs_ino(inode) == key.objectid) { end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); WARN_ON(!IS_ALIGNED(key.offset, @@ -1894,6 +1894,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, struct inode *inode = NULL; u64 objectid; u64 start, end; + u64 ino; objectid = min_key->objectid; while (1) { @@ -1906,17 +1907,18 @@ static int invalidate_extent_cache(struct btrfs_root *root, inode = find_next_inode(root, objectid); if (!inode) break; + ino = btrfs_ino(inode); - if (inode->i_ino > max_key->objectid) { + if (ino > max_key->objectid) { iput(inode); break; } - objectid = inode->i_ino + 1; + objectid = ino + 1; if (!S_ISREG(inode->i_mode)) continue; - if (unlikely(min_key->objectid == inode->i_ino)) { + if (unlikely(min_key->objectid == ino)) { if (min_key->type > BTRFS_EXTENT_DATA_KEY) continue; if (min_key->type < BTRFS_EXTENT_DATA_KEY) @@ -1929,7 +1931,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, start = 0; } - if (unlikely(max_key->objectid == inode->i_ino)) { + if (unlikely(max_key->objectid == ino)) { if (max_key->type < BTRFS_EXTENT_DATA_KEY) continue; if (max_key->type > BTRFS_EXTENT_DATA_KEY) { -- cgit v1.2.2 From c704005d886cf0bc9bc3974eb009b22fe0da32c7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 19 Apr 2011 18:00:01 +0200 Subject: btrfs: unify checking of IS_ERR and null use IS_ERR_OR_NULL when possible, done by this coccinelle script: @ match @ identifier id; @@ ( - BUG_ON(IS_ERR(id) || !id); + BUG_ON(IS_ERR_OR_NULL(id)); | - IS_ERR(id) || !id + IS_ERR_OR_NULL(id) | - !id || IS_ERR(id) + IS_ERR_OR_NULL(id) ) Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..fed0aaec0753 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3220,7 +3220,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, key.offset = 0; inode = btrfs_iget(fs_info->sb, &key, root, NULL); - if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { + if (IS_ERR_OR_NULL(inode) || is_bad_inode(inode)) { if (inode && !IS_ERR(inode)) iput(inode); return -ENOENT; -- cgit v1.2.2 From f993c883ad8e111fb9e9ae603540acbe94f7246c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 20 Apr 2011 23:35:57 +0200 Subject: btrfs: drop unused argument from extent_io_tree_init all callers pass GFP_NOFS, but the GFP mask argument is not used in the function; GFP_ATOMIC is passed to radix tree initialization and it's the only correct one, since we're using the preload/insert mechanism of radix tree. Let's drop the gfp mask from btrfs function, this will not change behaviour. Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index fed0aaec0753..f3edf45317bc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3935,7 +3935,7 @@ static struct reloc_control *alloc_reloc_control(void) INIT_LIST_HEAD(&rc->reloc_roots); backref_cache_init(&rc->backref_cache); mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); + extent_io_tree_init(&rc->processed_blocks, NULL); return rc; } -- cgit v1.2.2 From 172ddd60a662c4d8bf2809462866ddddd6431ea5 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 00:48:27 +0200 Subject: btrfs: drop gfp parameter from alloc_extent_map pass GFP_NOFS directly to kmem_cache_alloc Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f3edf45317bc..2097a88f60aa 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2870,7 +2870,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, struct extent_map *em; int ret = 0; - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); if (!em) return -ENOMEM; -- cgit v1.2.2 From b3b4aa74b58bded927f579fff787fb6fa1c0393c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 01:20:15 +0200 Subject: btrfs: drop unused parameter from btrfs_release_path parameter tree root it's not used since commit 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee ("Btrfs: Create extent_buffer interface for large blocksizes") Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 2097a88f60aa..f7b799b151aa 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -961,7 +961,7 @@ again: lower = upper; upper = NULL; } - btrfs_release_path(root, path2); + btrfs_release_path(path2); next: if (ptr < end) { ptr += btrfs_extent_inline_ref_size(key.type); @@ -974,7 +974,7 @@ next: if (ptr >= end) path1->slots[0]++; } - btrfs_release_path(rc->extent_root, path1); + btrfs_release_path(path1); cur->checked = 1; WARN_ON(exist); @@ -1749,7 +1749,7 @@ again: btrfs_node_key_to_cpu(path->nodes[level], &key, path->slots[level]); - btrfs_release_path(src, path); + btrfs_release_path(path); path->lowest_level = level; ret = btrfs_search_slot(trans, src, &key, path, 0, 1); @@ -2496,7 +2496,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, path->locks[upper->level] = 0; slot = path->slots[upper->level]; - btrfs_release_path(NULL, path); + btrfs_release_path(path); } else { ret = btrfs_bin_search(upper->eb, key, upper->level, &slot); @@ -2737,7 +2737,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, } else { path->lowest_level = node->level; ret = btrfs_search_slot(trans, root, key, path, 0, 1); - btrfs_release_path(root, path); + btrfs_release_path(path); if (ret > 0) ret = 0; } @@ -3119,7 +3119,7 @@ static int add_tree_block(struct reloc_control *rc, #endif } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); BUG_ON(level == -1); @@ -3505,7 +3505,7 @@ int add_data_references(struct reloc_control *rc, } path->slots[0]++; } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); if (err) free_block_list(blocks); return err; @@ -3568,7 +3568,7 @@ next: EXTENT_DIRTY); if (ret == 0 && start <= key.objectid) { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); rc->search_start = end + 1; } else { rc->search_start = key.objectid + key.offset; @@ -3576,7 +3576,7 @@ next: return 0; } } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); return ret; } @@ -3713,7 +3713,7 @@ restart: flags = BTRFS_EXTENT_FLAG_DATA; if (path_change) { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); path->search_commit_root = 1; path->skip_locking = 1; @@ -3736,7 +3736,7 @@ restart: (flags & BTRFS_EXTENT_FLAG_DATA)) { ret = add_data_references(rc, &key, path, &blocks); } else { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); ret = 0; } if (ret < 0) { @@ -3799,7 +3799,7 @@ restart: } } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, GFP_NOFS); @@ -3867,7 +3867,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); + btrfs_release_path(path); out: btrfs_free_path(path); return ret; @@ -4109,7 +4109,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - btrfs_release_path(root->fs_info->tree_root, path); + btrfs_release_path(path); if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || key.type != BTRFS_ROOT_ITEM_KEY) @@ -4141,7 +4141,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) key.offset--; } - btrfs_release_path(root->fs_info->tree_root, path); + btrfs_release_path(path); if (list_empty(&reloc_roots)) goto out; -- cgit v1.2.2 From f2a97a9dbd86eb1ef956bdf20e05c507b32beb96 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 5 May 2011 12:44:41 +0200 Subject: btrfs: remove all unused functions Remove static and global declarations and/or definitions. Reduces size of btrfs.ko by ~3.4kB. text data bss dec hex filename 402081 7464 200 409745 64091 btrfs.ko.base 398620 7144 200 405964 631cc btrfs.ko.remove-all Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f7b799b151aa..f726e72dd362 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -507,6 +507,7 @@ static int update_backref_cache(struct btrfs_trans_handle *trans, return 1; } + static int should_ignore_root(struct btrfs_root *root) { struct btrfs_root *reloc_root; @@ -529,7 +530,6 @@ static int should_ignore_root(struct btrfs_root *root) */ return 1; } - /* * find reloc tree by address of tree root */ -- cgit v1.2.2 From 70f23fd66bc821a0e99647f70a809e277cc93c4c Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Tue, 10 May 2011 10:16:21 +0200 Subject: treewide: fix a few typos in comments - kenrel -> kernel - whetehr -> whether - ttt -> tt - sss -> ss Signed-off-by: Justin P. Mattock Signed-off-by: Jiri Kosina --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..f340f7c99d09 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -709,7 +709,7 @@ again: WARN_ON(cur->checked); if (!list_empty(&cur->upper)) { /* - * the backref was added previously when processsing + * the backref was added previously when processing * backref of type BTRFS_TREE_BLOCK_REF_KEY */ BUG_ON(!list_is_singular(&cur->upper)); -- cgit v1.2.2 From a2de733c78fa7af51ba9670482fa7d392aa67c57 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 8 Mar 2011 14:14:00 +0100 Subject: btrfs: scrub This adds an initial implementation for scrub. It works quite straightforward. The usermode issues an ioctl for each device in the fs. For each device, it enumerates the allocated device chunks. For each chunk, the contained extents are enumerated and the data checksums fetched. The extents are read sequentially and the checksums verified. If an error occurs (checksum or EIO), a good copy is searched for. If one is found, the bad copy will be rewritten. All enumerations happen from the commit roots. During a transaction commit, the scrubs get paused and afterwards continue from the new roots. This commit is based on the series originally posted to linux-btrfs with some improvements that resulted from comments from David Sterba, Ilya Dryomov and Jan Schmidt. Signed-off-by: Arne Jansen --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 58250e09eb05..db1dffa9952b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4242,7 +4242,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, - disk_bytenr + len - 1, &list); + disk_bytenr + len - 1, &list, 0); while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); -- cgit v1.2.2 From 7a7eaa40a39bde4eefc91aadeb1ce3dc4e6a1252 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:54:33 -0400 Subject: Btrfs: take away the num_items argument from btrfs_join_transaction I keep forgetting that btrfs_join_transaction() just ignores the num_items argument, which leads me to sending pointless patches and looking stupid :). So just kill the num_items argument from btrfs_join_transaction and btrfs_start_ioctl_transaction, since neither of them use it. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a80134312..8bb256667f2d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2149,7 +2149,7 @@ again: err = ret; } - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { if (!err) btrfs_block_rsv_release(rc->extent_root, @@ -3233,7 +3233,7 @@ truncate: goto out; } - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { btrfs_free_path(path); ret = PTR_ERR(trans); @@ -3642,7 +3642,7 @@ int prepare_to_relocate(struct reloc_control *rc) rc->create_reloc_tree = 1; set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); BUG_ON(IS_ERR(trans)); btrfs_commit_transaction(trans, rc->extent_root); return 0; @@ -3831,7 +3831,7 @@ restart: btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); /* get rid of pinned extents */ - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else @@ -4156,7 +4156,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { unset_reloc_control(rc); err = PTR_ERR(trans); @@ -4190,7 +4190,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else -- cgit v1.2.2 From a4abeea41adfa3c143c289045f4625dfaeba2212 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 17:25:13 -0400 Subject: Btrfs: kill trans_mutex We use trans_mutex for lots of things, here's a basic list 1) To serialize trans_handles joining the currently running transaction 2) To make sure that no new trans handles are started while we are committing 3) To protect the dead_roots list and the transaction lists Really the serializing trans_handles joining is not too hard, and can really get bogged down in acquiring a reference to the transaction. So replace the trans_mutex with a trans_lock spinlock and use it to do the following 1) Protect fs_info->running_transaction. All trans handles have to do is check this, and then take a reference of the transaction and keep on going. 2) Protect the fs_info->trans_list. This doesn't get used too much, basically it just holds the current transactions, which will usually just be the currently committing transaction and the currently running transaction at most. 3) Protect the dead roots list. This is only ever processed by splicing the list so this is relatively simple. 4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using the trans_mutex before, so this is a pretty straightforward change. 5) Protect fs_info->no_trans_join. Because we don't hold the trans_lock over the entirety of the commit we need to have a way to block new people from creating a new transaction while we're doing our work. So we set no_trans_join and in join_transaction we test to see if that is set, and if it is we do a wait_on_commit. 6) Make the transaction use count atomic so we don't need to take locks to modify it when we're dropping references. 7) Add a commit_lock to the transaction to make sure multiple people trying to commit the same transaction don't race and commit at the same time. 8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl trans. I have tested this with xfstests, but obviously it is a pretty hairy change so lots of testing is greatly appreciated. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8bb256667f2d..09c30d37d43e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2136,10 +2136,10 @@ int prepare_to_merge(struct reloc_control *rc, int err) u64 num_bytes = 0; int ret; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; rc->merging_rsv_size += rc->nodes_relocated * 2; - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); again: if (!err) { num_bytes = rc->merging_rsv_size; @@ -2208,9 +2208,9 @@ int merge_reloc_roots(struct reloc_control *rc) int ret; again: root = rc->extent_root; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); list_splice_init(&rc->reloc_roots, &reloc_roots); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); while (!list_empty(&reloc_roots)) { found = 1; @@ -3583,17 +3583,17 @@ next: static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = rc; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = NULL; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static int check_extent_flags(u64 flags) -- cgit v1.2.2 From 026fd317828500524cdc7e5ff9e8e7923abb2868 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 10:32:11 -0400 Subject: Btrfs: don't always do readahead Our readahead is sort of sloppy, and really isn't always needed. For example if ls is doing a stating ls (which is the default) it's going to stat in non-disk order, so if say you have a directory with a stupid amount of files, readahead is going to do nothing but waste time in the case of doing the stat. Taking the unconditional readahead out made my test go from 57 minutes to 36 minutes. This means that everywhere we do loop through the tree we want to make sure we do set path->reada properly, so I went through and found all of the places where we loop through the path and set reada to 1. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/relocation.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 09c30d37d43e..5872b41581f4 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -676,6 +676,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, err = -ENOMEM; goto out; } + path1->reada = 1; + path2->reada = 2; node = alloc_backref_node(cache); if (!node) { @@ -1996,6 +1998,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -3297,6 +3300,7 @@ static int find_data_references(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { @@ -3665,6 +3669,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; ret = prepare_to_relocate(rc); if (ret) { @@ -4090,6 +4095,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = -1; key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; -- cgit v1.2.2 From 7585717f304f5ed005cc4ad933a69aab3efbd136 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 13 Jun 2011 20:00:16 -0400 Subject: Btrfs: fix relocation races The recent commit to get rid of our trans_mutex introduced some races with block group relocation. The problem is that relocation needs to do some record keeping about each root, and it was relying on the transaction mutex to coordinate things in subtle ways. This fix adds a mutex just for the relocation code and makes sure it doesn't have a big impact on normal operations. The race is really fixed in btrfs_record_root_in_trans, which is where we step back and wait for the relocation code to finish accounting setup. Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f25b10a22a0a..086b1e6b8614 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, int ret; if (!root->reloc_root) - return 0; + goto out; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, ret = btrfs_update_root(trans, root->fs_info->tree_root, &reloc_root->root_key, root_item); BUG_ON(ret); + +out: return 0; } @@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err) u64 num_bytes = 0; int ret; - spin_lock(&root->fs_info->trans_lock); + mutex_lock(&root->fs_info->reloc_mutex); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; rc->merging_rsv_size += rc->nodes_relocated * 2; - spin_unlock(&root->fs_info->trans_lock); + mutex_unlock(&root->fs_info->reloc_mutex); + again: if (!err) { num_bytes = rc->merging_rsv_size; @@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc) int ret; again: root = rc->extent_root; - spin_lock(&root->fs_info->trans_lock); + + /* + * this serializes us with btrfs_record_root_in_transaction, + * we have to make sure nobody is in the middle of + * adding their roots to the list while we are + * doing this splice + */ + mutex_lock(&root->fs_info->reloc_mutex); list_splice_init(&rc->reloc_roots, &reloc_roots); - spin_unlock(&root->fs_info->trans_lock); + mutex_unlock(&root->fs_info->reloc_mutex); while (!list_empty(&reloc_roots)) { found = 1; @@ -3590,17 +3600,19 @@ next: static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - spin_lock(&fs_info->trans_lock); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = rc; - spin_unlock(&fs_info->trans_lock); + mutex_unlock(&fs_info->reloc_mutex); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - spin_lock(&fs_info->trans_lock); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = NULL; - spin_unlock(&fs_info->trans_lock); + mutex_unlock(&fs_info->reloc_mutex); } static int check_extent_flags(u64 flags) -- cgit v1.2.2