From c71bf099abddf3e0fdc27f251ba76fca1461d49a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:40 +0000 Subject: Btrfs: Avoid orphan inodes cleanup while replaying log We do log replay in a single transaction, so it's not good to do unbound operations. This patch cleans up orphan inodes cleanup after replaying the log. It also avoids doing other unbound operations such as truncating a file during replaying log. These unbound operations are postponed to the orphan inode cleanup stage. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfcc93c93a7b..975fdd33ac41 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3755,6 +3755,7 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); + btrfs_orphan_cleanup(fs_root); } return err; } -- cgit v1.2.2 From 8082510e7124cc50d728f1b875639cb4e22312cc Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:35:36 +0000 Subject: Btrfs: Make truncate(2) more ENOSPC friendly truncating and deleting regular files are unbound operations, so it's not good to do them in a single transaction. This patch makes btrfs_truncate and btrfs_delete_inode start a new transaction after all items in a tree leaf are deleted. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 975fdd33ac41..f2aa53d2f944 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root, return 0; } +static void put_inodes(struct list_head *list) +{ + struct inodevec *ivec; + while (!list_empty(list)) { + ivec = list_entry(list->next, struct inodevec, list); + list_del(&ivec->list); + while (ivec->nr > 0) { + ivec->nr--; + iput(ivec->inode[ivec->nr]); + } + kfree(ivec); + } +} + static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key) @@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, btrfs_btree_balance_dirty(root, nr); + /* + * put inodes outside transaction, otherwise we may deadlock. + */ + put_inodes(&inode_list); + if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); } @@ -1752,19 +1771,7 @@ out: btrfs_btree_balance_dirty(root, nr); - /* - * put inodes while we aren't holding the tree locks - */ - while (!list_empty(&inode_list)) { - struct inodevec *ivec; - ivec = list_entry(inode_list.next, struct inodevec, list); - list_del(&ivec->list); - while (ivec->nr > 0) { - ivec->nr--; - iput(ivec->inode[ivec->nr]); - } - kfree(ivec); - } + put_inodes(&inode_list); if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); -- cgit v1.2.2 From 24bbcf0442ee04660a5a030efdbb6d03f1c275cb Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:36:34 +0000 Subject: Btrfs: Add delayed iput iput() can trigger new transactions if we are dropping the final reference, so calling it in btrfs_commit_transaction may end up deadlock. This patch adds delayed iput to avoid the issue. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f2aa53d2f944..a9728680eca8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3541,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) (unsigned long long)rc->block_group->key.objectid, (unsigned long long)rc->block_group->flags); - btrfs_start_delalloc_inodes(fs_info->tree_root); - btrfs_wait_ordered_extents(fs_info->tree_root, 0); + btrfs_start_delalloc_inodes(fs_info->tree_root, 0); + btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); while (1) { rc->extents_found = 0; -- cgit v1.2.2 From 2423fdfb96e3f9ff3baeb6c4c78d74145547891d Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 6 Jan 2010 16:57:22 +0000 Subject: Btrfs, fix memory leaks in error paths Stanse found 2 memory leaks in relocate_block_group and __btrfs_map_block. cluster and multi are not freed/assigned on all paths. Fix that. Signed-off-by: Jiri Slaby Cc: linux-btrfs@vger.kernel.org Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a9728680eca8..ed3e4a2ec2c8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3281,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) return -ENOMEM; path = btrfs_alloc_path(); - if (!path) + if (!path) { + kfree(cluster); return -ENOMEM; + } rc->extents_found = 0; rc->extents_skipped = 0; -- cgit v1.2.2 From d7ce5843bb28ada6845ab2ae8510ba3f12d33154 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 2 Feb 2010 08:46:44 +0000 Subject: Btrfs: remove BUG_ON() due to mounting bad filesystem Mounting a bad filesystem caused a BUG_ON(). The following is steps to reproduce it. # mkfs.btrfs /dev/sda2 # mount /dev/sda2 /mnt # mkfs.btrfs /dev/sda1 /dev/sda2 (the program says that /dev/sda2 was mounted, and then exits. ) # umount /mnt # mount /dev/sda1 /mnt At the third step, mkfs.btrfs exited in the way of make filesystem. So the initialization of the filesystem didn't finish. So the filesystem was bad, and it caused BUG_ON() when mounting it. But BUG_ON() should be called by the wrong code, not user's operation, so I think it is a bug of btrfs. This patch fixes it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ed3e4a2ec2c8..ab7ab5318745 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3764,7 +3764,8 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); - btrfs_orphan_cleanup(fs_root); + else + btrfs_orphan_cleanup(fs_root); } return err; } -- cgit v1.2.2 From 6bef4d317193d3badbbfa3f3c593758ace84a629 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 23 Feb 2010 19:43:04 +0000 Subject: Btrfs: use RB_ROOT to intialize rb_trees instead of setting rb_node to NULL btrfs inialize rb trees in quite a number of places by settin rb_node = NULL; The problem with this is that 17d9ddc72fb8bba0d4f678 in the linux-next tree adds a new field to that struct which needs to be NULL for the new rbtree library code to work properly. This patch uses RB_ROOT as the intializer so all of the relevant fields will be NULL'd. Without the patch I get a panic. Signed-off-by: Eric Paris Acked-by: Venkatesh Pallipadi Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ab7ab5318745..0109e5606bad 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -170,14 +170,14 @@ struct async_merge { static void mapping_tree_init(struct mapping_tree *tree) { - tree->rb_root.rb_node = NULL; + tree->rb_root = RB_ROOT; spin_lock_init(&tree->lock); } static void backref_cache_init(struct backref_cache *cache) { int i; - cache->rb_root.rb_node = NULL; + cache->rb_root = RB_ROOT; for (i = 0; i < BTRFS_MAX_LEVEL; i++) INIT_LIST_HEAD(&cache->pending[i]); spin_lock_init(&cache->lock); -- cgit v1.2.2 From 73f73415caddbc01d9f10c03e0a677d5b3d11569 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 4 Dec 2009 17:38:27 +0000 Subject: Btrfs: change how we mount subvolumes This work is in preperation for being able to set a different root as the default mounting root. There is currently a problem with how we mount subvolumes. We cannot currently mount a subvolume of a subvolume, you can only mount subvolumes/snapshots of the default subvolume. So say you take a snapshot of the default subvolume and call it snap1, and then take a snapshot of snap1 and call it snap2, so now you have / /snap1 /snap1/snap2 as your available volumes. Currently you can only mount / and /snap1, you cannot mount /snap1/snap2. To fix this problem instead of passing subvolid= you must pass in subvolid=, where is the tree id that gets spit out via the subvolume listing you get from the subvolume listing patches (btrfs filesystem list). This allows us to mount /, /snap1 and /snap1/snap2 as the root volume. In addition to the above, we also now read the default dir item in the tree root to get the root key that it points to. For now this just points at what has always been the default subvolme, but later on I plan to change it to point at whatever root you want to be the new default root, so you can just set the default mount and not have to mount with -o subvolid=. I tested this out with the above scenario and it worked perfectly. Thanks, mount -o subvol operates inside the selected subvolid. For example: mount -o subvol=snap1,subvolid=256 /dev/xxx /mnt /mnt will have the snap1 directory for the subvolume with id 256. mount -o subvol=snap /dev/xxx /mnt /mnt will be the snap directory of whatever the default subvolume is. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0109e5606bad..d52759daa53f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3487,7 +3487,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, key.objectid = objectid; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &key, root); + inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); BTRFS_I(inode)->index_cnt = group->key.objectid; -- cgit v1.2.2 From 2ac55d41b5d6bf49e76bc85db5431240617e2f8f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 3 Feb 2010 19:33:23 +0000 Subject: Btrfs: cache the extent state everywhere we possibly can V2 This patch just goes through and fixes everybody that does lock_extent() blah unlock_extent() to use lock_extent_bits() blah unlock_extent_cached() and pass around a extent_state so we only have to do the searches once per function. This gives me about a 3 mb/s boots on my random write test. I have not converted some things, like the relocation and ioctl's, since they aren't heavily used and the relocation stuff is in the middle of being re-written. I also changed the clear_extent_bit() to only unset the cached state if we are clearing EXTENT_LOCKED and related stuff, so we can do things like this lock_extent_bits() clear delalloc bits unlock_extent_cached() without losing our cached state. I tested this thoroughly and turned on LEAK_DEBUG to make sure we weren't leaking extent states, everything worked out fine. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d52759daa53f..0b23942cbc0d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2659,7 +2659,7 @@ static int relocate_file_extent_cluster(struct inode *inode, EXTENT_BOUNDARY, GFP_NOFS); nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); set_page_dirty(page); dirty_page++; -- cgit v1.2.2 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- fs/btrfs/relocation.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0b23942cbc0d..e558dd941ded 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" -- cgit v1.2.2