aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yan, Zheng <zheng.yan@oracle.com> 2009-11-12 04:34:40 -0500
committer Chris Mason <chris.mason@oracle.com> 2009-12-17 12:33:33 -0500
commit c71bf099abddf3e0fdc27f251ba76fca1461d49a (patch)
tree 0b682ad2b49aeaf9ac774c40be4b7549c1d079fe
parent c216775458a2ee345d9412a2770c2916acfb5d30 (diff)
Btrfs: Avoid orphan inodes cleanup while replaying log
We do log replay in a single transaction, so it is not good to do unbounded operations during replay. This patch defers orphan inode cleanup until after the log has been replayed. It also avoids doing other unbounded operations, such as truncating a file, while replaying the log. These unbounded operations are postponed to the orphan inode cleanup stage.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.h 5
-rw-r--r-- fs/btrfs/disk-io.c 17
-rw-r--r-- fs/btrfs/inode.c 19
-rw-r--r-- fs/btrfs/relocation.c 1
-rw-r--r-- fs/btrfs/tree-log.c 49
5 files changed, 55 insertions, 36 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ae5b0aaa9386..fcfbefbbb685 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -859,8 +859,9 @@ struct btrfs_fs_info {
859 struct mutex ordered_operations_mutex; 859 struct mutex ordered_operations_mutex;
860 struct rw_semaphore extent_commit_sem; 860 struct rw_semaphore extent_commit_sem;
861 861
862 struct rw_semaphore subvol_sem; 862 struct rw_semaphore cleanup_work_sem;
863 863
864 struct rw_semaphore subvol_sem;
864 struct srcu_struct subvol_srcu; 865 struct srcu_struct subvol_srcu;
865 866
866 struct list_head trans_list; 867 struct list_head trans_list;
@@ -1034,12 +1035,12 @@ struct btrfs_root {
1034 int ref_cows; 1035 int ref_cows;
1035 int track_dirty; 1036 int track_dirty;
1036 int in_radix; 1037 int in_radix;
1038 int clean_orphans;
1037 1039
1038 u64 defrag_trans_start; 1040 u64 defrag_trans_start;
1039 struct btrfs_key defrag_progress; 1041 struct btrfs_key defrag_progress;
1040 struct btrfs_key defrag_max; 1042 struct btrfs_key defrag_max;
1041 int defrag_running; 1043 int defrag_running;
1042 int defrag_level;
1043 char *name; 1044 char *name;
1044 int in_sysfs; 1045 int in_sysfs;
1045 1046
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 101940fab9b3..c1e59e33f020 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
892 root->stripesize = stripesize; 892 root->stripesize = stripesize;
893 root->ref_cows = 0; 893 root->ref_cows = 0;
894 root->track_dirty = 0; 894 root->track_dirty = 0;
895 root->in_radix = 0;
896 root->clean_orphans = 0;
895 897
896 root->fs_info = fs_info; 898 root->fs_info = fs_info;
897 root->objectid = objectid; 899 root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
928 root->defrag_trans_start = fs_info->generation; 930 root->defrag_trans_start = fs_info->generation;
929 init_completion(&root->kobj_unregister); 931 init_completion(&root->kobj_unregister);
930 root->defrag_running = 0; 932 root->defrag_running = 0;
931 root->defrag_level = 0;
932 root->root_key.objectid = objectid; 933 root->root_key.objectid = objectid;
933 root->anon_super.s_root = NULL; 934 root->anon_super.s_root = NULL;
934 root->anon_super.s_dev = 0; 935 root->anon_super.s_dev = 0;
@@ -1210,8 +1211,10 @@ again:
1210 ret = radix_tree_insert(&fs_info->fs_roots_radix, 1211 ret = radix_tree_insert(&fs_info->fs_roots_radix,
1211 (unsigned long)root->root_key.objectid, 1212 (unsigned long)root->root_key.objectid,
1212 root); 1213 root);
1213 if (ret == 0) 1214 if (ret == 0) {
1214 root->in_radix = 1; 1215 root->in_radix = 1;
1216 root->clean_orphans = 1;
1217 }
1215 spin_unlock(&fs_info->fs_roots_radix_lock); 1218 spin_unlock(&fs_info->fs_roots_radix_lock);
1216 radix_tree_preload_end(); 1219 radix_tree_preload_end();
1217 if (ret) { 1220 if (ret) {
@@ -1225,10 +1228,6 @@ again:
1225 ret = btrfs_find_dead_roots(fs_info->tree_root, 1228 ret = btrfs_find_dead_roots(fs_info->tree_root,
1226 root->root_key.objectid); 1229 root->root_key.objectid);
1227 WARN_ON(ret); 1230 WARN_ON(ret);
1228
1229 if (!(fs_info->sb->s_flags & MS_RDONLY))
1230 btrfs_orphan_cleanup(root);
1231
1232 return root; 1231 return root;
1233fail: 1232fail:
1234 free_fs_root(root); 1233 free_fs_root(root);
@@ -1689,6 +1688,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 mutex_init(&fs_info->cleaner_mutex); 1688 mutex_init(&fs_info->cleaner_mutex);
1690 mutex_init(&fs_info->volume_mutex); 1689 mutex_init(&fs_info->volume_mutex);
1691 init_rwsem(&fs_info->extent_commit_sem); 1690 init_rwsem(&fs_info->extent_commit_sem);
1691 init_rwsem(&fs_info->cleanup_work_sem);
1692 init_rwsem(&fs_info->subvol_sem); 1692 init_rwsem(&fs_info->subvol_sem);
1693 1693
1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1694 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -2388,6 +2388,11 @@ int btrfs_commit_super(struct btrfs_root *root)
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2388 mutex_lock(&root->fs_info->cleaner_mutex);
2389 btrfs_clean_old_snapshots(root); 2389 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2390 mutex_unlock(&root->fs_info->cleaner_mutex);
2391
2392 /* wait until ongoing cleanup work done */
2393 down_write(&root->fs_info->cleanup_work_sem);
2394 up_write(&root->fs_info->cleanup_work_sem);
2395
2391 trans = btrfs_start_transaction(root, 1); 2396 trans = btrfs_start_transaction(root, 1);
2392 ret = btrfs_commit_transaction(trans, root); 2397 ret = btrfs_commit_transaction(trans, root);
2393 BUG_ON(ret); 2398 BUG_ON(ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa57247887e3..eb2db3bde236 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2093,16 +2093,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2093 struct inode *inode; 2093 struct inode *inode;
2094 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2094 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2095 2095
2096 path = btrfs_alloc_path(); 2096 if (!xchg(&root->clean_orphans, 0))
2097 if (!path)
2098 return; 2097 return;
2098
2099 path = btrfs_alloc_path();
2100 BUG_ON(!path);
2099 path->reada = -1; 2101 path->reada = -1;
2100 2102
2101 key.objectid = BTRFS_ORPHAN_OBJECTID; 2103 key.objectid = BTRFS_ORPHAN_OBJECTID;
2102 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); 2104 btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
2103 key.offset = (u64)-1; 2105 key.offset = (u64)-1;
2104 2106
2105
2106 while (1) { 2107 while (1) {
2107 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2108 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2108 if (ret < 0) { 2109 if (ret < 0) {
@@ -3298,6 +3299,11 @@ void btrfs_delete_inode(struct inode *inode)
3298 } 3299 }
3299 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3300 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3300 3301
3302 if (root->fs_info->log_root_recovering) {
3303 BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
3304 goto no_delete;
3305 }
3306
3301 if (inode->i_nlink > 0) { 3307 if (inode->i_nlink > 0) {
3302 BUG_ON(btrfs_root_refs(&root->root_item) != 0); 3308 BUG_ON(btrfs_root_refs(&root->root_item) != 0);
3303 goto no_delete; 3309 goto no_delete;
@@ -3705,6 +3711,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3705 } 3711 }
3706 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3712 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3707 3713
3714 if (root != sub_root) {
3715 down_read(&root->fs_info->cleanup_work_sem);
3716 if (!(inode->i_sb->s_flags & MS_RDONLY))
3717 btrfs_orphan_cleanup(sub_root);
3718 up_read(&root->fs_info->cleanup_work_sem);
3719 }
3720
3708 return inode; 3721 return inode;
3709} 3722}
3710 3723
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7b..975fdd33ac41 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3755,6 +3755,7 @@ out:
3755 BTRFS_DATA_RELOC_TREE_OBJECTID); 3755 BTRFS_DATA_RELOC_TREE_OBJECTID);
3756 if (IS_ERR(fs_root)) 3756 if (IS_ERR(fs_root))
3757 err = PTR_ERR(fs_root); 3757 err = PTR_ERR(fs_root);
3758 btrfs_orphan_cleanup(fs_root);
3758 } 3759 }
3759 return err; 3760 return err;
3760} 3761}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6bbaa10bb67e..4a9434b622ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -930,6 +930,17 @@ out_nowrite:
930 return 0; 930 return 0;
931} 931}
932 932
933static int insert_orphan_item(struct btrfs_trans_handle *trans,
934 struct btrfs_root *root, u64 offset)
935{
936 int ret;
937 ret = btrfs_find_orphan_item(root, offset);
938 if (ret > 0)
939 ret = btrfs_insert_orphan_item(trans, root, offset);
940 return ret;
941}
942
943
933/* 944/*
934 * There are a few corners where the link count of the file can't 945 * There are a few corners where the link count of the file can't
935 * be properly maintained during replay. So, instead of adding 946 * be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
997 } 1008 }
998 BTRFS_I(inode)->index_cnt = (u64)-1; 1009 BTRFS_I(inode)->index_cnt = (u64)-1;
999 1010
1000 if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { 1011 if (inode->i_nlink == 0) {
1001 ret = replay_dir_deletes(trans, root, NULL, path, 1012 if (S_ISDIR(inode->i_mode)) {
1002 inode->i_ino, 1); 1013 ret = replay_dir_deletes(trans, root, NULL, path,
1014 inode->i_ino, 1);
1015 BUG_ON(ret);
1016 }
1017 ret = insert_orphan_item(trans, root, inode->i_ino);
1003 BUG_ON(ret); 1018 BUG_ON(ret);
1004 } 1019 }
1005 btrfs_free_path(path); 1020 btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1587 /* inode keys are done during the first stage */ 1602 /* inode keys are done during the first stage */
1588 if (key.type == BTRFS_INODE_ITEM_KEY && 1603 if (key.type == BTRFS_INODE_ITEM_KEY &&
1589 wc->stage == LOG_WALK_REPLAY_INODES) { 1604 wc->stage == LOG_WALK_REPLAY_INODES) {
1590 struct inode *inode;
1591 struct btrfs_inode_item *inode_item; 1605 struct btrfs_inode_item *inode_item;
1592 u32 mode; 1606 u32 mode;
1593 1607
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1603 eb, i, &key); 1617 eb, i, &key);
1604 BUG_ON(ret); 1618 BUG_ON(ret);
1605 1619
1606 /* for regular files, truncate away 1620 /* for regular files, make sure corresponding
1607 * extents past the new EOF 1621 * orphan item exists. extents past the new EOF
1622 * will be truncated later by orphan cleanup.
1608 */ 1623 */
1609 if (S_ISREG(mode)) { 1624 if (S_ISREG(mode)) {
1610 inode = read_one_inode(root, 1625 ret = insert_orphan_item(wc->trans, root,
1611 key.objectid); 1626 key.objectid);
1612 BUG_ON(!inode);
1613
1614 ret = btrfs_truncate_inode_items(wc->trans,
1615 root, inode, inode->i_size,
1616 BTRFS_EXTENT_DATA_KEY);
1617 BUG_ON(ret); 1627 BUG_ON(ret);
1618
1619 /* if the nlink count is zero here, the iput
1620 * will free the inode. We bump it to make
1621 * sure it doesn't get freed until the link
1622 * count fixup is done
1623 */
1624 if (inode->i_nlink == 0) {
1625 btrfs_inc_nlink(inode);
1626 btrfs_update_inode(wc->trans,
1627 root, inode);
1628 }
1629 iput(inode);
1630 } 1628 }
1629
1631 ret = link_to_fixup_dir(wc->trans, root, 1630 ret = link_to_fixup_dir(wc->trans, root,
1632 path, key.objectid); 1631 path, key.objectid);
1633 BUG_ON(ret); 1632 BUG_ON(ret);