diff options
author | Yan, Zheng <zheng.yan@oracle.com> | 2009-11-12 04:34:40 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-12-17 12:33:33 -0500 |
commit | c71bf099abddf3e0fdc27f251ba76fca1461d49a (patch) | |
tree | 0b682ad2b49aeaf9ac774c40be4b7549c1d079fe | |
parent | c216775458a2ee345d9412a2770c2916acfb5d30 (diff) |
Btrfs: Avoid orphan inodes cleanup while replaying log
Log replay is done in a single transaction, so it is not good to do unbounded
operations during it. This patch defers orphan inode cleanup until after the
log has been replayed. It also avoids doing other unbounded operations, such as
truncating a file, while replaying the log. These unbounded operations are
postponed to the orphan inode cleanup stage.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/ctree.h | 5 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 17 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 19 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 1 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 49 |
5 files changed, 55 insertions, 36 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ae5b0aaa9386..fcfbefbbb685 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -859,8 +859,9 @@ struct btrfs_fs_info { | |||
859 | struct mutex ordered_operations_mutex; | 859 | struct mutex ordered_operations_mutex; |
860 | struct rw_semaphore extent_commit_sem; | 860 | struct rw_semaphore extent_commit_sem; |
861 | 861 | ||
862 | struct rw_semaphore subvol_sem; | 862 | struct rw_semaphore cleanup_work_sem; |
863 | 863 | ||
864 | struct rw_semaphore subvol_sem; | ||
864 | struct srcu_struct subvol_srcu; | 865 | struct srcu_struct subvol_srcu; |
865 | 866 | ||
866 | struct list_head trans_list; | 867 | struct list_head trans_list; |
@@ -1034,12 +1035,12 @@ struct btrfs_root { | |||
1034 | int ref_cows; | 1035 | int ref_cows; |
1035 | int track_dirty; | 1036 | int track_dirty; |
1036 | int in_radix; | 1037 | int in_radix; |
1038 | int clean_orphans; | ||
1037 | 1039 | ||
1038 | u64 defrag_trans_start; | 1040 | u64 defrag_trans_start; |
1039 | struct btrfs_key defrag_progress; | 1041 | struct btrfs_key defrag_progress; |
1040 | struct btrfs_key defrag_max; | 1042 | struct btrfs_key defrag_max; |
1041 | int defrag_running; | 1043 | int defrag_running; |
1042 | int defrag_level; | ||
1043 | char *name; | 1044 | char *name; |
1044 | int in_sysfs; | 1045 | int in_sysfs; |
1045 | 1046 | ||
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 101940fab9b3..c1e59e33f020 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
892 | root->stripesize = stripesize; | 892 | root->stripesize = stripesize; |
893 | root->ref_cows = 0; | 893 | root->ref_cows = 0; |
894 | root->track_dirty = 0; | 894 | root->track_dirty = 0; |
895 | root->in_radix = 0; | ||
896 | root->clean_orphans = 0; | ||
895 | 897 | ||
896 | root->fs_info = fs_info; | 898 | root->fs_info = fs_info; |
897 | root->objectid = objectid; | 899 | root->objectid = objectid; |
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
928 | root->defrag_trans_start = fs_info->generation; | 930 | root->defrag_trans_start = fs_info->generation; |
929 | init_completion(&root->kobj_unregister); | 931 | init_completion(&root->kobj_unregister); |
930 | root->defrag_running = 0; | 932 | root->defrag_running = 0; |
931 | root->defrag_level = 0; | ||
932 | root->root_key.objectid = objectid; | 933 | root->root_key.objectid = objectid; |
933 | root->anon_super.s_root = NULL; | 934 | root->anon_super.s_root = NULL; |
934 | root->anon_super.s_dev = 0; | 935 | root->anon_super.s_dev = 0; |
@@ -1210,8 +1211,10 @@ again: | |||
1210 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1211 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
1211 | (unsigned long)root->root_key.objectid, | 1212 | (unsigned long)root->root_key.objectid, |
1212 | root); | 1213 | root); |
1213 | if (ret == 0) | 1214 | if (ret == 0) { |
1214 | root->in_radix = 1; | 1215 | root->in_radix = 1; |
1216 | root->clean_orphans = 1; | ||
1217 | } | ||
1215 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1218 | spin_unlock(&fs_info->fs_roots_radix_lock); |
1216 | radix_tree_preload_end(); | 1219 | radix_tree_preload_end(); |
1217 | if (ret) { | 1220 | if (ret) { |
@@ -1225,10 +1228,6 @@ again: | |||
1225 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 1228 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
1226 | root->root_key.objectid); | 1229 | root->root_key.objectid); |
1227 | WARN_ON(ret); | 1230 | WARN_ON(ret); |
1228 | |||
1229 | if (!(fs_info->sb->s_flags & MS_RDONLY)) | ||
1230 | btrfs_orphan_cleanup(root); | ||
1231 | |||
1232 | return root; | 1231 | return root; |
1233 | fail: | 1232 | fail: |
1234 | free_fs_root(root); | 1233 | free_fs_root(root); |
@@ -1689,6 +1688,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1689 | mutex_init(&fs_info->cleaner_mutex); | 1688 | mutex_init(&fs_info->cleaner_mutex); |
1690 | mutex_init(&fs_info->volume_mutex); | 1689 | mutex_init(&fs_info->volume_mutex); |
1691 | init_rwsem(&fs_info->extent_commit_sem); | 1690 | init_rwsem(&fs_info->extent_commit_sem); |
1691 | init_rwsem(&fs_info->cleanup_work_sem); | ||
1692 | init_rwsem(&fs_info->subvol_sem); | 1692 | init_rwsem(&fs_info->subvol_sem); |
1693 | 1693 | ||
1694 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 1694 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
@@ -2388,6 +2388,11 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2388 | mutex_lock(&root->fs_info->cleaner_mutex); | 2388 | mutex_lock(&root->fs_info->cleaner_mutex); |
2389 | btrfs_clean_old_snapshots(root); | 2389 | btrfs_clean_old_snapshots(root); |
2390 | mutex_unlock(&root->fs_info->cleaner_mutex); | 2390 | mutex_unlock(&root->fs_info->cleaner_mutex); |
2391 | |||
2392 | /* wait until ongoing cleanup work done */ | ||
2393 | down_write(&root->fs_info->cleanup_work_sem); | ||
2394 | up_write(&root->fs_info->cleanup_work_sem); | ||
2395 | |||
2391 | trans = btrfs_start_transaction(root, 1); | 2396 | trans = btrfs_start_transaction(root, 1); |
2392 | ret = btrfs_commit_transaction(trans, root); | 2397 | ret = btrfs_commit_transaction(trans, root); |
2393 | BUG_ON(ret); | 2398 | BUG_ON(ret); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fa57247887e3..eb2db3bde236 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -2093,16 +2093,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2093 | struct inode *inode; | 2093 | struct inode *inode; |
2094 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2094 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2095 | 2095 | ||
2096 | path = btrfs_alloc_path(); | 2096 | if (!xchg(&root->clean_orphans, 0)) |
2097 | if (!path) | ||
2098 | return; | 2097 | return; |
2098 | |||
2099 | path = btrfs_alloc_path(); | ||
2100 | BUG_ON(!path); | ||
2099 | path->reada = -1; | 2101 | path->reada = -1; |
2100 | 2102 | ||
2101 | key.objectid = BTRFS_ORPHAN_OBJECTID; | 2103 | key.objectid = BTRFS_ORPHAN_OBJECTID; |
2102 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); | 2104 | btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); |
2103 | key.offset = (u64)-1; | 2105 | key.offset = (u64)-1; |
2104 | 2106 | ||
2105 | |||
2106 | while (1) { | 2107 | while (1) { |
2107 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2108 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
2108 | if (ret < 0) { | 2109 | if (ret < 0) { |
@@ -3298,6 +3299,11 @@ void btrfs_delete_inode(struct inode *inode) | |||
3298 | } | 3299 | } |
3299 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3300 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
3300 | 3301 | ||
3302 | if (root->fs_info->log_root_recovering) { | ||
3303 | BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); | ||
3304 | goto no_delete; | ||
3305 | } | ||
3306 | |||
3301 | if (inode->i_nlink > 0) { | 3307 | if (inode->i_nlink > 0) { |
3302 | BUG_ON(btrfs_root_refs(&root->root_item) != 0); | 3308 | BUG_ON(btrfs_root_refs(&root->root_item) != 0); |
3303 | goto no_delete; | 3309 | goto no_delete; |
@@ -3705,6 +3711,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3705 | } | 3711 | } |
3706 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | 3712 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); |
3707 | 3713 | ||
3714 | if (root != sub_root) { | ||
3715 | down_read(&root->fs_info->cleanup_work_sem); | ||
3716 | if (!(inode->i_sb->s_flags & MS_RDONLY)) | ||
3717 | btrfs_orphan_cleanup(sub_root); | ||
3718 | up_read(&root->fs_info->cleanup_work_sem); | ||
3719 | } | ||
3720 | |||
3708 | return inode; | 3721 | return inode; |
3709 | } | 3722 | } |
3710 | 3723 | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index cfcc93c93a7b..975fdd33ac41 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -3755,6 +3755,7 @@ out: | |||
3755 | BTRFS_DATA_RELOC_TREE_OBJECTID); | 3755 | BTRFS_DATA_RELOC_TREE_OBJECTID); |
3756 | if (IS_ERR(fs_root)) | 3756 | if (IS_ERR(fs_root)) |
3757 | err = PTR_ERR(fs_root); | 3757 | err = PTR_ERR(fs_root); |
3758 | btrfs_orphan_cleanup(fs_root); | ||
3758 | } | 3759 | } |
3759 | return err; | 3760 | return err; |
3760 | } | 3761 | } |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6bbaa10bb67e..4a9434b622ec 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -930,6 +930,17 @@ out_nowrite: | |||
930 | return 0; | 930 | return 0; |
931 | } | 931 | } |
932 | 932 | ||
933 | static int insert_orphan_item(struct btrfs_trans_handle *trans, | ||
934 | struct btrfs_root *root, u64 offset) | ||
935 | { | ||
936 | int ret; | ||
937 | ret = btrfs_find_orphan_item(root, offset); | ||
938 | if (ret > 0) | ||
939 | ret = btrfs_insert_orphan_item(trans, root, offset); | ||
940 | return ret; | ||
941 | } | ||
942 | |||
943 | |||
933 | /* | 944 | /* |
934 | * There are a few corners where the link count of the file can't | 945 | * There are a few corners where the link count of the file can't |
935 | * be properly maintained during replay. So, instead of adding | 946 | * be properly maintained during replay. So, instead of adding |
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
997 | } | 1008 | } |
998 | BTRFS_I(inode)->index_cnt = (u64)-1; | 1009 | BTRFS_I(inode)->index_cnt = (u64)-1; |
999 | 1010 | ||
1000 | if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { | 1011 | if (inode->i_nlink == 0) { |
1001 | ret = replay_dir_deletes(trans, root, NULL, path, | 1012 | if (S_ISDIR(inode->i_mode)) { |
1002 | inode->i_ino, 1); | 1013 | ret = replay_dir_deletes(trans, root, NULL, path, |
1014 | inode->i_ino, 1); | ||
1015 | BUG_ON(ret); | ||
1016 | } | ||
1017 | ret = insert_orphan_item(trans, root, inode->i_ino); | ||
1003 | BUG_ON(ret); | 1018 | BUG_ON(ret); |
1004 | } | 1019 | } |
1005 | btrfs_free_path(path); | 1020 | btrfs_free_path(path); |
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1587 | /* inode keys are done during the first stage */ | 1602 | /* inode keys are done during the first stage */ |
1588 | if (key.type == BTRFS_INODE_ITEM_KEY && | 1603 | if (key.type == BTRFS_INODE_ITEM_KEY && |
1589 | wc->stage == LOG_WALK_REPLAY_INODES) { | 1604 | wc->stage == LOG_WALK_REPLAY_INODES) { |
1590 | struct inode *inode; | ||
1591 | struct btrfs_inode_item *inode_item; | 1605 | struct btrfs_inode_item *inode_item; |
1592 | u32 mode; | 1606 | u32 mode; |
1593 | 1607 | ||
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1603 | eb, i, &key); | 1617 | eb, i, &key); |
1604 | BUG_ON(ret); | 1618 | BUG_ON(ret); |
1605 | 1619 | ||
1606 | /* for regular files, truncate away | 1620 | /* for regular files, make sure corresponding |
1607 | * extents past the new EOF | 1621 | * orphan item exists. extents past the new EOF |
1622 | * will be truncated later by orphan cleanup. | ||
1608 | */ | 1623 | */ |
1609 | if (S_ISREG(mode)) { | 1624 | if (S_ISREG(mode)) { |
1610 | inode = read_one_inode(root, | 1625 | ret = insert_orphan_item(wc->trans, root, |
1611 | key.objectid); | 1626 | key.objectid); |
1612 | BUG_ON(!inode); | ||
1613 | |||
1614 | ret = btrfs_truncate_inode_items(wc->trans, | ||
1615 | root, inode, inode->i_size, | ||
1616 | BTRFS_EXTENT_DATA_KEY); | ||
1617 | BUG_ON(ret); | 1627 | BUG_ON(ret); |
1618 | |||
1619 | /* if the nlink count is zero here, the iput | ||
1620 | * will free the inode. We bump it to make | ||
1621 | * sure it doesn't get freed until the link | ||
1622 | * count fixup is done | ||
1623 | */ | ||
1624 | if (inode->i_nlink == 0) { | ||
1625 | btrfs_inc_nlink(inode); | ||
1626 | btrfs_update_inode(wc->trans, | ||
1627 | root, inode); | ||
1628 | } | ||
1629 | iput(inode); | ||
1630 | } | 1628 | } |
1629 | |||
1631 | ret = link_to_fixup_dir(wc->trans, root, | 1630 | ret = link_to_fixup_dir(wc->trans, root, |
1632 | path, key.objectid); | 1631 | path, key.objectid); |
1633 | BUG_ON(ret); | 1632 | BUG_ON(ret); |