about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.yan@oracle.com>, 2009-11-12 04:36:34 -0500
committer: Chris Mason <chris.mason@oracle.com>, 2009-12-17 12:33:35 -0500
commit: 24bbcf0442ee04660a5a030efdbb6d03f1c275cb (patch)
tree: aa57d77d29cc5150b272cc3f6465f10262fcbaac
parent: f34f57a3ab4e73304d78c125682f1a53cd3975f2 (diff)
Btrfs: Add delayed iput
iput() can trigger new transactions if we are dropping the final reference, so calling it in btrfs_commit_transaction() may end up in a deadlock. This patch adds delayed iput to avoid the issue.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/ctree.h 7
-rw-r--r-- fs/btrfs/disk-io.c 4
-rw-r--r-- fs/btrfs/extent-tree.c 8
-rw-r--r-- fs/btrfs/inode.c 55
-rw-r--r-- fs/btrfs/ordered-data.c 10
-rw-r--r-- fs/btrfs/ordered-data.h 3
-rw-r--r-- fs/btrfs/relocation.c 4
-rw-r--r-- fs/btrfs/super.c 4
-rw-r--r-- fs/btrfs/transaction.c 13
9 files changed, 90 insertions, 18 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a7cac2148c7c..1983c889bb1c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -872,6 +872,9 @@ struct btrfs_fs_info {
872 struct list_head dead_roots; 872 struct list_head dead_roots;
873 struct list_head caching_block_groups; 873 struct list_head caching_block_groups;
874 874
875 spinlock_t delayed_iput_lock;
876 struct list_head delayed_iputs;
877
875 atomic_t nr_async_submits; 878 atomic_t nr_async_submits;
876 atomic_t async_submit_draining; 879 atomic_t async_submit_draining;
877 atomic_t nr_async_bios; 880 atomic_t nr_async_bios;
@@ -2301,7 +2304,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2301 struct inode *inode, u64 new_size, 2304 struct inode *inode, u64 new_size,
2302 u32 min_type); 2305 u32 min_type);
2303 2306
2304int btrfs_start_delalloc_inodes(struct btrfs_root *root); 2307int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2305int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2308int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
2306int btrfs_writepages(struct address_space *mapping, 2309int btrfs_writepages(struct address_space *mapping,
2307 struct writeback_control *wbc); 2310 struct writeback_control *wbc);
@@ -2341,6 +2344,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
2341void btrfs_orphan_cleanup(struct btrfs_root *root); 2344void btrfs_orphan_cleanup(struct btrfs_root *root);
2342int btrfs_cont_expand(struct inode *inode, loff_t size); 2345int btrfs_cont_expand(struct inode *inode, loff_t size);
2343int btrfs_invalidate_inodes(struct btrfs_root *root); 2346int btrfs_invalidate_inodes(struct btrfs_root *root);
2347void btrfs_add_delayed_iput(struct inode *inode);
2348void btrfs_run_delayed_iputs(struct btrfs_root *root);
2344extern const struct dentry_operations btrfs_dentry_operations; 2349extern const struct dentry_operations btrfs_dentry_operations;
2345 2350
2346/* ioctl.c */ 2351/* ioctl.c */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c1e59e33f020..009e3bd18f23 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1476,6 +1476,7 @@ static int cleaner_kthread(void *arg)
1476 1476
1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1477 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1478 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1478 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1479 btrfs_run_delayed_iputs(root);
1479 btrfs_clean_old_snapshots(root); 1480 btrfs_clean_old_snapshots(root);
1480 mutex_unlock(&root->fs_info->cleaner_mutex); 1481 mutex_unlock(&root->fs_info->cleaner_mutex);
1481 } 1482 }
@@ -1605,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1605 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 1606 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
1606 INIT_LIST_HEAD(&fs_info->trans_list); 1607 INIT_LIST_HEAD(&fs_info->trans_list);
1607 INIT_LIST_HEAD(&fs_info->dead_roots); 1608 INIT_LIST_HEAD(&fs_info->dead_roots);
1609 INIT_LIST_HEAD(&fs_info->delayed_iputs);
1608 INIT_LIST_HEAD(&fs_info->hashers); 1610 INIT_LIST_HEAD(&fs_info->hashers);
1609 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1611 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1610 INIT_LIST_HEAD(&fs_info->ordered_operations); 1612 INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1613,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 spin_lock_init(&fs_info->new_trans_lock); 1615 spin_lock_init(&fs_info->new_trans_lock);
1614 spin_lock_init(&fs_info->ref_cache_lock); 1616 spin_lock_init(&fs_info->ref_cache_lock);
1615 spin_lock_init(&fs_info->fs_roots_radix_lock); 1617 spin_lock_init(&fs_info->fs_roots_radix_lock);
1618 spin_lock_init(&fs_info->delayed_iput_lock);
1616 1619
1617 init_completion(&fs_info->kobj_unregister); 1620 init_completion(&fs_info->kobj_unregister);
1618 fs_info->tree_root = tree_root; 1621 fs_info->tree_root = tree_root;
@@ -2386,6 +2389,7 @@ int btrfs_commit_super(struct btrfs_root *root)
2386 int ret; 2389 int ret;
2387 2390
2388 mutex_lock(&root->fs_info->cleaner_mutex); 2391 mutex_lock(&root->fs_info->cleaner_mutex);
2392 btrfs_run_delayed_iputs(root);
2389 btrfs_clean_old_snapshots(root); 2393 btrfs_clean_old_snapshots(root);
2390 mutex_unlock(&root->fs_info->cleaner_mutex); 2394 mutex_unlock(&root->fs_info->cleaner_mutex);
2391 2395
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4a86508ce473..fcdccfa46004 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2880,9 +2880,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
2880 root = async->root; 2880 root = async->root;
2881 info = async->info; 2881 info = async->info;
2882 2882
2883 btrfs_start_delalloc_inodes(root); 2883 btrfs_start_delalloc_inodes(root, 0);
2884 wake_up(&info->flush_wait); 2884 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0); 2885 btrfs_wait_ordered_extents(root, 0, 0);
2886 2886
2887 spin_lock(&info->lock); 2887 spin_lock(&info->lock);
2888 info->flushing = 0; 2888 info->flushing = 0;
@@ -2956,8 +2956,8 @@ static void flush_delalloc(struct btrfs_root *root,
2956 return; 2956 return;
2957 2957
2958flush: 2958flush:
2959 btrfs_start_delalloc_inodes(root); 2959 btrfs_start_delalloc_inodes(root, 0);
2960 btrfs_wait_ordered_extents(root, 0); 2960 btrfs_wait_ordered_extents(root, 0, 0);
2961 2961
2962 spin_lock(&info->lock); 2962 spin_lock(&info->lock);
2963 info->flushing = 0; 2963 info->flushing = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 82740a3c628a..168e8c040aab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2022,6 +2022,54 @@ zeroit:
2022 return -EIO; 2022 return -EIO;
2023} 2023}
2024 2024
2025struct delayed_iput {
2026 struct list_head list;
2027 struct inode *inode;
2028};
2029
2030void btrfs_add_delayed_iput(struct inode *inode)
2031{
2032 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2033 struct delayed_iput *delayed;
2034
2035 if (atomic_add_unless(&inode->i_count, -1, 1))
2036 return;
2037
2038 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
2039 delayed->inode = inode;
2040
2041 spin_lock(&fs_info->delayed_iput_lock);
2042 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2043 spin_unlock(&fs_info->delayed_iput_lock);
2044}
2045
2046void btrfs_run_delayed_iputs(struct btrfs_root *root)
2047{
2048 LIST_HEAD(list);
2049 struct btrfs_fs_info *fs_info = root->fs_info;
2050 struct delayed_iput *delayed;
2051 int empty;
2052
2053 spin_lock(&fs_info->delayed_iput_lock);
2054 empty = list_empty(&fs_info->delayed_iputs);
2055 spin_unlock(&fs_info->delayed_iput_lock);
2056 if (empty)
2057 return;
2058
2059 down_read(&root->fs_info->cleanup_work_sem);
2060 spin_lock(&fs_info->delayed_iput_lock);
2061 list_splice_init(&fs_info->delayed_iputs, &list);
2062 spin_unlock(&fs_info->delayed_iput_lock);
2063
2064 while (!list_empty(&list)) {
2065 delayed = list_entry(list.next, struct delayed_iput, list);
2066 list_del(&delayed->list);
2067 iput(delayed->inode);
2068 kfree(delayed);
2069 }
2070 up_read(&root->fs_info->cleanup_work_sem);
2071}
2072
2025/* 2073/*
2026 * This creates an orphan entry for the given inode in case something goes 2074 * This creates an orphan entry for the given inode in case something goes
2027 * wrong in the middle of an unlink/truncate. 2075 * wrong in the middle of an unlink/truncate.
@@ -5568,7 +5616,7 @@ out_fail:
5568 * some fairly slow code that needs optimization. This walks the list 5616 * some fairly slow code that needs optimization. This walks the list
5569 * of all the inodes with pending delalloc and forces them to disk. 5617 * of all the inodes with pending delalloc and forces them to disk.
5570 */ 5618 */
5571int btrfs_start_delalloc_inodes(struct btrfs_root *root) 5619int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5572{ 5620{
5573 struct list_head *head = &root->fs_info->delalloc_inodes; 5621 struct list_head *head = &root->fs_info->delalloc_inodes;
5574 struct btrfs_inode *binode; 5622 struct btrfs_inode *binode;
@@ -5587,7 +5635,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
5587 spin_unlock(&root->fs_info->delalloc_lock); 5635 spin_unlock(&root->fs_info->delalloc_lock);
5588 if (inode) { 5636 if (inode) {
5589 filemap_flush(inode->i_mapping); 5637 filemap_flush(inode->i_mapping);
5590 iput(inode); 5638 if (delay_iput)
5639 btrfs_add_delayed_iput(inode);
5640 else
5641 iput(inode);
5591 } 5642 }
5592 cond_resched(); 5643 cond_resched();
5593 spin_lock(&root->fs_info->delalloc_lock); 5644 spin_lock(&root->fs_info->delalloc_lock);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9b16073bb875..b10a49d4bc6a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -352,7 +352,8 @@ int btrfs_remove_ordered_extent(struct inode *inode,
352 * wait for all the ordered extents in a root. This is done when balancing 352 * wait for all the ordered extents in a root. This is done when balancing
353 * space between drives. 353 * space between drives.
354 */ 354 */
355int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 355int btrfs_wait_ordered_extents(struct btrfs_root *root,
356 int nocow_only, int delay_iput)
356{ 357{
357 struct list_head splice; 358 struct list_head splice;
358 struct list_head *cur; 359 struct list_head *cur;
@@ -389,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
389 if (inode) { 390 if (inode) {
390 btrfs_start_ordered_extent(inode, ordered, 1); 391 btrfs_start_ordered_extent(inode, ordered, 1);
391 btrfs_put_ordered_extent(ordered); 392 btrfs_put_ordered_extent(ordered);
392 iput(inode); 393 if (delay_iput)
394 btrfs_add_delayed_iput(inode);
395 else
396 iput(inode);
393 } else { 397 } else {
394 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
395 } 399 }
@@ -447,7 +451,7 @@ again:
447 btrfs_wait_ordered_range(inode, 0, (u64)-1); 451 btrfs_wait_ordered_range(inode, 0, (u64)-1);
448 else 452 else
449 filemap_flush(inode->i_mapping); 453 filemap_flush(inode->i_mapping);
450 iput(inode); 454 btrfs_add_delayed_iput(inode);
451 } 455 }
452 456
453 cond_resched(); 457 cond_resched();
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4fa20398aec1..1fe1282ef47c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -153,9 +153,10 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 153int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
154 struct btrfs_ordered_extent *ordered); 154 struct btrfs_ordered_extent *ordered);
155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 155int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
156int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
157int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 156int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
158int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 157int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
159 struct btrfs_root *root, 158 struct btrfs_root *root,
160 struct inode *inode); 159 struct inode *inode);
160int btrfs_wait_ordered_extents(struct btrfs_root *root,
161 int nocow_only, int delay_iput);
161#endif 162#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f2aa53d2f944..a9728680eca8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3541,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3541 (unsigned long long)rc->block_group->key.objectid, 3541 (unsigned long long)rc->block_group->key.objectid,
3542 (unsigned long long)rc->block_group->flags); 3542 (unsigned long long)rc->block_group->flags);
3543 3543
3544 btrfs_start_delalloc_inodes(fs_info->tree_root); 3544 btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3545 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3545 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3546 3546
3547 while (1) { 3547 while (1) {
3548 rc->extents_found = 0; 3548 rc->extents_found = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf53..270cc96b9a43 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -405,8 +405,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
405 return 0; 405 return 0;
406 } 406 }
407 407
408 btrfs_start_delalloc_inodes(root); 408 btrfs_start_delalloc_inodes(root, 0);
409 btrfs_wait_ordered_extents(root, 0); 409 btrfs_wait_ordered_extents(root, 0, 0);
410 410
411 trans = btrfs_start_transaction(root, 1); 411 trans = btrfs_start_transaction(root, 1);
412 ret = btrfs_commit_transaction(trans, root); 412 ret = btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 728e8fe5d2cc..75b31caade29 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
333 memset(trans, 0, sizeof(*trans)); 333 memset(trans, 0, sizeof(*trans));
334 kmem_cache_free(btrfs_trans_handle_cachep, trans); 334 kmem_cache_free(btrfs_trans_handle_cachep, trans);
335 335
336 if (throttle)
337 btrfs_run_delayed_iputs(root);
338
336 return 0; 339 return 0;
337} 340}
338 341
@@ -991,11 +994,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
991 mutex_unlock(&root->fs_info->trans_mutex); 994 mutex_unlock(&root->fs_info->trans_mutex);
992 995
993 if (flush_on_commit) { 996 if (flush_on_commit) {
994 btrfs_start_delalloc_inodes(root); 997 btrfs_start_delalloc_inodes(root, 1);
995 ret = btrfs_wait_ordered_extents(root, 0); 998 ret = btrfs_wait_ordered_extents(root, 0, 1);
996 BUG_ON(ret); 999 BUG_ON(ret);
997 } else if (snap_pending) { 1000 } else if (snap_pending) {
998 ret = btrfs_wait_ordered_extents(root, 1); 1001 ret = btrfs_wait_ordered_extents(root, 0, 1);
999 BUG_ON(ret); 1002 BUG_ON(ret);
1000 } 1003 }
1001 1004
@@ -1113,6 +1116,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1113 current->journal_info = NULL; 1116 current->journal_info = NULL;
1114 1117
1115 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1118 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1119
1120 if (current != root->fs_info->transaction_kthread)
1121 btrfs_run_delayed_iputs(root);
1122
1116 return ret; 1123 return ret;
1117} 1124}
1118 1125