diff options
author | Yan, Zheng <zheng.yan@oracle.com> | 2009-11-12 04:36:34 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-12-17 12:33:35 -0500 |
commit | 24bbcf0442ee04660a5a030efdbb6d03f1c275cb (patch) | |
tree | aa57d77d29cc5150b272cc3f6465f10262fcbaac | |
parent | f34f57a3ab4e73304d78c125682f1a53cd3975f2 (diff) |
Btrfs: Add delayed iput
iput() can trigger new transactions if we are dropping the
final reference, so calling it in btrfs_commit_transaction
may end up in a deadlock. This patch adds delayed iput to
avoid the issue.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/ctree.h | 7 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 4 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 8 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 55 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 10 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 3 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 4 | ||||
-rw-r--r-- | fs/btrfs/super.c | 4 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 13 |
9 files changed, 90 insertions, 18 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a7cac2148c7c..1983c889bb1c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -872,6 +872,9 @@ struct btrfs_fs_info { | |||
872 | struct list_head dead_roots; | 872 | struct list_head dead_roots; |
873 | struct list_head caching_block_groups; | 873 | struct list_head caching_block_groups; |
874 | 874 | ||
875 | spinlock_t delayed_iput_lock; | ||
876 | struct list_head delayed_iputs; | ||
877 | |||
875 | atomic_t nr_async_submits; | 878 | atomic_t nr_async_submits; |
876 | atomic_t async_submit_draining; | 879 | atomic_t async_submit_draining; |
877 | atomic_t nr_async_bios; | 880 | atomic_t nr_async_bios; |
@@ -2301,7 +2304,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2301 | struct inode *inode, u64 new_size, | 2304 | struct inode *inode, u64 new_size, |
2302 | u32 min_type); | 2305 | u32 min_type); |
2303 | 2306 | ||
2304 | int btrfs_start_delalloc_inodes(struct btrfs_root *root); | 2307 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
2305 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); | 2308 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); |
2306 | int btrfs_writepages(struct address_space *mapping, | 2309 | int btrfs_writepages(struct address_space *mapping, |
2307 | struct writeback_control *wbc); | 2310 | struct writeback_control *wbc); |
@@ -2341,6 +2344,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | |||
2341 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2344 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
2342 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2345 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
2343 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2346 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2347 | void btrfs_add_delayed_iput(struct inode *inode); | ||
2348 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | ||
2344 | extern const struct dentry_operations btrfs_dentry_operations; | 2349 | extern const struct dentry_operations btrfs_dentry_operations; |
2345 | 2350 | ||
2346 | /* ioctl.c */ | 2351 | /* ioctl.c */ |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c1e59e33f020..009e3bd18f23 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1476,6 +1476,7 @@ static int cleaner_kthread(void *arg) | |||
1476 | 1476 | ||
1477 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1477 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
1478 | mutex_trylock(&root->fs_info->cleaner_mutex)) { | 1478 | mutex_trylock(&root->fs_info->cleaner_mutex)) { |
1479 | btrfs_run_delayed_iputs(root); | ||
1479 | btrfs_clean_old_snapshots(root); | 1480 | btrfs_clean_old_snapshots(root); |
1480 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1481 | mutex_unlock(&root->fs_info->cleaner_mutex); |
1481 | } | 1482 | } |
@@ -1605,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1605 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1606 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1606 | INIT_LIST_HEAD(&fs_info->trans_list); | 1607 | INIT_LIST_HEAD(&fs_info->trans_list); |
1607 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1608 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1609 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | ||
1608 | INIT_LIST_HEAD(&fs_info->hashers); | 1610 | INIT_LIST_HEAD(&fs_info->hashers); |
1609 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1611 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1610 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1612 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
@@ -1613,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1613 | spin_lock_init(&fs_info->new_trans_lock); | 1615 | spin_lock_init(&fs_info->new_trans_lock); |
1614 | spin_lock_init(&fs_info->ref_cache_lock); | 1616 | spin_lock_init(&fs_info->ref_cache_lock); |
1615 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1617 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
1618 | spin_lock_init(&fs_info->delayed_iput_lock); | ||
1616 | 1619 | ||
1617 | init_completion(&fs_info->kobj_unregister); | 1620 | init_completion(&fs_info->kobj_unregister); |
1618 | fs_info->tree_root = tree_root; | 1621 | fs_info->tree_root = tree_root; |
@@ -2386,6 +2389,7 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2386 | int ret; | 2389 | int ret; |
2387 | 2390 | ||
2388 | mutex_lock(&root->fs_info->cleaner_mutex); | 2391 | mutex_lock(&root->fs_info->cleaner_mutex); |
2392 | btrfs_run_delayed_iputs(root); | ||
2389 | btrfs_clean_old_snapshots(root); | 2393 | btrfs_clean_old_snapshots(root); |
2390 | mutex_unlock(&root->fs_info->cleaner_mutex); | 2394 | mutex_unlock(&root->fs_info->cleaner_mutex); |
2391 | 2395 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4a86508ce473..fcdccfa46004 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -2880,9 +2880,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work) | |||
2880 | root = async->root; | 2880 | root = async->root; |
2881 | info = async->info; | 2881 | info = async->info; |
2882 | 2882 | ||
2883 | btrfs_start_delalloc_inodes(root); | 2883 | btrfs_start_delalloc_inodes(root, 0); |
2884 | wake_up(&info->flush_wait); | 2884 | wake_up(&info->flush_wait); |
2885 | btrfs_wait_ordered_extents(root, 0); | 2885 | btrfs_wait_ordered_extents(root, 0, 0); |
2886 | 2886 | ||
2887 | spin_lock(&info->lock); | 2887 | spin_lock(&info->lock); |
2888 | info->flushing = 0; | 2888 | info->flushing = 0; |
@@ -2956,8 +2956,8 @@ static void flush_delalloc(struct btrfs_root *root, | |||
2956 | return; | 2956 | return; |
2957 | 2957 | ||
2958 | flush: | 2958 | flush: |
2959 | btrfs_start_delalloc_inodes(root); | 2959 | btrfs_start_delalloc_inodes(root, 0); |
2960 | btrfs_wait_ordered_extents(root, 0); | 2960 | btrfs_wait_ordered_extents(root, 0, 0); |
2961 | 2961 | ||
2962 | spin_lock(&info->lock); | 2962 | spin_lock(&info->lock); |
2963 | info->flushing = 0; | 2963 | info->flushing = 0; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82740a3c628a..168e8c040aab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -2022,6 +2022,54 @@ zeroit: | |||
2022 | return -EIO; | 2022 | return -EIO; |
2023 | } | 2023 | } |
2024 | 2024 | ||
2025 | struct delayed_iput { | ||
2026 | struct list_head list; | ||
2027 | struct inode *inode; | ||
2028 | }; | ||
2029 | |||
2030 | void btrfs_add_delayed_iput(struct inode *inode) | ||
2031 | { | ||
2032 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||
2033 | struct delayed_iput *delayed; | ||
2034 | |||
2035 | if (atomic_add_unless(&inode->i_count, -1, 1)) | ||
2036 | return; | ||
2037 | |||
2038 | delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); | ||
2039 | delayed->inode = inode; | ||
2040 | |||
2041 | spin_lock(&fs_info->delayed_iput_lock); | ||
2042 | list_add_tail(&delayed->list, &fs_info->delayed_iputs); | ||
2043 | spin_unlock(&fs_info->delayed_iput_lock); | ||
2044 | } | ||
2045 | |||
2046 | void btrfs_run_delayed_iputs(struct btrfs_root *root) | ||
2047 | { | ||
2048 | LIST_HEAD(list); | ||
2049 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
2050 | struct delayed_iput *delayed; | ||
2051 | int empty; | ||
2052 | |||
2053 | spin_lock(&fs_info->delayed_iput_lock); | ||
2054 | empty = list_empty(&fs_info->delayed_iputs); | ||
2055 | spin_unlock(&fs_info->delayed_iput_lock); | ||
2056 | if (empty) | ||
2057 | return; | ||
2058 | |||
2059 | down_read(&root->fs_info->cleanup_work_sem); | ||
2060 | spin_lock(&fs_info->delayed_iput_lock); | ||
2061 | list_splice_init(&fs_info->delayed_iputs, &list); | ||
2062 | spin_unlock(&fs_info->delayed_iput_lock); | ||
2063 | |||
2064 | while (!list_empty(&list)) { | ||
2065 | delayed = list_entry(list.next, struct delayed_iput, list); | ||
2066 | list_del(&delayed->list); | ||
2067 | iput(delayed->inode); | ||
2068 | kfree(delayed); | ||
2069 | } | ||
2070 | up_read(&root->fs_info->cleanup_work_sem); | ||
2071 | } | ||
2072 | |||
2025 | /* | 2073 | /* |
2026 | * This creates an orphan entry for the given inode in case something goes | 2074 | * This creates an orphan entry for the given inode in case something goes |
2027 | * wrong in the middle of an unlink/truncate. | 2075 | * wrong in the middle of an unlink/truncate. |
@@ -5568,7 +5616,7 @@ out_fail: | |||
5568 | * some fairly slow code that needs optimization. This walks the list | 5616 | * some fairly slow code that needs optimization. This walks the list |
5569 | * of all the inodes with pending delalloc and forces them to disk. | 5617 | * of all the inodes with pending delalloc and forces them to disk. |
5570 | */ | 5618 | */ |
5571 | int btrfs_start_delalloc_inodes(struct btrfs_root *root) | 5619 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) |
5572 | { | 5620 | { |
5573 | struct list_head *head = &root->fs_info->delalloc_inodes; | 5621 | struct list_head *head = &root->fs_info->delalloc_inodes; |
5574 | struct btrfs_inode *binode; | 5622 | struct btrfs_inode *binode; |
@@ -5587,7 +5635,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) | |||
5587 | spin_unlock(&root->fs_info->delalloc_lock); | 5635 | spin_unlock(&root->fs_info->delalloc_lock); |
5588 | if (inode) { | 5636 | if (inode) { |
5589 | filemap_flush(inode->i_mapping); | 5637 | filemap_flush(inode->i_mapping); |
5590 | iput(inode); | 5638 | if (delay_iput) |
5639 | btrfs_add_delayed_iput(inode); | ||
5640 | else | ||
5641 | iput(inode); | ||
5591 | } | 5642 | } |
5592 | cond_resched(); | 5643 | cond_resched(); |
5593 | spin_lock(&root->fs_info->delalloc_lock); | 5644 | spin_lock(&root->fs_info->delalloc_lock); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9b16073bb875..b10a49d4bc6a 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -352,7 +352,8 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
352 | * wait for all the ordered extents in a root. This is done when balancing | 352 | * wait for all the ordered extents in a root. This is done when balancing |
353 | * space between drives. | 353 | * space between drives. |
354 | */ | 354 | */ |
355 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | 355 | int btrfs_wait_ordered_extents(struct btrfs_root *root, |
356 | int nocow_only, int delay_iput) | ||
356 | { | 357 | { |
357 | struct list_head splice; | 358 | struct list_head splice; |
358 | struct list_head *cur; | 359 | struct list_head *cur; |
@@ -389,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | |||
389 | if (inode) { | 390 | if (inode) { |
390 | btrfs_start_ordered_extent(inode, ordered, 1); | 391 | btrfs_start_ordered_extent(inode, ordered, 1); |
391 | btrfs_put_ordered_extent(ordered); | 392 | btrfs_put_ordered_extent(ordered); |
392 | iput(inode); | 393 | if (delay_iput) |
394 | btrfs_add_delayed_iput(inode); | ||
395 | else | ||
396 | iput(inode); | ||
393 | } else { | 397 | } else { |
394 | btrfs_put_ordered_extent(ordered); | 398 | btrfs_put_ordered_extent(ordered); |
395 | } | 399 | } |
@@ -447,7 +451,7 @@ again: | |||
447 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 451 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
448 | else | 452 | else |
449 | filemap_flush(inode->i_mapping); | 453 | filemap_flush(inode->i_mapping); |
450 | iput(inode); | 454 | btrfs_add_delayed_iput(inode); |
451 | } | 455 | } |
452 | 456 | ||
453 | cond_resched(); | 457 | cond_resched(); |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 4fa20398aec1..1fe1282ef47c 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -153,9 +153,10 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | |||
153 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 153 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
154 | struct btrfs_ordered_extent *ordered); | 154 | struct btrfs_ordered_extent *ordered); |
155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
156 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | ||
157 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 156 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); |
158 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 157 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
159 | struct btrfs_root *root, | 158 | struct btrfs_root *root, |
160 | struct inode *inode); | 159 | struct inode *inode); |
160 | int btrfs_wait_ordered_extents(struct btrfs_root *root, | ||
161 | int nocow_only, int delay_iput); | ||
161 | #endif | 162 | #endif |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f2aa53d2f944..a9728680eca8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -3541,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3541 | (unsigned long long)rc->block_group->key.objectid, | 3541 | (unsigned long long)rc->block_group->key.objectid, |
3542 | (unsigned long long)rc->block_group->flags); | 3542 | (unsigned long long)rc->block_group->flags); |
3543 | 3543 | ||
3544 | btrfs_start_delalloc_inodes(fs_info->tree_root); | 3544 | btrfs_start_delalloc_inodes(fs_info->tree_root, 0); |
3545 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 3545 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); |
3546 | 3546 | ||
3547 | while (1) { | 3547 | while (1) { |
3548 | rc->extents_found = 0; | 3548 | rc->extents_found = 0; |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 752a5463bf53..270cc96b9a43 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -405,8 +405,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
405 | return 0; | 405 | return 0; |
406 | } | 406 | } |
407 | 407 | ||
408 | btrfs_start_delalloc_inodes(root); | 408 | btrfs_start_delalloc_inodes(root, 0); |
409 | btrfs_wait_ordered_extents(root, 0); | 409 | btrfs_wait_ordered_extents(root, 0, 0); |
410 | 410 | ||
411 | trans = btrfs_start_transaction(root, 1); | 411 | trans = btrfs_start_transaction(root, 1); |
412 | ret = btrfs_commit_transaction(trans, root); | 412 | ret = btrfs_commit_transaction(trans, root); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 728e8fe5d2cc..75b31caade29 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
333 | memset(trans, 0, sizeof(*trans)); | 333 | memset(trans, 0, sizeof(*trans)); |
334 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 334 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
335 | 335 | ||
336 | if (throttle) | ||
337 | btrfs_run_delayed_iputs(root); | ||
338 | |||
336 | return 0; | 339 | return 0; |
337 | } | 340 | } |
338 | 341 | ||
@@ -991,11 +994,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
991 | mutex_unlock(&root->fs_info->trans_mutex); | 994 | mutex_unlock(&root->fs_info->trans_mutex); |
992 | 995 | ||
993 | if (flush_on_commit) { | 996 | if (flush_on_commit) { |
994 | btrfs_start_delalloc_inodes(root); | 997 | btrfs_start_delalloc_inodes(root, 1); |
995 | ret = btrfs_wait_ordered_extents(root, 0); | 998 | ret = btrfs_wait_ordered_extents(root, 0, 1); |
996 | BUG_ON(ret); | 999 | BUG_ON(ret); |
997 | } else if (snap_pending) { | 1000 | } else if (snap_pending) { |
998 | ret = btrfs_wait_ordered_extents(root, 1); | 1001 | ret = btrfs_wait_ordered_extents(root, 0, 1); |
999 | BUG_ON(ret); | 1002 | BUG_ON(ret); |
1000 | } | 1003 | } |
1001 | 1004 | ||
@@ -1113,6 +1116,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1113 | current->journal_info = NULL; | 1116 | current->journal_info = NULL; |
1114 | 1117 | ||
1115 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1118 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
1119 | |||
1120 | if (current != root->fs_info->transaction_kthread) | ||
1121 | btrfs_run_delayed_iputs(root); | ||
1122 | |||
1116 | return ret; | 1123 | return ret; |
1117 | } | 1124 | } |
1118 | 1125 | ||