aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fusionio.com> 2013-02-13 11:09:14 -0500
committer Josef Bacik <jbacik@fusionio.com> 2013-02-20 12:59:57 -0500
commit 569e0f358c0c37f6733702d4a5d2c412860f7169 (patch)
tree 8ab0ece9bd2716da66a43406628f6f5176604817
parent dde5740fdd6175fc95aecf4ccc7856fbbad9b44e (diff)
Btrfs: place ordered operations on a per transaction list
Miao made the ordered operations stuff run async, which introduced a deadlock where we could get somebody (sync) racing in and committing the transaction while a commit was already happening. The new committer would try to flush ordered operations, which would hang waiting for the commit to finish because the flush is done asynchronously and no longer inherits the caller's trans handle. To fix this we need to make the ordered operations list a per-transaction list. We can get new inodes added to the ordered operations list by truncating them and then having another process write to them, so this makes it so that anybody trying to add an ordered operation _must_ start a transaction in order to add itself to the list, which will keep new inodes from getting added to the ordered operations list after we start committing. This should fix the deadlock and also keep us from doing a lot more work than we need to during commit. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com>
-rw-r--r-- fs/btrfs/ctree.h 7
-rw-r--r-- fs/btrfs/disk-io.c 11
-rw-r--r-- fs/btrfs/file.c 15
-rw-r--r-- fs/btrfs/ordered-data.c 13
-rw-r--r-- fs/btrfs/ordered-data.h 3
-rw-r--r-- fs/btrfs/transaction.c 5
-rw-r--r-- fs/btrfs/transaction.h 1
7 files changed, 34 insertions, 21 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 14f01dc70ea6..961ff2986341 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1408,13 +1408,6 @@ struct btrfs_fs_info {
1408 struct list_head delalloc_inodes; 1408 struct list_head delalloc_inodes;
1409 1409
1410 /* 1410 /*
1411 * special rename and truncate targets that must be on disk before
1412 * we're allowed to commit. This is basically the ext3 style
1413 * data=ordered list.
1414 */
1415 struct list_head ordered_operations;
1416
1417 /*
1418 * there is a pool of worker threads for checksumming during writes 1411 * there is a pool of worker threads for checksumming during writes
1419 * and a pool for checksumming after reads. This is because readers 1412 * and a pool for checksumming after reads. This is because readers
1420 * can run with FS locks held, and the writers may be waiting for 1413 * can run with FS locks held, and the writers may be waiting for
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9fcce54ecde4..e511d9f78c19 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -56,7 +56,8 @@ static void end_workqueue_fn(struct btrfs_work *work);
56static void free_fs_root(struct btrfs_root *root); 56static void free_fs_root(struct btrfs_root *root);
57static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 57static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
58 int read_only); 58 int read_only);
59static void btrfs_destroy_ordered_operations(struct btrfs_root *root); 59static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
60 struct btrfs_root *root);
60static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 61static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
61static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 62static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
62 struct btrfs_root *root); 63 struct btrfs_root *root);
@@ -2029,7 +2030,6 @@ int open_ctree(struct super_block *sb,
2029 INIT_LIST_HEAD(&fs_info->dead_roots); 2030 INIT_LIST_HEAD(&fs_info->dead_roots);
2030 INIT_LIST_HEAD(&fs_info->delayed_iputs); 2031 INIT_LIST_HEAD(&fs_info->delayed_iputs);
2031 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 2032 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
2032 INIT_LIST_HEAD(&fs_info->ordered_operations);
2033 INIT_LIST_HEAD(&fs_info->caching_block_groups); 2033 INIT_LIST_HEAD(&fs_info->caching_block_groups);
2034 spin_lock_init(&fs_info->delalloc_lock); 2034 spin_lock_init(&fs_info->delalloc_lock);
2035 spin_lock_init(&fs_info->trans_lock); 2035 spin_lock_init(&fs_info->trans_lock);
@@ -3538,7 +3538,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
3538 btrfs_cleanup_transaction(root); 3538 btrfs_cleanup_transaction(root);
3539} 3539}
3540 3540
3541static void btrfs_destroy_ordered_operations(struct btrfs_root *root) 3541static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3542 struct btrfs_root *root)
3542{ 3543{
3543 struct btrfs_inode *btrfs_inode; 3544 struct btrfs_inode *btrfs_inode;
3544 struct list_head splice; 3545 struct list_head splice;
@@ -3548,7 +3549,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
3548 mutex_lock(&root->fs_info->ordered_operations_mutex); 3549 mutex_lock(&root->fs_info->ordered_operations_mutex);
3549 spin_lock(&root->fs_info->ordered_extent_lock); 3550 spin_lock(&root->fs_info->ordered_extent_lock);
3550 3551
3551 list_splice_init(&root->fs_info->ordered_operations, &splice); 3552 list_splice_init(&t->ordered_operations, &splice);
3552 while (!list_empty(&splice)) { 3553 while (!list_empty(&splice)) {
3553 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 3554 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
3554 ordered_operations); 3555 ordered_operations);
@@ -3829,7 +3830,7 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3829 while (!list_empty(&list)) { 3830 while (!list_empty(&list)) {
3830 t = list_entry(list.next, struct btrfs_transaction, list); 3831 t = list_entry(list.next, struct btrfs_transaction, list);
3831 3832
3832 btrfs_destroy_ordered_operations(root); 3833 btrfs_destroy_ordered_operations(t, root);
3833 3834
3834 btrfs_destroy_ordered_extents(root); 3835 btrfs_destroy_ordered_extents(root);
3835 3836
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 75d0fe134be3..b12ba52c4505 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1628,7 +1628,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1628 */ 1628 */
1629 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 1629 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1630 &BTRFS_I(inode)->runtime_flags)) { 1630 &BTRFS_I(inode)->runtime_flags)) {
1631 btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); 1631 struct btrfs_trans_handle *trans;
1632 struct btrfs_root *root = BTRFS_I(inode)->root;
1633
1634 /*
1635 * We need to block on a committing transaction to keep us from
1636 * throwing a ordered operation on to the list and causing
1637 * something like sync to deadlock trying to flush out this
1638 * inode.
1639 */
1640 trans = btrfs_start_transaction(root, 0);
1641 if (IS_ERR(trans))
1642 return PTR_ERR(trans);
1643 btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
1644 btrfs_end_transaction(trans, root);
1632 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 1645 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1633 filemap_flush(inode->i_mapping); 1646 filemap_flush(inode->i_mapping);
1634 } 1647 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9489fa96e3ed..dc08d77b717e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -612,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
612 * extra check to make sure the ordered operation list really is empty 612 * extra check to make sure the ordered operation list really is empty
613 * before we return 613 * before we return
614 */ 614 */
615int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) 615int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
616 struct btrfs_root *root, int wait)
616{ 617{
617 struct btrfs_inode *btrfs_inode; 618 struct btrfs_inode *btrfs_inode;
618 struct inode *inode; 619 struct inode *inode;
620 struct btrfs_transaction *cur_trans = trans->transaction;
619 struct list_head splice; 621 struct list_head splice;
620 struct list_head works; 622 struct list_head works;
621 struct btrfs_delalloc_work *work, *next; 623 struct btrfs_delalloc_work *work, *next;
@@ -626,7 +628,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
626 628
627 mutex_lock(&root->fs_info->ordered_operations_mutex); 629 mutex_lock(&root->fs_info->ordered_operations_mutex);
628 spin_lock(&root->fs_info->ordered_extent_lock); 630 spin_lock(&root->fs_info->ordered_extent_lock);
629 list_splice_init(&root->fs_info->ordered_operations, &splice); 631 list_splice_init(&cur_trans->ordered_operations, &splice);
630 while (!list_empty(&splice)) { 632 while (!list_empty(&splice)) {
631 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 633 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
632 ordered_operations); 634 ordered_operations);
@@ -643,7 +645,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
643 645
644 if (!wait) 646 if (!wait)
645 list_add_tail(&BTRFS_I(inode)->ordered_operations, 647 list_add_tail(&BTRFS_I(inode)->ordered_operations,
646 &root->fs_info->ordered_operations); 648 &cur_trans->ordered_operations);
647 spin_unlock(&root->fs_info->ordered_extent_lock); 649 spin_unlock(&root->fs_info->ordered_extent_lock);
648 650
649 work = btrfs_alloc_delalloc_work(inode, wait, 1); 651 work = btrfs_alloc_delalloc_work(inode, wait, 1);
@@ -653,7 +655,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
653 list_add_tail(&btrfs_inode->ordered_operations, 655 list_add_tail(&btrfs_inode->ordered_operations,
654 &splice); 656 &splice);
655 list_splice_tail(&splice, 657 list_splice_tail(&splice,
656 &root->fs_info->ordered_operations); 658 &cur_trans->ordered_operations);
657 spin_unlock(&root->fs_info->ordered_extent_lock); 659 spin_unlock(&root->fs_info->ordered_extent_lock);
658 ret = -ENOMEM; 660 ret = -ENOMEM;
659 goto out; 661 goto out;
@@ -1033,6 +1035,7 @@ out:
1033void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 1035void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
1034 struct btrfs_root *root, struct inode *inode) 1036 struct btrfs_root *root, struct inode *inode)
1035{ 1037{
1038 struct btrfs_transaction *cur_trans = trans->transaction;
1036 u64 last_mod; 1039 u64 last_mod;
1037 1040
1038 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); 1041 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
@@ -1047,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
1047 spin_lock(&root->fs_info->ordered_extent_lock); 1050 spin_lock(&root->fs_info->ordered_extent_lock);
1048 if (list_empty(&BTRFS_I(inode)->ordered_operations)) { 1051 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
1049 list_add_tail(&BTRFS_I(inode)->ordered_operations, 1052 list_add_tail(&BTRFS_I(inode)->ordered_operations,
1050 &root->fs_info->ordered_operations); 1053 &cur_trans->ordered_operations);
1051 } 1054 }
1052 spin_unlock(&root->fs_info->ordered_extent_lock); 1055 spin_unlock(&root->fs_info->ordered_extent_lock);
1053} 1056}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index d523dbd2314d..267ac99095f6 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -197,7 +197,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
197int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 197int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
198 struct btrfs_ordered_extent *ordered); 198 struct btrfs_ordered_extent *ordered);
199int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 199int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
200int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 200int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
201 struct btrfs_root *root, int wait);
201void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 202void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
202 struct btrfs_root *root, 203 struct btrfs_root *root,
203 struct inode *inode); 204 struct inode *inode);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d574d830a1c4..0c87d18d1881 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -157,6 +157,7 @@ loop:
157 spin_lock_init(&cur_trans->delayed_refs.lock); 157 spin_lock_init(&cur_trans->delayed_refs.lock);
158 158
159 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 159 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
160 INIT_LIST_HEAD(&cur_trans->ordered_operations);
160 list_add_tail(&cur_trans->list, &fs_info->trans_list); 161 list_add_tail(&cur_trans->list, &fs_info->trans_list);
161 extent_io_tree_init(&cur_trans->dirty_pages, 162 extent_io_tree_init(&cur_trans->dirty_pages,
162 fs_info->btree_inode->i_mapping); 163 fs_info->btree_inode->i_mapping);
@@ -1456,7 +1457,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1456 * it here and no for sure that nothing new will be added 1457 * it here and no for sure that nothing new will be added
1457 * to the list 1458 * to the list
1458 */ 1459 */
1459 ret = btrfs_run_ordered_operations(root, 1); 1460 ret = btrfs_run_ordered_operations(trans, root, 1);
1460 1461
1461 return ret; 1462 return ret;
1462} 1463}
@@ -1479,7 +1480,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1479 int should_grow = 0; 1480 int should_grow = 0;
1480 unsigned long now = get_seconds(); 1481 unsigned long now = get_seconds();
1481 1482
1482 ret = btrfs_run_ordered_operations(root, 0); 1483 ret = btrfs_run_ordered_operations(trans, root, 0);
1483 if (ret) { 1484 if (ret) {
1484 btrfs_abort_transaction(trans, root, ret); 1485 btrfs_abort_transaction(trans, root, ret);
1485 btrfs_end_transaction(trans, root); 1486 btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 46628210e5d8..3f772fd0191a 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -43,6 +43,7 @@ struct btrfs_transaction {
43 wait_queue_head_t writer_wait; 43 wait_queue_head_t writer_wait;
44 wait_queue_head_t commit_wait; 44 wait_queue_head_t commit_wait;
45 struct list_head pending_snapshots; 45 struct list_head pending_snapshots;
46 struct list_head ordered_operations;
46 struct btrfs_delayed_ref_root delayed_refs; 47 struct btrfs_delayed_ref_root delayed_refs;
47 int aborted; 48 int aborted;
48}; 49};