aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2009-03-12 20:12:45 -0400
committer Chris Mason <chris.mason@oracle.com> 2009-03-24 16:14:26 -0400
commit b7ec40d7845bffca8bb3af2ea3f192d6257bbe21 (patch)
tree 65b833b979417d36f0fd26d647573de1df0646b9
parent c3e69d58e86c3917ae4e9e31b4acf490a7cafe60 (diff)
Btrfs: reduce stalls during transaction commit
To avoid deadlocks and reduce latencies during some critical operations, some transaction writers are allowed to jump into the running transaction and make it run a little longer, while others sit around and wait for the commit to finish. This is a bit unfair, especially when the callers that jump in do a bunch of IO that makes all the other procs on the box wait. This commit reduces the stalls this produces by pre-reading file extent pointers during btrfs_finish_ordered_io before the transaction is joined. It also tunes the drop_snapshot code to politely wait for transactions that have started writing out their delayed refs to finish. This avoids new delayed refs being flooded into the queue while we're trying to close off the transaction. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/extent-tree.c 3
-rw-r--r-- fs/btrfs/inode.c 18
-rw-r--r-- fs/btrfs/transaction.c 62
-rw-r--r-- fs/btrfs/transaction.h 5
4 files changed, 80 insertions, 8 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3b8b6c212701..a421c32c6cfe 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3797,7 +3797,8 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3797 break; 3797 break;
3798 if (wret < 0) 3798 if (wret < 0)
3799 ret = wret; 3799 ret = wret;
3800 if (trans->transaction->in_commit) { 3800 if (trans->transaction->in_commit ||
3801 trans->transaction->delayed_refs.flushing) {
3801 ret = -EAGAIN; 3802 ret = -EAGAIN;
3802 break; 3803 break;
3803 } 3804 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7d4f948bc22a..13a17477c4f4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1502,6 +1502,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1502 struct btrfs_trans_handle *trans; 1502 struct btrfs_trans_handle *trans;
1503 struct btrfs_ordered_extent *ordered_extent; 1503 struct btrfs_ordered_extent *ordered_extent;
1504 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1504 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1505 struct btrfs_path *path;
1505 int compressed = 0; 1506 int compressed = 0;
1506 int ret; 1507 int ret;
1507 1508
@@ -1509,6 +1510,23 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1509 if (!ret) 1510 if (!ret)
1510 return 0; 1511 return 0;
1511 1512
1513 /*
1514 * before we join the transaction, try to do some of our IO.
1515 * This will limit the amount of IO that we have to do with
1516 * the transaction running. We're unlikely to need to do any
1517 * IO if the file extents are new, the disk_i_size checks
1518 * covers the most common case.
1519 */
1520 if (start < BTRFS_I(inode)->disk_i_size) {
1521 path = btrfs_alloc_path();
1522 if (path) {
1523 ret = btrfs_lookup_file_extent(NULL, root, path,
1524 inode->i_ino,
1525 start, 0);
1526 btrfs_free_path(path);
1527 }
1528 }
1529
1512 trans = btrfs_join_transaction(root, 1); 1530 trans = btrfs_join_transaction(root, 1);
1513 1531
1514 ordered_extent = btrfs_lookup_ordered_extent(inode, start); 1532 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 903edab3659a..01c9620bb001 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -192,6 +192,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
192 h->alloc_exclude_nr = 0; 192 h->alloc_exclude_nr = 0;
193 h->alloc_exclude_start = 0; 193 h->alloc_exclude_start = 0;
194 h->delayed_ref_updates = 0; 194 h->delayed_ref_updates = 0;
195
195 root->fs_info->running_transaction->use_count++; 196 root->fs_info->running_transaction->use_count++;
196 mutex_unlock(&root->fs_info->trans_mutex); 197 mutex_unlock(&root->fs_info->trans_mutex);
197 return h; 198 return h;
@@ -281,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root)
281 if (!root->fs_info->open_ioctl_trans) 282 if (!root->fs_info->open_ioctl_trans)
282 wait_current_trans(root); 283 wait_current_trans(root);
283 mutex_unlock(&root->fs_info->trans_mutex); 284 mutex_unlock(&root->fs_info->trans_mutex);
284
285 throttle_on_drops(root); 285 throttle_on_drops(root);
286} 286}
287 287
@@ -298,6 +298,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
298 if (cur && 298 if (cur &&
299 trans->transaction->delayed_refs.num_heads_ready > 64) { 299 trans->transaction->delayed_refs.num_heads_ready > 64) {
300 trans->delayed_ref_updates = 0; 300 trans->delayed_ref_updates = 0;
301
302 /*
303 * do a full flush if the transaction is trying
304 * to close
305 */
306 if (trans->transaction->delayed_refs.flushing)
307 cur = 0;
301 btrfs_run_delayed_refs(trans, root, cur); 308 btrfs_run_delayed_refs(trans, root, cur);
302 } else { 309 } else {
303 break; 310 break;
@@ -666,6 +673,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
666} 673}
667 674
668/* 675/*
676 * when dropping snapshots, we generate a ton of delayed refs, and it makes
677 * sense not to join the transaction while it is trying to flush the current
678 * queue of delayed refs out.
679 *
680 * This is used by the drop snapshot code only
681 */
682static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
683{
684 DEFINE_WAIT(wait);
685
686 mutex_lock(&info->trans_mutex);
687 while (info->running_transaction &&
688 info->running_transaction->delayed_refs.flushing) {
689 prepare_to_wait(&info->transaction_wait, &wait,
690 TASK_UNINTERRUPTIBLE);
691 mutex_unlock(&info->trans_mutex);
692 schedule();
693 mutex_lock(&info->trans_mutex);
694 finish_wait(&info->transaction_wait, &wait);
695 }
696 mutex_unlock(&info->trans_mutex);
697 return 0;
698}
699
700/*
669 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 701 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
670 * all of them 702 * all of them
671 */ 703 */
@@ -692,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
692 atomic_inc(&root->fs_info->throttles); 724 atomic_inc(&root->fs_info->throttles);
693 725
694 while (1) { 726 while (1) {
727 /*
728 * we don't want to jump in and create a bunch of
729 * delayed refs if the transaction is starting to close
730 */
731 wait_transaction_pre_flush(tree_root->fs_info);
695 trans = btrfs_start_transaction(tree_root, 1); 732 trans = btrfs_start_transaction(tree_root, 1);
733
734 /*
735 * we've joined a transaction, make sure it isn't
736 * closing right now
737 */
738 if (trans->transaction->delayed_refs.flushing) {
739 btrfs_end_transaction(trans, tree_root);
740 continue;
741 }
742
696 mutex_lock(&root->fs_info->drop_mutex); 743 mutex_lock(&root->fs_info->drop_mutex);
697 ret = btrfs_drop_snapshot(trans, dirty->root); 744 ret = btrfs_drop_snapshot(trans, dirty->root);
698 if (ret != -EAGAIN) 745 if (ret != -EAGAIN)
@@ -932,20 +979,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
932 ret = btrfs_run_delayed_refs(trans, root, 0); 979 ret = btrfs_run_delayed_refs(trans, root, 0);
933 BUG_ON(ret); 980 BUG_ON(ret);
934 981
982 cur_trans = trans->transaction;
935 /* 983 /*
936 * set the flushing flag so procs in this transaction have to 984 * set the flushing flag so procs in this transaction have to
937 * start sending their work down. 985 * start sending their work down.
938 */ 986 */
939 trans->transaction->delayed_refs.flushing = 1; 987 cur_trans->delayed_refs.flushing = 1;
940 988
941 ret = btrfs_run_delayed_refs(trans, root, 0); 989 ret = btrfs_run_delayed_refs(trans, root, 0);
942 BUG_ON(ret); 990 BUG_ON(ret);
943 991
944 INIT_LIST_HEAD(&dirty_fs_roots);
945 mutex_lock(&root->fs_info->trans_mutex); 992 mutex_lock(&root->fs_info->trans_mutex);
946 if (trans->transaction->in_commit) { 993 INIT_LIST_HEAD(&dirty_fs_roots);
947 cur_trans = trans->transaction; 994 if (cur_trans->in_commit) {
948 trans->transaction->use_count++; 995 cur_trans->use_count++;
949 mutex_unlock(&root->fs_info->trans_mutex); 996 mutex_unlock(&root->fs_info->trans_mutex);
950 btrfs_end_transaction(trans, root); 997 btrfs_end_transaction(trans, root);
951 998
@@ -968,7 +1015,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
968 1015
969 trans->transaction->in_commit = 1; 1016 trans->transaction->in_commit = 1;
970 trans->transaction->blocked = 1; 1017 trans->transaction->blocked = 1;
971 cur_trans = trans->transaction;
972 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1018 if (cur_trans->list.prev != &root->fs_info->trans_list) {
973 prev_trans = list_entry(cur_trans->list.prev, 1019 prev_trans = list_entry(cur_trans->list.prev,
974 struct btrfs_transaction, list); 1020 struct btrfs_transaction, list);
@@ -1081,6 +1127,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1081 btrfs_copy_pinned(root, pinned_copy); 1127 btrfs_copy_pinned(root, pinned_copy);
1082 1128
1083 trans->transaction->blocked = 0; 1129 trans->transaction->blocked = 0;
1130
1084 wake_up(&root->fs_info->transaction_throttle); 1131 wake_up(&root->fs_info->transaction_throttle);
1085 wake_up(&root->fs_info->transaction_wait); 1132 wake_up(&root->fs_info->transaction_wait);
1086 1133
@@ -1107,6 +1154,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1107 mutex_lock(&root->fs_info->trans_mutex); 1154 mutex_lock(&root->fs_info->trans_mutex);
1108 1155
1109 cur_trans->commit_done = 1; 1156 cur_trans->commit_done = 1;
1157
1110 root->fs_info->last_trans_committed = cur_trans->transid; 1158 root->fs_info->last_trans_committed = cur_trans->transid;
1111 wake_up(&cur_trans->commit_wait); 1159 wake_up(&cur_trans->commit_wait);
1112 1160
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94876709217f..94f5bde2b58d 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -23,7 +23,12 @@
23 23
24struct btrfs_transaction { 24struct btrfs_transaction {
25 u64 transid; 25 u64 transid;
26 /*
27 * total writers in this transaction, it must be zero before the
28 * transaction can end
29 */
26 unsigned long num_writers; 30 unsigned long num_writers;
31
27 unsigned long num_joined; 32 unsigned long num_joined;
28 int in_commit; 33 int in_commit;
29 int use_count; 34 int use_count;