diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 151 |
1 files changed, 133 insertions, 18 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4112d53d4f4d..664782c6a2df 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -65,6 +65,15 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
65 | cur_trans->use_count = 1; | 65 | cur_trans->use_count = 1; |
66 | cur_trans->commit_done = 0; | 66 | cur_trans->commit_done = 0; |
67 | cur_trans->start_time = get_seconds(); | 67 | cur_trans->start_time = get_seconds(); |
68 | |||
69 | cur_trans->delayed_refs.root.rb_node = NULL; | ||
70 | cur_trans->delayed_refs.num_entries = 0; | ||
71 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
72 | cur_trans->delayed_refs.num_heads = 0; | ||
73 | cur_trans->delayed_refs.flushing = 0; | ||
74 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
75 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
76 | |||
68 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 77 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 78 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
70 | extent_io_tree_init(&cur_trans->dirty_pages, | 79 | extent_io_tree_init(&cur_trans->dirty_pages, |
@@ -182,6 +191,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
182 | h->block_group = 0; | 191 | h->block_group = 0; |
183 | h->alloc_exclude_nr = 0; | 192 | h->alloc_exclude_nr = 0; |
184 | h->alloc_exclude_start = 0; | 193 | h->alloc_exclude_start = 0; |
194 | h->delayed_ref_updates = 0; | ||
195 | |||
185 | root->fs_info->running_transaction->use_count++; | 196 | root->fs_info->running_transaction->use_count++; |
186 | mutex_unlock(&root->fs_info->trans_mutex); | 197 | mutex_unlock(&root->fs_info->trans_mutex); |
187 | return h; | 198 | return h; |
@@ -271,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root) | |||
271 | if (!root->fs_info->open_ioctl_trans) | 282 | if (!root->fs_info->open_ioctl_trans) |
272 | wait_current_trans(root); | 283 | wait_current_trans(root); |
273 | mutex_unlock(&root->fs_info->trans_mutex); | 284 | mutex_unlock(&root->fs_info->trans_mutex); |
274 | |||
275 | throttle_on_drops(root); | 285 | throttle_on_drops(root); |
276 | } | 286 | } |
277 | 287 | ||
@@ -280,6 +290,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
280 | { | 290 | { |
281 | struct btrfs_transaction *cur_trans; | 291 | struct btrfs_transaction *cur_trans; |
282 | struct btrfs_fs_info *info = root->fs_info; | 292 | struct btrfs_fs_info *info = root->fs_info; |
293 | int count = 0; | ||
294 | |||
295 | while (count < 4) { | ||
296 | unsigned long cur = trans->delayed_ref_updates; | ||
297 | trans->delayed_ref_updates = 0; | ||
298 | if (cur && | ||
299 | trans->transaction->delayed_refs.num_heads_ready > 64) { | ||
300 | trans->delayed_ref_updates = 0; | ||
301 | |||
302 | /* | ||
303 | * do a full flush if the transaction is trying | ||
304 | * to close | ||
305 | */ | ||
306 | if (trans->transaction->delayed_refs.flushing) | ||
307 | cur = 0; | ||
308 | btrfs_run_delayed_refs(trans, root, cur); | ||
309 | } else { | ||
310 | break; | ||
311 | } | ||
312 | count++; | ||
313 | } | ||
283 | 314 | ||
284 | mutex_lock(&info->trans_mutex); | 315 | mutex_lock(&info->trans_mutex); |
285 | cur_trans = info->running_transaction; | 316 | cur_trans = info->running_transaction; |
@@ -424,9 +455,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
424 | u64 old_root_bytenr; | 455 | u64 old_root_bytenr; |
425 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 456 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
426 | 457 | ||
427 | btrfs_extent_post_op(trans, root); | ||
428 | btrfs_write_dirty_block_groups(trans, root); | 458 | btrfs_write_dirty_block_groups(trans, root); |
429 | btrfs_extent_post_op(trans, root); | 459 | |
460 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
461 | BUG_ON(ret); | ||
430 | 462 | ||
431 | while (1) { | 463 | while (1) { |
432 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 464 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
@@ -438,14 +470,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
438 | btrfs_header_level(root->node)); | 470 | btrfs_header_level(root->node)); |
439 | btrfs_set_root_generation(&root->root_item, trans->transid); | 471 | btrfs_set_root_generation(&root->root_item, trans->transid); |
440 | 472 | ||
441 | btrfs_extent_post_op(trans, root); | ||
442 | |||
443 | ret = btrfs_update_root(trans, tree_root, | 473 | ret = btrfs_update_root(trans, tree_root, |
444 | &root->root_key, | 474 | &root->root_key, |
445 | &root->root_item); | 475 | &root->root_item); |
446 | BUG_ON(ret); | 476 | BUG_ON(ret); |
447 | btrfs_write_dirty_block_groups(trans, root); | 477 | btrfs_write_dirty_block_groups(trans, root); |
448 | btrfs_extent_post_op(trans, root); | 478 | |
479 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
480 | BUG_ON(ret); | ||
449 | } | 481 | } |
450 | return 0; | 482 | return 0; |
451 | } | 483 | } |
@@ -459,15 +491,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
459 | struct btrfs_fs_info *fs_info = root->fs_info; | 491 | struct btrfs_fs_info *fs_info = root->fs_info; |
460 | struct list_head *next; | 492 | struct list_head *next; |
461 | struct extent_buffer *eb; | 493 | struct extent_buffer *eb; |
494 | int ret; | ||
462 | 495 | ||
463 | btrfs_extent_post_op(trans, fs_info->tree_root); | 496 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
497 | BUG_ON(ret); | ||
464 | 498 | ||
465 | eb = btrfs_lock_root_node(fs_info->tree_root); | 499 | eb = btrfs_lock_root_node(fs_info->tree_root); |
466 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); | 500 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); |
467 | btrfs_tree_unlock(eb); | 501 | btrfs_tree_unlock(eb); |
468 | free_extent_buffer(eb); | 502 | free_extent_buffer(eb); |
469 | 503 | ||
470 | btrfs_extent_post_op(trans, fs_info->tree_root); | 504 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
505 | BUG_ON(ret); | ||
471 | 506 | ||
472 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 507 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
473 | next = fs_info->dirty_cowonly_roots.next; | 508 | next = fs_info->dirty_cowonly_roots.next; |
@@ -475,6 +510,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
475 | root = list_entry(next, struct btrfs_root, dirty_list); | 510 | root = list_entry(next, struct btrfs_root, dirty_list); |
476 | 511 | ||
477 | update_cowonly_root(trans, root); | 512 | update_cowonly_root(trans, root); |
513 | |||
514 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
515 | BUG_ON(ret); | ||
478 | } | 516 | } |
479 | return 0; | 517 | return 0; |
480 | } | 518 | } |
@@ -635,6 +673,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
635 | } | 673 | } |
636 | 674 | ||
637 | /* | 675 | /* |
676 | * when dropping snapshots, we generate a ton of delayed refs, and it makes | ||
677 | * sense not to join the transaction while it is trying to flush the current | ||
678 | * queue of delayed refs out. | ||
679 | * | ||
680 | * This is used by the drop snapshot code only | ||
681 | */ | ||
682 | static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | ||
683 | { | ||
684 | DEFINE_WAIT(wait); | ||
685 | |||
686 | mutex_lock(&info->trans_mutex); | ||
687 | while (info->running_transaction && | ||
688 | info->running_transaction->delayed_refs.flushing) { | ||
689 | prepare_to_wait(&info->transaction_wait, &wait, | ||
690 | TASK_UNINTERRUPTIBLE); | ||
691 | mutex_unlock(&info->trans_mutex); | ||
692 | schedule(); | ||
693 | mutex_lock(&info->trans_mutex); | ||
694 | finish_wait(&info->transaction_wait, &wait); | ||
695 | } | ||
696 | mutex_unlock(&info->trans_mutex); | ||
697 | return 0; | ||
698 | } | ||
699 | |||
700 | /* | ||
638 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | 701 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on |
639 | * all of them | 702 | * all of them |
640 | */ | 703 | */ |
@@ -661,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
661 | atomic_inc(&root->fs_info->throttles); | 724 | atomic_inc(&root->fs_info->throttles); |
662 | 725 | ||
663 | while (1) { | 726 | while (1) { |
727 | /* | ||
728 | * we don't want to jump in and create a bunch of | ||
729 | * delayed refs if the transaction is starting to close | ||
730 | */ | ||
731 | wait_transaction_pre_flush(tree_root->fs_info); | ||
664 | trans = btrfs_start_transaction(tree_root, 1); | 732 | trans = btrfs_start_transaction(tree_root, 1); |
733 | |||
734 | /* | ||
735 | * we've joined a transaction, make sure it isn't | ||
736 | * closing right now | ||
737 | */ | ||
738 | if (trans->transaction->delayed_refs.flushing) { | ||
739 | btrfs_end_transaction(trans, tree_root); | ||
740 | continue; | ||
741 | } | ||
742 | |||
665 | mutex_lock(&root->fs_info->drop_mutex); | 743 | mutex_lock(&root->fs_info->drop_mutex); |
666 | ret = btrfs_drop_snapshot(trans, dirty->root); | 744 | ret = btrfs_drop_snapshot(trans, dirty->root); |
667 | if (ret != -EAGAIN) | 745 | if (ret != -EAGAIN) |
@@ -766,7 +844,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
766 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 844 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
767 | 845 | ||
768 | old = btrfs_lock_root_node(root); | 846 | old = btrfs_lock_root_node(root); |
769 | btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); | 847 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
770 | 848 | ||
771 | btrfs_copy_root(trans, root, old, &tmp, objectid); | 849 | btrfs_copy_root(trans, root, old, &tmp, objectid); |
772 | btrfs_tree_unlock(old); | 850 | btrfs_tree_unlock(old); |
@@ -894,12 +972,31 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
894 | struct extent_io_tree *pinned_copy; | 972 | struct extent_io_tree *pinned_copy; |
895 | DEFINE_WAIT(wait); | 973 | DEFINE_WAIT(wait); |
896 | int ret; | 974 | int ret; |
975 | int should_grow = 0; | ||
976 | unsigned long now = get_seconds(); | ||
977 | |||
978 | btrfs_run_ordered_operations(root, 0); | ||
979 | |||
980 | /* make a pass through all the delayed refs we have so far | ||
981 | * any running procs may add more while we are here | ||
982 | */ | ||
983 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
984 | BUG_ON(ret); | ||
985 | |||
986 | cur_trans = trans->transaction; | ||
987 | /* | ||
988 | * set the flushing flag so procs in this transaction have to | ||
989 | * start sending their work down. | ||
990 | */ | ||
991 | cur_trans->delayed_refs.flushing = 1; | ||
992 | |||
993 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
994 | BUG_ON(ret); | ||
897 | 995 | ||
898 | INIT_LIST_HEAD(&dirty_fs_roots); | ||
899 | mutex_lock(&root->fs_info->trans_mutex); | 996 | mutex_lock(&root->fs_info->trans_mutex); |
900 | if (trans->transaction->in_commit) { | 997 | INIT_LIST_HEAD(&dirty_fs_roots); |
901 | cur_trans = trans->transaction; | 998 | if (cur_trans->in_commit) { |
902 | trans->transaction->use_count++; | 999 | cur_trans->use_count++; |
903 | mutex_unlock(&root->fs_info->trans_mutex); | 1000 | mutex_unlock(&root->fs_info->trans_mutex); |
904 | btrfs_end_transaction(trans, root); | 1001 | btrfs_end_transaction(trans, root); |
905 | 1002 | ||
@@ -922,7 +1019,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
922 | 1019 | ||
923 | trans->transaction->in_commit = 1; | 1020 | trans->transaction->in_commit = 1; |
924 | trans->transaction->blocked = 1; | 1021 | trans->transaction->blocked = 1; |
925 | cur_trans = trans->transaction; | ||
926 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1022 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
927 | prev_trans = list_entry(cur_trans->list.prev, | 1023 | prev_trans = list_entry(cur_trans->list.prev, |
928 | struct btrfs_transaction, list); | 1024 | struct btrfs_transaction, list); |
@@ -937,6 +1033,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
937 | } | 1033 | } |
938 | } | 1034 | } |
939 | 1035 | ||
1036 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | ||
1037 | should_grow = 1; | ||
1038 | |||
940 | do { | 1039 | do { |
941 | int snap_pending = 0; | 1040 | int snap_pending = 0; |
942 | joined = cur_trans->num_joined; | 1041 | joined = cur_trans->num_joined; |
@@ -949,7 +1048,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
949 | 1048 | ||
950 | if (cur_trans->num_writers > 1) | 1049 | if (cur_trans->num_writers > 1) |
951 | timeout = MAX_SCHEDULE_TIMEOUT; | 1050 | timeout = MAX_SCHEDULE_TIMEOUT; |
952 | else | 1051 | else if (should_grow) |
953 | timeout = 1; | 1052 | timeout = 1; |
954 | 1053 | ||
955 | mutex_unlock(&root->fs_info->trans_mutex); | 1054 | mutex_unlock(&root->fs_info->trans_mutex); |
@@ -959,16 +1058,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
959 | BUG_ON(ret); | 1058 | BUG_ON(ret); |
960 | } | 1059 | } |
961 | 1060 | ||
962 | schedule_timeout(timeout); | 1061 | /* |
1062 | * rename doesn't use btrfs_join_transaction, so, once we | ||
1063 | * set the transaction to blocked above, we aren't going | ||
1064 | * to get any new ordered operations. We can safely run | ||
1065 | * it here and know for sure that nothing new will be added | ||
1066 | * to the list | ||
1067 | */ | ||
1068 | btrfs_run_ordered_operations(root, 1); | ||
1069 | |||
1070 | smp_mb(); | ||
1071 | if (cur_trans->num_writers > 1 || should_grow) | ||
1072 | schedule_timeout(timeout); | ||
963 | 1073 | ||
964 | mutex_lock(&root->fs_info->trans_mutex); | 1074 | mutex_lock(&root->fs_info->trans_mutex); |
965 | finish_wait(&cur_trans->writer_wait, &wait); | 1075 | finish_wait(&cur_trans->writer_wait, &wait); |
966 | } while (cur_trans->num_writers > 1 || | 1076 | } while (cur_trans->num_writers > 1 || |
967 | (cur_trans->num_joined != joined)); | 1077 | (should_grow && cur_trans->num_joined != joined)); |
968 | 1078 | ||
969 | ret = create_pending_snapshots(trans, root->fs_info); | 1079 | ret = create_pending_snapshots(trans, root->fs_info); |
970 | BUG_ON(ret); | 1080 | BUG_ON(ret); |
971 | 1081 | ||
1082 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1083 | BUG_ON(ret); | ||
1084 | |||
972 | WARN_ON(cur_trans != trans->transaction); | 1085 | WARN_ON(cur_trans != trans->transaction); |
973 | 1086 | ||
974 | /* btrfs_commit_tree_roots is responsible for getting the | 1087 | /* btrfs_commit_tree_roots is responsible for getting the |
@@ -1032,6 +1145,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1032 | btrfs_copy_pinned(root, pinned_copy); | 1145 | btrfs_copy_pinned(root, pinned_copy); |
1033 | 1146 | ||
1034 | trans->transaction->blocked = 0; | 1147 | trans->transaction->blocked = 0; |
1148 | |||
1035 | wake_up(&root->fs_info->transaction_throttle); | 1149 | wake_up(&root->fs_info->transaction_throttle); |
1036 | wake_up(&root->fs_info->transaction_wait); | 1150 | wake_up(&root->fs_info->transaction_wait); |
1037 | 1151 | ||
@@ -1058,6 +1172,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1058 | mutex_lock(&root->fs_info->trans_mutex); | 1172 | mutex_lock(&root->fs_info->trans_mutex); |
1059 | 1173 | ||
1060 | cur_trans->commit_done = 1; | 1174 | cur_trans->commit_done = 1; |
1175 | |||
1061 | root->fs_info->last_trans_committed = cur_trans->transid; | 1176 | root->fs_info->last_trans_committed = cur_trans->transid; |
1062 | wake_up(&cur_trans->commit_wait); | 1177 | wake_up(&cur_trans->commit_wait); |
1063 | 1178 | ||