diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 158 |
1 files changed, 137 insertions, 21 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4112d53d4f4d..2869b3361eb6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -53,8 +53,6 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
53 | GFP_NOFS); | 53 | GFP_NOFS); |
54 | BUG_ON(!cur_trans); | 54 | BUG_ON(!cur_trans); |
55 | root->fs_info->generation++; | 55 | root->fs_info->generation++; |
56 | root->fs_info->last_alloc = 0; | ||
57 | root->fs_info->last_data_alloc = 0; | ||
58 | cur_trans->num_writers = 1; | 56 | cur_trans->num_writers = 1; |
59 | cur_trans->num_joined = 0; | 57 | cur_trans->num_joined = 0; |
60 | cur_trans->transid = root->fs_info->generation; | 58 | cur_trans->transid = root->fs_info->generation; |
@@ -65,6 +63,15 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
65 | cur_trans->use_count = 1; | 63 | cur_trans->use_count = 1; |
66 | cur_trans->commit_done = 0; | 64 | cur_trans->commit_done = 0; |
67 | cur_trans->start_time = get_seconds(); | 65 | cur_trans->start_time = get_seconds(); |
66 | |||
67 | cur_trans->delayed_refs.root.rb_node = NULL; | ||
68 | cur_trans->delayed_refs.num_entries = 0; | ||
69 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
70 | cur_trans->delayed_refs.num_heads = 0; | ||
71 | cur_trans->delayed_refs.flushing = 0; | ||
72 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
73 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
74 | |||
68 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 75 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 76 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
70 | extent_io_tree_init(&cur_trans->dirty_pages, | 77 | extent_io_tree_init(&cur_trans->dirty_pages, |
@@ -182,6 +189,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
182 | h->block_group = 0; | 189 | h->block_group = 0; |
183 | h->alloc_exclude_nr = 0; | 190 | h->alloc_exclude_nr = 0; |
184 | h->alloc_exclude_start = 0; | 191 | h->alloc_exclude_start = 0; |
192 | h->delayed_ref_updates = 0; | ||
193 | |||
185 | root->fs_info->running_transaction->use_count++; | 194 | root->fs_info->running_transaction->use_count++; |
186 | mutex_unlock(&root->fs_info->trans_mutex); | 195 | mutex_unlock(&root->fs_info->trans_mutex); |
187 | return h; | 196 | return h; |
@@ -271,7 +280,6 @@ void btrfs_throttle(struct btrfs_root *root) | |||
271 | if (!root->fs_info->open_ioctl_trans) | 280 | if (!root->fs_info->open_ioctl_trans) |
272 | wait_current_trans(root); | 281 | wait_current_trans(root); |
273 | mutex_unlock(&root->fs_info->trans_mutex); | 282 | mutex_unlock(&root->fs_info->trans_mutex); |
274 | |||
275 | throttle_on_drops(root); | 283 | throttle_on_drops(root); |
276 | } | 284 | } |
277 | 285 | ||
@@ -280,6 +288,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
280 | { | 288 | { |
281 | struct btrfs_transaction *cur_trans; | 289 | struct btrfs_transaction *cur_trans; |
282 | struct btrfs_fs_info *info = root->fs_info; | 290 | struct btrfs_fs_info *info = root->fs_info; |
291 | int count = 0; | ||
292 | |||
293 | while (count < 4) { | ||
294 | unsigned long cur = trans->delayed_ref_updates; | ||
295 | trans->delayed_ref_updates = 0; | ||
296 | if (cur && | ||
297 | trans->transaction->delayed_refs.num_heads_ready > 64) { | ||
298 | trans->delayed_ref_updates = 0; | ||
299 | |||
300 | /* | ||
301 | * do a full flush if the transaction is trying | ||
302 | * to close | ||
303 | */ | ||
304 | if (trans->transaction->delayed_refs.flushing) | ||
305 | cur = 0; | ||
306 | btrfs_run_delayed_refs(trans, root, cur); | ||
307 | } else { | ||
308 | break; | ||
309 | } | ||
310 | count++; | ||
311 | } | ||
283 | 312 | ||
284 | mutex_lock(&info->trans_mutex); | 313 | mutex_lock(&info->trans_mutex); |
285 | cur_trans = info->running_transaction; | 314 | cur_trans = info->running_transaction; |
@@ -424,9 +453,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
424 | u64 old_root_bytenr; | 453 | u64 old_root_bytenr; |
425 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 454 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
426 | 455 | ||
427 | btrfs_extent_post_op(trans, root); | ||
428 | btrfs_write_dirty_block_groups(trans, root); | 456 | btrfs_write_dirty_block_groups(trans, root); |
429 | btrfs_extent_post_op(trans, root); | 457 | |
458 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
459 | BUG_ON(ret); | ||
430 | 460 | ||
431 | while (1) { | 461 | while (1) { |
432 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 462 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
@@ -438,14 +468,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
438 | btrfs_header_level(root->node)); | 468 | btrfs_header_level(root->node)); |
439 | btrfs_set_root_generation(&root->root_item, trans->transid); | 469 | btrfs_set_root_generation(&root->root_item, trans->transid); |
440 | 470 | ||
441 | btrfs_extent_post_op(trans, root); | ||
442 | |||
443 | ret = btrfs_update_root(trans, tree_root, | 471 | ret = btrfs_update_root(trans, tree_root, |
444 | &root->root_key, | 472 | &root->root_key, |
445 | &root->root_item); | 473 | &root->root_item); |
446 | BUG_ON(ret); | 474 | BUG_ON(ret); |
447 | btrfs_write_dirty_block_groups(trans, root); | 475 | btrfs_write_dirty_block_groups(trans, root); |
448 | btrfs_extent_post_op(trans, root); | 476 | |
477 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
478 | BUG_ON(ret); | ||
449 | } | 479 | } |
450 | return 0; | 480 | return 0; |
451 | } | 481 | } |
@@ -459,15 +489,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
459 | struct btrfs_fs_info *fs_info = root->fs_info; | 489 | struct btrfs_fs_info *fs_info = root->fs_info; |
460 | struct list_head *next; | 490 | struct list_head *next; |
461 | struct extent_buffer *eb; | 491 | struct extent_buffer *eb; |
492 | int ret; | ||
462 | 493 | ||
463 | btrfs_extent_post_op(trans, fs_info->tree_root); | 494 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
495 | BUG_ON(ret); | ||
464 | 496 | ||
465 | eb = btrfs_lock_root_node(fs_info->tree_root); | 497 | eb = btrfs_lock_root_node(fs_info->tree_root); |
466 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); | 498 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); |
467 | btrfs_tree_unlock(eb); | 499 | btrfs_tree_unlock(eb); |
468 | free_extent_buffer(eb); | 500 | free_extent_buffer(eb); |
469 | 501 | ||
470 | btrfs_extent_post_op(trans, fs_info->tree_root); | 502 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
503 | BUG_ON(ret); | ||
471 | 504 | ||
472 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 505 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
473 | next = fs_info->dirty_cowonly_roots.next; | 506 | next = fs_info->dirty_cowonly_roots.next; |
@@ -475,6 +508,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
475 | root = list_entry(next, struct btrfs_root, dirty_list); | 508 | root = list_entry(next, struct btrfs_root, dirty_list); |
476 | 509 | ||
477 | update_cowonly_root(trans, root); | 510 | update_cowonly_root(trans, root); |
511 | |||
512 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
513 | BUG_ON(ret); | ||
478 | } | 514 | } |
479 | return 0; | 515 | return 0; |
480 | } | 516 | } |
@@ -635,6 +671,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
635 | } | 671 | } |
636 | 672 | ||
637 | /* | 673 | /* |
674 | * when dropping snapshots, we generate a ton of delayed refs, and it makes | ||
675 | * sense not to join the transaction while it is trying to flush the current | ||
676 | * queue of delayed refs out. | ||
677 | * | ||
678 | * This is used by the drop snapshot code only | ||
679 | */ | ||
680 | static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | ||
681 | { | ||
682 | DEFINE_WAIT(wait); | ||
683 | |||
684 | mutex_lock(&info->trans_mutex); | ||
685 | while (info->running_transaction && | ||
686 | info->running_transaction->delayed_refs.flushing) { | ||
687 | prepare_to_wait(&info->transaction_wait, &wait, | ||
688 | TASK_UNINTERRUPTIBLE); | ||
689 | mutex_unlock(&info->trans_mutex); | ||
690 | schedule(); | ||
691 | mutex_lock(&info->trans_mutex); | ||
692 | finish_wait(&info->transaction_wait, &wait); | ||
693 | } | ||
694 | mutex_unlock(&info->trans_mutex); | ||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | /* | ||
638 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | 699 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on |
639 | * all of them | 700 | * all of them |
640 | */ | 701 | */ |
@@ -661,7 +722,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
661 | atomic_inc(&root->fs_info->throttles); | 722 | atomic_inc(&root->fs_info->throttles); |
662 | 723 | ||
663 | while (1) { | 724 | while (1) { |
725 | /* | ||
726 | * we don't want to jump in and create a bunch of | ||
727 | * delayed refs if the transaction is starting to close | ||
728 | */ | ||
729 | wait_transaction_pre_flush(tree_root->fs_info); | ||
664 | trans = btrfs_start_transaction(tree_root, 1); | 730 | trans = btrfs_start_transaction(tree_root, 1); |
731 | |||
732 | /* | ||
733 | * we've joined a transaction, make sure it isn't | ||
734 | * closing right now | ||
735 | */ | ||
736 | if (trans->transaction->delayed_refs.flushing) { | ||
737 | btrfs_end_transaction(trans, tree_root); | ||
738 | continue; | ||
739 | } | ||
740 | |||
665 | mutex_lock(&root->fs_info->drop_mutex); | 741 | mutex_lock(&root->fs_info->drop_mutex); |
666 | ret = btrfs_drop_snapshot(trans, dirty->root); | 742 | ret = btrfs_drop_snapshot(trans, dirty->root); |
667 | if (ret != -EAGAIN) | 743 | if (ret != -EAGAIN) |
@@ -766,7 +842,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
766 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 842 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
767 | 843 | ||
768 | old = btrfs_lock_root_node(root); | 844 | old = btrfs_lock_root_node(root); |
769 | btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); | 845 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
770 | 846 | ||
771 | btrfs_copy_root(trans, root, old, &tmp, objectid); | 847 | btrfs_copy_root(trans, root, old, &tmp, objectid); |
772 | btrfs_tree_unlock(old); | 848 | btrfs_tree_unlock(old); |
@@ -894,12 +970,32 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
894 | struct extent_io_tree *pinned_copy; | 970 | struct extent_io_tree *pinned_copy; |
895 | DEFINE_WAIT(wait); | 971 | DEFINE_WAIT(wait); |
896 | int ret; | 972 | int ret; |
973 | int should_grow = 0; | ||
974 | unsigned long now = get_seconds(); | ||
975 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
976 | |||
977 | btrfs_run_ordered_operations(root, 0); | ||
978 | |||
979 | /* make a pass through all the delayed refs we have so far | ||
980 | * any running procs may add more while we are here | ||
981 | */ | ||
982 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
983 | BUG_ON(ret); | ||
984 | |||
985 | cur_trans = trans->transaction; | ||
986 | /* | ||
987 | * set the flushing flag so procs in this transaction have to | ||
988 | * start sending their work down. | ||
989 | */ | ||
990 | cur_trans->delayed_refs.flushing = 1; | ||
991 | |||
992 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
993 | BUG_ON(ret); | ||
897 | 994 | ||
898 | INIT_LIST_HEAD(&dirty_fs_roots); | ||
899 | mutex_lock(&root->fs_info->trans_mutex); | 995 | mutex_lock(&root->fs_info->trans_mutex); |
900 | if (trans->transaction->in_commit) { | 996 | INIT_LIST_HEAD(&dirty_fs_roots); |
901 | cur_trans = trans->transaction; | 997 | if (cur_trans->in_commit) { |
902 | trans->transaction->use_count++; | 998 | cur_trans->use_count++; |
903 | mutex_unlock(&root->fs_info->trans_mutex); | 999 | mutex_unlock(&root->fs_info->trans_mutex); |
904 | btrfs_end_transaction(trans, root); | 1000 | btrfs_end_transaction(trans, root); |
905 | 1001 | ||
@@ -922,7 +1018,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
922 | 1018 | ||
923 | trans->transaction->in_commit = 1; | 1019 | trans->transaction->in_commit = 1; |
924 | trans->transaction->blocked = 1; | 1020 | trans->transaction->blocked = 1; |
925 | cur_trans = trans->transaction; | ||
926 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1021 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
927 | prev_trans = list_entry(cur_trans->list.prev, | 1022 | prev_trans = list_entry(cur_trans->list.prev, |
928 | struct btrfs_transaction, list); | 1023 | struct btrfs_transaction, list); |
@@ -937,6 +1032,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
937 | } | 1032 | } |
938 | } | 1033 | } |
939 | 1034 | ||
1035 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | ||
1036 | should_grow = 1; | ||
1037 | |||
940 | do { | 1038 | do { |
941 | int snap_pending = 0; | 1039 | int snap_pending = 0; |
942 | joined = cur_trans->num_joined; | 1040 | joined = cur_trans->num_joined; |
@@ -949,26 +1047,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
949 | 1047 | ||
950 | if (cur_trans->num_writers > 1) | 1048 | if (cur_trans->num_writers > 1) |
951 | timeout = MAX_SCHEDULE_TIMEOUT; | 1049 | timeout = MAX_SCHEDULE_TIMEOUT; |
952 | else | 1050 | else if (should_grow) |
953 | timeout = 1; | 1051 | timeout = 1; |
954 | 1052 | ||
955 | mutex_unlock(&root->fs_info->trans_mutex); | 1053 | mutex_unlock(&root->fs_info->trans_mutex); |
956 | 1054 | ||
957 | if (snap_pending) { | 1055 | if (flush_on_commit || snap_pending) { |
1056 | if (flush_on_commit) | ||
1057 | btrfs_start_delalloc_inodes(root); | ||
958 | ret = btrfs_wait_ordered_extents(root, 1); | 1058 | ret = btrfs_wait_ordered_extents(root, 1); |
959 | BUG_ON(ret); | 1059 | BUG_ON(ret); |
960 | } | 1060 | } |
961 | 1061 | ||
962 | schedule_timeout(timeout); | 1062 | /* |
1063 | * rename doesn't use btrfs_join_transaction, so, once we | ||
1064 | * set the transaction to blocked above, we aren't going | ||
1065 | * to get any new ordered operations. We can safely run | ||
1066 | * it here and know for sure that nothing new will be added | ||
1067 | * to the list | ||
1068 | */ | ||
1069 | btrfs_run_ordered_operations(root, 1); | ||
1070 | |||
1071 | smp_mb(); | ||
1072 | if (cur_trans->num_writers > 1 || should_grow) | ||
1073 | schedule_timeout(timeout); | ||
963 | 1074 | ||
964 | mutex_lock(&root->fs_info->trans_mutex); | 1075 | mutex_lock(&root->fs_info->trans_mutex); |
965 | finish_wait(&cur_trans->writer_wait, &wait); | 1076 | finish_wait(&cur_trans->writer_wait, &wait); |
966 | } while (cur_trans->num_writers > 1 || | 1077 | } while (cur_trans->num_writers > 1 || |
967 | (cur_trans->num_joined != joined)); | 1078 | (should_grow && cur_trans->num_joined != joined)); |
968 | 1079 | ||
969 | ret = create_pending_snapshots(trans, root->fs_info); | 1080 | ret = create_pending_snapshots(trans, root->fs_info); |
970 | BUG_ON(ret); | 1081 | BUG_ON(ret); |
971 | 1082 | ||
1083 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1084 | BUG_ON(ret); | ||
1085 | |||
972 | WARN_ON(cur_trans != trans->transaction); | 1086 | WARN_ON(cur_trans != trans->transaction); |
973 | 1087 | ||
974 | /* btrfs_commit_tree_roots is responsible for getting the | 1088 | /* btrfs_commit_tree_roots is responsible for getting the |
@@ -1032,6 +1146,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1032 | btrfs_copy_pinned(root, pinned_copy); | 1146 | btrfs_copy_pinned(root, pinned_copy); |
1033 | 1147 | ||
1034 | trans->transaction->blocked = 0; | 1148 | trans->transaction->blocked = 0; |
1149 | |||
1035 | wake_up(&root->fs_info->transaction_throttle); | 1150 | wake_up(&root->fs_info->transaction_throttle); |
1036 | wake_up(&root->fs_info->transaction_wait); | 1151 | wake_up(&root->fs_info->transaction_wait); |
1037 | 1152 | ||
@@ -1058,6 +1173,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1058 | mutex_lock(&root->fs_info->trans_mutex); | 1173 | mutex_lock(&root->fs_info->trans_mutex); |
1059 | 1174 | ||
1060 | cur_trans->commit_done = 1; | 1175 | cur_trans->commit_done = 1; |
1176 | |||
1061 | root->fs_info->last_trans_committed = cur_trans->transid; | 1177 | root->fs_info->last_trans_committed = cur_trans->transid; |
1062 | wake_up(&cur_trans->commit_wait); | 1178 | wake_up(&cur_trans->commit_wait); |
1063 | 1179 | ||