Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	149
1 file changed, 98 insertions(+), 51 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8471c79b0877..3b8b6c212701 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -926,52 +926,41 @@ again:
 	return NULL;
 }
 
-/*
- * this starts processing the delayed reference count updates and
- * extent insertions we have queued up so far. count can be
- * 0, which means to process everything in the tree at the start
- * of the run (but not newly added entries), or it can be some target
- * number you'd like to process.
- */
-int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, unsigned long count)
+static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
+				       struct btrfs_root *root,
+				       struct list_head *cluster)
 {
-	struct rb_node *node;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct btrfs_delayed_ref_head *locked_ref = NULL;
 	int ret;
+	int count = 0;
 	int must_insert_reserved = 0;
-	int run_all = count == (unsigned long)-1;
-
-	if (root == root->fs_info->extent_root)
-		root = root->fs_info->tree_root;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-again:
-	spin_lock(&delayed_refs->lock);
-	if (count == 0)
-		count = delayed_refs->num_entries;
 	while (1) {
 		if (!locked_ref) {
-			/*
-			 * no locked ref, go find something we can
-			 * process in the rbtree. We start at
-			 * the beginning of the tree, there may be less
-			 * lock contention if we do something smarter here.
-			 */
-			node = rb_first(&delayed_refs->root);
-			if (!node) {
-				spin_unlock(&delayed_refs->lock);
+			/* pick a new head ref from the cluster list */
+			if (list_empty(cluster))
 				break;
-			}
 
-			ref = rb_entry(node, struct btrfs_delayed_ref_node,
-				       rb_node);
-			ret = btrfs_lock_delayed_ref(trans, ref, &locked_ref);
-			if (ret) {
-				spin_unlock(&delayed_refs->lock);
-				break;
+			locked_ref = list_entry(cluster->next,
+				     struct btrfs_delayed_ref_head, cluster);
+
+			/* grab the lock that says we are going to process
+			 * all the refs for this head */
+			ret = btrfs_delayed_ref_lock(trans, locked_ref);
+
+			/*
+			 * we may have dropped the spin lock to get the head
+			 * mutex lock, and that might have given someone else
+			 * time to free the head. If that's true, it has been
+			 * removed from our list and we can move on.
+			 */
+			if (ret == -EAGAIN) {
+				locked_ref = NULL;
+				count++;
+				continue;
 			}
 		}
 
@@ -986,7 +975,6 @@ again:
 		 * locked_ref is the head node, so we have to go one
 		 * node back for any delayed ref updates
 		 */
-
 		ref = select_delayed_ref(locked_ref);
 		if (!ref) {
 			/* All delayed refs have been processed, Go ahead
@@ -994,6 +982,7 @@ again:
 			 * so that any accounting fixes can happen
 			 */
 			ref = &locked_ref->node;
+			list_del_init(&locked_ref->cluster);
 			locked_ref = NULL;
 		}
 
@@ -1007,30 +996,72 @@ again:
 		BUG_ON(ret);
 		btrfs_put_delayed_ref(ref);
 
-		/* once we lock the head ref, we have to process all the
-		 * entries for it. So, we might end up doing more entries
-		 * that count was asking us to do.
-		 */
-		if (count > 0)
-			count--;
+		count++;
+		cond_resched();
+		spin_lock(&delayed_refs->lock);
+	}
+	return count;
+}
+
+/*
+ * this starts processing the delayed reference count updates and
+ * extent insertions we have queued up so far. count can be
+ * 0, which means to process everything in the tree at the start
+ * of the run (but not newly added entries), or it can be some target
+ * number you'd like to process.
+ */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, unsigned long count)
+{
+	struct rb_node *node;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_node *ref;
+	struct list_head cluster;
+	int ret;
+	int run_all = count == (unsigned long)-1;
+	int run_most = 0;
+
+	if (root == root->fs_info->extent_root)
+		root = root->fs_info->tree_root;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	INIT_LIST_HEAD(&cluster);
+again:
+	spin_lock(&delayed_refs->lock);
+	if (count == 0) {
+		count = delayed_refs->num_entries * 2;
+		run_most = 1;
+	}
+	while (1) {
+		if (!(run_all || run_most) &&
+		    delayed_refs->num_heads_ready < 64)
+			break;
 
 		/*
-		 * we set locked_ref to null above if we're all done
-		 * with this bytenr
+		 * go find something we can process in the rbtree. We start at
+		 * the beginning of the tree, and then build a cluster
+		 * of refs to process starting at the first one we are able to
+		 * lock
 		 */
-		if (!locked_ref && count == 0)
+		ret = btrfs_find_ref_cluster(trans, &cluster,
+					     delayed_refs->run_delayed_start);
+		if (ret)
 			break;
 
-		cond_resched();
-		spin_lock(&delayed_refs->lock);
+		ret = run_clustered_refs(trans, root, &cluster);
+		BUG_ON(ret < 0);
+
+		count -= min_t(unsigned long, ret, count);
+
+		if (count == 0)
+			break;
 	}
+
 	if (run_all) {
-		spin_lock(&delayed_refs->lock);
 		node = rb_first(&delayed_refs->root);
-		if (!node) {
-			spin_unlock(&delayed_refs->lock);
+		if (!node)
 			goto out;
-		}
+		count = (unsigned long)-1;
 
 		while (node) {
 			ref = rb_entry(node, struct btrfs_delayed_ref_node,
@@ -1052,11 +1083,11 @@ again:
 			node = rb_next(node);
 		}
 		spin_unlock(&delayed_refs->lock);
-		count = (unsigned long)-1;
 		schedule_timeout(1);
 		goto again;
 	}
 out:
+	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
 
@@ -2407,12 +2438,18 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	 */
 	head->node.in_tree = 0;
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
+
 	delayed_refs->num_entries--;
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
 	 * tree, so we just steal the ref the tree was holding.
 	 */
+	delayed_refs->num_heads--;
+	if (list_empty(&head->cluster))
+		delayed_refs->num_heads_ready--;
+
+	list_del_init(&head->cluster);
 	spin_unlock(&delayed_refs->lock);
 
 	ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
@@ -3705,6 +3742,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 	struct btrfs_path *path;
 	int i;
 	int orig_level;
+	int update_count;
 	struct btrfs_root_item *root_item = &root->root_item;
 
 	WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
@@ -3746,6 +3784,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 		}
 	}
 	while (1) {
+		unsigned long update;
 		wret = walk_down_tree(trans, root, path, &level);
 		if (wret > 0)
 			break;
@@ -3764,6 +3803,14 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
 		}
 		atomic_inc(&root->fs_info->throttle_gen);
 		wake_up(&root->fs_info->transaction_throttle);
+		for (update_count = 0; update_count < 16; update_count++) {
+			update = trans->delayed_ref_updates;
+			trans->delayed_ref_updates = 0;
+			if (update)
+				btrfs_run_delayed_refs(trans, root, update);
+			else
+				break;
+		}
 	}
 	for (i = 0; i <= orig_level; i++) {
 		if (path->nodes[i]) {