path: root/fs/btrfs/delayed-ref.c
author     Chris Mason <chris.mason@oracle.com>    2009-03-13 10:17:05 -0400
committer  Chris Mason <chris.mason@oracle.com>    2009-03-24 16:14:26 -0400
commit     c3e69d58e86c3917ae4e9e31b4acf490a7cafe60 (patch)
tree       bd4f1e62446a208bdae26f0c36d67e3afbc1cd1d /fs/btrfs/delayed-ref.c
parent     1887be66dcc3140a81d1299958a41fc0eedfa64f (diff)
Btrfs: process the delayed reference queue in clusters
The delayed reference queue maintains pending operations that need to be done to the extent allocation tree. These are processed by finding records in the tree that are not currently being processed, one at a time.

This is slow because it spends a lot of time searching through the rbtree and because it creates lock contention on the extent allocation tree when lots of different procs are running delayed refs at the same time.

This commit changes things to grab a cluster of refs for processing, using a cursor into the rbtree as the starting point of the next search. This way we walk smoothly through the rbtree.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
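For orientation, here is a minimal sketch of how a caller might drive the two helpers this patch introduces, btrfs_find_ref_cluster() and btrfs_delayed_ref_lock(). The real consumer is btrfs_run_delayed_refs() in fs/btrfs/extent-tree.c, which is not part of this diff; run_delayed_refs_sketch() and run_one_head() below are hypothetical names used only for illustration.

/*
 * Sketch only -- run_one_head() is a hypothetical stand-in for the real
 * per-head processing, assumed to run every delayed ref hanging off the
 * head, release head->mutex when done, and (as the real code does) drop
 * delayed_refs->lock around the actual extent tree work.
 */
static int run_one_head(struct btrfs_trans_handle *trans,
                        struct btrfs_delayed_ref_head *head);

static void run_delayed_refs_sketch(struct btrfs_trans_handle *trans)
{
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_head *head;
        struct list_head cluster;

        delayed_refs = &trans->transaction->delayed_refs;
        INIT_LIST_HEAD(&cluster);

        spin_lock(&delayed_refs->lock);
        while (1) {
                /*
                 * Gather up to 32 adjacent heads, starting from the cursor
                 * saved in run_delayed_start by the previous pass.  A return
                 * value of 1 means there are no unprocessed heads left.
                 */
                if (btrfs_find_ref_cluster(trans, &cluster,
                                           delayed_refs->run_delayed_start))
                        break;

                while (!list_empty(&cluster)) {
                        head = list_entry(cluster.next,
                                          struct btrfs_delayed_ref_head,
                                          cluster);
                        /*
                         * btrfs_delayed_ref_lock() may drop and retake the
                         * spinlock; -EAGAIN means someone else processed
                         * this head (and pulled it off our list) while we
                         * waited for its mutex, so just move on.
                         */
                        if (btrfs_delayed_ref_lock(trans, head) == -EAGAIN)
                                continue;

                        list_del_init(&head->cluster);
                        run_one_head(trans, head);
                }
        }
        spin_unlock(&delayed_refs->lock);
}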
Diffstat (limited to 'fs/btrfs/delayed-ref.c')
-rw-r--r--  fs/btrfs/delayed-ref.c | 130
1 file changed, 96 insertions(+), 34 deletions(-)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 3e7eeaf86408..759fa247ced8 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -93,7 +93,8 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
  * ref if it was able to find one, or NULL if nothing was in that spot
  */
 static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
-                                  u64 bytenr, u64 parent)
+                                  u64 bytenr, u64 parent,
+                                  struct btrfs_delayed_ref_node **last)
 {
         struct rb_node *n = root->rb_node;
         struct btrfs_delayed_ref_node *entry;
@@ -102,6 +103,8 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
         while (n) {
                 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
                 WARN_ON(!entry->in_tree);
+                if (last)
+                        *last = entry;
 
                 cmp = comp_entry(entry, bytenr, parent);
                 if (cmp < 0)
@@ -114,45 +117,99 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
         return NULL;
 }
 
-/*
- * Locking on delayed refs is done by taking a lock on the head node,
- * which has the (impossible) parent id of (u64)-1.  Once a lock is held
- * on the head node, you're allowed (and required) to process all the
- * delayed refs for a given byte number in the tree.
- *
- * This will walk forward in the rbtree until it finds a head node it
- * is able to lock.  It might not lock the delayed ref you asked for,
- * and so it will return the one it did lock in next_ret and return 0.
- *
- * If no locks are taken, next_ret is set to null and 1 is returned.  This
- * means there are no more unlocked head nodes in the rbtree.
- */
-int btrfs_lock_delayed_ref(struct btrfs_trans_handle *trans,
-                           struct btrfs_delayed_ref_node *ref,
-                           struct btrfs_delayed_ref_head **next_ret)
+int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
+                           struct btrfs_delayed_ref_head *head)
 {
+        struct btrfs_delayed_ref_root *delayed_refs;
+
+        delayed_refs = &trans->transaction->delayed_refs;
+        assert_spin_locked(&delayed_refs->lock);
+        if (mutex_trylock(&head->mutex))
+                return 0;
+
+        atomic_inc(&head->node.refs);
+        spin_unlock(&delayed_refs->lock);
+
+        mutex_lock(&head->mutex);
+        spin_lock(&delayed_refs->lock);
+        if (!head->node.in_tree) {
+                mutex_unlock(&head->mutex);
+                btrfs_put_delayed_ref(&head->node);
+                return -EAGAIN;
+        }
+        btrfs_put_delayed_ref(&head->node);
+        return 0;
+}
+
+int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
+                           struct list_head *cluster, u64 start)
+{
+        int count = 0;
+        struct btrfs_delayed_ref_root *delayed_refs;
         struct rb_node *node;
+        struct btrfs_delayed_ref_node *ref;
         struct btrfs_delayed_ref_head *head;
-        int ret = 0;
 
-        while (1) {
+        delayed_refs = &trans->transaction->delayed_refs;
+        if (start == 0) {
+                node = rb_first(&delayed_refs->root);
+        } else {
+                ref = NULL;
+                tree_search(&delayed_refs->root, start, (u64)-1, &ref);
+                if (ref) {
+                        struct btrfs_delayed_ref_node *tmp;
+
+                        node = rb_prev(&ref->rb_node);
+                        while (node) {
+                                tmp = rb_entry(node,
+                                               struct btrfs_delayed_ref_node,
+                                               rb_node);
+                                if (tmp->bytenr < start)
+                                        break;
+                                ref = tmp;
+                                node = rb_prev(&ref->rb_node);
+                        }
+                        node = &ref->rb_node;
+                } else
+                        node = rb_first(&delayed_refs->root);
+        }
+again:
+        while (node && count < 32) {
+                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                 if (btrfs_delayed_ref_is_head(ref)) {
                         head = btrfs_delayed_node_to_head(ref);
-                        if (mutex_trylock(&head->mutex)) {
-                                *next_ret = head;
-                                ret = 0;
+                        if (list_empty(&head->cluster)) {
+                                list_add_tail(&head->cluster, cluster);
+                                delayed_refs->run_delayed_start =
+                                        head->node.bytenr;
+                                count++;
+
+                                WARN_ON(delayed_refs->num_heads_ready == 0);
+                                delayed_refs->num_heads_ready--;
+                        } else if (count) {
+                                /* the goal of the clustering is to find extents
+                                 * that are likely to end up in the same extent
+                                 * leaf on disk.  So, we don't want them spread
+                                 * all over the tree.  Stop now if we've hit
+                                 * a head that was already in use
+                                 */
                                 break;
                         }
                 }
-                node = rb_next(&ref->rb_node);
-                if (!node) {
-                        ret = 1;
-                        *next_ret = NULL;
-                        break;
-                }
-                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+                node = rb_next(node);
         }
-        return ret;
+        if (count) {
+                return 0;
+        } else if (start) {
+                /*
+                 * we've gone to the end of the rbtree without finding any
+                 * clusters.  start from the beginning and try again
+                 */
+                start = 0;
+                node = rb_first(&delayed_refs->root);
+                goto again;
+        }
+        return 1;
 }
 
 /*
@@ -178,7 +235,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
         delayed_refs = &trans->transaction->delayed_refs;
         spin_lock(&delayed_refs->lock);
 
-        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
         if (ref) {
                 prev_node = rb_prev(&ref->rb_node);
                 if (!prev_node)
@@ -240,7 +297,7 @@ again:
         }
 
         spin_lock(&delayed_refs->lock);
-        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
         if (ref) {
                 head = btrfs_delayed_node_to_head(ref);
                 if (mutex_trylock(&head->mutex)) {
@@ -384,7 +441,7 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
 {
         struct btrfs_delayed_ref_node *existing;
         struct btrfs_delayed_ref *full_ref;
-        struct btrfs_delayed_ref_head *head_ref;
+        struct btrfs_delayed_ref_head *head_ref = NULL;
         struct btrfs_delayed_ref_root *delayed_refs;
         int count_mod = 1;
         int must_insert_reserved = 0;
@@ -428,6 +485,7 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
         if (btrfs_delayed_ref_is_head(ref)) {
                 head_ref = btrfs_delayed_node_to_head(ref);
                 head_ref->must_insert_reserved = must_insert_reserved;
+                INIT_LIST_HEAD(&head_ref->cluster);
                 mutex_init(&head_ref->mutex);
         } else {
                 full_ref = btrfs_delayed_node_to_ref(ref);
@@ -453,6 +511,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
                  */
                 kfree(ref);
         } else {
+                if (btrfs_delayed_ref_is_head(ref)) {
+                        delayed_refs->num_heads++;
+                        delayed_refs->num_heads_ready++;
+                }
                 delayed_refs->num_entries++;
                 trans->delayed_ref_updates++;
         }
@@ -522,7 +584,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
         struct btrfs_delayed_ref_root *delayed_refs;
 
         delayed_refs = &trans->transaction->delayed_refs;
-        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1);
+        ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL);
         if (ref)
                 return btrfs_delayed_node_to_head(ref);
         return NULL;