author    Josef Bacik <jbacik@fb.com>  2014-01-23 09:21:38 -0500
committer Chris Mason <clm@fb.com>     2014-01-28 16:20:25 -0500
commit    d7df2c796d7eedd72a334dc89c65e1fec8171431 (patch)
tree      63e3adda6e56db27b13d0df28840a873ceac5855 /fs/btrfs/delayed-ref.c
parent    5039eddc19aee8c894191c24f2dde4e645ca1bbb (diff)
Btrfs: attach delayed ref updates to delayed ref heads
Currently we have two rb-trees, one for delayed ref heads and one for all of the delayed refs, including the delayed ref heads. When we process the delayed refs we have to hold onto the delayed ref lock for all of the selecting and merging and such, which results in quite a bit of lock contention. This was solved by having a waitqueue and only one flusher at a time, however this hurts if we get a lot of delayed refs queued up.

So instead just have an rb tree for the delayed ref heads, and then attach the delayed ref updates to an rb tree that is per delayed ref head. Then we only need to take the delayed ref lock when adding new delayed refs and when selecting a delayed ref head to process, all the rest of the time we deal with a per delayed ref head lock which will be much less contentious.

The locking rules for this get a little more complicated since we have to lock up to 3 things to properly process delayed refs, but I will address that problem later. For now this passes all of xfstests and my overnight stress tests. Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
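The diff below restructures the delayed-ref bookkeeping exactly as the message describes: the global delayed_refs->lock is only needed to insert a ref head or to pick one for processing, while the refs attached to a head live in a per-head rb-tree (head->ref_root) guarded by a per-head spinlock (head->lock). The following is a minimal userspace sketch of that two-level locking scheme, not the btrfs code; the names (ref_root, ref_head, ref_node, add_ref, select_ref_head) are made up for illustration, plain linked lists stand in for the rb-trees, and pthread mutexes stand in for the spinlocks.

/*
 * Simplified model of the locking split: one global lock protects the
 * collection of ref heads, each head's own lock protects the refs that
 * hang off it.  Illustrative stand-in types only, not the btrfs code.
 */
#include <pthread.h>
#include <stdio.h>

struct ref_node {
	int ref_mod;
	struct ref_node *next;		/* stand-in for head->ref_root */
};

struct ref_head {
	unsigned long bytenr;
	int processing;
	pthread_mutex_t lock;		/* stand-in for head->lock */
	struct ref_node *refs;
	struct ref_head *next;		/* stand-in for href_root */
};

struct ref_root {
	pthread_mutex_t lock;		/* stand-in for delayed_refs->lock */
	struct ref_head *heads;
};

/* Attach a new ref to an already-located head: only its own lock is taken. */
static void add_ref(struct ref_head *head, struct ref_node *ref)
{
	pthread_mutex_lock(&head->lock);
	ref->next = head->refs;
	head->refs = ref;
	pthread_mutex_unlock(&head->lock);
}

/*
 * Pick a head to process: the global lock is held only long enough to find
 * an unclaimed head and mark it, roughly what btrfs_select_ref_head() does.
 */
static struct ref_head *select_ref_head(struct ref_root *root)
{
	struct ref_head *head;

	pthread_mutex_lock(&root->lock);
	for (head = root->heads; head; head = head->next) {
		if (!head->processing) {
			head->processing = 1;
			break;
		}
	}
	pthread_mutex_unlock(&root->lock);
	return head;
}

int main(void)
{
	struct ref_root root = { PTHREAD_MUTEX_INITIALIZER, NULL };
	struct ref_head head = { 4096, 0, PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
	struct ref_node ref = { +1, NULL };
	struct ref_head *picked;

	root.heads = &head;
	add_ref(&head, &ref);			/* per-head lock only */
	picked = select_ref_head(&root);	/* brief global lock */
	printf("picked head at bytenr %lu, first ref_mod %+d\n",
	       picked->bytenr, picked->refs->ref_mod);
	return 0;
}

In the patch itself the selection step is btrfs_select_ref_head(), which additionally tracks run_delayed_start, wraps around to the start of the rb-tree once, and skips heads already marked processing; num_entries also becomes an atomic so it can be read without the global lock.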
Diffstat (limited to 'fs/btrfs/delayed-ref.c')
-rw-r--r--	fs/btrfs/delayed-ref.c	223
1 file changed, 108 insertions(+), 115 deletions(-)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index fab60c1ba065..f3bff89eecf0 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -269,39 +269,38 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 
 static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
 				    struct btrfs_delayed_ref_root *delayed_refs,
+				    struct btrfs_delayed_ref_head *head,
 				    struct btrfs_delayed_ref_node *ref)
 {
-	rb_erase(&ref->rb_node, &delayed_refs->root);
 	if (btrfs_delayed_ref_is_head(ref)) {
-		struct btrfs_delayed_ref_head *head;
-
 		head = btrfs_delayed_node_to_head(ref);
 		rb_erase(&head->href_node, &delayed_refs->href_root);
+	} else {
+		assert_spin_locked(&head->lock);
+		rb_erase(&ref->rb_node, &head->ref_root);
 	}
 	ref->in_tree = 0;
 	btrfs_put_delayed_ref(ref);
-	delayed_refs->num_entries--;
+	atomic_dec(&delayed_refs->num_entries);
 	if (trans->delayed_ref_updates)
 		trans->delayed_ref_updates--;
 }
 
 static int merge_ref(struct btrfs_trans_handle *trans,
 		     struct btrfs_delayed_ref_root *delayed_refs,
+		     struct btrfs_delayed_ref_head *head,
 		     struct btrfs_delayed_ref_node *ref, u64 seq)
 {
 	struct rb_node *node;
-	int merged = 0;
 	int mod = 0;
 	int done = 0;
 
-	node = rb_prev(&ref->rb_node);
-	while (node) {
+	node = rb_next(&ref->rb_node);
+	while (!done && node) {
 		struct btrfs_delayed_ref_node *next;
 
 		next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-		node = rb_prev(node);
-		if (next->bytenr != ref->bytenr)
-			break;
+		node = rb_next(node);
 		if (seq && next->seq >= seq)
 			break;
 		if (comp_entry(ref, next, 0))
@@ -321,12 +320,11 @@ static int merge_ref(struct btrfs_trans_handle *trans,
 			mod = -next->ref_mod;
 		}
 
-		merged++;
-		drop_delayed_ref(trans, delayed_refs, next);
+		drop_delayed_ref(trans, delayed_refs, head, next);
 		ref->ref_mod += mod;
 		if (ref->ref_mod == 0) {
-			drop_delayed_ref(trans, delayed_refs, ref);
-			break;
+			drop_delayed_ref(trans, delayed_refs, head, ref);
+			done = 1;
 		} else {
 			/*
 			 * You can't have multiples of the same ref on a tree
@@ -335,13 +333,8 @@ static int merge_ref(struct btrfs_trans_handle *trans,
 			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
 				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
 		}
-
-		if (done)
-			break;
-		node = rb_prev(&ref->rb_node);
 	}
-
-	return merged;
+	return done;
 }
 
 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
@@ -352,6 +345,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 	struct rb_node *node;
 	u64 seq = 0;
 
+	assert_spin_locked(&head->lock);
 	/*
 	 * We don't have too much refs to merge in the case of delayed data
 	 * refs.
@@ -369,22 +363,19 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
 	}
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 
-	node = rb_prev(&head->node.rb_node);
+	node = rb_first(&head->ref_root);
 	while (node) {
 		struct btrfs_delayed_ref_node *ref;
 
 		ref = rb_entry(node, struct btrfs_delayed_ref_node,
 			       rb_node);
-		if (ref->bytenr != head->node.bytenr)
-			break;
-
 		/* We can't merge refs that are outside of our seq count */
 		if (seq && ref->seq >= seq)
 			break;
-		if (merge_ref(trans, delayed_refs, ref, seq))
-			node = rb_prev(&head->node.rb_node);
+		if (merge_ref(trans, delayed_refs, head, ref, seq))
+			node = rb_first(&head->ref_root);
 		else
-			node = rb_prev(node);
+			node = rb_next(&ref->rb_node);
 	}
 }
 
@@ -412,64 +403,52 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
-			   struct list_head *cluster, u64 start)
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans)
 {
-	int count = 0;
 	struct btrfs_delayed_ref_root *delayed_refs;
-	struct rb_node *node;
-	struct btrfs_delayed_ref_head *head = NULL;
+	struct btrfs_delayed_ref_head *head;
+	u64 start;
+	bool loop = false;
 
 	delayed_refs = &trans->transaction->delayed_refs;
-	node = rb_first(&delayed_refs->href_root);
 
-	if (start) {
-		find_ref_head(&delayed_refs->href_root, start + 1, &head, 1);
-		if (head)
-			node = &head->href_node;
-	}
 again:
-	while (node && count < 32) {
-		head = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
-		if (list_empty(&head->cluster)) {
-			list_add_tail(&head->cluster, cluster);
-			delayed_refs->run_delayed_start =
-				head->node.bytenr;
-			count++;
-
-			WARN_ON(delayed_refs->num_heads_ready == 0);
-			delayed_refs->num_heads_ready--;
-		} else if (count) {
-			/* the goal of the clustering is to find extents
-			 * that are likely to end up in the same extent
-			 * leaf on disk. So, we don't want them spread
-			 * all over the tree. Stop now if we've hit
-			 * a head that was already in use
-			 */
-			break;
-		}
-		node = rb_next(node);
-	}
-	if (count) {
-		return 0;
-	} else if (start) {
-		/*
-		 * we've gone to the end of the rbtree without finding any
-		 * clusters. start from the beginning and try again
-		 */
+	start = delayed_refs->run_delayed_start;
+	head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
+	if (!head && !loop) {
+		delayed_refs->run_delayed_start = 0;
 		start = 0;
-		node = rb_first(&delayed_refs->href_root);
-		goto again;
+		loop = true;
+		head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
+		if (!head)
+			return NULL;
+	} else if (!head && loop) {
+		return NULL;
 	}
-	return 1;
-}
 
-void btrfs_release_ref_cluster(struct list_head *cluster)
-{
-	struct list_head *pos, *q;
+	while (head->processing) {
+		struct rb_node *node;
+
+		node = rb_next(&head->href_node);
+		if (!node) {
+			if (loop)
+				return NULL;
+			delayed_refs->run_delayed_start = 0;
+			start = 0;
+			loop = true;
+			goto again;
+		}
+		head = rb_entry(node, struct btrfs_delayed_ref_head,
+				href_node);
+	}
 
-	list_for_each_safe(pos, q, cluster)
-		list_del_init(pos);
+	head->processing = 1;
+	WARN_ON(delayed_refs->num_heads_ready == 0);
+	delayed_refs->num_heads_ready--;
+	delayed_refs->run_delayed_start = head->node.bytenr +
+		head->node.num_bytes;
+	return head;
 }
 
 /*
@@ -483,6 +462,7 @@ void btrfs_release_ref_cluster(struct list_head *cluster)
 static noinline void
 update_existing_ref(struct btrfs_trans_handle *trans,
 		    struct btrfs_delayed_ref_root *delayed_refs,
+		    struct btrfs_delayed_ref_head *head,
 		    struct btrfs_delayed_ref_node *existing,
 		    struct btrfs_delayed_ref_node *update)
 {
@@ -495,7 +475,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
 		 */
 		existing->ref_mod--;
 		if (existing->ref_mod == 0)
-			drop_delayed_ref(trans, delayed_refs, existing);
+			drop_delayed_ref(trans, delayed_refs, head, existing);
 		else
 			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
 				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -565,9 +545,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 		}
 	}
 	/*
-	 * update the reference mod on the head to reflect this new operation
+	 * update the reference mod on the head to reflect this new operation,
+	 * only need the lock for this case cause we could be processing it
+	 * currently, for refs we just added we know we're a-ok.
 	 */
+	spin_lock(&existing_ref->lock);
 	existing->ref_mod += update->ref_mod;
+	spin_unlock(&existing_ref->lock);
 }
 
 /*
@@ -575,13 +559,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
  * this does all the dirty work in terms of maintaining the correct
  * overall modification count.
  */
-static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
-					struct btrfs_trans_handle *trans,
-					struct btrfs_delayed_ref_node *ref,
-					u64 bytenr, u64 num_bytes,
-					int action, int is_data)
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_fs_info *fs_info,
+		     struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
+		     u64 num_bytes, int action, int is_data)
 {
-	struct btrfs_delayed_ref_node *existing;
+	struct btrfs_delayed_ref_head *existing;
 	struct btrfs_delayed_ref_head *head_ref = NULL;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
@@ -628,39 +612,43 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 	head_ref = btrfs_delayed_node_to_head(ref);
 	head_ref->must_insert_reserved = must_insert_reserved;
 	head_ref->is_data = is_data;
+	head_ref->ref_root = RB_ROOT;
+	head_ref->processing = 0;
 
-	INIT_LIST_HEAD(&head_ref->cluster);
+	spin_lock_init(&head_ref->lock);
 	mutex_init(&head_ref->mutex);
 
 	trace_add_delayed_ref_head(ref, head_ref, action);
 
-	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+	existing = htree_insert(&delayed_refs->href_root,
+				&head_ref->href_node);
 	if (existing) {
-		update_existing_head_ref(existing, ref);
+		update_existing_head_ref(&existing->node, ref);
 		/*
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
 		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+		head_ref = existing;
 	} else {
-		htree_insert(&delayed_refs->href_root, &head_ref->href_node);
 		delayed_refs->num_heads++;
 		delayed_refs->num_heads_ready++;
-		delayed_refs->num_entries++;
+		atomic_inc(&delayed_refs->num_entries);
 		trans->delayed_ref_updates++;
 	}
+	return head_ref;
 }
 
 /*
  * helper to insert a delayed tree ref into the rbtree.
  */
-static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
-					 struct btrfs_trans_handle *trans,
-					 struct btrfs_delayed_ref_node *ref,
-					 u64 bytenr, u64 num_bytes, u64 parent,
-					 u64 ref_root, int level, int action,
-					 int for_cow)
+static noinline void
+add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
+		     struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_head *head_ref,
+		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
+		     u64 num_bytes, u64 parent, u64 ref_root, int level,
+		     int action, int for_cow)
 {
 	struct btrfs_delayed_ref_node *existing;
 	struct btrfs_delayed_tree_ref *full_ref;
@@ -696,30 +684,33 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
 	trace_add_delayed_tree_ref(ref, full_ref, action);
 
-	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+	spin_lock(&head_ref->lock);
+	existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
 	if (existing) {
-		update_existing_ref(trans, delayed_refs, existing, ref);
+		update_existing_ref(trans, delayed_refs, head_ref, existing,
+				    ref);
 		/*
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
 		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
 	} else {
-		delayed_refs->num_entries++;
+		atomic_inc(&delayed_refs->num_entries);
 		trans->delayed_ref_updates++;
 	}
+	spin_unlock(&head_ref->lock);
 }
 
 /*
  * helper to insert a delayed data ref into the rbtree.
  */
-static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
-					 struct btrfs_trans_handle *trans,
-					 struct btrfs_delayed_ref_node *ref,
-					 u64 bytenr, u64 num_bytes, u64 parent,
-					 u64 ref_root, u64 owner, u64 offset,
-					 int action, int for_cow)
+static noinline void
+add_delayed_data_ref(struct btrfs_fs_info *fs_info,
+		     struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_head *head_ref,
+		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
+		     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
+		     u64 offset, int action, int for_cow)
 {
 	struct btrfs_delayed_ref_node *existing;
 	struct btrfs_delayed_data_ref *full_ref;
@@ -757,19 +748,21 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
 	trace_add_delayed_data_ref(ref, full_ref, action);
 
-	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
-
+	spin_lock(&head_ref->lock);
+	existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
 	if (existing) {
-		update_existing_ref(trans, delayed_refs, existing, ref);
+		update_existing_ref(trans, delayed_refs, head_ref, existing,
+				    ref);
 		/*
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
 		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
 	} else {
-		delayed_refs->num_entries++;
+		atomic_inc(&delayed_refs->num_entries);
 		trans->delayed_ref_updates++;
 	}
+	spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -808,10 +801,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	 * insert both the head node and the new ref without dropping
 	 * the spin lock
 	 */
-	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
-				   num_bytes, action, 0);
+	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+					bytenr, num_bytes, action, 0);
 
-	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
+	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, level, action,
 			     for_cow);
 	spin_unlock(&delayed_refs->lock);
@@ -856,10 +849,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	 * insert both the head node and the new ref without dropping
 	 * the spin lock
 	 */
-	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
-				   num_bytes, action, 1);
+	head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
					bytenr, num_bytes, action, 1);
 
-	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
+	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 			     num_bytes, parent, ref_root, owner, offset,
 			     action, for_cow);
 	spin_unlock(&delayed_refs->lock);