Diffstat (limited to 'fs')
 -rw-r--r--  fs/btrfs/backref.c      |  23
 -rw-r--r--  fs/btrfs/delayed-ref.c  | 223
 -rw-r--r--  fs/btrfs/delayed-ref.h  |  23
 -rw-r--r--  fs/btrfs/disk-io.c      |  79
 -rw-r--r--  fs/btrfs/extent-tree.c  | 317
 -rw-r--r--  fs/btrfs/transaction.c  |   7
 6 files changed, 267 insertions, 405 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 835b6c9a26a8..34a8952de8dd 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -538,14 +538,13 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 	if (extent_op && extent_op->update_key)
 		btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
-	while ((n = rb_prev(n))) {
+	spin_lock(&head->lock);
+	n = rb_first(&head->ref_root);
+	while (n) {
 		struct btrfs_delayed_ref_node *node;
 		node = rb_entry(n, struct btrfs_delayed_ref_node,
 				rb_node);
-		if (node->bytenr != head->node.bytenr)
-			break;
-		WARN_ON(node->is_head);
-
+		n = rb_next(n);
 		if (node->seq > seq)
 			continue;
 
@@ -612,10 +611,10 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 			WARN_ON(1);
 		}
 		if (ret)
-			return ret;
+			break;
 	}
-
-	return 0;
+	spin_unlock(&head->lock);
+	return ret;
 }
 
 /*
@@ -882,15 +881,15 @@ again:
 				btrfs_put_delayed_ref(&head->node);
 				goto again;
 			}
+			spin_unlock(&delayed_refs->lock);
 			ret = __add_delayed_refs(head, time_seq,
 						 &prefs_delayed);
 			mutex_unlock(&head->mutex);
-			if (ret) {
-				spin_unlock(&delayed_refs->lock);
+			if (ret)
 				goto out;
-			}
+		} else {
+			spin_unlock(&delayed_refs->lock);
 		}
-		spin_unlock(&delayed_refs->lock);
 	}
 
 	if (path->slots[0]) {
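
The backref.c change walks each head's private ref_root rbtree under the new per-head lock instead of scanning neighbours of the head node in the old shared tree, and an error now breaks out of the loop so the lock is dropped exactly once. Below is a minimal userspace sketch of that pattern, not the kernel code: a pthread mutex stands in for the spinlock, a linked list for the rbtree, and all names are illustrative.

#include <pthread.h>
#include <stdio.h>

struct ref {
	unsigned long seq;
	int bad;			/* forces an error for the demo */
	struct ref *next;
};

struct ref_head {
	pthread_mutex_t lock;		/* stands in for head->lock */
	struct ref *refs;		/* stands in for head->ref_root */
};

/* Walk every ref attached to one head, under that head's own lock. */
static int add_delayed_refs(struct ref_head *head, unsigned long seq)
{
	struct ref *r;
	int ret = 0;

	pthread_mutex_lock(&head->lock);
	for (r = head->refs; r; r = r->next) {
		if (r->seq > seq)
			continue;	/* too new for this walk */
		if (r->bad) {
			ret = -1;
			break;		/* error path still unlocks below */
		}
		printf("processed ref seq=%lu\n", r->seq);
	}
	pthread_mutex_unlock(&head->lock);
	return ret;
}

int main(void)
{
	struct ref r2 = { .seq = 2, .bad = 1 };
	struct ref r1 = { .seq = 1, .next = &r2 };
	struct ref_head head = { .lock = PTHREAD_MUTEX_INITIALIZER, .refs = &r1 };

	printf("walk returned %d\n", add_delayed_refs(&head, 5));
	return 0;
}
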
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index fab60c1ba065..f3bff89eecf0 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -269,39 +269,38 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
269 269
270static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, 270static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
271 struct btrfs_delayed_ref_root *delayed_refs, 271 struct btrfs_delayed_ref_root *delayed_refs,
272 struct btrfs_delayed_ref_head *head,
272 struct btrfs_delayed_ref_node *ref) 273 struct btrfs_delayed_ref_node *ref)
273{ 274{
274 rb_erase(&ref->rb_node, &delayed_refs->root);
275 if (btrfs_delayed_ref_is_head(ref)) { 275 if (btrfs_delayed_ref_is_head(ref)) {
276 struct btrfs_delayed_ref_head *head;
277
278 head = btrfs_delayed_node_to_head(ref); 276 head = btrfs_delayed_node_to_head(ref);
279 rb_erase(&head->href_node, &delayed_refs->href_root); 277 rb_erase(&head->href_node, &delayed_refs->href_root);
278 } else {
279 assert_spin_locked(&head->lock);
280 rb_erase(&ref->rb_node, &head->ref_root);
280 } 281 }
281 ref->in_tree = 0; 282 ref->in_tree = 0;
282 btrfs_put_delayed_ref(ref); 283 btrfs_put_delayed_ref(ref);
283 delayed_refs->num_entries--; 284 atomic_dec(&delayed_refs->num_entries);
284 if (trans->delayed_ref_updates) 285 if (trans->delayed_ref_updates)
285 trans->delayed_ref_updates--; 286 trans->delayed_ref_updates--;
286} 287}
287 288
288static int merge_ref(struct btrfs_trans_handle *trans, 289static int merge_ref(struct btrfs_trans_handle *trans,
289 struct btrfs_delayed_ref_root *delayed_refs, 290 struct btrfs_delayed_ref_root *delayed_refs,
291 struct btrfs_delayed_ref_head *head,
290 struct btrfs_delayed_ref_node *ref, u64 seq) 292 struct btrfs_delayed_ref_node *ref, u64 seq)
291{ 293{
292 struct rb_node *node; 294 struct rb_node *node;
293 int merged = 0;
294 int mod = 0; 295 int mod = 0;
295 int done = 0; 296 int done = 0;
296 297
297 node = rb_prev(&ref->rb_node); 298 node = rb_next(&ref->rb_node);
298 while (node) { 299 while (!done && node) {
299 struct btrfs_delayed_ref_node *next; 300 struct btrfs_delayed_ref_node *next;
300 301
301 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 302 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
302 node = rb_prev(node); 303 node = rb_next(node);
303 if (next->bytenr != ref->bytenr)
304 break;
305 if (seq && next->seq >= seq) 304 if (seq && next->seq >= seq)
306 break; 305 break;
307 if (comp_entry(ref, next, 0)) 306 if (comp_entry(ref, next, 0))
@@ -321,12 +320,11 @@ static int merge_ref(struct btrfs_trans_handle *trans,
321 mod = -next->ref_mod; 320 mod = -next->ref_mod;
322 } 321 }
323 322
324 merged++; 323 drop_delayed_ref(trans, delayed_refs, head, next);
325 drop_delayed_ref(trans, delayed_refs, next);
326 ref->ref_mod += mod; 324 ref->ref_mod += mod;
327 if (ref->ref_mod == 0) { 325 if (ref->ref_mod == 0) {
328 drop_delayed_ref(trans, delayed_refs, ref); 326 drop_delayed_ref(trans, delayed_refs, head, ref);
329 break; 327 done = 1;
330 } else { 328 } else {
331 /* 329 /*
332 * You can't have multiples of the same ref on a tree 330 * You can't have multiples of the same ref on a tree
@@ -335,13 +333,8 @@ static int merge_ref(struct btrfs_trans_handle *trans,
335 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || 333 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
336 ref->type == BTRFS_SHARED_BLOCK_REF_KEY); 334 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
337 } 335 }
338
339 if (done)
340 break;
341 node = rb_prev(&ref->rb_node);
342 } 336 }
343 337 return done;
344 return merged;
345} 338}
346 339
347void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, 340void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
@@ -352,6 +345,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
352 struct rb_node *node; 345 struct rb_node *node;
353 u64 seq = 0; 346 u64 seq = 0;
354 347
348 assert_spin_locked(&head->lock);
355 /* 349 /*
356 * We don't have too much refs to merge in the case of delayed data 350 * We don't have too much refs to merge in the case of delayed data
357 * refs. 351 * refs.
@@ -369,22 +363,19 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
369 } 363 }
370 spin_unlock(&fs_info->tree_mod_seq_lock); 364 spin_unlock(&fs_info->tree_mod_seq_lock);
371 365
372 node = rb_prev(&head->node.rb_node); 366 node = rb_first(&head->ref_root);
373 while (node) { 367 while (node) {
374 struct btrfs_delayed_ref_node *ref; 368 struct btrfs_delayed_ref_node *ref;
375 369
376 ref = rb_entry(node, struct btrfs_delayed_ref_node, 370 ref = rb_entry(node, struct btrfs_delayed_ref_node,
377 rb_node); 371 rb_node);
378 if (ref->bytenr != head->node.bytenr)
379 break;
380
381 /* We can't merge refs that are outside of our seq count */ 372 /* We can't merge refs that are outside of our seq count */
382 if (seq && ref->seq >= seq) 373 if (seq && ref->seq >= seq)
383 break; 374 break;
384 if (merge_ref(trans, delayed_refs, ref, seq)) 375 if (merge_ref(trans, delayed_refs, head, ref, seq))
385 node = rb_prev(&head->node.rb_node); 376 node = rb_first(&head->ref_root);
386 else 377 else
387 node = rb_prev(node); 378 node = rb_next(&ref->rb_node);
388 } 379 }
389} 380}
390 381
@@ -412,64 +403,52 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
412 return ret; 403 return ret;
413} 404}
414 405
415int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 406struct btrfs_delayed_ref_head *
416 struct list_head *cluster, u64 start) 407btrfs_select_ref_head(struct btrfs_trans_handle *trans)
417{ 408{
418 int count = 0;
419 struct btrfs_delayed_ref_root *delayed_refs; 409 struct btrfs_delayed_ref_root *delayed_refs;
420 struct rb_node *node; 410 struct btrfs_delayed_ref_head *head;
421 struct btrfs_delayed_ref_head *head = NULL; 411 u64 start;
412 bool loop = false;
422 413
423 delayed_refs = &trans->transaction->delayed_refs; 414 delayed_refs = &trans->transaction->delayed_refs;
424 node = rb_first(&delayed_refs->href_root);
425 415
426 if (start) {
427 find_ref_head(&delayed_refs->href_root, start + 1, &head, 1);
428 if (head)
429 node = &head->href_node;
430 }
431again: 416again:
432 while (node && count < 32) { 417 start = delayed_refs->run_delayed_start;
433 head = rb_entry(node, struct btrfs_delayed_ref_head, href_node); 418 head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
434 if (list_empty(&head->cluster)) { 419 if (!head && !loop) {
435 list_add_tail(&head->cluster, cluster); 420 delayed_refs->run_delayed_start = 0;
436 delayed_refs->run_delayed_start =
437 head->node.bytenr;
438 count++;
439
440 WARN_ON(delayed_refs->num_heads_ready == 0);
441 delayed_refs->num_heads_ready--;
442 } else if (count) {
443 /* the goal of the clustering is to find extents
444 * that are likely to end up in the same extent
445 * leaf on disk. So, we don't want them spread
446 * all over the tree. Stop now if we've hit
447 * a head that was already in use
448 */
449 break;
450 }
451 node = rb_next(node);
452 }
453 if (count) {
454 return 0;
455 } else if (start) {
456 /*
457 * we've gone to the end of the rbtree without finding any
458 * clusters. start from the beginning and try again
459 */
460 start = 0; 421 start = 0;
461 node = rb_first(&delayed_refs->href_root); 422 loop = true;
462 goto again; 423 head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
424 if (!head)
425 return NULL;
426 } else if (!head && loop) {
427 return NULL;
463 } 428 }
464 return 1;
465}
466 429
467void btrfs_release_ref_cluster(struct list_head *cluster) 430 while (head->processing) {
468{ 431 struct rb_node *node;
469 struct list_head *pos, *q; 432
433 node = rb_next(&head->href_node);
434 if (!node) {
435 if (loop)
436 return NULL;
437 delayed_refs->run_delayed_start = 0;
438 start = 0;
439 loop = true;
440 goto again;
441 }
442 head = rb_entry(node, struct btrfs_delayed_ref_head,
443 href_node);
444 }
470 445
471 list_for_each_safe(pos, q, cluster) 446 head->processing = 1;
472 list_del_init(pos); 447 WARN_ON(delayed_refs->num_heads_ready == 0);
448 delayed_refs->num_heads_ready--;
449 delayed_refs->run_delayed_start = head->node.bytenr +
450 head->node.num_bytes;
451 return head;
473} 452}
474 453
475/* 454/*
@@ -483,6 +462,7 @@ void btrfs_release_ref_cluster(struct list_head *cluster)
483static noinline void 462static noinline void
484update_existing_ref(struct btrfs_trans_handle *trans, 463update_existing_ref(struct btrfs_trans_handle *trans,
485 struct btrfs_delayed_ref_root *delayed_refs, 464 struct btrfs_delayed_ref_root *delayed_refs,
465 struct btrfs_delayed_ref_head *head,
486 struct btrfs_delayed_ref_node *existing, 466 struct btrfs_delayed_ref_node *existing,
487 struct btrfs_delayed_ref_node *update) 467 struct btrfs_delayed_ref_node *update)
488{ 468{
@@ -495,7 +475,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
495 */ 475 */
496 existing->ref_mod--; 476 existing->ref_mod--;
497 if (existing->ref_mod == 0) 477 if (existing->ref_mod == 0)
498 drop_delayed_ref(trans, delayed_refs, existing); 478 drop_delayed_ref(trans, delayed_refs, head, existing);
499 else 479 else
500 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || 480 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
501 existing->type == BTRFS_SHARED_BLOCK_REF_KEY); 481 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -565,9 +545,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
565 } 545 }
566 } 546 }
567 /* 547 /*
568 * update the reference mod on the head to reflect this new operation 548 * update the reference mod on the head to reflect this new operation,
549 * only need the lock for this case cause we could be processing it
550 * currently, for refs we just added we know we're a-ok.
569 */ 551 */
552 spin_lock(&existing_ref->lock);
570 existing->ref_mod += update->ref_mod; 553 existing->ref_mod += update->ref_mod;
554 spin_unlock(&existing_ref->lock);
571} 555}
572 556
573/* 557/*
@@ -575,13 +559,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
575 * this does all the dirty work in terms of maintaining the correct 559 * this does all the dirty work in terms of maintaining the correct
576 * overall modification count. 560 * overall modification count.
577 */ 561 */
578static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, 562static noinline struct btrfs_delayed_ref_head *
579 struct btrfs_trans_handle *trans, 563add_delayed_ref_head(struct btrfs_fs_info *fs_info,
580 struct btrfs_delayed_ref_node *ref, 564 struct btrfs_trans_handle *trans,
581 u64 bytenr, u64 num_bytes, 565 struct btrfs_delayed_ref_node *ref, u64 bytenr,
582 int action, int is_data) 566 u64 num_bytes, int action, int is_data)
583{ 567{
584 struct btrfs_delayed_ref_node *existing; 568 struct btrfs_delayed_ref_head *existing;
585 struct btrfs_delayed_ref_head *head_ref = NULL; 569 struct btrfs_delayed_ref_head *head_ref = NULL;
586 struct btrfs_delayed_ref_root *delayed_refs; 570 struct btrfs_delayed_ref_root *delayed_refs;
587 int count_mod = 1; 571 int count_mod = 1;
@@ -628,39 +612,43 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
628 head_ref = btrfs_delayed_node_to_head(ref); 612 head_ref = btrfs_delayed_node_to_head(ref);
629 head_ref->must_insert_reserved = must_insert_reserved; 613 head_ref->must_insert_reserved = must_insert_reserved;
630 head_ref->is_data = is_data; 614 head_ref->is_data = is_data;
615 head_ref->ref_root = RB_ROOT;
616 head_ref->processing = 0;
631 617
632 INIT_LIST_HEAD(&head_ref->cluster); 618 spin_lock_init(&head_ref->lock);
633 mutex_init(&head_ref->mutex); 619 mutex_init(&head_ref->mutex);
634 620
635 trace_add_delayed_ref_head(ref, head_ref, action); 621 trace_add_delayed_ref_head(ref, head_ref, action);
636 622
637 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 623 existing = htree_insert(&delayed_refs->href_root,
638 624 &head_ref->href_node);
639 if (existing) { 625 if (existing) {
640 update_existing_head_ref(existing, ref); 626 update_existing_head_ref(&existing->node, ref);
641 /* 627 /*
642 * we've updated the existing ref, free the newly 628 * we've updated the existing ref, free the newly
643 * allocated ref 629 * allocated ref
644 */ 630 */
645 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 631 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
632 head_ref = existing;
646 } else { 633 } else {
647 htree_insert(&delayed_refs->href_root, &head_ref->href_node);
648 delayed_refs->num_heads++; 634 delayed_refs->num_heads++;
649 delayed_refs->num_heads_ready++; 635 delayed_refs->num_heads_ready++;
650 delayed_refs->num_entries++; 636 atomic_inc(&delayed_refs->num_entries);
651 trans->delayed_ref_updates++; 637 trans->delayed_ref_updates++;
652 } 638 }
639 return head_ref;
653} 640}
654 641
655/* 642/*
656 * helper to insert a delayed tree ref into the rbtree. 643 * helper to insert a delayed tree ref into the rbtree.
657 */ 644 */
658static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 645static noinline void
659 struct btrfs_trans_handle *trans, 646add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
660 struct btrfs_delayed_ref_node *ref, 647 struct btrfs_trans_handle *trans,
661 u64 bytenr, u64 num_bytes, u64 parent, 648 struct btrfs_delayed_ref_head *head_ref,
662 u64 ref_root, int level, int action, 649 struct btrfs_delayed_ref_node *ref, u64 bytenr,
663 int for_cow) 650 u64 num_bytes, u64 parent, u64 ref_root, int level,
651 int action, int for_cow)
664{ 652{
665 struct btrfs_delayed_ref_node *existing; 653 struct btrfs_delayed_ref_node *existing;
666 struct btrfs_delayed_tree_ref *full_ref; 654 struct btrfs_delayed_tree_ref *full_ref;
@@ -696,30 +684,33 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
696 684
697 trace_add_delayed_tree_ref(ref, full_ref, action); 685 trace_add_delayed_tree_ref(ref, full_ref, action);
698 686
699 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 687 spin_lock(&head_ref->lock);
700 688 existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
701 if (existing) { 689 if (existing) {
702 update_existing_ref(trans, delayed_refs, existing, ref); 690 update_existing_ref(trans, delayed_refs, head_ref, existing,
691 ref);
703 /* 692 /*
704 * we've updated the existing ref, free the newly 693 * we've updated the existing ref, free the newly
705 * allocated ref 694 * allocated ref
706 */ 695 */
707 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref); 696 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
708 } else { 697 } else {
709 delayed_refs->num_entries++; 698 atomic_inc(&delayed_refs->num_entries);
710 trans->delayed_ref_updates++; 699 trans->delayed_ref_updates++;
711 } 700 }
701 spin_unlock(&head_ref->lock);
712} 702}
713 703
714/* 704/*
715 * helper to insert a delayed data ref into the rbtree. 705 * helper to insert a delayed data ref into the rbtree.
716 */ 706 */
717static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, 707static noinline void
718 struct btrfs_trans_handle *trans, 708add_delayed_data_ref(struct btrfs_fs_info *fs_info,
719 struct btrfs_delayed_ref_node *ref, 709 struct btrfs_trans_handle *trans,
720 u64 bytenr, u64 num_bytes, u64 parent, 710 struct btrfs_delayed_ref_head *head_ref,
721 u64 ref_root, u64 owner, u64 offset, 711 struct btrfs_delayed_ref_node *ref, u64 bytenr,
722 int action, int for_cow) 712 u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
713 u64 offset, int action, int for_cow)
723{ 714{
724 struct btrfs_delayed_ref_node *existing; 715 struct btrfs_delayed_ref_node *existing;
725 struct btrfs_delayed_data_ref *full_ref; 716 struct btrfs_delayed_data_ref *full_ref;
@@ -757,19 +748,21 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
757 748
758 trace_add_delayed_data_ref(ref, full_ref, action); 749 trace_add_delayed_data_ref(ref, full_ref, action);
759 750
760 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 751 spin_lock(&head_ref->lock);
761 752 existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
762 if (existing) { 753 if (existing) {
763 update_existing_ref(trans, delayed_refs, existing, ref); 754 update_existing_ref(trans, delayed_refs, head_ref, existing,
755 ref);
764 /* 756 /*
765 * we've updated the existing ref, free the newly 757 * we've updated the existing ref, free the newly
766 * allocated ref 758 * allocated ref
767 */ 759 */
768 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); 760 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
769 } else { 761 } else {
770 delayed_refs->num_entries++; 762 atomic_inc(&delayed_refs->num_entries);
771 trans->delayed_ref_updates++; 763 trans->delayed_ref_updates++;
772 } 764 }
765 spin_unlock(&head_ref->lock);
773} 766}
774 767
775/* 768/*
@@ -808,10 +801,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
808 * insert both the head node and the new ref without dropping 801 * insert both the head node and the new ref without dropping
809 * the spin lock 802 * the spin lock
810 */ 803 */
811 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 804 head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
812 num_bytes, action, 0); 805 bytenr, num_bytes, action, 0);
813 806
814 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, 807 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
815 num_bytes, parent, ref_root, level, action, 808 num_bytes, parent, ref_root, level, action,
816 for_cow); 809 for_cow);
817 spin_unlock(&delayed_refs->lock); 810 spin_unlock(&delayed_refs->lock);
@@ -856,10 +849,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
856 * insert both the head node and the new ref without dropping 849 * insert both the head node and the new ref without dropping
857 * the spin lock 850 * the spin lock
858 */ 851 */
859 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 852 head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
860 num_bytes, action, 1); 853 bytenr, num_bytes, action, 1);
861 854
862 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, 855 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
863 num_bytes, parent, ref_root, owner, offset, 856 num_bytes, parent, ref_root, owner, offset,
864 action, for_cow); 857 action, for_cow);
865 spin_unlock(&delayed_refs->lock); 858 spin_unlock(&delayed_refs->lock);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a54c9d47918f..4ba9b93022ff 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -81,7 +81,8 @@ struct btrfs_delayed_ref_head {
 	 */
 	struct mutex mutex;
 
-	struct list_head cluster;
+	spinlock_t lock;
+	struct rb_root ref_root;
 
 	struct rb_node href_node;
 
@@ -100,6 +101,7 @@ struct btrfs_delayed_ref_head {
 	 */
 	unsigned int must_insert_reserved:1;
 	unsigned int is_data:1;
+	unsigned int processing:1;
 };
 
 struct btrfs_delayed_tree_ref {
@@ -118,8 +120,6 @@ struct btrfs_delayed_data_ref {
 };
 
 struct btrfs_delayed_ref_root {
-	struct rb_root root;
-
 	/* head ref rbtree */
 	struct rb_root href_root;
 
@@ -129,7 +129,7 @@ struct btrfs_delayed_ref_root {
 	/* how many delayed ref updates we've queued, used by the
 	 * throttling code
 	 */
-	unsigned long num_entries;
+	atomic_t num_entries;
 
 	/* total number of head nodes in tree */
 	unsigned long num_heads;
@@ -138,15 +138,6 @@ struct btrfs_delayed_ref_root {
 	unsigned long num_heads_ready;
 
 	/*
-	 * bumped when someone is making progress on the delayed
-	 * refs, so that other procs know they are just adding to
-	 * contention intead of helping
-	 */
-	atomic_t procs_running_refs;
-	atomic_t ref_seq;
-	wait_queue_head_t wait;
-
-	/*
 	 * set when the tree is flushing before a transaction commit,
 	 * used by the throttling code to decide if new updates need
 	 * to be run right away
@@ -231,9 +222,9 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
 	mutex_unlock(&head->mutex);
 }
 
-int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
-			   struct list_head *cluster, u64 search_start);
-void btrfs_release_ref_cluster(struct list_head *cluster);
+
+struct btrfs_delayed_ref_head *
+btrfs_select_ref_head(struct btrfs_trans_handle *trans);
 
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 			    struct btrfs_delayed_ref_root *delayed_refs,
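
The header change is the core of the rework: each delayed ref head now carries its own spinlock, its own rbtree of refs and a processing bit, while the per-transaction root keeps only the head rbtree and an atomic entry count. A loose userspace mirror of that shape is sketched below; the names and types are assumptions for illustration, not the btrfs definitions.

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct ref_node;			/* one queued +1/-1 modification */

struct ref_head {
	pthread_mutex_t mutex;		/* held while the head is being run */
	pthread_spinlock_t lock;	/* protects ref_tree below */
	struct ref_node *ref_tree;	/* stands in for rb_root ref_root */
	unsigned int processing : 1;	/* claimed by a worker */
	unsigned int must_insert_reserved : 1;
	unsigned int is_data : 1;
};

struct ref_root {
	pthread_spinlock_t lock;	/* protects only the head tree */
	struct ref_head *href_tree;	/* stands in for rb_root href_root */
	atomic_ulong num_entries;	/* was a plain counter under ->lock */
	unsigned long num_heads;
	unsigned long num_heads_ready;
};

int main(void)
{
	struct ref_root root = { .href_tree = NULL };

	pthread_spin_init(&root.lock, PTHREAD_PROCESS_PRIVATE);
	atomic_init(&root.num_entries, 0);
	pthread_spin_destroy(&root.lock);
	return 0;
}
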
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9850a51a3f19..ed23127a4b02 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3801,58 +3801,55 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3801 delayed_refs = &trans->delayed_refs; 3801 delayed_refs = &trans->delayed_refs;
3802 3802
3803 spin_lock(&delayed_refs->lock); 3803 spin_lock(&delayed_refs->lock);
3804 if (delayed_refs->num_entries == 0) { 3804 if (atomic_read(&delayed_refs->num_entries) == 0) {
3805 spin_unlock(&delayed_refs->lock); 3805 spin_unlock(&delayed_refs->lock);
3806 btrfs_info(root->fs_info, "delayed_refs has NO entry"); 3806 btrfs_info(root->fs_info, "delayed_refs has NO entry");
3807 return ret; 3807 return ret;
3808 } 3808 }
3809 3809
3810 while ((node = rb_first(&delayed_refs->root)) != NULL) { 3810 while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
3811 struct btrfs_delayed_ref_head *head = NULL; 3811 struct btrfs_delayed_ref_head *head;
3812 bool pin_bytes = false; 3812 bool pin_bytes = false;
3813 3813
3814 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3814 head = rb_entry(node, struct btrfs_delayed_ref_head,
3815 atomic_set(&ref->refs, 1); 3815 href_node);
3816 if (btrfs_delayed_ref_is_head(ref)) { 3816 if (!mutex_trylock(&head->mutex)) {
3817 atomic_inc(&head->node.refs);
3818 spin_unlock(&delayed_refs->lock);
3817 3819
3818 head = btrfs_delayed_node_to_head(ref); 3820 mutex_lock(&head->mutex);
3819 if (!mutex_trylock(&head->mutex)) {
3820 atomic_inc(&ref->refs);
3821 spin_unlock(&delayed_refs->lock);
3822
3823 /* Need to wait for the delayed ref to run */
3824 mutex_lock(&head->mutex);
3825 mutex_unlock(&head->mutex);
3826 btrfs_put_delayed_ref(ref);
3827
3828 spin_lock(&delayed_refs->lock);
3829 continue;
3830 }
3831
3832 if (head->must_insert_reserved)
3833 pin_bytes = true;
3834 btrfs_free_delayed_extent_op(head->extent_op);
3835 delayed_refs->num_heads--;
3836 if (list_empty(&head->cluster))
3837 delayed_refs->num_heads_ready--;
3838 list_del_init(&head->cluster);
3839 }
3840
3841 ref->in_tree = 0;
3842 rb_erase(&ref->rb_node, &delayed_refs->root);
3843 if (head)
3844 rb_erase(&head->href_node, &delayed_refs->href_root);
3845
3846 delayed_refs->num_entries--;
3847 spin_unlock(&delayed_refs->lock);
3848 if (head) {
3849 if (pin_bytes)
3850 btrfs_pin_extent(root, ref->bytenr,
3851 ref->num_bytes, 1);
3852 mutex_unlock(&head->mutex); 3821 mutex_unlock(&head->mutex);
3822 btrfs_put_delayed_ref(&head->node);
3823 spin_lock(&delayed_refs->lock);
3824 continue;
3825 }
3826 spin_lock(&head->lock);
3827 while ((node = rb_first(&head->ref_root)) != NULL) {
3828 ref = rb_entry(node, struct btrfs_delayed_ref_node,
3829 rb_node);
3830 ref->in_tree = 0;
3831 rb_erase(&ref->rb_node, &head->ref_root);
3832 atomic_dec(&delayed_refs->num_entries);
3833 btrfs_put_delayed_ref(ref);
3834 cond_resched_lock(&head->lock);
3853 } 3835 }
3854 btrfs_put_delayed_ref(ref); 3836 if (head->must_insert_reserved)
3837 pin_bytes = true;
3838 btrfs_free_delayed_extent_op(head->extent_op);
3839 delayed_refs->num_heads--;
3840 if (head->processing == 0)
3841 delayed_refs->num_heads_ready--;
3842 atomic_dec(&delayed_refs->num_entries);
3843 head->node.in_tree = 0;
3844 rb_erase(&head->href_node, &delayed_refs->href_root);
3845 spin_unlock(&head->lock);
3846 spin_unlock(&delayed_refs->lock);
3847 mutex_unlock(&head->mutex);
3855 3848
3849 if (pin_bytes)
3850 btrfs_pin_extent(root, head->node.bytenr,
3851 head->node.num_bytes, 1);
3852 btrfs_put_delayed_ref(&head->node);
3856 cond_resched(); 3853 cond_resched();
3857 spin_lock(&delayed_refs->lock); 3854 spin_lock(&delayed_refs->lock);
3858 } 3855 }
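
With refs attached to their heads, btrfs_destroy_delayed_refs() drains each head's ref_root under that head's own lock, decrementing the now-atomic entry count, before unhooking and releasing the head itself. A small userspace model of that drain step follows (illustrative names; a mutex and a list stand in for the kernel primitives).

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ref {
	struct ref *next;
};

struct ref_head {
	pthread_mutex_t lock;		/* stands in for head->lock */
	struct ref *refs;		/* stands in for head->ref_root */
};

static atomic_long num_entries;		/* stands in for delayed_refs->num_entries */

static void destroy_head(struct ref_head *head)
{
	pthread_mutex_lock(&head->lock);
	while (head->refs) {
		struct ref *r = head->refs;

		head->refs = r->next;		/* rb_erase() equivalent */
		atomic_fetch_sub(&num_entries, 1);
		free(r);			/* btrfs_put_delayed_ref() equivalent */
	}
	pthread_mutex_unlock(&head->lock);
}

int main(void)
{
	struct ref_head head = { .lock = PTHREAD_MUTEX_INITIALIZER };

	head.refs = calloc(1, sizeof(struct ref));
	atomic_store(&num_entries, 1);
	destroy_head(&head);
	printf("entries left: %ld\n", atomic_load(&num_entries));
	return 0;
}
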
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 77acc0873895..c77156c77de7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -857,12 +857,14 @@ again:
857 btrfs_put_delayed_ref(&head->node); 857 btrfs_put_delayed_ref(&head->node);
858 goto search_again; 858 goto search_again;
859 } 859 }
860 spin_lock(&head->lock);
860 if (head->extent_op && head->extent_op->update_flags) 861 if (head->extent_op && head->extent_op->update_flags)
861 extent_flags |= head->extent_op->flags_to_set; 862 extent_flags |= head->extent_op->flags_to_set;
862 else 863 else
863 BUG_ON(num_refs == 0); 864 BUG_ON(num_refs == 0);
864 865
865 num_refs += head->node.ref_mod; 866 num_refs += head->node.ref_mod;
867 spin_unlock(&head->lock);
866 mutex_unlock(&head->mutex); 868 mutex_unlock(&head->mutex);
867 } 869 }
868 spin_unlock(&delayed_refs->lock); 870 spin_unlock(&delayed_refs->lock);
@@ -2287,40 +2289,33 @@ static noinline struct btrfs_delayed_ref_node *
2287select_delayed_ref(struct btrfs_delayed_ref_head *head) 2289select_delayed_ref(struct btrfs_delayed_ref_head *head)
2288{ 2290{
2289 struct rb_node *node; 2291 struct rb_node *node;
2290 struct btrfs_delayed_ref_node *ref; 2292 struct btrfs_delayed_ref_node *ref, *last = NULL;;
2291 int action = BTRFS_ADD_DELAYED_REF; 2293
2292again:
2293 /* 2294 /*
2294 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2295 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
2295 * this prevents ref count from going down to zero when 2296 * this prevents ref count from going down to zero when
2296 * there still are pending delayed ref. 2297 * there still are pending delayed ref.
2297 */ 2298 */
2298 node = rb_prev(&head->node.rb_node); 2299 node = rb_first(&head->ref_root);
2299 while (1) { 2300 while (node) {
2300 if (!node)
2301 break;
2302 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2301 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2303 rb_node); 2302 rb_node);
2304 if (ref->bytenr != head->node.bytenr) 2303 if (ref->action == BTRFS_ADD_DELAYED_REF)
2305 break;
2306 if (ref->action == action)
2307 return ref; 2304 return ref;
2308 node = rb_prev(node); 2305 else if (last == NULL)
2309 } 2306 last = ref;
2310 if (action == BTRFS_ADD_DELAYED_REF) { 2307 node = rb_next(node);
2311 action = BTRFS_DROP_DELAYED_REF;
2312 goto again;
2313 } 2308 }
2314 return NULL; 2309 return last;
2315} 2310}
2316 2311
2317/* 2312/*
2318 * Returns 0 on success or if called with an already aborted transaction. 2313 * Returns 0 on success or if called with an already aborted transaction.
2319 * Returns -ENOMEM or -EIO on failure and will abort the transaction. 2314 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2320 */ 2315 */
2321static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2316static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2322 struct btrfs_root *root, 2317 struct btrfs_root *root,
2323 struct list_head *cluster) 2318 unsigned long nr)
2324{ 2319{
2325 struct btrfs_delayed_ref_root *delayed_refs; 2320 struct btrfs_delayed_ref_root *delayed_refs;
2326 struct btrfs_delayed_ref_node *ref; 2321 struct btrfs_delayed_ref_node *ref;
@@ -2328,23 +2323,26 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2328 struct btrfs_delayed_extent_op *extent_op; 2323 struct btrfs_delayed_extent_op *extent_op;
2329 struct btrfs_fs_info *fs_info = root->fs_info; 2324 struct btrfs_fs_info *fs_info = root->fs_info;
2330 int ret; 2325 int ret;
2331 int count = 0; 2326 unsigned long count = 0;
2332 int must_insert_reserved = 0; 2327 int must_insert_reserved = 0;
2333 2328
2334 delayed_refs = &trans->transaction->delayed_refs; 2329 delayed_refs = &trans->transaction->delayed_refs;
2335 while (1) { 2330 while (1) {
2336 if (!locked_ref) { 2331 if (!locked_ref) {
2337 /* pick a new head ref from the cluster list */ 2332 if (count >= nr)
2338 if (list_empty(cluster))
2339 break; 2333 break;
2340 2334
2341 locked_ref = list_entry(cluster->next, 2335 spin_lock(&delayed_refs->lock);
2342 struct btrfs_delayed_ref_head, cluster); 2336 locked_ref = btrfs_select_ref_head(trans);
2337 if (!locked_ref) {
2338 spin_unlock(&delayed_refs->lock);
2339 break;
2340 }
2343 2341
2344 /* grab the lock that says we are going to process 2342 /* grab the lock that says we are going to process
2345 * all the refs for this head */ 2343 * all the refs for this head */
2346 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2344 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2347 2345 spin_unlock(&delayed_refs->lock);
2348 /* 2346 /*
2349 * we may have dropped the spin lock to get the head 2347 * we may have dropped the spin lock to get the head
2350 * mutex lock, and that might have given someone else 2348 * mutex lock, and that might have given someone else
@@ -2365,6 +2363,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2365 * finish. If we merged anything we need to re-loop so we can 2363 * finish. If we merged anything we need to re-loop so we can
2366 * get a good ref. 2364 * get a good ref.
2367 */ 2365 */
2366 spin_lock(&locked_ref->lock);
2368 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, 2367 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2369 locked_ref); 2368 locked_ref);
2370 2369
@@ -2376,17 +2375,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2376 2375
2377 if (ref && ref->seq && 2376 if (ref && ref->seq &&
2378 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { 2377 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2379 /* 2378 spin_unlock(&locked_ref->lock);
2380 * there are still refs with lower seq numbers in the
2381 * process of being added. Don't run this ref yet.
2382 */
2383 list_del_init(&locked_ref->cluster);
2384 btrfs_delayed_ref_unlock(locked_ref); 2379 btrfs_delayed_ref_unlock(locked_ref);
2385 locked_ref = NULL; 2380 spin_lock(&delayed_refs->lock);
2381 locked_ref->processing = 0;
2386 delayed_refs->num_heads_ready++; 2382 delayed_refs->num_heads_ready++;
2387 spin_unlock(&delayed_refs->lock); 2383 spin_unlock(&delayed_refs->lock);
2384 locked_ref = NULL;
2388 cond_resched(); 2385 cond_resched();
2389 spin_lock(&delayed_refs->lock);
2390 continue; 2386 continue;
2391 } 2387 }
2392 2388
@@ -2401,6 +2397,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2401 locked_ref->extent_op = NULL; 2397 locked_ref->extent_op = NULL;
2402 2398
2403 if (!ref) { 2399 if (!ref) {
2400
2401
2404 /* All delayed refs have been processed, Go ahead 2402 /* All delayed refs have been processed, Go ahead
2405 * and send the head node to run_one_delayed_ref, 2403 * and send the head node to run_one_delayed_ref,
2406 * so that any accounting fixes can happen 2404 * so that any accounting fixes can happen
@@ -2413,8 +2411,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2413 } 2411 }
2414 2412
2415 if (extent_op) { 2413 if (extent_op) {
2416 spin_unlock(&delayed_refs->lock); 2414 spin_unlock(&locked_ref->lock);
2417
2418 ret = run_delayed_extent_op(trans, root, 2415 ret = run_delayed_extent_op(trans, root,
2419 ref, extent_op); 2416 ref, extent_op);
2420 btrfs_free_delayed_extent_op(extent_op); 2417 btrfs_free_delayed_extent_op(extent_op);
@@ -2428,23 +2425,38 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2428 */ 2425 */
2429 if (must_insert_reserved) 2426 if (must_insert_reserved)
2430 locked_ref->must_insert_reserved = 1; 2427 locked_ref->must_insert_reserved = 1;
2428 locked_ref->processing = 0;
2431 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); 2429 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2432 spin_lock(&delayed_refs->lock);
2433 btrfs_delayed_ref_unlock(locked_ref); 2430 btrfs_delayed_ref_unlock(locked_ref);
2434 return ret; 2431 return ret;
2435 } 2432 }
2436 2433 continue;
2437 goto next;
2438 } 2434 }
2439 }
2440 2435
2441 ref->in_tree = 0; 2436 /*
2442 rb_erase(&ref->rb_node, &delayed_refs->root); 2437 * Need to drop our head ref lock and re-aqcuire the
2443 if (btrfs_delayed_ref_is_head(ref)) { 2438 * delayed ref lock and then re-check to make sure
2439 * nobody got added.
2440 */
2441 spin_unlock(&locked_ref->lock);
2442 spin_lock(&delayed_refs->lock);
2443 spin_lock(&locked_ref->lock);
2444 if (rb_first(&locked_ref->ref_root)) {
2445 spin_unlock(&locked_ref->lock);
2446 spin_unlock(&delayed_refs->lock);
2447 continue;
2448 }
2449 ref->in_tree = 0;
2450 delayed_refs->num_heads--;
2444 rb_erase(&locked_ref->href_node, 2451 rb_erase(&locked_ref->href_node,
2445 &delayed_refs->href_root); 2452 &delayed_refs->href_root);
2453 spin_unlock(&delayed_refs->lock);
2454 } else {
2455 ref->in_tree = 0;
2456 rb_erase(&ref->rb_node, &locked_ref->ref_root);
2446 } 2457 }
2447 delayed_refs->num_entries--; 2458 atomic_dec(&delayed_refs->num_entries);
2459
2448 if (!btrfs_delayed_ref_is_head(ref)) { 2460 if (!btrfs_delayed_ref_is_head(ref)) {
2449 /* 2461 /*
2450 * when we play the delayed ref, also correct the 2462 * when we play the delayed ref, also correct the
@@ -2461,20 +2473,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2461 default: 2473 default:
2462 WARN_ON(1); 2474 WARN_ON(1);
2463 } 2475 }
2464 } else {
2465 list_del_init(&locked_ref->cluster);
2466 } 2476 }
2467 spin_unlock(&delayed_refs->lock); 2477 spin_unlock(&locked_ref->lock);
2468 2478
2469 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2479 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2470 must_insert_reserved); 2480 must_insert_reserved);
2471 2481
2472 btrfs_free_delayed_extent_op(extent_op); 2482 btrfs_free_delayed_extent_op(extent_op);
2473 if (ret) { 2483 if (ret) {
2484 locked_ref->processing = 0;
2474 btrfs_delayed_ref_unlock(locked_ref); 2485 btrfs_delayed_ref_unlock(locked_ref);
2475 btrfs_put_delayed_ref(ref); 2486 btrfs_put_delayed_ref(ref);
2476 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); 2487 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2477 spin_lock(&delayed_refs->lock);
2478 return ret; 2488 return ret;
2479 } 2489 }
2480 2490
@@ -2490,11 +2500,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2490 } 2500 }
2491 btrfs_put_delayed_ref(ref); 2501 btrfs_put_delayed_ref(ref);
2492 count++; 2502 count++;
2493next:
2494 cond_resched(); 2503 cond_resched();
2495 spin_lock(&delayed_refs->lock);
2496 } 2504 }
2497 return count; 2505 return 0;
2498} 2506}
2499 2507
2500#ifdef SCRAMBLE_DELAYED_REFS 2508#ifdef SCRAMBLE_DELAYED_REFS
@@ -2576,16 +2584,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2576 return ret; 2584 return ret;
2577} 2585}
2578 2586
2579static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
2580 int count)
2581{
2582 int val = atomic_read(&delayed_refs->ref_seq);
2583
2584 if (val < seq || val >= seq + count)
2585 return 1;
2586 return 0;
2587}
2588
2589static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) 2587static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2590{ 2588{
2591 u64 num_bytes; 2589 u64 num_bytes;
@@ -2647,12 +2645,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2647 struct rb_node *node; 2645 struct rb_node *node;
2648 struct btrfs_delayed_ref_root *delayed_refs; 2646 struct btrfs_delayed_ref_root *delayed_refs;
2649 struct btrfs_delayed_ref_head *head; 2647 struct btrfs_delayed_ref_head *head;
2650 struct list_head cluster;
2651 int ret; 2648 int ret;
2652 u64 delayed_start;
2653 int run_all = count == (unsigned long)-1; 2649 int run_all = count == (unsigned long)-1;
2654 int run_most = 0; 2650 int run_most = 0;
2655 int loops;
2656 2651
2657 /* We'll clean this up in btrfs_cleanup_transaction */ 2652 /* We'll clean this up in btrfs_cleanup_transaction */
2658 if (trans->aborted) 2653 if (trans->aborted)
@@ -2664,121 +2659,31 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2664 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 2659 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2665 2660
2666 delayed_refs = &trans->transaction->delayed_refs; 2661 delayed_refs = &trans->transaction->delayed_refs;
2667 INIT_LIST_HEAD(&cluster);
2668 if (count == 0) { 2662 if (count == 0) {
2669 count = delayed_refs->num_entries * 2; 2663 count = atomic_read(&delayed_refs->num_entries) * 2;
2670 run_most = 1; 2664 run_most = 1;
2671 } 2665 }
2672 2666
2673 if (!run_all && !run_most) {
2674 int old;
2675 int seq = atomic_read(&delayed_refs->ref_seq);
2676
2677progress:
2678 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2679 if (old) {
2680 DEFINE_WAIT(__wait);
2681 if (delayed_refs->flushing ||
2682 !btrfs_should_throttle_delayed_refs(trans, root))
2683 return 0;
2684
2685 prepare_to_wait(&delayed_refs->wait, &__wait,
2686 TASK_UNINTERRUPTIBLE);
2687
2688 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2689 if (old) {
2690 schedule();
2691 finish_wait(&delayed_refs->wait, &__wait);
2692
2693 if (!refs_newer(delayed_refs, seq, 256))
2694 goto progress;
2695 else
2696 return 0;
2697 } else {
2698 finish_wait(&delayed_refs->wait, &__wait);
2699 goto again;
2700 }
2701 }
2702
2703 } else {
2704 atomic_inc(&delayed_refs->procs_running_refs);
2705 }
2706
2707again: 2667again:
2708 loops = 0;
2709 spin_lock(&delayed_refs->lock);
2710
2711#ifdef SCRAMBLE_DELAYED_REFS 2668#ifdef SCRAMBLE_DELAYED_REFS
2712 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); 2669 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2713#endif 2670#endif
2714 2671 ret = __btrfs_run_delayed_refs(trans, root, count);
2715 while (1) { 2672 if (ret < 0) {
2716 if (!(run_all || run_most) && 2673 btrfs_abort_transaction(trans, root, ret);
2717 !btrfs_should_throttle_delayed_refs(trans, root)) 2674 return ret;
2718 break;
2719
2720 /*
2721 * go find something we can process in the rbtree. We start at
2722 * the beginning of the tree, and then build a cluster
2723 * of refs to process starting at the first one we are able to
2724 * lock
2725 */
2726 delayed_start = delayed_refs->run_delayed_start;
2727 ret = btrfs_find_ref_cluster(trans, &cluster,
2728 delayed_refs->run_delayed_start);
2729 if (ret)
2730 break;
2731
2732 ret = run_clustered_refs(trans, root, &cluster);
2733 if (ret < 0) {
2734 btrfs_release_ref_cluster(&cluster);
2735 spin_unlock(&delayed_refs->lock);
2736 btrfs_abort_transaction(trans, root, ret);
2737 atomic_dec(&delayed_refs->procs_running_refs);
2738 wake_up(&delayed_refs->wait);
2739 return ret;
2740 }
2741
2742 atomic_add(ret, &delayed_refs->ref_seq);
2743
2744 count -= min_t(unsigned long, ret, count);
2745
2746 if (count == 0)
2747 break;
2748
2749 if (delayed_start >= delayed_refs->run_delayed_start) {
2750 if (loops == 0) {
2751 /*
2752 * btrfs_find_ref_cluster looped. let's do one
2753 * more cycle. if we don't run any delayed ref
2754 * during that cycle (because we can't because
2755 * all of them are blocked), bail out.
2756 */
2757 loops = 1;
2758 } else {
2759 /*
2760 * no runnable refs left, stop trying
2761 */
2762 BUG_ON(run_all);
2763 break;
2764 }
2765 }
2766 if (ret) {
2767 /* refs were run, let's reset staleness detection */
2768 loops = 0;
2769 }
2770 } 2675 }
2771 2676
2772 if (run_all) { 2677 if (run_all) {
2773 if (!list_empty(&trans->new_bgs)) { 2678 if (!list_empty(&trans->new_bgs))
2774 spin_unlock(&delayed_refs->lock);
2775 btrfs_create_pending_block_groups(trans, root); 2679 btrfs_create_pending_block_groups(trans, root);
2776 spin_lock(&delayed_refs->lock);
2777 }
2778 2680
2681 spin_lock(&delayed_refs->lock);
2779 node = rb_first(&delayed_refs->href_root); 2682 node = rb_first(&delayed_refs->href_root);
2780 if (!node) 2683 if (!node) {
2684 spin_unlock(&delayed_refs->lock);
2781 goto out; 2685 goto out;
2686 }
2782 count = (unsigned long)-1; 2687 count = (unsigned long)-1;
2783 2688
2784 while (node) { 2689 while (node) {
@@ -2807,16 +2712,10 @@ again:
2807 node = rb_next(node); 2712 node = rb_next(node);
2808 } 2713 }
2809 spin_unlock(&delayed_refs->lock); 2714 spin_unlock(&delayed_refs->lock);
2810 schedule_timeout(1); 2715 cond_resched();
2811 goto again; 2716 goto again;
2812 } 2717 }
2813out: 2718out:
2814 atomic_dec(&delayed_refs->procs_running_refs);
2815 smp_mb();
2816 if (waitqueue_active(&delayed_refs->wait))
2817 wake_up(&delayed_refs->wait);
2818
2819 spin_unlock(&delayed_refs->lock);
2820 assert_qgroups_uptodate(trans); 2719 assert_qgroups_uptodate(trans);
2821 return 0; 2720 return 0;
2822} 2721}
@@ -2858,12 +2757,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2858 struct rb_node *node; 2757 struct rb_node *node;
2859 int ret = 0; 2758 int ret = 0;
2860 2759
2861 ret = -ENOENT;
2862 delayed_refs = &trans->transaction->delayed_refs; 2760 delayed_refs = &trans->transaction->delayed_refs;
2863 spin_lock(&delayed_refs->lock); 2761 spin_lock(&delayed_refs->lock);
2864 head = btrfs_find_delayed_ref_head(trans, bytenr); 2762 head = btrfs_find_delayed_ref_head(trans, bytenr);
2865 if (!head) 2763 if (!head) {
2866 goto out; 2764 spin_unlock(&delayed_refs->lock);
2765 return 0;
2766 }
2867 2767
2868 if (!mutex_trylock(&head->mutex)) { 2768 if (!mutex_trylock(&head->mutex)) {
2869 atomic_inc(&head->node.refs); 2769 atomic_inc(&head->node.refs);
@@ -2880,40 +2780,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2880 btrfs_put_delayed_ref(&head->node); 2780 btrfs_put_delayed_ref(&head->node);
2881 return -EAGAIN; 2781 return -EAGAIN;
2882 } 2782 }
2783 spin_unlock(&delayed_refs->lock);
2883 2784
2884 node = rb_prev(&head->node.rb_node); 2785 spin_lock(&head->lock);
2885 if (!node) 2786 node = rb_first(&head->ref_root);
2886 goto out_unlock; 2787 while (node) {
2887 2788 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2888 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2789 node = rb_next(node);
2889
2890 if (ref->bytenr != bytenr)
2891 goto out_unlock;
2892 2790
2893 ret = 1; 2791 /* If it's a shared ref we know a cross reference exists */
2894 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2792 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2895 goto out_unlock; 2793 ret = 1;
2794 break;
2795 }
2896 2796
2897 data_ref = btrfs_delayed_node_to_data_ref(ref); 2797 data_ref = btrfs_delayed_node_to_data_ref(ref);
2898 2798
2899 node = rb_prev(node); 2799 /*
2900 if (node) { 2800 * If our ref doesn't match the one we're currently looking at
2901 int seq = ref->seq; 2801 * then we have a cross reference.
2902 2802 */
2903 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2803 if (data_ref->root != root->root_key.objectid ||
2904 if (ref->bytenr == bytenr && ref->seq == seq) 2804 data_ref->objectid != objectid ||
2905 goto out_unlock; 2805 data_ref->offset != offset) {
2806 ret = 1;
2807 break;
2808 }
2906 } 2809 }
2907 2810 spin_unlock(&head->lock);
2908 if (data_ref->root != root->root_key.objectid ||
2909 data_ref->objectid != objectid || data_ref->offset != offset)
2910 goto out_unlock;
2911
2912 ret = 0;
2913out_unlock:
2914 mutex_unlock(&head->mutex); 2811 mutex_unlock(&head->mutex);
2915out:
2916 spin_unlock(&delayed_refs->lock);
2917 return ret; 2812 return ret;
2918} 2813}
2919 2814
@@ -5953,8 +5848,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5953{ 5848{
5954 struct btrfs_delayed_ref_head *head; 5849 struct btrfs_delayed_ref_head *head;
5955 struct btrfs_delayed_ref_root *delayed_refs; 5850 struct btrfs_delayed_ref_root *delayed_refs;
5956 struct btrfs_delayed_ref_node *ref;
5957 struct rb_node *node;
5958 int ret = 0; 5851 int ret = 0;
5959 5852
5960 delayed_refs = &trans->transaction->delayed_refs; 5853 delayed_refs = &trans->transaction->delayed_refs;
@@ -5963,14 +5856,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5963 if (!head) 5856 if (!head)
5964 goto out; 5857 goto out;
5965 5858
5966 node = rb_prev(&head->node.rb_node); 5859 spin_lock(&head->lock);
5967 if (!node) 5860 if (rb_first(&head->ref_root))
5968 goto out;
5969
5970 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
5971
5972 /* there are still entries for this ref, we can't drop it */
5973 if (ref->bytenr == bytenr)
5974 goto out; 5861 goto out;
5975 5862
5976 if (head->extent_op) { 5863 if (head->extent_op) {
@@ -5992,20 +5879,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5992 * ahead and process it. 5879 * ahead and process it.
5993 */ 5880 */
5994 head->node.in_tree = 0; 5881 head->node.in_tree = 0;
5995 rb_erase(&head->node.rb_node, &delayed_refs->root);
5996 rb_erase(&head->href_node, &delayed_refs->href_root); 5882 rb_erase(&head->href_node, &delayed_refs->href_root);
5997 5883
5998 delayed_refs->num_entries--; 5884 atomic_dec(&delayed_refs->num_entries);
5999 5885
6000 /* 5886 /*
6001 * we don't take a ref on the node because we're removing it from the 5887 * we don't take a ref on the node because we're removing it from the
6002 * tree, so we just steal the ref the tree was holding. 5888 * tree, so we just steal the ref the tree was holding.
6003 */ 5889 */
6004 delayed_refs->num_heads--; 5890 delayed_refs->num_heads--;
6005 if (list_empty(&head->cluster)) 5891 if (head->processing == 0)
6006 delayed_refs->num_heads_ready--; 5892 delayed_refs->num_heads_ready--;
6007 5893 head->processing = 0;
6008 list_del_init(&head->cluster); 5894 spin_unlock(&head->lock);
6009 spin_unlock(&delayed_refs->lock); 5895 spin_unlock(&delayed_refs->lock);
6010 5896
6011 BUG_ON(head->extent_op); 5897 BUG_ON(head->extent_op);
@@ -6016,6 +5902,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
6016 btrfs_put_delayed_ref(&head->node); 5902 btrfs_put_delayed_ref(&head->node);
6017 return ret; 5903 return ret;
6018out: 5904out:
5905 spin_unlock(&head->lock);
6019 spin_unlock(&delayed_refs->lock); 5906 spin_unlock(&delayed_refs->lock);
6020 return 0; 5907 return 0;
6021} 5908}
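
In the reworked __btrfs_run_delayed_refs(), select_delayed_ref() no longer restarts its scan per action: it walks the head's refs once, returning the first ADD if one exists so the reference count never transiently hits zero while other delayed refs are pending, otherwise the first ref it saw. A userspace sketch of that selection follows; the enum values and struct layout are assumptions for the sketch, not the btrfs definitions.

#include <stddef.h>
#include <stdio.h>

enum ref_action { ADD_DELAYED_REF, DROP_DELAYED_REF };

struct ref {
	enum ref_action action;
	struct ref *next;
};

/* Prefer an ADD; otherwise fall back to the first ref in order. */
static struct ref *select_delayed_ref(struct ref *first)
{
	struct ref *ref, *last = NULL;

	for (ref = first; ref; ref = ref->next) {
		if (ref->action == ADD_DELAYED_REF)
			return ref;		/* run additions first */
		if (!last)
			last = ref;		/* remember the first drop */
	}
	return last;				/* only drops (or nothing) left */
}

int main(void)
{
	struct ref add = { ADD_DELAYED_REF, NULL };
	struct ref drop = { DROP_DELAYED_REF, &add };
	struct ref *pick = select_delayed_ref(&drop);

	printf("%s\n", pick->action == ADD_DELAYED_REF ? "ADD" : "DROP");
	return 0;
}
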
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b16352ce0f73..fd1446496fe8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -62,7 +62,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
 	WARN_ON(atomic_read(&transaction->use_count) == 0);
 	if (atomic_dec_and_test(&transaction->use_count)) {
 		BUG_ON(!list_empty(&transaction->list));
-		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.root));
 		WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
 		while (!list_empty(&transaction->pending_chunks)) {
 			struct extent_map *em;
@@ -184,9 +183,8 @@ loop:
 	atomic_set(&cur_trans->use_count, 2);
 	cur_trans->start_time = get_seconds();
 
-	cur_trans->delayed_refs.root = RB_ROOT;
 	cur_trans->delayed_refs.href_root = RB_ROOT;
-	cur_trans->delayed_refs.num_entries = 0;
+	atomic_set(&cur_trans->delayed_refs.num_entries, 0);
 	cur_trans->delayed_refs.num_heads_ready = 0;
 	cur_trans->delayed_refs.num_heads = 0;
 	cur_trans->delayed_refs.flushing = 0;
@@ -206,9 +204,6 @@ loop:
 	atomic64_set(&fs_info->tree_mod_seq, 0);
 
 	spin_lock_init(&cur_trans->delayed_refs.lock);
-	atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
-	atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
-	init_waitqueue_head(&cur_trans->delayed_refs.wait);
 
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	INIT_LIST_HEAD(&cur_trans->ordered_operations);