path: root/fs/btrfs/extent-tree.c
author		Chris Mason <chris.mason@fusionio.com>	2013-01-29 18:44:12 -0500
committer	Chris Mason <chris.mason@fusionio.com>	2013-02-01 14:24:25 -0500
commit		bb721703aa551e98dc5c7fb259cf90343408baf2 (patch)
tree		0f17436d6bb333efe8c12fc4638c398422b2e50b /fs/btrfs/extent-tree.c
parent		242e18c7c1a8ff3aa05c9fbb6e0bb427511152a6 (diff)
Btrfs: reduce CPU contention while waiting for delayed extent operations
We batch up operations to the extent allocation tree, which allows us to deal with the recursive nature of using the extent allocation tree to allocate extents to the extent allocation tree. It also provides a mechanism to sort and collect extent operations, which makes it much more efficient to record extents that are close together.

The delayed extent operations must all be finished before the running transaction commits, so we have code to make sure to run a few of the batched operations when closing our transaction handles.

This creates a great deal of contention for the locks in the delayed extent operation tree, and also contention for the lock on the extent allocation tree itself. All the extra contention just slows the operations down and doesn't get things done any faster.

This commit changes things to use a wait queue instead. As procs want to run the delayed operations, one of them races in and gets permission to hit the tree, and the others step back and wait for progress to be made.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
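The gate in the patch works roughly like this: the first task to flip delayed_refs->procs_running_refs from 0 to 1 becomes the runner, everyone else sleeps on delayed_refs->wait, and refs_newer() lets a sleeper give up once ref_seq shows enough operations were processed on its behalf. Below is a minimal userspace analogue of that pattern, assuming C11 atomics and a pthread condition variable in place of atomic_cmpxchg() and the kernel wait-queue protocol; every name in it is invented for the sketch, and the small-backlog early return (num_entries < 16348) is left out:

#include <pthread.h>
#include <stdatomic.h>

static atomic_int procs_running;  /* 0 = idle, 1 = one task is running the batch */
static atomic_int seq;            /* bumped by the number of operations processed */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;

/* Analogue of refs_newer(): has seq moved out of [start, start + count)? */
static int seq_advanced(int start, int count)
{
        int val = atomic_load(&seq);
        return val < start || val >= start + count;
}

/* Returns 1 if the caller won the race and must run the batch itself,
 * 0 if another runner made enough progress while we waited. */
static int claim_or_wait(void)
{
        int start = atomic_load(&seq);

        for (;;) {                      /* analogue of the "progress:" retry */
                int expected = 0;
                if (atomic_compare_exchange_strong(&procs_running, &expected, 1))
                        return 1;       /* we are the runner */

                pthread_mutex_lock(&lock);
                while (atomic_load(&procs_running) && !seq_advanced(start, 256))
                        pthread_cond_wait(&waitq, &lock);
                pthread_mutex_unlock(&lock);

                if (seq_advanced(start, 256))
                        return 0;       /* someone else did our work for us */
                /* the runner quit without enough progress: race for the slot again */
        }
}

/* Called by the runner when done: publish progress, drop the slot,
 * and wake every waiter so each can re-check its own condition. */
static void release_runner(int ops_done)
{
        atomic_fetch_add(&seq, ops_done);
        atomic_store(&procs_running, 0);
        pthread_mutex_lock(&lock);
        pthread_cond_broadcast(&waitq);
        pthread_mutex_unlock(&lock);
}

The kernel version spells the condition-variable half out by hand with prepare_to_wait()/schedule()/finish_wait(), which is why the cmpxchg is retried after prepare_to_wait() in the diff below: a waiter must re-check the condition after it is already on the queue, or it could sleep through the only wakeup sent to it.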
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	| 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f13402104c96..87b0e856b6d0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2438,6 +2438,16 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
+		      int count)
+{
+	int val = atomic_read(&delayed_refs->ref_seq);
+
+	if (val < seq || val >= seq + count)
+		return 1;
+	return 0;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -2472,6 +2482,44 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
+	if (count == 0) {
+		count = delayed_refs->num_entries * 2;
+		run_most = 1;
+	}
+
+	if (!run_all && !run_most) {
+		int old;
+		int seq = atomic_read(&delayed_refs->ref_seq);
+
+progress:
+		old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
+		if (old) {
+			DEFINE_WAIT(__wait);
+			if (delayed_refs->num_entries < 16348)
+				return 0;
+
+			prepare_to_wait(&delayed_refs->wait, &__wait,
+					TASK_UNINTERRUPTIBLE);
+
+			old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
+			if (old) {
+				schedule();
+				finish_wait(&delayed_refs->wait, &__wait);
+
+				if (!refs_newer(delayed_refs, seq, 256))
+					goto progress;
+				else
+					return 0;
+			} else {
+				finish_wait(&delayed_refs->wait, &__wait);
+				goto again;
+			}
+		}
+
+	} else {
+		atomic_inc(&delayed_refs->procs_running_refs);
+	}
+
 again:
 	loops = 0;
 	spin_lock(&delayed_refs->lock);
@@ -2480,10 +2528,6 @@ again:
 	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
 #endif
 
-	if (count == 0) {
-		count = delayed_refs->num_entries * 2;
-		run_most = 1;
-	}
 	while (1) {
 		if (!(run_all || run_most) &&
 		    delayed_refs->num_heads_ready < 64)
@@ -2505,9 +2549,12 @@ again:
 		if (ret < 0) {
 			spin_unlock(&delayed_refs->lock);
 			btrfs_abort_transaction(trans, root, ret);
+			atomic_dec(&delayed_refs->procs_running_refs);
 			return ret;
 		}
 
+		atomic_add(ret, &delayed_refs->ref_seq);
+
 		count -= min_t(unsigned long, ret, count);
 
 		if (count == 0)
@@ -2576,6 +2623,11 @@ again:
 		goto again;
 	}
 out:
+	atomic_dec(&delayed_refs->procs_running_refs);
+	smp_mb();
+	if (waitqueue_active(&delayed_refs->wait))
+		wake_up(&delayed_refs->wait);
+
 	spin_unlock(&delayed_refs->lock);
 	assert_qgroups_uptodate(trans);
 	return 0;
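One detail in the out: path deserves a note: the smp_mb() between atomic_dec() and waitqueue_active(). waitqueue_active() is a lockless peek at the wait queue, so the waker needs a full barrier to make its decrement visible before it checks for sleepers; that pairs with the barrier implied by prepare_to_wait() on the waiting side. A sketch of the pairing (explanatory only, not text from the patch):

// waiter ("progress:" path)              waker ("out:" path)
// --------------------------             ---------------------------------
// prepare_to_wait(&wq, ...);             atomic_dec(&procs_running_refs);
//   (queues the task; implies a          smp_mb();
//    barrier before the re-check)        if (waitqueue_active(&wq))
// if (atomic_cmpxchg(...) != 0)                  wake_up(&wq);
//         schedule();
//
// Without the barrier, the waker could read an empty queue (stale) while
// the waiter reads procs_running_refs as still nonzero (stale) and sleeps
// through the only wakeup it was ever going to get.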