author		Arne Jansen <sensille@gmx.net>	2012-08-06 16:18:51 -0400
committer	Chris Mason <chris.mason@oracle.com>	2012-08-28 16:53:32 -0400
commit		1fa11e265fa2562fb713171b6a58e72bb7afd276 (patch)
tree		9b8e80ef9ef0479d270d46b46c4ca7d0106d3ae0 /fs/btrfs/extent-tree.c
parent		6209526531e70c080f79318ab8f50e26846c40a8 (diff)
Btrfs: fix deadlock in wait_for_more_refs
Commit a168650c introduced a waiting mechanism to prevent busy waiting in
btrfs_run_delayed_refs. This can deadlock with btrfs_run_ordered_operations,
where a tree_mod_seq is held while waiting for the io to complete, while
the end_io calls btrfs_run_delayed_refs.

This whole mechanism is unnecessary. If not enough runnable refs are
available to satisfy count, just return as count is more like a guideline
than a strict requirement.

In case we have to run all refs, commit transaction makes sure that no
other threads are working in the transaction anymore, so we just assert
here that no refs are blocked.

Signed-off-by: Arne Jansen <sensille@gmx.net>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
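For reference, below is a rough, self-contained model (not kernel code; the
demo_* names and the simplified cluster stub are invented for illustration) of
the loop-exit logic this patch puts in place of the wait: after the cluster
search wraps around, allow exactly one more pass and then bail out, asserting
(as the patch does with BUG_ON) that the run_all case never hits blocked refs.

#include <assert.h>
#include <stdbool.h>

struct demo_refs {
	unsigned long run_delayed_start; /* cursor of the cluster search */
	int runnable;                    /* refs that can still be processed */
};

/* Stand-in for finding and running one cluster: advance the cursor while
 * work remains, wrap it back to 0 once nothing runnable is left. */
static int demo_run_cluster(struct demo_refs *refs)
{
	if (refs->runnable > 0) {
		refs->runnable--;
		refs->run_delayed_start++;
		return 1;                /* number of refs run */
	}
	refs->run_delayed_start = 0;     /* search wrapped around */
	return 0;
}

static void demo_run_delayed_refs(struct demo_refs *refs, bool run_all)
{
	int loops = 0;

	for (;;) {
		unsigned long delayed_start = refs->run_delayed_start;
		int ran = demo_run_cluster(refs);

		if (delayed_start >= refs->run_delayed_start) {
			/* the search looped; allow exactly one more cycle */
			if (loops == 0) {
				loops = 1;
			} else {
				/* no runnable refs left, stop trying;
				 * models the patch's BUG_ON(run_all) */
				assert(!run_all);
				break;
			}
		}
		if (ran)
			loops = 0;       /* progress made, reset detection */
	}
}

int main(void)
{
	struct demo_refs refs = { .run_delayed_start = 0, .runnable = 3 };

	demo_run_delayed_refs(&refs, false);
	return 0;
}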
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	| 77
1 file changed, 21 insertions, 56 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 45c69c4184c..d3df65f83b5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2318,12 +2318,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
-		/*
-		 * we modified num_entries, but as we're currently running
-		 * delayed refs, skip
-		 *	wake_up(&delayed_refs->seq_wait);
-		 * here.
-		 */
 		spin_unlock(&delayed_refs->lock);
 
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2344,6 @@ next:
 	return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-			       struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs,
-			       struct list_head *first_seq)
-{
-	spin_unlock(&delayed_refs->lock);
-	pr_debug("waiting for more refs (num %ld, first %p)\n",
-		 num_refs, first_seq);
-	wait_event(fs_info->tree_mod_seq_wait,
-		   num_refs != delayed_refs->num_entries ||
-		   fs_info->tree_mod_seq_list.next != first_seq);
-	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-	spin_lock(&delayed_refs->lock);
-}
-
 #ifdef SCRAMBLE_DELAYED_REFS
 /*
  * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2438,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
-	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
 	int run_most = 0;
-	unsigned long num_refs = 0;
-	int consider_waiting;
+	int loops;
 
 	/* We'll clean this up in btrfs_cleanup_transaction */
 	if (trans->aborted)
@@ -2484,7 +2460,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
 again:
-	consider_waiting = 0;
+	loops = 0;
 	spin_lock(&delayed_refs->lock);
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2488,6 @@ again:
 		if (ret)
 			break;
 
-		if (delayed_start >= delayed_refs->run_delayed_start) {
-			if (consider_waiting == 0) {
-				/*
-				 * btrfs_find_ref_cluster looped. let's do one
-				 * more cycle. if we don't run any delayed ref
-				 * during that cycle (because we can't because
-				 * all of them are blocked) and if the number of
-				 * refs doesn't change, we avoid busy waiting.
-				 */
-				consider_waiting = 1;
-				num_refs = delayed_refs->num_entries;
-				first_seq = root->fs_info->tree_mod_seq_list.next;
-			} else {
-				wait_for_more_refs(root->fs_info, delayed_refs,
-						   num_refs, first_seq);
-				/*
-				 * after waiting, things have changed. we
-				 * dropped the lock and someone else might have
-				 * run some refs, built new clusters and so on.
-				 * therefore, we restart staleness detection.
-				 */
-				consider_waiting = 0;
-			}
-		}
-
 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
 			spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2500,26 @@ again:
 		if (count == 0)
 			break;
 
-		if (ret || delayed_refs->run_delayed_start == 0) {
+		if (delayed_start >= delayed_refs->run_delayed_start) {
+			if (loops == 0) {
+				/*
+				 * btrfs_find_ref_cluster looped. let's do one
+				 * more cycle. if we don't run any delayed ref
+				 * during that cycle (because we can't because
+				 * all of them are blocked), bail out.
+				 */
+				loops = 1;
+			} else {
+				/*
+				 * no runnable refs left, stop trying
+				 */
+				BUG_ON(run_all);
+				break;
+			}
+		}
+		if (ret) {
 			/* refs were run, let's reset staleness detection */
-			consider_waiting = 0;
+			loops = 0;
 		}
 	}
 
@@ -5296,9 +5264,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
 
 	delayed_refs->num_entries--;
-	smp_mb();
-	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-		wake_up(&root->fs_info->tree_mod_seq_wait);
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the