author	Arne Jansen <sensille@gmx.net>	2012-08-06 16:18:51 -0400
committer	Chris Mason <chris.mason@oracle.com>	2012-08-28 16:53:32 -0400
commit	1fa11e265fa2562fb713171b6a58e72bb7afd276 (patch)
tree	9b8e80ef9ef0479d270d46b46c4ca7d0106d3ae0 /fs
parent	6209526531e70c080f79318ab8f50e26846c40a8 (diff)
Btrfs: fix deadlock in wait_for_more_refs
Commit a168650c introduced a waiting mechanism to prevent busy waiting in
btrfs_run_delayed_refs. This can deadlock with btrfs_run_ordered_operations,
where a tree_mod_seq is held while waiting for the io to complete, and the
end_io handler in turn calls btrfs_run_delayed_refs.

This whole mechanism is unnecessary. If not enough runnable refs are
available to satisfy count, just return, since count is more of a guideline
than a strict requirement.

In case we have to run all refs, the transaction commit makes sure that no
other threads are working in the transaction anymore, so we just assert
here that no refs are blocked.

Signed-off-by: Arne Jansen <sensille@gmx.net>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
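The cycle described above is easier to see stripped of the btrfs specifics. Below is a minimal userspace sketch of that circular wait, not btrfs code: the mutex/condvar pair and the blocker_held/io_done flags are made-up analogues for a held tree_mod_seq element and the ordered-operations io completion. Running it hangs by design.

/*
 * Simplified illustration of the deadlock, NOT btrfs code.
 * Build with: cc -pthread deadlock-sketch.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int blocker_held = 1;    /* analogue of a live tree_mod_seq element */
static int io_done;             /* analogue of the ordered io completing */

/* end_io analogue: runs delayed refs, which (before this patch) waited
 * for the blocker to go away before it could finish */
static void *end_io(void *unused)
{
        (void)unused;
        pthread_mutex_lock(&lock);
        while (blocker_held)                    /* old wait_for_more_refs() */
                pthread_cond_wait(&cond, &lock);
        io_done = 1;                            /* never reached */
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, end_io, NULL);

        /* btrfs_run_ordered_operations analogue: keeps the blocker held
         * while waiting for the io -> neither thread can ever proceed */
        pthread_mutex_lock(&lock);
        while (!io_done)
                pthread_cond_wait(&cond, &lock);
        blocker_held = 0;               /* would only happen after the io */
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        puts("unreachable with the old waiting scheme");
        return 0;
}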
Diffstat (limited to 'fs')
-rw-r--r--	fs/btrfs/ctree.c	6
-rw-r--r--	fs/btrfs/ctree.h	1
-rw-r--r--	fs/btrfs/delayed-ref.c	8
-rw-r--r--	fs/btrfs/disk-io.c	2
-rw-r--r--	fs/btrfs/extent-tree.c	77
5 files changed, 21 insertions, 73 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f271f..08e0b11ba0a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
 	spin_unlock(&fs_info->tree_mod_seq_lock);
 
 	/*
-	 * we removed the lowest blocker from the blocker list, so there may be
-	 * more processible delayed refs.
-	 */
-	wake_up(&fs_info->tree_mod_seq_wait);
-
-	/*
 	 * anything that's lower than the lowest existing (read: blocked)
 	 * sequence number can be removed from the tree.
 	 */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 348196350bf..c38734a07a6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
 	atomic_t tree_mod_seq;
 	struct list_head tree_mod_seq_list;
 	struct seq_list tree_mod_seq_elem;
-	wait_queue_head_t tree_mod_seq_wait;
 
 	/* this protects tree_mod_log */
 	rwlock_t tree_mod_log_lock;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419ed01b..7561431af50 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -662,9 +662,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, level, action,
 				   for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	if (need_ref_seq(for_cow, ref_root))
 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -713,9 +710,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, owner, offset,
 				   action, for_cow);
-	if (!need_ref_seq(for_cow, ref_root) &&
-	    waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	if (need_ref_seq(for_cow, ref_root))
 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
@@ -744,8 +738,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
 				   extent_op->is_data);
 
-	if (waitqueue_active(&fs_info->tree_mod_seq_wait))
-		wake_up(&fs_info->tree_mod_seq_wait);
 	spin_unlock(&delayed_refs->lock);
 	return 0;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 502b20c56e8..a7ad8fc8dc5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2035,8 +2035,6 @@ int open_ctree(struct super_block *sb,
 	fs_info->free_chunk_space = 0;
 	fs_info->tree_mod_log = RB_ROOT;
 
-	init_waitqueue_head(&fs_info->tree_mod_seq_wait);
-
 	/* readahead state */
 	INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
 	spin_lock_init(&fs_info->reada_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 45c69c4184c..d3df65f83b5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2318,12 +2318,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 		ref->in_tree = 0;
 		rb_erase(&ref->rb_node, &delayed_refs->root);
 		delayed_refs->num_entries--;
-		/*
-		 * we modified num_entries, but as we're currently running
-		 * delayed refs, skip
-		 *     wake_up(&delayed_refs->seq_wait);
-		 * here.
-		 */
 		spin_unlock(&delayed_refs->lock);
 
 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2344,6 @@ next:
 	return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
-			       struct btrfs_delayed_ref_root *delayed_refs,
-			       unsigned long num_refs,
-			       struct list_head *first_seq)
-{
-	spin_unlock(&delayed_refs->lock);
-	pr_debug("waiting for more refs (num %ld, first %p)\n",
-		 num_refs, first_seq);
-	wait_event(fs_info->tree_mod_seq_wait,
-		   num_refs != delayed_refs->num_entries ||
-		   fs_info->tree_mod_seq_list.next != first_seq);
-	pr_debug("done waiting for more refs (num %ld, first %p)\n",
-		 delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
-	spin_lock(&delayed_refs->lock);
-}
-
 #ifdef SCRAMBLE_DELAYED_REFS
 /*
  * Normally delayed refs get processed in ascending bytenr order. This
@@ -2460,13 +2438,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
-	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
 	int run_most = 0;
-	unsigned long num_refs = 0;
-	int consider_waiting;
+	int loops;
 
 	/* We'll clean this up in btrfs_cleanup_transaction */
 	if (trans->aborted)
@@ -2484,7 +2460,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	delayed_refs = &trans->transaction->delayed_refs;
 	INIT_LIST_HEAD(&cluster);
 again:
-	consider_waiting = 0;
+	loops = 0;
 	spin_lock(&delayed_refs->lock);
 
 #ifdef SCRAMBLE_DELAYED_REFS
@@ -2512,31 +2488,6 @@ again:
 		if (ret)
 			break;
 
-		if (delayed_start >= delayed_refs->run_delayed_start) {
-			if (consider_waiting == 0) {
-				/*
-				 * btrfs_find_ref_cluster looped. let's do one
-				 * more cycle. if we don't run any delayed ref
-				 * during that cycle (because we can't because
-				 * all of them are blocked) and if the number of
-				 * refs doesn't change, we avoid busy waiting.
-				 */
-				consider_waiting = 1;
-				num_refs = delayed_refs->num_entries;
-				first_seq = root->fs_info->tree_mod_seq_list.next;
-			} else {
-				wait_for_more_refs(root->fs_info, delayed_refs,
-						   num_refs, first_seq);
-				/*
-				 * after waiting, things have changed. we
-				 * dropped the lock and someone else might have
-				 * run some refs, built new clusters and so on.
-				 * therefore, we restart staleness detection.
-				 */
-				consider_waiting = 0;
-			}
-		}
-
 		ret = run_clustered_refs(trans, root, &cluster);
 		if (ret < 0) {
 			spin_unlock(&delayed_refs->lock);
@@ -2549,9 +2500,26 @@ again:
 		if (count == 0)
 			break;
 
-		if (ret || delayed_refs->run_delayed_start == 0) {
+		if (delayed_start >= delayed_refs->run_delayed_start) {
+			if (loops == 0) {
+				/*
+				 * btrfs_find_ref_cluster looped. let's do one
+				 * more cycle. if we don't run any delayed ref
+				 * during that cycle (because we can't because
+				 * all of them are blocked), bail out.
+				 */
+				loops = 1;
+			} else {
+				/*
+				 * no runnable refs left, stop trying
+				 */
+				BUG_ON(run_all);
+				break;
+			}
+		}
+		if (ret) {
 			/* refs were run, let's reset staleness detection */
-			consider_waiting = 0;
+			loops = 0;
 		}
 	}
 
@@ -5296,9 +5264,6 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
 	rb_erase(&head->node.rb_node, &delayed_refs->root);
 
 	delayed_refs->num_entries--;
-	smp_mb();
-	if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
-		wake_up(&root->fs_info->tree_mod_seq_wait);
 
 	/*
 	 * we don't take a ref on the node because we're removing it from the
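For reference, here is a standalone sketch of the bail-out pattern the extent-tree.c hunk introduces (the loops flag): give the cluster scan one extra full pass after its cursor wraps around, and stop instead of sleeping if that pass makes no progress. Everything below -- the ref_queue struct, run_next_ref(), the field names -- is a hypothetical stand-in for illustration, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

#define NREFS 8

struct ref_queue {
        bool pending[NREFS];    /* ref still queued */
        bool blocked[NREFS];    /* held back by an open tree_mod_seq */
        unsigned int cursor;    /* analogue of run_delayed_start */
        unsigned int num_entries;
};

/* stand-in for btrfs_find_ref_cluster()/run_clustered_refs(): run the
 * next runnable ref at or after the cursor, wrapping at the end; the
 * cursor is reset to 0 when a full scan finds nothing runnable */
static int run_next_ref(struct ref_queue *q)
{
        unsigned int i = q->cursor;

        do {
                unsigned int idx = i % NREFS;

                i++;
                if (q->pending[idx] && !q->blocked[idx]) {
                        q->pending[idx] = false;
                        q->num_entries--;
                        q->cursor = i % NREFS;
                        printf("ran ref %u\n", idx);
                        return 1;
                }
        } while (i % NREFS != q->cursor);
        q->cursor = 0;
        return 0;
}

static void run_delayed_refs(struct ref_queue *q, bool run_all)
{
        int loops = 0;

        while (q->num_entries) {
                unsigned int delayed_start = q->cursor;
                int ran = run_next_ref(q);

                if (delayed_start >= q->cursor) {
                        /* the scan wrapped back to the start of the queue */
                        if (loops == 0) {
                                loops = 1;      /* allow one more full pass */
                        } else {
                                /* second wrap without progress: everything
                                 * left is blocked, so bail out rather than
                                 * wait (the old code slept here) */
                                if (run_all)
                                        fprintf(stderr, "bug: blocked refs during commit\n");
                                break;
                        }
                }
                if (ran)
                        loops = 0;      /* made progress, reset detection */
        }
}

int main(void)
{
        struct ref_queue q = { .cursor = 0 };

        for (int i = 0; i < NREFS; i++) {
                q.pending[i] = true;
                q.num_entries++;
        }
        q.blocked[2] = q.blocked[5] = true;     /* two refs stay blocked */

        run_delayed_refs(&q, false);
        printf("%u refs left blocked\n", q.num_entries);
        return 0;
}

With run_all set, reaching the bail-out is treated as a bug, mirroring the BUG_ON(run_all) in the patch: by the time all refs must be run, the transaction commit has already ensured no other thread can hold a blocking tree_mod_seq.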