aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2013-10-02 12:39:50 -0400
committerJosef Bacik <jbacik@fusionio.com>2013-10-04 16:02:13 -0400
commit964fb15acfcd672ac691f04879b71f07ccc21e0c (patch)
tree1d8df39327cbfcf2c6543b47b4b66e48a27f4946
parent385fe0bede52a45cd960f30c7eb8d20ad8e1e05b (diff)
Btrfs: eliminate races in worker stopping code
The current implementation of worker threads in Btrfs has races in worker stopping code, which cause all kinds of panics and lockups when running btrfs/011 xfstest in a loop. The problem is that btrfs_stop_workers is unsynchronized with respect to check_idle_worker, check_busy_worker and __btrfs_start_workers. E.g., check_idle_worker race flow: btrfs_stop_workers(): check_idle_worker(aworker): - grabs the lock - splices the idle list into the working list - removes the first worker from the working list - releases the lock to wait for its kthread's completion - grabs the lock - if aworker is on the working list, moves aworker from the working list to the idle list - releases the lock - grabs the lock - puts the worker - removes the second worker from the working list ...... btrfs_stop_workers returns, aworker is on the idle list FS is umounted, memory is freed ...... aworker is waken up, fireworks ensue With this applied, I wasn't able to trigger the problem in 48 hours, whereas previously I could reliably reproduce at least one of these races within an hour. Reported-by: David Sterba <dsterba@suse.cz> Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Signed-off-by: Josef Bacik <jbacik@fusionio.com>
-rw-r--r--fs/btrfs/async-thread.c25
-rw-r--r--fs/btrfs/async-thread.h2
2 files changed, 21 insertions, 6 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 58b7d14b08ee..08cc08f037a6 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
107 worker->idle = 1; 107 worker->idle = 1;
108 108
109 /* the list may be empty if the worker is just starting */ 109 /* the list may be empty if the worker is just starting */
110 if (!list_empty(&worker->worker_list)) { 110 if (!list_empty(&worker->worker_list) &&
111 !worker->workers->stopping) {
111 list_move(&worker->worker_list, 112 list_move(&worker->worker_list,
112 &worker->workers->idle_list); 113 &worker->workers->idle_list);
113 } 114 }
@@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
127 spin_lock_irqsave(&worker->workers->lock, flags); 128 spin_lock_irqsave(&worker->workers->lock, flags);
128 worker->idle = 0; 129 worker->idle = 0;
129 130
130 if (!list_empty(&worker->worker_list)) { 131 if (!list_empty(&worker->worker_list) &&
132 !worker->workers->stopping) {
131 list_move_tail(&worker->worker_list, 133 list_move_tail(&worker->worker_list,
132 &worker->workers->worker_list); 134 &worker->workers->worker_list);
133 } 135 }
@@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers)
412 int can_stop; 414 int can_stop;
413 415
414 spin_lock_irq(&workers->lock); 416 spin_lock_irq(&workers->lock);
417 workers->stopping = 1;
415 list_splice_init(&workers->idle_list, &workers->worker_list); 418 list_splice_init(&workers->idle_list, &workers->worker_list);
416 while (!list_empty(&workers->worker_list)) { 419 while (!list_empty(&workers->worker_list)) {
417 cur = workers->worker_list.next; 420 cur = workers->worker_list.next;
@@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
455 workers->ordered = 0; 458 workers->ordered = 0;
456 workers->atomic_start_pending = 0; 459 workers->atomic_start_pending = 0;
457 workers->atomic_worker_start = async_helper; 460 workers->atomic_worker_start = async_helper;
461 workers->stopping = 0;
458} 462}
459 463
460/* 464/*
@@ -480,15 +484,19 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
480 atomic_set(&worker->num_pending, 0); 484 atomic_set(&worker->num_pending, 0);
481 atomic_set(&worker->refs, 1); 485 atomic_set(&worker->refs, 1);
482 worker->workers = workers; 486 worker->workers = workers;
483 worker->task = kthread_run(worker_loop, worker, 487 worker->task = kthread_create(worker_loop, worker,
484 "btrfs-%s-%d", workers->name, 488 "btrfs-%s-%d", workers->name,
485 workers->num_workers + 1); 489 workers->num_workers + 1);
486 if (IS_ERR(worker->task)) { 490 if (IS_ERR(worker->task)) {
487 ret = PTR_ERR(worker->task); 491 ret = PTR_ERR(worker->task);
488 kfree(worker);
489 goto fail; 492 goto fail;
490 } 493 }
494
491 spin_lock_irq(&workers->lock); 495 spin_lock_irq(&workers->lock);
496 if (workers->stopping) {
497 spin_unlock_irq(&workers->lock);
498 goto fail_kthread;
499 }
492 list_add_tail(&worker->worker_list, &workers->idle_list); 500 list_add_tail(&worker->worker_list, &workers->idle_list);
493 worker->idle = 1; 501 worker->idle = 1;
494 workers->num_workers++; 502 workers->num_workers++;
@@ -496,8 +504,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
496 WARN_ON(workers->num_workers_starting < 0); 504 WARN_ON(workers->num_workers_starting < 0);
497 spin_unlock_irq(&workers->lock); 505 spin_unlock_irq(&workers->lock);
498 506
507 wake_up_process(worker->task);
499 return 0; 508 return 0;
509
510fail_kthread:
511 kthread_stop(worker->task);
500fail: 512fail:
513 kfree(worker);
501 spin_lock_irq(&workers->lock); 514 spin_lock_irq(&workers->lock);
502 workers->num_workers_starting--; 515 workers->num_workers_starting--;
503 spin_unlock_irq(&workers->lock); 516 spin_unlock_irq(&workers->lock);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 063698b90ce2..1f26792683ed 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -107,6 +107,8 @@ struct btrfs_workers {
107 107
108 /* extra name for this worker, used for current->name */ 108 /* extra name for this worker, used for current->name */
109 char *name; 109 char *name;
110
111 int stopping;
110}; 112};
111 113
112void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 114void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);