aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMikulas Patocka <mpatocka@redhat.com>2009-04-08 19:27:17 -0400
committerAlasdair G Kergon <agk@redhat.com>2009-04-08 19:27:17 -0400
commit340cd44451fb0bfa542365e6b4b565bbd44836e2 (patch)
treebd5e2d1436c91a3365d427552fc5a497234c6b50
parent73830857bca6f6c9dbd48e906daea50bea42d676 (diff)
dm kcopyd: fix callback race
If the thread calling dm_kcopyd_copy is delayed due to scheduling inside split_job/segment_complete and the subjobs complete before the loop in split_job completes, the kcopyd callback could be invoked from the thread that called dm_kcopyd_copy instead of the kcopyd workqueue.

dm_kcopyd_copy -> split_job -> segment_complete -> job->fn()

Snapshots depend on the fact that callbacks are called from the singlethreaded kcopyd workqueue and expect that there is no racing between individual callbacks. The racing between callbacks can lead to corruption of exception store and it can also mean that exception store callbacks are called twice for the same exception - a likely reason for crashes reported inside pending_complete() / remove_exception().

This patch fixes two problems:

1. job->fn being called from the thread that submitted the job (see above).
   - Fix: hand over the completion callback to the kcopyd thread.

2. job->fn(read_err, write_err, job->context); in segment_complete reports the error of the last subjob, not the union of all errors.
   - Fix: pass job->write_err to the callback to report all error bits (it is done already in run_complete_job)

Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
-rw-r--r--drivers/md/dm-kcopyd.c17
1 file changed, 11 insertions, 6 deletions
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 9d379070918b..3e3fc06cb861 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -511,13 +511,16 @@ static void segment_complete(int read_err, unsigned long write_err,
511 } else if (atomic_dec_and_test(&job->sub_jobs)) { 511 } else if (atomic_dec_and_test(&job->sub_jobs)) {
512 512
513 /* 513 /*
514 * To avoid a race we must keep the job around 514 * Queue the completion callback to the kcopyd thread.
515 * until after the notify function has completed. 515 *
516 * Otherwise the client may try and stop the job 516 * Some callers assume that all the completions are called
517 * after we've completed. 517 * from a single thread and don't race with each other.
518 *
519 * We must not call the callback directly here because this
520 * code may not be executing in the thread.
518 */ 521 */
519 job->fn(read_err, write_err, job->context); 522 push(&kc->complete_jobs, job);
520 mempool_free(job, job->kc->job_pool); 523 wake(kc);
521 } 524 }
522} 525}
523 526
@@ -530,6 +533,8 @@ static void split_job(struct kcopyd_job *job)
530{ 533{
531 int i; 534 int i;
532 535
536 atomic_inc(&job->kc->nr_jobs);
537
533 atomic_set(&job->sub_jobs, SPLIT_COUNT); 538 atomic_set(&job->sub_jobs, SPLIT_COUNT);
534 for (i = 0; i < SPLIT_COUNT; i++) 539 for (i = 0; i < SPLIT_COUNT; i++)
535 segment_complete(0, 0u, job); 540 segment_complete(0, 0u, job);