diff options
author | NeilBrown <neilb@cse.unsw.edu.au> | 2005-06-21 20:17:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 22:07:46 -0400 |
commit | 3d310eb7b3df1252e8595d059d982b0a9825a137 (patch) | |
tree | 9bca5e7eaa437d60010c1745b9aeb9592439d482 /drivers/md/md.c | |
parent | 41158c7eb22312cfaa256744e1553bb4042ff085 (diff) |
[PATCH] md: fix deadlock due to md thread processing delayed requests.
Before completing a 'write' the md superblock might need to be updated.
This is best done by the md_thread.
The current code schedules this update and queues the write request for later
handling by the md_thread.
However, some personalities (raid5/raid6) will deadlock if the md_thread
tries to submit requests to its own array.
So this patch changes things so that the process submitting the request waits
for the superblock to be written and then submits the request itself.
This fixes a recently-created deadlock in raid5/raid6.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 45 |
1 files changed, 16 insertions, 29 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 789b114f860a..7075bebb7f37 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -224,8 +224,8 @@ static mddev_t * mddev_find(dev_t unit) | |||
224 | INIT_LIST_HEAD(&new->all_mddevs); | 224 | INIT_LIST_HEAD(&new->all_mddevs); |
225 | init_timer(&new->safemode_timer); | 225 | init_timer(&new->safemode_timer); |
226 | atomic_set(&new->active, 1); | 226 | atomic_set(&new->active, 1); |
227 | bio_list_init(&new->write_list); | ||
228 | spin_lock_init(&new->write_lock); | 227 | spin_lock_init(&new->write_lock); |
228 | init_waitqueue_head(&new->sb_wait); | ||
229 | 229 | ||
230 | new->queue = blk_alloc_queue(GFP_KERNEL); | 230 | new->queue = blk_alloc_queue(GFP_KERNEL); |
231 | if (!new->queue) { | 231 | if (!new->queue) { |
@@ -1307,6 +1307,7 @@ repeat: | |||
1307 | if (!mddev->persistent) { | 1307 | if (!mddev->persistent) { |
1308 | mddev->sb_dirty = 0; | 1308 | mddev->sb_dirty = 0; |
1309 | spin_unlock(&mddev->write_lock); | 1309 | spin_unlock(&mddev->write_lock); |
1310 | wake_up(&mddev->sb_wait); | ||
1310 | return; | 1311 | return; |
1311 | } | 1312 | } |
1312 | spin_unlock(&mddev->write_lock); | 1313 | spin_unlock(&mddev->write_lock); |
@@ -1348,6 +1349,7 @@ repeat: | |||
1348 | } | 1349 | } |
1349 | mddev->sb_dirty = 0; | 1350 | mddev->sb_dirty = 0; |
1350 | spin_unlock(&mddev->write_lock); | 1351 | spin_unlock(&mddev->write_lock); |
1352 | wake_up(&mddev->sb_wait); | ||
1351 | 1353 | ||
1352 | } | 1354 | } |
1353 | 1355 | ||
@@ -3368,29 +3370,26 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) | |||
3368 | 3370 | ||
3369 | /* md_write_start(mddev, bi) | 3371 | /* md_write_start(mddev, bi) |
3370 | * If we need to update some array metadata (e.g. 'active' flag | 3372 | * If we need to update some array metadata (e.g. 'active' flag |
3371 | * in superblock) before writing, queue bi for later writing | 3373 | * in superblock) before writing, schedule a superblock update |
3372 | * and return 0, else return 1 and it will be written now | 3374 | * and wait for it to complete. |
3373 | */ | 3375 | */ |
3374 | int md_write_start(mddev_t *mddev, struct bio *bi) | 3376 | void md_write_start(mddev_t *mddev, struct bio *bi) |
3375 | { | 3377 | { |
3378 | DEFINE_WAIT(w); | ||
3376 | if (bio_data_dir(bi) != WRITE) | 3379 | if (bio_data_dir(bi) != WRITE) |
3377 | return 1; | 3380 | return; |
3378 | 3381 | ||
3379 | atomic_inc(&mddev->writes_pending); | 3382 | atomic_inc(&mddev->writes_pending); |
3380 | spin_lock(&mddev->write_lock); | ||
3381 | if (mddev->in_sync == 0 && mddev->sb_dirty == 0) { | ||
3382 | spin_unlock(&mddev->write_lock); | ||
3383 | return 1; | ||
3384 | } | ||
3385 | bio_list_add(&mddev->write_list, bi); | ||
3386 | |||
3387 | if (mddev->in_sync) { | 3383 | if (mddev->in_sync) { |
3388 | mddev->in_sync = 0; | 3384 | spin_lock(&mddev->write_lock); |
3389 | mddev->sb_dirty = 1; | 3385 | if (mddev->in_sync) { |
3386 | mddev->in_sync = 0; | ||
3387 | mddev->sb_dirty = 1; | ||
3388 | md_wakeup_thread(mddev->thread); | ||
3389 | } | ||
3390 | spin_unlock(&mddev->write_lock); | ||
3390 | } | 3391 | } |
3391 | spin_unlock(&mddev->write_lock); | 3392 | wait_event(mddev->sb_wait, mddev->sb_dirty==0); |
3392 | md_wakeup_thread(mddev->thread); | ||
3393 | return 0; | ||
3394 | } | 3393 | } |
3395 | 3394 | ||
3396 | void md_write_end(mddev_t *mddev) | 3395 | void md_write_end(mddev_t *mddev) |
@@ -3685,7 +3684,6 @@ void md_check_recovery(mddev_t *mddev) | |||
3685 | mddev->sb_dirty || | 3684 | mddev->sb_dirty || |
3686 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 3685 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
3687 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || | 3686 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || |
3688 | mddev->write_list.head || | ||
3689 | (mddev->safemode == 1) || | 3687 | (mddev->safemode == 1) || |
3690 | (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) | 3688 | (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) |
3691 | && !mddev->in_sync && mddev->recovery_cp == MaxSector) | 3689 | && !mddev->in_sync && mddev->recovery_cp == MaxSector) |
@@ -3694,7 +3692,6 @@ void md_check_recovery(mddev_t *mddev) | |||
3694 | 3692 | ||
3695 | if (mddev_trylock(mddev)==0) { | 3693 | if (mddev_trylock(mddev)==0) { |
3696 | int spares =0; | 3694 | int spares =0; |
3697 | struct bio *blist; | ||
3698 | 3695 | ||
3699 | spin_lock(&mddev->write_lock); | 3696 | spin_lock(&mddev->write_lock); |
3700 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 3697 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
@@ -3704,21 +3701,11 @@ void md_check_recovery(mddev_t *mddev) | |||
3704 | } | 3701 | } |
3705 | if (mddev->safemode == 1) | 3702 | if (mddev->safemode == 1) |
3706 | mddev->safemode = 0; | 3703 | mddev->safemode = 0; |
3707 | blist = bio_list_get(&mddev->write_list); | ||
3708 | spin_unlock(&mddev->write_lock); | 3704 | spin_unlock(&mddev->write_lock); |
3709 | 3705 | ||
3710 | if (mddev->sb_dirty) | 3706 | if (mddev->sb_dirty) |
3711 | md_update_sb(mddev); | 3707 | md_update_sb(mddev); |
3712 | 3708 | ||
3713 | while (blist) { | ||
3714 | struct bio *b = blist; | ||
3715 | blist = blist->bi_next; | ||
3716 | b->bi_next = NULL; | ||
3717 | generic_make_request(b); | ||
3718 | /* we already counted this, so need to un-count */ | ||
3719 | md_write_end(mddev); | ||
3720 | } | ||
3721 | |||
3722 | 3709 | ||
3723 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && | 3710 | if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && |
3724 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { | 3711 | !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { |