aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
author NeilBrown <neilb@cse.unsw.edu.au> 2005-06-21 20:17:26 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-21 22:07:46 -0400
commit 3d310eb7b3df1252e8595d059d982b0a9825a137 (patch)
tree 9bca5e7eaa437d60010c1745b9aeb9592439d482 /drivers/md/md.c
parent 41158c7eb22312cfaa256744e1553bb4042ff085 (diff)
[PATCH] md: fix deadlock due to md thread processing delayed requests.
Before completing a 'write', the md superblock might need to be updated. This is best done by the md_thread. The current code schedules the update and queues the write request for later handling by the md_thread. However, some personalities (raid5/raid6) will deadlock if the md_thread tries to submit requests to its own array. So this patch changes things so that the process submitting the request waits for the superblock to be written and then submits the request itself. This fixes a recently-created deadlock in raid5/raid6. Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c45
1 files changed, 16 insertions, 29 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 789b114f860a..7075bebb7f37 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -224,8 +224,8 @@ static mddev_t * mddev_find(dev_t unit)
224 INIT_LIST_HEAD(&new->all_mddevs); 224 INIT_LIST_HEAD(&new->all_mddevs);
225 init_timer(&new->safemode_timer); 225 init_timer(&new->safemode_timer);
226 atomic_set(&new->active, 1); 226 atomic_set(&new->active, 1);
227 bio_list_init(&new->write_list);
228 spin_lock_init(&new->write_lock); 227 spin_lock_init(&new->write_lock);
228 init_waitqueue_head(&new->sb_wait);
229 229
230 new->queue = blk_alloc_queue(GFP_KERNEL); 230 new->queue = blk_alloc_queue(GFP_KERNEL);
231 if (!new->queue) { 231 if (!new->queue) {
@@ -1307,6 +1307,7 @@ repeat:
1307 if (!mddev->persistent) { 1307 if (!mddev->persistent) {
1308 mddev->sb_dirty = 0; 1308 mddev->sb_dirty = 0;
1309 spin_unlock(&mddev->write_lock); 1309 spin_unlock(&mddev->write_lock);
1310 wake_up(&mddev->sb_wait);
1310 return; 1311 return;
1311 } 1312 }
1312 spin_unlock(&mddev->write_lock); 1313 spin_unlock(&mddev->write_lock);
@@ -1348,6 +1349,7 @@ repeat:
1348 } 1349 }
1349 mddev->sb_dirty = 0; 1350 mddev->sb_dirty = 0;
1350 spin_unlock(&mddev->write_lock); 1351 spin_unlock(&mddev->write_lock);
1352 wake_up(&mddev->sb_wait);
1351 1353
1352} 1354}
1353 1355
@@ -3368,29 +3370,26 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
3368 3370
3369/* md_write_start(mddev, bi) 3371/* md_write_start(mddev, bi)
3370 * If we need to update some array metadata (e.g. 'active' flag 3372 * If we need to update some array metadata (e.g. 'active' flag
3371 * in superblock) before writing, queue bi for later writing 3373 * in superblock) before writing, schedule a superblock update
3372 * and return 0, else return 1 and it will be written now 3374 * and wait for it to complete.
3373 */ 3375 */
3374int md_write_start(mddev_t *mddev, struct bio *bi) 3376void md_write_start(mddev_t *mddev, struct bio *bi)
3375{ 3377{
3378 DEFINE_WAIT(w);
3376 if (bio_data_dir(bi) != WRITE) 3379 if (bio_data_dir(bi) != WRITE)
3377 return 1; 3380 return;
3378 3381
3379 atomic_inc(&mddev->writes_pending); 3382 atomic_inc(&mddev->writes_pending);
3380 spin_lock(&mddev->write_lock);
3381 if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
3382 spin_unlock(&mddev->write_lock);
3383 return 1;
3384 }
3385 bio_list_add(&mddev->write_list, bi);
3386
3387 if (mddev->in_sync) { 3383 if (mddev->in_sync) {
3388 mddev->in_sync = 0; 3384 spin_lock(&mddev->write_lock);
3389 mddev->sb_dirty = 1; 3385 if (mddev->in_sync) {
3386 mddev->in_sync = 0;
3387 mddev->sb_dirty = 1;
3388 md_wakeup_thread(mddev->thread);
3389 }
3390 spin_unlock(&mddev->write_lock);
3390 } 3391 }
3391 spin_unlock(&mddev->write_lock); 3392 wait_event(mddev->sb_wait, mddev->sb_dirty==0);
3392 md_wakeup_thread(mddev->thread);
3393 return 0;
3394} 3393}
3395 3394
3396void md_write_end(mddev_t *mddev) 3395void md_write_end(mddev_t *mddev)
@@ -3685,7 +3684,6 @@ void md_check_recovery(mddev_t *mddev)
3685 mddev->sb_dirty || 3684 mddev->sb_dirty ||
3686 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || 3685 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
3687 test_bit(MD_RECOVERY_DONE, &mddev->recovery) || 3686 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
3688 mddev->write_list.head ||
3689 (mddev->safemode == 1) || 3687 (mddev->safemode == 1) ||
3690 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) 3688 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
3691 && !mddev->in_sync && mddev->recovery_cp == MaxSector) 3689 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
@@ -3694,7 +3692,6 @@ void md_check_recovery(mddev_t *mddev)
3694 3692
3695 if (mddev_trylock(mddev)==0) { 3693 if (mddev_trylock(mddev)==0) {
3696 int spares =0; 3694 int spares =0;
3697 struct bio *blist;
3698 3695
3699 spin_lock(&mddev->write_lock); 3696 spin_lock(&mddev->write_lock);
3700 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 3697 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
@@ -3704,21 +3701,11 @@ void md_check_recovery(mddev_t *mddev)
3704 } 3701 }
3705 if (mddev->safemode == 1) 3702 if (mddev->safemode == 1)
3706 mddev->safemode = 0; 3703 mddev->safemode = 0;
3707 blist = bio_list_get(&mddev->write_list);
3708 spin_unlock(&mddev->write_lock); 3704 spin_unlock(&mddev->write_lock);
3709 3705
3710 if (mddev->sb_dirty) 3706 if (mddev->sb_dirty)
3711 md_update_sb(mddev); 3707 md_update_sb(mddev);
3712 3708
3713 while (blist) {
3714 struct bio *b = blist;
3715 blist = blist->bi_next;
3716 b->bi_next = NULL;
3717 generic_make_request(b);
3718 /* we already counted this, so need to un-count */
3719 md_write_end(mddev);
3720 }
3721
3722 3709
3723 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && 3710 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
3724 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { 3711 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {