about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: NeilBrown <neilb@cse.unsw.edu.au> 2005-06-21 20:17:26 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-21 22:07:46 -0400
commit 3d310eb7b3df1252e8595d059d982b0a9825a137 (patch)
tree 9bca5e7eaa437d60010c1745b9aeb9592439d482
parent 41158c7eb22312cfaa256744e1553bb4042ff085 (diff)
[PATCH] md: fix deadlock due to md thread processing delayed requests.
Before completing a 'write', the md superblock might need to be updated. This is best done by the md_thread.

The current code schedules this update and queues the write request for later handling by the md_thread. However, some personalities (raid5/raid6) will deadlock if the md_thread tries to submit requests to its own array.

So this patch changes things so that the process submitting the request waits for the superblock to be written, and then submits the request itself. This fixes a recently-created deadlock in raid5/raid6.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--drivers/md/md.c45
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c3
-rw-r--r--drivers/md/raid5.c3
-rw-r--r--drivers/md/raid6main.c3
-rw-r--r--include/linux/raid/md.h2
-rw-r--r--include/linux/raid/md_k.h2
7 files changed, 23 insertions, 39 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 789b114f860a..7075bebb7f37 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -224,8 +224,8 @@ static mddev_t * mddev_find(dev_t unit)
224 INIT_LIST_HEAD(&new->all_mddevs); 224 INIT_LIST_HEAD(&new->all_mddevs);
225 init_timer(&new->safemode_timer); 225 init_timer(&new->safemode_timer);
226 atomic_set(&new->active, 1); 226 atomic_set(&new->active, 1);
227 bio_list_init(&new->write_list);
228 spin_lock_init(&new->write_lock); 227 spin_lock_init(&new->write_lock);
228 init_waitqueue_head(&new->sb_wait);
229 229
230 new->queue = blk_alloc_queue(GFP_KERNEL); 230 new->queue = blk_alloc_queue(GFP_KERNEL);
231 if (!new->queue) { 231 if (!new->queue) {
@@ -1307,6 +1307,7 @@ repeat:
1307 if (!mddev->persistent) { 1307 if (!mddev->persistent) {
1308 mddev->sb_dirty = 0; 1308 mddev->sb_dirty = 0;
1309 spin_unlock(&mddev->write_lock); 1309 spin_unlock(&mddev->write_lock);
1310 wake_up(&mddev->sb_wait);
1310 return; 1311 return;
1311 } 1312 }
1312 spin_unlock(&mddev->write_lock); 1313 spin_unlock(&mddev->write_lock);
@@ -1348,6 +1349,7 @@ repeat:
1348 } 1349 }
1349 mddev->sb_dirty = 0; 1350 mddev->sb_dirty = 0;
1350 spin_unlock(&mddev->write_lock); 1351 spin_unlock(&mddev->write_lock);
1352 wake_up(&mddev->sb_wait);
1351 1353
1352} 1354}
1353 1355
@@ -3368,29 +3370,26 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
3368 3370
3369/* md_write_start(mddev, bi) 3371/* md_write_start(mddev, bi)
3370 * If we need to update some array metadata (e.g. 'active' flag 3372 * If we need to update some array metadata (e.g. 'active' flag
3371 * in superblock) before writing, queue bi for later writing 3373 * in superblock) before writing, schedule a superblock update
3372 * and return 0, else return 1 and it will be written now 3374 * and wait for it to complete.
3373 */ 3375 */
3374int md_write_start(mddev_t *mddev, struct bio *bi) 3376void md_write_start(mddev_t *mddev, struct bio *bi)
3375{ 3377{
3378 DEFINE_WAIT(w);
3376 if (bio_data_dir(bi) != WRITE) 3379 if (bio_data_dir(bi) != WRITE)
3377 return 1; 3380 return;
3378 3381
3379 atomic_inc(&mddev->writes_pending); 3382 atomic_inc(&mddev->writes_pending);
3380 spin_lock(&mddev->write_lock);
3381 if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
3382 spin_unlock(&mddev->write_lock);
3383 return 1;
3384 }
3385 bio_list_add(&mddev->write_list, bi);
3386
3387 if (mddev->in_sync) { 3383 if (mddev->in_sync) {
3388 mddev->in_sync = 0; 3384 spin_lock(&mddev->write_lock);
3389 mddev->sb_dirty = 1; 3385 if (mddev->in_sync) {
3386 mddev->in_sync = 0;
3387 mddev->sb_dirty = 1;
3388 md_wakeup_thread(mddev->thread);
3389 }
3390 spin_unlock(&mddev->write_lock);
3390 } 3391 }
3391 spin_unlock(&mddev->write_lock); 3392 wait_event(mddev->sb_wait, mddev->sb_dirty==0);
3392 md_wakeup_thread(mddev->thread);
3393 return 0;
3394} 3393}
3395 3394
3396void md_write_end(mddev_t *mddev) 3395void md_write_end(mddev_t *mddev)
@@ -3685,7 +3684,6 @@ void md_check_recovery(mddev_t *mddev)
3685 mddev->sb_dirty || 3684 mddev->sb_dirty ||
3686 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || 3685 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
3687 test_bit(MD_RECOVERY_DONE, &mddev->recovery) || 3686 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
3688 mddev->write_list.head ||
3689 (mddev->safemode == 1) || 3687 (mddev->safemode == 1) ||
3690 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) 3688 (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
3691 && !mddev->in_sync && mddev->recovery_cp == MaxSector) 3689 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
@@ -3694,7 +3692,6 @@ void md_check_recovery(mddev_t *mddev)
3694 3692
3695 if (mddev_trylock(mddev)==0) { 3693 if (mddev_trylock(mddev)==0) {
3696 int spares =0; 3694 int spares =0;
3697 struct bio *blist;
3698 3695
3699 spin_lock(&mddev->write_lock); 3696 spin_lock(&mddev->write_lock);
3700 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 3697 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
@@ -3704,21 +3701,11 @@ void md_check_recovery(mddev_t *mddev)
3704 } 3701 }
3705 if (mddev->safemode == 1) 3702 if (mddev->safemode == 1)
3706 mddev->safemode = 0; 3703 mddev->safemode = 0;
3707 blist = bio_list_get(&mddev->write_list);
3708 spin_unlock(&mddev->write_lock); 3704 spin_unlock(&mddev->write_lock);
3709 3705
3710 if (mddev->sb_dirty) 3706 if (mddev->sb_dirty)
3711 md_update_sb(mddev); 3707 md_update_sb(mddev);
3712 3708
3713 while (blist) {
3714 struct bio *b = blist;
3715 blist = blist->bi_next;
3716 b->bi_next = NULL;
3717 generic_make_request(b);
3718 /* we already counted this, so need to un-count */
3719 md_write_end(mddev);
3720 }
3721
3722 3709
3723 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && 3710 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
3724 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { 3711 !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3f5234fe3593..98b09773e79e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -561,8 +561,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
561 * thread has put up a bar for new requests. 561 * thread has put up a bar for new requests.
562 * Continue immediately if no resync is active currently. 562 * Continue immediately if no resync is active currently.
563 */ 563 */
564 if (md_write_start(mddev, bio)==0) 564 md_write_start(mddev, bio); /* wait on superblock update early */
565 return 0; 565
566 spin_lock_irq(&conf->resync_lock); 566 spin_lock_irq(&conf->resync_lock);
567 wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); 567 wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
568 conf->nr_pending++; 568 conf->nr_pending++;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8476515bfdc7..fd7324a86d13 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -700,8 +700,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
700 return 0; 700 return 0;
701 } 701 }
702 702
703 if (md_write_start(mddev, bio) == 0) 703 md_write_start(mddev, bio);
704 return 0;
705 704
706 /* 705 /*
707 * Register the new request and wait if the reconstruction 706 * Register the new request and wait if the reconstruction
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 1ce3f5aaa984..93a9726cc2d6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1411,8 +1411,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
1411 sector_t logical_sector, last_sector; 1411 sector_t logical_sector, last_sector;
1412 struct stripe_head *sh; 1412 struct stripe_head *sh;
1413 1413
1414 if (md_write_start(mddev, bi)==0) 1414 md_write_start(mddev, bi);
1415 return 0;
1416 1415
1417 if (bio_data_dir(bi)==WRITE) { 1416 if (bio_data_dir(bi)==WRITE) {
1418 disk_stat_inc(mddev->gendisk, writes); 1417 disk_stat_inc(mddev->gendisk, writes);
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index d9c385496dc5..f62ea1a73d0d 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1570,8 +1570,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
1570 sector_t logical_sector, last_sector; 1570 sector_t logical_sector, last_sector;
1571 struct stripe_head *sh; 1571 struct stripe_head *sh;
1572 1572
1573 if (md_write_start(mddev, bi)==0) 1573 md_write_start(mddev, bi);
1574 return 0;
1575 1574
1576 if (bio_data_dir(bi)==WRITE) { 1575 if (bio_data_dir(bi)==WRITE) {
1577 disk_stat_inc(mddev->gendisk, writes); 1576 disk_stat_inc(mddev->gendisk, writes);
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index cfde8f497d6d..75f41d8faed2 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
69extern void md_unregister_thread (mdk_thread_t *thread); 69extern void md_unregister_thread (mdk_thread_t *thread);
70extern void md_wakeup_thread(mdk_thread_t *thread); 70extern void md_wakeup_thread(mdk_thread_t *thread);
71extern void md_check_recovery(mddev_t *mddev); 71extern void md_check_recovery(mddev_t *mddev);
72extern int md_write_start(mddev_t *mddev, struct bio *bi); 72extern void md_write_start(mddev_t *mddev, struct bio *bi);
73extern void md_write_end(mddev_t *mddev); 73extern void md_write_end(mddev_t *mddev);
74extern void md_handle_safemode(mddev_t *mddev); 74extern void md_handle_safemode(mddev_t *mddev);
75extern void md_done_sync(mddev_t *mddev, int blocks, int ok); 75extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 6cdcb4434c6c..3e977025cf43 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -261,7 +261,7 @@ struct mddev_s
261 sector_t recovery_cp; 261 sector_t recovery_cp;
262 262
263 spinlock_t write_lock; 263 spinlock_t write_lock;
264 struct bio_list write_list; 264 wait_queue_head_t sb_wait; /* for waiting on superblock updates */
265 265
266 unsigned int safemode; /* if set, update "clean" superblock 266 unsigned int safemode; /* if set, update "clean" superblock
267 * when no writes pending. 267 * when no writes pending.