aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2013-06-11 21:01:22 -0400
committerNeilBrown <neilb@suse.de>2013-06-12 23:40:48 -0400
commite2d59925221cd562e07fee38ec8839f7209ae603 (patch)
tree35ac67de5be7d4a96c2c3f9665f9d7d68b52e8dd /drivers/md/raid10.c
parent3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 (diff)
md/raid1,raid10: use freeze_array in place of raise_barrier in various places.
Various places in raid1 and raid10 are calling raise_barrier when they really should call freeze_array. The former is only intended to be called from "make_request". The latter has extra checks for 'nr_queued' and makes a call to flush_pending_writes(), so it is safe to call it from within the management thread. Using raise_barrier will sometimes deadlock. Using freeze_array should not. As 'freeze_array' currently expects one request to be pending (in handle_read_error - the only previous caller), we need to pass it the number of pending requests (extra) to ignore. The deadlock was made particularly noticeable by commits 050b66152f87c7 (raid10) and 6b740b8d79252f13 (raid1) which appeared in 3.4, so the fix is appropriate for any -stable kernel since then. This patch probably won't apply directly to some early kernels and will need to be applied by hand. Cc: stable@vger.kernel.org Reported-by: Alexander Lyakas <alex.bolshoy@gmail.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r--drivers/md/raid10.c14
1 files changed, 7 insertions, 7 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8000ee25650d..aa9ed304951e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1065,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)
1065 wake_up(&conf->wait_barrier); 1065 wake_up(&conf->wait_barrier);
1066} 1066}
1067 1067
1068static void freeze_array(struct r10conf *conf) 1068static void freeze_array(struct r10conf *conf, int extra)
1069{ 1069{
1070 /* stop syncio and normal IO and wait for everything to 1070 /* stop syncio and normal IO and wait for everything to
1071 * go quiet. 1071 * go quiet.
1072 * We increment barrier and nr_waiting, and then 1072 * We increment barrier and nr_waiting, and then
1073 * wait until nr_pending match nr_queued+1 1073 * wait until nr_pending match nr_queued+extra
1074 * This is called in the context of one normal IO request 1074 * This is called in the context of one normal IO request
1075 * that has failed. Thus any sync request that might be pending 1075 * that has failed. Thus any sync request that might be pending
1076 * will be blocked by nr_pending, and we need to wait for 1076 * will be blocked by nr_pending, and we need to wait for
1077 * pending IO requests to complete or be queued for re-try. 1077 * pending IO requests to complete or be queued for re-try.
1078 * Thus the number queued (nr_queued) plus this request (1) 1078 * Thus the number queued (nr_queued) plus this request (extra)
1079 * must match the number of pending IOs (nr_pending) before 1079 * must match the number of pending IOs (nr_pending) before
1080 * we continue. 1080 * we continue.
1081 */ 1081 */
@@ -1083,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)
1083 conf->barrier++; 1083 conf->barrier++;
1084 conf->nr_waiting++; 1084 conf->nr_waiting++;
1085 wait_event_lock_irq_cmd(conf->wait_barrier, 1085 wait_event_lock_irq_cmd(conf->wait_barrier,
1086 conf->nr_pending == conf->nr_queued+1, 1086 conf->nr_pending == conf->nr_queued+extra,
1087 conf->resync_lock, 1087 conf->resync_lock,
1088 flush_pending_writes(conf)); 1088 flush_pending_writes(conf));
1089 1089
@@ -1849,8 +1849,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1849 * we wait for all outstanding requests to complete. 1849 * we wait for all outstanding requests to complete.
1850 */ 1850 */
1851 synchronize_sched(); 1851 synchronize_sched();
1852 raise_barrier(conf, 0); 1852 freeze_array(conf, 0);
1853 lower_barrier(conf); 1853 unfreeze_array(conf);
1854 clear_bit(Unmerged, &rdev->flags); 1854 clear_bit(Unmerged, &rdev->flags);
1855 } 1855 }
1856 md_integrity_add_rdev(rdev, mddev); 1856 md_integrity_add_rdev(rdev, mddev);
@@ -2646,7 +2646,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
2646 r10_bio->devs[slot].bio = NULL; 2646 r10_bio->devs[slot].bio = NULL;
2647 2647
2648 if (mddev->ro == 0) { 2648 if (mddev->ro == 0) {
2649 freeze_array(conf); 2649 freeze_array(conf, 1);
2650 fix_read_error(conf, mddev, r10_bio); 2650 fix_read_error(conf, mddev, r10_bio);
2651 unfreeze_array(conf); 2651 unfreeze_array(conf);
2652 } else 2652 } else