about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-12-13 20:49:50 -0500
committer NeilBrown <neilb@suse.de> 2009-12-13 20:51:40 -0500
commit 729a18663a30a9c8076e3adc2b3e4c866974f935 (patch)
tree 14f8e15ffa09d6a583fccdbe2ddf84c7cb8c2246
parent a2826aa92e2e14db372eda01d333267258944033 (diff)
md/raid5: don't complete make_request on barrier until writes are scheduled
The post-barrier-flush is sent by md as soon as make_request on the barrier write completes. For raid5, the data might not be in the per-device queues yet. So for barrier requests, wait for any pre-reading to be done so that the request will be in the per-device queues.

We use the 'preread_active' count to check that nothing is still in the preread phase, and delay the decrement of this count until after write requests have been submitted to the underlying devices.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid5.c 51
1 files changed, 39 insertions, 12 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8c9395f2028f..c78d4835b0d6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2947,6 +2947,7 @@ static void handle_stripe5(struct stripe_head *sh)
2947 struct r5dev *dev; 2947 struct r5dev *dev;
2948 mdk_rdev_t *blocked_rdev = NULL; 2948 mdk_rdev_t *blocked_rdev = NULL;
2949 int prexor; 2949 int prexor;
2950 int dec_preread_active = 0;
2950 2951
2951 memset(&s, 0, sizeof(s)); 2952 memset(&s, 0, sizeof(s));
2952 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d " 2953 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
@@ -3096,12 +3097,8 @@ static void handle_stripe5(struct stripe_head *sh)
3096 set_bit(STRIPE_INSYNC, &sh->state); 3097 set_bit(STRIPE_INSYNC, &sh->state);
3097 } 3098 }
3098 } 3099 }
3099 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 3100 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
3100 atomic_dec(&conf->preread_active_stripes); 3101 dec_preread_active = 1;
3101 if (atomic_read(&conf->preread_active_stripes) <
3102 IO_THRESHOLD)
3103 md_wakeup_thread(conf->mddev->thread);
3104 }
3105 } 3102 }
3106 3103
3107 /* Now to consider new write requests and what else, if anything 3104 /* Now to consider new write requests and what else, if anything
@@ -3208,6 +3205,16 @@ static void handle_stripe5(struct stripe_head *sh)
3208 3205
3209 ops_run_io(sh, &s); 3206 ops_run_io(sh, &s);
3210 3207
3208 if (dec_preread_active) {
3209 /* We delay this until after ops_run_io so that if make_request
3210 * is waiting on a barrier, it won't continue until the writes
3211 * have actually been submitted.
3212 */
3213 atomic_dec(&conf->preread_active_stripes);
3214 if (atomic_read(&conf->preread_active_stripes) <
3215 IO_THRESHOLD)
3216 md_wakeup_thread(conf->mddev->thread);
3217 }
3211 return_io(return_bi); 3218 return_io(return_bi);
3212} 3219}
3213 3220
@@ -3221,6 +3228,7 @@ static void handle_stripe6(struct stripe_head *sh)
3221 struct r6_state r6s; 3228 struct r6_state r6s;
3222 struct r5dev *dev, *pdev, *qdev; 3229 struct r5dev *dev, *pdev, *qdev;
3223 mdk_rdev_t *blocked_rdev = NULL; 3230 mdk_rdev_t *blocked_rdev = NULL;
3231 int dec_preread_active = 0;
3224 3232
3225 pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 3233 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
3226 "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", 3234 "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
@@ -3379,12 +3387,8 @@ static void handle_stripe6(struct stripe_head *sh)
3379 set_bit(STRIPE_INSYNC, &sh->state); 3387 set_bit(STRIPE_INSYNC, &sh->state);
3380 } 3388 }
3381 } 3389 }
3382 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 3390 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
3383 atomic_dec(&conf->preread_active_stripes); 3391 dec_preread_active = 1;
3384 if (atomic_read(&conf->preread_active_stripes) <
3385 IO_THRESHOLD)
3386 md_wakeup_thread(conf->mddev->thread);
3387 }
3388 } 3392 }
3389 3393
3390 /* Now to consider new write requests and what else, if anything 3394 /* Now to consider new write requests and what else, if anything
@@ -3493,6 +3497,18 @@ static void handle_stripe6(struct stripe_head *sh)
3493 3497
3494 ops_run_io(sh, &s); 3498 ops_run_io(sh, &s);
3495 3499
3500
3501 if (dec_preread_active) {
3502 /* We delay this until after ops_run_io so that if make_request
3503 * is waiting on a barrier, it won't continue until the writes
3504 * have actually been submitted.
3505 */
3506 atomic_dec(&conf->preread_active_stripes);
3507 if (atomic_read(&conf->preread_active_stripes) <
3508 IO_THRESHOLD)
3509 md_wakeup_thread(conf->mddev->thread);
3510 }
3511
3496 return_io(return_bi); 3512 return_io(return_bi);
3497} 3513}
3498 3514
@@ -3995,6 +4011,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
3995 finish_wait(&conf->wait_for_overlap, &w); 4011 finish_wait(&conf->wait_for_overlap, &w);
3996 set_bit(STRIPE_HANDLE, &sh->state); 4012 set_bit(STRIPE_HANDLE, &sh->state);
3997 clear_bit(STRIPE_DELAYED, &sh->state); 4013 clear_bit(STRIPE_DELAYED, &sh->state);
4014 if (mddev->barrier &&
4015 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
4016 atomic_inc(&conf->preread_active_stripes);
3998 release_stripe(sh); 4017 release_stripe(sh);
3999 } else { 4018 } else {
4000 /* cannot get stripe for read-ahead, just give-up */ 4019 /* cannot get stripe for read-ahead, just give-up */
@@ -4014,6 +4033,14 @@ static int make_request(struct request_queue *q, struct bio * bi)
4014 4033
4015 bio_endio(bi, 0); 4034 bio_endio(bi, 0);
4016 } 4035 }
4036
4037 if (mddev->barrier) {
4038 /* We need to wait for the stripes to all be handled.
4039 * So: wait for preread_active_stripes to drop to 0.
4040 */
4041 wait_event(mddev->thread->wqueue,
4042 atomic_read(&conf->preread_active_stripes) == 0);
4043 }
4017 return 0; 4044 return 0;
4018} 4045}
4019 4046