diff options
author | NeilBrown <neilb@suse.de> | 2009-12-13 20:49:50 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2009-12-13 20:51:40 -0500 |
commit | 729a18663a30a9c8076e3adc2b3e4c866974f935 (patch) | |
tree | 14f8e15ffa09d6a583fccdbe2ddf84c7cb8c2246 | |
parent | a2826aa92e2e14db372eda01d333267258944033 (diff) |
md/raid5: don't complete make_request on barrier until writes are scheduled
The post-barrier-flush is sent by md as soon as make_request on the
barrier write completes. For raid5, the data might not be in the
per-device queues yet at that point. So for barrier requests,
make_request now waits for any pre-reading to finish, so that the
writes are in the per-device queues before it returns.
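We use the 'preread_active_stripes' count to check that nothing is still
in the preread phase: while mddev->barrier is set, make_request marks each
stripe it queues STRIPE_PREREAD_ACTIVE and counts it, and the decrement of
this count is delayed until after write requests have been submitted to
the underlying devices.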
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 51 |
1 file changed, 39 insertions, 12 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8c9395f2028f..c78d4835b0d6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -2947,6 +2947,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2947 | struct r5dev *dev; | 2947 | struct r5dev *dev; |
2948 | mdk_rdev_t *blocked_rdev = NULL; | 2948 | mdk_rdev_t *blocked_rdev = NULL; |
2949 | int prexor; | 2949 | int prexor; |
2950 | int dec_preread_active = 0; | ||
2950 | 2951 | ||
2951 | memset(&s, 0, sizeof(s)); | 2952 | memset(&s, 0, sizeof(s)); |
2952 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d " | 2953 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d " |
@@ -3096,12 +3097,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
3096 | set_bit(STRIPE_INSYNC, &sh->state); | 3097 | set_bit(STRIPE_INSYNC, &sh->state); |
3097 | } | 3098 | } |
3098 | } | 3099 | } |
3099 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 3100 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
3100 | atomic_dec(&conf->preread_active_stripes); | 3101 | dec_preread_active = 1; |
3101 | if (atomic_read(&conf->preread_active_stripes) < | ||
3102 | IO_THRESHOLD) | ||
3103 | md_wakeup_thread(conf->mddev->thread); | ||
3104 | } | ||
3105 | } | 3102 | } |
3106 | 3103 | ||
3107 | /* Now to consider new write requests and what else, if anything | 3104 | /* Now to consider new write requests and what else, if anything |
@@ -3208,6 +3205,16 @@ static void handle_stripe5(struct stripe_head *sh) | |||
3208 | 3205 | ||
3209 | ops_run_io(sh, &s); | 3206 | ops_run_io(sh, &s); |
3210 | 3207 | ||
3208 | if (dec_preread_active) { | ||
3209 | /* We delay this until after ops_run_io so that if make_request | ||
3210 | * is waiting on a barrier, it won't continue until the writes | ||
3211 | * have actually been submitted. | ||
3212 | */ | ||
3213 | atomic_dec(&conf->preread_active_stripes); | ||
3214 | if (atomic_read(&conf->preread_active_stripes) < | ||
3215 | IO_THRESHOLD) | ||
3216 | md_wakeup_thread(conf->mddev->thread); | ||
3217 | } | ||
3211 | return_io(return_bi); | 3218 | return_io(return_bi); |
3212 | } | 3219 | } |
3213 | 3220 | ||
@@ -3221,6 +3228,7 @@ static void handle_stripe6(struct stripe_head *sh) | |||
3221 | struct r6_state r6s; | 3228 | struct r6_state r6s; |
3222 | struct r5dev *dev, *pdev, *qdev; | 3229 | struct r5dev *dev, *pdev, *qdev; |
3223 | mdk_rdev_t *blocked_rdev = NULL; | 3230 | mdk_rdev_t *blocked_rdev = NULL; |
3231 | int dec_preread_active = 0; | ||
3224 | 3232 | ||
3225 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " | 3233 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, " |
3226 | "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", | 3234 | "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", |
@@ -3379,12 +3387,8 @@ static void handle_stripe6(struct stripe_head *sh) | |||
3379 | set_bit(STRIPE_INSYNC, &sh->state); | 3387 | set_bit(STRIPE_INSYNC, &sh->state); |
3380 | } | 3388 | } |
3381 | } | 3389 | } |
3382 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 3390 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) |
3383 | atomic_dec(&conf->preread_active_stripes); | 3391 | dec_preread_active = 1; |
3384 | if (atomic_read(&conf->preread_active_stripes) < | ||
3385 | IO_THRESHOLD) | ||
3386 | md_wakeup_thread(conf->mddev->thread); | ||
3387 | } | ||
3388 | } | 3392 | } |
3389 | 3393 | ||
3390 | /* Now to consider new write requests and what else, if anything | 3394 | /* Now to consider new write requests and what else, if anything |
@@ -3493,6 +3497,18 @@ static void handle_stripe6(struct stripe_head *sh) | |||
3493 | 3497 | ||
3494 | ops_run_io(sh, &s); | 3498 | ops_run_io(sh, &s); |
3495 | 3499 | ||
3500 | |||
3501 | if (dec_preread_active) { | ||
3502 | /* We delay this until after ops_run_io so that if make_request | ||
3503 | * is waiting on a barrier, it won't continue until the writes | ||
3504 | * have actually been submitted. | ||
3505 | */ | ||
3506 | atomic_dec(&conf->preread_active_stripes); | ||
3507 | if (atomic_read(&conf->preread_active_stripes) < | ||
3508 | IO_THRESHOLD) | ||
3509 | md_wakeup_thread(conf->mddev->thread); | ||
3510 | } | ||
3511 | |||
3496 | return_io(return_bi); | 3512 | return_io(return_bi); |
3497 | } | 3513 | } |
3498 | 3514 | ||
@@ -3995,6 +4011,9 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3995 | finish_wait(&conf->wait_for_overlap, &w); | 4011 | finish_wait(&conf->wait_for_overlap, &w); |
3996 | set_bit(STRIPE_HANDLE, &sh->state); | 4012 | set_bit(STRIPE_HANDLE, &sh->state); |
3997 | clear_bit(STRIPE_DELAYED, &sh->state); | 4013 | clear_bit(STRIPE_DELAYED, &sh->state); |
4014 | if (mddev->barrier && | ||
4015 | !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | ||
4016 | atomic_inc(&conf->preread_active_stripes); | ||
3998 | release_stripe(sh); | 4017 | release_stripe(sh); |
3999 | } else { | 4018 | } else { |
4000 | /* cannot get stripe for read-ahead, just give-up */ | 4019 | /* cannot get stripe for read-ahead, just give-up */ |
@@ -4014,6 +4033,14 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
4014 | 4033 | ||
4015 | bio_endio(bi, 0); | 4034 | bio_endio(bi, 0); |
4016 | } | 4035 | } |
4036 | |||
4037 | if (mddev->barrier) { | ||
4038 | /* We need to wait for the stripes to all be handled. | ||
4039 | * So: wait for preread_active_stripes to drop to 0. | ||
4040 | */ | ||
4041 | wait_event(mddev->thread->wqueue, | ||
4042 | atomic_read(&conf->preread_active_stripes) == 0); | ||
4043 | } | ||
4017 | return 0; | 4044 | return 0; |
4018 | } | 4045 | } |
4019 | 4046 | ||