aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2010-09-03 05:56:18 -0400
committerJens Axboe <jaxboe@fusionio.com>2010-09-10 06:35:38 -0400
commite9c7469bb4f502dafc092166201bea1ad5fc0fbf (patch)
tree04202b0bb88623d3005c909eaafcb280778902da /drivers/md/raid5.c
parent7bc9fddab074d6bb630344e1969e28d20b140621 (diff)
md: implment REQ_FLUSH/FUA support
This patch converts md to support REQ_FLUSH/FUA instead of now deprecated REQ_HARDBARRIER. In the core part (md.c), the following changes are notable. * Unlike REQ_HARDBARRIER, REQ_FLUSH/FUA don't interfere with processing of other requests and thus there is no reason to mark the queue congested while FLUSH/FUA is in progress. * REQ_FLUSH/FUA failures are final and its users don't need retry logic. Retry logic is removed. * Preflush needs to be issued to all member devices but FUA writes can be handled the same way as other writes - their processing can be deferred to request_queue of member devices. md_barrier_request() is renamed to md_flush_request() and simplified accordingly. For linear, raid0 and multipath, the core changes are enough. raid1, 5 and 10 need the following conversions. * raid1: Handling of FLUSH/FUA bio's can simply be deferred to request_queues of member devices. Barrier related logic removed. * raid5: Queue draining logic dropped. FUA bit is propagated through biodrain and stripe resconstruction such that all the updated parts of the stripe are written out with FUA writes if any of the dirtying writes was FUA. preread_active_stripes handling in make_request() is updated as suggested by Neil Brown. * raid10: FUA bit needs to be propagated to write clones. linear, raid0, 1, 5 and 10 tested. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Neil Brown <neilb@suse.de> Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c43
1 files changed, 21 insertions, 22 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 69b0a169e43d..31140d1259dc 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -506,9 +506,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
506 int rw; 506 int rw;
507 struct bio *bi; 507 struct bio *bi;
508 mdk_rdev_t *rdev; 508 mdk_rdev_t *rdev;
509 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) 509 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
510 rw = WRITE; 510 if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
511 else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) 511 rw = WRITE_FUA;
512 else
513 rw = WRITE;
514 } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
512 rw = READ; 515 rw = READ;
513 else 516 else
514 continue; 517 continue;
@@ -1031,6 +1034,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
1031 1034
1032 while (wbi && wbi->bi_sector < 1035 while (wbi && wbi->bi_sector <
1033 dev->sector + STRIPE_SECTORS) { 1036 dev->sector + STRIPE_SECTORS) {
1037 if (wbi->bi_rw & REQ_FUA)
1038 set_bit(R5_WantFUA, &dev->flags);
1034 tx = async_copy_data(1, wbi, dev->page, 1039 tx = async_copy_data(1, wbi, dev->page,
1035 dev->sector, tx); 1040 dev->sector, tx);
1036 wbi = r5_next_bio(wbi, dev->sector); 1041 wbi = r5_next_bio(wbi, dev->sector);
@@ -1048,15 +1053,22 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
1048 int pd_idx = sh->pd_idx; 1053 int pd_idx = sh->pd_idx;
1049 int qd_idx = sh->qd_idx; 1054 int qd_idx = sh->qd_idx;
1050 int i; 1055 int i;
1056 bool fua = false;
1051 1057
1052 pr_debug("%s: stripe %llu\n", __func__, 1058 pr_debug("%s: stripe %llu\n", __func__,
1053 (unsigned long long)sh->sector); 1059 (unsigned long long)sh->sector);
1054 1060
1061 for (i = disks; i--; )
1062 fua |= test_bit(R5_WantFUA, &sh->dev[i].flags);
1063
1055 for (i = disks; i--; ) { 1064 for (i = disks; i--; ) {
1056 struct r5dev *dev = &sh->dev[i]; 1065 struct r5dev *dev = &sh->dev[i];
1057 1066
1058 if (dev->written || i == pd_idx || i == qd_idx) 1067 if (dev->written || i == pd_idx || i == qd_idx) {
1059 set_bit(R5_UPTODATE, &dev->flags); 1068 set_bit(R5_UPTODATE, &dev->flags);
1069 if (fua)
1070 set_bit(R5_WantFUA, &dev->flags);
1071 }
1060 } 1072 }
1061 1073
1062 if (sh->reconstruct_state == reconstruct_state_drain_run) 1074 if (sh->reconstruct_state == reconstruct_state_drain_run)
@@ -3281,7 +3293,7 @@ static void handle_stripe5(struct stripe_head *sh)
3281 3293
3282 if (dec_preread_active) { 3294 if (dec_preread_active) {
3283 /* We delay this until after ops_run_io so that if make_request 3295 /* We delay this until after ops_run_io so that if make_request
3284 * is waiting on a barrier, it won't continue until the writes 3296 * is waiting on a flush, it won't continue until the writes
3285 * have actually been submitted. 3297 * have actually been submitted.
3286 */ 3298 */
3287 atomic_dec(&conf->preread_active_stripes); 3299 atomic_dec(&conf->preread_active_stripes);
@@ -3583,7 +3595,7 @@ static void handle_stripe6(struct stripe_head *sh)
3583 3595
3584 if (dec_preread_active) { 3596 if (dec_preread_active) {
3585 /* We delay this until after ops_run_io so that if make_request 3597 /* We delay this until after ops_run_io so that if make_request
3586 * is waiting on a barrier, it won't continue until the writes 3598 * is waiting on a flush, it won't continue until the writes
3587 * have actually been submitted. 3599 * have actually been submitted.
3588 */ 3600 */
3589 atomic_dec(&conf->preread_active_stripes); 3601 atomic_dec(&conf->preread_active_stripes);
@@ -3978,14 +3990,8 @@ static int make_request(mddev_t *mddev, struct bio * bi)
3978 const int rw = bio_data_dir(bi); 3990 const int rw = bio_data_dir(bi);
3979 int remaining; 3991 int remaining;
3980 3992
3981 if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) { 3993 if (unlikely(bi->bi_rw & REQ_FLUSH)) {
3982 /* Drain all pending writes. We only really need 3994 md_flush_request(mddev, bi);
3983 * to ensure they have been submitted, but this is
3984 * easier.
3985 */
3986 mddev->pers->quiesce(mddev, 1);
3987 mddev->pers->quiesce(mddev, 0);
3988 md_barrier_request(mddev, bi);
3989 return 0; 3995 return 0;
3990 } 3996 }
3991 3997
@@ -4103,7 +4109,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
4103 finish_wait(&conf->wait_for_overlap, &w); 4109 finish_wait(&conf->wait_for_overlap, &w);
4104 set_bit(STRIPE_HANDLE, &sh->state); 4110 set_bit(STRIPE_HANDLE, &sh->state);
4105 clear_bit(STRIPE_DELAYED, &sh->state); 4111 clear_bit(STRIPE_DELAYED, &sh->state);
4106 if (mddev->barrier && 4112 if ((bi->bi_rw & REQ_SYNC) &&
4107 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 4113 !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
4108 atomic_inc(&conf->preread_active_stripes); 4114 atomic_inc(&conf->preread_active_stripes);
4109 release_stripe(sh); 4115 release_stripe(sh);
@@ -4126,13 +4132,6 @@ static int make_request(mddev_t *mddev, struct bio * bi)
4126 bio_endio(bi, 0); 4132 bio_endio(bi, 0);
4127 } 4133 }
4128 4134
4129 if (mddev->barrier) {
4130 /* We need to wait for the stripes to all be handled.
4131 * So: wait for preread_active_stripes to drop to 0.
4132 */
4133 wait_event(mddev->thread->wqueue,
4134 atomic_read(&conf->preread_active_stripes) == 0);
4135 }
4136 return 0; 4135 return 0;
4137} 4136}
4138 4137