From 4ac6875eeb97a49bad7bc8d56b5ec935904fc6e7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Nov 2012 13:11:26 +1100 Subject: md/raid5: round discard alignment up to power of 2. blkdev_issue_discard currently assumes that the granularity is a power of 2. So in raid5, round the chosen number up to avoid embarrassment. Cc: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/raid5.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index c5439dce0295..baea94f0670a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5529,6 +5529,10 @@ static int run(struct mddev *mddev) * discard data disk but write parity disk */ stripe = stripe * PAGE_SIZE; + /* Round up to power of 2, as discard handling + * currently assumes that */ + while ((stripe-1) & stripe) + stripe = (stripe | (stripe-1)) + 1; mddev->queue->limits.discard_alignment = stripe; mddev->queue->limits.discard_granularity = stripe; /* -- cgit v1.2.2 From ef5b7c69b7a1b8b8744a6168b6ff02900f81b6ca Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 22 Nov 2012 09:13:36 +1100 Subject: md/raid5: move resolving of reconstruct_state earlier in handle_stripe. The chunk of code in handle_stripe which responds to a *_result value in reconstruct_state is really the completion of some processing that happened outside of handle_stripe (possibly asynchronously) and so should be one of the first things done in handle_stripe(). After the next patch it will be important that it happens before handle_stripe_clean_event(), as that will clear some dev->flags bit that this code tests. 
Signed-off-by: NeilBrown --- drivers/md/raid5.c | 68 +++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 34 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index baea94f0670a..0fb988556eea 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3490,40 +3490,6 @@ static void handle_stripe(struct stripe_head *sh) handle_failed_sync(conf, sh, &s); } - /* - * might be able to return some write requests if the parity blocks - * are safe, or on a failed drive - */ - pdev = &sh->dev[sh->pd_idx]; - s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) - || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); - qdev = &sh->dev[sh->qd_idx]; - s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) - || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) - || conf->level < 6; - - if (s.written && - (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) - && !test_bit(R5_LOCKED, &pdev->flags) - && (test_bit(R5_UPTODATE, &pdev->flags) || - test_bit(R5_Discard, &pdev->flags))))) && - (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) - && !test_bit(R5_LOCKED, &qdev->flags) - && (test_bit(R5_UPTODATE, &qdev->flags) || - test_bit(R5_Discard, &qdev->flags)))))) - handle_stripe_clean_event(conf, sh, disks, &s.return_bi); - - /* Now we might consider reading some blocks, either to check/generate - * parity, or to satisfy requests - * or to load a block that is being partially written. 
- */ - if (s.to_read || s.non_overwrite - || (conf->level == 6 && s.to_write && s.failed) - || (s.syncing && (s.uptodate + s.compute < disks)) - || s.replacing - || s.expanding) - handle_stripe_fill(sh, &s, disks); - /* Now we check to see if any write operations have recently * completed */ @@ -3561,6 +3527,40 @@ static void handle_stripe(struct stripe_head *sh) s.dec_preread_active = 1; } + /* + * might be able to return some write requests if the parity blocks + * are safe, or on a failed drive + */ + pdev = &sh->dev[sh->pd_idx]; + s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) + || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); + qdev = &sh->dev[sh->qd_idx]; + s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) + || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) + || conf->level < 6; + + if (s.written && + (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) + && !test_bit(R5_LOCKED, &pdev->flags) + && (test_bit(R5_UPTODATE, &pdev->flags) || + test_bit(R5_Discard, &pdev->flags))))) && + (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) + && !test_bit(R5_LOCKED, &qdev->flags) + && (test_bit(R5_UPTODATE, &qdev->flags) || + test_bit(R5_Discard, &qdev->flags)))))) + handle_stripe_clean_event(conf, sh, disks, &s.return_bi); + + /* Now we might consider reading some blocks, either to check/generate + * parity, or to satisfy requests + * or to load a block that is being partially written. + */ + if (s.to_read || s.non_overwrite + || (conf->level == 6 && s.to_write && s.failed) + || (s.syncing && (s.uptodate + s.compute < disks)) + || s.replacing + || s.expanding) + handle_stripe_fill(sh, &s, disks); + /* Now to consider new write requests and what else, if anything * should be read. We do not handle new writes when: * 1/ A 'write' operation (copy+xor) is already in flight. 
-- cgit v1.2.2 From ca64cae96037de16e4af92678814f5d4bf0c1c65 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 21 Nov 2012 16:33:40 +1100 Subject: md/raid5: Make sure we clear R5_Discard when discard is finished. commit 9e44476851e91c86c98eb92b9bc27fb801f89072 ("MD: raid5 avoid unnecessary zero page for trim") changed raid5 to clear R5_Discard when the complete request is handled rather than when submitting the per-device discard request. However it did not clear R5_Discard for the parity device. This means that if the stripe_head was reused before it expired from the cache, the setting would be wrong and a hang would result. Also if the R5_UPTODATE bit happens to be set, R5_Discard again won't be cleared. But R5_UPTODATE really should be clear at this point. So make sure R5_Discard is cleared in all cases, and clear R5_UPTODATE when a 'discard' completes. Signed-off-by: NeilBrown --- drivers/md/raid5.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0fb988556eea..a4502686e7a8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2774,10 +2774,12 @@ static void handle_stripe_clean_event(struct r5conf *conf, dev = &sh->dev[i]; if (!test_bit(R5_LOCKED, &dev->flags) && (test_bit(R5_UPTODATE, &dev->flags) || - test_and_clear_bit(R5_Discard, &dev->flags))) { + test_bit(R5_Discard, &dev->flags))) { /* We can return any write requests */ struct bio *wbi, *wbi2; pr_debug("Return write for disc %d\n", i); + if (test_and_clear_bit(R5_Discard, &dev->flags)) + clear_bit(R5_UPTODATE, &dev->flags); wbi = dev->written; dev->written = NULL; while (wbi && wbi->bi_sector < @@ -2795,7 +2797,8 @@ static void handle_stripe_clean_event(struct r5conf *conf, !test_bit(STRIPE_DEGRADED, &sh->state), 0); } - } + } else if (test_bit(R5_Discard, &sh->dev[i].flags)) + clear_bit(R5_Discard, &sh->dev[i].flags); if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) if 
(atomic_dec_and_test(&conf->pending_full_writes)) -- cgit v1.2.2