author    NeilBrown <neilb@suse.de>    2013-03-11 21:18:06 -0400
committer NeilBrown <neilb@suse.de>    2013-03-19 22:20:59 -0400
commit    f8dfcffd0472a0f353f34a567ad3f53568914d04 (patch)
tree      8e19d10fcd778cace960c49336f1323858dcb4d5 /drivers/md/raid5.c
parent    90584fc93d461520a888f691144f0879283b3624 (diff)
md/raid5: ensure sync and DISCARD don't happen at the same time.
A number of problems can occur due to races between resync/recovery and
discard.

- If sync_request calls handle_stripe() while a discard is happening on the
  stripe, it might call handle_stripe_clean_event() before all of the
  individual discard requests have completed (so some devices are still
  locked, but not all).  Since commit ca64cae96037de16e4af92678814f5d4bf0c1c65
  ("md/raid5: Make sure we clear R5_Discard when discard is finished.") this
  will cause R5_Discard to be cleared for the parity device, so
  handle_stripe_clean_event() will not be called when the other devices do
  become unlocked, so their ->written will not be cleared.  This ultimately
  leads to a WARN_ON in init_stripe and a lock-up.

- If handle_stripe_clean_event() does clear R5_UPTODATE at an awkward time
  for resync, it can lead to s->uptodate being less than disks in
  handle_parity_checks5(), which triggers a BUG (because it is one).

So:

- Keep R5_Discard on the parity device until all other devices have
  completed their discard requests.

- Make sure we don't try to have a 'discard' and a 'sync' action at the
  same time.  This involves a new stripe flag (STRIPE_DISCARD) so we know
  when a 'discard' is happening, and the use of R5_Overlap on the parity
  disk so that when a discard is wanted while a sync is active, we know to
  wake up the discard at the appropriate time.

Discard support for RAID5 was added in 3.7, so this is suitable for any
-stable kernel since 3.7.

Cc: stable@vger.kernel.org (v3.7+)
Reported-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Tested-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.de>
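The exclusion the patch introduces can be summarised as: a stripe is never
marked STRIPE_SYNCING while STRIPE_DISCARD is set, and a discard request
waits (via R5_Overlap and conf->wait_for_overlap) whenever STRIPE_SYNCING
is set.  As a rough illustration of that handshake only, and not the kernel
code, the user-space C sketch below models it with a pthread mutex and
condition variable; every name in it (stripe_model, try_start_sync,
start_discard, and so on) is invented for the example.

/*
 * Illustrative user-space model of the discard/sync exclusion added by
 * this patch; not kernel code.  'discarding' plays the role of
 * STRIPE_DISCARD, 'syncing' plays STRIPE_SYNCING, and the condition
 * variable stands in for conf->wait_for_overlap.  Build with -pthread.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct stripe_model {
	pthread_mutex_t lock;	/* models sh->stripe_lock */
	pthread_cond_t wait;	/* models conf->wait_for_overlap */
	bool discarding;	/* models STRIPE_DISCARD */
	bool syncing;		/* models STRIPE_SYNCING */
};

static struct stripe_model stripe = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.wait = PTHREAD_COND_INITIALIZER,
};

/* Sync side: refuse to start while a discard is in flight. */
static bool try_start_sync(struct stripe_model *s)
{
	bool started = false;

	pthread_mutex_lock(&s->lock);
	if (!s->discarding) {
		s->syncing = true;
		started = true;
	}
	pthread_mutex_unlock(&s->lock);
	return started;
}

/* Discard side: wait for any active sync to finish, then mark the
 * stripe so a new sync cannot start until the discard completes. */
static void start_discard(struct stripe_model *s)
{
	pthread_mutex_lock(&s->lock);
	while (s->syncing)
		pthread_cond_wait(&s->wait, &s->lock);
	s->discarding = true;
	pthread_mutex_unlock(&s->lock);
}

/* Completion paths clear their flag and wake any waiter. */
static void finish_sync(struct stripe_model *s)
{
	pthread_mutex_lock(&s->lock);
	s->syncing = false;
	pthread_cond_broadcast(&s->wait);
	pthread_mutex_unlock(&s->lock);
}

static void finish_discard(struct stripe_model *s)
{
	pthread_mutex_lock(&s->lock);
	s->discarding = false;
	pthread_cond_broadcast(&s->wait);
	pthread_mutex_unlock(&s->lock);
}

int main(void)
{
	start_discard(&stripe);
	printf("discard in flight, sync may start: %d\n",
	       try_start_sync(&stripe));	/* prints 0 */
	finish_discard(&stripe);
	printf("discard done, sync may start: %d\n",
	       try_start_sync(&stripe));	/* prints 1 */
	finish_sync(&stripe);
	return 0;
}

In the patch itself the same effect is achieved without a per-stripe
condition variable: the sync side checks STRIPE_DISCARD under
sh->stripe_lock in handle_stripe(), and the discard side in
make_discard_request() sets R5_Overlap on the parity device and sleeps on
conf->wait_for_overlap until STRIPE_SYNCING clears.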
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--   drivers/md/raid5.c   45
1 file changed, 39 insertions, 6 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 52ba88a10668..42a899728748 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2576,6 +2576,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 	int i;
 
 	clear_bit(STRIPE_SYNCING, &sh->state);
+	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+		wake_up(&conf->wait_for_overlap);
 	s->syncing = 0;
 	s->replacing = 0;
 	/* There is nothing more to do for sync/check/repair.
@@ -2749,6 +2751,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 {
 	int i;
 	struct r5dev *dev;
+	int discard_pending = 0;
 
 	for (i = disks; i--; )
 		if (sh->dev[i].written) {
@@ -2777,9 +2780,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						STRIPE_SECTORS,
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
 						0);
-			}
-		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
-			clear_bit(R5_Discard, &sh->dev[i].flags);
+			} else if (test_bit(R5_Discard, &dev->flags))
+				discard_pending = 1;
+		}
+	if (!discard_pending &&
+	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
+		clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+		clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+		if (sh->qd_idx >= 0) {
+			clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+			clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
+		}
+		/* now that discard is done we can proceed with any sync */
+		clear_bit(STRIPE_DISCARD, &sh->state);
+		if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+			set_bit(STRIPE_HANDLE, &sh->state);
+
+	}
 
 	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
 		if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -3431,9 +3448,15 @@ static void handle_stripe(struct stripe_head *sh)
 		return;
 	}
 
-	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
-		set_bit(STRIPE_SYNCING, &sh->state);
-		clear_bit(STRIPE_INSYNC, &sh->state);
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+		spin_lock(&sh->stripe_lock);
+		/* Cannot process 'sync' concurrently with 'discard' */
+		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+			set_bit(STRIPE_SYNCING, &sh->state);
+			clear_bit(STRIPE_INSYNC, &sh->state);
+		}
+		spin_unlock(&sh->stripe_lock);
 	}
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
@@ -3593,6 +3616,8 @@ static void handle_stripe(struct stripe_head *sh)
 	    test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
+		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+			wake_up(&conf->wait_for_overlap);
 	}
 
 	/* If the failed drives are just a ReadError, then we might need
@@ -4159,6 +4184,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
 		prepare_to_wait(&conf->wait_for_overlap, &w,
 				TASK_UNINTERRUPTIBLE);
+		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
+		if (test_bit(STRIPE_SYNCING, &sh->state)) {
+			release_stripe(sh);
+			schedule();
+			goto again;
+		}
+		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
 		spin_lock_irq(&sh->stripe_lock);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4171,6 +4203,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 				goto again;
 			}
 		}
+		set_bit(STRIPE_DISCARD, &sh->state);
 		finish_wait(&conf->wait_for_overlap, &w);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)