aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2009-06-09 00:39:59 -0400
committerNeilBrown <neilb@suse.de>2009-06-09 00:39:59 -0400
commita8c906ca3f63d05f0d25490cf82276f73c6fe095 (patch)
tree993249ef5865a2ec81ed21e8934ed9c0eb9f33dd /drivers
parentf001a70cdc61c01452d42e8b32fd7c7842ef62d5 (diff)
md/raid5 - avoid deadlocks in get_active_stripe during reshape
md has functionality to 'quiesce' an array so that all pending IO has completed and no new IO starts. This is used to achieve a stable state before making internal changes. Currently this quiescing applies equally to normal IO, resync IO, and reshape IO. However there is a problem with applying it to reshape IO. Reshape can have multiple 'stripe_heads' that must be active together. If the quiesce comes between allocating the first and the last of such a collection, then we deadlock, as the last will not be allocated until the quiesce is lifted, the quiesce will not be lifted until the first (which has been allocated) gets used, and that first cannot be used until the last is allocated. It is not necessary to inhibit reshape IO when a quiesce is requested. Those places in the code that require a full quiesce will ensure the reshape thread is not running at all. So allow reshape requests to get access to new stripe_heads without being blocked by a 'quiesce'. This only affects in-place reshapes (i.e. where the array does not grow or shrink) and these are only newly supported. So this patch is not needed in earlier kernels. Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/raid5.c22
1 files changed, 11 insertions, 11 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 75469e63ff10..59f2ec0759b4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q);
362 362
363static struct stripe_head * 363static struct stripe_head *
364get_active_stripe(raid5_conf_t *conf, sector_t sector, 364get_active_stripe(raid5_conf_t *conf, sector_t sector,
365 int previous, int noblock) 365 int previous, int noblock, int noquiesce)
366{ 366{
367 struct stripe_head *sh; 367 struct stripe_head *sh;
368 368
@@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
372 372
373 do { 373 do {
374 wait_event_lock_irq(conf->wait_for_stripe, 374 wait_event_lock_irq(conf->wait_for_stripe,
375 conf->quiesce == 0, 375 conf->quiesce == 0 || noquiesce,
376 conf->device_lock, /* nothing */); 376 conf->device_lock, /* nothing */);
377 sh = __find_stripe(conf, sector, conf->generation - previous); 377 sh = __find_stripe(conf, sector, conf->generation - previous);
378 if (!sh) { 378 if (!sh) {
@@ -2671,7 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2671 sector_t bn = compute_blocknr(sh, i, 1); 2671 sector_t bn = compute_blocknr(sh, i, 1);
2672 sector_t s = raid5_compute_sector(conf, bn, 0, 2672 sector_t s = raid5_compute_sector(conf, bn, 0,
2673 &dd_idx, NULL); 2673 &dd_idx, NULL);
2674 sh2 = get_active_stripe(conf, s, 0, 1); 2674 sh2 = get_active_stripe(conf, s, 0, 1, 1);
2675 if (sh2 == NULL) 2675 if (sh2 == NULL)
2676 /* so far only the early blocks of this stripe 2676 /* so far only the early blocks of this stripe
2677 * have been requested. When later blocks 2677 * have been requested. When later blocks
@@ -2944,7 +2944,7 @@ static bool handle_stripe5(struct stripe_head *sh)
2944 /* Finish reconstruct operations initiated by the expansion process */ 2944 /* Finish reconstruct operations initiated by the expansion process */
2945 if (sh->reconstruct_state == reconstruct_state_result) { 2945 if (sh->reconstruct_state == reconstruct_state_result) {
2946 struct stripe_head *sh2 2946 struct stripe_head *sh2
2947 = get_active_stripe(conf, sh->sector, 1, 1); 2947 = get_active_stripe(conf, sh->sector, 1, 1, 1);
2948 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { 2948 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
2949 /* sh cannot be written until sh2 has been read. 2949 /* sh cannot be written until sh2 has been read.
2950 * so arrange for sh to be delayed a little 2950 * so arrange for sh to be delayed a little
@@ -3189,7 +3189,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
3189 3189
3190 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3190 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
3191 struct stripe_head *sh2 3191 struct stripe_head *sh2
3192 = get_active_stripe(conf, sh->sector, 1, 1); 3192 = get_active_stripe(conf, sh->sector, 1, 1, 1);
3193 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { 3193 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
3194 /* sh cannot be written until sh2 has been read. 3194 /* sh cannot be written until sh2 has been read.
3195 * so arrange for sh to be delayed a little 3195 * so arrange for sh to be delayed a little
@@ -3675,7 +3675,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3675 (unsigned long long)logical_sector); 3675 (unsigned long long)logical_sector);
3676 3676
3677 sh = get_active_stripe(conf, new_sector, previous, 3677 sh = get_active_stripe(conf, new_sector, previous,
3678 (bi->bi_rw&RWA_MASK)); 3678 (bi->bi_rw&RWA_MASK), 0);
3679 if (sh) { 3679 if (sh) {
3680 if (unlikely(previous)) { 3680 if (unlikely(previous)) {
3681 /* expansion might have moved on while waiting for a 3681 /* expansion might have moved on while waiting for a
@@ -3873,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3873 for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { 3873 for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
3874 int j; 3874 int j;
3875 int skipped = 0; 3875 int skipped = 0;
3876 sh = get_active_stripe(conf, stripe_addr+i, 0, 0); 3876 sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
3877 set_bit(STRIPE_EXPANDING, &sh->state); 3877 set_bit(STRIPE_EXPANDING, &sh->state);
3878 atomic_inc(&conf->reshape_stripes); 3878 atomic_inc(&conf->reshape_stripes);
3879 /* If any of this stripe is beyond the end of the old 3879 /* If any of this stripe is beyond the end of the old
@@ -3922,7 +3922,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3922 if (last_sector >= mddev->dev_sectors) 3922 if (last_sector >= mddev->dev_sectors)
3923 last_sector = mddev->dev_sectors - 1; 3923 last_sector = mddev->dev_sectors - 1;
3924 while (first_sector <= last_sector) { 3924 while (first_sector <= last_sector) {
3925 sh = get_active_stripe(conf, first_sector, 1, 0); 3925 sh = get_active_stripe(conf, first_sector, 1, 0, 1);
3926 set_bit(STRIPE_EXPAND_SOURCE, &sh->state); 3926 set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
3927 set_bit(STRIPE_HANDLE, &sh->state); 3927 set_bit(STRIPE_HANDLE, &sh->state);
3928 release_stripe(sh); 3928 release_stripe(sh);
@@ -4022,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
4022 4022
4023 bitmap_cond_end_sync(mddev->bitmap, sector_nr); 4023 bitmap_cond_end_sync(mddev->bitmap, sector_nr);
4024 4024
4025 sh = get_active_stripe(conf, sector_nr, 0, 1); 4025 sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
4026 if (sh == NULL) { 4026 if (sh == NULL) {
4027 sh = get_active_stripe(conf, sector_nr, 0, 0); 4027 sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
4028 /* make sure we don't swamp the stripe cache if someone else 4028 /* make sure we don't swamp the stripe cache if someone else
4029 * is trying to get access 4029 * is trying to get access
4030 */ 4030 */
@@ -4086,7 +4086,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
4086 /* already done this stripe */ 4086 /* already done this stripe */
4087 continue; 4087 continue;
4088 4088
4089 sh = get_active_stripe(conf, sector, 0, 1); 4089 sh = get_active_stripe(conf, sector, 0, 1, 0);
4090 4090
4091 if (!sh) { 4091 if (!sh) {
4092 /* failed to get a stripe - must wait */ 4092 /* failed to get a stripe - must wait */