aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2008-02-06 04:40:00 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-06 13:41:19 -0500
commit: 6ed3003c19a96fe18edf8179c4be6fe14abbebbc (patch)
tree: dba37b52eb4c68fedc9e842a69e2c0c3b4a5aa31 /drivers/md/raid5.c
parent: 73c34431c7119d0bc7d3436abfad75fe47b2c51f (diff)
md: fix an occasional deadlock in raid5
raid5's 'make_request' function calls generic_make_request on underlying devices and if we run out of stripe heads, it could end up waiting for one of those requests to complete. This is bad as recursive calls to generic_make_request go on a queue and are not even attempted until make_request completes. So: don't make any generic_make_request calls in raid5 make_request until all waiting has been done. We do this by simply setting STRIPE_HANDLE instead of calling handle_stripe(). If we need more stripe_heads, raid5d will get called to process the pending stripe_heads which will call generic_make_request from a different thread, where no deadlock will happen. This change by itself causes a performance hit. So add a change so that raid5_activate_delayed is only called at unplug time, never in raid5d. This seems to bring back the performance numbers. Calling it in raid5d was sometimes too soon... Neil said: How about we queue it for 2.6.25-rc1 and then about when -rc2 comes out, we queue it for 2.6.24.y? Acked-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Neil Brown <neilb@suse.de> Tested-by: dean gaudet <dean@arctic.org> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- drivers/md/raid5.c | 14
1 files changed, 5 insertions, 9 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 63bfb0757829..2d6f1a51359c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3159,7 +3159,8 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
3159 atomic_inc(&conf->preread_active_stripes); 3159 atomic_inc(&conf->preread_active_stripes);
3160 list_add_tail(&sh->lru, &conf->handle_list); 3160 list_add_tail(&sh->lru, &conf->handle_list);
3161 } 3161 }
3162 } 3162 } else
3163 blk_plug_device(conf->mddev->queue);
3163} 3164}
3164 3165
3165static void activate_bit_delay(raid5_conf_t *conf) 3166static void activate_bit_delay(raid5_conf_t *conf)
@@ -3549,7 +3550,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
3549 goto retry; 3550 goto retry;
3550 } 3551 }
3551 finish_wait(&conf->wait_for_overlap, &w); 3552 finish_wait(&conf->wait_for_overlap, &w);
3552 handle_stripe(sh, NULL); 3553 set_bit(STRIPE_HANDLE, &sh->state);
3554 clear_bit(STRIPE_DELAYED, &sh->state);
3553 release_stripe(sh); 3555 release_stripe(sh);
3554 } else { 3556 } else {
3555 /* cannot get stripe for read-ahead, just give-up */ 3557 /* cannot get stripe for read-ahead, just give-up */
@@ -3892,7 +3894,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3892 * During the scan, completed stripes are saved for us by the interrupt 3894 * During the scan, completed stripes are saved for us by the interrupt
3893 * handler, so that they will not have to wait for our next wakeup. 3895 * handler, so that they will not have to wait for our next wakeup.
3894 */ 3896 */
3895static void raid5d (mddev_t *mddev) 3897static void raid5d(mddev_t *mddev)
3896{ 3898{
3897 struct stripe_head *sh; 3899 struct stripe_head *sh;
3898 raid5_conf_t *conf = mddev_to_conf(mddev); 3900 raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -3917,12 +3919,6 @@ static void raid5d (mddev_t *mddev)
3917 activate_bit_delay(conf); 3919 activate_bit_delay(conf);
3918 } 3920 }
3919 3921
3920 if (list_empty(&conf->handle_list) &&
3921 atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
3922 !blk_queue_plugged(mddev->queue) &&
3923 !list_empty(&conf->delayed_list))
3924 raid5_activate_delayed(conf);
3925
3926 while ((bio = remove_bio_from_retry(conf))) { 3922 while ((bio = remove_bio_from_retry(conf))) {
3927 int ok; 3923 int ok;
3928 spin_unlock_irq(&conf->device_lock); 3924 spin_unlock_irq(&conf->device_lock);