diff options
author | NeilBrown <neilb@suse.de> | 2014-05-27 23:39:22 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2014-05-29 02:59:46 -0400 |
commit | 67f455486d2ea20b2d94d6adf5b9b783d079e321 (patch) | |
tree | 320f360d77b56f03c754b29fa617d61d25d40ec4 | |
parent | bd8839e03b8e70fbbe08bbff70d8cba95273c823 (diff) |
md/raid56: Don't perform reads to support writes until stripe is ready.
If it is found that we need to pre-read some blocks before a write
can succeed, we normally set STRIPE_DELAYED and don't actually perform
the read until STRIPE_PREREAD_ACTIVE subsequently gets set.
However for a degraded RAID6 we currently perform the reads as soon
as we see that a write is pending. This significantly hurts
throughput.
So:
- when handle_stripe_dirtying finds a block that it wants on a device
that is failed, set STRIPE_DELAYED, instead of doing nothing, and
- when fetch_block detects that a read might be required to satisfy a
write, only perform the read if STRIPE_PREREAD_ACTIVE is set,
and if we would actually need to read something to complete the write.
This also helps RAID5, though less often as RAID5 supports a
read-modify-write cycle. For RAID5 the read is performed too early
only if the write is not a full 4K aligned write (i.e. not an
R5_OVERWRITE).
Also clean up a couple of horrible bits of formatting.
Reported-by: Patrik Horník <patrik@dsl.sk>
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 30 |
1 files changed, 18 insertions, 12 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ad1b9bea446e..c1e8607d8340 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh, | |||
292 | BUG_ON(atomic_read(&conf->active_stripes)==0); | 292 | BUG_ON(atomic_read(&conf->active_stripes)==0); |
293 | if (test_bit(STRIPE_HANDLE, &sh->state)) { | 293 | if (test_bit(STRIPE_HANDLE, &sh->state)) { |
294 | if (test_bit(STRIPE_DELAYED, &sh->state) && | 294 | if (test_bit(STRIPE_DELAYED, &sh->state) && |
295 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) | 295 | !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { |
296 | list_add_tail(&sh->lru, &conf->delayed_list); | 296 | list_add_tail(&sh->lru, &conf->delayed_list); |
297 | else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 297 | if (atomic_read(&conf->preread_active_stripes) |
298 | < IO_THRESHOLD) | ||
299 | md_wakeup_thread(conf->mddev->thread); | ||
300 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | ||
298 | sh->bm_seq - conf->seq_write > 0) | 301 | sh->bm_seq - conf->seq_write > 0) |
299 | list_add_tail(&sh->lru, &conf->bitmap_list); | 302 | list_add_tail(&sh->lru, &conf->bitmap_list); |
300 | else { | 303 | else { |
@@ -2886,8 +2889,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, | |||
2886 | (s->failed >= 1 && fdev[0]->toread) || | 2889 | (s->failed >= 1 && fdev[0]->toread) || |
2887 | (s->failed >= 2 && fdev[1]->toread) || | 2890 | (s->failed >= 2 && fdev[1]->toread) || |
2888 | (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && | 2891 | (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && |
2892 | (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) && | ||
2889 | !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || | 2893 | !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || |
2890 | (sh->raid_conf->level == 6 && s->failed && s->to_write))) { | 2894 | (sh->raid_conf->level == 6 && s->failed && s->to_write && |
2895 | s->to_write < sh->raid_conf->raid_disks - 2 && | ||
2896 | (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) { | ||
2891 | /* we would like to get this block, possibly by computing it, | 2897 | /* we would like to get this block, possibly by computing it, |
2892 | * otherwise read it if the backing disk is insync | 2898 | * otherwise read it if the backing disk is insync |
2893 | */ | 2899 | */ |
@@ -3086,7 +3092,8 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
3086 | !test_bit(R5_LOCKED, &dev->flags) && | 3092 | !test_bit(R5_LOCKED, &dev->flags) && |
3087 | !(test_bit(R5_UPTODATE, &dev->flags) || | 3093 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3088 | test_bit(R5_Wantcompute, &dev->flags))) { | 3094 | test_bit(R5_Wantcompute, &dev->flags))) { |
3089 | if (test_bit(R5_Insync, &dev->flags)) rcw++; | 3095 | if (test_bit(R5_Insync, &dev->flags)) |
3096 | rcw++; | ||
3090 | else | 3097 | else |
3091 | rcw += 2*disks; | 3098 | rcw += 2*disks; |
3092 | } | 3099 | } |
@@ -3107,10 +3114,10 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
3107 | !(test_bit(R5_UPTODATE, &dev->flags) || | 3114 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3108 | test_bit(R5_Wantcompute, &dev->flags)) && | 3115 | test_bit(R5_Wantcompute, &dev->flags)) && |
3109 | test_bit(R5_Insync, &dev->flags)) { | 3116 | test_bit(R5_Insync, &dev->flags)) { |
3110 | if ( | 3117 | if (test_bit(STRIPE_PREREAD_ACTIVE, |
3111 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 3118 | &sh->state)) { |
3112 | pr_debug("Read_old block " | 3119 | pr_debug("Read_old block %d for r-m-w\n", |
3113 | "%d for r-m-w\n", i); | 3120 | i); |
3114 | set_bit(R5_LOCKED, &dev->flags); | 3121 | set_bit(R5_LOCKED, &dev->flags); |
3115 | set_bit(R5_Wantread, &dev->flags); | 3122 | set_bit(R5_Wantread, &dev->flags); |
3116 | s->locked++; | 3123 | s->locked++; |
@@ -3133,10 +3140,9 @@ static void handle_stripe_dirtying(struct r5conf *conf, | |||
3133 | !(test_bit(R5_UPTODATE, &dev->flags) || | 3140 | !(test_bit(R5_UPTODATE, &dev->flags) || |
3134 | test_bit(R5_Wantcompute, &dev->flags))) { | 3141 | test_bit(R5_Wantcompute, &dev->flags))) { |
3135 | rcw++; | 3142 | rcw++; |
3136 | if (!test_bit(R5_Insync, &dev->flags)) | 3143 | if (test_bit(R5_Insync, &dev->flags) && |
3137 | continue; /* it's a failed drive */ | 3144 | test_bit(STRIPE_PREREAD_ACTIVE, |
3138 | if ( | 3145 | &sh->state)) { |
3139 | test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | ||
3140 | pr_debug("Read_old block " | 3146 | pr_debug("Read_old block " |
3141 | "%d for Reconstruct\n", i); | 3147 | "%d for Reconstruct\n", i); |
3142 | set_bit(R5_LOCKED, &dev->flags); | 3148 | set_bit(R5_LOCKED, &dev->flags); |