aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2014-05-27 23:39:22 -0400
committer NeilBrown <neilb@suse.de> 2014-05-29 02:59:46 -0400
commit 67f455486d2ea20b2d94d6adf5b9b783d079e321 (patch)
tree 320f360d77b56f03c754b29fa617d61d25d40ec4
parent bd8839e03b8e70fbbe08bbff70d8cba95273c823 (diff)
md/raid56: Don't perform reads to support writes until stripe is ready.
If it is found that we need to pre-read some blocks before a write can succeed, we normally set STRIPE_DELAYED and don't actually perform the read until STRIPE_PREREAD_ACTIVE subsequently gets set. However for a degraded RAID6 we currently perform the reads as soon as we see that a write is pending. This significantly hurts throughput. So: - when handle_stripe_dirtying finds a block that it wants on a device that is failed, set STRIPE_DELAYED, instead of doing nothing, and - when fetch_block detects that a read might be required to satisfy a write, only perform the read if STRIPE_PREREAD_ACTIVE is set, and if we would actually need to read something to complete the write. This also helps RAID5, though less often as RAID5 supports a read-modify-write cycle. For RAID5 the read is performed too early only if the write is not a full 4K aligned write (i.e. not an R5_OVERWRITE). Also clean up a couple of horrible bits of formatting. Reported-by: Patrik Horník <patrik@dsl.sk> Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid5.c 30
1 files changed, 18 insertions, 12 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ad1b9bea446e..c1e8607d8340 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
292 BUG_ON(atomic_read(&conf->active_stripes)==0); 292 BUG_ON(atomic_read(&conf->active_stripes)==0);
293 if (test_bit(STRIPE_HANDLE, &sh->state)) { 293 if (test_bit(STRIPE_HANDLE, &sh->state)) {
294 if (test_bit(STRIPE_DELAYED, &sh->state) && 294 if (test_bit(STRIPE_DELAYED, &sh->state) &&
295 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 295 !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
296 list_add_tail(&sh->lru, &conf->delayed_list); 296 list_add_tail(&sh->lru, &conf->delayed_list);
297 else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && 297 if (atomic_read(&conf->preread_active_stripes)
298 < IO_THRESHOLD)
299 md_wakeup_thread(conf->mddev->thread);
300 } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
298 sh->bm_seq - conf->seq_write > 0) 301 sh->bm_seq - conf->seq_write > 0)
299 list_add_tail(&sh->lru, &conf->bitmap_list); 302 list_add_tail(&sh->lru, &conf->bitmap_list);
300 else { 303 else {
@@ -2886,8 +2889,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
2886 (s->failed >= 1 && fdev[0]->toread) || 2889 (s->failed >= 1 && fdev[0]->toread) ||
2887 (s->failed >= 2 && fdev[1]->toread) || 2890 (s->failed >= 2 && fdev[1]->toread) ||
2888 (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && 2891 (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
2892 (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
2889 !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || 2893 !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
2890 (sh->raid_conf->level == 6 && s->failed && s->to_write))) { 2894 (sh->raid_conf->level == 6 && s->failed && s->to_write &&
2895 s->to_write < sh->raid_conf->raid_disks - 2 &&
2896 (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
2891 /* we would like to get this block, possibly by computing it, 2897 /* we would like to get this block, possibly by computing it,
2892 * otherwise read it if the backing disk is insync 2898 * otherwise read it if the backing disk is insync
2893 */ 2899 */
@@ -3086,7 +3092,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
3086 !test_bit(R5_LOCKED, &dev->flags) && 3092 !test_bit(R5_LOCKED, &dev->flags) &&
3087 !(test_bit(R5_UPTODATE, &dev->flags) || 3093 !(test_bit(R5_UPTODATE, &dev->flags) ||
3088 test_bit(R5_Wantcompute, &dev->flags))) { 3094 test_bit(R5_Wantcompute, &dev->flags))) {
3089 if (test_bit(R5_Insync, &dev->flags)) rcw++; 3095 if (test_bit(R5_Insync, &dev->flags))
3096 rcw++;
3090 else 3097 else
3091 rcw += 2*disks; 3098 rcw += 2*disks;
3092 } 3099 }
@@ -3107,10 +3114,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
3107 !(test_bit(R5_UPTODATE, &dev->flags) || 3114 !(test_bit(R5_UPTODATE, &dev->flags) ||
3108 test_bit(R5_Wantcompute, &dev->flags)) && 3115 test_bit(R5_Wantcompute, &dev->flags)) &&
3109 test_bit(R5_Insync, &dev->flags)) { 3116 test_bit(R5_Insync, &dev->flags)) {
3110 if ( 3117 if (test_bit(STRIPE_PREREAD_ACTIVE,
3111 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 3118 &sh->state)) {
3112 pr_debug("Read_old block " 3119 pr_debug("Read_old block %d for r-m-w\n",
3113 "%d for r-m-w\n", i); 3120 i);
3114 set_bit(R5_LOCKED, &dev->flags); 3121 set_bit(R5_LOCKED, &dev->flags);
3115 set_bit(R5_Wantread, &dev->flags); 3122 set_bit(R5_Wantread, &dev->flags);
3116 s->locked++; 3123 s->locked++;
@@ -3133,10 +3140,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
3133 !(test_bit(R5_UPTODATE, &dev->flags) || 3140 !(test_bit(R5_UPTODATE, &dev->flags) ||
3134 test_bit(R5_Wantcompute, &dev->flags))) { 3141 test_bit(R5_Wantcompute, &dev->flags))) {
3135 rcw++; 3142 rcw++;
3136 if (!test_bit(R5_Insync, &dev->flags)) 3143 if (test_bit(R5_Insync, &dev->flags) &&
3137 continue; /* it's a failed drive */ 3144 test_bit(STRIPE_PREREAD_ACTIVE,
3138 if ( 3145 &sh->state)) {
3139 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
3140 pr_debug("Read_old block " 3146 pr_debug("Read_old block "
3141 "%d for Reconstruct\n", i); 3147 "%d for Reconstruct\n", i);
3142 set_bit(R5_LOCKED, &dev->flags); 3148 set_bit(R5_LOCKED, &dev->flags);