path: root/drivers/md/raid5.c
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--	drivers/md/raid5.c	148
1 file changed, 87 insertions, 61 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 1ba97fdc6df1..553d54b87052 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static bool stripe_can_batch(struct stripe_head *sh)
 {
 	return test_bit(STRIPE_BATCH_READY, &sh->state) &&
+		!test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
 		is_full_stripe_write(sh);
 }
 
@@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
 	    < IO_THRESHOLD)
 		md_wakeup_thread(conf->mddev->thread);
 
+	if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
+		int seq = sh->bm_seq;
+		if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) &&
+		    sh->batch_head->bm_seq > seq)
+			seq = sh->batch_head->bm_seq;
+		set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state);
+		sh->batch_head->bm_seq = seq;
+	}
+
 	atomic_inc(&sh->count);
 unlock_out:
 	unlock_two_stripes(head, sh);
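
The hunk above is the heart of the bitmap fix: when a stripe still carrying STRIPE_BIT_DELAY is merged into a batch, its bitmap sequence number is folded into the batch head, taking the larger of the two, so the whole batch stays delayed until the newest bitmap flush completes. A minimal userspace model of that merge (the struct and names are hypothetical stand-ins; the stripe locks held by the real code are not shown):

	#include <stdbool.h>

	/* Stand-in for the two stripe_head fields involved in the merge. */
	struct toy_stripe {
		bool bit_delay;	/* models STRIPE_BIT_DELAY */
		int bm_seq;	/* bitmap flush sequence to wait for */
	};

	/* Fold a member's pending bitmap delay into the batch head,
	 * keeping the newest sequence so the head waits long enough. */
	static void merge_bit_delay(struct toy_stripe *member,
				    struct toy_stripe *head)
	{
		if (!member->bit_delay)
			return;
		member->bit_delay = false;
		if (!head->bit_delay || member->bm_seq > head->bm_seq)
			head->bm_seq = member->bm_seq;
		head->bit_delay = true;
	}
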
@@ -1822,7 +1832,7 @@ again:
 	} else
 		init_async_submit(&submit, 0, tx, NULL, NULL,
 				  to_addr_conv(sh, percpu, j));
-	async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+	tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
 	if (!last_stripe) {
 		j++;
 		sh = list_first_entry(&sh->batch_list, struct stripe_head,
@@ -2987,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
 	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
 		 (unsigned long long)(*bip)->bi_iter.bi_sector,
 		 (unsigned long long)sh->sector, dd_idx);
-	spin_unlock_irq(&sh->stripe_lock);
 
 	if (conf->mddev->bitmap && firstwrite) {
+		/* Cannot hold spinlock over bitmap_startwrite,
+		 * but must ensure this isn't added to a batch until
+		 * we have added to the bitmap and set bm_seq.
+		 * So set STRIPE_BITMAP_PENDING to prevent
+		 * batching.
+		 * If multiple add_stripe_bio() calls race here they
+		 * must all set STRIPE_BITMAP_PENDING.  So only the first one
+		 * to complete "bitmap_startwrite" gets to set
+		 * STRIPE_BIT_DELAY.  This is important as once a stripe
+		 * is added to a batch, STRIPE_BIT_DELAY cannot be changed
+		 * any more.
+		 */
+		set_bit(STRIPE_BITMAP_PENDING, &sh->state);
+		spin_unlock_irq(&sh->stripe_lock);
 		bitmap_startwrite(conf->mddev->bitmap, sh->sector,
 				  STRIPE_SECTORS, 0);
-		sh->bm_seq = conf->seq_flush+1;
-		set_bit(STRIPE_BIT_DELAY, &sh->state);
+		spin_lock_irq(&sh->stripe_lock);
+		clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
+		if (!sh->batch_head) {
+			sh->bm_seq = conf->seq_flush+1;
+			set_bit(STRIPE_BIT_DELAY, &sh->state);
+		}
 	}
+	spin_unlock_irq(&sh->stripe_lock);
 
 	if (stripe_can_batch(sh))
 		stripe_add_to_batch_list(conf, sh);
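
The comment in this hunk describes a lock-drop window: bitmap_startwrite() may sleep, so stripe_lock cannot be held across it, yet the stripe must not join a batch before bm_seq is set. STRIPE_BITMAP_PENDING bridges that window, and after relocking, batch_head is rechecked so only a still-unbatched stripe sets STRIPE_BIT_DELAY. A userspace sketch of the same pattern (all names are hypothetical; a pthread mutex stands in for the stripe spinlock):

	#include <pthread.h>
	#include <stdbool.h>

	struct toy_stripe {
		pthread_mutex_t lock;		/* models sh->stripe_lock */
		bool bitmap_pending;		/* models STRIPE_BITMAP_PENDING */
		bool bit_delay;			/* models STRIPE_BIT_DELAY */
		struct toy_stripe *batch_head;	/* non-NULL once batched */
	};

	/* Batching must refuse any stripe with a bitmap update in
	 * flight, as the new test in stripe_can_batch() does. */
	static bool toy_can_batch(struct toy_stripe *s)
	{
		return !s->bitmap_pending;
	}

	static void toy_first_write(struct toy_stripe *s,
				    void (*slow_bitmap_op)(void))
	{
		pthread_mutex_lock(&s->lock);
		s->bitmap_pending = true;	/* keep out of batches ...        */
		pthread_mutex_unlock(&s->lock);	/* ... across the unlocked window */

		slow_bitmap_op();		/* may sleep; lock not held */

		pthread_mutex_lock(&s->lock);
		s->bitmap_pending = false;
		if (s->batch_head == NULL)	/* still unbatched: safe to set */
			s->bit_delay = true;	/* the delay on this stripe    */
		pthread_mutex_unlock(&s->lock);
	}
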
@@ -3392,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh,
 	set_bit(STRIPE_HANDLE, &sh->state);
 }
 
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+				    unsigned long handle_flags);
 /* handle_stripe_clean_event
  * any written block on an uptodate or failed drive can be returned.
  * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
@@ -3405,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 	int discard_pending = 0;
 	struct stripe_head *head_sh = sh;
 	bool do_endio = false;
-	int wakeup_nr = 0;
 
 	for (i = disks; i--; )
 		if (sh->dev[i].written) {
@@ -3494,44 +3523,8 @@ unhash:
 		if (atomic_dec_and_test(&conf->pending_full_writes))
 			md_wakeup_thread(conf->mddev->thread);
 
-	if (!head_sh->batch_head || !do_endio)
-		return;
-	for (i = 0; i < head_sh->disks; i++) {
-		if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
-			wakeup_nr++;
-	}
-	while (!list_empty(&head_sh->batch_list)) {
-		int i;
-		sh = list_first_entry(&head_sh->batch_list,
-				      struct stripe_head, batch_list);
-		list_del_init(&sh->batch_list);
-
-		set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
-			      head_sh->state & ~((1 << STRIPE_ACTIVE) |
-						 (1 << STRIPE_PREREAD_ACTIVE) |
-						 STRIPE_EXPAND_SYNC_FLAG));
-		sh->check_state = head_sh->check_state;
-		sh->reconstruct_state = head_sh->reconstruct_state;
-		for (i = 0; i < sh->disks; i++) {
-			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-				wakeup_nr++;
-			sh->dev[i].flags = head_sh->dev[i].flags;
-		}
-
-		spin_lock_irq(&sh->stripe_lock);
-		sh->batch_head = NULL;
-		spin_unlock_irq(&sh->stripe_lock);
-		if (sh->state & STRIPE_EXPAND_SYNC_FLAG)
-			set_bit(STRIPE_HANDLE, &sh->state);
-		release_stripe(sh);
-	}
-
-	spin_lock_irq(&head_sh->stripe_lock);
-	head_sh->batch_head = NULL;
-	spin_unlock_irq(&head_sh->stripe_lock);
-	wake_up_nr(&conf->wait_for_overlap, wakeup_nr);
-	if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG)
-		set_bit(STRIPE_HANDLE, &head_sh->state);
+	if (head_sh->batch_head && do_endio)
+		break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
 }
 
 static void handle_stripe_dirtying(struct r5conf *conf,
@@ -4172,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 
 static int clear_batch_ready(struct stripe_head *sh)
 {
+	/* Return '1' if this is a member of batch, or
+	 * '0' if it is a lone stripe or a head which can now be
+	 * handled.
+	 */
 	struct stripe_head *tmp;
 	if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
-		return 0;
+		return (sh->batch_head && sh->batch_head != sh);
 	spin_lock(&sh->stripe_lock);
 	if (!sh->batch_head) {
 		spin_unlock(&sh->stripe_lock);
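
The changed return statement is easy to misread: when STRIPE_BATCH_READY is already clear, the old code answered '0' (go ahead and handle), which let a batch member be handled independently of its head. Per the new comment, the function now reports whether the stripe is a non-head batch member. A toy restatement of that contract (hypothetical type; the locking and list handling of the real function are omitted):

	struct toy_stripe {
		int batch_ready;		/* models STRIPE_BATCH_READY */
		struct toy_stripe *batch_head;	/* NULL, self (head), or head */
	};

	/* Return 1 for a batch member (the head will handle it), 0 for
	 * a lone stripe or a head the caller may now handle itself. */
	static int toy_clear_batch_ready(struct toy_stripe *sh)
	{
		if (!sh->batch_ready)
			return sh->batch_head && sh->batch_head != sh;
		sh->batch_ready = 0;
		/* the real code re-checks under stripe_lock; a lone
		 * stripe or a head falls through to 0 */
		return 0;
	}
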
@@ -4202,38 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh)
 		return 0;
 }
 
-static void check_break_stripe_batch_list(struct stripe_head *sh)
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+				    unsigned long handle_flags)
 {
-	struct stripe_head *head_sh, *next;
+	struct stripe_head *sh, *next;
 	int i;
-
-	if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
-		return;
-
-	head_sh = sh;
+	int do_wakeup = 0;
 
 	list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
 
 		list_del_init(&sh->batch_list);
 
-		set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
-			      head_sh->state & ~((1 << STRIPE_ACTIVE) |
-						 (1 << STRIPE_PREREAD_ACTIVE) |
-						 (1 << STRIPE_DEGRADED) |
-						 STRIPE_EXPAND_SYNC_FLAG));
+		WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+					  (1 << STRIPE_SYNCING) |
+					  (1 << STRIPE_REPLACED) |
+					  (1 << STRIPE_PREREAD_ACTIVE) |
+					  (1 << STRIPE_DELAYED) |
+					  (1 << STRIPE_BIT_DELAY) |
+					  (1 << STRIPE_FULL_WRITE) |
+					  (1 << STRIPE_BIOFILL_RUN) |
+					  (1 << STRIPE_COMPUTE_RUN) |
+					  (1 << STRIPE_OPS_REQ_PENDING) |
+					  (1 << STRIPE_DISCARD) |
+					  (1 << STRIPE_BATCH_READY) |
+					  (1 << STRIPE_BATCH_ERR) |
+					  (1 << STRIPE_BITMAP_PENDING)));
+		WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+					       (1 << STRIPE_REPLACED)));
+
+		set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+					    (1 << STRIPE_DEGRADED)),
+			      head_sh->state & (1 << STRIPE_INSYNC));
+
 		sh->check_state = head_sh->check_state;
 		sh->reconstruct_state = head_sh->reconstruct_state;
-		for (i = 0; i < sh->disks; i++)
+		for (i = 0; i < sh->disks; i++) {
+			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+				do_wakeup = 1;
 			sh->dev[i].flags = head_sh->dev[i].flags &
 				(~((1 << R5_WriteError) | (1 << R5_Overlap)));
-
+		}
 		spin_lock_irq(&sh->stripe_lock);
 		sh->batch_head = NULL;
 		spin_unlock_irq(&sh->stripe_lock);
-
-		set_bit(STRIPE_HANDLE, &sh->state);
+		if (handle_flags == 0 ||
+		    sh->state & handle_flags)
+			set_bit(STRIPE_HANDLE, &sh->state);
 		release_stripe(sh);
 	}
+	spin_lock_irq(&head_sh->stripe_lock);
+	head_sh->batch_head = NULL;
+	spin_unlock_irq(&head_sh->stripe_lock);
+	for (i = 0; i < head_sh->disks; i++)
+		if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
+			do_wakeup = 1;
+	if (head_sh->state & handle_flags)
+		set_bit(STRIPE_HANDLE, &head_sh->state);
+
+	if (do_wakeup)
+		wake_up(&head_sh->raid_conf->wait_for_overlap);
 }
 
 static void handle_stripe(struct stripe_head *sh)
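
Both the old and new code rely on the kernel helper set_mask_bits() from include/linux/bitops.h, which atomically (via a cmpxchg loop) computes *ptr = (*ptr & ~mask) | bits. A plain, non-atomic model makes the new call above readable: everything except the expand/sync flags and STRIPE_DEGRADED is cleared from the member, then the head's STRIPE_INSYNC bit is copied in (flag names as in the diff; this standalone function is only an illustration):

	/* Non-atomic model of set_mask_bits(): bits selected by 'mask'
	 * are cleared, then 'bits' are OR-ed in. */
	static unsigned long model_set_mask_bits(unsigned long old,
						 unsigned long mask,
						 unsigned long bits)
	{
		return (old & ~mask) | bits;
	}

	/* The call in the hunk therefore reads as:
	 *   sh->state = (sh->state & (STRIPE_EXPAND_SYNC_FLAGS |
	 *			       (1 << STRIPE_DEGRADED)))
	 *		 | (head_sh->state & (1 << STRIPE_INSYNC));
	 */
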
@@ -4258,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh)
 		return;
 	}
 
-	check_break_stripe_batch_list(sh);
+	if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
+		break_stripe_batch_list(sh, 0);
 
 	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
 		spin_lock(&sh->stripe_lock);
@@ -4312,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh)
 	if (s.failed > conf->max_degraded) {
 		sh->check_state = 0;
 		sh->reconstruct_state = 0;
+		break_stripe_batch_list(sh, 0);
 		if (s.to_read+s.to_write+s.written)
 			handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
 		if (s.syncing + s.replacing)
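
All three call sites of break_stripe_batch_list() are now visible: handle_stripe_clean_event() passes STRIPE_EXPAND_SYNC_FLAGS, while the batch-error and failed-stripe paths pass 0. The handle_flags argument is a filter on which released members get STRIPE_HANDLE set, with 0 meaning "match every stripe"; the test inside the loop boils down to the following (a restatement, not kernel code):

	#include <stdbool.h>

	/* handle_flags == 0: unconditionally re-queue every freed
	 * member; otherwise only those whose state intersects it. */
	static bool should_set_handle(unsigned long state,
				      unsigned long handle_flags)
	{
		return handle_flags == 0 || (state & handle_flags);
	}
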