diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 148 |
1 files changed, 87 insertions, 61 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1ba97fdc6df1..553d54b87052 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) | |||
749 | static bool stripe_can_batch(struct stripe_head *sh) | 749 | static bool stripe_can_batch(struct stripe_head *sh) |
750 | { | 750 | { |
751 | return test_bit(STRIPE_BATCH_READY, &sh->state) && | 751 | return test_bit(STRIPE_BATCH_READY, &sh->state) && |
752 | !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && | ||
752 | is_full_stripe_write(sh); | 753 | is_full_stripe_write(sh); |
753 | } | 754 | } |
754 | 755 | ||
@@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh | |||
837 | < IO_THRESHOLD) | 838 | < IO_THRESHOLD) |
838 | md_wakeup_thread(conf->mddev->thread); | 839 | md_wakeup_thread(conf->mddev->thread); |
839 | 840 | ||
841 | if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) { | ||
842 | int seq = sh->bm_seq; | ||
843 | if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) && | ||
844 | sh->batch_head->bm_seq > seq) | ||
845 | seq = sh->batch_head->bm_seq; | ||
846 | set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state); | ||
847 | sh->batch_head->bm_seq = seq; | ||
848 | } | ||
849 | |||
840 | atomic_inc(&sh->count); | 850 | atomic_inc(&sh->count); |
841 | unlock_out: | 851 | unlock_out: |
842 | unlock_two_stripes(head, sh); | 852 | unlock_two_stripes(head, sh); |
@@ -1822,7 +1832,7 @@ again: | |||
1822 | } else | 1832 | } else |
1823 | init_async_submit(&submit, 0, tx, NULL, NULL, | 1833 | init_async_submit(&submit, 0, tx, NULL, NULL, |
1824 | to_addr_conv(sh, percpu, j)); | 1834 | to_addr_conv(sh, percpu, j)); |
1825 | async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); | 1835 | tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit); |
1826 | if (!last_stripe) { | 1836 | if (!last_stripe) { |
1827 | j++; | 1837 | j++; |
1828 | sh = list_first_entry(&sh->batch_list, struct stripe_head, | 1838 | sh = list_first_entry(&sh->batch_list, struct stripe_head, |
@@ -2987,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, | |||
2987 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", | 2997 | pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", |
2988 | (unsigned long long)(*bip)->bi_iter.bi_sector, | 2998 | (unsigned long long)(*bip)->bi_iter.bi_sector, |
2989 | (unsigned long long)sh->sector, dd_idx); | 2999 | (unsigned long long)sh->sector, dd_idx); |
2990 | spin_unlock_irq(&sh->stripe_lock); | ||
2991 | 3000 | ||
2992 | if (conf->mddev->bitmap && firstwrite) { | 3001 | if (conf->mddev->bitmap && firstwrite) { |
3002 | /* Cannot hold spinlock over bitmap_startwrite, | ||
3003 | * but must ensure this isn't added to a batch until | ||
3004 | * we have added to the bitmap and set bm_seq. | ||
3005 | * So set STRIPE_BITMAP_PENDING to prevent | ||
3006 | * batching. | ||
3007 | * If multiple add_stripe_bio() calls race here they | ||
3008 | * must all set STRIPE_BITMAP_PENDING. So only the first one | ||
3009 | * to complete "bitmap_startwrite" gets to set | ||
3010 | * STRIPE_BIT_DELAY. This is important as once a stripe | ||
3011 | * is added to a batch, STRIPE_BIT_DELAY cannot be changed | ||
3012 | * any more. | ||
3013 | */ | ||
3014 | set_bit(STRIPE_BITMAP_PENDING, &sh->state); | ||
3015 | spin_unlock_irq(&sh->stripe_lock); | ||
2993 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, | 3016 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, |
2994 | STRIPE_SECTORS, 0); | 3017 | STRIPE_SECTORS, 0); |
2995 | sh->bm_seq = conf->seq_flush+1; | 3018 | spin_lock_irq(&sh->stripe_lock); |
2996 | set_bit(STRIPE_BIT_DELAY, &sh->state); | 3019 | clear_bit(STRIPE_BITMAP_PENDING, &sh->state); |
3020 | if (!sh->batch_head) { | ||
3021 | sh->bm_seq = conf->seq_flush+1; | ||
3022 | set_bit(STRIPE_BIT_DELAY, &sh->state); | ||
3023 | } | ||
2997 | } | 3024 | } |
3025 | spin_unlock_irq(&sh->stripe_lock); | ||
2998 | 3026 | ||
2999 | if (stripe_can_batch(sh)) | 3027 | if (stripe_can_batch(sh)) |
3000 | stripe_add_to_batch_list(conf, sh); | 3028 | stripe_add_to_batch_list(conf, sh); |
@@ -3392,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh, | |||
3392 | set_bit(STRIPE_HANDLE, &sh->state); | 3420 | set_bit(STRIPE_HANDLE, &sh->state); |
3393 | } | 3421 | } |
3394 | 3422 | ||
3423 | static void break_stripe_batch_list(struct stripe_head *head_sh, | ||
3424 | unsigned long handle_flags); | ||
3395 | /* handle_stripe_clean_event | 3425 | /* handle_stripe_clean_event |
3396 | * any written block on an uptodate or failed drive can be returned. | 3426 | * any written block on an uptodate or failed drive can be returned. |
3397 | * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but | 3427 | * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but |
@@ -3405,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf, | |||
3405 | int discard_pending = 0; | 3435 | int discard_pending = 0; |
3406 | struct stripe_head *head_sh = sh; | 3436 | struct stripe_head *head_sh = sh; |
3407 | bool do_endio = false; | 3437 | bool do_endio = false; |
3408 | int wakeup_nr = 0; | ||
3409 | 3438 | ||
3410 | for (i = disks; i--; ) | 3439 | for (i = disks; i--; ) |
3411 | if (sh->dev[i].written) { | 3440 | if (sh->dev[i].written) { |
@@ -3494,44 +3523,8 @@ unhash: | |||
3494 | if (atomic_dec_and_test(&conf->pending_full_writes)) | 3523 | if (atomic_dec_and_test(&conf->pending_full_writes)) |
3495 | md_wakeup_thread(conf->mddev->thread); | 3524 | md_wakeup_thread(conf->mddev->thread); |
3496 | 3525 | ||
3497 | if (!head_sh->batch_head || !do_endio) | 3526 | if (head_sh->batch_head && do_endio) |
3498 | return; | 3527 | break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS); |
3499 | for (i = 0; i < head_sh->disks; i++) { | ||
3500 | if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) | ||
3501 | wakeup_nr++; | ||
3502 | } | ||
3503 | while (!list_empty(&head_sh->batch_list)) { | ||
3504 | int i; | ||
3505 | sh = list_first_entry(&head_sh->batch_list, | ||
3506 | struct stripe_head, batch_list); | ||
3507 | list_del_init(&sh->batch_list); | ||
3508 | |||
3509 | set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, | ||
3510 | head_sh->state & ~((1 << STRIPE_ACTIVE) | | ||
3511 | (1 << STRIPE_PREREAD_ACTIVE) | | ||
3512 | STRIPE_EXPAND_SYNC_FLAG)); | ||
3513 | sh->check_state = head_sh->check_state; | ||
3514 | sh->reconstruct_state = head_sh->reconstruct_state; | ||
3515 | for (i = 0; i < sh->disks; i++) { | ||
3516 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | ||
3517 | wakeup_nr++; | ||
3518 | sh->dev[i].flags = head_sh->dev[i].flags; | ||
3519 | } | ||
3520 | |||
3521 | spin_lock_irq(&sh->stripe_lock); | ||
3522 | sh->batch_head = NULL; | ||
3523 | spin_unlock_irq(&sh->stripe_lock); | ||
3524 | if (sh->state & STRIPE_EXPAND_SYNC_FLAG) | ||
3525 | set_bit(STRIPE_HANDLE, &sh->state); | ||
3526 | release_stripe(sh); | ||
3527 | } | ||
3528 | |||
3529 | spin_lock_irq(&head_sh->stripe_lock); | ||
3530 | head_sh->batch_head = NULL; | ||
3531 | spin_unlock_irq(&head_sh->stripe_lock); | ||
3532 | wake_up_nr(&conf->wait_for_overlap, wakeup_nr); | ||
3533 | if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG) | ||
3534 | set_bit(STRIPE_HANDLE, &head_sh->state); | ||
3535 | } | 3528 | } |
3536 | 3529 | ||
3537 | static void handle_stripe_dirtying(struct r5conf *conf, | 3530 | static void handle_stripe_dirtying(struct r5conf *conf, |
@@ -4172,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
4172 | 4165 | ||
4173 | static int clear_batch_ready(struct stripe_head *sh) | 4166 | static int clear_batch_ready(struct stripe_head *sh) |
4174 | { | 4167 | { |
4168 | /* Return '1' if this is a member of batch, or | ||
4169 | * '0' if it is a lone stripe or a head which can now be | ||
4170 | * handled. | ||
4171 | */ | ||
4175 | struct stripe_head *tmp; | 4172 | struct stripe_head *tmp; |
4176 | if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) | 4173 | if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state)) |
4177 | return 0; | 4174 | return (sh->batch_head && sh->batch_head != sh); |
4178 | spin_lock(&sh->stripe_lock); | 4175 | spin_lock(&sh->stripe_lock); |
4179 | if (!sh->batch_head) { | 4176 | if (!sh->batch_head) { |
4180 | spin_unlock(&sh->stripe_lock); | 4177 | spin_unlock(&sh->stripe_lock); |
@@ -4202,38 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh) | |||
4202 | return 0; | 4199 | return 0; |
4203 | } | 4200 | } |
4204 | 4201 | ||
4205 | static void check_break_stripe_batch_list(struct stripe_head *sh) | 4202 | static void break_stripe_batch_list(struct stripe_head *head_sh, |
4203 | unsigned long handle_flags) | ||
4206 | { | 4204 | { |
4207 | struct stripe_head *head_sh, *next; | 4205 | struct stripe_head *sh, *next; |
4208 | int i; | 4206 | int i; |
4209 | 4207 | int do_wakeup = 0; | |
4210 | if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) | ||
4211 | return; | ||
4212 | |||
4213 | head_sh = sh; | ||
4214 | 4208 | ||
4215 | list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { | 4209 | list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) { |
4216 | 4210 | ||
4217 | list_del_init(&sh->batch_list); | 4211 | list_del_init(&sh->batch_list); |
4218 | 4212 | ||
4219 | set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG, | 4213 | WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) | |
4220 | head_sh->state & ~((1 << STRIPE_ACTIVE) | | 4214 | (1 << STRIPE_SYNCING) | |
4221 | (1 << STRIPE_PREREAD_ACTIVE) | | 4215 | (1 << STRIPE_REPLACED) | |
4222 | (1 << STRIPE_DEGRADED) | | 4216 | (1 << STRIPE_PREREAD_ACTIVE) | |
4223 | STRIPE_EXPAND_SYNC_FLAG)); | 4217 | (1 << STRIPE_DELAYED) | |
4218 | (1 << STRIPE_BIT_DELAY) | | ||
4219 | (1 << STRIPE_FULL_WRITE) | | ||
4220 | (1 << STRIPE_BIOFILL_RUN) | | ||
4221 | (1 << STRIPE_COMPUTE_RUN) | | ||
4222 | (1 << STRIPE_OPS_REQ_PENDING) | | ||
4223 | (1 << STRIPE_DISCARD) | | ||
4224 | (1 << STRIPE_BATCH_READY) | | ||
4225 | (1 << STRIPE_BATCH_ERR) | | ||
4226 | (1 << STRIPE_BITMAP_PENDING))); | ||
4227 | WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) | | ||
4228 | (1 << STRIPE_REPLACED))); | ||
4229 | |||
4230 | set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | | ||
4231 | (1 << STRIPE_DEGRADED)), | ||
4232 | head_sh->state & (1 << STRIPE_INSYNC)); | ||
4233 | |||
4224 | sh->check_state = head_sh->check_state; | 4234 | sh->check_state = head_sh->check_state; |
4225 | sh->reconstruct_state = head_sh->reconstruct_state; | 4235 | sh->reconstruct_state = head_sh->reconstruct_state; |
4226 | for (i = 0; i < sh->disks; i++) | 4236 | for (i = 0; i < sh->disks; i++) { |
4237 | if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) | ||
4238 | do_wakeup = 1; | ||
4227 | sh->dev[i].flags = head_sh->dev[i].flags & | 4239 | sh->dev[i].flags = head_sh->dev[i].flags & |
4228 | (~((1 << R5_WriteError) | (1 << R5_Overlap))); | 4240 | (~((1 << R5_WriteError) | (1 << R5_Overlap))); |
4229 | 4241 | } | |
4230 | spin_lock_irq(&sh->stripe_lock); | 4242 | spin_lock_irq(&sh->stripe_lock); |
4231 | sh->batch_head = NULL; | 4243 | sh->batch_head = NULL; |
4232 | spin_unlock_irq(&sh->stripe_lock); | 4244 | spin_unlock_irq(&sh->stripe_lock); |
4233 | 4245 | if (handle_flags == 0 || | |
4234 | set_bit(STRIPE_HANDLE, &sh->state); | 4246 | sh->state & handle_flags) |
4247 | set_bit(STRIPE_HANDLE, &sh->state); | ||
4235 | release_stripe(sh); | 4248 | release_stripe(sh); |
4236 | } | 4249 | } |
4250 | spin_lock_irq(&head_sh->stripe_lock); | ||
4251 | head_sh->batch_head = NULL; | ||
4252 | spin_unlock_irq(&head_sh->stripe_lock); | ||
4253 | for (i = 0; i < head_sh->disks; i++) | ||
4254 | if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags)) | ||
4255 | do_wakeup = 1; | ||
4256 | if (head_sh->state & handle_flags) | ||
4257 | set_bit(STRIPE_HANDLE, &head_sh->state); | ||
4258 | |||
4259 | if (do_wakeup) | ||
4260 | wake_up(&head_sh->raid_conf->wait_for_overlap); | ||
4237 | } | 4261 | } |
4238 | 4262 | ||
4239 | static void handle_stripe(struct stripe_head *sh) | 4263 | static void handle_stripe(struct stripe_head *sh) |
@@ -4258,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh) | |||
4258 | return; | 4282 | return; |
4259 | } | 4283 | } |
4260 | 4284 | ||
4261 | check_break_stripe_batch_list(sh); | 4285 | if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state)) |
4286 | break_stripe_batch_list(sh, 0); | ||
4262 | 4287 | ||
4263 | if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { | 4288 | if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { |
4264 | spin_lock(&sh->stripe_lock); | 4289 | spin_lock(&sh->stripe_lock); |
@@ -4312,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh) | |||
4312 | if (s.failed > conf->max_degraded) { | 4337 | if (s.failed > conf->max_degraded) { |
4313 | sh->check_state = 0; | 4338 | sh->check_state = 0; |
4314 | sh->reconstruct_state = 0; | 4339 | sh->reconstruct_state = 0; |
4340 | break_stripe_batch_list(sh, 0); | ||
4315 | if (s.to_read+s.to_write+s.written) | 4341 | if (s.to_read+s.to_write+s.written) |
4316 | handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); | 4342 | handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); |
4317 | if (s.syncing + s.replacing) | 4343 | if (s.syncing + s.replacing) |