about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/md/raid5.c 120
1 files changed, 82 insertions, 38 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3c31f7f8aa65..a833de189ca6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3424,9 +3424,10 @@ static bool handle_stripe6(struct stripe_head *sh)
3424 mdk_rdev_t *blocked_rdev = NULL; 3424 mdk_rdev_t *blocked_rdev = NULL;
3425 3425
3426 pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 3426 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
3427 "pd_idx=%d, qd_idx=%d\n", 3427 "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
3428 (unsigned long long)sh->sector, sh->state, 3428 (unsigned long long)sh->sector, sh->state,
3429 atomic_read(&sh->count), pd_idx, qd_idx); 3429 atomic_read(&sh->count), pd_idx, qd_idx,
3430 sh->check_state, sh->reconstruct_state);
3430 memset(&s, 0, sizeof(s)); 3431 memset(&s, 0, sizeof(s));
3431 3432
3432 spin_lock(&sh->lock); 3433 spin_lock(&sh->lock);
@@ -3446,35 +3447,24 @@ static bool handle_stripe6(struct stripe_head *sh)
3446 3447
3447 pr_debug("check %d: state 0x%lx read %p write %p written %p\n", 3448 pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
3448 i, dev->flags, dev->toread, dev->towrite, dev->written); 3449 i, dev->flags, dev->toread, dev->towrite, dev->written);
3449 /* maybe we can reply to a read */ 3450 /* maybe we can reply to a read
3450 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 3451 *
3451 struct bio *rbi, *rbi2; 3452 * new wantfill requests are only permitted while
3452 pr_debug("Return read for disc %d\n", i); 3453 * ops_complete_biofill is guaranteed to be inactive
3453 spin_lock_irq(&conf->device_lock); 3454 */
3454 rbi = dev->toread; 3455 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
3455 dev->toread = NULL; 3456 !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
3456 if (test_and_clear_bit(R5_Overlap, &dev->flags)) 3457 set_bit(R5_Wantfill, &dev->flags);
3457 wake_up(&conf->wait_for_overlap);
3458 spin_unlock_irq(&conf->device_lock);
3459 while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
3460 copy_data(0, rbi, dev->page, dev->sector);
3461 rbi2 = r5_next_bio(rbi, dev->sector);
3462 spin_lock_irq(&conf->device_lock);
3463 if (!raid5_dec_bi_phys_segments(rbi)) {
3464 rbi->bi_next = return_bi;
3465 return_bi = rbi;
3466 }
3467 spin_unlock_irq(&conf->device_lock);
3468 rbi = rbi2;
3469 }
3470 }
3471 3458
3472 /* now count some things */ 3459 /* now count some things */
3473 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 3460 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
3474 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 3461 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
3462 if (test_bit(R5_Wantcompute, &dev->flags))
3463 BUG_ON(++s.compute > 2);
3475 3464
3476 3465 if (test_bit(R5_Wantfill, &dev->flags)) {
3477 if (dev->toread) 3466 s.to_fill++;
3467 } else if (dev->toread)
3478 s.to_read++; 3468 s.to_read++;
3479 if (dev->towrite) { 3469 if (dev->towrite) {
3480 s.to_write++; 3470 s.to_write++;
@@ -3515,6 +3505,11 @@ static bool handle_stripe6(struct stripe_head *sh)
3515 blocked_rdev = NULL; 3505 blocked_rdev = NULL;
3516 } 3506 }
3517 3507
3508 if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
3509 set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
3510 set_bit(STRIPE_BIOFILL_RUN, &sh->state);
3511 }
3512
3518 pr_debug("locked=%d uptodate=%d to_read=%d" 3513 pr_debug("locked=%d uptodate=%d to_read=%d"
3519 " to_write=%d failed=%d failed_num=%d,%d\n", 3514 " to_write=%d failed=%d failed_num=%d,%d\n",
3520 s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 3515 s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3555,9 +3550,43 @@ static bool handle_stripe6(struct stripe_head *sh)
3555 * or to load a block that is being partially written. 3550 * or to load a block that is being partially written.
3556 */ 3551 */
3557 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || 3552 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
3558 (s.syncing && (s.uptodate < disks)) || s.expanding) 3553 (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
3559 handle_stripe_fill6(sh, &s, &r6s, disks); 3554 handle_stripe_fill6(sh, &s, &r6s, disks);
3560 3555
3556 /* Now we check to see if any write operations have recently
3557 * completed
3558 */
3559 if (sh->reconstruct_state == reconstruct_state_drain_result) {
3560 int qd_idx = sh->qd_idx;
3561
3562 sh->reconstruct_state = reconstruct_state_idle;
3563 /* All the 'written' buffers and the parity blocks are ready to
3564 * be written back to disk
3565 */
3566 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
3567 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
3568 for (i = disks; i--; ) {
3569 dev = &sh->dev[i];
3570 if (test_bit(R5_LOCKED, &dev->flags) &&
3571 (i == sh->pd_idx || i == qd_idx ||
3572 dev->written)) {
3573 pr_debug("Writing block %d\n", i);
3574 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
3575 set_bit(R5_Wantwrite, &dev->flags);
3576 if (!test_bit(R5_Insync, &dev->flags) ||
3577 ((i == sh->pd_idx || i == qd_idx) &&
3578 s.failed == 0))
3579 set_bit(STRIPE_INSYNC, &sh->state);
3580 }
3581 }
3582 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
3583 atomic_dec(&conf->preread_active_stripes);
3584 if (atomic_read(&conf->preread_active_stripes) <
3585 IO_THRESHOLD)
3586 md_wakeup_thread(conf->mddev->thread);
3587 }
3588 }
3589
3561 /* Now to consider new write requests and what else, if anything 3590 /* Now to consider new write requests and what else, if anything
3562 * should be read. We do not handle new writes when: 3591 * should be read. We do not handle new writes when:
3563 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight. 3592 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
@@ -3569,9 +3598,13 @@ static bool handle_stripe6(struct stripe_head *sh)
3569 3598
3570 /* maybe we need to check and possibly fix the parity for this stripe 3599 /* maybe we need to check and possibly fix the parity for this stripe
3571 * Any reads will already have been scheduled, so we just see if enough 3600 * Any reads will already have been scheduled, so we just see if enough
3572 * data is available 3601 * data is available. The parity check is held off while parity
3602 * dependent operations are in flight.
3573 */ 3603 */
3574 if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) 3604 if (sh->check_state ||
3605 (s.syncing && s.locked == 0 &&
3606 !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
3607 !test_bit(STRIPE_INSYNC, &sh->state)))
3575 handle_parity_checks6(conf, sh, &s, &r6s, disks); 3608 handle_parity_checks6(conf, sh, &s, &r6s, disks);
3576 3609
3577 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 3610 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -3593,15 +3626,29 @@ static bool handle_stripe6(struct stripe_head *sh)
3593 set_bit(R5_Wantwrite, &dev->flags); 3626 set_bit(R5_Wantwrite, &dev->flags);
3594 set_bit(R5_ReWrite, &dev->flags); 3627 set_bit(R5_ReWrite, &dev->flags);
3595 set_bit(R5_LOCKED, &dev->flags); 3628 set_bit(R5_LOCKED, &dev->flags);
3629 s.locked++;
3596 } else { 3630 } else {
3597 /* let's read it back */ 3631 /* let's read it back */
3598 set_bit(R5_Wantread, &dev->flags); 3632 set_bit(R5_Wantread, &dev->flags);
3599 set_bit(R5_LOCKED, &dev->flags); 3633 set_bit(R5_LOCKED, &dev->flags);
3634 s.locked++;
3600 } 3635 }
3601 } 3636 }
3602 } 3637 }
3603 3638
3604 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3639 /* Finish reconstruct operations initiated by the expansion process */
3640 if (sh->reconstruct_state == reconstruct_state_result) {
3641 sh->reconstruct_state = reconstruct_state_idle;
3642 clear_bit(STRIPE_EXPANDING, &sh->state);
3643 for (i = conf->raid_disks; i--; ) {
3644 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3645 set_bit(R5_LOCKED, &sh->dev[i].flags);
3646 s.locked++;
3647 }
3648 }
3649
3650 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
3651 !sh->reconstruct_state) {
3605 struct stripe_head *sh2 3652 struct stripe_head *sh2
3606 = get_active_stripe(conf, sh->sector, 1, 1); 3653 = get_active_stripe(conf, sh->sector, 1, 1);
3607 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { 3654 if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3622,14 +3669,8 @@ static bool handle_stripe6(struct stripe_head *sh)
3622 /* Need to write out all blocks after computing P&Q */ 3669 /* Need to write out all blocks after computing P&Q */
3623 sh->disks = conf->raid_disks; 3670 sh->disks = conf->raid_disks;
3624 stripe_set_idx(sh->sector, conf, 0, sh); 3671 stripe_set_idx(sh->sector, conf, 0, sh);
3625 compute_parity6(sh, RECONSTRUCT_WRITE); 3672 schedule_reconstruction(sh, &s, 1, 1);
3626 for (i = conf->raid_disks ; i-- ; ) { 3673 } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
3627 set_bit(R5_LOCKED, &sh->dev[i].flags);
3628 s.locked++;
3629 set_bit(R5_Wantwrite, &sh->dev[i].flags);
3630 }
3631 clear_bit(STRIPE_EXPANDING, &sh->state);
3632 } else if (s.expanded) {
3633 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3674 clear_bit(STRIPE_EXPAND_READY, &sh->state);
3634 atomic_dec(&conf->reshape_stripes); 3675 atomic_dec(&conf->reshape_stripes);
3635 wake_up(&conf->wait_for_overlap); 3676 wake_up(&conf->wait_for_overlap);
@@ -3647,6 +3688,9 @@ static bool handle_stripe6(struct stripe_head *sh)
3647 if (unlikely(blocked_rdev)) 3688 if (unlikely(blocked_rdev))
3648 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); 3689 md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
3649 3690
3691 if (s.ops_request)
3692 raid_run_ops(sh, s.ops_request);
3693
3650 ops_run_io(sh, &s); 3694 ops_run_io(sh, &s);
3651 3695
3652 return_io(return_bi); 3696 return_io(return_bi);