author     Yuri Tikhonov <yur@emcraft.com>           2009-08-29 22:13:13 -0400
committer  Dan Williams <dan.j.williams@intel.com>   2009-08-29 22:13:13 -0400
commit     6c0069c0ae9659e3a91b68eaed06a5c6c37f45c8
tree       97b01fc8602e7b6d4c495d0f0562ae901d78f58b   /drivers/md
parent     d82dfee0ad8f240fef1b28e2258891c07da57367
md/raid6: asynchronous handle_stripe6
1/ Use STRIPE_OP_BIOFILL to offload completion of read requests to
raid_run_ops
2/ Implement a handler for sh->reconstruct_state similar to the raid5 case
(adds handling of Q parity)
3/ Prevent handle_parity_checks6 from running concurrently with 'compute'
operations
4/ Hook up raid_run_ops
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
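
The gating described in point 1 is easiest to see in isolation. The stand-alone C sketch below models it with plain bit masks: per-device "want" flags are only set while no fill completion can be running, and the operation is requested at most once per analysis pass. The flag names mirror the patch (R5_Wantfill, STRIPE_BIOFILL_RUN, STRIPE_OP_BIOFILL), but the struct layout, helper names and main() are invented for illustration and are not the kernel's:

/* Stand-alone model of the one-shot biofill gating.  Flag names mirror
 * the patch; everything else is a simplified stand-in, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define R5_UPTODATE        (1u << 0)	/* device page holds valid data */
#define R5_Wantfill        (1u << 1)	/* copy page into waiting read bios */
#define STRIPE_BIOFILL_RUN (1u << 0)	/* a biofill op is in flight */
#define STRIPE_OP_BIOFILL  (1u << 0)	/* request bit in ops_request */

struct dev_model { unsigned flags; bool toread; };
struct stripe_model { unsigned state; struct dev_model dev[6]; };

/* condensed shape of the analysis loop in handle_stripe6() */
static unsigned analyse_stripe(struct stripe_model *sh)
{
	unsigned ops_request = 0;
	int to_fill = 0;

	for (int i = 0; i < 6; i++) {
		struct dev_model *dev = &sh->dev[i];

		/* new wantfill requests are only permitted while the
		 * completion handler is guaranteed to be inactive */
		if ((dev->flags & R5_UPTODATE) && dev->toread &&
		    !(sh->state & STRIPE_BIOFILL_RUN))
			dev->flags |= R5_Wantfill;
		if (dev->flags & R5_Wantfill)
			to_fill++;
	}

	/* request the offloaded copy once; the RUN bit stays set until
	 * the completion handler clears it */
	if (to_fill && !(sh->state & STRIPE_BIOFILL_RUN)) {
		ops_request |= STRIPE_OP_BIOFILL;
		sh->state |= STRIPE_BIOFILL_RUN;
	}
	return ops_request;
}

int main(void)
{
	struct stripe_model sh = { 0 };

	sh.dev[0].flags = R5_UPTODATE;
	sh.dev[0].toread = true;

	/* the first pass schedules the op, a second pass must not */
	printf("pass 1 ops=%#x\n", analyse_stripe(&sh));
	printf("pass 2 ops=%#x\n", analyse_stripe(&sh));
	return 0;
}

In the kernel it is the completion handler (ops_complete_biofill()) that clears STRIPE_BIOFILL_RUN again so the stripe can be refilled later; the sketch leaves the bit set to make the one-shot behaviour visible.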
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/raid5.c | 120
1 file changed, 82 insertions(+), 38 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3c31f7f8aa65..a833de189ca6 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3424,9 +3424,10 @@ static bool handle_stripe6(struct stripe_head *sh)
 	mdk_rdev_t *blocked_rdev = NULL;
 
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
-		"pd_idx=%d, qd_idx=%d\n",
+		"pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
 		(unsigned long long)sh->sector, sh->state,
-		atomic_read(&sh->count), pd_idx, qd_idx);
+		atomic_read(&sh->count), pd_idx, qd_idx,
+		sh->check_state, sh->reconstruct_state);
 	memset(&s, 0, sizeof(s));
 
 	spin_lock(&sh->lock);
@@ -3446,35 +3447,24 @@ static bool handle_stripe6(struct stripe_head *sh)
 
 		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
 			i, dev->flags, dev->toread, dev->towrite, dev->written);
-		/* maybe we can reply to a read */
-		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
-			struct bio *rbi, *rbi2;
-			pr_debug("Return read for disc %d\n", i);
-			spin_lock_irq(&conf->device_lock);
-			rbi = dev->toread;
-			dev->toread = NULL;
-			if (test_and_clear_bit(R5_Overlap, &dev->flags))
-				wake_up(&conf->wait_for_overlap);
-			spin_unlock_irq(&conf->device_lock);
-			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
-				copy_data(0, rbi, dev->page, dev->sector);
-				rbi2 = r5_next_bio(rbi, dev->sector);
-				spin_lock_irq(&conf->device_lock);
-				if (!raid5_dec_bi_phys_segments(rbi)) {
-					rbi->bi_next = return_bi;
-					return_bi = rbi;
-				}
-				spin_unlock_irq(&conf->device_lock);
-				rbi = rbi2;
-			}
-		}
+		/* maybe we can reply to a read
+		 *
+		 * new wantfill requests are only permitted while
+		 * ops_complete_biofill is guaranteed to be inactive
+		 */
+		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+		    !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+			set_bit(R5_Wantfill, &dev->flags);
 
 		/* now count some things */
 		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
 		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+		if (test_bit(R5_Wantcompute, &dev->flags))
+			BUG_ON(++s.compute > 2);
 
-
-		if (dev->toread)
+		if (test_bit(R5_Wantfill, &dev->flags)) {
+			s.to_fill++;
+		} else if (dev->toread)
 			s.to_read++;
 		if (dev->towrite) {
 			s.to_write++;
@@ -3515,6 +3505,11 @@ static bool handle_stripe6(struct stripe_head *sh)
 		blocked_rdev = NULL;
 	}
 
+	if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+		set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+		set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+	}
+
 	pr_debug("locked=%d uptodate=%d to_read=%d"
 		" to_write=%d failed=%d failed_num=%d,%d\n",
 		s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3555,9 +3550,43 @@ static bool handle_stripe6(struct stripe_head *sh)
 	 * or to load a block that is being partially written.
 	 */
 	if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
-	    (s.syncing && (s.uptodate < disks)) || s.expanding)
+	    (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
 		handle_stripe_fill6(sh, &s, &r6s, disks);
 
+	/* Now we check to see if any write operations have recently
+	 * completed
+	 */
+	if (sh->reconstruct_state == reconstruct_state_drain_result) {
+		int qd_idx = sh->qd_idx;
+
+		sh->reconstruct_state = reconstruct_state_idle;
+		/* All the 'written' buffers and the parity blocks are ready to
+		 * be written back to disk
+		 */
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+		for (i = disks; i--; ) {
+			dev = &sh->dev[i];
+			if (test_bit(R5_LOCKED, &dev->flags) &&
+			    (i == sh->pd_idx || i == qd_idx ||
+			     dev->written)) {
+				pr_debug("Writing block %d\n", i);
+				BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+				set_bit(R5_Wantwrite, &dev->flags);
+				if (!test_bit(R5_Insync, &dev->flags) ||
+				    ((i == sh->pd_idx || i == qd_idx) &&
+				      s.failed == 0))
+					set_bit(STRIPE_INSYNC, &sh->state);
+			}
+		}
+		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+			atomic_dec(&conf->preread_active_stripes);
+			if (atomic_read(&conf->preread_active_stripes) <
+			    IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		}
+	}
+
 	/* Now to consider new write requests and what else, if anything
 	 * should be read. We do not handle new writes when:
 	 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
@@ -3569,9 +3598,13 @@ static bool handle_stripe6(struct stripe_head *sh)
 
 	/* maybe we need to check and possibly fix the parity for this stripe
 	 * Any reads will already have been scheduled, so we just see if enough
-	 * data is available
+	 * data is available. The parity check is held off while parity
+	 * dependent operations are in flight.
 	 */
-	if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
+	if (sh->check_state ||
+	    (s.syncing && s.locked == 0 &&
+	     !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+	     !test_bit(STRIPE_INSYNC, &sh->state)))
 		handle_parity_checks6(conf, sh, &s, &r6s, disks);
 
 	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -3593,15 +3626,29 @@ static bool handle_stripe6(struct stripe_head *sh)
 					set_bit(R5_Wantwrite, &dev->flags);
 					set_bit(R5_ReWrite, &dev->flags);
 					set_bit(R5_LOCKED, &dev->flags);
+					s.locked++;
 				} else {
 					/* let's read it back */
 					set_bit(R5_Wantread, &dev->flags);
 					set_bit(R5_LOCKED, &dev->flags);
+					s.locked++;
 				}
 			}
 		}
 
-	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+	/* Finish reconstruct operations initiated by the expansion process */
+	if (sh->reconstruct_state == reconstruct_state_result) {
+		sh->reconstruct_state = reconstruct_state_idle;
+		clear_bit(STRIPE_EXPANDING, &sh->state);
+		for (i = conf->raid_disks; i--; ) {
+			set_bit(R5_Wantwrite, &sh->dev[i].flags);
+			set_bit(R5_LOCKED, &sh->dev[i].flags);
+			s.locked++;
+		}
+	}
+
+	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+	    !sh->reconstruct_state) {
 		struct stripe_head *sh2
 			= get_active_stripe(conf, sh->sector, 1, 1);
 		if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3622,14 +3669,8 @@ static bool handle_stripe6(struct stripe_head *sh)
 		/* Need to write out all blocks after computing P&Q */
 		sh->disks = conf->raid_disks;
 		stripe_set_idx(sh->sector, conf, 0, sh);
-		compute_parity6(sh, RECONSTRUCT_WRITE);
-		for (i = conf->raid_disks ; i-- ; ) {
-			set_bit(R5_LOCKED, &sh->dev[i].flags);
-			s.locked++;
-			set_bit(R5_Wantwrite, &sh->dev[i].flags);
-		}
-		clear_bit(STRIPE_EXPANDING, &sh->state);
-	} else if (s.expanded) {
+		schedule_reconstruction(sh, &s, 1, 1);
+	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
 		atomic_dec(&conf->reshape_stripes);
 		wake_up(&conf->wait_for_overlap);
@@ -3647,6 +3688,9 @@ static bool handle_stripe6(struct stripe_head *sh)
 	if (unlikely(blocked_rdev))
 		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
+	if (s.ops_request)
+		raid_run_ops(sh, s.ops_request);
+
 	ops_run_io(sh, &s);
 
 	return_io(return_bi);
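
Taken together, the hunks above move handle_stripe6() to the split that handle_stripe5 already uses: the analysis pass only records results and operation requests, and the asynchronous work is kicked off once at the end via raid_run_ops(). The sketch below models the reconstruct_state handshake behind the drain-result hunk; the enum values match the patch, but the stripe_model type and the ops_complete()/handle_stripe() helpers are simplified stand-ins, not the kernel's stripe_head machinery:

/* Schematic model of the reconstruct_state handshake: the async
 * completion path only records a "*_result" state, and the next
 * handle_stripe6() pass converts it into disk writes. */
#include <assert.h>
#include <stdio.h>

enum reconstruct_states {
	reconstruct_state_idle,
	reconstruct_state_drain_result,	/* write-back syndrome generated */
	reconstruct_state_result,	/* expansion reconstruct finished */
};

struct stripe_model {
	enum reconstruct_states reconstruct_state;
	int parity_uptodate;	/* stands in for R5_UPTODATE on P and Q */
	int writes_issued;
};

/* what the async callback would do: publish the result, nothing more */
static void ops_complete(struct stripe_model *sh)
{
	sh->parity_uptodate = 1;
	sh->reconstruct_state = reconstruct_state_drain_result;
}

/* what the next analysis pass does with the published result */
static void handle_stripe(struct stripe_model *sh)
{
	if (sh->reconstruct_state == reconstruct_state_drain_result) {
		sh->reconstruct_state = reconstruct_state_idle;
		/* parity blocks must be valid before they hit the disk */
		assert(sh->parity_uptodate);
		sh->writes_issued = 1;	/* i.e. set R5_Wantwrite per block */
	}
}

int main(void)
{
	struct stripe_model sh = { reconstruct_state_idle, 0, 0 };

	ops_complete(&sh);	/* async completion, e.g. from a DMA engine */
	handle_stripe(&sh);	/* next pass schedules the write-back */
	printf("writes_issued=%d state=%d\n",
	       sh.writes_issued, sh.reconstruct_state);
	return 0;
}

Publishing only a result state from the completion callback keeps that callback cheap and safe to run from the async engine's context; the locking, flag updates and I/O submission all stay on the normal handle_stripe path.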