aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yuri Tikhonov <yur@emcraft.com> 2009-08-29 22:13:12 -0400
committer Dan Williams <dan.j.williams@intel.com> 2009-08-29 22:13:12 -0400
commit a9b39a741a7e3b262b9f51fefb68e17b32756999 (patch)
tree 659f32a0377ff241627eedb60816bda283e2233f
parent 5599becca4bee7badf605e41fd5bcde76d51f2a4 (diff)
md/raid6: asynchronous handle_stripe_dirtying6
In the synchronous implementation of stripe dirtying we processed a degraded stripe with one call to handle_stripe_dirtying6(); that is, we computed the missing blocks from the other drives, then copied in the new data and reconstructed the parities. In the asynchronous case we do not perform stripe operations directly. Instead, operations are scheduled with flags to be serviced later by raid_run_ops(). So, for the degraded case, the final reconstruction step can only be carried out after all blocks have been brought up to date by being read or computed. As in the raid5 case, schedule_reconstruction() sets STRIPE_OP_RECONSTRUCT to request a parity generation pass and, through operation chaining, can handle compute and reconstruct in a single raid_run_ops() pass. [dan.j.williams@intel.com: fixup handle_stripe_dirtying6 gating] Signed-off-by: Yuri Tikhonov <yur@emcraft.com> Signed-off-by: Ilya Yanok <yanok@emcraft.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- drivers/md/raid5.c 122
1 files changed, 37 insertions, 85 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49da6f74d6d6..08f806379b07 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2766,99 +2766,46 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
2766 struct stripe_head *sh, struct stripe_head_state *s, 2766 struct stripe_head *sh, struct stripe_head_state *s,
2767 struct r6_state *r6s, int disks) 2767 struct r6_state *r6s, int disks)
2768{ 2768{
2769 int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; 2769 int rcw = 0, pd_idx = sh->pd_idx, i;
2770 int qd_idx = sh->qd_idx; 2770 int qd_idx = sh->qd_idx;
2771
2772 set_bit(STRIPE_HANDLE, &sh->state);
2771 for (i = disks; i--; ) { 2773 for (i = disks; i--; ) {
2772 struct r5dev *dev = &sh->dev[i]; 2774 struct r5dev *dev = &sh->dev[i];
2773 /* Would I have to read this buffer for reconstruct_write */ 2775 /* check if we haven't enough data */
2774 if (!test_bit(R5_OVERWRITE, &dev->flags) 2776 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
2775 && i != pd_idx && i != qd_idx 2777 i != pd_idx && i != qd_idx &&
2776 && (!test_bit(R5_LOCKED, &dev->flags) 2778 !test_bit(R5_LOCKED, &dev->flags) &&
2777 ) && 2779 !(test_bit(R5_UPTODATE, &dev->flags) ||
2778 !test_bit(R5_UPTODATE, &dev->flags)) { 2780 test_bit(R5_Wantcompute, &dev->flags))) {
2779 if (test_bit(R5_Insync, &dev->flags)) rcw++; 2781 rcw++;
2780 else { 2782 if (!test_bit(R5_Insync, &dev->flags))
2781 pr_debug("raid6: must_compute: " 2783 continue; /* it's a failed drive */
2782 "disk %d flags=%#lx\n", i, dev->flags); 2784
2783 must_compute++; 2785 if (
2786 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2787 pr_debug("Read_old stripe %llu "
2788 "block %d for Reconstruct\n",
2789 (unsigned long long)sh->sector, i);
2790 set_bit(R5_LOCKED, &dev->flags);
2791 set_bit(R5_Wantread, &dev->flags);
2792 s->locked++;
2793 } else {
2794 pr_debug("Request delayed stripe %llu "
2795 "block %d for Reconstruct\n",
2796 (unsigned long long)sh->sector, i);
2797 set_bit(STRIPE_DELAYED, &sh->state);
2798 set_bit(STRIPE_HANDLE, &sh->state);
2784 } 2799 }
2785 } 2800 }
2786 } 2801 }
2787 pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
2788 (unsigned long long)sh->sector, rcw, must_compute);
2789 set_bit(STRIPE_HANDLE, &sh->state);
2790
2791 if (rcw > 0)
2792 /* want reconstruct write, but need to get some data */
2793 for (i = disks; i--; ) {
2794 struct r5dev *dev = &sh->dev[i];
2795 if (!test_bit(R5_OVERWRITE, &dev->flags)
2796 && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
2797 && !test_bit(R5_LOCKED, &dev->flags) &&
2798 !test_bit(R5_UPTODATE, &dev->flags) &&
2799 test_bit(R5_Insync, &dev->flags)) {
2800 if (
2801 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2802 pr_debug("Read_old stripe %llu "
2803 "block %d for Reconstruct\n",
2804 (unsigned long long)sh->sector, i);
2805 set_bit(R5_LOCKED, &dev->flags);
2806 set_bit(R5_Wantread, &dev->flags);
2807 s->locked++;
2808 } else {
2809 pr_debug("Request delayed stripe %llu "
2810 "block %d for Reconstruct\n",
2811 (unsigned long long)sh->sector, i);
2812 set_bit(STRIPE_DELAYED, &sh->state);
2813 set_bit(STRIPE_HANDLE, &sh->state);
2814 }
2815 }
2816 }
2817 /* now if nothing is locked, and if we have enough data, we can start a 2802 /* now if nothing is locked, and if we have enough data, we can start a
2818 * write request 2803 * write request
2819 */ 2804 */
2820 if (s->locked == 0 && rcw == 0 && 2805 if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
2806 s->locked == 0 && rcw == 0 &&
2821 !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 2807 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2822 if (must_compute > 0) { 2808 schedule_reconstruction(sh, s, 1, 0);
2823 /* We have failed blocks and need to compute them */
2824 switch (s->failed) {
2825 case 0:
2826 BUG();
2827 case 1:
2828 compute_block_1(sh, r6s->failed_num[0], 0);
2829 break;
2830 case 2:
2831 compute_block_2(sh, r6s->failed_num[0],
2832 r6s->failed_num[1]);
2833 break;
2834 default: /* This request should have been failed? */
2835 BUG();
2836 }
2837 }
2838
2839 pr_debug("Computing parity for stripe %llu\n",
2840 (unsigned long long)sh->sector);
2841 compute_parity6(sh, RECONSTRUCT_WRITE);
2842 /* now every locked buffer is ready to be written */
2843 for (i = disks; i--; )
2844 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2845 pr_debug("Writing stripe %llu block %d\n",
2846 (unsigned long long)sh->sector, i);
2847 s->locked++;
2848 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2849 }
2850 if (s->locked == disks)
2851 if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
2852 atomic_inc(&conf->pending_full_writes);
2853 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2854 set_bit(STRIPE_INSYNC, &sh->state);
2855
2856 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2857 atomic_dec(&conf->preread_active_stripes);
2858 if (atomic_read(&conf->preread_active_stripes) <
2859 IO_THRESHOLD)
2860 md_wakeup_thread(conf->mddev->thread);
2861 }
2862 } 2809 }
2863} 2810}
2864 2811
@@ -3539,8 +3486,13 @@ static bool handle_stripe6(struct stripe_head *sh)
3539 (s.syncing && (s.uptodate < disks)) || s.expanding) 3486 (s.syncing && (s.uptodate < disks)) || s.expanding)
3540 handle_stripe_fill6(sh, &s, &r6s, disks); 3487 handle_stripe_fill6(sh, &s, &r6s, disks);
3541 3488
3542 /* now to consider writing and what else, if anything should be read */ 3489 /* Now to consider new write requests and what else, if anything
3543 if (s.to_write) 3490 * should be read. We do not handle new writes when:
3491 * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
3492 * 2/ A 'check' operation is in flight, as it may clobber the parity
3493 * block.
3494 */
3495 if (s.to_write && !sh->reconstruct_state && !sh->check_state)
3544 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); 3496 handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
3545 3497
3546 /* maybe we need to check and possibly fix the parity for this stripe 3498 /* maybe we need to check and possibly fix the parity for this stripe