md/raid6: asynchronous handle_stripe_dirtying6

In the synchronous implementation of stripe dirtying we processed a degraded stripe with one call to handle_stripe_dirtying6(), i.e. compute the missing blocks from the other drives, then copy in the new data and reconstruct the parities.

In the asynchronous case we do not perform stripe operations directly. Instead, operations are scheduled with flags to be later serviced by raid_run_ops. So, for the degraded case, the final reconstruction step can only be carried out after all blocks have been brought up to date by being read or computed. As in the raid5 case, schedule_reconstruction() sets STRIPE_OP_RECONSTRUCT to request a parity generation pass and, through operation chaining, can handle compute and reconstruct in a single raid_run_ops pass.

[dan.j.williams@intel.com: fixup handle_stripe_dirtying6 gating]
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
author: Yuri Tikhonov <yur@emcraft.com> 2009-08-29 22:13:12 -0400
committer: Dan Williams <dan.j.williams@intel.com> 2009-08-29 22:13:12 -0400
commit: a9b39a741a7e3b262b9f51fefb68e17b32756999 (patch)
tree: 659f32a0377ff241627eedb60816bda283e2233f /drivers/md/raid5.c
parent: 5599becca4bee7badf605e41fd5bcde76d51f2a4 (diff)
1 files changed, 37 insertions, 85 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49da6f74d6d6..08f806379b07 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2766,99 +2766,46 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
                struct stripe_head *sh, struct stripe_head_state *s,
                struct r6_state *r6s, int disks)
 {
-        int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
+        int rcw = 0, pd_idx = sh->pd_idx, i;
        int qd_idx = sh->qd_idx;
+        set_bit(STRIPE_HANDLE, &sh->state);
        for (i = disks; i--; ) {
                struct r5dev *dev = &sh->dev[i];
-                /* Would I have to read this buffer for reconstruct_write */
+                /* check if we haven't enough data */
-                if (!test_bit(R5_OVERWRITE, &dev->flags)
+                if (!test_bit(R5_OVERWRITE, &dev->flags) &&
-                    && i != pd_idx && i != qd_idx
+                    i != pd_idx && i != qd_idx &&
-                    && (!test_bit(R5_LOCKED, &dev->flags)
+                    !test_bit(R5_LOCKED, &dev->flags) &&
-                            ) &&
+                    !(test_bit(R5_UPTODATE, &dev->flags) ||
-                    !test_bit(R5_UPTODATE, &dev->flags)) {
+                      test_bit(R5_Wantcompute, &dev->flags))) {
-                        if (test_bit(R5_Insync, &dev->flags)) rcw++;
+                        rcw++;
-                        else {
+                        if (!test_bit(R5_Insync, &dev->flags))
-                                pr_debug("raid6: must_compute: "
+                                continue; /* it's a failed drive */
-                                        "disk %d flags=%#lx\n", i, dev->flags);
-                                must_compute++;
+                        if (
+                          test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+                                pr_debug("Read_old stripe %llu "
+                                        "block %d for Reconstruct\n",
+                                     (unsigned long long)sh->sector, i);
+                                set_bit(R5_LOCKED, &dev->flags);
+                                set_bit(R5_Wantread, &dev->flags);
+                                s->locked++;
+                        } else {
+                                pr_debug("Request delayed stripe %llu "
+                                        "block %d for Reconstruct\n",
+                                     (unsigned long long)sh->sector, i);
+                                set_bit(STRIPE_DELAYED, &sh->state);
+                                set_bit(STRIPE_HANDLE, &sh->state);
                        }
                }
        }
-        pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
-               (unsigned long long)sh->sector, rcw, must_compute);
-        set_bit(STRIPE_HANDLE, &sh->state);
-        if (rcw > 0)
-                /* want reconstruct write, but need to get some data */
-                for (i = disks; i--; ) {
-                        struct r5dev *dev = &sh->dev[i];
-                        if (!test_bit(R5_OVERWRITE, &dev->flags)
-                            && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
-                            && !test_bit(R5_LOCKED, &dev->flags) &&
-                            !test_bit(R5_UPTODATE, &dev->flags) &&
-                            test_bit(R5_Insync, &dev->flags)) {
-                                if (
-                                  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                                        pr_debug("Read_old stripe %llu "
-                                                "block %d for Reconstruct\n",
-                                             (unsigned long long)sh->sector, i);
-                                        set_bit(R5_LOCKED, &dev->flags);
-                                        set_bit(R5_Wantread, &dev->flags);
-                                        s->locked++;
-                                } else {
-                                        pr_debug("Request delayed stripe %llu "
-                                                "block %d for Reconstruct\n",
-                                             (unsigned long long)sh->sector, i);
-                                        set_bit(STRIPE_DELAYED, &sh->state);
-                                        set_bit(STRIPE_HANDLE, &sh->state);
-                                }
-                        }
-                }
        /* now if nothing is locked, and if we have enough data, we can start a
         * write request
         */
-        if (s->locked == 0 && rcw == 0 &&
+        if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+            s->locked == 0 && rcw == 0 &&
            !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-                if (must_compute > 0) {
+                schedule_reconstruction(sh, s, 1, 0);
-                        /* We have failed blocks and need to compute them */
-                        switch (s->failed) {
-                        case 0:
-                                BUG();
-                        case 1:
-                                compute_block_1(sh, r6s->failed_num[0], 0);
-                                break;
-                        case 2:
-                                compute_block_2(sh, r6s->failed_num[0],
-                                                r6s->failed_num[1]);
-                                break;
-                        default: /* This request should have been failed? */
-                                BUG();
-                        }
-                }
-                pr_debug("Computing parity for stripe %llu\n",
-                        (unsigned long long)sh->sector);
-                compute_parity6(sh, RECONSTRUCT_WRITE);
-                /* now every locked buffer is ready to be written */
-                for (i = disks; i--; )
-                        if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
-                                pr_debug("Writing stripe %llu block %d\n",
-                                       (unsigned long long)sh->sector, i);
-                                s->locked++;
-                                set_bit(R5_Wantwrite, &sh->dev[i].flags);
-                        }
-                if (s->locked == disks)
-                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
-                                atomic_inc(&conf->pending_full_writes);
-                /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
-                set_bit(STRIPE_INSYNC, &sh->state);
-                if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                        atomic_dec(&conf->preread_active_stripes);
-                        if (atomic_read(&conf->preread_active_stripes) <
-                            IO_THRESHOLD)
-                                md_wakeup_thread(conf->mddev->thread);
-                }
        }
 }
@@ -3539,8 +3486,13 @@ static bool handle_stripe6(struct stripe_head *sh)
            (s.syncing && (s.uptodate < disks)) || s.expanding)
                handle_stripe_fill6(sh, &s, &r6s, disks);
-        /* now to consider writing and what else, if anything should be read */
+        /* Now to consider new write requests and what else, if anything
-        if (s.to_write)
+         * should be read.  We do not handle new writes when:
+         * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+         * 2/ A 'check' operation is in flight, as it may clobber the parity
+         *    block.
+         */
+        if (s.to_write && !sh->reconstruct_state && !sh->check_state)
                handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
        /* maybe we need to check and possibly fix the parity for this stripe
author	Yuri Tikhonov <yur@emcraft.com>	2009-08-29 22:13:12 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2009-08-29 22:13:12 -0400
commit	a9b39a741a7e3b262b9f51fefb68e17b32756999 (patch)
tree	659f32a0377ff241627eedb60816bda283e2233f /drivers/md/raid5.c
parent	5599becca4bee7badf605e41fd5bcde76d51f2a4 (diff)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 49da6f74d6d6..08f806379b07 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2766,99 +2766,46 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
2766	struct stripe_head *sh, struct stripe_head_state *s,	2766	struct stripe_head *sh, struct stripe_head_state *s,
2767	struct r6_state *r6s, int disks)	2767	struct r6_state *r6s, int disks)
2768	{	2768	{
2769	int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;	2769	int rcw = 0, pd_idx = sh->pd_idx, i;
2770	int qd_idx = sh->qd_idx;	2770	int qd_idx = sh->qd_idx;
		2771
		2772	set_bit(STRIPE_HANDLE, &sh->state);
2771	for (i = disks; i--; ) {	2773	for (i = disks; i--; ) {
2772	struct r5dev *dev = &sh->dev[i];	2774	struct r5dev *dev = &sh->dev[i];
2773	/* Would I have to read this buffer for reconstruct_write */	2775	/* check if we haven't enough data */
2774	if (!test_bit(R5_OVERWRITE, &dev->flags)	2776	if (!test_bit(R5_OVERWRITE, &dev->flags) &&
2775	&& i != pd_idx && i != qd_idx	2777	i != pd_idx && i != qd_idx &&
2776	&& (!test_bit(R5_LOCKED, &dev->flags)	2778	!test_bit(R5_LOCKED, &dev->flags) &&
2777	) &&	2779	!(test_bit(R5_UPTODATE, &dev->flags) ||
2778	!test_bit(R5_UPTODATE, &dev->flags)) {	2780	test_bit(R5_Wantcompute, &dev->flags))) {
2779	if (test_bit(R5_Insync, &dev->flags)) rcw++;	2781	rcw++;
2780	else {	2782	if (!test_bit(R5_Insync, &dev->flags))
2781	pr_debug("raid6: must_compute: "	2783	continue; /* it's a failed drive */
2782	"disk %d flags=%#lx\n", i, dev->flags);	2784
2783	must_compute++;	2785	if (
		2786	test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
		2787	pr_debug("Read_old stripe %llu "
		2788	"block %d for Reconstruct\n",
		2789	(unsigned long long)sh->sector, i);
		2790	set_bit(R5_LOCKED, &dev->flags);
		2791	set_bit(R5_Wantread, &dev->flags);
		2792	s->locked++;
		2793	} else {
		2794	pr_debug("Request delayed stripe %llu "
		2795	"block %d for Reconstruct\n",
		2796	(unsigned long long)sh->sector, i);
		2797	set_bit(STRIPE_DELAYED, &sh->state);
		2798	set_bit(STRIPE_HANDLE, &sh->state);
2784	}	2799	}
2785	}	2800	}
2786	}	2801	}
2787	pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
2788	(unsigned long long)sh->sector, rcw, must_compute);
2789	set_bit(STRIPE_HANDLE, &sh->state);
2790
2791	if (rcw > 0)
2792	/* want reconstruct write, but need to get some data */
2793	for (i = disks; i--; ) {
2794	struct r5dev *dev = &sh->dev[i];
2795	if (!test_bit(R5_OVERWRITE, &dev->flags)
2796	&& !(s->failed == 0 && (i == pd_idx || i == qd_idx))
2797	&& !test_bit(R5_LOCKED, &dev->flags) &&
2798	!test_bit(R5_UPTODATE, &dev->flags) &&
2799	test_bit(R5_Insync, &dev->flags)) {
2800	if (
2801	test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2802	pr_debug("Read_old stripe %llu "
2803	"block %d for Reconstruct\n",
2804	(unsigned long long)sh->sector, i);
2805	set_bit(R5_LOCKED, &dev->flags);
2806	set_bit(R5_Wantread, &dev->flags);
2807	s->locked++;
2808	} else {
2809	pr_debug("Request delayed stripe %llu "
2810	"block %d for Reconstruct\n",
2811	(unsigned long long)sh->sector, i);
2812	set_bit(STRIPE_DELAYED, &sh->state);
2813	set_bit(STRIPE_HANDLE, &sh->state);
2814	}
2815	}
2816	}
2817	/* now if nothing is locked, and if we have enough data, we can start a	2802	/* now if nothing is locked, and if we have enough data, we can start a
2818	* write request	2803	* write request
2819	*/	2804	*/
2820	if (s->locked == 0 && rcw == 0 &&	2805	if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
		2806	s->locked == 0 && rcw == 0 &&
2821	!test_bit(STRIPE_BIT_DELAY, &sh->state)) {	2807	!test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2822	if (must_compute > 0) {	2808	schedule_reconstruction(sh, s, 1, 0);
2823	/* We have failed blocks and need to compute them */
2824	switch (s->failed) {
2825	case 0:
2826	BUG();
2827	case 1:
2828	compute_block_1(sh, r6s->failed_num[0], 0);
2829	break;
2830	case 2:
2831	compute_block_2(sh, r6s->failed_num[0],
2832	r6s->failed_num[1]);
2833	break;
2834	default: /* This request should have been failed? */
2835	BUG();
2836	}
2837	}
2838
2839	pr_debug("Computing parity for stripe %llu\n",
2840	(unsigned long long)sh->sector);
2841	compute_parity6(sh, RECONSTRUCT_WRITE);
2842	/* now every locked buffer is ready to be written */
2843	for (i = disks; i--; )
2844	if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2845	pr_debug("Writing stripe %llu block %d\n",
2846	(unsigned long long)sh->sector, i);
2847	s->locked++;
2848	set_bit(R5_Wantwrite, &sh->dev[i].flags);
2849	}
2850	if (s->locked == disks)
2851	if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
2852	atomic_inc(&conf->pending_full_writes);
2853	/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2854	set_bit(STRIPE_INSYNC, &sh->state);
2855
2856	if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2857	atomic_dec(&conf->preread_active_stripes);
2858	if (atomic_read(&conf->preread_active_stripes) <
2859	IO_THRESHOLD)
2860	md_wakeup_thread(conf->mddev->thread);
2861	}
2862	}	2809	}
2863	}	2810	}
2864		2811
@@ -3539,8 +3486,13 @@ static bool handle_stripe6(struct stripe_head *sh)
3539	(s.syncing && (s.uptodate < disks)) || s.expanding)	3486	(s.syncing && (s.uptodate < disks)) || s.expanding)
3540	handle_stripe_fill6(sh, &s, &r6s, disks);	3487	handle_stripe_fill6(sh, &s, &r6s, disks);
3541		3488
3542	/* now to consider writing and what else, if anything should be read */	3489	/* Now to consider new write requests and what else, if anything
3543	if (s.to_write)	3490	* should be read. We do not handle new writes when:
		3491	* 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
		3492	* 2/ A 'check' operation is in flight, as it may clobber the parity
		3493	* block.
		3494	*/
		3495	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
3544	handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);	3496	handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
3545		3497
3546	/* maybe we need to check and possibly fix the parity for this stripe	3498	/* maybe we need to check and possibly fix the parity for this stripe