[PATCH] md: improve handling of read errors with raid6

This is a simple port of matching functionality across from raid5. If we get a read error, we don't kick the drive straight away, but try to over-write with good data first. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: NeilBrown <neilb@suse.de> 2006-01-06 03:20:18 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-01-06 11:34:03 -0500
commit: d69762e98456b71167865db9e33e732a28dd36ab (patch)
tree: a93c3872d23db70e7e6089328cafd3eebba6aadb /drivers/md
parent: ca65b73bd9c301d243df93780f7b26579e6c9204 (diff)
1 files changed, 66 insertions, 4 deletions
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 52e8796bb8ac..7a51553d8be5 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -367,8 +367,8 @@ static void shrink_stripes(raid6_conf_t *conf)
        conf->slab_cache = NULL;
 }
-static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done,
+static int raid6_end_read_request(struct bio * bi, unsigned int bytes_done,
-                                   int error)
+                                  int error)
 {
        struct stripe_head *sh = bi->bi_private;
        raid6_conf_t *conf = sh->raid_conf;
@@ -420,9 +420,35 @@ static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done,
 #else
                set_bit(R5_UPTODATE, &sh->dev[i].flags);
 #endif
+                if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+                        printk(KERN_INFO "raid6: read error corrected!!\n");
+                        clear_bit(R5_ReadError, &sh->dev[i].flags);
+                        clear_bit(R5_ReWrite, &sh->dev[i].flags);
+                }
+                if (atomic_read(&conf->disks[i].rdev->read_errors))
+                        atomic_set(&conf->disks[i].rdev->read_errors, 0);
        } else {
-                md_error(conf->mddev, conf->disks[i].rdev);
+                int retry = 0;
                clear_bit(R5_UPTODATE, &sh->dev[i].flags);
+                atomic_inc(&conf->disks[i].rdev->read_errors);
+                if (conf->mddev->degraded)
+                        printk(KERN_WARNING "raid6: read error not correctable.\n");
+                else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+                        /* Oh, no!!! */
+                        printk(KERN_WARNING "raid6: read error NOT corrected!!\n");
+                else if (atomic_read(&conf->disks[i].rdev->read_errors)
+                         > conf->max_nr_stripes)
+                        printk(KERN_WARNING
+                               "raid6: Too many read errors, failing device.\n");
+                else
+                        retry = 1;
+                if (retry)
+                        set_bit(R5_ReadError, &sh->dev[i].flags);
+                else {
+                        clear_bit(R5_ReadError, &sh->dev[i].flags);
+                        clear_bit(R5_ReWrite, &sh->dev[i].flags);
+                        md_error(conf->mddev, conf->disks[i].rdev);
+                }
        }
        rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
 #if 0
@@ -1079,6 +1105,12 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
                if (dev->written) written++;
                rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
                if (!rdev || !test_bit(In_sync, &rdev->flags)) {
+                        /* The ReadError flag will just be confusing now */
+                        clear_bit(R5_ReadError, &dev->flags);
+                        clear_bit(R5_ReWrite, &dev->flags);
+                }
+                if (!rdev || !test_bit(In_sync, &rdev->flags)
+                    || test_bit(R5_ReadError, &dev->flags)) {
                        if ( failed < 2 )
                                failed_num[failed] = i;
                        failed++;
@@ -1095,6 +1127,14 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
        if (failed > 2 && to_read+to_write+written) {
                for (i=disks; i--; ) {
                        int bitmap_end = 0;
+                        if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+                                mdk_rdev_t *rdev = conf->disks[i].rdev;
+                                if (rdev && test_bit(In_sync, &rdev->flags))
+                                        /* multiple read failures in one stripe */
+                                        md_error(conf->mddev, rdev);
+                        }
                        spin_lock_irq(&conf->device_lock);
                        /* fail all writes first */
                        bi = sh->dev[i].towrite;
@@ -1130,7 +1170,8 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
                        }
                        /* fail any reads if this device is non-operational */
-                        if (!test_bit(R5_Insync, &sh->dev[i].flags)) {
+                        if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
+                            test_bit(R5_ReadError, &sh->dev[i].flags)) {
                                bi = sh->dev[i].toread;
                                sh->dev[i].toread = NULL;
                                if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
@@ -1457,6 +1498,27 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
                clear_bit(STRIPE_SYNCING, &sh->state);
        }
+        /* If the failed drives are just a ReadError, then we might need
+         * to progress the repair/check process
+         */
+        if (failed <= 2 && ! conf->mddev->ro)
+                for (i=0; i<failed;i++) {
+                        dev = &sh->dev[failed_num[i]];
+                        if (test_bit(R5_ReadError, &dev->flags)
+                            && !test_bit(R5_LOCKED, &dev->flags)
+                            && test_bit(R5_UPTODATE, &dev->flags)
+                                ) {
+                                if (!test_bit(R5_ReWrite, &dev->flags)) {
+                                        set_bit(R5_Wantwrite, &dev->flags);
+                                        set_bit(R5_ReWrite, &dev->flags);
+                                        set_bit(R5_LOCKED, &dev->flags);
+                                } else {
+                                        /* let's read it back */
+                                        set_bit(R5_Wantread, &dev->flags);
+                                        set_bit(R5_LOCKED, &dev->flags);
+                                }
+                        }
+                }
        spin_unlock(&sh->lock);
        while ((bi=return_bi)) {
author	NeilBrown <neilb@suse.de>	2006-01-06 03:20:18 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-06 11:34:03 -0500
commit	d69762e98456b71167865db9e33e732a28dd36ab (patch)
tree	a93c3872d23db70e7e6089328cafd3eebba6aadb /drivers/md
parent	ca65b73bd9c301d243df93780f7b26579e6c9204 (diff)

diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index 52e8796bb8ac..7a51553d8be5 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c
@@ -367,8 +367,8 @@ static void shrink_stripes(raid6_conf_t *conf)
367	conf->slab_cache = NULL;	367	conf->slab_cache = NULL;
368	}	368	}
369		369
370	static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done,	370	static int raid6_end_read_request(struct bio * bi, unsigned int bytes_done,
371	int error)	371	int error)
372	{	372	{
373	struct stripe_head *sh = bi->bi_private;	373	struct stripe_head *sh = bi->bi_private;
374	raid6_conf_t *conf = sh->raid_conf;	374	raid6_conf_t *conf = sh->raid_conf;
@@ -420,9 +420,35 @@ static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done,
420	#else	420	#else
421	set_bit(R5_UPTODATE, &sh->dev[i].flags);	421	set_bit(R5_UPTODATE, &sh->dev[i].flags);
422	#endif	422	#endif
		423	if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
		424	printk(KERN_INFO "raid6: read error corrected!!\n");
		425	clear_bit(R5_ReadError, &sh->dev[i].flags);
		426	clear_bit(R5_ReWrite, &sh->dev[i].flags);
		427	}
		428	if (atomic_read(&conf->disks[i].rdev->read_errors))
		429	atomic_set(&conf->disks[i].rdev->read_errors, 0);
423	} else {	430	} else {
424	md_error(conf->mddev, conf->disks[i].rdev);	431	int retry = 0;
425	clear_bit(R5_UPTODATE, &sh->dev[i].flags);	432	clear_bit(R5_UPTODATE, &sh->dev[i].flags);
		433	atomic_inc(&conf->disks[i].rdev->read_errors);
		434	if (conf->mddev->degraded)
		435	printk(KERN_WARNING "raid6: read error not correctable.\n");
		436	else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
		437	/* Oh, no!!! */
		438	printk(KERN_WARNING "raid6: read error NOT corrected!!\n");
		439	else if (atomic_read(&conf->disks[i].rdev->read_errors)
		440	> conf->max_nr_stripes)
		441	printk(KERN_WARNING
		442	"raid6: Too many read errors, failing device.\n");
		443	else
		444	retry = 1;
		445	if (retry)
		446	set_bit(R5_ReadError, &sh->dev[i].flags);
		447	else {
		448	clear_bit(R5_ReadError, &sh->dev[i].flags);
		449	clear_bit(R5_ReWrite, &sh->dev[i].flags);
		450	md_error(conf->mddev, conf->disks[i].rdev);
		451	}
426	}	452	}
427	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);	453	rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
428	#if 0	454	#if 0
@@ -1079,6 +1105,12 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
1079	if (dev->written) written++;	1105	if (dev->written) written++;
1080	rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */	1106	rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
1081	if (!rdev || !test_bit(In_sync, &rdev->flags)) {	1107	if (!rdev || !test_bit(In_sync, &rdev->flags)) {
		1108	/* The ReadError flag will just be confusing now */
		1109	clear_bit(R5_ReadError, &dev->flags);
		1110	clear_bit(R5_ReWrite, &dev->flags);
		1111	}
		1112	if (!rdev || !test_bit(In_sync, &rdev->flags)
		1113	|| test_bit(R5_ReadError, &dev->flags)) {
1082	if ( failed < 2 )	1114	if ( failed < 2 )
1083	failed_num[failed] = i;	1115	failed_num[failed] = i;
1084	failed++;	1116	failed++;
@@ -1095,6 +1127,14 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
1095	if (failed > 2 && to_read+to_write+written) {	1127	if (failed > 2 && to_read+to_write+written) {
1096	for (i=disks; i--; ) {	1128	for (i=disks; i--; ) {
1097	int bitmap_end = 0;	1129	int bitmap_end = 0;
		1130
		1131	if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
		1132	mdk_rdev_t *rdev = conf->disks[i].rdev;
		1133	if (rdev && test_bit(In_sync, &rdev->flags))
		1134	/* multiple read failures in one stripe */
		1135	md_error(conf->mddev, rdev);
		1136	}
		1137
1098	spin_lock_irq(&conf->device_lock);	1138	spin_lock_irq(&conf->device_lock);
1099	/* fail all writes first */	1139	/* fail all writes first */
1100	bi = sh->dev[i].towrite;	1140	bi = sh->dev[i].towrite;
@@ -1130,7 +1170,8 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
1130	}	1170	}
1131		1171
1132	/* fail any reads if this device is non-operational */	1172	/* fail any reads if this device is non-operational */
1133	if (!test_bit(R5_Insync, &sh->dev[i].flags)) {	1173	if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
		1174	test_bit(R5_ReadError, &sh->dev[i].flags)) {
1134	bi = sh->dev[i].toread;	1175	bi = sh->dev[i].toread;
1135	sh->dev[i].toread = NULL;	1176	sh->dev[i].toread = NULL;
1136	if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))	1177	if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
@@ -1457,6 +1498,27 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
1457	clear_bit(STRIPE_SYNCING, &sh->state);	1498	clear_bit(STRIPE_SYNCING, &sh->state);
1458	}	1499	}
1459		1500
		1501	/* If the failed drives are just a ReadError, then we might need
		1502	* to progress the repair/check process
		1503	*/
		1504	if (failed <= 2 && ! conf->mddev->ro)
		1505	for (i=0; i<failed;i++) {
		1506	dev = &sh->dev[failed_num[i]];
		1507	if (test_bit(R5_ReadError, &dev->flags)
		1508	&& !test_bit(R5_LOCKED, &dev->flags)
		1509	&& test_bit(R5_UPTODATE, &dev->flags)
		1510	) {
		1511	if (!test_bit(R5_ReWrite, &dev->flags)) {
		1512	set_bit(R5_Wantwrite, &dev->flags);
		1513	set_bit(R5_ReWrite, &dev->flags);
		1514	set_bit(R5_LOCKED, &dev->flags);
		1515	} else {
		1516	/* let's read it back */
		1517	set_bit(R5_Wantread, &dev->flags);
		1518	set_bit(R5_LOCKED, &dev->flags);
		1519	}
		1520	}
		1521	}
1460	spin_unlock(&sh->lock);	1522	spin_unlock(&sh->lock);
1461		1523
1462	while ((bi=return_bi)) {	1524	while ((bi=return_bi)) {