author     NeilBrown <neilb@suse.de>    2011-07-27 21:39:24 -0400
committer  NeilBrown <neilb@suse.de>    2011-07-27 21:39:24 -0400
commit     749c55e942d91cb27045fe2eb313aa5afe68ae0b
tree       ddf80a1eb3ef9005bc209c1c4946916b89c22a33 /drivers/md
parent     d4432c23be957ff061f7b23fd60e8506cb472a55
md/raid10: clear bad-block record when write succeeds.
If we succeed in writing to a block that was recorded as
being bad, we clear the bad-block record.
This requires some delayed handling as the bad-block-list update has
to happen in process-context.
Signed-off-by: NeilBrown <neilb@suse.de>
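
To make the deferral concrete, below is a small stand-alone userspace sketch of the same pattern. It is not kernel code: every name in it (toy_request, write_completed, handle_write_completed, free_slots, dev_has_badblock) is invented for illustration. The completion handler only tags the finished copy with a sentinel pointer (mirroring IO_MADE_GOOD) and sets a flag (mirroring R10BIO_MadeGood); a later call made from ordinary "process context" (standing in for raid10d) clears the bad-block record, and the teardown path skips sentinel values the way put_all_bios() does with BIO_SPECIAL().

    /* Illustrative sketch only -- not the kernel implementation. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdbool.h>

    #define SLOT_BLOCKED    ((void *)1)               /* mirrors IO_BLOCKED   */
    #define SLOT_MADE_GOOD  ((void *)2)               /* mirrors IO_MADE_GOOD */
    #define SLOT_SPECIAL(p) ((unsigned long)(p) <= 2) /* mirrors BIO_SPECIAL  */

    #define COPIES 2

    struct toy_request {
            void *slot[COPIES];     /* per-copy "bio" pointers                */
            bool  made_good;        /* mirrors the R10BIO_MadeGood state flag */
            long  addr, sectors;    /* range that was written                 */
    };

    /* Pretend per-device bad-block table: one flag per copy. */
    static bool dev_has_badblock[COPIES] = { true, false };

    /* Completion handler: may not edit the table here, so only record the fact. */
    static void write_completed(struct toy_request *req, int copy)
    {
            if (dev_has_badblock[copy]) {
                    free(req->slot[copy]);          /* done with the real "bio" */
                    req->slot[copy] = SLOT_MADE_GOOD;
                    req->made_good = true;
            }
    }

    /* Deferred worker ("process context"): now it is safe to edit the table. */
    static void handle_write_completed(struct toy_request *req)
    {
            for (int copy = 0; copy < COPIES; copy++)
                    if (req->slot[copy] == SLOT_MADE_GOOD) {
                            dev_has_badblock[copy] = false;
                            printf("cleared bad-block record on copy %d (%ld+%ld)\n",
                                   copy, req->addr, req->sectors);
                    }
    }

    /* Teardown, mirroring put_all_bios(): sentinel values must never be freed. */
    static void free_slots(struct toy_request *req)
    {
            for (int copy = 0; copy < COPIES; copy++) {
                    if (!SLOT_SPECIAL(req->slot[copy]))
                            free(req->slot[copy]);
                    req->slot[copy] = NULL;
            }
    }

    int main(void)
    {
            struct toy_request req = { .addr = 1024, .sectors = 8 };

            for (int copy = 0; copy < COPIES; copy++)
                    req.slot[copy] = malloc(64);    /* stand-in for a real bio */

            write_completed(&req, 0);       /* wrote over a known-bad range    */
            write_completed(&req, 1);       /* clean copy: nothing to remember */
            if (req.made_good)
                    handle_write_completed(&req);   /* deferred, as raid10d does */

            free_slots(&req);
            return 0;
    }
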
Diffstat (limited to 'drivers/md')
 -rw-r--r--  drivers/md/raid10.c | 101
 -rw-r--r--  drivers/md/raid10.h |  11
 2 files changed, 100 insertions(+), 12 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 13077a3fd7d2..39b2058845f5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -181,7 +181,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio **bio = & r10_bio->devs[i].bio;
-		if (*bio && *bio != IO_BLOCKED)
+		if (!BIO_SPECIAL(*bio))
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -267,7 +267,8 @@ static inline void update_head_pos(int slot, r10bio_t *r10_bio)
 /*
  * Find the disk number which triggered given bio
  */
-static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio, struct bio *bio)
+static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio,
+			 struct bio *bio, int *slotp)
 {
 	int slot;
 
@@ -278,6 +279,8 @@ static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio, struct bio *bio)
 	BUG_ON(slot == conf->copies);
 	update_head_pos(slot, r10_bio);
 
+	if (slotp)
+		*slotp = slot;
 	return r10_bio->devs[slot].devnum;
 }
 
@@ -329,9 +332,11 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r10bio_t *r10_bio = bio->bi_private;
 	int dev;
+	int dec_rdev = 1;
 	conf_t *conf = r10_bio->mddev->private;
+	int slot;
 
-	dev = find_bio_disk(conf, r10_bio, bio);
+	dev = find_bio_disk(conf, r10_bio, bio, &slot);
 
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
@@ -340,7 +345,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
 		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
 		/* an I/O failed, we can't clear the bitmap */
 		set_bit(R10BIO_Degraded, &r10_bio->state);
-	} else
+	} else {
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code for to the higher
@@ -350,8 +355,23 @@ static void raid10_end_write_request(struct bio *bio, int error)
 		 * user-side. So if something waits for IO, then it will
 		 * wait for the 'master' bio.
 		 */
+		sector_t first_bad;
+		int bad_sectors;
+
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 
+		/* Maybe we can clear some bad blocks. */
+		if (is_badblock(conf->mirrors[dev].rdev,
+				r10_bio->devs[slot].addr,
+				r10_bio->sectors,
+				&first_bad, &bad_sectors)) {
+			bio_put(bio);
+			r10_bio->devs[slot].bio = IO_MADE_GOOD;
+			dec_rdev = 0;
+			set_bit(R10BIO_MadeGood, &r10_bio->state);
+		}
+	}
+
 	/*
 	 *
 	 * Let's see if all mirrored write operations have finished
@@ -364,10 +384,13 @@ static void raid10_end_write_request(struct bio *bio, int error)
 				!test_bit(R10BIO_Degraded, &r10_bio->state),
 				0);
 		md_write_end(r10_bio->mddev);
-		raid_end_bio_io(r10_bio);
+		if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+			reschedule_retry(r10_bio);
+		else
+			raid_end_bio_io(r10_bio);
 	}
-
-	rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+	if (dec_rdev)
+		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
 
 
@@ -1382,7 +1405,7 @@ static void end_sync_read(struct bio *bio, int error)
 	conf_t *conf = r10_bio->mddev->private;
 	int d;
 
-	d = find_bio_disk(conf, r10_bio, bio);
+	d = find_bio_disk(conf, r10_bio, bio, NULL);
 
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
@@ -1414,23 +1437,37 @@ static void end_sync_write(struct bio *bio, int error)
 	mddev_t *mddev = r10_bio->mddev;
 	conf_t *conf = mddev->private;
 	int d;
+	sector_t first_bad;
+	int bad_sectors;
+	int slot;
 
-	d = find_bio_disk(conf, r10_bio, bio);
+	d = find_bio_disk(conf, r10_bio, bio, &slot);
 
 	if (!uptodate)
 		md_error(mddev, conf->mirrors[d].rdev);
+	else if (is_badblock(conf->mirrors[d].rdev,
+			     r10_bio->devs[slot].addr,
+			     r10_bio->sectors,
+			     &first_bad, &bad_sectors))
+		set_bit(R10BIO_MadeGood, &r10_bio->state);
 
 	rdev_dec_pending(conf->mirrors[d].rdev, mddev);
 	while (atomic_dec_and_test(&r10_bio->remaining)) {
 		if (r10_bio->master_bio == NULL) {
 			/* the primary of several recovery bios */
 			sector_t s = r10_bio->sectors;
-			put_buf(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				put_buf(r10_bio);
 			md_done_sync(mddev, s, 1);
 			break;
 		} else {
 			r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
-			put_buf(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				put_buf(r10_bio);
 			r10_bio = r10_bio2;
 		}
 	}
@@ -1901,6 +1938,44 @@ read_more:
 	generic_make_request(bio);
 }
 
+static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio)
+{
+	/* Some sort of write request has finished and it
+	 * succeeded in writing where we thought there was a
+	 * bad block. So forget the bad block.
+	 */
+	int m;
+	mdk_rdev_t *rdev;
+
+	if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
+	    test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+		for (m = 0; m < conf->copies; m++)
+			if (r10_bio->devs[m].bio &&
+			    test_bit(BIO_UPTODATE,
+				     &r10_bio->devs[m].bio->bi_flags)) {
+				int dev = r10_bio->devs[m].devnum;
+				rdev = conf->mirrors[dev].rdev;
+				rdev_clear_badblocks(
+					rdev,
+					r10_bio->devs[m].addr,
+					r10_bio->sectors);
+			}
+		put_buf(r10_bio);
+	} else {
+		for (m = 0; m < conf->copies; m++)
+			if (r10_bio->devs[m].bio == IO_MADE_GOOD) {
+				int dev = r10_bio->devs[m].devnum;
+				rdev = conf->mirrors[dev].rdev;
+				rdev_clear_badblocks(
+					rdev,
+					r10_bio->devs[m].addr,
+					r10_bio->sectors);
+				rdev_dec_pending(rdev, conf->mddev);
+			}
+		raid_end_bio_io(r10_bio);
+	}
+}
+
 static void raid10d(mddev_t *mddev)
 {
 	r10bio_t *r10_bio;
@@ -1928,7 +2003,9 @@ static void raid10d(mddev_t *mddev)
 
 		mddev = r10_bio->mddev;
 		conf = mddev->private;
-		if (test_bit(R10BIO_IsSync, &r10_bio->state))
+		if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+			handle_write_completed(conf, r10_bio);
+		else if (test_bit(R10BIO_IsSync, &r10_bio->state))
 			sync_request_write(mddev, r10_bio);
 		else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
 			recovery_request_write(mddev, r10_bio);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index c646152ba4e4..d8b7f9af92d5 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -118,6 +118,13 @@ struct r10bio_s {
  * level, we store IO_BLOCKED in the appropriate 'bios' pointer
  */
 #define IO_BLOCKED ((struct bio*)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
 
 /* bits for r10bio.state */
 #define R10BIO_Uptodate	0
@@ -128,4 +135,8 @@ struct r10bio_s {
  * so that raid10d knows what to do with them.
  */
 #define R10BIO_ReadError	4
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag.
+ */
+#define R10BIO_MadeGood	5
 #endif
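
As a quick sanity check on the sentinel encoding added above, the short stand-alone program below confirms that NULL, IO_BLOCKED and IO_MADE_GOOD all count as "special" (and so are skipped by paths like put_all_bios()), while any real bio pointer is not. This is test scaffolding only: struct bio here is an opaque stand-in, and nothing beyond the two #defines and the BIO_SPECIAL() macro copied from the hunk comes from the kernel.

    #include <assert.h>
    #include <stdio.h>

    struct bio;                                     /* opaque stand-in */

    #define IO_BLOCKED   ((struct bio *)1)
    #define IO_MADE_GOOD ((struct bio *)2)
    #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)

    int main(void)
    {
            int backing = 0;
            struct bio *real = (struct bio *)&backing;  /* any real address */

            assert(BIO_SPECIAL((struct bio *)NULL));    /* empty slot               */
            assert(BIO_SPECIAL(IO_BLOCKED));            /* blocked-read marker      */
            assert(BIO_SPECIAL(IO_MADE_GOOD));          /* deferred bad-block clear */
            assert(!BIO_SPECIAL(real));                 /* safe to hand to bio_put()*/

            puts("sentinel checks passed");
            return 0;
    }
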