aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2006-01-06 03:20:29 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-06 11:34:05 -0500
commit 0eb3ff12aa8a12538ef681dc83f4361636a0699f (patch)
tree e5d4b8353362cefd096a4b1ee1a8f473c01169b6
parent 4443ae10ca15d07922ceda622f03db8865fa3d13 (diff)
[PATCH] md: raid10 read-error handling - resync and read-only
Add in correct read-error handling for resync and read-only situations.

When read-only, we don't over-write, so we need to mark the failed drive in the r10_bio so we don't re-try it.

During resync, we always read all blocks, so if there is a read error, we simply over-write it with the good block that we found (assuming we found one).

Note that the recovery case still isn't handled in an interesting way. There is nothing useful to do for the 2-copies case. If there are 3 or more copies, then we could try reading from one of the non-missing copies, but this is a bit complicated and very rarely would be used, so I'm leaving it for now.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- drivers/md/raid10.c 56
-rw-r--r-- include/linux/raid/raid10.h 7
2 files changed, 42 insertions, 21 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 64bb4ddc6798..3f8df2ecbae3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -172,7 +172,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
172 172
173 for (i = 0; i < conf->copies; i++) { 173 for (i = 0; i < conf->copies; i++) {
174 struct bio **bio = & r10_bio->devs[i].bio; 174 struct bio **bio = & r10_bio->devs[i].bio;
175 if (*bio) 175 if (*bio && *bio != IO_BLOCKED)
176 bio_put(*bio); 176 bio_put(*bio);
177 *bio = NULL; 177 *bio = NULL;
178 } 178 }
@@ -500,6 +500,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
500 disk = r10_bio->devs[slot].devnum; 500 disk = r10_bio->devs[slot].devnum;
501 501
502 while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL || 502 while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
503 r10_bio->devs[slot].bio == IO_BLOCKED ||
503 !test_bit(In_sync, &rdev->flags)) { 504 !test_bit(In_sync, &rdev->flags)) {
504 slot++; 505 slot++;
505 if (slot == conf->copies) { 506 if (slot == conf->copies) {
@@ -517,6 +518,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
517 slot = 0; 518 slot = 0;
518 disk = r10_bio->devs[slot].devnum; 519 disk = r10_bio->devs[slot].devnum;
519 while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL || 520 while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
521 r10_bio->devs[slot].bio == IO_BLOCKED ||
520 !test_bit(In_sync, &rdev->flags)) { 522 !test_bit(In_sync, &rdev->flags)) {
521 slot ++; 523 slot ++;
522 if (slot == conf->copies) { 524 if (slot == conf->copies) {
@@ -537,6 +539,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
537 539
538 540
539 if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL || 541 if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
542 r10_bio->devs[nslot].bio == IO_BLOCKED ||
540 !test_bit(In_sync, &rdev->flags)) 543 !test_bit(In_sync, &rdev->flags))
541 continue; 544 continue;
542 545
@@ -1104,7 +1107,6 @@ abort:
1104 1107
1105static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) 1108static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
1106{ 1109{
1107 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1108 r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private); 1110 r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
1109 conf_t *conf = mddev_to_conf(r10_bio->mddev); 1111 conf_t *conf = mddev_to_conf(r10_bio->mddev);
1110 int i,d; 1112 int i,d;
@@ -1119,7 +1121,10 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
1119 BUG(); 1121 BUG();
1120 update_head_pos(i, r10_bio); 1122 update_head_pos(i, r10_bio);
1121 d = r10_bio->devs[i].devnum; 1123 d = r10_bio->devs[i].devnum;
1122 if (!uptodate) 1124
1125 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
1126 set_bit(R10BIO_Uptodate, &r10_bio->state);
1127 else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
1123 md_error(r10_bio->mddev, 1128 md_error(r10_bio->mddev,
1124 conf->mirrors[d].rdev); 1129 conf->mirrors[d].rdev);
1125 1130
@@ -1209,25 +1214,30 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1209 fbio = r10_bio->devs[i].bio; 1214 fbio = r10_bio->devs[i].bio;
1210 1215
1211 /* now find blocks with errors */ 1216 /* now find blocks with errors */
1212 for (i=first+1 ; i < conf->copies ; i++) { 1217 for (i=0 ; i < conf->copies ; i++) {
1213 int vcnt, j, d; 1218 int j, d;
1219 int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
1214 1220
1215 if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
1216 continue;
1217 /* We know that the bi_io_vec layout is the same for
1218 * both 'first' and 'i', so we just compare them.
1219 * All vec entries are PAGE_SIZE;
1220 */
1221 tbio = r10_bio->devs[i].bio; 1221 tbio = r10_bio->devs[i].bio;
1222 vcnt = r10_bio->sectors >> (PAGE_SHIFT-9); 1222
1223 for (j = 0; j < vcnt; j++) 1223 if (tbio->bi_end_io != end_sync_read)
1224 if (memcmp(page_address(fbio->bi_io_vec[j].bv_page), 1224 continue;
1225 page_address(tbio->bi_io_vec[j].bv_page), 1225 if (i == first)
1226 PAGE_SIZE))
1227 break;
1228 if (j == vcnt)
1229 continue; 1226 continue;
1230 mddev->resync_mismatches += r10_bio->sectors; 1227 if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
1228 /* We know that the bi_io_vec layout is the same for
1229 * both 'first' and 'i', so we just compare them.
1230 * All vec entries are PAGE_SIZE;
1231 */
1232 for (j = 0; j < vcnt; j++)
1233 if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
1234 page_address(tbio->bi_io_vec[j].bv_page),
1235 PAGE_SIZE))
1236 break;
1237 if (j == vcnt)
1238 continue;
1239 mddev->resync_mismatches += r10_bio->sectors;
1240 }
1231 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) 1241 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
1232 /* Don't fix anything. */ 1242 /* Don't fix anything. */
1233 continue; 1243 continue;
@@ -1308,7 +1318,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1308 1318
1309 atomic_inc(&conf->mirrors[d].rdev->nr_pending); 1319 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
1310 md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); 1320 md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
1311 generic_make_request(wbio); 1321 if (test_bit(R10BIO_Uptodate, &r10_bio->state))
1322 generic_make_request(wbio);
1323 else
1324 bio_endio(wbio, wbio->bi_size, -EIO);
1312} 1325}
1313 1326
1314 1327
@@ -1445,7 +1458,8 @@ static void raid10d(mddev_t *mddev)
1445 unfreeze_array(conf); 1458 unfreeze_array(conf);
1446 1459
1447 bio = r10_bio->devs[r10_bio->read_slot].bio; 1460 bio = r10_bio->devs[r10_bio->read_slot].bio;
1448 r10_bio->devs[r10_bio->read_slot].bio = NULL; 1461 r10_bio->devs[r10_bio->read_slot].bio =
1462 mddev->ro ? IO_BLOCKED : NULL;
1449 bio_put(bio); 1463 bio_put(bio);
1450 mirror = read_balance(conf, r10_bio); 1464 mirror = read_balance(conf, r10_bio);
1451 if (mirror == -1) { 1465 if (mirror == -1) {
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index dfa528385e3f..b1103298a8c2 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -104,6 +104,13 @@ struct r10bio_s {
104 } devs[0]; 104 } devs[0];
105}; 105};
106 106
107/* when we get a read error on a read-only array, we redirect to another
108 * device without failing the first device, or trying to over-write to
109 * correct the read error. To keep track of bad blocks on a per-bio
110 * level, we store IO_BLOCKED in the appropriate 'bios' pointer
111 */
112#define IO_BLOCKED ((struct bio*)1)
113
107/* bits for r10bio.state */ 114/* bits for r10bio.state */
108#define R10BIO_Uptodate 0 115#define R10BIO_Uptodate 0
109#define R10BIO_IsSync 1 116#define R10BIO_IsSync 1