author     NeilBrown <neilb@suse.de>    2011-07-27 21:39:24 -0400
committer  NeilBrown <neilb@suse.de>    2011-07-27 21:39:24 -0400
commit     749c55e942d91cb27045fe2eb313aa5afe68ae0b
tree       ddf80a1eb3ef9005bc209c1c4946916b89c22a33 /drivers/md
parent     d4432c23be957ff061f7b23fd60e8506cb472a55
md/raid10: clear bad-block record when write succeeds.
If we succeed in writing to a block that was recorded as
being bad, we clear the bad-block record.
This requires some delayed handling as the bad-block-list update has
to happen in process-context.
Signed-off-by: NeilBrown <neilb@suse.de>
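
To make the deferral concrete, below is a small stand-alone userspace sketch of the same pattern. It is not kernel code: every name in it (toy_request, write_completed, handle_write_completed, free_slots, dev_has_badblock) is invented for illustration. The completion handler only tags the finished copy with a sentinel pointer (mirroring IO_MADE_GOOD) and sets a flag (mirroring R10BIO_MadeGood); a later call made from ordinary "process context" (standing in for raid10d) clears the bad-block record, and the teardown path skips sentinel values the way put_all_bios() does with BIO_SPECIAL().

    /* Illustrative sketch only -- not the kernel implementation. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <stdbool.h>

    #define SLOT_BLOCKED    ((void *)1)               /* mirrors IO_BLOCKED   */
    #define SLOT_MADE_GOOD  ((void *)2)               /* mirrors IO_MADE_GOOD */
    #define SLOT_SPECIAL(p) ((unsigned long)(p) <= 2) /* mirrors BIO_SPECIAL  */

    #define COPIES 2

    struct toy_request {
            void *slot[COPIES];     /* per-copy "bio" pointers                */
            bool  made_good;        /* mirrors the R10BIO_MadeGood state flag */
            long  addr, sectors;    /* range that was written                 */
    };

    /* Pretend per-device bad-block table: one flag per copy. */
    static bool dev_has_badblock[COPIES] = { true, false };

    /* Completion handler: may not edit the table here, so only record the fact. */
    static void write_completed(struct toy_request *req, int copy)
    {
            if (dev_has_badblock[copy]) {
                    free(req->slot[copy]);          /* done with the real "bio" */
                    req->slot[copy] = SLOT_MADE_GOOD;
                    req->made_good = true;
            }
    }

    /* Deferred worker ("process context"): now it is safe to edit the table. */
    static void handle_write_completed(struct toy_request *req)
    {
            for (int copy = 0; copy < COPIES; copy++)
                    if (req->slot[copy] == SLOT_MADE_GOOD) {
                            dev_has_badblock[copy] = false;
                            printf("cleared bad-block record on copy %d (%ld+%ld)\n",
                                   copy, req->addr, req->sectors);
                    }
    }

    /* Teardown, mirroring put_all_bios(): sentinel values must never be freed. */
    static void free_slots(struct toy_request *req)
    {
            for (int copy = 0; copy < COPIES; copy++) {
                    if (!SLOT_SPECIAL(req->slot[copy]))
                            free(req->slot[copy]);
                    req->slot[copy] = NULL;
            }
    }

    int main(void)
    {
            struct toy_request req = { .addr = 1024, .sectors = 8 };

            for (int copy = 0; copy < COPIES; copy++)
                    req.slot[copy] = malloc(64);    /* stand-in for a real bio */

            write_completed(&req, 0);       /* wrote over a known-bad range    */
            write_completed(&req, 1);       /* clean copy: nothing to remember */
            if (req.made_good)
                    handle_write_completed(&req);   /* deferred, as raid10d does */

            free_slots(&req);
            return 0;
    }
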
Diffstat (limited to 'drivers/md')
 -rw-r--r--  drivers/md/raid10.c | 101
 -rw-r--r--  drivers/md/raid10.h |  11
 2 files changed, 100 insertions(+), 12 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 13077a3fd7d2..39b2058845f5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -181,7 +181,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio **bio = & r10_bio->devs[i].bio;
-		if (*bio && *bio != IO_BLOCKED)
+		if (!BIO_SPECIAL(*bio))
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -267,7 +267,8 @@ static inline void update_head_pos(int slot, r10bio_t *r10_bio)
 /*
  * Find the disk number which triggered given bio
  */
-static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio, struct bio *bio)
+static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio,
+			 struct bio *bio, int *slotp)
 {
 	int slot;
 
@@ -278,6 +279,8 @@ static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio, struct bio *bio)
 	BUG_ON(slot == conf->copies);
 	update_head_pos(slot, r10_bio);
 
+	if (slotp)
+		*slotp = slot;
 	return r10_bio->devs[slot].devnum;
 }
 
@@ -329,9 +332,11 @@ static void raid10_end_write_request(struct bio *bio, int error)
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r10bio_t *r10_bio = bio->bi_private;
 	int dev;
+	int dec_rdev = 1;
 	conf_t *conf = r10_bio->mddev->private;
+	int slot;
 
-	dev = find_bio_disk(conf, r10_bio, bio);
+	dev = find_bio_disk(conf, r10_bio, bio, &slot);
 
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
@@ -340,7 +345,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
 		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
 		/* an I/O failed, we can't clear the bitmap */
 		set_bit(R10BIO_Degraded, &r10_bio->state);
-	} else
+	} else {
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code for to the higher
@@ -350,8 +355,23 @@ static void raid10_end_write_request(struct bio *bio, int error)
 		 * user-side. So if something waits for IO, then it will
 		 * wait for the 'master' bio.
 		 */
+		sector_t first_bad;
+		int bad_sectors;
+
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
 
+		/* Maybe we can clear some bad blocks. */
+		if (is_badblock(conf->mirrors[dev].rdev,
+				r10_bio->devs[slot].addr,
+				r10_bio->sectors,
+				&first_bad, &bad_sectors)) {
+			bio_put(bio);
+			r10_bio->devs[slot].bio = IO_MADE_GOOD;
+			dec_rdev = 0;
+			set_bit(R10BIO_MadeGood, &r10_bio->state);
+		}
+	}
+
 	/*
 	 *
 	 * Let's see if all mirrored write operations have finished
@@ -364,10 +384,13 @@ static void raid10_end_write_request(struct bio *bio, int error)
 				!test_bit(R10BIO_Degraded, &r10_bio->state),
 				0);
 		md_write_end(r10_bio->mddev);
-		raid_end_bio_io(r10_bio);
+		if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+			reschedule_retry(r10_bio);
+		else
+			raid_end_bio_io(r10_bio);
 	}
-
-	rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+	if (dec_rdev)
+		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
 
 
@@ -1382,7 +1405,7 @@ static void end_sync_read(struct bio *bio, int error)
 	conf_t *conf = r10_bio->mddev->private;
 	int d;
 
-	d = find_bio_disk(conf, r10_bio, bio);
+	d = find_bio_disk(conf, r10_bio, bio, NULL);
 
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
@@ -1414,23 +1437,37 @@ static void end_sync_write(struct bio *bio, int error)
 	mddev_t *mddev = r10_bio->mddev;
 	conf_t *conf = mddev->private;
 	int d;
+	sector_t first_bad;
+	int bad_sectors;
+	int slot;
 
-	d = find_bio_disk(conf, r10_bio, bio);
+	d = find_bio_disk(conf, r10_bio, bio, &slot);
 
 	if (!uptodate)
 		md_error(mddev, conf->mirrors[d].rdev);
+	else if (is_badblock(conf->mirrors[d].rdev,
+			     r10_bio->devs[slot].addr,
+			     r10_bio->sectors,
+			     &first_bad, &bad_sectors))
+		set_bit(R10BIO_MadeGood, &r10_bio->state);
 
 	rdev_dec_pending(conf->mirrors[d].rdev, mddev);
 	while (atomic_dec_and_test(&r10_bio->remaining)) {
 		if (r10_bio->master_bio == NULL) {
 			/* the primary of several recovery bios */
 			sector_t s = r10_bio->sectors;
-			put_buf(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				put_buf(r10_bio);
 			md_done_sync(mddev, s, 1);
 			break;
 		} else {
 			r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
-			put_buf(r10_bio);
+			if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+				reschedule_retry(r10_bio);
+			else
+				put_buf(r10_bio);
 			r10_bio = r10_bio2;
 		}
 	}
@@ -1901,6 +1938,44 @@ read_more:
 	generic_make_request(bio);
 }
 
+static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio)
+{
+	/* Some sort of write request has finished and it
+	 * succeeded in writing where we thought there was a
+	 * bad block. So forget the bad block.
+	 */
+	int m;
+	mdk_rdev_t *rdev;
+
+	if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
+	    test_bit(R10BIO_IsRecover, &r10_bio->state)) {
+		for (m = 0; m < conf->copies; m++)
+			if (r10_bio->devs[m].bio &&
+			    test_bit(BIO_UPTODATE,
+				     &r10_bio->devs[m].bio->bi_flags)) {
+				int dev = r10_bio->devs[m].devnum;
+				rdev = conf->mirrors[dev].rdev;
+				rdev_clear_badblocks(
+					rdev,
+					r10_bio->devs[m].addr,
+					r10_bio->sectors);
+			}
+		put_buf(r10_bio);
+	} else {
+		for (m = 0; m < conf->copies; m++)
+			if (r10_bio->devs[m].bio == IO_MADE_GOOD) {
+				int dev = r10_bio->devs[m].devnum;
+				rdev = conf->mirrors[dev].rdev;
+				rdev_clear_badblocks(
+					rdev,
+					r10_bio->devs[m].addr,
+					r10_bio->sectors);
+				rdev_dec_pending(rdev, conf->mddev);
+			}
+		raid_end_bio_io(r10_bio);
+	}
+}
+
 static void raid10d(mddev_t *mddev)
 {
 	r10bio_t *r10_bio;
@@ -1928,7 +2003,9 @@ static void raid10d(mddev_t *mddev)
 
 		mddev = r10_bio->mddev;
 		conf = mddev->private;
-		if (test_bit(R10BIO_IsSync, &r10_bio->state))
+		if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+			handle_write_completed(conf, r10_bio);
+		else if (test_bit(R10BIO_IsSync, &r10_bio->state))
 			sync_request_write(mddev, r10_bio);
 		else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
 			recovery_request_write(mddev, r10_bio);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index c646152ba4e4..d8b7f9af92d5 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -118,6 +118,13 @@ struct r10bio_s {
  * level, we store IO_BLOCKED in the appropriate 'bios' pointer
  */
 #define IO_BLOCKED ((struct bio*)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
 
 /* bits for r10bio.state */
 #define R10BIO_Uptodate	0
@@ -128,4 +135,8 @@ struct r10bio_s {
  * so that raid10d knows what to do with them.
  */
 #define R10BIO_ReadError	4
+/* If a write for this request means we can clear some
+ * known-bad-block records, we set this flag.
+ */
+#define R10BIO_MadeGood	5
 #endif
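
As a quick sanity check on the sentinel encoding added above, the short stand-alone program below confirms that NULL, IO_BLOCKED and IO_MADE_GOOD all count as "special" (and so are skipped by paths like put_all_bios()), while any real bio pointer is not. This is test scaffolding only: struct bio here is an opaque stand-in, and nothing beyond the two #defines and the BIO_SPECIAL() macro copied from the hunk comes from the kernel.

    #include <assert.h>
    #include <stdio.h>

    struct bio;                                     /* opaque stand-in */

    #define IO_BLOCKED   ((struct bio *)1)
    #define IO_MADE_GOOD ((struct bio *)2)
    #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)

    int main(void)
    {
            int backing = 0;
            struct bio *real = (struct bio *)&backing;  /* any real address */

            assert(BIO_SPECIAL((struct bio *)NULL));    /* empty slot               */
            assert(BIO_SPECIAL(IO_BLOCKED));            /* blocked-read marker      */
            assert(BIO_SPECIAL(IO_MADE_GOOD));          /* deferred bad-block clear */
            assert(!BIO_SPECIAL(real));                 /* safe to hand to bio_put()*/

            puts("sentinel checks passed");
            return 0;
    }
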