aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-07-27 21:39:24 -0400
committerNeilBrown <neilb@suse.de>2011-07-27 21:39:24 -0400
commitbd870a16c5946d86126f7203db3c73b71de0a1d8 (patch)
tree3492f61a441266c796b8a08ec30c06f7f6f27fbc
parent749c55e942d91cb27045fe2eb313aa5afe68ae0b (diff)
md/raid10: Handle write errors by updating badblock log.
When we get a write error (in the data area, not in metadata), update the badblock log rather than failing the whole device. As the write may well be many blocks, we try writing each block individually and only log the ones which fail. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid10.c133
-rw-r--r--drivers/md/raid10.h1
2 files changed, 117 insertions, 17 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 39b2058845f5..909450414c67 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -327,6 +327,16 @@ static void raid10_end_read_request(struct bio *bio, int error)
327 } 327 }
328} 328}
329 329
330static void close_write(r10bio_t *r10_bio)
331{
332 /* clear the bitmap if all writes complete successfully */
333 bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
334 r10_bio->sectors,
335 !test_bit(R10BIO_Degraded, &r10_bio->state),
336 0);
337 md_write_end(r10_bio->mddev);
338}
339
330static void raid10_end_write_request(struct bio *bio, int error) 340static void raid10_end_write_request(struct bio *bio, int error)
331{ 341{
332 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 342 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -342,9 +352,9 @@ static void raid10_end_write_request(struct bio *bio, int error)
342 * this branch is our 'one mirror IO has finished' event handler: 352 * this branch is our 'one mirror IO has finished' event handler:
343 */ 353 */
344 if (!uptodate) { 354 if (!uptodate) {
345 md_error(r10_bio->mddev, conf->mirrors[dev].rdev); 355 set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags);
346 /* an I/O failed, we can't clear the bitmap */ 356 set_bit(R10BIO_WriteError, &r10_bio->state);
347 set_bit(R10BIO_Degraded, &r10_bio->state); 357 dec_rdev = 0;
348 } else { 358 } else {
349 /* 359 /*
350 * Set R10BIO_Uptodate in our master bio, so that 360 * Set R10BIO_Uptodate in our master bio, so that
@@ -378,16 +388,15 @@ static void raid10_end_write_request(struct bio *bio, int error)
378 * already. 388 * already.
379 */ 389 */
380 if (atomic_dec_and_test(&r10_bio->remaining)) { 390 if (atomic_dec_and_test(&r10_bio->remaining)) {
381 /* clear the bitmap if all writes complete successfully */ 391 if (test_bit(R10BIO_WriteError, &r10_bio->state))
382 bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
383 r10_bio->sectors,
384 !test_bit(R10BIO_Degraded, &r10_bio->state),
385 0);
386 md_write_end(r10_bio->mddev);
387 if (test_bit(R10BIO_MadeGood, &r10_bio->state))
388 reschedule_retry(r10_bio); 392 reschedule_retry(r10_bio);
389 else 393 else {
390 raid_end_bio_io(r10_bio); 394 close_write(r10_bio);
395 if (test_bit(R10BIO_MadeGood, &r10_bio->state))
396 reschedule_retry(r10_bio);
397 else
398 raid_end_bio_io(r10_bio);
399 }
391 } 400 }
392 if (dec_rdev) 401 if (dec_rdev)
393 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); 402 rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
@@ -1839,6 +1848,82 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1839 } 1848 }
1840} 1849}
1841 1850
1851static void bi_complete(struct bio *bio, int error)
1852{
1853 complete((struct completion *)bio->bi_private);
1854}
1855
1856static int submit_bio_wait(int rw, struct bio *bio)
1857{
1858 struct completion event;
1859 rw |= REQ_SYNC;
1860
1861 init_completion(&event);
1862 bio->bi_private = &event;
1863 bio->bi_end_io = bi_complete;
1864 submit_bio(rw, bio);
1865 wait_for_completion(&event);
1866
1867 return test_bit(BIO_UPTODATE, &bio->bi_flags);
1868}
1869
1870static int narrow_write_error(r10bio_t *r10_bio, int i)
1871{
1872 struct bio *bio = r10_bio->master_bio;
1873 mddev_t *mddev = r10_bio->mddev;
1874 conf_t *conf = mddev->private;
1875 mdk_rdev_t *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
1876 /* bio has the data to be written to slot 'i' where
1877 * we just recently had a write error.
1878 * We repeatedly clone the bio and trim down to one block,
1879 * then try the write. Where the write fails we record
1880 * a bad block.
1881 * It is conceivable that the bio doesn't exactly align with
1882 * blocks. We must handle this.
1883 *
1884 * We currently own a reference to the rdev.
1885 */
1886
1887 int block_sectors;
1888 sector_t sector;
1889 int sectors;
1890 int sect_to_write = r10_bio->sectors;
1891 int ok = 1;
1892
1893 if (rdev->badblocks.shift < 0)
1894 return 0;
1895
1896 block_sectors = 1 << rdev->badblocks.shift;
1897 sector = r10_bio->sector;
1898 sectors = ((r10_bio->sector + block_sectors)
1899 & ~(sector_t)(block_sectors - 1))
1900 - sector;
1901
1902 while (sect_to_write) {
1903 struct bio *wbio;
1904 if (sectors > sect_to_write)
1905 sectors = sect_to_write;
1906 /* Write at 'sector' for 'sectors' */
1907 wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
1908 md_trim_bio(wbio, sector - bio->bi_sector, sectors);
1909 wbio->bi_sector = (r10_bio->devs[i].addr+
1910 rdev->data_offset+
1911 (sector - r10_bio->sector));
1912 wbio->bi_bdev = rdev->bdev;
1913 if (submit_bio_wait(WRITE, wbio) == 0)
1914 /* Failure! */
1915 ok = rdev_set_badblocks(rdev, sector,
1916 sectors, 0)
1917 && ok;
1918
1919 bio_put(wbio);
1920 sect_to_write -= sectors;
1921 sector += sectors;
1922 sectors = block_sectors;
1923 }
1924 return ok;
1925}
1926
1842static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio) 1927static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio)
1843{ 1928{
1844 int slot = r10_bio->read_slot; 1929 int slot = r10_bio->read_slot;
@@ -1962,16 +2047,29 @@ static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio)
1962 } 2047 }
1963 put_buf(r10_bio); 2048 put_buf(r10_bio);
1964 } else { 2049 } else {
1965 for (m = 0; m < conf->copies; m++) 2050 for (m = 0; m < conf->copies; m++) {
1966 if (r10_bio->devs[m].bio == IO_MADE_GOOD) { 2051 int dev = r10_bio->devs[m].devnum;
1967 int dev = r10_bio->devs[m].devnum; 2052 struct bio *bio = r10_bio->devs[m].bio;
1968 rdev = conf->mirrors[dev].rdev; 2053 rdev = conf->mirrors[dev].rdev;
2054 if (bio == IO_MADE_GOOD) {
1969 rdev_clear_badblocks( 2055 rdev_clear_badblocks(
1970 rdev, 2056 rdev,
1971 r10_bio->devs[m].addr, 2057 r10_bio->devs[m].addr,
1972 r10_bio->sectors); 2058 r10_bio->sectors);
1973 rdev_dec_pending(rdev, conf->mddev); 2059 rdev_dec_pending(rdev, conf->mddev);
2060 } else if (bio != NULL &&
2061 !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
2062 if (!narrow_write_error(r10_bio, m)) {
2063 md_error(conf->mddev, rdev);
2064 set_bit(R10BIO_Degraded,
2065 &r10_bio->state);
2066 }
2067 rdev_dec_pending(rdev, conf->mddev);
1974 } 2068 }
2069 }
2070 if (test_bit(R10BIO_WriteError,
2071 &r10_bio->state))
2072 close_write(r10_bio);
1975 raid_end_bio_io(r10_bio); 2073 raid_end_bio_io(r10_bio);
1976 } 2074 }
1977} 2075}
@@ -2003,7 +2101,8 @@ static void raid10d(mddev_t *mddev)
2003 2101
2004 mddev = r10_bio->mddev; 2102 mddev = r10_bio->mddev;
2005 conf = mddev->private; 2103 conf = mddev->private;
2006 if (test_bit(R10BIO_MadeGood, &r10_bio->state)) 2104 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
2105 test_bit(R10BIO_WriteError, &r10_bio->state))
2007 handle_write_completed(conf, r10_bio); 2106 handle_write_completed(conf, r10_bio);
2008 else if (test_bit(R10BIO_IsSync, &r10_bio->state)) 2107 else if (test_bit(R10BIO_IsSync, &r10_bio->state))
2009 sync_request_write(mddev, r10_bio); 2108 sync_request_write(mddev, r10_bio);
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index d8b7f9af92d5..79cb52a0d4a2 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -139,4 +139,5 @@ struct r10bio_s {
139 * known-bad-block records, we set this flag. 139 * known-bad-block records, we set this flag.
140 */ 140 */
141#define R10BIO_MadeGood 5 141#define R10BIO_MadeGood 5
142#define R10BIO_WriteError 6
142#endif 143#endif