aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-07-27 21:32:41 -0400
committerNeilBrown <neilb@suse.de>2011-07-27 21:32:41 -0400
commitcd5ff9a16f0831f68c1024d5d776075b5123b034 (patch)
tree490e297bde3d0c52b55d38b38ed1cdb5b5b6f43e /drivers/md/raid1.c
parent2ca68f5ed7383733102ee53cd8fa4021ecc3b275 (diff)
md/raid1: Handle write errors by updating badblock log.
When we get a write error (in the data area, not in metadata), update the badblock log rather than failing the whole device. As the write may well be many blocks, we try writing each block individually and only log the ones which fail. Signed-off-by: NeilBrown <neilb@suse.de> Reviewed-by: Namhyung Kim <namhyung@gmail.com>
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c168
1 files changed, 145 insertions, 23 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b16d2ee5e9dd..b45d641f4f33 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -318,25 +318,34 @@ static void raid1_end_read_request(struct bio *bio, int error)
318 rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); 318 rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
319} 319}
320 320
321static void close_write(r1bio_t *r1_bio)
322{
323 /* it really is the end of this request */
324 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
325 /* free extra copy of the data pages */
326 int i = r1_bio->behind_page_count;
327 while (i--)
328 safe_put_page(r1_bio->behind_bvecs[i].bv_page);
329 kfree(r1_bio->behind_bvecs);
330 r1_bio->behind_bvecs = NULL;
331 }
332 /* clear the bitmap if all writes complete successfully */
333 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
334 r1_bio->sectors,
335 !test_bit(R1BIO_Degraded, &r1_bio->state),
336 test_bit(R1BIO_BehindIO, &r1_bio->state));
337 md_write_end(r1_bio->mddev);
338}
339
321static void r1_bio_write_done(r1bio_t *r1_bio) 340static void r1_bio_write_done(r1bio_t *r1_bio)
322{ 341{
323 if (atomic_dec_and_test(&r1_bio->remaining)) 342 if (!atomic_dec_and_test(&r1_bio->remaining))
324 { 343 return;
325 /* it really is the end of this request */ 344
326 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { 345 if (test_bit(R1BIO_WriteError, &r1_bio->state))
327 /* free extra copy of the data pages */ 346 reschedule_retry(r1_bio);
328 int i = r1_bio->behind_page_count; 347 else {
329 while (i--) 348 close_write(r1_bio);
330 safe_put_page(r1_bio->behind_bvecs[i].bv_page);
331 kfree(r1_bio->behind_bvecs);
332 r1_bio->behind_bvecs = NULL;
333 }
334 /* clear the bitmap if all writes complete successfully */
335 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
336 r1_bio->sectors,
337 !test_bit(R1BIO_Degraded, &r1_bio->state),
338 test_bit(R1BIO_BehindIO, &r1_bio->state));
339 md_write_end(r1_bio->mddev);
340 if (test_bit(R1BIO_MadeGood, &r1_bio->state)) 349 if (test_bit(R1BIO_MadeGood, &r1_bio->state))
341 reschedule_retry(r1_bio); 350 reschedule_retry(r1_bio);
342 else 351 else
@@ -360,12 +369,10 @@ static void raid1_end_write_request(struct bio *bio, int error)
360 /* 369 /*
361 * 'one mirror IO has finished' event handler: 370 * 'one mirror IO has finished' event handler:
362 */ 371 */
363 r1_bio->bios[mirror] = NULL;
364 to_put = bio;
365 if (!uptodate) { 372 if (!uptodate) {
366 md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); 373 set_bit(WriteErrorSeen,
367 /* an I/O failed, we can't clear the bitmap */ 374 &conf->mirrors[mirror].rdev->flags);
368 set_bit(R1BIO_Degraded, &r1_bio->state); 375 set_bit(R1BIO_WriteError, &r1_bio->state);
369 } else { 376 } else {
370 /* 377 /*
371 * Set R1BIO_Uptodate in our master bio, so that we 378 * Set R1BIO_Uptodate in our master bio, so that we
@@ -380,6 +387,8 @@ static void raid1_end_write_request(struct bio *bio, int error)
380 sector_t first_bad; 387 sector_t first_bad;
381 int bad_sectors; 388 int bad_sectors;
382 389
390 r1_bio->bios[mirror] = NULL;
391 to_put = bio;
383 set_bit(R1BIO_Uptodate, &r1_bio->state); 392 set_bit(R1BIO_Uptodate, &r1_bio->state);
384 393
385 /* Maybe we can clear some bad blocks. */ 394 /* Maybe we can clear some bad blocks. */
@@ -1724,6 +1733,101 @@ static void fix_read_error(conf_t *conf, int read_disk,
1724 } 1733 }
1725} 1734}
1726 1735
1736static void bi_complete(struct bio *bio, int error)
1737{
1738 complete((struct completion *)bio->bi_private);
1739}
1740
1741static int submit_bio_wait(int rw, struct bio *bio)
1742{
1743 struct completion event;
1744 rw |= REQ_SYNC;
1745
1746 init_completion(&event);
1747 bio->bi_private = &event;
1748 bio->bi_end_io = bi_complete;
1749 submit_bio(rw, bio);
1750 wait_for_completion(&event);
1751
1752 return test_bit(BIO_UPTODATE, &bio->bi_flags);
1753}
1754
1755static int narrow_write_error(r1bio_t *r1_bio, int i)
1756{
1757 mddev_t *mddev = r1_bio->mddev;
1758 conf_t *conf = mddev->private;
1759 mdk_rdev_t *rdev = conf->mirrors[i].rdev;
1760 int vcnt, idx;
1761 struct bio_vec *vec;
1762
1763 /* bio has the data to be written to device 'i' where
1764 * we just recently had a write error.
1765 * We repeatedly clone the bio and trim down to one block,
1766 * then try the write. Where the write fails we record
1767 * a bad block.
1768 * It is conceivable that the bio doesn't exactly align with
1769 * blocks. We must handle this somehow.
1770 *
1771 * We currently own a reference on the rdev.
1772 */
1773
1774 int block_sectors;
1775 sector_t sector;
1776 int sectors;
1777 int sect_to_write = r1_bio->sectors;
1778 int ok = 1;
1779
1780 if (rdev->badblocks.shift < 0)
1781 return 0;
1782
1783 block_sectors = 1 << rdev->badblocks.shift;
1784 sector = r1_bio->sector;
1785 sectors = ((sector + block_sectors)
1786 & ~(sector_t)(block_sectors - 1))
1787 - sector;
1788
1789 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
1790 vcnt = r1_bio->behind_page_count;
1791 vec = r1_bio->behind_bvecs;
1792 idx = 0;
1793 while (vec[idx].bv_page == NULL)
1794 idx++;
1795 } else {
1796 vcnt = r1_bio->master_bio->bi_vcnt;
1797 vec = r1_bio->master_bio->bi_io_vec;
1798 idx = r1_bio->master_bio->bi_idx;
1799 }
1800 while (sect_to_write) {
1801 struct bio *wbio;
1802 if (sectors > sect_to_write)
1803 sectors = sect_to_write;
1804 /* Write at 'sector' for 'sectors'*/
1805
1806 wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
1807 memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
1808 wbio->bi_sector = r1_bio->sector;
1809 wbio->bi_rw = WRITE;
1810 wbio->bi_vcnt = vcnt;
1811 wbio->bi_size = r1_bio->sectors << 9;
1812 wbio->bi_idx = idx;
1813
1814 md_trim_bio(wbio, sector - r1_bio->sector, sectors);
1815 wbio->bi_sector += rdev->data_offset;
1816 wbio->bi_bdev = rdev->bdev;
1817 if (submit_bio_wait(WRITE, wbio) == 0)
1818 /* failure! */
1819 ok = rdev_set_badblocks(rdev, sector,
1820 sectors, 0)
1821 && ok;
1822
1823 bio_put(wbio);
1824 sect_to_write -= sectors;
1825 sector += sectors;
1826 sectors = block_sectors;
1827 }
1828 return ok;
1829}
1830
1727static void raid1d(mddev_t *mddev) 1831static void raid1d(mddev_t *mddev)
1728{ 1832{
1729 r1bio_t *r1_bio; 1833 r1bio_t *r1_bio;
@@ -1775,7 +1879,8 @@ static void raid1d(mddev_t *mddev)
1775 md_done_sync(mddev, s, 1); 1879 md_done_sync(mddev, s, 1);
1776 } else 1880 } else
1777 sync_request_write(mddev, r1_bio); 1881 sync_request_write(mddev, r1_bio);
1778 } else if (test_bit(R1BIO_MadeGood, &r1_bio->state)) { 1882 } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
1883 test_bit(R1BIO_WriteError, &r1_bio->state)) {
1779 int m; 1884 int m;
1780 for (m = 0; m < conf->raid_disks ; m++) 1885 for (m = 0; m < conf->raid_disks ; m++)
1781 if (r1_bio->bios[m] == IO_MADE_GOOD) { 1886 if (r1_bio->bios[m] == IO_MADE_GOOD) {
@@ -1785,7 +1890,24 @@ static void raid1d(mddev_t *mddev)
1785 r1_bio->sector, 1890 r1_bio->sector,
1786 r1_bio->sectors); 1891 r1_bio->sectors);
1787 rdev_dec_pending(rdev, mddev); 1892 rdev_dec_pending(rdev, mddev);
1893 } else if (r1_bio->bios[m] != NULL) {
1894 /* This drive got a write error. We
1895 * need to narrow down and record
1896 * precise write errors.
1897 */
1898 if (!narrow_write_error(r1_bio, m)) {
1899 md_error(mddev,
1900 conf->mirrors[m].rdev);
1901 /* an I/O failed, we can't clear
1902 * the bitmap */
1903 set_bit(R1BIO_Degraded,
1904 &r1_bio->state);
1905 }
1906 rdev_dec_pending(conf->mirrors[m].rdev,
1907 mddev);
1788 } 1908 }
1909 if (test_bit(R1BIO_WriteError, &r1_bio->state))
1910 close_write(r1_bio);
1789 raid_end_bio_io(r1_bio); 1911 raid_end_bio_io(r1_bio);
1790 } else if (test_bit(R1BIO_ReadError, &r1_bio->state)) { 1912 } else if (test_bit(R1BIO_ReadError, &r1_bio->state)) {
1791 int disk; 1913 int disk;