diff options
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- | drivers/md/raid1.c | 168 |
1 files changed, 145 insertions, 23 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b16d2ee5e9dd..b45d641f4f33 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -318,25 +318,34 @@ static void raid1_end_read_request(struct bio *bio, int error) | |||
318 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); | 318 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); |
319 | } | 319 | } |
320 | 320 | ||
321 | static void close_write(r1bio_t *r1_bio) | ||
322 | { | ||
323 | /* it really is the end of this request */ | ||
324 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | ||
325 | /* free extra copy of the data pages */ | ||
326 | int i = r1_bio->behind_page_count; | ||
327 | while (i--) | ||
328 | safe_put_page(r1_bio->behind_bvecs[i].bv_page); | ||
329 | kfree(r1_bio->behind_bvecs); | ||
330 | r1_bio->behind_bvecs = NULL; | ||
331 | } | ||
332 | /* clear the bitmap if all writes complete successfully */ | ||
333 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
334 | r1_bio->sectors, | ||
335 | !test_bit(R1BIO_Degraded, &r1_bio->state), | ||
336 | test_bit(R1BIO_BehindIO, &r1_bio->state)); | ||
337 | md_write_end(r1_bio->mddev); | ||
338 | } | ||
339 | |||
321 | static void r1_bio_write_done(r1bio_t *r1_bio) | 340 | static void r1_bio_write_done(r1bio_t *r1_bio) |
322 | { | 341 | { |
323 | if (atomic_dec_and_test(&r1_bio->remaining)) | 342 | if (!atomic_dec_and_test(&r1_bio->remaining)) |
324 | { | 343 | return; |
325 | /* it really is the end of this request */ | 344 | |
326 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | 345 | if (test_bit(R1BIO_WriteError, &r1_bio->state)) |
327 | /* free extra copy of the data pages */ | 346 | reschedule_retry(r1_bio); |
328 | int i = r1_bio->behind_page_count; | 347 | else { |
329 | while (i--) | 348 | close_write(r1_bio); |
330 | safe_put_page(r1_bio->behind_bvecs[i].bv_page); | ||
331 | kfree(r1_bio->behind_bvecs); | ||
332 | r1_bio->behind_bvecs = NULL; | ||
333 | } | ||
334 | /* clear the bitmap if all writes complete successfully */ | ||
335 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | ||
336 | r1_bio->sectors, | ||
337 | !test_bit(R1BIO_Degraded, &r1_bio->state), | ||
338 | test_bit(R1BIO_BehindIO, &r1_bio->state)); | ||
339 | md_write_end(r1_bio->mddev); | ||
340 | if (test_bit(R1BIO_MadeGood, &r1_bio->state)) | 349 | if (test_bit(R1BIO_MadeGood, &r1_bio->state)) |
341 | reschedule_retry(r1_bio); | 350 | reschedule_retry(r1_bio); |
342 | else | 351 | else |
@@ -360,12 +369,10 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
360 | /* | 369 | /* |
361 | * 'one mirror IO has finished' event handler: | 370 | * 'one mirror IO has finished' event handler: |
362 | */ | 371 | */ |
363 | r1_bio->bios[mirror] = NULL; | ||
364 | to_put = bio; | ||
365 | if (!uptodate) { | 372 | if (!uptodate) { |
366 | md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); | 373 | set_bit(WriteErrorSeen, |
367 | /* an I/O failed, we can't clear the bitmap */ | 374 | &conf->mirrors[mirror].rdev->flags); |
368 | set_bit(R1BIO_Degraded, &r1_bio->state); | 375 | set_bit(R1BIO_WriteError, &r1_bio->state); |
369 | } else { | 376 | } else { |
370 | /* | 377 | /* |
371 | * Set R1BIO_Uptodate in our master bio, so that we | 378 | * Set R1BIO_Uptodate in our master bio, so that we |
@@ -380,6 +387,8 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
380 | sector_t first_bad; | 387 | sector_t first_bad; |
381 | int bad_sectors; | 388 | int bad_sectors; |
382 | 389 | ||
390 | r1_bio->bios[mirror] = NULL; | ||
391 | to_put = bio; | ||
383 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 392 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
384 | 393 | ||
385 | /* Maybe we can clear some bad blocks. */ | 394 | /* Maybe we can clear some bad blocks. */ |
@@ -1724,6 +1733,101 @@ static void fix_read_error(conf_t *conf, int read_disk, | |||
1724 | } | 1733 | } |
1725 | } | 1734 | } |
1726 | 1735 | ||
1736 | static void bi_complete(struct bio *bio, int error) | ||
1737 | { | ||
1738 | complete((struct completion *)bio->bi_private); | ||
1739 | } | ||
1740 | |||
1741 | static int submit_bio_wait(int rw, struct bio *bio) | ||
1742 | { | ||
1743 | struct completion event; | ||
1744 | rw |= REQ_SYNC; | ||
1745 | |||
1746 | init_completion(&event); | ||
1747 | bio->bi_private = &event; | ||
1748 | bio->bi_end_io = bi_complete; | ||
1749 | submit_bio(rw, bio); | ||
1750 | wait_for_completion(&event); | ||
1751 | |||
1752 | return test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
1753 | } | ||
1754 | |||
1755 | static int narrow_write_error(r1bio_t *r1_bio, int i) | ||
1756 | { | ||
1757 | mddev_t *mddev = r1_bio->mddev; | ||
1758 | conf_t *conf = mddev->private; | ||
1759 | mdk_rdev_t *rdev = conf->mirrors[i].rdev; | ||
1760 | int vcnt, idx; | ||
1761 | struct bio_vec *vec; | ||
1762 | |||
1763 | /* bio has the data to be written to device 'i' where | ||
1764 | * we just recently had a write error. | ||
1765 | * We repeatedly clone the bio and trim down to one block, | ||
1766 | * then try the write. Where the write fails we record | ||
1767 | * a bad block. | ||
1768 | * It is conceivable that the bio doesn't exactly align with | ||
1769 | * blocks. We must handle this somehow. | ||
1770 | * | ||
1771 | * We currently own a reference on the rdev. | ||
1772 | */ | ||
1773 | |||
1774 | int block_sectors; | ||
1775 | sector_t sector; | ||
1776 | int sectors; | ||
1777 | int sect_to_write = r1_bio->sectors; | ||
1778 | int ok = 1; | ||
1779 | |||
1780 | if (rdev->badblocks.shift < 0) | ||
1781 | return 0; | ||
1782 | |||
1783 | block_sectors = 1 << rdev->badblocks.shift; | ||
1784 | sector = r1_bio->sector; | ||
1785 | sectors = ((sector + block_sectors) | ||
1786 | & ~(sector_t)(block_sectors - 1)) | ||
1787 | - sector; | ||
1788 | |||
1789 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | ||
1790 | vcnt = r1_bio->behind_page_count; | ||
1791 | vec = r1_bio->behind_bvecs; | ||
1792 | idx = 0; | ||
1793 | while (vec[idx].bv_page == NULL) | ||
1794 | idx++; | ||
1795 | } else { | ||
1796 | vcnt = r1_bio->master_bio->bi_vcnt; | ||
1797 | vec = r1_bio->master_bio->bi_io_vec; | ||
1798 | idx = r1_bio->master_bio->bi_idx; | ||
1799 | } | ||
1800 | while (sect_to_write) { | ||
1801 | struct bio *wbio; | ||
1802 | if (sectors > sect_to_write) | ||
1803 | sectors = sect_to_write; | ||
1804 | /* Write at 'sector' for 'sectors'*/ | ||
1805 | |||
1806 | wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); | ||
1807 | memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); | ||
1808 | wbio->bi_sector = r1_bio->sector; | ||
1809 | wbio->bi_rw = WRITE; | ||
1810 | wbio->bi_vcnt = vcnt; | ||
1811 | wbio->bi_size = r1_bio->sectors << 9; | ||
1812 | wbio->bi_idx = idx; | ||
1813 | |||
1814 | md_trim_bio(wbio, sector - r1_bio->sector, sectors); | ||
1815 | wbio->bi_sector += rdev->data_offset; | ||
1816 | wbio->bi_bdev = rdev->bdev; | ||
1817 | if (submit_bio_wait(WRITE, wbio) == 0) | ||
1818 | /* failure! */ | ||
1819 | ok = rdev_set_badblocks(rdev, sector, | ||
1820 | sectors, 0) | ||
1821 | && ok; | ||
1822 | |||
1823 | bio_put(wbio); | ||
1824 | sect_to_write -= sectors; | ||
1825 | sector += sectors; | ||
1826 | sectors = block_sectors; | ||
1827 | } | ||
1828 | return ok; | ||
1829 | } | ||
1830 | |||
1727 | static void raid1d(mddev_t *mddev) | 1831 | static void raid1d(mddev_t *mddev) |
1728 | { | 1832 | { |
1729 | r1bio_t *r1_bio; | 1833 | r1bio_t *r1_bio; |
@@ -1775,7 +1879,8 @@ static void raid1d(mddev_t *mddev) | |||
1775 | md_done_sync(mddev, s, 1); | 1879 | md_done_sync(mddev, s, 1); |
1776 | } else | 1880 | } else |
1777 | sync_request_write(mddev, r1_bio); | 1881 | sync_request_write(mddev, r1_bio); |
1778 | } else if (test_bit(R1BIO_MadeGood, &r1_bio->state)) { | 1882 | } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) || |
1883 | test_bit(R1BIO_WriteError, &r1_bio->state)) { | ||
1779 | int m; | 1884 | int m; |
1780 | for (m = 0; m < conf->raid_disks ; m++) | 1885 | for (m = 0; m < conf->raid_disks ; m++) |
1781 | if (r1_bio->bios[m] == IO_MADE_GOOD) { | 1886 | if (r1_bio->bios[m] == IO_MADE_GOOD) { |
@@ -1785,7 +1890,24 @@ static void raid1d(mddev_t *mddev) | |||
1785 | r1_bio->sector, | 1890 | r1_bio->sector, |
1786 | r1_bio->sectors); | 1891 | r1_bio->sectors); |
1787 | rdev_dec_pending(rdev, mddev); | 1892 | rdev_dec_pending(rdev, mddev); |
1893 | } else if (r1_bio->bios[m] != NULL) { | ||
1894 | /* This drive got a write error. We | ||
1895 | * need to narrow down and record | ||
1896 | * precise write errors. | ||
1897 | */ | ||
1898 | if (!narrow_write_error(r1_bio, m)) { | ||
1899 | md_error(mddev, | ||
1900 | conf->mirrors[m].rdev); | ||
1901 | /* an I/O failed, we can't clear | ||
1902 | * the bitmap */ | ||
1903 | set_bit(R1BIO_Degraded, | ||
1904 | &r1_bio->state); | ||
1905 | } | ||
1906 | rdev_dec_pending(conf->mirrors[m].rdev, | ||
1907 | mddev); | ||
1788 | } | 1908 | } |
1909 | if (test_bit(R1BIO_WriteError, &r1_bio->state)) | ||
1910 | close_write(r1_bio); | ||
1789 | raid_end_bio_io(r1_bio); | 1911 | raid_end_bio_io(r1_bio); |
1790 | } else if (test_bit(R1BIO_ReadError, &r1_bio->state)) { | 1912 | } else if (test_bit(R1BIO_ReadError, &r1_bio->state)) { |
1791 | int disk; | 1913 | int disk; |