diff options
author | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:25 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:25 -0400 |
commit | 58c54fcca3bac5bf9290cfed31c76e4c4bfbabaf (patch) | |
tree | 25f663873429468c3b582bc7544f983759b7592e /drivers | |
parent | 5e5702898e93eee7d69b6efde109609a89a61001 (diff) |
md/raid10: handle further errors during fix_read_error better.

If we find more read/write errors we should record a bad block before
failing the device.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/raid10.c | 59 |
1 files changed, 44 insertions, 15 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index fc9ebbab3f62..8b29cd4f01c8 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1749,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1749 | atomic_set(&rdev->read_errors, read_errors >> hours_since_last); | 1749 | atomic_set(&rdev->read_errors, read_errors >> hours_since_last); |
1750 | } | 1750 | } |
1751 | 1751 | ||
1752 | static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector, | ||
1753 | int sectors, struct page *page, int rw) | ||
1754 | { | ||
1755 | sector_t first_bad; | ||
1756 | int bad_sectors; | ||
1757 | |||
1758 | if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) | ||
1759 | && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) | ||
1760 | return -1; | ||
1761 | if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) | ||
1762 | /* success */ | ||
1763 | return 1; | ||
1764 | if (rw == WRITE) | ||
1765 | set_bit(WriteErrorSeen, &rdev->flags); | ||
1766 | /* need to record an error - either for the block or the device */ | ||
1767 | if (!rdev_set_badblocks(rdev, sector, sectors, 0)) | ||
1768 | md_error(rdev->mddev, rdev); | ||
1769 | return 0; | ||
1770 | } | ||
1771 | |||
1752 | /* | 1772 | /* |
1753 | * This is a kernel thread which: | 1773 | * This is a kernel thread which: |
1754 | * | 1774 | * |
@@ -1832,9 +1852,19 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1832 | rcu_read_unlock(); | 1852 | rcu_read_unlock(); |
1833 | 1853 | ||
1834 | if (!success) { | 1854 | if (!success) { |
1835 | /* Cannot read from anywhere -- bye bye array */ | 1855 | /* Cannot read from anywhere, just mark the block |
1856 | * as bad on the first device to discourage future | ||
1857 | * reads. | ||
1858 | */ | ||
1836 | int dn = r10_bio->devs[r10_bio->read_slot].devnum; | 1859 | int dn = r10_bio->devs[r10_bio->read_slot].devnum; |
1837 | md_error(mddev, conf->mirrors[dn].rdev); | 1860 | rdev = conf->mirrors[dn].rdev; |
1861 | |||
1862 | if (!rdev_set_badblocks( | ||
1863 | rdev, | ||
1864 | r10_bio->devs[r10_bio->read_slot].addr | ||
1865 | + sect, | ||
1866 | s, 0)) | ||
1867 | md_error(mddev, rdev); | ||
1838 | break; | 1868 | break; |
1839 | } | 1869 | } |
1840 | 1870 | ||
@@ -1855,10 +1885,10 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1855 | 1885 | ||
1856 | atomic_inc(&rdev->nr_pending); | 1886 | atomic_inc(&rdev->nr_pending); |
1857 | rcu_read_unlock(); | 1887 | rcu_read_unlock(); |
1858 | if (sync_page_io(rdev, | 1888 | if (r10_sync_page_io(rdev, |
1859 | r10_bio->devs[sl].addr + | 1889 | r10_bio->devs[sl].addr + |
1860 | sect, | 1890 | sect, |
1861 | s<<9, conf->tmppage, WRITE, false) | 1891 | s<<9, conf->tmppage, WRITE) |
1862 | == 0) { | 1892 | == 0) { |
1863 | /* Well, this device is dead */ | 1893 | /* Well, this device is dead */ |
1864 | printk(KERN_NOTICE | 1894 | printk(KERN_NOTICE |
@@ -1873,7 +1903,6 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1873 | "drive\n", | 1903 | "drive\n", |
1874 | mdname(mddev), | 1904 | mdname(mddev), |
1875 | bdevname(rdev->bdev, b)); | 1905 | bdevname(rdev->bdev, b)); |
1876 | md_error(mddev, rdev); | ||
1877 | } | 1906 | } |
1878 | rdev_dec_pending(rdev, mddev); | 1907 | rdev_dec_pending(rdev, mddev); |
1879 | rcu_read_lock(); | 1908 | rcu_read_lock(); |
@@ -1893,11 +1922,12 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1893 | 1922 | ||
1894 | atomic_inc(&rdev->nr_pending); | 1923 | atomic_inc(&rdev->nr_pending); |
1895 | rcu_read_unlock(); | 1924 | rcu_read_unlock(); |
1896 | if (sync_page_io(rdev, | 1925 | switch (r10_sync_page_io(rdev, |
1897 | r10_bio->devs[sl].addr + | 1926 | r10_bio->devs[sl].addr + |
1898 | sect, | 1927 | sect, |
1899 | s<<9, conf->tmppage, | 1928 | s<<9, conf->tmppage, |
1900 | READ, false) == 0) { | 1929 | READ)) { |
1930 | case 0: | ||
1901 | /* Well, this device is dead */ | 1931 | /* Well, this device is dead */ |
1902 | printk(KERN_NOTICE | 1932 | printk(KERN_NOTICE |
1903 | "md/raid10:%s: unable to read back " | 1933 | "md/raid10:%s: unable to read back " |
@@ -1911,9 +1941,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) | |||
1911 | "drive\n", | 1941 | "drive\n", |
1912 | mdname(mddev), | 1942 | mdname(mddev), |
1913 | bdevname(rdev->bdev, b)); | 1943 | bdevname(rdev->bdev, b)); |
1914 | 1944 | break; | |
1915 | md_error(mddev, rdev); | 1945 | case 1: |
1916 | } else { | ||
1917 | printk(KERN_INFO | 1946 | printk(KERN_INFO |
1918 | "md/raid10:%s: read error corrected" | 1947 | "md/raid10:%s: read error corrected" |
1919 | " (%d sectors at %llu on %s)\n", | 1948 | " (%d sectors at %llu on %s)\n", |