aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2011-07-27 21:39:25 -0400
committer: NeilBrown <neilb@suse.de> 2011-07-27 21:39:25 -0400
commit: 58c54fcca3bac5bf9290cfed31c76e4c4bfbabaf (patch)
tree: 25f663873429468c3b582bc7544f983759b7592e /drivers
parent: 5e5702898e93eee7d69b6efde109609a89a61001 (diff)
md/raid10: handle further errors during fix_read_error better.
If we find more read/write errors, we should record a bad block before failing the device.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/raid10.c 59
1 files changed, 44 insertions, 15 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index fc9ebbab3f62..8b29cd4f01c8 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1749,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
1749 atomic_set(&rdev->read_errors, read_errors >> hours_since_last); 1749 atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
1750} 1750}
1751 1751
/*
 * r10_sync_page_io - synchronous single-page I/O that records failures as
 * bad blocks before falling back to failing the whole device.
 *
 * @rdev:    device to read from / write to
 * @sector:  start sector, passed unchanged to is_badblock(), sync_page_io()
 *           and rdev_set_badblocks()
 * @sectors: length as a SECTOR count; it is shifted left by 9 (x512) only
 *           for the sync_page_io() call, and passed unshifted to
 *           is_badblock() and rdev_set_badblocks()
 * @page:    page used as the I/O buffer
 * @rw:      READ or WRITE
 *
 * Returns:
 *   -1  the range hits a recorded bad block and the request is a READ, or
 *       the device already has WriteErrorSeen set; no I/O is attempted
 *    1  sync_page_io() reported success
 *    0  the I/O failed; the failure has been recorded (see below)
 *
 * NOTE(review): the fix_read_error() call sites visible in this diff still
 * pass "s<<9" as @sectors, while the same "s" is passed unshifted to
 * rdev_set_badblocks() elsewhere in that function.  Combined with the
 * "sectors << 9" below this looks like a double shift - confirm and pass
 * "s" instead.
 */
1752static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
1753 int sectors, struct page *page, int rw)
1754{
1755 sector_t first_bad;
1756 int bad_sectors;
1757
/*
 * Skip the I/O when the range overlaps a known bad block, unless this is
 * a write to a device that has not yet seen a write error.
 * first_bad/bad_sectors are filled in by is_badblock() but not used here.
 */
1758 if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
1759 && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
1760 return -1;
1761 if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
1762 /* success */
1763 return 1;
/* I/O failed: flag write failures on the device before recording the error. */
1764 if (rw == WRITE)
1765 set_bit(WriteErrorSeen, &rdev->flags);
1766 /* need to record an error - either for the block or the device */
/* md_error() is called only when rdev_set_badblocks() returns 0. */
1767 if (!rdev_set_badblocks(rdev, sector, sectors, 0))
1768 md_error(rdev->mddev, rdev);
1769 return 0;
1770}
1771
1752/* 1772/*
1753 * This is a kernel thread which: 1773 * This is a kernel thread which:
1754 * 1774 *
@@ -1832,9 +1852,19 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1832 rcu_read_unlock(); 1852 rcu_read_unlock();
1833 1853
1834 if (!success) { 1854 if (!success) {
1835 /* Cannot read from anywhere -- bye bye array */ 1855 /* Cannot read from anywhere, just mark the block
1856 * as bad on the first device to discourage future
1857 * reads.
1858 */
1836 int dn = r10_bio->devs[r10_bio->read_slot].devnum; 1859 int dn = r10_bio->devs[r10_bio->read_slot].devnum;
1837 md_error(mddev, conf->mirrors[dn].rdev); 1860 rdev = conf->mirrors[dn].rdev;
1861
1862 if (!rdev_set_badblocks(
1863 rdev,
1864 r10_bio->devs[r10_bio->read_slot].addr
1865 + sect,
1866 s, 0))
1867 md_error(mddev, rdev);
1838 break; 1868 break;
1839 } 1869 }
1840 1870
@@ -1855,10 +1885,10 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1855 1885
1856 atomic_inc(&rdev->nr_pending); 1886 atomic_inc(&rdev->nr_pending);
1857 rcu_read_unlock(); 1887 rcu_read_unlock();
1858 if (sync_page_io(rdev, 1888 if (r10_sync_page_io(rdev,
1859 r10_bio->devs[sl].addr + 1889 r10_bio->devs[sl].addr +
1860 sect, 1890 sect,
1861 s<<9, conf->tmppage, WRITE, false) 1891 s<<9, conf->tmppage, WRITE)
1862 == 0) { 1892 == 0) {
1863 /* Well, this device is dead */ 1893 /* Well, this device is dead */
1864 printk(KERN_NOTICE 1894 printk(KERN_NOTICE
@@ -1873,7 +1903,6 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1873 "drive\n", 1903 "drive\n",
1874 mdname(mddev), 1904 mdname(mddev),
1875 bdevname(rdev->bdev, b)); 1905 bdevname(rdev->bdev, b));
1876 md_error(mddev, rdev);
1877 } 1906 }
1878 rdev_dec_pending(rdev, mddev); 1907 rdev_dec_pending(rdev, mddev);
1879 rcu_read_lock(); 1908 rcu_read_lock();
@@ -1893,11 +1922,12 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1893 1922
1894 atomic_inc(&rdev->nr_pending); 1923 atomic_inc(&rdev->nr_pending);
1895 rcu_read_unlock(); 1924 rcu_read_unlock();
1896 if (sync_page_io(rdev, 1925 switch (r10_sync_page_io(rdev,
1897 r10_bio->devs[sl].addr + 1926 r10_bio->devs[sl].addr +
1898 sect, 1927 sect,
1899 s<<9, conf->tmppage, 1928 s<<9, conf->tmppage,
1900 READ, false) == 0) { 1929 READ)) {
1930 case 0:
1901 /* Well, this device is dead */ 1931 /* Well, this device is dead */
1902 printk(KERN_NOTICE 1932 printk(KERN_NOTICE
1903 "md/raid10:%s: unable to read back " 1933 "md/raid10:%s: unable to read back "
@@ -1911,9 +1941,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
1911 "drive\n", 1941 "drive\n",
1912 mdname(mddev), 1942 mdname(mddev),
1913 bdevname(rdev->bdev, b)); 1943 bdevname(rdev->bdev, b));
1914 1944 break;
1915 md_error(mddev, rdev); 1945 case 1:
1916 } else {
1917 printk(KERN_INFO 1946 printk(KERN_INFO
1918 "md/raid10:%s: read error corrected" 1947 "md/raid10:%s: read error corrected"
1919 " (%d sectors at %llu on %s)\n", 1948 " (%d sectors at %llu on %s)\n",