aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-07-27 21:31:48 -0400
committer NeilBrown <neilb@suse.de> 2011-07-27 21:31:48 -0400
commit 06f603851fa90bcd236328438278d4dc8b655495 (patch)
tree 49a7f239a9c2de4bf97d998f161b229a565e33cd /drivers/md/raid1.c
parent d2eb35acfdccbe2a3622ed6cc441a5482148423b (diff)
md/raid1: avoid reading known bad blocks during resync
When performing resync/etc, keep the size of the request small enough that it doesn't overlap any known bad blocks. Devices with bad blocks at the start of the request are completely excluded. If there is nowhere to read from due to bad blocks, record a bad block on each target device. Now that we never read from known bad blocks, we can allow devices with known bad blocks into a RAID1.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- drivers/md/raid1.c 97
1 files changed, 75 insertions, 22 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cc3939dc9e3d..8c31c39b6f8c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1155,9 +1155,6 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
1155 if (mddev->recovery_disabled == conf->recovery_disabled) 1155 if (mddev->recovery_disabled == conf->recovery_disabled)
1156 return -EBUSY; 1156 return -EBUSY;
1157 1157
1158 if (rdev->badblocks.count)
1159 return -EINVAL;
1160
1161 if (rdev->raid_disk >= 0) 1158 if (rdev->raid_disk >= 0)
1162 first = last = rdev->raid_disk; 1159 first = last = rdev->raid_disk;
1163 1160
@@ -1303,6 +1300,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
1303 * We don't need to freeze the array, because being in an 1300 * We don't need to freeze the array, because being in an
1304 * active sync request, there is no normal IO, and 1301 * active sync request, there is no normal IO, and
1305 * no overlapping syncs. 1302 * no overlapping syncs.
1303 * We don't need to check is_badblock() again as we
1304 * made sure that anything with a bad block in range
1305 * will have bi_end_io clear.
1306 */ 1306 */
1307 mddev_t *mddev = r1_bio->mddev; 1307 mddev_t *mddev = r1_bio->mddev;
1308 conf_t *conf = mddev->private; 1308 conf_t *conf = mddev->private;
@@ -1792,6 +1792,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1792 int write_targets = 0, read_targets = 0; 1792 int write_targets = 0, read_targets = 0;
1793 sector_t sync_blocks; 1793 sector_t sync_blocks;
1794 int still_degraded = 0; 1794 int still_degraded = 0;
1795 int good_sectors = RESYNC_SECTORS;
1796 int min_bad = 0; /* number of sectors that are bad in all devices */
1795 1797
1796 if (!conf->r1buf_pool) 1798 if (!conf->r1buf_pool)
1797 if (init_resync(conf)) 1799 if (init_resync(conf))
@@ -1879,36 +1881,89 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1879 1881
1880 rdev = rcu_dereference(conf->mirrors[i].rdev); 1882 rdev = rcu_dereference(conf->mirrors[i].rdev);
1881 if (rdev == NULL || 1883 if (rdev == NULL ||
1882 test_bit(Faulty, &rdev->flags)) { 1884 test_bit(Faulty, &rdev->flags)) {
1883 still_degraded = 1; 1885 still_degraded = 1;
1884 continue;
1885 } else if (!test_bit(In_sync, &rdev->flags)) { 1886 } else if (!test_bit(In_sync, &rdev->flags)) {
1886 bio->bi_rw = WRITE; 1887 bio->bi_rw = WRITE;
1887 bio->bi_end_io = end_sync_write; 1888 bio->bi_end_io = end_sync_write;
1888 write_targets ++; 1889 write_targets ++;
1889 } else { 1890 } else {
1890 /* may need to read from here */ 1891 /* may need to read from here */
1891 bio->bi_rw = READ; 1892 sector_t first_bad = MaxSector;
1892 bio->bi_end_io = end_sync_read; 1893 int bad_sectors;
1893 if (test_bit(WriteMostly, &rdev->flags)) { 1894
1894 if (wonly < 0) 1895 if (is_badblock(rdev, sector_nr, good_sectors,
1895 wonly = i; 1896 &first_bad, &bad_sectors)) {
1896 } else { 1897 if (first_bad > sector_nr)
1897 if (disk < 0) 1898 good_sectors = first_bad - sector_nr;
1898 disk = i; 1899 else {
1900 bad_sectors -= (sector_nr - first_bad);
1901 if (min_bad == 0 ||
1902 min_bad > bad_sectors)
1903 min_bad = bad_sectors;
1904 }
1905 }
1906 if (sector_nr < first_bad) {
1907 if (test_bit(WriteMostly, &rdev->flags)) {
1908 if (wonly < 0)
1909 wonly = i;
1910 } else {
1911 if (disk < 0)
1912 disk = i;
1913 }
1914 bio->bi_rw = READ;
1915 bio->bi_end_io = end_sync_read;
1916 read_targets++;
1899 } 1917 }
1900 read_targets++;
1901 } 1918 }
1902 atomic_inc(&rdev->nr_pending); 1919 if (bio->bi_end_io) {
1903 bio->bi_sector = sector_nr + rdev->data_offset; 1920 atomic_inc(&rdev->nr_pending);
1904 bio->bi_bdev = rdev->bdev; 1921 bio->bi_sector = sector_nr + rdev->data_offset;
1905 bio->bi_private = r1_bio; 1922 bio->bi_bdev = rdev->bdev;
1923 bio->bi_private = r1_bio;
1924 }
1906 } 1925 }
1907 rcu_read_unlock(); 1926 rcu_read_unlock();
1908 if (disk < 0) 1927 if (disk < 0)
1909 disk = wonly; 1928 disk = wonly;
1910 r1_bio->read_disk = disk; 1929 r1_bio->read_disk = disk;
1911 1930
1931 if (read_targets == 0 && min_bad > 0) {
1932 /* These sectors are bad on all InSync devices, so we
1933 * need to mark them bad on all write targets
1934 */
1935 int ok = 1;
1936 for (i = 0 ; i < conf->raid_disks ; i++)
1937 if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
1938 mdk_rdev_t *rdev =
1939 rcu_dereference(conf->mirrors[i].rdev);
1940 ok = rdev_set_badblocks(rdev, sector_nr,
1941 min_bad, 0
1942 ) && ok;
1943 }
1944 set_bit(MD_CHANGE_DEVS, &mddev->flags);
1945 *skipped = 1;
1946 put_buf(r1_bio);
1947
1948 if (!ok) {
1949 /* Cannot record the badblocks, so need to
1950 * abort the resync.
1951 * If there are multiple read targets, could just
1952 * fail the really bad ones ???
1953 */
1954 conf->recovery_disabled = mddev->recovery_disabled;
1955 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
1956 return 0;
1957 } else
1958 return min_bad;
1959
1960 }
1961 if (min_bad > 0 && min_bad < good_sectors) {
1962 /* only resync enough to reach the next bad->good
1963 * transition */
1964 good_sectors = min_bad;
1965 }
1966
1912 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) 1967 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
1913 /* extra read targets are also write targets */ 1968 /* extra read targets are also write targets */
1914 write_targets += read_targets-1; 1969 write_targets += read_targets-1;
@@ -1925,6 +1980,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1925 1980
1926 if (max_sector > mddev->resync_max) 1981 if (max_sector > mddev->resync_max)
1927 max_sector = mddev->resync_max; /* Don't do IO beyond here */ 1982 max_sector = mddev->resync_max; /* Don't do IO beyond here */
1983 if (max_sector > sector_nr + good_sectors)
1984 max_sector = sector_nr + good_sectors;
1928 nr_sectors = 0; 1985 nr_sectors = 0;
1929 sync_blocks = 0; 1986 sync_blocks = 0;
1930 do { 1987 do {
@@ -2147,10 +2204,6 @@ static int run(mddev_t *mddev)
2147 blk_queue_segment_boundary(mddev->queue, 2204 blk_queue_segment_boundary(mddev->queue,
2148 PAGE_CACHE_SIZE - 1); 2205 PAGE_CACHE_SIZE - 1);
2149 } 2206 }
2150 if (rdev->badblocks.count) {
2151 printk(KERN_ERR "md/raid1: Cannot handle bad blocks yet\n");
2152 return -EINVAL;
2153 }
2154 } 2207 }
2155 2208
2156 mddev->degraded = 0; 2209 mddev->degraded = 0;