author     NeilBrown <neilb@suse.de>  2011-07-27 21:31:48 -0400
committer  NeilBrown <neilb@suse.de>  2011-07-27 21:31:48 -0400
commit     06f603851fa90bcd236328438278d4dc8b655495 (patch)
tree       49a7f239a9c2de4bf97d998f161b229a565e33cd /drivers
parent     d2eb35acfdccbe2a3622ed6cc441a5482148423b (diff)
md/raid1: avoid reading known bad blocks during resync
When performing resync/recovery, keep each request small enough
that it does not overlap any known bad blocks.
Devices with bad blocks at the start of the request are excluded
entirely.
If bad blocks leave nowhere to read from, record a bad block on
each target device instead.
Now that we never read from known bad blocks, we can allow devices
with known bad blocks into a RAID1.
Signed-off-by: NeilBrown <neilb@suse.de>
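
For orientation before the diff, here is a minimal sketch of the window-clipping
rule this patch adds to sync_request(). It is plain, self-contained C, not the
kernel code: is_badblock_sim(), struct bad_range, and the sample sector numbers
are invented stand-ins for the kernel's is_badblock()/badblocks machinery.

/*
 * Hedged sketch (illustrative only, NOT kernel code): how a resync
 * window is clipped against one known bad-block range, mirroring the
 * is_badblock() handling this patch adds.
 */
#include <stdio.h>

typedef unsigned long long sector_t;
#define MAX_SECTOR	(~0ULL)
#define RESYNC_SECTORS	128ULL		/* window size, as in raid1.c */

/* One bad range on one device: a stand-in for the kernel badblocks list. */
struct bad_range { sector_t start; sector_t len; };

/*
 * Toy analogue of is_badblock(): returns 1 and fills *first_bad and
 * *bad_sectors if [sector, sector+len) overlaps the device's bad range.
 */
static int is_badblock_sim(const struct bad_range *b, sector_t sector,
			   sector_t len, sector_t *first_bad,
			   sector_t *bad_sectors)
{
	if (b->len == 0 || b->start >= sector + len ||
	    b->start + b->len <= sector)
		return 0;
	*first_bad = b->start;
	*bad_sectors = b->len;
	return 1;
}

int main(void)
{
	struct bad_range dev = { 1000, 16 };	/* sectors 1000..1015 bad */
	sector_t sector_nr = 960;		/* window = [960, 1088) */
	sector_t good_sectors = RESYNC_SECTORS;
	sector_t first_bad = MAX_SECTOR, bad_sectors;

	if (is_badblock_sim(&dev, sector_nr, good_sectors,
			    &first_bad, &bad_sectors)) {
		if (first_bad > sector_nr)
			/* bad range starts inside the window: stop short */
			good_sectors = first_bad - sector_nr;
		else
			/* window starts inside the bad range: this device
			 * would be excluded as a read target entirely */
			bad_sectors -= sector_nr - first_bad;
	}
	printf("resync window clipped to %llu sectors (first_bad=%llu)\n",
	       good_sectors, first_bad);
	return 0;
}

Compiled and run, this prints "resync window clipped to 40 sectors
(first_bad=1000)": the window stops just short of the bad range, matching the
good_sectors = first_bad - sector_nr branch in the hunks below. Had the window
started inside the bad range, first_bad would be <= sector_nr and the device
would get no read role at all.
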
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/md/raid1.c | 97
1 file changed, 75 insertions(+), 22 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cc3939dc9e3d..8c31c39b6f8c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1155,9 +1155,6 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
 
-	if (rdev->badblocks.count)
-		return -EINVAL;
-
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
@@ -1303,6 +1300,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio)
 	 * We don't need to freeze the array, because being in an
 	 * active sync request, there is no normal IO, and
 	 * no overlapping syncs.
+	 * We don't need to check is_badblock() again as we
+	 * made sure that anything with a bad block in range
+	 * will have bi_end_io clear.
 	 */
 	mddev_t *mddev = r1_bio->mddev;
 	conf_t *conf = mddev->private;
@@ -1792,6 +1792,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	int write_targets = 0, read_targets = 0;
 	sector_t sync_blocks;
 	int still_degraded = 0;
+	int good_sectors = RESYNC_SECTORS;
+	int min_bad = 0; /* number of sectors that are bad in all devices */
 
 	if (!conf->r1buf_pool)
 		if (init_resync(conf))
@@ -1879,36 +1881,89 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 		rdev = rcu_dereference(conf->mirrors[i].rdev);
 		if (rdev == NULL ||
 		    test_bit(Faulty, &rdev->flags)) {
 			still_degraded = 1;
-			continue;
 		} else if (!test_bit(In_sync, &rdev->flags)) {
 			bio->bi_rw = WRITE;
 			bio->bi_end_io = end_sync_write;
 			write_targets ++;
 		} else {
 			/* may need to read from here */
-			bio->bi_rw = READ;
-			bio->bi_end_io = end_sync_read;
-			if (test_bit(WriteMostly, &rdev->flags)) {
-				if (wonly < 0)
-					wonly = i;
-			} else {
-				if (disk < 0)
-					disk = i;
+			sector_t first_bad = MaxSector;
+			int bad_sectors;
+
+			if (is_badblock(rdev, sector_nr, good_sectors,
+					&first_bad, &bad_sectors)) {
+				if (first_bad > sector_nr)
+					good_sectors = first_bad - sector_nr;
+				else {
+					bad_sectors -= (sector_nr - first_bad);
+					if (min_bad == 0 ||
+					    min_bad > bad_sectors)
+						min_bad = bad_sectors;
+				}
+			}
+			if (sector_nr < first_bad) {
+				if (test_bit(WriteMostly, &rdev->flags)) {
+					if (wonly < 0)
+						wonly = i;
+				} else {
+					if (disk < 0)
+						disk = i;
+				}
+				bio->bi_rw = READ;
+				bio->bi_end_io = end_sync_read;
+				read_targets++;
 			}
-			read_targets++;
 		}
-		atomic_inc(&rdev->nr_pending);
-		bio->bi_sector = sector_nr + rdev->data_offset;
-		bio->bi_bdev = rdev->bdev;
-		bio->bi_private = r1_bio;
+		if (bio->bi_end_io) {
+			atomic_inc(&rdev->nr_pending);
+			bio->bi_sector = sector_nr + rdev->data_offset;
+			bio->bi_bdev = rdev->bdev;
+			bio->bi_private = r1_bio;
+		}
 	}
 	rcu_read_unlock();
 	if (disk < 0)
 		disk = wonly;
 	r1_bio->read_disk = disk;
 
+	if (read_targets == 0 && min_bad > 0) {
+		/* These sectors are bad on all InSync devices, so we
+		 * need to mark them bad on all write targets
+		 */
+		int ok = 1;
+		for (i = 0 ; i < conf->raid_disks ; i++)
+			if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
+				mdk_rdev_t *rdev =
+					rcu_dereference(conf->mirrors[i].rdev);
+				ok = rdev_set_badblocks(rdev, sector_nr,
+							min_bad, 0
+					) && ok;
+			}
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+		*skipped = 1;
+		put_buf(r1_bio);
+
+		if (!ok) {
+			/* Cannot record the badblocks, so need to
+			 * abort the resync.
+			 * If there are multiple read targets, could just
+			 * fail the really bad ones ???
+			 */
+			conf->recovery_disabled = mddev->recovery_disabled;
+			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+			return 0;
+		} else
+			return min_bad;
+
+	}
+	if (min_bad > 0 && min_bad < good_sectors) {
+		/* only resync enough to reach the next bad->good
+		 * transition */
+		good_sectors = min_bad;
+	}
+
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
 		/* extra read targets are also write targets */
 		write_targets += read_targets-1;
@@ -1925,6 +1980,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 	if (max_sector > mddev->resync_max)
 		max_sector = mddev->resync_max; /* Don't do IO beyond here */
+	if (max_sector > sector_nr + good_sectors)
+		max_sector = sector_nr + good_sectors;
 	nr_sectors = 0;
 	sync_blocks = 0;
 	do {
@@ -2147,10 +2204,6 @@ static int run(mddev_t *mddev)
 			blk_queue_segment_boundary(mddev->queue,
 						   PAGE_CACHE_SIZE - 1);
 		}
-		if (rdev->badblocks.count) {
-			printk(KERN_ERR "md/raid1: Cannot handle bad blocks yet\n");
-			return -EINVAL;
-		}
 	}
 
 	mddev->degraded = 0;
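
Finally, a similarly hedged sketch of the patch's fallback path, taken when no
in-sync device can supply the range (read_targets == 0 && min_bad > 0 above):
the range is recorded as bad on every write target and skipped, rather than
failing the whole resync; only if that recording itself fails is the resync
aborted. struct target and set_badblocks_sim() are invented stand-ins for the
kernel's rdev and rdev_set_badblocks().

/*
 * Hedged sketch (illustrative only, NOT kernel code): when a range is
 * bad on every in-sync device, record it as bad on each write target
 * and skip it, instead of failing the array.
 */
#include <stdio.h>

typedef unsigned long long sector_t;

struct target {
	const char *name;
	int is_write_target;	/* analogue of bi_end_io == end_sync_write */
	int table_full;		/* forces the recording attempt to fail */
};

/* Toy analogue of rdev_set_badblocks(): returns 0 on failure. */
static int set_badblocks_sim(struct target *t, sector_t start, sector_t len)
{
	if (t->table_full)
		return 0;
	printf("%s: sectors %llu..%llu recorded as bad\n",
	       t->name, start, start + len - 1);
	return 1;
}

int main(void)
{
	struct target mirrors[] = {
		{ "rdev0", 1, 0 },
		{ "rdev1", 0, 0 },	/* not a write target: skipped */
		{ "rdev2", 1, 0 },
	};
	sector_t sector_nr = 1000, min_bad = 16;
	int i, ok = 1;

	for (i = 0; i < 3; i++)
		if (mirrors[i].is_write_target)
			/* as in the patch: a single failure poisons 'ok' */
			ok = set_badblocks_sim(&mirrors[i], sector_nr,
					       min_bad) && ok;

	if (!ok)
		printf("cannot record badblocks: abort resync\n");
	else
		printf("skipped %llu bad sectors; resync continues\n", min_bad);
	return 0;
}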