aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid10.c
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-07-27 21:39:24 -0400
committer NeilBrown <neilb@suse.de> 2011-07-27 21:39:24 -0400
commit e875ecea266a543e643b19e44cf472f1412708f9 (patch)
tree b602d08f7aa4a743d3c27ad55e347d36991f0814 /drivers/md/raid10.c
parent 40c356ce5ad1a6be817825e1da1bc7494349cc6d (diff)
md/raid10: record bad blocks as needed during recovery.
When recovering one or more devices, if all the good devices have bad blocks we should record a bad block on the device being rebuilt.

If this fails, we need to abort the recovery.

To ensure we don't think that we aborted later than we actually did, we need to move the check for MD_RECOVERY_INTR earlier in md_do_sync, in particular before mddev->curr_resync is updated.

Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- drivers/md/raid10.c 40
1 files changed, 32 insertions, 8 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5f0355832b46..de6089926273 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2005,7 +2005,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2005 max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); 2005 max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
2006 if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 2006 if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
2007 /* recovery... the complicated one */ 2007 /* recovery... the complicated one */
2008 int j, k; 2008 int j;
2009 r10_bio = NULL; 2009 r10_bio = NULL;
2010 2010
2011 for (i=0 ; i<conf->raid_disks; i++) { 2011 for (i=0 ; i<conf->raid_disks; i++) {
@@ -2013,6 +2013,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2013 r10bio_t *rb2; 2013 r10bio_t *rb2;
2014 sector_t sect; 2014 sector_t sect;
2015 int must_sync; 2015 int must_sync;
2016 int any_working;
2016 2017
2017 if (conf->mirrors[i].rdev == NULL || 2018 if (conf->mirrors[i].rdev == NULL ||
2018 test_bit(In_sync, &conf->mirrors[i].rdev->flags)) 2019 test_bit(In_sync, &conf->mirrors[i].rdev->flags))
@@ -2064,7 +2065,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2064 must_sync = bitmap_start_sync(mddev->bitmap, sect, 2065 must_sync = bitmap_start_sync(mddev->bitmap, sect,
2065 &sync_blocks, still_degraded); 2066 &sync_blocks, still_degraded);
2066 2067
2068 any_working = 0;
2067 for (j=0; j<conf->copies;j++) { 2069 for (j=0; j<conf->copies;j++) {
2070 int k;
2068 int d = r10_bio->devs[j].devnum; 2071 int d = r10_bio->devs[j].devnum;
2069 mdk_rdev_t *rdev; 2072 mdk_rdev_t *rdev;
2070 sector_t sector, first_bad; 2073 sector_t sector, first_bad;
@@ -2073,6 +2076,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2073 !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) 2076 !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
2074 continue; 2077 continue;
2075 /* This is where we read from */ 2078 /* This is where we read from */
2079 any_working = 1;
2076 rdev = conf->mirrors[d].rdev; 2080 rdev = conf->mirrors[d].rdev;
2077 sector = r10_bio->devs[j].addr; 2081 sector = r10_bio->devs[j].addr;
2078 2082
@@ -2121,16 +2125,35 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2121 break; 2125 break;
2122 } 2126 }
2123 if (j == conf->copies) { 2127 if (j == conf->copies) {
2124 /* Cannot recover, so abort the recovery */ 2128 /* Cannot recover, so abort the recovery or
2129 * record a bad block */
2125 put_buf(r10_bio); 2130 put_buf(r10_bio);
2126 if (rb2) 2131 if (rb2)
2127 atomic_dec(&rb2->remaining); 2132 atomic_dec(&rb2->remaining);
2128 r10_bio = rb2; 2133 r10_bio = rb2;
2129 if (!test_and_set_bit(MD_RECOVERY_INTR, 2134 if (any_working) {
2130 &mddev->recovery)) 2135 /* problem is that there are bad blocks
2131 printk(KERN_INFO "md/raid10:%s: insufficient " 2136 * on other device(s)
2132 "working devices for recovery.\n", 2137 */
2133 mdname(mddev)); 2138 int k;
2139 for (k = 0; k < conf->copies; k++)
2140 if (r10_bio->devs[k].devnum == i)
2141 break;
2142 if (!rdev_set_badblocks(
2143 conf->mirrors[i].rdev,
2144 r10_bio->devs[k].addr,
2145 max_sync, 0))
2146 any_working = 0;
2147 }
2148 if (!any_working) {
2149 if (!test_and_set_bit(MD_RECOVERY_INTR,
2150 &mddev->recovery))
2151 printk(KERN_INFO "md/raid10:%s: insufficient "
2152 "working devices for recovery.\n",
2153 mdname(mddev));
2154 conf->mirrors[i].recovery_disabled
2155 = mddev->recovery_disabled;
2156 }
2134 break; 2157 break;
2135 } 2158 }
2136 } 2159 }
@@ -2290,7 +2313,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr,
2290 return sectors_skipped + nr_sectors; 2313 return sectors_skipped + nr_sectors;
2291 giveup: 2314 giveup:
2292 /* There is nowhere to write, so all non-sync 2315 /* There is nowhere to write, so all non-sync
2293 * drives must be failed, so try the next chunk... 2316 * drives must be failed or in resync, all drives
2317 * have a bad block, so try the next chunk...
2294 */ 2318 */
2295 if (sector_nr + max_sync < max_sector) 2319 if (sector_nr + max_sync < max_sector)
2296 max_sector = sector_nr + max_sync; 2320 max_sector = sector_nr + max_sync;