diff options
author | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:24 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:24 -0400 |
commit | e875ecea266a543e643b19e44cf472f1412708f9 (patch) | |
tree | b602d08f7aa4a743d3c27ad55e347d36991f0814 /drivers/md/raid10.c | |
parent | 40c356ce5ad1a6be817825e1da1bc7494349cc6d (diff) |
md/raid10: record bad blocks as needed during recovery.
When recovering one or more devices, if all the good devices have
bad blocks, we should record a bad block on the device being rebuilt.
If recording the bad block fails, we need to abort the recovery.
To ensure that the abort is not treated as having happened later than
it actually did, we need to move the check for MD_RECOVERY_INTR earlier
in md_do_sync, in particular before mddev->curr_resync is updated.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 40 |
1 files changed, 32 insertions, 8 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 5f0355832b46..de6089926273 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -2005,7 +2005,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, | |||
2005 | max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); | 2005 | max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); |
2006 | if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { | 2006 | if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { |
2007 | /* recovery... the complicated one */ | 2007 | /* recovery... the complicated one */ |
2008 | int j, k; | 2008 | int j; |
2009 | r10_bio = NULL; | 2009 | r10_bio = NULL; |
2010 | 2010 | ||
2011 | for (i=0 ; i<conf->raid_disks; i++) { | 2011 | for (i=0 ; i<conf->raid_disks; i++) { |
@@ -2013,6 +2013,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, | |||
2013 | r10bio_t *rb2; | 2013 | r10bio_t *rb2; |
2014 | sector_t sect; | 2014 | sector_t sect; |
2015 | int must_sync; | 2015 | int must_sync; |
2016 | int any_working; | ||
2016 | 2017 | ||
2017 | if (conf->mirrors[i].rdev == NULL || | 2018 | if (conf->mirrors[i].rdev == NULL || |
2018 | test_bit(In_sync, &conf->mirrors[i].rdev->flags)) | 2019 | test_bit(In_sync, &conf->mirrors[i].rdev->flags)) |
@@ -2064,7 +2065,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, | |||
2064 | must_sync = bitmap_start_sync(mddev->bitmap, sect, | 2065 | must_sync = bitmap_start_sync(mddev->bitmap, sect, |
2065 | &sync_blocks, still_degraded); | 2066 | &sync_blocks, still_degraded); |
2066 | 2067 | ||
2068 | any_working = 0; | ||
2067 | for (j=0; j<conf->copies;j++) { | 2069 | for (j=0; j<conf->copies;j++) { |
2070 | int k; | ||
2068 | int d = r10_bio->devs[j].devnum; | 2071 | int d = r10_bio->devs[j].devnum; |
2069 | mdk_rdev_t *rdev; | 2072 | mdk_rdev_t *rdev; |
2070 | sector_t sector, first_bad; | 2073 | sector_t sector, first_bad; |
@@ -2073,6 +2076,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, | |||
2073 | !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) | 2076 | !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) |
2074 | continue; | 2077 | continue; |
2075 | /* This is where we read from */ | 2078 | /* This is where we read from */ |
2079 | any_working = 1; | ||
2076 | rdev = conf->mirrors[d].rdev; | 2080 | rdev = conf->mirrors[d].rdev; |
2077 | sector = r10_bio->devs[j].addr; | 2081 | sector = r10_bio->devs[j].addr; |
2078 | 2082 | ||
@@ -2121,16 +2125,35 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, | |||
2121 | break; | 2125 | break; |
2122 | } | 2126 | } |
2123 | if (j == conf->copies) { | 2127 | if (j == conf->copies) { |
2124 | /* Cannot recover, so abort the recovery */ | 2128 | /* Cannot recover, so abort the recovery or |
2129 | * record a bad block */ | ||
2125 | put_buf(r10_bio); | 2130 | put_buf(r10_bio); |
2126 | if (rb2) | 2131 | if (rb2) |
2127 | atomic_dec(&rb2->remaining); | 2132 | atomic_dec(&rb2->remaining); |
2128 | r10_bio = rb2; | 2133 | r10_bio = rb2; |
2129 | if (!test_and_set_bit(MD_RECOVERY_INTR, | 2134 | if (any_working) { |
2130 | &mddev->recovery)) | 2135 | /* problem is that there are bad blocks |
2131 | printk(KERN_INFO "md/raid10:%s: insufficient " | 2136 | * on other device(s) |
2132 | "working devices for recovery.\n", | 2137 | */ |
2133 | mdname(mddev)); | 2138 | int k; |
2139 | for (k = 0; k < conf->copies; k++) | ||
2140 | if (r10_bio->devs[k].devnum == i) | ||
2141 | break; | ||
2142 | if (!rdev_set_badblocks( | ||
2143 | conf->mirrors[i].rdev, | ||
2144 | r10_bio->devs[k].addr, | ||
2145 | max_sync, 0)) | ||
2146 | any_working = 0; | ||
2147 | } | ||
2148 | if (!any_working) { | ||
2149 | if (!test_and_set_bit(MD_RECOVERY_INTR, | ||
2150 | &mddev->recovery)) | ||
2151 | printk(KERN_INFO "md/raid10:%s: insufficient " | ||
2152 | "working devices for recovery.\n", | ||
2153 | mdname(mddev)); | ||
2154 | conf->mirrors[i].recovery_disabled | ||
2155 | = mddev->recovery_disabled; | ||
2156 | } | ||
2134 | break; | 2157 | break; |
2135 | } | 2158 | } |
2136 | } | 2159 | } |
@@ -2290,7 +2313,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, | |||
2290 | return sectors_skipped + nr_sectors; | 2313 | return sectors_skipped + nr_sectors; |
2291 | giveup: | 2314 | giveup: |
2292 | /* There is nowhere to write, so all non-sync | 2315 | /* There is nowhere to write, so all non-sync |
2293 | * drives must be failed, so try the next chunk... | 2316 | * drives must be failed or in resync, all drives |
2317 | * have a bad block, so try the next chunk... | ||
2294 | */ | 2318 | */ |
2295 | if (sector_nr + max_sync < max_sector) | 2319 | if (sector_nr + max_sync < max_sector) |
2296 | max_sector = sector_nr + max_sync; | 2320 | max_sector = sector_nr + max_sync; |