diff options
author | Miao Xie <miaox@cn.fujitsu.com> | 2014-09-12 06:43:59 -0400 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2014-09-17 16:38:56 -0400 |
commit | 2fe6303e7cd099334cdb09370cece6bc168de131 (patch) | |
tree | 389aaecfb5eff331be497e345b2366d688cf2537 | |
parent | 454ff3de42872870ffc3580b69132a9ef40f5cc5 (diff) |
Btrfs: split bio_readpage_error into several functions
The data repair function for direct reads will be implemented later and will
reuse some of the code in bio_readpage_error, so split bio_readpage_error into
several smaller functions that the direct read repair path can call.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- | fs/btrfs/extent_io.c | 159 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 28 |
2 files changed, 123 insertions, 64 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c7648f53f63d..c191ea58750f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1962,25 +1962,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) | |||
1962 | SetPageUptodate(page); | 1962 | SetPageUptodate(page); |
1963 | } | 1963 | } |
1964 | 1964 | ||
1965 | /* | ||
1966 | * When IO fails, either with EIO or csum verification fails, we | ||
1967 | * try other mirrors that might have a good copy of the data. This | ||
1968 | * io_failure_record is used to record state as we go through all the | ||
1969 | * mirrors. If another mirror has good data, the page is set up to date | ||
1970 | * and things continue. If a good mirror can't be found, the original | ||
1971 | * bio end_io callback is called to indicate things have failed. | ||
1972 | */ | ||
1973 | struct io_failure_record { | ||
1974 | struct page *page; | ||
1975 | u64 start; | ||
1976 | u64 len; | ||
1977 | u64 logical; | ||
1978 | unsigned long bio_flags; | ||
1979 | int this_mirror; | ||
1980 | int failed_mirror; | ||
1981 | int in_validation; | ||
1982 | }; | ||
1983 | |||
1984 | static int free_io_failure(struct inode *inode, struct io_failure_record *rec) | 1965 | static int free_io_failure(struct inode *inode, struct io_failure_record *rec) |
1985 | { | 1966 | { |
1986 | int ret; | 1967 | int ret; |
@@ -2156,40 +2137,24 @@ out: | |||
2156 | return 0; | 2137 | return 0; |
2157 | } | 2138 | } |
2158 | 2139 | ||
2159 | /* | 2140 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, |
2160 | * this is a generic handler for readpage errors (default | 2141 | struct io_failure_record **failrec_ret) |
2161 | * readpage_io_failed_hook). if other copies exist, read those and write back | ||
2162 | * good data to the failed position. does not investigate in remapping the | ||
2163 | * failed extent elsewhere, hoping the device will be smart enough to do this as | ||
2164 | * needed | ||
2165 | */ | ||
2166 | |||
2167 | static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | ||
2168 | struct page *page, u64 start, u64 end, | ||
2169 | int failed_mirror) | ||
2170 | { | 2142 | { |
2171 | struct io_failure_record *failrec = NULL; | 2143 | struct io_failure_record *failrec; |
2172 | u64 private; | 2144 | u64 private; |
2173 | struct extent_map *em; | 2145 | struct extent_map *em; |
2174 | struct inode *inode = page->mapping->host; | ||
2175 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; | 2146 | struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; |
2176 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | 2147 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; |
2177 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 2148 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
2178 | struct bio *bio; | ||
2179 | struct btrfs_io_bio *btrfs_failed_bio; | ||
2180 | struct btrfs_io_bio *btrfs_bio; | ||
2181 | int num_copies; | ||
2182 | int ret; | 2149 | int ret; |
2183 | int read_mode; | ||
2184 | u64 logical; | 2150 | u64 logical; |
2185 | 2151 | ||
2186 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
2187 | |||
2188 | ret = get_state_private(failure_tree, start, &private); | 2152 | ret = get_state_private(failure_tree, start, &private); |
2189 | if (ret) { | 2153 | if (ret) { |
2190 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); | 2154 | failrec = kzalloc(sizeof(*failrec), GFP_NOFS); |
2191 | if (!failrec) | 2155 | if (!failrec) |
2192 | return -ENOMEM; | 2156 | return -ENOMEM; |
2157 | |||
2193 | failrec->start = start; | 2158 | failrec->start = start; |
2194 | failrec->len = end - start + 1; | 2159 | failrec->len = end - start + 1; |
2195 | failrec->this_mirror = 0; | 2160 | failrec->this_mirror = 0; |
@@ -2209,11 +2174,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2209 | em = NULL; | 2174 | em = NULL; |
2210 | } | 2175 | } |
2211 | read_unlock(&em_tree->lock); | 2176 | read_unlock(&em_tree->lock); |
2212 | |||
2213 | if (!em) { | 2177 | if (!em) { |
2214 | kfree(failrec); | 2178 | kfree(failrec); |
2215 | return -EIO; | 2179 | return -EIO; |
2216 | } | 2180 | } |
2181 | |||
2217 | logical = start - em->start; | 2182 | logical = start - em->start; |
2218 | logical = em->block_start + logical; | 2183 | logical = em->block_start + logical; |
2219 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { | 2184 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { |
@@ -2222,8 +2187,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2222 | extent_set_compress_type(&failrec->bio_flags, | 2187 | extent_set_compress_type(&failrec->bio_flags, |
2223 | em->compress_type); | 2188 | em->compress_type); |
2224 | } | 2189 | } |
2225 | pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, " | 2190 | |
2226 | "len=%llu\n", logical, start, failrec->len); | 2191 | pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n", |
2192 | logical, start, failrec->len); | ||
2193 | |||
2227 | failrec->logical = logical; | 2194 | failrec->logical = logical; |
2228 | free_extent_map(em); | 2195 | free_extent_map(em); |
2229 | 2196 | ||
@@ -2243,8 +2210,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2243 | } | 2210 | } |
2244 | } else { | 2211 | } else { |
2245 | failrec = (struct io_failure_record *)(unsigned long)private; | 2212 | failrec = (struct io_failure_record *)(unsigned long)private; |
2246 | pr_debug("bio_readpage_error: (found) logical=%llu, " | 2213 | pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n", |
2247 | "start=%llu, len=%llu, validation=%d\n", | ||
2248 | failrec->logical, failrec->start, failrec->len, | 2214 | failrec->logical, failrec->start, failrec->len, |
2249 | failrec->in_validation); | 2215 | failrec->in_validation); |
2250 | /* | 2216 | /* |
@@ -2253,6 +2219,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2253 | * clean_io_failure() clean all those errors at once. | 2219 | * clean_io_failure() clean all those errors at once. |
2254 | */ | 2220 | */ |
2255 | } | 2221 | } |
2222 | |||
2223 | *failrec_ret = failrec; | ||
2224 | |||
2225 | return 0; | ||
2226 | } | ||
2227 | |||
2228 | int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, | ||
2229 | struct io_failure_record *failrec, int failed_mirror) | ||
2230 | { | ||
2231 | int num_copies; | ||
2232 | |||
2256 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, | 2233 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, |
2257 | failrec->logical, failrec->len); | 2234 | failrec->logical, failrec->len); |
2258 | if (num_copies == 1) { | 2235 | if (num_copies == 1) { |
@@ -2261,10 +2238,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2261 | * all the retry and error correction code that follows. no | 2238 | * all the retry and error correction code that follows. no |
2262 | * matter what the error is, it is very likely to persist. | 2239 | * matter what the error is, it is very likely to persist. |
2263 | */ | 2240 | */ |
2264 | pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", | 2241 | pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", |
2265 | num_copies, failrec->this_mirror, failed_mirror); | 2242 | num_copies, failrec->this_mirror, failed_mirror); |
2266 | free_io_failure(inode, failrec); | 2243 | return 0; |
2267 | return -EIO; | ||
2268 | } | 2244 | } |
2269 | 2245 | ||
2270 | /* | 2246 | /* |
@@ -2284,7 +2260,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2284 | BUG_ON(failrec->in_validation); | 2260 | BUG_ON(failrec->in_validation); |
2285 | failrec->in_validation = 1; | 2261 | failrec->in_validation = 1; |
2286 | failrec->this_mirror = failed_mirror; | 2262 | failrec->this_mirror = failed_mirror; |
2287 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
2288 | } else { | 2263 | } else { |
2289 | /* | 2264 | /* |
2290 | * we're ready to fulfill a) and b) alongside. get a good copy | 2265 | * we're ready to fulfill a) and b) alongside. get a good copy |
@@ -2300,22 +2275,32 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2300 | failrec->this_mirror++; | 2275 | failrec->this_mirror++; |
2301 | if (failrec->this_mirror == failed_mirror) | 2276 | if (failrec->this_mirror == failed_mirror) |
2302 | failrec->this_mirror++; | 2277 | failrec->this_mirror++; |
2303 | read_mode = READ_SYNC; | ||
2304 | } | 2278 | } |
2305 | 2279 | ||
2306 | if (failrec->this_mirror > num_copies) { | 2280 | if (failrec->this_mirror > num_copies) { |
2307 | pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", | 2281 | pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", |
2308 | num_copies, failrec->this_mirror, failed_mirror); | 2282 | num_copies, failrec->this_mirror, failed_mirror); |
2309 | free_io_failure(inode, failrec); | 2283 | return 0; |
2310 | return -EIO; | ||
2311 | } | 2284 | } |
2312 | 2285 | ||
2286 | return 1; | ||
2287 | } | ||
2288 | |||
2289 | |||
2290 | struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | ||
2291 | struct io_failure_record *failrec, | ||
2292 | struct page *page, int pg_offset, int icsum, | ||
2293 | bio_end_io_t *endio_func) | ||
2294 | { | ||
2295 | struct bio *bio; | ||
2296 | struct btrfs_io_bio *btrfs_failed_bio; | ||
2297 | struct btrfs_io_bio *btrfs_bio; | ||
2298 | |||
2313 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 2299 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); |
2314 | if (!bio) { | 2300 | if (!bio) |
2315 | free_io_failure(inode, failrec); | 2301 | return NULL; |
2316 | return -EIO; | 2302 | |
2317 | } | 2303 | bio->bi_end_io = endio_func; |
2318 | bio->bi_end_io = failed_bio->bi_end_io; | ||
2319 | bio->bi_iter.bi_sector = failrec->logical >> 9; | 2304 | bio->bi_iter.bi_sector = failrec->logical >> 9; |
2320 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 2305 | bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
2321 | bio->bi_iter.bi_size = 0; | 2306 | bio->bi_iter.bi_size = 0; |
@@ -2327,17 +2312,63 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | |||
2327 | 2312 | ||
2328 | btrfs_bio = btrfs_io_bio(bio); | 2313 | btrfs_bio = btrfs_io_bio(bio); |
2329 | btrfs_bio->csum = btrfs_bio->csum_inline; | 2314 | btrfs_bio->csum = btrfs_bio->csum_inline; |
2330 | phy_offset >>= inode->i_sb->s_blocksize_bits; | 2315 | icsum *= csum_size; |
2331 | phy_offset *= csum_size; | 2316 | memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum, |
2332 | memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset, | ||
2333 | csum_size); | 2317 | csum_size); |
2334 | } | 2318 | } |
2335 | 2319 | ||
2336 | bio_add_page(bio, page, failrec->len, start - page_offset(page)); | 2320 | bio_add_page(bio, page, failrec->len, pg_offset); |
2321 | |||
2322 | return bio; | ||
2323 | } | ||
2324 | |||
2325 | /* | ||
2326 | * this is a generic handler for readpage errors (default | ||
2327 | * readpage_io_failed_hook). if other copies exist, read those and write back | ||
2328 | * good data to the failed position. does not investigate in remapping the | ||
2329 | * failed extent elsewhere, hoping the device will be smart enough to do this as | ||
2330 | * needed | ||
2331 | */ | ||
2332 | |||
2333 | static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, | ||
2334 | struct page *page, u64 start, u64 end, | ||
2335 | int failed_mirror) | ||
2336 | { | ||
2337 | struct io_failure_record *failrec; | ||
2338 | struct inode *inode = page->mapping->host; | ||
2339 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
2340 | struct bio *bio; | ||
2341 | int read_mode; | ||
2342 | int ret; | ||
2343 | |||
2344 | BUG_ON(failed_bio->bi_rw & REQ_WRITE); | ||
2345 | |||
2346 | ret = btrfs_get_io_failure_record(inode, start, end, &failrec); | ||
2347 | if (ret) | ||
2348 | return ret; | ||
2349 | |||
2350 | ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror); | ||
2351 | if (!ret) { | ||
2352 | free_io_failure(inode, failrec); | ||
2353 | return -EIO; | ||
2354 | } | ||
2355 | |||
2356 | if (failed_bio->bi_vcnt > 1) | ||
2357 | read_mode = READ_SYNC | REQ_FAILFAST_DEV; | ||
2358 | else | ||
2359 | read_mode = READ_SYNC; | ||
2360 | |||
2361 | phy_offset >>= inode->i_sb->s_blocksize_bits; | ||
2362 | bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, | ||
2363 | start - page_offset(page), | ||
2364 | (int)phy_offset, failed_bio->bi_end_io); | ||
2365 | if (!bio) { | ||
2366 | free_io_failure(inode, failrec); | ||
2367 | return -EIO; | ||
2368 | } | ||
2337 | 2369 | ||
2338 | pr_debug("bio_readpage_error: submitting new read[%#x] to " | 2370 | pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n", |
2339 | "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, | 2371 | read_mode, failrec->this_mirror, failrec->in_validation); |
2340 | failrec->this_mirror, num_copies, failrec->in_validation); | ||
2341 | 2372 | ||
2342 | ret = tree->ops->submit_bio_hook(inode, read_mode, bio, | 2373 | ret = tree->ops->submit_bio_hook(inode, read_mode, bio, |
2343 | failrec->this_mirror, | 2374 | failrec->this_mirror, |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 844b4c5029cd..75b621b7cd9f 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -344,6 +344,34 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
344 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); | 344 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
345 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | 345 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, |
346 | int mirror_num); | 346 | int mirror_num); |
347 | |||
348 | /* | ||
349 | * When IO fails, either with EIO or csum verification fails, we | ||
350 | * try other mirrors that might have a good copy of the data. This | ||
351 | * io_failure_record is used to record state as we go through all the | ||
352 | * mirrors. If another mirror has good data, the page is set up to date | ||
353 | * and things continue. If a good mirror can't be found, the original | ||
354 | * bio end_io callback is called to indicate things have failed. | ||
355 | */ | ||
356 | struct io_failure_record { | ||
357 | struct page *page; | ||
358 | u64 start; | ||
359 | u64 len; | ||
360 | u64 logical; | ||
361 | unsigned long bio_flags; | ||
362 | int this_mirror; | ||
363 | int failed_mirror; | ||
364 | int in_validation; | ||
365 | }; | ||
366 | |||
367 | int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end, | ||
368 | struct io_failure_record **failrec_ret); | ||
369 | int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio, | ||
370 | struct io_failure_record *failrec, int fail_mirror); | ||
371 | struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, | ||
372 | struct io_failure_record *failrec, | ||
373 | struct page *page, int pg_offset, int icsum, | ||
374 | bio_end_io_t *endio_func); | ||
347 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS | 375 | #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS |
348 | noinline u64 find_lock_delalloc_range(struct inode *inode, | 376 | noinline u64 find_lock_delalloc_range(struct inode *inode, |
349 | struct extent_io_tree *tree, | 377 | struct extent_io_tree *tree, |