about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2014-09-12 06:43:59 -0400
committerChris Mason <clm@fb.com>2014-09-17 16:38:56 -0400
commit2fe6303e7cd099334cdb09370cece6bc168de131 (patch)
tree389aaecfb5eff331be497e345b2366d688cf2537
parent454ff3de42872870ffc3580b69132a9ef40f5cc5 (diff)
Btrfs: split bio_readpage_error into several functions
The data repair function for direct read will be implemented later, and some of the code in bio_readpage_error will be reused by it, so split bio_readpage_error into several functions that the direct read repair code can call later. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/extent_io.c159
-rw-r--r--fs/btrfs/extent_io.h28
2 files changed, 123 insertions, 64 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7648f53f63d..c191ea58750f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1962,25 +1962,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1962 SetPageUptodate(page); 1962 SetPageUptodate(page);
1963} 1963}
1964 1964
1965/*
1966 * When IO fails, either with EIO or csum verification fails, we
1967 * try other mirrors that might have a good copy of the data. This
1968 * io_failure_record is used to record state as we go through all the
1969 * mirrors. If another mirror has good data, the page is set up to date
1970 * and things continue. If a good mirror can't be found, the original
1971 * bio end_io callback is called to indicate things have failed.
1972 */
1973struct io_failure_record {
1974 struct page *page;
1975 u64 start;
1976 u64 len;
1977 u64 logical;
1978 unsigned long bio_flags;
1979 int this_mirror;
1980 int failed_mirror;
1981 int in_validation;
1982};
1983
1984static int free_io_failure(struct inode *inode, struct io_failure_record *rec) 1965static int free_io_failure(struct inode *inode, struct io_failure_record *rec)
1985{ 1966{
1986 int ret; 1967 int ret;
@@ -2156,40 +2137,24 @@ out:
2156 return 0; 2137 return 0;
2157} 2138}
2158 2139
2159/* 2140int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
2160 * this is a generic handler for readpage errors (default 2141 struct io_failure_record **failrec_ret)
2161 * readpage_io_failed_hook). if other copies exist, read those and write back
2162 * good data to the failed position. does not investigate in remapping the
2163 * failed extent elsewhere, hoping the device will be smart enough to do this as
2164 * needed
2165 */
2166
2167static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2168 struct page *page, u64 start, u64 end,
2169 int failed_mirror)
2170{ 2142{
2171 struct io_failure_record *failrec = NULL; 2143 struct io_failure_record *failrec;
2172 u64 private; 2144 u64 private;
2173 struct extent_map *em; 2145 struct extent_map *em;
2174 struct inode *inode = page->mapping->host;
2175 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; 2146 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2176 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2147 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2177 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2148 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2178 struct bio *bio;
2179 struct btrfs_io_bio *btrfs_failed_bio;
2180 struct btrfs_io_bio *btrfs_bio;
2181 int num_copies;
2182 int ret; 2149 int ret;
2183 int read_mode;
2184 u64 logical; 2150 u64 logical;
2185 2151
2186 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
2187
2188 ret = get_state_private(failure_tree, start, &private); 2152 ret = get_state_private(failure_tree, start, &private);
2189 if (ret) { 2153 if (ret) {
2190 failrec = kzalloc(sizeof(*failrec), GFP_NOFS); 2154 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2191 if (!failrec) 2155 if (!failrec)
2192 return -ENOMEM; 2156 return -ENOMEM;
2157
2193 failrec->start = start; 2158 failrec->start = start;
2194 failrec->len = end - start + 1; 2159 failrec->len = end - start + 1;
2195 failrec->this_mirror = 0; 2160 failrec->this_mirror = 0;
@@ -2209,11 +2174,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2209 em = NULL; 2174 em = NULL;
2210 } 2175 }
2211 read_unlock(&em_tree->lock); 2176 read_unlock(&em_tree->lock);
2212
2213 if (!em) { 2177 if (!em) {
2214 kfree(failrec); 2178 kfree(failrec);
2215 return -EIO; 2179 return -EIO;
2216 } 2180 }
2181
2217 logical = start - em->start; 2182 logical = start - em->start;
2218 logical = em->block_start + logical; 2183 logical = em->block_start + logical;
2219 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 2184 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
@@ -2222,8 +2187,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2222 extent_set_compress_type(&failrec->bio_flags, 2187 extent_set_compress_type(&failrec->bio_flags,
2223 em->compress_type); 2188 em->compress_type);
2224 } 2189 }
2225 pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, " 2190
2226 "len=%llu\n", logical, start, failrec->len); 2191 pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
2192 logical, start, failrec->len);
2193
2227 failrec->logical = logical; 2194 failrec->logical = logical;
2228 free_extent_map(em); 2195 free_extent_map(em);
2229 2196
@@ -2243,8 +2210,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2243 } 2210 }
2244 } else { 2211 } else {
2245 failrec = (struct io_failure_record *)(unsigned long)private; 2212 failrec = (struct io_failure_record *)(unsigned long)private;
2246 pr_debug("bio_readpage_error: (found) logical=%llu, " 2213 pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
2247 "start=%llu, len=%llu, validation=%d\n",
2248 failrec->logical, failrec->start, failrec->len, 2214 failrec->logical, failrec->start, failrec->len,
2249 failrec->in_validation); 2215 failrec->in_validation);
2250 /* 2216 /*
@@ -2253,6 +2219,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2253 * clean_io_failure() clean all those errors at once. 2219 * clean_io_failure() clean all those errors at once.
2254 */ 2220 */
2255 } 2221 }
2222
2223 *failrec_ret = failrec;
2224
2225 return 0;
2226}
2227
2228int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2229 struct io_failure_record *failrec, int failed_mirror)
2230{
2231 int num_copies;
2232
2256 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, 2233 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
2257 failrec->logical, failrec->len); 2234 failrec->logical, failrec->len);
2258 if (num_copies == 1) { 2235 if (num_copies == 1) {
@@ -2261,10 +2238,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2261 * all the retry and error correction code that follows. no 2238 * all the retry and error correction code that follows. no
2262 * matter what the error is, it is very likely to persist. 2239 * matter what the error is, it is very likely to persist.
2263 */ 2240 */
2264 pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n", 2241 pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2265 num_copies, failrec->this_mirror, failed_mirror); 2242 num_copies, failrec->this_mirror, failed_mirror);
2266 free_io_failure(inode, failrec); 2243 return 0;
2267 return -EIO;
2268 } 2244 }
2269 2245
2270 /* 2246 /*
@@ -2284,7 +2260,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2284 BUG_ON(failrec->in_validation); 2260 BUG_ON(failrec->in_validation);
2285 failrec->in_validation = 1; 2261 failrec->in_validation = 1;
2286 failrec->this_mirror = failed_mirror; 2262 failrec->this_mirror = failed_mirror;
2287 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
2288 } else { 2263 } else {
2289 /* 2264 /*
2290 * we're ready to fulfill a) and b) alongside. get a good copy 2265 * we're ready to fulfill a) and b) alongside. get a good copy
@@ -2300,22 +2275,32 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2300 failrec->this_mirror++; 2275 failrec->this_mirror++;
2301 if (failrec->this_mirror == failed_mirror) 2276 if (failrec->this_mirror == failed_mirror)
2302 failrec->this_mirror++; 2277 failrec->this_mirror++;
2303 read_mode = READ_SYNC;
2304 } 2278 }
2305 2279
2306 if (failrec->this_mirror > num_copies) { 2280 if (failrec->this_mirror > num_copies) {
2307 pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n", 2281 pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2308 num_copies, failrec->this_mirror, failed_mirror); 2282 num_copies, failrec->this_mirror, failed_mirror);
2309 free_io_failure(inode, failrec); 2283 return 0;
2310 return -EIO;
2311 } 2284 }
2312 2285
2286 return 1;
2287}
2288
2289
2290struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2291 struct io_failure_record *failrec,
2292 struct page *page, int pg_offset, int icsum,
2293 bio_end_io_t *endio_func)
2294{
2295 struct bio *bio;
2296 struct btrfs_io_bio *btrfs_failed_bio;
2297 struct btrfs_io_bio *btrfs_bio;
2298
2313 bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 2299 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
2314 if (!bio) { 2300 if (!bio)
2315 free_io_failure(inode, failrec); 2301 return NULL;
2316 return -EIO; 2302
2317 } 2303 bio->bi_end_io = endio_func;
2318 bio->bi_end_io = failed_bio->bi_end_io;
2319 bio->bi_iter.bi_sector = failrec->logical >> 9; 2304 bio->bi_iter.bi_sector = failrec->logical >> 9;
2320 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2305 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2321 bio->bi_iter.bi_size = 0; 2306 bio->bi_iter.bi_size = 0;
@@ -2327,17 +2312,63 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2327 2312
2328 btrfs_bio = btrfs_io_bio(bio); 2313 btrfs_bio = btrfs_io_bio(bio);
2329 btrfs_bio->csum = btrfs_bio->csum_inline; 2314 btrfs_bio->csum = btrfs_bio->csum_inline;
2330 phy_offset >>= inode->i_sb->s_blocksize_bits; 2315 icsum *= csum_size;
2331 phy_offset *= csum_size; 2316 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
2332 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
2333 csum_size); 2317 csum_size);
2334 } 2318 }
2335 2319
2336 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 2320 bio_add_page(bio, page, failrec->len, pg_offset);
2321
2322 return bio;
2323}
2324
2325/*
2326 * this is a generic handler for readpage errors (default
2327 * readpage_io_failed_hook). if other copies exist, read those and write back
2328 * good data to the failed position. does not investigate in remapping the
2329 * failed extent elsewhere, hoping the device will be smart enough to do this as
2330 * needed
2331 */
2332
2333static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2334 struct page *page, u64 start, u64 end,
2335 int failed_mirror)
2336{
2337 struct io_failure_record *failrec;
2338 struct inode *inode = page->mapping->host;
2339 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2340 struct bio *bio;
2341 int read_mode;
2342 int ret;
2343
2344 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
2345
2346 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
2347 if (ret)
2348 return ret;
2349
2350 ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
2351 if (!ret) {
2352 free_io_failure(inode, failrec);
2353 return -EIO;
2354 }
2355
2356 if (failed_bio->bi_vcnt > 1)
2357 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
2358 else
2359 read_mode = READ_SYNC;
2360
2361 phy_offset >>= inode->i_sb->s_blocksize_bits;
2362 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2363 start - page_offset(page),
2364 (int)phy_offset, failed_bio->bi_end_io);
2365 if (!bio) {
2366 free_io_failure(inode, failrec);
2367 return -EIO;
2368 }
2337 2369
2338 pr_debug("bio_readpage_error: submitting new read[%#x] to " 2370 pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
2339 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, 2371 read_mode, failrec->this_mirror, failrec->in_validation);
2340 failrec->this_mirror, num_copies, failrec->in_validation);
2341 2372
2342 ret = tree->ops->submit_bio_hook(inode, read_mode, bio, 2373 ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
2343 failrec->this_mirror, 2374 failrec->this_mirror,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 844b4c5029cd..75b621b7cd9f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -344,6 +344,34 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
344int end_extent_writepage(struct page *page, int err, u64 start, u64 end); 344int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
345int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, 345int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
346 int mirror_num); 346 int mirror_num);
347
348/*
349 * When IO fails, either with EIO or csum verification fails, we
350 * try other mirrors that might have a good copy of the data. This
351 * io_failure_record is used to record state as we go through all the
352 * mirrors. If another mirror has good data, the page is set up to date
353 * and things continue. If a good mirror can't be found, the original
354 * bio end_io callback is called to indicate things have failed.
355 */
356struct io_failure_record {
357 struct page *page;
358 u64 start;
359 u64 len;
360 u64 logical;
361 unsigned long bio_flags;
362 int this_mirror;
363 int failed_mirror;
364 int in_validation;
365};
366
367int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
368 struct io_failure_record **failrec_ret);
369int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
370 struct io_failure_record *failrec, int fail_mirror);
371struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
372 struct io_failure_record *failrec,
373 struct page *page, int pg_offset, int icsum,
374 bio_end_io_t *endio_func);
347#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 375#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
348noinline u64 find_lock_delalloc_range(struct inode *inode, 376noinline u64 find_lock_delalloc_range(struct inode *inode,
349 struct extent_io_tree *tree, 377 struct extent_io_tree *tree,