aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Miao Xie <miaox@cn.fujitsu.com> 2014-09-12 06:44:03 -0400
committer: Chris Mason <clm@fb.com> 2014-09-17 16:39:01 -0400
commit: 8b110e393c5a6e72d50fcdf9fa7ed8b647cfdfc9 (patch)
tree: 36f6b0048c79fd63c55892cd45f0b78e8c0cc15b
parent: 28e1cc7d1baf8038ae4ad4681c8f3dc94fcd7c00 (diff)
Btrfs: implement repair function when direct read fails
This patch implements a data repair function for when a direct read fails. The details of the implementation are:
- When we find the data is not right, we try to read the data from the other mirror.
- When the io on the mirror ends, we insert the endio work into the dedicated btrfs workqueue, not the common read endio workqueue. The original endio work is still blocked in the btrfs endio workqueue, so if we inserted the endio work of the io on the mirror into that workqueue, a deadlock would happen.
- After we get the right data, we write it back to the corrupted mirror.
- If the data on the new mirror is still corrupted, we try the next mirror until we read the right data or all the mirrors have been traversed.
- After the above work, we set the uptodate flag according to the result.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- fs/btrfs/async-thread.c 1
-rw-r--r-- fs/btrfs/async-thread.h 1
-rw-r--r-- fs/btrfs/btrfs_inode.h 2
-rw-r--r-- fs/btrfs/ctree.h 1
-rw-r--r-- fs/btrfs/disk-io.c 11
-rw-r--r-- fs/btrfs/disk-io.h 1
-rw-r--r-- fs/btrfs/extent_io.c 12
-rw-r--r-- fs/btrfs/extent_io.h 5
-rw-r--r-- fs/btrfs/inode.c 276
9 files changed, 281 insertions, 29 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index fbd76ded9a34..2da0a66790ba 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -74,6 +74,7 @@ BTRFS_WORK_HELPER(endio_helper);
74BTRFS_WORK_HELPER(endio_meta_helper); 74BTRFS_WORK_HELPER(endio_meta_helper);
75BTRFS_WORK_HELPER(endio_meta_write_helper); 75BTRFS_WORK_HELPER(endio_meta_write_helper);
76BTRFS_WORK_HELPER(endio_raid56_helper); 76BTRFS_WORK_HELPER(endio_raid56_helper);
77BTRFS_WORK_HELPER(endio_repair_helper);
77BTRFS_WORK_HELPER(rmw_helper); 78BTRFS_WORK_HELPER(rmw_helper);
78BTRFS_WORK_HELPER(endio_write_helper); 79BTRFS_WORK_HELPER(endio_write_helper);
79BTRFS_WORK_HELPER(freespace_write_helper); 80BTRFS_WORK_HELPER(freespace_write_helper);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index e9e31c94758f..e386c29ef1f6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -53,6 +53,7 @@ BTRFS_WORK_HELPER_PROTO(endio_helper);
53BTRFS_WORK_HELPER_PROTO(endio_meta_helper); 53BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
54BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper); 54BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
55BTRFS_WORK_HELPER_PROTO(endio_raid56_helper); 55BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
56BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
56BTRFS_WORK_HELPER_PROTO(rmw_helper); 57BTRFS_WORK_HELPER_PROTO(rmw_helper);
57BTRFS_WORK_HELPER_PROTO(endio_write_helper); 58BTRFS_WORK_HELPER_PROTO(endio_write_helper);
58BTRFS_WORK_HELPER_PROTO(freespace_write_helper); 59BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4d309471294e..7a7521c87c88 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -271,7 +271,7 @@ struct btrfs_dio_private {
271 * The original bio may be splited to several sub-bios, this is 271 * The original bio may be splited to several sub-bios, this is
272 * done during endio of sub-bios 272 * done during endio of sub-bios
273 */ 273 */
274 int (*subio_endio)(struct inode *, struct btrfs_io_bio *); 274 int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
275}; 275};
276 276
277/* 277/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0f3e4f7e454a..51ff3f8dbab9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1538,6 +1538,7 @@ struct btrfs_fs_info {
1538 struct btrfs_workqueue *endio_workers; 1538 struct btrfs_workqueue *endio_workers;
1539 struct btrfs_workqueue *endio_meta_workers; 1539 struct btrfs_workqueue *endio_meta_workers;
1540 struct btrfs_workqueue *endio_raid56_workers; 1540 struct btrfs_workqueue *endio_raid56_workers;
1541 struct btrfs_workqueue *endio_repair_workers;
1541 struct btrfs_workqueue *rmw_workers; 1542 struct btrfs_workqueue *rmw_workers;
1542 struct btrfs_workqueue *endio_meta_write_workers; 1543 struct btrfs_workqueue *endio_meta_write_workers;
1543 struct btrfs_workqueue *endio_write_workers; 1544 struct btrfs_workqueue *endio_write_workers;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a224fb9b34a3..48794f951427 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -713,7 +713,11 @@ static void end_workqueue_bio(struct bio *bio, int err)
713 func = btrfs_endio_write_helper; 713 func = btrfs_endio_write_helper;
714 } 714 }
715 } else { 715 } else {
716 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { 716 if (unlikely(end_io_wq->metadata ==
717 BTRFS_WQ_ENDIO_DIO_REPAIR)) {
718 wq = fs_info->endio_repair_workers;
719 func = btrfs_endio_repair_helper;
720 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
717 wq = fs_info->endio_raid56_workers; 721 wq = fs_info->endio_raid56_workers;
718 func = btrfs_endio_raid56_helper; 722 func = btrfs_endio_raid56_helper;
719 } else if (end_io_wq->metadata) { 723 } else if (end_io_wq->metadata) {
@@ -741,6 +745,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
741 int metadata) 745 int metadata)
742{ 746{
743 struct end_io_wq *end_io_wq; 747 struct end_io_wq *end_io_wq;
748
744 end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); 749 end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
745 if (!end_io_wq) 750 if (!end_io_wq)
746 return -ENOMEM; 751 return -ENOMEM;
@@ -2055,6 +2060,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
2055 btrfs_destroy_workqueue(fs_info->endio_workers); 2060 btrfs_destroy_workqueue(fs_info->endio_workers);
2056 btrfs_destroy_workqueue(fs_info->endio_meta_workers); 2061 btrfs_destroy_workqueue(fs_info->endio_meta_workers);
2057 btrfs_destroy_workqueue(fs_info->endio_raid56_workers); 2062 btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
2063 btrfs_destroy_workqueue(fs_info->endio_repair_workers);
2058 btrfs_destroy_workqueue(fs_info->rmw_workers); 2064 btrfs_destroy_workqueue(fs_info->rmw_workers);
2059 btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); 2065 btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
2060 btrfs_destroy_workqueue(fs_info->endio_write_workers); 2066 btrfs_destroy_workqueue(fs_info->endio_write_workers);
@@ -2572,6 +2578,8 @@ int open_ctree(struct super_block *sb,
2572 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); 2578 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
2573 fs_info->endio_raid56_workers = 2579 fs_info->endio_raid56_workers =
2574 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); 2580 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
2581 fs_info->endio_repair_workers =
2582 btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
2575 fs_info->rmw_workers = 2583 fs_info->rmw_workers =
2576 btrfs_alloc_workqueue("rmw", flags, max_active, 2); 2584 btrfs_alloc_workqueue("rmw", flags, max_active, 2);
2577 fs_info->endio_write_workers = 2585 fs_info->endio_write_workers =
@@ -2593,6 +2601,7 @@ int open_ctree(struct super_block *sb,
2593 fs_info->submit_workers && fs_info->flush_workers && 2601 fs_info->submit_workers && fs_info->flush_workers &&
2594 fs_info->endio_workers && fs_info->endio_meta_workers && 2602 fs_info->endio_workers && fs_info->endio_meta_workers &&
2595 fs_info->endio_meta_write_workers && 2603 fs_info->endio_meta_write_workers &&
2604 fs_info->endio_repair_workers &&
2596 fs_info->endio_write_workers && fs_info->endio_raid56_workers && 2605 fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
2597 fs_info->endio_freespace_worker && fs_info->rmw_workers && 2606 fs_info->endio_freespace_worker && fs_info->rmw_workers &&
2598 fs_info->caching_workers && fs_info->readahead_workers && 2607 fs_info->caching_workers && fs_info->readahead_workers &&
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 52a17db700fc..14d06ee1e143 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -30,6 +30,7 @@ enum {
30 BTRFS_WQ_ENDIO_METADATA = 1, 30 BTRFS_WQ_ENDIO_METADATA = 1,
31 BTRFS_WQ_ENDIO_FREE_SPACE = 2, 31 BTRFS_WQ_ENDIO_FREE_SPACE = 2,
32 BTRFS_WQ_ENDIO_RAID56 = 3, 32 BTRFS_WQ_ENDIO_RAID56 = 3,
33 BTRFS_WQ_ENDIO_DIO_REPAIR = 4,
33}; 34};
34 35
35static inline u64 btrfs_sb_offset(int mirror) 36static inline u64 btrfs_sb_offset(int mirror)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 05533c99f89d..9e2ef27672e5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1962,7 +1962,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1962 SetPageUptodate(page); 1962 SetPageUptodate(page);
1963} 1963}
1964 1964
1965static int free_io_failure(struct inode *inode, struct io_failure_record *rec) 1965int free_io_failure(struct inode *inode, struct io_failure_record *rec)
1966{ 1966{
1967 int ret; 1967 int ret;
1968 int err = 0; 1968 int err = 0;
@@ -2081,8 +2081,8 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
2081 * each time an IO finishes, we do a fast check in the IO failure tree 2081 * each time an IO finishes, we do a fast check in the IO failure tree
2082 * to see if we need to process or clean up an io_failure_record 2082 * to see if we need to process or clean up an io_failure_record
2083 */ 2083 */
2084static int clean_io_failure(struct inode *inode, u64 start, 2084int clean_io_failure(struct inode *inode, u64 start, struct page *page,
2085 struct page *page, unsigned int pg_offset) 2085 unsigned int pg_offset)
2086{ 2086{
2087 u64 private; 2087 u64 private;
2088 u64 private_failure; 2088 u64 private_failure;
@@ -2291,7 +2291,7 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
2291struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, 2291struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2292 struct io_failure_record *failrec, 2292 struct io_failure_record *failrec,
2293 struct page *page, int pg_offset, int icsum, 2293 struct page *page, int pg_offset, int icsum,
2294 bio_end_io_t *endio_func) 2294 bio_end_io_t *endio_func, void *data)
2295{ 2295{
2296 struct bio *bio; 2296 struct bio *bio;
2297 struct btrfs_io_bio *btrfs_failed_bio; 2297 struct btrfs_io_bio *btrfs_failed_bio;
@@ -2305,6 +2305,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
2305 bio->bi_iter.bi_sector = failrec->logical >> 9; 2305 bio->bi_iter.bi_sector = failrec->logical >> 9;
2306 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2306 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2307 bio->bi_iter.bi_size = 0; 2307 bio->bi_iter.bi_size = 0;
2308 bio->bi_private = data;
2308 2309
2309 btrfs_failed_bio = btrfs_io_bio(failed_bio); 2310 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2310 if (btrfs_failed_bio->csum) { 2311 if (btrfs_failed_bio->csum) {
@@ -2362,7 +2363,8 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2362 phy_offset >>= inode->i_sb->s_blocksize_bits; 2363 phy_offset >>= inode->i_sb->s_blocksize_bits;
2363 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, 2364 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
2364 start - page_offset(page), 2365 start - page_offset(page),
2365 (int)phy_offset, failed_bio->bi_end_io); 2366 (int)phy_offset, failed_bio->bi_end_io,
2367 NULL);
2366 if (!bio) { 2368 if (!bio) {
2367 free_io_failure(inode, failrec); 2369 free_io_failure(inode, failrec);
2368 return -EIO; 2370 return -EIO;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bf0597f3a9e7..176a4b1ed520 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -341,6 +341,8 @@ struct btrfs_fs_info;
341int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, 341int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
342 struct page *page, unsigned int pg_offset, 342 struct page *page, unsigned int pg_offset,
343 int mirror_num); 343 int mirror_num);
344int clean_io_failure(struct inode *inode, u64 start, struct page *page,
345 unsigned int pg_offset);
344int end_extent_writepage(struct page *page, int err, u64 start, u64 end); 346int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
345int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, 347int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
346 int mirror_num); 348 int mirror_num);
@@ -371,7 +373,8 @@ int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
371struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, 373struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
372 struct io_failure_record *failrec, 374 struct io_failure_record *failrec,
373 struct page *page, int pg_offset, int icsum, 375 struct page *page, int pg_offset, int icsum,
374 bio_end_io_t *endio_func); 376 bio_end_io_t *endio_func, void *data);
377int free_io_failure(struct inode *inode, struct io_failure_record *rec);
375#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 378#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
376noinline u64 find_lock_delalloc_range(struct inode *inode, 379noinline u64 find_lock_delalloc_range(struct inode *inode,
377 struct extent_io_tree *tree, 380 struct extent_io_tree *tree,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 09d8c5ee8869..c3c3269a9e08 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7242,30 +7242,267 @@ unlock_err:
7242 return ret; 7242 return ret;
7243} 7243}
7244 7244
7245static int btrfs_subio_endio_read(struct inode *inode, 7245static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
7246 struct btrfs_io_bio *io_bio) 7246 int rw, int mirror_num)
7247{
7248 struct btrfs_root *root = BTRFS_I(inode)->root;
7249 int ret;
7250
7251 BUG_ON(rw & REQ_WRITE);
7252
7253 bio_get(bio);
7254
7255 ret = btrfs_bio_wq_end_io(root->fs_info, bio,
7256 BTRFS_WQ_ENDIO_DIO_REPAIR);
7257 if (ret)
7258 goto err;
7259
7260 ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
7261err:
7262 bio_put(bio);
7263 return ret;
7264}
7265
7266static int btrfs_check_dio_repairable(struct inode *inode,
7267 struct bio *failed_bio,
7268 struct io_failure_record *failrec,
7269 int failed_mirror)
7270{
7271 int num_copies;
7272
7273 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
7274 failrec->logical, failrec->len);
7275 if (num_copies == 1) {
7276 /*
7277 * we only have a single copy of the data, so don't bother with
7278 * all the retry and error correction code that follows. no
7279 * matter what the error is, it is very likely to persist.
7280 */
7281 pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
7282 num_copies, failrec->this_mirror, failed_mirror);
7283 return 0;
7284 }
7285
7286 failrec->failed_mirror = failed_mirror;
7287 failrec->this_mirror++;
7288 if (failrec->this_mirror == failed_mirror)
7289 failrec->this_mirror++;
7290
7291 if (failrec->this_mirror > num_copies) {
7292 pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
7293 num_copies, failrec->this_mirror, failed_mirror);
7294 return 0;
7295 }
7296
7297 return 1;
7298}
7299
7300static int dio_read_error(struct inode *inode, struct bio *failed_bio,
7301 struct page *page, u64 start, u64 end,
7302 int failed_mirror, bio_end_io_t *repair_endio,
7303 void *repair_arg)
7304{
7305 struct io_failure_record *failrec;
7306 struct bio *bio;
7307 int isector;
7308 int read_mode;
7309 int ret;
7310
7311 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
7312
7313 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
7314 if (ret)
7315 return ret;
7316
7317 ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
7318 failed_mirror);
7319 if (!ret) {
7320 free_io_failure(inode, failrec);
7321 return -EIO;
7322 }
7323
7324 if (failed_bio->bi_vcnt > 1)
7325 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
7326 else
7327 read_mode = READ_SYNC;
7328
7329 isector = start - btrfs_io_bio(failed_bio)->logical;
7330 isector >>= inode->i_sb->s_blocksize_bits;
7331 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
7332 0, isector, repair_endio, repair_arg);
7333 if (!bio) {
7334 free_io_failure(inode, failrec);
7335 return -EIO;
7336 }
7337
7338 btrfs_debug(BTRFS_I(inode)->root->fs_info,
7339 "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
7340 read_mode, failrec->this_mirror, failrec->in_validation);
7341
7342 ret = submit_dio_repair_bio(inode, bio, read_mode,
7343 failrec->this_mirror);
7344 if (ret) {
7345 free_io_failure(inode, failrec);
7346 bio_put(bio);
7347 }
7348
7349 return ret;
7350}
7351
7352struct btrfs_retry_complete {
7353 struct completion done;
7354 struct inode *inode;
7355 u64 start;
7356 int uptodate;
7357};
7358
7359static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
7360{
7361 struct btrfs_retry_complete *done = bio->bi_private;
7362 struct bio_vec *bvec;
7363 int i;
7364
7365 if (err)
7366 goto end;
7367
7368 done->uptodate = 1;
7369 bio_for_each_segment_all(bvec, bio, i)
7370 clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
7371end:
7372 complete(&done->done);
7373 bio_put(bio);
7374}
7375
7376static int __btrfs_correct_data_nocsum(struct inode *inode,
7377 struct btrfs_io_bio *io_bio)
7247{ 7378{
7248 struct bio_vec *bvec; 7379 struct bio_vec *bvec;
7380 struct btrfs_retry_complete done;
7249 u64 start; 7381 u64 start;
7250 int i; 7382 int i;
7251 int ret; 7383 int ret;
7252 int err = 0;
7253 7384
7254 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) 7385 start = io_bio->logical;
7255 return 0; 7386 done.inode = inode;
7387
7388 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7389try_again:
7390 done.uptodate = 0;
7391 done.start = start;
7392 init_completion(&done.done);
7393
7394 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
7395 start + bvec->bv_len - 1,
7396 io_bio->mirror_num,
7397 btrfs_retry_endio_nocsum, &done);
7398 if (ret)
7399 return ret;
7400
7401 wait_for_completion(&done.done);
7402
7403 if (!done.uptodate) {
7404 /* We might have another mirror, so try again */
7405 goto try_again;
7406 }
7407
7408 start += bvec->bv_len;
7409 }
7410
7411 return 0;
7412}
7413
7414static void btrfs_retry_endio(struct bio *bio, int err)
7415{
7416 struct btrfs_retry_complete *done = bio->bi_private;
7417 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7418 struct bio_vec *bvec;
7419 int uptodate;
7420 int ret;
7421 int i;
7422
7423 if (err)
7424 goto end;
7425
7426 uptodate = 1;
7427 bio_for_each_segment_all(bvec, bio, i) {
7428 ret = __readpage_endio_check(done->inode, io_bio, i,
7429 bvec->bv_page, 0,
7430 done->start, bvec->bv_len);
7431 if (!ret)
7432 clean_io_failure(done->inode, done->start,
7433 bvec->bv_page, 0);
7434 else
7435 uptodate = 0;
7436 }
7437
7438 done->uptodate = uptodate;
7439end:
7440 complete(&done->done);
7441 bio_put(bio);
7442}
7256 7443
7444static int __btrfs_subio_endio_read(struct inode *inode,
7445 struct btrfs_io_bio *io_bio, int err)
7446{
7447 struct bio_vec *bvec;
7448 struct btrfs_retry_complete done;
7449 u64 start;
7450 u64 offset = 0;
7451 int i;
7452 int ret;
7453
7454 err = 0;
7257 start = io_bio->logical; 7455 start = io_bio->logical;
7456 done.inode = inode;
7457
7258 bio_for_each_segment_all(bvec, &io_bio->bio, i) { 7458 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7259 ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page, 7459 ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
7260 0, start, bvec->bv_len); 7460 0, start, bvec->bv_len);
7261 if (ret) 7461 if (likely(!ret))
7262 err = -EIO; 7462 goto next;
7463try_again:
7464 done.uptodate = 0;
7465 done.start = start;
7466 init_completion(&done.done);
7467
7468 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
7469 start + bvec->bv_len - 1,
7470 io_bio->mirror_num,
7471 btrfs_retry_endio, &done);
7472 if (ret) {
7473 err = ret;
7474 goto next;
7475 }
7476
7477 wait_for_completion(&done.done);
7478
7479 if (!done.uptodate) {
7480 /* We might have another mirror, so try again */
7481 goto try_again;
7482 }
7483next:
7484 offset += bvec->bv_len;
7263 start += bvec->bv_len; 7485 start += bvec->bv_len;
7264 } 7486 }
7265 7487
7266 return err; 7488 return err;
7267} 7489}
7268 7490
7491static int btrfs_subio_endio_read(struct inode *inode,
7492 struct btrfs_io_bio *io_bio, int err)
7493{
7494 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7495
7496 if (skip_csum) {
7497 if (unlikely(err))
7498 return __btrfs_correct_data_nocsum(inode, io_bio);
7499 else
7500 return 0;
7501 } else {
7502 return __btrfs_subio_endio_read(inode, io_bio, err);
7503 }
7504}
7505
7269static void btrfs_endio_direct_read(struct bio *bio, int err) 7506static void btrfs_endio_direct_read(struct bio *bio, int err)
7270{ 7507{
7271 struct btrfs_dio_private *dip = bio->bi_private; 7508 struct btrfs_dio_private *dip = bio->bi_private;
@@ -7273,8 +7510,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
7273 struct bio *dio_bio; 7510 struct bio *dio_bio;
7274 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 7511 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7275 7512
7276 if (!err && (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)) 7513 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
7277 err = btrfs_subio_endio_read(inode, io_bio); 7514 err = btrfs_subio_endio_read(inode, io_bio, err);
7278 7515
7279 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 7516 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
7280 dip->logical_offset + dip->bytes - 1); 7517 dip->logical_offset + dip->bytes - 1);
@@ -7353,19 +7590,16 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
7353static void btrfs_end_dio_bio(struct bio *bio, int err) 7590static void btrfs_end_dio_bio(struct bio *bio, int err)
7354{ 7591{
7355 struct btrfs_dio_private *dip = bio->bi_private; 7592 struct btrfs_dio_private *dip = bio->bi_private;
7356 int ret;
7357 7593
7358 if (err) { 7594 if (err)
7359 btrfs_err(BTRFS_I(dip->inode)->root->fs_info, 7595 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
7360 "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d", 7596 "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
7361 btrfs_ino(dip->inode), bio->bi_rw, 7597 btrfs_ino(dip->inode), bio->bi_rw,
7362 (unsigned long long)bio->bi_iter.bi_sector, 7598 (unsigned long long)bio->bi_iter.bi_sector,
7363 bio->bi_iter.bi_size, err); 7599 bio->bi_iter.bi_size, err);
7364 } else if (dip->subio_endio) { 7600
7365 ret = dip->subio_endio(dip->inode, btrfs_io_bio(bio)); 7601 if (dip->subio_endio)
7366 if (ret) 7602 err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
7367 err = ret;
7368 }
7369 7603
7370 if (err) { 7604 if (err) {
7371 dip->errors = 1; 7605 dip->errors = 1;