diff options
author | Josef Bacik <josef@redhat.com> | 2012-03-26 21:57:36 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2012-03-26 21:57:36 -0400 |
commit | ea466794084f55d8fcc100711cf17923bf57e962 (patch) | |
tree | 9905d556655ff0f036936ea51f9aa214bd2cbce8 /fs | |
parent | f3f266ab1bfe4770375d24fa8e72a03278e9450a (diff) |
Btrfs: deal with read errors on extent buffers differently
Since we need to read and write extent buffers in their entirety, we can't use
the normal bio_readpage_error stuff, because it only works on a per-page basis.
Instead, if we see an IO error in endio, we just mark the eb as
having an IO error, and then in btree_read_extent_buffer_pages we manually
try the other mirrors and overwrite the bad mirror if we find a good copy.
This also works with larger-than-page-size blocks. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/disk-io.c | 43 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 42 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 8 |
3 files changed, 66 insertions, 27 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 53c5ea702799..6107b6958413 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -360,9 +360,11 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
360 | u64 start, u64 parent_transid) | 360 | u64 start, u64 parent_transid) |
361 | { | 361 | { |
362 | struct extent_io_tree *io_tree; | 362 | struct extent_io_tree *io_tree; |
363 | int failed = 0; | ||
363 | int ret; | 364 | int ret; |
364 | int num_copies = 0; | 365 | int num_copies = 0; |
365 | int mirror_num = 0; | 366 | int mirror_num = 0; |
367 | int failed_mirror = 0; | ||
366 | 368 | ||
367 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | 369 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); |
368 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 370 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
@@ -371,7 +373,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
371 | WAIT_COMPLETE, | 373 | WAIT_COMPLETE, |
372 | btree_get_extent, mirror_num); | 374 | btree_get_extent, mirror_num); |
373 | if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) | 375 | if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) |
374 | return ret; | 376 | break; |
375 | 377 | ||
376 | /* | 378 | /* |
377 | * This buffer's crc is fine, but its contents are corrupted, so | 379 | * This buffer's crc is fine, but its contents are corrupted, so |
@@ -379,18 +381,31 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
379 | * any less wrong. | 381 | * any less wrong. |
380 | */ | 382 | */ |
381 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | 383 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) |
382 | return ret; | 384 | break; |
385 | |||
386 | if (!failed_mirror) { | ||
387 | failed = 1; | ||
388 | printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror); | ||
389 | failed_mirror = eb->failed_mirror; | ||
390 | } | ||
383 | 391 | ||
384 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 392 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
385 | eb->start, eb->len); | 393 | eb->start, eb->len); |
386 | if (num_copies == 1) | 394 | if (num_copies == 1) |
387 | return ret; | 395 | break; |
388 | 396 | ||
389 | mirror_num++; | 397 | mirror_num++; |
398 | if (mirror_num == failed_mirror) | ||
399 | mirror_num++; | ||
400 | |||
390 | if (mirror_num > num_copies) | 401 | if (mirror_num > num_copies) |
391 | return ret; | 402 | break; |
392 | } | 403 | } |
393 | return -EIO; | 404 | |
405 | if (failed && !ret) | ||
406 | repair_eb_io_failure(root, eb, failed_mirror); | ||
407 | |||
408 | return ret; | ||
394 | } | 409 | } |
395 | 410 | ||
396 | /* | 411 | /* |
@@ -575,6 +590,11 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
575 | if (!reads_done) | 590 | if (!reads_done) |
576 | goto err; | 591 | goto err; |
577 | 592 | ||
593 | if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { | ||
594 | ret = -EIO; | ||
595 | goto err; | ||
596 | } | ||
597 | |||
578 | found_start = btrfs_header_bytenr(eb); | 598 | found_start = btrfs_header_bytenr(eb); |
579 | if (found_start != eb->start) { | 599 | if (found_start != eb->start) { |
580 | printk_ratelimited(KERN_INFO "btrfs bad tree block start " | 600 | printk_ratelimited(KERN_INFO "btrfs bad tree block start " |
@@ -626,21 +646,16 @@ out: | |||
626 | return ret; | 646 | return ret; |
627 | } | 647 | } |
628 | 648 | ||
629 | static int btree_io_failed_hook(struct bio *failed_bio, | 649 | static int btree_io_failed_hook(struct page *page, int failed_mirror) |
630 | struct page *page, u64 start, u64 end, | ||
631 | int mirror_num, struct extent_state *state) | ||
632 | { | 650 | { |
633 | struct extent_buffer *eb; | 651 | struct extent_buffer *eb; |
634 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 652 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
635 | 653 | ||
636 | eb = (struct extent_buffer *)page->private; | 654 | eb = (struct extent_buffer *)page->private; |
637 | if (page != eb->pages[0]) | 655 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
638 | return -EIO; | 656 | eb->failed_mirror = failed_mirror; |
639 | 657 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) | |
640 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { | ||
641 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); | ||
642 | btree_readahead_hook(root, eb, eb->start, -EIO); | 658 | btree_readahead_hook(root, eb, eb->start, -EIO); |
643 | } | ||
644 | return -EIO; /* we fixed nothing */ | 659 | return -EIO; /* we fixed nothing */ |
645 | } | 660 | } |
646 | 661 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b71cc4547d47..49a368593a16 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1915,6 +1915,26 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
1915 | return 0; | 1915 | return 0; |
1916 | } | 1916 | } |
1917 | 1917 | ||
1918 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | ||
1919 | int mirror_num) | ||
1920 | { | ||
1921 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
1922 | u64 start = eb->start; | ||
1923 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); | ||
1924 | int ret; | ||
1925 | |||
1926 | for (i = 0; i < num_pages; i++) { | ||
1927 | struct page *p = extent_buffer_page(eb, i); | ||
1928 | ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE, | ||
1929 | start, p, mirror_num); | ||
1930 | if (ret) | ||
1931 | break; | ||
1932 | start += PAGE_CACHE_SIZE; | ||
1933 | } | ||
1934 | |||
1935 | return ret; | ||
1936 | } | ||
1937 | |||
1918 | /* | 1938 | /* |
1919 | * each time an IO finishes, we do a fast check in the IO failure tree | 1939 | * each time an IO finishes, we do a fast check in the IO failure tree |
1920 | * to see if we need to process or clean up an io_failure_record | 1940 | * to see if we need to process or clean up an io_failure_record |
@@ -2261,6 +2281,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2261 | u64 start; | 2281 | u64 start; |
2262 | u64 end; | 2282 | u64 end; |
2263 | int whole_page; | 2283 | int whole_page; |
2284 | int failed_mirror; | ||
2264 | int ret; | 2285 | int ret; |
2265 | 2286 | ||
2266 | if (err) | 2287 | if (err) |
@@ -2307,9 +2328,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2307 | else | 2328 | else |
2308 | clean_io_failure(start, page); | 2329 | clean_io_failure(start, page); |
2309 | } | 2330 | } |
2310 | if (!uptodate) { | 2331 | |
2311 | int failed_mirror; | 2332 | if (!uptodate) |
2312 | failed_mirror = (int)(unsigned long)bio->bi_bdev; | 2333 | failed_mirror = (int)(unsigned long)bio->bi_bdev; |
2334 | |||
2335 | if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { | ||
2336 | ret = tree->ops->readpage_io_failed_hook(page, failed_mirror); | ||
2337 | if (!ret && !err && | ||
2338 | test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
2339 | uptodate = 1; | ||
2340 | } else if (!uptodate) { | ||
2313 | /* | 2341 | /* |
2314 | * The generic bio_readpage_error handles errors the | 2342 | * The generic bio_readpage_error handles errors the |
2315 | * following way: If possible, new read requests are | 2343 | * following way: If possible, new read requests are |
@@ -2323,7 +2351,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2323 | ret = bio_readpage_error(bio, page, start, end, | 2351 | ret = bio_readpage_error(bio, page, start, end, |
2324 | failed_mirror, NULL); | 2352 | failed_mirror, NULL); |
2325 | if (ret == 0) { | 2353 | if (ret == 0) { |
2326 | error_handled: | ||
2327 | uptodate = | 2354 | uptodate = |
2328 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 2355 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
2329 | if (err) | 2356 | if (err) |
@@ -2331,13 +2358,6 @@ error_handled: | |||
2331 | uncache_state(&cached); | 2358 | uncache_state(&cached); |
2332 | continue; | 2359 | continue; |
2333 | } | 2360 | } |
2334 | if (tree->ops && tree->ops->readpage_io_failed_hook) { | ||
2335 | ret = tree->ops->readpage_io_failed_hook( | ||
2336 | bio, page, start, end, | ||
2337 | failed_mirror, state); | ||
2338 | if (ret == 0) | ||
2339 | goto error_handled; | ||
2340 | } | ||
2341 | } | 2361 | } |
2342 | 2362 | ||
2343 | if (uptodate && tree->track_uptodate) { | 2363 | if (uptodate && tree->track_uptodate) { |
@@ -4396,6 +4416,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
4396 | goto unlock_exit; | 4416 | goto unlock_exit; |
4397 | } | 4417 | } |
4398 | 4418 | ||
4419 | clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | ||
4420 | eb->failed_mirror = 0; | ||
4399 | atomic_set(&eb->io_pages, num_reads); | 4421 | atomic_set(&eb->io_pages, num_reads); |
4400 | for (i = start_i; i < num_pages; i++) { | 4422 | for (i = start_i; i < num_pages; i++) { |
4401 | page = extent_buffer_page(eb, i); | 4423 | page = extent_buffer_page(eb, i); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 489d7945154f..38c1af7092f3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -58,6 +58,7 @@ | |||
58 | #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 | 58 | #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 |
59 | 59 | ||
60 | struct extent_state; | 60 | struct extent_state; |
61 | struct btrfs_root; | ||
61 | 62 | ||
62 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 63 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
63 | struct bio *bio, int mirror_num, | 64 | struct bio *bio, int mirror_num, |
@@ -73,9 +74,7 @@ struct extent_io_ops { | |||
73 | size_t size, struct bio *bio, | 74 | size_t size, struct bio *bio, |
74 | unsigned long bio_flags); | 75 | unsigned long bio_flags); |
75 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 76 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
76 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, | 77 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); |
77 | u64 start, u64 end, int failed_mirror, | ||
78 | struct extent_state *state); | ||
79 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, | 78 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, |
80 | u64 start, u64 end, | 79 | u64 start, u64 end, |
81 | struct extent_state *state); | 80 | struct extent_state *state); |
@@ -136,6 +135,7 @@ struct extent_buffer { | |||
136 | spinlock_t refs_lock; | 135 | spinlock_t refs_lock; |
137 | atomic_t refs; | 136 | atomic_t refs; |
138 | atomic_t io_pages; | 137 | atomic_t io_pages; |
138 | int failed_mirror; | ||
139 | struct list_head leak_list; | 139 | struct list_head leak_list; |
140 | struct rcu_head rcu_head; | 140 | struct rcu_head rcu_head; |
141 | pid_t lock_owner; | 141 | pid_t lock_owner; |
@@ -327,4 +327,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
327 | u64 length, u64 logical, struct page *page, | 327 | u64 length, u64 logical, struct page *page, |
328 | int mirror_num); | 328 | int mirror_num); |
329 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); | 329 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
330 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | ||
331 | int mirror_num); | ||
330 | #endif | 332 | #endif |