Diffstat (limited to 'fs/btrfs/disk-io.c')
 -rw-r--r--  fs/btrfs/disk-io.c | 146 ++++++++++++++++++++++++++-----------
 1 file changed, 94 insertions(+), 52 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7cda51995c1e..a8f652dc940b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -45,6 +45,7 @@
 #include "inode-map.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "dev-replace.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
 			break;
 
-		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+		num_copies = btrfs_num_copies(root->fs_info,
 					      eb->start, eb->len);
 		if (num_copies == 1)
 			break;
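Note: the hunk above only narrows btrfs_num_copies() to take fs_info directly, but the surrounding loop is btrfs's read-retry over redundant copies, which is worth spelling out. A standalone sketch of that retry idea, with a stand-in read_mirror() in place of the real I/O path (mirror numbering mimics btrfs, where valid mirrors start at 1):

    /* Standalone sketch of the mirror-retry pattern; helpers are stand-ins. */
    #include <stdio.h>
    #include <stdbool.h>

    #define NUM_COPIES 2                    /* e.g. RAID1: two copies of each block */

    static bool read_mirror(int mirror_num)
    {
            /* stand-in: pretend mirror 1 is corrupted and mirror 2 is good */
            return mirror_num == 2;
    }

    int main(void)
    {
            int mirror = 1;                 /* mirror numbers start at 1 */

            do {
                    if (read_mirror(mirror)) {
                            printf("good copy on mirror %d\n", mirror);
                            return 0;
                    }
            } while (NUM_COPIES > 1 && ++mirror <= NUM_COPIES);

            fprintf(stderr, "all %d copies failed verification\n", NUM_COPIES);
            return 1;
    }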
@@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags,
 				 u64 bio_offset)
 {
+	int ret;
+
 	/*
 	 * when we're called for a write, we're already in the async
 	 * submission context. Just jump into btrfs_map_bio
 	 */
-	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+	ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+	if (ret)
+		bio_endio(bio, ret);
+	return ret;
 }
 
 static int check_async_write(struct inode *inode, unsigned long bio_flags)
@@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	int ret;
 
 	if (!(rw & REQ_WRITE)) {
-
 		/*
 		 * called for a read, do the setup so that checksum validation
 		 * can happen in the async kernel threads
@@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 		ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
 					  bio, 1);
 		if (ret)
-			return ret;
-		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-				     mirror_num, 0);
+			goto out_w_error;
+		ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				    mirror_num, 0);
 	} else if (!async) {
 		ret = btree_csum_one_bio(bio);
 		if (ret)
-			return ret;
-		return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-				     mirror_num, 0);
+			goto out_w_error;
+		ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+				    mirror_num, 0);
+	} else {
+		/*
+		 * kthread helpers are used to submit writes so that
+		 * checksumming can happen in parallel across all CPUs
+		 */
+		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+					  inode, rw, bio, mirror_num, 0,
+					  bio_offset,
+					  __btree_submit_bio_start,
+					  __btree_submit_bio_done);
 	}
 
-	/*
-	 * kthread helpers are used to submit writes so that checksumming
-	 * can happen in parallel across all CPUs
-	 */
-	return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-				   inode, rw, bio, mirror_num, 0,
-				   bio_offset,
-				   __btree_submit_bio_start,
-				   __btree_submit_bio_done);
+	if (ret) {
+out_w_error:
+		bio_endio(bio, ret);
+	}
+	return ret;
 }
 
 #ifdef CONFIG_MIGRATION
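Note: the two hunks above change the failure path so that a bio which cannot be submitted is completed with bio_endio(bio, ret) instead of just returning an error; without that, whoever is waiting on the bio would never be woken. A standalone sketch of the pattern, with fake_bio/my_end_io standing in for the real bio machinery (not the kernel API):

    /* Sketch of "complete with error instead of silently returning". */
    #include <stdio.h>

    struct fake_bio {
            void (*end_io)(struct fake_bio *bio, int err);  /* completion callback */
    };

    static void my_end_io(struct fake_bio *bio, int err)
    {
            (void)bio;
            printf("completion ran, err=%d\n", err);        /* waiters are woken here */
    }

    static int map_bio(struct fake_bio *bio)
    {
            (void)bio;
            return -5;      /* simulate -EIO from the mapping layer */
    }

    static int submit_or_fail(struct fake_bio *bio)
    {
            int ret = map_bio(bio);

            if (ret)
                    bio->end_io(bio, ret);  /* the fix: always complete the bio */
            return ret;
    }

    int main(void)
    {
            struct fake_bio bio = { .end_io = my_end_io };

            return submit_or_fail(&bio) ? 1 : 0;
    }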
@@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
 
 static int btree_set_page_dirty(struct page *page)
 {
+#ifdef DEBUG
 	struct extent_buffer *eb;
 
 	BUG_ON(!PagePrivate(page));
@@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page)
 	BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
 	BUG_ON(!atomic_read(&eb->refs));
 	btrfs_assert_tree_locked(eb);
+#endif
 	return __set_page_dirty_nobuffers(page);
 }
 
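Note: with this pair of hunks the sanity checks in btree_set_page_dirty() compile away entirely unless DEBUG is defined, so production builds pay nothing on this hot path. The same pattern in miniature (build with -DDEBUG to keep the checks):

    /* Standalone sketch of compiling sanity checks out of a hot path. */
    #include <assert.h>
    #include <stdio.h>

    static int set_dirty(int *flags)
    {
    #ifdef DEBUG
            /* expensive invariants, checked only in debug builds */
            assert(flags != NULL);
            assert((*flags & 0x1) == 0);    /* must not already be dirty */
    #endif
            *flags |= 0x1;
            return 0;
    }

    int main(void)
    {
            int flags = 0;

            set_dirty(&flags);
            printf("flags=%#x\n", flags);
            return 0;
    }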
@@ -1129,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 						  root->fs_info->dirty_metadata_bytes);
 		}
 		spin_unlock(&root->fs_info->delalloc_lock);
-	}
 
 		/* ugh, clear_extent_buffer_dirty needs to lock the page */
 		btrfs_set_lock_blocking(buf);
 		clear_extent_buffer_dirty(buf);
+	}
 	}
 }
 
@@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	root->root_key.objectid = objectid;
 	root->anon_dev = 0;
 
-	spin_lock_init(&root->root_times_lock);
+	spin_lock_init(&root->root_item_lock);
 }
 
 static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
@@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb,
 	init_rwsem(&fs_info->extent_commit_sem);
 	init_rwsem(&fs_info->cleanup_work_sem);
 	init_rwsem(&fs_info->subvol_sem);
+	fs_info->dev_replace.lock_owner = 0;
+	atomic_set(&fs_info->dev_replace.nesting_level, 0);
+	mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+	mutex_init(&fs_info->dev_replace.lock_management_lock);
+	mutex_init(&fs_info->dev_replace.lock);
 
 	spin_lock_init(&fs_info->qgroup_lock);
 	fs_info->qgroup_tree = RB_ROOT;
@@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb,
 			   fs_info->thread_pool_size,
 			   &fs_info->generic_worker);
 
+	btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
+			   fs_info->thread_pool_size,
+			   &fs_info->generic_worker);
+
 	btrfs_init_workers(&fs_info->submit_workers, "submit",
 			   min_t(u64, fs_devices->num_devices,
 			   fs_info->thread_pool_size),
@@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb,
 	ret |= btrfs_start_workers(&fs_info->delayed_workers);
 	ret |= btrfs_start_workers(&fs_info->caching_workers);
 	ret |= btrfs_start_workers(&fs_info->readahead_workers);
+	ret |= btrfs_start_workers(&fs_info->flush_workers);
 	if (ret) {
 		err = -ENOMEM;
 		goto fail_sb_buffer;
@@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb,
 		goto fail_tree_roots;
 	}
 
-	btrfs_close_extra_devices(fs_devices);
+	/*
+	 * keep the device that is marked to be the target device for the
+	 * dev_replace procedure
+	 */
+	btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
 	if (!fs_devices->latest_bdev) {
 		printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
@@ -2490,6 +2517,14 @@ retry_root_backup:
 		goto fail_block_groups;
 	}
 
+	ret = btrfs_init_dev_replace(fs_info);
+	if (ret) {
+		pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+		goto fail_block_groups;
+	}
+
+	btrfs_close_extra_devices(fs_info, fs_devices, 1);
+
 	ret = btrfs_init_space_info(fs_info);
 	if (ret) {
 		printk(KERN_ERR "Failed to initial space info: %d\n", ret);
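Note: btrfs_close_extra_devices() now runs in two passes: step 0 before the dev-replace state has been read (where a possible replace target must be kept) and step 1 after btrfs_init_dev_replace(), once the target's role is known. A standalone sketch of that two-pass pruning, with invented struct/field names (by step 1, a genuine target would have been claimed as in-use):

    /* Standalone sketch of two-pass device pruning; names are invented. */
    #include <stdbool.h>
    #include <stdio.h>

    struct dev {
            const char *name;
            bool in_use;
            bool maybe_replace_target;
    };

    static void close_extra_devices(struct dev *devs, int n, int step)
    {
            for (int i = 0; i < n; i++) {
                    if (devs[i].in_use)
                            continue;
                    /* in step 0 a possible replace target must survive:
                     * its role is only known once the dev-replace state
                     * has been read from disk */
                    if (step == 0 && devs[i].maybe_replace_target)
                            continue;
                    printf("step %d: closing %s\n", step, devs[i].name);
                    devs[i].name = NULL;
            }
    }

    int main(void)
    {
            struct dev devs[] = {
                    { "sda", true,  false },
                    { "sdb", false, true  },        /* might be a replace target */
            };

            close_extra_devices(devs, 2, 0);        /* keeps sdb for now */
            devs[1].maybe_replace_target = false;   /* state read: it is not one */
            close_extra_devices(devs, 2, 1);        /* now sdb is closed too */
            return 0;
    }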
@@ -2503,6 +2538,13 @@ retry_root_backup:
 	}
 	fs_info->num_tolerated_disk_barrier_failures =
 		btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
+	if (fs_info->fs_devices->missing_devices >
+	     fs_info->num_tolerated_disk_barrier_failures &&
+	    !(sb->s_flags & MS_RDONLY)) {
+		printk(KERN_WARNING
+		       "Btrfs: too many missing devices, writeable mount is not allowed\n");
+		goto fail_block_groups;
+	}
 
 	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
 					       "btrfs-cleaner");
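Note: the new guard refuses a writeable mount when more devices are missing than the barrier-failure tolerance allows; roughly, num_tolerated_disk_barrier_failures is how many devices the weakest redundancy profile in use can lose without data loss. A simplified, standalone sketch of that decision (tolerated_failures() is a stand-in, not the real per-profile calculation):

    /* Standalone sketch of the "too many missing devices" gate. */
    #include <stdbool.h>
    #include <stdio.h>

    static int tolerated_failures(bool raid1)
    {
            return raid1 ? 1 : 0;   /* e.g. RAID1 survives one lost device */
    }

    static bool writeable_mount_allowed(int missing_devices, bool raid1)
    {
            return missing_devices <= tolerated_failures(raid1);
    }

    int main(void)
    {
            /* one missing device: fine for RAID1, read-only for single/RAID0 */
            printf("raid1, 1 missing: %s\n",
                   writeable_mount_allowed(1, true) ? "rw ok" : "ro only");
            printf("single, 1 missing: %s\n",
                   writeable_mount_allowed(1, false) ? "rw ok" : "ro only");
            return 0;
    }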
@@ -2631,6 +2673,13 @@ retry_root_backup:
 		return ret;
 	}
 
+	ret = btrfs_resume_dev_replace_async(fs_info);
+	if (ret) {
+		pr_warn("btrfs: failed to resume dev_replace\n");
+		close_ctree(tree_root);
+		return ret;
+	}
+
 	return 0;
 
 fail_qgroup:
@@ -2667,6 +2716,7 @@ fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->submit_workers);
 	btrfs_stop_workers(&fs_info->delayed_workers);
 	btrfs_stop_workers(&fs_info->caching_workers);
+	btrfs_stop_workers(&fs_info->flush_workers);
 fail_alloc:
 fail_iput:
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root)
 	smp_mb();
 
 	/* pause restriper - we want to resume on mount */
-	btrfs_pause_balance(root->fs_info);
+	btrfs_pause_balance(fs_info);
+
+	btrfs_dev_replace_suspend_for_unmount(fs_info);
 
-	btrfs_scrub_cancel(root);
+	btrfs_scrub_cancel(fs_info);
 
 	/* wait for any defraggers to finish */
 	wait_event(fs_info->transaction_wait,
 		   (atomic_read(&fs_info->defrag_running) == 0));
 
 	/* clear out the rbtree of defraggable inodes */
-	btrfs_run_defrag_inodes(fs_info);
+	btrfs_cleanup_defrag_inodes(fs_info);
 
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret = btrfs_commit_super(root);
@@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root)
 	btrfs_stop_workers(&fs_info->delayed_workers);
 	btrfs_stop_workers(&fs_info->caching_workers);
 	btrfs_stop_workers(&fs_info->readahead_workers);
+	btrfs_stop_workers(&fs_info->flush_workers);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 	int was_dirty;
 
 	btrfs_assert_tree_locked(buf);
-	if (transid != root->fs_info->generation) {
-		printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
-		       "found %llu running %llu\n",
+	if (transid != root->fs_info->generation)
+		WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
+		     "found %llu running %llu\n",
 			(unsigned long long)buf->start,
 			(unsigned long long)transid,
 			(unsigned long long)root->fs_info->generation);
-		WARN_ON(1);
-	}
 	was_dirty = set_extent_buffer_dirty(buf);
 	if (!was_dirty) {
 		spin_lock(&root->fs_info->delalloc_lock);
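Note: WARN(cond, fmt, ...) folds the old printk + WARN_ON(1) pair into one call that prints the message and the stack trace together. A rough userspace approximation (the kernel macro additionally taints the kernel and dumps a backtrace; this sketch uses a GNU C statement expression, as the kernel does):

    /* Userspace approximation of the kernel's WARN(cond, fmt, ...) macro. */
    #include <stdio.h>

    #define WARN(cond, ...) ({                              \
            int __ret = !!(cond);                           \
            if (__ret)                                      \
                    fprintf(stderr, __VA_ARGS__);           \
            __ret;                                          \
    })

    int main(void)
    {
            unsigned long long transid = 7, generation = 9;

            if (transid != generation)
                    WARN(1, "transid mismatch: found %llu running %llu\n",
                         transid, generation);
            return 0;
    }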
@@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 	}
 }
 
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
+					int flush_delayed)
 {
 	/*
 	 * looks as though older kernels can get into trouble with
@@ -3411,36 +3463,26 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	if (current->flags & PF_MEMALLOC)
 		return;
 
-	btrfs_balance_delayed_items(root);
+	if (flush_delayed)
+		btrfs_balance_delayed_items(root);
 
 	num_dirty = root->fs_info->dirty_metadata_bytes;
 
 	if (num_dirty > thresh) {
-		balance_dirty_pages_ratelimited_nr(
-				   root->fs_info->btree_inode->i_mapping, 1);
+		balance_dirty_pages_ratelimited(
+				   root->fs_info->btree_inode->i_mapping);
 	}
 	return;
 }
 
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+void btrfs_btree_balance_dirty(struct btrfs_root *root)
 {
-	/*
-	 * looks as though older kernels can get into trouble with
-	 * this code, they end up stuck in balance_dirty_pages forever
-	 */
-	u64 num_dirty;
-	unsigned long thresh = 32 * 1024 * 1024;
-
-	if (current->flags & PF_MEMALLOC)
-		return;
-
-	num_dirty = root->fs_info->dirty_metadata_bytes;
+	__btrfs_btree_balance_dirty(root, 1);
+}
 
-	if (num_dirty > thresh) {
-		balance_dirty_pages_ratelimited_nr(
-				   root->fs_info->btree_inode->i_mapping, 1);
-	}
-	return;
+void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
+{
+	__btrfs_btree_balance_dirty(root, 0);
 }
 
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
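Note: the final hunk collapses two near-identical copies of the balance logic into one helper parameterized by flush_delayed, leaving the public entry points as one-line wrappers. The refactoring pattern in miniature:

    /* Standalone sketch of collapsing duplicated variants into one helper. */
    #include <stdbool.h>
    #include <stdio.h>

    static void __balance_dirty(bool flush_delayed)
    {
            if (flush_delayed)
                    printf("flushing delayed items first\n");
            printf("balancing dirty pages\n");
    }

    static void balance_dirty(void)                 /* old btrfs_btree_balance_dirty */
    {
            __balance_dirty(true);
    }

    static void balance_dirty_nodelay(void)         /* the new _nodelay variant */
    {
            __balance_dirty(false);
    }

    int main(void)
    {
            balance_dirty();
            balance_dirty_nodelay();
            return 0;
    }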