diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
| -rw-r--r-- | fs/btrfs/disk-io.c | 142 |
1 files changed, 92 insertions, 50 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 22a0439e5a86..a8f652dc940b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include "inode-map.h" | 45 | #include "inode-map.h" |
| 46 | #include "check-integrity.h" | 46 | #include "check-integrity.h" |
| 47 | #include "rcu-string.h" | 47 | #include "rcu-string.h" |
| 48 | #include "dev-replace.h" | ||
| 48 | 49 | ||
| 49 | #ifdef CONFIG_X86 | 50 | #ifdef CONFIG_X86 |
| 50 | #include <asm/cpufeature.h> | 51 | #include <asm/cpufeature.h> |
| @@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
| 387 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | 388 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) |
| 388 | break; | 389 | break; |
| 389 | 390 | ||
| 390 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 391 | num_copies = btrfs_num_copies(root->fs_info, |
| 391 | eb->start, eb->len); | 392 | eb->start, eb->len); |
| 392 | if (num_copies == 1) | 393 | if (num_copies == 1) |
| 393 | break; | 394 | break; |
| @@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 852 | int mirror_num, unsigned long bio_flags, | 853 | int mirror_num, unsigned long bio_flags, |
| 853 | u64 bio_offset) | 854 | u64 bio_offset) |
| 854 | { | 855 | { |
| 856 | int ret; | ||
| 857 | |||
| 855 | /* | 858 | /* |
| 856 | * when we're called for a write, we're already in the async | 859 | * when we're called for a write, we're already in the async |
| 857 | * submission context. Just jump into btrfs_map_bio | 860 | * submission context. Just jump into btrfs_map_bio |
| 858 | */ | 861 | */ |
| 859 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); | 862 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); |
| 863 | if (ret) | ||
| 864 | bio_endio(bio, ret); | ||
| 865 | return ret; | ||
| 860 | } | 866 | } |
| 861 | 867 | ||
| 862 | static int check_async_write(struct inode *inode, unsigned long bio_flags) | 868 | static int check_async_write(struct inode *inode, unsigned long bio_flags) |
| @@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 878 | int ret; | 884 | int ret; |
| 879 | 885 | ||
| 880 | if (!(rw & REQ_WRITE)) { | 886 | if (!(rw & REQ_WRITE)) { |
| 881 | |||
| 882 | /* | 887 | /* |
| 883 | * called for a read, do the setup so that checksum validation | 888 | * called for a read, do the setup so that checksum validation |
| 884 | * can happen in the async kernel threads | 889 | * can happen in the async kernel threads |
| @@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 886 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | 891 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, |
| 887 | bio, 1); | 892 | bio, 1); |
| 888 | if (ret) | 893 | if (ret) |
| 889 | return ret; | 894 | goto out_w_error; |
| 890 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 895 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
| 891 | mirror_num, 0); | 896 | mirror_num, 0); |
| 892 | } else if (!async) { | 897 | } else if (!async) { |
| 893 | ret = btree_csum_one_bio(bio); | 898 | ret = btree_csum_one_bio(bio); |
| 894 | if (ret) | 899 | if (ret) |
| 895 | return ret; | 900 | goto out_w_error; |
| 896 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 901 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
| 897 | mirror_num, 0); | 902 | mirror_num, 0); |
| 903 | } else { | ||
| 904 | /* | ||
| 905 | * kthread helpers are used to submit writes so that | ||
| 906 | * checksumming can happen in parallel across all CPUs | ||
| 907 | */ | ||
| 908 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 909 | inode, rw, bio, mirror_num, 0, | ||
| 910 | bio_offset, | ||
| 911 | __btree_submit_bio_start, | ||
| 912 | __btree_submit_bio_done); | ||
| 898 | } | 913 | } |
| 899 | 914 | ||
| 900 | /* | 915 | if (ret) { |
| 901 | * kthread helpers are used to submit writes so that checksumming | 916 | out_w_error: |
| 902 | * can happen in parallel across all CPUs | 917 | bio_endio(bio, ret); |
| 903 | */ | 918 | } |
| 904 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 919 | return ret; |
| 905 | inode, rw, bio, mirror_num, 0, | ||
| 906 | bio_offset, | ||
| 907 | __btree_submit_bio_start, | ||
| 908 | __btree_submit_bio_done); | ||
| 909 | } | 920 | } |
| 910 | 921 | ||
| 911 | #ifdef CONFIG_MIGRATION | 922 | #ifdef CONFIG_MIGRATION |
| @@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
| 990 | 1001 | ||
| 991 | static int btree_set_page_dirty(struct page *page) | 1002 | static int btree_set_page_dirty(struct page *page) |
| 992 | { | 1003 | { |
| 1004 | #ifdef DEBUG | ||
| 993 | struct extent_buffer *eb; | 1005 | struct extent_buffer *eb; |
| 994 | 1006 | ||
| 995 | BUG_ON(!PagePrivate(page)); | 1007 | BUG_ON(!PagePrivate(page)); |
| @@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page) | |||
| 998 | BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); | 1010 | BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); |
| 999 | BUG_ON(!atomic_read(&eb->refs)); | 1011 | BUG_ON(!atomic_read(&eb->refs)); |
| 1000 | btrfs_assert_tree_locked(eb); | 1012 | btrfs_assert_tree_locked(eb); |
| 1013 | #endif | ||
| 1001 | return __set_page_dirty_nobuffers(page); | 1014 | return __set_page_dirty_nobuffers(page); |
| 1002 | } | 1015 | } |
| 1003 | 1016 | ||
| @@ -1129,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 1129 | root->fs_info->dirty_metadata_bytes); | 1142 | root->fs_info->dirty_metadata_bytes); |
| 1130 | } | 1143 | } |
| 1131 | spin_unlock(&root->fs_info->delalloc_lock); | 1144 | spin_unlock(&root->fs_info->delalloc_lock); |
| 1132 | } | ||
| 1133 | 1145 | ||
| 1134 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | 1146 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
| 1135 | btrfs_set_lock_blocking(buf); | 1147 | btrfs_set_lock_blocking(buf); |
| 1136 | clear_extent_buffer_dirty(buf); | 1148 | clear_extent_buffer_dirty(buf); |
| 1149 | } | ||
| 1137 | } | 1150 | } |
| 1138 | } | 1151 | } |
| 1139 | 1152 | ||
| @@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1193 | root->root_key.objectid = objectid; | 1206 | root->root_key.objectid = objectid; |
| 1194 | root->anon_dev = 0; | 1207 | root->anon_dev = 0; |
| 1195 | 1208 | ||
| 1196 | spin_lock_init(&root->root_times_lock); | 1209 | spin_lock_init(&root->root_item_lock); |
| 1197 | } | 1210 | } |
| 1198 | 1211 | ||
| 1199 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, | 1212 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, |
| @@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb, | |||
| 2131 | init_rwsem(&fs_info->extent_commit_sem); | 2144 | init_rwsem(&fs_info->extent_commit_sem); |
| 2132 | init_rwsem(&fs_info->cleanup_work_sem); | 2145 | init_rwsem(&fs_info->cleanup_work_sem); |
| 2133 | init_rwsem(&fs_info->subvol_sem); | 2146 | init_rwsem(&fs_info->subvol_sem); |
| 2147 | fs_info->dev_replace.lock_owner = 0; | ||
| 2148 | atomic_set(&fs_info->dev_replace.nesting_level, 0); | ||
| 2149 | mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); | ||
| 2150 | mutex_init(&fs_info->dev_replace.lock_management_lock); | ||
| 2151 | mutex_init(&fs_info->dev_replace.lock); | ||
| 2134 | 2152 | ||
| 2135 | spin_lock_init(&fs_info->qgroup_lock); | 2153 | spin_lock_init(&fs_info->qgroup_lock); |
| 2136 | fs_info->qgroup_tree = RB_ROOT; | 2154 | fs_info->qgroup_tree = RB_ROOT; |
| @@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb, | |||
| 2279 | fs_info->thread_pool_size, | 2297 | fs_info->thread_pool_size, |
| 2280 | &fs_info->generic_worker); | 2298 | &fs_info->generic_worker); |
| 2281 | 2299 | ||
| 2300 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | ||
| 2301 | fs_info->thread_pool_size, | ||
| 2302 | &fs_info->generic_worker); | ||
| 2303 | |||
| 2282 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2304 | btrfs_init_workers(&fs_info->submit_workers, "submit", |
| 2283 | min_t(u64, fs_devices->num_devices, | 2305 | min_t(u64, fs_devices->num_devices, |
| 2284 | fs_info->thread_pool_size), | 2306 | fs_info->thread_pool_size), |
| @@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb, | |||
| 2350 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2372 | ret |= btrfs_start_workers(&fs_info->delayed_workers); |
| 2351 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2373 | ret |= btrfs_start_workers(&fs_info->caching_workers); |
| 2352 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2374 | ret |= btrfs_start_workers(&fs_info->readahead_workers); |
| 2375 | ret |= btrfs_start_workers(&fs_info->flush_workers); | ||
| 2353 | if (ret) { | 2376 | if (ret) { |
| 2354 | err = -ENOMEM; | 2377 | err = -ENOMEM; |
| 2355 | goto fail_sb_buffer; | 2378 | goto fail_sb_buffer; |
| @@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb, | |||
| 2418 | goto fail_tree_roots; | 2441 | goto fail_tree_roots; |
| 2419 | } | 2442 | } |
| 2420 | 2443 | ||
| 2421 | btrfs_close_extra_devices(fs_devices); | 2444 | /* |
| 2445 | * keep the device that is marked to be the target device for the | ||
| 2446 | * dev_replace procedure | ||
| 2447 | */ | ||
| 2448 | btrfs_close_extra_devices(fs_info, fs_devices, 0); | ||
| 2422 | 2449 | ||
| 2423 | if (!fs_devices->latest_bdev) { | 2450 | if (!fs_devices->latest_bdev) { |
| 2424 | printk(KERN_CRIT "btrfs: failed to read devices on %s\n", | 2451 | printk(KERN_CRIT "btrfs: failed to read devices on %s\n", |
| @@ -2490,6 +2517,14 @@ retry_root_backup: | |||
| 2490 | goto fail_block_groups; | 2517 | goto fail_block_groups; |
| 2491 | } | 2518 | } |
| 2492 | 2519 | ||
| 2520 | ret = btrfs_init_dev_replace(fs_info); | ||
| 2521 | if (ret) { | ||
| 2522 | pr_err("btrfs: failed to init dev_replace: %d\n", ret); | ||
| 2523 | goto fail_block_groups; | ||
| 2524 | } | ||
| 2525 | |||
| 2526 | btrfs_close_extra_devices(fs_info, fs_devices, 1); | ||
| 2527 | |||
| 2493 | ret = btrfs_init_space_info(fs_info); | 2528 | ret = btrfs_init_space_info(fs_info); |
| 2494 | if (ret) { | 2529 | if (ret) { |
| 2495 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | 2530 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); |
| @@ -2503,6 +2538,13 @@ retry_root_backup: | |||
| 2503 | } | 2538 | } |
| 2504 | fs_info->num_tolerated_disk_barrier_failures = | 2539 | fs_info->num_tolerated_disk_barrier_failures = |
| 2505 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | 2540 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); |
| 2541 | if (fs_info->fs_devices->missing_devices > | ||
| 2542 | fs_info->num_tolerated_disk_barrier_failures && | ||
| 2543 | !(sb->s_flags & MS_RDONLY)) { | ||
| 2544 | printk(KERN_WARNING | ||
| 2545 | "Btrfs: too many missing devices, writeable mount is not allowed\n"); | ||
| 2546 | goto fail_block_groups; | ||
| 2547 | } | ||
| 2506 | 2548 | ||
| 2507 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 2549 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
| 2508 | "btrfs-cleaner"); | 2550 | "btrfs-cleaner"); |
| @@ -2631,6 +2673,13 @@ retry_root_backup: | |||
| 2631 | return ret; | 2673 | return ret; |
| 2632 | } | 2674 | } |
| 2633 | 2675 | ||
| 2676 | ret = btrfs_resume_dev_replace_async(fs_info); | ||
| 2677 | if (ret) { | ||
| 2678 | pr_warn("btrfs: failed to resume dev_replace\n"); | ||
| 2679 | close_ctree(tree_root); | ||
| 2680 | return ret; | ||
| 2681 | } | ||
| 2682 | |||
| 2634 | return 0; | 2683 | return 0; |
| 2635 | 2684 | ||
| 2636 | fail_qgroup: | 2685 | fail_qgroup: |
| @@ -2667,6 +2716,7 @@ fail_sb_buffer: | |||
| 2667 | btrfs_stop_workers(&fs_info->submit_workers); | 2716 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2668 | btrfs_stop_workers(&fs_info->delayed_workers); | 2717 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 2669 | btrfs_stop_workers(&fs_info->caching_workers); | 2718 | btrfs_stop_workers(&fs_info->caching_workers); |
| 2719 | btrfs_stop_workers(&fs_info->flush_workers); | ||
| 2670 | fail_alloc: | 2720 | fail_alloc: |
| 2671 | fail_iput: | 2721 | fail_iput: |
| 2672 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2722 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| @@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root) | |||
| 3270 | smp_mb(); | 3320 | smp_mb(); |
| 3271 | 3321 | ||
| 3272 | /* pause restriper - we want to resume on mount */ | 3322 | /* pause restriper - we want to resume on mount */ |
| 3273 | btrfs_pause_balance(root->fs_info); | 3323 | btrfs_pause_balance(fs_info); |
| 3274 | 3324 | ||
| 3275 | btrfs_scrub_cancel(root); | 3325 | btrfs_dev_replace_suspend_for_unmount(fs_info); |
| 3326 | |||
| 3327 | btrfs_scrub_cancel(fs_info); | ||
| 3276 | 3328 | ||
| 3277 | /* wait for any defraggers to finish */ | 3329 | /* wait for any defraggers to finish */ |
| 3278 | wait_event(fs_info->transaction_wait, | 3330 | wait_event(fs_info->transaction_wait, |
| 3279 | (atomic_read(&fs_info->defrag_running) == 0)); | 3331 | (atomic_read(&fs_info->defrag_running) == 0)); |
| 3280 | 3332 | ||
| 3281 | /* clear out the rbtree of defraggable inodes */ | 3333 | /* clear out the rbtree of defraggable inodes */ |
| 3282 | btrfs_run_defrag_inodes(fs_info); | 3334 | btrfs_cleanup_defrag_inodes(fs_info); |
| 3283 | 3335 | ||
| 3284 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 3336 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
| 3285 | ret = btrfs_commit_super(root); | 3337 | ret = btrfs_commit_super(root); |
| @@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 3339 | btrfs_stop_workers(&fs_info->delayed_workers); | 3391 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 3340 | btrfs_stop_workers(&fs_info->caching_workers); | 3392 | btrfs_stop_workers(&fs_info->caching_workers); |
| 3341 | btrfs_stop_workers(&fs_info->readahead_workers); | 3393 | btrfs_stop_workers(&fs_info->readahead_workers); |
| 3394 | btrfs_stop_workers(&fs_info->flush_workers); | ||
| 3342 | 3395 | ||
| 3343 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 3396 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 3344 | if (btrfs_test_opt(root, CHECK_INTEGRITY)) | 3397 | if (btrfs_test_opt(root, CHECK_INTEGRITY)) |
| @@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
| 3383 | int was_dirty; | 3436 | int was_dirty; |
| 3384 | 3437 | ||
| 3385 | btrfs_assert_tree_locked(buf); | 3438 | btrfs_assert_tree_locked(buf); |
| 3386 | if (transid != root->fs_info->generation) { | 3439 | if (transid != root->fs_info->generation) |
| 3387 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | 3440 | WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " |
| 3388 | "found %llu running %llu\n", | 3441 | "found %llu running %llu\n", |
| 3389 | (unsigned long long)buf->start, | 3442 | (unsigned long long)buf->start, |
| 3390 | (unsigned long long)transid, | 3443 | (unsigned long long)transid, |
| 3391 | (unsigned long long)root->fs_info->generation); | 3444 | (unsigned long long)root->fs_info->generation); |
| 3392 | WARN_ON(1); | ||
| 3393 | } | ||
| 3394 | was_dirty = set_extent_buffer_dirty(buf); | 3445 | was_dirty = set_extent_buffer_dirty(buf); |
| 3395 | if (!was_dirty) { | 3446 | if (!was_dirty) { |
| 3396 | spin_lock(&root->fs_info->delalloc_lock); | 3447 | spin_lock(&root->fs_info->delalloc_lock); |
| @@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
| 3399 | } | 3450 | } |
| 3400 | } | 3451 | } |
| 3401 | 3452 | ||
| 3402 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 3453 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, |
| 3454 | int flush_delayed) | ||
| 3403 | { | 3455 | { |
| 3404 | /* | 3456 | /* |
| 3405 | * looks as though older kernels can get into trouble with | 3457 | * looks as though older kernels can get into trouble with |
| @@ -3411,7 +3463,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
| 3411 | if (current->flags & PF_MEMALLOC) | 3463 | if (current->flags & PF_MEMALLOC) |
| 3412 | return; | 3464 | return; |
| 3413 | 3465 | ||
| 3414 | btrfs_balance_delayed_items(root); | 3466 | if (flush_delayed) |
| 3467 | btrfs_balance_delayed_items(root); | ||
| 3415 | 3468 | ||
| 3416 | num_dirty = root->fs_info->dirty_metadata_bytes; | 3469 | num_dirty = root->fs_info->dirty_metadata_bytes; |
| 3417 | 3470 | ||
| @@ -3422,25 +3475,14 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
| 3422 | return; | 3475 | return; |
| 3423 | } | 3476 | } |
| 3424 | 3477 | ||
| 3425 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 3478 | void btrfs_btree_balance_dirty(struct btrfs_root *root) |
| 3426 | { | 3479 | { |
| 3427 | /* | 3480 | __btrfs_btree_balance_dirty(root, 1); |
| 3428 | * looks as though older kernels can get into trouble with | 3481 | } |
| 3429 | * this code, they end up stuck in balance_dirty_pages forever | ||
| 3430 | */ | ||
| 3431 | u64 num_dirty; | ||
| 3432 | unsigned long thresh = 32 * 1024 * 1024; | ||
| 3433 | |||
| 3434 | if (current->flags & PF_MEMALLOC) | ||
| 3435 | return; | ||
| 3436 | |||
| 3437 | num_dirty = root->fs_info->dirty_metadata_bytes; | ||
| 3438 | 3482 | ||
| 3439 | if (num_dirty > thresh) { | 3483 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root) |
| 3440 | balance_dirty_pages_ratelimited( | 3484 | { |
| 3441 | root->fs_info->btree_inode->i_mapping); | 3485 | __btrfs_btree_balance_dirty(root, 0); |
| 3442 | } | ||
| 3443 | return; | ||
| 3444 | } | 3486 | } |
| 3445 | 3487 | ||
| 3446 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | 3488 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) |
