about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 146
1 files changed, 94 insertions, 52 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7cda51995c1e..a8f652dc940b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -45,6 +45,7 @@
45#include "inode-map.h" 45#include "inode-map.h"
46#include "check-integrity.h" 46#include "check-integrity.h"
47#include "rcu-string.h" 47#include "rcu-string.h"
48#include "dev-replace.h"
48 49
49#ifdef CONFIG_X86 50#ifdef CONFIG_X86
50#include <asm/cpufeature.h> 51#include <asm/cpufeature.h>
@@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
387 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) 388 if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
388 break; 389 break;
389 390
390 num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, 391 num_copies = btrfs_num_copies(root->fs_info,
391 eb->start, eb->len); 392 eb->start, eb->len);
392 if (num_copies == 1) 393 if (num_copies == 1)
393 break; 394 break;
@@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
852 int mirror_num, unsigned long bio_flags, 853 int mirror_num, unsigned long bio_flags,
853 u64 bio_offset) 854 u64 bio_offset)
854{ 855{
856 int ret;
857
855 /* 858 /*
856 * when we're called for a write, we're already in the async 859 * when we're called for a write, we're already in the async
857 * submission context. Just jump into btrfs_map_bio 860 * submission context. Just jump into btrfs_map_bio
858 */ 861 */
859 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); 862 ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
863 if (ret)
864 bio_endio(bio, ret);
865 return ret;
860} 866}
861 867
862static int check_async_write(struct inode *inode, unsigned long bio_flags) 868static int check_async_write(struct inode *inode, unsigned long bio_flags)
@@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
878 int ret; 884 int ret;
879 885
880 if (!(rw & REQ_WRITE)) { 886 if (!(rw & REQ_WRITE)) {
881
882 /* 887 /*
883 * called for a read, do the setup so that checksum validation 888 * called for a read, do the setup so that checksum validation
884 * can happen in the async kernel threads 889 * can happen in the async kernel threads
@@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
886 ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, 891 ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
887 bio, 1); 892 bio, 1);
888 if (ret) 893 if (ret)
889 return ret; 894 goto out_w_error;
890 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 895 ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
891 mirror_num, 0); 896 mirror_num, 0);
892 } else if (!async) { 897 } else if (!async) {
893 ret = btree_csum_one_bio(bio); 898 ret = btree_csum_one_bio(bio);
894 if (ret) 899 if (ret)
895 return ret; 900 goto out_w_error;
896 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 901 ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
897 mirror_num, 0); 902 mirror_num, 0);
903 } else {
904 /*
905 * kthread helpers are used to submit writes so that
906 * checksumming can happen in parallel across all CPUs
907 */
908 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
909 inode, rw, bio, mirror_num, 0,
910 bio_offset,
911 __btree_submit_bio_start,
912 __btree_submit_bio_done);
898 } 913 }
899 914
900 /* 915 if (ret) {
901 * kthread helpers are used to submit writes so that checksumming 916out_w_error:
902 * can happen in parallel across all CPUs 917 bio_endio(bio, ret);
903 */ 918 }
904 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, 919 return ret;
905 inode, rw, bio, mirror_num, 0,
906 bio_offset,
907 __btree_submit_bio_start,
908 __btree_submit_bio_done);
909} 920}
910 921
911#ifdef CONFIG_MIGRATION 922#ifdef CONFIG_MIGRATION
@@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
990 1001
991static int btree_set_page_dirty(struct page *page) 1002static int btree_set_page_dirty(struct page *page)
992{ 1003{
1004#ifdef DEBUG
993 struct extent_buffer *eb; 1005 struct extent_buffer *eb;
994 1006
995 BUG_ON(!PagePrivate(page)); 1007 BUG_ON(!PagePrivate(page));
@@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page)
998 BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); 1010 BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
999 BUG_ON(!atomic_read(&eb->refs)); 1011 BUG_ON(!atomic_read(&eb->refs));
1000 btrfs_assert_tree_locked(eb); 1012 btrfs_assert_tree_locked(eb);
1013#endif
1001 return __set_page_dirty_nobuffers(page); 1014 return __set_page_dirty_nobuffers(page);
1002} 1015}
1003 1016
@@ -1129,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1129 root->fs_info->dirty_metadata_bytes); 1142 root->fs_info->dirty_metadata_bytes);
1130 } 1143 }
1131 spin_unlock(&root->fs_info->delalloc_lock); 1144 spin_unlock(&root->fs_info->delalloc_lock);
1132 }
1133 1145
1134 /* ugh, clear_extent_buffer_dirty needs to lock the page */ 1146 /* ugh, clear_extent_buffer_dirty needs to lock the page */
1135 btrfs_set_lock_blocking(buf); 1147 btrfs_set_lock_blocking(buf);
1136 clear_extent_buffer_dirty(buf); 1148 clear_extent_buffer_dirty(buf);
1149 }
1137 } 1150 }
1138} 1151}
1139 1152
@@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1193 root->root_key.objectid = objectid; 1206 root->root_key.objectid = objectid;
1194 root->anon_dev = 0; 1207 root->anon_dev = 0;
1195 1208
1196 spin_lock_init(&root->root_times_lock); 1209 spin_lock_init(&root->root_item_lock);
1197} 1210}
1198 1211
1199static int __must_check find_and_setup_root(struct btrfs_root *tree_root, 1212static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
@@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb,
2131 init_rwsem(&fs_info->extent_commit_sem); 2144 init_rwsem(&fs_info->extent_commit_sem);
2132 init_rwsem(&fs_info->cleanup_work_sem); 2145 init_rwsem(&fs_info->cleanup_work_sem);
2133 init_rwsem(&fs_info->subvol_sem); 2146 init_rwsem(&fs_info->subvol_sem);
2147 fs_info->dev_replace.lock_owner = 0;
2148 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2149 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2150 mutex_init(&fs_info->dev_replace.lock_management_lock);
2151 mutex_init(&fs_info->dev_replace.lock);
2134 2152
2135 spin_lock_init(&fs_info->qgroup_lock); 2153 spin_lock_init(&fs_info->qgroup_lock);
2136 fs_info->qgroup_tree = RB_ROOT; 2154 fs_info->qgroup_tree = RB_ROOT;
@@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb,
2279 fs_info->thread_pool_size, 2297 fs_info->thread_pool_size,
2280 &fs_info->generic_worker); 2298 &fs_info->generic_worker);
2281 2299
2300 btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
2301 fs_info->thread_pool_size,
2302 &fs_info->generic_worker);
2303
2282 btrfs_init_workers(&fs_info->submit_workers, "submit", 2304 btrfs_init_workers(&fs_info->submit_workers, "submit",
2283 min_t(u64, fs_devices->num_devices, 2305 min_t(u64, fs_devices->num_devices,
2284 fs_info->thread_pool_size), 2306 fs_info->thread_pool_size),
@@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb,
2350 ret |= btrfs_start_workers(&fs_info->delayed_workers); 2372 ret |= btrfs_start_workers(&fs_info->delayed_workers);
2351 ret |= btrfs_start_workers(&fs_info->caching_workers); 2373 ret |= btrfs_start_workers(&fs_info->caching_workers);
2352 ret |= btrfs_start_workers(&fs_info->readahead_workers); 2374 ret |= btrfs_start_workers(&fs_info->readahead_workers);
2375 ret |= btrfs_start_workers(&fs_info->flush_workers);
2353 if (ret) { 2376 if (ret) {
2354 err = -ENOMEM; 2377 err = -ENOMEM;
2355 goto fail_sb_buffer; 2378 goto fail_sb_buffer;
@@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb,
2418 goto fail_tree_roots; 2441 goto fail_tree_roots;
2419 } 2442 }
2420 2443
2421 btrfs_close_extra_devices(fs_devices); 2444 /*
2445 * keep the device that is marked to be the target device for the
2446 * dev_replace procedure
2447 */
2448 btrfs_close_extra_devices(fs_info, fs_devices, 0);
2422 2449
2423 if (!fs_devices->latest_bdev) { 2450 if (!fs_devices->latest_bdev) {
2424 printk(KERN_CRIT "btrfs: failed to read devices on %s\n", 2451 printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
@@ -2490,6 +2517,14 @@ retry_root_backup:
2490 goto fail_block_groups; 2517 goto fail_block_groups;
2491 } 2518 }
2492 2519
2520 ret = btrfs_init_dev_replace(fs_info);
2521 if (ret) {
2522 pr_err("btrfs: failed to init dev_replace: %d\n", ret);
2523 goto fail_block_groups;
2524 }
2525
2526 btrfs_close_extra_devices(fs_info, fs_devices, 1);
2527
2493 ret = btrfs_init_space_info(fs_info); 2528 ret = btrfs_init_space_info(fs_info);
2494 if (ret) { 2529 if (ret) {
2495 printk(KERN_ERR "Failed to initial space info: %d\n", ret); 2530 printk(KERN_ERR "Failed to initial space info: %d\n", ret);
@@ -2503,6 +2538,13 @@ retry_root_backup:
2503 } 2538 }
2504 fs_info->num_tolerated_disk_barrier_failures = 2539 fs_info->num_tolerated_disk_barrier_failures =
2505 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); 2540 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
2541 if (fs_info->fs_devices->missing_devices >
2542 fs_info->num_tolerated_disk_barrier_failures &&
2543 !(sb->s_flags & MS_RDONLY)) {
2544 printk(KERN_WARNING
2545 "Btrfs: too many missing devices, writeable mount is not allowed\n");
2546 goto fail_block_groups;
2547 }
2506 2548
2507 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 2549 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
2508 "btrfs-cleaner"); 2550 "btrfs-cleaner");
@@ -2631,6 +2673,13 @@ retry_root_backup:
2631 return ret; 2673 return ret;
2632 } 2674 }
2633 2675
2676 ret = btrfs_resume_dev_replace_async(fs_info);
2677 if (ret) {
2678 pr_warn("btrfs: failed to resume dev_replace\n");
2679 close_ctree(tree_root);
2680 return ret;
2681 }
2682
2634 return 0; 2683 return 0;
2635 2684
2636fail_qgroup: 2685fail_qgroup:
@@ -2667,6 +2716,7 @@ fail_sb_buffer:
2667 btrfs_stop_workers(&fs_info->submit_workers); 2716 btrfs_stop_workers(&fs_info->submit_workers);
2668 btrfs_stop_workers(&fs_info->delayed_workers); 2717 btrfs_stop_workers(&fs_info->delayed_workers);
2669 btrfs_stop_workers(&fs_info->caching_workers); 2718 btrfs_stop_workers(&fs_info->caching_workers);
2719 btrfs_stop_workers(&fs_info->flush_workers);
2670fail_alloc: 2720fail_alloc:
2671fail_iput: 2721fail_iput:
2672 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2722 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root)
3270 smp_mb(); 3320 smp_mb();
3271 3321
3272 /* pause restriper - we want to resume on mount */ 3322 /* pause restriper - we want to resume on mount */
3273 btrfs_pause_balance(root->fs_info); 3323 btrfs_pause_balance(fs_info);
3324
3325 btrfs_dev_replace_suspend_for_unmount(fs_info);
3274 3326
3275 btrfs_scrub_cancel(root); 3327 btrfs_scrub_cancel(fs_info);
3276 3328
3277 /* wait for any defraggers to finish */ 3329 /* wait for any defraggers to finish */
3278 wait_event(fs_info->transaction_wait, 3330 wait_event(fs_info->transaction_wait,
3279 (atomic_read(&fs_info->defrag_running) == 0)); 3331 (atomic_read(&fs_info->defrag_running) == 0));
3280 3332
3281 /* clear out the rbtree of defraggable inodes */ 3333 /* clear out the rbtree of defraggable inodes */
3282 btrfs_run_defrag_inodes(fs_info); 3334 btrfs_cleanup_defrag_inodes(fs_info);
3283 3335
3284 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3336 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3285 ret = btrfs_commit_super(root); 3337 ret = btrfs_commit_super(root);
@@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root)
3339 btrfs_stop_workers(&fs_info->delayed_workers); 3391 btrfs_stop_workers(&fs_info->delayed_workers);
3340 btrfs_stop_workers(&fs_info->caching_workers); 3392 btrfs_stop_workers(&fs_info->caching_workers);
3341 btrfs_stop_workers(&fs_info->readahead_workers); 3393 btrfs_stop_workers(&fs_info->readahead_workers);
3394 btrfs_stop_workers(&fs_info->flush_workers);
3342 3395
3343#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 3396#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
3344 if (btrfs_test_opt(root, CHECK_INTEGRITY)) 3397 if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3383 int was_dirty; 3436 int was_dirty;
3384 3437
3385 btrfs_assert_tree_locked(buf); 3438 btrfs_assert_tree_locked(buf);
3386 if (transid != root->fs_info->generation) { 3439 if (transid != root->fs_info->generation)
3387 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " 3440 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
3388 "found %llu running %llu\n", 3441 "found %llu running %llu\n",
3389 (unsigned long long)buf->start, 3442 (unsigned long long)buf->start,
3390 (unsigned long long)transid, 3443 (unsigned long long)transid,
3391 (unsigned long long)root->fs_info->generation); 3444 (unsigned long long)root->fs_info->generation);
3392 WARN_ON(1);
3393 }
3394 was_dirty = set_extent_buffer_dirty(buf); 3445 was_dirty = set_extent_buffer_dirty(buf);
3395 if (!was_dirty) { 3446 if (!was_dirty) {
3396 spin_lock(&root->fs_info->delalloc_lock); 3447 spin_lock(&root->fs_info->delalloc_lock);
@@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3399 } 3450 }
3400} 3451}
3401 3452
3402void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) 3453static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
3454 int flush_delayed)
3403{ 3455{
3404 /* 3456 /*
3405 * looks as though older kernels can get into trouble with 3457 * looks as though older kernels can get into trouble with
@@ -3411,36 +3463,26 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
3411 if (current->flags & PF_MEMALLOC) 3463 if (current->flags & PF_MEMALLOC)
3412 return; 3464 return;
3413 3465
3414 btrfs_balance_delayed_items(root); 3466 if (flush_delayed)
3467 btrfs_balance_delayed_items(root);
3415 3468
3416 num_dirty = root->fs_info->dirty_metadata_bytes; 3469 num_dirty = root->fs_info->dirty_metadata_bytes;
3417 3470
3418 if (num_dirty > thresh) { 3471 if (num_dirty > thresh) {
3419 balance_dirty_pages_ratelimited_nr( 3472 balance_dirty_pages_ratelimited(
3420 root->fs_info->btree_inode->i_mapping, 1); 3473 root->fs_info->btree_inode->i_mapping);
3421 } 3474 }
3422 return; 3475 return;
3423} 3476}
3424 3477
3425void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) 3478void btrfs_btree_balance_dirty(struct btrfs_root *root)
3426{ 3479{
3427 /* 3480 __btrfs_btree_balance_dirty(root, 1);
3428 * looks as though older kernels can get into trouble with 3481}
3429 * this code, they end up stuck in balance_dirty_pages forever
3430 */
3431 u64 num_dirty;
3432 unsigned long thresh = 32 * 1024 * 1024;
3433
3434 if (current->flags & PF_MEMALLOC)
3435 return;
3436
3437 num_dirty = root->fs_info->dirty_metadata_bytes;
3438 3482
3439 if (num_dirty > thresh) { 3483void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
3440 balance_dirty_pages_ratelimited_nr( 3484{
3441 root->fs_info->btree_inode->i_mapping, 1); 3485 __btrfs_btree_balance_dirty(root, 0);
3442 }
3443 return;
3444} 3486}
3445 3487
3446int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) 3488int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)