Diffstat (limited to 'fs/btrfs/disk-io.c'):

 fs/btrfs/disk-io.c | 370 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 227 insertions(+), 143 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 22e98e04c2ea..a8f652dc940b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -45,6 +45,11 @@
 #include "inode-map.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "dev-replace.h"
+
+#ifdef CONFIG_X86
+#include <asm/cpufeature.h>
+#endif
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -217,26 +222,16 @@ static struct extent_map *btree_get_extent(struct inode *inode,
     write_lock(&em_tree->lock);
     ret = add_extent_mapping(em_tree, em);
     if (ret == -EEXIST) {
-        u64 failed_start = em->start;
-        u64 failed_len = em->len;
-
         free_extent_map(em);
         em = lookup_extent_mapping(em_tree, start, len);
-        if (em) {
-            ret = 0;
-        } else {
-            em = lookup_extent_mapping(em_tree, failed_start,
-                           failed_len);
-            ret = -EIO;
-        }
+        if (!em)
+            em = ERR_PTR(-EIO);
     } else if (ret) {
         free_extent_map(em);
-        em = NULL;
+        em = ERR_PTR(ret);
     }
     write_unlock(&em_tree->lock);
 
-    if (ret)
-        em = ERR_PTR(ret);
 out:
     return em;
 }
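With the -EEXIST fallback gone, btree_get_extent() now returns either a valid extent_map or an ERR_PTR()-encoded errno, never NULL. A minimal caller-side sketch of that contract (the surrounding call is illustrative, not taken from this patch; IS_ERR()/PTR_ERR() come from <linux/err.h>):

    struct extent_map *em;

    em = btree_get_extent(inode, NULL, 0, start, len, 0);
    if (IS_ERR(em))
        return PTR_ERR(em);    /* decode the errno, e.g. -EIO */
    /* em is a valid, referenced mapping here */
    free_extent_map(em);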
@@ -393,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
         if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
             break;
 
-        num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
+        num_copies = btrfs_num_copies(root->fs_info,
                           eb->start, eb->len);
         if (num_copies == 1)
             break;
@@ -439,10 +434,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
         WARN_ON(1);
         return 0;
     }
-    if (eb->pages[0] != page) {
-        WARN_ON(1);
-        return 0;
-    }
     if (!PageUptodate(page)) {
         WARN_ON(1);
         return 0;
@@ -862,21 +853,37 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
              int mirror_num, unsigned long bio_flags,
              u64 bio_offset)
 {
+    int ret;
+
     /*
      * when we're called for a write, we're already in the async
      * submission context.  Just jump into btrfs_map_bio
      */
-    return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+    ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
+    if (ret)
+        bio_endio(bio, ret);
+    return ret;
+}
+
+static int check_async_write(struct inode *inode, unsigned long bio_flags)
+{
+    if (bio_flags & EXTENT_BIO_TREE_LOG)
+        return 0;
+#ifdef CONFIG_X86
+    if (cpu_has_xmm4_2)
+        return 0;
+#endif
+    return 1;
 }
 
 static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                  int mirror_num, unsigned long bio_flags,
                  u64 bio_offset)
 {
+    int async = check_async_write(inode, bio_flags);
     int ret;
 
     if (!(rw & REQ_WRITE)) {
-
         /*
          * called for a read, do the setup so that checksum validation
          * can happen in the async kernel threads
@@ -884,20 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
         ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info,
                       bio, 1);
         if (ret)
-            return ret;
-        return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
-                     mirror_num, 0);
+            goto out_w_error;
+        ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+                    mirror_num, 0);
+    } else if (!async) {
+        ret = btree_csum_one_bio(bio);
+        if (ret)
+            goto out_w_error;
+        ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+                    mirror_num, 0);
+    } else {
+        /*
+         * kthread helpers are used to submit writes so that
+         * checksumming can happen in parallel across all CPUs
+         */
+        ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+                      inode, rw, bio, mirror_num, 0,
+                      bio_offset,
+                      __btree_submit_bio_start,
+                      __btree_submit_bio_done);
     }
 
-    /*
-     * kthread helpers are used to submit writes so that checksumming
-     * can happen in parallel across all CPUs
-     */
-    return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-                   inode, rw, bio, mirror_num, 0,
-                   bio_offset,
-                   __btree_submit_bio_start,
-                   __btree_submit_bio_done);
+    if (ret) {
+out_w_error:
+        bio_endio(bio, ret);
+    }
+    return ret;
 }
 
 #ifdef CONFIG_MIGRATION
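The new check_async_write() decides whether a metadata write is worth handing to the checksumming worker threads: tree-log bios stay synchronous, and on x86 the cpu_has_xmm4_2 test detects SSE4.2, whose hardware crc32 instruction makes inline checksumming cheaper than a round trip through a workqueue. The resulting control flow, summarized as a sketch:

    /*
     * Sketch of the three submit paths after this change:
     *
     *   read           -> btrfs_bio_wq_end_io() + btrfs_map_bio()
     *                     (csums verified later in end_io workers)
     *   write, !async  -> btree_csum_one_bio() inline, then
     *                     btrfs_map_bio()
     *   write, async   -> btrfs_wq_submit_bio() so csumming runs in
     *                     parallel across CPUs
     *
     * Every failure now reaches one bio_endio(bio, ret) call, so a
     * failed submission can no longer leak the bio.
     */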
@@ -982,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
 
 static int btree_set_page_dirty(struct page *page)
 {
+#ifdef DEBUG
     struct extent_buffer *eb;
 
     BUG_ON(!PagePrivate(page));
@@ -990,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page)
     BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
     BUG_ON(!atomic_read(&eb->refs));
     btrfs_assert_tree_locked(eb);
+#endif
     return __set_page_dirty_nobuffers(page);
 }
 
@@ -1121,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                       root->fs_info->dirty_metadata_bytes);
         }
         spin_unlock(&root->fs_info->delalloc_lock);
-    }
 
-    /* ugh, clear_extent_buffer_dirty needs to lock the page */
-    btrfs_set_lock_blocking(buf);
-    clear_extent_buffer_dirty(buf);
+        /* ugh, clear_extent_buffer_dirty needs to lock the page */
+        btrfs_set_lock_blocking(buf);
+        clear_extent_buffer_dirty(buf);
+        }
     }
 }
 
@@ -1168,8 +1189,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
     atomic_set(&root->log_commit[0], 0);
     atomic_set(&root->log_commit[1], 0);
     atomic_set(&root->log_writers, 0);
+    atomic_set(&root->log_batch, 0);
     atomic_set(&root->orphan_inodes, 0);
-    root->log_batch = 0;
     root->log_transid = 0;
     root->last_log_commit = 0;
     extent_io_tree_init(&root->dirty_log_pages,
@@ -1185,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
     root->root_key.objectid = objectid;
     root->anon_dev = 0;
 
-    spin_lock_init(&root->root_times_lock);
+    spin_lock_init(&root->root_item_lock);
 }
 
 static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
@@ -1667,9 +1688,10 @@ static int transaction_kthread(void *arg)
         spin_unlock(&root->fs_info->trans_lock);
 
         /* If the file system is aborted, this will always fail. */
-        trans = btrfs_join_transaction(root);
+        trans = btrfs_attach_transaction(root);
         if (IS_ERR(trans)) {
-            cannot_commit = true;
+            if (PTR_ERR(trans) != -ENOENT)
+                cannot_commit = true;
             goto sleep;
         }
         if (transid == trans->transid) {
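Switching the transaction kthread from btrfs_join_transaction() to btrfs_attach_transaction() changes its semantics: join starts a brand-new transaction when none is running, so the periodic commit thread could create empty transactions just to commit them. attach only latches onto an existing transaction and returns ERR_PTR(-ENOENT) when there is nothing to commit, which is why -ENOENT is filtered out before setting cannot_commit. As a comment sketch:

    /*
     * btrfs_join_transaction(root)   - join the running transaction,
     *                                  or start a new one if none exists
     * btrfs_attach_transaction(root) - join the running transaction,
     *                                  or fail with ERR_PTR(-ENOENT)
     *
     * For a periodic commit thread, attach is the right tool: no
     * transaction means no work to do, not an error.
     */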
@@ -1994,13 +2016,11 @@ int open_ctree(struct super_block *sb,
     INIT_LIST_HEAD(&fs_info->trans_list);
     INIT_LIST_HEAD(&fs_info->dead_roots);
     INIT_LIST_HEAD(&fs_info->delayed_iputs);
-    INIT_LIST_HEAD(&fs_info->hashers);
     INIT_LIST_HEAD(&fs_info->delalloc_inodes);
     INIT_LIST_HEAD(&fs_info->ordered_operations);
     INIT_LIST_HEAD(&fs_info->caching_block_groups);
     spin_lock_init(&fs_info->delalloc_lock);
     spin_lock_init(&fs_info->trans_lock);
-    spin_lock_init(&fs_info->ref_cache_lock);
     spin_lock_init(&fs_info->fs_roots_radix_lock);
     spin_lock_init(&fs_info->delayed_iput_lock);
     spin_lock_init(&fs_info->defrag_inodes_lock);
@@ -2014,12 +2034,15 @@ int open_ctree(struct super_block *sb,
     INIT_LIST_HEAD(&fs_info->space_info);
     INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
     btrfs_mapping_init(&fs_info->mapping_tree);
-    btrfs_init_block_rsv(&fs_info->global_block_rsv);
-    btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
-    btrfs_init_block_rsv(&fs_info->trans_block_rsv);
-    btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
-    btrfs_init_block_rsv(&fs_info->empty_block_rsv);
-    btrfs_init_block_rsv(&fs_info->delayed_block_rsv);
+    btrfs_init_block_rsv(&fs_info->global_block_rsv,
+                 BTRFS_BLOCK_RSV_GLOBAL);
+    btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
+                 BTRFS_BLOCK_RSV_DELALLOC);
+    btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
+    btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
+    btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
+    btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
+                 BTRFS_BLOCK_RSV_DELOPS);
     atomic_set(&fs_info->nr_async_submits, 0);
     atomic_set(&fs_info->async_delalloc_pages, 0);
     atomic_set(&fs_info->async_submit_draining, 0);
@@ -2121,6 +2144,11 @@ int open_ctree(struct super_block *sb,
     init_rwsem(&fs_info->extent_commit_sem);
     init_rwsem(&fs_info->cleanup_work_sem);
     init_rwsem(&fs_info->subvol_sem);
+    fs_info->dev_replace.lock_owner = 0;
+    atomic_set(&fs_info->dev_replace.nesting_level, 0);
+    mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+    mutex_init(&fs_info->dev_replace.lock_management_lock);
+    mutex_init(&fs_info->dev_replace.lock);
 
     spin_lock_init(&fs_info->qgroup_lock);
     fs_info->qgroup_tree = RB_ROOT;
@@ -2269,6 +2297,10 @@ int open_ctree(struct super_block *sb,
                fs_info->thread_pool_size,
                &fs_info->generic_worker);
 
+    btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
+               fs_info->thread_pool_size,
+               &fs_info->generic_worker);
+
     btrfs_init_workers(&fs_info->submit_workers, "submit",
                min_t(u64, fs_devices->num_devices,
                fs_info->thread_pool_size),
@@ -2340,6 +2372,7 @@ int open_ctree(struct super_block *sb,
     ret |= btrfs_start_workers(&fs_info->delayed_workers);
     ret |= btrfs_start_workers(&fs_info->caching_workers);
     ret |= btrfs_start_workers(&fs_info->readahead_workers);
+    ret |= btrfs_start_workers(&fs_info->flush_workers);
     if (ret) {
         err = -ENOMEM;
         goto fail_sb_buffer;
@@ -2408,7 +2441,11 @@ int open_ctree(struct super_block *sb,
         goto fail_tree_roots;
     }
 
-    btrfs_close_extra_devices(fs_devices);
+    /*
+     * keep the device that is marked to be the target device for the
+     * dev_replace procedure
+     */
+    btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
     if (!fs_devices->latest_bdev) {
         printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
@@ -2480,6 +2517,14 @@ retry_root_backup:
         goto fail_block_groups;
     }
 
+    ret = btrfs_init_dev_replace(fs_info);
+    if (ret) {
+        pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+        goto fail_block_groups;
+    }
+
+    btrfs_close_extra_devices(fs_info, fs_devices, 1);
+
     ret = btrfs_init_space_info(fs_info);
     if (ret) {
         printk(KERN_ERR "Failed to initial space info: %d\n", ret);
@@ -2491,6 +2536,15 @@ retry_root_backup:
         printk(KERN_ERR "Failed to read block groups: %d\n", ret);
         goto fail_block_groups;
     }
+    fs_info->num_tolerated_disk_barrier_failures =
+        btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
+    if (fs_info->fs_devices->missing_devices >
+         fs_info->num_tolerated_disk_barrier_failures &&
+        !(sb->s_flags & MS_RDONLY)) {
+        printk(KERN_WARNING
+               "Btrfs: too many missing devices, writeable mount is not allowed\n");
+        goto fail_block_groups;
+    }
 
     fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                            "btrfs-cleaner");
@@ -2619,6 +2673,13 @@ retry_root_backup:
         return ret;
     }
 
+    ret = btrfs_resume_dev_replace_async(fs_info);
+    if (ret) {
+        pr_warn("btrfs: failed to resume dev_replace\n");
+        close_ctree(tree_root);
+        return ret;
+    }
+
     return 0;
 
 fail_qgroup:
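Taken together, the open_ctree() hunks above wire the new dev-replace feature into the mount path in a deliberate order; a summary of the touch points, derived from this diff:

    /*
     * dev-replace in open_ctree(), in call order:
     *
     * 1. btrfs_close_extra_devices(fs_info, fs_devices, 0)
     *      step 0: keep a device that may be a replace target until
     *      the persisted replace state has been read
     * 2. btrfs_init_dev_replace(fs_info)
     *      load the replace state item from disk
     * 3. btrfs_close_extra_devices(fs_info, fs_devices, 1)
     *      step 1: now safe to drop devices that are really unused
     * 4. btrfs_resume_dev_replace_async(fs_info)
     *      last thing before the mount returns: restart an
     *      interrupted copy operation in the background
     */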
@@ -2655,6 +2716,7 @@ fail_sb_buffer:
     btrfs_stop_workers(&fs_info->submit_workers);
     btrfs_stop_workers(&fs_info->delayed_workers);
     btrfs_stop_workers(&fs_info->caching_workers);
+    btrfs_stop_workers(&fs_info->flush_workers);
 fail_alloc:
 fail_iput:
     btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2874,12 +2936,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
         printk_in_rcu("btrfs: disabling barriers on dev %s\n",
                   rcu_str_deref(device->name));
         device->nobarriers = 1;
-    }
-    if (!bio_flagged(bio, BIO_UPTODATE)) {
+    } else if (!bio_flagged(bio, BIO_UPTODATE)) {
         ret = -EIO;
-        if (!bio_flagged(bio, BIO_EOPNOTSUPP))
-            btrfs_dev_stat_inc_and_print(device,
-                BTRFS_DEV_STAT_FLUSH_ERRS);
+        btrfs_dev_stat_inc_and_print(device,
+            BTRFS_DEV_STAT_FLUSH_ERRS);
     }
 
     /* drop the reference from the wait == 0 run */
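Restructuring write_dev_flush()'s completion path into if/else-if makes the two outcomes mutually exclusive: a device that reports EOPNOTSUPP for an empty FLUSH gets barriers disabled but is no longer also charged with an EIO in its flush-error statistics. The two-phase protocol the function implements, as a comment sketch:

    /*
     * write_dev_flush(dev, 0): submit an empty flush bio, don't wait
     * write_dev_flush(dev, 1): wait for that bio, then:
     *   BIO_EOPNOTSUPP -> disable barriers on this device, no error
     *   !BIO_UPTODATE  -> ret = -EIO and bump the
     *                     BTRFS_DEV_STAT_FLUSH_ERRS counter
     */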
@@ -2918,14 +2978,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 {
     struct list_head *head;
     struct btrfs_device *dev;
-    int errors = 0;
+    int errors_send = 0;
+    int errors_wait = 0;
     int ret;
 
     /* send down all the barriers */
     head = &info->fs_devices->devices;
     list_for_each_entry_rcu(dev, head, dev_list) {
         if (!dev->bdev) {
-            errors++;
+            errors_send++;
             continue;
         }
         if (!dev->in_fs_metadata || !dev->writeable)
@@ -2933,13 +2994,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 
         ret = write_dev_flush(dev, 0);
         if (ret)
-            errors++;
+            errors_send++;
     }
 
     /* wait for all the barriers */
     list_for_each_entry_rcu(dev, head, dev_list) {
         if (!dev->bdev) {
-            errors++;
+            errors_wait++;
             continue;
         }
         if (!dev->in_fs_metadata || !dev->writeable)
@@ -2947,13 +3008,87 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 
         ret = write_dev_flush(dev, 1);
         if (ret)
-            errors++;
+            errors_wait++;
     }
-    if (errors)
+    if (errors_send > info->num_tolerated_disk_barrier_failures ||
+        errors_wait > info->num_tolerated_disk_barrier_failures)
         return -EIO;
     return 0;
 }
 
+int btrfs_calc_num_tolerated_disk_barrier_failures(
+    struct btrfs_fs_info *fs_info)
+{
+    struct btrfs_ioctl_space_info space;
+    struct btrfs_space_info *sinfo;
+    u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+               BTRFS_BLOCK_GROUP_SYSTEM,
+               BTRFS_BLOCK_GROUP_METADATA,
+               BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+    int num_types = 4;
+    int i;
+    int c;
+    int num_tolerated_disk_barrier_failures =
+        (int)fs_info->fs_devices->num_devices;
+
+    for (i = 0; i < num_types; i++) {
+        struct btrfs_space_info *tmp;
+
+        sinfo = NULL;
+        rcu_read_lock();
+        list_for_each_entry_rcu(tmp, &fs_info->space_info, list) {
+            if (tmp->flags == types[i]) {
+                sinfo = tmp;
+                break;
+            }
+        }
+        rcu_read_unlock();
+
+        if (!sinfo)
+            continue;
+
+        down_read(&sinfo->groups_sem);
+        for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+            if (!list_empty(&sinfo->block_groups[c])) {
+                u64 flags;
+
+                btrfs_get_block_group_info(
+                    &sinfo->block_groups[c], &space);
+                if (space.total_bytes == 0 ||
+                    space.used_bytes == 0)
+                    continue;
+                flags = space.flags;
+                /*
+                 * return
+                 * 0: if dup, single or RAID0 is configured for
+                 *    any of metadata, system or data, else
+                 * 1: if RAID5 is configured, or if RAID1 or
+                 *    RAID10 is configured and only two mirrors
+                 *    are used, else
+                 * 2: if RAID6 is configured, else
+                 * num_mirrors - 1: if RAID1 or RAID10 is
+                 *                  configured and more than
+                 *                  2 mirrors are used.
+                 */
+                if (num_tolerated_disk_barrier_failures > 0 &&
+                    ((flags & (BTRFS_BLOCK_GROUP_DUP |
+                           BTRFS_BLOCK_GROUP_RAID0)) ||
+                     ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
+                      == 0)))
+                    num_tolerated_disk_barrier_failures = 0;
+                else if (num_tolerated_disk_barrier_failures > 1
+                     &&
+                     (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+                           BTRFS_BLOCK_GROUP_RAID10)))
+                    num_tolerated_disk_barrier_failures = 1;
+            }
+        }
+        up_read(&sinfo->groups_sem);
+    }
+
+    return num_tolerated_disk_barrier_failures;
+}
+
 int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
     struct list_head *head;
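The comment inside btrfs_calc_num_tolerated_disk_barrier_failures() encodes the policy; a worked reading of it (RAID5/6 are mentioned there for completeness but were not yet merged at this point):

    /*
     * Worked examples of the rule above:
     *
     *   profiles present                    tolerated failures
     *   ---------------------------------   ------------------
     *   any single / DUP / RAID0 group             0
     *   RAID1 or RAID10 only (two mirrors)         1
     *
     * The value is clamped downward across all block group types,
     * since a lost flush on any unprotected group can lose the
     * filesystem; barrier_all_devices() then compares its per-phase
     * error counts against this threshold instead of failing on the
     * very first error.
     */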
@@ -2976,8 +3111,16 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
     mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
     head = &root->fs_info->fs_devices->devices;
 
-    if (do_barriers)
-        barrier_all_devices(root->fs_info);
+    if (do_barriers) {
+        ret = barrier_all_devices(root->fs_info);
+        if (ret) {
+            mutex_unlock(
+                &root->fs_info->fs_devices->device_list_mutex);
+            btrfs_error(root->fs_info, ret,
+                    "errors while submitting device barriers.");
+            return ret;
+        }
+    }
 
     list_for_each_entry_rcu(dev, head, dev_list) {
         if (!dev->bdev) {
@@ -3177,16 +3320,18 @@ int close_ctree(struct btrfs_root *root)
     smp_mb();
 
     /* pause restriper - we want to resume on mount */
-    btrfs_pause_balance(root->fs_info);
+    btrfs_pause_balance(fs_info);
 
-    btrfs_scrub_cancel(root);
+    btrfs_dev_replace_suspend_for_unmount(fs_info);
+
+    btrfs_scrub_cancel(fs_info);
 
     /* wait for any defraggers to finish */
     wait_event(fs_info->transaction_wait,
            (atomic_read(&fs_info->defrag_running) == 0));
 
     /* clear out the rbtree of defraggable inodes */
-    btrfs_run_defrag_inodes(fs_info);
+    btrfs_cleanup_defrag_inodes(fs_info);
 
     if (!(fs_info->sb->s_flags & MS_RDONLY)) {
         ret = btrfs_commit_super(root);
@@ -3211,10 +3356,6 @@ int close_ctree(struct btrfs_root *root)
         printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
                (unsigned long long)fs_info->delalloc_bytes);
     }
-    if (fs_info->total_ref_cache_size) {
-        printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
-               (unsigned long long)fs_info->total_ref_cache_size);
-    }
 
     free_extent_buffer(fs_info->extent_root->node);
     free_extent_buffer(fs_info->extent_root->commit_root);
@@ -3250,6 +3391,7 @@ int close_ctree(struct btrfs_root *root)
     btrfs_stop_workers(&fs_info->delayed_workers);
     btrfs_stop_workers(&fs_info->caching_workers);
     btrfs_stop_workers(&fs_info->readahead_workers);
+    btrfs_stop_workers(&fs_info->flush_workers);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
     if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3294,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
     int was_dirty;
 
     btrfs_assert_tree_locked(buf);
-    if (transid != root->fs_info->generation) {
-        printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
+    if (transid != root->fs_info->generation)
+        WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
                "found %llu running %llu\n",
             (unsigned long long)buf->start,
             (unsigned long long)transid,
             (unsigned long long)root->fs_info->generation);
-        WARN_ON(1);
-    }
     was_dirty = set_extent_buffer_dirty(buf);
     if (!was_dirty) {
         spin_lock(&root->fs_info->delalloc_lock);
@@ -3310,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
     }
 }
 
-void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
+                    int flush_delayed)
 {
     /*
      * looks as though older kernels can get into trouble with
@@ -3322,36 +3463,26 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
     if (current->flags & PF_MEMALLOC)
         return;
 
-    btrfs_balance_delayed_items(root);
+    if (flush_delayed)
+        btrfs_balance_delayed_items(root);
 
     num_dirty = root->fs_info->dirty_metadata_bytes;
 
     if (num_dirty > thresh) {
-        balance_dirty_pages_ratelimited_nr(
-                   root->fs_info->btree_inode->i_mapping, 1);
+        balance_dirty_pages_ratelimited(
+                   root->fs_info->btree_inode->i_mapping);
     }
     return;
 }
 
-void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
+void btrfs_btree_balance_dirty(struct btrfs_root *root)
 {
-    /*
-     * looks as though older kernels can get into trouble with
-     * this code, they end up stuck in balance_dirty_pages forever
-     */
-    u64 num_dirty;
-    unsigned long thresh = 32 * 1024 * 1024;
-
-    if (current->flags & PF_MEMALLOC)
-        return;
-
-    num_dirty = root->fs_info->dirty_metadata_bytes;
+    __btrfs_btree_balance_dirty(root, 1);
+}
 
-    if (num_dirty > thresh) {
-        balance_dirty_pages_ratelimited_nr(
-                   root->fs_info->btree_inode->i_mapping, 1);
-    }
-    return;
+void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
+{
+    __btrfs_btree_balance_dirty(root, 0);
 }
 
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
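The duplicated balance routines collapse into one worker taking a flush_delayed flag, with two thin public wrappers; the switch from balance_dirty_pages_ratelimited_nr(mapping, 1) to balance_dirty_pages_ratelimited(mapping) tracks the removal of the _nr variant from the writeback API in the same release cycle. The _nodelay wrapper exists for call sites that are themselves reached from delayed-item processing, where re-entering btrfs_balance_delayed_items() would be unwelcome; a hypothetical caller for illustration, names assumed:

    /* hypothetical call site, shown only to illustrate the wrappers */
    static void example_after_delayed_item_work(struct btrfs_root *root)
    {
        /* inside delayed-item code: must not flush delayed items */
        btrfs_btree_balance_dirty_nodelay(root);
    }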
3357 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | 3488 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) |
@@ -3360,52 +3491,6 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
3360 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 3491 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
3361 | } | 3492 | } |
3362 | 3493 | ||
3363 | int btree_lock_page_hook(struct page *page, void *data, | ||
3364 | void (*flush_fn)(void *)) | ||
3365 | { | ||
3366 | struct inode *inode = page->mapping->host; | ||
3367 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3368 | struct extent_buffer *eb; | ||
3369 | |||
3370 | /* | ||
3371 | * We culled this eb but the page is still hanging out on the mapping, | ||
3372 | * carry on. | ||
3373 | */ | ||
3374 | if (!PagePrivate(page)) | ||
3375 | goto out; | ||
3376 | |||
3377 | eb = (struct extent_buffer *)page->private; | ||
3378 | if (!eb) { | ||
3379 | WARN_ON(1); | ||
3380 | goto out; | ||
3381 | } | ||
3382 | if (page != eb->pages[0]) | ||
3383 | goto out; | ||
3384 | |||
3385 | if (!btrfs_try_tree_write_lock(eb)) { | ||
3386 | flush_fn(data); | ||
3387 | btrfs_tree_lock(eb); | ||
3388 | } | ||
3389 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | ||
3390 | |||
3391 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
3392 | spin_lock(&root->fs_info->delalloc_lock); | ||
3393 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
3394 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
3395 | else | ||
3396 | WARN_ON(1); | ||
3397 | spin_unlock(&root->fs_info->delalloc_lock); | ||
3398 | } | ||
3399 | |||
3400 | btrfs_tree_unlock(eb); | ||
3401 | out: | ||
3402 | if (!trylock_page(page)) { | ||
3403 | flush_fn(data); | ||
3404 | lock_page(page); | ||
3405 | } | ||
3406 | return 0; | ||
3407 | } | ||
3408 | |||
3409 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 3494 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
3410 | int read_only) | 3495 | int read_only) |
3411 | { | 3496 | { |
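Removing btree_lock_page_hook(), together with its .write_cache_pages_lock_hook registration dropped in the last hunk below, appears safe because metadata writeback no longer funnels through the generic write_cache_pages() page hook: the equivalent tree locking, WRITTEN-flag setting, and dirty_metadata_bytes accounting is believed to live in lock_extent_buffer_for_io() in extent_io.c since the per-extent-buffer writeback rework, which left this copy as dead code. This also explains the csum_dirty_buffer() hunk earlier that drops the eb->pages[0] != page check once mirrored here.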
@@ -3608,7 +3693,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 
     while (1) {
         ret = find_first_extent_bit(dirty_pages, start, &start, &end,
-                        mark);
+                        mark, NULL);
         if (ret)
             break;
 
@@ -3663,7 +3748,7 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 again:
     while (1) {
         ret = find_first_extent_bit(unpin, 0, &start, &end,
-                        EXTENT_DIRTY);
+                        EXTENT_DIRTY, NULL);
         if (ret)
             break;
 
@@ -3800,7 +3885,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
 }
 
 static struct extent_io_ops btree_extent_io_ops = {
-    .write_cache_pages_lock_hook = btree_lock_page_hook,
     .readpage_end_io_hook = btree_readpage_end_io_hook,
     .readpage_io_failed_hook = btree_io_failed_hook,
     .submit_bio_hook = btree_submit_bio_hook,