diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 227 |
1 files changed, 139 insertions, 88 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a8f652dc940b..02369a3c162e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -46,6 +46,7 @@ | |||
46 | #include "check-integrity.h" | 46 | #include "check-integrity.h" |
47 | #include "rcu-string.h" | 47 | #include "rcu-string.h" |
48 | #include "dev-replace.h" | 48 | #include "dev-replace.h" |
49 | #include "raid56.h" | ||
49 | 50 | ||
50 | #ifdef CONFIG_X86 | 51 | #ifdef CONFIG_X86 |
51 | #include <asm/cpufeature.h> | 52 | #include <asm/cpufeature.h> |
@@ -56,7 +57,8 @@ static void end_workqueue_fn(struct btrfs_work *work); | |||
56 | static void free_fs_root(struct btrfs_root *root); | 57 | static void free_fs_root(struct btrfs_root *root); |
57 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 58 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
58 | int read_only); | 59 | int read_only); |
59 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root); | 60 | static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, |
61 | struct btrfs_root *root); | ||
60 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root); | 62 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root); |
61 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | 63 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
62 | struct btrfs_root *root); | 64 | struct btrfs_root *root); |
@@ -420,7 +422,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
420 | static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | 422 | static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) |
421 | { | 423 | { |
422 | struct extent_io_tree *tree; | 424 | struct extent_io_tree *tree; |
423 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 425 | u64 start = page_offset(page); |
424 | u64 found_start; | 426 | u64 found_start; |
425 | struct extent_buffer *eb; | 427 | struct extent_buffer *eb; |
426 | 428 | ||
@@ -639,8 +641,15 @@ err: | |||
639 | btree_readahead_hook(root, eb, eb->start, ret); | 641 | btree_readahead_hook(root, eb, eb->start, ret); |
640 | } | 642 | } |
641 | 643 | ||
642 | if (ret) | 644 | if (ret) { |
645 | /* | ||
646 | * our io error hook is going to dec the io pages | ||
647 | * again, we have to make sure it has something | ||
648 | * to decrement | ||
649 | */ | ||
650 | atomic_inc(&eb->io_pages); | ||
643 | clear_extent_buffer_uptodate(eb); | 651 | clear_extent_buffer_uptodate(eb); |
652 | } | ||
644 | free_extent_buffer(eb); | 653 | free_extent_buffer(eb); |
645 | out: | 654 | out: |
646 | return ret; | 655 | return ret; |
@@ -654,6 +663,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) | |||
654 | eb = (struct extent_buffer *)page->private; | 663 | eb = (struct extent_buffer *)page->private; |
655 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); | 664 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
656 | eb->read_mirror = failed_mirror; | 665 | eb->read_mirror = failed_mirror; |
666 | atomic_dec(&eb->io_pages); | ||
657 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) | 667 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) |
658 | btree_readahead_hook(root, eb, eb->start, -EIO); | 668 | btree_readahead_hook(root, eb, eb->start, -EIO); |
659 | return -EIO; /* we fixed nothing */ | 669 | return -EIO; /* we fixed nothing */ |
@@ -670,17 +680,23 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
670 | end_io_wq->work.flags = 0; | 680 | end_io_wq->work.flags = 0; |
671 | 681 | ||
672 | if (bio->bi_rw & REQ_WRITE) { | 682 | if (bio->bi_rw & REQ_WRITE) { |
673 | if (end_io_wq->metadata == 1) | 683 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) |
674 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 684 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, |
675 | &end_io_wq->work); | 685 | &end_io_wq->work); |
676 | else if (end_io_wq->metadata == 2) | 686 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) |
677 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | 687 | btrfs_queue_worker(&fs_info->endio_freespace_worker, |
678 | &end_io_wq->work); | 688 | &end_io_wq->work); |
689 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | ||
690 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | ||
691 | &end_io_wq->work); | ||
679 | else | 692 | else |
680 | btrfs_queue_worker(&fs_info->endio_write_workers, | 693 | btrfs_queue_worker(&fs_info->endio_write_workers, |
681 | &end_io_wq->work); | 694 | &end_io_wq->work); |
682 | } else { | 695 | } else { |
683 | if (end_io_wq->metadata) | 696 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
697 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | ||
698 | &end_io_wq->work); | ||
699 | else if (end_io_wq->metadata) | ||
684 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 700 | btrfs_queue_worker(&fs_info->endio_meta_workers, |
685 | &end_io_wq->work); | 701 | &end_io_wq->work); |
686 | else | 702 | else |
@@ -695,6 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
695 | * 0 - if data | 711 | * 0 - if data |
696 | * 1 - if normal metadata | 712 | * 1 - if normal metadata |
697 | * 2 - if writing to the free space cache area | 713 | * 2 - if writing to the free space cache area |
714 | * 3 - raid parity work | ||
698 | */ | 715 | */ |
699 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | 716 | int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, |
700 | int metadata) | 717 | int metadata) |
@@ -946,18 +963,20 @@ static int btree_writepages(struct address_space *mapping, | |||
946 | struct writeback_control *wbc) | 963 | struct writeback_control *wbc) |
947 | { | 964 | { |
948 | struct extent_io_tree *tree; | 965 | struct extent_io_tree *tree; |
966 | struct btrfs_fs_info *fs_info; | ||
967 | int ret; | ||
968 | |||
949 | tree = &BTRFS_I(mapping->host)->io_tree; | 969 | tree = &BTRFS_I(mapping->host)->io_tree; |
950 | if (wbc->sync_mode == WB_SYNC_NONE) { | 970 | if (wbc->sync_mode == WB_SYNC_NONE) { |
951 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
952 | u64 num_dirty; | ||
953 | unsigned long thresh = 32 * 1024 * 1024; | ||
954 | 971 | ||
955 | if (wbc->for_kupdate) | 972 | if (wbc->for_kupdate) |
956 | return 0; | 973 | return 0; |
957 | 974 | ||
975 | fs_info = BTRFS_I(mapping->host)->root->fs_info; | ||
958 | /* this is a bit racy, but that's ok */ | 976 | /* this is a bit racy, but that's ok */ |
959 | num_dirty = root->fs_info->dirty_metadata_bytes; | 977 | ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, |
960 | if (num_dirty < thresh) | 978 | BTRFS_DIRTY_METADATA_THRESH); |
979 | if (ret < 0) | ||
961 | return 0; | 980 | return 0; |
962 | } | 981 | } |
963 | return btree_write_cache_pages(mapping, wbc); | 982 | return btree_write_cache_pages(mapping, wbc); |
@@ -1125,24 +1144,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
1125 | void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1144 | void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1126 | struct extent_buffer *buf) | 1145 | struct extent_buffer *buf) |
1127 | { | 1146 | { |
1147 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1148 | |||
1128 | if (btrfs_header_generation(buf) == | 1149 | if (btrfs_header_generation(buf) == |
1129 | root->fs_info->running_transaction->transid) { | 1150 | fs_info->running_transaction->transid) { |
1130 | btrfs_assert_tree_locked(buf); | 1151 | btrfs_assert_tree_locked(buf); |
1131 | 1152 | ||
1132 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { | 1153 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { |
1133 | spin_lock(&root->fs_info->delalloc_lock); | 1154 | __percpu_counter_add(&fs_info->dirty_metadata_bytes, |
1134 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | 1155 | -buf->len, |
1135 | root->fs_info->dirty_metadata_bytes -= buf->len; | 1156 | fs_info->dirty_metadata_batch); |
1136 | else { | ||
1137 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1138 | btrfs_panic(root->fs_info, -EOVERFLOW, | ||
1139 | "Can't clear %lu bytes from " | ||
1140 | " dirty_mdatadata_bytes (%llu)", | ||
1141 | buf->len, | ||
1142 | root->fs_info->dirty_metadata_bytes); | ||
1143 | } | ||
1144 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1145 | |||
1146 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | 1157 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
1147 | btrfs_set_lock_blocking(buf); | 1158 | btrfs_set_lock_blocking(buf); |
1148 | clear_extent_buffer_dirty(buf); | 1159 | clear_extent_buffer_dirty(buf); |
@@ -1178,9 +1189,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1178 | 1189 | ||
1179 | INIT_LIST_HEAD(&root->dirty_list); | 1190 | INIT_LIST_HEAD(&root->dirty_list); |
1180 | INIT_LIST_HEAD(&root->root_list); | 1191 | INIT_LIST_HEAD(&root->root_list); |
1192 | INIT_LIST_HEAD(&root->logged_list[0]); | ||
1193 | INIT_LIST_HEAD(&root->logged_list[1]); | ||
1181 | spin_lock_init(&root->orphan_lock); | 1194 | spin_lock_init(&root->orphan_lock); |
1182 | spin_lock_init(&root->inode_lock); | 1195 | spin_lock_init(&root->inode_lock); |
1183 | spin_lock_init(&root->accounting_lock); | 1196 | spin_lock_init(&root->accounting_lock); |
1197 | spin_lock_init(&root->log_extents_lock[0]); | ||
1198 | spin_lock_init(&root->log_extents_lock[1]); | ||
1184 | mutex_init(&root->objectid_mutex); | 1199 | mutex_init(&root->objectid_mutex); |
1185 | mutex_init(&root->log_mutex); | 1200 | mutex_init(&root->log_mutex); |
1186 | init_waitqueue_head(&root->log_writer_wait); | 1201 | init_waitqueue_head(&root->log_writer_wait); |
@@ -2004,10 +2019,24 @@ int open_ctree(struct super_block *sb, | |||
2004 | goto fail_srcu; | 2019 | goto fail_srcu; |
2005 | } | 2020 | } |
2006 | 2021 | ||
2022 | ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); | ||
2023 | if (ret) { | ||
2024 | err = ret; | ||
2025 | goto fail_bdi; | ||
2026 | } | ||
2027 | fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * | ||
2028 | (1 + ilog2(nr_cpu_ids)); | ||
2029 | |||
2030 | ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); | ||
2031 | if (ret) { | ||
2032 | err = ret; | ||
2033 | goto fail_dirty_metadata_bytes; | ||
2034 | } | ||
2035 | |||
2007 | fs_info->btree_inode = new_inode(sb); | 2036 | fs_info->btree_inode = new_inode(sb); |
2008 | if (!fs_info->btree_inode) { | 2037 | if (!fs_info->btree_inode) { |
2009 | err = -ENOMEM; | 2038 | err = -ENOMEM; |
2010 | goto fail_bdi; | 2039 | goto fail_delalloc_bytes; |
2011 | } | 2040 | } |
2012 | 2041 | ||
2013 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 2042 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
@@ -2017,7 +2046,6 @@ int open_ctree(struct super_block *sb, | |||
2017 | INIT_LIST_HEAD(&fs_info->dead_roots); | 2046 | INIT_LIST_HEAD(&fs_info->dead_roots); |
2018 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | 2047 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
2019 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 2048 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
2020 | INIT_LIST_HEAD(&fs_info->ordered_operations); | ||
2021 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 2049 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
2022 | spin_lock_init(&fs_info->delalloc_lock); | 2050 | spin_lock_init(&fs_info->delalloc_lock); |
2023 | spin_lock_init(&fs_info->trans_lock); | 2051 | spin_lock_init(&fs_info->trans_lock); |
@@ -2028,6 +2056,7 @@ int open_ctree(struct super_block *sb, | |||
2028 | spin_lock_init(&fs_info->tree_mod_seq_lock); | 2056 | spin_lock_init(&fs_info->tree_mod_seq_lock); |
2029 | rwlock_init(&fs_info->tree_mod_log_lock); | 2057 | rwlock_init(&fs_info->tree_mod_log_lock); |
2030 | mutex_init(&fs_info->reloc_mutex); | 2058 | mutex_init(&fs_info->reloc_mutex); |
2059 | seqlock_init(&fs_info->profiles_lock); | ||
2031 | 2060 | ||
2032 | init_completion(&fs_info->kobj_unregister); | 2061 | init_completion(&fs_info->kobj_unregister); |
2033 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 2062 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
@@ -2126,6 +2155,7 @@ int open_ctree(struct super_block *sb, | |||
2126 | 2155 | ||
2127 | spin_lock_init(&fs_info->block_group_cache_lock); | 2156 | spin_lock_init(&fs_info->block_group_cache_lock); |
2128 | fs_info->block_group_cache_tree = RB_ROOT; | 2157 | fs_info->block_group_cache_tree = RB_ROOT; |
2158 | fs_info->first_logical_byte = (u64)-1; | ||
2129 | 2159 | ||
2130 | extent_io_tree_init(&fs_info->freed_extents[0], | 2160 | extent_io_tree_init(&fs_info->freed_extents[0], |
2131 | fs_info->btree_inode->i_mapping); | 2161 | fs_info->btree_inode->i_mapping); |
@@ -2165,6 +2195,12 @@ int open_ctree(struct super_block *sb, | |||
2165 | init_waitqueue_head(&fs_info->transaction_blocked_wait); | 2195 | init_waitqueue_head(&fs_info->transaction_blocked_wait); |
2166 | init_waitqueue_head(&fs_info->async_submit_wait); | 2196 | init_waitqueue_head(&fs_info->async_submit_wait); |
2167 | 2197 | ||
2198 | ret = btrfs_alloc_stripe_hash_table(fs_info); | ||
2199 | if (ret) { | ||
2200 | err = ret; | ||
2201 | goto fail_alloc; | ||
2202 | } | ||
2203 | |||
2168 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 2204 | __setup_root(4096, 4096, 4096, 4096, tree_root, |
2169 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 2205 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
2170 | 2206 | ||
@@ -2187,7 +2223,8 @@ int open_ctree(struct super_block *sb, | |||
2187 | goto fail_alloc; | 2223 | goto fail_alloc; |
2188 | 2224 | ||
2189 | /* check FS state, whether FS is broken. */ | 2225 | /* check FS state, whether FS is broken. */ |
2190 | fs_info->fs_state |= btrfs_super_flags(disk_super); | 2226 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) |
2227 | set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); | ||
2191 | 2228 | ||
2192 | ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 2229 | ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
2193 | if (ret) { | 2230 | if (ret) { |
@@ -2261,6 +2298,8 @@ int open_ctree(struct super_block *sb, | |||
2261 | leafsize = btrfs_super_leafsize(disk_super); | 2298 | leafsize = btrfs_super_leafsize(disk_super); |
2262 | sectorsize = btrfs_super_sectorsize(disk_super); | 2299 | sectorsize = btrfs_super_sectorsize(disk_super); |
2263 | stripesize = btrfs_super_stripesize(disk_super); | 2300 | stripesize = btrfs_super_stripesize(disk_super); |
2301 | fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); | ||
2302 | fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); | ||
2264 | 2303 | ||
2265 | /* | 2304 | /* |
2266 | * mixed block groups end up with duplicate but slightly offset | 2305 | * mixed block groups end up with duplicate but slightly offset |
@@ -2332,6 +2371,12 @@ int open_ctree(struct super_block *sb, | |||
2332 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | 2371 | btrfs_init_workers(&fs_info->endio_meta_write_workers, |
2333 | "endio-meta-write", fs_info->thread_pool_size, | 2372 | "endio-meta-write", fs_info->thread_pool_size, |
2334 | &fs_info->generic_worker); | 2373 | &fs_info->generic_worker); |
2374 | btrfs_init_workers(&fs_info->endio_raid56_workers, | ||
2375 | "endio-raid56", fs_info->thread_pool_size, | ||
2376 | &fs_info->generic_worker); | ||
2377 | btrfs_init_workers(&fs_info->rmw_workers, | ||
2378 | "rmw", fs_info->thread_pool_size, | ||
2379 | &fs_info->generic_worker); | ||
2335 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | 2380 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", |
2336 | fs_info->thread_pool_size, | 2381 | fs_info->thread_pool_size, |
2337 | &fs_info->generic_worker); | 2382 | &fs_info->generic_worker); |
@@ -2350,6 +2395,8 @@ int open_ctree(struct super_block *sb, | |||
2350 | */ | 2395 | */ |
2351 | fs_info->endio_workers.idle_thresh = 4; | 2396 | fs_info->endio_workers.idle_thresh = 4; |
2352 | fs_info->endio_meta_workers.idle_thresh = 4; | 2397 | fs_info->endio_meta_workers.idle_thresh = 4; |
2398 | fs_info->endio_raid56_workers.idle_thresh = 4; | ||
2399 | fs_info->rmw_workers.idle_thresh = 2; | ||
2353 | 2400 | ||
2354 | fs_info->endio_write_workers.idle_thresh = 2; | 2401 | fs_info->endio_write_workers.idle_thresh = 2; |
2355 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2402 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
@@ -2366,6 +2413,8 @@ int open_ctree(struct super_block *sb, | |||
2366 | ret |= btrfs_start_workers(&fs_info->fixup_workers); | 2413 | ret |= btrfs_start_workers(&fs_info->fixup_workers); |
2367 | ret |= btrfs_start_workers(&fs_info->endio_workers); | 2414 | ret |= btrfs_start_workers(&fs_info->endio_workers); |
2368 | ret |= btrfs_start_workers(&fs_info->endio_meta_workers); | 2415 | ret |= btrfs_start_workers(&fs_info->endio_meta_workers); |
2416 | ret |= btrfs_start_workers(&fs_info->rmw_workers); | ||
2417 | ret |= btrfs_start_workers(&fs_info->endio_raid56_workers); | ||
2369 | ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); | 2418 | ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); |
2370 | ret |= btrfs_start_workers(&fs_info->endio_write_workers); | 2419 | ret |= btrfs_start_workers(&fs_info->endio_write_workers); |
2371 | ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); | 2420 | ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); |
@@ -2390,8 +2439,7 @@ int open_ctree(struct super_block *sb, | |||
2390 | sb->s_blocksize = sectorsize; | 2439 | sb->s_blocksize = sectorsize; |
2391 | sb->s_blocksize_bits = blksize_bits(sectorsize); | 2440 | sb->s_blocksize_bits = blksize_bits(sectorsize); |
2392 | 2441 | ||
2393 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | 2442 | if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) { |
2394 | sizeof(disk_super->magic))) { | ||
2395 | printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); | 2443 | printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); |
2396 | goto fail_sb_buffer; | 2444 | goto fail_sb_buffer; |
2397 | } | 2445 | } |
@@ -2694,13 +2742,13 @@ fail_cleaner: | |||
2694 | * kthreads | 2742 | * kthreads |
2695 | */ | 2743 | */ |
2696 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); | 2744 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); |
2697 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
2698 | 2745 | ||
2699 | fail_block_groups: | 2746 | fail_block_groups: |
2700 | btrfs_free_block_groups(fs_info); | 2747 | btrfs_free_block_groups(fs_info); |
2701 | 2748 | ||
2702 | fail_tree_roots: | 2749 | fail_tree_roots: |
2703 | free_root_pointers(fs_info, 1); | 2750 | free_root_pointers(fs_info, 1); |
2751 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
2704 | 2752 | ||
2705 | fail_sb_buffer: | 2753 | fail_sb_buffer: |
2706 | btrfs_stop_workers(&fs_info->generic_worker); | 2754 | btrfs_stop_workers(&fs_info->generic_worker); |
@@ -2710,6 +2758,8 @@ fail_sb_buffer: | |||
2710 | btrfs_stop_workers(&fs_info->workers); | 2758 | btrfs_stop_workers(&fs_info->workers); |
2711 | btrfs_stop_workers(&fs_info->endio_workers); | 2759 | btrfs_stop_workers(&fs_info->endio_workers); |
2712 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2760 | btrfs_stop_workers(&fs_info->endio_meta_workers); |
2761 | btrfs_stop_workers(&fs_info->endio_raid56_workers); | ||
2762 | btrfs_stop_workers(&fs_info->rmw_workers); | ||
2713 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2763 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2714 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2764 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2715 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2765 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
@@ -2721,13 +2771,17 @@ fail_alloc: | |||
2721 | fail_iput: | 2771 | fail_iput: |
2722 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2772 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2723 | 2773 | ||
2724 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
2725 | iput(fs_info->btree_inode); | 2774 | iput(fs_info->btree_inode); |
2775 | fail_delalloc_bytes: | ||
2776 | percpu_counter_destroy(&fs_info->delalloc_bytes); | ||
2777 | fail_dirty_metadata_bytes: | ||
2778 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | ||
2726 | fail_bdi: | 2779 | fail_bdi: |
2727 | bdi_destroy(&fs_info->bdi); | 2780 | bdi_destroy(&fs_info->bdi); |
2728 | fail_srcu: | 2781 | fail_srcu: |
2729 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2782 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
2730 | fail: | 2783 | fail: |
2784 | btrfs_free_stripe_hash_table(fs_info); | ||
2731 | btrfs_close_devices(fs_info->fs_devices); | 2785 | btrfs_close_devices(fs_info->fs_devices); |
2732 | return err; | 2786 | return err; |
2733 | 2787 | ||
@@ -2795,8 +2849,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) | |||
2795 | 2849 | ||
2796 | super = (struct btrfs_super_block *)bh->b_data; | 2850 | super = (struct btrfs_super_block *)bh->b_data; |
2797 | if (btrfs_super_bytenr(super) != bytenr || | 2851 | if (btrfs_super_bytenr(super) != bytenr || |
2798 | strncmp((char *)(&super->magic), BTRFS_MAGIC, | 2852 | super->magic != cpu_to_le64(BTRFS_MAGIC)) { |
2799 | sizeof(super->magic))) { | ||
2800 | brelse(bh); | 2853 | brelse(bh); |
2801 | continue; | 2854 | continue; |
2802 | } | 2855 | } |
@@ -3076,11 +3129,16 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( | |||
3076 | ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) | 3129 | ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) |
3077 | == 0))) | 3130 | == 0))) |
3078 | num_tolerated_disk_barrier_failures = 0; | 3131 | num_tolerated_disk_barrier_failures = 0; |
3079 | else if (num_tolerated_disk_barrier_failures > 1 | 3132 | else if (num_tolerated_disk_barrier_failures > 1) { |
3080 | && | 3133 | if (flags & (BTRFS_BLOCK_GROUP_RAID1 | |
3081 | (flags & (BTRFS_BLOCK_GROUP_RAID1 | | 3134 | BTRFS_BLOCK_GROUP_RAID5 | |
3082 | BTRFS_BLOCK_GROUP_RAID10))) | 3135 | BTRFS_BLOCK_GROUP_RAID10)) { |
3083 | num_tolerated_disk_barrier_failures = 1; | 3136 | num_tolerated_disk_barrier_failures = 1; |
3137 | } else if (flags & | ||
3138 | BTRFS_BLOCK_GROUP_RAID6) { /* RAID5 is already matched by the branch above, so testing it here was dead code */ | ||
3139 | num_tolerated_disk_barrier_failures = 2; | ||
3140 | } | ||
3141 | } | ||
3084 | } | 3142 | } |
3085 | } | 3143 | } |
3086 | up_read(&sinfo->groups_sem); | 3144 | up_read(&sinfo->groups_sem); |
@@ -3195,6 +3253,11 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | |||
3195 | if (btrfs_root_refs(&root->root_item) == 0) | 3253 | if (btrfs_root_refs(&root->root_item) == 0) |
3196 | synchronize_srcu(&fs_info->subvol_srcu); | 3254 | synchronize_srcu(&fs_info->subvol_srcu); |
3197 | 3255 | ||
3256 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { /* fs_state is updated with set_bit()/test_bit() bit numbers, not the BTRFS_SUPER_FLAG_ERROR mask */ | ||
3257 | btrfs_free_log(NULL, root); | ||
3258 | btrfs_free_log_root_tree(NULL, fs_info); | ||
3259 | } | ||
3260 | |||
3198 | __btrfs_remove_free_space_cache(root->free_ino_pinned); | 3261 | __btrfs_remove_free_space_cache(root->free_ino_pinned); |
3199 | __btrfs_remove_free_space_cache(root->free_ino_ctl); | 3262 | __btrfs_remove_free_space_cache(root->free_ino_ctl); |
3200 | free_fs_root(root); | 3263 | free_fs_root(root); |
@@ -3339,7 +3402,7 @@ int close_ctree(struct btrfs_root *root) | |||
3339 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 3402 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
3340 | } | 3403 | } |
3341 | 3404 | ||
3342 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 3405 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
3343 | btrfs_error_commit_super(root); | 3406 | btrfs_error_commit_super(root); |
3344 | 3407 | ||
3345 | btrfs_put_block_group_cache(fs_info); | 3408 | btrfs_put_block_group_cache(fs_info); |
@@ -3352,9 +3415,9 @@ int close_ctree(struct btrfs_root *root) | |||
3352 | 3415 | ||
3353 | btrfs_free_qgroup_config(root->fs_info); | 3416 | btrfs_free_qgroup_config(root->fs_info); |
3354 | 3417 | ||
3355 | if (fs_info->delalloc_bytes) { | 3418 | if (percpu_counter_sum(&fs_info->delalloc_bytes)) { |
3356 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 3419 | printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n", |
3357 | (unsigned long long)fs_info->delalloc_bytes); | 3420 | percpu_counter_sum(&fs_info->delalloc_bytes)); |
3358 | } | 3421 | } |
3359 | 3422 | ||
3360 | free_extent_buffer(fs_info->extent_root->node); | 3423 | free_extent_buffer(fs_info->extent_root->node); |
@@ -3384,6 +3447,8 @@ int close_ctree(struct btrfs_root *root) | |||
3384 | btrfs_stop_workers(&fs_info->workers); | 3447 | btrfs_stop_workers(&fs_info->workers); |
3385 | btrfs_stop_workers(&fs_info->endio_workers); | 3448 | btrfs_stop_workers(&fs_info->endio_workers); |
3386 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 3449 | btrfs_stop_workers(&fs_info->endio_meta_workers); |
3450 | btrfs_stop_workers(&fs_info->endio_raid56_workers); | ||
3451 | btrfs_stop_workers(&fs_info->rmw_workers); | ||
3387 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 3452 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
3388 | btrfs_stop_workers(&fs_info->endio_write_workers); | 3453 | btrfs_stop_workers(&fs_info->endio_write_workers); |
3389 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 3454 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
@@ -3401,9 +3466,13 @@ int close_ctree(struct btrfs_root *root) | |||
3401 | btrfs_close_devices(fs_info->fs_devices); | 3466 | btrfs_close_devices(fs_info->fs_devices); |
3402 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 3467 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
3403 | 3468 | ||
3469 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | ||
3470 | percpu_counter_destroy(&fs_info->delalloc_bytes); | ||
3404 | bdi_destroy(&fs_info->bdi); | 3471 | bdi_destroy(&fs_info->bdi); |
3405 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3472 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
3406 | 3473 | ||
3474 | btrfs_free_stripe_hash_table(fs_info); | ||
3475 | |||
3407 | return 0; | 3476 | return 0; |
3408 | } | 3477 | } |
3409 | 3478 | ||
@@ -3443,11 +3512,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
3443 | (unsigned long long)transid, | 3512 | (unsigned long long)transid, |
3444 | (unsigned long long)root->fs_info->generation); | 3513 | (unsigned long long)root->fs_info->generation); |
3445 | was_dirty = set_extent_buffer_dirty(buf); | 3514 | was_dirty = set_extent_buffer_dirty(buf); |
3446 | if (!was_dirty) { | 3515 | if (!was_dirty) |
3447 | spin_lock(&root->fs_info->delalloc_lock); | 3516 | __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, |
3448 | root->fs_info->dirty_metadata_bytes += buf->len; | 3517 | buf->len, |
3449 | spin_unlock(&root->fs_info->delalloc_lock); | 3518 | root->fs_info->dirty_metadata_batch); |
3450 | } | ||
3451 | } | 3519 | } |
3452 | 3520 | ||
3453 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | 3521 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, |
@@ -3457,8 +3525,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | |||
3457 | * looks as though older kernels can get into trouble with | 3525 | * looks as though older kernels can get into trouble with |
3458 | * this code, they end up stuck in balance_dirty_pages forever | 3526 | * this code, they end up stuck in balance_dirty_pages forever |
3459 | */ | 3527 | */ |
3460 | u64 num_dirty; | 3528 | int ret; |
3461 | unsigned long thresh = 32 * 1024 * 1024; | ||
3462 | 3529 | ||
3463 | if (current->flags & PF_MEMALLOC) | 3530 | if (current->flags & PF_MEMALLOC) |
3464 | return; | 3531 | return; |
@@ -3466,9 +3533,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | |||
3466 | if (flush_delayed) | 3533 | if (flush_delayed) |
3467 | btrfs_balance_delayed_items(root); | 3534 | btrfs_balance_delayed_items(root); |
3468 | 3535 | ||
3469 | num_dirty = root->fs_info->dirty_metadata_bytes; | 3536 | ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes, |
3470 | 3537 | BTRFS_DIRTY_METADATA_THRESH); | |
3471 | if (num_dirty > thresh) { | 3538 | if (ret > 0) { |
3472 | balance_dirty_pages_ratelimited( | 3539 | balance_dirty_pages_ratelimited( |
3473 | root->fs_info->btree_inode->i_mapping); | 3540 | root->fs_info->btree_inode->i_mapping); |
3474 | } | 3541 | } |
@@ -3518,7 +3585,8 @@ void btrfs_error_commit_super(struct btrfs_root *root) | |||
3518 | btrfs_cleanup_transaction(root); | 3585 | btrfs_cleanup_transaction(root); |
3519 | } | 3586 | } |
3520 | 3587 | ||
3521 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | 3588 | static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, |
3589 | struct btrfs_root *root) | ||
3522 | { | 3590 | { |
3523 | struct btrfs_inode *btrfs_inode; | 3591 | struct btrfs_inode *btrfs_inode; |
3524 | struct list_head splice; | 3592 | struct list_head splice; |
@@ -3528,7 +3596,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | |||
3528 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 3596 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
3529 | spin_lock(&root->fs_info->ordered_extent_lock); | 3597 | spin_lock(&root->fs_info->ordered_extent_lock); |
3530 | 3598 | ||
3531 | list_splice_init(&root->fs_info->ordered_operations, &splice); | 3599 | list_splice_init(&t->ordered_operations, &splice); |
3532 | while (!list_empty(&splice)) { | 3600 | while (!list_empty(&splice)) { |
3533 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 3601 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
3534 | ordered_operations); | 3602 | ordered_operations); |
@@ -3544,35 +3612,16 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | |||
3544 | 3612 | ||
3545 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root) | 3613 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root) |
3546 | { | 3614 | { |
3547 | struct list_head splice; | ||
3548 | struct btrfs_ordered_extent *ordered; | 3615 | struct btrfs_ordered_extent *ordered; |
3549 | struct inode *inode; | ||
3550 | |||
3551 | INIT_LIST_HEAD(&splice); | ||
3552 | 3616 | ||
3553 | spin_lock(&root->fs_info->ordered_extent_lock); | 3617 | spin_lock(&root->fs_info->ordered_extent_lock); |
3554 | 3618 | /* | |
3555 | list_splice_init(&root->fs_info->ordered_extents, &splice); | 3619 | * This will just short circuit the ordered completion stuff which will |
3556 | while (!list_empty(&splice)) { | 3620 | * make sure the ordered extent gets properly cleaned up. |
3557 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | 3621 | */ |
3558 | root_extent_list); | 3622 | list_for_each_entry(ordered, &root->fs_info->ordered_extents, |
3559 | 3623 | root_extent_list) | |
3560 | list_del_init(&ordered->root_extent_list); | 3624 | set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); |
3561 | atomic_inc(&ordered->refs); | ||
3562 | |||
3563 | /* the inode may be getting freed (in sys_unlink path). */ | ||
3564 | inode = igrab(ordered->inode); | ||
3565 | |||
3566 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
3567 | if (inode) | ||
3568 | iput(inode); | ||
3569 | |||
3570 | atomic_set(&ordered->refs, 1); | ||
3571 | btrfs_put_ordered_extent(ordered); | ||
3572 | |||
3573 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
3574 | } | ||
3575 | |||
3576 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3625 | spin_unlock(&root->fs_info->ordered_extent_lock); |
3577 | } | 3626 | } |
3578 | 3627 | ||
@@ -3594,11 +3643,11 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3594 | } | 3643 | } |
3595 | 3644 | ||
3596 | while ((node = rb_first(&delayed_refs->root)) != NULL) { | 3645 | while ((node = rb_first(&delayed_refs->root)) != NULL) { |
3597 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | 3646 | struct btrfs_delayed_ref_head *head = NULL; |
3598 | 3647 | ||
3648 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
3599 | atomic_set(&ref->refs, 1); | 3649 | atomic_set(&ref->refs, 1); |
3600 | if (btrfs_delayed_ref_is_head(ref)) { | 3650 | if (btrfs_delayed_ref_is_head(ref)) { |
3601 | struct btrfs_delayed_ref_head *head; | ||
3602 | 3651 | ||
3603 | head = btrfs_delayed_node_to_head(ref); | 3652 | head = btrfs_delayed_node_to_head(ref); |
3604 | if (!mutex_trylock(&head->mutex)) { | 3653 | if (!mutex_trylock(&head->mutex)) { |
@@ -3614,16 +3663,18 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3614 | continue; | 3663 | continue; |
3615 | } | 3664 | } |
3616 | 3665 | ||
3617 | kfree(head->extent_op); | 3666 | btrfs_free_delayed_extent_op(head->extent_op); |
3618 | delayed_refs->num_heads--; | 3667 | delayed_refs->num_heads--; |
3619 | if (list_empty(&head->cluster)) | 3668 | if (list_empty(&head->cluster)) |
3620 | delayed_refs->num_heads_ready--; | 3669 | delayed_refs->num_heads_ready--; |
3621 | list_del_init(&head->cluster); | 3670 | list_del_init(&head->cluster); |
3622 | } | 3671 | } |
3672 | |||
3623 | ref->in_tree = 0; | 3673 | ref->in_tree = 0; |
3624 | rb_erase(&ref->rb_node, &delayed_refs->root); | 3674 | rb_erase(&ref->rb_node, &delayed_refs->root); |
3625 | delayed_refs->num_entries--; | 3675 | delayed_refs->num_entries--; |
3626 | 3676 | if (head) | |
3677 | mutex_unlock(&head->mutex); | ||
3627 | spin_unlock(&delayed_refs->lock); | 3678 | spin_unlock(&delayed_refs->lock); |
3628 | btrfs_put_delayed_ref(ref); | 3679 | btrfs_put_delayed_ref(ref); |
3629 | 3680 | ||
@@ -3671,6 +3722,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | |||
3671 | delalloc_inodes); | 3722 | delalloc_inodes); |
3672 | 3723 | ||
3673 | list_del_init(&btrfs_inode->delalloc_inodes); | 3724 | list_del_init(&btrfs_inode->delalloc_inodes); |
3725 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
3726 | &btrfs_inode->runtime_flags); | ||
3674 | 3727 | ||
3675 | btrfs_invalidate_inodes(btrfs_inode->root); | 3728 | btrfs_invalidate_inodes(btrfs_inode->root); |
3676 | } | 3729 | } |
@@ -3823,10 +3876,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3823 | 3876 | ||
3824 | while (!list_empty(&list)) { | 3877 | while (!list_empty(&list)) { |
3825 | t = list_entry(list.next, struct btrfs_transaction, list); | 3878 | t = list_entry(list.next, struct btrfs_transaction, list); |
3826 | if (!t) | ||
3827 | break; | ||
3828 | 3879 | ||
3829 | btrfs_destroy_ordered_operations(root); | 3880 | btrfs_destroy_ordered_operations(t, root); |
3830 | 3881 | ||
3831 | btrfs_destroy_ordered_extents(root); | 3882 | btrfs_destroy_ordered_extents(root); |
3832 | 3883 | ||