Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--	fs/btrfs/disk-io.c	227
1 file changed, 139 insertions, 88 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a8f652dc940b..02369a3c162e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -46,6 +46,7 @@
 #include "check-integrity.h"
 #include "rcu-string.h"
 #include "dev-replace.h"
+#include "raid56.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
@@ -56,7 +57,8 @@ static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
				    int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+					     struct btrfs_root *root);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
				       struct btrfs_root *root);
@@ -420,7 +422,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 {
	struct extent_io_tree *tree;
-	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 start = page_offset(page);
	u64 found_start;
	struct extent_buffer *eb;
 
@@ -639,8 +641,15 @@ err:
		btree_readahead_hook(root, eb, eb->start, ret);
	}
 
-	if (ret)
+	if (ret) {
+		/*
+		 * our io error hook is going to dec the io pages
+		 * again, we have to make sure it has something
+		 * to decrement
+		 */
+		atomic_inc(&eb->io_pages);
		clear_extent_buffer_uptodate(eb);
+	}
	free_extent_buffer(eb);
 out:
	return ret;
@@ -654,6 +663,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
	eb = (struct extent_buffer *)page->private;
	set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
	eb->read_mirror = failed_mirror;
+	atomic_dec(&eb->io_pages);
	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
		btree_readahead_hook(root, eb, eb->start, -EIO);
	return -EIO;	/* we fixed nothing */
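The two hunks above pair up: the failed-read hook now drops eb->io_pages, so the error path in the end_io hook bumps the counter first to keep it from underflowing. A tiny userspace model of that pattern, using C11 atomics (none of this is btrfs code; the names are made up for illustration):

    /* io_pages_model.c - keep an atomic balanced when an error hook also decrements */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int io_pages = 1;          /* one page still outstanding */

    static void io_failed_hook(void)
    {
            atomic_fetch_sub(&io_pages, 1);  /* the hook always decrements */
    }

    int main(void)
    {
            int ret = -5;                    /* pretend the read failed */

            if (ret) {
                    /* give the hook something to decrement */
                    atomic_fetch_add(&io_pages, 1);
                    io_failed_hook();
            }
            printf("io_pages = %d\n", atomic_load(&io_pages));  /* still 1 */
            return 0;
    }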
@@ -670,17 +680,23 @@ static void end_workqueue_bio(struct bio *bio, int err)
	end_io_wq->work.flags = 0;
 
	if (bio->bi_rw & REQ_WRITE) {
-		if (end_io_wq->metadata == 1)
+		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
					   &end_io_wq->work);
-		else if (end_io_wq->metadata == 2)
+		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
			btrfs_queue_worker(&fs_info->endio_freespace_worker,
					   &end_io_wq->work);
+		else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+			btrfs_queue_worker(&fs_info->endio_raid56_workers,
+					   &end_io_wq->work);
		else
			btrfs_queue_worker(&fs_info->endio_write_workers,
					   &end_io_wq->work);
	} else {
-		if (end_io_wq->metadata)
+		if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+			btrfs_queue_worker(&fs_info->endio_raid56_workers,
+					   &end_io_wq->work);
+		else if (end_io_wq->metadata)
			btrfs_queue_worker(&fs_info->endio_meta_workers,
					   &end_io_wq->work);
		else
@@ -695,6 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
  * 0 - if data
  * 1 - if normal metadta
  * 2 - if writing to the free space cache area
+ * 3 - raid parity work
  */
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
			int metadata)
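For reference, the metadata values documented above correspond to the BTRFS_WQ_ENDIO_* names used in the previous hunk. A minimal sketch of how that dispatch reads, assuming the enum values simply mirror the 0/1/2/3 comment (the real definitions live in the btrfs headers added by this series):

    #include <stdio.h>

    /* assumed values, matching the 0/1/2/3 comment above */
    enum btrfs_wq_endio_type {
            BTRFS_WQ_ENDIO_DATA       = 0,
            BTRFS_WQ_ENDIO_METADATA   = 1,
            BTRFS_WQ_ENDIO_FREE_SPACE = 2,
            BTRFS_WQ_ENDIO_RAID56     = 3,
    };

    /* pick the write-completion queue the same way end_workqueue_bio does */
    static const char *write_endio_queue(enum btrfs_wq_endio_type t)
    {
            switch (t) {
            case BTRFS_WQ_ENDIO_METADATA:   return "endio-meta-write";
            case BTRFS_WQ_ENDIO_FREE_SPACE: return "endio-freespace";
            case BTRFS_WQ_ENDIO_RAID56:     return "endio-raid56";
            default:                        return "endio-write";
            }
    }

    int main(void)
    {
            printf("%s\n", write_endio_queue(BTRFS_WQ_ENDIO_RAID56));
            printf("%s\n", write_endio_queue(BTRFS_WQ_ENDIO_DATA));
            return 0;
    }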
@@ -946,18 +963,20 @@ static int btree_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
 {
	struct extent_io_tree *tree;
+	struct btrfs_fs_info *fs_info;
+	int ret;
+
	tree = &BTRFS_I(mapping->host)->io_tree;
	if (wbc->sync_mode == WB_SYNC_NONE) {
-		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
-		u64 num_dirty;
-		unsigned long thresh = 32 * 1024 * 1024;
 
		if (wbc->for_kupdate)
			return 0;
 
+		fs_info = BTRFS_I(mapping->host)->root->fs_info;
		/* this is a bit racy, but that's ok */
-		num_dirty = root->fs_info->dirty_metadata_bytes;
-		if (num_dirty < thresh)
+		ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+					     BTRFS_DIRTY_METADATA_THRESH);
+		if (ret < 0)
			return 0;
	}
	return btree_write_cache_pages(mapping, wbc);
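percpu_counter_compare() returns a sign: negative when the (approximate) counter total is below the right-hand side, zero when equal, positive when above, which is why `ret < 0` means "not enough dirty metadata yet, skip writeback". A rough userspace model of that contract, assuming the new BTRFS_DIRTY_METADATA_THRESH keeps the old open-coded 32M value (plain C, not the kernel implementation):

    #include <stdio.h>

    #define DIRTY_METADATA_THRESH (32 * 1024 * 1024)    /* 32M, as in the old code */

    /* sign-of-comparison helper, mimicking percpu_counter_compare() */
    static int counter_compare(long long sum, long long rhs)
    {
            if (sum < rhs)
                    return -1;
            return sum > rhs ? 1 : 0;
    }

    int main(void)
    {
            long long dirty = 8LL * 1024 * 1024;         /* 8M of dirty metadata */

            if (counter_compare(dirty, DIRTY_METADATA_THRESH) < 0)
                    printf("below threshold: skip btree writeback\n");
            else
                    printf("flush btree pages\n");
            return 0;
    }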
@@ -1125,24 +1144,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      struct extent_buffer *buf)
 {
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
	if (btrfs_header_generation(buf) ==
-	    root->fs_info->running_transaction->transid) {
+	    fs_info->running_transaction->transid) {
		btrfs_assert_tree_locked(buf);
 
		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
-			spin_lock(&root->fs_info->delalloc_lock);
-			if (root->fs_info->dirty_metadata_bytes >= buf->len)
-				root->fs_info->dirty_metadata_bytes -= buf->len;
-			else {
-				spin_unlock(&root->fs_info->delalloc_lock);
-				btrfs_panic(root->fs_info, -EOVERFLOW,
-					  "Can't clear %lu bytes from "
-					  " dirty_mdatadata_bytes (%llu)",
-					  buf->len,
-					  root->fs_info->dirty_metadata_bytes);
-			}
-			spin_unlock(&root->fs_info->delalloc_lock);
-
+			__percpu_counter_add(&fs_info->dirty_metadata_bytes,
+					     -buf->len,
+					     fs_info->dirty_metadata_batch);
			/* ugh, clear_extent_buffer_dirty needs to lock the page */
			btrfs_set_lock_blocking(buf);
			clear_extent_buffer_dirty(buf);
@@ -1178,9 +1189,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 
	INIT_LIST_HEAD(&root->dirty_list);
	INIT_LIST_HEAD(&root->root_list);
+	INIT_LIST_HEAD(&root->logged_list[0]);
+	INIT_LIST_HEAD(&root->logged_list[1]);
	spin_lock_init(&root->orphan_lock);
	spin_lock_init(&root->inode_lock);
	spin_lock_init(&root->accounting_lock);
+	spin_lock_init(&root->log_extents_lock[0]);
+	spin_lock_init(&root->log_extents_lock[1]);
	mutex_init(&root->objectid_mutex);
	mutex_init(&root->log_mutex);
	init_waitqueue_head(&root->log_writer_wait);
@@ -2004,10 +2019,24 @@ int open_ctree(struct super_block *sb,
		goto fail_srcu;
	}
 
+	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+	if (ret) {
+		err = ret;
+		goto fail_bdi;
+	}
+	fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+					(1 + ilog2(nr_cpu_ids));
+
+	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+	if (ret) {
+		err = ret;
+		goto fail_dirty_metadata_bytes;
+	}
+
	fs_info->btree_inode = new_inode(sb);
	if (!fs_info->btree_inode) {
		err = -ENOMEM;
-		goto fail_bdi;
+		goto fail_delalloc_bytes;
	}
 
	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
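The dirty_metadata_batch value set above is the per-CPU slop allowed before a local delta is folded into the shared percpu counter; PAGE_CACHE_SIZE * (1 + ilog2(nr_cpu_ids)) grows slowly with CPU count (for example 4096 * (1 + 3) = 16384 bytes on an 8-CPU box). A quick standalone check of that arithmetic, with a local ilog2 helper standing in for the kernel's:

    #include <stdio.h>

    /* integer log2, like the kernel's ilog2() for the power-of-two values used here */
    static unsigned int ilog2_u(unsigned int v)
    {
            unsigned int r = 0;
            while (v >>= 1)
                    r++;
            return r;
    }

    int main(void)
    {
            const unsigned int page_size = 4096;    /* typical PAGE_CACHE_SIZE */
            unsigned int cpus;

            for (cpus = 1; cpus <= 64; cpus *= 2)
                    printf("%2u cpus -> batch %u bytes\n",
                           cpus, page_size * (1 + ilog2_u(cpus)));
            return 0;
    }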
@@ -2017,7 +2046,6 @@ int open_ctree(struct super_block *sb,
	INIT_LIST_HEAD(&fs_info->dead_roots);
	INIT_LIST_HEAD(&fs_info->delayed_iputs);
	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
-	INIT_LIST_HEAD(&fs_info->ordered_operations);
	INIT_LIST_HEAD(&fs_info->caching_block_groups);
	spin_lock_init(&fs_info->delalloc_lock);
	spin_lock_init(&fs_info->trans_lock);
@@ -2028,6 +2056,7 @@ int open_ctree(struct super_block *sb,
	spin_lock_init(&fs_info->tree_mod_seq_lock);
	rwlock_init(&fs_info->tree_mod_log_lock);
	mutex_init(&fs_info->reloc_mutex);
+	seqlock_init(&fs_info->profiles_lock);
 
	init_completion(&fs_info->kobj_unregister);
	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2126,6 +2155,7 @@ int open_ctree(struct super_block *sb,
 
	spin_lock_init(&fs_info->block_group_cache_lock);
	fs_info->block_group_cache_tree = RB_ROOT;
+	fs_info->first_logical_byte = (u64)-1;
 
	extent_io_tree_init(&fs_info->freed_extents[0],
			     fs_info->btree_inode->i_mapping);
@@ -2165,6 +2195,12 @@ int open_ctree(struct super_block *sb,
	init_waitqueue_head(&fs_info->transaction_blocked_wait);
	init_waitqueue_head(&fs_info->async_submit_wait);
 
+	ret = btrfs_alloc_stripe_hash_table(fs_info);
+	if (ret) {
+		err = ret;
+		goto fail_alloc;
+	}
+
	__setup_root(4096, 4096, 4096, 4096, tree_root,
		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
@@ -2187,7 +2223,8 @@ int open_ctree(struct super_block *sb,
		goto fail_alloc;
 
	/* check FS state, whether FS is broken. */
-	fs_info->fs_state |= btrfs_super_flags(disk_super);
+	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
+		set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 
	ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
	if (ret) {
@@ -2261,6 +2298,8 @@ int open_ctree(struct super_block *sb,
	leafsize = btrfs_super_leafsize(disk_super);
	sectorsize = btrfs_super_sectorsize(disk_super);
	stripesize = btrfs_super_stripesize(disk_super);
+	fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
+	fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
 
	/*
	 * mixed block groups end up with duplicate but slightly offset
@@ -2332,6 +2371,12 @@ int open_ctree(struct super_block *sb,
	btrfs_init_workers(&fs_info->endio_meta_write_workers,
			   "endio-meta-write", fs_info->thread_pool_size,
			   &fs_info->generic_worker);
+	btrfs_init_workers(&fs_info->endio_raid56_workers,
+			   "endio-raid56", fs_info->thread_pool_size,
+			   &fs_info->generic_worker);
+	btrfs_init_workers(&fs_info->rmw_workers,
+			   "rmw", fs_info->thread_pool_size,
+			   &fs_info->generic_worker);
	btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
			   fs_info->thread_pool_size,
			   &fs_info->generic_worker);
@@ -2350,6 +2395,8 @@ int open_ctree(struct super_block *sb,
	 */
	fs_info->endio_workers.idle_thresh = 4;
	fs_info->endio_meta_workers.idle_thresh = 4;
+	fs_info->endio_raid56_workers.idle_thresh = 4;
+	fs_info->rmw_workers.idle_thresh = 2;
 
	fs_info->endio_write_workers.idle_thresh = 2;
	fs_info->endio_meta_write_workers.idle_thresh = 2;
@@ -2366,6 +2413,8 @@ int open_ctree(struct super_block *sb,
	ret |= btrfs_start_workers(&fs_info->fixup_workers);
	ret |= btrfs_start_workers(&fs_info->endio_workers);
	ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
+	ret |= btrfs_start_workers(&fs_info->rmw_workers);
+	ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
	ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
	ret |= btrfs_start_workers(&fs_info->endio_write_workers);
	ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
@@ -2390,8 +2439,7 @@ int open_ctree(struct super_block *sb,
	sb->s_blocksize = sectorsize;
	sb->s_blocksize_bits = blksize_bits(sectorsize);
 
-	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
-		    sizeof(disk_super->magic))) {
+	if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
		printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
		goto fail_sb_buffer;
	}
@@ -2694,13 +2742,13 @@ fail_cleaner:
	 * kthreads
	 */
	filemap_write_and_wait(fs_info->btree_inode->i_mapping);
-	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
 fail_block_groups:
	btrfs_free_block_groups(fs_info);
 
 fail_tree_roots:
	free_root_pointers(fs_info, 1);
+	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 
 fail_sb_buffer:
	btrfs_stop_workers(&fs_info->generic_worker);
@@ -2710,6 +2758,8 @@ fail_sb_buffer:
	btrfs_stop_workers(&fs_info->workers);
	btrfs_stop_workers(&fs_info->endio_workers);
	btrfs_stop_workers(&fs_info->endio_meta_workers);
+	btrfs_stop_workers(&fs_info->endio_raid56_workers);
+	btrfs_stop_workers(&fs_info->rmw_workers);
	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
	btrfs_stop_workers(&fs_info->endio_write_workers);
	btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -2721,13 +2771,17 @@ fail_alloc:
 fail_iput:
	btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
-	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
	iput(fs_info->btree_inode);
+fail_delalloc_bytes:
+	percpu_counter_destroy(&fs_info->delalloc_bytes);
+fail_dirty_metadata_bytes:
+	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
 fail_bdi:
	bdi_destroy(&fs_info->bdi);
 fail_srcu:
	cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
+	btrfs_free_stripe_hash_table(fs_info);
	btrfs_close_devices(fs_info->fs_devices);
	return err;
 
@@ -2795,8 +2849,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
 
		super = (struct btrfs_super_block *)bh->b_data;
		if (btrfs_super_bytenr(super) != bytenr ||
-		    strncmp((char *)(&super->magic), BTRFS_MAGIC,
-			    sizeof(super->magic))) {
+		    super->magic != cpu_to_le64(BTRFS_MAGIC)) {
			brelse(bh);
			continue;
		}
@@ -3076,11 +3129,16 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
				    ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
				     == 0)))
					num_tolerated_disk_barrier_failures = 0;
-				else if (num_tolerated_disk_barrier_failures > 1
-					 &&
-					 (flags & (BTRFS_BLOCK_GROUP_RAID1 |
-						   BTRFS_BLOCK_GROUP_RAID10)))
-					num_tolerated_disk_barrier_failures = 1;
+				else if (num_tolerated_disk_barrier_failures > 1) {
+					if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+					    BTRFS_BLOCK_GROUP_RAID5 |
+					    BTRFS_BLOCK_GROUP_RAID10)) {
+						num_tolerated_disk_barrier_failures = 1;
+					} else if (flags &
+						   BTRFS_BLOCK_GROUP_RAID6) {
+						num_tolerated_disk_barrier_failures = 2;
+					}
+				}
			}
		}
		up_read(&sinfo->groups_sem);
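The rewritten branch above encodes how many failed device barriers each profile can absorb: 0 for plain data or single metadata, 1 for RAID1/RAID5/RAID10, 2 for the dual-parity profile. A compact restatement of that decision as a standalone function; the flag values below are illustrative bitmasks, not the real BTRFS_BLOCK_GROUP_* definitions:

    #include <stdio.h>

    /* illustrative stand-ins for the BTRFS_BLOCK_GROUP_* profile bits */
    #define BG_RAID1  (1u << 0)
    #define BG_RAID5  (1u << 1)
    #define BG_RAID6  (1u << 2)
    #define BG_RAID10 (1u << 3)

    /* how many failed disk barriers a profile can tolerate */
    static int tolerated_failures(unsigned int flags)
    {
            if (flags & (BG_RAID1 | BG_RAID5 | BG_RAID10))
                    return 1;
            if (flags & BG_RAID6)
                    return 2;
            return 0;       /* single/dup-style profiles tolerate none */
    }

    int main(void)
    {
            printf("raid10: %d\n", tolerated_failures(BG_RAID10));
            printf("raid6:  %d\n", tolerated_failures(BG_RAID6));
            return 0;
    }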
@@ -3195,6 +3253,11 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
	if (btrfs_root_refs(&root->root_item) == 0)
		synchronize_srcu(&fs_info->subvol_srcu);
 
+	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+		btrfs_free_log(NULL, root);
+		btrfs_free_log_root_tree(NULL, fs_info);
+	}
+
	__btrfs_remove_free_space_cache(root->free_ino_pinned);
	__btrfs_remove_free_space_cache(root->free_ino_ctl);
	free_fs_root(root);
@@ -3339,7 +3402,7 @@ int close_ctree(struct btrfs_root *root)
		printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
	}
 
-	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		btrfs_error_commit_super(root);
 
	btrfs_put_block_group_cache(fs_info);
@@ -3352,9 +3415,9 @@ int close_ctree(struct btrfs_root *root)
 
	btrfs_free_qgroup_config(root->fs_info);
 
-	if (fs_info->delalloc_bytes) {
-		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
-		       (unsigned long long)fs_info->delalloc_bytes);
+	if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
+		printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
+		       percpu_counter_sum(&fs_info->delalloc_bytes));
	}
 
	free_extent_buffer(fs_info->extent_root->node);
@@ -3384,6 +3447,8 @@ int close_ctree(struct btrfs_root *root)
	btrfs_stop_workers(&fs_info->workers);
	btrfs_stop_workers(&fs_info->endio_workers);
	btrfs_stop_workers(&fs_info->endio_meta_workers);
+	btrfs_stop_workers(&fs_info->endio_raid56_workers);
+	btrfs_stop_workers(&fs_info->rmw_workers);
	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
	btrfs_stop_workers(&fs_info->endio_write_workers);
	btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -3401,9 +3466,13 @@ int close_ctree(struct btrfs_root *root)
	btrfs_close_devices(fs_info->fs_devices);
	btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
+	percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
+	percpu_counter_destroy(&fs_info->delalloc_bytes);
	bdi_destroy(&fs_info->bdi);
	cleanup_srcu_struct(&fs_info->subvol_srcu);
 
+	btrfs_free_stripe_hash_table(fs_info);
+
	return 0;
 }
 
@@ -3443,11 +3512,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
			(unsigned long long)transid,
			(unsigned long long)root->fs_info->generation);
	was_dirty = set_extent_buffer_dirty(buf);
-	if (!was_dirty) {
-		spin_lock(&root->fs_info->delalloc_lock);
-		root->fs_info->dirty_metadata_bytes += buf->len;
-		spin_unlock(&root->fs_info->delalloc_lock);
-	}
+	if (!was_dirty)
+		__percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
+				     buf->len,
+				     root->fs_info->dirty_metadata_batch);
 }
 
 static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
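The __percpu_counter_add() calls above replace a spinlock-protected global u64: each CPU accumulates into a local counter and only folds into the shared total once it drifts by more than dirty_metadata_batch, so frequent small updates stay off the shared cacheline at the cost of the total being approximate. A rough single-threaded userspace model of that batching idea (not the kernel's percpu_counter implementation):

    #include <stdio.h>

    /* toy batched counter: one "CPU-local" delta folded into a shared total */
    struct batched_counter {
            long long total;    /* shared, expensive to touch */
            long long local;    /* per-CPU delta, cheap to touch */
            long long batch;
    };

    static void counter_add(struct batched_counter *c, long long amount)
    {
            c->local += amount;
            if (c->local >= c->batch || c->local <= -c->batch) {
                    c->total += c->local;   /* fold into the shared total */
                    c->local = 0;
            }
    }

    int main(void)
    {
            struct batched_counter c = { 0, 0, 16384 };
            int i;

            for (i = 0; i < 10; i++)
                    counter_add(&c, 4096);  /* ten 4K metadata blocks dirtied */
            printf("total=%lld (approximate), local=%lld\n", c.total, c.local);
            return 0;
    }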
@@ -3457,8 +3525,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
	 * looks as though older kernels can get into trouble with
	 * this code, they end up stuck in balance_dirty_pages forever
	 */
-	u64 num_dirty;
-	unsigned long thresh = 32 * 1024 * 1024;
+	int ret;
 
	if (current->flags & PF_MEMALLOC)
		return;
@@ -3466,9 +3533,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
	if (flush_delayed)
		btrfs_balance_delayed_items(root);
 
-	num_dirty = root->fs_info->dirty_metadata_bytes;
-
-	if (num_dirty > thresh) {
+	ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
+				     BTRFS_DIRTY_METADATA_THRESH);
+	if (ret > 0) {
		balance_dirty_pages_ratelimited(
				root->fs_info->btree_inode->i_mapping);
	}
@@ -3518,7 +3585,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
	btrfs_cleanup_transaction(root);
 }
 
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+					     struct btrfs_root *root)
 {
	struct btrfs_inode *btrfs_inode;
	struct list_head splice;
@@ -3528,7 +3596,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
	mutex_lock(&root->fs_info->ordered_operations_mutex);
	spin_lock(&root->fs_info->ordered_extent_lock);
 
-	list_splice_init(&root->fs_info->ordered_operations, &splice);
+	list_splice_init(&t->ordered_operations, &splice);
	while (!list_empty(&splice)) {
		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
					 ordered_operations);
@@ -3544,35 +3612,16 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
 
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
 {
-	struct list_head splice;
	struct btrfs_ordered_extent *ordered;
-	struct inode *inode;
-
-	INIT_LIST_HEAD(&splice);
 
	spin_lock(&root->fs_info->ordered_extent_lock);
-
-	list_splice_init(&root->fs_info->ordered_extents, &splice);
-	while (!list_empty(&splice)) {
-		ordered = list_entry(splice.next, struct btrfs_ordered_extent,
-				     root_extent_list);
-
-		list_del_init(&ordered->root_extent_list);
-		atomic_inc(&ordered->refs);
-
-		/* the inode may be getting freed (in sys_unlink path). */
-		inode = igrab(ordered->inode);
-
-		spin_unlock(&root->fs_info->ordered_extent_lock);
-		if (inode)
-			iput(inode);
-
-		atomic_set(&ordered->refs, 1);
-		btrfs_put_ordered_extent(ordered);
-
-		spin_lock(&root->fs_info->ordered_extent_lock);
-	}
-
+	/*
+	 * This will just short circuit the ordered completion stuff which will
+	 * make sure the ordered extent gets properly cleaned up.
+	 */
+	list_for_each_entry(ordered, &root->fs_info->ordered_extents,
+			    root_extent_list)
+		set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
	spin_unlock(&root->fs_info->ordered_extent_lock);
 }
 
@@ -3594,11 +3643,11 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
	}
 
	while ((node = rb_first(&delayed_refs->root)) != NULL) {
-		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+		struct btrfs_delayed_ref_head *head = NULL;
 
+		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
		atomic_set(&ref->refs, 1);
		if (btrfs_delayed_ref_is_head(ref)) {
-			struct btrfs_delayed_ref_head *head;
 
			head = btrfs_delayed_node_to_head(ref);
			if (!mutex_trylock(&head->mutex)) {
@@ -3614,16 +3663,18 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
				continue;
			}
 
-			kfree(head->extent_op);
+			btrfs_free_delayed_extent_op(head->extent_op);
			delayed_refs->num_heads--;
			if (list_empty(&head->cluster))
				delayed_refs->num_heads_ready--;
			list_del_init(&head->cluster);
		}
+
		ref->in_tree = 0;
		rb_erase(&ref->rb_node, &delayed_refs->root);
		delayed_refs->num_entries--;
-
+		if (head)
+			mutex_unlock(&head->mutex);
		spin_unlock(&delayed_refs->lock);
		btrfs_put_delayed_ref(ref);
 
@@ -3671,6 +3722,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
					    delalloc_inodes);
 
		list_del_init(&btrfs_inode->delalloc_inodes);
+		clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+			  &btrfs_inode->runtime_flags);
 
		btrfs_invalidate_inodes(btrfs_inode->root);
	}
@@ -3823,10 +3876,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
 
	while (!list_empty(&list)) {
		t = list_entry(list.next, struct btrfs_transaction, list);
-		if (!t)
-			break;
 
-		btrfs_destroy_ordered_operations(root);
+		btrfs_destroy_ordered_operations(t, root);
 
		btrfs_destroy_ordered_extents(root);
 