aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
commitb695188dd39162a1a6bff11fdbcc4c0b65b933ab (patch)
treea3df7c052d38b5bfaf335fbf3130abcc5c6ca577 /fs/btrfs/disk-io.c
parent48476df99894492a0f7239f2f3c9a2dde4ff38e2 (diff)
parent180e001cd5fc2950dc6a7997dde5b65c954d0e79 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason: "The biggest feature in the pull is the new (and still experimental) raid56 code that David Woodhouse started long ago. I'm still working on the parity logging setup that will avoid inconsistent parity after a crash, so this is only for testing right now. But, I'd really like to get it out to a broader audience to hammer out any performance issues or other problems. scrub does not yet correct errors on raid5/6 either. Josef has another pass at fsync performance. The big change here is to combine waiting for metadata with waiting for data, which is a big latency win. It is also step one toward using atomics from the hardware during a commit. Mark Fasheh has a new way to use btrfs send/receive to send only the metadata changes. SUSE is using this to make snapper more efficient at finding changes between snapshots. Snapshot-aware defrag is also included. Otherwise we have a large number of fixes and cleanups. Eric Sandeen wins the award for removing the most lines, and I'm hoping we steal this idea from XFS over and over again." 
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits) btrfs: fixup/remove module.h usage as required Btrfs: delete inline extents when we find them during logging btrfs: try harder to allocate raid56 stripe cache Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails Btrfs: fix missing deleted items in btrfs_clean_quota_tree btrfs: use only inline_pages from extent buffer Btrfs: fix wrong reserved space when deleting a snapshot/subvolume Btrfs: fix wrong reserved space in qgroup during snap/subv creation Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot btrfs: remove a printk from scan_one_device Btrfs: fix NULL pointer after aborting a transaction Btrfs: fix memory leak of log roots Btrfs: copy everything if we've created an inline extent btrfs: cleanup for open-coded alignment Btrfs: do not change inode flags in rename Btrfs: use reserved space for creating a snapshot clear chunk_alloc flag on retryable failure ...
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c227
1 files changed, 139 insertions, 88 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a8f652dc940b..02369a3c162e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -46,6 +46,7 @@
46#include "check-integrity.h" 46#include "check-integrity.h"
47#include "rcu-string.h" 47#include "rcu-string.h"
48#include "dev-replace.h" 48#include "dev-replace.h"
49#include "raid56.h"
49 50
50#ifdef CONFIG_X86 51#ifdef CONFIG_X86
51#include <asm/cpufeature.h> 52#include <asm/cpufeature.h>
@@ -56,7 +57,8 @@ static void end_workqueue_fn(struct btrfs_work *work);
56static void free_fs_root(struct btrfs_root *root); 57static void free_fs_root(struct btrfs_root *root);
57static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 58static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
58 int read_only); 59 int read_only);
59static void btrfs_destroy_ordered_operations(struct btrfs_root *root); 60static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
61 struct btrfs_root *root);
60static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 62static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
61static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 63static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
62 struct btrfs_root *root); 64 struct btrfs_root *root);
@@ -420,7 +422,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
420static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) 422static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
421{ 423{
422 struct extent_io_tree *tree; 424 struct extent_io_tree *tree;
423 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 425 u64 start = page_offset(page);
424 u64 found_start; 426 u64 found_start;
425 struct extent_buffer *eb; 427 struct extent_buffer *eb;
426 428
@@ -639,8 +641,15 @@ err:
639 btree_readahead_hook(root, eb, eb->start, ret); 641 btree_readahead_hook(root, eb, eb->start, ret);
640 } 642 }
641 643
642 if (ret) 644 if (ret) {
645 /*
646 * our io error hook is going to dec the io pages
647 * again, we have to make sure it has something
648 * to decrement
649 */
650 atomic_inc(&eb->io_pages);
643 clear_extent_buffer_uptodate(eb); 651 clear_extent_buffer_uptodate(eb);
652 }
644 free_extent_buffer(eb); 653 free_extent_buffer(eb);
645out: 654out:
646 return ret; 655 return ret;
@@ -654,6 +663,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
654 eb = (struct extent_buffer *)page->private; 663 eb = (struct extent_buffer *)page->private;
655 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); 664 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
656 eb->read_mirror = failed_mirror; 665 eb->read_mirror = failed_mirror;
666 atomic_dec(&eb->io_pages);
657 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) 667 if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
658 btree_readahead_hook(root, eb, eb->start, -EIO); 668 btree_readahead_hook(root, eb, eb->start, -EIO);
659 return -EIO; /* we fixed nothing */ 669 return -EIO; /* we fixed nothing */
@@ -670,17 +680,23 @@ static void end_workqueue_bio(struct bio *bio, int err)
670 end_io_wq->work.flags = 0; 680 end_io_wq->work.flags = 0;
671 681
672 if (bio->bi_rw & REQ_WRITE) { 682 if (bio->bi_rw & REQ_WRITE) {
673 if (end_io_wq->metadata == 1) 683 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
674 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 684 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
675 &end_io_wq->work); 685 &end_io_wq->work);
676 else if (end_io_wq->metadata == 2) 686 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
677 btrfs_queue_worker(&fs_info->endio_freespace_worker, 687 btrfs_queue_worker(&fs_info->endio_freespace_worker,
678 &end_io_wq->work); 688 &end_io_wq->work);
689 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
690 btrfs_queue_worker(&fs_info->endio_raid56_workers,
691 &end_io_wq->work);
679 else 692 else
680 btrfs_queue_worker(&fs_info->endio_write_workers, 693 btrfs_queue_worker(&fs_info->endio_write_workers,
681 &end_io_wq->work); 694 &end_io_wq->work);
682 } else { 695 } else {
683 if (end_io_wq->metadata) 696 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
697 btrfs_queue_worker(&fs_info->endio_raid56_workers,
698 &end_io_wq->work);
699 else if (end_io_wq->metadata)
684 btrfs_queue_worker(&fs_info->endio_meta_workers, 700 btrfs_queue_worker(&fs_info->endio_meta_workers,
685 &end_io_wq->work); 701 &end_io_wq->work);
686 else 702 else
@@ -695,6 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
695 * 0 - if data 711 * 0 - if data
696 * 1 - if normal metadta 712 * 1 - if normal metadta
697 * 2 - if writing to the free space cache area 713 * 2 - if writing to the free space cache area
714 * 3 - raid parity work
698 */ 715 */
699int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 716int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
700 int metadata) 717 int metadata)
@@ -946,18 +963,20 @@ static int btree_writepages(struct address_space *mapping,
946 struct writeback_control *wbc) 963 struct writeback_control *wbc)
947{ 964{
948 struct extent_io_tree *tree; 965 struct extent_io_tree *tree;
966 struct btrfs_fs_info *fs_info;
967 int ret;
968
949 tree = &BTRFS_I(mapping->host)->io_tree; 969 tree = &BTRFS_I(mapping->host)->io_tree;
950 if (wbc->sync_mode == WB_SYNC_NONE) { 970 if (wbc->sync_mode == WB_SYNC_NONE) {
951 struct btrfs_root *root = BTRFS_I(mapping->host)->root;
952 u64 num_dirty;
953 unsigned long thresh = 32 * 1024 * 1024;
954 971
955 if (wbc->for_kupdate) 972 if (wbc->for_kupdate)
956 return 0; 973 return 0;
957 974
975 fs_info = BTRFS_I(mapping->host)->root->fs_info;
958 /* this is a bit racy, but that's ok */ 976 /* this is a bit racy, but that's ok */
959 num_dirty = root->fs_info->dirty_metadata_bytes; 977 ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
960 if (num_dirty < thresh) 978 BTRFS_DIRTY_METADATA_THRESH);
979 if (ret < 0)
961 return 0; 980 return 0;
962 } 981 }
963 return btree_write_cache_pages(mapping, wbc); 982 return btree_write_cache_pages(mapping, wbc);
@@ -1125,24 +1144,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1125void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1144void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1126 struct extent_buffer *buf) 1145 struct extent_buffer *buf)
1127{ 1146{
1147 struct btrfs_fs_info *fs_info = root->fs_info;
1148
1128 if (btrfs_header_generation(buf) == 1149 if (btrfs_header_generation(buf) ==
1129 root->fs_info->running_transaction->transid) { 1150 fs_info->running_transaction->transid) {
1130 btrfs_assert_tree_locked(buf); 1151 btrfs_assert_tree_locked(buf);
1131 1152
1132 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { 1153 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
1133 spin_lock(&root->fs_info->delalloc_lock); 1154 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
1134 if (root->fs_info->dirty_metadata_bytes >= buf->len) 1155 -buf->len,
1135 root->fs_info->dirty_metadata_bytes -= buf->len; 1156 fs_info->dirty_metadata_batch);
1136 else {
1137 spin_unlock(&root->fs_info->delalloc_lock);
1138 btrfs_panic(root->fs_info, -EOVERFLOW,
1139 "Can't clear %lu bytes from "
1140 " dirty_mdatadata_bytes (%llu)",
1141 buf->len,
1142 root->fs_info->dirty_metadata_bytes);
1143 }
1144 spin_unlock(&root->fs_info->delalloc_lock);
1145
1146 /* ugh, clear_extent_buffer_dirty needs to lock the page */ 1157 /* ugh, clear_extent_buffer_dirty needs to lock the page */
1147 btrfs_set_lock_blocking(buf); 1158 btrfs_set_lock_blocking(buf);
1148 clear_extent_buffer_dirty(buf); 1159 clear_extent_buffer_dirty(buf);
@@ -1178,9 +1189,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1178 1189
1179 INIT_LIST_HEAD(&root->dirty_list); 1190 INIT_LIST_HEAD(&root->dirty_list);
1180 INIT_LIST_HEAD(&root->root_list); 1191 INIT_LIST_HEAD(&root->root_list);
1192 INIT_LIST_HEAD(&root->logged_list[0]);
1193 INIT_LIST_HEAD(&root->logged_list[1]);
1181 spin_lock_init(&root->orphan_lock); 1194 spin_lock_init(&root->orphan_lock);
1182 spin_lock_init(&root->inode_lock); 1195 spin_lock_init(&root->inode_lock);
1183 spin_lock_init(&root->accounting_lock); 1196 spin_lock_init(&root->accounting_lock);
1197 spin_lock_init(&root->log_extents_lock[0]);
1198 spin_lock_init(&root->log_extents_lock[1]);
1184 mutex_init(&root->objectid_mutex); 1199 mutex_init(&root->objectid_mutex);
1185 mutex_init(&root->log_mutex); 1200 mutex_init(&root->log_mutex);
1186 init_waitqueue_head(&root->log_writer_wait); 1201 init_waitqueue_head(&root->log_writer_wait);
@@ -2004,10 +2019,24 @@ int open_ctree(struct super_block *sb,
2004 goto fail_srcu; 2019 goto fail_srcu;
2005 } 2020 }
2006 2021
2022 ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
2023 if (ret) {
2024 err = ret;
2025 goto fail_bdi;
2026 }
2027 fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
2028 (1 + ilog2(nr_cpu_ids));
2029
2030 ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
2031 if (ret) {
2032 err = ret;
2033 goto fail_dirty_metadata_bytes;
2034 }
2035
2007 fs_info->btree_inode = new_inode(sb); 2036 fs_info->btree_inode = new_inode(sb);
2008 if (!fs_info->btree_inode) { 2037 if (!fs_info->btree_inode) {
2009 err = -ENOMEM; 2038 err = -ENOMEM;
2010 goto fail_bdi; 2039 goto fail_delalloc_bytes;
2011 } 2040 }
2012 2041
2013 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); 2042 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2017,7 +2046,6 @@ int open_ctree(struct super_block *sb,
2017 INIT_LIST_HEAD(&fs_info->dead_roots); 2046 INIT_LIST_HEAD(&fs_info->dead_roots);
2018 INIT_LIST_HEAD(&fs_info->delayed_iputs); 2047 INIT_LIST_HEAD(&fs_info->delayed_iputs);
2019 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 2048 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
2020 INIT_LIST_HEAD(&fs_info->ordered_operations);
2021 INIT_LIST_HEAD(&fs_info->caching_block_groups); 2049 INIT_LIST_HEAD(&fs_info->caching_block_groups);
2022 spin_lock_init(&fs_info->delalloc_lock); 2050 spin_lock_init(&fs_info->delalloc_lock);
2023 spin_lock_init(&fs_info->trans_lock); 2051 spin_lock_init(&fs_info->trans_lock);
@@ -2028,6 +2056,7 @@ int open_ctree(struct super_block *sb,
2028 spin_lock_init(&fs_info->tree_mod_seq_lock); 2056 spin_lock_init(&fs_info->tree_mod_seq_lock);
2029 rwlock_init(&fs_info->tree_mod_log_lock); 2057 rwlock_init(&fs_info->tree_mod_log_lock);
2030 mutex_init(&fs_info->reloc_mutex); 2058 mutex_init(&fs_info->reloc_mutex);
2059 seqlock_init(&fs_info->profiles_lock);
2031 2060
2032 init_completion(&fs_info->kobj_unregister); 2061 init_completion(&fs_info->kobj_unregister);
2033 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); 2062 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2126,6 +2155,7 @@ int open_ctree(struct super_block *sb,
2126 2155
2127 spin_lock_init(&fs_info->block_group_cache_lock); 2156 spin_lock_init(&fs_info->block_group_cache_lock);
2128 fs_info->block_group_cache_tree = RB_ROOT; 2157 fs_info->block_group_cache_tree = RB_ROOT;
2158 fs_info->first_logical_byte = (u64)-1;
2129 2159
2130 extent_io_tree_init(&fs_info->freed_extents[0], 2160 extent_io_tree_init(&fs_info->freed_extents[0],
2131 fs_info->btree_inode->i_mapping); 2161 fs_info->btree_inode->i_mapping);
@@ -2165,6 +2195,12 @@ int open_ctree(struct super_block *sb,
2165 init_waitqueue_head(&fs_info->transaction_blocked_wait); 2195 init_waitqueue_head(&fs_info->transaction_blocked_wait);
2166 init_waitqueue_head(&fs_info->async_submit_wait); 2196 init_waitqueue_head(&fs_info->async_submit_wait);
2167 2197
2198 ret = btrfs_alloc_stripe_hash_table(fs_info);
2199 if (ret) {
2200 err = ret;
2201 goto fail_alloc;
2202 }
2203
2168 __setup_root(4096, 4096, 4096, 4096, tree_root, 2204 __setup_root(4096, 4096, 4096, 4096, tree_root,
2169 fs_info, BTRFS_ROOT_TREE_OBJECTID); 2205 fs_info, BTRFS_ROOT_TREE_OBJECTID);
2170 2206
@@ -2187,7 +2223,8 @@ int open_ctree(struct super_block *sb,
2187 goto fail_alloc; 2223 goto fail_alloc;
2188 2224
2189 /* check FS state, whether FS is broken. */ 2225 /* check FS state, whether FS is broken. */
2190 fs_info->fs_state |= btrfs_super_flags(disk_super); 2226 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
2227 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
2191 2228
2192 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 2229 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2193 if (ret) { 2230 if (ret) {
@@ -2261,6 +2298,8 @@ int open_ctree(struct super_block *sb,
2261 leafsize = btrfs_super_leafsize(disk_super); 2298 leafsize = btrfs_super_leafsize(disk_super);
2262 sectorsize = btrfs_super_sectorsize(disk_super); 2299 sectorsize = btrfs_super_sectorsize(disk_super);
2263 stripesize = btrfs_super_stripesize(disk_super); 2300 stripesize = btrfs_super_stripesize(disk_super);
2301 fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
2302 fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
2264 2303
2265 /* 2304 /*
2266 * mixed block groups end up with duplicate but slightly offset 2305 * mixed block groups end up with duplicate but slightly offset
@@ -2332,6 +2371,12 @@ int open_ctree(struct super_block *sb,
2332 btrfs_init_workers(&fs_info->endio_meta_write_workers, 2371 btrfs_init_workers(&fs_info->endio_meta_write_workers,
2333 "endio-meta-write", fs_info->thread_pool_size, 2372 "endio-meta-write", fs_info->thread_pool_size,
2334 &fs_info->generic_worker); 2373 &fs_info->generic_worker);
2374 btrfs_init_workers(&fs_info->endio_raid56_workers,
2375 "endio-raid56", fs_info->thread_pool_size,
2376 &fs_info->generic_worker);
2377 btrfs_init_workers(&fs_info->rmw_workers,
2378 "rmw", fs_info->thread_pool_size,
2379 &fs_info->generic_worker);
2335 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 2380 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
2336 fs_info->thread_pool_size, 2381 fs_info->thread_pool_size,
2337 &fs_info->generic_worker); 2382 &fs_info->generic_worker);
@@ -2350,6 +2395,8 @@ int open_ctree(struct super_block *sb,
2350 */ 2395 */
2351 fs_info->endio_workers.idle_thresh = 4; 2396 fs_info->endio_workers.idle_thresh = 4;
2352 fs_info->endio_meta_workers.idle_thresh = 4; 2397 fs_info->endio_meta_workers.idle_thresh = 4;
2398 fs_info->endio_raid56_workers.idle_thresh = 4;
2399 fs_info->rmw_workers.idle_thresh = 2;
2353 2400
2354 fs_info->endio_write_workers.idle_thresh = 2; 2401 fs_info->endio_write_workers.idle_thresh = 2;
2355 fs_info->endio_meta_write_workers.idle_thresh = 2; 2402 fs_info->endio_meta_write_workers.idle_thresh = 2;
@@ -2366,6 +2413,8 @@ int open_ctree(struct super_block *sb,
2366 ret |= btrfs_start_workers(&fs_info->fixup_workers); 2413 ret |= btrfs_start_workers(&fs_info->fixup_workers);
2367 ret |= btrfs_start_workers(&fs_info->endio_workers); 2414 ret |= btrfs_start_workers(&fs_info->endio_workers);
2368 ret |= btrfs_start_workers(&fs_info->endio_meta_workers); 2415 ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
2416 ret |= btrfs_start_workers(&fs_info->rmw_workers);
2417 ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
2369 ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); 2418 ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
2370 ret |= btrfs_start_workers(&fs_info->endio_write_workers); 2419 ret |= btrfs_start_workers(&fs_info->endio_write_workers);
2371 ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); 2420 ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
@@ -2390,8 +2439,7 @@ int open_ctree(struct super_block *sb,
2390 sb->s_blocksize = sectorsize; 2439 sb->s_blocksize = sectorsize;
2391 sb->s_blocksize_bits = blksize_bits(sectorsize); 2440 sb->s_blocksize_bits = blksize_bits(sectorsize);
2392 2441
2393 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, 2442 if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
2394 sizeof(disk_super->magic))) {
2395 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); 2443 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
2396 goto fail_sb_buffer; 2444 goto fail_sb_buffer;
2397 } 2445 }
@@ -2694,13 +2742,13 @@ fail_cleaner:
2694 * kthreads 2742 * kthreads
2695 */ 2743 */
2696 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2744 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2697 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2698 2745
2699fail_block_groups: 2746fail_block_groups:
2700 btrfs_free_block_groups(fs_info); 2747 btrfs_free_block_groups(fs_info);
2701 2748
2702fail_tree_roots: 2749fail_tree_roots:
2703 free_root_pointers(fs_info, 1); 2750 free_root_pointers(fs_info, 1);
2751 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2704 2752
2705fail_sb_buffer: 2753fail_sb_buffer:
2706 btrfs_stop_workers(&fs_info->generic_worker); 2754 btrfs_stop_workers(&fs_info->generic_worker);
@@ -2710,6 +2758,8 @@ fail_sb_buffer:
2710 btrfs_stop_workers(&fs_info->workers); 2758 btrfs_stop_workers(&fs_info->workers);
2711 btrfs_stop_workers(&fs_info->endio_workers); 2759 btrfs_stop_workers(&fs_info->endio_workers);
2712 btrfs_stop_workers(&fs_info->endio_meta_workers); 2760 btrfs_stop_workers(&fs_info->endio_meta_workers);
2761 btrfs_stop_workers(&fs_info->endio_raid56_workers);
2762 btrfs_stop_workers(&fs_info->rmw_workers);
2713 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2763 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2714 btrfs_stop_workers(&fs_info->endio_write_workers); 2764 btrfs_stop_workers(&fs_info->endio_write_workers);
2715 btrfs_stop_workers(&fs_info->endio_freespace_worker); 2765 btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -2721,13 +2771,17 @@ fail_alloc:
2721fail_iput: 2771fail_iput:
2722 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2772 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2723 2773
2724 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2725 iput(fs_info->btree_inode); 2774 iput(fs_info->btree_inode);
2775fail_delalloc_bytes:
2776 percpu_counter_destroy(&fs_info->delalloc_bytes);
2777fail_dirty_metadata_bytes:
2778 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
2726fail_bdi: 2779fail_bdi:
2727 bdi_destroy(&fs_info->bdi); 2780 bdi_destroy(&fs_info->bdi);
2728fail_srcu: 2781fail_srcu:
2729 cleanup_srcu_struct(&fs_info->subvol_srcu); 2782 cleanup_srcu_struct(&fs_info->subvol_srcu);
2730fail: 2783fail:
2784 btrfs_free_stripe_hash_table(fs_info);
2731 btrfs_close_devices(fs_info->fs_devices); 2785 btrfs_close_devices(fs_info->fs_devices);
2732 return err; 2786 return err;
2733 2787
@@ -2795,8 +2849,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
2795 2849
2796 super = (struct btrfs_super_block *)bh->b_data; 2850 super = (struct btrfs_super_block *)bh->b_data;
2797 if (btrfs_super_bytenr(super) != bytenr || 2851 if (btrfs_super_bytenr(super) != bytenr ||
2798 strncmp((char *)(&super->magic), BTRFS_MAGIC, 2852 super->magic != cpu_to_le64(BTRFS_MAGIC)) {
2799 sizeof(super->magic))) {
2800 brelse(bh); 2853 brelse(bh);
2801 continue; 2854 continue;
2802 } 2855 }
@@ -3076,11 +3129,16 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
3076 ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) 3129 ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
3077 == 0))) 3130 == 0)))
3078 num_tolerated_disk_barrier_failures = 0; 3131 num_tolerated_disk_barrier_failures = 0;
3079 else if (num_tolerated_disk_barrier_failures > 1 3132 else if (num_tolerated_disk_barrier_failures > 1) {
3080 && 3133 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3081 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 3134 BTRFS_BLOCK_GROUP_RAID5 |
3082 BTRFS_BLOCK_GROUP_RAID10))) 3135 BTRFS_BLOCK_GROUP_RAID10)) {
3083 num_tolerated_disk_barrier_failures = 1; 3136 num_tolerated_disk_barrier_failures = 1;
3137 } else if (flags &
3138 BTRFS_BLOCK_GROUP_RAID5) {
3139 num_tolerated_disk_barrier_failures = 2;
3140 }
3141 }
3084 } 3142 }
3085 } 3143 }
3086 up_read(&sinfo->groups_sem); 3144 up_read(&sinfo->groups_sem);
@@ -3195,6 +3253,11 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3195 if (btrfs_root_refs(&root->root_item) == 0) 3253 if (btrfs_root_refs(&root->root_item) == 0)
3196 synchronize_srcu(&fs_info->subvol_srcu); 3254 synchronize_srcu(&fs_info->subvol_srcu);
3197 3255
3256 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
3257 btrfs_free_log(NULL, root);
3258 btrfs_free_log_root_tree(NULL, fs_info);
3259 }
3260
3198 __btrfs_remove_free_space_cache(root->free_ino_pinned); 3261 __btrfs_remove_free_space_cache(root->free_ino_pinned);
3199 __btrfs_remove_free_space_cache(root->free_ino_ctl); 3262 __btrfs_remove_free_space_cache(root->free_ino_ctl);
3200 free_fs_root(root); 3263 free_fs_root(root);
@@ -3339,7 +3402,7 @@ int close_ctree(struct btrfs_root *root)
3339 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 3402 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3340 } 3403 }
3341 3404
3342 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 3405 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3343 btrfs_error_commit_super(root); 3406 btrfs_error_commit_super(root);
3344 3407
3345 btrfs_put_block_group_cache(fs_info); 3408 btrfs_put_block_group_cache(fs_info);
@@ -3352,9 +3415,9 @@ int close_ctree(struct btrfs_root *root)
3352 3415
3353 btrfs_free_qgroup_config(root->fs_info); 3416 btrfs_free_qgroup_config(root->fs_info);
3354 3417
3355 if (fs_info->delalloc_bytes) { 3418 if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
3356 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 3419 printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
3357 (unsigned long long)fs_info->delalloc_bytes); 3420 percpu_counter_sum(&fs_info->delalloc_bytes));
3358 } 3421 }
3359 3422
3360 free_extent_buffer(fs_info->extent_root->node); 3423 free_extent_buffer(fs_info->extent_root->node);
@@ -3384,6 +3447,8 @@ int close_ctree(struct btrfs_root *root)
3384 btrfs_stop_workers(&fs_info->workers); 3447 btrfs_stop_workers(&fs_info->workers);
3385 btrfs_stop_workers(&fs_info->endio_workers); 3448 btrfs_stop_workers(&fs_info->endio_workers);
3386 btrfs_stop_workers(&fs_info->endio_meta_workers); 3449 btrfs_stop_workers(&fs_info->endio_meta_workers);
3450 btrfs_stop_workers(&fs_info->endio_raid56_workers);
3451 btrfs_stop_workers(&fs_info->rmw_workers);
3387 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 3452 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
3388 btrfs_stop_workers(&fs_info->endio_write_workers); 3453 btrfs_stop_workers(&fs_info->endio_write_workers);
3389 btrfs_stop_workers(&fs_info->endio_freespace_worker); 3454 btrfs_stop_workers(&fs_info->endio_freespace_worker);
@@ -3401,9 +3466,13 @@ int close_ctree(struct btrfs_root *root)
3401 btrfs_close_devices(fs_info->fs_devices); 3466 btrfs_close_devices(fs_info->fs_devices);
3402 btrfs_mapping_tree_free(&fs_info->mapping_tree); 3467 btrfs_mapping_tree_free(&fs_info->mapping_tree);
3403 3468
3469 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
3470 percpu_counter_destroy(&fs_info->delalloc_bytes);
3404 bdi_destroy(&fs_info->bdi); 3471 bdi_destroy(&fs_info->bdi);
3405 cleanup_srcu_struct(&fs_info->subvol_srcu); 3472 cleanup_srcu_struct(&fs_info->subvol_srcu);
3406 3473
3474 btrfs_free_stripe_hash_table(fs_info);
3475
3407 return 0; 3476 return 0;
3408} 3477}
3409 3478
@@ -3443,11 +3512,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3443 (unsigned long long)transid, 3512 (unsigned long long)transid,
3444 (unsigned long long)root->fs_info->generation); 3513 (unsigned long long)root->fs_info->generation);
3445 was_dirty = set_extent_buffer_dirty(buf); 3514 was_dirty = set_extent_buffer_dirty(buf);
3446 if (!was_dirty) { 3515 if (!was_dirty)
3447 spin_lock(&root->fs_info->delalloc_lock); 3516 __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
3448 root->fs_info->dirty_metadata_bytes += buf->len; 3517 buf->len,
3449 spin_unlock(&root->fs_info->delalloc_lock); 3518 root->fs_info->dirty_metadata_batch);
3450 }
3451} 3519}
3452 3520
3453static void __btrfs_btree_balance_dirty(struct btrfs_root *root, 3521static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
@@ -3457,8 +3525,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
3457 * looks as though older kernels can get into trouble with 3525 * looks as though older kernels can get into trouble with
3458 * this code, they end up stuck in balance_dirty_pages forever 3526 * this code, they end up stuck in balance_dirty_pages forever
3459 */ 3527 */
3460 u64 num_dirty; 3528 int ret;
3461 unsigned long thresh = 32 * 1024 * 1024;
3462 3529
3463 if (current->flags & PF_MEMALLOC) 3530 if (current->flags & PF_MEMALLOC)
3464 return; 3531 return;
@@ -3466,9 +3533,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
3466 if (flush_delayed) 3533 if (flush_delayed)
3467 btrfs_balance_delayed_items(root); 3534 btrfs_balance_delayed_items(root);
3468 3535
3469 num_dirty = root->fs_info->dirty_metadata_bytes; 3536 ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
3470 3537 BTRFS_DIRTY_METADATA_THRESH);
3471 if (num_dirty > thresh) { 3538 if (ret > 0) {
3472 balance_dirty_pages_ratelimited( 3539 balance_dirty_pages_ratelimited(
3473 root->fs_info->btree_inode->i_mapping); 3540 root->fs_info->btree_inode->i_mapping);
3474 } 3541 }
@@ -3518,7 +3585,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
3518 btrfs_cleanup_transaction(root); 3585 btrfs_cleanup_transaction(root);
3519} 3586}
3520 3587
3521static void btrfs_destroy_ordered_operations(struct btrfs_root *root) 3588static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3589 struct btrfs_root *root)
3522{ 3590{
3523 struct btrfs_inode *btrfs_inode; 3591 struct btrfs_inode *btrfs_inode;
3524 struct list_head splice; 3592 struct list_head splice;
@@ -3528,7 +3596,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
3528 mutex_lock(&root->fs_info->ordered_operations_mutex); 3596 mutex_lock(&root->fs_info->ordered_operations_mutex);
3529 spin_lock(&root->fs_info->ordered_extent_lock); 3597 spin_lock(&root->fs_info->ordered_extent_lock);
3530 3598
3531 list_splice_init(&root->fs_info->ordered_operations, &splice); 3599 list_splice_init(&t->ordered_operations, &splice);
3532 while (!list_empty(&splice)) { 3600 while (!list_empty(&splice)) {
3533 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 3601 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
3534 ordered_operations); 3602 ordered_operations);
@@ -3544,35 +3612,16 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
3544 3612
3545static void btrfs_destroy_ordered_extents(struct btrfs_root *root) 3613static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3546{ 3614{
3547 struct list_head splice;
3548 struct btrfs_ordered_extent *ordered; 3615 struct btrfs_ordered_extent *ordered;
3549 struct inode *inode;
3550
3551 INIT_LIST_HEAD(&splice);
3552 3616
3553 spin_lock(&root->fs_info->ordered_extent_lock); 3617 spin_lock(&root->fs_info->ordered_extent_lock);
3554 3618 /*
3555 list_splice_init(&root->fs_info->ordered_extents, &splice); 3619 * This will just short circuit the ordered completion stuff which will
3556 while (!list_empty(&splice)) { 3620 * make sure the ordered extent gets properly cleaned up.
3557 ordered = list_entry(splice.next, struct btrfs_ordered_extent, 3621 */
3558 root_extent_list); 3622 list_for_each_entry(ordered, &root->fs_info->ordered_extents,
3559 3623 root_extent_list)
3560 list_del_init(&ordered->root_extent_list); 3624 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
3561 atomic_inc(&ordered->refs);
3562
3563 /* the inode may be getting freed (in sys_unlink path). */
3564 inode = igrab(ordered->inode);
3565
3566 spin_unlock(&root->fs_info->ordered_extent_lock);
3567 if (inode)
3568 iput(inode);
3569
3570 atomic_set(&ordered->refs, 1);
3571 btrfs_put_ordered_extent(ordered);
3572
3573 spin_lock(&root->fs_info->ordered_extent_lock);
3574 }
3575
3576 spin_unlock(&root->fs_info->ordered_extent_lock); 3625 spin_unlock(&root->fs_info->ordered_extent_lock);
3577} 3626}
3578 3627
@@ -3594,11 +3643,11 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3594 } 3643 }
3595 3644
3596 while ((node = rb_first(&delayed_refs->root)) != NULL) { 3645 while ((node = rb_first(&delayed_refs->root)) != NULL) {
3597 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3646 struct btrfs_delayed_ref_head *head = NULL;
3598 3647
3648 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3599 atomic_set(&ref->refs, 1); 3649 atomic_set(&ref->refs, 1);
3600 if (btrfs_delayed_ref_is_head(ref)) { 3650 if (btrfs_delayed_ref_is_head(ref)) {
3601 struct btrfs_delayed_ref_head *head;
3602 3651
3603 head = btrfs_delayed_node_to_head(ref); 3652 head = btrfs_delayed_node_to_head(ref);
3604 if (!mutex_trylock(&head->mutex)) { 3653 if (!mutex_trylock(&head->mutex)) {
@@ -3614,16 +3663,18 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3614 continue; 3663 continue;
3615 } 3664 }
3616 3665
3617 kfree(head->extent_op); 3666 btrfs_free_delayed_extent_op(head->extent_op);
3618 delayed_refs->num_heads--; 3667 delayed_refs->num_heads--;
3619 if (list_empty(&head->cluster)) 3668 if (list_empty(&head->cluster))
3620 delayed_refs->num_heads_ready--; 3669 delayed_refs->num_heads_ready--;
3621 list_del_init(&head->cluster); 3670 list_del_init(&head->cluster);
3622 } 3671 }
3672
3623 ref->in_tree = 0; 3673 ref->in_tree = 0;
3624 rb_erase(&ref->rb_node, &delayed_refs->root); 3674 rb_erase(&ref->rb_node, &delayed_refs->root);
3625 delayed_refs->num_entries--; 3675 delayed_refs->num_entries--;
3626 3676 if (head)
3677 mutex_unlock(&head->mutex);
3627 spin_unlock(&delayed_refs->lock); 3678 spin_unlock(&delayed_refs->lock);
3628 btrfs_put_delayed_ref(ref); 3679 btrfs_put_delayed_ref(ref);
3629 3680
@@ -3671,6 +3722,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3671 delalloc_inodes); 3722 delalloc_inodes);
3672 3723
3673 list_del_init(&btrfs_inode->delalloc_inodes); 3724 list_del_init(&btrfs_inode->delalloc_inodes);
3725 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
3726 &btrfs_inode->runtime_flags);
3674 3727
3675 btrfs_invalidate_inodes(btrfs_inode->root); 3728 btrfs_invalidate_inodes(btrfs_inode->root);
3676 } 3729 }
@@ -3823,10 +3876,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3823 3876
3824 while (!list_empty(&list)) { 3877 while (!list_empty(&list)) {
3825 t = list_entry(list.next, struct btrfs_transaction, list); 3878 t = list_entry(list.next, struct btrfs_transaction, list);
3826 if (!t)
3827 break;
3828 3879
3829 btrfs_destroy_ordered_operations(root); 3880 btrfs_destroy_ordered_operations(t, root);
3830 3881
3831 btrfs_destroy_ordered_extents(root); 3882 btrfs_destroy_ordered_extents(root);
3832 3883