aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2013-01-29 05:09:20 -0500
committerJosef Bacik <jbacik@fusionio.com>2013-02-20 12:59:04 -0500
commite2d845211eda9cf296e8edf6724b3d541f4fbfd5 (patch)
tree69acdf4bfe81d173673fee8d8104393e4b73681e /fs
parentc018daecead7a46a575e2a1397fea850b83396c8 (diff)
Btrfs: use percpu counter for dirty metadata count
->dirty_metadata_bytes is accessed very frequently, so use a percpu counter instead of the u64 variable to reduce lock contention. This patch also fixes the problem that we accessed it without lock protection in __btrfs_btree_balance_dirty(), which could cause us to skip flushing the dirty pages. Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/disk-io.c64
-rw-r--r--fs/btrfs/extent_io.c9
3 files changed, 42 insertions, 40 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d8e539fe5544..4c476281b66b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
191/* ioprio of readahead is set to idle */ 191/* ioprio of readahead is set to idle */
192#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) 192#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
193 193
194#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024)
195
194/* 196/*
195 * The key defines the order in the tree, and so it also defines (optimal) 197 * The key defines the order in the tree, and so it also defines (optimal)
196 * block layout. 198 * block layout.
@@ -1448,10 +1450,9 @@ struct btrfs_fs_info {
1448 1450
1449 u64 total_pinned; 1451 u64 total_pinned;
1450 1452
1451 /* protected by the delalloc lock, used to keep from writing 1453 /* used to keep from writing metadata until there is a nice batch */
1452 * metadata until there is a nice batch 1454 struct percpu_counter dirty_metadata_bytes;
1453 */ 1455 s32 dirty_metadata_batch;
1454 u64 dirty_metadata_bytes;
1455 struct list_head dirty_cowonly_roots; 1456 struct list_head dirty_cowonly_roots;
1456 1457
1457 struct btrfs_fs_devices *fs_devices; 1458 struct btrfs_fs_devices *fs_devices;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 04f98e3ffd90..34ace168eebc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -946,18 +946,20 @@ static int btree_writepages(struct address_space *mapping,
946 struct writeback_control *wbc) 946 struct writeback_control *wbc)
947{ 947{
948 struct extent_io_tree *tree; 948 struct extent_io_tree *tree;
949 struct btrfs_fs_info *fs_info;
950 int ret;
951
949 tree = &BTRFS_I(mapping->host)->io_tree; 952 tree = &BTRFS_I(mapping->host)->io_tree;
950 if (wbc->sync_mode == WB_SYNC_NONE) { 953 if (wbc->sync_mode == WB_SYNC_NONE) {
951 struct btrfs_root *root = BTRFS_I(mapping->host)->root;
952 u64 num_dirty;
953 unsigned long thresh = 32 * 1024 * 1024;
954 954
955 if (wbc->for_kupdate) 955 if (wbc->for_kupdate)
956 return 0; 956 return 0;
957 957
958 fs_info = BTRFS_I(mapping->host)->root->fs_info;
958 /* this is a bit racy, but that's ok */ 959 /* this is a bit racy, but that's ok */
959 num_dirty = root->fs_info->dirty_metadata_bytes; 960 ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
960 if (num_dirty < thresh) 961 BTRFS_DIRTY_METADATA_THRESH);
962 if (ret < 0)
961 return 0; 963 return 0;
962 } 964 }
963 return btree_write_cache_pages(mapping, wbc); 965 return btree_write_cache_pages(mapping, wbc);
@@ -1125,24 +1127,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1125void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1127void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1126 struct extent_buffer *buf) 1128 struct extent_buffer *buf)
1127{ 1129{
1130 struct btrfs_fs_info *fs_info = root->fs_info;
1131
1128 if (btrfs_header_generation(buf) == 1132 if (btrfs_header_generation(buf) ==
1129 root->fs_info->running_transaction->transid) { 1133 fs_info->running_transaction->transid) {
1130 btrfs_assert_tree_locked(buf); 1134 btrfs_assert_tree_locked(buf);
1131 1135
1132 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { 1136 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
1133 spin_lock(&root->fs_info->delalloc_lock); 1137 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
1134 if (root->fs_info->dirty_metadata_bytes >= buf->len) 1138 -buf->len,
1135 root->fs_info->dirty_metadata_bytes -= buf->len; 1139 fs_info->dirty_metadata_batch);
1136 else {
1137 spin_unlock(&root->fs_info->delalloc_lock);
1138 btrfs_panic(root->fs_info, -EOVERFLOW,
1139 "Can't clear %lu bytes from "
1140 " dirty_mdatadata_bytes (%llu)",
1141 buf->len,
1142 root->fs_info->dirty_metadata_bytes);
1143 }
1144 spin_unlock(&root->fs_info->delalloc_lock);
1145
1146 /* ugh, clear_extent_buffer_dirty needs to lock the page */ 1140 /* ugh, clear_extent_buffer_dirty needs to lock the page */
1147 btrfs_set_lock_blocking(buf); 1141 btrfs_set_lock_blocking(buf);
1148 clear_extent_buffer_dirty(buf); 1142 clear_extent_buffer_dirty(buf);
@@ -2008,10 +2002,18 @@ int open_ctree(struct super_block *sb,
2008 goto fail_srcu; 2002 goto fail_srcu;
2009 } 2003 }
2010 2004
2005 ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
2006 if (ret) {
2007 err = ret;
2008 goto fail_bdi;
2009 }
2010 fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
2011 (1 + ilog2(nr_cpu_ids));
2012
2011 fs_info->btree_inode = new_inode(sb); 2013 fs_info->btree_inode = new_inode(sb);
2012 if (!fs_info->btree_inode) { 2014 if (!fs_info->btree_inode) {
2013 err = -ENOMEM; 2015 err = -ENOMEM;
2014 goto fail_bdi; 2016 goto fail_dirty_metadata_bytes;
2015 } 2017 }
2016 2018
2017 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); 2019 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2266,6 +2268,7 @@ int open_ctree(struct super_block *sb,
2266 leafsize = btrfs_super_leafsize(disk_super); 2268 leafsize = btrfs_super_leafsize(disk_super);
2267 sectorsize = btrfs_super_sectorsize(disk_super); 2269 sectorsize = btrfs_super_sectorsize(disk_super);
2268 stripesize = btrfs_super_stripesize(disk_super); 2270 stripesize = btrfs_super_stripesize(disk_super);
2271 fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
2269 2272
2270 /* 2273 /*
2271 * mixed block groups end up with duplicate but slightly offset 2274 * mixed block groups end up with duplicate but slightly offset
@@ -2728,6 +2731,8 @@ fail_iput:
2728 2731
2729 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2732 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2730 iput(fs_info->btree_inode); 2733 iput(fs_info->btree_inode);
2734fail_dirty_metadata_bytes:
2735 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
2731fail_bdi: 2736fail_bdi:
2732 bdi_destroy(&fs_info->bdi); 2737 bdi_destroy(&fs_info->bdi);
2733fail_srcu: 2738fail_srcu:
@@ -3406,6 +3411,7 @@ int close_ctree(struct btrfs_root *root)
3406 btrfs_close_devices(fs_info->fs_devices); 3411 btrfs_close_devices(fs_info->fs_devices);
3407 btrfs_mapping_tree_free(&fs_info->mapping_tree); 3412 btrfs_mapping_tree_free(&fs_info->mapping_tree);
3408 3413
3414 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
3409 bdi_destroy(&fs_info->bdi); 3415 bdi_destroy(&fs_info->bdi);
3410 cleanup_srcu_struct(&fs_info->subvol_srcu); 3416 cleanup_srcu_struct(&fs_info->subvol_srcu);
3411 3417
@@ -3448,11 +3454,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3448 (unsigned long long)transid, 3454 (unsigned long long)transid,
3449 (unsigned long long)root->fs_info->generation); 3455 (unsigned long long)root->fs_info->generation);
3450 was_dirty = set_extent_buffer_dirty(buf); 3456 was_dirty = set_extent_buffer_dirty(buf);
3451 if (!was_dirty) { 3457 if (!was_dirty)
3452 spin_lock(&root->fs_info->delalloc_lock); 3458 __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
3453 root->fs_info->dirty_metadata_bytes += buf->len; 3459 buf->len,
3454 spin_unlock(&root->fs_info->delalloc_lock); 3460 root->fs_info->dirty_metadata_batch);
3455 }
3456} 3461}
3457 3462
3458static void __btrfs_btree_balance_dirty(struct btrfs_root *root, 3463static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
@@ -3462,8 +3467,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
3462 * looks as though older kernels can get into trouble with 3467 * looks as though older kernels can get into trouble with
3463 * this code, they end up stuck in balance_dirty_pages forever 3468 * this code, they end up stuck in balance_dirty_pages forever
3464 */ 3469 */
3465 u64 num_dirty; 3470 int ret;
3466 unsigned long thresh = 32 * 1024 * 1024;
3467 3471
3468 if (current->flags & PF_MEMALLOC) 3472 if (current->flags & PF_MEMALLOC)
3469 return; 3473 return;
@@ -3471,9 +3475,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
3471 if (flush_delayed) 3475 if (flush_delayed)
3472 btrfs_balance_delayed_items(root); 3476 btrfs_balance_delayed_items(root);
3473 3477
3474 num_dirty = root->fs_info->dirty_metadata_bytes; 3478 ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
3475 3479 BTRFS_DIRTY_METADATA_THRESH);
3476 if (num_dirty > thresh) { 3480 if (ret > 0) {
3477 balance_dirty_pages_ratelimited_nr( 3481 balance_dirty_pages_ratelimited_nr(
3478 root->fs_info->btree_inode->i_mapping, 1); 3482 root->fs_info->btree_inode->i_mapping, 1);
3479 } 3483 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index bda36fef301e..b9019c847c58 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3122,12 +3122,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
3122 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); 3122 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3123 spin_unlock(&eb->refs_lock); 3123 spin_unlock(&eb->refs_lock);
3124 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 3124 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3125 spin_lock(&fs_info->delalloc_lock); 3125 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
3126 if (fs_info->dirty_metadata_bytes >= eb->len) 3126 -eb->len,
3127 fs_info->dirty_metadata_bytes -= eb->len; 3127 fs_info->dirty_metadata_batch);
3128 else
3129 WARN_ON(1);
3130 spin_unlock(&fs_info->delalloc_lock);
3131 ret = 1; 3128 ret = 1;
3132 } else { 3129 } else {
3133 spin_unlock(&eb->refs_lock); 3130 spin_unlock(&eb->refs_lock);