aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2014-05-13 20:29:04 -0400
committerChris Mason <clm@fb.com>2014-06-09 20:20:34 -0400
commit21c7e75654b77b53a4436bf28496aac11536b6ba (patch)
tree0f0a3b14a01c719ea1b13f87e75cbcb7c1d12f79 /fs
parent32d6b47fe6fc1714d5f1bba1b9f38e0ab0ad58a8 (diff)
Btrfs: reclaim the reserved metadata space at background
Before this patch, a task had to reclaim metadata space by itself if there was not enough metadata space. When a task started the space reclamation, all the other tasks which wanted to reserve metadata space were blocked. In some cases they would be blocked for a long time, which made the performance fluctuate wildly. So we introduce background metadata space reclamation: when the space is about to be exhausted, we insert a reclaim work item into the workqueue, and the workqueue's worker reclaims the reserved space for us in the background. This way, the tasks needn't reclaim the space by themselves in most cases, and even if the tasks have to reclaim the space or are blocked waiting for the space reclamation, they will get enough space more quickly. Here is my test result (tested with compilebench): Memory: 2GB CPU: 2Cores * 1CPU Partition: 40GB(SSD) Test command: # compilebench -D <mnt> -m Without this patch: intial create total runs 30 avg 54.36 MB/s (user 0.52s sys 2.44s) compile total runs 30 avg 123.72 MB/s (user 0.13s sys 1.17s) read compiled tree total runs 3 avg 81.15 MB/s (user 0.74s sys 4.89s) delete compiled tree total runs 30 avg 5.32 seconds (user 0.35s sys 4.37s) With this patch: intial create total runs 30 avg 59.80 MB/s (user 0.52s sys 2.53s) compile total runs 30 avg 151.44 MB/s (user 0.13s sys 1.11s) read compiled tree total runs 3 avg 83.25 MB/s (user 0.76s sys 4.91s) delete compiled tree total runs 30 avg 5.29 seconds (user 0.34s sys 4.34s) Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c105
-rw-r--r--fs/btrfs/super.c1
4 files changed, 114 insertions, 1 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f4a439e1a43f..0a805b8d61cc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -33,6 +33,7 @@
33#include <asm/kmap_types.h> 33#include <asm/kmap_types.h>
34#include <linux/pagemap.h> 34#include <linux/pagemap.h>
35#include <linux/btrfs.h> 35#include <linux/btrfs.h>
36#include <linux/workqueue.h>
36#include "extent_io.h" 37#include "extent_io.h"
37#include "extent_map.h" 38#include "extent_map.h"
38#include "async-thread.h" 39#include "async-thread.h"
@@ -1322,6 +1323,8 @@ struct btrfs_stripe_hash_table {
1322 1323
1323#define BTRFS_STRIPE_HASH_TABLE_BITS 11 1324#define BTRFS_STRIPE_HASH_TABLE_BITS 11
1324 1325
1326void btrfs_init_async_reclaim_work(struct work_struct *work);
1327
1325/* fs_info */ 1328/* fs_info */
1326struct reloc_control; 1329struct reloc_control;
1327struct btrfs_device; 1330struct btrfs_device;
@@ -1697,6 +1700,9 @@ struct btrfs_fs_info {
1697 1700
1698 struct semaphore uuid_tree_rescan_sem; 1701 struct semaphore uuid_tree_rescan_sem;
1699 unsigned int update_uuid_tree_gen:1; 1702 unsigned int update_uuid_tree_gen:1;
1703
1704 /* Used to reclaim the metadata space in the background. */
1705 struct work_struct async_reclaim_work;
1700}; 1706};
1701 1707
1702struct btrfs_subvolume_writers { 1708struct btrfs_subvolume_writers {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 983314932af3..4b1f16dd9ce3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2291,6 +2291,7 @@ int open_ctree(struct super_block *sb,
2291 atomic_set(&fs_info->balance_cancel_req, 0); 2291 atomic_set(&fs_info->balance_cancel_req, 0);
2292 fs_info->balance_ctl = NULL; 2292 fs_info->balance_ctl = NULL;
2293 init_waitqueue_head(&fs_info->balance_wait_q); 2293 init_waitqueue_head(&fs_info->balance_wait_q);
2294 btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
2294 2295
2295 sb->s_blocksize = 4096; 2296 sb->s_blocksize = 4096;
2296 sb->s_blocksize_bits = blksize_bits(4096); 2297 sb->s_blocksize_bits = blksize_bits(4096);
@@ -3603,6 +3604,8 @@ int close_ctree(struct btrfs_root *root)
3603 /* clear out the rbtree of defraggable inodes */ 3604 /* clear out the rbtree of defraggable inodes */
3604 btrfs_cleanup_defrag_inodes(fs_info); 3605 btrfs_cleanup_defrag_inodes(fs_info);
3605 3606
3607 cancel_work_sync(&fs_info->async_reclaim_work);
3608
3606 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3609 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3607 ret = btrfs_commit_super(root); 3610 ret = btrfs_commit_super(root);
3608 if (ret) 3611 if (ret)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5590af92094b..15467e3f5876 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4204,6 +4204,104 @@ static int flush_space(struct btrfs_root *root,
4204 4204
4205 return ret; 4205 return ret;
4206} 4206}
4207
/*
 * Compute how many bytes the background worker should try to reclaim from
 * @space_info.
 *
 * @root:       root passed through to can_overcommit()
 * @space_info: space info whose counters are examined
 *
 * Takes and releases space_info->lock internally.
 *
 * Returns 0 when can_overcommit() accepts the initial per-CPU sized request
 * or when usage does not exceed the target; otherwise returns the excess of
 * usage over the target, capped at bytes_may_use + bytes_reserved.
 */
4208static inline u64
4209btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
4210 struct btrfs_space_info *space_info)
4211{
4212 u64 used;
4213 u64 expected;
4214 u64 to_reclaim;
4215
/* Initial probe size: 1MiB per online CPU, limited to 16MiB. */
4216 to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
4217 16 * 1024 * 1024);
4218 spin_lock(&space_info->lock);
/* If the probe size can still be overcommitted, nothing needs reclaiming. */
4219 if (can_overcommit(root, space_info, to_reclaim,
4220 BTRFS_RESERVE_FLUSH_ALL)) {
4221 to_reclaim = 0;
4222 goto out;
4223 }
4224
4225 used = space_info->bytes_used + space_info->bytes_reserved +
4226 space_info->bytes_pinned + space_info->bytes_readonly +
4227 space_info->bytes_may_use;
/*
 * Pick the usage target from total_bytes: factor 95 if a 1MiB request can
 * still be overcommitted, factor 90 otherwise.
 * NOTE(review): div_factor_fine() is defined elsewhere; presumably it
 * yields the given percentage of its first argument - confirm.
 */
4228 if (can_overcommit(root, space_info, 1024 * 1024,
4229 BTRFS_RESERVE_FLUSH_ALL))
4230 expected = div_factor_fine(space_info->total_bytes, 95);
4231 else
4232 expected = div_factor_fine(space_info->total_bytes, 90);
4233
4234 if (used > expected)
4235 to_reclaim = used - expected;
4236 else
4237 to_reclaim = 0;
/* Never ask for more than bytes_may_use + bytes_reserved. */
4238 to_reclaim = min(to_reclaim, space_info->bytes_may_use +
4239 space_info->bytes_reserved);
4240out:
4241 spin_unlock(&space_info->lock);
4242
4243 return to_reclaim;
4244}
4245
/*
 * Decide whether background metadata reclaim should run.
 *
 * @space_info: space info supplying total_bytes
 * @fs_info:    filesystem whose closing/remounting state is checked
 * @used:       caller-computed usage figure to compare against the threshold
 *
 * Returns non-zero only when all three conditions hold: @used has reached
 * div_factor_fine(total_bytes, 98), btrfs_fs_closing() is false, and the
 * BTRFS_FS_STATE_REMOUNTING bit is not set in fs_info->fs_state.
 *
 * This helper takes no lock itself.
 * NOTE(review): div_factor_fine() is defined elsewhere; presumably the 98
 * means 98% of total_bytes - confirm.
 */
4246static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4247 struct btrfs_fs_info *fs_info, u64 used)
4248{
4249 return (used >= div_factor_fine(space_info->total_bytes, 98) &&
4250 !btrfs_fs_closing(fs_info) &&
4251 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4252}
4253
/*
 * Locked wrapper around need_do_async_reclaim().
 *
 * Sums bytes_used, bytes_reserved, bytes_pinned, bytes_readonly and
 * bytes_may_use while holding space_info->lock, then passes that total to
 * need_do_async_reclaim().
 *
 * Returns 1 if background reclaim is still needed, 0 otherwise. The lock is
 * released on both paths before returning.
 */
4254static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
4255 struct btrfs_fs_info *fs_info)
4256{
4257 u64 used;
4258
4259 spin_lock(&space_info->lock);
4260 used = space_info->bytes_used + space_info->bytes_reserved +
4261 space_info->bytes_pinned + space_info->bytes_readonly +
4262 space_info->bytes_may_use;
4263 if (need_do_async_reclaim(space_info, fs_info, used)) {
4264 spin_unlock(&space_info->lock);
4265 return 1;
4266 }
4267 spin_unlock(&space_info->lock);
4268
4269 return 0;
4270}
4271
/*
 * Work handler that reclaims metadata space in the background.
 *
 * @work: the async_reclaim_work member embedded in struct btrfs_fs_info
 *
 * The reclaim target is computed once up front. The handler then steps
 * flush_state from FLUSH_DELAYED_ITEMS_NR up to and including COMMIT_TRANS,
 * calling flush_space() at each state and stopping as soon as
 * btrfs_need_do_async_reclaim() reports that reclaim is no longer needed.
 * If reclaim is still needed after the last state, the work item re-queues
 * itself on system_unbound_wq.
 */
4272static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4273{
4274 struct btrfs_fs_info *fs_info;
4275 struct btrfs_space_info *space_info;
4276 u64 to_reclaim;
4277 int flush_state;
4278
/* Recover the owning fs_info from the embedded work item. */
4279 fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
4280 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
4281
4282 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
4283 space_info);
/* Nothing to reclaim: finish without flushing. */
4284 if (!to_reclaim)
4285 return;
4286
4287 flush_state = FLUSH_DELAYED_ITEMS_NR;
4288 do {
/* The return value of flush_space() is deliberately not checked here. */
4289 flush_space(fs_info->fs_root, space_info, to_reclaim,
4290 to_reclaim, flush_state);
4291 flush_state++;
/* Stop escalating once usage is below the threshold (or fs is closing/remounting). */
4292 if (!btrfs_need_do_async_reclaim(space_info, fs_info))
4293 return;
4294 } while (flush_state <= COMMIT_TRANS);
4295
/* All flush states tried and reclaim is still needed: schedule another pass. */
4296 if (btrfs_need_do_async_reclaim(space_info, fs_info))
4297 queue_work(system_unbound_wq, work);
4298}
4299
/*
 * Initialise @work so that, when queued, it runs
 * btrfs_async_reclaim_metadata_space().
 *
 * @work: work item to initialise
 */
4300void btrfs_init_async_reclaim_work(struct work_struct *work)
4301{
4302 INIT_WORK(work, btrfs_async_reclaim_metadata_space);
4303}
4304
4207/** 4305/**
4208 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space 4306 * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
4209 * @root - the root we're allocating for 4307 * @root - the root we're allocating for
@@ -4311,8 +4409,13 @@ again:
4311 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { 4409 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
4312 flushing = true; 4410 flushing = true;
4313 space_info->flush = 1; 4411 space_info->flush = 1;
4412 } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
4413 used += orig_bytes;
4414 if (need_do_async_reclaim(space_info, root->fs_info, used) &&
4415 !work_busy(&root->fs_info->async_reclaim_work))
4416 queue_work(system_unbound_wq,
4417 &root->fs_info->async_reclaim_work);
4314 } 4418 }
4315
4316 spin_unlock(&space_info->lock); 4419 spin_unlock(&space_info->lock);
4317 4420
4318 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) 4421 if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9601d25a4607..8f7c03db1f8d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1413,6 +1413,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1413 * this also happens on 'umount -rf' or on shutdown, when 1413 * this also happens on 'umount -rf' or on shutdown, when
1414 * the filesystem is busy. 1414 * the filesystem is busy.
1415 */ 1415 */
1416 cancel_work_sync(&fs_info->async_reclaim_work);
1416 1417
1417 /* wait for the uuid_scan task to finish */ 1418 /* wait for the uuid_scan task to finish */
1418 down(&fs_info->uuid_tree_rescan_sem); 1419 down(&fs_info->uuid_tree_rescan_sem);