aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.yan@oracle.com> 2010-05-16 10:46:25 -0400
committer: Chris Mason <chris.mason@oracle.com> 2010-05-25 10:34:48 -0400
commit: 5da9d01b66458b180a6bee0e637a1d0a3effc622 (patch)
tree: 47eca61c0ad07ddc791cb7677c548d663fbac818 /fs
parent: 424499dbd0c4d88742bf581b5714b27fb44b9fef (diff)
Btrfs: Shrink delayed allocation space in a synchronized manner
Shrinking delayed allocation space in a synchronized manner is more controllable than flushing all delayed allocation space in an async thread. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/ctree.h 6
-rw-r--r-- fs/btrfs/disk-io.c 6
-rw-r--r-- fs/btrfs/extent-tree.c 165
-rw-r--r-- fs/btrfs/inode.c 32
4 files changed, 88 insertions, 121 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a68f34603b9d..85c7b95dd2fe 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -700,10 +700,6 @@ struct btrfs_space_info {
700 700
701 struct list_head list; 701 struct list_head list;
702 702
703 /* for controlling how we free up space for allocations */
704 wait_queue_head_t flush_wait;
705 int flushing;
706
707 /* for block groups in our same type */ 703 /* for block groups in our same type */
708 struct list_head block_groups[BTRFS_NR_RAID_TYPES]; 704 struct list_head block_groups[BTRFS_NR_RAID_TYPES];
709 spinlock_t lock; 705 spinlock_t lock;
@@ -928,7 +924,6 @@ struct btrfs_fs_info {
928 struct btrfs_workers endio_meta_write_workers; 924 struct btrfs_workers endio_meta_write_workers;
929 struct btrfs_workers endio_write_workers; 925 struct btrfs_workers endio_write_workers;
930 struct btrfs_workers submit_workers; 926 struct btrfs_workers submit_workers;
931 struct btrfs_workers enospc_workers;
932 /* 927 /*
933 * fixup workers take dirty pages that didn't properly go through 928 * fixup workers take dirty pages that didn't properly go through
934 * the cow mechanism and make them safe to write. It happens 929 * the cow mechanism and make them safe to write. It happens
@@ -2312,6 +2307,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2312 u32 min_type); 2307 u32 min_type);
2313 2308
2314int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2309int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2310int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
2315int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 2311int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2316 struct extent_state **cached_state); 2312 struct extent_state **cached_state);
2317int btrfs_writepages(struct address_space *mapping, 2313int btrfs_writepages(struct address_space *mapping,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index feca04197d02..05f26acfd070 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1759,9 +1759,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1759 min_t(u64, fs_devices->num_devices, 1759 min_t(u64, fs_devices->num_devices,
1760 fs_info->thread_pool_size), 1760 fs_info->thread_pool_size),
1761 &fs_info->generic_worker); 1761 &fs_info->generic_worker);
1762 btrfs_init_workers(&fs_info->enospc_workers, "enospc",
1763 fs_info->thread_pool_size,
1764 &fs_info->generic_worker);
1765 1762
1766 /* a higher idle thresh on the submit workers makes it much more 1763 /* a higher idle thresh on the submit workers makes it much more
1767 * likely that bios will be send down in a sane order to the 1764 * likely that bios will be send down in a sane order to the
@@ -1809,7 +1806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1809 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1806 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1810 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1807 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1811 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1808 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1812 btrfs_start_workers(&fs_info->enospc_workers, 1);
1813 1809
1814 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1810 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1815 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1811 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2040,7 +2036,6 @@ fail_sb_buffer:
2040 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2036 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2041 btrfs_stop_workers(&fs_info->endio_write_workers); 2037 btrfs_stop_workers(&fs_info->endio_write_workers);
2042 btrfs_stop_workers(&fs_info->submit_workers); 2038 btrfs_stop_workers(&fs_info->submit_workers);
2043 btrfs_stop_workers(&fs_info->enospc_workers);
2044fail_iput: 2039fail_iput:
2045 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2040 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2046 iput(fs_info->btree_inode); 2041 iput(fs_info->btree_inode);
@@ -2473,7 +2468,6 @@ int close_ctree(struct btrfs_root *root)
2473 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2468 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2474 btrfs_stop_workers(&fs_info->endio_write_workers); 2469 btrfs_stop_workers(&fs_info->endio_write_workers);
2475 btrfs_stop_workers(&fs_info->submit_workers); 2470 btrfs_stop_workers(&fs_info->submit_workers);
2476 btrfs_stop_workers(&fs_info->enospc_workers);
2477 2471
2478 btrfs_close_devices(fs_info->fs_devices); 2472 btrfs_close_devices(fs_info->fs_devices);
2479 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2473 btrfs_mapping_tree_free(&fs_info->mapping_tree);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2c95507c9abb..f32b1618ee6d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -74,6 +74,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
74static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, 74static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
75 struct btrfs_root *root, 75 struct btrfs_root *root,
76 struct btrfs_space_info *sinfo, u64 num_bytes); 76 struct btrfs_space_info *sinfo, u64 num_bytes);
77static int shrink_delalloc(struct btrfs_trans_handle *trans,
78 struct btrfs_root *root,
79 struct btrfs_space_info *sinfo, u64 to_reclaim);
77 80
78static noinline int 81static noinline int
79block_group_cache_done(struct btrfs_block_group_cache *cache) 82block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2693,7 +2696,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2693 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 2696 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
2694 INIT_LIST_HEAD(&found->block_groups[i]); 2697 INIT_LIST_HEAD(&found->block_groups[i]);
2695 init_rwsem(&found->groups_sem); 2698 init_rwsem(&found->groups_sem);
2696 init_waitqueue_head(&found->flush_wait);
2697 spin_lock_init(&found->lock); 2699 spin_lock_init(&found->lock);
2698 found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | 2700 found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
2699 BTRFS_BLOCK_GROUP_SYSTEM | 2701 BTRFS_BLOCK_GROUP_SYSTEM |
@@ -2907,105 +2909,6 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2907 meta_sinfo->force_delalloc = 0; 2909 meta_sinfo->force_delalloc = 0;
2908} 2910}
2909 2911
2910struct async_flush {
2911 struct btrfs_root *root;
2912 struct btrfs_space_info *info;
2913 struct btrfs_work work;
2914};
2915
2916static noinline void flush_delalloc_async(struct btrfs_work *work)
2917{
2918 struct async_flush *async;
2919 struct btrfs_root *root;
2920 struct btrfs_space_info *info;
2921
2922 async = container_of(work, struct async_flush, work);
2923 root = async->root;
2924 info = async->info;
2925
2926 btrfs_start_delalloc_inodes(root, 0);
2927 wake_up(&info->flush_wait);
2928 btrfs_wait_ordered_extents(root, 0, 0);
2929
2930 spin_lock(&info->lock);
2931 info->flushing = 0;
2932 spin_unlock(&info->lock);
2933 wake_up(&info->flush_wait);
2934
2935 kfree(async);
2936}
2937
2938static void wait_on_flush(struct btrfs_space_info *info)
2939{
2940 DEFINE_WAIT(wait);
2941 u64 used;
2942
2943 while (1) {
2944 prepare_to_wait(&info->flush_wait, &wait,
2945 TASK_UNINTERRUPTIBLE);
2946 spin_lock(&info->lock);
2947 if (!info->flushing) {
2948 spin_unlock(&info->lock);
2949 break;
2950 }
2951
2952 used = info->bytes_used + info->bytes_reserved +
2953 info->bytes_pinned + info->bytes_readonly +
2954 info->bytes_super + info->bytes_root +
2955 info->bytes_may_use + info->bytes_delalloc;
2956 if (used < info->total_bytes) {
2957 spin_unlock(&info->lock);
2958 break;
2959 }
2960 spin_unlock(&info->lock);
2961 schedule();
2962 }
2963 finish_wait(&info->flush_wait, &wait);
2964}
2965
2966static void flush_delalloc(struct btrfs_root *root,
2967 struct btrfs_space_info *info)
2968{
2969 struct async_flush *async;
2970 bool wait = false;
2971
2972 spin_lock(&info->lock);
2973
2974 if (!info->flushing)
2975 info->flushing = 1;
2976 else
2977 wait = true;
2978
2979 spin_unlock(&info->lock);
2980
2981 if (wait) {
2982 wait_on_flush(info);
2983 return;
2984 }
2985
2986 async = kzalloc(sizeof(*async), GFP_NOFS);
2987 if (!async)
2988 goto flush;
2989
2990 async->root = root;
2991 async->info = info;
2992 async->work.func = flush_delalloc_async;
2993
2994 btrfs_queue_worker(&root->fs_info->enospc_workers,
2995 &async->work);
2996 wait_on_flush(info);
2997 return;
2998
2999flush:
3000 btrfs_start_delalloc_inodes(root, 0);
3001 btrfs_wait_ordered_extents(root, 0, 0);
3002
3003 spin_lock(&info->lock);
3004 info->flushing = 0;
3005 spin_unlock(&info->lock);
3006 wake_up(&info->flush_wait);
3007}
3008
3009/* 2912/*
3010 * Reserve metadata space for delalloc. 2913 * Reserve metadata space for delalloc.
3011 */ 2914 */
@@ -3058,7 +2961,7 @@ again:
3058 filemap_flush(inode->i_mapping); 2961 filemap_flush(inode->i_mapping);
3059 goto again; 2962 goto again;
3060 } else if (flushed == 3) { 2963 } else if (flushed == 3) {
3061 flush_delalloc(root, meta_sinfo); 2964 shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
3062 goto again; 2965 goto again;
3063 } 2966 }
3064 spin_lock(&meta_sinfo->lock); 2967 spin_lock(&meta_sinfo->lock);
@@ -3171,7 +3074,7 @@ again:
3171 } 3074 }
3172 3075
3173 if (retries == 2) { 3076 if (retries == 2) {
3174 flush_delalloc(root, meta_sinfo); 3077 shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
3175 goto again; 3078 goto again;
3176 } 3079 }
3177 spin_lock(&meta_sinfo->lock); 3080 spin_lock(&meta_sinfo->lock);
@@ -3197,7 +3100,7 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
3197{ 3100{
3198 struct btrfs_space_info *data_sinfo; 3101 struct btrfs_space_info *data_sinfo;
3199 u64 used; 3102 u64 used;
3200 int ret = 0, committed = 0, flushed = 0; 3103 int ret = 0, committed = 0;
3201 3104
3202 /* make sure bytes are sectorsize aligned */ 3105 /* make sure bytes are sectorsize aligned */
3203 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3106 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3217,13 +3120,6 @@ again:
3217 if (used + bytes > data_sinfo->total_bytes) { 3120 if (used + bytes > data_sinfo->total_bytes) {
3218 struct btrfs_trans_handle *trans; 3121 struct btrfs_trans_handle *trans;
3219 3122
3220 if (!flushed) {
3221 spin_unlock(&data_sinfo->lock);
3222 flush_delalloc(root, data_sinfo);
3223 flushed = 1;
3224 goto again;
3225 }
3226
3227 /* 3123 /*
3228 * if we don't have enough free bytes in this space then we need 3124 * if we don't have enough free bytes in this space then we need
3229 * to alloc a new chunk. 3125 * to alloc a new chunk.
@@ -3467,6 +3363,55 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
3467 return ret == 1 ? 1 : 0; 3363 return ret == 1 ? 1 : 0;
3468} 3364}
3469 3365
3366/*
3367 * shrink metadata reservation for delalloc
3368 */
3369static int shrink_delalloc(struct btrfs_trans_handle *trans,
3370 struct btrfs_root *root,
3371 struct btrfs_space_info *sinfo, u64 to_reclaim)
3372{
3373 u64 reserved;
3374 u64 max_reclaim;
3375 u64 reclaimed = 0;
3376 int pause = 1;
3377 int ret;
3378
3379 spin_lock(&sinfo->lock);
3380 reserved = sinfo->bytes_delalloc;
3381 spin_unlock(&sinfo->lock);
3382
3383 if (reserved == 0)
3384 return 0;
3385
3386 max_reclaim = min(reserved, to_reclaim);
3387
3388 while (1) {
3389 ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
3390 if (!ret) {
3391 __set_current_state(TASK_INTERRUPTIBLE);
3392 schedule_timeout(pause);
3393 pause <<= 1;
3394 if (pause > HZ / 10)
3395 pause = HZ / 10;
3396 } else {
3397 pause = 1;
3398 }
3399
3400 spin_lock(&sinfo->lock);
3401 if (reserved > sinfo->bytes_delalloc)
3402 reclaimed = reserved - sinfo->bytes_delalloc;
3403 reserved = sinfo->bytes_delalloc;
3404 spin_unlock(&sinfo->lock);
3405
3406 if (reserved == 0 || reclaimed >= max_reclaim)
3407 break;
3408
3409 if (trans && trans->transaction->blocked)
3410 return -EAGAIN;
3411 }
3412 return reclaimed >= to_reclaim;
3413}
3414
3470static int update_block_group(struct btrfs_trans_handle *trans, 3415static int update_block_group(struct btrfs_trans_handle *trans,
3471 struct btrfs_root *root, 3416 struct btrfs_root *root,
3472 u64 bytenr, u64 num_bytes, int alloc, 3417 u64 bytenr, u64 num_bytes, int alloc,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2bfdc641d4e3..d53cad1afe26 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5611,6 +5611,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5611 return 0; 5611 return 0;
5612} 5612}
5613 5613
5614int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
5615{
5616 struct btrfs_inode *binode;
5617 struct inode *inode = NULL;
5618
5619 spin_lock(&root->fs_info->delalloc_lock);
5620 while (!list_empty(&root->fs_info->delalloc_inodes)) {
5621 binode = list_entry(root->fs_info->delalloc_inodes.next,
5622 struct btrfs_inode, delalloc_inodes);
5623 inode = igrab(&binode->vfs_inode);
5624 if (inode) {
5625 list_move_tail(&binode->delalloc_inodes,
5626 &root->fs_info->delalloc_inodes);
5627 break;
5628 }
5629
5630 list_del_init(&binode->delalloc_inodes);
5631 cond_resched_lock(&root->fs_info->delalloc_lock);
5632 }
5633 spin_unlock(&root->fs_info->delalloc_lock);
5634
5635 if (inode) {
5636 write_inode_now(inode, 0);
5637 if (delay_iput)
5638 btrfs_add_delayed_iput(inode);
5639 else
5640 iput(inode);
5641 return 1;
5642 }
5643 return 0;
5644}
5645
5614static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 5646static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5615 const char *symname) 5647 const char *symname)
5616{ 5648{