diff options
author | Yan, Zheng <zheng.yan@oracle.com> | 2010-05-16 10:46:25 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-05-25 10:34:48 -0400 |
commit | 5da9d01b66458b180a6bee0e637a1d0a3effc622 (patch) | |
tree | 47eca61c0ad07ddc791cb7677c548d663fbac818 /fs | |
parent | 424499dbd0c4d88742bf581b5714b27fb44b9fef (diff) |
Btrfs: Shrink delay allocated space in a synchronized manner
Shrinking delayed allocation space in a synchronized manner is more
controllable than flushing all delayed allocation space in an async
thread.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.h | 6 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 165 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 32 |
4 files changed, 88 insertions, 121 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a68f34603b9d..85c7b95dd2fe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -700,10 +700,6 @@ struct btrfs_space_info { | |||
700 | 700 | ||
701 | struct list_head list; | 701 | struct list_head list; |
702 | 702 | ||
703 | /* for controlling how we free up space for allocations */ | ||
704 | wait_queue_head_t flush_wait; | ||
705 | int flushing; | ||
706 | |||
707 | /* for block groups in our same type */ | 703 | /* for block groups in our same type */ |
708 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; | 704 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
709 | spinlock_t lock; | 705 | spinlock_t lock; |
@@ -928,7 +924,6 @@ struct btrfs_fs_info { | |||
928 | struct btrfs_workers endio_meta_write_workers; | 924 | struct btrfs_workers endio_meta_write_workers; |
929 | struct btrfs_workers endio_write_workers; | 925 | struct btrfs_workers endio_write_workers; |
930 | struct btrfs_workers submit_workers; | 926 | struct btrfs_workers submit_workers; |
931 | struct btrfs_workers enospc_workers; | ||
932 | /* | 927 | /* |
933 | * fixup workers take dirty pages that didn't properly go through | 928 | * fixup workers take dirty pages that didn't properly go through |
934 | * the cow mechanism and make them safe to write. It happens | 929 | * the cow mechanism and make them safe to write. It happens |
@@ -2312,6 +2307,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2312 | u32 min_type); | 2307 | u32 min_type); |
2313 | 2308 | ||
2314 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2309 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
2310 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
2315 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2311 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
2316 | struct extent_state **cached_state); | 2312 | struct extent_state **cached_state); |
2317 | int btrfs_writepages(struct address_space *mapping, | 2313 | int btrfs_writepages(struct address_space *mapping, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index feca04197d02..05f26acfd070 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1759,9 +1759,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1759 | min_t(u64, fs_devices->num_devices, | 1759 | min_t(u64, fs_devices->num_devices, |
1760 | fs_info->thread_pool_size), | 1760 | fs_info->thread_pool_size), |
1761 | &fs_info->generic_worker); | 1761 | &fs_info->generic_worker); |
1762 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
1763 | fs_info->thread_pool_size, | ||
1764 | &fs_info->generic_worker); | ||
1765 | 1762 | ||
1766 | /* a higher idle thresh on the submit workers makes it much more | 1763 | /* a higher idle thresh on the submit workers makes it much more |
1767 | * likely that bios will be send down in a sane order to the | 1764 | * likely that bios will be send down in a sane order to the |
@@ -1809,7 +1806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1809 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1806 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1810 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1807 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1811 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1808 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1812 | btrfs_start_workers(&fs_info->enospc_workers, 1); | ||
1813 | 1809 | ||
1814 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1810 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1815 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1811 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -2040,7 +2036,6 @@ fail_sb_buffer: | |||
2040 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2036 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2041 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2037 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2042 | btrfs_stop_workers(&fs_info->submit_workers); | 2038 | btrfs_stop_workers(&fs_info->submit_workers); |
2043 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2044 | fail_iput: | 2039 | fail_iput: |
2045 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2040 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2046 | iput(fs_info->btree_inode); | 2041 | iput(fs_info->btree_inode); |
@@ -2473,7 +2468,6 @@ int close_ctree(struct btrfs_root *root) | |||
2473 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2468 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2474 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2469 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2475 | btrfs_stop_workers(&fs_info->submit_workers); | 2470 | btrfs_stop_workers(&fs_info->submit_workers); |
2476 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2477 | 2471 | ||
2478 | btrfs_close_devices(fs_info->fs_devices); | 2472 | btrfs_close_devices(fs_info->fs_devices); |
2479 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2473 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2c95507c9abb..f32b1618ee6d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -74,6 +74,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
74 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | 74 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, |
75 | struct btrfs_root *root, | 75 | struct btrfs_root *root, |
76 | struct btrfs_space_info *sinfo, u64 num_bytes); | 76 | struct btrfs_space_info *sinfo, u64 num_bytes); |
77 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
78 | struct btrfs_root *root, | ||
79 | struct btrfs_space_info *sinfo, u64 to_reclaim); | ||
77 | 80 | ||
78 | static noinline int | 81 | static noinline int |
79 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 82 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -2693,7 +2696,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2693 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | 2696 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
2694 | INIT_LIST_HEAD(&found->block_groups[i]); | 2697 | INIT_LIST_HEAD(&found->block_groups[i]); |
2695 | init_rwsem(&found->groups_sem); | 2698 | init_rwsem(&found->groups_sem); |
2696 | init_waitqueue_head(&found->flush_wait); | ||
2697 | spin_lock_init(&found->lock); | 2699 | spin_lock_init(&found->lock); |
2698 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | | 2700 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
2699 | BTRFS_BLOCK_GROUP_SYSTEM | | 2701 | BTRFS_BLOCK_GROUP_SYSTEM | |
@@ -2907,105 +2909,6 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | |||
2907 | meta_sinfo->force_delalloc = 0; | 2909 | meta_sinfo->force_delalloc = 0; |
2908 | } | 2910 | } |
2909 | 2911 | ||
2910 | struct async_flush { | ||
2911 | struct btrfs_root *root; | ||
2912 | struct btrfs_space_info *info; | ||
2913 | struct btrfs_work work; | ||
2914 | }; | ||
2915 | |||
2916 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
2917 | { | ||
2918 | struct async_flush *async; | ||
2919 | struct btrfs_root *root; | ||
2920 | struct btrfs_space_info *info; | ||
2921 | |||
2922 | async = container_of(work, struct async_flush, work); | ||
2923 | root = async->root; | ||
2924 | info = async->info; | ||
2925 | |||
2926 | btrfs_start_delalloc_inodes(root, 0); | ||
2927 | wake_up(&info->flush_wait); | ||
2928 | btrfs_wait_ordered_extents(root, 0, 0); | ||
2929 | |||
2930 | spin_lock(&info->lock); | ||
2931 | info->flushing = 0; | ||
2932 | spin_unlock(&info->lock); | ||
2933 | wake_up(&info->flush_wait); | ||
2934 | |||
2935 | kfree(async); | ||
2936 | } | ||
2937 | |||
2938 | static void wait_on_flush(struct btrfs_space_info *info) | ||
2939 | { | ||
2940 | DEFINE_WAIT(wait); | ||
2941 | u64 used; | ||
2942 | |||
2943 | while (1) { | ||
2944 | prepare_to_wait(&info->flush_wait, &wait, | ||
2945 | TASK_UNINTERRUPTIBLE); | ||
2946 | spin_lock(&info->lock); | ||
2947 | if (!info->flushing) { | ||
2948 | spin_unlock(&info->lock); | ||
2949 | break; | ||
2950 | } | ||
2951 | |||
2952 | used = info->bytes_used + info->bytes_reserved + | ||
2953 | info->bytes_pinned + info->bytes_readonly + | ||
2954 | info->bytes_super + info->bytes_root + | ||
2955 | info->bytes_may_use + info->bytes_delalloc; | ||
2956 | if (used < info->total_bytes) { | ||
2957 | spin_unlock(&info->lock); | ||
2958 | break; | ||
2959 | } | ||
2960 | spin_unlock(&info->lock); | ||
2961 | schedule(); | ||
2962 | } | ||
2963 | finish_wait(&info->flush_wait, &wait); | ||
2964 | } | ||
2965 | |||
2966 | static void flush_delalloc(struct btrfs_root *root, | ||
2967 | struct btrfs_space_info *info) | ||
2968 | { | ||
2969 | struct async_flush *async; | ||
2970 | bool wait = false; | ||
2971 | |||
2972 | spin_lock(&info->lock); | ||
2973 | |||
2974 | if (!info->flushing) | ||
2975 | info->flushing = 1; | ||
2976 | else | ||
2977 | wait = true; | ||
2978 | |||
2979 | spin_unlock(&info->lock); | ||
2980 | |||
2981 | if (wait) { | ||
2982 | wait_on_flush(info); | ||
2983 | return; | ||
2984 | } | ||
2985 | |||
2986 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
2987 | if (!async) | ||
2988 | goto flush; | ||
2989 | |||
2990 | async->root = root; | ||
2991 | async->info = info; | ||
2992 | async->work.func = flush_delalloc_async; | ||
2993 | |||
2994 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
2995 | &async->work); | ||
2996 | wait_on_flush(info); | ||
2997 | return; | ||
2998 | |||
2999 | flush: | ||
3000 | btrfs_start_delalloc_inodes(root, 0); | ||
3001 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3002 | |||
3003 | spin_lock(&info->lock); | ||
3004 | info->flushing = 0; | ||
3005 | spin_unlock(&info->lock); | ||
3006 | wake_up(&info->flush_wait); | ||
3007 | } | ||
3008 | |||
3009 | /* | 2912 | /* |
3010 | * Reserve metadata space for delalloc. | 2913 | * Reserve metadata space for delalloc. |
3011 | */ | 2914 | */ |
@@ -3058,7 +2961,7 @@ again: | |||
3058 | filemap_flush(inode->i_mapping); | 2961 | filemap_flush(inode->i_mapping); |
3059 | goto again; | 2962 | goto again; |
3060 | } else if (flushed == 3) { | 2963 | } else if (flushed == 3) { |
3061 | flush_delalloc(root, meta_sinfo); | 2964 | shrink_delalloc(NULL, root, meta_sinfo, num_bytes); |
3062 | goto again; | 2965 | goto again; |
3063 | } | 2966 | } |
3064 | spin_lock(&meta_sinfo->lock); | 2967 | spin_lock(&meta_sinfo->lock); |
@@ -3171,7 +3074,7 @@ again: | |||
3171 | } | 3074 | } |
3172 | 3075 | ||
3173 | if (retries == 2) { | 3076 | if (retries == 2) { |
3174 | flush_delalloc(root, meta_sinfo); | 3077 | shrink_delalloc(NULL, root, meta_sinfo, num_bytes); |
3175 | goto again; | 3078 | goto again; |
3176 | } | 3079 | } |
3177 | spin_lock(&meta_sinfo->lock); | 3080 | spin_lock(&meta_sinfo->lock); |
@@ -3197,7 +3100,7 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
3197 | { | 3100 | { |
3198 | struct btrfs_space_info *data_sinfo; | 3101 | struct btrfs_space_info *data_sinfo; |
3199 | u64 used; | 3102 | u64 used; |
3200 | int ret = 0, committed = 0, flushed = 0; | 3103 | int ret = 0, committed = 0; |
3201 | 3104 | ||
3202 | /* make sure bytes are sectorsize aligned */ | 3105 | /* make sure bytes are sectorsize aligned */ |
3203 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3106 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
@@ -3217,13 +3120,6 @@ again: | |||
3217 | if (used + bytes > data_sinfo->total_bytes) { | 3120 | if (used + bytes > data_sinfo->total_bytes) { |
3218 | struct btrfs_trans_handle *trans; | 3121 | struct btrfs_trans_handle *trans; |
3219 | 3122 | ||
3220 | if (!flushed) { | ||
3221 | spin_unlock(&data_sinfo->lock); | ||
3222 | flush_delalloc(root, data_sinfo); | ||
3223 | flushed = 1; | ||
3224 | goto again; | ||
3225 | } | ||
3226 | |||
3227 | /* | 3123 | /* |
3228 | * if we don't have enough free bytes in this space then we need | 3124 | * if we don't have enough free bytes in this space then we need |
3229 | * to alloc a new chunk. | 3125 | * to alloc a new chunk. |
@@ -3467,6 +3363,55 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | |||
3467 | return ret == 1 ? 1 : 0; | 3363 | return ret == 1 ? 1 : 0; |
3468 | } | 3364 | } |
3469 | 3365 | ||
3366 | /* | ||
3367 | * shrink metadata reservation for delalloc | ||
3368 | */ | ||
3369 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
3370 | struct btrfs_root *root, | ||
3371 | struct btrfs_space_info *sinfo, u64 to_reclaim) | ||
3372 | { | ||
3373 | u64 reserved; | ||
3374 | u64 max_reclaim; | ||
3375 | u64 reclaimed = 0; | ||
3376 | int pause = 1; | ||
3377 | int ret; | ||
3378 | |||
3379 | spin_lock(&sinfo->lock); | ||
3380 | reserved = sinfo->bytes_delalloc; | ||
3381 | spin_unlock(&sinfo->lock); | ||
3382 | |||
3383 | if (reserved == 0) | ||
3384 | return 0; | ||
3385 | |||
3386 | max_reclaim = min(reserved, to_reclaim); | ||
3387 | |||
3388 | while (1) { | ||
3389 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | ||
3390 | if (!ret) { | ||
3391 | __set_current_state(TASK_INTERRUPTIBLE); | ||
3392 | schedule_timeout(pause); | ||
3393 | pause <<= 1; | ||
3394 | if (pause > HZ / 10) | ||
3395 | pause = HZ / 10; | ||
3396 | } else { | ||
3397 | pause = 1; | ||
3398 | } | ||
3399 | |||
3400 | spin_lock(&sinfo->lock); | ||
3401 | if (reserved > sinfo->bytes_delalloc) | ||
3402 | reclaimed = reserved - sinfo->bytes_delalloc; | ||
3403 | reserved = sinfo->bytes_delalloc; | ||
3404 | spin_unlock(&sinfo->lock); | ||
3405 | |||
3406 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
3407 | break; | ||
3408 | |||
3409 | if (trans && trans->transaction->blocked) | ||
3410 | return -EAGAIN; | ||
3411 | } | ||
3412 | return reclaimed >= to_reclaim; | ||
3413 | } | ||
3414 | |||
3470 | static int update_block_group(struct btrfs_trans_handle *trans, | 3415 | static int update_block_group(struct btrfs_trans_handle *trans, |
3471 | struct btrfs_root *root, | 3416 | struct btrfs_root *root, |
3472 | u64 bytenr, u64 num_bytes, int alloc, | 3417 | u64 bytenr, u64 num_bytes, int alloc, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bfdc641d4e3..d53cad1afe26 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5611,6 +5611,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
5611 | return 0; | 5611 | return 0; |
5612 | } | 5612 | } |
5613 | 5613 | ||
5614 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
5615 | { | ||
5616 | struct btrfs_inode *binode; | ||
5617 | struct inode *inode = NULL; | ||
5618 | |||
5619 | spin_lock(&root->fs_info->delalloc_lock); | ||
5620 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
5621 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
5622 | struct btrfs_inode, delalloc_inodes); | ||
5623 | inode = igrab(&binode->vfs_inode); | ||
5624 | if (inode) { | ||
5625 | list_move_tail(&binode->delalloc_inodes, | ||
5626 | &root->fs_info->delalloc_inodes); | ||
5627 | break; | ||
5628 | } | ||
5629 | |||
5630 | list_del_init(&binode->delalloc_inodes); | ||
5631 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
5632 | } | ||
5633 | spin_unlock(&root->fs_info->delalloc_lock); | ||
5634 | |||
5635 | if (inode) { | ||
5636 | write_inode_now(inode, 0); | ||
5637 | if (delay_iput) | ||
5638 | btrfs_add_delayed_iput(inode); | ||
5639 | else | ||
5640 | iput(inode); | ||
5641 | return 1; | ||
5642 | } | ||
5643 | return 0; | ||
5644 | } | ||
5645 | |||
5614 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 5646 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
5615 | const char *symname) | 5647 | const char *symname) |
5616 | { | 5648 | { |