diff options
| author | Yan, Zheng <zheng.yan@oracle.com> | 2010-05-16 10:46:25 -0400 |
|---|---|---|
| committer | Chris Mason <chris.mason@oracle.com> | 2010-05-25 10:34:48 -0400 |
| commit | 5da9d01b66458b180a6bee0e637a1d0a3effc622 (patch) | |
| tree | 47eca61c0ad07ddc791cb7677c548d663fbac818 | |
| parent | 424499dbd0c4d88742bf581b5714b27fb44b9fef (diff) | |
Btrfs: Shrink delayed allocation space in a synchronized manner
Shrinking delayed allocation space in a synchronized manner is more
controllable than flushing all delayed allocation space in an async
thread.
Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
| -rw-r--r-- | fs/btrfs/ctree.h | 6 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 165 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 32 |
4 files changed, 88 insertions, 121 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a68f34603b9d..85c7b95dd2fe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -700,10 +700,6 @@ struct btrfs_space_info { | |||
| 700 | 700 | ||
| 701 | struct list_head list; | 701 | struct list_head list; |
| 702 | 702 | ||
| 703 | /* for controlling how we free up space for allocations */ | ||
| 704 | wait_queue_head_t flush_wait; | ||
| 705 | int flushing; | ||
| 706 | |||
| 707 | /* for block groups in our same type */ | 703 | /* for block groups in our same type */ |
| 708 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; | 704 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
| 709 | spinlock_t lock; | 705 | spinlock_t lock; |
| @@ -928,7 +924,6 @@ struct btrfs_fs_info { | |||
| 928 | struct btrfs_workers endio_meta_write_workers; | 924 | struct btrfs_workers endio_meta_write_workers; |
| 929 | struct btrfs_workers endio_write_workers; | 925 | struct btrfs_workers endio_write_workers; |
| 930 | struct btrfs_workers submit_workers; | 926 | struct btrfs_workers submit_workers; |
| 931 | struct btrfs_workers enospc_workers; | ||
| 932 | /* | 927 | /* |
| 933 | * fixup workers take dirty pages that didn't properly go through | 928 | * fixup workers take dirty pages that didn't properly go through |
| 934 | * the cow mechanism and make them safe to write. It happens | 929 | * the cow mechanism and make them safe to write. It happens |
| @@ -2312,6 +2307,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 2312 | u32 min_type); | 2307 | u32 min_type); |
| 2313 | 2308 | ||
| 2314 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2309 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
| 2310 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
| 2315 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2311 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 2316 | struct extent_state **cached_state); | 2312 | struct extent_state **cached_state); |
| 2317 | int btrfs_writepages(struct address_space *mapping, | 2313 | int btrfs_writepages(struct address_space *mapping, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index feca04197d02..05f26acfd070 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1759,9 +1759,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1759 | min_t(u64, fs_devices->num_devices, | 1759 | min_t(u64, fs_devices->num_devices, |
| 1760 | fs_info->thread_pool_size), | 1760 | fs_info->thread_pool_size), |
| 1761 | &fs_info->generic_worker); | 1761 | &fs_info->generic_worker); |
| 1762 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
| 1763 | fs_info->thread_pool_size, | ||
| 1764 | &fs_info->generic_worker); | ||
| 1765 | 1762 | ||
| 1766 | /* a higher idle thresh on the submit workers makes it much more | 1763 | /* a higher idle thresh on the submit workers makes it much more |
| 1767 | * likely that bios will be send down in a sane order to the | 1764 | * likely that bios will be send down in a sane order to the |
| @@ -1809,7 +1806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1809 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1806 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
| 1810 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1807 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
| 1811 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1808 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
| 1812 | btrfs_start_workers(&fs_info->enospc_workers, 1); | ||
| 1813 | 1809 | ||
| 1814 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1810 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1815 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1811 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
| @@ -2040,7 +2036,6 @@ fail_sb_buffer: | |||
| 2040 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2036 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2041 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2037 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2042 | btrfs_stop_workers(&fs_info->submit_workers); | 2038 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2043 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2044 | fail_iput: | 2039 | fail_iput: |
| 2045 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2040 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
| 2046 | iput(fs_info->btree_inode); | 2041 | iput(fs_info->btree_inode); |
| @@ -2473,7 +2468,6 @@ int close_ctree(struct btrfs_root *root) | |||
| 2473 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2468 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2474 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2469 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2475 | btrfs_stop_workers(&fs_info->submit_workers); | 2470 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2476 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2477 | 2471 | ||
| 2478 | btrfs_close_devices(fs_info->fs_devices); | 2472 | btrfs_close_devices(fs_info->fs_devices); |
| 2479 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2473 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2c95507c9abb..f32b1618ee6d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -74,6 +74,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
| 74 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | 74 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, |
| 75 | struct btrfs_root *root, | 75 | struct btrfs_root *root, |
| 76 | struct btrfs_space_info *sinfo, u64 num_bytes); | 76 | struct btrfs_space_info *sinfo, u64 num_bytes); |
| 77 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
| 78 | struct btrfs_root *root, | ||
| 79 | struct btrfs_space_info *sinfo, u64 to_reclaim); | ||
| 77 | 80 | ||
| 78 | static noinline int | 81 | static noinline int |
| 79 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 82 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
| @@ -2693,7 +2696,6 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 2693 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) | 2696 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
| 2694 | INIT_LIST_HEAD(&found->block_groups[i]); | 2697 | INIT_LIST_HEAD(&found->block_groups[i]); |
| 2695 | init_rwsem(&found->groups_sem); | 2698 | init_rwsem(&found->groups_sem); |
| 2696 | init_waitqueue_head(&found->flush_wait); | ||
| 2697 | spin_lock_init(&found->lock); | 2699 | spin_lock_init(&found->lock); |
| 2698 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | | 2700 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
| 2699 | BTRFS_BLOCK_GROUP_SYSTEM | | 2701 | BTRFS_BLOCK_GROUP_SYSTEM | |
| @@ -2907,105 +2909,6 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | |||
| 2907 | meta_sinfo->force_delalloc = 0; | 2909 | meta_sinfo->force_delalloc = 0; |
| 2908 | } | 2910 | } |
| 2909 | 2911 | ||
| 2910 | struct async_flush { | ||
| 2911 | struct btrfs_root *root; | ||
| 2912 | struct btrfs_space_info *info; | ||
| 2913 | struct btrfs_work work; | ||
| 2914 | }; | ||
| 2915 | |||
| 2916 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
| 2917 | { | ||
| 2918 | struct async_flush *async; | ||
| 2919 | struct btrfs_root *root; | ||
| 2920 | struct btrfs_space_info *info; | ||
| 2921 | |||
| 2922 | async = container_of(work, struct async_flush, work); | ||
| 2923 | root = async->root; | ||
| 2924 | info = async->info; | ||
| 2925 | |||
| 2926 | btrfs_start_delalloc_inodes(root, 0); | ||
| 2927 | wake_up(&info->flush_wait); | ||
| 2928 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 2929 | |||
| 2930 | spin_lock(&info->lock); | ||
| 2931 | info->flushing = 0; | ||
| 2932 | spin_unlock(&info->lock); | ||
| 2933 | wake_up(&info->flush_wait); | ||
| 2934 | |||
| 2935 | kfree(async); | ||
| 2936 | } | ||
| 2937 | |||
| 2938 | static void wait_on_flush(struct btrfs_space_info *info) | ||
| 2939 | { | ||
| 2940 | DEFINE_WAIT(wait); | ||
| 2941 | u64 used; | ||
| 2942 | |||
| 2943 | while (1) { | ||
| 2944 | prepare_to_wait(&info->flush_wait, &wait, | ||
| 2945 | TASK_UNINTERRUPTIBLE); | ||
| 2946 | spin_lock(&info->lock); | ||
| 2947 | if (!info->flushing) { | ||
| 2948 | spin_unlock(&info->lock); | ||
| 2949 | break; | ||
| 2950 | } | ||
| 2951 | |||
| 2952 | used = info->bytes_used + info->bytes_reserved + | ||
| 2953 | info->bytes_pinned + info->bytes_readonly + | ||
| 2954 | info->bytes_super + info->bytes_root + | ||
| 2955 | info->bytes_may_use + info->bytes_delalloc; | ||
| 2956 | if (used < info->total_bytes) { | ||
| 2957 | spin_unlock(&info->lock); | ||
| 2958 | break; | ||
| 2959 | } | ||
| 2960 | spin_unlock(&info->lock); | ||
| 2961 | schedule(); | ||
| 2962 | } | ||
| 2963 | finish_wait(&info->flush_wait, &wait); | ||
| 2964 | } | ||
| 2965 | |||
| 2966 | static void flush_delalloc(struct btrfs_root *root, | ||
| 2967 | struct btrfs_space_info *info) | ||
| 2968 | { | ||
| 2969 | struct async_flush *async; | ||
| 2970 | bool wait = false; | ||
| 2971 | |||
| 2972 | spin_lock(&info->lock); | ||
| 2973 | |||
| 2974 | if (!info->flushing) | ||
| 2975 | info->flushing = 1; | ||
| 2976 | else | ||
| 2977 | wait = true; | ||
| 2978 | |||
| 2979 | spin_unlock(&info->lock); | ||
| 2980 | |||
| 2981 | if (wait) { | ||
| 2982 | wait_on_flush(info); | ||
| 2983 | return; | ||
| 2984 | } | ||
| 2985 | |||
| 2986 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
| 2987 | if (!async) | ||
| 2988 | goto flush; | ||
| 2989 | |||
| 2990 | async->root = root; | ||
| 2991 | async->info = info; | ||
| 2992 | async->work.func = flush_delalloc_async; | ||
| 2993 | |||
| 2994 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
| 2995 | &async->work); | ||
| 2996 | wait_on_flush(info); | ||
| 2997 | return; | ||
| 2998 | |||
| 2999 | flush: | ||
| 3000 | btrfs_start_delalloc_inodes(root, 0); | ||
| 3001 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 3002 | |||
| 3003 | spin_lock(&info->lock); | ||
| 3004 | info->flushing = 0; | ||
| 3005 | spin_unlock(&info->lock); | ||
| 3006 | wake_up(&info->flush_wait); | ||
| 3007 | } | ||
| 3008 | |||
| 3009 | /* | 2912 | /* |
| 3010 | * Reserve metadata space for delalloc. | 2913 | * Reserve metadata space for delalloc. |
| 3011 | */ | 2914 | */ |
| @@ -3058,7 +2961,7 @@ again: | |||
| 3058 | filemap_flush(inode->i_mapping); | 2961 | filemap_flush(inode->i_mapping); |
| 3059 | goto again; | 2962 | goto again; |
| 3060 | } else if (flushed == 3) { | 2963 | } else if (flushed == 3) { |
| 3061 | flush_delalloc(root, meta_sinfo); | 2964 | shrink_delalloc(NULL, root, meta_sinfo, num_bytes); |
| 3062 | goto again; | 2965 | goto again; |
| 3063 | } | 2966 | } |
| 3064 | spin_lock(&meta_sinfo->lock); | 2967 | spin_lock(&meta_sinfo->lock); |
| @@ -3171,7 +3074,7 @@ again: | |||
| 3171 | } | 3074 | } |
| 3172 | 3075 | ||
| 3173 | if (retries == 2) { | 3076 | if (retries == 2) { |
| 3174 | flush_delalloc(root, meta_sinfo); | 3077 | shrink_delalloc(NULL, root, meta_sinfo, num_bytes); |
| 3175 | goto again; | 3078 | goto again; |
| 3176 | } | 3079 | } |
| 3177 | spin_lock(&meta_sinfo->lock); | 3080 | spin_lock(&meta_sinfo->lock); |
| @@ -3197,7 +3100,7 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
| 3197 | { | 3100 | { |
| 3198 | struct btrfs_space_info *data_sinfo; | 3101 | struct btrfs_space_info *data_sinfo; |
| 3199 | u64 used; | 3102 | u64 used; |
| 3200 | int ret = 0, committed = 0, flushed = 0; | 3103 | int ret = 0, committed = 0; |
| 3201 | 3104 | ||
| 3202 | /* make sure bytes are sectorsize aligned */ | 3105 | /* make sure bytes are sectorsize aligned */ |
| 3203 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3106 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| @@ -3217,13 +3120,6 @@ again: | |||
| 3217 | if (used + bytes > data_sinfo->total_bytes) { | 3120 | if (used + bytes > data_sinfo->total_bytes) { |
| 3218 | struct btrfs_trans_handle *trans; | 3121 | struct btrfs_trans_handle *trans; |
| 3219 | 3122 | ||
| 3220 | if (!flushed) { | ||
| 3221 | spin_unlock(&data_sinfo->lock); | ||
| 3222 | flush_delalloc(root, data_sinfo); | ||
| 3223 | flushed = 1; | ||
| 3224 | goto again; | ||
| 3225 | } | ||
| 3226 | |||
| 3227 | /* | 3123 | /* |
| 3228 | * if we don't have enough free bytes in this space then we need | 3124 | * if we don't have enough free bytes in this space then we need |
| 3229 | * to alloc a new chunk. | 3125 | * to alloc a new chunk. |
| @@ -3467,6 +3363,55 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | |||
| 3467 | return ret == 1 ? 1 : 0; | 3363 | return ret == 1 ? 1 : 0; |
| 3468 | } | 3364 | } |
| 3469 | 3365 | ||
| 3366 | /* | ||
| 3367 | * shrink metadata reservation for delalloc | ||
| 3368 | */ | ||
| 3369 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
| 3370 | struct btrfs_root *root, | ||
| 3371 | struct btrfs_space_info *sinfo, u64 to_reclaim) | ||
| 3372 | { | ||
| 3373 | u64 reserved; | ||
| 3374 | u64 max_reclaim; | ||
| 3375 | u64 reclaimed = 0; | ||
| 3376 | int pause = 1; | ||
| 3377 | int ret; | ||
| 3378 | |||
| 3379 | spin_lock(&sinfo->lock); | ||
| 3380 | reserved = sinfo->bytes_delalloc; | ||
| 3381 | spin_unlock(&sinfo->lock); | ||
| 3382 | |||
| 3383 | if (reserved == 0) | ||
| 3384 | return 0; | ||
| 3385 | |||
| 3386 | max_reclaim = min(reserved, to_reclaim); | ||
| 3387 | |||
| 3388 | while (1) { | ||
| 3389 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | ||
| 3390 | if (!ret) { | ||
| 3391 | __set_current_state(TASK_INTERRUPTIBLE); | ||
| 3392 | schedule_timeout(pause); | ||
| 3393 | pause <<= 1; | ||
| 3394 | if (pause > HZ / 10) | ||
| 3395 | pause = HZ / 10; | ||
| 3396 | } else { | ||
| 3397 | pause = 1; | ||
| 3398 | } | ||
| 3399 | |||
| 3400 | spin_lock(&sinfo->lock); | ||
| 3401 | if (reserved > sinfo->bytes_delalloc) | ||
| 3402 | reclaimed = reserved - sinfo->bytes_delalloc; | ||
| 3403 | reserved = sinfo->bytes_delalloc; | ||
| 3404 | spin_unlock(&sinfo->lock); | ||
| 3405 | |||
| 3406 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
| 3407 | break; | ||
| 3408 | |||
| 3409 | if (trans && trans->transaction->blocked) | ||
| 3410 | return -EAGAIN; | ||
| 3411 | } | ||
| 3412 | return reclaimed >= to_reclaim; | ||
| 3413 | } | ||
| 3414 | |||
| 3470 | static int update_block_group(struct btrfs_trans_handle *trans, | 3415 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 3471 | struct btrfs_root *root, | 3416 | struct btrfs_root *root, |
| 3472 | u64 bytenr, u64 num_bytes, int alloc, | 3417 | u64 bytenr, u64 num_bytes, int alloc, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bfdc641d4e3..d53cad1afe26 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -5611,6 +5611,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 5611 | return 0; | 5611 | return 0; |
| 5612 | } | 5612 | } |
| 5613 | 5613 | ||
| 5614 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
| 5615 | { | ||
| 5616 | struct btrfs_inode *binode; | ||
| 5617 | struct inode *inode = NULL; | ||
| 5618 | |||
| 5619 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 5620 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
| 5621 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
| 5622 | struct btrfs_inode, delalloc_inodes); | ||
| 5623 | inode = igrab(&binode->vfs_inode); | ||
| 5624 | if (inode) { | ||
| 5625 | list_move_tail(&binode->delalloc_inodes, | ||
| 5626 | &root->fs_info->delalloc_inodes); | ||
| 5627 | break; | ||
| 5628 | } | ||
| 5629 | |||
| 5630 | list_del_init(&binode->delalloc_inodes); | ||
| 5631 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
| 5632 | } | ||
| 5633 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 5634 | |||
| 5635 | if (inode) { | ||
| 5636 | write_inode_now(inode, 0); | ||
| 5637 | if (delay_iput) | ||
| 5638 | btrfs_add_delayed_iput(inode); | ||
| 5639 | else | ||
| 5640 | iput(inode); | ||
| 5641 | return 1; | ||
| 5642 | } | ||
| 5643 | return 0; | ||
| 5644 | } | ||
| 5645 | |||
| 5614 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 5646 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
| 5615 | const char *symname) | 5647 | const char *symname) |
| 5616 | { | 5648 | { |
