diff options
author | Miao Xie <miaox@cn.fujitsu.com> | 2012-10-16 07:33:38 -0400 |
---|---|---|
committer | Josef Bacik <jbacik@fusionio.com> | 2012-12-11 13:31:31 -0500 |
commit | 08e007d2e57744472a9424735a368ffe6d625597 (patch) | |
tree | 84227c096c05bc4c5430190f0f550b094d3bf2b7 /fs/btrfs/extent-tree.c | |
parent | 561c294d4cfb30c4acfa0a243448fc55af730d87 (diff) |
Btrfs: improve the noflush reservation
In some places (such as when evicting an inode), we cannot flush the reserved
space of delalloc, but flushing the delayed directory index and delayed inode
is OK. However, we don't try to flush those things and just give up when there
is not enough space to be reserved. This patch fixes this problem.
We define 3 types of flush operations: NO_FLUSH, FLUSH_LIMIT and FLUSH_ALL.
If we are in a transaction, we should not flush anything, or a deadlock
would happen, so use NO_FLUSH. If flushing the reserved space of delalloc
would cause a deadlock, use FLUSH_LIMIT. In the other cases, FLUSH_ALL is used,
and we will flush everything.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 97 |
1 files changed, 48 insertions, 49 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cfcce290aba..2136adda2a0f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3644,7 +3644,7 @@ out: | |||
3644 | 3644 | ||
3645 | static int can_overcommit(struct btrfs_root *root, | 3645 | static int can_overcommit(struct btrfs_root *root, |
3646 | struct btrfs_space_info *space_info, u64 bytes, | 3646 | struct btrfs_space_info *space_info, u64 bytes, |
3647 | int flush) | 3647 | enum btrfs_reserve_flush_enum flush) |
3648 | { | 3648 | { |
3649 | u64 profile = btrfs_get_alloc_profile(root, 0); | 3649 | u64 profile = btrfs_get_alloc_profile(root, 0); |
3650 | u64 avail; | 3650 | u64 avail; |
@@ -3672,7 +3672,7 @@ static int can_overcommit(struct btrfs_root *root, | |||
3672 | * 1/2th of the space. If we can flush, don't let us overcommit | 3672 | * 1/2th of the space. If we can flush, don't let us overcommit |
3673 | * too much, let it overcommit up to 1/8 of the space. | 3673 | * too much, let it overcommit up to 1/8 of the space. |
3674 | */ | 3674 | */ |
3675 | if (flush) | 3675 | if (flush == BTRFS_RESERVE_FLUSH_ALL) |
3676 | avail >>= 3; | 3676 | avail >>= 3; |
3677 | else | 3677 | else |
3678 | avail >>= 1; | 3678 | avail >>= 1; |
@@ -3696,6 +3696,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3696 | long time_left; | 3696 | long time_left; |
3697 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3697 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
3698 | int loops = 0; | 3698 | int loops = 0; |
3699 | enum btrfs_reserve_flush_enum flush; | ||
3699 | 3700 | ||
3700 | trans = (struct btrfs_trans_handle *)current->journal_info; | 3701 | trans = (struct btrfs_trans_handle *)current->journal_info; |
3701 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3702 | block_rsv = &root->fs_info->delalloc_block_rsv; |
@@ -3723,8 +3724,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3723 | wait_event(root->fs_info->async_submit_wait, | 3724 | wait_event(root->fs_info->async_submit_wait, |
3724 | !atomic_read(&root->fs_info->async_delalloc_pages)); | 3725 | !atomic_read(&root->fs_info->async_delalloc_pages)); |
3725 | 3726 | ||
3727 | if (!trans) | ||
3728 | flush = BTRFS_RESERVE_FLUSH_ALL; | ||
3729 | else | ||
3730 | flush = BTRFS_RESERVE_NO_FLUSH; | ||
3726 | spin_lock(&space_info->lock); | 3731 | spin_lock(&space_info->lock); |
3727 | if (can_overcommit(root, space_info, orig, !trans)) { | 3732 | if (can_overcommit(root, space_info, orig, flush)) { |
3728 | spin_unlock(&space_info->lock); | 3733 | spin_unlock(&space_info->lock); |
3729 | break; | 3734 | break; |
3730 | } | 3735 | } |
@@ -3882,7 +3887,8 @@ static int flush_space(struct btrfs_root *root, | |||
3882 | */ | 3887 | */ |
3883 | static int reserve_metadata_bytes(struct btrfs_root *root, | 3888 | static int reserve_metadata_bytes(struct btrfs_root *root, |
3884 | struct btrfs_block_rsv *block_rsv, | 3889 | struct btrfs_block_rsv *block_rsv, |
3885 | u64 orig_bytes, int flush) | 3890 | u64 orig_bytes, |
3891 | enum btrfs_reserve_flush_enum flush) | ||
3886 | { | 3892 | { |
3887 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3893 | struct btrfs_space_info *space_info = block_rsv->space_info; |
3888 | u64 used; | 3894 | u64 used; |
@@ -3895,10 +3901,11 @@ again: | |||
3895 | ret = 0; | 3901 | ret = 0; |
3896 | spin_lock(&space_info->lock); | 3902 | spin_lock(&space_info->lock); |
3897 | /* | 3903 | /* |
3898 | * We only want to wait if somebody other than us is flushing and we are | 3904 | * We only want to wait if somebody other than us is flushing and we |
3899 | * actually alloed to flush. | 3905 | * are actually allowed to flush all things. |
3900 | */ | 3906 | */ |
3901 | while (flush && !flushing && space_info->flush) { | 3907 | while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing && |
3908 | space_info->flush) { | ||
3902 | spin_unlock(&space_info->lock); | 3909 | spin_unlock(&space_info->lock); |
3903 | /* | 3910 | /* |
3904 | * If we have a trans handle we can't wait because the flusher | 3911 | * If we have a trans handle we can't wait because the flusher |
@@ -3964,23 +3971,40 @@ again: | |||
3964 | * Couldn't make our reservation, save our place so while we're trying | 3971 | * Couldn't make our reservation, save our place so while we're trying |
3965 | * to reclaim space we can actually use it instead of somebody else | 3972 | * to reclaim space we can actually use it instead of somebody else |
3966 | * stealing it from us. | 3973 | * stealing it from us. |
3974 | * | ||
3975 | * We make the other tasks wait for the flush only when we can flush | ||
3976 | * all things. | ||
3967 | */ | 3977 | */ |
3968 | if (ret && flush) { | 3978 | if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { |
3969 | flushing = true; | 3979 | flushing = true; |
3970 | space_info->flush = 1; | 3980 | space_info->flush = 1; |
3971 | } | 3981 | } |
3972 | 3982 | ||
3973 | spin_unlock(&space_info->lock); | 3983 | spin_unlock(&space_info->lock); |
3974 | 3984 | ||
3975 | if (!ret || !flush) | 3985 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
3976 | goto out; | 3986 | goto out; |
3977 | 3987 | ||
3978 | ret = flush_space(root, space_info, num_bytes, orig_bytes, | 3988 | ret = flush_space(root, space_info, num_bytes, orig_bytes, |
3979 | flush_state); | 3989 | flush_state); |
3980 | flush_state++; | 3990 | flush_state++; |
3991 | |||
3992 | /* | ||
3993 | * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock | ||
3994 | * would happen. So skip delalloc flush. | ||
3995 | */ | ||
3996 | if (flush == BTRFS_RESERVE_FLUSH_LIMIT && | ||
3997 | (flush_state == FLUSH_DELALLOC || | ||
3998 | flush_state == FLUSH_DELALLOC_WAIT)) | ||
3999 | flush_state = ALLOC_CHUNK; | ||
4000 | |||
3981 | if (!ret) | 4001 | if (!ret) |
3982 | goto again; | 4002 | goto again; |
3983 | else if (flush_state <= COMMIT_TRANS) | 4003 | else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && |
4004 | flush_state < COMMIT_TRANS) | ||
4005 | goto again; | ||
4006 | else if (flush == BTRFS_RESERVE_FLUSH_ALL && | ||
4007 | flush_state <= COMMIT_TRANS) | ||
3984 | goto again; | 4008 | goto again; |
3985 | 4009 | ||
3986 | out: | 4010 | out: |
@@ -4131,9 +4155,9 @@ void btrfs_free_block_rsv(struct btrfs_root *root, | |||
4131 | kfree(rsv); | 4155 | kfree(rsv); |
4132 | } | 4156 | } |
4133 | 4157 | ||
4134 | static inline int __block_rsv_add(struct btrfs_root *root, | 4158 | int btrfs_block_rsv_add(struct btrfs_root *root, |
4135 | struct btrfs_block_rsv *block_rsv, | 4159 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, |
4136 | u64 num_bytes, int flush) | 4160 | enum btrfs_reserve_flush_enum flush) |
4137 | { | 4161 | { |
4138 | int ret; | 4162 | int ret; |
4139 | 4163 | ||
@@ -4149,20 +4173,6 @@ static inline int __block_rsv_add(struct btrfs_root *root, | |||
4149 | return ret; | 4173 | return ret; |
4150 | } | 4174 | } |
4151 | 4175 | ||
4152 | int btrfs_block_rsv_add(struct btrfs_root *root, | ||
4153 | struct btrfs_block_rsv *block_rsv, | ||
4154 | u64 num_bytes) | ||
4155 | { | ||
4156 | return __block_rsv_add(root, block_rsv, num_bytes, 1); | ||
4157 | } | ||
4158 | |||
4159 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, | ||
4160 | struct btrfs_block_rsv *block_rsv, | ||
4161 | u64 num_bytes) | ||
4162 | { | ||
4163 | return __block_rsv_add(root, block_rsv, num_bytes, 0); | ||
4164 | } | ||
4165 | |||
4166 | int btrfs_block_rsv_check(struct btrfs_root *root, | 4176 | int btrfs_block_rsv_check(struct btrfs_root *root, |
4167 | struct btrfs_block_rsv *block_rsv, int min_factor) | 4177 | struct btrfs_block_rsv *block_rsv, int min_factor) |
4168 | { | 4178 | { |
@@ -4181,9 +4191,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root, | |||
4181 | return ret; | 4191 | return ret; |
4182 | } | 4192 | } |
4183 | 4193 | ||
4184 | static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, | 4194 | int btrfs_block_rsv_refill(struct btrfs_root *root, |
4185 | struct btrfs_block_rsv *block_rsv, | 4195 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, |
4186 | u64 min_reserved, int flush) | 4196 | enum btrfs_reserve_flush_enum flush) |
4187 | { | 4197 | { |
4188 | u64 num_bytes = 0; | 4198 | u64 num_bytes = 0; |
4189 | int ret = -ENOSPC; | 4199 | int ret = -ENOSPC; |
@@ -4211,20 +4221,6 @@ static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, | |||
4211 | return ret; | 4221 | return ret; |
4212 | } | 4222 | } |
4213 | 4223 | ||
4214 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
4215 | struct btrfs_block_rsv *block_rsv, | ||
4216 | u64 min_reserved) | ||
4217 | { | ||
4218 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); | ||
4219 | } | ||
4220 | |||
4221 | int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, | ||
4222 | struct btrfs_block_rsv *block_rsv, | ||
4223 | u64 min_reserved) | ||
4224 | { | ||
4225 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); | ||
4226 | } | ||
4227 | |||
4228 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 4224 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
4229 | struct btrfs_block_rsv *dst_rsv, | 4225 | struct btrfs_block_rsv *dst_rsv, |
4230 | u64 num_bytes) | 4226 | u64 num_bytes) |
@@ -4515,14 +4511,15 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4515 | u64 csum_bytes; | 4511 | u64 csum_bytes; |
4516 | unsigned nr_extents = 0; | 4512 | unsigned nr_extents = 0; |
4517 | int extra_reserve = 0; | 4513 | int extra_reserve = 0; |
4518 | int flush = 1; | 4514 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; |
4519 | int ret; | 4515 | int ret; |
4520 | 4516 | ||
4521 | /* Need to be holding the i_mutex here if we aren't free space cache */ | 4517 | /* Need to be holding the i_mutex here if we aren't free space cache */ |
4522 | if (btrfs_is_free_space_inode(inode)) | 4518 | if (btrfs_is_free_space_inode(inode)) |
4523 | flush = 0; | 4519 | flush = BTRFS_RESERVE_NO_FLUSH; |
4524 | 4520 | ||
4525 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4521 | if (flush != BTRFS_RESERVE_NO_FLUSH && |
4522 | btrfs_transaction_in_commit(root->fs_info)) | ||
4526 | schedule_timeout(1); | 4523 | schedule_timeout(1); |
4527 | 4524 | ||
4528 | mutex_lock(&BTRFS_I(inode)->delalloc_mutex); | 4525 | mutex_lock(&BTRFS_I(inode)->delalloc_mutex); |
@@ -6252,7 +6249,8 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
6252 | block_rsv = get_block_rsv(trans, root); | 6249 | block_rsv = get_block_rsv(trans, root); |
6253 | 6250 | ||
6254 | if (block_rsv->size == 0) { | 6251 | if (block_rsv->size == 0) { |
6255 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | 6252 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
6253 | BTRFS_RESERVE_NO_FLUSH); | ||
6256 | /* | 6254 | /* |
6257 | * If we couldn't reserve metadata bytes try and use some from | 6255 | * If we couldn't reserve metadata bytes try and use some from |
6258 | * the global reserve. | 6256 | * the global reserve. |
@@ -6279,7 +6277,8 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
6279 | printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); | 6277 | printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); |
6280 | WARN_ON(1); | 6278 | WARN_ON(1); |
6281 | } | 6279 | } |
6282 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | 6280 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
6281 | BTRFS_RESERVE_NO_FLUSH); | ||
6283 | if (!ret) { | 6282 | if (!ret) { |
6284 | return block_rsv; | 6283 | return block_rsv; |
6285 | } else if (ret && block_rsv != global_rsv) { | 6284 | } else if (ret && block_rsv != global_rsv) { |