aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Miao Xie <miaox@cn.fujitsu.com> 2013-02-07 05:12:07 -0500
committer Josef Bacik <jbacik@fusionio.com> 2013-02-20 12:59:45 -0500
commit 0934856d4697e63c14056375e26e3bd6e8ebd34b (patch)
tree 5b47949ee3edf90519e901c657b862fa24c1adcc /fs/btrfs
parent 4a7d0f6854c4a4ad1dba00a3b128a32d39b9a742 (diff)
Btrfs: fix deadlock due to unsubmitted bio
The deadlock problem happened when running fsstress (a test program in LTP).

Steps to reproduce:
 # mkfs.btrfs -b 100M <partition>
 # mount <partition> <mnt>
 # <Path>/fsstress -p 3 -n 10000000 -d <mnt>

The reason is:
btrfs_direct_IO()
 |->do_direct_IO()
    |->get_page()
    |->get_blocks()
    |  |->btrfs_delalloc_reserve_space()
    |  |->btrfs_add_ordered_extent() ------- Add a new ordered extent
    |->dio_send_cur_page(page0) -------------- We didn't submit the bio here
    |->get_page()
    |->get_blocks()
       |->btrfs_delalloc_reserve_space()
          |->flush_space()
             |->btrfs_start_ordered_extent()
                |->wait_event() ---------- Wait for the completion of the
                                           ordered extent that is mentioned above

But because we didn't submit the bio that is mentioned above, the ordered extent cannot complete, and we would wait for its completion forever.

There are two methods which can fix this deadlock problem:
1. submit the bio before we invoke get_blocks()
2. reserve the space before we do dio

Though the 1st is the simplest way, we would need to modify the VFS code, and it is likely to break contiguous requests and introduce a performance regression for the other filesystems. So we have to choose the 2nd way.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Cc: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/extent-tree.c 3
-rw-r--r-- fs/btrfs/inode.c 81
2 files changed, 43 insertions, 41 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1818dd90c27e..51e1151a0a02 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4748,7 +4748,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4748 spin_lock(&BTRFS_I(inode)->lock); 4748 spin_lock(&BTRFS_I(inode)->lock);
4749 dropped = drop_outstanding_extent(inode); 4749 dropped = drop_outstanding_extent(inode);
4750 4750
4751 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 4751 if (num_bytes)
4752 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4752 spin_unlock(&BTRFS_I(inode)->lock); 4753 spin_unlock(&BTRFS_I(inode)->lock);
4753 if (dropped > 0) 4754 if (dropped > 0)
4754 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4755 to_free += btrfs_calc_trans_metadata_size(root, dropped);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16925807a9ed..d11f38d8696c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6059,16 +6059,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6059 u64 len = bh_result->b_size; 6059 u64 len = bh_result->b_size;
6060 struct btrfs_trans_handle *trans; 6060 struct btrfs_trans_handle *trans;
6061 int unlock_bits = EXTENT_LOCKED; 6061 int unlock_bits = EXTENT_LOCKED;
6062 int ret; 6062 int ret = 0;
6063 6063
6064 if (create) { 6064 if (create) {
6065 ret = btrfs_delalloc_reserve_space(inode, len); 6065 spin_lock(&BTRFS_I(inode)->lock);
6066 if (ret) 6066 BTRFS_I(inode)->outstanding_extents++;
6067 return ret; 6067 spin_unlock(&BTRFS_I(inode)->lock);
6068 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; 6068 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
6069 } else { 6069 } else
6070 len = min_t(u64, len, root->sectorsize); 6070 len = min_t(u64, len, root->sectorsize);
6071 }
6072 6071
6073 lockstart = start; 6072 lockstart = start;
6074 lockend = start + len - 1; 6073 lockend = start + len - 1;
@@ -6080,14 +6079,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6080 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) 6079 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
6081 return -ENOTBLK; 6080 return -ENOTBLK;
6082 6081
6083 if (create) {
6084 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6085 lockend, EXTENT_DELALLOC, NULL,
6086 &cached_state, GFP_NOFS);
6087 if (ret)
6088 goto unlock_err;
6089 }
6090
6091 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 6082 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
6092 if (IS_ERR(em)) { 6083 if (IS_ERR(em)) {
6093 ret = PTR_ERR(em); 6084 ret = PTR_ERR(em);
@@ -6119,7 +6110,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6119 if (!create && (em->block_start == EXTENT_MAP_HOLE || 6110 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
6120 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6111 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
6121 free_extent_map(em); 6112 free_extent_map(em);
6122 ret = 0;
6123 goto unlock_err; 6113 goto unlock_err;
6124 } 6114 }
6125 6115
@@ -6217,6 +6207,11 @@ unlock:
6217 */ 6207 */
6218 if (start + len > i_size_read(inode)) 6208 if (start + len > i_size_read(inode))
6219 i_size_write(inode, start + len); 6209 i_size_write(inode, start + len);
6210
6211 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6212 lockstart + len - 1, EXTENT_DELALLOC, NULL,
6213 &cached_state, GFP_NOFS);
6214 BUG_ON(ret);
6220 } 6215 }
6221 6216
6222 /* 6217 /*
@@ -6225,24 +6220,9 @@ unlock:
6225 * aren't using if there is any left over space. 6220 * aren't using if there is any left over space.
6226 */ 6221 */
6227 if (lockstart < lockend) { 6222 if (lockstart < lockend) {
6228 if (create && len < lockend - lockstart) { 6223 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6229 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 6224 lockend, unlock_bits, 1, 0,
6230 lockstart + len - 1, 6225 &cached_state, GFP_NOFS);
6231 unlock_bits | EXTENT_DEFRAG, 1, 0,
6232 &cached_state, GFP_NOFS);
6233 /*
6234 * Beside unlock, we also need to cleanup reserved space
6235 * for the left range by attaching EXTENT_DO_ACCOUNTING.
6236 */
6237 clear_extent_bit(&BTRFS_I(inode)->io_tree,
6238 lockstart + len, lockend,
6239 unlock_bits | EXTENT_DO_ACCOUNTING |
6240 EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
6241 } else {
6242 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6243 lockend, unlock_bits, 1, 0,
6244 &cached_state, GFP_NOFS);
6245 }
6246 } else { 6226 } else {
6247 free_extent_state(cached_state); 6227 free_extent_state(cached_state);
6248 } 6228 }
@@ -6252,9 +6232,6 @@ unlock:
6252 return 0; 6232 return 0;
6253 6233
6254unlock_err: 6234unlock_err:
6255 if (create)
6256 unlock_bits |= EXTENT_DO_ACCOUNTING;
6257
6258 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6235 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6259 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 6236 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
6260 return ret; 6237 return ret;
@@ -6692,15 +6669,39 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6692{ 6669{
6693 struct file *file = iocb->ki_filp; 6670 struct file *file = iocb->ki_filp;
6694 struct inode *inode = file->f_mapping->host; 6671 struct inode *inode = file->f_mapping->host;
6672 size_t count = 0;
6673 ssize_t ret;
6695 6674
6696 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 6675 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6697 offset, nr_segs)) 6676 offset, nr_segs))
6698 return 0; 6677 return 0;
6699 6678
6700 return __blockdev_direct_IO(rw, iocb, inode, 6679 if (rw & WRITE) {
6701 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 6680 count = iov_length(iov, nr_segs);
6702 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 6681 ret = btrfs_delalloc_reserve_space(inode, count);
6703 btrfs_submit_direct, 0); 6682 if (ret)
6683 return ret;
6684 }
6685
6686 ret = __blockdev_direct_IO(rw, iocb, inode,
6687 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
6688 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
6689 btrfs_submit_direct, 0);
6690
6691 if (rw & WRITE) {
6692 if (ret < 0 && ret != -EIOCBQUEUED)
6693 btrfs_delalloc_release_space(inode, count);
6694 else if (ret > 0 && (size_t)ret < count) {
6695 spin_lock(&BTRFS_I(inode)->lock);
6696 BTRFS_I(inode)->outstanding_extents++;
6697 spin_unlock(&BTRFS_I(inode)->lock);
6698 btrfs_delalloc_release_space(inode,
6699 count - (size_t)ret);
6700 }
6701 btrfs_delalloc_release_metadata(inode, 0);
6702 }
6703
6704 return ret;
6704} 6705}
6705 6706
6706#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) 6707#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)