diff options
author | Chandan Rajendra <chandan@linux.vnet.ibm.com> | 2016-12-23 04:30:18 -0500 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2017-01-19 12:01:02 -0500 |
commit | 97dcdea076ecef41ea4aaa23d4397c2f622e4265 (patch) | |
tree | a851e5d1fec17e990a31866d011ca4771c45e95e | |
parent | 47b5d64691350e116093c9b47b55ca6b9433bc50 (diff) |
Btrfs: Fix deadlock between direct IO and fast fsync
The following deadlock is seen when executing the generic/113 test:
---------------------------------------------------------+----------------------------------------------------
Direct I/O task Fast fsync task
---------------------------------------------------------+----------------------------------------------------
btrfs_direct_IO
__blockdev_direct_IO
do_blockdev_direct_IO
do_direct_IO
btrfs_get_blocks_direct
while (blocks need to be written)
get_more_blocks (first iteration)
btrfs_get_blocks_direct
btrfs_create_dio_extent
down_read(&BTRFS_I(inode)->dio_sem)
Create and add extent map and ordered extent
up_read(&BTRFS_I(inode)->dio_sem)
btrfs_sync_file
btrfs_log_dentry_safe
btrfs_log_inode_parent
btrfs_log_inode
btrfs_log_changed_extents
down_write(&BTRFS_I(inode)->dio_sem)
Collect new extent maps and ordered extents
wait for ordered extent completion
get_more_blocks (second iteration)
btrfs_get_blocks_direct
btrfs_create_dio_extent
down_read(&BTRFS_I(inode)->dio_sem)
--------------------------------------------------------------------------------------------------------------
In the above description, Btrfs direct I/O code path has not yet started
submitting bios for file range covered by the initial ordered
extent. Meanwhile, the fast fsync task obtains the write semaphore and
waits for I/O on the ordered extent to get completed. However, the
Direct I/O task is now blocked on obtaining the read semaphore.
To resolve the deadlock, this commit modifies the Direct I/O code path
to obtain the read semaphore before invoking
__blockdev_direct_IO(). The semaphore is then given up after
__blockdev_direct_IO() returns. This allows the Direct I/O code to
complete I/O on all the ordered extents it creates.
Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- | fs/btrfs/inode.c | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 415a6ad5ba22..bed6cf5f67a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -7215,7 +7215,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, | |||
7215 | struct extent_map *em = NULL; | 7215 | struct extent_map *em = NULL; |
7216 | int ret; | 7216 | int ret; |
7217 | 7217 | ||
7218 | down_read(&BTRFS_I(inode)->dio_sem); | ||
7219 | if (type != BTRFS_ORDERED_NOCOW) { | 7218 | if (type != BTRFS_ORDERED_NOCOW) { |
7220 | em = create_pinned_em(inode, start, len, orig_start, | 7219 | em = create_pinned_em(inode, start, len, orig_start, |
7221 | block_start, block_len, orig_block_len, | 7220 | block_start, block_len, orig_block_len, |
@@ -7234,7 +7233,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, | |||
7234 | em = ERR_PTR(ret); | 7233 | em = ERR_PTR(ret); |
7235 | } | 7234 | } |
7236 | out: | 7235 | out: |
7237 | up_read(&BTRFS_I(inode)->dio_sem); | ||
7238 | 7236 | ||
7239 | return em; | 7237 | return em; |
7240 | } | 7238 | } |
@@ -8695,6 +8693,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8695 | dio_data.unsubmitted_oe_range_start = (u64)offset; | 8693 | dio_data.unsubmitted_oe_range_start = (u64)offset; |
8696 | dio_data.unsubmitted_oe_range_end = (u64)offset; | 8694 | dio_data.unsubmitted_oe_range_end = (u64)offset; |
8697 | current->journal_info = &dio_data; | 8695 | current->journal_info = &dio_data; |
8696 | down_read(&BTRFS_I(inode)->dio_sem); | ||
8698 | } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, | 8697 | } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, |
8699 | &BTRFS_I(inode)->runtime_flags)) { | 8698 | &BTRFS_I(inode)->runtime_flags)) { |
8700 | inode_dio_end(inode); | 8699 | inode_dio_end(inode); |
@@ -8707,6 +8706,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8707 | iter, btrfs_get_blocks_direct, NULL, | 8706 | iter, btrfs_get_blocks_direct, NULL, |
8708 | btrfs_submit_direct, flags); | 8707 | btrfs_submit_direct, flags); |
8709 | if (iov_iter_rw(iter) == WRITE) { | 8708 | if (iov_iter_rw(iter) == WRITE) { |
8709 | up_read(&BTRFS_I(inode)->dio_sem); | ||
8710 | current->journal_info = NULL; | 8710 | current->journal_info = NULL; |
8711 | if (ret < 0 && ret != -EIOCBQUEUED) { | 8711 | if (ret < 0 && ret != -EIOCBQUEUED) { |
8712 | if (dio_data.reserve) | 8712 | if (dio_data.reserve) |