aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2013-02-08 02:04:11 -0500
committerJosef Bacik <jbacik@fusionio.com>2013-02-20 12:59:48 -0500
commit38851cc19adbfa1def2b47106d8050a80e0a3673 (patch)
tree4efe5e9ff5c776b73ec149ce29dd8e67bc4394cc /fs/btrfs
parent2e60a51e62185cce48758e596ae7cb2da673b58f (diff)
Btrfs: implement unlocked dio write
This idea is from ext4. With this patch, we can make dio writes run in parallel and improve performance. But because we cannot update isize without i_mutex, the unlocked dio write can only be done in front of the EOF. We needn't worry about the race between dio write and truncate, because truncate must wait until all in-flight dio writes end. And we also needn't worry about the race between dio write and punch hole, because we have an extent lock to protect our operation. I ran fio to test the performance of this feature. == Hardware == CPU: Intel(R) Core(TM)2 Duo CPU E7500 @ 2.93GHz Mem: 2GB SSD: Intel X25-M 120GB (Test Partition: 60GB) == config file == [global] ioengine=psync direct=1 bs=4k size=32G runtime=60 directory=/mnt/btrfs/ filename=testfile group_reporting thread [file1] numjobs=1 # 2 4 rw=randwrite == result (KBps) == write 1 2 4 lock 24936 24738 24726 nolock 24962 30866 32101 == result (iops) == write 1 2 4 lock 6234 6184 6181 nolock 6240 7716 8025 Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/inode.c35
1 file changed, 23 insertions, 12 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c6ee8f1063ff..fce61991213c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6677,28 +6677,36 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6677 struct inode *inode = file->f_mapping->host; 6677 struct inode *inode = file->f_mapping->host;
6678 size_t count = 0; 6678 size_t count = 0;
6679 int flags = 0; 6679 int flags = 0;
6680 bool wakeup = false; 6680 bool wakeup = true;
6681 bool relock = false;
6681 ssize_t ret; 6682 ssize_t ret;
6682 6683
6683 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 6684 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6684 offset, nr_segs)) 6685 offset, nr_segs))
6685 return 0; 6686 return 0;
6686 6687
6688 atomic_inc(&inode->i_dio_count);
6689 smp_mb__after_atomic_inc();
6690
6687 if (rw & WRITE) { 6691 if (rw & WRITE) {
6688 count = iov_length(iov, nr_segs); 6692 count = iov_length(iov, nr_segs);
6693 /*
6694 * If the write DIO is beyond the EOF, we need update
6695 * the isize, but it is protected by i_mutex. So we can
6696 * not unlock the i_mutex at this case.
6697 */
6698 if (offset + count <= inode->i_size) {
6699 mutex_unlock(&inode->i_mutex);
6700 relock = true;
6701 }
6689 ret = btrfs_delalloc_reserve_space(inode, count); 6702 ret = btrfs_delalloc_reserve_space(inode, count);
6690 if (ret) 6703 if (ret)
6691 return ret; 6704 goto out;
6692 } else { 6705 } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
6693 atomic_inc(&inode->i_dio_count); 6706 &BTRFS_I(inode)->runtime_flags))) {
6694 smp_mb__after_atomic_inc(); 6707 inode_dio_done(inode);
6695 if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 6708 flags = DIO_LOCKING | DIO_SKIP_HOLES;
6696 &BTRFS_I(inode)->runtime_flags))) { 6709 wakeup = false;
6697 inode_dio_done(inode);
6698 flags = DIO_LOCKING | DIO_SKIP_HOLES;
6699 } else {
6700 wakeup = true;
6701 }
6702 } 6710 }
6703 6711
6704 ret = __blockdev_direct_IO(rw, iocb, inode, 6712 ret = __blockdev_direct_IO(rw, iocb, inode,
@@ -6717,8 +6725,11 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6717 } 6725 }
6718 btrfs_delalloc_release_metadata(inode, 0); 6726 btrfs_delalloc_release_metadata(inode, 0);
6719 } 6727 }
6728out:
6720 if (wakeup) 6729 if (wakeup)
6721 inode_dio_done(inode); 6730 inode_dio_done(inode);
6731 if (relock)
6732 mutex_lock(&inode->i_mutex);
6722 6733
6723 return ret; 6734 return ret;
6724} 6735}