aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Miao Xie <miaox@cn.fujitsu.com> 2013-02-08 02:01:08 -0500
committer Josef Bacik <jbacik@fusionio.com> 2013-02-20 12:59:47 -0500
commit 2e60a51e62185cce48758e596ae7cb2da673b58f (patch)
tree bdbbac16110a3eeda8732c3ffb38a440204e2831
parent 0934856d4697e63c14056375e26e3bd6e8ebd34b (diff)
Btrfs: serialize unlocked dio reads with truncate
Currently, we can do unlocked dio reads, but the following race is possible:

    dio_read_task                   truncate_task
                                    ->btrfs_setattr()
    ->btrfs_direct_IO
      ->__blockdev_direct_IO
        ->btrfs_get_block
                                      ->btrfs_truncate()
                                      #alloc truncated blocks
                                      #to other inode
        ->submit_io()
        #INFORMATION LEAK

In order to avoid this problem, we must serialize unlocked dio reads with truncate. There are two approaches:
- use extent lock to protect the extent that we truncate
- use inode_dio_wait() to make sure the truncating task will wait for the read DIO.

If we use the 1st one, we will meet the endless truncation problem due to the nonlocked read DIO after we implement the nonlocked write DIO. It is because we still need to invoke inode_dio_wait() to avoid the race between write DIO and truncation. By that time, we have to introduce

    btrfs_inode_{block, resume}_nolock_dio()

again. That is, we have to implement this patch again, so I choose the 2nd way to fix the problem.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
-rw-r--r-- fs/btrfs/btrfs_inode.h 19
-rw-r--r-- fs/btrfs/inode.c 23
2 files changed, 40 insertions, 2 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c935a774a9f8..d9b97d4960e6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -41,6 +41,7 @@
41#define BTRFS_INODE_NEEDS_FULL_SYNC 7 41#define BTRFS_INODE_NEEDS_FULL_SYNC 7
42#define BTRFS_INODE_COPY_EVERYTHING 8 42#define BTRFS_INODE_COPY_EVERYTHING 8
43#define BTRFS_INODE_IN_DELALLOC_LIST 9 43#define BTRFS_INODE_IN_DELALLOC_LIST 9
44#define BTRFS_INODE_READDIO_NEED_LOCK 10
44 45
45/* in memory btrfs inode */ 46/* in memory btrfs inode */
46struct btrfs_inode { 47struct btrfs_inode {
@@ -217,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
217 return 0; 218 return 0;
218} 219}
219 220
221/*
222 * Disable DIO read nolock optimization, so new dio readers will be forced
223 * to grab i_mutex. It is used to avoid the endless truncate due to
224 * nonlocked dio read.
225 */
226static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
227{
228 set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
229 smp_mb();
230}
231
232static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
233{
234 smp_mb__before_clear_bit();
235 clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
236 &BTRFS_I(inode)->runtime_flags);
237}
238
220#endif 239#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d11f38d8696c..c6ee8f1063ff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3888,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3888 3888
3889 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3889 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3890 truncate_setsize(inode, newsize); 3890 truncate_setsize(inode, newsize);
3891
3892 /* Disable nonlocked read DIO to avoid the end less truncate */
3893 btrfs_inode_block_unlocked_dio(inode);
3894 inode_dio_wait(inode);
3895 btrfs_inode_resume_unlocked_dio(inode);
3896
3891 ret = btrfs_truncate(inode); 3897 ret = btrfs_truncate(inode);
3892 if (ret && inode->i_nlink) 3898 if (ret && inode->i_nlink)
3893 btrfs_orphan_del(NULL, inode); 3899 btrfs_orphan_del(NULL, inode);
@@ -6670,6 +6676,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6670 struct file *file = iocb->ki_filp; 6676 struct file *file = iocb->ki_filp;
6671 struct inode *inode = file->f_mapping->host; 6677 struct inode *inode = file->f_mapping->host;
6672 size_t count = 0; 6678 size_t count = 0;
6679 int flags = 0;
6680 bool wakeup = false;
6673 ssize_t ret; 6681 ssize_t ret;
6674 6682
6675 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 6683 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
@@ -6681,13 +6689,22 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6681 ret = btrfs_delalloc_reserve_space(inode, count); 6689 ret = btrfs_delalloc_reserve_space(inode, count);
6682 if (ret) 6690 if (ret)
6683 return ret; 6691 return ret;
6692 } else {
6693 atomic_inc(&inode->i_dio_count);
6694 smp_mb__after_atomic_inc();
6695 if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
6696 &BTRFS_I(inode)->runtime_flags))) {
6697 inode_dio_done(inode);
6698 flags = DIO_LOCKING | DIO_SKIP_HOLES;
6699 } else {
6700 wakeup = true;
6701 }
6684 } 6702 }
6685 6703
6686 ret = __blockdev_direct_IO(rw, iocb, inode, 6704 ret = __blockdev_direct_IO(rw, iocb, inode,
6687 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 6705 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
6688 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 6706 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
6689 btrfs_submit_direct, 0); 6707 btrfs_submit_direct, flags);
6690
6691 if (rw & WRITE) { 6708 if (rw & WRITE) {
6692 if (ret < 0 && ret != -EIOCBQUEUED) 6709 if (ret < 0 && ret != -EIOCBQUEUED)
6693 btrfs_delalloc_release_space(inode, count); 6710 btrfs_delalloc_release_space(inode, count);
@@ -6700,6 +6717,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6700 } 6717 }
6701 btrfs_delalloc_release_metadata(inode, 0); 6718 btrfs_delalloc_release_metadata(inode, 0);
6702 } 6719 }
6720 if (wakeup)
6721 inode_dio_done(inode);
6703 6722
6704 return ret; 6723 return ret;
6705} 6724}