aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Zheng Liu <wenqing.lz@taobao.com> 2012-07-22 20:19:31 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2012-07-22 20:19:31 -0400
commit: 4bd809dbbf177ad0c450d702466b1da63e1b4b7e (patch)
tree: 80493da203ac42a448489bc8e589207b9f735c2b /fs/ext4
parent: 729f52c6be51013c9268e5fc85acbc1091286fdb (diff)
ext4: don't take the i_mutex lock when doing DIO overwrites
Aligned and overwrite direct I/O can be parallelized. In
ext4_file_dio_write, we first check whether these conditions are
satisfied or not. If so, we take i_data_sem and release i_mutex lock
directly. Meanwhile iocb->private is set to indicate that this is a
dio overwrite, and it will be handled in ext4_ext_direct_IO.

[ Added fix from Dan Carpenter to fix locking bug on the error path. ]

CC: Tao Ma <tm@tao.ma>
CC: Eric Sandeen <sandeen@redhat.com>
CC: Robin Dong <hao.bigrat@gmail.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/file.c  | 51
-rw-r--r-- fs/ext4/inode.c | 24
2 files changed, 71 insertions, 4 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a10dc7742aec..1c81509f5bd9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -93,9 +93,13 @@ static ssize_t
93ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, 93ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
94 unsigned long nr_segs, loff_t pos) 94 unsigned long nr_segs, loff_t pos)
95{ 95{
96 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; 96 struct file *file = iocb->ki_filp;
97 struct inode *inode = file->f_mapping->host;
98 struct blk_plug plug;
97 int unaligned_aio = 0; 99 int unaligned_aio = 0;
98 ssize_t ret; 100 ssize_t ret;
101 int overwrite = 0;
102 size_t length = iov_length(iov, nr_segs);
99 103
100 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && 104 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
101 !is_sync_kiocb(iocb)) 105 !is_sync_kiocb(iocb))
@@ -115,7 +119,50 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
115 ext4_aiodio_wait(inode); 119 ext4_aiodio_wait(inode);
116 } 120 }
117 121
118 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 122 BUG_ON(iocb->ki_pos != pos);
123
124 mutex_lock(&inode->i_mutex);
125 blk_start_plug(&plug);
126
127 iocb->private = &overwrite;
128
129 /* check whether we do a DIO overwrite or not */
130 if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
131 !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
132 struct ext4_map_blocks map;
133 unsigned int blkbits = inode->i_blkbits;
134 int err, len;
135
136 map.m_lblk = pos >> blkbits;
137 map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
138 - map.m_lblk;
139 len = map.m_len;
140
141 err = ext4_map_blocks(NULL, inode, &map, 0);
142 /*
143 * 'err==len' means that all of blocks has been preallocated no
144 * matter they are initialized or not. For excluding
145 * uninitialized extents, we need to check m_flags. There are
146 * two conditions that indicate for initialized extents.
147 * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned;
148 * 2) If we do a real lookup, non-flags are returned.
149 * So we should check these two conditions.
150 */
151 if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
152 overwrite = 1;
153 }
154
155 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
156 mutex_unlock(&inode->i_mutex);
157
158 if (ret > 0 || ret == -EIOCBQUEUED) {
159 ssize_t err;
160
161 err = generic_write_sync(file, pos, ret);
162 if (err < 0 && ret > 0)
163 ret = err;
164 }
165 blk_finish_plug(&plug);
119 166
120 if (unaligned_aio) 167 if (unaligned_aio)
121 mutex_unlock(ext4_aio_mutex(inode)); 168 mutex_unlock(ext4_aio_mutex(inode));
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 76cb3b1ad78a..bed574dd4c22 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2996,6 +2996,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2996 if (rw == WRITE && final_size <= inode->i_size) { 2996 if (rw == WRITE && final_size <= inode->i_size) {
2997 int overwrite = 0; 2997 int overwrite = 0;
2998 2998
2999 BUG_ON(iocb->private == NULL);
3000
3001 /* If we do a overwrite dio, i_mutex locking can be released */
3002 overwrite = *((int *)iocb->private);
3003
3004 if (overwrite) {
3005 down_read(&EXT4_I(inode)->i_data_sem);
3006 mutex_unlock(&inode->i_mutex);
3007 }
3008
2999 /* 3009 /*
3000 * We could direct write to holes and fallocate. 3010 * We could direct write to holes and fallocate.
3001 * 3011 *
@@ -3021,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3021 if (!is_sync_kiocb(iocb)) { 3031 if (!is_sync_kiocb(iocb)) {
3022 ext4_io_end_t *io_end = 3032 ext4_io_end_t *io_end =
3023 ext4_init_io_end(inode, GFP_NOFS); 3033 ext4_init_io_end(inode, GFP_NOFS);
3024 if (!io_end) 3034 if (!io_end) {
3025 return -ENOMEM; 3035 ret = -ENOMEM;
3036 goto retake_lock;
3037 }
3026 io_end->flag |= EXT4_IO_END_DIRECT; 3038 io_end->flag |= EXT4_IO_END_DIRECT;
3027 iocb->private = io_end; 3039 iocb->private = io_end;
3028 /* 3040 /*
@@ -3083,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3083 ret = err; 3095 ret = err;
3084 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 3096 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3085 } 3097 }
3098
3099 retake_lock:
3100 /* take i_mutex locking again if we do a ovewrite dio */
3101 if (overwrite) {
3102 up_read(&EXT4_I(inode)->i_data_sem);
3103 mutex_lock(&inode->i_mutex);
3104 }
3105
3086 return ret; 3106 return ret;
3087 } 3107 }
3088 3108