diff options
| author | Zheng Liu <wenqing.lz@taobao.com> | 2012-07-22 20:19:31 -0400 |
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2012-07-22 20:19:31 -0400 |
| commit | 4bd809dbbf177ad0c450d702466b1da63e1b4b7e (patch) | |
| tree | 80493da203ac42a448489bc8e589207b9f735c2b /fs/ext4 | |
| parent | 729f52c6be51013c9268e5fc85acbc1091286fdb (diff) | |
ext4: don't take the i_mutex lock when doing DIO overwrites
Direct I/O writes that are aligned and only overwrite existing blocks
can be parallelized. In ext4_file_dio_write, we first check whether
these conditions are satisfied or not. If so, i_data_sem is taken (for
reading) and the i_mutex lock is released directly in
ext4_ext_direct_IO; i_mutex is taken again before that function
returns. Meanwhile iocb->private is set to indicate that this is a
dio overwrite, and it will be handled in ext4_ext_direct_IO.
[ Added fix from Dan Carpenter to fix locking bug on the error path. ]
CC: Tao Ma <tm@tao.ma>
CC: Eric Sandeen <sandeen@redhat.com>
CC: Robin Dong <hao.bigrat@gmail.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Diffstat (limited to 'fs/ext4')
| -rw-r--r-- | fs/ext4/file.c | 51 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 24 |
2 files changed, 71 insertions, 4 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index a10dc7742aec..1c81509f5bd9 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -93,9 +93,13 @@ static ssize_t | |||
| 93 | ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, | 93 | ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, |
| 94 | unsigned long nr_segs, loff_t pos) | 94 | unsigned long nr_segs, loff_t pos) |
| 95 | { | 95 | { |
| 96 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 96 | struct file *file = iocb->ki_filp; |
| 97 | struct inode *inode = file->f_mapping->host; | ||
| 98 | struct blk_plug plug; | ||
| 97 | int unaligned_aio = 0; | 99 | int unaligned_aio = 0; |
| 98 | ssize_t ret; | 100 | ssize_t ret; |
| 101 | int overwrite = 0; | ||
| 102 | size_t length = iov_length(iov, nr_segs); | ||
| 99 | 103 | ||
| 100 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && | 104 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && |
| 101 | !is_sync_kiocb(iocb)) | 105 | !is_sync_kiocb(iocb)) |
| @@ -115,7 +119,50 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 115 | ext4_aiodio_wait(inode); | 119 | ext4_aiodio_wait(inode); |
| 116 | } | 120 | } |
| 117 | 121 | ||
| 118 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | 122 | BUG_ON(iocb->ki_pos != pos); |
| 123 | |||
| 124 | mutex_lock(&inode->i_mutex); | ||
| 125 | blk_start_plug(&plug); | ||
| 126 | |||
| 127 | iocb->private = &overwrite; | ||
| 128 | |||
| 129 | /* check whether we do a DIO overwrite or not */ | ||
| 130 | if (ext4_should_dioread_nolock(inode) && !unaligned_aio && | ||
| 131 | !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { | ||
| 132 | struct ext4_map_blocks map; | ||
| 133 | unsigned int blkbits = inode->i_blkbits; | ||
| 134 | int err, len; | ||
| 135 | |||
| 136 | map.m_lblk = pos >> blkbits; | ||
| 137 | map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) | ||
| 138 | - map.m_lblk; | ||
| 139 | len = map.m_len; | ||
| 140 | |||
| 141 | err = ext4_map_blocks(NULL, inode, &map, 0); | ||
| 142 | /* | ||
| 143 | * 'err==len' means that all of blocks has been preallocated no | ||
| 144 | * matter they are initialized or not. For excluding | ||
| 145 | * uninitialized extents, we need to check m_flags. There are | ||
| 146 | * two conditions that indicate for initialized extents. | ||
| 147 | * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned; | ||
| 148 | * 2) If we do a real lookup, non-flags are returned. | ||
| 149 | * So we should check these two conditions. | ||
| 150 | */ | ||
| 151 | if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) | ||
| 152 | overwrite = 1; | ||
| 153 | } | ||
| 154 | |||
| 155 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | ||
| 156 | mutex_unlock(&inode->i_mutex); | ||
| 157 | |||
| 158 | if (ret > 0 || ret == -EIOCBQUEUED) { | ||
| 159 | ssize_t err; | ||
| 160 | |||
| 161 | err = generic_write_sync(file, pos, ret); | ||
| 162 | if (err < 0 && ret > 0) | ||
| 163 | ret = err; | ||
| 164 | } | ||
| 165 | blk_finish_plug(&plug); | ||
| 119 | 166 | ||
| 120 | if (unaligned_aio) | 167 | if (unaligned_aio) |
| 121 | mutex_unlock(ext4_aio_mutex(inode)); | 168 | mutex_unlock(ext4_aio_mutex(inode)); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 76cb3b1ad78a..bed574dd4c22 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -2996,6 +2996,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 2996 | if (rw == WRITE && final_size <= inode->i_size) { | 2996 | if (rw == WRITE && final_size <= inode->i_size) { |
| 2997 | int overwrite = 0; | 2997 | int overwrite = 0; |
| 2998 | 2998 | ||
| 2999 | BUG_ON(iocb->private == NULL); | ||
| 3000 | |||
| 3001 | /* If we do a overwrite dio, i_mutex locking can be released */ | ||
| 3002 | overwrite = *((int *)iocb->private); | ||
| 3003 | |||
| 3004 | if (overwrite) { | ||
| 3005 | down_read(&EXT4_I(inode)->i_data_sem); | ||
| 3006 | mutex_unlock(&inode->i_mutex); | ||
| 3007 | } | ||
| 3008 | |||
| 2999 | /* | 3009 | /* |
| 3000 | * We could direct write to holes and fallocate. | 3010 | * We could direct write to holes and fallocate. |
| 3001 | * | 3011 | * |
| @@ -3021,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3021 | if (!is_sync_kiocb(iocb)) { | 3031 | if (!is_sync_kiocb(iocb)) { |
| 3022 | ext4_io_end_t *io_end = | 3032 | ext4_io_end_t *io_end = |
| 3023 | ext4_init_io_end(inode, GFP_NOFS); | 3033 | ext4_init_io_end(inode, GFP_NOFS); |
| 3024 | if (!io_end) | 3034 | if (!io_end) { |
| 3025 | return -ENOMEM; | 3035 | ret = -ENOMEM; |
| 3036 | goto retake_lock; | ||
| 3037 | } | ||
| 3026 | io_end->flag |= EXT4_IO_END_DIRECT; | 3038 | io_end->flag |= EXT4_IO_END_DIRECT; |
| 3027 | iocb->private = io_end; | 3039 | iocb->private = io_end; |
| 3028 | /* | 3040 | /* |
| @@ -3083,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3083 | ret = err; | 3095 | ret = err; |
| 3084 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | 3096 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); |
| 3085 | } | 3097 | } |
| 3098 | |||
| 3099 | retake_lock: | ||
| 3100 | /* take i_mutex locking again if we do a ovewrite dio */ | ||
| 3101 | if (overwrite) { | ||
| 3102 | up_read(&EXT4_I(inode)->i_data_sem); | ||
| 3103 | mutex_lock(&inode->i_mutex); | ||
| 3104 | } | ||
| 3105 | |||
| 3086 | return ret; | 3106 | return ret; |
| 3087 | } | 3107 | } |
| 3088 | 3108 | ||
