about summary refs log tree commit diff stats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2013-04-11 23:56:53 -0400
committer Theodore Ts'o <tytso@mit.edu> 2013-04-11 23:56:53 -0400
commit 4eec708d263f0ee10861d69251708a225b64cac7 (patch)
tree 2db2d3f0431d98ed83d7e7ad8fbeaca7d7a2dcc2 /fs/ext4/inode.c
parent 0058f9658c94037173f7603fc8bae2007cc10253 (diff)
ext4: use io_end for multiple bios
Change writeback path to create just one io_end structure for the extent to which we submit IO and share it among bios writing that extent. This prevents needless splitting and joining of unwritten extents when they cannot be submitted as a single bio.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 85
1 files changed, 47 insertions, 38 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 62189c84175f..62492e954483 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1483,7 +1483,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1483 struct ext4_io_submit io_submit; 1483 struct ext4_io_submit io_submit;
1484 1484
1485 BUG_ON(mpd->next_page <= mpd->first_page); 1485 BUG_ON(mpd->next_page <= mpd->first_page);
1486 memset(&io_submit, 0, sizeof(io_submit)); 1486 ext4_io_submit_init(&io_submit, mpd->wbc);
1487 io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
1488 if (!io_submit.io_end)
1489 return -ENOMEM;
1487 /* 1490 /*
1488 * We need to start from the first_page to the next_page - 1 1491 * We need to start from the first_page to the next_page - 1
1489 * to make sure we also write the mapped dirty buffer_heads. 1492 * to make sure we also write the mapped dirty buffer_heads.
@@ -1571,6 +1574,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1571 pagevec_release(&pvec); 1574 pagevec_release(&pvec);
1572 } 1575 }
1573 ext4_io_submit(&io_submit); 1576 ext4_io_submit(&io_submit);
1577 /* Drop io_end reference we got from init */
1578 ext4_put_io_end_defer(io_submit.io_end);
1574 return ret; 1579 return ret;
1575} 1580}
1576 1581
@@ -2229,9 +2234,16 @@ static int ext4_writepage(struct page *page,
2229 */ 2234 */
2230 return __ext4_journalled_writepage(page, len); 2235 return __ext4_journalled_writepage(page, len);
2231 2236
2232 memset(&io_submit, 0, sizeof(io_submit)); 2237 ext4_io_submit_init(&io_submit, wbc);
2238 io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
2239 if (!io_submit.io_end) {
2240 redirty_page_for_writepage(wbc, page);
2241 return -ENOMEM;
2242 }
2233 ret = ext4_bio_write_page(&io_submit, page, len, wbc); 2243 ret = ext4_bio_write_page(&io_submit, page, len, wbc);
2234 ext4_io_submit(&io_submit); 2244 ext4_io_submit(&io_submit);
2245 /* Drop io_end reference we got from init */
2246 ext4_put_io_end_defer(io_submit.io_end);
2235 return ret; 2247 return ret;
2236} 2248}
2237 2249
@@ -3062,9 +3074,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3062 struct inode *inode = file_inode(iocb->ki_filp); 3074 struct inode *inode = file_inode(iocb->ki_filp);
3063 ext4_io_end_t *io_end = iocb->private; 3075 ext4_io_end_t *io_end = iocb->private;
3064 3076
3065 /* if not async direct IO or dio with 0 bytes write, just return */ 3077 /* if not async direct IO just return */
3066 if (!io_end || !size) 3078 if (!io_end) {
3067 goto out; 3079 inode_dio_done(inode);
3080 if (is_async)
3081 aio_complete(iocb, ret, 0);
3082 return;
3083 }
3068 3084
3069 ext_debug("ext4_end_io_dio(): io_end 0x%p " 3085 ext_debug("ext4_end_io_dio(): io_end 0x%p "
3070 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", 3086 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3072,25 +3088,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3072 size); 3088 size);
3073 3089
3074 iocb->private = NULL; 3090 iocb->private = NULL;
3075
3076 /* if not aio dio with unwritten extents, just free io and return */
3077 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3078 ext4_free_io_end(io_end);
3079out:
3080 inode_dio_done(inode);
3081 if (is_async)
3082 aio_complete(iocb, ret, 0);
3083 return;
3084 }
3085
3086 io_end->offset = offset; 3091 io_end->offset = offset;
3087 io_end->size = size; 3092 io_end->size = size;
3088 if (is_async) { 3093 if (is_async) {
3089 io_end->iocb = iocb; 3094 io_end->iocb = iocb;
3090 io_end->result = ret; 3095 io_end->result = ret;
3091 } 3096 }
3092 3097 ext4_put_io_end_defer(io_end);
3093 ext4_add_complete_io(io_end);
3094} 3098}
3095 3099
3096/* 3100/*
@@ -3124,6 +3128,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3124 get_block_t *get_block_func = NULL; 3128 get_block_t *get_block_func = NULL;
3125 int dio_flags = 0; 3129 int dio_flags = 0;
3126 loff_t final_size = offset + count; 3130 loff_t final_size = offset + count;
3131 ext4_io_end_t *io_end = NULL;
3127 3132
3128 /* Use the old path for reads and writes beyond i_size. */ 3133 /* Use the old path for reads and writes beyond i_size. */
3129 if (rw != WRITE || final_size > inode->i_size) 3134 if (rw != WRITE || final_size > inode->i_size)
@@ -3162,13 +3167,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3162 iocb->private = NULL; 3167 iocb->private = NULL;
3163 ext4_inode_aio_set(inode, NULL); 3168 ext4_inode_aio_set(inode, NULL);
3164 if (!is_sync_kiocb(iocb)) { 3169 if (!is_sync_kiocb(iocb)) {
3165 ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); 3170 io_end = ext4_init_io_end(inode, GFP_NOFS);
3166 if (!io_end) { 3171 if (!io_end) {
3167 ret = -ENOMEM; 3172 ret = -ENOMEM;
3168 goto retake_lock; 3173 goto retake_lock;
3169 } 3174 }
3170 io_end->flag |= EXT4_IO_END_DIRECT; 3175 io_end->flag |= EXT4_IO_END_DIRECT;
3171 iocb->private = io_end; 3176 /*
3177 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
3178 */
3179 iocb->private = ext4_get_io_end(io_end);
3172 /* 3180 /*
3173 * we save the io structure for current async direct 3181 * we save the io structure for current async direct
3174 * IO, so that later ext4_map_blocks() could flag the 3182 * IO, so that later ext4_map_blocks() could flag the
@@ -3192,26 +3200,27 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3192 NULL, 3200 NULL,
3193 dio_flags); 3201 dio_flags);
3194 3202
3195 if (iocb->private)
3196 ext4_inode_aio_set(inode, NULL);
3197 /* 3203 /*
3198 * The io_end structure takes a reference to the inode, that 3204 * Put our reference to io_end. This can free the io_end structure e.g.
3199 * structure needs to be destroyed and the reference to the 3205 * in sync IO case or in case of error. It can even perform extent
3200 * inode need to be dropped, when IO is complete, even with 0 3206 * conversion if all bios we submitted finished before we got here.
3201 * byte write, or failed. 3207 * Note that in that case iocb->private can be already set to NULL
3202 * 3208 * here.
3203 * In the successful AIO DIO case, the io_end structure will
3204 * be destroyed and the reference to the inode will be dropped
3205 * after the end_io call back function is called.
3206 *
3207 * In the case there is 0 byte write, or error case, since VFS
3208 * direct IO won't invoke the end_io call back function, we
3209 * need to free the end_io structure here.
3210 */ 3209 */
3211 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3210 if (io_end) {
3212 ext4_free_io_end(iocb->private); 3211 ext4_inode_aio_set(inode, NULL);
3213 iocb->private = NULL; 3212 ext4_put_io_end(io_end);
3214 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, 3213 /*
3214 * In case of error or no write ext4_end_io_dio() was not
3215 * called so we have to put iocb's reference.
3216 */
3217 if (ret <= 0 && ret != -EIOCBQUEUED) {
3218 WARN_ON(iocb->private != io_end);
3219 ext4_put_io_end(io_end);
3220 iocb->private = NULL;
3221 }
3222 }
3223 if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3215 EXT4_STATE_DIO_UNWRITTEN)) { 3224 EXT4_STATE_DIO_UNWRITTEN)) {
3216 int err; 3225 int err;
3217 /* 3226 /*