diff options
author | Jan Kara <jack@suse.cz> | 2013-04-11 23:56:53 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-04-11 23:56:53 -0400 |
commit | 4eec708d263f0ee10861d69251708a225b64cac7 (patch) | |
tree | 2db2d3f0431d98ed83d7e7ad8fbeaca7d7a2dcc2 /fs/ext4/inode.c | |
parent | 0058f9658c94037173f7603fc8bae2007cc10253 (diff) |
ext4: use io_end for multiple bios
Change writeback path to create just one io_end structure for the
extent to which we submit IO and share it among bios writing that
extent. This prevents needless splitting and joining of unwritten
extents when they cannot be submitted as a single bio.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 85 |
1 file changed, 47 insertions, 38 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 62189c84175f..62492e954483 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1483,7 +1483,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1483 | struct ext4_io_submit io_submit; | 1483 | struct ext4_io_submit io_submit; |
1484 | 1484 | ||
1485 | BUG_ON(mpd->next_page <= mpd->first_page); | 1485 | BUG_ON(mpd->next_page <= mpd->first_page); |
1486 | memset(&io_submit, 0, sizeof(io_submit)); | 1486 | ext4_io_submit_init(&io_submit, mpd->wbc); |
1487 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
1488 | if (!io_submit.io_end) | ||
1489 | return -ENOMEM; | ||
1487 | /* | 1490 | /* |
1488 | * We need to start from the first_page to the next_page - 1 | 1491 | * We need to start from the first_page to the next_page - 1 |
1489 | * to make sure we also write the mapped dirty buffer_heads. | 1492 | * to make sure we also write the mapped dirty buffer_heads. |
@@ -1571,6 +1574,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1571 | pagevec_release(&pvec); | 1574 | pagevec_release(&pvec); |
1572 | } | 1575 | } |
1573 | ext4_io_submit(&io_submit); | 1576 | ext4_io_submit(&io_submit); |
1577 | /* Drop io_end reference we got from init */ | ||
1578 | ext4_put_io_end_defer(io_submit.io_end); | ||
1574 | return ret; | 1579 | return ret; |
1575 | } | 1580 | } |
1576 | 1581 | ||
@@ -2229,9 +2234,16 @@ static int ext4_writepage(struct page *page, | |||
2229 | */ | 2234 | */ |
2230 | return __ext4_journalled_writepage(page, len); | 2235 | return __ext4_journalled_writepage(page, len); |
2231 | 2236 | ||
2232 | memset(&io_submit, 0, sizeof(io_submit)); | 2237 | ext4_io_submit_init(&io_submit, wbc); |
2238 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
2239 | if (!io_submit.io_end) { | ||
2240 | redirty_page_for_writepage(wbc, page); | ||
2241 | return -ENOMEM; | ||
2242 | } | ||
2233 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); | 2243 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); |
2234 | ext4_io_submit(&io_submit); | 2244 | ext4_io_submit(&io_submit); |
2245 | /* Drop io_end reference we got from init */ | ||
2246 | ext4_put_io_end_defer(io_submit.io_end); | ||
2235 | return ret; | 2247 | return ret; |
2236 | } | 2248 | } |
2237 | 2249 | ||
@@ -3062,9 +3074,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3062 | struct inode *inode = file_inode(iocb->ki_filp); | 3074 | struct inode *inode = file_inode(iocb->ki_filp); |
3063 | ext4_io_end_t *io_end = iocb->private; | 3075 | ext4_io_end_t *io_end = iocb->private; |
3064 | 3076 | ||
3065 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3077 | /* if not async direct IO just return */ |
3066 | if (!io_end || !size) | 3078 | if (!io_end) { |
3067 | goto out; | 3079 | inode_dio_done(inode); |
3080 | if (is_async) | ||
3081 | aio_complete(iocb, ret, 0); | ||
3082 | return; | ||
3083 | } | ||
3068 | 3084 | ||
3069 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 3085 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
3070 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", | 3086 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
@@ -3072,25 +3088,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
3072 | size); | 3088 | size); |
3073 | 3089 | ||
3074 | iocb->private = NULL; | 3090 | iocb->private = NULL; |
3075 | |||
3076 | /* if not aio dio with unwritten extents, just free io and return */ | ||
3077 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
3078 | ext4_free_io_end(io_end); | ||
3079 | out: | ||
3080 | inode_dio_done(inode); | ||
3081 | if (is_async) | ||
3082 | aio_complete(iocb, ret, 0); | ||
3083 | return; | ||
3084 | } | ||
3085 | |||
3086 | io_end->offset = offset; | 3091 | io_end->offset = offset; |
3087 | io_end->size = size; | 3092 | io_end->size = size; |
3088 | if (is_async) { | 3093 | if (is_async) { |
3089 | io_end->iocb = iocb; | 3094 | io_end->iocb = iocb; |
3090 | io_end->result = ret; | 3095 | io_end->result = ret; |
3091 | } | 3096 | } |
3092 | 3097 | ext4_put_io_end_defer(io_end); | |
3093 | ext4_add_complete_io(io_end); | ||
3094 | } | 3098 | } |
3095 | 3099 | ||
3096 | /* | 3100 | /* |
@@ -3124,6 +3128,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3124 | get_block_t *get_block_func = NULL; | 3128 | get_block_t *get_block_func = NULL; |
3125 | int dio_flags = 0; | 3129 | int dio_flags = 0; |
3126 | loff_t final_size = offset + count; | 3130 | loff_t final_size = offset + count; |
3131 | ext4_io_end_t *io_end = NULL; | ||
3127 | 3132 | ||
3128 | /* Use the old path for reads and writes beyond i_size. */ | 3133 | /* Use the old path for reads and writes beyond i_size. */ |
3129 | if (rw != WRITE || final_size > inode->i_size) | 3134 | if (rw != WRITE || final_size > inode->i_size) |
@@ -3162,13 +3167,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3162 | iocb->private = NULL; | 3167 | iocb->private = NULL; |
3163 | ext4_inode_aio_set(inode, NULL); | 3168 | ext4_inode_aio_set(inode, NULL); |
3164 | if (!is_sync_kiocb(iocb)) { | 3169 | if (!is_sync_kiocb(iocb)) { |
3165 | ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); | 3170 | io_end = ext4_init_io_end(inode, GFP_NOFS); |
3166 | if (!io_end) { | 3171 | if (!io_end) { |
3167 | ret = -ENOMEM; | 3172 | ret = -ENOMEM; |
3168 | goto retake_lock; | 3173 | goto retake_lock; |
3169 | } | 3174 | } |
3170 | io_end->flag |= EXT4_IO_END_DIRECT; | 3175 | io_end->flag |= EXT4_IO_END_DIRECT; |
3171 | iocb->private = io_end; | 3176 | /* |
3177 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() | ||
3178 | */ | ||
3179 | iocb->private = ext4_get_io_end(io_end); | ||
3172 | /* | 3180 | /* |
3173 | * we save the io structure for current async direct | 3181 | * we save the io structure for current async direct |
3174 | * IO, so that later ext4_map_blocks() could flag the | 3182 | * IO, so that later ext4_map_blocks() could flag the |
@@ -3192,26 +3200,27 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3192 | NULL, | 3200 | NULL, |
3193 | dio_flags); | 3201 | dio_flags); |
3194 | 3202 | ||
3195 | if (iocb->private) | ||
3196 | ext4_inode_aio_set(inode, NULL); | ||
3197 | /* | 3203 | /* |
3198 | * The io_end structure takes a reference to the inode, that | 3204 | * Put our reference to io_end. This can free the io_end structure e.g. |
3199 | * structure needs to be destroyed and the reference to the | 3205 | * in sync IO case or in case of error. It can even perform extent |
3200 | * inode need to be dropped, when IO is complete, even with 0 | 3206 | * conversion if all bios we submitted finished before we got here. |
3201 | * byte write, or failed. | 3207 | * Note that in that case iocb->private can be already set to NULL |
3202 | * | 3208 | * here. |
3203 | * In the successful AIO DIO case, the io_end structure will | ||
3204 | * be destroyed and the reference to the inode will be dropped | ||
3205 | * after the end_io call back function is called. | ||
3206 | * | ||
3207 | * In the case there is 0 byte write, or error case, since VFS | ||
3208 | * direct IO won't invoke the end_io call back function, we | ||
3209 | * need to free the end_io structure here. | ||
3210 | */ | 3209 | */ |
3211 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3210 | if (io_end) { |
3212 | ext4_free_io_end(iocb->private); | 3211 | ext4_inode_aio_set(inode, NULL); |
3213 | iocb->private = NULL; | 3212 | ext4_put_io_end(io_end); |
3214 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | 3213 | /* |
3214 | * In case of error or no write ext4_end_io_dio() was not | ||
3215 | * called so we have to put iocb's reference. | ||
3216 | */ | ||
3217 | if (ret <= 0 && ret != -EIOCBQUEUED) { | ||
3218 | WARN_ON(iocb->private != io_end); | ||
3219 | ext4_put_io_end(io_end); | ||
3220 | iocb->private = NULL; | ||
3221 | } | ||
3222 | } | ||
3223 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3215 | EXT4_STATE_DIO_UNWRITTEN)) { | 3224 | EXT4_STATE_DIO_UNWRITTEN)) { |
3216 | int err; | 3225 | int err; |
3217 | /* | 3226 | /* |