author    Jan Kara <jack@suse.cz>         2013-04-11 23:56:53 -0400
committer Theodore Ts'o <tytso@mit.edu>   2013-04-11 23:56:53 -0400
commit    4eec708d263f0ee10861d69251708a225b64cac7 (patch)
tree      2db2d3f0431d98ed83d7e7ad8fbeaca7d7a2dcc2 /fs/ext4
parent    0058f9658c94037173f7603fc8bae2007cc10253 (diff)

ext4: use io_end for multiple bios

Change writeback path to create just one io_end structure for the extent to
which we submit IO and share it among bios writing that extent. This prevents
needless splitting and joining of unwritten extents when they cannot be
submitted as a single bio.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
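For orientation, here is a minimal userspace sketch of the reference-counting scheme the patch introduces: the submitter holds one reference from ext4_init_io_end(), each bio takes its own reference (the kernel code does this via ext4_get_io_end()), and whichever put drops the last reference triggers the one-time unwritten-extent conversion. The io_end_t type and the io_end_* helpers below are illustrative stand-ins built on C11 atomics, not the kernel structures or API.

/* Standalone model of the shared-io_end refcount lifecycle (illustration only;
 * io_end_t and the helpers are stand-ins, not the ext4 structures). */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct io_end {
        atomic_int count;       /* mirrors the new atomic_t count member */
        int        unwritten;   /* models EXT4_IO_END_UNWRITTEN */
        size_t     size;        /* bytes covered by submitted bios */
} io_end_t;

static io_end_t *io_end_init(void)
{
        io_end_t *io = calloc(1, sizeof(*io));
        if (io)
                atomic_store(&io->count, 1);    /* submitter's reference */
        return io;
}

static io_end_t *io_end_get(io_end_t *io)
{
        atomic_fetch_add(&io->count, 1);        /* one reference per bio */
        return io;
}

static void io_end_put(io_end_t *io)
{
        if (atomic_fetch_sub(&io->count, 1) == 1) {
                /* Last reference dropped: whole extent is on disk, convert once. */
                if (io->unwritten && io->size)
                        printf("convert unwritten extent, size %zu\n", io->size);
                free(io);
        }
}

int main(void)
{
        io_end_t *io = io_end_init();
        io_end_t *bio1_priv, *bio2_priv;

        if (!io)
                return 1;
        io->unwritten = 1;

        /* Writeback splits the extent across two bios sharing one io_end. */
        bio1_priv = io_end_get(io);
        io->size += 4096;
        bio2_priv = io_end_get(io);
        io->size += 4096;

        io_end_put(io);         /* submitter drops its init reference */
        io_end_put(bio1_priv);  /* first bio completes */
        io_end_put(bio2_priv);  /* last bio completes -> conversion runs once */
        return 0;
}

The point of the scheme is that conversion runs exactly once, after every bio covering the extent has completed, no matter how many bios the extent had to be split into.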
Diffstat (limited to 'fs/ext4')
 fs/ext4/ext4.h    |   8
 fs/ext4/inode.c   |  85
 fs/ext4/page-io.c | 121
 3 files changed, 128 insertions(+), 86 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3b41d4ae6f9d..779d26b7beff 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,6 +209,7 @@ typedef struct ext4_io_end {
         ssize_t size;                   /* size of the extent */
         struct kiocb *iocb;             /* iocb struct for AIO */
         int result;                     /* error value for AIO */
+        atomic_t count;                 /* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -2627,11 +2628,14 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
-extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
 extern void ext4_ioend_shutdown(struct inode *);
-extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
+extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
+extern int ext4_put_io_end(ext4_io_end_t *io_end);
+extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
+extern void ext4_io_submit_init(struct ext4_io_submit *io,
+                                struct writeback_control *wbc);
 extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 62189c84175f..62492e954483 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1483,7 +1483,10 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
         struct ext4_io_submit io_submit;
 
         BUG_ON(mpd->next_page <= mpd->first_page);
-        memset(&io_submit, 0, sizeof(io_submit));
+        ext4_io_submit_init(&io_submit, mpd->wbc);
+        io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
+        if (!io_submit.io_end)
+                return -ENOMEM;
         /*
          * We need to start from the first_page to the next_page - 1
          * to make sure we also write the mapped dirty buffer_heads.
@@ -1571,6 +1574,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                 pagevec_release(&pvec);
         }
         ext4_io_submit(&io_submit);
+        /* Drop io_end reference we got from init */
+        ext4_put_io_end_defer(io_submit.io_end);
         return ret;
 }
 
@@ -2229,9 +2234,16 @@ static int ext4_writepage(struct page *page,
                  */
                 return __ext4_journalled_writepage(page, len);
 
-        memset(&io_submit, 0, sizeof(io_submit));
+        ext4_io_submit_init(&io_submit, wbc);
+        io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
+        if (!io_submit.io_end) {
+                redirty_page_for_writepage(wbc, page);
+                return -ENOMEM;
+        }
         ret = ext4_bio_write_page(&io_submit, page, len, wbc);
         ext4_io_submit(&io_submit);
+        /* Drop io_end reference we got from init */
+        ext4_put_io_end_defer(io_submit.io_end);
         return ret;
 }
 
@@ -3062,9 +3074,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
         struct inode *inode = file_inode(iocb->ki_filp);
         ext4_io_end_t *io_end = iocb->private;
 
-        /* if not async direct IO or dio with 0 bytes write, just return */
-        if (!io_end || !size)
-                goto out;
+        /* if not async direct IO just return */
+        if (!io_end) {
+                inode_dio_done(inode);
+                if (is_async)
+                        aio_complete(iocb, ret, 0);
+                return;
+        }
 
         ext_debug("ext4_end_io_dio(): io_end 0x%p "
                   "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3072,25 +3088,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
                   size);
 
         iocb->private = NULL;
-
-        /* if not aio dio with unwritten extents, just free io and return */
-        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-                ext4_free_io_end(io_end);
-out:
-                inode_dio_done(inode);
-                if (is_async)
-                        aio_complete(iocb, ret, 0);
-                return;
-        }
-
         io_end->offset = offset;
         io_end->size = size;
         if (is_async) {
                 io_end->iocb = iocb;
                 io_end->result = ret;
         }
-
-        ext4_add_complete_io(io_end);
+        ext4_put_io_end_defer(io_end);
 }
 
 /*
@@ -3124,6 +3128,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
         get_block_t *get_block_func = NULL;
         int dio_flags = 0;
         loff_t final_size = offset + count;
+        ext4_io_end_t *io_end = NULL;
 
         /* Use the old path for reads and writes beyond i_size. */
         if (rw != WRITE || final_size > inode->i_size)
@@ -3162,13 +3167,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
         iocb->private = NULL;
         ext4_inode_aio_set(inode, NULL);
         if (!is_sync_kiocb(iocb)) {
-                ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
+                io_end = ext4_init_io_end(inode, GFP_NOFS);
                 if (!io_end) {
                         ret = -ENOMEM;
                         goto retake_lock;
                 }
                 io_end->flag |= EXT4_IO_END_DIRECT;
-                iocb->private = io_end;
+                /*
+                 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
+                 */
+                iocb->private = ext4_get_io_end(io_end);
                 /*
                  * we save the io structure for current async direct
                  * IO, so that later ext4_map_blocks() could flag the
@@ -3192,26 +3200,27 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                                            NULL,
                                            dio_flags);
 
-        if (iocb->private)
-                ext4_inode_aio_set(inode, NULL);
         /*
-         * The io_end structure takes a reference to the inode, that
-         * structure needs to be destroyed and the reference to the
-         * inode need to be dropped, when IO is complete, even with 0
-         * byte write, or failed.
-         *
-         * In the successful AIO DIO case, the io_end structure will
-         * be destroyed and the reference to the inode will be dropped
-         * after the end_io call back function is called.
-         *
-         * In the case there is 0 byte write, or error case, since VFS
-         * direct IO won't invoke the end_io call back function, we
-         * need to free the end_io structure here.
+         * Put our reference to io_end. This can free the io_end structure e.g.
+         * in sync IO case or in case of error. It can even perform extent
+         * conversion if all bios we submitted finished before we got here.
+         * Note that in that case iocb->private can be already set to NULL
+         * here.
          */
-        if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
-                ext4_free_io_end(iocb->private);
-                iocb->private = NULL;
-        } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
+        if (io_end) {
+                ext4_inode_aio_set(inode, NULL);
+                ext4_put_io_end(io_end);
+                /*
+                 * In case of error or no write ext4_end_io_dio() was not
+                 * called so we have to put iocb's reference.
+                 */
+                if (ret <= 0 && ret != -EIOCBQUEUED) {
+                        WARN_ON(iocb->private != io_end);
+                        ext4_put_io_end(io_end);
+                        iocb->private = NULL;
+                }
+        }
+        if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
                                                 EXT4_STATE_DIO_UNWRITTEN)) {
                 int err;
                 /*
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1d98fcfc2ff0..14f9837350d1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -61,17 +61,28 @@ void ext4_ioend_shutdown(struct inode *inode)
                 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }
 
-void ext4_free_io_end(ext4_io_end_t *io)
+static void ext4_release_io_end(ext4_io_end_t *io_end)
 {
-        int i;
+        BUG_ON(!list_empty(&io_end->list));
+        BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
+
+        if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
+                wake_up_all(ext4_ioend_wq(io_end->inode));
+        if (io_end->flag & EXT4_IO_END_DIRECT)
+                inode_dio_done(io_end->inode);
+        if (io_end->iocb)
+                aio_complete(io_end->iocb, io_end->result, 0);
+        kmem_cache_free(io_end_cachep, io_end);
+}
 
-        BUG_ON(!io);
-        BUG_ON(!list_empty(&io->list));
-        BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
+static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
+{
+        struct inode *inode = io_end->inode;
 
-        if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
-                wake_up_all(ext4_ioend_wq(io->inode));
-        kmem_cache_free(io_end_cachep, io);
+        io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+        /* Wake up anyone waiting on unwritten extent conversion */
+        if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+                wake_up_all(ext4_ioend_wq(inode));
 }
 
 /* check a range of space and convert unwritten extents to written. */
@@ -94,13 +105,8 @@ static int ext4_end_io(ext4_io_end_t *io)
                          "(inode %lu, offset %llu, size %zd, error %d)",
                          inode->i_ino, offset, size, ret);
         }
-        /* Wake up anyone waiting on unwritten extent conversion */
-        if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-                wake_up_all(ext4_ioend_wq(inode));
-        if (io->flag & EXT4_IO_END_DIRECT)
-                inode_dio_done(inode);
-        if (io->iocb)
-                aio_complete(io->iocb, io->result, 0);
+        ext4_clear_io_unwritten_flag(io);
+        ext4_release_io_end(io);
         return ret;
 }
 
@@ -131,7 +137,7 @@ static void dump_completed_IO(struct inode *inode)
 }
 
 /* Add the io_end to per-inode completed end_io list. */
-void ext4_add_complete_io(ext4_io_end_t *io_end)
+static void ext4_add_complete_io(ext4_io_end_t *io_end)
 {
         struct ext4_inode_info *ei = EXT4_I(io_end->inode);
         struct workqueue_struct *wq;
@@ -168,8 +174,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
                 err = ext4_end_io(io);
                 if (unlikely(!ret && err))
                         ret = err;
-                io->flag &= ~EXT4_IO_END_UNWRITTEN;
-                ext4_free_io_end(io);
         }
         return ret;
 }
@@ -201,10 +205,43 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
                 atomic_inc(&EXT4_I(inode)->i_ioend_count);
                 io->inode = inode;
                 INIT_LIST_HEAD(&io->list);
+                atomic_set(&io->count, 1);
         }
         return io;
 }
 
+void ext4_put_io_end_defer(ext4_io_end_t *io_end)
+{
+        if (atomic_dec_and_test(&io_end->count)) {
+                if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+                        ext4_release_io_end(io_end);
+                        return;
+                }
+                ext4_add_complete_io(io_end);
+        }
+}
+
+int ext4_put_io_end(ext4_io_end_t *io_end)
+{
+        int err = 0;
+
+        if (atomic_dec_and_test(&io_end->count)) {
+                if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+                        err = ext4_convert_unwritten_extents(io_end->inode,
+                                                io_end->offset, io_end->size);
+                        ext4_clear_io_unwritten_flag(io_end);
+                }
+                ext4_release_io_end(io_end);
+        }
+        return err;
+}
+
+ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
+{
+        atomic_inc(&io_end->count);
+        return io_end;
+}
+
 /*
  * Print an buffer I/O error compatible with the fs/buffer.c.  This
  * provides compatibility with dmesg scrapers that look for a specific
@@ -287,12 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error)
                              bi_sector >> (inode->i_blkbits - 9));
         }
 
-        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-                ext4_free_io_end(io_end);
-                return;
-        }
-
-        ext4_add_complete_io(io_end);
+        ext4_put_io_end_defer(io_end);
 }
 
 void ext4_io_submit(struct ext4_io_submit *io)
@@ -306,40 +338,37 @@ void ext4_io_submit(struct ext4_io_submit *io)
                 bio_put(io->io_bio);
         }
         io->io_bio = NULL;
-        io->io_op = 0;
+}
+
+void ext4_io_submit_init(struct ext4_io_submit *io,
+                         struct writeback_control *wbc)
+{
+        io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
+        io->io_bio = NULL;
         io->io_end = NULL;
 }
 
-static int io_submit_init(struct ext4_io_submit *io,
-                          struct inode *inode,
-                          struct writeback_control *wbc,
-                          struct buffer_head *bh)
+static int io_submit_init_bio(struct ext4_io_submit *io,
+                              struct buffer_head *bh)
 {
-        ext4_io_end_t *io_end;
-        struct page *page = bh->b_page;
         int nvecs = bio_get_nr_vecs(bh->b_bdev);
         struct bio *bio;
 
-        io_end = ext4_init_io_end(inode, GFP_NOFS);
-        if (!io_end)
-                return -ENOMEM;
         bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
         bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
         bio->bi_bdev = bh->b_bdev;
-        bio->bi_private = io->io_end = io_end;
         bio->bi_end_io = ext4_end_bio;
-
-        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
-
+        bio->bi_private = ext4_get_io_end(io->io_end);
+        if (!io->io_end->size)
+                io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
+                                     + bh_offset(bh);
         io->io_bio = bio;
-        io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
         io->io_next_block = bh->b_blocknr;
         return 0;
 }
 
 static int io_submit_add_bh(struct ext4_io_submit *io,
                             struct inode *inode,
-                            struct writeback_control *wbc,
                             struct buffer_head *bh)
 {
         ext4_io_end_t *io_end;
@@ -350,18 +379,18 @@ submit_and_retry:
                 ext4_io_submit(io);
         }
         if (io->io_bio == NULL) {
-                ret = io_submit_init(io, inode, wbc, bh);
+                ret = io_submit_init_bio(io, bh);
                 if (ret)
                         return ret;
         }
+        ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+        if (ret != bh->b_size)
+                goto submit_and_retry;
         io_end = io->io_end;
         if (buffer_uninit(bh))
                 ext4_set_io_unwritten_flag(inode, io_end);
-        io->io_end->size += bh->b_size;
+        io_end->size += bh->b_size;
         io->io_next_block++;
-        ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
-        if (ret != bh->b_size)
-                goto submit_and_retry;
         return 0;
 }
 
@@ -433,7 +462,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
         do {
                 if (!buffer_async_write(bh))
                         continue;
-                ret = io_submit_add_bh(io, inode, wbc, bh);
+                ret = io_submit_add_bh(io, inode, bh);
                 if (ret) {
                         /*
                          * We only get here on ENOMEM.  Not much else