about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/ext4.h 8
-rw-r--r-- fs/ext4/inode.c 98
-rw-r--r-- fs/ext4/page-io.c 121
3 files changed, 142 insertions, 85 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 019db3c1bc3b..82d2b6000a61 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,6 +209,7 @@ typedef struct ext4_io_end {
209 ssize_t size; /* size of the extent */ 209 ssize_t size; /* size of the extent */
210 struct kiocb *iocb; /* iocb struct for AIO */ 210 struct kiocb *iocb; /* iocb struct for AIO */
211 int result; /* error value for AIO */ 211 int result; /* error value for AIO */
212 atomic_t count; /* reference counter */
212} ext4_io_end_t; 213} ext4_io_end_t;
213 214
214struct ext4_io_submit { 215struct ext4_io_submit {
@@ -2648,11 +2649,14 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2648 2649
2649/* page-io.c */ 2650/* page-io.c */
2650extern int __init ext4_init_pageio(void); 2651extern int __init ext4_init_pageio(void);
2651extern void ext4_add_complete_io(ext4_io_end_t *io_end);
2652extern void ext4_exit_pageio(void); 2652extern void ext4_exit_pageio(void);
2653extern void ext4_ioend_shutdown(struct inode *); 2653extern void ext4_ioend_shutdown(struct inode *);
2654extern void ext4_free_io_end(ext4_io_end_t *io);
2655extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2654extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2655extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
2656extern int ext4_put_io_end(ext4_io_end_t *io_end);
2657extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
2658extern void ext4_io_submit_init(struct ext4_io_submit *io,
2659 struct writeback_control *wbc);
2656extern void ext4_end_io_work(struct work_struct *work); 2660extern void ext4_end_io_work(struct work_struct *work);
2657extern void ext4_io_submit(struct ext4_io_submit *io); 2661extern void ext4_io_submit(struct ext4_io_submit *io);
2658extern int ext4_bio_write_page(struct ext4_io_submit *io, 2662extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 38f03dcdc8be..13fbbaea10ea 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1465,6 +1465,8 @@ static void ext4_da_page_release_reservation(struct page *page,
1465 * Delayed allocation stuff 1465 * Delayed allocation stuff
1466 */ 1466 */
1467 1467
1468static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd);
1469
1468/* 1470/*
1469 * mpage_da_submit_io - walks through extent of pages and try to write 1471 * mpage_da_submit_io - walks through extent of pages and try to write
1470 * them with writepage() call back 1472 * them with writepage() call back
@@ -1493,7 +1495,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1493 struct ext4_io_submit io_submit; 1495 struct ext4_io_submit io_submit;
1494 1496
1495 BUG_ON(mpd->next_page <= mpd->first_page); 1497 BUG_ON(mpd->next_page <= mpd->first_page);
1496 memset(&io_submit, 0, sizeof(io_submit)); 1498 ext4_io_submit_init(&io_submit, mpd->wbc);
1499 io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
1500 if (!io_submit.io_end) {
1501 ext4_da_block_invalidatepages(mpd);
1502 return -ENOMEM;
1503 }
1497 /* 1504 /*
1498 * We need to start from the first_page to the next_page - 1 1505 * We need to start from the first_page to the next_page - 1
1499 * to make sure we also write the mapped dirty buffer_heads. 1506 * to make sure we also write the mapped dirty buffer_heads.
@@ -1581,6 +1588,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1581 pagevec_release(&pvec); 1588 pagevec_release(&pvec);
1582 } 1589 }
1583 ext4_io_submit(&io_submit); 1590 ext4_io_submit(&io_submit);
1591 /* Drop io_end reference we got from init */
1592 ext4_put_io_end_defer(io_submit.io_end);
1584 return ret; 1593 return ret;
1585} 1594}
1586 1595
@@ -2239,9 +2248,17 @@ static int ext4_writepage(struct page *page,
2239 */ 2248 */
2240 return __ext4_journalled_writepage(page, len); 2249 return __ext4_journalled_writepage(page, len);
2241 2250
2242 memset(&io_submit, 0, sizeof(io_submit)); 2251 ext4_io_submit_init(&io_submit, wbc);
2252 io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
2253 if (!io_submit.io_end) {
2254 redirty_page_for_writepage(wbc, page);
2255 unlock_page(page);
2256 return -ENOMEM;
2257 }
2243 ret = ext4_bio_write_page(&io_submit, page, len, wbc); 2258 ret = ext4_bio_write_page(&io_submit, page, len, wbc);
2244 ext4_io_submit(&io_submit); 2259 ext4_io_submit(&io_submit);
2260 /* Drop io_end reference we got from init */
2261 ext4_put_io_end_defer(io_submit.io_end);
2245 return ret; 2262 return ret;
2246} 2263}
2247 2264
@@ -3076,9 +3093,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3076 struct inode *inode = file_inode(iocb->ki_filp); 3093 struct inode *inode = file_inode(iocb->ki_filp);
3077 ext4_io_end_t *io_end = iocb->private; 3094 ext4_io_end_t *io_end = iocb->private;
3078 3095
3079 /* if not async direct IO or dio with 0 bytes write, just return */ 3096 /* if not async direct IO just return */
3080 if (!io_end || !size) 3097 if (!io_end) {
3081 goto out; 3098 inode_dio_done(inode);
3099 if (is_async)
3100 aio_complete(iocb, ret, 0);
3101 return;
3102 }
3082 3103
3083 ext_debug("ext4_end_io_dio(): io_end 0x%p " 3104 ext_debug("ext4_end_io_dio(): io_end 0x%p "
3084 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", 3105 "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3086,25 +3107,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3086 size); 3107 size);
3087 3108
3088 iocb->private = NULL; 3109 iocb->private = NULL;
3089
3090 /* if not aio dio with unwritten extents, just free io and return */
3091 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
3092 ext4_free_io_end(io_end);
3093out:
3094 inode_dio_done(inode);
3095 if (is_async)
3096 aio_complete(iocb, ret, 0);
3097 return;
3098 }
3099
3100 io_end->offset = offset; 3110 io_end->offset = offset;
3101 io_end->size = size; 3111 io_end->size = size;
3102 if (is_async) { 3112 if (is_async) {
3103 io_end->iocb = iocb; 3113 io_end->iocb = iocb;
3104 io_end->result = ret; 3114 io_end->result = ret;
3105 } 3115 }
3106 3116 ext4_put_io_end_defer(io_end);
3107 ext4_add_complete_io(io_end);
3108} 3117}
3109 3118
3110/* 3119/*
@@ -3138,6 +3147,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3138 get_block_t *get_block_func = NULL; 3147 get_block_t *get_block_func = NULL;
3139 int dio_flags = 0; 3148 int dio_flags = 0;
3140 loff_t final_size = offset + count; 3149 loff_t final_size = offset + count;
3150 ext4_io_end_t *io_end = NULL;
3141 3151
3142 /* Use the old path for reads and writes beyond i_size. */ 3152 /* Use the old path for reads and writes beyond i_size. */
3143 if (rw != WRITE || final_size > inode->i_size) 3153 if (rw != WRITE || final_size > inode->i_size)
@@ -3176,13 +3186,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3176 iocb->private = NULL; 3186 iocb->private = NULL;
3177 ext4_inode_aio_set(inode, NULL); 3187 ext4_inode_aio_set(inode, NULL);
3178 if (!is_sync_kiocb(iocb)) { 3188 if (!is_sync_kiocb(iocb)) {
3179 ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); 3189 io_end = ext4_init_io_end(inode, GFP_NOFS);
3180 if (!io_end) { 3190 if (!io_end) {
3181 ret = -ENOMEM; 3191 ret = -ENOMEM;
3182 goto retake_lock; 3192 goto retake_lock;
3183 } 3193 }
3184 io_end->flag |= EXT4_IO_END_DIRECT; 3194 io_end->flag |= EXT4_IO_END_DIRECT;
3185 iocb->private = io_end; 3195 /*
3196 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
3197 */
3198 iocb->private = ext4_get_io_end(io_end);
3186 /* 3199 /*
3187 * we save the io structure for current async direct 3200 * we save the io structure for current async direct
3188 * IO, so that later ext4_map_blocks() could flag the 3201 * IO, so that later ext4_map_blocks() could flag the
@@ -3206,26 +3219,35 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3206 NULL, 3219 NULL,
3207 dio_flags); 3220 dio_flags);
3208 3221
3209 if (iocb->private)
3210 ext4_inode_aio_set(inode, NULL);
3211 /* 3222 /*
3212 * The io_end structure takes a reference to the inode, that 3223 * Put our reference to io_end. This can free the io_end structure e.g.
3213 * structure needs to be destroyed and the reference to the 3224 * in sync IO case or in case of error. It can even perform extent
3214 * inode need to be dropped, when IO is complete, even with 0 3225 * conversion if all bios we submitted finished before we got here.
3215 * byte write, or failed. 3226 * Note that in that case iocb->private can be already set to NULL
3216 * 3227 * here.
3217 * In the successful AIO DIO case, the io_end structure will
3218 * be destroyed and the reference to the inode will be dropped
3219 * after the end_io call back function is called.
3220 *
3221 * In the case there is 0 byte write, or error case, since VFS
3222 * direct IO won't invoke the end_io call back function, we
3223 * need to free the end_io structure here.
3224 */ 3228 */
3225 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3229 if (io_end) {
3226 ext4_free_io_end(iocb->private); 3230 ext4_inode_aio_set(inode, NULL);
3227 iocb->private = NULL; 3231 ext4_put_io_end(io_end);
3228 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, 3232 /*
3233 * When no IO was submitted ext4_end_io_dio() was not
3234 * called so we have to put iocb's reference.
3235 */
3236 if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
3237 WARN_ON(iocb->private != io_end);
3238 WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
3239 WARN_ON(io_end->iocb);
3240 /*
3241 * Generic code already did inode_dio_done() so we
3242 * have to clear EXT4_IO_END_DIRECT to not do it for
3243 * the second time.
3244 */
3245 io_end->flag = 0;
3246 ext4_put_io_end(io_end);
3247 iocb->private = NULL;
3248 }
3249 }
3250 if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3229 EXT4_STATE_DIO_UNWRITTEN)) { 3251 EXT4_STATE_DIO_UNWRITTEN)) {
3230 int err; 3252 int err;
3231 /* 3253 /*
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4acf1f78881b..19599bded62a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,15 +62,28 @@ void ext4_ioend_shutdown(struct inode *inode)
62 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); 62 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
63} 63}
64 64
65void ext4_free_io_end(ext4_io_end_t *io) 65static void ext4_release_io_end(ext4_io_end_t *io_end)
66{ 66{
67 BUG_ON(!io); 67 BUG_ON(!list_empty(&io_end->list));
68 BUG_ON(!list_empty(&io->list)); 68 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
69 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 69
70 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
71 wake_up_all(ext4_ioend_wq(io_end->inode));
72 if (io_end->flag & EXT4_IO_END_DIRECT)
73 inode_dio_done(io_end->inode);
74 if (io_end->iocb)
75 aio_complete(io_end->iocb, io_end->result, 0);
76 kmem_cache_free(io_end_cachep, io_end);
77}
78
79static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
80{
81 struct inode *inode = io_end->inode;
70 82
71 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) 83 io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
72 wake_up_all(ext4_ioend_wq(io->inode)); 84 /* Wake up anyone waiting on unwritten extent conversion */
73 kmem_cache_free(io_end_cachep, io); 85 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
86 wake_up_all(ext4_ioend_wq(inode));
74} 87}
75 88
76/* check a range of space and convert unwritten extents to written. */ 89/* check a range of space and convert unwritten extents to written. */
@@ -93,13 +106,8 @@ static int ext4_end_io(ext4_io_end_t *io)
93 "(inode %lu, offset %llu, size %zd, error %d)", 106 "(inode %lu, offset %llu, size %zd, error %d)",
94 inode->i_ino, offset, size, ret); 107 inode->i_ino, offset, size, ret);
95 } 108 }
96 /* Wake up anyone waiting on unwritten extent conversion */ 109 ext4_clear_io_unwritten_flag(io);
97 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) 110 ext4_release_io_end(io);
98 wake_up_all(ext4_ioend_wq(inode));
99 if (io->flag & EXT4_IO_END_DIRECT)
100 inode_dio_done(inode);
101 if (io->iocb)
102 aio_complete(io->iocb, io->result, 0);
103 return ret; 111 return ret;
104} 112}
105 113
@@ -130,7 +138,7 @@ static void dump_completed_IO(struct inode *inode)
130} 138}
131 139
132/* Add the io_end to per-inode completed end_io list. */ 140/* Add the io_end to per-inode completed end_io list. */
133void ext4_add_complete_io(ext4_io_end_t *io_end) 141static void ext4_add_complete_io(ext4_io_end_t *io_end)
134{ 142{
135 struct ext4_inode_info *ei = EXT4_I(io_end->inode); 143 struct ext4_inode_info *ei = EXT4_I(io_end->inode);
136 struct workqueue_struct *wq; 144 struct workqueue_struct *wq;
@@ -167,8 +175,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
167 err = ext4_end_io(io); 175 err = ext4_end_io(io);
168 if (unlikely(!ret && err)) 176 if (unlikely(!ret && err))
169 ret = err; 177 ret = err;
170 io->flag &= ~EXT4_IO_END_UNWRITTEN;
171 ext4_free_io_end(io);
172 } 178 }
173 return ret; 179 return ret;
174} 180}
@@ -200,10 +206,43 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
200 atomic_inc(&EXT4_I(inode)->i_ioend_count); 206 atomic_inc(&EXT4_I(inode)->i_ioend_count);
201 io->inode = inode; 207 io->inode = inode;
202 INIT_LIST_HEAD(&io->list); 208 INIT_LIST_HEAD(&io->list);
209 atomic_set(&io->count, 1);
203 } 210 }
204 return io; 211 return io;
205} 212}
206 213
214void ext4_put_io_end_defer(ext4_io_end_t *io_end)
215{
216 if (atomic_dec_and_test(&io_end->count)) {
217 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
218 ext4_release_io_end(io_end);
219 return;
220 }
221 ext4_add_complete_io(io_end);
222 }
223}
224
225int ext4_put_io_end(ext4_io_end_t *io_end)
226{
227 int err = 0;
228
229 if (atomic_dec_and_test(&io_end->count)) {
230 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
231 err = ext4_convert_unwritten_extents(io_end->inode,
232 io_end->offset, io_end->size);
233 ext4_clear_io_unwritten_flag(io_end);
234 }
235 ext4_release_io_end(io_end);
236 }
237 return err;
238}
239
240ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
241{
242 atomic_inc(&io_end->count);
243 return io_end;
244}
245
207/* 246/*
208 * Print an buffer I/O error compatible with the fs/buffer.c. This 247 * Print an buffer I/O error compatible with the fs/buffer.c. This
209 * provides compatibility with dmesg scrapers that look for a specific 248 * provides compatibility with dmesg scrapers that look for a specific
@@ -286,12 +325,7 @@ static void ext4_end_bio(struct bio *bio, int error)
286 bi_sector >> (inode->i_blkbits - 9)); 325 bi_sector >> (inode->i_blkbits - 9));
287 } 326 }
288 327
289 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 328 ext4_put_io_end_defer(io_end);
290 ext4_free_io_end(io_end);
291 return;
292 }
293
294 ext4_add_complete_io(io_end);
295} 329}
296 330
297void ext4_io_submit(struct ext4_io_submit *io) 331void ext4_io_submit(struct ext4_io_submit *io)
@@ -305,40 +339,37 @@ void ext4_io_submit(struct ext4_io_submit *io)
305 bio_put(io->io_bio); 339 bio_put(io->io_bio);
306 } 340 }
307 io->io_bio = NULL; 341 io->io_bio = NULL;
308 io->io_op = 0; 342}
343
344void ext4_io_submit_init(struct ext4_io_submit *io,
345 struct writeback_control *wbc)
346{
347 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
348 io->io_bio = NULL;
309 io->io_end = NULL; 349 io->io_end = NULL;
310} 350}
311 351
312static int io_submit_init(struct ext4_io_submit *io, 352static int io_submit_init_bio(struct ext4_io_submit *io,
313 struct inode *inode, 353 struct buffer_head *bh)
314 struct writeback_control *wbc,
315 struct buffer_head *bh)
316{ 354{
317 ext4_io_end_t *io_end;
318 struct page *page = bh->b_page;
319 int nvecs = bio_get_nr_vecs(bh->b_bdev); 355 int nvecs = bio_get_nr_vecs(bh->b_bdev);
320 struct bio *bio; 356 struct bio *bio;
321 357
322 io_end = ext4_init_io_end(inode, GFP_NOFS);
323 if (!io_end)
324 return -ENOMEM;
325 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); 358 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
326 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 359 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
327 bio->bi_bdev = bh->b_bdev; 360 bio->bi_bdev = bh->b_bdev;
328 bio->bi_private = io->io_end = io_end;
329 bio->bi_end_io = ext4_end_bio; 361 bio->bi_end_io = ext4_end_bio;
330 362 bio->bi_private = ext4_get_io_end(io->io_end);
331 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); 363 if (!io->io_end->size)
332 364 io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
365 + bh_offset(bh);
333 io->io_bio = bio; 366 io->io_bio = bio;
334 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
335 io->io_next_block = bh->b_blocknr; 367 io->io_next_block = bh->b_blocknr;
336 return 0; 368 return 0;
337} 369}
338 370
339static int io_submit_add_bh(struct ext4_io_submit *io, 371static int io_submit_add_bh(struct ext4_io_submit *io,
340 struct inode *inode, 372 struct inode *inode,
341 struct writeback_control *wbc,
342 struct buffer_head *bh) 373 struct buffer_head *bh)
343{ 374{
344 ext4_io_end_t *io_end; 375 ext4_io_end_t *io_end;
@@ -349,18 +380,18 @@ submit_and_retry:
349 ext4_io_submit(io); 380 ext4_io_submit(io);
350 } 381 }
351 if (io->io_bio == NULL) { 382 if (io->io_bio == NULL) {
352 ret = io_submit_init(io, inode, wbc, bh); 383 ret = io_submit_init_bio(io, bh);
353 if (ret) 384 if (ret)
354 return ret; 385 return ret;
355 } 386 }
387 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
388 if (ret != bh->b_size)
389 goto submit_and_retry;
356 io_end = io->io_end; 390 io_end = io->io_end;
357 if (test_clear_buffer_uninit(bh)) 391 if (test_clear_buffer_uninit(bh))
358 ext4_set_io_unwritten_flag(inode, io_end); 392 ext4_set_io_unwritten_flag(inode, io_end);
359 io->io_end->size += bh->b_size; 393 io_end->size += bh->b_size;
360 io->io_next_block++; 394 io->io_next_block++;
361 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
362 if (ret != bh->b_size)
363 goto submit_and_retry;
364 return 0; 395 return 0;
365} 396}
366 397
@@ -432,7 +463,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
432 do { 463 do {
433 if (!buffer_async_write(bh)) 464 if (!buffer_async_write(bh))
434 continue; 465 continue;
435 ret = io_submit_add_bh(io, inode, wbc, bh); 466 ret = io_submit_add_bh(io, inode, bh);
436 if (ret) { 467 if (ret) {
437 /* 468 /*
438 * We only get here on ENOMEM. Not much else 469 * We only get here on ENOMEM. Not much else