author     Theodore Ts'o <tytso@mit.edu>  2013-05-11 19:07:42 -0400
committer  Theodore Ts'o <tytso@mit.edu>  2013-05-11 19:07:42 -0400
commit     a549984b8c95acbecefd1fdd4bfdbea4d29b0588 (patch)
tree       eb25bf90acc6c084de08616ebb26ee091158c46e /fs/ext4
parent     e6155736ad76b2070652745f9e54cdea3f0d8567 (diff)
ext4: revert "ext4: use io_end for multiple bios"
This reverts commit 4eec708d263f0ee10861d69251708a225b64cac7.

Multiple users have reported crashes that are apparently caused by this
commit. Thanks to Dmitry Monakhov for bisecting it.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/ext4')
-rw-r--r--  fs/ext4/ext4.h    |   8
-rw-r--r--  fs/ext4/inode.c   |  85
-rw-r--r--  fs/ext4/page-io.c | 121
3 files changed, 85 insertions(+), 129 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0aabb344b02e..5aae3d12d400 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
 	ssize_t size;		/* size of the extent */
 	struct kiocb *iocb;	/* iocb struct for AIO */
 	int result;		/* error value for AIO */
-	atomic_t count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
+extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
 extern void ext4_ioend_shutdown(struct inode *);
+extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
-extern int ext4_put_io_end(ext4_io_end_t *io_end);
-extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
-extern void ext4_io_submit_init(struct ext4_io_submit *io,
-				struct writeback_control *wbc);
 extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 793d44b84d7f..d66656992358 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1487,10 +1487,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	struct ext4_io_submit io_submit;
 
 	BUG_ON(mpd->next_page <= mpd->first_page);
-	ext4_io_submit_init(&io_submit, mpd->wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end)
-		return -ENOMEM;
+	memset(&io_submit, 0, sizeof(io_submit));
 	/*
 	 * We need to start from the first_page to the next_page - 1
 	 * to make sure we also write the mapped dirty buffer_heads.
@@ -1578,8 +1575,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		pagevec_release(&pvec);
 	}
 	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -2238,16 +2233,9 @@ static int ext4_writepage(struct page *page,
 		 */
 		return __ext4_journalled_writepage(page, len);
 
-	ext4_io_submit_init(&io_submit, wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end) {
-		redirty_page_for_writepage(wbc, page);
-		return -ENOMEM;
-	}
+	memset(&io_submit, 0, sizeof(io_submit));
 	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
 	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -3078,13 +3066,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ext4_io_end_t *io_end = iocb->private;
 
-	/* if not async direct IO just return */
-	if (!io_end) {
-		inode_dio_done(inode);
-		if (is_async)
-			aio_complete(iocb, ret, 0);
-		return;
-	}
+	/* if not async direct IO or dio with 0 bytes write, just return */
+	if (!io_end || !size)
+		goto out;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3092,13 +3076,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 		  size);
 
 	iocb->private = NULL;
+
+	/* if not aio dio with unwritten extents, just free io and return */
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+out:
+		inode_dio_done(inode);
+		if (is_async)
+			aio_complete(iocb, ret, 0);
+		return;
+	}
+
 	io_end->offset = offset;
 	io_end->size = size;
 	if (is_async) {
 		io_end->iocb = iocb;
 		io_end->result = ret;
 	}
-	ext4_put_io_end_defer(io_end);
+
+	ext4_add_complete_io(io_end);
 }
 
 /*
@@ -3132,7 +3128,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	get_block_t *get_block_func = NULL;
 	int dio_flags = 0;
 	loff_t final_size = offset + count;
-	ext4_io_end_t *io_end = NULL;
 
 	/* Use the old path for reads and writes beyond i_size. */
 	if (rw != WRITE || final_size > inode->i_size)
@@ -3171,16 +3166,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		iocb->private = NULL;
 		ext4_inode_aio_set(inode, NULL);
 		if (!is_sync_kiocb(iocb)) {
-			io_end = ext4_init_io_end(inode, GFP_NOFS);
+			ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
 			if (!io_end) {
 				ret = -ENOMEM;
 				goto retake_lock;
 			}
 			io_end->flag |= EXT4_IO_END_DIRECT;
-			/*
-			 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
-			 */
-			iocb->private = ext4_get_io_end(io_end);
+			iocb->private = io_end;
 			/*
 			 * we save the io structure for current async direct
 			 * IO, so that later ext4_map_blocks() could flag the
@@ -3204,27 +3196,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 				   NULL,
 				   dio_flags);
 
+	if (iocb->private)
+		ext4_inode_aio_set(inode, NULL);
 	/*
-	 * Put our reference to io_end. This can free the io_end structure e.g.
-	 * in sync IO case or in case of error. It can even perform extent
-	 * conversion if all bios we submitted finished before we got here.
-	 * Note that in that case iocb->private can be already set to NULL
-	 * here.
+	 * The io_end structure takes a reference to the inode, that
+	 * structure needs to be destroyed and the reference to the
+	 * inode need to be dropped, when IO is complete, even with 0
+	 * byte write, or failed.
+	 *
+	 * In the successful AIO DIO case, the io_end structure will
+	 * be destroyed and the reference to the inode will be dropped
+	 * after the end_io call back function is called.
+	 *
+	 * In the case there is 0 byte write, or error case, since VFS
+	 * direct IO won't invoke the end_io call back function, we
+	 * need to free the end_io structure here.
 	 */
-	if (io_end) {
-		ext4_inode_aio_set(inode, NULL);
-		ext4_put_io_end(io_end);
-		/*
-		 * In case of error or no write ext4_end_io_dio() was not
-		 * called so we have to put iocb's reference.
-		 */
-		if (ret <= 0 && ret != -EIOCBQUEUED) {
-			WARN_ON(iocb->private != io_end);
-			ext4_put_io_end(io_end);
-			iocb->private = NULL;
-		}
-	}
-	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
+	if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
+		ext4_free_io_end(iocb->private);
+		iocb->private = NULL;
+	} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
 						EXT4_STATE_DIO_UNWRITTEN)) {
 		int err;
 		/*
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 5929cd0baa20..6626aba57ebb 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -61,28 +61,15 @@ void ext4_ioend_shutdown(struct inode *inode)
 	cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }
 
-static void ext4_release_io_end(ext4_io_end_t *io_end)
+void ext4_free_io_end(ext4_io_end_t *io)
 {
-	BUG_ON(!list_empty(&io_end->list));
-	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-
-	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
-		wake_up_all(ext4_ioend_wq(io_end->inode));
-	if (io_end->flag & EXT4_IO_END_DIRECT)
-		inode_dio_done(io_end->inode);
-	if (io_end->iocb)
-		aio_complete(io_end->iocb, io_end->result, 0);
-	kmem_cache_free(io_end_cachep, io_end);
-}
-
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-	struct inode *inode = io_end->inode;
+	BUG_ON(!io);
+	BUG_ON(!list_empty(&io->list));
+	BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
 
-	io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-	/* Wake up anyone waiting on unwritten extent conversion */
-	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-		wake_up_all(ext4_ioend_wq(inode));
+	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
+		wake_up_all(ext4_ioend_wq(io->inode));
+	kmem_cache_free(io_end_cachep, io);
 }
 
 /* check a range of space and convert unwritten extents to written. */
@@ -105,8 +92,13 @@ static int ext4_end_io(ext4_io_end_t *io)
105 "(inode %lu, offset %llu, size %zd, error %d)", 92 "(inode %lu, offset %llu, size %zd, error %d)",
106 inode->i_ino, offset, size, ret); 93 inode->i_ino, offset, size, ret);
107 } 94 }
108 ext4_clear_io_unwritten_flag(io); 95 /* Wake up anyone waiting on unwritten extent conversion */
109 ext4_release_io_end(io); 96 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
97 wake_up_all(ext4_ioend_wq(inode));
98 if (io->flag & EXT4_IO_END_DIRECT)
99 inode_dio_done(inode);
100 if (io->iocb)
101 aio_complete(io->iocb, io->result, 0);
110 return ret; 102 return ret;
111} 103}
112 104
@@ -137,7 +129,7 @@ static void dump_completed_IO(struct inode *inode)
 }
 
 /* Add the io_end to per-inode completed end_io list. */
-static void ext4_add_complete_io(ext4_io_end_t *io_end)
+void ext4_add_complete_io(ext4_io_end_t *io_end)
 {
 	struct ext4_inode_info *ei = EXT4_I(io_end->inode);
 	struct workqueue_struct *wq;
@@ -174,6 +166,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
 		err = ext4_end_io(io);
 		if (unlikely(!ret && err))
 			ret = err;
+		io->flag &= ~EXT4_IO_END_UNWRITTEN;
+		ext4_free_io_end(io);
 	}
 	return ret;
 }
@@ -205,43 +199,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 		atomic_inc(&EXT4_I(inode)->i_ioend_count);
 		io->inode = inode;
 		INIT_LIST_HEAD(&io->list);
-		atomic_set(&io->count, 1);
 	}
 	return io;
 }
 
-void ext4_put_io_end_defer(ext4_io_end_t *io_end)
-{
-	if (atomic_dec_and_test(&io_end->count)) {
-		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
-			ext4_release_io_end(io_end);
-			return;
-		}
-		ext4_add_complete_io(io_end);
-	}
-}
-
-int ext4_put_io_end(ext4_io_end_t *io_end)
-{
-	int err = 0;
-
-	if (atomic_dec_and_test(&io_end->count)) {
-		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-			err = ext4_convert_unwritten_extents(io_end->inode,
-						io_end->offset, io_end->size);
-			ext4_clear_io_unwritten_flag(io_end);
-		}
-		ext4_release_io_end(io_end);
-	}
-	return err;
-}
-
-ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
-{
-	atomic_inc(&io_end->count);
-	return io_end;
-}
-
 /*
  * Print an buffer I/O error compatible with the fs/buffer.c. This
  * provides compatibility with dmesg scrapers that look for a specific
@@ -324,7 +285,12 @@ static void ext4_end_bio(struct bio *bio, int error)
 			     bi_sector >> (inode->i_blkbits - 9));
 	}
 
-	ext4_put_io_end_defer(io_end);
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+		return;
+	}
+
+	ext4_add_complete_io(io_end);
 }
 
 void ext4_io_submit(struct ext4_io_submit *io)
@@ -338,37 +304,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
 		bio_put(io->io_bio);
 	}
 	io->io_bio = NULL;
-}
-
-void ext4_io_submit_init(struct ext4_io_submit *io,
-			 struct writeback_control *wbc)
-{
-	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
-	io->io_bio = NULL;
+	io->io_op = 0;
 	io->io_end = NULL;
 }
 
-static int io_submit_init_bio(struct ext4_io_submit *io,
-			      struct buffer_head *bh)
+static int io_submit_init(struct ext4_io_submit *io,
+			  struct inode *inode,
+			  struct writeback_control *wbc,
+			  struct buffer_head *bh)
 {
+	ext4_io_end_t *io_end;
+	struct page *page = bh->b_page;
 	int nvecs = bio_get_nr_vecs(bh->b_bdev);
 	struct bio *bio;
 
+	io_end = ext4_init_io_end(inode, GFP_NOFS);
+	if (!io_end)
+		return -ENOMEM;
 	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
 	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
+	bio->bi_private = io->io_end = io_end;
 	bio->bi_end_io = ext4_end_bio;
-	bio->bi_private = ext4_get_io_end(io->io_end);
-	if (!io->io_end->size)
-		io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
-				     + bh_offset(bh);
+
+	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
+
 	io->io_bio = bio;
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 	io->io_next_block = bh->b_blocknr;
 	return 0;
 }
 
 static int io_submit_add_bh(struct ext4_io_submit *io,
 			    struct inode *inode,
+			    struct writeback_control *wbc,
 			    struct buffer_head *bh)
 {
 	ext4_io_end_t *io_end;
@@ -379,18 +348,18 @@ submit_and_retry:
 		ext4_io_submit(io);
 	}
 	if (io->io_bio == NULL) {
-		ret = io_submit_init_bio(io, bh);
+		ret = io_submit_init(io, inode, wbc, bh);
 		if (ret)
 			return ret;
 	}
-	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
-	if (ret != bh->b_size)
-		goto submit_and_retry;
 	io_end = io->io_end;
 	if (test_clear_buffer_uninit(bh))
 		ext4_set_io_unwritten_flag(inode, io_end);
-	io_end->size += bh->b_size;
+	io->io_end->size += bh->b_size;
 	io->io_next_block++;
+	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+	if (ret != bh->b_size)
+		goto submit_and_retry;
 	return 0;
 }
 
@@ -462,7 +431,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	do {
 		if (!buffer_async_write(bh))
 			continue;
-		ret = io_submit_add_bh(io, inode, bh);
+		ret = io_submit_add_bh(io, inode, wbc, bh);
 		if (ret) {
 			/*
 			 * We only get here on ENOMEM. Not much else