-rw-r--r--  fs/ext4/ext4.h                 8
-rw-r--r--  fs/ext4/extents.c              9
-rw-r--r--  fs/ext4/extents_status.c      17
-rw-r--r--  fs/ext4/extents_status.h       3
-rw-r--r--  fs/ext4/file.c                 4
-rw-r--r--  fs/ext4/inode.c               85
-rw-r--r--  fs/ext4/mballoc.c              6
-rw-r--r--  fs/ext4/page-io.c            121
-rw-r--r--  include/linux/journal-head.h   8
-rw-r--r--  include/trace/events/ext4.h    4
10 files changed, 119 insertions(+), 146 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0aabb344b02e..5aae3d12d400 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
 	ssize_t size;		/* size of the extent */
 	struct kiocb *iocb;	/* iocb struct for AIO */
 	int result;		/* error value for AIO */
-	atomic_t count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
+extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
 extern void ext4_ioend_shutdown(struct inode *);
+extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
-extern int ext4_put_io_end(ext4_io_end_t *io_end);
-extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
-extern void ext4_io_submit_init(struct ext4_io_submit *io,
-				struct writeback_control *wbc);
 extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 107936db244e..bc0f1910b9cf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode,
 {
 	struct extent_status es;
 
-	ext4_es_find_delayed_extent(inode, lblk_start, &es);
+	ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
 	if (es.es_len == 0)
 		return 0; /* there is no delay extent in this tree */
 	else if (es.es_lblk <= lblk_start &&
@@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
 	struct extent_status es;
 	ext4_lblk_t block, next_del;
 
-	ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
-
 	if (newes->es_pblk == 0) {
+		ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
+				newes->es_lblk + newes->es_len - 1, &es);
+
 		/*
 		 * No extent in extent-tree contains block @newes->es_pblk,
 		 * then the block may stay in 1)a hole or 2)delayed-extent.
@@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode,
 	}
 
 	block = newes->es_lblk + newes->es_len;
-	ext4_es_find_delayed_extent(inode, block, &es);
+	ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
 	if (es.es_len == 0)
 		next_del = EXT_MAX_BLOCKS;
 	else
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index fe3337a85ede..e6941e622d31 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
 }
 
 /*
- * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
- * if it exists, otherwise, the next extent after @es->lblk.
+ * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
+ * @es->lblk if it exists, otherwise, the next extent after @es->lblk.
  *
  * @inode: the inode which owns delayed extents
  * @lblk: the offset where we start to search
+ * @end: the offset where we stop to search
  * @es: delayed extent that we found
  */
-void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+void ext4_es_find_delayed_extent_range(struct inode *inode,
+				 ext4_lblk_t lblk, ext4_lblk_t end,
 				 struct extent_status *es)
 {
 	struct ext4_es_tree *tree = NULL;
@@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
 	struct rb_node *node;
 
 	BUG_ON(es == NULL);
-	trace_ext4_es_find_delayed_extent_enter(inode, lblk);
+	BUG_ON(end < lblk);
+	trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
 
 	read_lock(&EXT4_I(inode)->i_es_lock);
 	tree = &EXT4_I(inode)->i_es_tree;
@@ -270,6 +273,10 @@ out:
 	if (es1 && !ext4_es_is_delayed(es1)) {
 		while ((node = rb_next(&es1->rb_node)) != NULL) {
 			es1 = rb_entry(node, struct extent_status, rb_node);
+			if (es1->es_lblk > end) {
+				es1 = NULL;
+				break;
+			}
 			if (ext4_es_is_delayed(es1))
 				break;
 		}
@@ -285,7 +292,7 @@ out:
 	read_unlock(&EXT4_I(inode)->i_es_lock);
 
 	ext4_es_lru_add(inode);
-	trace_ext4_es_find_delayed_extent_exit(inode, es);
+	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
 static struct extent_status *
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index d8e2d4dc311e..f740eb03b707 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 				 unsigned long long status);
 extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 				 ext4_lblk_t len);
-extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+extern void ext4_es_find_delayed_extent_range(struct inode *inode,
+					ext4_lblk_t lblk, ext4_lblk_t end,
 					struct extent_status *es);
 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 				 struct extent_status *es);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4959e29573b6..b1b4d51b5d86 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -465,7 +465,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 		 * If there is a delay extent at this offset,
 		 * it will be as a data.
 		 */
-		ext4_es_find_delayed_extent(inode, last, &es);
+		ext4_es_find_delayed_extent_range(inode, last, last, &es);
 		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
 			if (last != start)
 				dataoff = last << blkbits;
@@ -548,7 +548,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 		 * If there is a delay extent at this offset,
 		 * we will skip this extent.
 		 */
-		ext4_es_find_delayed_extent(inode, last, &es);
+		ext4_es_find_delayed_extent_range(inode, last, last, &es);
 		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
 			last = es.es_lblk + es.es_len;
 			holeoff = last << blkbits;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0723774bdfb5..d6382b89ecbd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1488,10 +1488,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	struct ext4_io_submit io_submit;
 
 	BUG_ON(mpd->next_page <= mpd->first_page);
-	ext4_io_submit_init(&io_submit, mpd->wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end)
-		return -ENOMEM;
+	memset(&io_submit, 0, sizeof(io_submit));
 	/*
 	 * We need to start from the first_page to the next_page - 1
 	 * to make sure we also write the mapped dirty buffer_heads.
@@ -1579,8 +1576,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		pagevec_release(&pvec);
 	}
 	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -2239,16 +2234,9 @@ static int ext4_writepage(struct page *page,
 		 */
 		return __ext4_journalled_writepage(page, len);
 
-	ext4_io_submit_init(&io_submit, wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end) {
-		redirty_page_for_writepage(wbc, page);
-		return -ENOMEM;
-	}
+	memset(&io_submit, 0, sizeof(io_submit));
 	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
 	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -3079,13 +3067,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ext4_io_end_t *io_end = iocb->private;
 
-	/* if not async direct IO just return */
-	if (!io_end) {
-		inode_dio_done(inode);
-		if (is_async)
-			aio_complete(iocb, ret, 0);
-		return;
-	}
+	/* if not async direct IO or dio with 0 bytes write, just return */
+	if (!io_end || !size)
+		goto out;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3093,13 +3077,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 		  size);
 
 	iocb->private = NULL;
+
+	/* if not aio dio with unwritten extents, just free io and return */
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+out:
+		inode_dio_done(inode);
+		if (is_async)
+			aio_complete(iocb, ret, 0);
+		return;
+	}
+
 	io_end->offset = offset;
 	io_end->size = size;
 	if (is_async) {
 		io_end->iocb = iocb;
 		io_end->result = ret;
 	}
-	ext4_put_io_end_defer(io_end);
+
+	ext4_add_complete_io(io_end);
 }
 
 /*
@@ -3133,7 +3129,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	get_block_t *get_block_func = NULL;
 	int dio_flags = 0;
 	loff_t final_size = offset + count;
-	ext4_io_end_t *io_end = NULL;
 
 	/* Use the old path for reads and writes beyond i_size. */
 	if (rw != WRITE || final_size > inode->i_size)
@@ -3172,16 +3167,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	iocb->private = NULL;
 	ext4_inode_aio_set(inode, NULL);
 	if (!is_sync_kiocb(iocb)) {
-		io_end = ext4_init_io_end(inode, GFP_NOFS);
+		ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
 		if (!io_end) {
 			ret = -ENOMEM;
 			goto retake_lock;
 		}
 		io_end->flag |= EXT4_IO_END_DIRECT;
-		/*
-		 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
-		 */
-		iocb->private = ext4_get_io_end(io_end);
+		iocb->private = io_end;
 		/*
 		 * we save the io structure for current async direct
 		 * IO, so that later ext4_map_blocks() could flag the
@@ -3205,27 +3197,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 						   NULL,
 						   dio_flags);
 
+	if (iocb->private)
+		ext4_inode_aio_set(inode, NULL);
 	/*
-	 * Put our reference to io_end. This can free the io_end structure e.g.
-	 * in sync IO case or in case of error. It can even perform extent
-	 * conversion if all bios we submitted finished before we got here.
-	 * Note that in that case iocb->private can be already set to NULL
-	 * here.
+	 * The io_end structure takes a reference to the inode, that
+	 * structure needs to be destroyed and the reference to the
+	 * inode need to be dropped, when IO is complete, even with 0
+	 * byte write, or failed.
+	 *
+	 * In the successful AIO DIO case, the io_end structure will
+	 * be destroyed and the reference to the inode will be dropped
+	 * after the end_io call back function is called.
+	 *
+	 * In the case there is 0 byte write, or error case, since VFS
+	 * direct IO won't invoke the end_io call back function, we
+	 * need to free the end_io structure here.
 	 */
-	if (io_end) {
-		ext4_inode_aio_set(inode, NULL);
-		ext4_put_io_end(io_end);
-		/*
-		 * In case of error or no write ext4_end_io_dio() was not
-		 * called so we have to put iocb's reference.
-		 */
-		if (ret <= 0 && ret != -EIOCBQUEUED) {
-			WARN_ON(iocb->private != io_end);
-			ext4_put_io_end(io_end);
-			iocb->private = NULL;
-		}
-	}
-	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
+	if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
+		ext4_free_io_end(iocb->private);
+		iocb->private = NULL;
+	} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
 						EXT4_STATE_DIO_UNWRITTEN)) {
 		int err;
 		/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1ed9e07434b..def84082a9a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2105,7 +2105,11 @@ repeat:
 		group = ac->ac_g_ex.fe_group;
 
 		for (i = 0; i < ngroups; group++, i++) {
-			if (group == ngroups)
+			/*
+			 * Artificially restricted ngroups for non-extent
+			 * files makes group > ngroups possible on first loop.
+			 */
+			if (group >= ngroups)
 				group = 0;
 
 			/* This now checks without needing the buddy page */
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 19599bded62a..4acf1f78881b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,28 +62,15 @@ void ext4_ioend_shutdown(struct inode *inode)
 	cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }
 
-static void ext4_release_io_end(ext4_io_end_t *io_end)
+void ext4_free_io_end(ext4_io_end_t *io)
 {
-	BUG_ON(!list_empty(&io_end->list));
-	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-
-	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
-		wake_up_all(ext4_ioend_wq(io_end->inode));
-	if (io_end->flag & EXT4_IO_END_DIRECT)
-		inode_dio_done(io_end->inode);
-	if (io_end->iocb)
-		aio_complete(io_end->iocb, io_end->result, 0);
-	kmem_cache_free(io_end_cachep, io_end);
-}
-
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-	struct inode *inode = io_end->inode;
+	BUG_ON(!io);
+	BUG_ON(!list_empty(&io->list));
+	BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
 
-	io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-	/* Wake up anyone waiting on unwritten extent conversion */
-	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-		wake_up_all(ext4_ioend_wq(inode));
+	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
+		wake_up_all(ext4_ioend_wq(io->inode));
+	kmem_cache_free(io_end_cachep, io);
 }
 
 /* check a range of space and convert unwritten extents to written. */
@@ -106,8 +93,13 @@ static int ext4_end_io(ext4_io_end_t *io)
 			 "(inode %lu, offset %llu, size %zd, error %d)",
 			 inode->i_ino, offset, size, ret);
 	}
-	ext4_clear_io_unwritten_flag(io);
-	ext4_release_io_end(io);
+	/* Wake up anyone waiting on unwritten extent conversion */
+	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+		wake_up_all(ext4_ioend_wq(inode));
+	if (io->flag & EXT4_IO_END_DIRECT)
+		inode_dio_done(inode);
+	if (io->iocb)
+		aio_complete(io->iocb, io->result, 0);
 	return ret;
 }
 
@@ -138,7 +130,7 @@ static void dump_completed_IO(struct inode *inode)
 }
 
 /* Add the io_end to per-inode completed end_io list. */
-static void ext4_add_complete_io(ext4_io_end_t *io_end)
+void ext4_add_complete_io(ext4_io_end_t *io_end)
 {
 	struct ext4_inode_info *ei = EXT4_I(io_end->inode);
 	struct workqueue_struct *wq;
@@ -175,6 +167,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
 		err = ext4_end_io(io);
 		if (unlikely(!ret && err))
 			ret = err;
+		io->flag &= ~EXT4_IO_END_UNWRITTEN;
+		ext4_free_io_end(io);
 	}
 	return ret;
 }
@@ -206,43 +200,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 		atomic_inc(&EXT4_I(inode)->i_ioend_count);
 		io->inode = inode;
 		INIT_LIST_HEAD(&io->list);
-		atomic_set(&io->count, 1);
 	}
 	return io;
 }
 
-void ext4_put_io_end_defer(ext4_io_end_t *io_end)
-{
-	if (atomic_dec_and_test(&io_end->count)) {
-		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
-			ext4_release_io_end(io_end);
-			return;
-		}
-		ext4_add_complete_io(io_end);
-	}
-}
-
-int ext4_put_io_end(ext4_io_end_t *io_end)
-{
-	int err = 0;
-
-	if (atomic_dec_and_test(&io_end->count)) {
-		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-			err = ext4_convert_unwritten_extents(io_end->inode,
-					io_end->offset, io_end->size);
-			ext4_clear_io_unwritten_flag(io_end);
-		}
-		ext4_release_io_end(io_end);
-	}
-	return err;
-}
-
-ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
-{
-	atomic_inc(&io_end->count);
-	return io_end;
-}
-
 /*
  * Print an buffer I/O error compatible with the fs/buffer.c. This
  * provides compatibility with dmesg scrapers that look for a specific
@@ -325,7 +286,12 @@ static void ext4_end_bio(struct bio *bio, int error)
 			     bi_sector >> (inode->i_blkbits - 9));
 	}
 
-	ext4_put_io_end_defer(io_end);
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+		return;
+	}
+
+	ext4_add_complete_io(io_end);
 }
 
 void ext4_io_submit(struct ext4_io_submit *io)
@@ -339,37 +305,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
 		bio_put(io->io_bio);
 	}
 	io->io_bio = NULL;
-}
-
-void ext4_io_submit_init(struct ext4_io_submit *io,
-			 struct writeback_control *wbc)
-{
-	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
-	io->io_bio = NULL;
+	io->io_op = 0;
 	io->io_end = NULL;
 }
 
-static int io_submit_init_bio(struct ext4_io_submit *io,
-			      struct buffer_head *bh)
+static int io_submit_init(struct ext4_io_submit *io,
+			  struct inode *inode,
+			  struct writeback_control *wbc,
+			  struct buffer_head *bh)
 {
+	ext4_io_end_t *io_end;
+	struct page *page = bh->b_page;
 	int nvecs = bio_get_nr_vecs(bh->b_bdev);
 	struct bio *bio;
 
+	io_end = ext4_init_io_end(inode, GFP_NOFS);
+	if (!io_end)
+		return -ENOMEM;
 	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
 	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
+	bio->bi_private = io->io_end = io_end;
 	bio->bi_end_io = ext4_end_bio;
-	bio->bi_private = ext4_get_io_end(io->io_end);
-	if (!io->io_end->size)
-		io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
-							+ bh_offset(bh);
+
+	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
+
 	io->io_bio = bio;
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 	io->io_next_block = bh->b_blocknr;
 	return 0;
 }
 
 static int io_submit_add_bh(struct ext4_io_submit *io,
 			    struct inode *inode,
+			    struct writeback_control *wbc,
 			    struct buffer_head *bh)
 {
 	ext4_io_end_t *io_end;
@@ -380,18 +349,18 @@ submit_and_retry:
 		ext4_io_submit(io);
 	}
 	if (io->io_bio == NULL) {
-		ret = io_submit_init_bio(io, bh);
+		ret = io_submit_init(io, inode, wbc, bh);
 		if (ret)
 			return ret;
 	}
-	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
-	if (ret != bh->b_size)
-		goto submit_and_retry;
 	io_end = io->io_end;
 	if (test_clear_buffer_uninit(bh))
 		ext4_set_io_unwritten_flag(inode, io_end);
-	io_end->size += bh->b_size;
+	io->io_end->size += bh->b_size;
 	io->io_next_block++;
+	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+	if (ret != bh->b_size)
+		goto submit_and_retry;
 	return 0;
 }
 
@@ -463,7 +432,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	do {
 		if (!buffer_async_write(bh))
 			continue;
-		ret = io_submit_add_bh(io, inode, bh);
+		ret = io_submit_add_bh(io, inode, wbc, bh);
 		if (ret) {
 			/*
 			 * We only get here on ENOMEM. Not much else
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 13a3da25ff07..98cd41bb39c8 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -30,15 +30,19 @@ struct journal_head {
 
 	/*
 	 * Journalling list for this buffer [jbd_lock_bh_state()]
+	 * NOTE: We *cannot* combine this with b_modified into a bitfield
+	 * as gcc would then (which the C standard allows but which is
+	 * very unuseful) make 64-bit accesses to the bitfield and clobber
+	 * b_jcount if its update races with bitfield modification.
 	 */
-	unsigned b_jlist:4;
+	unsigned b_jlist;
 
 	/*
 	 * This flag signals the buffer has been modified by
 	 * the currently running transaction
 	 * [jbd_lock_bh_state()]
 	 */
-	unsigned b_modified:1;
+	unsigned b_modified;
 
 	/*
 	 * Copy of the buffer data frozen for writing to the log.
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d0e686402df8..8ee15b97cd38 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2139,7 +2139,7 @@ TRACE_EVENT(ext4_es_remove_extent,
 		  __entry->lblk, __entry->len)
 );
 
-TRACE_EVENT(ext4_es_find_delayed_extent_enter,
+TRACE_EVENT(ext4_es_find_delayed_extent_range_enter,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk),
 
 	TP_ARGS(inode, lblk),
@@ -2161,7 +2161,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_enter,
 		  (unsigned long) __entry->ino, __entry->lblk)
 );
 
-TRACE_EVENT(ext4_es_find_delayed_extent_exit,
+TRACE_EVENT(ext4_es_find_delayed_extent_range_exit,
 	TP_PROTO(struct inode *inode, struct extent_status *es),
 
 	TP_ARGS(inode, es),