-rw-r--r--  fs/ext4/ext4.h                |   8
-rw-r--r--  fs/ext4/extents.c             |   9
-rw-r--r--  fs/ext4/extents_status.c      |  17
-rw-r--r--  fs/ext4/extents_status.h      |   3
-rw-r--r--  fs/ext4/file.c                |   4
-rw-r--r--  fs/ext4/inode.c               |  85
-rw-r--r--  fs/ext4/mballoc.c             |   6
-rw-r--r--  fs/ext4/page-io.c             | 121
-rw-r--r--  include/linux/journal-head.h  |   8
-rw-r--r--  include/trace/events/ext4.h   |   4
10 files changed, 119 insertions, 146 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0aabb344b02e..5aae3d12d400 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -209,7 +209,6 @@ typedef struct ext4_io_end {
 	ssize_t size;		/* size of the extent */
 	struct kiocb *iocb;	/* iocb struct for AIO */
 	int result;		/* error value for AIO */
-	atomic_t count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -2651,14 +2650,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
+extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
 extern void ext4_ioend_shutdown(struct inode *);
+extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end);
-extern int ext4_put_io_end(ext4_io_end_t *io_end);
-extern void ext4_put_io_end_defer(ext4_io_end_t *io_end);
-extern void ext4_io_submit_init(struct ext4_io_submit *io,
-				struct writeback_control *wbc);
 extern void ext4_end_io_work(struct work_struct *work);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 107936db244e..bc0f1910b9cf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode,
 {
 	struct extent_status es;
 
-	ext4_es_find_delayed_extent(inode, lblk_start, &es);
+	ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
 	if (es.es_len == 0)
 		return 0; /* there is no delay extent in this tree */
 	else if (es.es_lblk <= lblk_start &&
@@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
 	struct extent_status es;
 	ext4_lblk_t block, next_del;
 
-	ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
-
 	if (newes->es_pblk == 0) {
+		ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
+				newes->es_lblk + newes->es_len - 1, &es);
+
 		/*
 		 * No extent in extent-tree contains block @newes->es_pblk,
 		 * then the block may stay in 1)a hole or 2)delayed-extent.
@@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode,
 	}
 
 	block = newes->es_lblk + newes->es_len;
-	ext4_es_find_delayed_extent(inode, block, &es);
+	ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
 	if (es.es_len == 0)
 		next_del = EXT_MAX_BLOCKS;
 	else
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index fe3337a85ede..e6941e622d31 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
 }
 
 /*
- * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk
- * if it exists, otherwise, the next extent after @es->lblk.
+ * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
+ * @es->lblk if it exists, otherwise, the next extent after @es->lblk.
  *
  * @inode: the inode which owns delayed extents
  * @lblk: the offset where we start to search
+ * @end: the offset where we stop to search
  * @es: delayed extent that we found
  */
-void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+void ext4_es_find_delayed_extent_range(struct inode *inode,
+				 ext4_lblk_t lblk, ext4_lblk_t end,
 				 struct extent_status *es)
 {
 	struct ext4_es_tree *tree = NULL;
@@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
 	struct rb_node *node;
 
 	BUG_ON(es == NULL);
-	trace_ext4_es_find_delayed_extent_enter(inode, lblk);
+	BUG_ON(end < lblk);
+	trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
 
 	read_lock(&EXT4_I(inode)->i_es_lock);
 	tree = &EXT4_I(inode)->i_es_tree;
@@ -270,6 +273,10 @@ out:
 	if (es1 && !ext4_es_is_delayed(es1)) {
 		while ((node = rb_next(&es1->rb_node)) != NULL) {
 			es1 = rb_entry(node, struct extent_status, rb_node);
+			if (es1->es_lblk > end) {
+				es1 = NULL;
+				break;
+			}
 			if (ext4_es_is_delayed(es1))
 				break;
 		}
@@ -285,7 +292,7 @@ out:
 	read_unlock(&EXT4_I(inode)->i_es_lock);
 
 	ext4_es_lru_add(inode);
-	trace_ext4_es_find_delayed_extent_exit(inode, es);
+	trace_ext4_es_find_delayed_extent_range_exit(inode, es);
 }
 
 static struct extent_status *
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index d8e2d4dc311e..f740eb03b707 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 				 unsigned long long status);
 extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 				 ext4_lblk_t len);
-extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
+extern void ext4_es_find_delayed_extent_range(struct inode *inode,
+				 ext4_lblk_t lblk, ext4_lblk_t end,
 				 struct extent_status *es);
 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 				 struct extent_status *es);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4959e29573b6..b1b4d51b5d86 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -465,7 +465,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 		 * If there is a delay extent at this offset,
 		 * it will be as a data.
 		 */
-		ext4_es_find_delayed_extent(inode, last, &es);
+		ext4_es_find_delayed_extent_range(inode, last, last, &es);
 		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
 			if (last != start)
 				dataoff = last << blkbits;
@@ -548,7 +548,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 		 * If there is a delay extent at this offset,
 		 * we will skip this extent.
 		 */
-		ext4_es_find_delayed_extent(inode, last, &es);
+		ext4_es_find_delayed_extent_range(inode, last, last, &es);
 		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
 			last = es.es_lblk + es.es_len;
 			holeoff = last << blkbits;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0723774bdfb5..d6382b89ecbd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1488,10 +1488,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	struct ext4_io_submit io_submit;
 
 	BUG_ON(mpd->next_page <= mpd->first_page);
-	ext4_io_submit_init(&io_submit, mpd->wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end)
-		return -ENOMEM;
+	memset(&io_submit, 0, sizeof(io_submit));
 	/*
 	 * We need to start from the first_page to the next_page - 1
 	 * to make sure we also write the mapped dirty buffer_heads.
@@ -1579,8 +1576,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		pagevec_release(&pvec);
 	}
 	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -2239,16 +2234,9 @@ static int ext4_writepage(struct page *page,
 		 */
 		return __ext4_journalled_writepage(page, len);
 
-	ext4_io_submit_init(&io_submit, wbc);
-	io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS);
-	if (!io_submit.io_end) {
-		redirty_page_for_writepage(wbc, page);
-		return -ENOMEM;
-	}
+	memset(&io_submit, 0, sizeof(io_submit));
 	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
 	ext4_io_submit(&io_submit);
-	/* Drop io_end reference we got from init */
-	ext4_put_io_end_defer(io_submit.io_end);
 	return ret;
 }
 
@@ -3079,13 +3067,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 	struct inode *inode = file_inode(iocb->ki_filp);
 	ext4_io_end_t *io_end = iocb->private;
 
-	/* if not async direct IO just return */
-	if (!io_end) {
-		inode_dio_done(inode);
-		if (is_async)
-			aio_complete(iocb, ret, 0);
-		return;
-	}
+	/* if not async direct IO or dio with 0 bytes write, just return */
+	if (!io_end || !size)
+		goto out;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3093,13 +3077,25 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 		  size);
 
 	iocb->private = NULL;
+
+	/* if not aio dio with unwritten extents, just free io and return */
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+out:
+		inode_dio_done(inode);
+		if (is_async)
+			aio_complete(iocb, ret, 0);
+		return;
+	}
+
 	io_end->offset = offset;
 	io_end->size = size;
 	if (is_async) {
 		io_end->iocb = iocb;
 		io_end->result = ret;
 	}
-	ext4_put_io_end_defer(io_end);
+
+	ext4_add_complete_io(io_end);
 }
 
 /*
@@ -3133,7 +3129,6 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	get_block_t *get_block_func = NULL;
 	int dio_flags = 0;
 	loff_t final_size = offset + count;
-	ext4_io_end_t *io_end = NULL;
 
 	/* Use the old path for reads and writes beyond i_size. */
 	if (rw != WRITE || final_size > inode->i_size)
@@ -3172,16 +3167,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		iocb->private = NULL;
 		ext4_inode_aio_set(inode, NULL);
 		if (!is_sync_kiocb(iocb)) {
-			io_end = ext4_init_io_end(inode, GFP_NOFS);
+			ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
 			if (!io_end) {
 				ret = -ENOMEM;
 				goto retake_lock;
 			}
 			io_end->flag |= EXT4_IO_END_DIRECT;
-			/*
-			 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
-			 */
-			iocb->private = ext4_get_io_end(io_end);
+			iocb->private = io_end;
 			/*
 			 * we save the io structure for current async direct
 			 * IO, so that later ext4_map_blocks() could flag the
@@ -3205,27 +3197,26 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 					   NULL,
 					   dio_flags);
 
+	if (iocb->private)
+		ext4_inode_aio_set(inode, NULL);
 	/*
-	 * Put our reference to io_end. This can free the io_end structure e.g.
-	 * in sync IO case or in case of error. It can even perform extent
-	 * conversion if all bios we submitted finished before we got here.
-	 * Note that in that case iocb->private can be already set to NULL
-	 * here.
+	 * The io_end structure takes a reference to the inode, that
+	 * structure needs to be destroyed and the reference to the
+	 * inode need to be dropped, when IO is complete, even with 0
+	 * byte write, or failed.
+	 *
+	 * In the successful AIO DIO case, the io_end structure will
+	 * be destroyed and the reference to the inode will be dropped
+	 * after the end_io call back function is called.
+	 *
+	 * In the case there is 0 byte write, or error case, since VFS
+	 * direct IO won't invoke the end_io call back function, we
+	 * need to free the end_io structure here.
 	 */
-	if (io_end) {
-		ext4_inode_aio_set(inode, NULL);
-		ext4_put_io_end(io_end);
-		/*
-		 * In case of error or no write ext4_end_io_dio() was not
-		 * called so we have to put iocb's reference.
-		 */
-		if (ret <= 0 && ret != -EIOCBQUEUED) {
-			WARN_ON(iocb->private != io_end);
-			ext4_put_io_end(io_end);
-			iocb->private = NULL;
-		}
-	}
-	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
+	if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
+		ext4_free_io_end(iocb->private);
+		iocb->private = NULL;
+	} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
 			EXT4_STATE_DIO_UNWRITTEN)) {
 		int err;
 		/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1ed9e07434b..def84082a9a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2105,7 +2105,11 @@ repeat:
 		group = ac->ac_g_ex.fe_group;
 
 		for (i = 0; i < ngroups; group++, i++) {
-			if (group == ngroups)
+			/*
+			 * Artificially restricted ngroups for non-extent
+			 * files makes group > ngroups possible on first loop.
+			 */
+			if (group >= ngroups)
 				group = 0;
 
 			/* This now checks without needing the buddy page */
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 19599bded62a..4acf1f78881b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,28 +62,15 @@ void ext4_ioend_shutdown(struct inode *inode)
 		cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }
 
-static void ext4_release_io_end(ext4_io_end_t *io_end)
+void ext4_free_io_end(ext4_io_end_t *io)
 {
-	BUG_ON(!list_empty(&io_end->list));
-	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-
-	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
-		wake_up_all(ext4_ioend_wq(io_end->inode));
-	if (io_end->flag & EXT4_IO_END_DIRECT)
-		inode_dio_done(io_end->inode);
-	if (io_end->iocb)
-		aio_complete(io_end->iocb, io_end->result, 0);
-	kmem_cache_free(io_end_cachep, io_end);
-}
-
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-	struct inode *inode = io_end->inode;
+	BUG_ON(!io);
+	BUG_ON(!list_empty(&io->list));
+	BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
 
-	io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-	/* Wake up anyone waiting on unwritten extent conversion */
-	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-		wake_up_all(ext4_ioend_wq(inode));
+	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
+		wake_up_all(ext4_ioend_wq(io->inode));
+	kmem_cache_free(io_end_cachep, io);
 }
 
 /* check a range of space and convert unwritten extents to written. */
@@ -106,8 +93,13 @@ static int ext4_end_io(ext4_io_end_t *io)
 			 "(inode %lu, offset %llu, size %zd, error %d)",
 			 inode->i_ino, offset, size, ret);
 	}
-	ext4_clear_io_unwritten_flag(io);
-	ext4_release_io_end(io);
+	/* Wake up anyone waiting on unwritten extent conversion */
+	if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+		wake_up_all(ext4_ioend_wq(inode));
+	if (io->flag & EXT4_IO_END_DIRECT)
+		inode_dio_done(inode);
+	if (io->iocb)
+		aio_complete(io->iocb, io->result, 0);
 	return ret;
 }
 
@@ -138,7 +130,7 @@ static void dump_completed_IO(struct inode *inode)
 }
 
 /* Add the io_end to per-inode completed end_io list. */
-static void ext4_add_complete_io(ext4_io_end_t *io_end)
+void ext4_add_complete_io(ext4_io_end_t *io_end)
 {
 	struct ext4_inode_info *ei = EXT4_I(io_end->inode);
 	struct workqueue_struct *wq;
@@ -175,6 +167,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
 		err = ext4_end_io(io);
 		if (unlikely(!ret && err))
 			ret = err;
+		io->flag &= ~EXT4_IO_END_UNWRITTEN;
+		ext4_free_io_end(io);
 	}
 	return ret;
 }
@@ -206,43 +200,10 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 		atomic_inc(&EXT4_I(inode)->i_ioend_count);
 		io->inode = inode;
 		INIT_LIST_HEAD(&io->list);
-		atomic_set(&io->count, 1);
 	}
 	return io;
 }
 
-void ext4_put_io_end_defer(ext4_io_end_t *io_end)
-{
-	if (atomic_dec_and_test(&io_end->count)) {
-		if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
-			ext4_release_io_end(io_end);
-			return;
-		}
-		ext4_add_complete_io(io_end);
-	}
-}
-
-int ext4_put_io_end(ext4_io_end_t *io_end)
-{
-	int err = 0;
-
-	if (atomic_dec_and_test(&io_end->count)) {
-		if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-			err = ext4_convert_unwritten_extents(io_end->inode,
-					io_end->offset, io_end->size);
-			ext4_clear_io_unwritten_flag(io_end);
-		}
-		ext4_release_io_end(io_end);
-	}
-	return err;
-}
-
-ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
-{
-	atomic_inc(&io_end->count);
-	return io_end;
-}
-
 /*
  * Print an buffer I/O error compatible with the fs/buffer.c. This
  * provides compatibility with dmesg scrapers that look for a specific
@@ -325,7 +286,12 @@ static void ext4_end_bio(struct bio *bio, int error)
 			     bi_sector >> (inode->i_blkbits - 9));
 	}
 
-	ext4_put_io_end_defer(io_end);
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+		ext4_free_io_end(io_end);
+		return;
+	}
+
+	ext4_add_complete_io(io_end);
 }
 
 void ext4_io_submit(struct ext4_io_submit *io)
@@ -339,37 +305,40 @@ void ext4_io_submit(struct ext4_io_submit *io)
 		bio_put(io->io_bio);
 	}
 	io->io_bio = NULL;
-}
-
-void ext4_io_submit_init(struct ext4_io_submit *io,
-			 struct writeback_control *wbc)
-{
-	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
-	io->io_bio = NULL;
+	io->io_op = 0;
 	io->io_end = NULL;
 }
 
-static int io_submit_init_bio(struct ext4_io_submit *io,
-			      struct buffer_head *bh)
+static int io_submit_init(struct ext4_io_submit *io,
+			  struct inode *inode,
+			  struct writeback_control *wbc,
+			  struct buffer_head *bh)
 {
+	ext4_io_end_t *io_end;
+	struct page *page = bh->b_page;
 	int nvecs = bio_get_nr_vecs(bh->b_bdev);
 	struct bio *bio;
 
+	io_end = ext4_init_io_end(inode, GFP_NOFS);
+	if (!io_end)
+		return -ENOMEM;
 	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
 	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
+	bio->bi_private = io->io_end = io_end;
 	bio->bi_end_io = ext4_end_bio;
-	bio->bi_private = ext4_get_io_end(io->io_end);
-	if (!io->io_end->size)
-		io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
-				     + bh_offset(bh);
+
+	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
+
 	io->io_bio = bio;
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 	io->io_next_block = bh->b_blocknr;
 	return 0;
 }
 
 static int io_submit_add_bh(struct ext4_io_submit *io,
 			    struct inode *inode,
+			    struct writeback_control *wbc,
 			    struct buffer_head *bh)
 {
 	ext4_io_end_t *io_end;
@@ -380,18 +349,18 @@ submit_and_retry:
 		ext4_io_submit(io);
 	}
 	if (io->io_bio == NULL) {
-		ret = io_submit_init_bio(io, bh);
+		ret = io_submit_init(io, inode, wbc, bh);
 		if (ret)
 			return ret;
 	}
-	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
-	if (ret != bh->b_size)
-		goto submit_and_retry;
 	io_end = io->io_end;
 	if (test_clear_buffer_uninit(bh))
 		ext4_set_io_unwritten_flag(inode, io_end);
-	io_end->size += bh->b_size;
+	io->io_end->size += bh->b_size;
 	io->io_next_block++;
+	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+	if (ret != bh->b_size)
+		goto submit_and_retry;
 	return 0;
 }
 
@@ -463,7 +432,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	do {
 		if (!buffer_async_write(bh))
 			continue;
-		ret = io_submit_add_bh(io, inode, bh);
+		ret = io_submit_add_bh(io, inode, wbc, bh);
 		if (ret) {
 			/*
 			 * We only get here on ENOMEM. Not much else
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 13a3da25ff07..98cd41bb39c8 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -30,15 +30,19 @@ struct journal_head {
 
 	/*
 	 * Journalling list for this buffer [jbd_lock_bh_state()]
+	 * NOTE: We *cannot* combine this with b_modified into a bitfield
+	 * as gcc would then (which the C standard allows but which is
+	 * very unuseful) make 64-bit accesses to the bitfield and clobber
+	 * b_jcount if its update races with bitfield modification.
 	 */
-	unsigned b_jlist:4;
+	unsigned b_jlist;
 
 	/*
 	 * This flag signals the buffer has been modified by
 	 * the currently running transaction
 	 * [jbd_lock_bh_state()]
 	 */
-	unsigned b_modified:1;
+	unsigned b_modified;
 
 	/*
 	 * Copy of the buffer data frozen for writing to the log.
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index d0e686402df8..8ee15b97cd38 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2139,7 +2139,7 @@ TRACE_EVENT(ext4_es_remove_extent,
 		  __entry->lblk, __entry->len)
 );
 
-TRACE_EVENT(ext4_es_find_delayed_extent_enter,
+TRACE_EVENT(ext4_es_find_delayed_extent_range_enter,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk),
 
 	TP_ARGS(inode, lblk),
@@ -2161,7 +2161,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_enter,
 		  (unsigned long) __entry->ino, __entry->lblk)
 );
 
-TRACE_EVENT(ext4_es_find_delayed_extent_exit,
+TRACE_EVENT(ext4_es_find_delayed_extent_range_exit,
 	TP_PROTO(struct inode *inode, struct extent_status *es),
 
 	TP_ARGS(inode, es),