diff options
author | Jan Kara <jack@suse.cz> | 2013-06-04 13:21:11 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-06-04 13:21:11 -0400 |
commit | 6b523df4fb5ae281ddbc817f40504b33e6226554 (patch) | |
tree | f129ccf336689296ff21e34ed86712b25d437a65 /fs/ext4 | |
parent | 3613d22807a2616e9346800bacd88aa8bbbefcd7 (diff) |
ext4: use transaction reservation for extent conversion in ext4_end_io
Later we would like to clear the PageWriteback bit only after the
conversion from unwritten to written extents has been performed. However,
it is not possible to start a transaction after PageWriteback is set,
because that violates lock ordering (and can easily deadlock). So we
have to reserve a transaction before locking the pages and sending them
for IO, and later we use that reserved transaction for the extent
conversion from ext4_end_io().
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 12 | ||||
-rw-r--r-- | fs/ext4/ext4_jbd2.h | 5 | ||||
-rw-r--r-- | fs/ext4/extents.c | 40 | ||||
-rw-r--r-- | fs/ext4/inode.c | 25 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 11 |
5 files changed, 69 insertions, 24 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0a9b729f991b..8de219b758fb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -184,10 +184,13 @@ struct ext4_map_blocks { | |||
184 | #define EXT4_IO_END_DIRECT 0x0004 | 184 | #define EXT4_IO_END_DIRECT 0x0004 |
185 | 185 | ||
186 | /* | 186 | /* |
187 | * For converting uninitialized extents on a work queue. | 187 | * For converting uninitialized extents on a work queue. 'handle' is used for |
188 | * buffered writeback. | ||
188 | */ | 189 | */ |
189 | typedef struct ext4_io_end { | 190 | typedef struct ext4_io_end { |
190 | struct list_head list; /* per-file finished IO list */ | 191 | struct list_head list; /* per-file finished IO list */ |
192 | handle_t *handle; /* handle reserved for extent | ||
193 | * conversion */ | ||
191 | struct inode *inode; /* file being written to */ | 194 | struct inode *inode; /* file being written to */ |
192 | unsigned int flag; /* unwritten or not */ | 195 | unsigned int flag; /* unwritten or not */ |
193 | loff_t offset; /* offset in the file */ | 196 | loff_t offset; /* offset in the file */ |
@@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, | |||
1322 | struct ext4_io_end *io_end) | 1325 | struct ext4_io_end *io_end) |
1323 | { | 1326 | { |
1324 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 1327 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
1328 | /* Writeback has to have conversion transaction reserved */ | ||
1329 | WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle && | ||
1330 | !(io_end->flag & EXT4_IO_END_DIRECT)); | ||
1325 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 1331 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
1326 | atomic_inc(&EXT4_I(inode)->i_unwritten); | 1332 | atomic_inc(&EXT4_I(inode)->i_unwritten); |
1327 | } | 1333 | } |
@@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *); | |||
2591 | extern void ext4_ext_release(struct super_block *); | 2597 | extern void ext4_ext_release(struct super_block *); |
2592 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, | 2598 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, |
2593 | loff_t len); | 2599 | loff_t len); |
2594 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 2600 | extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
2595 | ssize_t len); | 2601 | loff_t offset, ssize_t len); |
2596 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | 2602 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, |
2597 | struct ext4_map_blocks *map, int flags); | 2603 | struct ext4_map_blocks *map, int flags); |
2598 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 2604 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index fdd865eb1879..2877258d9497 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode) | |||
134 | #define EXT4_HT_MIGRATE 8 | 134 | #define EXT4_HT_MIGRATE 8 |
135 | #define EXT4_HT_MOVE_EXTENTS 9 | 135 | #define EXT4_HT_MOVE_EXTENTS 9 |
136 | #define EXT4_HT_XATTR 10 | 136 | #define EXT4_HT_XATTR 10 |
137 | #define EXT4_HT_MAX 11 | 137 | #define EXT4_HT_EXT_CONVERT 11 |
138 | #define EXT4_HT_MAX 12 | ||
138 | 139 | ||
139 | /** | 140 | /** |
140 | * struct ext4_journal_cb_entry - Base structure for callback information. | 141 | * struct ext4_journal_cb_entry - Base structure for callback information. |
@@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode, | |||
319 | #define ext4_journal_stop(handle) \ | 320 | #define ext4_journal_stop(handle) \ |
320 | __ext4_journal_stop(__func__, __LINE__, (handle)) | 321 | __ext4_journal_stop(__func__, __LINE__, (handle)) |
321 | 322 | ||
322 | #define ext4_journal_start_reserve(handle, type) \ | 323 | #define ext4_journal_start_reserved(handle, type) \ |
323 | __ext4_journal_start_reserved((handle), __LINE__, (type)) | 324 | __ext4_journal_start_reserved((handle), __LINE__, (type)) |
324 | 325 | ||
325 | handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, | 326 | handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94283d06cace..208f664f9ee0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -4566,10 +4566,9 @@ retry: | |||
4566 | * function, to convert the fallocated extents after IO is completed. | 4566 | * function, to convert the fallocated extents after IO is completed. |
4567 | * Returns 0 on success. | 4567 | * Returns 0 on success. |
4568 | */ | 4568 | */ |
4569 | int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 4569 | int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
4570 | ssize_t len) | 4570 | loff_t offset, ssize_t len) |
4571 | { | 4571 | { |
4572 | handle_t *handle; | ||
4573 | unsigned int max_blocks; | 4572 | unsigned int max_blocks; |
4574 | int ret = 0; | 4573 | int ret = 0; |
4575 | int ret2 = 0; | 4574 | int ret2 = 0; |
@@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4584 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - | 4583 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - |
4585 | map.m_lblk); | 4584 | map.m_lblk); |
4586 | /* | 4585 | /* |
4587 | * credits to insert 1 extent into extent tree | 4586 | * This is somewhat ugly but the idea is clear: When transaction is |
4587 | * reserved, everything goes into it. Otherwise we rather start several | ||
4588 | * smaller transactions for conversion of each extent separately. | ||
4588 | */ | 4589 | */ |
4589 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 4590 | if (handle) { |
4591 | handle = ext4_journal_start_reserved(handle, | ||
4592 | EXT4_HT_EXT_CONVERT); | ||
4593 | if (IS_ERR(handle)) | ||
4594 | return PTR_ERR(handle); | ||
4595 | credits = 0; | ||
4596 | } else { | ||
4597 | /* | ||
4598 | * credits to insert 1 extent into extent tree | ||
4599 | */ | ||
4600 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
4601 | } | ||
4590 | while (ret >= 0 && ret < max_blocks) { | 4602 | while (ret >= 0 && ret < max_blocks) { |
4591 | map.m_lblk += ret; | 4603 | map.m_lblk += ret; |
4592 | map.m_len = (max_blocks -= ret); | 4604 | map.m_len = (max_blocks -= ret); |
4593 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | 4605 | if (credits) { |
4594 | if (IS_ERR(handle)) { | 4606 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
4595 | ret = PTR_ERR(handle); | 4607 | credits); |
4596 | break; | 4608 | if (IS_ERR(handle)) { |
4609 | ret = PTR_ERR(handle); | ||
4610 | break; | ||
4611 | } | ||
4597 | } | 4612 | } |
4598 | ret = ext4_map_blocks(handle, inode, &map, | 4613 | ret = ext4_map_blocks(handle, inode, &map, |
4599 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 4614 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
@@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
4604 | inode->i_ino, map.m_lblk, | 4619 | inode->i_ino, map.m_lblk, |
4605 | map.m_len, ret); | 4620 | map.m_len, ret); |
4606 | ext4_mark_inode_dirty(handle, inode); | 4621 | ext4_mark_inode_dirty(handle, inode); |
4607 | ret2 = ext4_journal_stop(handle); | 4622 | if (credits) |
4608 | if (ret <= 0 || ret2 ) | 4623 | ret2 = ext4_journal_stop(handle); |
4624 | if (ret <= 0 || ret2) | ||
4609 | break; | 4625 | break; |
4610 | } | 4626 | } |
4627 | if (!credits) | ||
4628 | ret2 = ext4_journal_stop(handle); | ||
4611 | return ret > 0 ? ret2 : ret; | 4629 | return ret > 0 ? ret2 : ret; |
4612 | } | 4630 | } |
4613 | 4631 | ||
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 736d164dc2ba..510dba785db4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1410 | struct mpage_da_data { | 1410 | struct mpage_da_data { |
1411 | struct inode *inode; | 1411 | struct inode *inode; |
1412 | struct writeback_control *wbc; | 1412 | struct writeback_control *wbc; |
1413 | |||
1413 | pgoff_t first_page; /* The first page to write */ | 1414 | pgoff_t first_page; /* The first page to write */ |
1414 | pgoff_t next_page; /* Current page to examine */ | 1415 | pgoff_t next_page; /* Current page to examine */ |
1415 | pgoff_t last_page; /* Last page to examine */ | 1416 | pgoff_t last_page; /* Last page to examine */ |
@@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | |||
2108 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); | 2109 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); |
2109 | if (err < 0) | 2110 | if (err < 0) |
2110 | return err; | 2111 | return err; |
2111 | if (map->m_flags & EXT4_MAP_UNINIT) | 2112 | if (map->m_flags & EXT4_MAP_UNINIT) { |
2113 | if (!mpd->io_submit.io_end->handle && | ||
2114 | ext4_handle_valid(handle)) { | ||
2115 | mpd->io_submit.io_end->handle = handle->h_rsv_handle; | ||
2116 | handle->h_rsv_handle = NULL; | ||
2117 | } | ||
2112 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); | 2118 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); |
2119 | } | ||
2113 | 2120 | ||
2114 | BUG_ON(map->m_len == 0); | 2121 | BUG_ON(map->m_len == 0); |
2115 | if (map->m_flags & EXT4_MAP_NEW) { | 2122 | if (map->m_flags & EXT4_MAP_NEW) { |
@@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2351 | handle_t *handle = NULL; | 2358 | handle_t *handle = NULL; |
2352 | struct mpage_da_data mpd; | 2359 | struct mpage_da_data mpd; |
2353 | struct inode *inode = mapping->host; | 2360 | struct inode *inode = mapping->host; |
2354 | int needed_blocks, ret = 0; | 2361 | int needed_blocks, rsv_blocks = 0, ret = 0; |
2355 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2362 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2356 | bool done; | 2363 | bool done; |
2357 | struct blk_plug plug; | 2364 | struct blk_plug plug; |
@@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2379 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) | 2386 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
2380 | return -EROFS; | 2387 | return -EROFS; |
2381 | 2388 | ||
2389 | if (ext4_should_dioread_nolock(inode)) { | ||
2390 | /* | ||
2391 | * We may need to convert up to one extent per block in | ||
2392 | * the page and we may dirty the inode. | ||
2393 | */ | ||
2394 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); | ||
2395 | } | ||
2396 | |||
2382 | /* | 2397 | /* |
2383 | * If we have inline data and arrive here, it means that | 2398 | * If we have inline data and arrive here, it means that |
2384 | * we will soon create the block for the 1st page, so | 2399 | * we will soon create the block for the 1st page, so |
@@ -2438,8 +2453,8 @@ retry: | |||
2438 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | 2453 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2439 | 2454 | ||
2440 | /* start a new transaction */ | 2455 | /* start a new transaction */ |
2441 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 2456 | handle = ext4_journal_start_with_reserve(inode, |
2442 | needed_blocks); | 2457 | EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); |
2443 | if (IS_ERR(handle)) { | 2458 | if (IS_ERR(handle)) { |
2444 | ret = PTR_ERR(handle); | 2459 | ret = PTR_ERR(handle); |
2445 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2460 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
@@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3120 | * for non AIO case, since the IO is already | 3135 | * for non AIO case, since the IO is already |
3121 | * completed, we could do the conversion right here | 3136 | * completed, we could do the conversion right here |
3122 | */ | 3137 | */ |
3123 | err = ext4_convert_unwritten_extents(inode, | 3138 | err = ext4_convert_unwritten_extents(NULL, inode, |
3124 | offset, ret); | 3139 | offset, ret); |
3125 | if (err < 0) | 3140 | if (err < 0) |
3126 | ret = err; | 3141 | ret = err; |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index de6860c7836e..5f20bc481041 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) | |||
66 | { | 66 | { |
67 | BUG_ON(!list_empty(&io_end->list)); | 67 | BUG_ON(!list_empty(&io_end->list)); |
68 | BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | 68 | BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); |
69 | WARN_ON(io_end->handle); | ||
69 | 70 | ||
70 | if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) | 71 | if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) |
71 | wake_up_all(ext4_ioend_wq(io_end->inode)); | 72 | wake_up_all(ext4_ioend_wq(io_end->inode)); |
@@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io) | |||
92 | struct inode *inode = io->inode; | 93 | struct inode *inode = io->inode; |
93 | loff_t offset = io->offset; | 94 | loff_t offset = io->offset; |
94 | ssize_t size = io->size; | 95 | ssize_t size = io->size; |
96 | handle_t *handle = io->handle; | ||
95 | int ret = 0; | 97 | int ret = 0; |
96 | 98 | ||
97 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 99 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
98 | "list->prev 0x%p\n", | 100 | "list->prev 0x%p\n", |
99 | io, inode->i_ino, io->list.next, io->list.prev); | 101 | io, inode->i_ino, io->list.next, io->list.prev); |
100 | 102 | ||
101 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 103 | io->handle = NULL; /* Following call will use up the handle */ |
104 | ret = ext4_convert_unwritten_extents(handle, inode, offset, size); | ||
102 | if (ret < 0) { | 105 | if (ret < 0) { |
103 | ext4_msg(inode->i_sb, KERN_EMERG, | 106 | ext4_msg(inode->i_sb, KERN_EMERG, |
104 | "failed to convert unwritten extents to written " | 107 | "failed to convert unwritten extents to written " |
@@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end) | |||
228 | 231 | ||
229 | if (atomic_dec_and_test(&io_end->count)) { | 232 | if (atomic_dec_and_test(&io_end->count)) { |
230 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { | 233 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { |
231 | err = ext4_convert_unwritten_extents(io_end->inode, | 234 | err = ext4_convert_unwritten_extents(io_end->handle, |
232 | io_end->offset, io_end->size); | 235 | io_end->inode, io_end->offset, |
236 | io_end->size); | ||
237 | io_end->handle = NULL; | ||
233 | ext4_clear_io_unwritten_flag(io_end); | 238 | ext4_clear_io_unwritten_flag(io_end); |
234 | } | 239 | } |
235 | ext4_release_io_end(io_end); | 240 | ext4_release_io_end(io_end); |