aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2013-06-04 13:21:11 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-06-04 13:21:11 -0400
commit6b523df4fb5ae281ddbc817f40504b33e6226554 (patch)
treef129ccf336689296ff21e34ed86712b25d437a65 /fs/ext4
parent3613d22807a2616e9346800bacd88aa8bbbefcd7 (diff)
ext4: use transaction reservation for extent conversion in ext4_end_io
Later we would like to clear the PageWriteback bit only after extent conversion from unwritten to written extents is performed. However, it is not possible to start a transaction after PageWriteback is set because that violates lock ordering (and is easy to deadlock). So we have to reserve a transaction before locking pages and sending them for IO, and later we use the reserved transaction for extent conversion from ext4_end_io().

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/ext4_jbd2.h5
-rw-r--r--fs/ext4/extents.c40
-rw-r--r--fs/ext4/inode.c25
-rw-r--r--fs/ext4/page-io.c11
5 files changed, 69 insertions, 24 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0a9b729f991b..8de219b758fb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -184,10 +184,13 @@ struct ext4_map_blocks {
184#define EXT4_IO_END_DIRECT 0x0004 184#define EXT4_IO_END_DIRECT 0x0004
185 185
186/* 186/*
187 * For converting uninitialized extents on a work queue. 187 * For converting uninitialized extents on a work queue. 'handle' is used for
188 * buffered writeback.
188 */ 189 */
189typedef struct ext4_io_end { 190typedef struct ext4_io_end {
190 struct list_head list; /* per-file finished IO list */ 191 struct list_head list; /* per-file finished IO list */
192 handle_t *handle; /* handle reserved for extent
193 * conversion */
191 struct inode *inode; /* file being written to */ 194 struct inode *inode; /* file being written to */
192 unsigned int flag; /* unwritten or not */ 195 unsigned int flag; /* unwritten or not */
193 loff_t offset; /* offset in the file */ 196 loff_t offset; /* offset in the file */
@@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
1322 struct ext4_io_end *io_end) 1325 struct ext4_io_end *io_end)
1323{ 1326{
1324 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 1327 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
1328 /* Writeback has to have conversion transaction reserved */
1329 WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
1330 !(io_end->flag & EXT4_IO_END_DIRECT));
1325 io_end->flag |= EXT4_IO_END_UNWRITTEN; 1331 io_end->flag |= EXT4_IO_END_UNWRITTEN;
1326 atomic_inc(&EXT4_I(inode)->i_unwritten); 1332 atomic_inc(&EXT4_I(inode)->i_unwritten);
1327 } 1333 }
@@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *);
2591extern void ext4_ext_release(struct super_block *); 2597extern void ext4_ext_release(struct super_block *);
2592extern long ext4_fallocate(struct file *file, int mode, loff_t offset, 2598extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
2593 loff_t len); 2599 loff_t len);
2594extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 2600extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
2595 ssize_t len); 2601 loff_t offset, ssize_t len);
2596extern int ext4_map_blocks(handle_t *handle, struct inode *inode, 2602extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
2597 struct ext4_map_blocks *map, int flags); 2603 struct ext4_map_blocks *map, int flags);
2598extern int ext4_ext_calc_metadata_amount(struct inode *inode, 2604extern int ext4_ext_calc_metadata_amount(struct inode *inode,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index fdd865eb1879..2877258d9497 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
134#define EXT4_HT_MIGRATE 8 134#define EXT4_HT_MIGRATE 8
135#define EXT4_HT_MOVE_EXTENTS 9 135#define EXT4_HT_MOVE_EXTENTS 9
136#define EXT4_HT_XATTR 10 136#define EXT4_HT_XATTR 10
137#define EXT4_HT_MAX 11 137#define EXT4_HT_EXT_CONVERT 11
138#define EXT4_HT_MAX 12
138 139
139/** 140/**
140 * struct ext4_journal_cb_entry - Base structure for callback information. 141 * struct ext4_journal_cb_entry - Base structure for callback information.
@@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
319#define ext4_journal_stop(handle) \ 320#define ext4_journal_stop(handle) \
320 __ext4_journal_stop(__func__, __LINE__, (handle)) 321 __ext4_journal_stop(__func__, __LINE__, (handle))
321 322
322#define ext4_journal_start_reserve(handle, type) \ 323#define ext4_journal_start_reserved(handle, type) \
323 __ext4_journal_start_reserved((handle), __LINE__, (type)) 324 __ext4_journal_start_reserved((handle), __LINE__, (type))
324 325
325handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, 326handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 94283d06cace..208f664f9ee0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4566,10 +4566,9 @@ retry:
4566 * function, to convert the fallocated extents after IO is completed. 4566 * function, to convert the fallocated extents after IO is completed.
4567 * Returns 0 on success. 4567 * Returns 0 on success.
4568 */ 4568 */
4569int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 4569int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4570 ssize_t len) 4570 loff_t offset, ssize_t len)
4571{ 4571{
4572 handle_t *handle;
4573 unsigned int max_blocks; 4572 unsigned int max_blocks;
4574 int ret = 0; 4573 int ret = 0;
4575 int ret2 = 0; 4574 int ret2 = 0;
@@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4584 max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - 4583 max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
4585 map.m_lblk); 4584 map.m_lblk);
4586 /* 4585 /*
4587 * credits to insert 1 extent into extent tree 4586 * This is somewhat ugly but the idea is clear: When transaction is
4587 * reserved, everything goes into it. Otherwise we rather start several
4588 * smaller transactions for conversion of each extent separately.
4588 */ 4589 */
4589 credits = ext4_chunk_trans_blocks(inode, max_blocks); 4590 if (handle) {
4591 handle = ext4_journal_start_reserved(handle,
4592 EXT4_HT_EXT_CONVERT);
4593 if (IS_ERR(handle))
4594 return PTR_ERR(handle);
4595 credits = 0;
4596 } else {
4597 /*
4598 * credits to insert 1 extent into extent tree
4599 */
4600 credits = ext4_chunk_trans_blocks(inode, max_blocks);
4601 }
4590 while (ret >= 0 && ret < max_blocks) { 4602 while (ret >= 0 && ret < max_blocks) {
4591 map.m_lblk += ret; 4603 map.m_lblk += ret;
4592 map.m_len = (max_blocks -= ret); 4604 map.m_len = (max_blocks -= ret);
4593 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); 4605 if (credits) {
4594 if (IS_ERR(handle)) { 4606 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4595 ret = PTR_ERR(handle); 4607 credits);
4596 break; 4608 if (IS_ERR(handle)) {
4609 ret = PTR_ERR(handle);
4610 break;
4611 }
4597 } 4612 }
4598 ret = ext4_map_blocks(handle, inode, &map, 4613 ret = ext4_map_blocks(handle, inode, &map,
4599 EXT4_GET_BLOCKS_IO_CONVERT_EXT); 4614 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
@@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4604 inode->i_ino, map.m_lblk, 4619 inode->i_ino, map.m_lblk,
4605 map.m_len, ret); 4620 map.m_len, ret);
4606 ext4_mark_inode_dirty(handle, inode); 4621 ext4_mark_inode_dirty(handle, inode);
4607 ret2 = ext4_journal_stop(handle); 4622 if (credits)
4608 if (ret <= 0 || ret2 ) 4623 ret2 = ext4_journal_stop(handle);
4624 if (ret <= 0 || ret2)
4609 break; 4625 break;
4610 } 4626 }
4627 if (!credits)
4628 ret2 = ext4_journal_stop(handle);
4611 return ret > 0 ? ret2 : ret; 4629 return ret > 0 ? ret2 : ret;
4612} 4630}
4613 4631
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 736d164dc2ba..510dba785db4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page,
1410struct mpage_da_data { 1410struct mpage_da_data {
1411 struct inode *inode; 1411 struct inode *inode;
1412 struct writeback_control *wbc; 1412 struct writeback_control *wbc;
1413
1413 pgoff_t first_page; /* The first page to write */ 1414 pgoff_t first_page; /* The first page to write */
1414 pgoff_t next_page; /* Current page to examine */ 1415 pgoff_t next_page; /* Current page to examine */
1415 pgoff_t last_page; /* Last page to examine */ 1416 pgoff_t last_page; /* Last page to examine */
@@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2108 err = ext4_map_blocks(handle, inode, map, get_blocks_flags); 2109 err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
2109 if (err < 0) 2110 if (err < 0)
2110 return err; 2111 return err;
2111 if (map->m_flags & EXT4_MAP_UNINIT) 2112 if (map->m_flags & EXT4_MAP_UNINIT) {
2113 if (!mpd->io_submit.io_end->handle &&
2114 ext4_handle_valid(handle)) {
2115 mpd->io_submit.io_end->handle = handle->h_rsv_handle;
2116 handle->h_rsv_handle = NULL;
2117 }
2112 ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); 2118 ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
2119 }
2113 2120
2114 BUG_ON(map->m_len == 0); 2121 BUG_ON(map->m_len == 0);
2115 if (map->m_flags & EXT4_MAP_NEW) { 2122 if (map->m_flags & EXT4_MAP_NEW) {
@@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2351 handle_t *handle = NULL; 2358 handle_t *handle = NULL;
2352 struct mpage_da_data mpd; 2359 struct mpage_da_data mpd;
2353 struct inode *inode = mapping->host; 2360 struct inode *inode = mapping->host;
2354 int needed_blocks, ret = 0; 2361 int needed_blocks, rsv_blocks = 0, ret = 0;
2355 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2362 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2356 bool done; 2363 bool done;
2357 struct blk_plug plug; 2364 struct blk_plug plug;
@@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping,
2379 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) 2386 if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
2380 return -EROFS; 2387 return -EROFS;
2381 2388
2389 if (ext4_should_dioread_nolock(inode)) {
2390 /*
2391 * We may need to convert up to one extent per block in
2392 * the page and we may dirty the inode.
2393 */
2394 rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
2395 }
2396
2382 /* 2397 /*
2383 * If we have inline data and arrive here, it means that 2398 * If we have inline data and arrive here, it means that
2384 * we will soon create the block for the 1st page, so 2399 * we will soon create the block for the 1st page, so
@@ -2438,8 +2453,8 @@ retry:
2438 needed_blocks = ext4_da_writepages_trans_blocks(inode); 2453 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2439 2454
2440 /* start a new transaction */ 2455 /* start a new transaction */
2441 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 2456 handle = ext4_journal_start_with_reserve(inode,
2442 needed_blocks); 2457 EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
2443 if (IS_ERR(handle)) { 2458 if (IS_ERR(handle)) {
2444 ret = PTR_ERR(handle); 2459 ret = PTR_ERR(handle);
2445 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 2460 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3120 * for non AIO case, since the IO is already 3135 * for non AIO case, since the IO is already
3121 * completed, we could do the conversion right here 3136 * completed, we could do the conversion right here
3122 */ 3137 */
3123 err = ext4_convert_unwritten_extents(inode, 3138 err = ext4_convert_unwritten_extents(NULL, inode,
3124 offset, ret); 3139 offset, ret);
3125 if (err < 0) 3140 if (err < 0)
3126 ret = err; 3141 ret = err;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index de6860c7836e..5f20bc481041 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
66{ 66{
67 BUG_ON(!list_empty(&io_end->list)); 67 BUG_ON(!list_empty(&io_end->list));
68 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); 68 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
69 WARN_ON(io_end->handle);
69 70
70 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) 71 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
71 wake_up_all(ext4_ioend_wq(io_end->inode)); 72 wake_up_all(ext4_ioend_wq(io_end->inode));
@@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io)
92 struct inode *inode = io->inode; 93 struct inode *inode = io->inode;
93 loff_t offset = io->offset; 94 loff_t offset = io->offset;
94 ssize_t size = io->size; 95 ssize_t size = io->size;
96 handle_t *handle = io->handle;
95 int ret = 0; 97 int ret = 0;
96 98
97 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," 99 ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
98 "list->prev 0x%p\n", 100 "list->prev 0x%p\n",
99 io, inode->i_ino, io->list.next, io->list.prev); 101 io, inode->i_ino, io->list.next, io->list.prev);
100 102
101 ret = ext4_convert_unwritten_extents(inode, offset, size); 103 io->handle = NULL; /* Following call will use up the handle */
104 ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
102 if (ret < 0) { 105 if (ret < 0) {
103 ext4_msg(inode->i_sb, KERN_EMERG, 106 ext4_msg(inode->i_sb, KERN_EMERG,
104 "failed to convert unwritten extents to written " 107 "failed to convert unwritten extents to written "
@@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
228 231
229 if (atomic_dec_and_test(&io_end->count)) { 232 if (atomic_dec_and_test(&io_end->count)) {
230 if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 233 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
231 err = ext4_convert_unwritten_extents(io_end->inode, 234 err = ext4_convert_unwritten_extents(io_end->handle,
232 io_end->offset, io_end->size); 235 io_end->inode, io_end->offset,
236 io_end->size);
237 io_end->handle = NULL;
233 ext4_clear_io_unwritten_flag(io_end); 238 ext4_clear_io_unwritten_flag(io_end);
234 } 239 }
235 ext4_release_io_end(io_end); 240 ext4_release_io_end(io_end);