diff options
author | Mingming Cao <cmm@us.ibm.com> | 2008-08-19 22:16:03 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-08-19 22:16:03 -0400 |
commit | f3bd1f3fa8ca7ec70cfd87aa94dc5e1a260901f2 (patch) | |
tree | a0f0ce2e044f8a75009e94d5f788b45a3948bbe3 /fs/ext4 | |
parent | ee12b630687d510f6f4b6d4acdc4e267fd4adeda (diff) |
ext4: journal credits reservation fixes for DIO, fallocate
The DIO and fallocate credit calculation differs from that of writepage, as
they start a new journal handle for each call to ext4_get_blocks_wrap().
This patch uses the helper function in the DIO and fallocate cases, passing
a flag indicating that the modified data are contiguous, so that fewer
indirect/index blocks need to be accounted for.
This patch also fixes the journal credit reservation for direct I/O
(DIO). Previously the estimated credits for DIO were calculated only for
non-extent files, which was not enough if the file is extent-based.
Also fixed was fallocate double-counting credits for modifying the
superblock.
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
-rw-r--r-- | fs/ext4/extents.c | 11 | ||||
-rw-r--r-- | fs/ext4/inode.c | 45 |
3 files changed, 30 insertions, 27 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 38e661b0ea88..295003241d3d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1073,6 +1073,7 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *); | |||
1073 | extern void ext4_set_aops(struct inode *inode); | 1073 | extern void ext4_set_aops(struct inode *inode); |
1074 | extern int ext4_writepage_trans_blocks(struct inode *); | 1074 | extern int ext4_writepage_trans_blocks(struct inode *); |
1075 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | 1075 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); |
1076 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | ||
1076 | extern int ext4_block_truncate_page(handle_t *handle, | 1077 | extern int ext4_block_truncate_page(handle_t *handle, |
1077 | struct address_space *mapping, loff_t from); | 1078 | struct address_space *mapping, loff_t from); |
1078 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 1079 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5c5dd3a1d657..5596b70efa20 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1758,7 +1758,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, | |||
1758 | { | 1758 | { |
1759 | if (path) { | 1759 | if (path) { |
1760 | int depth = ext_depth(inode); | 1760 | int depth = ext_depth(inode); |
1761 | int ret; | 1761 | int ret = 0; |
1762 | 1762 | ||
1763 | /* probably there is space in leaf? */ | 1763 | /* probably there is space in leaf? */ |
1764 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) | 1764 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) |
@@ -1777,7 +1777,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, | |||
1777 | } | 1777 | } |
1778 | } | 1778 | } |
1779 | 1779 | ||
1780 | return ext4_meta_trans_blocks(inode, num, 1); | 1780 | return ext4_chunk_trans_blocks(inode, num); |
1781 | } | 1781 | } |
1782 | 1782 | ||
1783 | /* | 1783 | /* |
@@ -2810,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2810 | /* | 2810 | /* |
2811 | * probably first extent we're gonna free will be last in block | 2811 | * probably first extent we're gonna free will be last in block |
2812 | */ | 2812 | */ |
2813 | err = ext4_writepage_trans_blocks(inode) + 3; | 2813 | err = ext4_writepage_trans_blocks(inode); |
2814 | handle = ext4_journal_start(inode, err); | 2814 | handle = ext4_journal_start(inode, err); |
2815 | if (IS_ERR(handle)) | 2815 | if (IS_ERR(handle)) |
2816 | return; | 2816 | return; |
@@ -2923,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
2923 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 2923 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
2924 | - block; | 2924 | - block; |
2925 | /* | 2925 | /* |
2926 | * credits to insert 1 extent into extent tree + buffers to be able to | 2926 | * credits to insert 1 extent into extent tree |
2927 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | ||
2928 | */ | 2927 | */ |
2929 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 2928 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
2930 | mutex_lock(&inode->i_mutex); | 2929 | mutex_lock(&inode->i_mutex); |
2931 | retry: | 2930 | retry: |
2932 | while (ret >= 0 && ret < max_blocks) { | 2931 | while (ret >= 0 && ret < max_blocks) { |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a27129065144..ffc95ba48859 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1044,18 +1044,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1044 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1044 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1045 | } | 1045 | } |
1046 | 1046 | ||
1047 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1048 | #define DIO_MAX_BLOCKS 4096 | ||
1049 | /* | ||
1050 | * Number of credits we need for writing DIO_MAX_BLOCKS: | ||
1051 | * We need sb + group descriptor + bitmap + inode -> 4 | ||
1052 | * For B blocks with A block pointers per block we need: | ||
1053 | * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). | ||
1054 | * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. | ||
1055 | */ | ||
1056 | #define DIO_CREDITS 25 | ||
1057 | |||
1058 | |||
1059 | /* | 1047 | /* |
1060 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1048 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, |
1061 | * and returns if the blocks are already mapped. | 1049 | * and returns if the blocks are already mapped. |
@@ -1167,19 +1155,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1167 | return retval; | 1155 | return retval; |
1168 | } | 1156 | } |
1169 | 1157 | ||
1158 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1159 | #define DIO_MAX_BLOCKS 4096 | ||
1160 | |||
1170 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1161 | static int ext4_get_block(struct inode *inode, sector_t iblock, |
1171 | struct buffer_head *bh_result, int create) | 1162 | struct buffer_head *bh_result, int create) |
1172 | { | 1163 | { |
1173 | handle_t *handle = ext4_journal_current_handle(); | 1164 | handle_t *handle = ext4_journal_current_handle(); |
1174 | int ret = 0, started = 0; | 1165 | int ret = 0, started = 0; |
1175 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 1166 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
1167 | int dio_credits; | ||
1176 | 1168 | ||
1177 | if (create && !handle) { | 1169 | if (create && !handle) { |
1178 | /* Direct IO write... */ | 1170 | /* Direct IO write... */ |
1179 | if (max_blocks > DIO_MAX_BLOCKS) | 1171 | if (max_blocks > DIO_MAX_BLOCKS) |
1180 | max_blocks = DIO_MAX_BLOCKS; | 1172 | max_blocks = DIO_MAX_BLOCKS; |
1181 | handle = ext4_journal_start(inode, DIO_CREDITS + | 1173 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
1182 | 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | 1174 | handle = ext4_journal_start(inode, dio_credits); |
1183 | if (IS_ERR(handle)) { | 1175 | if (IS_ERR(handle)) { |
1184 | ret = PTR_ERR(handle); | 1176 | ret = PTR_ERR(handle); |
1185 | goto out; | 1177 | goto out; |
@@ -2243,7 +2235,7 @@ static int ext4_da_writepage(struct page *page, | |||
2243 | * for DIO, writepages, and truncate | 2235 | * for DIO, writepages, and truncate |
2244 | */ | 2236 | */ |
2245 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | 2237 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS |
2246 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | 2238 | #define EXT4_MAX_WRITEBACK_CREDITS 25 |
2247 | 2239 | ||
2248 | static int ext4_da_writepages(struct address_space *mapping, | 2240 | static int ext4_da_writepages(struct address_space *mapping, |
2249 | struct writeback_control *wbc) | 2241 | struct writeback_control *wbc) |
@@ -4441,7 +4433,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4441 | 4433 | ||
4442 | /* | 4434 | /* |
4443 | * Calulate the total number of credits to reserve to fit | 4435 | * Calulate the total number of credits to reserve to fit |
4444 | * the modification of a single pages into a single transaction | 4436 | * the modification of a single pages into a single transaction, |
4437 | * which may include multiple chunks of block allocations. | ||
4445 | * | 4438 | * |
4446 | * This could be called via ext4_write_begin() or later | 4439 | * This could be called via ext4_write_begin() or later |
4447 | * ext4_da_writepages() in delalyed allocation case. | 4440 | * ext4_da_writepages() in delalyed allocation case. |
@@ -4449,11 +4442,6 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4449 | * In both case it's possible that we could allocating multiple | 4442 | * In both case it's possible that we could allocating multiple |
4450 | * chunks of blocks. We need to consider the worse case, when | 4443 | * chunks of blocks. We need to consider the worse case, when |
4451 | * one new block per extent. | 4444 | * one new block per extent. |
4452 | * | ||
4453 | * For Direct IO and fallocate, the journal credits reservation | ||
4454 | * is based on one single extent allocation, so they could use | ||
4455 | * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single | ||
4456 | * chunk of allocation needs. | ||
4457 | */ | 4445 | */ |
4458 | int ext4_writepage_trans_blocks(struct inode *inode) | 4446 | int ext4_writepage_trans_blocks(struct inode *inode) |
4459 | { | 4447 | { |
@@ -4467,6 +4455,21 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
4467 | ret += bpp; | 4455 | ret += bpp; |
4468 | return ret; | 4456 | return ret; |
4469 | } | 4457 | } |
4458 | |||
4459 | /* | ||
4460 | * Calculate the journal credits for a chunk of data modification. | ||
4461 | * | ||
4462 | * This is called from DIO, fallocate or whoever calling | ||
4463 | * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. | ||
4464 | * | ||
4465 | * journal buffers for data blocks are not included here, as DIO | ||
4466 | * and fallocate do no need to journal data buffers. | ||
4467 | */ | ||
4468 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | ||
4469 | { | ||
4470 | return ext4_meta_trans_blocks(inode, nrblocks, 1); | ||
4471 | } | ||
4472 | |||
4470 | /* | 4473 | /* |
4471 | * The caller must have previously called ext4_reserve_inode_write(). | 4474 | * The caller must have previously called ext4_reserve_inode_write(). |
4472 | * Give this, we know that the caller already has write access to iloc->bh. | 4475 | * Give this, we know that the caller already has write access to iloc->bh. |