aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMingming Cao <cmm@us.ibm.com>2008-08-19 22:16:07 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-08-19 22:16:07 -0400
commita02908f19c819aeec5e3dcf238adaa6deddd70b0 (patch)
tree78764672019b22a5a8925abc702d7c54244138e8 /fs
parentc001077f4003fa75793bb62979baa6241dd8eb19 (diff)
ext4: journal credits calculation cleanup and fix for non-extent writepage
When considering how many journal credits are needed for modifying a chunk of data, we need to account for the super block, inode block, quota blocks and xattr block, indirect/index blocks, and also the group bitmap and group descriptor blocks for new allocations (including data and indirect/index blocks). Many places in ext4 do this calculation on their own; they often miss one or two metadata blocks, and they often assume a single block allocation without considering the case of multiple chunks of allocation. This patch tries to clean up the current journal credit code and provides some common helper functions to calculate the journal credits, to be used for writepage, writepages, DIO, fallocate, migration and defrag, for both non-extent and extent files. This patch modifies the writepage/write_begin credit calculation for non-extent files to use the new helper function. It also fixes the problem that writepage on non-extent files did not consider the case blocksize < pagesize, and thus could possibly need multiple block allocations in a single transaction. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ext4_jbd2.h8
-rw-r--r--fs/ext4/inode.c131
3 files changed, 108 insertions, 34 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6c7924d9e358..38e661b0ea88 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1072,6 +1072,7 @@ extern void ext4_set_inode_flags(struct inode *);
1072extern void ext4_get_inode_flags(struct ext4_inode_info *); 1072extern void ext4_get_inode_flags(struct ext4_inode_info *);
1073extern void ext4_set_aops(struct inode *inode); 1073extern void ext4_set_aops(struct inode *inode);
1074extern int ext4_writepage_trans_blocks(struct inode *); 1074extern int ext4_writepage_trans_blocks(struct inode *);
/* Third argument is the "chunk" flag: non-zero when nrblocks are contiguous. */
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
1075extern int ext4_block_truncate_page(handle_t *handle, 1076extern int ext4_block_truncate_page(handle_t *handle,
1076 struct address_space *mapping, loff_t from); 1077 struct address_space *mapping, loff_t from);
1077extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); 1078extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
@@ -1227,6 +1228,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
1227/* extents.c */ 1228/* extents.c */
1228extern int ext4_ext_tree_init(handle_t *handle, struct inode *); 1229extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
1229extern int ext4_ext_writepage_trans_blocks(struct inode *, int); 1230extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
1231extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
1232 int chunk);
1230extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, 1233extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
1231 ext4_lblk_t iblock, 1234 ext4_lblk_t iblock,
1232 unsigned long max_blocks, struct buffer_head *bh_result, 1235 unsigned long max_blocks, struct buffer_head *bh_result,
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index eb8bc3afe6e9..b455c685a98b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -51,6 +51,14 @@
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \ 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 52 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
53 53
54/*
55 * Define the number of metadata blocks we need to account to modify data.
56 *
57 * This includes the super block, inode block, quota blocks and xattr blocks
58 */
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
61
54/* Delete operations potentially hit one directory's namespace plus an 62/* Delete operations potentially hit one directory's namespace plus an
55 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
56 * generous. We can grow the delete transaction later if necessary. */ 64 * generous. We can grow the delete transaction later if necessary. */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5e17d5f22a7e..a27129065144 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4354,56 +4354,119 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
4354 return 0; 4354 return 0;
4355} 4355}
4356 4356
4357static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
4358 int chunk)
4359{
4360 int indirects;
4361
4362 /* if nrblocks are contiguous */
4363 if (chunk) {
4364 /*
4365 * With N contiguous data blocks, it need at most
4366 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
4367 * 2 dindirect blocks
4368 * 1 tindirect block
4369 */
4370 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
4371 return indirects + 3;
4372 }
4373 /*
4374 * if nrblocks are not contiguous, worse case, each block touch
4375 * a indirect block, and each indirect block touch a double indirect
4376 * block, plus a triple indirect block
4377 */
4378 indirects = nrblocks * 2 + 1;
4379 return indirects;
4380}
4381
4382static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4383{
4384 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
4385 return ext4_indirect_trans_blocks(inode, nrblocks, 0);
4386 return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
4387}
4357/* 4388/*
4358 * How many blocks doth make a writepage()? 4389 * Account for index blocks, block groups bitmaps and block group
4390 * descriptor blocks when modifying data blocks and index blocks;
4391 * in the worst case, the index blocks spread over different block groups
4359 * 4392 *
4360 * With N blocks per page, it may be: 4393 * If datablocks are discontiguous, they are possible to spread over
4361 * N data blocks 4394 * different block groups too. If they are contiguous, with flexbg,
4362 * 2 indirect block 4395 * they could still across block group boundary.
4363 * 2 dindirect
4364 * 1 tindirect
4365 * N+5 bitmap blocks (from the above)
4366 * N+5 group descriptor summary blocks
4367 * 1 inode block
4368 * 1 superblock.
4369 * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files
4370 * 4396 *
4371 * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS 4397 * Also account for superblock, inode, quota and xattr blocks
4398 */
4399int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4400{
4401 int groups, gdpblocks;
4402 int idxblocks;
4403 int ret = 0;
4404
4405 /*
4406 * How many index blocks need to touch to modify nrblocks?
4407 * The "Chunk" flag indicating whether the nrblocks is
4408 * physically contiguous on disk
4409 *
4410 * For Direct IO and fallocate, they calls get_block to allocate
4411 * one single extent at a time, so they could set the "Chunk" flag
4412 */
4413 idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk);
4414
4415 ret = idxblocks;
4416
4417 /*
4418 * Now let's see how many group bitmaps and group descriptors need
4419 * to account
4420 */
4421 groups = idxblocks;
4422 if (chunk)
4423 groups += 1;
4424 else
4425 groups += nrblocks;
4426
4427 gdpblocks = groups;
4428 if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
4429 groups = EXT4_SB(inode->i_sb)->s_groups_count;
4430 if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
4431 gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
4432
4433 /* bitmaps and block group descriptor blocks */
4434 ret += groups + gdpblocks;
4435
4436 /* Blocks for super block, inode, quota and xattr blocks */
4437 ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
4438
4439 return ret;
4440}
4441
4442/*
4443 * Calculate the total number of credits to reserve to fit
4444 * the modification of a single page into a single transaction
4372 * 4445 *
4373 * With ordered or writeback data it's the same, less the N data blocks. 4446 * This could be called via ext4_write_begin() or later
4447 * ext4_da_writepages() in the delayed allocation case.
4374 * 4448 *
4375 * If the inode's direct blocks can hold an integral number of pages then a 4449 * In both case it's possible that we could allocating multiple
4376 * page cannot straddle two indirect blocks, and we can only touch one indirect 4450 * chunks of blocks. We need to consider the worse case, when
4377 * and dindirect block, and the "5" above becomes "3". 4451 * one new block per extent.
4378 * 4452 *
4379 * This still overestimates under most circumstances. If we were to pass the 4453 * For Direct IO and fallocate, the journal credits reservation
4380 * start and end offsets in here as well we could do block_to_path() on each 4454 * is based on one single extent allocation, so they could use
4381 * block and work out the exact number of indirects which are touched. Pah. 4455 * EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single
4456 * chunk of allocation needs.
4382 */ 4457 */
4383
4384int ext4_writepage_trans_blocks(struct inode *inode) 4458int ext4_writepage_trans_blocks(struct inode *inode)
4385{ 4459{
4386 int bpp = ext4_journal_blocks_per_page(inode); 4460 int bpp = ext4_journal_blocks_per_page(inode);
4387 int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
4388 int ret; 4461 int ret;
4389 4462
4390 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 4463 ret = ext4_meta_trans_blocks(inode, bpp, 0);
4391 return ext4_ext_writepage_trans_blocks(inode, bpp);
4392 4464
4465 /* Account for data blocks for journalled mode */
4393 if (ext4_should_journal_data(inode)) 4466 if (ext4_should_journal_data(inode))
4394 ret = 3 * (bpp + indirects) + 2; 4467 ret += bpp;
4395 else
4396 ret = 2 * (bpp + indirects) + 2;
4397
4398#ifdef CONFIG_QUOTA
4399 /* We know that structure was already allocated during DQUOT_INIT so
4400 * we will be updating only the data blocks + inodes */
4401 ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
4402#endif
4403
4404 return ret; 4468 return ret;
4405} 4469}
4406
4407/* 4470/*
4408 * The caller must have previously called ext4_reserve_inode_write(). 4471 * The caller must have previously called ext4_reserve_inode_write().
4409 * Give this, we know that the caller already has write access to iloc->bh. 4472 * Give this, we know that the caller already has write access to iloc->bh.