aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2010-01-01 02:41:30 -0500
committer Theodore Ts'o <tytso@mit.edu> 2010-01-01 02:41:30 -0500
commit 9d0be50230b333005635967f7ecd4897dbfd181b (patch)
tree 59aefe29e33284e1d904b23eaf2cc98994431374
parent ee5f4d9cdf32fd99172d11665c592a288c2b1ff4 (diff)
ext4: Calculate metadata requirements more accurately
In the past, ext4_calc_metadata_amount(), and its sub-functions ext4_ext_calc_metadata_amount() and ext4_indirect_calc_metadata_amount() badly over-estimated the number of metadata blocks that might be required for delayed allocation blocks. This didn't matter as much when functions which managed the reserved metadata blocks were more aggressive about dropping reserved metadata blocks as delayed allocation blocks were written, but unfortunately they were too aggressive. This was fixed in commit 0637c6f, but as a result the over-estimation by ext4_calc_metadata_amount() would lead to reserving 2-3 times the number of pending delayed allocation blocks as potentially required metadata blocks. So if there is 1 megabyte of blocks which have not yet been allocated, up to 3 megabytes of space would get reserved out of the user's quota and from the file system free space pool until all of the inode's data blocks have been allocated. This commit addresses this problem by much more accurately estimating the number of metadata blocks that will be required. It will still somewhat over-estimate the number of blocks needed, since it must make a worst case estimate not knowing which physical blocks will be needed, but it is much more accurate than before. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/ext4/ext4.h 2
-rw-r--r-- fs/ext4/ext4_extents.h 3
-rw-r--r-- fs/ext4/extents.c 49
-rw-r--r-- fs/ext4/inode.c 62
-rw-r--r-- fs/ext4/super.c 1
5 files changed, 73 insertions, 44 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56f9271ee8cc..af7b62699ea9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -699,6 +699,8 @@ struct ext4_inode_info {
699 unsigned int i_reserved_meta_blocks; 699 unsigned int i_reserved_meta_blocks;
700 unsigned int i_allocated_meta_blocks; 700 unsigned int i_allocated_meta_blocks;
701 unsigned short i_delalloc_reserved_flag; 701 unsigned short i_delalloc_reserved_flag;
702 sector_t i_da_metadata_calc_last_lblock;
703 int i_da_metadata_calc_len;
702 704
703 /* on-disk additional length */ 705 /* on-disk additional length */
704 __u16 i_extra_isize; 706 __u16 i_extra_isize;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 91ae46098ea4..7d7b74e94687 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At idxs**2 blocks, we need
312 * an additional index block, and at idxs**3 blocks, yet
313 * another index block.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bdaa92a29e0e..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,38 +1009,44 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
1009 return &EXT4_I(inode)->i_reserved_quota; 1009 return &EXT4_I(inode)->i_reserved_quota;
1010} 1010}
1011#endif 1011#endif
1012
1012/* 1013/*
1013 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1014 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1015 */ 1016 */
1016static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1017{ 1019{
1018 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1019 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1020 1022 int blk_bits;
1021 /* number of new indirect blocks needed */
1022 ind_blks = (blocks + icap - 1) / icap;
1023 1023
1024 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1025 1026
1026 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1027 1028
1028 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1029} 1038}
1030 1039
1031/* 1040/*
1032 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1033 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1034 */ 1043 */
1035static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1036{ 1045{
1037 if (!blocks)
1038 return 0;
1039
1040 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1041 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1042 1048
1043 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1044} 1050}
1045 1051
1046/* 1052/*
@@ -1078,6 +1084,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1078 */ 1084 */
1079 mdb_free = ei->i_reserved_meta_blocks; 1085 mdb_free = ei->i_reserved_meta_blocks;
1080 ei->i_reserved_meta_blocks = 0; 1086 ei->i_reserved_meta_blocks = 0;
1087 ei->i_da_metadata_calc_len = 0;
1081 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1088 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1082 } 1089 }
1083 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
@@ -1802,12 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
1802 return ret ? ret : copied; 1809 return ret ? ret : copied;
1803} 1810}
1804 1811
1805static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1812/*
1813 * Reserve a single block located at lblock
1814 */
1815static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1806{ 1816{
1807 int retries = 0; 1817 int retries = 0;
1808 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1818 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1809 struct ext4_inode_info *ei = EXT4_I(inode); 1819 struct ext4_inode_info *ei = EXT4_I(inode);
1810 unsigned long md_needed, md_reserved, total = 0; 1820 unsigned long md_needed, md_reserved;
1811 1821
1812 /* 1822 /*
1813 * recalculate the amount of metadata blocks to reserve 1823 * recalculate the amount of metadata blocks to reserve
@@ -1817,8 +1827,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1817repeat: 1827repeat:
1818 spin_lock(&ei->i_block_reservation_lock); 1828 spin_lock(&ei->i_block_reservation_lock);
1819 md_reserved = ei->i_reserved_meta_blocks; 1829 md_reserved = ei->i_reserved_meta_blocks;
1820 md_needed = ext4_calc_metadata_amount(inode, nrblocks); 1830 md_needed = ext4_calc_metadata_amount(inode, lblock);
1821 total = md_needed + nrblocks;
1822 spin_unlock(&ei->i_block_reservation_lock); 1831 spin_unlock(&ei->i_block_reservation_lock);
1823 1832
1824 /* 1833 /*
@@ -1826,7 +1835,7 @@ repeat:
1826 * later. Real quota accounting is done at pages writeout 1835 * later. Real quota accounting is done at pages writeout
1827 * time. 1836 * time.
1828 */ 1837 */
1829 if (vfs_dq_reserve_block(inode, total)) { 1838 if (vfs_dq_reserve_block(inode, md_needed + 1)) {
1830 /* 1839 /*
1831 * We tend to badly over-estimate the amount of 1840 * We tend to badly over-estimate the amount of
1832 * metadata blocks which are needed, so if we have 1841 * metadata blocks which are needed, so if we have
@@ -1838,8 +1847,8 @@ repeat:
1838 return -EDQUOT; 1847 return -EDQUOT;
1839 } 1848 }
1840 1849
1841 if (ext4_claim_free_blocks(sbi, total)) { 1850 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1842 vfs_dq_release_reservation_block(inode, total); 1851 vfs_dq_release_reservation_block(inode, md_needed + 1);
1843 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1852 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1844 retry: 1853 retry:
1845 if (md_reserved) 1854 if (md_reserved)
@@ -1850,7 +1859,7 @@ repeat:
1850 return -ENOSPC; 1859 return -ENOSPC;
1851 } 1860 }
1852 spin_lock(&ei->i_block_reservation_lock); 1861 spin_lock(&ei->i_block_reservation_lock);
1853 ei->i_reserved_data_blocks += nrblocks; 1862 ei->i_reserved_data_blocks++;
1854 ei->i_reserved_meta_blocks += md_needed; 1863 ei->i_reserved_meta_blocks += md_needed;
1855 spin_unlock(&ei->i_block_reservation_lock); 1864 spin_unlock(&ei->i_block_reservation_lock);
1856 1865
@@ -1891,6 +1900,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
1891 */ 1900 */
1892 to_free += ei->i_reserved_meta_blocks; 1901 to_free += ei->i_reserved_meta_blocks;
1893 ei->i_reserved_meta_blocks = 0; 1902 ei->i_reserved_meta_blocks = 0;
1903 ei->i_da_metadata_calc_len = 0;
1894 } 1904 }
1895 1905
1896 /* update fs dirty blocks counter */ 1906 /* update fs dirty blocks counter */
@@ -2504,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2504 * XXX: __block_prepare_write() unmaps passed block, 2514 * XXX: __block_prepare_write() unmaps passed block,
2505 * is it OK? 2515 * is it OK?
2506 */ 2516 */
2507 ret = ext4_da_reserve_space(inode, 1); 2517 ret = ext4_da_reserve_space(inode, iblock);
2508 if (ret) 2518 if (ret)
2509 /* not enough space to reserve */ 2519 /* not enough space to reserve */
2510 return ret; 2520 return ret;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7cccb35c0f4d..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
702 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
703 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
704 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
705 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
706 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
707#ifdef CONFIG_QUOTA 708#ifdef CONFIG_QUOTA