diff options
author | Theodore Ts'o <tytso@mit.edu> | 2010-01-01 02:41:30 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2010-01-01 02:41:30 -0500 |
commit | 9d0be50230b333005635967f7ecd4897dbfd181b (patch) | |
tree | 59aefe29e33284e1d904b23eaf2cc98994431374 /fs/ext4/inode.c | |
parent | ee5f4d9cdf32fd99172d11665c592a288c2b1ff4 (diff) |
ext4: Calculate metadata requirements more accurately
In the past, ext4_calc_metadata_amount(), and its sub-functions
ext4_ext_calc_metadata_amount() and ext4_indirect_calc_metadata_amount()
badly over-estimated the number of metadata blocks that might be
required for delayed allocation blocks. This didn't matter as much
when functions which managed the reserved metadata blocks were more
aggressive about dropping reserved metadata blocks as delayed
allocation blocks were written, but unfortunately they were too
aggressive. This was fixed in commit 0637c6f, but as a result the
over-estimation by ext4_calc_metadata_amount() would lead to reserving
2-3 times the number of pending delayed allocation blocks as
potentially required metadata blocks. So if there is 1 megabyte of
blocks which have not yet been allocated, up to 3 megabytes of
space would get reserved out of the user's quota and from the file
system free space pool until all of the inode's data blocks have been
allocated.
This commit addresses this problem by much more accurately estimating
the number of metadata blocks that will be required. It will still
somewhat over-estimate the number of blocks needed, since it must make
a worst case estimate not knowing which physical blocks will be
needed, but it is much more accurate than before.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 62 |
1 files changed, 36 insertions, 26 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bdaa92a29e0e..c818972c8302 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1009,38 +1009,44 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) | |||
1009 | return &EXT4_I(inode)->i_reserved_quota; | 1009 | return &EXT4_I(inode)->i_reserved_quota; |
1010 | } | 1010 | } |
1011 | #endif | 1011 | #endif |
1012 | |||
1012 | /* | 1013 | /* |
1013 | * Calculate the number of metadata blocks need to reserve | 1014 | * Calculate the number of metadata blocks need to reserve |
1014 | * to allocate @blocks for non extent file based file | 1015 | * to allocate a new block at @lblocks for non extent file based file |
1015 | */ | 1016 | */ |
1016 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | 1017 | static int ext4_indirect_calc_metadata_amount(struct inode *inode, |
1018 | sector_t lblock) | ||
1017 | { | 1019 | { |
1018 | int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); | 1020 | struct ext4_inode_info *ei = EXT4_I(inode); |
1019 | int ind_blks, dind_blks, tind_blks; | 1021 | int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; |
1020 | 1022 | int blk_bits; | |
1021 | /* number of new indirect blocks needed */ | ||
1022 | ind_blks = (blocks + icap - 1) / icap; | ||
1023 | 1023 | ||
1024 | dind_blks = (ind_blks + icap - 1) / icap; | 1024 | if (lblock < EXT4_NDIR_BLOCKS) |
1025 | return 0; | ||
1025 | 1026 | ||
1026 | tind_blks = 1; | 1027 | lblock -= EXT4_NDIR_BLOCKS; |
1027 | 1028 | ||
1028 | return ind_blks + dind_blks + tind_blks; | 1029 | if (ei->i_da_metadata_calc_len && |
1030 | (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { | ||
1031 | ei->i_da_metadata_calc_len++; | ||
1032 | return 0; | ||
1033 | } | ||
1034 | ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; | ||
1035 | ei->i_da_metadata_calc_len = 1; | ||
1036 | blk_bits = roundup_pow_of_two(lblock + 1); | ||
1037 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | ||
1029 | } | 1038 | } |
1030 | 1039 | ||
1031 | /* | 1040 | /* |
1032 | * Calculate the number of metadata blocks need to reserve | 1041 | * Calculate the number of metadata blocks need to reserve |
1033 | * to allocate given number of blocks | 1042 | * to allocate a block located at @lblock |
1034 | */ | 1043 | */ |
1035 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 1044 | static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) |
1036 | { | 1045 | { |
1037 | if (!blocks) | ||
1038 | return 0; | ||
1039 | |||
1040 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1046 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
1041 | return ext4_ext_calc_metadata_amount(inode, blocks); | 1047 | return ext4_ext_calc_metadata_amount(inode, lblock); |
1042 | 1048 | ||
1043 | return ext4_indirect_calc_metadata_amount(inode, blocks); | 1049 | return ext4_indirect_calc_metadata_amount(inode, lblock); |
1044 | } | 1050 | } |
1045 | 1051 | ||
1046 | /* | 1052 | /* |
@@ -1078,6 +1084,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1078 | */ | 1084 | */ |
1079 | mdb_free = ei->i_reserved_meta_blocks; | 1085 | mdb_free = ei->i_reserved_meta_blocks; |
1080 | ei->i_reserved_meta_blocks = 0; | 1086 | ei->i_reserved_meta_blocks = 0; |
1087 | ei->i_da_metadata_calc_len = 0; | ||
1081 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); | 1088 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
1082 | } | 1089 | } |
1083 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1090 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
@@ -1802,12 +1809,15 @@ static int ext4_journalled_write_end(struct file *file, | |||
1802 | return ret ? ret : copied; | 1809 | return ret ? ret : copied; |
1803 | } | 1810 | } |
1804 | 1811 | ||
1805 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1812 | /* |
1813 | * Reserve a single block located at lblock | ||
1814 | */ | ||
1815 | static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) | ||
1806 | { | 1816 | { |
1807 | int retries = 0; | 1817 | int retries = 0; |
1808 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1818 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1809 | struct ext4_inode_info *ei = EXT4_I(inode); | 1819 | struct ext4_inode_info *ei = EXT4_I(inode); |
1810 | unsigned long md_needed, md_reserved, total = 0; | 1820 | unsigned long md_needed, md_reserved; |
1811 | 1821 | ||
1812 | /* | 1822 | /* |
1813 | * recalculate the amount of metadata blocks to reserve | 1823 | * recalculate the amount of metadata blocks to reserve |
@@ -1817,8 +1827,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
1817 | repeat: | 1827 | repeat: |
1818 | spin_lock(&ei->i_block_reservation_lock); | 1828 | spin_lock(&ei->i_block_reservation_lock); |
1819 | md_reserved = ei->i_reserved_meta_blocks; | 1829 | md_reserved = ei->i_reserved_meta_blocks; |
1820 | md_needed = ext4_calc_metadata_amount(inode, nrblocks); | 1830 | md_needed = ext4_calc_metadata_amount(inode, lblock); |
1821 | total = md_needed + nrblocks; | ||
1822 | spin_unlock(&ei->i_block_reservation_lock); | 1831 | spin_unlock(&ei->i_block_reservation_lock); |
1823 | 1832 | ||
1824 | /* | 1833 | /* |
@@ -1826,7 +1835,7 @@ repeat: | |||
1826 | * later. Real quota accounting is done at pages writeout | 1835 | * later. Real quota accounting is done at pages writeout |
1827 | * time. | 1836 | * time. |
1828 | */ | 1837 | */ |
1829 | if (vfs_dq_reserve_block(inode, total)) { | 1838 | if (vfs_dq_reserve_block(inode, md_needed + 1)) { |
1830 | /* | 1839 | /* |
1831 | * We tend to badly over-estimate the amount of | 1840 | * We tend to badly over-estimate the amount of |
1832 | * metadata blocks which are needed, so if we have | 1841 | * metadata blocks which are needed, so if we have |
@@ -1838,8 +1847,8 @@ repeat: | |||
1838 | return -EDQUOT; | 1847 | return -EDQUOT; |
1839 | } | 1848 | } |
1840 | 1849 | ||
1841 | if (ext4_claim_free_blocks(sbi, total)) { | 1850 | if (ext4_claim_free_blocks(sbi, md_needed + 1)) { |
1842 | vfs_dq_release_reservation_block(inode, total); | 1851 | vfs_dq_release_reservation_block(inode, md_needed + 1); |
1843 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1852 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1844 | retry: | 1853 | retry: |
1845 | if (md_reserved) | 1854 | if (md_reserved) |
@@ -1850,7 +1859,7 @@ repeat: | |||
1850 | return -ENOSPC; | 1859 | return -ENOSPC; |
1851 | } | 1860 | } |
1852 | spin_lock(&ei->i_block_reservation_lock); | 1861 | spin_lock(&ei->i_block_reservation_lock); |
1853 | ei->i_reserved_data_blocks += nrblocks; | 1862 | ei->i_reserved_data_blocks++; |
1854 | ei->i_reserved_meta_blocks += md_needed; | 1863 | ei->i_reserved_meta_blocks += md_needed; |
1855 | spin_unlock(&ei->i_block_reservation_lock); | 1864 | spin_unlock(&ei->i_block_reservation_lock); |
1856 | 1865 | ||
@@ -1891,6 +1900,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1891 | */ | 1900 | */ |
1892 | to_free += ei->i_reserved_meta_blocks; | 1901 | to_free += ei->i_reserved_meta_blocks; |
1893 | ei->i_reserved_meta_blocks = 0; | 1902 | ei->i_reserved_meta_blocks = 0; |
1903 | ei->i_da_metadata_calc_len = 0; | ||
1894 | } | 1904 | } |
1895 | 1905 | ||
1896 | /* update fs dirty blocks counter */ | 1906 | /* update fs dirty blocks counter */ |
@@ -2504,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2504 | * XXX: __block_prepare_write() unmaps passed block, | 2514 | * XXX: __block_prepare_write() unmaps passed block, |
2505 | * is it OK? | 2515 | * is it OK? |
2506 | */ | 2516 | */ |
2507 | ret = ext4_da_reserve_space(inode, 1); | 2517 | ret = ext4_da_reserve_space(inode, iblock); |
2508 | if (ret) | 2518 | if (ret) |
2509 | /* not enough space to reserve */ | 2519 | /* not enough space to reserve */ |
2510 | return ret; | 2520 | return ret; |