diff options
author | Mingming Cao <cmm@us.ibm.com> | 2008-08-19 22:15:58 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2008-08-19 22:15:58 -0400 |
commit | 525f4ed8dcb72c71b306a78ecbf06f41d08fe441 (patch) | |
tree | d4d74dc7b109c5d74fa955970a8528ec90e20b06 | |
parent | a1d6cc563bfdf1bf2829d3e6ce4d8b774251796b (diff) |
ext4: journal credit fix for the delayed allocation's writepages() function
The previous delalloc writepages implementation started a new transaction
outside of a loop which called get_block() to do the block allocation.
Since we didn't know exactly how many blocks would need to be allocated,
the estimate of the journal credits required was very conservative and
caused many issues.
With the reworked delayed allocation, a new transaction is created for
each get_block(), thus we don't need to guess how many credits are needed
for multiple chunks of allocation. We start every transaction with enough
credits for inserting a single extent. When estimating the credits for
the indirect blocks needed to allocate a chunk of blocks, we need to know
the number of data blocks to allocate. We use the total number of reserved
delalloc data blocks; if that is too big, for non-extent files, we need
to limit the number of blocks to EXT4_MAX_TRANS_DATA.
Code cleanup from Aneesh.
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Reviewed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/extents.c | 8 | ||||
-rw-r--r-- | fs/ext4/inode.c | 74 |
2 files changed, 58 insertions, 24 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5596b70efa20..b24d3c53f20c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1753,7 +1753,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1753 | * When pass the actual path, the caller should calculate credits | 1753 | * When pass the actual path, the caller should calculate credits |
1754 | * under i_data_sem. | 1754 | * under i_data_sem. |
1755 | */ | 1755 | */ |
1756 | int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, | 1756 | int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, |
1757 | struct ext4_ext_path *path) | 1757 | struct ext4_ext_path *path) |
1758 | { | 1758 | { |
1759 | if (path) { | 1759 | if (path) { |
@@ -1772,12 +1772,12 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, | |||
1772 | * and other metadata blocks still need to be | 1772 | * and other metadata blocks still need to be |
1773 | * accounted. | 1773 | * accounted. |
1774 | */ | 1774 | */ |
1775 | /* 1 one bitmap, 1 block group descriptor */ | 1775 | /* 1 bitmap, 1 block group descriptor */ |
1776 | ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); | 1776 | ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); |
1777 | } | 1777 | } |
1778 | } | 1778 | } |
1779 | 1779 | ||
1780 | return ext4_chunk_trans_blocks(inode, num); | 1780 | return ext4_chunk_trans_blocks(inode, nrblocks); |
1781 | } | 1781 | } |
1782 | 1782 | ||
1783 | /* | 1783 | /* |
@@ -1791,7 +1791,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, | |||
1791 | * If the nrblocks are discontiguous, they could cause | 1791 | * If the nrblocks are discontiguous, they could cause |
1792 | * the whole tree split more than once, but this is really rare. | 1792 | * the whole tree split more than once, but this is really rare. |
1793 | */ | 1793 | */ |
1794 | int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk) | 1794 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) |
1795 | { | 1795 | { |
1796 | int index; | 1796 | int index; |
1797 | int depth = ext_depth(inode); | 1797 | int depth = ext_depth(inode); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8dd22eade42c..d1906d9a22de 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1848,29 +1848,53 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1848 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 1848 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
1849 | sector_t logical, struct buffer_head *bh) | 1849 | sector_t logical, struct buffer_head *bh) |
1850 | { | 1850 | { |
1851 | struct buffer_head *lbh = &mpd->lbh; | ||
1852 | sector_t next; | 1851 | sector_t next; |
1852 | size_t b_size = bh->b_size; | ||
1853 | struct buffer_head *lbh = &mpd->lbh; | ||
1854 | int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; | ||
1853 | 1855 | ||
1854 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | 1856 | /* check if the reserved journal credits might overflow */ |
1855 | 1857 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | |
1858 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1859 | /* | ||
1860 | * With non-extent format we are limited by the journal | ||
1861 | * credit available. Total credit needed to insert | ||
1862 | * nrblocks contiguous blocks is dependent on the | ||
1863 | * nrblocks. So limit nrblocks. | ||
1864 | */ | ||
1865 | goto flush_it; | ||
1866 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > | ||
1867 | EXT4_MAX_TRANS_DATA) { | ||
1868 | /* | ||
1869 | * Adding the new buffer_head would make it cross the | ||
1870 | * allowed limit for which we have journal credit | ||
1871 | * reserved. So limit the new bh->b_size | ||
1872 | */ | ||
1873 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << | ||
1874 | mpd->inode->i_blkbits; | ||
1875 | /* we will do mpage_da_submit_io in the next loop */ | ||
1876 | } | ||
1877 | } | ||
1856 | /* | 1878 | /* |
1857 | * First block in the extent | 1879 | * First block in the extent |
1858 | */ | 1880 | */ |
1859 | if (lbh->b_size == 0) { | 1881 | if (lbh->b_size == 0) { |
1860 | lbh->b_blocknr = logical; | 1882 | lbh->b_blocknr = logical; |
1861 | lbh->b_size = bh->b_size; | 1883 | lbh->b_size = b_size; |
1862 | lbh->b_state = bh->b_state & BH_FLAGS; | 1884 | lbh->b_state = bh->b_state & BH_FLAGS; |
1863 | return; | 1885 | return; |
1864 | } | 1886 | } |
1865 | 1887 | ||
1888 | next = lbh->b_blocknr + nrblocks; | ||
1866 | /* | 1889 | /* |
1867 | * Can we merge the block to our big extent? | 1890 | * Can we merge the block to our big extent? |
1868 | */ | 1891 | */ |
1869 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | 1892 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { |
1870 | lbh->b_size += bh->b_size; | 1893 | lbh->b_size += b_size; |
1871 | return; | 1894 | return; |
1872 | } | 1895 | } |
1873 | 1896 | ||
1897 | flush_it: | ||
1874 | /* | 1898 | /* |
1875 | * We couldn't merge the block to our extent, so we | 1899 | * We couldn't merge the block to our extent, so we |
1876 | * need to flush current extent and start new one | 1900 | * need to flush current extent and start new one |
@@ -2231,17 +2255,29 @@ static int ext4_da_writepage(struct page *page, | |||
2231 | } | 2255 | } |
2232 | 2256 | ||
2233 | /* | 2257 | /* |
2234 | * For now just follow the DIO way to estimate the max credits | 2258 | * This is called via ext4_da_writepages() to |
2235 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | 2259 | * calculate the total number of credits to reserve to fit |
2236 | * todo: need to calculate the max credits need for | 2260 | * a single extent allocation into a single transaction, |
2237 | * extent based files, currently the DIO credits is based on | 2261 | * ext4_da_writepages() will loop calling this before |
2238 | * indirect-blocks mapping way. | 2262 | * the block allocation. |
2239 | * | ||
2240 | * Probably should have a generic way to calculate credits | ||
2241 | * for DIO, writepages, and truncate | ||
2242 | */ | 2263 | */ |
2243 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | 2264 | |
2244 | #define EXT4_MAX_WRITEBACK_CREDITS 25 | 2265 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
2266 | { | ||
2267 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
2268 | |||
2269 | /* | ||
2270 | * With non-extent format the journal credit needed to | ||
2271 | * insert nrblocks contiguous block is dependent on | ||
2272 | * number of contiguous block. So we will limit | ||
2273 | * number of contiguous block to a sane value | ||
2274 | */ | ||
2275 | if (!(inode->i_flags & EXT4_EXTENTS_FL) && | ||
2276 | (max_blocks > EXT4_MAX_TRANS_DATA)) | ||
2277 | max_blocks = EXT4_MAX_TRANS_DATA; | ||
2278 | |||
2279 | return ext4_chunk_trans_blocks(inode, max_blocks); | ||
2280 | } | ||
2245 | 2281 | ||
2246 | static int ext4_da_writepages(struct address_space *mapping, | 2282 | static int ext4_da_writepages(struct address_space *mapping, |
2247 | struct writeback_control *wbc) | 2283 | struct writeback_control *wbc) |
@@ -2283,7 +2319,7 @@ restart_loop: | |||
2283 | * by delalloc | 2319 | * by delalloc |
2284 | */ | 2320 | */ |
2285 | BUG_ON(ext4_should_journal_data(inode)); | 2321 | BUG_ON(ext4_should_journal_data(inode)); |
2286 | needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); | 2322 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
2287 | 2323 | ||
2288 | /* start a new transaction*/ | 2324 | /* start a new transaction*/ |
2289 | handle = ext4_journal_start(inode, needed_blocks); | 2325 | handle = ext4_journal_start(inode, needed_blocks); |
@@ -4461,11 +4497,9 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
4461 | * the modification of a single pages into a single transaction, | 4497 | * the modification of a single pages into a single transaction, |
4462 | * which may include multiple chunks of block allocations. | 4498 | * which may include multiple chunks of block allocations. |
4463 | * | 4499 | * |
4464 | * This could be called via ext4_write_begin() or later | 4500 | * This could be called via ext4_write_begin() |
4465 | * ext4_da_writepages() in delalyed allocation case. | ||
4466 | * | 4501 | * |
4467 | * In both case it's possible that we could allocating multiple | 4502 | * We need to consider the worse case, when |
4468 | * chunks of blocks. We need to consider the worse case, when | ||
4469 | * one new block per extent. | 4503 | * one new block per extent. |
4470 | */ | 4504 | */ |
4471 | int ext4_writepage_trans_blocks(struct inode *inode) | 4505 | int ext4_writepage_trans_blocks(struct inode *inode) |