aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMingming Cao <cmm@us.ibm.com>2008-08-19 22:15:58 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-08-19 22:15:58 -0400
commit525f4ed8dcb72c71b306a78ecbf06f41d08fe441 (patch)
treed4d74dc7b109c5d74fa955970a8528ec90e20b06 /fs
parenta1d6cc563bfdf1bf2829d3e6ce4d8b774251796b (diff)
ext4: journal credit fix for the delayed allocation's writepages() function
The previous delalloc writepages implementation started a new transaction outside of a loop which called get_block() to do the block allocation. Since we didn't know exactly how many blocks would need to be allocated, the estimate of the journal credits required was very conservative and caused many issues. With the reworked delayed allocation, a new transaction is created for each get_block(), thus we don't need to guess how many credits are needed for multiple chunks of allocation. We start every transaction with enough credits for inserting a single extent. When estimating the credits for the indirect blocks needed to allocate a chunk of blocks, we need to know the number of data blocks to allocate. We use the total number of reserved delalloc data blocks; if that is too big, for non-extent files, we need to limit the number of blocks to EXT4_MAX_TRANS_DATA. Code cleanup from Aneesh. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Reviewed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/extents.c8
-rw-r--r--fs/ext4/inode.c74
2 files changed, 58 insertions, 24 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5596b70efa20..b24d3c53f20c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1753,7 +1753,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1753 * When pass the actual path, the caller should calculate credits 1753 * When pass the actual path, the caller should calculate credits
1754 * under i_data_sem. 1754 * under i_data_sem.
1755 */ 1755 */
1756int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, 1756int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
1757 struct ext4_ext_path *path) 1757 struct ext4_ext_path *path)
1758{ 1758{
1759 if (path) { 1759 if (path) {
@@ -1772,12 +1772,12 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
1772 * and other metadat blocks still need to be 1772 * and other metadat blocks still need to be
1773 * accounted. 1773 * accounted.
1774 */ 1774 */
1775 /* 1 one bitmap, 1 block group descriptor */ 1775 /* 1 bitmap, 1 block group descriptor */
1776 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); 1776 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
1777 } 1777 }
1778 } 1778 }
1779 1779
1780 return ext4_chunk_trans_blocks(inode, num); 1780 return ext4_chunk_trans_blocks(inode, nrblocks);
1781} 1781}
1782 1782
1783/* 1783/*
@@ -1791,7 +1791,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
1791 * If the nrblocks are discontiguous, they could cause 1791 * If the nrblocks are discontiguous, they could cause
1792 * the whole tree split more than once, but this is really rare. 1792 * the whole tree split more than once, but this is really rare.
1793 */ 1793 */
1794int ext4_ext_index_trans_blocks(struct inode *inode, int num, int chunk) 1794int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
1795{ 1795{
1796 int index; 1796 int index;
1797 int depth = ext_depth(inode); 1797 int depth = ext_depth(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8dd22eade42c..d1906d9a22de 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1848,29 +1848,53 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd)
1848static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, 1848static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
1849 sector_t logical, struct buffer_head *bh) 1849 sector_t logical, struct buffer_head *bh)
1850{ 1850{
1851 struct buffer_head *lbh = &mpd->lbh;
1852 sector_t next; 1851 sector_t next;
1852 size_t b_size = bh->b_size;
1853 struct buffer_head *lbh = &mpd->lbh;
1854 int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
1853 1855
1854 next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); 1856 /* check if the reserved journal credits might overflow */
1855 1857 if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
1858 if (nrblocks >= EXT4_MAX_TRANS_DATA) {
1859 /*
1860 * With non-extent format we are limited by the journal
1861 * credit available. Total credit needed to insert
1862 * nrblocks contiguous blocks is dependent on the
1863 * nrblocks. So limit nrblocks.
1864 */
1865 goto flush_it;
1866 } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) >
1867 EXT4_MAX_TRANS_DATA) {
1868 /*
1869 * Adding the new buffer_head would make it cross the
1870 * allowed limit for which we have journal credit
1871 * reserved. So limit the new bh->b_size
1872 */
1873 b_size = (EXT4_MAX_TRANS_DATA - nrblocks) <<
1874 mpd->inode->i_blkbits;
1875 /* we will do mpage_da_submit_io in the next loop */
1876 }
1877 }
1856 /* 1878 /*
1857 * First block in the extent 1879 * First block in the extent
1858 */ 1880 */
1859 if (lbh->b_size == 0) { 1881 if (lbh->b_size == 0) {
1860 lbh->b_blocknr = logical; 1882 lbh->b_blocknr = logical;
1861 lbh->b_size = bh->b_size; 1883 lbh->b_size = b_size;
1862 lbh->b_state = bh->b_state & BH_FLAGS; 1884 lbh->b_state = bh->b_state & BH_FLAGS;
1863 return; 1885 return;
1864 } 1886 }
1865 1887
1888 next = lbh->b_blocknr + nrblocks;
1866 /* 1889 /*
1867 * Can we merge the block to our big extent? 1890 * Can we merge the block to our big extent?
1868 */ 1891 */
1869 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { 1892 if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
1870 lbh->b_size += bh->b_size; 1893 lbh->b_size += b_size;
1871 return; 1894 return;
1872 } 1895 }
1873 1896
1897flush_it:
1874 /* 1898 /*
1875 * We couldn't merge the block to our extent, so we 1899 * We couldn't merge the block to our extent, so we
1876 * need to flush current extent and start new one 1900 * need to flush current extent and start new one
@@ -2231,17 +2255,29 @@ static int ext4_da_writepage(struct page *page,
2231} 2255}
2232 2256
2233/* 2257/*
2234 * For now just follow the DIO way to estimate the max credits 2258 * This is called via ext4_da_writepages() to
2235 * needed to write out EXT4_MAX_WRITEBACK_PAGES. 2259 * calculate the total number of credits to reserve to fit
2236 * todo: need to calculate the max credits need for 2260 * a single extent allocation into a single transaction,
2237 * extent based files, currently the DIO credits is based on 2261 * ext4_da_writepages() will loop calling this before
2238 * indirect-blocks mapping way. 2262 * the block allocation.
2239 *
2240 * Probably should have a generic way to calculate credits
2241 * for DIO, writepages, and truncate
2242 */ 2263 */
2243#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS 2264
2244#define EXT4_MAX_WRITEBACK_CREDITS 25 2265static int ext4_da_writepages_trans_blocks(struct inode *inode)
2266{
2267 int max_blocks = EXT4_I(inode)->i_reserved_data_blocks;
2268
2269 /*
2270 * With non-extent format the journal credit needed to
2271 * insert nrblocks contiguous block is dependent on
2272 * number of contiguous block. So we will limit
2273 * number of contiguous block to a sane value
2274 */
2275 if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
2276 (max_blocks > EXT4_MAX_TRANS_DATA))
2277 max_blocks = EXT4_MAX_TRANS_DATA;
2278
2279 return ext4_chunk_trans_blocks(inode, max_blocks);
2280}
2245 2281
2246static int ext4_da_writepages(struct address_space *mapping, 2282static int ext4_da_writepages(struct address_space *mapping,
2247 struct writeback_control *wbc) 2283 struct writeback_control *wbc)
@@ -2283,7 +2319,7 @@ restart_loop:
2283 * by delalloc 2319 * by delalloc
2284 */ 2320 */
2285 BUG_ON(ext4_should_journal_data(inode)); 2321 BUG_ON(ext4_should_journal_data(inode));
2286 needed_blocks = EXT4_DATA_TRANS_BLOCKS(inode->i_sb); 2322 needed_blocks = ext4_da_writepages_trans_blocks(inode);
2287 2323
2288 /* start a new transaction*/ 2324 /* start a new transaction*/
2289 handle = ext4_journal_start(inode, needed_blocks); 2325 handle = ext4_journal_start(inode, needed_blocks);
@@ -4461,11 +4497,9 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4461 * the modification of a single pages into a single transaction, 4497 * the modification of a single pages into a single transaction,
4462 * which may include multiple chunks of block allocations. 4498 * which may include multiple chunks of block allocations.
4463 * 4499 *
4464 * This could be called via ext4_write_begin() or later 4500 * This could be called via ext4_write_begin()
4465 * ext4_da_writepages() in delalyed allocation case.
4466 * 4501 *
4467 * In both case it's possible that we could allocating multiple 4502 * We need to consider the worse case, when
4468 * chunks of blocks. We need to consider the worse case, when
4469 * one new block per extent. 4503 * one new block per extent.
4470 */ 4504 */
4471int ext4_writepage_trans_blocks(struct inode *inode) 4505int ext4_writepage_trans_blocks(struct inode *inode)