aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2009-05-12 16:30:27 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2009-05-12 16:30:27 -0400
commit: 29fa89d088941d79765d60f22d5ccdd6b8696e11 (patch)
tree: 3fbe031eec140c03dafa11f2416c4f6e3826f21d
parent: 8fb0e342481c4d80040670fec915f0b9c7c6499a (diff)
ext4: Mark the unwritten buffer_head as mapped during write_begin
Setting BH_Unwritten buffer_heads as BH_Mapped avoids multiple (unnecessary) calls to get_block() during the call to the write(2) system call. Setting BH_Unwritten buffer_heads as BH_Mapped requires that the writepages() functions can handle BH_Unwritten buffer_heads.

After this commit, things work as follows:

ext4_ext_get_blocks() returns an unmapped, unwritten buffer_head when called with create = 0 for prealloc space. This makes sure we handle the read path and the non-delayed allocation case correctly. Even though the buffer_head is marked unmapped, we have valid b_blocknr and b_bdev values in the buffer_head.

ext4_da_get_block_prep(), called for block reservation, will now return a mapped, unwritten, new buffer_head for prealloc space. This avoids multiple calls to get_block() for writes to the same offset. By marking such buffers as BH_New, we also ensure that sub-block zeroing of buffered writes happens correctly.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/ext4/extents.c 4
-rw-r--r-- fs/ext4/inode.c 82
2 files changed, 54 insertions, 32 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a953214f2829..ea5c47608cea 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2872,6 +2872,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2872 if (create == EXT4_CREATE_UNINITIALIZED_EXT) 2872 if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2873 goto out; 2873 goto out;
2874 if (!create) { 2874 if (!create) {
2875 if (allocated > max_blocks)
2876 allocated = max_blocks;
2875 /* 2877 /*
2876 * We have blocks reserved already. We 2878 * We have blocks reserved already. We
2877 * return allocated blocks so that delalloc 2879 * return allocated blocks so that delalloc
@@ -2879,8 +2881,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2879 * the buffer head will be unmapped so that 2881 * the buffer head will be unmapped so that
2880 * a read from the block returns 0s. 2882 * a read from the block returns 0s.
2881 */ 2883 */
2882 if (allocated > max_blocks)
2883 allocated = max_blocks;
2884 set_buffer_unwritten(bh_result); 2884 set_buffer_unwritten(bh_result);
2885 bh_result->b_bdev = inode->i_sb->s_bdev; 2885 bh_result->b_bdev = inode->i_sb->s_bdev;
2886 bh_result->b_blocknr = newblock; 2886 bh_result->b_blocknr = newblock;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7ad0bb73cd5..96f3366f59f6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1852,7 +1852,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1852 * @logical - first logical block to start assignment with 1852 * @logical - first logical block to start assignment with
1853 * 1853 *
1854 * the function goes through all passed space and put actual disk 1854 * the function goes through all passed space and put actual disk
1855 * block numbers into buffer heads, dropping BH_Delay 1855 * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
1856 */ 1856 */
1857static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, 1857static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1858 struct buffer_head *exbh) 1858 struct buffer_head *exbh)
@@ -1902,16 +1902,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
1902 do { 1902 do {
1903 if (cur_logical >= logical + blocks) 1903 if (cur_logical >= logical + blocks)
1904 break; 1904 break;
1905 if (buffer_delay(bh)) { 1905
1906 bh->b_blocknr = pblock; 1906 if (buffer_delay(bh) ||
1907 clear_buffer_delay(bh); 1907 buffer_unwritten(bh)) {
1908 bh->b_bdev = inode->i_sb->s_bdev; 1908
1909 } else if (buffer_unwritten(bh)) { 1909 BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
1910 bh->b_blocknr = pblock; 1910
1911 clear_buffer_unwritten(bh); 1911 if (buffer_delay(bh)) {
1912 set_buffer_mapped(bh); 1912 clear_buffer_delay(bh);
1913 set_buffer_new(bh); 1913 bh->b_blocknr = pblock;
1914 bh->b_bdev = inode->i_sb->s_bdev; 1914 } else {
1915 /*
1916 * unwritten already should have
1917 * blocknr assigned. Verify that
1918 */
1919 clear_buffer_unwritten(bh);
1920 BUG_ON(bh->b_blocknr != pblock);
1921 }
1922
1915 } else if (buffer_mapped(bh)) 1923 } else if (buffer_mapped(bh))
1916 BUG_ON(bh->b_blocknr != pblock); 1924 BUG_ON(bh->b_blocknr != pblock);
1917 1925
@@ -2053,7 +2061,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
2053 * We consider only non-mapped and non-allocated blocks 2061 * We consider only non-mapped and non-allocated blocks
2054 */ 2062 */
2055 if ((mpd->b_state & (1 << BH_Mapped)) && 2063 if ((mpd->b_state & (1 << BH_Mapped)) &&
2056 !(mpd->b_state & (1 << BH_Delay))) 2064 !(mpd->b_state & (1 << BH_Delay)) &&
2065 !(mpd->b_state & (1 << BH_Unwritten)))
2057 return 0; 2066 return 0;
2058 /* 2067 /*
2059 * We need to make sure the BH_Delay flag is passed down to 2068 * We need to make sure the BH_Delay flag is passed down to
@@ -2205,6 +2214,17 @@ flush_it:
2205 return; 2214 return;
2206} 2215}
2207 2216
2217static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2218{
2219 /*
2220 * unmapped buffer is possible for holes.
2221 * delay buffer is possible with delayed allocation.
2222 * We also need to consider unwritten buffer as unmapped.
2223 */
2224 return (!buffer_mapped(bh) || buffer_delay(bh) ||
2225 buffer_unwritten(bh)) && buffer_dirty(bh);
2226}
2227
2208/* 2228/*
2209 * __mpage_da_writepage - finds extent of pages and blocks 2229 * __mpage_da_writepage - finds extent of pages and blocks
2210 * 2230 *
@@ -2289,8 +2309,7 @@ static int __mpage_da_writepage(struct page *page,
2289 * Otherwise we won't make progress 2309 * Otherwise we won't make progress
2290 * with the page in ext4_da_writepage 2310 * with the page in ext4_da_writepage
2291 */ 2311 */
2292 if (buffer_dirty(bh) && 2312 if (ext4_bh_unmapped_or_delay(NULL, bh)) {
2293 (!buffer_mapped(bh) || buffer_delay(bh))) {
2294 mpage_add_bh_to_extent(mpd, logical, 2313 mpage_add_bh_to_extent(mpd, logical,
2295 bh->b_size, 2314 bh->b_size,
2296 bh->b_state); 2315 bh->b_state);
@@ -2318,6 +2337,14 @@ static int __mpage_da_writepage(struct page *page,
2318/* 2337/*
2319 * this is a special callback for ->write_begin() only 2338 * this is a special callback for ->write_begin() only
2320 * it's intention is to return mapped block or reserve space 2339 * it's intention is to return mapped block or reserve space
2340 *
2341 * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
2342 * We also have b_blocknr = -1 and b_bdev initialized properly
2343 *
2344 * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
2345 * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
2346 * initialized properly.
2347 *
2321 */ 2348 */
2322static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 2349static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2323 struct buffer_head *bh_result, int create) 2350 struct buffer_head *bh_result, int create)
@@ -2353,28 +2380,23 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2353 set_buffer_delay(bh_result); 2380 set_buffer_delay(bh_result);
2354 } else if (ret > 0) { 2381 } else if (ret > 0) {
2355 bh_result->b_size = (ret << inode->i_blkbits); 2382 bh_result->b_size = (ret << inode->i_blkbits);
2356 /* 2383 if (buffer_unwritten(bh_result)) {
2357 * With sub-block writes into unwritten extents 2384 /* A delayed write to unwritten bh should
2358 * we also need to mark the buffer as new so that 2385 * be marked new and mapped. Mapped ensures
2359 * the unwritten parts of the buffer gets correctly zeroed. 2386 * that we don't do get_block multiple times
2360 */ 2387 * when we write to the same offset and new
2361 if (buffer_unwritten(bh_result)) 2388 * ensures that we do proper zero out for
2389 * partial write.
2390 */
2362 set_buffer_new(bh_result); 2391 set_buffer_new(bh_result);
2392 set_buffer_mapped(bh_result);
2393 }
2363 ret = 0; 2394 ret = 0;
2364 } 2395 }
2365 2396
2366 return ret; 2397 return ret;
2367} 2398}
2368 2399
2369static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
2370{
2371 /*
2372 * unmapped buffer is possible for holes.
2373 * delay buffer is possible with delayed allocation
2374 */
2375 return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
2376}
2377
2378static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, 2400static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
2379 struct buffer_head *bh_result, int create) 2401 struct buffer_head *bh_result, int create)
2380{ 2402{
@@ -2828,7 +2850,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
2828 for (i = 0; i < idx; i++) 2850 for (i = 0; i < idx; i++)
2829 bh = bh->b_this_page; 2851 bh = bh->b_this_page;
2830 2852
2831 if (!buffer_mapped(bh) || (buffer_delay(bh))) 2853 if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
2832 return 0; 2854 return 0;
2833 return 1; 2855 return 1;
2834} 2856}