diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2009-05-12 16:30:27 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-05-12 16:30:27 -0400 |
commit | 29fa89d088941d79765d60f22d5ccdd6b8696e11 (patch) | |
tree | 3fbe031eec140c03dafa11f2416c4f6e3826f21d | |
parent | 8fb0e342481c4d80040670fec915f0b9c7c6499a (diff) |
ext4: Mark the unwritten buffer_head as mapped during write_begin
Setting BH_Unwritten buffer_heads as BH_Mapped avoids multiple
(unnecessary) calls to get_block() during the call to the write(2)
system call. Setting BH_Unwritten buffer heads as BH_Mapped requires
that the writepages() functions can handle BH_Unwritten buffer_heads.
After this commit, things work as follows:
ext4_ext_get_blocks() returns an unmapped, unwritten buffer head when
called with create = 0 for prealloc space. This makes sure we handle
the read path and non-delayed allocation case correctly. Even though
the buffer head is marked unmapped we have valid b_blocknr and b_bdev
values in the buffer_head.
ext4_da_get_block_prep() called for block reservation will now return
mapped, unwritten, new buffer_head for prealloc space. This avoids
multiple calls to get_block() for writes to the same offset. By marking such
buffers as BH_New, we also ensure that sub-block zeroing of buffered
writes happens correctly.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/extents.c | 4 | ||||
-rw-r--r-- | fs/ext4/inode.c | 82 |
2 files changed, 54 insertions, 32 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a953214f2829..ea5c47608cea 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -2872,6 +2872,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2872 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) | 2872 | if (create == EXT4_CREATE_UNINITIALIZED_EXT) |
2873 | goto out; | 2873 | goto out; |
2874 | if (!create) { | 2874 | if (!create) { |
2875 | if (allocated > max_blocks) | ||
2876 | allocated = max_blocks; | ||
2875 | /* | 2877 | /* |
2876 | * We have blocks reserved already. We | 2878 | * We have blocks reserved already. We |
2877 | * return allocated blocks so that delalloc | 2879 | * return allocated blocks so that delalloc |
@@ -2879,8 +2881,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
2879 | * the buffer head will be unmapped so that | 2881 | * the buffer head will be unmapped so that |
2880 | * a read from the block returns 0s. | 2882 | * a read from the block returns 0s. |
2881 | */ | 2883 | */ |
2882 | if (allocated > max_blocks) | ||
2883 | allocated = max_blocks; | ||
2884 | set_buffer_unwritten(bh_result); | 2884 | set_buffer_unwritten(bh_result); |
2885 | bh_result->b_bdev = inode->i_sb->s_bdev; | 2885 | bh_result->b_bdev = inode->i_sb->s_bdev; |
2886 | bh_result->b_blocknr = newblock; | 2886 | bh_result->b_blocknr = newblock; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d7ad0bb73cd5..96f3366f59f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1852,7 +1852,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1852 | * @logical - first logical block to start assignment with | 1852 | * @logical - first logical block to start assignment with |
1853 | * | 1853 | * |
1854 | * the function goes through all passed space and put actual disk | 1854 | * the function goes through all passed space and put actual disk |
1855 | * block numbers into buffer heads, dropping BH_Delay | 1855 | * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten |
1856 | */ | 1856 | */ |
1857 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | 1857 | static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, |
1858 | struct buffer_head *exbh) | 1858 | struct buffer_head *exbh) |
@@ -1902,16 +1902,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1902 | do { | 1902 | do { |
1903 | if (cur_logical >= logical + blocks) | 1903 | if (cur_logical >= logical + blocks) |
1904 | break; | 1904 | break; |
1905 | if (buffer_delay(bh)) { | 1905 | |
1906 | bh->b_blocknr = pblock; | 1906 | if (buffer_delay(bh) || |
1907 | clear_buffer_delay(bh); | 1907 | buffer_unwritten(bh)) { |
1908 | bh->b_bdev = inode->i_sb->s_bdev; | 1908 | |
1909 | } else if (buffer_unwritten(bh)) { | 1909 | BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); |
1910 | bh->b_blocknr = pblock; | 1910 | |
1911 | clear_buffer_unwritten(bh); | 1911 | if (buffer_delay(bh)) { |
1912 | set_buffer_mapped(bh); | 1912 | clear_buffer_delay(bh); |
1913 | set_buffer_new(bh); | 1913 | bh->b_blocknr = pblock; |
1914 | bh->b_bdev = inode->i_sb->s_bdev; | 1914 | } else { |
1915 | /* | ||
1916 | * unwritten already should have | ||
1917 | * blocknr assigned. Verify that | ||
1918 | */ | ||
1919 | clear_buffer_unwritten(bh); | ||
1920 | BUG_ON(bh->b_blocknr != pblock); | ||
1921 | } | ||
1922 | |||
1915 | } else if (buffer_mapped(bh)) | 1923 | } else if (buffer_mapped(bh)) |
1916 | BUG_ON(bh->b_blocknr != pblock); | 1924 | BUG_ON(bh->b_blocknr != pblock); |
1917 | 1925 | ||
@@ -2053,7 +2061,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
2053 | * We consider only non-mapped and non-allocated blocks | 2061 | * We consider only non-mapped and non-allocated blocks |
2054 | */ | 2062 | */ |
2055 | if ((mpd->b_state & (1 << BH_Mapped)) && | 2063 | if ((mpd->b_state & (1 << BH_Mapped)) && |
2056 | !(mpd->b_state & (1 << BH_Delay))) | 2064 | !(mpd->b_state & (1 << BH_Delay)) && |
2065 | !(mpd->b_state & (1 << BH_Unwritten))) | ||
2057 | return 0; | 2066 | return 0; |
2058 | /* | 2067 | /* |
2059 | * We need to make sure the BH_Delay flag is passed down to | 2068 | * We need to make sure the BH_Delay flag is passed down to |
@@ -2205,6 +2214,17 @@ flush_it: | |||
2205 | return; | 2214 | return; |
2206 | } | 2215 | } |
2207 | 2216 | ||
2217 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||
2218 | { | ||
2219 | /* | ||
2220 | * unmapped buffer is possible for holes. | ||
2221 | * delay buffer is possible with delayed allocation. | ||
2222 | * We also need to consider unwritten buffer as unmapped. | ||
2223 | */ | ||
2224 | return (!buffer_mapped(bh) || buffer_delay(bh) || | ||
2225 | buffer_unwritten(bh)) && buffer_dirty(bh); | ||
2226 | } | ||
2227 | |||
2208 | /* | 2228 | /* |
2209 | * __mpage_da_writepage - finds extent of pages and blocks | 2229 | * __mpage_da_writepage - finds extent of pages and blocks |
2210 | * | 2230 | * |
@@ -2289,8 +2309,7 @@ static int __mpage_da_writepage(struct page *page, | |||
2289 | * Otherwise we won't make progress | 2309 | * Otherwise we won't make progress |
2290 | * with the page in ext4_da_writepage | 2310 | * with the page in ext4_da_writepage |
2291 | */ | 2311 | */ |
2292 | if (buffer_dirty(bh) && | 2312 | if (ext4_bh_unmapped_or_delay(NULL, bh)) { |
2293 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
2294 | mpage_add_bh_to_extent(mpd, logical, | 2313 | mpage_add_bh_to_extent(mpd, logical, |
2295 | bh->b_size, | 2314 | bh->b_size, |
2296 | bh->b_state); | 2315 | bh->b_state); |
@@ -2318,6 +2337,14 @@ static int __mpage_da_writepage(struct page *page, | |||
2318 | /* | 2337 | /* |
2319 | * this is a special callback for ->write_begin() only | 2338 | * this is a special callback for ->write_begin() only |
2320 | * it's intention is to return mapped block or reserve space | 2339 | * it's intention is to return mapped block or reserve space |
2340 | * | ||
2341 | * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. | ||
2342 | * We also have b_blocknr = -1 and b_bdev initialized properly | ||
2343 | * | ||
2344 | * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. | ||
2345 | * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev | ||
2346 | * initialized properly. | ||
2347 | * | ||
2321 | */ | 2348 | */ |
2322 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | 2349 | static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, |
2323 | struct buffer_head *bh_result, int create) | 2350 | struct buffer_head *bh_result, int create) |
@@ -2353,28 +2380,23 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2353 | set_buffer_delay(bh_result); | 2380 | set_buffer_delay(bh_result); |
2354 | } else if (ret > 0) { | 2381 | } else if (ret > 0) { |
2355 | bh_result->b_size = (ret << inode->i_blkbits); | 2382 | bh_result->b_size = (ret << inode->i_blkbits); |
2356 | /* | 2383 | if (buffer_unwritten(bh_result)) { |
2357 | * With sub-block writes into unwritten extents | 2384 | /* A delayed write to unwritten bh should |
2358 | * we also need to mark the buffer as new so that | 2385 | * be marked new and mapped. Mapped ensures |
2359 | * the unwritten parts of the buffer gets correctly zeroed. | 2386 | * that we don't do get_block multiple times |
2360 | */ | 2387 | * when we write to the same offset and new |
2361 | if (buffer_unwritten(bh_result)) | 2388 | * ensures that we do proper zero out for |
2389 | * partial write. | ||
2390 | */ | ||
2362 | set_buffer_new(bh_result); | 2391 | set_buffer_new(bh_result); |
2392 | set_buffer_mapped(bh_result); | ||
2393 | } | ||
2363 | ret = 0; | 2394 | ret = 0; |
2364 | } | 2395 | } |
2365 | 2396 | ||
2366 | return ret; | 2397 | return ret; |
2367 | } | 2398 | } |
2368 | 2399 | ||
2369 | static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) | ||
2370 | { | ||
2371 | /* | ||
2372 | * unmapped buffer is possible for holes. | ||
2373 | * delay buffer is possible with delayed allocation | ||
2374 | */ | ||
2375 | return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); | ||
2376 | } | ||
2377 | |||
2378 | static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, | 2400 | static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, |
2379 | struct buffer_head *bh_result, int create) | 2401 | struct buffer_head *bh_result, int create) |
2380 | { | 2402 | { |
@@ -2828,7 +2850,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
2828 | for (i = 0; i < idx; i++) | 2850 | for (i = 0; i < idx; i++) |
2829 | bh = bh->b_this_page; | 2851 | bh = bh->b_this_page; |
2830 | 2852 | ||
2831 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2853 | if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) |
2832 | return 0; | 2854 | return 0; |
2833 | return 1; | 2855 | return 1; |
2834 | } | 2856 | } |