aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2012-03-19 23:03:19 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-03-19 23:03:19 -0400
commit5f95d21fb6f2aaa52830e5b7fb405f6c71d3ab85 (patch)
tree383159c3d591279fcfae9695d64819f71636fd4b /fs/ext4
parent3339578f05787259917788f461f4196b7349c2a4 (diff)
ext4: rewrite punch hole to use ext4_ext_remove_space()
This commit rewrites the ext4 punch hole implementation to use ext4_ext_remove_space() instead of its home-grown way of doing this via ext4_ext_map_blocks(). There are several reasons for changing this. Firstly, it is quite non-obvious that punching a hole needs to call ext4_ext_map_blocks(), especially given that this function should map blocks, not unmap them. It also required a lot of new code in ext4_ext_map_blocks(). Secondly, the design of it is not very effective. The reason is that we are trying to punch out blocks in ext4_ext_punch_hole() in the opposite direction than in ext4_ext_rm_leaf(), which causes ext4_ext_rm_leaf() to iterate through the whole tree from the end to the start to find the requested extent for every extent we are going to punch out. And finally, the current implementation does not use the existing code, but brings in a lot of new code, which is IMO unnecessary since there already is some infrastructure we can use, specifically ext4_ext_remove_space(). This commit changes ext4_ext_remove_space() to accept an 'end' parameter so we can not only truncate to the end of the file, but also remove the space in the middle of the file (punch a hole). Moreover, because the last block to punch out might be in the middle of an extent, we have to split the extent at 'end + 1' so ext4_ext_rm_leaf() can easily either remove the whole first part of the split extent, or change its size. ext4_ext_remove_space() is then used to actually remove the space (extents) from within the hole, instead of ext4_ext_map_blocks(). Note that this also fixes the issue with punch hole where we would forget to remove empty index blocks from the extent tree, resulting in a double free block error and file system corruption. This is simply because we now use a different code path, where this problem does not exist.
This has been tested with fsx running for several days and xfstests, plus xfstest #251 with '-o discard' run on the loop image (which converts discard requests into punch hole requests to the backing file). All of it was run on both 1K and 4K file system block sizes. Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/extents.c170
1 files changed, 88 insertions, 82 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4394a757aa4c..4726a3a447a5 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,14 @@
44 44
45#include <trace/events/ext4.h> 45#include <trace/events/ext4.h>
46 46
47/*
48 * used by extent splitting.
49 */
50#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
51 due to ENOSPC */
52#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
53#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
54
47static int ext4_split_extent(handle_t *handle, 55static int ext4_split_extent(handle_t *handle,
48 struct inode *inode, 56 struct inode *inode,
49 struct ext4_ext_path *path, 57 struct ext4_ext_path *path,
@@ -51,6 +59,13 @@ static int ext4_split_extent(handle_t *handle,
51 int split_flag, 59 int split_flag,
52 int flags); 60 int flags);
53 61
62static int ext4_split_extent_at(handle_t *handle,
63 struct inode *inode,
64 struct ext4_ext_path *path,
65 ext4_lblk_t split,
66 int split_flag,
67 int flags);
68
54static int ext4_ext_truncate_extend_restart(handle_t *handle, 69static int ext4_ext_truncate_extend_restart(handle_t *handle,
55 struct inode *inode, 70 struct inode *inode,
56 int needed) 71 int needed)
@@ -2310,7 +2325,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2310 struct ext4_extent *ex; 2325 struct ext4_extent *ex;
2311 2326
2312 /* the header must be checked already in ext4_ext_remove_space() */ 2327 /* the header must be checked already in ext4_ext_remove_space() */
2313 ext_debug("truncate since %u in leaf\n", start); 2328 ext_debug("truncate since %u in leaf to %u\n", start, end);
2314 if (!path[depth].p_hdr) 2329 if (!path[depth].p_hdr)
2315 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); 2330 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2316 eh = path[depth].p_hdr; 2331 eh = path[depth].p_hdr;
@@ -2345,7 +2360,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2345 ext_debug(" border %u:%u\n", a, b); 2360 ext_debug(" border %u:%u\n", a, b);
2346 2361
2347 /* If this extent is beyond the end of the hole, skip it */ 2362 /* If this extent is beyond the end of the hole, skip it */
2348 if (end <= ex_ee_block) { 2363 if (end < ex_ee_block) {
2349 ex--; 2364 ex--;
2350 ex_ee_block = le32_to_cpu(ex->ee_block); 2365 ex_ee_block = le32_to_cpu(ex->ee_block);
2351 ex_ee_len = ext4_ext_get_actual_len(ex); 2366 ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2484,7 +2499,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
2484 return 1; 2499 return 1;
2485} 2500}
2486 2501
2487static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) 2502static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2503 ext4_lblk_t end)
2488{ 2504{
2489 struct super_block *sb = inode->i_sb; 2505 struct super_block *sb = inode->i_sb;
2490 int depth = ext_depth(inode); 2506 int depth = ext_depth(inode);
@@ -2493,7 +2509,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2493 handle_t *handle; 2509 handle_t *handle;
2494 int i, err; 2510 int i, err;
2495 2511
2496 ext_debug("truncate since %u\n", start); 2512 ext_debug("truncate since %u to %u\n", start, end);
2497 2513
2498 /* probably first extent we're gonna free will be last in block */ 2514 /* probably first extent we're gonna free will be last in block */
2499 handle = ext4_journal_start(inode, depth + 1); 2515 handle = ext4_journal_start(inode, depth + 1);
@@ -2506,6 +2522,61 @@ again:
2506 trace_ext4_ext_remove_space(inode, start, depth); 2522 trace_ext4_ext_remove_space(inode, start, depth);
2507 2523
2508 /* 2524 /*
2525 * Check if we are removing extents inside the extent tree. If that
2526 * is the case, we are going to punch a hole inside the extent tree
2527 * so we have to check whether we need to split the extent covering
2528 * the last block to remove so we can easily remove the part of it
2529 * in ext4_ext_rm_leaf().
2530 */
2531 if (end < EXT_MAX_BLOCKS - 1) {
2532 struct ext4_extent *ex;
2533 ext4_lblk_t ee_block;
2534
2535 /* find extent for this block */
2536 path = ext4_ext_find_extent(inode, end, NULL);
2537 if (IS_ERR(path)) {
2538 ext4_journal_stop(handle);
2539 return PTR_ERR(path);
2540 }
2541 depth = ext_depth(inode);
2542 ex = path[depth].p_ext;
2543 if (!ex)
2544 goto cont;
2545
2546 ee_block = le32_to_cpu(ex->ee_block);
2547
2548 /*
2549 * See if the last block is inside the extent, if so split
2550 * the extent at 'end' block so we can easily remove the
2551 * tail of the first part of the split extent in
2552 * ext4_ext_rm_leaf().
2553 */
2554 if (end >= ee_block &&
2555 end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
2556 int split_flag = 0;
2557
2558 if (ext4_ext_is_uninitialized(ex))
2559 split_flag = EXT4_EXT_MARK_UNINIT1 |
2560 EXT4_EXT_MARK_UNINIT2;
2561
2562 /*
2563 * Split the extent in two so that 'end' is the last
2564 * block in the first new extent
2565 */
2566 err = ext4_split_extent_at(handle, inode, path,
2567 end + 1, split_flag,
2568 EXT4_GET_BLOCKS_PRE_IO |
2569 EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
2570
2571 if (err < 0)
2572 goto out;
2573 }
2574 ext4_ext_drop_refs(path);
2575 kfree(path);
2576 }
2577cont:
2578
2579 /*
2509 * We start scanning from right side, freeing all the blocks 2580 * We start scanning from right side, freeing all the blocks
2510 * after i_size and walking into the tree depth-wise. 2581 * after i_size and walking into the tree depth-wise.
2511 */ 2582 */
@@ -2517,6 +2588,7 @@ again:
2517 } 2588 }
2518 path[0].p_depth = depth; 2589 path[0].p_depth = depth;
2519 path[0].p_hdr = ext_inode_hdr(inode); 2590 path[0].p_hdr = ext_inode_hdr(inode);
2591
2520 if (ext4_ext_check(inode, path[0].p_hdr, depth)) { 2592 if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
2521 err = -EIO; 2593 err = -EIO;
2522 goto out; 2594 goto out;
@@ -2528,7 +2600,7 @@ again:
2528 /* this is leaf block */ 2600 /* this is leaf block */
2529 err = ext4_ext_rm_leaf(handle, inode, path, 2601 err = ext4_ext_rm_leaf(handle, inode, path,
2530 &partial_cluster, start, 2602 &partial_cluster, start,
2531 EXT_MAX_BLOCKS - 1); 2603 end);
2532 /* root level has p_bh == NULL, brelse() eats this */ 2604 /* root level has p_bh == NULL, brelse() eats this */
2533 brelse(path[i].p_bh); 2605 brelse(path[i].p_bh);
2534 path[i].p_bh = NULL; 2606 path[i].p_bh = NULL;
@@ -2711,14 +2783,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2711} 2783}
2712 2784
2713/* 2785/*
2714 * used by extent splitting.
2715 */
2716#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
2717 due to ENOSPC */
2718#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
2719#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
2720
2721/*
2722 * ext4_split_extent_at() splits an extent at given block. 2786 * ext4_split_extent_at() splits an extent at given block.
2723 * 2787 *
2724 * @handle: the journal handle 2788 * @handle: the journal handle
@@ -4230,7 +4294,7 @@ void ext4_ext_truncate(struct inode *inode)
4230 4294
4231 last_block = (inode->i_size + sb->s_blocksize - 1) 4295 last_block = (inode->i_size + sb->s_blocksize - 1)
4232 >> EXT4_BLOCK_SIZE_BITS(sb); 4296 >> EXT4_BLOCK_SIZE_BITS(sb);
4233 err = ext4_ext_remove_space(inode, last_block); 4297 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4234 4298
4235 /* In a multi-transaction truncate, we only make the final 4299 /* In a multi-transaction truncate, we only make the final
4236 * transaction synchronous. 4300 * transaction synchronous.
@@ -4707,14 +4771,12 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4707{ 4771{
4708 struct inode *inode = file->f_path.dentry->d_inode; 4772 struct inode *inode = file->f_path.dentry->d_inode;
4709 struct super_block *sb = inode->i_sb; 4773 struct super_block *sb = inode->i_sb;
4710 struct ext4_ext_cache cache_ex; 4774 ext4_lblk_t first_block, stop_block;
4711 ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
4712 struct address_space *mapping = inode->i_mapping; 4775 struct address_space *mapping = inode->i_mapping;
4713 struct ext4_map_blocks map;
4714 handle_t *handle; 4776 handle_t *handle;
4715 loff_t first_page, last_page, page_len; 4777 loff_t first_page, last_page, page_len;
4716 loff_t first_page_offset, last_page_offset; 4778 loff_t first_page_offset, last_page_offset;
4717 int ret, credits, blocks_released, err = 0; 4779 int credits, err = 0;
4718 4780
4719 /* No need to punch hole beyond i_size */ 4781 /* No need to punch hole beyond i_size */
4720 if (offset >= inode->i_size) 4782 if (offset >= inode->i_size)
@@ -4730,10 +4792,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4730 offset; 4792 offset;
4731 } 4793 }
4732 4794
4733 first_block = (offset + sb->s_blocksize - 1) >>
4734 EXT4_BLOCK_SIZE_BITS(sb);
4735 last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
4736
4737 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 4795 first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
4738 last_page = (offset + length) >> PAGE_CACHE_SHIFT; 4796 last_page = (offset + length) >> PAGE_CACHE_SHIFT;
4739 4797
@@ -4812,7 +4870,6 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4812 } 4870 }
4813 } 4871 }
4814 4872
4815
4816 /* 4873 /*
4817 * If i_size is contained in the last page, we need to 4874 * If i_size is contained in the last page, we need to
4818 * unmap and zero the partial page after i_size 4875 * unmap and zero the partial page after i_size
@@ -4832,73 +4889,22 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4832 } 4889 }
4833 } 4890 }
4834 4891
4892 first_block = (offset + sb->s_blocksize - 1) >>
4893 EXT4_BLOCK_SIZE_BITS(sb);
4894 stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
4895
4835 /* If there are no blocks to remove, return now */ 4896 /* If there are no blocks to remove, return now */
4836 if (first_block >= last_block) 4897 if (first_block >= stop_block)
4837 goto out; 4898 goto out;
4838 4899
4839 down_write(&EXT4_I(inode)->i_data_sem); 4900 down_write(&EXT4_I(inode)->i_data_sem);
4840 ext4_ext_invalidate_cache(inode); 4901 ext4_ext_invalidate_cache(inode);
4841 ext4_discard_preallocations(inode); 4902 ext4_discard_preallocations(inode);
4842 4903
4843 /* 4904 err = ext4_ext_remove_space(inode, first_block, stop_block - 1);
4844 * Loop over all the blocks and identify blocks
4845 * that need to be punched out
4846 */
4847 iblock = first_block;
4848 blocks_released = 0;
4849 while (iblock < last_block) {
4850 max_blocks = last_block - iblock;
4851 num_blocks = 1;
4852 memset(&map, 0, sizeof(map));
4853 map.m_lblk = iblock;
4854 map.m_len = max_blocks;
4855 ret = ext4_ext_map_blocks(handle, inode, &map,
4856 EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
4857
4858 if (ret > 0) {
4859 blocks_released += ret;
4860 num_blocks = ret;
4861 } else if (ret == 0) {
4862 /*
4863 * If map blocks could not find the block,
4864 * then it is in a hole. If the hole was
4865 * not already cached, then map blocks should
4866 * put it in the cache. So we can get the hole
4867 * out of the cache
4868 */
4869 memset(&cache_ex, 0, sizeof(cache_ex));
4870 if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
4871 !cache_ex.ec_start) {
4872 4905
4873 /* The hole is cached */ 4906 ext4_ext_invalidate_cache(inode);
4874 num_blocks = cache_ex.ec_block + 4907 ext4_discard_preallocations(inode);
4875 cache_ex.ec_len - iblock;
4876
4877 } else {
4878 /* The block could not be identified */
4879 err = -EIO;
4880 break;
4881 }
4882 } else {
4883 /* Map blocks error */
4884 err = ret;
4885 break;
4886 }
4887
4888 if (num_blocks == 0) {
4889 /* This condition should never happen */
4890 ext_debug("Block lookup failed");
4891 err = -EIO;
4892 break;
4893 }
4894
4895 iblock += num_blocks;
4896 }
4897
4898 if (blocks_released > 0) {
4899 ext4_ext_invalidate_cache(inode);
4900 ext4_discard_preallocations(inode);
4901 }
4902 4908
4903 if (IS_SYNC(inode)) 4909 if (IS_SYNC(inode))
4904 ext4_handle_sync(handle); 4910 ext4_handle_sync(handle);