aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2012-11-28 12:32:26 -0500
committerTheodore Ts'o <tytso@mit.edu>2012-11-28 12:32:26 -0500
commit91dd8c114499e9818f2d5919ef0b9eee61810220 (patch)
tree5eab15bc469385b207f3e4613cdc47d7078338ca /fs/ext4/extents.c
parentf3b59291a69d0b734be1fc8be489fef2dd846d3d (diff)
ext4: prevent race while walking extent tree for fiemap
Currently ext4_ext_walk_space() only takes i_data_sem for read when searching for the extent at given block with ext4_ext_find_extent(). Then it drops the lock and the extent tree can be changed at will. However later on we're searching for the 'next' extent, but the extent tree might already have changed, so the information might not be accurate. In fact we can hit BUG_ON(end <= start) if the extent got inserted into the tree after the one we found and before the block we were searching for. This has been reproduced by running xfstests 225 in loop on s390x architecture, but theoretically we could hit this on any other architecture as well, but probably not as often. Moreover the extent currently in delayed allocation might be allocated after we search the extent tree and before we search extent status tree delayed buffers resulting in those delayed buffers being completely missed, even though completely written and allocated. We fix all those problems in several steps: 1. remove unnecessary callback indirection 2. rename functions ext4_ext_walk_space -> ext4_fill_fiemap_extents ext4_ext_fiemap_cb -> ext4_find_delayed_extent 3. move fiemap_fill_next_extent() into ext4_fill_fiemap_extents() 4. hold the i_data_sem for: ext4_ext_find_extent() ext4_ext_next_allocated_block() ext4_find_delayed_extent() 5. call fiemap_fill_next_extent after releasing the i_data_sem 6. move path reinitialization into the critical section. Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c136
1 files changed, 76 insertions, 60 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index d3dd6182c07a..fbe7dc284240 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -109,6 +109,9 @@ static int ext4_split_extent_at(handle_t *handle,
109 int split_flag, 109 int split_flag,
110 int flags); 110 int flags);
111 111
112static int ext4_find_delayed_extent(struct inode *inode,
113 struct ext4_ext_cache *newex);
114
112static int ext4_ext_truncate_extend_restart(handle_t *handle, 115static int ext4_ext_truncate_extend_restart(handle_t *handle,
113 struct inode *inode, 116 struct inode *inode,
114 int needed) 117 int needed)
@@ -1959,27 +1962,33 @@ cleanup:
1959 return err; 1962 return err;
1960} 1963}
1961 1964
1962static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, 1965static int ext4_fill_fiemap_extents(struct inode *inode,
1963 ext4_lblk_t num, ext_prepare_callback func, 1966 ext4_lblk_t block, ext4_lblk_t num,
1964 void *cbdata) 1967 struct fiemap_extent_info *fieinfo)
1965{ 1968{
1966 struct ext4_ext_path *path = NULL; 1969 struct ext4_ext_path *path = NULL;
1967 struct ext4_ext_cache cbex; 1970 struct ext4_ext_cache cbex;
1968 struct ext4_extent *ex; 1971 struct ext4_extent *ex;
1969 ext4_lblk_t next, start = 0, end = 0; 1972 ext4_lblk_t next, next_del, start = 0, end = 0;
1970 ext4_lblk_t last = block + num; 1973 ext4_lblk_t last = block + num;
1971 int depth, exists, err = 0; 1974 int exists, depth = 0, err = 0;
1972 1975 unsigned int flags = 0;
1973 BUG_ON(func == NULL); 1976 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
1974 BUG_ON(inode == NULL);
1975 1977
1976 while (block < last && block != EXT_MAX_BLOCKS) { 1978 while (block < last && block != EXT_MAX_BLOCKS) {
1977 num = last - block; 1979 num = last - block;
1978 /* find extent for this block */ 1980 /* find extent for this block */
1979 down_read(&EXT4_I(inode)->i_data_sem); 1981 down_read(&EXT4_I(inode)->i_data_sem);
1982
1983 if (path && ext_depth(inode) != depth) {
1984 /* depth was changed. we have to realloc path */
1985 kfree(path);
1986 path = NULL;
1987 }
1988
1980 path = ext4_ext_find_extent(inode, block, path); 1989 path = ext4_ext_find_extent(inode, block, path);
1981 up_read(&EXT4_I(inode)->i_data_sem);
1982 if (IS_ERR(path)) { 1990 if (IS_ERR(path)) {
1991 up_read(&EXT4_I(inode)->i_data_sem);
1983 err = PTR_ERR(path); 1992 err = PTR_ERR(path);
1984 path = NULL; 1993 path = NULL;
1985 break; 1994 break;
@@ -1987,13 +1996,16 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1987 1996
1988 depth = ext_depth(inode); 1997 depth = ext_depth(inode);
1989 if (unlikely(path[depth].p_hdr == NULL)) { 1998 if (unlikely(path[depth].p_hdr == NULL)) {
1999 up_read(&EXT4_I(inode)->i_data_sem);
1990 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); 2000 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
1991 err = -EIO; 2001 err = -EIO;
1992 break; 2002 break;
1993 } 2003 }
1994 ex = path[depth].p_ext; 2004 ex = path[depth].p_ext;
1995 next = ext4_ext_next_allocated_block(path); 2005 next = ext4_ext_next_allocated_block(path);
2006 ext4_ext_drop_refs(path);
1996 2007
2008 flags = 0;
1997 exists = 0; 2009 exists = 0;
1998 if (!ex) { 2010 if (!ex) {
1999 /* there is no extent yet, so try to allocate 2011 /* there is no extent yet, so try to allocate
@@ -2037,30 +2049,54 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
2037 cbex.ec_block = le32_to_cpu(ex->ee_block); 2049 cbex.ec_block = le32_to_cpu(ex->ee_block);
2038 cbex.ec_len = ext4_ext_get_actual_len(ex); 2050 cbex.ec_len = ext4_ext_get_actual_len(ex);
2039 cbex.ec_start = ext4_ext_pblock(ex); 2051 cbex.ec_start = ext4_ext_pblock(ex);
2052 if (ext4_ext_is_uninitialized(ex))
2053 flags |= FIEMAP_EXTENT_UNWRITTEN;
2040 } 2054 }
2041 2055
2056 /*
2057 * Find delayed extent and update cbex accordingly. We call
2058 * it even in !exists case to find out whether cbex is the
2059 * last existing extent or not.
2060 */
2061 next_del = ext4_find_delayed_extent(inode, &cbex);
2062 if (!exists && next_del) {
2063 exists = 1;
2064 flags |= FIEMAP_EXTENT_DELALLOC;
2065 }
2066 up_read(&EXT4_I(inode)->i_data_sem);
2067
2042 if (unlikely(cbex.ec_len == 0)) { 2068 if (unlikely(cbex.ec_len == 0)) {
2043 EXT4_ERROR_INODE(inode, "cbex.ec_len == 0"); 2069 EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
2044 err = -EIO; 2070 err = -EIO;
2045 break; 2071 break;
2046 } 2072 }
2047 err = func(inode, next, &cbex, ex, cbdata);
2048 ext4_ext_drop_refs(path);
2049
2050 if (err < 0)
2051 break;
2052 2073
2053 if (err == EXT_REPEAT) 2074 /* This is possible iff next == next_del == EXT_MAX_BLOCKS */
2054 continue; 2075 if (next == next_del) {
2055 else if (err == EXT_BREAK) { 2076 flags |= FIEMAP_EXTENT_LAST;
2056 err = 0; 2077 if (unlikely(next_del != EXT_MAX_BLOCKS ||
2057 break; 2078 next != EXT_MAX_BLOCKS)) {
2079 EXT4_ERROR_INODE(inode,
2080 "next extent == %u, next "
2081 "delalloc extent = %u",
2082 next, next_del);
2083 err = -EIO;
2084 break;
2085 }
2058 } 2086 }
2059 2087
2060 if (ext_depth(inode) != depth) { 2088 if (exists) {
2061 /* depth was changed. we have to realloc path */ 2089 err = fiemap_fill_next_extent(fieinfo,
2062 kfree(path); 2090 (__u64)cbex.ec_block << blksize_bits,
2063 path = NULL; 2091 (__u64)cbex.ec_start << blksize_bits,
2092 (__u64)cbex.ec_len << blksize_bits,
2093 flags);
2094 if (err < 0)
2095 break;
2096 if (err == 1) {
2097 err = 0;
2098 break;
2099 }
2064 } 2100 }
2065 2101
2066 block = cbex.ec_block + cbex.ec_len; 2102 block = cbex.ec_block + cbex.ec_len;
@@ -4493,26 +4529,23 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
4493} 4529}
4494 4530
4495/* 4531/*
4496 * Callback function called for each extent to gather FIEMAP information. 4532 * If newex is not existing extent (newex->ec_start equals zero) find
4533 * delayed extent at start of newex and update newex accordingly and
4534 * return start of the next delayed extent.
4535 *
4536 * If newex is existing extent (newex->ec_start is not equal zero)
4537 * return start of next delayed extent or EXT_MAX_BLOCKS if no delayed
4538 * extent found. Leave newex unmodified.
4497 */ 4539 */
4498static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next, 4540static int ext4_find_delayed_extent(struct inode *inode,
4499 struct ext4_ext_cache *newex, struct ext4_extent *ex, 4541 struct ext4_ext_cache *newex)
4500 void *data)
4501{ 4542{
4502 struct extent_status es; 4543 struct extent_status es;
4503 __u64 logical;
4504 __u64 physical;
4505 __u64 length;
4506 __u32 flags = 0;
4507 ext4_lblk_t next_del; 4544 ext4_lblk_t next_del;
4508 int ret = 0;
4509 struct fiemap_extent_info *fieinfo = data;
4510 unsigned char blksize_bits;
4511 4545
4512 es.start = newex->ec_block; 4546 es.start = newex->ec_block;
4513 next_del = ext4_es_find_extent(inode, &es); 4547 next_del = ext4_es_find_extent(inode, &es);
4514 4548
4515 next = min(next_del, next);
4516 if (newex->ec_start == 0) { 4549 if (newex->ec_start == 0) {
4517 /* 4550 /*
4518 * No extent in extent-tree contains block @newex->ec_start, 4551 * No extent in extent-tree contains block @newex->ec_start,
@@ -4520,37 +4553,19 @@ static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
4520 */ 4553 */
4521 if (es.len == 0) 4554 if (es.len == 0)
4522 /* A hole found. */ 4555 /* A hole found. */
4523 return EXT_CONTINUE; 4556 return 0;
4524 4557
4525 if (es.start > newex->ec_block) { 4558 if (es.start > newex->ec_block) {
4526 /* A hole found. */ 4559 /* A hole found. */
4527 newex->ec_len = min(es.start - newex->ec_block, 4560 newex->ec_len = min(es.start - newex->ec_block,
4528 newex->ec_len); 4561 newex->ec_len);
4529 return EXT_CONTINUE; 4562 return 0;
4530 } 4563 }
4531 4564
4532 flags |= FIEMAP_EXTENT_DELALLOC;
4533 newex->ec_len = es.start + es.len - newex->ec_block; 4565 newex->ec_len = es.start + es.len - newex->ec_block;
4534 } 4566 }
4535 4567
4536 if (ex && ext4_ext_is_uninitialized(ex)) 4568 return next_del;
4537 flags |= FIEMAP_EXTENT_UNWRITTEN;
4538
4539 if (next == EXT_MAX_BLOCKS)
4540 flags |= FIEMAP_EXTENT_LAST;
4541
4542 blksize_bits = inode->i_sb->s_blocksize_bits;
4543 logical = (__u64)newex->ec_block << blksize_bits;
4544 physical = (__u64)newex->ec_start << blksize_bits;
4545 length = (__u64)newex->ec_len << blksize_bits;
4546
4547 ret = fiemap_fill_next_extent(fieinfo, logical, physical,
4548 length, flags);
4549 if (ret < 0)
4550 return ret;
4551 if (ret == 1)
4552 return EXT_BREAK;
4553 return EXT_CONTINUE;
4554} 4569}
4555/* fiemap flags we can handle specified here */ 4570/* fiemap flags we can handle specified here */
4556#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 4571#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
@@ -4772,6 +4787,7 @@ out_mutex:
4772 mutex_unlock(&inode->i_mutex); 4787 mutex_unlock(&inode->i_mutex);
4773 return err; 4788 return err;
4774} 4789}
4790
4775int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 4791int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4776 __u64 start, __u64 len) 4792 __u64 start, __u64 len)
4777{ 4793{
@@ -4799,11 +4815,11 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4799 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; 4815 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
4800 4816
4801 /* 4817 /*
4802 * Walk the extent tree gathering extent information. 4818 * Walk the extent tree gathering extent information
4803 * ext4_ext_fiemap_cb will push extents back to user. 4819 * and pushing extents back to the user.
4804 */ 4820 */
4805 error = ext4_ext_walk_space(inode, start_blk, len_blks, 4821 error = ext4_fill_fiemap_extents(inode, start_blk,
4806 ext4_ext_fiemap_cb, fieinfo); 4822 len_blks, fieinfo);
4807 } 4823 }
4808 4824
4809 return error; 4825 return error;