From b720303df7352d4a7a1f61e467e0a124913c0d41 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Wed, 12 May 2010 00:00:00 -0400 Subject: ext4: fix memory leaks in error path handling of ext4_ext_zeroout() When EIO occurs after bio is submitted, there is no memory free operation for bio, which results in memory leakage. And there is also no check against bio_alloc() for bio. Acked-by: Dave Kleikamp Signed-off-by: Jing Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 236b834b4ca8..228eeaf2dccf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2544,7 +2544,7 @@ static void bi_complete(struct bio *bio, int error) /* FIXME!! we need to try to merge to left or right after zero-out */ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { - int ret = -EIO; + int ret; struct bio *bio; int blkbits, blocksize; sector_t ee_pblock; @@ -2568,6 +2568,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) len = ee_len; bio = bio_alloc(GFP_NOIO, len); + if (!bio) + return -ENOMEM; + bio->bi_sector = ee_pblock; bio->bi_bdev = inode->i_sb->s_bdev; @@ -2595,17 +2598,15 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) submit_bio(WRITE, bio); wait_for_completion(&event); - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - ret = 0; - else { - ret = -EIO; - break; + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + bio_put(bio); + return -EIO; } bio_put(bio); ee_len -= done; ee_pblock += done << (blkbits - 9); } - return ret; + return 0; } #define EXT4_EXT_ZERO_LEN 7 -- cgit v1.2.2 From 21ca087a3891efab4d45488db8febee474d26c68 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 16 May 2010 06:00:00 -0400 Subject: ext4: Do not zero out uninitialized extents beyond i_size The extents code will sometimes zero out blocks and mark them as initialized instead of splitting an extent into several smaller ones. This optimization however, causes problems if the extent is beyond i_size because fsck will complain if there are uninitialized blocks after i_size as this can not be distinguished from an inode that has an incorrect i_size field. https://bugzilla.kernel.org/show_bug.cgi?id=15742 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 67 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 16 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 228eeaf2dccf..ee611daf0748 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2631,11 +2631,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, struct ext4_extent *ex2 = NULL; struct ext4_extent *ex3 = NULL; struct ext4_extent_header *eh; - ext4_lblk_t ee_block; + ext4_lblk_t ee_block, eof_block; unsigned int allocated, ee_len, depth; ext4_fsblk_t newblock; int err = 0; int ret = 0; + int may_zeroout; + + ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" + "block %llu, max_blocks %u\n", inode->i_ino, + (unsigned long long)iblock, max_blocks); + + eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits; + if (eof_block < iblock + max_blocks) + eof_block = iblock + max_blocks; depth = ext_depth(inode); eh = path[depth].p_hdr; @@ -2644,16 +2654,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (iblock - ee_block); newblock = iblock - ee_block + ext_pblock(ex); + ex2 = ex; orig_ex.ee_block = ex->ee_block; orig_ex.ee_len = cpu_to_le16(ee_len); ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); + /* + * It is safe to convert extent to initialized via explicit + * zeroout only if extent is fully insde i_size or new_size. + */ + may_zeroout = ee_block + ee_len <= eof_block; + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */ - if (ee_len <= 2*EXT4_EXT_ZERO_LEN) { + if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2684,7 +2701,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (allocated > max_blocks) { unsigned int newdepth; /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ - if (allocated <= EXT4_EXT_ZERO_LEN) { + if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { /* * iblock == ee_block is handled by the zerouout * at the beginning. @@ -2760,7 +2777,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex3->ee_len = cpu_to_le16(allocated - max_blocks); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2784,8 +2801,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * update the extent length after successful insert of the * split extent */ - orig_ex.ee_len = cpu_to_le16(ee_len - - ext4_ext_get_actual_len(ex3)); + ee_len -= ext4_ext_get_actual_len(ex3); + orig_ex.ee_len = cpu_to_le16(ee_len); + may_zeroout = ee_block + ee_len <= eof_block; + depth = newdepth; ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, iblock, path); @@ -2809,7 +2828,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * otherwise give the extent a chance to merge to left */ if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && - iblock != ee_block) { + iblock != ee_block && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2878,7 +2897,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, goto out; insert: err = ext4_ext_insert_extent(handle, inode, path, &newex, 0); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2938,14 +2957,21 @@ static int ext4_split_unwritten_extents(handle_t *handle, struct ext4_extent *ex2 = NULL; struct ext4_extent *ex3 = NULL; struct ext4_extent_header *eh; - ext4_lblk_t ee_block; + ext4_lblk_t ee_block, eof_block; unsigned int allocated, ee_len, depth; ext4_fsblk_t newblock; int err = 0; + int may_zeroout; + + ext_debug("ext4_split_unwritten_extents: inode %lu, logical" + "block %llu, max_blocks %u\n", inode->i_ino, + (unsigned long long)iblock, max_blocks); + + eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits; + if (eof_block < iblock + max_blocks) + eof_block = iblock + max_blocks; - ext_debug("ext4_split_unwritten_extents: inode %lu," - "iblock %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)iblock, max_blocks); depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; @@ -2953,11 +2979,18 @@ static int ext4_split_unwritten_extents(handle_t *handle, ee_len = ext4_ext_get_actual_len(ex); allocated = ee_len - (iblock - ee_block); newblock = iblock - ee_block + ext_pblock(ex); + ex2 = ex; orig_ex.ee_block = ex->ee_block; orig_ex.ee_len = cpu_to_le16(ee_len); ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); + /* + * It is safe to convert extent to initialized via explicit + * zeroout only if extent is fully insde i_size or new_size. + */ + may_zeroout = ee_block + ee_len <= eof_block; + /* * If the uninitialized extent begins at the same logical * block where the write begins, and the write completely @@ -2992,7 +3025,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, ex3->ee_len = cpu_to_le16(allocated - max_blocks); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -3016,8 +3049,10 @@ static int ext4_split_unwritten_extents(handle_t *handle, * update the extent length after successful insert of the * split extent */ - orig_ex.ee_len = cpu_to_le16(ee_len - - ext4_ext_get_actual_len(ex3)); + ee_len -= ext4_ext_get_actual_len(ex3); + orig_ex.ee_len = cpu_to_le16(ee_len); + may_zeroout = ee_block + ee_len <= eof_block; + depth = newdepth; ext4_ext_drop_refs(path); path = ext4_ext_find_extent(inode, iblock, path); @@ -3063,7 +3098,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, goto out; insert: err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); - if (err == -ENOSPC) { + if (err == -ENOSPC && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; -- cgit v1.2.2 From 6d19c42b7cf81c39632b6d4dbc514e8449bcd346 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Sun, 16 May 2010 14:00:00 -0400 Subject: ext4: Prevent creation of files larger than RLIMIT_FSIZE using fallocate Currently using posix_fallocate one can bypass an RLIMIT_FSIZE limit and create a file larger than the limit. Add a check for that. Signed-off-by: Nikanth Karthikesan Signed-off-by: Amit Arora Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ee611daf0748..8a8f9f0be911 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3708,6 +3708,11 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) */ credits = ext4_chunk_trans_blocks(inode, max_blocks); mutex_lock(&inode->i_mutex); + ret = inode_newsize_ok(inode, (len + offset)); + if (ret) { + mutex_unlock(&inode->i_mutex); + return ret; + } retry: while (ret >= 0 && ret < max_blocks) { block = block + ret; -- cgit v1.2.2 From e35fd6609b2fee54484d520deccb8f18bf7d38f3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 19:00:00 -0400 Subject: ext4: Add new abstraction ext4_map_blocks() underneath ext4_get_blocks() Jack up ext4_get_blocks() and add a new function, ext4_map_blocks() which uses a much smaller structure, struct ext4_map_blocks which is 20 bytes, as opposed to a struct buffer_head, which nearly 5 times bigger on an x86_64 machine. By switching things to use ext4_map_blocks(), we can save stack space by using ext4_map_blocks() since we can avoid allocating a struct buffer_head on the stack. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 237 ++++++++++++++++++++++++++---------------------------- 1 file changed, 114 insertions(+), 123 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8a8f9f0be911..37f938789344 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2611,7 +2611,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) #define EXT4_EXT_ZERO_LEN 7 /* - * This function is called by ext4_ext_get_blocks() if someone tries to write + * This function is called by ext4_ext_map_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized * extent into multiple extents (upto three - one initialized and two * uninitialized). @@ -2621,10 +2621,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * c> Splits in three extents: Somone is writing in middle of the extent */ static int ext4_ext_convert_to_initialized(handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path, - ext4_lblk_t iblock, - unsigned int max_blocks) + struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path) { struct ext4_extent *ex, newex, orig_ex; struct ext4_extent *ex1 = NULL; @@ -2640,20 +2639,20 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)iblock, max_blocks); + (unsigned long long)map->m_lblk, map->m_len); eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; - if (eof_block < iblock + max_blocks) - eof_block = iblock + max_blocks; + if (eof_block < map->m_lblk + map->m_len) + eof_block = map->m_lblk + map->m_len; depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - allocated = ee_len - (iblock - ee_block); - newblock = iblock - ee_block + ext_pblock(ex); + allocated = ee_len - (map->m_lblk - ee_block); + newblock = map->m_lblk - ee_block + ext_pblock(ex); ex2 = ex; orig_ex.ee_block = ex->ee_block; @@ -2683,10 +2682,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, return allocated; } - /* ex1: ee_block to iblock - 1 : uninitialized */ - if (iblock > ee_block) { + /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ + if (map->m_lblk > ee_block) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } @@ -2695,15 +2694,15 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, * we insert ex3, if ex1 is NULL. This is to avoid temporary * overlap of blocks. */ - if (!ex1 && allocated > max_blocks) - ex2->ee_len = cpu_to_le16(max_blocks); + if (!ex1 && allocated > map->m_len) + ex2->ee_len = cpu_to_le16(map->m_len); /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > max_blocks) { + if (allocated > map->m_len) { unsigned int newdepth; /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) { /* - * iblock == ee_block is handled by the zerouout + * map->m_lblk == ee_block is handled by the zerouout * at the beginning. * Mark first half uninitialized. * Mark second half initialized and zero out the @@ -2716,7 +2715,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_dirty(handle, inode, path + depth); ex3 = &newex; - ex3->ee_block = cpu_to_le32(iblock); + ex3->ee_block = cpu_to_le32(map->m_lblk); ext4_ext_store_pblock(ex3, newblock); ex3->ee_len = cpu_to_le16(allocated); err = ext4_ext_insert_extent(handle, inode, path, @@ -2729,7 +2728,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } else if (err) @@ -2751,8 +2750,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ depth = ext_depth(inode); ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, - iblock, path); + path = ext4_ext_find_extent(inode, map->m_lblk, + path); if (IS_ERR(path)) { err = PTR_ERR(path); return err; @@ -2772,9 +2771,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, return allocated; } ex3 = &newex; - ex3->ee_block = cpu_to_le32(iblock + max_blocks); - ext4_ext_store_pblock(ex3, newblock + max_blocks); - ex3->ee_len = cpu_to_le16(allocated - max_blocks); + ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); + ext4_ext_store_pblock(ex3, newblock + map->m_len); + ex3->ee_len = cpu_to_le16(allocated - map->m_len); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, 0); if (err == -ENOSPC && may_zeroout) { @@ -2787,7 +2786,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zeroed the full extent */ - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } else if (err) @@ -2807,7 +2806,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, depth = newdepth; ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, iblock, path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; @@ -2821,14 +2820,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (err) goto out; - allocated = max_blocks; + allocated = map->m_len; /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying * to insert a extent in the middle zerout directly * otherwise give the extent a chance to merge to left */ if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN && - iblock != ee_block && may_zeroout) { + map->m_lblk != ee_block && may_zeroout) { err = ext4_ext_zeroout(inode, &orig_ex); if (err) goto fix_extent_len; @@ -2838,7 +2837,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zero out the first half */ - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } } @@ -2849,12 +2848,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ if (ex1 && ex1 != ex) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } - /* ex2: iblock to iblock + maxblocks-1 : initialised */ - ex2->ee_block = cpu_to_le32(iblock); + /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */ + ex2->ee_block = cpu_to_le32(map->m_lblk); ext4_ext_store_pblock(ex2, newblock); ex2->ee_len = cpu_to_le16(allocated); if (ex2 != ex) @@ -2924,7 +2923,7 @@ fix_extent_len: } /* - * This function is called by ext4_ext_get_blocks() from + * This function is called by ext4_ext_map_blocks() from * ext4_get_blocks_dio_write() when DIO to write * to an uninitialized extent. * @@ -2947,9 +2946,8 @@ fix_extent_len: */ static int ext4_split_unwritten_extents(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, struct ext4_ext_path *path, - ext4_lblk_t iblock, - unsigned int max_blocks, int flags) { struct ext4_extent *ex, newex, orig_ex; @@ -2965,20 +2963,20 @@ static int ext4_split_unwritten_extents(handle_t *handle, ext_debug("ext4_split_unwritten_extents: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)iblock, max_blocks); + (unsigned long long)map->m_lblk, map->m_len); eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; - if (eof_block < iblock + max_blocks) - eof_block = iblock + max_blocks; + if (eof_block < map->m_lblk + map->m_len) + eof_block = map->m_lblk + map->m_len; depth = ext_depth(inode); eh = path[depth].p_hdr; ex = path[depth].p_ext; ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - allocated = ee_len - (iblock - ee_block); - newblock = iblock - ee_block + ext_pblock(ex); + allocated = ee_len - (map->m_lblk - ee_block); + newblock = map->m_lblk - ee_block + ext_pblock(ex); ex2 = ex; orig_ex.ee_block = ex->ee_block; @@ -2996,16 +2994,16 @@ static int ext4_split_unwritten_extents(handle_t *handle, * block where the write begins, and the write completely * covers the extent, then we don't need to split it. */ - if ((iblock == ee_block) && (allocated <= max_blocks)) + if ((map->m_lblk == ee_block) && (allocated <= map->m_len)) return allocated; err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; - /* ex1: ee_block to iblock - 1 : uninitialized */ - if (iblock > ee_block) { + /* ex1: ee_block to map->m_lblk - 1 : uninitialized */ + if (map->m_lblk > ee_block) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } @@ -3014,15 +3012,15 @@ static int ext4_split_unwritten_extents(handle_t *handle, * we insert ex3, if ex1 is NULL. This is to avoid temporary * overlap of blocks. */ - if (!ex1 && allocated > max_blocks) - ex2->ee_len = cpu_to_le16(max_blocks); + if (!ex1 && allocated > map->m_len) + ex2->ee_len = cpu_to_le16(map->m_len); /* ex3: to ee_block + ee_len : uninitialised */ - if (allocated > max_blocks) { + if (allocated > map->m_len) { unsigned int newdepth; ex3 = &newex; - ex3->ee_block = cpu_to_le32(iblock + max_blocks); - ext4_ext_store_pblock(ex3, newblock + max_blocks); - ex3->ee_len = cpu_to_le16(allocated - max_blocks); + ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len); + ext4_ext_store_pblock(ex3, newblock + map->m_len); + ex3->ee_len = cpu_to_le16(allocated - map->m_len); ext4_ext_mark_uninitialized(ex3); err = ext4_ext_insert_extent(handle, inode, path, ex3, flags); if (err == -ENOSPC && may_zeroout) { @@ -3035,7 +3033,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zeroed the full extent */ - /* blocks available from iblock */ + /* blocks available from map->m_lblk */ return allocated; } else if (err) @@ -3055,7 +3053,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, depth = newdepth; ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, iblock, path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); if (IS_ERR(path)) { err = PTR_ERR(path); goto out; @@ -3069,7 +3067,7 @@ static int ext4_split_unwritten_extents(handle_t *handle, if (err) goto out; - allocated = max_blocks; + allocated = map->m_len; } /* * If there was a change of depth as part of the @@ -3078,15 +3076,15 @@ static int ext4_split_unwritten_extents(handle_t *handle, */ if (ex1 && ex1 != ex) { ex1 = ex; - ex1->ee_len = cpu_to_le16(iblock - ee_block); + ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block); ext4_ext_mark_uninitialized(ex1); ex2 = &newex; } /* - * ex2: iblock to iblock + maxblocks-1 : to be direct IO written, - * uninitialised still. + * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written + * using direct I/O, uninitialised still. */ - ex2->ee_block = cpu_to_le32(iblock); + ex2->ee_block = cpu_to_le32(map->m_lblk); ext4_ext_store_pblock(ex2, newblock); ex2->ee_len = cpu_to_le16(allocated); ext4_ext_mark_uninitialized(ex2); @@ -3188,10 +3186,9 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, static int ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, unsigned int max_blocks, + struct ext4_map_blocks *map, struct ext4_ext_path *path, int flags, - unsigned int allocated, struct buffer_head *bh_result, - ext4_fsblk_t newblock) + unsigned int allocated, ext4_fsblk_t newblock) { int ret = 0; int err = 0; @@ -3199,15 +3196,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" "block %llu, max_blocks %u, flags %d, allocated %u", - inode->i_ino, (unsigned long long)iblock, max_blocks, + inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, flags, allocated); ext4_ext_show_leaf(inode, path); /* get_block() before submit the IO, split the extent */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { - ret = ext4_split_unwritten_extents(handle, - inode, path, iblock, - max_blocks, flags); + ret = ext4_split_unwritten_extents(handle, inode, map, + path, flags); /* * Flag the inode(non aio case) or end_io struct (aio case) * that this IO needs to convertion to written when IO is @@ -3218,7 +3214,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); if (ext4_should_dioread_nolock(inode)) - set_buffer_uninit(bh_result); + map->m_flags |= EXT4_MAP_UNINIT; goto out; } /* IO end_io complete, convert the filled extent to written */ @@ -3246,14 +3242,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * the buffer head will be unmapped so that * a read from the block returns 0s. */ - set_buffer_unwritten(bh_result); + map->m_flags |= EXT4_MAP_UNWRITTEN; goto out1; } /* buffered write, writepage time, convert*/ - ret = ext4_ext_convert_to_initialized(handle, inode, - path, iblock, - max_blocks); + ret = ext4_ext_convert_to_initialized(handle, inode, map, path); if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); out: @@ -3262,7 +3256,7 @@ out: goto out2; } else allocated = ret; - set_buffer_new(bh_result); + map->m_flags |= EXT4_MAP_NEW; /* * if we allocated more blocks than requested * we need to make sure we unmap the extra block @@ -3270,11 +3264,11 @@ out: * unmapped later when we find the buffer_head marked * new. */ - if (allocated > max_blocks) { + if (allocated > map->m_len) { unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, - newblock + max_blocks, - allocated - max_blocks); - allocated = max_blocks; + newblock + map->m_len, + allocated - map->m_len); + allocated = map->m_len; } /* @@ -3288,13 +3282,13 @@ out: ext4_da_update_reserve_space(inode, allocated, 0); map_out: - set_buffer_mapped(bh_result); + map->m_flags |= EXT4_MAP_MAPPED; out1: - if (allocated > max_blocks) - allocated = max_blocks; + if (allocated > map->m_len) + allocated = map->m_len; ext4_ext_show_leaf(inode, path); - bh_result->b_bdev = inode->i_sb->s_bdev; - bh_result->b_blocknr = newblock; + map->m_pblk = newblock; + map->m_len = allocated; out2: if (path) { ext4_ext_drop_refs(path); @@ -3320,10 +3314,8 @@ out2: * * return < 0, error case. */ -int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, - ext4_lblk_t iblock, - unsigned int max_blocks, struct buffer_head *bh_result, - int flags) +int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, + struct ext4_map_blocks *map, int flags) { struct ext4_ext_path *path = NULL; struct ext4_extent_header *eh; @@ -3334,12 +3326,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; - __clear_bit(BH_New, &bh_result->b_state); ext_debug("blocks %u/%u requested for inode %lu\n", - iblock, max_blocks, inode->i_ino); + map->m_lblk, map->m_len, inode->i_ino); /* check in cache */ - cache_type = ext4_ext_in_cache(inode, iblock, &newex); + cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); if (cache_type) { if (cache_type == EXT4_EXT_CACHE_GAP) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { @@ -3352,12 +3343,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* we should allocate requested block */ } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { /* block is already allocated */ - newblock = iblock + newblock = map->m_lblk - le32_to_cpu(newex.ee_block) + ext_pblock(&newex); /* number of remaining blocks in the extent */ allocated = ext4_ext_get_actual_len(&newex) - - (iblock - le32_to_cpu(newex.ee_block)); + (map->m_lblk - le32_to_cpu(newex.ee_block)); goto out; } else { BUG(); @@ -3365,7 +3356,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, } /* find extent for this block */ - path = ext4_ext_find_extent(inode, iblock, NULL); + path = ext4_ext_find_extent(inode, map->m_lblk, NULL); if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; @@ -3382,7 +3373,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (unlikely(path[depth].p_ext == NULL && depth != 0)) { EXT4_ERROR_INODE(inode, "bad extent address " "iblock: %d, depth: %d pblock %lld", - iblock, depth, path[depth].p_block); + map->m_lblk, depth, path[depth].p_block); err = -EIO; goto out2; } @@ -3400,12 +3391,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ - if (in_range(iblock, ee_block, ee_len)) { - newblock = iblock - ee_block + ee_start; + if (in_range(map->m_lblk, ee_block, ee_len)) { + newblock = map->m_lblk - ee_block + ee_start; /* number of remaining blocks in the extent */ - allocated = ee_len - (iblock - ee_block); - ext_debug("%u fit into %u:%d -> %llu\n", iblock, - ee_block, ee_len, newblock); + allocated = ee_len - (map->m_lblk - ee_block); + ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, + ee_block, ee_len, newblock); /* Do not put uninitialized extent in the cache */ if (!ext4_ext_is_uninitialized(ex)) { @@ -3415,8 +3406,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goto out; } ret = ext4_ext_handle_uninitialized_extents(handle, - inode, iblock, max_blocks, path, - flags, allocated, bh_result, newblock); + inode, map, path, flags, allocated, + newblock); return ret; } } @@ -3430,7 +3421,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * put just found gap into cache to speed up * subsequent requests */ - ext4_ext_put_gap_in_cache(inode, path, iblock); + ext4_ext_put_gap_in_cache(inode, path, map->m_lblk); goto out2; } /* @@ -3438,11 +3429,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ /* find neighbour allocated blocks */ - ar.lleft = iblock; + ar.lleft = map->m_lblk; err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft); if (err) goto out2; - ar.lright = iblock; + ar.lright = map->m_lblk; err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright); if (err) goto out2; @@ -3453,26 +3444,26 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is * EXT_UNINIT_MAX_LEN. */ - if (max_blocks > EXT_INIT_MAX_LEN && + if (map->m_len > EXT_INIT_MAX_LEN && !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) - max_blocks = EXT_INIT_MAX_LEN; - else if (max_blocks > EXT_UNINIT_MAX_LEN && + map->m_len = EXT_INIT_MAX_LEN; + else if (map->m_len > EXT_UNINIT_MAX_LEN && (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) - max_blocks = EXT_UNINIT_MAX_LEN; + map->m_len = EXT_UNINIT_MAX_LEN; - /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ - newex.ee_block = cpu_to_le32(iblock); - newex.ee_len = cpu_to_le16(max_blocks); + /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ + newex.ee_block = cpu_to_le32(map->m_lblk); + newex.ee_len = cpu_to_le16(map->m_len); err = ext4_ext_check_overlap(inode, &newex, path); if (err) allocated = ext4_ext_get_actual_len(&newex); else - allocated = max_blocks; + allocated = map->m_len; /* allocate new block */ ar.inode = inode; - ar.goal = ext4_ext_find_goal(inode, path, iblock); - ar.logical = iblock; + ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk); + ar.logical = map->m_lblk; ar.len = allocated; if (S_ISREG(inode->i_mode)) ar.flags = EXT4_MB_HINT_DATA; @@ -3506,7 +3497,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, EXT4_STATE_DIO_UNWRITTEN); } if (ext4_should_dioread_nolock(inode)) - set_buffer_uninit(bh_result); + map->m_flags |= EXT4_MAP_UNINIT; } if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { @@ -3518,7 +3509,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goto out2; } last_ex = EXT_LAST_EXTENT(eh); - if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) + if (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + ext4_ext_get_actual_len(last_ex)) EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; } @@ -3536,9 +3527,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); - if (allocated > max_blocks) - allocated = max_blocks; - set_buffer_new(bh_result); + if (allocated > map->m_len) + allocated = map->m_len; + map->m_flags |= EXT4_MAP_NEW; /* * Update reserved blocks/metadata blocks after successful @@ -3552,18 +3543,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * when it is _not_ an uninitialized extent. */ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { - ext4_ext_put_in_cache(inode, iblock, allocated, newblock, + ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, EXT4_EXT_CACHE_EXTENT); ext4_update_inode_fsync_trans(handle, inode, 1); } else ext4_update_inode_fsync_trans(handle, inode, 0); out: - if (allocated > max_blocks) - allocated = max_blocks; + if (allocated > map->m_len) + allocated = map->m_len; ext4_ext_show_leaf(inode, path); - set_buffer_mapped(bh_result); - bh_result->b_bdev = inode->i_sb->s_bdev; - bh_result->b_blocknr = newblock; + map->m_flags |= EXT4_MAP_MAPPED; + map->m_pblk = newblock; + map->m_len = allocated; out2: if (path) { ext4_ext_drop_refs(path); @@ -3729,7 +3720,7 @@ retry: if (ret <= 0) { #ifdef EXT4FS_DEBUG WARN_ON(ret <= 0); - printk(KERN_ERR "%s: ext4_ext_get_blocks " + printk(KERN_ERR "%s: ext4_ext_map_blocks " "returned error inode#%lu, block=%u, " "max_blocks=%u", __func__, inode->i_ino, block, max_blocks); @@ -3806,7 +3797,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) { WARN_ON(ret <= 0); - printk(KERN_ERR "%s: ext4_ext_get_blocks " + printk(KERN_ERR "%s: ext4_ext_map_blocks " "returned error inode#%lu, block=%u, " "max_blocks=%u", __func__, inode->i_ino, block, max_blocks); -- cgit v1.2.2 From 2ed886852adfcb070bf350e66a0da0d98b2f3ab5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 20:00:00 -0400 Subject: ext4: Convert callers of ext4_get_blocks() to use ext4_map_blocks() This saves a huge amount of stack space by avoiding unnecesary struct buffer_head's from being allocated on the stack. In addition, to make the code easier to understand, collapse and refactor ext4_get_block(), ext4_get_block_write(), noalloc_get_block_write(), into a single function. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 37f938789344..d6801eb7232b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3667,13 +3667,12 @@ static void ext4_falloc_update_inode(struct inode *inode, long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { handle_t *handle; - ext4_lblk_t block; loff_t new_size; unsigned int max_blocks; int ret = 0; int ret2 = 0; int retries = 0; - struct buffer_head map_bh; + struct ext4_map_blocks map; unsigned int credits, blkbits = inode->i_blkbits; /* @@ -3687,13 +3686,13 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) if (S_ISDIR(inode->i_mode)) return -ENODEV; - block = offset >> blkbits; + map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because * If blocksize = 4096 offset = 3072 and len = 2048 */ max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - block; + - map.m_lblk; /* * credits to insert 1 extent into extent tree */ @@ -3706,16 +3705,14 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) } retry: while (ret >= 0 && ret < max_blocks) { - block = block + ret; - max_blocks = max_blocks - ret; + map.m_lblk = map.m_lblk + ret; + map.m_len = max_blocks = max_blocks - ret; handle = ext4_journal_start(inode, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); break; } - map_bh.b_state = 0; - ret = ext4_get_blocks(handle, inode, block, - max_blocks, &map_bh, + ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); if (ret <= 0) { #ifdef EXT4FS_DEBUG @@ -3729,14 +3726,14 @@ retry: ret2 = ext4_journal_stop(handle); break; } - if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len, + if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, blkbits) >> blkbits)) new_size = offset + len; else - new_size = (block + ret) << blkbits; + new_size = (map.m_lblk + ret) << blkbits; ext4_falloc_update_inode(inode, mode, new_size, - buffer_new(&map_bh)); + (map.m_flags & EXT4_MAP_NEW)); ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); if (ret2) @@ -3765,42 +3762,39 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, ssize_t len) { handle_t *handle; - ext4_lblk_t block; unsigned int max_blocks; int ret = 0; int ret2 = 0; - struct buffer_head map_bh; + struct ext4_map_blocks map; unsigned int credits, blkbits = inode->i_blkbits; - block = offset >> blkbits; + map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because * If blocksize = 4096 offset = 3072 and len = 2048 */ - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - - block; + max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - + map.m_lblk); /* * credits to insert 1 extent into extent tree */ credits = ext4_chunk_trans_blocks(inode, max_blocks); while (ret >= 0 && ret < max_blocks) { - block = block + ret; - max_blocks = max_blocks - ret; + map.m_lblk += ret; + map.m_len = (max_blocks -= ret); handle = ext4_journal_start(inode, credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); break; } - map_bh.b_state = 0; - ret = ext4_get_blocks(handle, inode, block, - max_blocks, &map_bh, + ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) { WARN_ON(ret <= 0); printk(KERN_ERR "%s: ext4_ext_map_blocks " "returned error inode#%lu, block=%u, " "max_blocks=%u", __func__, - inode->i_ino, block, max_blocks); + inode->i_ino, map.m_lblk, map.m_len); } ext4_mark_inode_dirty(handle, inode); ret2 = ext4_journal_stop(handle); -- cgit v1.2.2 From 24676da469f50f433baa347845639662c561d1f6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 21:00:00 -0400 Subject: ext4: Convert calls of ext4_error() to EXT4_ERROR_INODE() EXT4_ERROR_INODE() tends to provide better error information and in a more consistent format. Some errors were not even identifying the inode or directory which was corrupted, which made them not very useful. Addresses-Google-Bug: #2507977 Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d6801eb7232b..91c3873970e4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -439,10 +439,10 @@ static int __ext4_ext_check(const char *function, struct inode *inode, return 0; corrupted: - __ext4_error(inode->i_sb, function, - "bad header/extent in inode #%lu: %s - magic %x, " + ext4_error_inode(function, inode, + "bad header/extent: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", - inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), + error_msg, le16_to_cpu(eh->eh_magic), le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), max, le16_to_cpu(eh->eh_depth), depth); @@ -1622,9 +1622,7 @@ int ext4_ext_try_to_merge(struct inode *inode, merge_done = 1; WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries) - ext4_error(inode->i_sb, - "inode#%lu, eh->eh_entries = 0!", - inode->i_ino); + EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!"); } return merge_done; -- cgit v1.2.2 From 12e9b892002d9af057655d35b44db8ee9243b0dc Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Sun, 16 May 2010 22:00:00 -0400 Subject: ext4: Use bitops to read/modify i_flags in struct ext4_inode_info At several places we modify EXT4_I(inode)->i_flags without holding i_mutex (ext4_do_update_inode, ...). These modifications are racy and we can lose updates to i_flags. So convert handling of i_flags to use bitops which are atomic. https://bugzilla.kernel.org/show_bug.cgi?id=15792 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 91c3873970e4..42d8ce91adbd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3498,7 +3498,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, map->m_flags |= EXT4_MAP_UNINIT; } - if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { + if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { if (unlikely(!eh->eh_entries)) { EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 ee_block %d", @@ -3509,7 +3509,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, last_ex = EXT_LAST_EXTENT(eh); if (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + ext4_ext_get_actual_len(last_ex)) - EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; + ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { @@ -3650,7 +3650,7 @@ static void ext4_falloc_update_inode(struct inode *inode, * can proceed even if the new size is the same as i_size. */ if (new_size > i_size_read(inode)) - EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; + ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } } @@ -3677,7 +3677,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) * currently supporting (pre)allocate mode for extent-based * files _only_ */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; /* preallocation to directories is currently not supported */ @@ -3922,7 +3922,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int error = 0; /* fallback to generic here if not in extents fmt */ - if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return generic_block_fiemap(inode, fieinfo, start, len, ext4_get_block); -- cgit v1.2.2 From f70f362b4a6fe47c239dbfb3efc0cc2c10e4f09c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 16 May 2010 23:00:00 -0400 Subject: ext4: Avoid crashing on NULL ptr dereference on a filesystem error If the EOFBLOCK_FL flag is set when it should not be and the inode is zero length, then eh_entries is zero, and ex is NULL, so dereferencing ex to print ex->ee_block causes a kernel OOPS in ext4_ext_map_blocks(). On top of that, the error message which is printed isn't very helpful. So we fix this by printing something more explanatory which doesn't involve trying to print ex->ee_block. Addresses-Google-Bug: #2655740 Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 42d8ce91adbd..e40d2b793f2d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3370,8 +3370,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ if (unlikely(path[depth].p_ext == NULL && depth != 0)) { EXT4_ERROR_INODE(inode, "bad extent address " - "iblock: %d, depth: %d pblock %lld", - map->m_lblk, depth, path[depth].p_block); + "lblock: %lu, depth: %d pblock %lld", + (unsigned long) map->m_lblk, depth, + path[depth].p_block); err = -EIO; goto out2; } @@ -3501,8 +3502,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) { if (unlikely(!eh->eh_entries)) { EXT4_ERROR_INODE(inode, - "eh->eh_entries == 0 ee_block %d", - ex->ee_block); + "eh->eh_entries == 0 and " + "EOFBLOCKS_FL set"); err = -EIO; goto out2; } -- cgit v1.2.2 From 786ec7915e530936b9eb2e3d12274145cab7aa7d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 17 May 2010 00:00:00 -0400 Subject: ext4: Clear the EXT4_EOFBLOCKS_FL flag only when warranted Dimitry Monakhov discovered an edge case where it was possible for the EXT4_EOFBLOCKS_FL flag could get cleared unnecessarily. This is true; I have a test case that can be exercised via downloading and decompressing the file: wget ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/ext4-testcases/eofblocks-fl-test-case.img.bz2 bunzip2 eofblocks-fl-test-case.img dd if=/dev/zero of=eofblocks-fl-test-case.img bs=1k seek=17925 bs=1k count=1 conv=notrunc However, triggering it in real life is highly unlikely since it requires an extremely fragmented sparse file with a hole in exactly the right place in the extent tree. (It actually took quite a bit of work to generate this test case.) Still, it's nice to get even extreme corner cases to be correct, so this patch makes sure that we don't clear the EXT4_EOFBLOCKS_FL incorrectly even in this corner case. Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e40d2b793f2d..ffcaa1137def 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3319,7 +3319,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent newex, *ex, *last_ex; ext4_fsblk_t newblock; - int err = 0, depth, ret, cache_type; + int i, err = 0, depth, ret, cache_type; unsigned int allocated = 0; struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; @@ -3508,8 +3508,20 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, goto out2; } last_ex = EXT_LAST_EXTENT(eh); - if (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) - + ext4_ext_get_actual_len(last_ex)) + /* + * If the current leaf block was reached by looking at + * the last index block all the way down the tree, and + * we are extending the inode beyond the last extent + * in the current leaf block, then clear the + * EOFBLOCKS_FL flag. + */ + for (i = depth-1; i >= 0; i--) { + if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr)) + break; + } + if ((i < 0) && + (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) + + ext4_ext_get_actual_len(last_ex))) ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); -- cgit v1.2.2 From 0617b83fa239db9743a18ce6cc0e556f4d0fd567 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 17 May 2010 01:00:00 -0400 Subject: ext4: restart ext4_ext_remove_space() after transaction restart If i_data_sem was internally dropped due to transaction restart, it is necessary to restart path look-up because extents tree was possibly modified by ext4_get_block(). https://bugzilla.kernel.org/show_bug.cgi?id=15827 Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" Acked-by: Jan Kara --- fs/ext4/extents.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ffcaa1137def..0a47becf668a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -107,11 +107,8 @@ static int ext4_ext_truncate_extend_restart(handle_t *handle, if (err <= 0) return err; err = ext4_truncate_restart_trans(handle, inode, needed); - /* - * We have dropped i_data_sem so someone might have cached again - * an extent we are going to truncate. - */ - ext4_ext_invalidate_cache(inode); + if (err == 0) + err = -EAGAIN; return err; } @@ -2359,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) int depth = ext_depth(inode); struct ext4_ext_path *path; handle_t *handle; - int i = 0, err = 0; + int i, err; ext_debug("truncate since %u\n", start); @@ -2368,23 +2365,26 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) if (IS_ERR(handle)) return PTR_ERR(handle); +again: ext4_ext_invalidate_cache(inode); /* * We start scanning from right side, freeing all the blocks * after i_size and walking into the tree depth-wise. */ + depth = ext_depth(inode); path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS); if (path == NULL) { ext4_journal_stop(handle); return -ENOMEM; } + path[0].p_depth = depth; path[0].p_hdr = ext_inode_hdr(inode); if (ext4_ext_check(inode, path[0].p_hdr, depth)) { err = -EIO; goto out; } - path[0].p_depth = depth; + i = err = 0; while (i >= 0 && err == 0) { if (i == depth) { @@ -2478,6 +2478,8 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) out: ext4_ext_drop_refs(path); kfree(path); + if (err == -EAGAIN) + goto again; ext4_journal_stop(handle); return err; -- cgit v1.2.2 From 60e6679e28518ccd67169c4a539d8cc7490eb8a6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 17 May 2010 07:00:00 -0400 Subject: ext4: Drop whitespace at end of lines This patch was generated using: #!/usr/bin/perl -i while (<>) { s/[ ]+$//; print; } Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0a47becf668a..377309c1af65 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -182,10 +182,10 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) { /* * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME - * block groups per flexgroup, reserve the first block - * group for directories and special files. Regular + * block groups per flexgroup, reserve the first block + * group for directories and special files. Regular * files will start at the second block group. This - * tends to speed up directory access and improves + * tends to speed up directory access and improves * fsck times. */ block_group &= ~(flex_size-1); @@ -2034,7 +2034,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, struct ext4_ext_cache *cex; int ret = EXT4_EXT_CACHE_NO; - /* + /* * We borrow i_block_reservation_lock to protect i_cached_extent */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); -- cgit v1.2.2