From ca0c9584b1f16bd5911893647cb7f1be82e60554 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 3 Oct 2009 19:48:22 +0900 Subject: this_cpu: Straight transformations Use this_cpu_ptr and __this_cpu_ptr in locations where straight transformations are possible because per_cpu_ptr is used with either smp_processor_id() or raw_smp_processor_id(). cc: David Howells Acked-by: Tejun Heo cc: Ingo Molnar cc: Rusty Russell cc: Eric Dumazet Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bba12824defa..d527fd384582 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3932,7 +3932,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) * per cpu locality group is to reduce the contention between block * request from multiple CPUs. */ - ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); + ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups); /* we're going to use group allocation */ ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; -- cgit v1.2.2 From 2de770a406b06dfc619faabbf5d85c835ed3f2e1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:25:49 -0500 Subject: ext4: fix potential buffer head leak when add_dirent_to_buf() returns ENOSPC Previously add_dirent_to_buf() did not free its passed-in buffer head in the case of ENOSPC, since in some cases the caller still needed it. However, this led to potential buffer head leaks since not all callers dealt with this correctly. Fix this by making simplifying the freeing convention; now add_dirent_to_buf() *never* frees the passed-in buffer head, and leaves that to the responsibility of its caller. This makes things cleaner and easier to prove that the code is neither leaking buffer heads or calling brelse() one time too many. Signed-off-by: "Theodore Ts'o" Cc: Curt Wohlgemuth Cc: stable@kernel.org --- fs/ext4/namei.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6d2c1b897fc7..fde08c919d12 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1292,9 +1292,6 @@ errout: * add_dirent_to_buf will attempt search the directory block for * space. It will return -ENOSPC if no space is available, and -EIO * and -EEXIST if directory entry already exists. - * - * NOTE! bh is NOT released in the case where ENOSPC is returned. In - * all other cases bh is released. */ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, struct inode *inode, struct ext4_dir_entry_2 *de, @@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, top = bh->b_data + blocksize - reclen; while ((char *) de <= top) { if (!ext4_check_dir_entry("ext4_add_entry", dir, de, - bh, offset)) { - brelse(bh); + bh, offset)) return -EIO; - } - if (ext4_match(namelen, name, de)) { - brelse(bh); + if (ext4_match(namelen, name, de)) return -EEXIST; - } nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); if ((de->inode? rlen - nlen: rlen) >= reclen) @@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, err = ext4_journal_get_write_access(handle, bh); if (err) { ext4_std_error(dir->i_sb, err); - brelse(bh); return err; } @@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, err = ext4_handle_dirty_metadata(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); - brelse(bh); return 0; } @@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, if (!(de)) return retval; - return add_dirent_to_buf(handle, dentry, inode, de, bh); + retval = add_dirent_to_buf(handle, dentry, inode, de, bh); + brelse(bh); + return retval; } /* @@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if(!bh) return retval; retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (retval != -ENOSPC) + if (retval != -ENOSPC) { + brelse(bh); return retval; + } if (blocks == 1 && !dx_fallback && EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) @@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); - return add_dirent_to_buf(handle, dentry, inode, de, bh); + retval = add_dirent_to_buf(handle, dentry, inode, de, bh); + brelse(bh); + return retval; } /* @@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, goto journal_error; err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (err != -ENOSPC) { - bh = NULL; + if (err != -ENOSPC) goto cleanup; - } /* Block full, should compress but for now just split */ dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", @@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (!de) goto cleanup; err = add_dirent_to_buf(handle, dentry, inode, de, bh); - bh = NULL; goto cleanup; journal_error: -- cgit v1.2.2 From 503358ae01b70ce6909d19dd01287093f6b6271c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:24:46 -0500 Subject: ext4: avoid divide by zero when trying to mount a corrupted file system If s_log_groups_per_flex is greater than 31, then groups_per_flex will will overflow and cause a divide by zero error. This can cause kernel BUG if such a file system is mounted. Thanks to Nageswara R Sastry for analyzing the failure and providing an initial patch. http://bugzilla.kernel.org/show_bug.cgi?id=14287 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d4ca92aab514..8662b2e6e9f9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1673,14 +1673,14 @@ static int ext4_fill_flex_info(struct super_block *sb) size_t size; int i; - if (!sbi->s_es->s_log_groups_per_flex) { + sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; + groups_per_flex = 1 << sbi->s_log_groups_per_flex; + + if (groups_per_flex < 2) { sbi->s_log_groups_per_flex = 0; return 1; } - sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; - groups_per_flex = 1 << sbi->s_log_groups_per_flex; - /* We allocate both existing and potentially added groups */ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << -- cgit v1.2.2 From f868a48d06f8886cb0367568a12367fa4f21ea0d Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Mon, 23 Nov 2009 07:25:48 -0500 Subject: ext4: fix the returned block count if EXT4_IOC_MOVE_EXT fails If the EXT4_IOC_MOVE_EXT ioctl fails, the number of blocks that were exchanged before the failure should be returned to the userspace caller. Unfortunately, currently if the block size is not the same as the page size, the returned block count that is returned is the page-aligned block count instead of the actual block count. This commit addresses this bug. Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 139 ++++++++++++++++++++++++++------------------------ 1 file changed, 73 insertions(+), 66 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 25b6b1457360..83f8c9e47c60 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -661,6 +661,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, * @donor_inode: donor inode * @from: block offset of orig_inode * @count: block count to be replaced + * @err: pointer to save return value * * Replace original inode extents and donor inode extents page by page. * We implement this replacement in the following three steps: @@ -671,19 +672,18 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, * 3. Change the block information of donor inode to point at the saved * original inode blocks in the dummy extents. * - * Return 0 on success, or a negative error value on failure. + * Return replaced block count. */ static int mext_replace_branches(handle_t *handle, struct inode *orig_inode, struct inode *donor_inode, ext4_lblk_t from, - ext4_lblk_t count) + ext4_lblk_t count, int *err) { struct ext4_ext_path *orig_path = NULL; struct ext4_ext_path *donor_path = NULL; struct ext4_extent *oext, *dext; struct ext4_extent tmp_dext, tmp_oext; ext4_lblk_t orig_off = from, donor_off = from; - int err = 0; int depth; int replaced_count = 0; int dext_alen; @@ -691,13 +691,13 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, mext_double_down_write(orig_inode, donor_inode); /* Get the original extent for the block "orig_off" */ - err = get_ext_path(orig_inode, orig_off, &orig_path); - if (err) + *err = get_ext_path(orig_inode, orig_off, &orig_path); + if (*err) goto out; /* Get the donor extent for the head */ - err = get_ext_path(donor_inode, donor_off, &donor_path); - if (err) + *err = get_ext_path(donor_inode, donor_off, &donor_path); + if (*err) goto out; depth = ext_depth(orig_inode); oext = orig_path[depth].p_ext; @@ -707,9 +707,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, dext = donor_path[depth].p_ext; tmp_dext = *dext; - err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, donor_off, count); - if (err) + if (*err) goto out; /* Loop for the donor extents */ @@ -718,7 +718,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, if (!dext) { ext4_error(donor_inode->i_sb, __func__, "The extent for donor must be found"); - err = -EIO; + *err = -EIO; goto out; } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { ext4_error(donor_inode->i_sb, __func__, @@ -726,20 +726,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, "extent(%u) should be equal", donor_off, le32_to_cpu(tmp_dext.ee_block)); - err = -EIO; + *err = -EIO; goto out; } /* Set donor extent to orig extent */ - err = mext_leaf_block(handle, orig_inode, + *err = mext_leaf_block(handle, orig_inode, orig_path, &tmp_dext, &orig_off); - if (err < 0) + if (*err) goto out; /* Set orig extent to donor extent */ - err = mext_leaf_block(handle, donor_inode, + *err = mext_leaf_block(handle, donor_inode, donor_path, &tmp_oext, &donor_off); - if (err < 0) + if (*err) goto out; dext_alen = ext4_ext_get_actual_len(&tmp_dext); @@ -753,35 +753,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, if (orig_path) ext4_ext_drop_refs(orig_path); - err = get_ext_path(orig_inode, orig_off, &orig_path); - if (err) + *err = get_ext_path(orig_inode, orig_off, &orig_path); + if (*err) goto out; depth = ext_depth(orig_inode); oext = orig_path[depth].p_ext; - if (le32_to_cpu(oext->ee_block) + - ext4_ext_get_actual_len(oext) <= orig_off) { - err = 0; - goto out; - } tmp_oext = *oext; if (donor_path) ext4_ext_drop_refs(donor_path); - err = get_ext_path(donor_inode, donor_off, &donor_path); - if (err) + *err = get_ext_path(donor_inode, donor_off, &donor_path); + if (*err) goto out; depth = ext_depth(donor_inode); dext = donor_path[depth].p_ext; - if (le32_to_cpu(dext->ee_block) + - ext4_ext_get_actual_len(dext) <= donor_off) { - err = 0; - goto out; - } tmp_dext = *dext; - err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, donor_off, count - replaced_count); - if (err) + if (*err) goto out; } @@ -796,7 +786,7 @@ out: } mext_double_up_write(orig_inode, donor_inode); - return err; + return replaced_count; } /** @@ -808,16 +798,17 @@ out: * @data_offset_in_page: block index where data swapping starts * @block_len_in_page: the number of blocks to be swapped * @uninit: orig extent is uninitialized or not + * @err: pointer to save return value * * Save the data in original inode blocks and replace original inode extents * with donor inode extents by calling mext_replace_branches(). - * Finally, write out the saved data in new original inode blocks. Return 0 - * on success, or a negative error value on failure. + * Finally, write out the saved data in new original inode blocks. Return + * replaced block count. */ static int move_extent_per_page(struct file *o_filp, struct inode *donor_inode, pgoff_t orig_page_offset, int data_offset_in_page, - int block_len_in_page, int uninit) + int block_len_in_page, int uninit, int *err) { struct inode *orig_inode = o_filp->f_dentry->d_inode; struct address_space *mapping = orig_inode->i_mapping; @@ -829,9 +820,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, long long offs = orig_page_offset << PAGE_CACHE_SHIFT; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int w_flags = 0; - unsigned int tmp_data_len, data_len; + unsigned int tmp_data_size, data_size, replaced_size; void *fsdata; - int ret, i, jblocks; + int i, jblocks; + int err2 = 0; + int replaced_count = 0; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; /* @@ -841,8 +834,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; handle = ext4_journal_start(orig_inode, jblocks); if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - return ret; + *err = PTR_ERR(handle); + return 0; } if (segment_eq(get_fs(), KERNEL_DS)) @@ -858,9 +851,9 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, * Just swap data blocks between orig and donor. */ if (uninit) { - ret = mext_replace_branches(handle, orig_inode, - donor_inode, orig_blk_offset, - block_len_in_page); + replaced_count = mext_replace_branches(handle, orig_inode, + donor_inode, orig_blk_offset, + block_len_in_page, err); /* Clear the inode cache not to refer to the old data */ ext4_ext_invalidate_cache(orig_inode); @@ -870,27 +863,28 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, offs = (long long)orig_blk_offset << orig_inode->i_blkbits; - /* Calculate data_len */ + /* Calculate data_size */ if ((orig_blk_offset + block_len_in_page - 1) == ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { /* Replace the last block */ - tmp_data_len = orig_inode->i_size & (blocksize - 1); + tmp_data_size = orig_inode->i_size & (blocksize - 1); /* - * If data_len equal zero, it shows data_len is multiples of + * If data_size equal zero, it shows data_size is multiples of * blocksize. So we set appropriate value. */ - if (tmp_data_len == 0) - tmp_data_len = blocksize; + if (tmp_data_size == 0) + tmp_data_size = blocksize; - data_len = tmp_data_len + + data_size = tmp_data_size + ((block_len_in_page - 1) << orig_inode->i_blkbits); - } else { - data_len = block_len_in_page << orig_inode->i_blkbits; - } + } else + data_size = block_len_in_page << orig_inode->i_blkbits; + + replaced_size = data_size; - ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, + *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, &page, &fsdata); - if (unlikely(ret < 0)) + if (unlikely(*err < 0)) goto out; if (!PageUptodate(page)) { @@ -911,10 +905,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, /* Release old bh and drop refs */ try_to_release_page(page, 0); - ret = mext_replace_branches(handle, orig_inode, donor_inode, - orig_blk_offset, block_len_in_page); - if (ret < 0) - goto out; + replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, + orig_blk_offset, block_len_in_page, + &err2); + if (err2) { + if (replaced_count) { + block_len_in_page = replaced_count; + replaced_size = + block_len_in_page << orig_inode->i_blkbits; + } else + goto out; + } /* Clear the inode cache not to refer to the old data */ ext4_ext_invalidate_cache(orig_inode); @@ -928,16 +929,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, bh = bh->b_this_page; for (i = 0; i < block_len_in_page; i++) { - ret = ext4_get_block(orig_inode, + *err = ext4_get_block(orig_inode, (sector_t)(orig_blk_offset + i), bh, 0); - if (ret < 0) + if (*err < 0) goto out; if (bh->b_this_page != NULL) bh = bh->b_this_page; } - ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, + *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, page, fsdata); page = NULL; @@ -951,7 +952,10 @@ out: out2: ext4_journal_stop(handle); - return ret < 0 ? ret : 0; + if (err2) + *err = err2; + + return replaced_count; } /** @@ -1367,15 +1371,17 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, while (orig_page_offset <= seq_end_page) { /* Swap original branches with new branches */ - ret1 = move_extent_per_page(o_filp, donor_inode, + block_len_in_page = move_extent_per_page( + o_filp, donor_inode, orig_page_offset, data_offset_in_page, - block_len_in_page, uninit); - if (ret1 < 0) - goto out; - orig_page_offset++; + block_len_in_page, uninit, + &ret1); + /* Count how many blocks we have exchanged */ *moved_len += block_len_in_page; + if (ret1 < 0) + goto out; if (*moved_len > len) { ext4_error(orig_inode->i_sb, __func__, "We replaced blocks too much! " @@ -1385,6 +1391,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, goto out; } + orig_page_offset++; data_offset_in_page = 0; rest_blocks -= block_len_in_page; if (rest_blocks > blocks_per_page) -- cgit v1.2.2 From fc04cb49a898c372a22b21fffc47f299d8710801 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Mon, 23 Nov 2009 07:24:43 -0500 Subject: ext4: fix lock order problem in ext4_move_extents() ext4_move_extents() checks the logical block contiguousness of original file with ext4_find_extent() and mext_next_extent(). Therefore the extent which ext4_ext_path structure indicates must not be changed between above functions. But in current implementation, there is no i_data_sem protection between ext4_ext_find_extent() and mext_next_extent(). So the extent which ext4_ext_path structure indicates may be overwritten by delalloc. As a result, ext4_move_extents() will exchange wrong blocks between original and donor files. I change the place where acquire/release i_data_sem to solve this problem. Moreover, I changed move_extent_per_page() to start transaction first, and then acquire i_data_sem. Without this change, there is a possibility of the deadlock between mmap() and ext4_move_extents(): * NOTE: "A", "B" and "C" mean different processes A-1: ext4_ext_move_extents() acquires i_data_sem of two inodes. B: do_page_fault() starts the transaction (T), and then tries to acquire i_data_sem. But process "A" is already holding it, so it is kept waiting. C: While "A" and "B" running, kjournald2 tries to commit transaction (T) but it is under updating, so kjournald2 waits for it. A-2: Call ext4_journal_start with holding i_data_sem, but transaction (T) is locked. Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 117 +++++++++++++++++++++++--------------------------- 1 file changed, 53 insertions(+), 64 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 83f8c9e47c60..a7410b34c5ed 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -77,12 +77,14 @@ static int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent **extent) { + struct ext4_extent_header *eh; int ppos, leaf_ppos = path->p_depth; ppos = leaf_ppos; if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { /* leaf block */ *extent = ++path[ppos].p_ext; + path[ppos].p_block = ext_pblock(path[ppos].p_ext); return 0; } @@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, ext_block_hdr(path[cur_ppos+1].p_bh); } + path[leaf_ppos].p_ext = *extent = NULL; + + eh = path[leaf_ppos].p_hdr; + if (le16_to_cpu(eh->eh_entries) == 0) + /* empty leaf is found */ + return -ENODATA; + /* leaf block */ path[leaf_ppos].p_ext = *extent = EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); + path[leaf_ppos].p_block = + ext_pblock(path[leaf_ppos].p_ext); return 0; } } @@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, } /** - * mext_double_down_read - Acquire two inodes' read semaphore - * - * @orig_inode: original inode structure - * @donor_inode: donor inode structure - * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. - */ -static void -mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) -{ - struct inode *first = orig_inode, *second = donor_inode; - - /* - * Use the inode number to provide the stable locking order instead - * of its address, because the C language doesn't guarantee you can - * compare pointers that don't come from the same array. - */ - if (donor_inode->i_ino < orig_inode->i_ino) { - first = donor_inode; - second = orig_inode; - } - - down_read(&EXT4_I(first)->i_data_sem); - down_read(&EXT4_I(second)->i_data_sem); -} - -/** - * mext_double_down_write - Acquire two inodes' write semaphore + * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem * * @orig_inode: original inode structure * @donor_inode: donor inode structure - * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. + * Acquire write lock of i_data_sem of the two inodes (orig and donor) by + * i_ino order. */ static void -mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) +double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) { struct inode *first = orig_inode, *second = donor_inode; @@ -207,28 +193,14 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) } /** - * mext_double_up_read - Release two inodes' read semaphore + * double_up_write_data_sem - Release two inodes' write lock of i_data_sem * * @orig_inode: original inode structure to be released its lock first * @donor_inode: donor inode structure to be released its lock second - * Release read semaphore of two inodes (orig and donor). + * Release write lock of i_data_sem of two inodes (orig and donor). */ static void -mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) -{ - up_read(&EXT4_I(orig_inode)->i_data_sem); - up_read(&EXT4_I(donor_inode)->i_data_sem); -} - -/** - * mext_double_up_write - Release two inodes' write semaphore - * - * @orig_inode: original inode structure to be released its lock first - * @donor_inode: donor inode structure to be released its lock second - * Release write semaphore of two inodes (orig and donor). - */ -static void -mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) +double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) { up_write(&EXT4_I(orig_inode)->i_data_sem); up_write(&EXT4_I(donor_inode)->i_data_sem); @@ -688,8 +660,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, int replaced_count = 0; int dext_alen; - mext_double_down_write(orig_inode, donor_inode); - /* Get the original extent for the block "orig_off" */ *err = get_ext_path(orig_inode, orig_off, &orig_path); if (*err) @@ -785,7 +755,6 @@ out: kfree(donor_path); } - mext_double_up_write(orig_inode, donor_inode); return replaced_count; } @@ -851,6 +820,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, * Just swap data blocks between orig and donor. */ if (uninit) { + /* + * Protect extent trees against block allocations + * via delalloc + */ + double_down_write_data_sem(orig_inode, donor_inode); replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, orig_blk_offset, block_len_in_page, err); @@ -858,6 +832,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, /* Clear the inode cache not to refer to the old data */ ext4_ext_invalidate_cache(orig_inode); ext4_ext_invalidate_cache(donor_inode); + double_up_write_data_sem(orig_inode, donor_inode); goto out2; } @@ -905,6 +880,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, /* Release old bh and drop refs */ try_to_release_page(page, 0); + /* Protect extent trees against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, orig_blk_offset, block_len_in_page, &err2); @@ -913,14 +890,18 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, block_len_in_page = replaced_count; replaced_size = block_len_in_page << orig_inode->i_blkbits; - } else + } else { + double_up_write_data_sem(orig_inode, donor_inode); goto out; + } } /* Clear the inode cache not to refer to the old data */ ext4_ext_invalidate_cache(orig_inode); ext4_ext_invalidate_cache(donor_inode); + double_up_write_data_sem(orig_inode, donor_inode); + if (!page_has_buffers(page)) create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); @@ -1236,16 +1217,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } - /* protect orig and donor against a truncate */ + /* Protect orig and donor inodes against a truncate */ ret1 = mext_inode_double_lock(orig_inode, donor_inode); if (ret1 < 0) return ret1; - mext_double_down_read(orig_inode, donor_inode); + /* Protect extent tree against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, donor_start, &len, *moved_len); - mext_double_up_read(orig_inode, donor_inode); if (ret1) goto out; @@ -1308,6 +1289,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, ext4_ext_get_actual_len(ext_cur), block_end + 1) - max(le32_to_cpu(ext_cur->ee_block), block_start); + /* Discard preallocations of two inodes */ + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); + while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { seq_blocks += add_blocks; @@ -1359,14 +1344,14 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, seq_start = le32_to_cpu(ext_cur->ee_block); rest_blocks = seq_blocks; - /* Discard preallocations of two inodes */ - down_write(&EXT4_I(orig_inode)->i_data_sem); - ext4_discard_preallocations(orig_inode); - up_write(&EXT4_I(orig_inode)->i_data_sem); - - down_write(&EXT4_I(donor_inode)->i_data_sem); - ext4_discard_preallocations(donor_inode); - up_write(&EXT4_I(donor_inode)->i_data_sem); + /* + * Up semaphore to avoid following problems: + * a. transaction deadlock among ext4_journal_start, + * ->write_begin via pagefault, and jbd2_journal_commit + * b. racing with ->readpage, ->write_begin, and ext4_get_block + * in move_extent_per_page + */ + double_up_write_data_sem(orig_inode, donor_inode); while (orig_page_offset <= seq_end_page) { @@ -1381,14 +1366,14 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, /* Count how many blocks we have exchanged */ *moved_len += block_len_in_page; if (ret1 < 0) - goto out; + break; if (*moved_len > len) { ext4_error(orig_inode->i_sb, __func__, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); ret1 = -EIO; - goto out; + break; } orig_page_offset++; @@ -1400,6 +1385,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, block_len_in_page = rest_blocks; } + double_down_write_data_sem(orig_inode, donor_inode); + if (ret1 < 0) + break; + /* Decrease buffer counter */ if (holecheck_path) ext4_ext_drop_refs(holecheck_path); @@ -1429,7 +1418,7 @@ out: ext4_ext_drop_refs(holecheck_path); kfree(holecheck_path); } - + double_up_write_data_sem(orig_inode, donor_inode); ret2 = mext_inode_double_unlock(orig_inode, donor_inode); if (ret1) -- cgit v1.2.2 From 49bd22bc4d603a2a4fc2a6a60e156cbea52eb494 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Mon, 23 Nov 2009 07:24:41 -0500 Subject: ext4: fix possible recursive locking warning in EXT4_IOC_MOVE_EXT If CONFIG_PROVE_LOCKING is enabled, the double_down_write_data_sem() will trigger a false-positive warning of a recursive lock. Since we take i_data_sem for the two inodes ordered by their inode numbers, this isn't a problem. Use of down_write_nested() will notify the lock dependency checker machinery that there is no problem here. This problem was reported by Brian Rogers: http://marc.info/?l=linux-ext4&m=125115356928011&w=1 Reported-by: Brian Rogers Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index a7410b34c5ed..2ca6aa3f34e6 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -189,7 +189,7 @@ double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) } down_write(&EXT4_I(first)->i_data_sem); - down_write(&EXT4_I(second)->i_data_sem); + down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); } /** -- cgit v1.2.2 From 92c28159dce22913aef6aa811ce6fb0f7f3790b1 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Mon, 23 Nov 2009 07:24:50 -0500 Subject: ext4: fix spelling typos in move_extent.c Fix a few spelling typos in move_extent.c Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 2ca6aa3f34e6..5a106e02fd9c 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -568,7 +568,7 @@ out: * @tmp_oext: the extent that will belong to the donor inode * @orig_off: block offset of original inode * @donor_off: block offset of donor inode - * @max_count: the maximun length of extents + * @max_count: the maximum length of extents * * Return 0 on success, or a negative error value on failure. */ @@ -1073,7 +1073,7 @@ mext_check_arguments(struct inode *orig_inode, } if (!*len) { - ext4_debug("ext4 move extent: len shoudld not be 0 " + ext4_debug("ext4 move extent: len should not be 0 " "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; -- cgit v1.2.2 From 567f3e9a70d71e5c9be03701b8578be77857293b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 14 Nov 2009 08:19:05 -0500 Subject: ext4: plug a buffer_head leak in an error path of ext4_iget() One of the invalid error paths in ext4_iget() forgot to brelse() the inode buffer head. Fix it by adding a brelse() in the common error return path, which also simplifies function. Thanks to Andi Kleen reporting the problem. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2c8caa51addb..554c6798597c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4781,7 +4781,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct ext4_iloc iloc; struct ext4_inode *raw_inode; struct ext4_inode_info *ei; - struct buffer_head *bh; struct inode *inode; long ret; int block; @@ -4793,11 +4792,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT4_I(inode); + iloc.bh = 0; ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) goto bad_inode; - bh = iloc.bh; raw_inode = ext4_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); @@ -4820,7 +4819,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (inode->i_mode == 0 || !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ - brelse(bh); ret = -ESTALE; goto bad_inode; } @@ -4852,7 +4850,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb)) { - brelse(bh); ret = -EIO; goto bad_inode; } @@ -4905,10 +4902,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) /* Validate block references which are part of inode */ ret = ext4_check_inode_blockref(inode); } - if (ret) { - brelse(bh); + if (ret) goto bad_inode; - } if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; @@ -4936,7 +4931,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) init_special_inode(inode, inode->i_mode, new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } else { - brelse(bh); ret = -EIO; ext4_error(inode->i_sb, __func__, "bogus i_mode (%o) for inode=%lu", @@ -4949,6 +4943,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; bad_inode: + brelse(iloc.bh); iget_failed(inode); return ERR_PTR(ret); } -- cgit v1.2.2 From cf40db137cc2b2a1b3f6850247ac2b181d9d3847 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Nov 2009 21:00:01 -0500 Subject: ext4: remove failed journal checksum check Now that we are checking for failed journal checksums in the jbd2 layer, we don't need to check in the ext4 mount path --- since a checksum fail will result in ext4_load_journal() returning an error, causing the file system to refuse to be mounted until e2fsck can deal with the problem. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8662b2e6e9f9..04c66907b2fe 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2721,26 +2721,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3; - if (!(sb->s_flags & MS_RDONLY) && - EXT4_SB(sb)->s_journal->j_failed_commit) { - ext4_msg(sb, KERN_CRIT, "error: " - "ext4_fill_super: Journal transaction " - "%u is corrupt", - EXT4_SB(sb)->s_journal->j_failed_commit); - if (test_opt(sb, ERRORS_RO)) { - ext4_msg(sb, KERN_CRIT, - "Mounting filesystem read-only"); - sb->s_flags |= MS_RDONLY; - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - } - if (test_opt(sb, ERRORS_PANIC)) { - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, 1); - goto failed_mount4; - } - } } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { ext4_msg(sb, KERN_ERR, "required journal recovery " -- cgit v1.2.2 From beac2da7565e42be59963824899825d0cc624295 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:25:08 -0500 Subject: ext4: add tracepoint for ext4_forget() Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 554c6798597c..13de1dd751f5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -89,6 +89,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, might_sleep(); + trace_ext4_forget(inode, is_metadata, blocknr); BUFFER_TRACE(bh, "enter"); jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " -- cgit v1.2.2 From 50689696867d95b38d9c7be640a311494a04fb86 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:17:34 -0500 Subject: ext4: make sure directory and symlink blocks are revoked When an inode gets unlinked, the functions ext4_clear_blocks() and ext4_remove_blocks() call ext4_forget() for all the buffer heads corresponding to the deleted inode's data blocks. If the inode is a directory or a symlink, the is_metadata parameter must be non-zero so ext4_forget() will revoke them via jbd2_journal_revoke(). Otherwise, if these blocks are reused for a data file, and the system crashes before a journal checkpoint, the journal replay could end up corrupting these data blocks. Thanks to Curt Wohlgemuth for pointing out potential problems in this area. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/extents.c | 2 +- fs/ext4/inode.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 715264b4bae4..74dcff84c3a8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2074,7 +2074,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ext_debug("free last %u blocks starting %llu\n", num, start); for (i = 0; i < num; i++) { bh = sb_find_get_block(inode->i_sb, start + i); - ext4_forget(handle, 0, inode, bh, start + i); + ext4_forget(handle, metadata, inode, bh, start + i); } ext4_free_blocks(handle, inode, start, num, metadata); } else if (from == le32_to_cpu(ex->ee_block) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 13de1dd751f5..c420aaba6e9c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4121,6 +4121,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, __le32 *last) { __le32 *p; + int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode); + if (try_to_extend_transaction(handle, inode)) { if (bh) { BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); @@ -4151,11 +4153,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, *p = 0; tbh = sb_find_get_block(inode->i_sb, nr); - ext4_forget(handle, 0, inode, tbh, nr); + ext4_forget(handle, is_metadata, inode, tbh, nr); } } - ext4_free_blocks(handle, inode, block_to_free, count, 0); + ext4_free_blocks(handle, inode, block_to_free, count, is_metadata); } /** -- cgit v1.2.2 From 30c6e07a92ea4cb87160d32ffa9bce172576ae4c Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 15 Nov 2009 15:30:58 -0500 Subject: ext4: fix i_flags access in ext4_da_writepages_trans_blocks() We need to be testing the i_flags field in the ext4 specific portion of the inode, instead of the (confusingly aliased) i_flags field in the generic struct inode. Signed-off-by: Julia Lawall Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c420aaba6e9c..9c097489af89 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2789,7 +2789,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) * number of contiguous block. So we will limit * number of contiguous block to a sane value */ - if (!(inode->i_flags & EXT4_EXTENTS_FL) && + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) && (max_blocks > EXT4_MAX_TRANS_DATA)) max_blocks = EXT4_MAX_TRANS_DATA; -- cgit v1.2.2 From 86ebfd08a1930ccedb8eac0aeb1ed4b8b6a41dbc Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 15 Nov 2009 15:30:52 -0500 Subject: ext4: journal all modifications in ext4_xattr_set_handle ext4_xattr_set_handle() was zeroing out an inode outside of journaling constraints; this is one of the accesses that was causing the crc errors in journal replay as seen in kernel.org bugzilla #14354. Reviewed-by: Andreas Dilger Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/xattr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fed5b01d7a8d..025701926f9a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -988,6 +988,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (error) goto cleanup; + error = ext4_journal_get_write_access(handle, is.iloc.bh); + if (error) + goto cleanup; + if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); @@ -1013,9 +1017,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (flags & XATTR_CREATE) goto cleanup; } - error = ext4_journal_get_write_access(handle, is.iloc.bh); - if (error) - goto cleanup; if (!value) { if (!is.s.not_found) error = ext4_xattr_ibody_set(handle, inode, &i, &is); -- cgit v1.2.2 From 3f8fb9490efbd300887470a2a880a64e04dcc3f5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:24:52 -0500 Subject: ext4: don't update the superblock in ext4_statfs() commit a71ce8c6c9bf269b192f352ea555217815cf027e updated ext4_statfs() to update the on-disk superblock counters, but modified this buffer directly without any journaling of the change. This is one of the accesses that was causing the crc errors in journal replay as seen in kernel.org bugzilla #14354. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 04c66907b2fe..f2d5ec77c1e9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3648,13 +3648,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); - ext4_free_blocks_count_set(es, buf->f_bfree); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); - es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); buf->f_namelen = EXT4_NAME_LEN; fsid = le64_to_cpup((void *)es->s_uuid) ^ le64_to_cpup((void *)es->s_uuid + sizeof(u64)); -- cgit v1.2.2 From 8dadb198cb70ef811916668fe67eeec82e8858dd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:24:38 -0500 Subject: ext4: fix uninit block bitmap initialization when s_meta_first_bg is non-zero The number of old-style block group descriptor blocks is s_meta_first_bg when the meta_bg feature flag is set. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/balloc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1d0418980f8d..f3032c919a22 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -761,7 +761,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, ext4_group_t group) { - return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; + if (!ext4_bg_has_super(sb, group)) + return 0; + + if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) + return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); + else + return EXT4_SB(sb)->s_gdb_count; } /** -- cgit v1.2.2 From 1032988c71f3f85483b2b4319684d1205a704c02 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 15 Nov 2009 15:29:56 -0500 Subject: ext4: fix block validity checks so they work correctly with meta_bg The block validity checks used by ext4_data_block_valid() wasn't correctly written to check file systems with the meta_bg feature. Fix this. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/block_validity.c | 2 +- fs/ext4/inode.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 50784ef07563..dc79b75d8f70 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb) if (ext4_bg_has_super(sb, i) && ((i < 5) || ((i % flex_size) == 0))) add_system_zone(sbi, ext4_group_first_block_no(sb, i), - sbi->s_gdb_count + 1); + ext4_bg_num_gdb(sb, i) + 1); gdp = ext4_get_group_desc(sb, i, NULL); ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); if (ret) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c097489af89..0c0ddc1401e4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4884,10 +4884,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && - ((ei->i_file_acl < - (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + - EXT4_SB(sb)->s_gdb_count)) || - (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { + !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { ext4_error(sb, __func__, "bad extended attribute block %llu in inode #%lu", ei->i_file_acl, inode->i_ino); -- cgit v1.2.2 From 6b17d902fdd241adfa4ce780df20547b28bf5801 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:24:57 -0500 Subject: ext4: avoid issuing unnecessary barriers We don't to issue an I/O barrier on an error or if we force commit because we are doing data journaling. Signed-off-by: "Theodore Ts'o" Cc: Jan Kara Cc: stable@kernel.org --- fs/ext4/fsync.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 2b1531266ee2..a3c25076aef1 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -60,7 +60,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) ret = flush_aio_dio_completed_IO(inode); if (ret < 0) - goto out; + return ret; /* * data=writeback: * The caller's filemap_fdatawrite()/wait will sync the data. @@ -79,10 +79,8 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - goto out; - } + if (ext4_should_journal_data(inode)) + return ext4_force_commit(inode->i_sb); if (!journal) ret = sync_mapping_buffers(inode->i_mapping); -- cgit v1.2.2 From 2bba702d4f88d7b010ec37e2527b552588404ae7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 23 Nov 2009 07:24:48 -0500 Subject: ext4: fix error handling in ext4_ind_get_blocks() When an error happened in ext4_splice_branch we failed to notice that in ext4_ind_get_blocks and mapped the buffer anyway. Fix the problem by checking for error properly. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0c0ddc1401e4..3673ec7b1c98 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1022,7 +1022,7 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, if (!err) err = ext4_splice_branch(handle, inode, iblock, partial, indirect_blks, count); - else + if (err) goto cleanup; set_buffer_new(bh_result); -- cgit v1.2.2 From 5328e635315734d42080de9a5a1ee87bf4cae0a4 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 19 Nov 2009 14:25:42 -0500 Subject: ext4: make trim/discard optional (and off by default) It is anticipated that when sb_issue_discard starts doing real work on trim-capable devices, we may see issues. Make this mount-time optional, and default it to off until we know that things are working out OK. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 + fs/ext4/mballoc.c | 21 +++++++++++++-------- fs/ext4/super.c | 14 +++++++++++++- 3 files changed, 27 insertions(+), 9 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8825515eeddd..05ce38b981cb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -750,6 +750,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ +#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */ #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt #define set_opt(o, opt) o |= EXT4_MOUNT_##opt diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bba12824defa..6e5a23a2cc25 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) struct ext4_group_info *db; int err, count = 0, count2 = 0; struct ext4_free_data *entry; - ext4_fsblk_t discard_block; struct list_head *l, *ltmp; list_for_each_safe(l, ltmp, &txn->t_private_list) { @@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) page_cache_release(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->group); - discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) - + entry->start_blk - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, - entry->count); - sb_issue_discard(sb, discard_block, entry->count); - + if (test_opt(sb, DISCARD)) { + ext4_fsblk_t discard_block; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + + discard_block = (ext4_fsblk_t)entry->group * + EXT4_BLOCKS_PER_GROUP(sb) + + entry->start_blk + + le32_to_cpu(es->s_first_data_block); + trace_ext4_discard_blocks(sb, + (unsigned long long)discard_block, + entry->count); + sb_issue_discard(sb, discard_block, entry->count); + } kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f2d5ec77c1e9..2b382f6f80c1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -899,6 +899,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, NO_AUTO_DA_ALLOC)) seq_puts(seq, ",noauto_da_alloc"); + if (test_opt(sb, DISCARD)) + seq_puts(seq, ",discard"); + ext4_show_quota_options(seq, sb); return 0; @@ -1079,7 +1082,8 @@ enum { Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_block_validity, Opt_noblock_validity, - Opt_inode_readahead_blks, Opt_journal_ioprio + Opt_inode_readahead_blks, Opt_journal_ioprio, + Opt_discard, Opt_nodiscard, }; static const match_table_t tokens = { @@ -1144,6 +1148,8 @@ static const match_table_t tokens = { {Opt_auto_da_alloc, "auto_da_alloc=%u"}, {Opt_auto_da_alloc, "auto_da_alloc"}, {Opt_noauto_da_alloc, "noauto_da_alloc"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, {Opt_err, NULL}, }; @@ -1565,6 +1571,12 @@ set_qf_format: else set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; + case Opt_discard: + set_opt(sbi->s_mount_opt, DISCARD); + break; + case Opt_nodiscard: + clear_opt(sbi->s_mount_opt, DISCARD); + break; default: ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " -- cgit v1.2.2 From e3bb52ae2bb9573e84c17b8e3560378d13a5c798 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 19 Nov 2009 14:28:50 -0500 Subject: ext4: make "norecovery" an alias for "noload" Users on the linux-ext4 list recently complained about differences across filesystems w.r.t. how to mount without a journal replay. In the discussion it was noted that xfs's "norecovery" option is perhaps more descriptively accurate than "noload," so let's make that an alias for ext4. Also show this status in /proc/mounts Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2b382f6f80c1..5a2db612950b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -902,6 +902,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, DISCARD)) seq_puts(seq, ",discard"); + if (test_opt(sb, NOLOAD)) + seq_puts(seq, ",norecovery"); + ext4_show_quota_options(seq, sb); return 0; @@ -1108,6 +1111,7 @@ static const match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_noload, "noload"}, + {Opt_noload, "norecovery"}, {Opt_nobh, "nobh"}, {Opt_bh, "bh"}, {Opt_commit, "commit=%u"}, -- cgit v1.2.2 From d6797d14b1640d088652c72508b529a3aea479e3 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Nov 2009 20:52:12 -0500 Subject: ext4: move ext4_forget() to ext4_jbd2.c The ext4_forget() function better belongs in ext4_jbd2.c. This will allow us to do some cleanup of the ext4_journal_revoke() and ext4_journal_forget() functions, as well as giving us better error reporting since we can report the caller of ext4_forget() when things go wrong. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 -- fs/ext4/ext4_jbd2.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/ext4_jbd2.h | 7 +++++++ fs/ext4/inode.c | 53 -------------------------------------------------- 4 files changed, 63 insertions(+), 55 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 05ce38b981cb..57c4e03afa0a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1393,8 +1393,6 @@ extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); extern void ext4_mb_put_buddy_cache_lock(struct super_block *, ext4_group_t, int); /* inode.c */ -int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr); struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int, int *); struct buffer_head *ext4_bread(handle_t *, struct inode *, diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 6a9409920dee..913f85715433 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -4,6 +4,8 @@ #include "ext4_jbd2.h" +#include + int __ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) { @@ -64,6 +66,60 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, return err; } +/* + * The ext4 forget function must perform a revoke if we are freeing data + * which has been journaled. Metadata (eg. indirect blocks) must be + * revoked in all cases. + * + * "bh" may be NULL: a metadata block may have been freed from memory + * but there may still be a record of it in the journal, and that record + * still needs to be revoked. + * + * If the handle isn't valid we're not journaling, but we still need to + * call into ext4_journal_revoke() to put the buffer head. + */ +int __ext4_forget(const char *where, handle_t *handle, int is_metadata, + struct inode *inode, struct buffer_head *bh, + ext4_fsblk_t blocknr) +{ + int err; + + might_sleep(); + + trace_ext4_forget(inode, is_metadata, blocknr); + BUFFER_TRACE(bh, "enter"); + + jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " + "data mode %x\n", + bh, is_metadata, inode->i_mode, + test_opt(inode->i_sb, DATA_FLAGS)); + + /* Never use the revoke function if we are doing full data + * journaling: there is no need to, and a V1 superblock won't + * support it. Otherwise, only skip the revoke on un-journaled + * data blocks. */ + + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (!is_metadata && !ext4_should_journal_data(inode))) { + if (bh) { + BUFFER_TRACE(bh, "call jbd2_journal_forget"); + return __ext4_journal_forget(where, handle, bh); + } + return 0; + } + + /* + * data!=journal && (is_metadata || should_journal_data(inode)) + */ + BUFFER_TRACE(bh, "call ext4_journal_revoke"); + err = __ext4_journal_revoke(where, handle, blocknr, bh); + if (err) + ext4_abort(inode->i_sb, __func__, + "error %d when attempting revoke", err); + BUFFER_TRACE(bh, "exit"); + return err; +} + int __ext4_journal_get_create_access(const char *where, handle_t *handle, struct buffer_head *bh) { diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a2865980342f..dc0b34a903eb 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -139,6 +139,10 @@ int __ext4_journal_forget(const char *where, handle_t *handle, int __ext4_journal_revoke(const char *where, handle_t *handle, ext4_fsblk_t blocknr, struct buffer_head *bh); +int __ext4_forget(const char *where, handle_t *handle, int is_metadata, + struct inode *inode, struct buffer_head *bh, + ext4_fsblk_t blocknr); + int __ext4_journal_get_create_access(const char *where, handle_t *handle, struct buffer_head *bh); @@ -151,6 +155,9 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, __ext4_journal_get_write_access(__func__, (handle), (bh)) #define ext4_journal_revoke(handle, blocknr, bh) \ __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) +#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ + __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ + (block_nr)) #define ext4_journal_get_create_access(handle, bh) \ __ext4_journal_get_create_access(__func__, (handle), (bh)) #define ext4_journal_forget(handle, bh) \ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3673ec7b1c98..fa37f9504ece 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -70,59 +70,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } -/* - * The ext4 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. - * - * "bh" may be NULL: a metadata block may have been freed from memory - * but there may still be a record of it in the journal, and that record - * still needs to be revoked. - * - * If the handle isn't valid we're not journaling, but we still need to - * call into ext4_journal_revoke() to put the buffer head. - */ -int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext4_fsblk_t blocknr) -{ - int err; - - might_sleep(); - - trace_ext4_forget(inode, is_metadata, blocknr); - BUFFER_TRACE(bh, "enter"); - - jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " - "data mode %x\n", - bh, is_metadata, inode->i_mode, - test_opt(inode->i_sb, DATA_FLAGS)); - - /* Never use the revoke function if we are doing full data - * journaling: there is no need to, and a V1 superblock won't - * support it. Otherwise, only skip the revoke on un-journaled - * data blocks. */ - - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || - (!is_metadata && !ext4_should_journal_data(inode))) { - if (bh) { - BUFFER_TRACE(bh, "call jbd2_journal_forget"); - return ext4_journal_forget(handle, bh); - } - return 0; - } - - /* - * data!=journal && (is_metadata || should_journal_data(inode)) - */ - BUFFER_TRACE(bh, "call ext4_journal_revoke"); - err = ext4_journal_revoke(handle, blocknr, bh); - if (err) - ext4_abort(inode->i_sb, __func__, - "error %d when attempting revoke", err); - BUFFER_TRACE(bh, "exit"); - return err; -} - /* * Work out how many blocks we need to proceed with the next chunk of a * truncate transaction. -- cgit v1.2.2 From e4684b3fbb848446683feecb4aee133344c93933 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 24 Nov 2009 11:05:59 -0500 Subject: ext4: fold ext4_journal_revoke() into ext4_forget() The only caller of ext4_journal_revoke() is ext4_forget(), so we can fold ext4_journal_revoke() into ext4_forget() to simplify the code and shorten the call stack. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_jbd2.c | 30 +++++++++++------------------- fs/ext4/ext4_jbd2.h | 12 +----------- 2 files changed, 12 insertions(+), 30 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 913f85715433..92c88a8f734d 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -50,22 +50,6 @@ int __ext4_journal_forget(const char *where, handle_t *handle, return err; } -int __ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh) -{ - int err = 0; - - if (ext4_handle_valid(handle)) { - err = jbd2_journal_revoke(handle, blocknr, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, - handle, err); - } - else - bforget(bh); - return err; -} - /* * The ext4 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be @@ -94,6 +78,12 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata, bh, is_metadata, inode->i_mode, test_opt(inode->i_sb, DATA_FLAGS)); + /* In the no journal case, we can just do a bforget and return */ + if (!ext4_handle_valid(handle)) { + bforget(bh); + return 0; + } + /* Never use the revoke function if we are doing full data * journaling: there is no need to, and a V1 superblock won't * support it. Otherwise, only skip the revoke on un-journaled @@ -111,11 +101,13 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata, /* * data!=journal && (is_metadata || should_journal_data(inode)) */ - BUFFER_TRACE(bh, "call ext4_journal_revoke"); - err = __ext4_journal_revoke(where, handle, blocknr, bh); - if (err) + BUFFER_TRACE(bh, "call jbd2_journal_revoke"); + err = jbd2_journal_revoke(handle, blocknr, bh); + if (err) { + ext4_journal_abort_handle(where, __func__, bh, handle, err); ext4_abort(inode->i_sb, __func__, "error %d when attempting revoke", err); + } BUFFER_TRACE(bh, "exit"); return err; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index dc0b34a903eb..f9fb4bb69577 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -116,12 +116,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); /* - * Wrapper functions with which ext4 calls into JBD. The intent here is - * to allow these to be turned into appropriate stubs so ext4 can control - * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't - * been done yet. + * Wrapper functions with which ext4 calls into JBD. */ - void ext4_journal_abort_handle(const char *caller, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err); @@ -135,10 +131,6 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, int __ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh); -/* When called with an invalid handle, this will still do a put on the BH */ -int __ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh); - int __ext4_forget(const char *where, handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); @@ -153,8 +145,6 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, __ext4_journal_get_undo_access(__func__, (handle), (bh)) #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, (handle), (bh)) -#define ext4_journal_revoke(handle, blocknr, bh) \ - __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ (block_nr)) -- cgit v1.2.2 From b7e57e7c2a41826e51fe060fae5158bfc7a04e81 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Nov 2009 21:00:13 -0500 Subject: ext4: fold ext4_journal_forget() into ext4_forget() Convert the last two callers of ext4_journal_forget() to use ext4_forget() instead, and then fold ext4_journal_forget() into ext4_forget(). This reduces are code complexity and shortens our call stack. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_jbd2.c | 22 +++++----------------- fs/ext4/ext4_jbd2.h | 6 ------ fs/ext4/inode.c | 16 ++++++++++++++-- 3 files changed, 19 insertions(+), 25 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 92c88a8f734d..b57e5c711b6d 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -34,22 +34,6 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle, return err; } -int __ext4_journal_forget(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = 0; - - if (ext4_handle_valid(handle)) { - err = jbd2_journal_forget(handle, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, - handle, err); - } - else - bforget(bh); - return err; -} - /* * The ext4 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be @@ -93,7 +77,11 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata, (!is_metadata && !ext4_should_journal_data(inode))) { if (bh) { BUFFER_TRACE(bh, "call jbd2_journal_forget"); - return __ext4_journal_forget(where, handle, bh); + err = jbd2_journal_forget(handle, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + return err; } return 0; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index f9fb4bb69577..84bc98ab9f0d 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -127,10 +127,6 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle, int __ext4_journal_get_write_access(const char *where, handle_t *handle, struct buffer_head *bh); -/* When called with an invalid handle, this will still do a put on the BH */ -int __ext4_journal_forget(const char *where, handle_t *handle, - struct buffer_head *bh); - int __ext4_forget(const char *where, handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); @@ -150,8 +146,6 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, (block_nr)) #define ext4_journal_get_create_access(handle, bh) \ __ext4_journal_get_create_access(__func__, (handle), (bh)) -#define ext4_journal_forget(handle, bh) \ - __ext4_journal_forget(__func__, (handle), (bh)) #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index fa37f9504ece..72c694323492 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -767,7 +767,13 @@ failed: /* Allocation failed, free what we already allocated */ for (i = 1; i <= n ; i++) { BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); - ext4_journal_forget(handle, branch[i].bh); + /* + * Note: is_metadata is 0 because branch[i].bh is + * newly allocated, so there is no need to revoke the + * block. If we do, it's harmless, but not necessary. + */ + ext4_forget(handle, 0, inode, branch[i].bh, + branch[i].bh->b_blocknr); } for (i = 0; i < indirect_blks; i++) ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); @@ -852,7 +858,13 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, err_out: for (i = 1; i <= num; i++) { BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); - ext4_journal_forget(handle, where[i].bh); + /* + * Note: is_metadata is 0 because branch[i].bh is + * newly allocated, so there is no need to revoke the + * block. If we do, it's harmless, but not necessary. + */ + ext4_forget(handle, 0, inode, where[i].bh, + where[i].bh->b_blocknr); ext4_free_blocks(handle, inode, le32_to_cpu(where[i-1].key), 1, 0); } -- cgit v1.2.2 From 4433871130f36585fde38e7dd817433296648945 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Nov 2009 07:44:56 -0500 Subject: ext4: fold ext4_free_blocks() and ext4_mb_free_blocks() ext4_mb_free_blocks() is only called by ext4_free_blocks(), and the latter function doesn't really do much. So merge the two functions together, such that ext4_free_blocks() is now found in fs/ext4/mballoc.c. This saves about 200 bytes of compiled text space. Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 38 -------------------------------------- fs/ext4/ext4.h | 7 +++---- fs/ext4/mballoc.c | 30 +++++++++++++++++++++++------- 3 files changed, 26 insertions(+), 49 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index f3032c919a22..22bc7435d913 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -498,44 +498,6 @@ error_return: return; } -/** - * ext4_free_blocks() -- Free given blocks and update quota - * @handle: handle for this transaction - * @inode: inode - * @block: start physical block to free - * @count: number of blocks to count - * @metadata: Are these metadata blocks - */ -void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata) -{ - struct super_block *sb; - unsigned long dquot_freed_blocks; - - /* this isn't the right place to decide whether block is metadata - * inode.c/extents.c knows better, but for safety ... */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - metadata = 1; - - /* We need to make sure we don't reuse - * block released untill the transaction commit. - * writeback mode have weak data consistency so - * don't force data as metadata when freeing block - * for writeback mode. - */ - if (metadata == 0 && !ext4_should_writeback_data(inode)) - metadata = 1; - - sb = inode->i_sb; - - ext4_mb_free_blocks(handle, inode, block, count, - metadata, &dquot_freed_blocks); - if (dquot_freed_blocks) - vfs_dq_free_block(inode, dquot_freed_blocks); - return; -} - /** * ext4_has_free_blocks() * @sbi: in-core super block structure. diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 57c4e03afa0a..210e1b53e91f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1325,8 +1325,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t goal, unsigned long *count, int *errp); extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); -extern void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, int metadata); extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count); extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); @@ -1385,8 +1383,9 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int); extern void ext4_discard_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); -extern void ext4_mb_free_blocks(handle_t *, struct inode *, - ext4_fsblk_t, unsigned long, int, unsigned long *); +extern void ext4_free_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, + int metadata); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6e5a23a2cc25..0dca90be1afb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4427,18 +4427,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, return 0; } -/* - * Main entry point into mballoc to free blocks +/** + * ext4_free_blocks() -- Free given blocks and update quota + * @handle: handle for this transaction + * @inode: inode + * @block: start physical block to free + * @count: number of blocks to count + * @metadata: Are these metadata blocks */ -void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata, unsigned long *freed) +void ext4_free_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count, + int metadata) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; struct ext4_allocation_context *ac = NULL; struct ext4_group_desc *gdp; struct ext4_super_block *es; + unsigned long freed = 0; unsigned int overflow; ext4_grpblk_t bit; struct buffer_head *gd_bh; @@ -4448,7 +4454,15 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, int err = 0; int ret; - *freed = 0; + /* + * We need to make sure we don't reuse the freed block until + * after the transaction is committed, which we can do by + * treating the block as metadata, below. We make an + * exception if the inode is to be written in writeback mode + * since writeback mode has weak data consistency guarantees. + */ + if (!ext4_should_writeback_data(inode)) + metadata = 1; sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; @@ -4577,7 +4591,7 @@ do_more: ext4_mb_release_desc(&e4b); - *freed += count; + freed += count; /* We dirtied the bitmap block */ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); @@ -4597,6 +4611,8 @@ do_more: } sb->s_dirt = 1; error_return: + if (freed) + vfs_dq_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); if (ac) -- cgit v1.2.2 From e6362609b6c71c5b802026be9cf263bbdd67a50e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 23 Nov 2009 07:17:05 -0500 Subject: ext4: call ext4_forget() from ext4_free_blocks() Add the facility for ext4_forget() to be called from ext4_free_blocks(). This simplifies the code in a large number of places, and centralizes most of the work of calling ext4_forget() into a single place. Also fix a bug in the extents migration code; it wasn't calling ext4_forget() when releasing the indirect blocks during the conversion. As a result, if the system cashed during or shortly after the extents migration, and the released indirect blocks get reused as data blocks, the journal replay would corrupt the data blocks. With this new patch, fixing this bug was as simple as adding the EXT4_FREE_BLOCKS_FORGET flags to the call to ext4_free_blocks(). Signed-off-by: "Theodore Ts'o" Cc: "Aneesh Kumar K.V" --- fs/ext4/ext4.h | 10 +++++++-- fs/ext4/extents.c | 24 ++++++++------------ fs/ext4/inode.c | 67 +++++++++++++++++++++---------------------------------- fs/ext4/mballoc.c | 49 +++++++++++++++++++++++++++++----------- fs/ext4/migrate.c | 23 +++++++++++++------ fs/ext4/xattr.c | 8 ++++--- 6 files changed, 100 insertions(+), 81 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 210e1b53e91f..4cfc2f0edb3f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -375,6 +375,12 @@ struct ext4_new_group_data { #define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ EXT4_GET_BLOCKS_DIO_CREATE_EXT) +/* + * Flags used by ext4_free_blocks + */ +#define EXT4_FREE_BLOCKS_METADATA 0x0001 +#define EXT4_FREE_BLOCKS_FORGET 0x0002 + /* * ioctl commands */ @@ -1384,8 +1390,8 @@ extern void ext4_discard_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); extern void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata); + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74dcff84c3a8..2c4a9321fb14 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1007,7 +1007,8 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext4_free_blocks(handle, inode, ablocks[i], 1, 1); + ext4_free_blocks(handle, inode, 0, ablocks[i], 1, + EXT4_FREE_BLOCKS_METADATA); } } kfree(ablocks); @@ -1957,7 +1958,6 @@ errout: static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { - struct buffer_head *bh; int err; ext4_fsblk_t leaf; @@ -1973,9 +1973,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, if (err) return err; ext_debug("index is empty, remove it, free block %llu\n", leaf); - bh = sb_find_get_block(inode->i_sb, leaf); - ext4_forget(handle, 1, inode, bh, leaf); - ext4_free_blocks(handle, inode, leaf, 1, 1); + ext4_free_blocks(handle, inode, 0, leaf, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); return err; } @@ -2042,12 +2041,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, struct ext4_extent *ex, ext4_lblk_t from, ext4_lblk_t to) { - struct buffer_head *bh; unsigned short ee_len = ext4_ext_get_actual_len(ex); - int i, metadata = 0; + int flags = EXT4_FREE_BLOCKS_FORGET; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - metadata = 1; + flags |= EXT4_FREE_BLOCKS_METADATA; #ifdef EXTENTS_STATS { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -2072,11 +2070,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, num = le32_to_cpu(ex->ee_block) + ee_len - from; start = ext_pblock(ex) + ee_len - num; ext_debug("free last %u blocks starting %llu\n", num, start); - for (i = 0; i < num; i++) { - bh = sb_find_get_block(inode->i_sb, start + i); - ext4_forget(handle, metadata, inode, bh, start + i); - } - ext4_free_blocks(handle, inode, start, num, metadata); + ext4_free_blocks(handle, inode, 0, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", @@ -3319,8 +3313,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), 0); + ext4_free_blocks(handle, inode, 0, ext_pblock(&newex), + ext4_ext_get_actual_len(&newex), 0); goto out2; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 72c694323492..3b28e1fbfc90 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -669,7 +669,7 @@ allocated: return ret; failed_out: for (i = 0; i < index; i++) - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); return ret; } @@ -765,20 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, return err; failed: /* Allocation failed, free what we already allocated */ + ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); for (i = 1; i <= n ; i++) { - BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); /* - * Note: is_metadata is 0 because branch[i].bh is - * newly allocated, so there is no need to revoke the - * block. If we do, it's harmless, but not necessary. + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. */ - ext4_forget(handle, 0, inode, branch[i].bh, - branch[i].bh->b_blocknr); + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, + EXT4_FREE_BLOCKS_FORGET); } - for (i = 0; i < indirect_blks; i++) - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); + for (i = n+1; i < indirect_blks; i++) + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); - ext4_free_blocks(handle, inode, new_blocks[i], num, 0); + ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); return err; } @@ -857,18 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, err_out: for (i = 1; i <= num; i++) { - BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); /* - * Note: is_metadata is 0 because branch[i].bh is - * newly allocated, so there is no need to revoke the - * block. If we do, it's harmless, but not necessary. + * branch[i].bh is newly allocated, so there is no + * need to revoke the block, which is why we don't + * need to set EXT4_FREE_BLOCKS_METADATA. */ - ext4_forget(handle, 0, inode, where[i].bh, - where[i].bh->b_blocknr); - ext4_free_blocks(handle, inode, - le32_to_cpu(where[i-1].key), 1, 0); + ext4_free_blocks(handle, inode, where[i].bh, 0, 1, + EXT4_FREE_BLOCKS_FORGET); } - ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); + ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), + blks, 0); return err; } @@ -4080,7 +4078,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, __le32 *last) { __le32 *p; - int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode); + int flags = EXT4_FREE_BLOCKS_FORGET; + + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + flags |= EXT4_FREE_BLOCKS_METADATA; if (try_to_extend_transaction(handle, inode)) { if (bh) { @@ -4096,27 +4097,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, } } - /* - * Any buffers which are on the journal will be in memory. We - * find them on the hash table so jbd2_journal_revoke() will - * run jbd2_journal_forget() on them. We've already detached - * each block from the file, so bforget() in - * jbd2_journal_forget() should be safe. - * - * AKPM: turn on bforget in jbd2_journal_forget()!!! - */ - for (p = first; p < last; p++) { - u32 nr = le32_to_cpu(*p); - if (nr) { - struct buffer_head *tbh; - - *p = 0; - tbh = sb_find_get_block(inode->i_sb, nr); - ext4_forget(handle, is_metadata, inode, tbh, nr); - } - } + for (p = first; p < last; p++) + *p = 0; - ext4_free_blocks(handle, inode, block_to_free, count, is_metadata); + ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); } /** @@ -4304,7 +4288,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, blocks_for_truncate(inode)); } - ext4_free_blocks(handle, inode, nr, 1, 1); + ext4_free_blocks(handle, inode, 0, nr, 1, + EXT4_FREE_BLOCKS_METADATA); if (parent_bh) { /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0dca90be1afb..78de5d3c5dce 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4436,8 +4436,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * @metadata: Are these metadata blocks */ void ext4_free_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t block, unsigned long count, - int metadata) + struct buffer_head *bh, ext4_fsblk_t block, + unsigned long count, int flags) { struct buffer_head *bitmap_bh = NULL; struct super_block *sb = inode->i_sb; @@ -4454,15 +4454,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, int err = 0; int ret; - /* - * We need to make sure we don't reuse the freed block until - * after the transaction is committed, which we can do by - * treating the block as metadata, below. We make an - * exception if the inode is to be written in writeback mode - * since writeback mode has weak data consistency guarantees. - */ - if (!ext4_should_writeback_data(inode)) - metadata = 1; + if (bh) { + if (block) + BUG_ON(block != bh->b_blocknr); + else + block = bh->b_blocknr; + } sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; @@ -4476,7 +4473,32 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } ext4_debug("freeing block %llu\n", block); - trace_ext4_free_blocks(inode, block, count, metadata); + trace_ext4_free_blocks(inode, block, count, flags); + + if (flags & EXT4_FREE_BLOCKS_FORGET) { + struct buffer_head *tbh = bh; + int i; + + BUG_ON(bh && (count > 1)); + + for (i = 0; i < count; i++) { + if (!bh) + tbh = sb_find_get_block(inode->i_sb, + block + i); + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, + inode, tbh, block + i); + } + } + + /* + * We need to make sure we don't reuse the freed block until + * after the transaction is committed, which we can do by + * treating the block as metadata, below. We make an + * exception if the inode is to be written in writeback mode + * since writeback mode has weak data consistency guarantees. + */ + if (!ext4_should_writeback_data(inode)) + flags |= EXT4_FREE_BLOCKS_METADATA; ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); if (ac) { @@ -4552,7 +4574,8 @@ do_more: err = ext4_mb_load_buddy(sb, block_group, &e4b); if (err) goto error_return; - if (metadata && ext4_handle_valid(handle)) { + + if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) { struct ext4_free_data *new_entry; /* * blocks being freed are metadata. these blocks shouldn't diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index a93d5b80f3e2..d641e13e740e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle, for (i = 0; i < max_entries; i++) { if (tmp_idata[i]) { extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, - le32_to_cpu(tmp_idata[i]), 1, 1); + ext4_free_blocks(handle, inode, 0, + le32_to_cpu(tmp_idata[i]), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); } } put_bh(bh); extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); + ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); return 0; } @@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle, } put_bh(bh); extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); + ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); return 0; } @@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) /* ei->i_data[EXT4_IND_BLOCK] */ if (i_data[0]) { extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, - le32_to_cpu(i_data[0]), 1, 1); + ext4_free_blocks(handle, inode, 0, + le32_to_cpu(i_data[0]), 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); } /* ei->i_data[EXT4_DIND_BLOCK] */ @@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, } put_bh(bh); extend_credit_for_blkdel(handle, inode); - ext4_free_blocks(handle, inode, block, 1, 1); + ext4_free_blocks(handle, inode, 0, block, 1, + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); return retval; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 025701926f9a..910bf9a59cb3 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, ea_bdebug(bh, "refcount now=0; freeing"); if (ce) mb_cache_entry_free(ce); - ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1); get_bh(bh); - ext4_forget(handle, 1, inode, bh, bh->b_blocknr); + ext4_free_blocks(handle, inode, bh, 0, 1, + EXT4_FREE_BLOCKS_METADATA | + EXT4_FREE_BLOCKS_FORGET); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); error = ext4_handle_dirty_metadata(handle, inode, bh); @@ -832,7 +833,8 @@ inserted: new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: - ext4_free_blocks(handle, inode, block, 1, 1); + ext4_free_blocks(handle, inode, 0, block, 1, + EXT4_FREE_BLOCKS_METADATA); error = -EIO; goto cleanup; } -- cgit v1.2.2 From 1585d8d89ae1791d8f957731f5655700fbcc5664 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Nov 2009 20:48:34 -0500 Subject: ext4: add check for wraparound in ext4_data_block_valid() Signed-off-by: "Theodore Ts'o" --- fs/ext4/block_validity.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4') diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index dc79b75d8f70..4df8621ec31c 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, struct rb_node *n = sbi->system_blks.rb_node; if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count < start_blk) || (start_blk + count > ext4_blocks_count(sbi->s_es))) return 0; while (n) { -- cgit v1.2.2 From 9084d4719784b00ff0bf9c9580007fac8277dbcb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 22 Nov 2009 20:48:42 -0500 Subject: ext4: use ext4_data_block_valid() in ext4_free_blocks() The block validity framework does a more comprehensive set of checks, and it saves object code space to use the ext4_data_block_valid() than the limited open-coded version that had been in ext4_free_blocks(). Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 78de5d3c5dce..ab2dad1dfb7e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4463,9 +4463,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - block + count > ext4_blocks_count(es)) { + if (!ext4_data_block_valid(sbi, block, count)) { ext4_error(sb, __func__, "Freeing blocks not in datazone - " "block = %llu, count = %lu", block, count); -- cgit v1.2.2 From 94d7c16cbbbd0e03841fcf272bcaf0620ad39618 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Tue, 24 Nov 2009 10:19:57 -0500 Subject: ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT At the beginning of ext4_move_extent(), we call ext4_discard_preallocations() to discard inode PAs of orig and donor inodes. But in the following case, blocks can be double freed, so move ext4_discard_preallocations() to the end of ext4_move_extents(). 1. Discard inode PAs of orig and donor inodes with ext4_discard_preallocations() in ext4_move_extents(). orig : [ DATA1 ] donor: [ DATA2 ] 2. While data blocks are exchanging between orig and donor inodes, new inode PAs is created to orig by other process's block allocation. (Since there are semaphore gaps in ext4_move_extents().) And new inode PAs is used partially (2-1). 2-1 Create new inode PAs to orig inode orig : [ DATA1 | used PA1 | free PA1 ] donor: [ DATA2 ] 3. Donor inode which has old orig inode's blocks is deleted after EXT4_IOC_MOVE_EXT finished (3-1, 3-2). So the block bitmap corresponds to old orig inode's blocks are freed. 3-1 After EXT4_IOC_MOVE_EXT finished orig : [ DATA2 | free PA1 ] donor: [ DATA1 | used PA1 ] 3-2 Delete donor inode orig : [ DATA2 | free PA1 ] donor: [ FREE SPACE(DATA1) | FREE SPACE(used PA1) ] 4. The double-free of blocks is occurred, when close() is called to orig inode. Because ext4_discard_preallocations() for orig inode frees used PA1 and free PA1, though used PA1 is already freed in 3. 4-1 Double-free of blocks is occurred orig : [ DATA2 | FREE SPACE(free PA1) ] donor: [ FREE SPACE(DATA1) | DOUBLE FREE(used PA1) ] Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 5a106e02fd9c..3478889e00b3 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1289,10 +1289,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, ext4_ext_get_actual_len(ext_cur), block_end + 1) - max(le32_to_cpu(ext_cur->ee_block), block_start); - /* Discard preallocations of two inodes */ - ext4_discard_preallocations(orig_inode); - ext4_discard_preallocations(donor_inode); - while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) { seq_blocks += add_blocks; @@ -1410,6 +1406,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, } out: + if (*moved_len) { + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); + } + if (orig_path) { ext4_ext_drop_refs(orig_path); kfree(orig_path); -- cgit v1.2.2 From 446aaa6e7e993b38a6f21c6acfa68f3f1af3dbe3 Mon Sep 17 00:00:00 2001 From: Kazuya Mio Date: Tue, 24 Nov 2009 10:28:48 -0500 Subject: ext4: initialize moved_len before calling ext4_move_extents() The move_extent.moved_len is used to pass back the number of exchanged blocks count to user space. Currently the caller must clear this field; but we spend more code space checking for this requirement than simply zeroing the field ourselves, so let's just make life easier for everyone all around. Signed-off-by: Kazuya Mio Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 1 + fs/ext4/move_extent.c | 14 +++----------- 2 files changed, 4 insertions(+), 11 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c1cdf613e725..31e5ee0c858f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -239,6 +239,7 @@ setversion_out: } } + me.moved_len = 0; err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); fput(donor_filp); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3478889e00b3..445ecd7616a6 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -947,7 +947,6 @@ out2: * @orig_start: logical start offset in block for orig * @donor_start: logical start offset in block for donor * @len: the number of blocks to be moved - * @moved_len: moved block length * * Check the arguments of ext4_move_extents() whether the files can be * exchanged with each other. @@ -955,8 +954,8 @@ out2: */ static int mext_check_arguments(struct inode *orig_inode, - struct inode *donor_inode, __u64 orig_start, - __u64 donor_start, __u64 *len, __u64 moved_len) + struct inode *donor_inode, __u64 orig_start, + __u64 donor_start, __u64 *len) { ext4_lblk_t orig_blocks, donor_blocks; unsigned int blkbits = orig_inode->i_blkbits; @@ -1010,13 +1009,6 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } - if (moved_len) { - ext4_debug("ext4 move extent: moved_len should be 0 " - "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, - donor_inode->i_ino); - return -EINVAL; - } - if ((orig_start > EXT_MAX_BLOCK) || (donor_start > EXT_MAX_BLOCK) || (*len > EXT_MAX_BLOCK) || @@ -1226,7 +1218,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, - donor_start, &len, *moved_len); + donor_start, &len); if (ret1) goto out; -- cgit v1.2.2 From ac48b0a1d068887141581bea8285de5fcab182b0 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Tue, 24 Nov 2009 10:31:56 -0500 Subject: ext4: move_extent_per_page() cleanup Integrate duplicate lines (acquire/release semaphore and invalidate extent cache in move_extent_per_page()) into mext_replace_branches(), to reduce source and object code size. Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 445ecd7616a6..cad1e2edda7e 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -660,6 +660,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, int replaced_count = 0; int dext_alen; + /* Protect extent trees against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); + /* Get the original extent for the block "orig_off" */ *err = get_ext_path(orig_inode, orig_off, &orig_path); if (*err) @@ -755,6 +758,11 @@ out: kfree(donor_path); } + ext4_ext_invalidate_cache(orig_inode); + ext4_ext_invalidate_cache(donor_inode); + + double_up_write_data_sem(orig_inode, donor_inode); + return replaced_count; } @@ -820,19 +828,9 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, * Just swap data blocks between orig and donor. */ if (uninit) { - /* - * Protect extent trees against block allocations - * via delalloc - */ - double_down_write_data_sem(orig_inode, donor_inode); replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, orig_blk_offset, block_len_in_page, err); - - /* Clear the inode cache not to refer to the old data */ - ext4_ext_invalidate_cache(orig_inode); - ext4_ext_invalidate_cache(donor_inode); - double_up_write_data_sem(orig_inode, donor_inode); goto out2; } @@ -880,8 +878,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, /* Release old bh and drop refs */ try_to_release_page(page, 0); - /* Protect extent trees against block allocations via delalloc */ - double_down_write_data_sem(orig_inode, donor_inode); replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, orig_blk_offset, block_len_in_page, &err2); @@ -890,18 +886,10 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, block_len_in_page = replaced_count; replaced_size = block_len_in_page << orig_inode->i_blkbits; - } else { - double_up_write_data_sem(orig_inode, donor_inode); + } else goto out; - } } - /* Clear the inode cache not to refer to the old data */ - ext4_ext_invalidate_cache(orig_inode); - ext4_ext_invalidate_cache(donor_inode); - - double_up_write_data_sem(orig_inode, donor_inode); - if (!page_has_buffers(page)) create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); -- cgit v1.2.2 From 3f0ca309858ee186435c608ee9eaafd1c8dcb53a Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 24 Nov 2009 11:15:44 -0500 Subject: ext4: remove unused parameter wbc from __ext4_journalled_writepage() CC: Jan Kara Signed-off-by: Wu Fengguang Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3b28e1fbfc90..d3f99e9e8a31 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2558,7 +2558,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) } static int __ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc, unsigned int len) { struct address_space *mapping = page->mapping; @@ -2716,7 +2715,7 @@ static int ext4_writepage(struct page *page, * doesn't seem much point in redirtying the page here. */ ClearPageChecked(page); - return __ext4_journalled_writepage(page, wbc, len); + return __ext4_journalled_writepage(page, len); } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) -- cgit v1.2.2 From bf48aabb894fd0639ab72a26e8abbd7683ef23c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 28 Oct 2009 20:11:03 +0100 Subject: tree-wide: fix typos "offest" -> "offset" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch was generated by git grep -E -i -l 'offest' | xargs -r perl -p -i -e 's/offest/offset/' Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5c5bc5dafff8..618ca95cbb59 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4058,7 +4058,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, int k, err; *top = 0; - /* Make k index the deepest non-null offest + 1 */ + /* Make k index the deepest non-null offset + 1 */ for (k = depth; k > 1 && !offsets[k-1]; k--) ; partial = ext4_get_branch(inode, k, offsets, chain, &err); -- cgit v1.2.2 From af901ca181d92aac3a7dc265144a9081a86d8f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Sat, 14 Nov 2009 13:09:05 -0200 Subject: tree-wide: fix assorted typos all over the place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is "success", "unknown", "through", "performance", "[re|un]mapping" , "access", "default", "reasonable", "[con]currently", "temperature" , "channel", "[un]used", "application", "example","hierarchy", "therefore" , "[over|under]flow", "contiguous", "threshold", "enough" and others. Signed-off-by: André Goddard Rosa Signed-off-by: Jiri Kosina --- fs/ext4/inode.c | 6 +++--- fs/ext4/mballoc.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 618ca95cbb59..0282ec78cf8f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2932,7 +2932,7 @@ retry: ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); /* - * If we have a contigous extent of pages and we + * If we have a contiguous extent of pages and we * haven't done the I/O yet, map the blocks and submit * them for I/O. */ @@ -5370,7 +5370,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) * worse case, the indexs blocks spread over different block groups * * If datablocks are discontiguous, they are possible to spread over - * different block groups too. If they are contiugous, with flexbg, + * different block groups too. If they are contiuguous, with flexbg, * they could still across block group boundary. * * Also account for superblock, inode, quota and xattr blocks @@ -5446,7 +5446,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) * Calculate the journal credits for a chunk of data modification. * * This is called from DIO, fallocate or whoever calling - * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. + * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. * * journal buffers for data blocks are not included here, as DIO * and fallocate do no need to journal data buffers. diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bba12824defa..74e495dabe09 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -142,7 +142,7 @@ * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The * value of s_mb_order2_reqs can be tuned via * /sys/fs/ext4//mb_order2_req. If the request len is equal to - * stripe size (sbi->s_stripe), we try to search for contigous block in + * stripe size (sbi->s_stripe), we try to search for contiguous block in * stripe size. This should result in better allocation on RAID setups. If * not, we search in the specific group using bitmap for best extents. The * tunable min_to_scan and max_to_scan control the behaviour here. -- cgit v1.2.2 From c09eef305dd43846360944ad072f051f964fa383 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 7 Dec 2009 10:38:16 -0500 Subject: ext4: Return the PTR_ERR of the correct pointer in setup_new_group_blocks() Signed-off-by: Roel Kluin Signed-off-by: "Theodore Ts'o" --- fs/ext4/resize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3cfc343c41b5..3b2c5541d8a6 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_bh; if (IS_ERR(gdb = bclean(handle, sb, block))) { - err = PTR_ERR(bh); + err = PTR_ERR(gdb); goto exit_bh; } ext4_handle_dirty_metadata(handle, NULL, gdb); -- cgit v1.2.2 From 24b584240a0006ea7436cd35f5e8983eb76f1e6f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 7 Dec 2009 14:08:51 -0500 Subject: ext4: Use ext4 file system driver for ext2/ext3 file system mounts Add a new config option, CONFIG_EXT4_USE_FOR_EXT23 which if enabled, will cause ext4 to be used for either ext2 or ext3 file system mounts when ext2 or ext3 is not enabled in the configuration. This allows minimalist kernel fanatics to drop to file system drivers from their compiled kernel with out losing functionality. Signed-off-by: "Theodore Ts'o" --- fs/ext4/Kconfig | 10 ++++++++++ fs/ext4/super.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9f2d45d75b1a..a6b4e93833d6 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -26,6 +26,16 @@ config EXT4_FS If unsure, say N. +config EXT4_USE_FOR_EXT23 + bool "Use ext4 for ext2/ext3 file systems" + depends on !EXT3_FS || !EXT2_FS + default y + help + Allow the ext4 file system driver code to be used for ext2 or + ext3 file system mounts. This allows users to reduce their + compiled kernel size by using one file system driver for + ext2, ext3, and ext4 file systems. + config EXT4_FS_XATTR bool "Ext4 extended attributes" depends on EXT4_FS diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5a2db612950b..30476daf966e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3960,6 +3960,58 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags, return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } +#if !defined(CONTIG_EXT2_FS) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext2_fs_type = { + .owner = THIS_MODULE, + .name = "ext2", + .get_sb = ext4_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static inline void register_as_ext2(void) +{ + int err = register_filesystem(&ext2_fs_type); + if (err) + printk(KERN_WARNING + "EXT4-fs: Unable to register as ext2 (%d)\n", err); +} + +static inline void unregister_as_ext2(void) +{ + unregister_filesystem(&ext2_fs_type); +} +#else +static inline void register_as_ext2(void) { } +static inline void unregister_as_ext2(void) { } +#endif + +#if !defined(CONTIG_EXT3_FS) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext3_fs_type = { + .owner = THIS_MODULE, + .name = "ext3", + .get_sb = ext4_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static inline void register_as_ext3(void) +{ + int err = register_filesystem(&ext3_fs_type); + if (err) + printk(KERN_WARNING + "EXT4-fs: Unable to register as ext3 (%d)\n", err); +} + +static inline void unregister_as_ext3(void) +{ + unregister_filesystem(&ext3_fs_type); +} +#else +static inline void register_as_ext3(void) { } +static inline void unregister_as_ext3(void) { } +#endif + static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, .name = "ext4", @@ -3989,11 +4041,15 @@ static int __init init_ext4_fs(void) err = init_inodecache(); if (err) goto out1; + register_as_ext2(); + register_as_ext3(); err = register_filesystem(&ext4_fs_type); if (err) goto out; return 0; out: + unregister_as_ext2(); + unregister_as_ext3(); destroy_inodecache(); out1: exit_ext4_xattr(); @@ -4009,6 +4065,8 @@ out4: static void __exit exit_ext4_fs(void) { + unregister_as_ext2(); + unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); destroy_inodecache(); exit_ext4_xattr(); -- cgit v1.2.2 From b9a4207d5e911b938f73079a83cc2ae10524ec7f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 8 Dec 2009 21:24:33 -0500 Subject: ext4: Avoid data / filesystem corruption when write fails to copy data When ext4_write_begin fails after allocating some blocks or generic_perform_write fails to copy data to write, we truncate blocks already instantiated beyond i_size. Although these blocks were never inside i_size, we have to truncate the pagecache of these blocks so that corresponding buffers get unmapped. Otherwise subsequent __block_prepare_write (called because we are retrying the write) will find the buffers mapped, not call ->get_block, and thus the page will be backed by already freed blocks leading to filesystem and data corruption. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d3f99e9e8a31..0e2ea572856c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1492,6 +1492,16 @@ static int do_journal_get_write_access(handle_t *handle, return ext4_journal_get_write_access(handle, bh); } +/* + * Truncate blocks that were not used by write. We have to truncate the + * pagecache as well so that corresponding buffers get properly unmapped. + */ +static void ext4_truncate_failed_write(struct inode *inode) +{ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); +} + static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1557,7 +1567,7 @@ retry: ext4_journal_stop(handle); if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might * still be on the orphan list; we need to @@ -1667,7 +1677,7 @@ static int ext4_ordered_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -1709,7 +1719,7 @@ static int ext4_writeback_write_end(struct file *file, ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -1772,7 +1782,7 @@ static int ext4_journalled_write_end(struct file *file, if (!ret) ret = ret2; if (pos + len > inode->i_size) { - ext4_truncate(inode); + ext4_truncate_failed_write(inode); /* * If truncate failed early the inode might still be * on the orphan list; we need to make sure the inode @@ -3048,7 +3058,7 @@ retry: * i_size_read because we hold i_mutex. */ if (pos + len > inode->i_size) - ext4_truncate(inode); + ext4_truncate_failed_write(inode); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) -- cgit v1.2.2 From d4edac314e9ad0b21ba20ba8bc61b61f186f79e1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 8 Dec 2009 21:48:58 -0500 Subject: ext4: wait for log to commit when umounting There is a potential race when a transaction is committing right when the file system is being umounting. This could reduce in a race because EXT4_SB(sb)->s_group_info could be freed in ext4_put_super before the commit code calls a callback so the mballoc code can release freed blocks in the transaction, resulting in a panic trying to access the freed s_group_info. The fix is to wait for the transaction to finish committing before we shutdown the multiblock allocator. Signed-off-by: Josef Bacik Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 30476daf966e..8ab0c9518473 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb) if (sb->s_dirt) ext4_commit_super(sb, 1); - ext4_release_system_zone(sb); - ext4_mb_release(sb); - ext4_ext_release(sb); - ext4_xattr_put_super(sb); if (sbi->s_journal) { err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; @@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb) ext4_abort(sb, __func__, "Couldn't clean up the journal"); } + + ext4_release_system_zone(sb); + ext4_mb_release(sb); + ext4_ext_release(sb); + ext4_xattr_put_super(sb); + if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); -- cgit v1.2.2 From b844167edc7fcafda9623955c05e4c1b3c32ebc7 Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Tue, 8 Dec 2009 22:18:25 -0500 Subject: ext4: remove blocks from inode prealloc list on failure This fixes a leak of blocks in an inode prealloc list if device failures cause ext4_mb_mark_diskspace_used() to fail. Signed-off-by: Curt Wohlgemuth Acked-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ab2dad1dfb7e..19635c341994 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3010,6 +3010,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) trace_ext4_mballoc_prealloc(ac); } +/* + * Called on failure; free up any blocks from the inode PA for this + * context. We don't need this for MB_GROUP_PA because we only change + * pa_free in ext4_mb_release_context(), but on failure, we've already + * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed. + */ +static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) +{ + struct ext4_prealloc_space *pa = ac->ac_pa; + int len; + + if (pa && pa->pa_type == MB_INODE_PA) { + len = ac->ac_b_ex.fe_len; + pa->pa_free += len; + } + +} + /* * use blocks preallocated to inode */ @@ -4295,6 +4313,7 @@ repeat: ac->ac_status = AC_STATUS_CONTINUE; goto repeat; } else if (*errp) { + ext4_discard_allocated_blocks(ac); ac->ac_b_ex.fe_len = 0; ar->len = 0; ext4_mb_show_ac(ac); -- cgit v1.2.2 From 8aa6790f876e81f5a2211fe1711a5fe3fe2d7b20 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 8 Dec 2009 22:41:52 -0500 Subject: ext4: ext4_get_reserved_space() must return bytes instead of blocks Signed-off-by: Dmitry Monakhov Reviewed-by: Eric Sandeen Acked-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0e2ea572856c..2da74f57a10b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1010,7 +1010,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode) EXT4_I(inode)->i_reserved_meta_blocks; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - return total; + return (total << inode->i_blkbits); } /* * Calculate the number of metadata blocks need to reserve -- cgit v1.2.2 From 5aca07eb7d8f14d90c740834d15ca15277f4820c Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 8 Dec 2009 22:42:15 -0500 Subject: ext4: quota macros cleanup Currently all quota block reservation macros contains hard-coded "2" aka MAXQUOTAS value. This is no good because in some places it is not obvious to understand what does this digit represent. Let's introduce new macro with self descriptive name. Signed-off-by: Dmitry Monakhov Acked-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_jbd2.h | 8 ++++++-- fs/ext4/extents.c | 2 +- fs/ext4/inode.c | 2 +- fs/ext4/migrate.c | 4 ++-- fs/ext4/namei.c | 8 ++++---- 5 files changed, 14 insertions(+), 10 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 84bc98ab9f0d..2c2b262bd31b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -49,7 +49,7 @@ #define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ EXT4_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* * Define the number of metadata blocks we need to account to modify data. @@ -57,7 +57,7 @@ * This include super block, inode block, quota blocks and xattr blocks */ #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + EXT4_MAXQUOTAS_TRANS_BLOCKS(sb)) /* Delete operations potentially hit one directory's namespace plus an * entire inode, plus arbitrary amounts of bitmap/indirection data. Be @@ -92,6 +92,7 @@ * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) + #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) #else @@ -99,6 +100,9 @@ #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0 #endif +#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) +#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) int ext4_mark_iloc_dirty(handle_t *handle, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2c4a9321fb14..5967f18fd7e7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2161,7 +2161,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, correct_index = 1; credits += (ext_depth(inode)) + 1; } - credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); err = ext4_ext_truncate_extend_restart(handle, inode, credits); if (err) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2da74f57a10b..1b1b7d918114 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5175,7 +5175,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) /* (user+group)*(old+new) structure, inode write (sb, * inode block, ? - but truncate inode update has it) */ - handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ + handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); if (IS_ERR(handle)) { error = PTR_ERR(handle); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index d641e13e740e..81415814b00b 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) * So allocate a credit of 3. We may update * quota (user and group). */ - needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); + needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); if (ext4_journal_extend(handle, needed) != 0) retval = ext4_journal_restart(handle, needed); @@ -486,7 +486,7 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) + EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) + 1); if (IS_ERR(handle)) { retval = PTR_ERR(handle); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index fde08c919d12..17a17e10dd60 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1769,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1803,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1840,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2253,7 +2253,7 @@ static int ext4_symlink(struct inode *dir, retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + - 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); -- cgit v1.2.2 From 194074acacebc169ded90a4657193f5180015051 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 8 Dec 2009 22:42:28 -0500 Subject: ext4: fix incorrect block reservation on quota transfer. Inside ->setattr() call both ATTR_UID and ATTR_GID may be valid This means that we may end-up with transferring all quotas. Add we have to reserve QUOTA_DEL_BLOCKS for all quotas, as we do in case of QUOTA_INIT_BLOCKS. Signed-off-by: Dmitry Monakhov Reviewed-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1b1b7d918114..958c3ff800e9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5176,7 +5176,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) /* (user+group)*(old+new) structure, inode write (sb, * inode block, ? - but truncate inode update has it) */ handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ - EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); + EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto err_out; -- cgit v1.2.2 From b436b9bef84de6893e86346d8fbf7104bc520645 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 8 Dec 2009 23:51:10 -0500 Subject: ext4: Wait for proper transaction commit on fsync We cannot rely on buffer dirty bits during fsync because pdflush can come before fsync is called and clear dirty bits without forcing a transaction commit. What we do is that we track which transaction has last changed the inode and which transaction last changed allocation and force it to disk on fsync. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 7 +++++++ fs/ext4/ext4_jbd2.h | 13 +++++++++++++ fs/ext4/extents.c | 14 ++++++++++++-- fs/ext4/fsync.c | 46 +++++++++++++++++----------------------------- fs/ext4/inode.c | 29 +++++++++++++++++++++++++++++ fs/ext4/super.c | 2 ++ 6 files changed, 80 insertions(+), 31 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4cfc2f0edb3f..ab31e65d46d0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -709,6 +709,13 @@ struct ext4_inode_info { struct list_head i_aio_dio_complete_list; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; + + /* + * Transactions that contain inode's metadata needed to complete + * fsync and fdatasync, respectively. + */ + tid_t i_sync_tid; + tid_t i_datasync_tid; }; /* diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 2c2b262bd31b..05eca817d704 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -249,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) return 0; } +static inline void ext4_update_inode_fsync_trans(handle_t *handle, + struct inode *inode, + int datasync) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + + if (ext4_handle_valid(handle)) { + ei->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + ei->i_datasync_tid = handle->h_transaction->t_tid; + } +} + /* super.c */ int ext4_force_commit(struct super_block *sb); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5967f18fd7e7..700206e525da 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3058,6 +3058,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { ret = ext4_convert_unwritten_extents_dio(handle, inode, path); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); goto out2; } /* buffered IO case */ @@ -3085,6 +3087,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ret = ext4_ext_convert_to_initialized(handle, inode, path, iblock, max_blocks); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out: if (ret <= 0) { err = ret; @@ -3323,10 +3327,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, allocated = ext4_ext_get_actual_len(&newex); set_buffer_new(bh_result); - /* Cache only when it is _not_ an uninitialized extent */ - if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) + /* + * Cache the extent and update transaction to commit on fdatasync only + * when it is _not_ an uninitialized extent. + */ + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { ext4_ext_put_in_cache(inode, iblock, allocated, newblock, EXT4_EXT_CACHE_EXTENT); + ext4_update_inode_fsync_trans(handle, inode, 1); + } else + ext4_update_inode_fsync_trans(handle, inode, 0); out: if (allocated > max_blocks) allocated = max_blocks; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a3c25076aef1..0b22497d92e1 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -51,25 +51,30 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; + struct ext4_inode_info *ei = EXT4_I(inode); journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - int err, ret = 0; + int ret; + tid_t commit_tid; J_ASSERT(ext4_journal_current_handle() == NULL); trace_ext4_sync_file(file, dentry, datasync); + if (inode->i_sb->s_flags & MS_RDONLY) + return 0; + ret = flush_aio_dio_completed_IO(inode); if (ret < 0) return ret; + + if (!journal) + return simple_fsync(file, dentry, datasync); + /* - * data=writeback: + * data=writeback,ordered: * The caller's filemap_fdatawrite()/wait will sync the data. - * sync_inode() will sync the metadata - * - * data=ordered: - * The caller's filemap_fdatawrite() will write the data and - * sync_inode() will write the inode if it is dirty. Then the caller's - * filemap_fdatawait() will wait on the pages. + * Metadata is in the journal, we wait for proper transaction to + * commit here. * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). @@ -82,27 +87,10 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) if (ext4_should_journal_data(inode)) return ext4_force_commit(inode->i_sb); - if (!journal) - ret = sync_mapping_buffers(inode->i_mapping); - - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto out; - - /* - * The VFS has written the file data. If the inode is unaltered - * then we need not start a commit. - */ - if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 0, /* sys_fsync did this */ - }; - err = sync_inode(inode, &wbc); - if (ret == 0) - ret = err; - } -out: - if (journal && (journal->j_flags & JBD2_BARRIER)) + commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; + if (jbd2_log_start_commit(journal, commit_tid)) + jbd2_log_wait_commit(journal, commit_tid); + else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 958c3ff800e9..f1bc1e338828 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -983,6 +983,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, goto cleanup; set_buffer_new(bh_result); + + ext4_update_inode_fsync_trans(handle, inode, 1); got_it: map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); if (count > blocks_to_boundary) @@ -4738,6 +4740,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct ext4_inode *raw_inode; struct ext4_inode_info *ei; struct inode *inode; + journal_t *journal = EXT4_SB(sb)->s_journal; long ret; int block; @@ -4802,6 +4805,31 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); + /* + * Set transaction id's of transactions that have to be committed + * to finish f[data]sync. We set them to currently running transaction + * as we cannot be sure that the inode or some of its metadata isn't + * part of the transaction - the inode could have been reclaimed and + * now it is reread from disk. + */ + if (journal) { + transaction_t *transaction; + tid_t tid; + + spin_lock(&journal->j_state_lock); + if (journal->j_running_transaction) + transaction = journal->j_running_transaction; + else + transaction = journal->j_committing_transaction; + if (transaction) + tid = transaction->t_tid; + else + tid = journal->j_commit_sequence; + spin_unlock(&journal->j_state_lock); + ei->i_sync_tid = tid; + ei->i_datasync_tid = tid; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > @@ -5056,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle, err = rc; ei->i_state &= ~EXT4_STATE_NEW; + ext4_update_inode_fsync_trans(handle, inode, 0); out_brelse: brelse(bh); ext4_std_error(inode->i_sb, err); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8ab0c9518473..2b13dcfcf775 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -706,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) spin_lock_init(&(ei->i_block_reservation_lock)); INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); ei->cur_aio_dio = NULL; + ei->i_sync_tid = 0; + ei->i_datasync_tid = 0; return &ei->vfs_inode; } -- cgit v1.2.2 From 4a58579b9e4e2a35d57e6c9c8483e52f6f1b7fd6 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Sun, 6 Dec 2009 23:38:31 -0500 Subject: ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT This patch fixes three problems in the handling of the EXT4_IOC_MOVE_EXT ioctl: 1. In current EXT4_IOC_MOVE_EXT, there are read access mode checks for original and donor files, but they allow the illegal write access to donor file, since donor file is overwritten by original file data. To fix this problem, change access mode checks of original (r->r/w) and donor (r->w) files. 2. Disallow the use of donor files that have a setuid or setgid bits. 3. Call mnt_want_write() and mnt_drop_write() before and after ext4_move_extents() calling to get write access to a mount. Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 30 ++++++++++++++++++------------ fs/ext4/move_extent.c | 7 +++++++ 2 files changed, 25 insertions(+), 12 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 31e5ee0c858f..b63d193126db 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -221,32 +221,38 @@ setversion_out: struct file *donor_filp; int err; + if (!(filp->f_mode & FMODE_READ) || + !(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (copy_from_user(&me, (struct move_extent __user *)arg, sizeof(me))) return -EFAULT; + me.moved_len = 0; donor_filp = fget(me.donor_fd); if (!donor_filp) return -EBADF; - if (!capable(CAP_DAC_OVERRIDE)) { - if ((current->real_cred->fsuid != inode->i_uid) || - !(inode->i_mode & S_IRUSR) || - !(donor_filp->f_dentry->d_inode->i_mode & - S_IRUSR)) { - fput(donor_filp); - return -EACCES; - } + if (!(donor_filp->f_mode & FMODE_WRITE)) { + err = -EBADF; + goto mext_out; } - me.moved_len = 0; + err = mnt_want_write(filp->f_path.mnt); + if (err) + goto mext_out; + err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); - fput(donor_filp); + mnt_drop_write(filp->f_path.mnt); + if (me.moved_len > 0) + file_remove_suid(donor_filp); if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) - return -EFAULT; - + err = -EFAULT; +mext_out: + fput(donor_filp); return err; } diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index cad1e2edda7e..82c415be87a4 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -957,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } + if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { + ext4_debug("ext4 move extent: suid or sgid is set" + " to donor file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + /* Ext4 move extent does not support swapfile */ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { ext4_debug("ext4 move extent: The argument files should " -- cgit v1.2.2 From a1de02dccf906faba2ee2d99cac56799bda3b96a Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 4 Feb 2010 23:58:38 -0500 Subject: ext4: fix async i/o writes beyond 4GB to a sparse file The "offset" member in ext4_io_end holds bytes, not blocks, so ext4_lblk_t is wrong - and too small (u32). This caused the async i/o writes to sparse files beyond 4GB to fail when they wrapped around to 0. Also fix up the type of arguments to ext4_convert_unwritten_extents(), it gets ssize_t from ext4_end_aio_dio_nolock() and ext4_ext_direct_IO(). Reported-by: Giel de Nijs Signed-off-by: Eric Sandeen --- fs/ext4/ext4.h | 6 +++--- fs/ext4/extents.c | 2 +- fs/ext4/inode.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 874d169a193e..602d5ad6f5e7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -139,8 +139,8 @@ typedef struct ext4_io_end { struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ int error; /* I/O error code */ - ext4_lblk_t offset; /* offset in the file */ - size_t size; /* size of the extent */ + loff_t offset; /* offset in the file */ + ssize_t size; /* size of the extent */ struct work_struct work; /* data work queue */ } ext4_io_end_t; @@ -1744,7 +1744,7 @@ extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - loff_t len); + ssize_t len); extern int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, unsigned int max_blocks, struct buffer_head *bh, int flags); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 765a4826b118..c56877972b0e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3603,7 +3603,7 @@ retry: * Returns 0 on success. */ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - loff_t len) + ssize_t len) { handle_t *handle; ext4_lblk_t block; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..2059c34ac4c8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3551,7 +3551,7 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) { struct inode *inode = io->inode; loff_t offset = io->offset; - size_t size = io->size; + ssize_t size = io->size; int ret = 0; ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," -- cgit v1.2.2 From 15121c18a22ae483279f76dc9e554334b800d0f7 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 15 Feb 2010 20:17:55 -0500 Subject: ext4: Fix optional-arg mount options We have 2 mount options, "barrier" and "auto_da_alloc" which may or may not take a 1/0 argument. This causes the ext4 superblock mount code to subtract uninitialized pointers and pass the result to kmalloc, which results in very noisy failures. Per Ted's suggestion, initialize the args struct so that we know whether match_token() found an argument for the option, and skip match_int() if not. Also, return error (0) from parse_options if we thought we found an argument, but match_int() Fails. Reported-by: Michael S. Tsirkin Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 735c20d5fd56..68a55dffb360 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1229,6 +1229,11 @@ static int parse_options(char *options, struct super_block *sb, if (!*p) continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].to = args[0].from = 0; token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: @@ -1518,10 +1523,11 @@ set_qf_format: clear_opt(sbi->s_mount_opt, BARRIER); break; case Opt_barrier: - if (match_int(&args[0], &option)) { - set_opt(sbi->s_mount_opt, BARRIER); - break; - } + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) set_opt(sbi->s_mount_opt, BARRIER); else @@ -1594,10 +1600,11 @@ set_qf_format: set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; case Opt_auto_da_alloc: - if (match_int(&args[0], &option)) { - clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); - break; - } + if (args[0].from) { + if (match_int(&args[0], &option)) + return 0; + } else + option = 1; /* No argument, default to 1 */ if (option) clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); else -- cgit v1.2.2 From 1f2acb6017d8528135ec3b01ab7cd2be6ea0630b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 22 Jan 2010 17:40:42 -0500 Subject: ext4: Add block validity check when truncating indirect block mapped inodes Add checks to ext4_free_branches() to make sure a block number found in an indirect block are valid before trying to free it. If a bad block number is found, stop freeing the indirect block immediately, since the file system is corrupt and we will need to run fsck anyway. This also avoids spamming the logs, and specifically avoids driver-level "attempt to access beyond end of device" errors obscure what is really going on. If you get *really*, *really*, *really* unlucky, without this patch, a supposed indirect block containing garbage might contain a reference to a primary block group descriptor, in which case ext4_free_branches() could end up zero'ing out a block group descriptor block, and if then one of the block bitmaps for a block group described by that bg descriptor block is not in memory, and is read in by ext4_read_block_bitmap(). This function calls ext4_valid_block_bitmap(), which assumes that bg_inode_table() was validated at mount time and hasn't been modified since. Since this assumption is no longer valid, it's possible for the value (ext4_inode_table(sb, desc) - group_first_block) to go negative, which will cause ext4_find_next_zero_bit() to trigger a kernel GPF. Addresses-Google-Bug: #2220436 Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 + fs/ext4/inode.c | 39 ++++++++++++++++++++++++++++++--------- fs/ext4/mballoc.c | 7 ++++--- 3 files changed, 35 insertions(+), 12 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 602d5ad6f5e7..307ecd13a762 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -377,6 +377,7 @@ struct ext4_new_group_data { */ #define EXT4_FREE_BLOCKS_METADATA 0x0001 #define EXT4_FREE_BLOCKS_FORGET 0x0002 +#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 /* * ioctl commands diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2059c34ac4c8..3e8afd969236 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4130,18 +4130,27 @@ no_top: * We release `count' blocks on disk, but (last - first) may be greater * than `count' because there can be holes in there. */ -static void ext4_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, - ext4_fsblk_t block_to_free, - unsigned long count, __le32 *first, - __le32 *last) +static int ext4_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, + ext4_fsblk_t block_to_free, + unsigned long count, __le32 *first, + __le32 *last) { __le32 *p; - int flags = EXT4_FREE_BLOCKS_FORGET; + int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) flags |= EXT4_FREE_BLOCKS_METADATA; + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, + count)) { + ext4_error(inode->i_sb, __func__, "inode #%lu: " + "attempt to clear blocks %llu len %lu, invalid", + inode->i_ino, (unsigned long long) block_to_free, + count); + return 1; + } + if (try_to_extend_transaction(handle, inode)) { if (bh) { BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); @@ -4160,6 +4169,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, *p = 0; ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); + return 0; } /** @@ -4215,9 +4225,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, } else if (nr == block_to_free + count) { count++; } else { - ext4_clear_blocks(handle, inode, this_bh, - block_to_free, - count, block_to_free_p, p); + if (ext4_clear_blocks(handle, inode, this_bh, + block_to_free, count, + block_to_free_p, p)) + break; block_to_free = nr; block_to_free_p = p; count = 1; @@ -4281,6 +4292,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!nr) continue; /* A hole */ + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), + nr, 1)) { + ext4_error(inode->i_sb, __func__, + "indirect mapped block in inode " + "#%lu invalid (level %d, blk #%lu)", + inode->i_ino, depth, + (unsigned long) nr); + break; + } + /* Go read the buffer for the next level down */ bh = sb_bread(inode->i_sb, nr); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d34afad3e137..d129c1039f1d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4476,10 +4476,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; - if (!ext4_data_block_valid(sbi, block, count)) { + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && + !ext4_data_block_valid(sbi, block, count)) { ext4_error(sb, __func__, - "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); + "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); goto error_return; } -- cgit v1.2.2 From f8ec9d6837241865cf99bed97bb99f4399fd5a03 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 Jan 2010 01:00:21 -0500 Subject: ext4: Add new tracepoints to debug delayed allocation space functions Add tracepoints for ext4_da_reserve_space(), ext4_da_update_reserve_space(), and ext4_da_release_space(). Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3e8afd969236..1a3d7b232cd7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1061,6 +1061,7 @@ void ext4_da_update_reserve_space(struct inode *inode, int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); + trace_ext4_da_update_reserve_space(inode, used); if (unlikely(used > ei->i_reserved_data_blocks)) { ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " "with only %d reserved data blocks\n", @@ -1846,6 +1847,7 @@ repeat: spin_lock(&ei->i_block_reservation_lock); md_reserved = ei->i_reserved_meta_blocks; md_needed = ext4_calc_metadata_amount(inode, lblock); + trace_ext4_da_reserve_space(inode, md_needed); spin_unlock(&ei->i_block_reservation_lock); /* -- cgit v1.2.2 From a214238d3bb03723f820b0a398928d8e1637c987 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 9 Dec 2009 21:09:58 -0500 Subject: ext4: Do not override ext2 or ext3 if built they are built as modules The CONFIG_EXT4_USE_FOR_EXT23 option must not try to take over the ext2 or ext3 file systems if the those file system drivers are configured to be built as mdoules. Signed-off-by: "Theodore Ts'o" --- fs/ext4/Kconfig | 2 +- fs/ext4/super.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index a6b4e93833d6..9acf7e808139 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -28,7 +28,7 @@ config EXT4_FS config EXT4_USE_FOR_EXT23 bool "Use ext4 for ext2/ext3 file systems" - depends on !EXT3_FS || !EXT2_FS + depends on EXT3_FS=n || EXT2_FS=n default y help Allow the ext4 file system driver code to be used for ext2 or diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2b13dcfcf775..8b58a144c31b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3964,7 +3964,7 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags, return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } -#if !defined(CONTIG_EXT2_FS) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", @@ -3990,7 +3990,7 @@ static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } #endif -#if !defined(CONTIG_EXT3_FS) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", -- cgit v1.2.2 From fab3a549e204172236779f502eccb4f9bf0dc87d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 9 Dec 2009 21:30:02 -0500 Subject: ext4: Fix potential fiemap deadlock (mmap_sem vs. i_data_sem) Fix the following potential circular locking dependency between mm->mmap_sem and ei->i_data_sem: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.32-04115-gec044c5 #37 ------------------------------------------------------- ureadahead/1855 is trying to acquire lock: (&mm->mmap_sem){++++++}, at: [] might_fault+0x5c/0xac but task is already holding lock: (&ei->i_data_sem){++++..}, at: [] ext4_fiemap+0x11b/0x159 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&ei->i_data_sem){++++..}: [] __lock_acquire+0xb67/0xd0f [] lock_acquire+0xdc/0x102 [] down_read+0x51/0x84 [] ext4_get_blocks+0x50/0x2a5 [] ext4_get_block+0xab/0xef [] do_mpage_readpage+0x198/0x48d [] mpage_readpages+0xd0/0x114 [] ext4_readpages+0x1d/0x1f [] __do_page_cache_readahead+0x12f/0x1bc [] ra_submit+0x21/0x25 [] filemap_fault+0x19f/0x32c [] __do_fault+0x55/0x3a2 [] handle_mm_fault+0x327/0x734 [] do_page_fault+0x292/0x2aa [] page_fault+0x25/0x30 [] clear_user+0x38/0x3c [] padzero+0x20/0x31 [] load_elf_binary+0x8bc/0x17ed [] search_binary_handler+0xc2/0x259 [] load_script+0x1b8/0x1cc [] search_binary_handler+0xc2/0x259 [] do_execve+0x1ce/0x2cf [] sys_execve+0x43/0x5a [] stub_execve+0x6a/0xc0 -> #0 (&mm->mmap_sem){++++++}: [] __lock_acquire+0xa11/0xd0f [] lock_acquire+0xdc/0x102 [] might_fault+0x89/0xac [] fiemap_fill_next_extent+0x95/0xda [] ext4_ext_fiemap_cb+0x138/0x157 [] ext4_ext_walk_space+0x178/0x1f1 [] ext4_fiemap+0x13c/0x159 [] do_vfs_ioctl+0x348/0x4d6 [] sys_ioctl+0x56/0x79 [] system_call_fastpath+0x16/0x1b other info that might help us debug this: 1 lock held by ureadahead/1855: #0: (&ei->i_data_sem){++++..}, at: [] ext4_fiemap+0x11b/0x159 stack backtrace: Pid: 1855, comm: ureadahead Not tainted 2.6.32-04115-gec044c5 #37 Call Trace: [] print_circular_bug+0xa8/0xb7 [] __lock_acquire+0xa11/0xd0f [] ? sched_clock+0x9/0xd [] lock_acquire+0xdc/0x102 [] ? might_fault+0x5c/0xac [] might_fault+0x89/0xac [] ? might_fault+0x5c/0xac [] ? __kmalloc+0x13b/0x18c [] fiemap_fill_next_extent+0x95/0xda [] ext4_ext_fiemap_cb+0x138/0x157 [] ? ext4_ext_fiemap_cb+0x0/0x157 [] ext4_ext_walk_space+0x178/0x1f1 [] ext4_fiemap+0x13c/0x159 [] ? might_fault+0x5c/0xac [] do_vfs_ioctl+0x348/0x4d6 [] ? __up_read+0x8d/0x95 [] ? retint_swapgs+0x13/0x1b [] sys_ioctl+0x56/0x79 [] system_call_fastpath+0x16/0x1b Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 700206e525da..3a7928f825e4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1762,7 +1762,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, while (block < last && block != EXT_MAX_BLOCK) { num = last - block; /* find extent for this block */ + down_read(&EXT4_I(inode)->i_data_sem); path = ext4_ext_find_extent(inode, block, path); + up_read(&EXT4_I(inode)->i_data_sem); if (IS_ERR(path)) { err = PTR_ERR(path); path = NULL; @@ -3724,10 +3726,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Walk the extent tree gathering extent information. * ext4_ext_fiemap_cb will push extents back to user. */ - down_read(&EXT4_I(inode)->i_data_sem); error = ext4_ext_walk_space(inode, start_blk, len_blks, ext4_ext_fiemap_cb, fieinfo); - up_read(&EXT4_I(inode)->i_data_sem); } return error; -- cgit v1.2.2 From 5a20bdfcdc5c5e5f0647d8d99a998066ef5496ac Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 30 Nov 2009 23:58:32 +0100 Subject: ext4: Support for 64-bit quota format Add support for new 64-bit quota format. It is enough to add proper mount options handling. The rest is done by the generic code. Signed-off-by: Jan Kara --- fs/ext4/super.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d4ca92aab514..10483fa7c05c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -765,9 +765,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq, #if defined(CONFIG_QUOTA) struct ext4_sb_info *sbi = EXT4_SB(sb); - if (sbi->s_jquota_fmt) - seq_printf(seq, ",jqfmt=%s", - (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); + if (sbi->s_jquota_fmt) { + char *fmtname = ""; + + switch (sbi->s_jquota_fmt) { + case QFMT_VFS_OLD: + fmtname = "vfsold"; + break; + case QFMT_VFS_V0: + fmtname = "vfsv0"; + break; + case QFMT_VFS_V1: + fmtname = "vfsv1"; + break; + } + seq_printf(seq, ",jqfmt=%s", fmtname); + } if (sbi->s_qf_names[USRQUOTA]) seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); @@ -1074,9 +1087,9 @@ enum { Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, - Opt_usrquota, Opt_grpquota, Opt_i_version, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, + Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, + Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio @@ -1125,6 +1138,7 @@ static const match_table_t tokens = { {Opt_grpjquota, "grpjquota=%s"}, {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, + {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, {Opt_grpquota, "grpquota"}, {Opt_noquota, "noquota"}, {Opt_quota, "quota"}, @@ -1425,6 +1439,9 @@ clear_qf_name: goto set_qf_format; case Opt_jqfmt_vfsv0: qfmt = QFMT_VFS_V0; + goto set_qf_format; + case Opt_jqfmt_vfsv1: + qfmt = QFMT_VFS_V1; set_qf_format: if (sb_any_quota_loaded(sb) && sbi->s_jquota_fmt != qfmt) { @@ -1467,6 +1484,7 @@ set_qf_format: case Opt_offgrpjquota: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: + case Opt_jqfmt_vfsv1: ext4_msg(sb, KERN_ERR, "journaled quota options not supported"); break; -- cgit v1.2.2 From e4c570c4cb7a95dbfafa3d016d2739bf3fdfe319 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 14 Dec 2009 18:00:26 -0800 Subject: task_struct: make journal_info conditional journal_info in task_struct is used in journaling file system only. So introduce CONFIG_FS_JOURNAL_INFO and make it conditional. Signed-off-by: Hiroshi Shimamoto Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Steven Whitehouse Cc: KONISHI Ryusuke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9acf7e808139..e5f6774846e4 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -2,6 +2,7 @@ config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select JBD2 select CRC16 + select FS_JOURNAL_INFO help This is the next generation of the ext3 filesystem. -- cgit v1.2.2 From e7d2860b690d4f3bed6824757c540579638e3d1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Goddard=20Rosa?= Date: Mon, 14 Dec 2009 18:01:06 -0800 Subject: tree-wide: convert open calls to remove spaces to skip_spaces() lib function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes use of skip_spaces() defined in lib/string.c for removing leading spaces from strings all over the tree. It decreases lib.a code size by 47 bytes and reuses the function tree-wide: text data bss dec hex filename 64688 584 592 65864 10148 (TOTALS-BEFORE) 64641 584 592 65817 10119 (TOTALS-AFTER) Also, while at it, if we see (*str && isspace(*str)), we can be sure to remove the first condition (*str) as the second one (isspace(*str)) also evaluates to 0 whenever *str == 0, making it redundant. In other words, "a char equals zero is never a space". Julia Lawall tried the semantic patch (http://coccinelle.lip6.fr) below, and found occurrences of this pattern on 3 more files: drivers/leds/led-class.c drivers/leds/ledtrig-timer.c drivers/video/output.c @@ expression str; @@ ( // ignore skip_spaces cases while (*str && isspace(*str)) { \(str++;\|++str;\) } | - *str && isspace(*str) ) Signed-off-by: André Goddard Rosa Cc: Julia Lawall Cc: Martin Schwidefsky Cc: Jeff Dike Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Richard Purdie Cc: Neil Brown Cc: Kyle McMartin Cc: Henrique de Moraes Holschuh Cc: David Howells Cc: Cc: Samuel Ortiz Cc: Patrick McHardy Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/super.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 768c111a77ec..827bde1f2594 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2137,11 +2137,8 @@ static int parse_strtoul(const char *buf, { char *endp; - while (*buf && isspace(*buf)) - buf++; - *value = simple_strtoul(buf, &endp, 0); - while (*endp && isspace(*endp)) - endp++; + *value = simple_strtoul(skip_spaces(buf), &endp, 0); + endp = skip_spaces(endp); if (*endp || *value > max) return -EINVAL; -- cgit v1.2.2 From 431547b3c4533b8c7fd150ab36980b9a3147797b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Nov 2009 09:52:56 +0000 Subject: sanitize xattr handler prototypes Add a flags argument to struct xattr_handler and pass it to all xattr handler methods. This allows using the same methods for multiple handlers, e.g. for the ACL methods which perform exactly the same action for the access and default ACLs, just using a different underlying attribute. With a little more groundwork it'll also allow sharing the methods for the regular user/trusted/secure handlers in extN, ocfs2 and jffs2 like it's already done for xfs in this patch. Also change the inode argument to the handlers to a dentry to allow using the handlers mechnism for filesystems that require it later, e.g. cifs. [with GFS2 bits updated by Steven Whitehouse ] Signed-off-by: Christoph Hellwig Reviewed-by: James Morris Acked-by: Joel Becker Signed-off-by: Al Viro --- fs/ext4/acl.c | 74 +++++++++++++++--------------------------------- fs/ext4/xattr.c | 31 +++++++++++--------- fs/ext4/xattr_security.c | 20 ++++++------- fs/ext4/xattr_trusted.c | 20 ++++++------- fs/ext4/xattr_user.c | 25 ++++++++-------- 5 files changed, 73 insertions(+), 97 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 0df88b2a69b0..8a2a29d35a6f 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -364,12 +364,12 @@ out: * Extended attribute handlers */ static size_t -ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) +ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_len) memcpy(list, POSIX_ACL_XATTR_ACCESS, size); @@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, } static size_t -ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, - const char *name, size_t name_len) +ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len, + const char *name, size_t name_len, int type) { const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (!test_opt(dentry->d_sb, POSIX_ACL)) return 0; if (list && size <= list_len) memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); @@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, } static int -ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { struct posix_acl *acl; int error; - if (!test_opt(inode->i_sb, POSIX_ACL)) + if (strcmp(name, "") != 0) + return -EINVAL; + if (!test_opt(dentry->d_sb, POSIX_ACL)) return -EOPNOTSUPP; - acl = ext4_get_acl(inode, type); + acl = ext4_get_acl(dentry->d_inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) } static int -ext4_xattr_get_acl_access(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int -ext4_xattr_get_acl_default(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int -ext4_xattr_set_acl(struct inode *inode, int type, const void *value, - size_t size) +ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int type) { + struct inode *inode = dentry->d_inode; handle_t *handle; struct posix_acl *acl; int error, retries = 0; + if (strcmp(name, "") != 0) + return -EINVAL; if (!test_opt(inode->i_sb, POSIX_ACL)) return -EOPNOTSUPP; if (!is_owner_or_cap(inode)) @@ -466,34 +454,18 @@ release_and_out: return error; } -static int -ext4_xattr_set_acl_access(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int -ext4_xattr_set_acl_default(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - struct xattr_handler ext4_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, .list = ext4_xattr_list_acl_access, - .get = ext4_xattr_get_acl_access, - .set = ext4_xattr_set_acl_access, + .get = ext4_xattr_get_acl, + .set = ext4_xattr_set_acl, }; struct xattr_handler ext4_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, .list = ext4_xattr_list_acl_default, - .get = ext4_xattr_get_acl_default, - .set = ext4_xattr_set_acl_default, + .get = ext4_xattr_get_acl, + .set = ext4_xattr_set_acl, }; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 910bf9a59cb3..83218bebbc7c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *, struct mb_cache_entry **); static void ext4_xattr_rehash(struct ext4_xattr_header *, struct ext4_xattr_entry *); -static int ext4_xattr_list(struct inode *inode, char *buffer, +static int ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size); static struct mb_cache *ext4_xattr_cache; @@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index) ssize_t ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext4_xattr_list(dentry->d_inode, buffer, size); + return ext4_xattr_list(dentry, buffer, size); } static int @@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name, } static int -ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, +ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry, char *buffer, size_t buffer_size) { size_t rest = buffer_size; @@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, ext4_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(inode, buffer, rest, + size_t size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len); + entry->e_name_len, + handler->flags); if (buffer) { if (size > rest) return -ERANGE; @@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, } static int -ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct buffer_head *bh = NULL; int error; @@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) goto cleanup; } ext4_xattr_cache_insert(bh); - error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); + error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); cleanup: brelse(bh); @@ -385,8 +387,9 @@ cleanup: } static int -ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct ext4_xattr_ibody_header *header; struct ext4_inode *raw_inode; struct ext4_iloc iloc; @@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) error = ext4_xattr_check_names(IFIRST(header), end); if (error) goto cleanup; - error = ext4_xattr_list_entries(inode, IFIRST(header), + error = ext4_xattr_list_entries(dentry, IFIRST(header), buffer, buffer_size); cleanup: @@ -423,12 +426,12 @@ cleanup: * used / required on success. */ static int -ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { int i_error, b_error; - down_read(&EXT4_I(inode)->xattr_sem); - i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); + down_read(&EXT4_I(dentry->d_inode)->xattr_sem); + i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size); if (i_error < 0) { b_error = 0; } else { @@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) buffer += i_error; buffer_size -= i_error; } - b_error = ext4_xattr_block_list(inode, buffer, buffer_size); + b_error = ext4_xattr_block_list(dentry, buffer, buffer_size); if (b_error < 0) i_error = 0; } - up_read(&EXT4_I(inode)->xattr_sem); + up_read(&EXT4_I(dentry->d_inode)->xattr_sem); return i_error + b_error; } diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index ca5f89fc6cae..983c253999a7 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -12,8 +12,8 @@ #include "xattr.h" static size_t -ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; const size_t total_len = prefix_len + name_len + 1; @@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, } static int -ext4_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext4_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, - buffer, size); + return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY, + name, buffer, size); } static int -ext4_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext4_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, - value, size, flags); + return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY, + name, value, size, flags); } int diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index ac1a52cf2a37..15b50edc6587 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -14,8 +14,8 @@ #include "xattr.h" static size_t -ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; @@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, } static int -ext4_xattr_trusted_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer, + size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, - buffer, size); + return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED, + name, buffer, size); } static int -ext4_xattr_trusted_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext4_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, - value, size, flags); + return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED, + name, value, size, flags); } struct xattr_handler ext4_xattr_trusted_handler = { diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index d91aa61b42aa..c4ce05746ce1 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -13,13 +13,13 @@ #include "xattr.h" static size_t -ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return 0; if (list && total_len <= list_size) { @@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, } static int -ext4_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext4_xattr_user_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); + return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER, + name, buffer, size); } static int -ext4_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext4_xattr_user_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, - value, size, flags); + return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER, + name, value, size, flags); } struct xattr_handler ext4_xattr_user_handler = { -- cgit v1.2.2 From b6e3224fb20954f155e41ec5709b2ab70b50ae2d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Dec 2009 13:23:24 -0800 Subject: Revert "task_struct: make journal_info conditional" This reverts commit e4c570c4cb7a95dbfafa3d016d2739bf3fdfe319, as requested by Alexey: "I think I gave a good enough arguments to not merge it. To iterate: * patch makes impossible to start using ext3 on EXT3_FS=n kernels without reboot. * this is done only for one pointer on task_struct" None of config options which define task_struct are tristate directly or effectively." Requested-by: Alexey Dobriyan Acked-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e5f6774846e4..9acf7e808139 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -2,7 +2,6 @@ config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select JBD2 select CRC16 - select FS_JOURNAL_INFO help This is the next generation of the ext3 filesystem. -- cgit v1.2.2 From a9e7f4472075fb6937c545af3f6329e9946bbe66 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 14 Dec 2009 15:21:14 +0300 Subject: ext4: Convert to generic reserved quota's space management. This patch also fixes write vs chown race condition. Acked-by: "Theodore Ts'o" Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext4/ext4.h | 6 +++++- fs/ext4/inode.c | 16 +++++++--------- fs/ext4/super.c | 5 +++++ 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ab31e65d46d0..56f9271ee8cc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -704,6 +704,10 @@ struct ext4_inode_info { __u16 i_extra_isize; spinlock_t i_block_reservation_lock; +#ifdef CONFIG_QUOTA + /* quota space reservation, managed internally by quota code */ + qsize_t i_reserved_quota; +#endif /* completed async DIOs that might need unwritten extents handling */ struct list_head i_aio_dio_complete_list; @@ -1435,7 +1439,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); -extern qsize_t ext4_get_reserved_space(struct inode *inode); +extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int flush_aio_dio_completed_IO(struct inode *inode); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5352db1a3086..a31980dd9b86 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1003,17 +1003,12 @@ out: return err; } -qsize_t ext4_get_reserved_space(struct inode *inode) +#ifdef CONFIG_QUOTA +qsize_t *ext4_get_reserved_space(struct inode *inode) { - unsigned long long total; - - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - total = EXT4_I(inode)->i_reserved_data_blocks + - EXT4_I(inode)->i_reserved_meta_blocks; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - - return (total << inode->i_blkbits); + return &EXT4_I(inode)->i_reserved_quota; } +#endif /* * Calculate the number of metadata blocks need to reserve * to allocate @blocks for non extent file based file @@ -4794,6 +4789,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); ei->i_disksize = inode->i_size; +#ifdef CONFIG_QUOTA + ei->i_reserved_quota = 0; +#endif inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_block_group = iloc.block_group; ei->i_last_alloc_group = ~0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 827bde1f2594..6ed9aa91f27d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -704,6 +704,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_allocated_meta_blocks = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); +#ifdef CONFIG_QUOTA + ei->i_reserved_quota = 0; +#endif INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); ei->cur_aio_dio = NULL; ei->i_sync_tid = 0; @@ -1014,7 +1017,9 @@ static const struct dquot_operations ext4_quota_operations = { .reserve_space = dquot_reserve_space, .claim_space = dquot_claim_space, .release_rsv = dquot_release_reserved_space, +#ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, +#endif .alloc_inode = dquot_alloc_inode, .free_space = dquot_free_space, .free_inode = dquot_free_inode, -- cgit v1.2.2 From d21cd8f163ac44b15c465aab7306db931c606908 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 10 Dec 2009 03:31:45 +0000 Subject: ext4: Fix potential quota deadlock We have to delay vfs_dq_claim_space() until allocation context destruction. Currently we have following call-trace: ext4_mb_new_blocks() /* task is already holding ac->alloc_semp */ ->ext4_mb_mark_diskspace_used ->vfs_dq_claim_space() /* acquire dqptr_sem here. Possible deadlock */ ->ext4_mb_release_context() /* drop ac->alloc_semp here */ Let's move quota claiming to ext4_da_update_reserve_space() ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.32-rc7 #18 ------------------------------------------------------- write-truncate-/3465 is trying to acquire lock: (&s->s_dquot.dqptr_sem){++++..}, at: [] dquot_claim_space+0x3b/0x1b0 but task is already holding lock: (&meta_group_info[i]->alloc_sem){++++..}, at: [] ext4_mb_load_buddy+0xb2/0x370 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&meta_group_info[i]->alloc_sem){++++..}: [] __lock_acquire+0xd7b/0x1260 [] lock_acquire+0xba/0xd0 [] down_read+0x51/0x90 [] ext4_mb_load_buddy+0xb2/0x370 [] ext4_mb_free_blocks+0x46c/0x870 [] ext4_free_blocks+0x73/0x130 [] ext4_ext_truncate+0x76c/0x8d0 [] ext4_truncate+0x187/0x5e0 [] vmtruncate+0x6b/0x70 [] inode_setattr+0x62/0x190 [] ext4_setattr+0x25a/0x370 [] notify_change+0x151/0x340 [] do_truncate+0x6d/0xa0 [] may_open+0x1d4/0x200 [] do_filp_open+0x1eb/0x910 [] do_sys_open+0x6d/0x140 [] sys_open+0x2e/0x40 [] sysenter_do_call+0x12/0x32 -> #2 (&ei->i_data_sem){++++..}: [] __lock_acquire+0xd7b/0x1260 [] lock_acquire+0xba/0xd0 [] down_read+0x51/0x90 [] ext4_get_blocks+0x47/0x450 [] ext4_getblk+0x61/0x1d0 [] ext4_bread+0x1f/0xa0 [] ext4_quota_write+0x12c/0x310 [] qtree_write_dquot+0x93/0x120 [] v2_write_dquot+0x28/0x30 [] dquot_commit+0xab/0xf0 [] ext4_write_dquot+0x77/0x90 [] ext4_mark_dquot_dirty+0x2f/0x50 [] dquot_alloc_inode+0x101/0x180 [] ext4_new_inode+0x602/0xf00 [] ext4_create+0x89/0x150 [] vfs_create+0xa2/0xc0 [] do_filp_open+0x7a7/0x910 [] do_sys_open+0x6d/0x140 [] sys_open+0x2e/0x40 [] sysenter_do_call+0x12/0x32 -> #1 (&sb->s_type->i_mutex_key#7/4){+.+...}: [] __lock_acquire+0xd7b/0x1260 [] lock_acquire+0xba/0xd0 [] mutex_lock_nested+0x65/0x2d0 [] vfs_load_quota_inode+0x4bd/0x5a0 [] vfs_quota_on_path+0x5f/0x70 [] ext4_quota_on+0x112/0x190 [] sys_quotactl+0x44a/0x8a0 [] sysenter_do_call+0x12/0x32 -> #0 (&s->s_dquot.dqptr_sem){++++..}: [] __lock_acquire+0x1091/0x1260 [] lock_acquire+0xba/0xd0 [] down_read+0x51/0x90 [] dquot_claim_space+0x3b/0x1b0 [] ext4_mb_mark_diskspace_used+0x36f/0x380 [] ext4_mb_new_blocks+0x34a/0x530 [] ext4_ext_get_blocks+0x122b/0x13c0 [] ext4_get_blocks+0x226/0x450 [] mpage_da_map_blocks+0xc3/0xaa0 [] ext4_da_writepages+0x506/0x790 [] do_writepages+0x22/0x50 [] __filemap_fdatawrite_range+0x6d/0x80 [] filemap_flush+0x2b/0x30 [] ext4_alloc_da_blocks+0x5c/0x60 [] ext4_release_file+0x75/0xb0 [] __fput+0xf9/0x210 [] fput+0x27/0x30 [] filp_close+0x4c/0x80 [] put_files_struct+0x6e/0xd0 [] exit_files+0x47/0x60 [] do_exit+0x144/0x710 [] do_group_exit+0x38/0xa0 [] get_signal_to_deliver+0x2ac/0x410 [] do_notify_resume+0xb9/0x890 [] work_notifysig+0x13/0x21 other info that might help us debug this: 3 locks held by write-truncate-/3465: #0: (jbd2_handle){+.+...}, at: [] start_this_handle+0x38f/0x5c0 #1: (&ei->i_data_sem){++++..}, at: [] ext4_get_blocks+0xb6/0x450 #2: (&meta_group_info[i]->alloc_sem){++++..}, at: [] ext4_mb_load_buddy+0xb2/0x370 stack backtrace: Pid: 3465, comm: write-truncate- Not tainted 2.6.32-rc7 #18 Call Trace: [] ? printk+0x1d/0x22 [] print_circular_bug+0xca/0xd0 [] __lock_acquire+0x1091/0x1260 [] ? sched_clock_local+0xd2/0x170 [] ? trace_hardirqs_off_caller+0x20/0xd0 [] lock_acquire+0xba/0xd0 [] ? dquot_claim_space+0x3b/0x1b0 [] down_read+0x51/0x90 [] ? dquot_claim_space+0x3b/0x1b0 [] dquot_claim_space+0x3b/0x1b0 [] ext4_mb_mark_diskspace_used+0x36f/0x380 [] ext4_mb_new_blocks+0x34a/0x530 [] ? ext4_ext_find_extent+0x25d/0x280 [] ext4_ext_get_blocks+0x122b/0x13c0 [] ? sched_clock_local+0xd2/0x170 [] ? sched_clock_cpu+0x120/0x160 [] ? cpu_clock+0x4f/0x60 [] ? trace_hardirqs_off_caller+0x20/0xd0 [] ? down_write+0x8c/0xa0 [] ext4_get_blocks+0x226/0x450 [] ? sched_clock_cpu+0x120/0x160 [] ? cpu_clock+0x4f/0x60 [] ? trace_hardirqs_off+0xb/0x10 [] mpage_da_map_blocks+0xc3/0xaa0 [] ? find_get_pages_tag+0x16c/0x180 [] ? find_get_pages_tag+0x0/0x180 [] ? __mpage_da_writepage+0x16d/0x1a0 [] ? pagevec_lookup_tag+0x2e/0x40 [] ? write_cache_pages+0xdb/0x3d0 [] ? __mpage_da_writepage+0x0/0x1a0 [] ext4_da_writepages+0x506/0x790 [] ? cpu_clock+0x4f/0x60 [] ? sched_clock_local+0xd2/0x170 [] ? sched_clock_cpu+0x120/0x160 [] ? sched_clock_cpu+0x120/0x160 [] ? ext4_da_writepages+0x0/0x790 [] do_writepages+0x22/0x50 [] __filemap_fdatawrite_range+0x6d/0x80 [] filemap_flush+0x2b/0x30 [] ext4_alloc_da_blocks+0x5c/0x60 [] ext4_release_file+0x75/0xb0 [] __fput+0xf9/0x210 [] fput+0x27/0x30 [] filp_close+0x4c/0x80 [] put_files_struct+0x6e/0xd0 [] exit_files+0x47/0x60 [] do_exit+0x144/0x710 [] ? lock_release_holdtime+0x33/0x210 [] ? _spin_unlock_irq+0x27/0x30 [] do_group_exit+0x38/0xa0 [] ? trace_hardirqs_on+0xb/0x10 [] get_signal_to_deliver+0x2ac/0x410 [] do_notify_resume+0xb9/0x890 [] ? trace_hardirqs_off_caller+0x20/0xd0 [] ? lock_release_holdtime+0x33/0x210 [] ? autoremove_wake_function+0x0/0x50 [] ? trace_hardirqs_on_caller+0x134/0x190 [] ? trace_hardirqs_on+0xb/0x10 [] ? security_file_permission+0x14/0x20 [] ? vfs_write+0x131/0x190 [] ? do_sync_write+0x0/0x120 [] ? sysenter_do_call+0x27/0x32 [] work_notifysig+0x13/0x21 CC: Theodore Ts'o Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext4/inode.c | 9 +++++++-- fs/ext4/mballoc.c | 6 ------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a31980dd9b86..ddd0a9c87d9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1046,7 +1046,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks) static void ext4_da_update_reserve_space(struct inode *inode, int used) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int total, mdb, mdb_free; + int total, mdb, mdb_free, mdb_claim = 0; spin_lock(&EXT4_I(inode)->i_block_reservation_lock); /* recalculate the number of metablocks still need to be reserved */ @@ -1059,7 +1059,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) if (mdb_free) { /* Account for allocated meta_blocks */ - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; + mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; + BUG_ON(mdb_free < mdb_claim); + mdb_free -= mdb_claim; /* update fs dirty blocks counter */ percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); @@ -1070,8 +1072,11 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) /* update per-inode reservations */ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); EXT4_I(inode)->i_reserved_data_blocks -= used; + percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + vfs_dq_claim_block(inode, used + mdb_claim); + /* * free those over-booking quota for metadata blocks */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b1fd3daadc9c..d34afad3e137 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2755,12 +2755,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) /* release all the reserved blocks if non delalloc */ percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); - else { - percpu_counter_sub(&sbi->s_dirtyblocks_counter, - ac->ac_b_ex.fe_len); - /* convert reserved quota blocks to real quota blocks */ - vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len); - } if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, -- cgit v1.2.2 From 39bc680a8160bb9d6743f7873b535d553ff61058 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Thu, 10 Dec 2009 16:36:27 +0000 Subject: ext4: fix sleep inside spinlock issue with quota and dealloc (#14739) Unlock i_block_reservation_lock before vfs_dq_reserve_block(). This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=14739 CC: Theodore Ts'o Signed-off-by: Dmitry Monakhov Signed-off-by: Jan Kara --- fs/ext4/inode.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ddd0a9c87d9e..ab807963a614 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1816,19 +1816,17 @@ repeat: md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; total = md_needed + nrblocks; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); /* * Make quota reservation here to prevent quota overflow * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, total)) { - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + if (vfs_dq_reserve_block(inode, total)) return -EDQUOT; - } if (ext4_claim_free_blocks(sbi, total)) { - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); vfs_dq_release_reservation_block(inode, total); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); @@ -1836,10 +1834,11 @@ repeat: } return -ENOSPC; } + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); EXT4_I(inode)->i_reserved_data_blocks += nrblocks; - EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; - + EXT4_I(inode)->i_reserved_meta_blocks += md_needed; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return 0; /* success */ } -- cgit v1.2.2 From 149feb00d7c3f4f06896b245533c957b54e3e052 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Mon, 14 Dec 2009 09:24:20 -0500 Subject: ext4: remove unused #include Remove unused #include ('s) in fs/ext4/block_validity.c fs/ext4/mballoc.h Signed-off-by: Huang Weiyi Signed-off-by: "Theodore Ts'o" --- fs/ext4/block_validity.c | 1 - fs/ext4/mballoc.h | 1 - 2 files changed, 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 4df8621ec31c..a60ab9aad57d 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include "ext4.h" diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 0ca811061bc7..436521cae456 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include "ext4_jbd2.h" -- cgit v1.2.2 From 84c664730374248adaf420c0846a6158d64413c7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 21 Dec 2009 10:54:09 -0500 Subject: ext4: Don't ask about supporting ext2/3 in ext4 if ext4 is not configured Don't offer to build ext2/3 support into ext4 if ext4 itself is not configured on. Signed-off-by: David Howells Signed-off-by: "Theodore Ts'o" --- fs/ext4/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4') diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9acf7e808139..9ed1bb1f319f 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -28,6 +28,7 @@ config EXT4_FS config EXT4_USE_FOR_EXT23 bool "Use ext4 for ext2/ext3 file systems" + depends on EXT4_FS depends on EXT3_FS=n || EXT2_FS=n default y help -- cgit v1.2.2 From 51b7e3c9fbe7d22d4e355101e9a73b44fc5c9feb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 21 Dec 2009 10:56:09 -0500 Subject: ext4: add module aliases for ext2 and ext3 Add module aliases for ext2 and ext3 when CONFIG_EXT4_USE_FOR_EXT23 is set. This makes the existing user-space stuff like mkinitrd working as is. Signed-off-by: Takashi Iwai Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6ed9aa91f27d..0a06fe6f2bc2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4005,6 +4005,7 @@ static inline void unregister_as_ext2(void) { unregister_filesystem(&ext2_fs_type); } +MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } @@ -4031,6 +4032,7 @@ static inline void unregister_as_ext3(void) { unregister_filesystem(&ext3_fs_type); } +MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } -- cgit v1.2.2 From 034fb4c95fc0fed4ec4a50778127b92c6f2aec01 Mon Sep 17 00:00:00 2001 From: Surbhi Palande Date: Mon, 14 Dec 2009 09:53:52 -0500 Subject: ext4: replace BUG() with return -EIO in ext4_ext_get_blocks This patch fixes the Kernel BZ #14286. When the address of an extent corresponding to a valid block is corrupted, a -EIO should be reported instead of a BUG(). This situation should not normally not occur except in the case of a corrupted filesystem. If however it does, then the system should not panic directly but depending on the mount time options appropriate action should be taken. If the mount options so permit, the I/O should be gracefully aborted by returning a -EIO. http://bugzilla.kernel.org/show_bug.cgi?id=14286 Signed-off-by: Surbhi Palande Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3a7928f825e4..8fd6c567964a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3190,7 +3190,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_ext_find_extent() */ - BUG_ON(path[depth].p_ext == NULL && depth != 0); + if (path[depth].p_ext == NULL && depth != 0) { + ext4_error(inode->i_sb, __func__, "bad extent address " + "inode: %lu, iblock: %d, depth: %d", + inode->i_ino, iblock, depth); + err = -EIO; + goto out2; + } eh = path[depth].p_hdr; ex = path[depth].p_ext; -- cgit v1.2.2 From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Dec 2009 06:52:08 -0500 Subject: ext4, jbd2: Add barriers for file systems with exernal journals This is a bit complicated because we are trying to optimize when we send barriers to the fs data disk. We could just throw in an extra barrier to the data disk whenever we send a barrier to the journal disk, but that's not always strictly necessary. We only need to send a barrier during a commit when there are data blocks which are must be written out due to an inode written in ordered mode, or if fsync() depends on the commit to force data blocks to disk. Finally, before we drop transactions from the beginning of the journal during a checkpoint operation, we need to guarantee that any blocks that were flushed out to the data disk are firmly on the rust platter before we drop the transaction from the journal. Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4. Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0b22497d92e1..98bd140aad01 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; - if (jbd2_log_start_commit(journal, commit_tid)) + if (jbd2_log_start_commit(journal, commit_tid)) { + /* + * When the journal is on a different device than the + * fs data disk, we need to issue the barrier in + * writeback mode. (In ordered mode, the jbd2 layer + * will take care of issuing the barrier. In + * data=journal, all of the data blocks are written to + * the journal device.) + */ + if (ext4_should_writeback_data(inode) && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); jbd2_log_wait_commit(journal, commit_tid); - else if (journal->j_flags & JBD2_BARRIER) + } else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } -- cgit v1.2.2 From a6b43e382568a49cc64291bfaddf896652d44f70 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 23 Dec 2009 07:48:08 -0500 Subject: ext4: fix unsigned long long printk warning in super.c sparc64 allmodconfig: fs/ext4/super.c: In function `lifetime_write_kbytes_show': fs/ext4/super.c:2174: warning: long long unsigned int format, long unsigned int arg (arg 4) fs/ext4/super.c:2174: warning: long long unsigned int format, long unsigned int arg (arg 4) Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0a06fe6f2bc2..7cccb35c0f4d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2174,9 +2174,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, struct super_block *sb = sbi->s_buddy_cache->i_sb; return snprintf(buf, PAGE_SIZE, "%llu\n", - sbi->s_kbytes_written + + (unsigned long long)(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); + EXT4_SB(sb)->s_sectors_written_start) >> 1))); } static ssize_t inode_readahead_blks_store(struct ext4_attr *a, -- cgit v1.2.2 From d3533d72e7478a61a3e1936956fc825289a2acf4 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 23 Dec 2009 07:52:31 -0500 Subject: ext4: Eliminate potential double free on error path b_entry_name and buffer are initially NULL, are initialized within a loop to the result of calling kmalloc, and are freed at the bottom of this loop. The loop contains gotos to cleanup, which also frees b_entry_name and buffer. Some of these gotos are before the reinitializations of b_entry_name and buffer. To maintain the invariant that b_entry_name and buffer are NULL at the top of the loop, and thus acceptable arguments to kfree, these variables are now set to NULL after the kfrees. This seems to be the simplest solution. A more complicated solution would be to introduce more labels in the error handling code at the end of the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r@ identifier E; expression E1; iterator I; statement S; @@ *kfree(E); ... when != E = E1 when != I(E,...) S when != &E *kfree(E); // Signed-off-by: Julia Lawall Signed-off-by: "Theodore Ts'o" --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 83218bebbc7c..f3a2f7ed45aa 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1332,6 +1332,8 @@ retry: goto cleanup; kfree(b_entry_name); kfree(buffer); + b_entry_name = NULL; + buffer = NULL; brelse(is->iloc.bh); kfree(is); kfree(bs); -- cgit v1.2.2 From c8afb44682fcef6273e8b8eb19fab13ddd05b386 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 23 Dec 2009 07:58:12 -0500 Subject: ext4: flush delalloc blocks when space is low Creating many small files in rapid succession on a small filesystem can lead to spurious ENOSPC; on a 104MB filesystem: for i in `seq 1 22500`; do echo -n > $SCRATCH_MNT/$i echo XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX > $SCRATCH_MNT/$i done leads to ENOSPC even though after a sync, 40% of the fs is free again. This is because we reserve worst-case metadata for delalloc writes, and when data is allocated that worst-case reservation is not usually needed. When freespace is low, kicking off an async writeback will start converting that worst-case space usage into something more realistic, almost always freeing up space to continue. This resolves the testcase for me, and survives all 4 generic ENOSPC tests in xfstests. We'll still need a hard synchronous sync to squeeze out the last bit, but this fixes things up to a large degree. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ab807963a614..282621f18c10 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2993,11 +2993,18 @@ static int ext4_nonda_switch(struct super_block *sb) if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { /* - * free block count is less that 150% of dirty blocks - * or free blocks is less that watermark + * free block count is less than 150% of dirty blocks + * or free blocks is less than watermark */ return 1; } + /* + * Even if we don't switch but are nearing capacity, + * start pushing delalloc when 1/2 of free blocks are dirty. + */ + if (free_blocks < 2 * dirty_blocks) + writeback_inodes_sb_if_idle(sb); + return 0; } -- cgit v1.2.2 From 2faf2e19dd0e060eeb32442858ef495ac3083277 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 25 Dec 2009 15:46:07 -0500 Subject: ext4: return correct wbc.nr_to_write in ext4_da_writepages When ext4_da_writepages increases the nr_to_write in writeback_control then it must always re-base the return value. Originally there was a (misguided) attempt prevent wbc.nr_to_write from going negative. In fact, it's necessary to allow nr_to_write to be negative so that wb_writeback() can correctly calculate how many pages were actually written. Signed-off-by: Richard Kennedy Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 282621f18c10..3e3b45458ef8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2967,8 +2967,7 @@ retry: out_writepages: if (!no_nrwrite_index_update) wbc->no_nrwrite_index_update = 0; - if (wbc->nr_to_write > nr_to_writebump) - wbc->nr_to_write -= nr_to_writebump; + wbc->nr_to_write -= nr_to_writebump; wbc->range_start = range_start; trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); return ret; -- cgit v1.2.2 From 515f41c33a9d44a964264c9511ad2c869af1fac3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 29 Dec 2009 23:39:06 -0500 Subject: ext4: Ensure zeroout blocks have no dirty metadata This fixes a bug (found by Curt Wohlgemuth) in which new blocks returned from an extent created with ext4_ext_zeroout() can have dirty metadata still associated with them. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8fd6c567964a..91ae46098ea4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3023,6 +3023,14 @@ out: return err; } +static void unmap_underlying_metadata_blocks(struct block_device *bdev, + sector_t block, int count) +{ + int i; + for (i = 0; i < count; i++) + unmap_underlying_metadata(bdev, block + i); +} + static int ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, @@ -3098,6 +3106,18 @@ out: } else allocated = ret; set_buffer_new(bh_result); + /* + * if we allocated more blocks than requested + * we need to make sure we unmap the extra block + * allocated. The actual needed block will get + * unmapped later when we find the buffer_head marked + * new. + */ + if (allocated > max_blocks) { + unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, + newblock + max_blocks, + allocated - max_blocks); + } map_out: set_buffer_mapped(bh_result); out1: -- cgit v1.2.2 From 0637c6f4135f592f094207c7c21e7c0fc5557834 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 30 Dec 2009 14:20:45 -0500 Subject: ext4: Patch up how we claim metadata blocks for quota purposes As reported in Kernel Bugzilla #14936, commit d21cd8f triggered a BUG in the function ext4_da_update_reserve_space() found in fs/ext4/inode.c. The root cause of this BUG() was caused by the fact that ext4_calc_metadata_amount() can severely over-estimate how many metadata blocks will be needed, especially when using direct block-mapped files. In addition, it can also badly *under* estimate how much space is needed, since ext4_calc_metadata_amount() assumes that the blocks are contiguous, and this is not always true. If the application is writing blocks to a sparse file, the number of metadata blocks necessary can be severly underestimated by the functions ext4_da_reserve_space(), ext4_da_update_reserve_space() and ext4_da_release_space(). This was the cause of the dq_claim_space reports found on kerneloops.org. Unfortunately, doing this right means that we need to massively over-estimate the amount of free space needed. So in some cases we may need to force the inode to be written to disk asynchronously in to avoid spurious quota failures. http://bugzilla.kernel.org/show_bug.cgi?id=14936 Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 157 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 84 insertions(+), 73 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3e3b45458ef8..84eeb8f515a3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1043,43 +1043,47 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks) return ext4_indirect_calc_metadata_amount(inode, blocks); } +/* + * Called with i_data_sem down, which is important since we can call + * ext4_discard_preallocations() from here. + */ static void ext4_da_update_reserve_space(struct inode *inode, int used) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int total, mdb, mdb_free, mdb_claim = 0; - - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - used; - mdb = ext4_calc_metadata_amount(inode, total); - - /* figure out how many metablocks to release */ - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - - if (mdb_free) { - /* Account for allocated meta_blocks */ - mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; - BUG_ON(mdb_free < mdb_claim); - mdb_free -= mdb_claim; - - /* update fs dirty blocks counter */ + struct ext4_inode_info *ei = EXT4_I(inode); + int mdb_free = 0; + + spin_lock(&ei->i_block_reservation_lock); + if (unlikely(used > ei->i_reserved_data_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " + "with only %d reserved data blocks\n", + __func__, inode->i_ino, used, + ei->i_reserved_data_blocks); + WARN_ON(1); + used = ei->i_reserved_data_blocks; + } + + /* Update per-inode reservations */ + ei->i_reserved_data_blocks -= used; + used += ei->i_allocated_meta_blocks; + ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; + ei->i_allocated_meta_blocks = 0; + percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); + + if (ei->i_reserved_data_blocks == 0) { + /* + * We can release all of the reserved metadata blocks + * only when we have written all of the delayed + * allocation blocks. + */ + mdb_free = ei->i_allocated_meta_blocks; percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); - EXT4_I(inode)->i_allocated_meta_blocks = 0; - EXT4_I(inode)->i_reserved_meta_blocks = mdb; + ei->i_allocated_meta_blocks = 0; } - - /* update per-inode reservations */ - BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= used; - percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_claim_block(inode, used + mdb_claim); - - /* - * free those over-booking quota for metadata blocks - */ + /* Update quota subsystem */ + vfs_dq_claim_block(inode, used); if (mdb_free) vfs_dq_release_reservation_block(inode, mdb_free); @@ -1088,7 +1092,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * there aren't any writers on the inode, we can discard the * inode's preallocations. */ - if (!total && (atomic_read(&inode->i_writecount) == 0)) + if ((ei->i_reserved_data_blocks == 0) && + (atomic_read(&inode->i_writecount) == 0)) ext4_discard_preallocations(inode); } @@ -1801,7 +1806,8 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) { int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - unsigned long md_needed, mdblocks, total = 0; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long md_needed, md_reserved, total = 0; /* * recalculate the amount of metadata blocks to reserve @@ -1809,35 +1815,44 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) * worse case is one extent per block */ repeat: - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; - mdblocks = ext4_calc_metadata_amount(inode, total); - BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); - - md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; + spin_lock(&ei->i_block_reservation_lock); + md_reserved = ei->i_reserved_meta_blocks; + md_needed = ext4_calc_metadata_amount(inode, nrblocks); total = md_needed + nrblocks; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + spin_unlock(&ei->i_block_reservation_lock); /* * Make quota reservation here to prevent quota overflow * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, total)) + if (vfs_dq_reserve_block(inode, total)) { + /* + * We tend to badly over-estimate the amount of + * metadata blocks which are needed, so if we have + * reserved any metadata blocks, try to force out the + * inode and see if we have any better luck. + */ + if (md_reserved && retries++ <= 3) + goto retry; return -EDQUOT; + } if (ext4_claim_free_blocks(sbi, total)) { vfs_dq_release_reservation_block(inode, total); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { + retry: + if (md_reserved) + write_inode_now(inode, (retries == 3)); yield(); goto repeat; } return -ENOSPC; } - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - EXT4_I(inode)->i_reserved_data_blocks += nrblocks; - EXT4_I(inode)->i_reserved_meta_blocks += md_needed; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + spin_lock(&ei->i_block_reservation_lock); + ei->i_reserved_data_blocks += nrblocks; + ei->i_reserved_meta_blocks += md_needed; + spin_unlock(&ei->i_block_reservation_lock); return 0; /* success */ } @@ -1845,49 +1860,45 @@ repeat: static void ext4_da_release_space(struct inode *inode, int to_free) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int total, mdb, mdb_free, release; + struct ext4_inode_info *ei = EXT4_I(inode); if (!to_free) return; /* Nothing to release, exit */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - if (!EXT4_I(inode)->i_reserved_data_blocks) { + if (unlikely(to_free > ei->i_reserved_data_blocks)) { /* - * if there is no reserved blocks, but we try to free some - * then the counter is messed up somewhere. - * but since this function is called from invalidate - * page, it's harmless to return without any action + * if there aren't enough reserved blocks, then the + * counter is messed up somewhere. Since this + * function is called from invalidate page, it's + * harmless to return without any action. */ - printk(KERN_INFO "ext4 delalloc try to release %d reserved " - "blocks for inode %lu, but there is no reserved " - "data blocks\n", to_free, inode->i_ino); - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - return; + ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " + "ino %lu, to_free %d with only %d reserved " + "data blocks\n", inode->i_ino, to_free, + ei->i_reserved_data_blocks); + WARN_ON(1); + to_free = ei->i_reserved_data_blocks; } + ei->i_reserved_data_blocks -= to_free; - /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - to_free; - mdb = ext4_calc_metadata_amount(inode, total); - - /* figure out how many metablocks to release */ - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - - release = to_free + mdb_free; - - /* update fs dirty blocks counter for truncate case */ - percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); + if (ei->i_reserved_data_blocks == 0) { + /* + * We can release all of the reserved metadata blocks + * only when we have written all of the delayed + * allocation blocks. + */ + to_free += ei->i_allocated_meta_blocks; + ei->i_allocated_meta_blocks = 0; + } - /* update per-inode reservations */ - BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= to_free; + /* update fs dirty blocks counter */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - EXT4_I(inode)->i_reserved_meta_blocks = mdb; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_release_reservation_block(inode, release); + vfs_dq_release_reservation_block(inode, to_free); } static void ext4_da_page_release_reservation(struct page *page, -- cgit v1.2.2 From ee5f4d9cdf32fd99172d11665c592a288c2b1ff4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 Jan 2010 02:36:15 -0500 Subject: ext4: Fix accounting of reserved metadata blocks Commit 0637c6f had a typo which caused the reserved metadata blocks to not be released correctly. Fix this. Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 84eeb8f515a3..bdaa92a29e0e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1076,9 +1076,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * only when we have written all of the delayed * allocation blocks. */ - mdb_free = ei->i_allocated_meta_blocks; + mdb_free = ei->i_reserved_meta_blocks; + ei->i_reserved_meta_blocks = 0; percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); - ei->i_allocated_meta_blocks = 0; } spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); @@ -1889,8 +1889,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) * only when we have written all of the delayed * allocation blocks. */ - to_free += ei->i_allocated_meta_blocks; - ei->i_allocated_meta_blocks = 0; + to_free += ei->i_reserved_meta_blocks; + ei->i_reserved_meta_blocks = 0; } /* update fs dirty blocks counter */ -- cgit v1.2.2 From 9d0be50230b333005635967f7ecd4897dbfd181b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 Jan 2010 02:41:30 -0500 Subject: ext4: Calculate metadata requirements more accurately In the past, ext4_calc_metadata_amount(), and its sub-functions ext4_ext_calc_metadata_amount() and ext4_indirect_calc_metadata_amount() badly over-estimated the number of metadata blocks that might be required for delayed allocation blocks. This didn't matter as much when functions which managed the reserved metadata blocks were more aggressive about dropping reserved metadata blocks as delayed allocation blocks were written, but unfortunately they were too aggressive. This was fixed in commit 0637c6f, but as a result the over-estimation by ext4_calc_metadata_amount() would lead to reserving 2-3 times the number of pending delayed allocation blocks as potentially required metadata blocks. So if there are 1 megabytes of blocks which have been not yet been allocation, up to 3 megabytes of space would get reserved out of the user's quota and from the file system free space pool until all of the inode's data blocks have been allocated. This commit addresses this problem by much more accurately estimating the number of metadata blocks that will be required. It will still somewhat over-estimate the number of blocks needed, since it must make a worst case estimate not knowing which physical blocks will be needed, but it is much more accurate than before. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 ++ fs/ext4/ext4_extents.h | 3 ++- fs/ext4/extents.c | 49 +++++++++++++++++++++++++-------------- fs/ext4/inode.c | 62 +++++++++++++++++++++++++++++--------------------- fs/ext4/super.c | 1 + 5 files changed, 73 insertions(+), 44 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 56f9271ee8cc..af7b62699ea9 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -699,6 +699,8 @@ struct ext4_inode_info { unsigned int i_reserved_meta_blocks; unsigned int i_allocated_meta_blocks; unsigned short i_delalloc_reserved_flag; + sector_t i_da_metadata_calc_last_lblock; + int i_da_metadata_calc_len; /* on-disk additional length */ __u16 i_extra_isize; diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 2ca686454e87..bdb6ce7e2eb4 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); } -extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); +extern int ext4_ext_calc_metadata_amount(struct inode *inode, + sector_t lblocks); extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 91ae46098ea4..7d7b74e94687 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) * to allocate @blocks * Worse case is one block per extent */ -int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) +int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) { - int lcap, icap, rcap, leafs, idxs, num; - int newextents = blocks; - - rcap = ext4_ext_space_root_idx(inode, 0); - lcap = ext4_ext_space_block(inode, 0); - icap = ext4_ext_space_block_idx(inode, 0); + struct ext4_inode_info *ei = EXT4_I(inode); + int idxs, num = 0; - /* number of new leaf blocks needed */ - num = leafs = (newextents + lcap - 1) / lcap; + idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx)); /* - * Worse case, we need separate index block(s) - * to link all new leaf blocks + * If the new delayed allocation block is contiguous with the + * previous da block, it can share index blocks with the + * previous block, so we only need to allocate a new index + * block every idxs leaf blocks. At ldxs**2 blocks, we need + * an additional index block, and at ldxs**3 blocks, yet + * another index blocks. */ - idxs = (leafs + icap - 1) / icap; - do { - num += idxs; - idxs = (idxs + icap - 1) / icap; - } while (idxs > rcap); + if (ei->i_da_metadata_calc_len && + ei->i_da_metadata_calc_last_lblock+1 == lblock) { + if ((ei->i_da_metadata_calc_len % idxs) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { + num++; + ei->i_da_metadata_calc_len = 0; + } else + ei->i_da_metadata_calc_len++; + ei->i_da_metadata_calc_last_lblock++; + return num; + } - return num; + /* + * In the worst case we need a new set of index blocks at + * every level of the inode's extent tree. + */ + ei->i_da_metadata_calc_len = 1; + ei->i_da_metadata_calc_last_lblock = lblock; + return ext_depth(inode) + 1; } static int diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bdaa92a29e0e..c818972c8302 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1009,38 +1009,44 @@ qsize_t *ext4_get_reserved_space(struct inode *inode) return &EXT4_I(inode)->i_reserved_quota; } #endif + /* * Calculate the number of metadata blocks need to reserve - * to allocate @blocks for non extent file based file + * to allocate a new block at @lblocks for non extent file based file */ -static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) +static int ext4_indirect_calc_metadata_amount(struct inode *inode, + sector_t lblock) { - int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ind_blks, dind_blks, tind_blks; - - /* number of new indirect blocks needed */ - ind_blks = (blocks + icap - 1) / icap; + struct ext4_inode_info *ei = EXT4_I(inode); + int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; + int blk_bits; - dind_blks = (ind_blks + icap - 1) / icap; + if (lblock < EXT4_NDIR_BLOCKS) + return 0; - tind_blks = 1; + lblock -= EXT4_NDIR_BLOCKS; - return ind_blks + dind_blks + tind_blks; + if (ei->i_da_metadata_calc_len && + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { + ei->i_da_metadata_calc_len++; + return 0; + } + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; + ei->i_da_metadata_calc_len = 1; + blk_bits = roundup_pow_of_two(lblock + 1); + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; } /* * Calculate the number of metadata blocks need to reserve - * to allocate given number of blocks + * to allocate a block located at @lblock */ -static int ext4_calc_metadata_amount(struct inode *inode, int blocks) +static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) { - if (!blocks) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_calc_metadata_amount(inode, blocks); + return ext4_ext_calc_metadata_amount(inode, lblock); - return ext4_indirect_calc_metadata_amount(inode, blocks); + return ext4_indirect_calc_metadata_amount(inode, lblock); } /* @@ -1078,6 +1084,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) */ mdb_free = ei->i_reserved_meta_blocks; ei->i_reserved_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); } spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); @@ -1802,12 +1809,15 @@ static int ext4_journalled_write_end(struct file *file, return ret ? ret : copied; } -static int ext4_da_reserve_space(struct inode *inode, int nrblocks) +/* + * Reserve a single block located at lblock + */ +static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) { int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - unsigned long md_needed, md_reserved, total = 0; + unsigned long md_needed, md_reserved; /* * recalculate the amount of metadata blocks to reserve @@ -1817,8 +1827,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) repeat: spin_lock(&ei->i_block_reservation_lock); md_reserved = ei->i_reserved_meta_blocks; - md_needed = ext4_calc_metadata_amount(inode, nrblocks); - total = md_needed + nrblocks; + md_needed = ext4_calc_metadata_amount(inode, lblock); spin_unlock(&ei->i_block_reservation_lock); /* @@ -1826,7 +1835,7 @@ repeat: * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, total)) { + if (vfs_dq_reserve_block(inode, md_needed + 1)) { /* * We tend to badly over-estimate the amount of * metadata blocks which are needed, so if we have @@ -1838,8 +1847,8 @@ repeat: return -EDQUOT; } - if (ext4_claim_free_blocks(sbi, total)) { - vfs_dq_release_reservation_block(inode, total); + if (ext4_claim_free_blocks(sbi, md_needed + 1)) { + vfs_dq_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { retry: if (md_reserved) @@ -1850,7 +1859,7 @@ repeat: return -ENOSPC; } spin_lock(&ei->i_block_reservation_lock); - ei->i_reserved_data_blocks += nrblocks; + ei->i_reserved_data_blocks++; ei->i_reserved_meta_blocks += md_needed; spin_unlock(&ei->i_block_reservation_lock); @@ -1891,6 +1900,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) */ to_free += ei->i_reserved_meta_blocks; ei->i_reserved_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; } /* update fs dirty blocks counter */ @@ -2504,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * XXX: __block_prepare_write() unmaps passed block, * is it OK? */ - ret = ext4_da_reserve_space(inode, 1); + ret = ext4_da_reserve_space(inode, iblock); if (ret) /* not enough space to reserve */ return ret; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7cccb35c0f4d..735c20d5fd56 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_data_blocks = 0; ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA -- cgit v1.2.2 From 1db913823c0f8360fccbd24ca67eb073966a5ffd Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 22 Jan 2010 17:06:20 -0500 Subject: ext4: Handle -EDQUOT error on write We need to release the journal before we do a write_inode. Otherwise we could deadlock. Signed-off-by: Aneesh Kumar K.V --- fs/ext4/inode.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c818972c8302..60b3a19e9927 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1835,24 +1835,12 @@ repeat: * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, md_needed + 1)) { - /* - * We tend to badly over-estimate the amount of - * metadata blocks which are needed, so if we have - * reserved any metadata blocks, try to force out the - * inode and see if we have any better luck. - */ - if (md_reserved && retries++ <= 3) - goto retry; + if (vfs_dq_reserve_block(inode, md_needed + 1)) return -EDQUOT; - } if (ext4_claim_free_blocks(sbi, md_needed + 1)) { vfs_dq_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - retry: - if (md_reserved) - write_inode_now(inode, (retries == 3)); yield(); goto repeat; } @@ -3032,7 +3020,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int ret, retries = 0; + int ret, retries = 0, quota_retries = 0; struct page *page; pgoff_t index; unsigned from, to; @@ -3091,6 +3079,22 @@ retry: if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; + + if ((ret == -EDQUOT) && + EXT4_I(inode)->i_reserved_meta_blocks && + (quota_retries++ < 3)) { + /* + * Since we often over-estimate the number of meta + * data blocks required, we may sometimes get a + * spurios out of quota error even though there would + * be enough space once we write the data blocks and + * find out how many meta data blocks were _really_ + * required. So try forcing the inode write to see if + * that helps. + */ + write_inode_now(inode, (quota_retries == 3)); + goto retry; + } out: return ret; } -- cgit v1.2.2 From 5f634d064c709ea02c3cdaa850a08323a4a4bf28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Jan 2010 04:00:31 -0500 Subject: ext4: Fix quota accounting error with fallocate When we fallocate a region of the file which we had recently written, and which is still in the page cache marked as delayed allocated blocks we need to make sure we don't do the quota update on writepage path. This is because the needed quota updated would have already be done by fallocate. Signed-off-by: Aneesh Kumar K.V --- fs/ext4/ext4.h | 2 ++ fs/ext4/extents.c | 21 +++++++++++++++++++++ fs/ext4/inode.c | 44 +++++++++++++++++++++++++++++++------------- 3 files changed, 54 insertions(+), 13 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index af7b62699ea9..b98de17e542a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1443,6 +1443,8 @@ extern int ext4_block_truncate_page(handle_t *handle, extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int flush_aio_dio_completed_IO(struct inode *inode); +extern void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7d7b74e94687..3b6ff72026f0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3132,7 +3132,19 @@ out: unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, newblock + max_blocks, allocated - max_blocks); + allocated = max_blocks; } + + /* + * If we have done fallocate with the offset that is already + * delayed allocated, we would have block reservation + * and quota reservation done in the delayed write path. + * But fallocate would have already updated quota and block + * count for this offset. So cancel these reservation + */ + if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + ext4_da_update_reserve_space(inode, allocated, 0); + map_out: set_buffer_mapped(bh_result); out1: @@ -3368,8 +3380,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); + if (allocated > max_blocks) + allocated = max_blocks; set_buffer_new(bh_result); + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + ext4_da_update_reserve_space(inode, allocated, 1); + /* * Cache the extent and update transaction to commit on fdatasync only * when it is _not_ an uninitialized extent. diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 60b3a19e9927..c955f6490b78 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1053,11 +1053,12 @@ static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) * Called with i_data_sem down, which is important since we can call * ext4_discard_preallocations() from here. */ -static void ext4_da_update_reserve_space(struct inode *inode, int used) +void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - int mdb_free = 0; + int mdb_free = 0, allocated_meta_blocks = 0; spin_lock(&ei->i_block_reservation_lock); if (unlikely(used > ei->i_reserved_data_blocks)) { @@ -1073,6 +1074,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) ei->i_reserved_data_blocks -= used; used += ei->i_allocated_meta_blocks; ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; + allocated_meta_blocks = ei->i_allocated_meta_blocks; ei->i_allocated_meta_blocks = 0; percpu_counter_sub(&sbi->s_dirtyblocks_counter, used); @@ -1090,9 +1092,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); /* Update quota subsystem */ - vfs_dq_claim_block(inode, used); - if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + if (quota_claim) { + vfs_dq_claim_block(inode, used); + if (mdb_free) + vfs_dq_release_reservation_block(inode, mdb_free); + } else { + /* + * We did fallocate with an offset that is already delayed + * allocated. So on delayed allocated writeback we should + * not update the quota for allocated blocks. But then + * converting an fallocate region to initialized region would + * have caused a metadata allocation. So claim quota for + * that + */ + if (allocated_meta_blocks) + vfs_dq_claim_block(inode, allocated_meta_blocks); + vfs_dq_release_reservation_block(inode, mdb_free + used); + } /* * If we have done all the pending block allocations and if @@ -1292,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, */ EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; } - } + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. We don't + * support fallocate for non extent files. So we can update + * reserve space here. + */ + if ((retval > 0) && + (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) + ext4_da_update_reserve_space(inode, retval, 1); + } if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 0; - /* - * Update reserved blocks/metadata blocks after successful - * block allocation which had been deferred till now. - */ - if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) - ext4_da_update_reserve_space(inode, retval); - up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && buffer_mapped(bh)) { int ret = check_block_validity(inode, "file system " -- cgit v1.2.2 From 1296cc85c26e94eb865d03f82140f27d598de467 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 15 Jan 2010 01:27:59 -0500 Subject: ext4: Drop EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE flag We should update reserve space if it is delalloc buffer and that is indicated by EXT4_GET_BLOCKS_DELALLOC_RESERVE flag. So use EXT4_GET_BLOCKS_DELALLOC_RESERVE in place of EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE Signed-off-by: Aneesh Kumar K.V --- fs/ext4/ext4.h | 7 ++----- fs/ext4/extents.c | 4 ++-- fs/ext4/inode.c | 8 ++++---- 3 files changed, 8 insertions(+), 11 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b98de17e542a..874d169a193e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -361,14 +361,11 @@ struct ext4_new_group_data { so set the magic i_delalloc_reserve_flag after taking the inode allocation semaphore for */ #define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 - /* Call ext4_da_update_reserve_space() after successfully - allocating the blocks */ -#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 /* caller is from the direct IO path, request to creation of an unitialized extents if not allocated, split the uninitialized extent if blocks has been preallocated already*/ -#define EXT4_GET_BLOCKS_DIO 0x0010 -#define EXT4_GET_BLOCKS_CONVERT 0x0020 +#define EXT4_GET_BLOCKS_DIO 0x0008 +#define EXT4_GET_BLOCKS_CONVERT 0x0010 #define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* Convert extent to initialized after direct IO complete */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3b6ff72026f0..765a4826b118 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3142,7 +3142,7 @@ out: * But fallocate would have already updated quota and block * count for this offset. So cancel these reservation */ - if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ext4_da_update_reserve_space(inode, allocated, 0); map_out: @@ -3388,7 +3388,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * Update reserved blocks/metadata blocks after successful * block allocation which had been deferred till now. */ - if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) ext4_da_update_reserve_space(inode, allocated, 1); /* diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c955f6490b78..e11952404e02 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1316,7 +1316,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * reserve space here. */ if ((retval > 0) && - (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) + (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) ext4_da_update_reserve_space(inode, retval, 1); } if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) @@ -2219,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * variables are updated after the blocks have been allocated. */ new.b_state = 0; - get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | - EXT4_GET_BLOCKS_DELALLOC_RESERVE); + get_blocks_flags = EXT4_GET_BLOCKS_CREATE; if (mpd->b_state & (1 << BH_Delay)) - get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; + get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; + blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, &new, get_blocks_flags); if (blks < 0) { -- cgit v1.2.2 From 19f5fb7ad679bb361222c7916086435020c37cce Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 24 Jan 2010 14:34:07 -0500 Subject: ext4: Use bitops to read/modify EXT4_I(inode)->i_state At several places we modify EXT4_I(inode)->i_state without holding i_mutex (ext4_release_file, ext4_bmap, ext4_journalled_writepage, ext4_do_update_inode, ...). These modifications are racy and we can lose updates to i_state. So convert handling of i_state to use bitops which are atomic. Cc: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 41 +++++++++++++++++++++++++++++------------ fs/ext4/extents.c | 8 ++++---- fs/ext4/file.c | 4 ++-- fs/ext4/ialloc.c | 3 ++- fs/ext4/inode.c | 38 ++++++++++++++++++++------------------ fs/ext4/migrate.c | 6 +++--- fs/ext4/xattr.c | 22 +++++++++++----------- 7 files changed, 71 insertions(+), 51 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 307ecd13a762..ffe1334c891e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -313,17 +313,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) return flags & EXT4_OTHER_FLMASK; } -/* - * Inode dynamic state flags - */ -#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ -#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ -#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ -#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ -#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ -#define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/ - /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { __u32 group; /* Group number for this data */ @@ -631,7 +620,7 @@ struct ext4_inode_info { * near to their parent directory's inode. */ ext4_group_t i_block_group; - __u32 i_state; /* Dynamic state flags for ext4 */ + unsigned long i_state_flags; /* Dynamic state flags */ ext4_lblk_t i_dir_start_lookup; #ifdef CONFIG_EXT4_FS_XATTR @@ -1051,6 +1040,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) (ino >= EXT4_FIRST_INO(sb) && ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } + +/* + * Inode dynamic state flags + */ +enum { + EXT4_STATE_JDATA, /* journaled data exists */ + EXT4_STATE_NEW, /* inode is newly created */ + EXT4_STATE_XATTR, /* has in-inode xattrs */ + EXT4_STATE_NO_EXPAND, /* No space for expansion */ + EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ + EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ + EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ +}; + +static inline int ext4_test_inode_state(struct inode *inode, int bit) +{ + return test_bit(bit, &EXT4_I(inode)->i_state_flags); +} + +static inline void ext4_set_inode_state(struct inode *inode, int bit) +{ + set_bit(bit, &EXT4_I(inode)->i_state_flags); +} + +static inline void ext4_clear_inode_state(struct inode *inode, int bit) +{ + clear_bit(bit, &EXT4_I(inode)->i_state_flags); +} #else /* Assume that user mode programs are passing in an ext4fs superblock, not * a kernel struct super_block. This will allow us to call the feature-test diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c56877972b0e..54616157c0f3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3076,7 +3076,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, if (io) io->flag = DIO_AIO_UNWRITTEN; else - EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN; + ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); goto out; } /* async DIO end_io complete, convert the filled extent to written */ @@ -3362,8 +3362,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (io) io->flag = DIO_AIO_UNWRITTEN; else - EXT4_I(inode)->i_state |= - EXT4_STATE_DIO_UNWRITTEN;; + ext4_set_inode_state(inode, + EXT4_STATE_DIO_UNWRITTEN); } } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); @@ -3739,7 +3739,7 @@ static int ext4_xattr_fiemap(struct inode *inode, int error = 0; /* in-inode? */ - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { struct ext4_iloc iloc; int offset; /* offset of xattr in inode */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9630583cef28..f6071ce0b155 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -35,9 +35,9 @@ */ static int ext4_release_file(struct inode *inode, struct file *filp) { - if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { + if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { ext4_alloc_da_blocks(inode); - EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; + ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f3624ead4f6c..2fab5adae1e2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1029,7 +1029,8 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - ei->i_state = EXT4_STATE_NEW; + ei->i_state_flags = 0; + ext4_set_inode_state(inode, EXT4_STATE_NEW); ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1a3d7b232cd7..3e530119d7f0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1307,7 +1307,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, * i_data's format changing. Force the migrate * to fail by clearing migrate flags */ - EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; + ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); } /* @@ -1794,7 +1794,7 @@ static int ext4_journalled_write_end(struct file *file, new_i_size = pos + copied; if (new_i_size > inode->i_size) i_size_write(inode, pos+copied); - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; + ext4_set_inode_state(inode, EXT4_STATE_JDATA); if (new_i_size > EXT4_I(inode)->i_disksize) { ext4_update_i_disksize(inode, new_i_size); ret2 = ext4_mark_inode_dirty(handle, inode); @@ -2632,7 +2632,7 @@ static int __ext4_journalled_writepage(struct page *page, ret = err; walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; + ext4_set_inode_state(inode, EXT4_STATE_JDATA); out: return ret; } @@ -3303,7 +3303,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) filemap_write_and_wait(mapping); } - if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { + if (EXT4_JOURNAL(inode) && + ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -3322,7 +3323,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) * everything they get. */ - EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; + ext4_clear_inode_state(inode, EXT4_STATE_JDATA); journal = EXT4_JOURNAL(inode); jbd2_journal_lock_updates(journal); err = jbd2_journal_flush(journal); @@ -3790,8 +3791,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { ext4_free_io_end(iocb->private); iocb->private = NULL; - } else if (ret > 0 && (EXT4_I(inode)->i_state & - EXT4_STATE_DIO_UNWRITTEN)) { + } else if (ret > 0 && ext4_test_inode_state(inode, + EXT4_STATE_DIO_UNWRITTEN)) { int err; /* * for non AIO case, since the IO is already @@ -3801,7 +3802,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, offset, ret); if (err < 0) ret = err; - EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; + ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } return ret; } @@ -4457,7 +4458,7 @@ void ext4_truncate(struct inode *inode) return; if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) - ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; + ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { ext4_ext_truncate(inode); @@ -4743,7 +4744,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { /* We have all inode data except xattrs in memory here. */ return __ext4_get_inode_loc(inode, iloc, - !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); + !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); } void ext4_set_inode_flags(struct inode *inode) @@ -4837,7 +4838,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); - ei->i_state = 0; + ei->i_state_flags = 0; ei->i_dir_start_lookup = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. @@ -4920,7 +4921,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) - ei->i_state |= EXT4_STATE_XATTR; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); } } else ei->i_extra_isize = 0; @@ -5060,7 +5061,7 @@ static int ext4_do_update_inode(handle_t *handle, /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ - if (ei->i_state & EXT4_STATE_NEW) + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); ext4_get_inode_flags(ei); @@ -5156,7 +5157,7 @@ static int ext4_do_update_inode(handle_t *handle, rc = ext4_handle_dirty_metadata(handle, inode, bh); if (!err) err = rc; - ei->i_state &= ~EXT4_STATE_NEW; + ext4_clear_inode_state(inode, EXT4_STATE_NEW); ext4_update_inode_fsync_trans(handle, inode, 0); out_brelse: @@ -5580,8 +5581,8 @@ static int ext4_expand_extra_isize(struct inode *inode, entry = IFIRST(header); /* No extended attributes present */ - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || - header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || + header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, new_extra_isize); EXT4_I(inode)->i_extra_isize = new_extra_isize; @@ -5625,7 +5626,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) err = ext4_reserve_inode_write(handle, inode, &iloc); if (ext4_handle_valid(handle) && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && - !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { + !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { /* * We need extra buffer credits since we may write into EA block * with this same handle. If journal_extend fails, then it will @@ -5639,7 +5640,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) sbi->s_want_extra_isize, iloc, handle); if (ret) { - EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + ext4_set_inode_state(inode, + EXT4_STATE_NO_EXPAND); if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { ext4_warning(inode->i_sb, __func__, diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 81415814b00b..46a4101e0aec 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -365,12 +365,12 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, * happened after we started the migrate. We need to * fail the migrate */ - if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { + if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) { retval = -EAGAIN; up_write(&EXT4_I(inode)->i_data_sem); goto err_out; } else - EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; + ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); /* * We have the extent map build with the tmp inode. * Now copy the i_data across @@ -533,7 +533,7 @@ int ext4_ext_migrate(struct inode *inode) * allocation. */ down_read((&EXT4_I(inode)->i_data_sem)); - EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; + ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); up_read((&EXT4_I(inode)->i_data_sem)); handle = ext4_journal_start(inode, 1); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f3a2f7ed45aa..c619a7ea670d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -267,7 +267,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *end; int error; - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) return -ENODATA; error = ext4_get_inode_loc(inode, &iloc); if (error) @@ -396,7 +396,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) void *end; int error; - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) return 0; error = ext4_get_inode_loc(inode, &iloc); if (error) @@ -908,7 +908,7 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, is->s.base = is->s.first = IFIRST(header); is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { error = ext4_xattr_check_names(IFIRST(header), is->s.end); if (error) return error; @@ -940,10 +940,10 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); - EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); } else { header->h_magic = cpu_to_le32(0); - EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; + ext4_clear_inode_state(inode, EXT4_STATE_XATTR); } return 0; } @@ -986,8 +986,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (strlen(name) > 255) return -ERANGE; down_write(&EXT4_I(inode)->xattr_sem); - no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; - EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); error = ext4_get_inode_loc(inode, &is.iloc); if (error) @@ -997,10 +997,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (error) goto cleanup; - if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) { struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; + ext4_clear_inode_state(inode, EXT4_STATE_NEW); } error = ext4_xattr_ibody_find(inode, &i, &is); @@ -1052,7 +1052,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); if (!value) - EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1067,7 +1067,7 @@ cleanup: brelse(is.iloc.bh); brelse(bs.bh); if (no_expand == 0) - EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); up_write(&EXT4_I(inode)->xattr_sem); return error; } -- cgit v1.2.2 From f710b4b96ba292dfed2153afc47e9063b0abfd89 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 25 Jan 2010 03:31:32 -0500 Subject: ext4: Reserve INCOMPAT_EA_INODE and INCOMPAT_DIRDATA feature codepoints Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ffe1334c891e..61cf3b3cde4e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -284,6 +284,7 @@ struct flex_groups { #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ @@ -1144,6 +1145,8 @@ static inline void ext4_clear_inode_state(struct inode *inode, int bit) #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 +#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ +#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ -- cgit v1.2.2 From d6b198bc8a67deee5984fb9506f000ae4fce9d75 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Sun, 17 Jan 2010 19:10:07 -0200 Subject: fix ext3/ext4 comment typo compain -> complain Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Jiri Kosina --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 735c20d5fd56..adcb713a5ac5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -302,7 +302,7 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, * write out the superblock safely. * * We'll just use the jbd2_journal_abort() error code to record an error in - * the journal instead. On recovery, the journal will compain about + * the journal instead. On recovery, the journal will complain about * that error until we've noted it down and cleared it. */ -- cgit v1.2.2 From 1537a3638cbf741d3826c1002026cce487a6bee0 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 29 Jan 2010 15:57:49 +0800 Subject: tree-wide: fix 'lenght' typo in comments and code Some misspelled occurences of 'octet' and some comments were also fixed as I was on it. Signed-off-by: Daniel Mack Cc: Jiri Kosina Cc: Joe Perches Cc: Junio C Hamano Signed-off-by: Jiri Kosina --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d34afad3e137..b794dd8141a0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -69,7 +69,7 @@ * * pa_lstart -> the logical start block for this prealloc space * pa_pstart -> the physical start block for this prealloc space - * pa_len -> lenght for this prealloc space + * pa_len -> length for this prealloc space * pa_free -> free space available in this prealloc space * * The inode preallocation space is used looking at the _logical_ start -- cgit v1.2.2 From 3ad2f3fbb961429d2aa627465ae4829758bc7e07 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 3 Feb 2010 08:01:28 +0800 Subject: tree-wide: Assorted spelling fixes In particular, several occurances of funny versions of 'success', 'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address', 'beginning', 'desirable', 'separate' and 'necessary' are fixed. Signed-off-by: Daniel Mack Cc: Joe Perches Cc: Junio C Hamano Signed-off-by: Jiri Kosina --- fs/ext4/move_extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 82c415be87a4..12a9ec73a888 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -928,7 +928,7 @@ out2: } /** - * mext_check_argumants - Check whether move extent can be done + * mext_check_arguments - Check whether move extent can be done * * @orig_inode: original inode * @donor_inode: donor inode -- cgit v1.2.2 From 12062dddda450976b129dcb1bacd91acaf4d8030 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 15 Feb 2010 14:19:27 -0500 Subject: ext4: move __func__ into a macro for ext4_warning, ext4_error Just a pet peeve of mine; we had a mishash of calls with either __func__ or "function_name" and the latter tends to get out of sync. I think it's easier to just hide the __func__ in a macro, and it'll be consistent from then on. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 29 +++++--------- fs/ext4/dir.c | 4 +- fs/ext4/ext4.h | 7 +++- fs/ext4/ext4_jbd2.c | 2 +- fs/ext4/extents.c | 9 +++-- fs/ext4/ialloc.c | 27 +++++-------- fs/ext4/inode.c | 43 +++++++++------------ fs/ext4/mballoc.c | 27 ++++++------- fs/ext4/move_extent.c | 12 +++--- fs/ext4/namei.c | 51 +++++++++++-------------- fs/ext4/resize.c | 102 +++++++++++++++++++------------------------------- fs/ext4/super.c | 11 +++--- fs/ext4/xattr.c | 32 +++++++--------- 13 files changed, 146 insertions(+), 210 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 22bc7435d913..720061a0ee53 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -97,8 +97,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks used to prevent allocation * essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __func__, - "Checksum bad for group %u", block_group); + ext4_error(sb, "Checksum bad for group %u", + block_group); ext4_free_blks_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); @@ -210,10 +210,8 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); if (block_group >= ngroups) { - ext4_error(sb, "ext4_get_group_desc", - "block_group >= groups_count - " - "block_group = %u, groups_count = %u", - block_group, ngroups); + ext4_error(sb, "block_group >= groups_count - block_group = %u," + " groups_count = %u", block_group, ngroups); return NULL; } @@ -221,8 +219,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext4_error(sb, "ext4_get_group_desc", - "Group descriptor not loaded - " + ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); return NULL; @@ -282,9 +279,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb, return 1; err_out: - ext4_error(sb, __func__, - "Invalid block bitmap - " - "block_group = %d, block = %llu", + ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", block_group, bitmap_blk); return 0; } @@ -311,8 +306,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) bitmap_blk = ext4_block_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, __func__, - "Cannot read block bitmap - " + ext4_error(sb, "Cannot read block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -354,8 +348,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) set_bitmap_uptodate(bh); if (bh_submit_read(bh) < 0) { put_bh(bh); - ext4_error(sb, __func__, - "Cannot read block bitmap - " + ext4_error(sb, "Cannot read block bitmap - " "block_group = %u, block_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -419,8 +412,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || in_range(block + count - 1, ext4_inode_table(sb, desc), sbi->s_itb_per_group)) { - ext4_error(sb, __func__, - "Adding blocks in system zones - " + ext4_error(sb, "Adding blocks in system zones - " "Block = %llu, count = %lu", block, count); goto error_return; @@ -453,8 +445,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, BUFFER_TRACE(bitmap_bh, "clear bit"); if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), bit + i, bitmap_bh->b_data)) { - ext4_error(sb, __func__, - "bit already cleared for block %llu", + ext4_error(sb, "bit already cleared for block %llu", (ext4_fsblk_t)(block + i)); BUFFER_TRACE(bitmap_bh, "bit already cleared"); } else { diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 9dc93168e262..0e0bef3ba91e 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -83,7 +83,7 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, error_msg = "inode out of bounds"; if (error_msg != NULL) - ext4_error(dir->i_sb, function, + __ext4_error(dir->i_sb, function, "bad entry in directory #%lu: %s - " "offset=%u, inode=%u, rec_len=%d, name_len=%d", dir->i_ino, error_msg, offset, @@ -150,7 +150,7 @@ static int ext4_readdir(struct file *filp, */ if (!bh) { if (!dir_has_error) { - ext4_error(sb, __func__, "directory #%lu " + ext4_error(sb, "directory #%lu " "contains a hole at offset %Lu", inode->i_ino, (unsigned long long) filp->f_pos); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 61cf3b3cde4e..509437ffb71b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1486,13 +1486,16 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ -extern void ext4_error(struct super_block *, const char *, const char *, ...) +extern void __ext4_error(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); +#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) extern void __ext4_std_error(struct super_block *, const char *, int); extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext4_warning(struct super_block *, const char *, const char *, ...) +extern void __ext4_warning(struct super_block *, const char *, + const char *, ...) __attribute__ ((format (printf, 3, 4))); +#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) extern void ext4_msg(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b57e5c711b6d..2f407c487e6b 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -132,7 +132,7 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "IO error syncing inode, " "inode=%lu, block=%llu", inode->i_ino, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 54616157c0f3..bd808915ad2f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -440,7 +440,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode, return 0; corrupted: - ext4_error(inode->i_sb, function, + __ext4_error(inode->i_sb, function, "bad header/extent in inode #%lu: %s - magic %x, " "entries %u, max %u(%u), depth %u(%u)", inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), @@ -1538,8 +1538,9 @@ int ext4_ext_try_to_merge(struct inode *inode, merge_done = 1; WARN_ON(eh->eh_entries == 0); if (!eh->eh_entries) - ext4_error(inode->i_sb, "ext4_ext_try_to_merge", - "inode#%lu, eh->eh_entries = 0!", inode->i_ino); + ext4_error(inode->i_sb, + "inode#%lu, eh->eh_entries = 0!", + inode->i_ino); } return merge_done; @@ -3238,7 +3239,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this is why assert can't be put in ext4_ext_find_extent() */ if (path[depth].p_ext == NULL && depth != 0) { - ext4_error(inode->i_sb, __func__, "bad extent address " + ext4_error(inode->i_sb, "bad extent address " "inode: %lu, iblock: %d, depth: %d", inode->i_ino, iblock, depth); err = -EIO; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 2fab5adae1e2..e4aaf619b56d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -76,8 +76,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, /* If checksum is bad mark all blocks and inodes use to prevent * allocation, essentially implementing a per-group read-only flag. */ if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { - ext4_error(sb, __func__, "Checksum bad for group %u", - block_group); + ext4_error(sb, "Checksum bad for group %u", block_group); ext4_free_blks_set(sb, gdp, 0); ext4_free_inodes_set(sb, gdp, 0); ext4_itable_unused_set(sb, gdp, 0); @@ -111,8 +110,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) bitmap_blk = ext4_inode_bitmap(sb, desc); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { - ext4_error(sb, __func__, - "Cannot read inode bitmap - " + ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -153,8 +151,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) set_bitmap_uptodate(bh); if (bh_submit_read(bh) < 0) { put_bh(bh); - ext4_error(sb, __func__, - "Cannot read inode bitmap - " + ext4_error(sb, "Cannot read inode bitmap - " "block_group = %u, inode_bitmap = %llu", block_group, bitmap_blk); return NULL; @@ -229,8 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) es = EXT4_SB(sb)->s_es; if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error(sb, "ext4_free_inode", - "reserved or nonexistent inode %lu", ino); + ext4_error(sb, "reserved or nonexistent inode %lu", ino); goto error_return; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); @@ -248,8 +244,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), bit, bitmap_bh->b_data); if (!cleared) - ext4_error(sb, "ext4_free_inode", - "bit already cleared for inode %lu", ino); + ext4_error(sb, "bit already cleared for inode %lu", ino); else { gdp = ext4_get_group_desc(sb, block_group, &bh2); @@ -736,8 +731,7 @@ static int ext4_claim_inode(struct super_block *sb, if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || ino > EXT4_INODES_PER_GROUP(sb)) { ext4_unlock_group(sb, group); - ext4_error(sb, __func__, - "reserved inode or inode > inodes count - " + ext4_error(sb, "reserved inode or inode > inodes count - " "block_group = %u, inode=%lu", group, ino + group * EXT4_INODES_PER_GROUP(sb)); return 1; @@ -1099,8 +1093,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { - ext4_warning(sb, __func__, - "bad orphan ino %lu! e2fsck was run?", ino); + ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino); goto error; } @@ -1108,8 +1101,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = ext4_read_inode_bitmap(sb, block_group); if (!bitmap_bh) { - ext4_warning(sb, __func__, - "inode bitmap error for orphan %lu", ino); + ext4_warning(sb, "inode bitmap error for orphan %lu", ino); goto error; } @@ -1141,8 +1133,7 @@ iget_failed: err = PTR_ERR(inode); inode = NULL; bad_orphan: - ext4_warning(sb, __func__, - "bad orphan inode %lu! e2fsck was run?", ino); + ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, ext4_test_bit(bit, bitmap_bh->b_data)); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3e530119d7f0..536067bcf75b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -194,7 +194,7 @@ void ext4_delete_inode(struct inode *inode) inode->i_size = 0; err = ext4_mark_inode_dirty(handle, inode); if (err) { - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "couldn't mark inode dirty (err %d)", err); goto stop_handle; } @@ -212,7 +212,7 @@ void ext4_delete_inode(struct inode *inode) if (err > 0) err = ext4_journal_restart(handle, 3); if (err != 0) { - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "couldn't extend journal (err %d)", err); stop_handle: ext4_journal_stop(handle); @@ -323,8 +323,7 @@ static int ext4_block_to_path(struct inode *inode, offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else { - ext4_warning(inode->i_sb, "ext4_block_to_path", - "block %lu > max in inode %lu", + ext4_warning(inode->i_sb, "block %lu > max in inode %lu", i_block + direct_blocks + indirect_blocks + double_blocks, inode->i_ino); } @@ -344,7 +343,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, if (blk && unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), blk, 1))) { - ext4_error(inode->i_sb, function, + __ext4_error(inode->i_sb, function, "invalid block reference %u " "in inode #%lu", blk, inode->i_ino); return -EIO; @@ -1125,7 +1124,7 @@ static int check_block_validity(struct inode *inode, const char *msg, sector_t logical, sector_t phys, int len) { if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { - ext4_error(inode->i_sb, msg, + __ext4_error(inode->i_sb, msg, "inode #%lu logical block %llu mapped to %llu " "(size %d)", inode->i_ino, (unsigned long long) logical, @@ -4147,7 +4146,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, count)) { - ext4_error(inode->i_sb, __func__, "inode #%lu: " + ext4_error(inode->i_sb, "inode #%lu: " "attempt to clear blocks %llu len %lu, invalid", inode->i_ino, (unsigned long long) block_to_free, count); @@ -4255,7 +4254,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) ext4_handle_dirty_metadata(handle, inode, this_bh); else - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "circular indirect block detected, " "inode=%lu, block=%llu", inode->i_ino, @@ -4297,7 +4296,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), nr, 1)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "indirect mapped block in inode " "#%lu invalid (level %d, blk #%lu)", inode->i_ino, depth, @@ -4313,7 +4312,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, * (should be rare). */ if (!bh) { - ext4_error(inode->i_sb, "ext4_free_branches", + ext4_error(inode->i_sb, "Read failure, inode=%lu, block=%llu", inode->i_ino, nr); continue; @@ -4628,9 +4627,8 @@ static int __ext4_get_inode_loc(struct inode *inode, bh = sb_getblk(sb, block); if (!bh) { - ext4_error(sb, "ext4_get_inode_loc", "unable to read " - "inode block - inode=%lu, block=%llu", - inode->i_ino, block); + ext4_error(sb, "unable to read inode block - " + "inode=%lu, block=%llu", inode->i_ino, block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -4728,9 +4726,8 @@ make_io: submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext4_error(sb, __func__, - "unable to read inode block - inode=%lu, " - "block=%llu", inode->i_ino, block); + ext4_error(sb, "unable to read inode block - inode=%lu," + " block=%llu", inode->i_ino, block); brelse(bh); return -EIO; } @@ -4941,8 +4938,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = 0; if (ei->i_file_acl && !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { - ext4_error(sb, __func__, - "bad extended attribute block %llu in inode #%lu", + ext4_error(sb, "bad extended attribute block %llu inode #%lu", ei->i_file_acl, inode->i_ino); ret = -EIO; goto bad_inode; @@ -4988,8 +4984,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } else { ret = -EIO; - ext4_error(inode->i_sb, __func__, - "bogus i_mode (%o) for inode=%lu", + ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu", inode->i_mode, inode->i_ino); goto bad_inode; } @@ -5228,10 +5223,8 @@ int ext4_write_inode(struct inode *inode, int wait) if (wait) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - ext4_error(inode->i_sb, __func__, - "IO error syncing inode, " - "inode=%lu, block=%llu", - inode->i_ino, + ext4_error(inode->i_sb, "IO error syncing inode, " + "inode=%lu, block=%llu", inode->i_ino, (unsigned long long)iloc.bh->b_blocknr); err = -EIO; } @@ -5644,7 +5637,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) EXT4_STATE_NO_EXPAND); if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete" " some EAs or run e2fsck.", inode->i_ino); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d129c1039f1d..415e11f1e9ee 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2709,8 +2709,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, len = ac->ac_b_ex.fe_len; if (!ext4_data_block_valid(sbi, block, len)) { - ext4_error(sb, __func__, - "Allocating blocks %llu-%llu which overlap " + ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata\n", block, block+len); /* File system mounted not to panic on error * Fix the bitmap and repeat the block allocation @@ -3623,15 +3622,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - ext4_error(sb, __func__, "Error in reading block " - "bitmap for %u", group); + ext4_error(sb, "Error reading block bitmap for %u", group); return 0; } err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, __func__, "Error in loading buddy " - "information for %u", group); + ext4_error(sb, "Error loading buddy information for %u", group); put_bh(bitmap_bh); return 0; } @@ -3804,15 +3801,15 @@ repeat: err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, __func__, "Error in loading buddy " - "information for %u", group); + ext4_error(sb, "Error loading buddy information for %u", + group); continue; } bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - ext4_error(sb, __func__, "Error in reading block " - "bitmap for %u", group); + ext4_error(sb, "Error reading block bitmap for %u", + group); ext4_mb_release_desc(&e4b); continue; } @@ -4077,8 +4074,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); if (ext4_mb_load_buddy(sb, group, &e4b)) { - ext4_error(sb, __func__, "Error in loading buddy " - "information for %u", group); + ext4_error(sb, "Error loading buddy information for %u", + group); continue; } ext4_lock_group(sb, group); @@ -4478,8 +4475,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, es = EXT4_SB(sb)->s_es; if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && !ext4_data_block_valid(sbi, block, count)) { - ext4_error(sb, __func__, - "Freeing blocks not in datazone - " + ext4_error(sb, "Freeing blocks not in datazone - " "block = %llu, count = %lu", block, count); goto error_return; } @@ -4548,8 +4544,7 @@ do_more: in_range(block + count - 1, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) { - ext4_error(sb, __func__, - "Freeing blocks in system zone - " + ext4_error(sb, "Freeing blocks in system zone - " "Block = %llu, count = %lu", block, count); /* err = 0. ext4_std_error should be a no op */ goto error_return; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 82c415be87a4..1654eb862d74 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -152,12 +152,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, int ret = 0; if (inode1 == NULL) { - ext4_error(inode2->i_sb, function, + __ext4_error(inode2->i_sb, function, "Both inodes should not be NULL: " "inode1 NULL inode2 %lu", inode2->i_ino); ret = -EIO; } else if (inode2 == NULL) { - ext4_error(inode1->i_sb, function, + __ext4_error(inode1->i_sb, function, "Both inodes should not be NULL: " "inode1 %lu inode2 NULL", inode1->i_ino); ret = -EIO; @@ -526,7 +526,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, * new_ext |-------| */ if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { - ext4_error(orig_inode->i_sb, __func__, + ext4_error(orig_inode->i_sb, "new_ext_end(%u) should be less than or equal to " "oext->ee_block(%u) + oext_alen(%d) - 1", new_ext_end, le32_to_cpu(oext->ee_block), @@ -689,12 +689,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, while (1) { /* The extent for donor must be found. */ if (!dext) { - ext4_error(donor_inode->i_sb, __func__, + ext4_error(donor_inode->i_sb, "The extent for donor must be found"); *err = -EIO; goto out; } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { - ext4_error(donor_inode->i_sb, __func__, + ext4_error(donor_inode->i_sb, "Donor offset(%u) and the first block of donor " "extent(%u) should be equal", donor_off, @@ -1351,7 +1351,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, if (ret1 < 0) break; if (*moved_len > len) { - ext4_error(orig_inode->i_sb, __func__, + ext4_error(orig_inode->i_sb, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 17a17e10dd60..bd2dc0b71c8c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -383,8 +383,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { - ext4_warning(dir->i_sb, __func__, - "Unrecognised inode hash code %d", + ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", root->info.hash_version); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -399,8 +398,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, hash = hinfo->hash; if (root->info.unused_flags & 1) { - ext4_warning(dir->i_sb, __func__, - "Unimplemented inode hash flags: %#06x", + ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", root->info.unused_flags); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -408,8 +406,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, } if ((indirect = root->info.indirect_levels) > 1) { - ext4_warning(dir->i_sb, __func__, - "Unimplemented inode hash depth: %#06x", + ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", root->info.indirect_levels); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -421,8 +418,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, if (dx_get_limit(entries) != dx_root_limit(dir, root->info.info_length)) { - ext4_warning(dir->i_sb, __func__, - "dx entry: limit != root limit"); + ext4_warning(dir->i_sb, "dx entry: limit != root limit"); brelse(bh); *err = ERR_BAD_DX_DIR; goto fail; @@ -433,7 +429,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, "dx entry: no count or count > limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -478,7 +474,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, "dx entry: limit != node limit"); brelse(bh); *err = ERR_BAD_DX_DIR; @@ -494,7 +490,7 @@ fail2: } fail: if (*err == ERR_BAD_DX_DIR) - ext4_warning(dir->i_sb, __func__, + ext4_warning(dir->i_sb, "Corrupt dir inode %ld, running e2fsck is " "recommended.", dir->i_ino); return NULL; @@ -947,9 +943,8 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ - ext4_error(sb, __func__, "reading directory #%lu " - "offset %lu", dir->i_ino, - (unsigned long)block); + ext4_error(sb, "reading directory #%lu offset %lu", + dir->i_ino, (unsigned long)block); brelse(bh); goto next; } @@ -1041,7 +1036,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); if (retval < 0) { - ext4_warning(sb, __func__, + ext4_warning(sb, "error reading index page in directory #%lu", dir->i_ino); *err = retval; @@ -1071,14 +1066,13 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru __u32 ino = le32_to_cpu(de->inode); brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { - ext4_error(dir->i_sb, "ext4_lookup", - "bad inode number: %u", ino); + ext4_error(dir->i_sb, "bad inode number: %u", ino); return ERR_PTR(-EIO); } inode = ext4_iget(dir->i_sb, ino); if (unlikely(IS_ERR(inode))) { if (PTR_ERR(inode) == -ESTALE) { - ext4_error(dir->i_sb, __func__, + ext4_error(dir->i_sb, "deleted inode referenced: %u", ino); return ERR_PTR(-EIO); @@ -1110,7 +1104,7 @@ struct dentry *ext4_get_parent(struct dentry *child) brelse(bh); if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { - ext4_error(child->d_inode->i_sb, "ext4_get_parent", + ext4_error(child->d_inode->i_sb, "bad inode number: %u", ino); return ERR_PTR(-EIO); } @@ -1410,7 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, de = (struct ext4_dir_entry_2 *)((char *)fde + ext4_rec_len_from_disk(fde->rec_len, blocksize)); if ((char *) de >= (((char *) root) + blocksize)) { - ext4_error(dir->i_sb, __func__, + ext4_error(dir->i_sb, "invalid rec_len for '..' in inode %lu", dir->i_ino); brelse(bh); @@ -1575,8 +1569,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { - ext4_warning(sb, __func__, - "Directory index full!"); + ext4_warning(sb, "Directory index full!"); err = -ENOSPC; goto cleanup; } @@ -1916,11 +1909,11 @@ static int empty_dir(struct inode *inode) if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { if (err) - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "error %d reading directory #%lu offset 0", err, inode->i_ino); else - ext4_warning(inode->i_sb, __func__, + ext4_warning(inode->i_sb, "bad directory (dir #%lu) - no data block", inode->i_ino); return 1; @@ -1931,7 +1924,7 @@ static int empty_dir(struct inode *inode) !le32_to_cpu(de1->inode) || strcmp(".", de->name) || strcmp("..", de1->name)) { - ext4_warning(inode->i_sb, "empty_dir", + ext4_warning(inode->i_sb, "bad directory (dir #%lu) - no `.' or `..'", inode->i_ino); brelse(bh); @@ -1949,7 +1942,7 @@ static int empty_dir(struct inode *inode) offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) - ext4_error(sb, __func__, + ext4_error(sb, "error %d reading directory" " #%lu offset %u", err, inode->i_ino, offset); @@ -2163,7 +2156,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) if (retval) goto end_rmdir; if (!EXT4_DIR_LINK_EMPTY(inode)) - ext4_warning(inode->i_sb, "ext4_rmdir", + ext4_warning(inode->i_sb, "empty directory has too many links (%d)", inode->i_nlink); inode->i_version++; @@ -2215,7 +2208,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) goto end_unlink; if (!inode->i_nlink) { - ext4_warning(inode->i_sb, "ext4_unlink", + ext4_warning(inode->i_sb, "Deleting nonexistent file (%lu), %d", inode->i_ino, inode->i_nlink); inode->i_nlink = 1; @@ -2462,7 +2455,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } } if (retval) { - ext4_warning(old_dir->i_sb, "ext4_rename", + ext4_warning(old_dir->i_sb, "Deleting old file (%lu), %d, error=%d", old_dir->i_ino, old_dir->i_nlink, retval); } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3b2c5541d8a6..5692c48754a0 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -48,65 +48,54 @@ static int verify_group_input(struct super_block *sb, ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) - ext4_warning(sb, __func__, - "Cannot add at group %u (only %u groups)", + ext4_warning(sb, "Cannot add at group %u (only %u groups)", input->group, sbi->s_groups_count); else if (offset != 0) - ext4_warning(sb, __func__, "Last group not full"); + ext4_warning(sb, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) - ext4_warning(sb, __func__, "Reserved blocks too high (%u)", + ext4_warning(sb, "Reserved blocks too high (%u)", input->reserved_blocks); else if (free_blocks_count < 0) - ext4_warning(sb, __func__, "Bad blocks count %u", + ext4_warning(sb, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext4_warning(sb, __func__, - "Cannot read last block (%llu)", + ext4_warning(sb, "Cannot read last block (%llu)", end - 1); else if (outside(input->block_bitmap, start, end)) - ext4_warning(sb, __func__, - "Block bitmap not in group (block %llu)", + ext4_warning(sb, "Block bitmap not in group (block %llu)", (unsigned long long)input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) - ext4_warning(sb, __func__, - "Inode bitmap not in group (block %llu)", + ext4_warning(sb, "Inode bitmap not in group (block %llu)", (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) - ext4_warning(sb, __func__, - "Inode table not in group (blocks %llu-%llu)", + ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) - ext4_warning(sb, __func__, - "Block bitmap same as inode bitmap (%llu)", + ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)", (unsigned long long)input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) - ext4_warning(sb, __func__, - "Block bitmap (%llu) in inode table (%llu-%llu)", + ext4_warning(sb, "Block bitmap (%llu) in inode table " + "(%llu-%llu)", (unsigned long long)input->block_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext4_warning(sb, __func__, - "Inode bitmap (%llu) in inode table (%llu-%llu)", + ext4_warning(sb, "Inode bitmap (%llu) in inode table " + "(%llu-%llu)", (unsigned long long)input->inode_bitmap, (unsigned long long)input->inode_table, itend - 1); else if (inside(input->block_bitmap, start, metaend)) - ext4_warning(sb, __func__, - "Block bitmap (%llu) in GDT table" - " (%llu-%llu)", + ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)", (unsigned long long)input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) - ext4_warning(sb, __func__, - "Inode bitmap (%llu) in GDT table" - " (%llu-%llu)", + ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)", (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) - ext4_warning(sb, __func__, - "Inode table (%llu-%llu) overlaps" - "GDT table (%llu-%llu)", + ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table " + "(%llu-%llu)", (unsigned long long)input->inode_table, itend - 1, start, metaend - 1); else @@ -364,8 +353,7 @@ static int verify_reserved_gdb(struct super_block *sb, while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ - ext4_warning(sb, __func__, - "reserved GDT %llu" + ext4_warning(sb, "reserved GDT %llu" " missing grp %d (%llu)", blk, grp, grp * @@ -420,8 +408,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, */ if (EXT4_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, __func__, - "won't resize using backup superblock at %llu", + ext4_warning(sb, "won't resize using backup superblock at %llu", (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); return -EPERM; } @@ -444,8 +431,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext4_warning(sb, __func__, - "new group %u GDT block %llu not reserved", + ext4_warning(sb, "new group %u GDT block %llu not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; @@ -468,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, GFP_NOFS); if (!n_group_desc) { err = -ENOMEM; - ext4_warning(sb, __func__, + ext4_warning(sb, "not enough memory for %lu groups", gdb_num + 1); goto exit_inode; } @@ -567,8 +553,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { - ext4_warning(sb, __func__, - "reserved block %llu" + ext4_warning(sb, "reserved block %llu" " not at offset %ld", blk, (long)(data - (__le32 *)dind->b_data)); @@ -713,8 +698,7 @@ static void update_backups(struct super_block *sb, */ exit_err: if (err) { - ext4_warning(sb, __func__, - "can't update backup for group %u (err %d), " + ext4_warning(sb, "can't update backup for group %u (err %d), " "forcing fsck on next reboot", group, err); sbi->s_mount_state &= ~EXT4_VALID_FS; sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); @@ -753,20 +737,19 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext4_warning(sb, __func__, - "Can't resize non-sparse filesystem further"); + ext4_warning(sb, "Can't resize non-sparse filesystem further"); return -EPERM; } if (ext4_blocks_count(es) + input->blocks_count < ext4_blocks_count(es)) { - ext4_warning(sb, __func__, "blocks_count overflow"); + ext4_warning(sb, "blocks_count overflow"); return -EINVAL; } if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext4_warning(sb, __func__, "inodes_count overflow"); + ext4_warning(sb, "inodes_count overflow"); return -EINVAL; } @@ -774,14 +757,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || !le16_to_cpu(es->s_reserved_gdt_blocks)) { - ext4_warning(sb, __func__, + ext4_warning(sb, "No reserved GDT blocks, can't resize"); return -EPERM; } inode = ext4_iget(sb, EXT4_RESIZE_INO); if (IS_ERR(inode)) { - ext4_warning(sb, __func__, - "Error opening resize inode"); + ext4_warning(sb, "Error opening resize inode"); return PTR_ERR(inode); } } @@ -810,8 +792,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { - ext4_warning(sb, __func__, - "multiple resizers run on filesystem!"); + ext4_warning(sb, "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; } @@ -997,13 +978,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled"); + ext4_warning(sb, "CONFIG_LBDAF not enabled"); return -EINVAL; } if (n_blocks_count < o_blocks_count) { - ext4_warning(sb, __func__, - "can't shrink FS - resize aborted"); + ext4_warning(sb, "can't shrink FS - resize aborted"); return -EBUSY; } @@ -1011,15 +991,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); if (last == 0) { - ext4_warning(sb, __func__, - "need to use ext2online to resize further"); + ext4_warning(sb, "need to use ext2online to resize further"); return -EPERM; } add = EXT4_BLOCKS_PER_GROUP(sb) - last; if (o_blocks_count + add < o_blocks_count) { - ext4_warning(sb, __func__, "blocks_count overflow"); + ext4_warning(sb, "blocks_count overflow"); return -EINVAL; } @@ -1027,16 +1006,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, add = n_blocks_count - o_blocks_count; if (o_blocks_count + add < n_blocks_count) - ext4_warning(sb, __func__, - "will only finish group (%llu" - " blocks, %u new)", + ext4_warning(sb, "will only finish group (%llu blocks, %u new)", o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ bh = sb_bread(sb, o_blocks_count + add - 1); if (!bh) { - ext4_warning(sb, __func__, - "can't read last block, resize aborted"); + ext4_warning(sb, "can't read last block, resize aborted"); return -ENOSPC; } brelse(bh); @@ -1047,14 +1023,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, handle = ext4_journal_start_sb(sb, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); - ext4_warning(sb, __func__, "error %d on journal start", err); + ext4_warning(sb, "error %d on journal start", err); goto exit_put; } mutex_lock(&EXT4_SB(sb)->s_resize_lock); if (o_blocks_count != ext4_blocks_count(es)) { - ext4_warning(sb, __func__, - "multiple resizers run on filesystem!"); + ext4_warning(sb, "multiple resizers run on filesystem!"); mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); err = -EBUSY; @@ -1063,8 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) { - ext4_warning(sb, __func__, - "error %d on journal write access", err); + ext4_warning(sb, "error %d on journal write access", err); mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 68a55dffb360..1c85bb67e6eb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -333,7 +333,7 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } -void ext4_error(struct super_block *sb, const char *function, +void __ext4_error(struct super_block *sb, const char *function, const char *fmt, ...) { va_list args; @@ -450,7 +450,7 @@ void ext4_msg (struct super_block * sb, const char *prefix, va_end(args); } -void ext4_warning(struct super_block *sb, const char *function, +void __ext4_warning(struct super_block *sb, const char *function, const char *fmt, ...) { va_list args; @@ -507,7 +507,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return; - ext4_warning(sb, __func__, + ext4_warning(sb, "updating to rev %d because of new feature flag, " "running e2fsck is recommended", EXT4_DYNAMIC_REV); @@ -3367,10 +3367,9 @@ static void ext4_clear_journal_err(struct super_block *sb, char nbuf[16]; errstr = ext4_decode_error(sb, j_errno, nbuf); - ext4_warning(sb, __func__, "Filesystem error recorded " + ext4_warning(sb, "Filesystem error recorded " "from previous mount: %s", errstr); - ext4_warning(sb, __func__, "Marking fs in need of " - "filesystem check."); + ext4_warning(sb, "Marking fs in need of filesystem check."); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c619a7ea670d..627c98abbed9 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -227,7 +227,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { -bad_block: ext4_error(inode->i_sb, __func__, +bad_block: + ext4_error(inode->i_sb, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -371,7 +372,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; @@ -665,9 +666,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext4_xattr_check_block(bs->bh)) { - ext4_error(sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -880,9 +880,8 @@ cleanup_dquot: goto cleanup; bad_block: - ext4_error(inode->i_sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; #undef header @@ -1195,9 +1194,8 @@ retry: if (!bh) goto cleanup; if (ext4_xattr_check_block(bh)) { - ext4_error(inode->i_sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -1372,16 +1370,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) goto cleanup; bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { - ext4_error(inode->i_sb, __func__, - "inode %lu: block %llu read error", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: block %llu read error", + inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext4_error(inode->i_sb, __func__, - "inode %lu: bad block %llu", inode->i_ino, - EXT4_I(inode)->i_file_acl); + ext4_error(inode->i_sb, "inode %lu: bad block %llu", + inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } ext4_xattr_release_block(handle, inode, bh); @@ -1506,7 +1502,7 @@ again: } bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { - ext4_error(inode->i_sb, __func__, + ext4_error(inode->i_sb, "inode %lu: block %lu read error", inode->i_ino, (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= -- cgit v1.2.2 From 9aaab0589baa61d637a52badddbff2d74f35a955 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 15 Feb 2010 14:26:16 -0500 Subject: ext4: add missing error checking to ext4_expand_extra_isize_ea() Signed-off-by: Roel Kluin --- fs/ext4/xattr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 627c98abbed9..efc16a4b7ceb 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1300,6 +1300,8 @@ retry: /* Remove the chosen entry from the inode */ error = ext4_xattr_ibody_set(handle, inode, &i, is); + if (error) + goto cleanup; entry = IFIRST(header); if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) -- cgit v1.2.2 From aca92ff6f57c000d1b4523e383c8bd6b8269b8b1 Mon Sep 17 00:00:00 2001 From: Leonard Michlmayr Date: Thu, 4 Mar 2010 17:07:28 -0500 Subject: ext4: correctly calculate number of blocks for fiemap ext4_fiemap() rounds the length of the requested range down to blocksize, which is is not the true number of blocks that cover the requested region. This problem is especially impressive if the user requests only the first byte of a file: not a single extent will be reported. We fix this by calculating the last block of the region and then subtract to find the number of blocks in the extents. Signed-off-by: Leonard Michlmayr Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bd808915ad2f..7d54850f7136 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3768,7 +3768,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { ext4_lblk_t start_blk; - ext4_lblk_t len_blks; int error = 0; /* fallback to generic here if not in extents fmt */ @@ -3782,8 +3781,14 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { error = ext4_xattr_fiemap(inode, fieinfo); } else { + ext4_lblk_t len_blks; + __u64 last_blk; + start_blk = start >> inode->i_sb->s_blocksize_bits; - len_blks = len >> inode->i_sb->s_blocksize_bits; + last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits; + if (last_blk >= EXT_MAX_BLOCK) + last_blk = EXT_MAX_BLOCK-1; + len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1; /* * Walk the extent tree gathering extent information. -- cgit v1.2.2 From 73b50c1c92666d326b5fa2c945d46509f2f6d91f Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Tue, 16 Feb 2010 15:06:29 -0500 Subject: ext4: Fix BUG_ON at fs/buffer.c:652 in no journal mode Calls to ext4_handle_dirty_metadata should only pass in an inode pointer for inode-specific metadata, and not for shared metadata blocks such as inode table blocks, block group descriptors, the superblock, etc. The BUG_ON can get tripped when updating a special device (such as a block device) that is opened (so that i_mapping is set in fs/block_dev.c) and the file system is mounted in no journal mode. Addresses-Google-Bug: #2404870 Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4_jbd2.c | 2 +- fs/ext4/ialloc.c | 2 +- fs/ext4/inode.c | 6 +++--- fs/ext4/namei.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 2f407c487e6b..53d2764d71ca 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -125,7 +125,7 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, ext4_journal_abort_handle(where, __func__, bh, handle, err); } else { - if (inode && bh) + if (inode) mark_buffer_dirty_inode(bh, inode); else mark_buffer_dirty(bh); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index e4aaf619b56d..004c9da9e5c6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -898,7 +898,7 @@ repeat_in_this_group: BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, - inode, + NULL, inode_bitmap_bh); if (err) goto fail; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 536067bcf75b..ecac8c5a6f5c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5120,7 +5120,7 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); sb->s_dirt = 1; ext4_handle_sync(handle); - err = ext4_handle_dirty_metadata(handle, inode, + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); } } @@ -5149,7 +5149,7 @@ static int ext4_do_update_inode(handle_t *handle, } BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - rc = ext4_handle_dirty_metadata(handle, inode, bh); + rc = ext4_handle_dirty_metadata(handle, NULL, bh); if (!err) err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); @@ -5701,7 +5701,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode) err = jbd2_journal_get_write_access(handle, iloc.bh); if (!err) err = ext4_handle_dirty_metadata(handle, - inode, + NULL, iloc.bh); brelse(iloc.bh); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index bd2dc0b71c8c..a13948f8242f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2017,7 +2017,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -2089,7 +2089,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = -- cgit v1.2.2 From 003cb608a2533d0927a83bc4e07e46d7a622eda9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Feb 2010 14:39:01 +0900 Subject: percpu: add __percpu sparse annotations to fs Add __percpu sparse annotations to fs. These annotations are to make sparse consider percpu variables to be in a different address space and warn if accessed without going through percpu accessors. This patch doesn't affect normal builds. Signed-off-by: Tejun Heo Cc: "Theodore Ts'o" Cc: Trond Myklebust Cc: Alex Elder Cc: Christoph Hellwig Cc: Alexander Viro --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 874d169a193e..4cedc91ec59d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1014,7 +1014,7 @@ struct ext4_sb_info { atomic_t s_lock_busy; /* locality groups */ - struct ext4_locality_group *s_locality_groups; + struct ext4_locality_group __percpu *s_locality_groups; /* for write statistics */ unsigned long s_sectors_written_start; -- cgit v1.2.2 From c8d46e41bc744c8fa0092112af3942fcd46c8b18 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Wed, 24 Feb 2010 09:52:53 -0500 Subject: ext4: Add flag to files with blocks intentionally past EOF fallocate() may potentially instantiate blocks past EOF, depending on the flags used when it is called. e2fsck currently has a test for blocks past i_size, and it sometimes trips up - noticeably on xfstests 013 which runs fsstress. This patch from Jiayang does fix it up - it (along with e2fsprogs updates and other patches recently from Aneesh) has survived many fsstress runs in a row. Signed-off-by: Eric Sandeen Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 5 +++-- fs/ext4/extents.c | 22 +++++++++++++++++++++- fs/ext4/inode.c | 9 ++++++++- fs/ext4/ioctl.c | 9 +++++++++ 4 files changed, 41 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 509437ffb71b..74664ca19e22 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -285,10 +285,11 @@ struct flex_groups { #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ +#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */ /* Flags that should be inherited by new inodes from their parent. */ #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7d54850f7136..a2c21aa09e2b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3186,7 +3186,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, { struct ext4_ext_path *path = NULL; struct ext4_extent_header *eh; - struct ext4_extent newex, *ex; + struct ext4_extent newex, *ex, *last_ex; ext4_fsblk_t newblock; int err = 0, depth, ret, cache_type; unsigned int allocated = 0; @@ -3367,6 +3367,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, EXT4_STATE_DIO_UNWRITTEN); } } + + if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { + if (eh->eh_entries) { + last_ex = EXT_LAST_EXTENT(eh); + if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) + + ext4_ext_get_actual_len(last_ex)) + EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; + } else { + WARN_ON(eh->eh_entries == 0); + ext4_error(inode->i_sb, __func__, + "inode#%lu, eh->eh_entries = 0!", inode->i_ino); + } + } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { /* free data blocks we just allocated */ @@ -3500,6 +3513,13 @@ static void ext4_falloc_update_inode(struct inode *inode, i_size_write(inode, new_size); if (new_size > EXT4_I(inode)->i_disksize) ext4_update_i_disksize(inode, new_size); + } else { + /* + * Mark that we allocate beyond EOF so the subsequent truncate + * can proceed even if the new size is the same as i_size. + */ + if (new_size > i_size_read(inode)) + EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL; } } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ecac8c5a6f5c..edb7edc99f71 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4456,6 +4456,8 @@ void ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return; + EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; + if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); @@ -5305,7 +5307,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { + attr->ia_valid & ATTR_SIZE && + (attr->ia_size < inode->i_size || + (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) { handle_t *handle; handle = ext4_journal_start(inode, 3); @@ -5336,6 +5340,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) goto err_out; } } + /* ext4_truncate will clear the flag */ + if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) + ext4_truncate(inode); } rc = inode_setattr(inode, attr); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index b63d193126db..2220feb2dcc1 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -92,6 +92,15 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) flags &= ~EXT4_EXTENTS_FL; } + if (flags & EXT4_EOFBLOCKS_FL) { + /* we don't support adding EOFBLOCKS flag */ + if (!(oldflags & EXT4_EOFBLOCKS_FL)) { + err = -EOPNOTSUPP; + goto flags_out; + } + } else if (oldflags & EXT4_EOFBLOCKS_FL) + ext4_truncate(inode); + handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); -- cgit v1.2.2 From 482a74258fd08d30bf2ab0f5549afab5a5c9daba Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 24 Feb 2010 11:35:32 -0500 Subject: ext4: mount flags manipulation cleanup Replace intermediate EXT4_MOUNT_XXX flags manipulation to corresponding macro. Signed-off-by: Dmitry Monakhov Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1c85bb67e6eb..7e8b1b4236d3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -796,10 +796,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) + if (test_opt(sb, USRQUOTA)) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) + if (test_opt(sb, GRPQUOTA)) seq_puts(seq, ",grpquota"); #endif } @@ -1383,14 +1383,13 @@ static int parse_options(char *options, struct super_block *sb, data_opt = EXT4_MOUNT_WRITEBACK_DATA; datacheck: if (is_remount) { - if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) - != data_opt) { + if (test_opt(sb, DATA_FLAGS) != data_opt) { ext4_msg(sb, KERN_ERR, "Cannot change data mode on remount"); return 0; } } else { - sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; + clear_opt(sbi->s_mount_opt, DATA_FLAGS); sbi->s_mount_opt |= data_opt; } break; @@ -1625,18 +1624,14 @@ set_qf_format: } #ifdef CONFIG_QUOTA if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && - sbi->s_qf_names[USRQUOTA]) + if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sbi->s_mount_opt, USRQUOTA); - if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && - sbi->s_qf_names[GRPQUOTA]) + if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) clear_opt(sbi->s_mount_opt, GRPQUOTA); - if ((sbi->s_qf_names[USRQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || - (sbi->s_qf_names[GRPQUOTA] && - (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { + if ((sbi->s_qf_names[USRQUOTA] && test_opt(sb, GRPQUOTA)) || + (sbi->s_qf_names[GRPQUOTA] && test_opt(sb, USRQUOTA))) { ext4_msg(sb, KERN_ERR, "old and new quota " "format mixing"); return 0; @@ -2452,11 +2447,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, POSIX_ACL); #endif if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) - sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + set_opt(sbi->s_mount_opt, JOURNAL_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) - sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + set_opt(sbi->s_mount_opt, ORDERED_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) - sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; + set_opt(sbi->s_mount_opt, WRITEBACK_DATA); if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); @@ -2484,7 +2479,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || @@ -3520,7 +3515,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ext4_abort(sb, __func__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); es = sbi->s_es; -- cgit v1.2.2 From 56c50f11f4d11cb14d78fe52330efb69d219c62f Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 1 Mar 2010 23:28:41 -0500 Subject: ext4: trivial quota cleanup The patch is aimed to reorganize and simplify quota code a bit. Quota code is itself complex enough, but we can make it more readable in some places: - Move quota option parsing to separate functions. - Simplify old-quota and journaled-quota mix check. Signed-off-by: Dmitry Monakhov Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 123 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 69 insertions(+), 54 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7e8b1b4236d3..79937e921988 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1206,6 +1206,64 @@ static ext4_fsblk_t get_sb_block(void **data) #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) +#ifdef CONFIG_QUOTA +static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + char *qname; + + if (sb_any_quota_loaded(sb) && + !sbi->s_qf_names[qtype]) { + ext4_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); + return 0; + } + qname = match_strdup(args); + if (!qname) { + ext4_msg(sb, KERN_ERR, + "Not enough memory for storing quotafile name"); + return 0; + } + if (sbi->s_qf_names[qtype] && + strcmp(sbi->s_qf_names[qtype], qname)) { + ext4_msg(sb, KERN_ERR, + "%s quota file already specified", QTYPE2NAME(qtype)); + kfree(qname); + return 0; + } + sbi->s_qf_names[qtype] = qname; + if (strchr(sbi->s_qf_names[qtype], '/')) { + ext4_msg(sb, KERN_ERR, + "quotafile must be on filesystem root"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + set_opt(sbi->s_mount_opt, QUOTA); + return 1; +} + +static int clear_qf_name(struct super_block *sb, int qtype) +{ + + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (sb_any_quota_loaded(sb) && + sbi->s_qf_names[qtype]) { + ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" + " when quota turned on"); + return 0; + } + /* + * The space will be released later when all options are confirmed + * to be correct + */ + sbi->s_qf_names[qtype] = NULL; + return 1; +} +#endif + static int parse_options(char *options, struct super_block *sb, unsigned long *journal_devnum, unsigned int *journal_ioprio, @@ -1217,8 +1275,7 @@ static int parse_options(char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype, qfmt; - char *qname; + int qfmt; #endif if (!options) @@ -1401,63 +1458,22 @@ static int parse_options(char *options, struct super_block *sb, break; #ifdef CONFIG_QUOTA case Opt_usrjquota: - qtype = USRQUOTA; - goto set_qf_name; - case Opt_grpjquota: - qtype = GRPQUOTA; -set_qf_name: - if (sb_any_quota_loaded(sb) && - !sbi->s_qf_names[qtype]) { - ext4_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); + if (!set_qf_name(sb, USRQUOTA, &args[0])) return 0; - } - qname = match_strdup(&args[0]); - if (!qname) { - ext4_msg(sb, KERN_ERR, - "Not enough memory for " - "storing quotafile name"); - return 0; - } - if (sbi->s_qf_names[qtype] && - strcmp(sbi->s_qf_names[qtype], qname)) { - ext4_msg(sb, KERN_ERR, - "%s quota file already " - "specified", QTYPE2NAME(qtype)); - kfree(qname); - return 0; - } - sbi->s_qf_names[qtype] = qname; - if (strchr(sbi->s_qf_names[qtype], '/')) { - ext4_msg(sb, KERN_ERR, - "quotafile must be on " - "filesystem root"); - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; + break; + case Opt_grpjquota: + if (!set_qf_name(sb, GRPQUOTA, &args[0])) return 0; - } - set_opt(sbi->s_mount_opt, QUOTA); break; case Opt_offusrjquota: - qtype = USRQUOTA; - goto clear_qf_name; + if (!clear_qf_name(sb, USRQUOTA)) + return 0; + break; case Opt_offgrpjquota: - qtype = GRPQUOTA; -clear_qf_name: - if (sb_any_quota_loaded(sb) && - sbi->s_qf_names[qtype]) { - ext4_msg(sb, KERN_ERR, "Cannot change " - "journaled quota options when " - "quota turned on"); + if (!clear_qf_name(sb, GRPQUOTA)) return 0; - } - /* - * The space will be released later when all options - * are confirmed to be correct - */ - sbi->s_qf_names[qtype] = NULL; break; + case Opt_jqfmt_vfsold: qfmt = QFMT_VFS_OLD; goto set_qf_format; @@ -1630,8 +1646,7 @@ set_qf_format: if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) clear_opt(sbi->s_mount_opt, GRPQUOTA); - if ((sbi->s_qf_names[USRQUOTA] && test_opt(sb, GRPQUOTA)) || - (sbi->s_qf_names[GRPQUOTA] && test_opt(sb, USRQUOTA))) { + if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { ext4_msg(sb, KERN_ERR, "old and new quota " "format mixing"); return 0; -- cgit v1.2.2 From cc483f102c3f703e853c96f95a654f0106fb2603 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 1 Mar 2010 19:06:35 -0500 Subject: ext4: Fix fencepost error in chosing choosing group vs file preallocation. The ext4 multiblock allocator decides whether to use group or file preallocation based on the file size. When the file size reaches s_mb_stream_request (default is 16 blocks), it changes to use a file-specific preallocation. This is cool, but it has a tiny problem. See a simple script: mkfs.ext4 -b 1024 /dev/sda8 1000000 mount -t ext4 -o nodelalloc /dev/sda8 /mnt/ext4 for((i=0;i<5;i++)) do cat /mnt/4096>>/mnt/ext4/a #4096 is a file with 4096 characters. cat /mnt/4096>>/mnt/ext4/b done debuge4fs -R 'stat a' /dev/sda8|grep BLOCKS -A 1 And you get BLOCKS: (0-14):8705-8719, (15):2356, (16-19):8465-8468 So there are 3 extents, a bit strange for the lonely 15th logical block. As we write to the 16 blocks, we choose file preallocation in ext4_mb_group_or_file, but in ext4_mb_normalize_request, we meet with the 16*1024 range, so no preallocation will be carried. file b then reserves the space after '2356', so when when write 16, we start from another part. This patch just change the check in ext4_mb_group_or_file, so that for the lonely 15 we will still use group preallocation. After the patch, we will get: debuge4fs -R 'stat a' /dev/sda8|grep BLOCKS -A 1 BLOCKS: (0-15):8705-8720, (16-19):8465-8468 Looks more sane. Thanks. Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 415e11f1e9ee..72d5ceb18523 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3935,7 +3935,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) /* don't use group allocation for large files */ size = max(size, isize); - if (size >= sbi->s_mb_stream_request) { + if (size > sbi->s_mb_stream_request) { ac->ac_flags |= EXT4_MB_STREAM_ALLOC; return; } -- cgit v1.2.2 From 437ca0fda3b442dff9e591581b5e1ffdfec24660 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 1 Mar 2010 22:29:21 -0500 Subject: ext4: deprecate obsoleted mount options Declare following list of mount options as deprecated: - bsddf, miniddf - grpid, bsdgroups, nogrpid, sysvgroups Declare following list of default mount options as deprecated: - bsdgroups Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 79937e921988..c832508d5515 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1205,6 +1205,8 @@ static ext4_fsblk_t get_sb_block(void **data) } #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) +static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" + "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; #ifdef CONFIG_QUOTA static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) @@ -1294,16 +1296,23 @@ static int parse_options(char *options, struct super_block *sb, token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); clear_opt(sbi->s_mount_opt, MINIX_DF); break; case Opt_minix_df: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); set_opt(sbi->s_mount_opt, MINIX_DF); + break; case Opt_grpid: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); set_opt(sbi->s_mount_opt, GRPID); + break; case Opt_nogrpid: + ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); clear_opt(sbi->s_mount_opt, GRPID); + break; case Opt_resuid: if (match_int(&args[0], &option)) @@ -2449,8 +2458,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) def_mount_opts = le32_to_cpu(es->s_default_mount_opts); if (def_mount_opts & EXT4_DEFM_DEBUG) set_opt(sbi->s_mount_opt, DEBUG); - if (def_mount_opts & EXT4_DEFM_BSDGROUPS) + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { + ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", + "2.6.38"); set_opt(sbi->s_mount_opt, GRPID); + } if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); #ifdef CONFIG_EXT4_FS_XATTR -- cgit v1.2.2 From f39490bcd1691d65dc33689222a12e1fc13dd824 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 1 Mar 2010 23:14:36 -0500 Subject: ext4: fix error handling in migrate Set i_nlink to zero for temporary inode from very beginning. otherwise we may fail to start new journal handle and this inode will be unreferenced but with i_nlink == 1 Since we hold inode reference it can not be pruned. Also add missed journal_start retval check. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/migrate.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 46a4101e0aec..8b87bd0eac95 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -503,14 +503,10 @@ int ext4_ext_migrate(struct inode *inode) } i_size_write(tmp_inode, i_size_read(inode)); /* - * We don't want the inode to be reclaimed - * if we got interrupted in between. We have - * this tmp inode carrying reference to the - * data blocks of the original file. We set - * the i_nlink to zero at the last stage after - * switching the original file to extent format + * Set the i_nlink to zero so it will be deleted later + * when we drop inode reference. */ - tmp_inode->i_nlink = 1; + tmp_inode->i_nlink = 0; ext4_ext_tree_init(handle, tmp_inode); ext4_orphan_add(handle, tmp_inode); @@ -537,6 +533,16 @@ int ext4_ext_migrate(struct inode *inode) up_read((&EXT4_I(inode)->i_data_sem)); handle = ext4_journal_start(inode, 1); + if (IS_ERR(handle)) { + /* + * It is impossible to update on-disk structures without + * a handle, so just rollback in-core changes and live other + * work to orphan_list_cleanup() + */ + ext4_orphan_del(NULL, tmp_inode); + retval = PTR_ERR(handle); + goto out; + } ei = EXT4_I(inode); i_data = ei->i_data; @@ -618,15 +624,8 @@ err_out: /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); - - /* - * Set the i_nlink to zero so that - * generic_drop_inode really deletes the - * inode - */ - tmp_inode->i_nlink = 0; - ext4_journal_stop(handle); +out: unlock_new_inode(tmp_inode); iput(tmp_inode); -- cgit v1.2.2 From da1dafca84413145f5ac59998b4cdd06fb89f721 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 1 Mar 2010 23:15:02 -0500 Subject: ext4: explicitly remove inode from orphan list after failed direct io Otherwise non-empty orphan list will be triggered on umount. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index edb7edc99f71..427f4690ad6d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3438,6 +3438,9 @@ retry: * but cannot extend i_size. Bail out and pretend * the write failed... */ ret = PTR_ERR(handle); + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + goto out; } if (inode->i_nlink) -- cgit v1.2.2 From 6e3617e579e070d3655a93ee9ed7149113e795e0 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Mon, 1 Mar 2010 23:29:39 -0500 Subject: ext4: Handle non empty on-disk orphan link In case of truncate errors we explicitly remove inode from in-core orphan list via orphan_del(NULL, inode) without modifying the on-disk list. But later on, the same inode may be inserted in the orphan list again which will result the on-disk linked list getting corrupted. If inode i_dtime contains valid value, then skip on-disk list modification. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a13948f8242f..608d21f873ec 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2013,6 +2013,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto out_unlock; + /* + * Due to previous errors inode may be already a part of on-disk + * orphan list. If so skip on-disk list modification. + */ + if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= + (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) + goto mem_insert; /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); @@ -2030,6 +2037,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * * This is safe: on error we're going to ignore the orphan list * anyway on the next recovery. */ +mem_insert: if (!err) list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); -- cgit v1.2.2 From b8b8afe236e97b6359d46d3a3f8c46455e192271 Mon Sep 17 00:00:00 2001 From: Toshiyuki Okajima Date: Tue, 2 Mar 2010 00:21:35 -0500 Subject: ext4: make "offset" consistent in ext4_check_dir_entry() The callers of ext4_check_dir_entry() usually pass in the "file offset" (ext4_readdir, htree_dirblock_to_tree, search_dirblock, ext4_dx_find_entry, empty_dir), but a few callers (add_dirent_to_buf, ext4_delete_entry) only pass in the buffer offset. To accomodate those last two (which would be hard to fix otherwise), this patch changes ext4_check_dir_entry() to print the physical block number and the relative offset as well as the passed-in offset. Signed-off-by: Toshiyuki Okajima Signed-off-by: "Theodore Ts'o" --- fs/ext4/dir.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 0e0bef3ba91e..29857ddd9e26 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -84,9 +84,11 @@ int ext4_check_dir_entry(const char *function, struct inode *dir, if (error_msg != NULL) __ext4_error(dir->i_sb, function, - "bad entry in directory #%lu: %s - " - "offset=%u, inode=%u, rec_len=%d, name_len=%d", - dir->i_ino, error_msg, offset, + "bad entry in directory #%lu: %s - block=%llu" + "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", + dir->i_ino, error_msg, + (unsigned long long) bh->b_blocknr, + (unsigned) (offset%bh->b_size), offset, le32_to_cpu(de->inode), rlen, de->name_len); return error_msg == NULL ? 1 : 0; -- cgit v1.2.2 From c7064ef13b2181a489836349f9baf87df0dab28f Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Tue, 2 Mar 2010 13:28:44 -0500 Subject: ext4: mechanical rename some of the direct I/O get_block's identifiers This commit renames some of the direct I/O's block allocation flags, variables, and functions introduced in Mingming's "Direct IO for holes and fallocate" patches so that they can be used by ext4's buffered write path as well. Also changed the related function comments accordingly to cover both direct write and buffered write cases. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 18 ++++++------ fs/ext4/extents.c | 24 ++++++++-------- fs/ext4/fsync.c | 2 +- fs/ext4/inode.c | 84 +++++++++++++++++++++++++------------------------------ fs/ext4/super.c | 2 +- 5 files changed, 61 insertions(+), 69 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 74664ca19e22..c831a580bd76 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -133,7 +133,7 @@ struct mpage_da_data { int pages_written; int retval; }; -#define DIO_AIO_UNWRITTEN 0x1 +#define EXT4_IO_UNWRITTEN 0x1 typedef struct ext4_io_end { struct list_head list; /* per-file finished AIO list */ struct inode *inode; /* file being written to */ @@ -355,13 +355,13 @@ struct ext4_new_group_data { /* caller is from the direct IO path, request to creation of an unitialized extents if not allocated, split the uninitialized extent if blocks has been preallocated already*/ -#define EXT4_GET_BLOCKS_DIO 0x0008 +#define EXT4_GET_BLOCKS_PRE_IO 0x0008 #define EXT4_GET_BLOCKS_CONVERT 0x0010 -#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\ +#define EXT4_GET_BLOCKS_IO_CREATE_EXT (EXT4_GET_BLOCKS_PRE_IO|\ EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) - /* Convert extent to initialized after direct IO complete */ -#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ - EXT4_GET_BLOCKS_DIO_CREATE_EXT) + /* Convert extent to initialized after IO complete */ +#define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ + EXT4_GET_BLOCKS_IO_CREATE_EXT) /* * Flags used by ext4_free_blocks @@ -700,8 +700,8 @@ struct ext4_inode_info { qsize_t i_reserved_quota; #endif - /* completed async DIOs that might need unwritten extents handling */ - struct list_head i_aio_dio_complete_list; + /* completed IOs that might need unwritten extents handling */ + struct list_head i_completed_io_list; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; @@ -1461,7 +1461,7 @@ extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); -extern int flush_aio_dio_completed_IO(struct inode *inode); +extern int flush_completed_IO(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, int used, int quota_claim); /* ioctl.c */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a2c21aa09e2b..90ba8d9df697 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1619,7 +1619,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, BUG_ON(path[depth].p_hdr == NULL); /* try to insert block into found extent and return */ - if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) + if (ex && (flag != EXT4_GET_BLOCKS_PRE_IO) && ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", ext4_ext_is_uninitialized(newext), @@ -1740,7 +1740,7 @@ has_space: merge: /* try to merge extents to the right */ - if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT) + if (flag != EXT4_GET_BLOCKS_PRE_IO) ext4_ext_try_to_merge(inode, path, nearex); /* try to merge extents to the left */ @@ -2984,7 +2984,7 @@ fix_extent_len: ext4_ext_dirty(handle, inode, path + depth); return err; } -static int ext4_convert_unwritten_extents_dio(handle_t *handle, +static int ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { @@ -3064,8 +3064,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, flags, allocated); ext4_ext_show_leaf(inode, path); - /* DIO get_block() before submit the IO, split the extent */ - if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { + /* get_block() before submit the IO, split the extent */ + if (flags == EXT4_GET_BLOCKS_PRE_IO) { ret = ext4_split_unwritten_extents(handle, inode, path, iblock, max_blocks, flags); @@ -3075,14 +3075,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * completed */ if (io) - io->flag = DIO_AIO_UNWRITTEN; + io->flag = EXT4_IO_UNWRITTEN; else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); goto out; } - /* async DIO end_io complete, convert the filled extent to written */ - if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { - ret = ext4_convert_unwritten_extents_dio(handle, inode, + /* IO end_io complete, convert the filled extent to written */ + if (flags == EXT4_GET_BLOCKS_CONVERT) { + ret = ext4_convert_unwritten_extents_endio(handle, inode, path); if (ret >= 0) ext4_update_inode_fsync_trans(handle, inode, 1); @@ -3359,9 +3359,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * For non asycn direct IO case, flag the inode state * that we need to perform convertion when IO is done. */ - if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) { + if (flags == EXT4_GET_BLOCKS_PRE_IO) { if (io) - io->flag = DIO_AIO_UNWRITTEN; + io->flag = EXT4_IO_UNWRITTEN; else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); @@ -3656,7 +3656,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, map_bh.b_state = 0; ret = ext4_get_blocks(handle, inode, block, max_blocks, &map_bh, - EXT4_GET_BLOCKS_DIO_CONVERT_EXT); + EXT4_GET_BLOCKS_IO_CONVERT_EXT); if (ret <= 0) { WARN_ON(ret <= 0); printk(KERN_ERR "%s: ext4_ext_get_blocks " diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 98bd140aad01..0d0c3239c1cd 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -63,7 +63,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) if (inode->i_sb->s_flags & MS_RDONLY) return 0; - ret = flush_aio_dio_completed_IO(inode); + ret = flush_completed_IO(inode); if (ret < 0) return ret; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 427f4690ad6d..28f116bdc405 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3468,7 +3468,7 @@ out: return ret; } -static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, +static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = NULL; @@ -3476,28 +3476,14 @@ static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; int dio_credits; - ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", + ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", inode->i_ino, create); /* - * DIO VFS code passes create = 0 flag for write to - * the middle of file. It does this to avoid block - * allocation for holes, to prevent expose stale data - * out when there is parallel buffered read (which does - * not hold the i_mutex lock) while direct IO write has - * not completed. DIO request on holes finally falls back - * to buffered IO for this reason. - * - * For ext4 extent based file, since we support fallocate, - * new allocated extent as uninitialized, for holes, we - * could fallocate blocks for holes, thus parallel - * buffered IO read will zero out the page when read on - * a hole while parallel DIO write to the hole has not completed. - * - * when we come here, we know it's a direct IO write to - * to the middle of file ( DIO_MAX_BLOCKS) max_blocks = DIO_MAX_BLOCKS; @@ -3524,19 +3510,20 @@ static void ext4_free_io_end(ext4_io_end_t *io) iput(io->inode); kfree(io); } -static void dump_aio_dio_list(struct inode * inode) + +static void dump_completed_IO(struct inode * inode) { #ifdef EXT4_DEBUG struct list_head *cur, *before, *after; ext4_io_end_t *io, *io0, *io1; - if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ - ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); + if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ + ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); return; } - ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); - list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ + ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); + list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ cur = &io->list; before = cur->prev; io0 = container_of(before, ext4_io_end_t, list); @@ -3552,21 +3539,21 @@ static void dump_aio_dio_list(struct inode * inode) /* * check a range of space and convert unwritten extents to written. */ -static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) +static int ext4_end_io_nolock(ext4_io_end_t *io) { struct inode *inode = io->inode; loff_t offset = io->offset; ssize_t size = io->size; int ret = 0; - ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," + ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", io, inode->i_ino, io->list.next, io->list.prev); if (list_empty(&io->list)) return ret; - if (io->flag != DIO_AIO_UNWRITTEN) + if (io->flag != EXT4_IO_UNWRITTEN) return ret; if (offset + size <= i_size_read(inode)) @@ -3584,17 +3571,18 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) io->flag = 0; return ret; } + /* * work on completed aio dio IO, to convert unwritten extents to extents */ -static void ext4_end_aio_dio_work(struct work_struct *work) +static void ext4_end_io_work(struct work_struct *work) { ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); struct inode *inode = io->inode; int ret = 0; mutex_lock(&inode->i_mutex); - ret = ext4_end_aio_dio_nolock(io); + ret = ext4_end_io_nolock(io); if (ret >= 0) { if (!list_empty(&io->list)) list_del_init(&io->list); @@ -3602,32 +3590,35 @@ static void ext4_end_aio_dio_work(struct work_struct *work) } mutex_unlock(&inode->i_mutex); } + /* * This function is called from ext4_sync_file(). * - * When AIO DIO IO is completed, the work to convert unwritten - * extents to written is queued on workqueue but may not get immediately + * When IO is completed, the work to convert unwritten extents to + * written is queued on workqueue but may not get immediately * scheduled. When fsync is called, we need to ensure the * conversion is complete before fsync returns. - * The inode keeps track of a list of completed AIO from DIO path - * that might needs to do the conversion. This function walks through - * the list and convert the related unwritten extents to written. + * The inode keeps track of a list of pending/completed IO that + * might needs to do the conversion. This function walks through + * the list and convert the related unwritten extents for completed IO + * to written. + * The function return the number of pending IOs on success. */ -int flush_aio_dio_completed_IO(struct inode *inode) +int flush_completed_IO(struct inode *inode) { ext4_io_end_t *io; int ret = 0; int ret2 = 0; - if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) + if (list_empty(&EXT4_I(inode)->i_completed_io_list)) return ret; - dump_aio_dio_list(inode); - while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ - io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, + dump_completed_IO(inode); + while (!list_empty(&EXT4_I(inode)->i_completed_io_list)){ + io = list_entry(EXT4_I(inode)->i_completed_io_list.next, ext4_io_end_t, list); /* - * Calling ext4_end_aio_dio_nolock() to convert completed + * Calling ext4_end_io_nolock() to convert completed * IO to written. * * When ext4_sync_file() is called, run_queue() may already @@ -3640,7 +3631,7 @@ int flush_aio_dio_completed_IO(struct inode *inode) * avoid double converting from both fsync and background work * queue work. */ - ret = ext4_end_aio_dio_nolock(io); + ret = ext4_end_io_nolock(io); if (ret < 0) ret2 = ret; else @@ -3662,7 +3653,7 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) io->offset = 0; io->size = 0; io->error = 0; - INIT_WORK(&io->work, ext4_end_aio_dio_work); + INIT_WORK(&io->work, ext4_end_io_work); INIT_LIST_HEAD(&io->list); } @@ -3685,7 +3676,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, size); /* if not aio dio with unwritten extents, just free io and return */ - if (io_end->flag != DIO_AIO_UNWRITTEN){ + if (io_end->flag != EXT4_IO_UNWRITTEN){ ext4_free_io_end(io_end); iocb->private = NULL; return; @@ -3700,9 +3691,10 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, /* Add the io_end to per-inode completed aio dio list*/ list_add_tail(&io_end->list, - &EXT4_I(io_end->inode)->i_aio_dio_complete_list); + &EXT4_I(io_end->inode)->i_completed_io_list); iocb->private = NULL; } + /* * For ext4 extent files, ext4 will do direct-io write to holes, * preallocated extents, and those write extend the file, no need to @@ -3772,7 +3764,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, - ext4_get_block_dio_write, + ext4_get_block_write, ext4_end_io_dio); if (iocb->private) EXT4_I(inode)->cur_aio_dio = NULL; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c832508d5515..dc7a97e79e3b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -708,7 +708,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; #endif - INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); + INIT_LIST_HEAD(&ei->i_completed_io_list); ei->cur_aio_dio = NULL; ei->i_sync_tid = 0; ei->i_datasync_tid = 0; -- cgit v1.2.2 From 744692dc059845b2a3022119871846e74d4f6e11 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Thu, 4 Mar 2010 16:14:02 -0500 Subject: ext4: use ext4_get_block_write in buffer write Allocate uninitialized extent before ext4 buffer write and convert the extent to initialized after io completes. The purpose is to make sure an extent can only be marked initialized after it has been written with new data so we can safely drop the i_mutex lock in ext4 DIO read without exposing stale data. This helps to improve multi-thread DIO read performance on high-speed disks. Skip the nobh and data=journal mount cases to make things simple for now. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 +++- fs/ext4/ext4_jbd2.h | 24 ++++++ fs/ext4/extents.c | 22 +++--- fs/ext4/inode.c | 213 ++++++++++++++++++++++++++++++++++++++++++---------- fs/ext4/super.c | 37 ++++++++- 5 files changed, 256 insertions(+), 55 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c831a580bd76..dee45800dc95 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -138,7 +138,7 @@ typedef struct ext4_io_end { struct list_head list; /* per-file finished AIO list */ struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ - int error; /* I/O error code */ + struct page *page; /* page struct for buffer write */ loff_t offset; /* offset in the file */ ssize_t size; /* size of the extent */ struct work_struct work; /* data work queue */ @@ -361,7 +361,7 @@ struct ext4_new_group_data { EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* Convert extent to initialized after IO complete */ #define EXT4_GET_BLOCKS_IO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\ - EXT4_GET_BLOCKS_IO_CREATE_EXT) + EXT4_GET_BLOCKS_CREATE_UNINIT_EXT) /* * Flags used by ext4_free_blocks @@ -702,6 +702,7 @@ struct ext4_inode_info { /* completed IOs that might need unwritten extents handling */ struct list_head i_completed_io_list; + spinlock_t i_completed_io_lock; /* current io_end structure for async DIO write*/ ext4_io_end_t *cur_aio_dio; @@ -752,6 +753,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ @@ -1781,6 +1783,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 len, __u64 *moved_len); +/* BH_Uninit flag: blocks are allocated but uninitialized on disk */ +enum ext4_state_bits { + BH_Uninit /* blocks are allocated but uninitialized on disk */ + = BH_JBDPrivateStart, +}; + +BUFFER_FNS(Uninit, uninit) +TAS_BUFFER_FNS(Uninit, uninit) + /* * Add new method to test wether block and inode bitmaps are properly * initialized. With uninit_bg reading the block from disk is not enough diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 05eca817d704..b79ad5126468 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -304,4 +304,28 @@ static inline int ext4_should_writeback_data(struct inode *inode) return 0; } +/* + * This function controls whether or not we should try to go down the + * dioread_nolock code paths, which makes it safe to avoid taking + * i_mutex for direct I/O reads. This only works for extent-based + * files, and it doesn't work for nobh or if data journaling is + * enabled, since the dioread_nolock code uses b_private to pass + * information back to the I/O completion handler, and this conflicts + * with the jbd's use of b_private. + */ +static inline int ext4_should_dioread_nolock(struct inode *inode) +{ + if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) + return 0; + if (test_opt(inode->i_sb, NOBH)) + return 0; + if (!S_ISREG(inode->i_mode)) + return 0; + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + return 0; + if (ext4_should_journal_data(inode)) + return 0; + return 1; +} + #endif /* _EXT4_JBD2_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 90ba8d9df697..c7f166ab50eb 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1619,7 +1619,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, BUG_ON(path[depth].p_hdr == NULL); /* try to insert block into found extent and return */ - if (ex && (flag != EXT4_GET_BLOCKS_PRE_IO) + if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) && ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", ext4_ext_is_uninitialized(newext), @@ -1740,7 +1740,7 @@ has_space: merge: /* try to merge extents to the right */ - if (flag != EXT4_GET_BLOCKS_PRE_IO) + if (!(flag & EXT4_GET_BLOCKS_PRE_IO)) ext4_ext_try_to_merge(inode, path, nearex); /* try to merge extents to the left */ @@ -3065,7 +3065,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext4_ext_show_leaf(inode, path); /* get_block() before submit the IO, split the extent */ - if (flags == EXT4_GET_BLOCKS_PRE_IO) { + if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { ret = ext4_split_unwritten_extents(handle, inode, path, iblock, max_blocks, flags); @@ -3078,10 +3078,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, io->flag = EXT4_IO_UNWRITTEN; else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); + if (ext4_should_dioread_nolock(inode)) + set_buffer_uninit(bh_result); goto out; } /* IO end_io complete, convert the filled extent to written */ - if (flags == EXT4_GET_BLOCKS_CONVERT) { + if ((flags & EXT4_GET_BLOCKS_CONVERT)) { ret = ext4_convert_unwritten_extents_endio(handle, inode, path); if (ret >= 0) @@ -3351,21 +3353,21 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ ext4_ext_mark_uninitialized(&newex); /* - * io_end structure was created for every async - * direct IO write to the middle of the file. - * To avoid unecessary convertion for every aio dio rewrite - * to the mid of file, here we flag the IO that is really - * need the convertion. + * io_end structure was created for every IO write to an + * uninitialized extent. To avoid unecessary conversion, + * here we flag the IO that really needs the conversion. * For non asycn direct IO case, flag the inode state * that we need to perform convertion when IO is done. */ - if (flags == EXT4_GET_BLOCKS_PRE_IO) { + if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { if (io) io->flag = EXT4_IO_UNWRITTEN; else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } + if (ext4_should_dioread_nolock(inode)) + set_buffer_uninit(bh_result); } if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 28f116bdc405..d291310aef6b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" @@ -1534,6 +1535,8 @@ static void ext4_truncate_failed_write(struct inode *inode) ext4_truncate(inode); } +static int ext4_get_block_write(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1575,8 +1578,12 @@ retry: } *pagep = page; - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_get_block); + if (ext4_should_dioread_nolock(inode)) + ret = block_write_begin(file, mapping, pos, len, flags, pagep, + fsdata, ext4_get_block_write); + else + ret = block_write_begin(file, mapping, pos, len, flags, pagep, + fsdata, ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -2092,6 +2099,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); + if (buffer_uninit(exbh)) + set_buffer_uninit(bh); cur_logical++; pblock++; } while ((bh = bh->b_this_page) != head); @@ -2221,6 +2230,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) */ new.b_state = 0; get_blocks_flags = EXT4_GET_BLOCKS_CREATE; + if (ext4_should_dioread_nolock(mpd->inode)) + get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; if (mpd->b_state & (1 << BH_Delay)) get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; @@ -2636,6 +2647,9 @@ out: return ret; } +static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); +static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); + /* * Note that we don't need to start a transaction unless we're journaling data * because we should have holes filled from ext4_page_mkwrite(). We even don't @@ -2683,7 +2697,7 @@ static int ext4_writepage(struct page *page, int ret = 0; loff_t size; unsigned int len; - struct buffer_head *page_bufs; + struct buffer_head *page_bufs = NULL; struct inode *inode = page->mapping->host; trace_ext4_writepage(inode, page); @@ -2759,7 +2773,11 @@ static int ext4_writepage(struct page *page, if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) ret = nobh_writepage(page, noalloc_get_block_write, wbc); - else + else if (page_bufs && buffer_uninit(page_bufs)) { + ext4_set_bh_endio(page_bufs, inode); + ret = block_write_full_page_endio(page, noalloc_get_block_write, + wbc, ext4_end_io_buffer_write); + } else ret = block_write_full_page(page, noalloc_get_block_write, wbc); @@ -3347,10 +3365,44 @@ ext4_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); } +static void ext4_free_io_end(ext4_io_end_t *io) +{ + BUG_ON(!io); + if (io->page) + put_page(io->page); + iput(io->inode); + kfree(io); +} + +static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset) +{ + struct buffer_head *head, *bh; + unsigned int curr_off = 0; + + if (!page_has_buffers(page)) + return; + head = bh = page_buffers(page); + do { + if (offset <= curr_off && test_clear_buffer_uninit(bh) + && bh->b_private) { + ext4_free_io_end(bh->b_private); + bh->b_private = NULL; + bh->b_end_io = NULL; + } + curr_off = curr_off + bh->b_size; + bh = bh->b_this_page; + } while (bh != head); +} + static void ext4_invalidatepage(struct page *page, unsigned long offset) { journal_t *journal = EXT4_JOURNAL(page->mapping->host); + /* + * free any io_end structure allocated for buffers to be discarded + */ + if (ext4_should_dioread_nolock(page->mapping->host)) + ext4_invalidatepage_free_endio(page, offset); /* * If it's a full truncate we just forget about the pending dirtying */ @@ -3471,10 +3523,11 @@ out: static int ext4_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { - handle_t *handle = NULL; + handle_t *handle = ext4_journal_current_handle(); int ret = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; int dio_credits; + int started = 0; ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n", inode->i_ino, create); @@ -3485,37 +3538,36 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, */ create = EXT4_GET_BLOCKS_IO_CREATE_EXT; - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); - handle = ext4_journal_start(inode, dio_credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; + if (!handle) { + if (max_blocks > DIO_MAX_BLOCKS) + max_blocks = DIO_MAX_BLOCKS; + dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); + handle = ext4_journal_start(inode, dio_credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + started = 1; } + ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, create); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; } - ext4_journal_stop(handle); + if (started) + ext4_journal_stop(handle); out: return ret; } -static void ext4_free_io_end(ext4_io_end_t *io) -{ - BUG_ON(!io); - iput(io->inode); - kfree(io); -} - static void dump_completed_IO(struct inode * inode) { #ifdef EXT4_DEBUG struct list_head *cur, *before, *after; ext4_io_end_t *io, *io0, *io1; + unsigned long flags; if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); @@ -3523,6 +3575,7 @@ static void dump_completed_IO(struct inode * inode) } ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); + spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ cur = &io->list; before = cur->prev; @@ -3533,6 +3586,7 @@ static void dump_completed_IO(struct inode * inode) ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", io, inode->i_ino, io0, io1); } + spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); #endif } @@ -3556,9 +3610,7 @@ static int ext4_end_io_nolock(ext4_io_end_t *io) if (io->flag != EXT4_IO_UNWRITTEN) return ret; - if (offset + size <= i_size_read(inode)) - ret = ext4_convert_unwritten_extents(inode, offset, size); - + ret = ext4_convert_unwritten_extents(inode, offset, size); if (ret < 0) { printk(KERN_EMERG "%s: failed to convert unwritten" "extents to written extents, error is %d" @@ -3577,18 +3629,25 @@ static int ext4_end_io_nolock(ext4_io_end_t *io) */ static void ext4_end_io_work(struct work_struct *work) { - ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); - struct inode *inode = io->inode; - int ret = 0; + ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); + struct inode *inode = io->inode; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long flags; + int ret; mutex_lock(&inode->i_mutex); ret = ext4_end_io_nolock(io); - if (ret >= 0) { - if (!list_empty(&io->list)) - list_del_init(&io->list); - ext4_free_io_end(io); + if (ret < 0) { + mutex_unlock(&inode->i_mutex); + return; } + + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + if (!list_empty(&io->list)) + list_del_init(&io->list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); mutex_unlock(&inode->i_mutex); + ext4_free_io_end(io); } /* @@ -3607,15 +3666,18 @@ static void ext4_end_io_work(struct work_struct *work) int flush_completed_IO(struct inode *inode) { ext4_io_end_t *io; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long flags; int ret = 0; int ret2 = 0; - if (list_empty(&EXT4_I(inode)->i_completed_io_list)) + if (list_empty(&ei->i_completed_io_list)) return ret; dump_completed_IO(inode); - while (!list_empty(&EXT4_I(inode)->i_completed_io_list)){ - io = list_entry(EXT4_I(inode)->i_completed_io_list.next, + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + while (!list_empty(&ei->i_completed_io_list)){ + io = list_entry(ei->i_completed_io_list.next, ext4_io_end_t, list); /* * Calling ext4_end_io_nolock() to convert completed @@ -3631,20 +3693,23 @@ int flush_completed_IO(struct inode *inode) * avoid double converting from both fsync and background work * queue work. */ + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); ret = ext4_end_io_nolock(io); + spin_lock_irqsave(&ei->i_completed_io_lock, flags); if (ret < 0) ret2 = ret; else list_del_init(&io->list); } + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); return (ret2 < 0) ? ret2 : 0; } -static ext4_io_end_t *ext4_init_io_end (struct inode *inode) +static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) { ext4_io_end_t *io = NULL; - io = kmalloc(sizeof(*io), GFP_NOFS); + io = kmalloc(sizeof(*io), flags); if (io) { igrab(inode); @@ -3652,7 +3717,7 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode) io->flag = 0; io->offset = 0; io->size = 0; - io->error = 0; + io->page = NULL; INIT_WORK(&io->work, ext4_end_io_work); INIT_LIST_HEAD(&io->list); } @@ -3665,6 +3730,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, { ext4_io_end_t *io_end = iocb->private; struct workqueue_struct *wq; + unsigned long flags; + struct ext4_inode_info *ei; /* if not async direct IO or dio with 0 bytes write, just return */ if (!io_end || !size) @@ -3684,17 +3751,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, io_end->offset = offset; io_end->size = size; + io_end->flag = EXT4_IO_UNWRITTEN; wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; /* queue the work to convert unwritten extents to written */ queue_work(wq, &io_end->work); /* Add the io_end to per-inode completed aio dio list*/ - list_add_tail(&io_end->list, - &EXT4_I(io_end->inode)->i_completed_io_list); + ei = EXT4_I(io_end->inode); + spin_lock_irqsave(&ei->i_completed_io_lock, flags); + list_add_tail(&io_end->list, &ei->i_completed_io_list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); iocb->private = NULL; } +static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) +{ + ext4_io_end_t *io_end = bh->b_private; + struct workqueue_struct *wq; + struct inode *inode; + unsigned long flags; + + if (!test_clear_buffer_uninit(bh) || !io_end) + goto out; + + if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) { + printk("sb umounted, discard end_io request for inode %lu\n", + io_end->inode->i_ino); + ext4_free_io_end(io_end); + goto out; + } + + io_end->flag = EXT4_IO_UNWRITTEN; + inode = io_end->inode; + + /* Add the io_end to per-inode completed io list*/ + spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); + list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); + spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); + + wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; + /* queue the work to convert unwritten extents to written */ + queue_work(wq, &io_end->work); +out: + bh->b_private = NULL; + bh->b_end_io = NULL; + clear_buffer_uninit(bh); + end_buffer_async_write(bh, uptodate); +} + +static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode) +{ + ext4_io_end_t *io_end; + struct page *page = bh->b_page; + loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT; + size_t size = bh->b_size; + +retry: + io_end = ext4_init_io_end(inode, GFP_ATOMIC); + if (!io_end) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: allocation fail\n", __func__); + schedule(); + goto retry; + } + io_end->offset = offset; + io_end->size = size; + /* + * We need to hold a reference to the page to make sure it + * doesn't get evicted before ext4_end_io_work() has a chance + * to convert the extent from written to unwritten. + */ + io_end->page = page; + get_page(io_end->page); + + bh->b_private = io_end; + bh->b_end_io = ext4_end_io_buffer_write; + return 0; +} + /* * For ext4 extent files, ext4 will do direct-io write to holes, * preallocated extents, and those write extend the file, no need to @@ -3748,7 +3883,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, iocb->private = NULL; EXT4_I(inode)->cur_aio_dio = NULL; if (!is_sync_kiocb(iocb)) { - iocb->private = ext4_init_io_end(inode); + iocb->private = ext4_init_io_end(inode, GFP_NOFS); if (!iocb->private) return -ENOMEM; /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dc7a97e79e3b..5e8f9077b0fc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -709,6 +709,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_reserved_quota = 0; #endif INIT_LIST_HEAD(&ei->i_completed_io_list); + spin_lock_init(&ei->i_completed_io_lock); ei->cur_aio_dio = NULL; ei->i_sync_tid = 0; ei->i_datasync_tid = 0; @@ -926,6 +927,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) if (test_opt(sb, NOLOAD)) seq_puts(seq, ",norecovery"); + if (test_opt(sb, DIOREAD_NOLOCK)) + seq_puts(seq, ",dioread_nolock"); + ext4_show_quota_options(seq, sb); return 0; @@ -1109,6 +1113,7 @@ enum { Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, + Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, }; @@ -1176,6 +1181,8 @@ static const match_table_t tokens = { {Opt_auto_da_alloc, "auto_da_alloc=%u"}, {Opt_auto_da_alloc, "auto_da_alloc"}, {Opt_noauto_da_alloc, "noauto_da_alloc"}, + {Opt_dioread_nolock, "dioread_nolock"}, + {Opt_dioread_lock, "dioread_lock"}, {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, {Opt_err, NULL}, @@ -1640,6 +1647,12 @@ set_qf_format: case Opt_nodiscard: clear_opt(sbi->s_mount_opt, DISCARD); break; + case Opt_dioread_nolock: + set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + break; + case Opt_dioread_lock: + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + break; default: ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " @@ -2795,7 +2808,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); - goto failed_mount4; + goto failed_mount_wq; } else { clear_opt(sbi->s_mount_opt, DATA_FLAGS); set_opt(sbi->s_mount_opt, WRITEBACK_DATA); @@ -2808,7 +2821,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); - goto failed_mount4; + goto failed_mount_wq; } if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { @@ -2847,7 +2860,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { ext4_msg(sb, KERN_ERR, "Journal does not support " "requested data journaling mode"); - goto failed_mount4; + goto failed_mount_wq; } default: break; @@ -2855,13 +2868,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); no_journal: - if (test_opt(sb, NOBH)) { if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " "its supported only with writeback mode"); clear_opt(sbi->s_mount_opt, NOBH); } + if (test_opt(sb, DIOREAD_NOLOCK)) { + ext4_msg(sb, KERN_WARNING, "dioread_nolock option is " + "not supported with nobh mode"); + goto failed_mount_wq; + } } EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); if (!EXT4_SB(sb)->dio_unwritten_wq) { @@ -2926,6 +2943,18 @@ no_journal: "requested data journaling mode"); clear_opt(sbi->s_mount_opt, DELALLOC); } + if (test_opt(sb, DIOREAD_NOLOCK)) { + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " + "option - requested data journaling mode"); + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + } + if (sb->s_blocksize < PAGE_SIZE) { + ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " + "option - block size is too small"); + clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); + } + } err = ext4_setup_system_zone(sb); if (err) { -- cgit v1.2.2 From b7adc1f363e72e9131a582cc2cb00eaf83f51a39 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Tue, 2 Mar 2010 13:26:36 -0500 Subject: ext4: Use direct_IO_no_locking in ext4 dio read Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d291310aef6b..92214d4e5afa 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3474,7 +3474,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + if (rw == READ && ext4_should_dioread_nolock(inode)) + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, + inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext4_get_block, NULL); + else + ret = blockdev_direct_IO(rw, iocb, inode, + inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block, NULL); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) -- cgit v1.2.2 From 273df556b6ee2065bfe96edab5888d3dc9b108d8 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Tue, 2 Mar 2010 11:46:09 -0500 Subject: ext4: Convert BUG_ON checks to use ext4_error() instead Convert a bunch of BUG_ONs to emit a ext4_error() message and return EIO. This is a first pass and most notably does _not_ cover mballoc.c, which is a morass of void functions. Signed-off-by: Frank Mayhar Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 10 +++ fs/ext4/extents.c | 189 ++++++++++++++++++++++++++++++++++++++++++------------ fs/ext4/inode.c | 18 +++++- fs/ext4/super.c | 36 +++++++++++ 4 files changed, 209 insertions(+), 44 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index dee45800dc95..3d85bbb09f1b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -53,6 +53,12 @@ #define ext4_debug(f, a...) do {} while (0) #endif +#define EXT4_ERROR_INODE(inode, fmt, a...) \ + ext4_error_inode(__func__, (inode), (fmt), ## a); + +#define EXT4_ERROR_FILE(file, fmt, a...) \ + ext4_error_file(__func__, (file), (fmt), ## a); + /* data type for block offset of block group */ typedef int ext4_grpblk_t; @@ -1492,6 +1498,10 @@ extern int ext4_group_extend(struct super_block *sb, extern void __ext4_error(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); #define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) +extern void ext4_error_inode(const char *, struct inode *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext4_error_file(const char *, struct file *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); extern void __ext4_std_error(struct super_block *, const char *, int); extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c7f166ab50eb..4bb69206f175 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -703,7 +703,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, } eh = ext_block_hdr(bh); ppos++; - BUG_ON(ppos > depth); + if (unlikely(ppos > depth)) { + put_bh(bh); + EXT4_ERROR_INODE(inode, + "ppos %d > depth %d", ppos, depth); + goto err; + } path[ppos].p_bh = bh; path[ppos].p_hdr = eh; i--; @@ -749,7 +754,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (err) return err; - BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); + if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) { + EXT4_ERROR_INODE(inode, + "logical %d == ei_block %d!", + logical, le32_to_cpu(curp->p_idx->ei_block)); + return -EIO; + } len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; if (logical > le32_to_cpu(curp->p_idx->ei_block)) { /* insert after */ @@ -779,9 +789,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode, ext4_idx_store_pblock(ix, ptr); le16_add_cpu(&curp->p_hdr->eh_entries, 1); - BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) - > le16_to_cpu(curp->p_hdr->eh_max)); - BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); + if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries) + > le16_to_cpu(curp->p_hdr->eh_max))) { + EXT4_ERROR_INODE(inode, + "logical %d == ei_block %d!", + logical, le32_to_cpu(curp->p_idx->ei_block)); + return -EIO; + } + if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) { + EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!"); + return -EIO; + } err = ext4_ext_dirty(handle, inode, curp); ext4_std_error(inode->i_sb, err); @@ -819,7 +837,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, /* if current leaf will be split, then we should use * border from split point */ - BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); + if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!"); + return -EIO; + } if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; ext_debug("leaf will be split." @@ -860,7 +881,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, /* initialize new leaf */ newblock = ablocks[--a]; - BUG_ON(newblock == 0); + if (unlikely(newblock == 0)) { + EXT4_ERROR_INODE(inode, "newblock == 0!"); + err = -EIO; + goto cleanup; + } bh = sb_getblk(inode->i_sb, newblock); if (!bh) { err = -EIO; @@ -880,7 +905,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ex = EXT_FIRST_EXTENT(neh); /* move remainder of path[depth] to the new leaf */ - BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); + if (unlikely(path[depth].p_hdr->eh_entries != + path[depth].p_hdr->eh_max)) { + EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!", + path[depth].p_hdr->eh_entries, + path[depth].p_hdr->eh_max); + err = -EIO; + goto cleanup; + } /* start copy from next extent */ /* TODO: we could do it by single memmove */ m = 0; @@ -927,7 +959,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, /* create intermediate indexes */ k = depth - at - 1; - BUG_ON(k < 0); + if (unlikely(k < 0)) { + EXT4_ERROR_INODE(inode, "k %d < 0!", k); + err = -EIO; + goto cleanup; + } if (k) ext_debug("create %d intermediate indices\n", k); /* insert new index into current index block */ @@ -964,8 +1000,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, EXT_MAX_INDEX(path[i].p_hdr)); - BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != - EXT_LAST_INDEX(path[i].p_hdr)); + if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) != + EXT_LAST_INDEX(path[i].p_hdr))) { + EXT4_ERROR_INODE(inode, + "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!", + le32_to_cpu(path[i].p_ext->ee_block)); + err = -EIO; + goto cleanup; + } while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ext_debug("%d: move %d:%llu in new index %llu\n", i, le32_to_cpu(path[i].p_idx->ei_block), @@ -1203,7 +1245,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex; int depth, ee_len; - BUG_ON(path == NULL); + if (unlikely(path == NULL)) { + EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); + return -EIO; + } depth = path->p_depth; *phys = 0; @@ -1217,15 +1262,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path, ex = path[depth].p_ext; ee_len = ext4_ext_get_actual_len(ex); if (*logical < le32_to_cpu(ex->ee_block)) { - BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); + if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { + EXT4_ERROR_INODE(inode, + "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!", + *logical, le32_to_cpu(ex->ee_block)); + return -EIO; + } while (--depth >= 0) { ix = path[depth].p_idx; - BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); + if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, + "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!", + ix != NULL ? ix->ei_block : 0, + EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ? + EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0, + depth); + return -EIO; + } } return 0; } - BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); + if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { + EXT4_ERROR_INODE(inode, + "logical %d < ee_block %d + ee_len %d!", + *logical, le32_to_cpu(ex->ee_block), ee_len); + return -EIO; + } *logical = le32_to_cpu(ex->ee_block) + ee_len - 1; *phys = ext_pblock(ex) + ee_len - 1; @@ -1251,7 +1314,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; - BUG_ON(path == NULL); + if (unlikely(path == NULL)) { + EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical); + return -EIO; + } depth = path->p_depth; *phys = 0; @@ -1265,17 +1331,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, ex = path[depth].p_ext; ee_len = ext4_ext_get_actual_len(ex); if (*logical < le32_to_cpu(ex->ee_block)) { - BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex); + if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) { + EXT4_ERROR_INODE(inode, + "first_extent(path[%d].p_hdr) != ex", + depth); + return -EIO; + } while (--depth >= 0) { ix = path[depth].p_idx; - BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr)); + if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) { + EXT4_ERROR_INODE(inode, + "ix != EXT_FIRST_INDEX *logical %d!", + *logical); + return -EIO; + } } *logical = le32_to_cpu(ex->ee_block); *phys = ext_pblock(ex); return 0; } - BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len)); + if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) { + EXT4_ERROR_INODE(inode, + "logical %d < ee_block %d + ee_len %d!", + *logical, le32_to_cpu(ex->ee_block), ee_len); + return -EIO; + } if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) { /* next allocated block in this leaf */ @@ -1414,8 +1495,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, eh = path[depth].p_hdr; ex = path[depth].p_ext; - BUG_ON(ex == NULL); - BUG_ON(eh == NULL); + + if (unlikely(ex == NULL || eh == NULL)) { + EXT4_ERROR_INODE(inode, + "ex %p == NULL or eh %p == NULL", ex, eh); + return -EIO; + } if (depth == 0) { /* there is no tree at all */ @@ -1613,10 +1698,16 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext4_lblk_t next; unsigned uninitialized = 0; - BUG_ON(ext4_ext_get_actual_len(newext) == 0); + if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { + EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); + return -EIO; + } depth = ext_depth(inode); ex = path[depth].p_ext; - BUG_ON(path[depth].p_hdr == NULL); + if (unlikely(path[depth].p_hdr == NULL)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + return -EIO; + } /* try to insert block into found extent and return */ if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO) @@ -1788,7 +1879,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, } depth = ext_depth(inode); - BUG_ON(path[depth].p_hdr == NULL); + if (unlikely(path[depth].p_hdr == NULL)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + err = -EIO; + break; + } ex = path[depth].p_ext; next = ext4_ext_next_allocated_block(path); @@ -1839,7 +1934,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } - BUG_ON(cbex.ec_len == 0); + if (unlikely(cbex.ec_len == 0)) { + EXT4_ERROR_INODE(inode, "cbex.ec_len == 0"); + err = -EIO; + break; + } err = func(inode, path, &cbex, ex, cbdata); ext4_ext_drop_refs(path); @@ -1982,7 +2081,10 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, /* free index block */ path--; leaf = idx_pblock(path->p_idx); - BUG_ON(path->p_hdr->eh_entries == 0); + if (unlikely(path->p_hdr->eh_entries == 0)) { + EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); + return -EIO; + } err = ext4_ext_get_access(handle, inode, path); if (err) return err; @@ -2120,8 +2222,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; - BUG_ON(eh == NULL); - + if (unlikely(path[depth].p_hdr == NULL)) { + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + return -EIO; + } /* find where to start removing */ ex = EXT_LAST_EXTENT(eh); @@ -3240,10 +3344,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_ext_find_extent() */ - if (path[depth].p_ext == NULL && depth != 0) { - ext4_error(inode->i_sb, "bad extent address " - "inode: %lu, iblock: %d, depth: %d", - inode->i_ino, iblock, depth); + if (unlikely(path[depth].p_ext == NULL && depth != 0)) { + EXT4_ERROR_INODE(inode, "bad extent address " + "iblock: %d, depth: %d pblock %lld", + iblock, depth, path[depth].p_block); err = -EIO; goto out2; } @@ -3371,16 +3475,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, } if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) { - if (eh->eh_entries) { - last_ex = EXT_LAST_EXTENT(eh); - if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) - + ext4_ext_get_actual_len(last_ex)) - EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; - } else { - WARN_ON(eh->eh_entries == 0); - ext4_error(inode->i_sb, __func__, - "inode#%lu, eh->eh_entries = 0!", inode->i_ino); - } + if (unlikely(!eh->eh_entries)) { + EXT4_ERROR_INODE(inode, + "eh->eh_entries == 0 ee_block %d", + ex->ee_block); + err = -EIO; + goto out2; + } + last_ex = EXT_LAST_EXTENT(eh); + if (iblock + ar.len > le32_to_cpu(last_ex->ee_block) + + ext4_ext_get_actual_len(last_ex)) + EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL; } err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 92214d4e5afa..c717a74f2178 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -607,7 +607,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, if (*err) goto failed_out; - BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS); + if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + count %lu > %d!", + current_block, count, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } target -= count; /* allocate blocks for indirect blocks */ @@ -643,7 +650,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, ar.flags = EXT4_MB_HINT_DATA; current_block = ext4_mb_new_blocks(handle, &ar, err); - BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS); + if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) { + EXT4_ERROR_INODE(inode, + "current_block %llu + ar.len %d > %d!", + current_block, ar.len, + EXT4_MAX_BLOCK_FILE_PHYS); + *err = -EIO; + goto failed_out; + } if (*err && (target == blks)) { /* diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5e8f9077b0fc..5a18e9ec7cf9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -347,6 +347,42 @@ void __ext4_error(struct super_block *sb, const char *function, ext4_handle_error(sb); } +void ext4_error_inode(const char *function, struct inode *inode, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", + inode->i_sb->s_id, function, inode->i_ino, current->comm); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + ext4_handle_error(inode->i_sb); +} + +void ext4_error_file(const char *function, struct file *file, + const char *fmt, ...) +{ + va_list args; + struct inode *inode = file->f_dentry->d_inode; + char pathname[80], *path; + + va_start(args, fmt); + path = d_path(&(file->f_path), pathname, sizeof(pathname)); + if (!path) + path = "(unknown)"; + printk(KERN_CRIT + "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", + inode->i_sb->s_id, function, inode->i_ino, current->comm, path); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + ext4_handle_error(inode->i_sb); +} + static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]) { -- cgit v1.2.2 From 67eeb5685d2a211c0252ac7884142e503c759500 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Tue, 2 Mar 2010 08:08:51 -0500 Subject: ext4: Fix ext4_quota_write cross block boundary behaviour We always assume what dquot update result in changes in one data block But ext4_quota_write() function may handle cross block boundary writes In fact if this ever happen it will result in incorrect journal credits reservation, and later a BUG_ON. As soon this never happen the boundary cross loop is NOOP. In order to make things straight let's remove this loop and assert cross boundary condition. Signed-off-by: Dmitry Monakhov Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 69 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 35 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5a18e9ec7cf9..ad1ee5f21bab 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4010,9 +4010,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); - int tocopy; int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; - size_t towrite = len; struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -4022,52 +4020,53 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, (unsigned long long)off, (unsigned long long)len); return -EIO; } + /* + * Since we account only one data block in transaction credits, + * then it is impossible to cross a block boundary. + */ + if (sb->s_blocksize - offset < len) { + ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" + " cancelled because not block aligned", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); - while (towrite > 0) { - tocopy = sb->s_blocksize - offset < towrite ? - sb->s_blocksize - offset : towrite; - bh = ext4_bread(handle, inode, blk, 1, &err); - if (!bh) + bh = ext4_bread(handle, inode, blk, 1, &err); + if (!bh) + goto out; + if (journal_quota) { + err = ext4_journal_get_write_access(handle, bh); + if (err) { + brelse(bh); goto out; - if (journal_quota) { - err = ext4_journal_get_write_access(handle, bh); - if (err) { - brelse(bh); - goto out; - } } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, tocopy); - flush_dcache_page(bh->b_page); - unlock_buffer(bh); - if (journal_quota) - err = ext4_handle_dirty_metadata(handle, NULL, bh); - else { - /* Always do at least ordered writes for quotas */ - err = ext4_jbd2_file_inode(handle, inode); - mark_buffer_dirty(bh); - } - brelse(bh); - if (err) - goto out; - offset = 0; - towrite -= tocopy; - data += tocopy; - blk++; } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, len); + flush_dcache_page(bh->b_page); + unlock_buffer(bh); + if (journal_quota) + err = ext4_handle_dirty_metadata(handle, NULL, bh); + else { + /* Always do at least ordered writes for quotas */ + err = ext4_jbd2_file_inode(handle, inode); + mark_buffer_dirty(bh); + } + brelse(bh); out: - if (len == towrite) { + if (err) { mutex_unlock(&inode->i_mutex); return err; } - if (inode->i_size < off+len-towrite) { - i_size_write(inode, off+len-towrite); + if (inode->i_size < off + len) { + i_size_write(inode, off + len); EXT4_I(inode)->i_disksize = inode->i_size; } inode->i_mtime = inode->i_ctime = CURRENT_TIME; ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); - return len - towrite; + return len; } #endif -- cgit v1.2.2 From 3899167dbd6832a3d8d7171b425257ad46b6c40c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jan 2010 20:10:29 -0500 Subject: Get rid of mnt_mountpoint abuses in ext4 path to mnt/mnt->mnt_root is no worse than that to mnt->mnt_parent/mnt->mnt_mountpoint *and* needs no pinning the sucker down (mnt is not going away and mnt->mnt_root won't change) Signed-off-by: Al Viro --- fs/ext4/file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9630583cef28..56eee3d796c2 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -116,11 +116,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp) * devices or filesystem images. */ memset(buf, 0, sizeof(buf)); - path.mnt = mnt->mnt_parent; - path.dentry = mnt->mnt_mountpoint; - path_get(&path); + path.mnt = mnt; + path.dentry = mnt->mnt_root; cp = d_path(&path, buf, sizeof(buf)); - path_put(&path); if (!IS_ERR(cp)) { memcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); -- cgit v1.2.2 From 9b1d0998d24f9c207d5fbdd0b8bac07284e0eda7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 3 Mar 2010 16:19:32 -0500 Subject: ext4: Release page references acquired in ext4_da_block_invalidatepages We forget to release page references we acquire in ext4_da_block_invalidatepages. Luckily, this function gets called only if we are not able to allocate blocks for delay-allocated data so that function should better never be called. Also cleanup handling of index variable. Reported-by: Wu Fengguang Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c717a74f2178..f55df7192b95 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2157,17 +2157,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, break; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; - index = page->index; - if (index > end) + if (page->index > end) break; - index++; - BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); block_invalidatepage(page, 0); ClearPageUptodate(page); unlock_page(page); } + index = pvec.pages[nr_pages - 1]->index + 1; + pagevec_release(&pvec); } return; } -- cgit v1.2.2 From 5661bd6861b7490394e29aaf74dca812188272e4 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 3 Mar 2010 23:53:39 -0500 Subject: ext4: cleanup to use ext4_group_first_block_no() This is a cleanup and simplification patch which takes some open-coded calculations to calculate the first block number of a group and converts them to use the (already defined) ext4_group_first_block_no() function. Signed-off-by: Akinobu Mita Signed-off-by: "Theodore Ts'o" Cc: Andreas Dilger --- fs/ext4/balloc.c | 3 +-- fs/ext4/extents.c | 3 +-- fs/ext4/mballoc.c | 29 +++++++++++------------------ fs/ext4/mballoc.h | 7 +------ 4 files changed, 14 insertions(+), 28 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 720061a0ee53..fadbc4d3a202 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -130,8 +130,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * to make sure we calculate the right free blocks */ group_blocks = ext4_blocks_count(sbi->s_es) - - le32_to_cpu(sbi->s_es->s_first_data_block) - - (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); + ext4_group_first_block_no(sb, ngroups - 1); } else { group_blocks = EXT4_BLOCKS_PER_GROUP(sb); } diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4bb69206f175..3c0bae11a097 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, if (S_ISREG(inode->i_mode)) block_group++; } - bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); + bg_start = ext4_group_first_block_no(inode->i_sb, block_group); last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1; /* diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 72d5ceb18523..11568697b192 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -441,10 +441,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, for (i = 0; i < count; i++) { if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { ext4_fsblk_t blocknr; - blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); + + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += first + i; - blocknr += - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_grp_locked_error(sb, e4b->bd_group, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %u)", @@ -1255,10 +1254,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { ext4_fsblk_t blocknr; - blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); + + blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr += block; - blocknr += - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_grp_locked_error(sb, e4b->bd_group, __func__, "double-free of inode" " %lu's block %llu(bit %u in group %u)", @@ -1631,7 +1629,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, int max; int err; struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); - struct ext4_super_block *es = sbi->s_es; struct ext4_free_extent ex; if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) @@ -1648,8 +1645,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { ext4_fsblk_t start; - start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + - ex.fe_start + le32_to_cpu(es->s_first_data_block); + start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) + + ex.fe_start; /* use do_div to get remainder (would be 64-bit modulo) */ if (do_div(start, sbi->s_stripe) == 0) { ac->ac_found++; @@ -1803,8 +1800,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, BUG_ON(sbi->s_stripe == 0); /* find first stripe-aligned block in group */ - first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) - + le32_to_cpu(sbi->s_es->s_first_data_block); + first_group_block = ext4_group_first_block_no(sb, e4b->bd_group); + a = first_group_block + sbi->s_stripe - 1; do_div(a, sbi->s_stripe); i = (a * sbi->s_stripe) - first_group_block; @@ -2560,12 +2557,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ext4_unlock_group(sb, entry->group); if (test_opt(sb, DISCARD)) { ext4_fsblk_t discard_block; - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - discard_block = (ext4_fsblk_t)entry->group * - EXT4_BLOCKS_PER_GROUP(sb) - + entry->start_blk - + le32_to_cpu(es->s_first_data_block); + discard_block = entry->start_blk + + ext4_group_first_block_no(sb, entry->group); trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, entry->count); @@ -3525,8 +3519,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, if (bit >= end) break; next = mb_find_next_bit(bitmap_bh->b_data, end, bit); - start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + - le32_to_cpu(sbi->s_es->s_first_data_block); + start = ext4_group_first_block_no(sb, group) + bit; mb_debug(1, " free preallocated %u/%u in group %u\n", (unsigned) start, (unsigned) next - bit, (unsigned) group); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 436521cae456..9b2deed652b7 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -225,11 +225,6 @@ struct ext4_buddy { static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, struct ext4_free_extent *fex) { - ext4_fsblk_t block; - - block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + fex->fe_start - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); - return block; + return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start; } #endif -- cgit v1.2.2 From bda00de7e8569b1fcde27b68fa59e74e14c5f93a Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 3 Mar 2010 23:53:25 -0500 Subject: ext4: cleanup to use ext4_grp_offs_to_block() More cleanup to convert open-coded calculations of the first block number of a free extent to use ext4_grp_offs_to_block() instead. Signed-off-by: Akinobu Mita Signed-off-by: "Theodore Ts'o" Cc: Andreas Dilger --- fs/ext4/mballoc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 11568697b192..37d2438e0cb4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2697,9 +2697,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (err) goto out_err; - block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) - + ac->ac_b_ex.fe_start - + le32_to_cpu(es->s_first_data_block); + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); len = ac->ac_b_ex.fe_len; if (!ext4_data_block_valid(sbi, block, len)) { @@ -3154,9 +3152,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; - goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + - ac->ac_g_ex.fe_start + - le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); + goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex); /* * search for the prealloc space that is having * minimal distance from the goal block. -- cgit v1.2.2 From 731eb1a03a8445cde2cb23ecfb3580c6fa7bb690 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 3 Mar 2010 23:55:01 -0500 Subject: ext4: consolidate in_range() definitions There are duplicate macro definitions of in_range() in mballoc.h and balloc.c. This consolidates these two definitions into ext4.h, and changes extents.c to use in_range() as well. Signed-off-by: Akinobu Mita Signed-off-by: "Theodore Ts'o" Cc: Andreas Dilger --- fs/ext4/balloc.c | 3 --- fs/ext4/ext4.h | 2 ++ fs/ext4/extents.c | 4 ++-- fs/ext4/mballoc.h | 2 -- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index fadbc4d3a202..d2f37a5516c7 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -188,9 +188,6 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * when a file system is mounted (see ext4_fill_super). */ - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - /** * ext4_get_group_desc() -- load group descriptor from disk * @sb: super block diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3d85bbb09f1b..9b179163f1de 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1819,6 +1819,8 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh) set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state); } +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3c0bae11a097..94c8ee81f5e1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2051,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && cex->ec_type != EXT4_EXT_CACHE_EXTENT); - if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { + if (in_range(block, cex->ec_block, cex->ec_len)) { ex->ee_block = cpu_to_le32(cex->ec_block); ext4_ext_store_pblock(ex, cex->ec_start); ex->ee_len = cpu_to_le16(cex->ec_len); @@ -3364,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ - if (iblock >= ee_block && iblock < ee_block + ee_len) { + if (in_range(iblock, ee_block, ee_len)) { newblock = iblock - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 9b2deed652b7..b619322c76f0 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -220,8 +220,6 @@ struct ext4_buddy { #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, struct ext4_free_extent *fex) { -- cgit v1.2.2 From 5fd5249aa36fad98c9fd5edced352939e54f9324 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Thu, 4 Mar 2010 00:31:06 -0500 Subject: ext4: Fix insertion point of extent in mext_insert_across_blocks() If the leaf node has 2 extent space or fewer and EXT4_IOC_MOVE_EXT ioctl is called with the file offset where after the 2nd extent covers, mext_insert_across_blocks() always tries to insert extent into the first extent. As a result, the file gets corrupted because of wrong extent order. The patch fixes this problem. Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 1654eb862d74..9eca1c0ec546 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -252,6 +252,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, } o_start->ee_len = start_ext->ee_len; + eblock = le32_to_cpu(start_ext->ee_block); new_flag = 1; } else if (start_ext->ee_len && new_ext->ee_len && @@ -262,6 +263,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, * orig |------------------------------| */ o_start->ee_len = start_ext->ee_len; + eblock = le32_to_cpu(start_ext->ee_block); new_flag = 1; } else if (!start_ext->ee_len && new_ext->ee_len && @@ -502,6 +504,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, le32_to_cpu(oext->ee_block) + oext_alen) { start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - le32_to_cpu(oext->ee_block)); + start_ext.ee_block = oext->ee_block; copy_extent_status(oext, &start_ext); } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { prev_ext = oext - 1; @@ -515,6 +518,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, start_ext.ee_len = cpu_to_le16( ext4_ext_get_actual_len(prev_ext) + new_ext_alen); + start_ext.ee_block = oext->ee_block; copy_extent_status(prev_ext, &start_ext); new_ext.ee_len = 0; } -- cgit v1.2.2 From 7247c0caa23d94a1cb6b307edba9dc45fb0798d4 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Thu, 4 Mar 2010 00:34:58 -0500 Subject: ext4: Fix the NULL reference in double_down_write_data_sem() If EXT4_IOC_MOVE_EXT ioctl is called with NULL donor_fd, fget() in ext4_ioctl() gets inappropriate file structure for donor; so we need to do this check earlier, before calling double_down_write_data_sem(). Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/move_extent.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 9eca1c0ec546..7e99f4e72bf5 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -953,14 +953,6 @@ mext_check_arguments(struct inode *orig_inode, unsigned int blkbits = orig_inode->i_blkbits; unsigned int blocksize = 1 << blkbits; - /* Regular file check */ - if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { - ext4_debug("ext4 move extent: The argument files should be " - "regular file [ino:orig %lu, donor %lu]\n", - orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; - } - if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { ext4_debug("ext4 move extent: suid or sgid is set" " to donor file [ino:orig %lu, donor %lu]\n", @@ -1208,6 +1200,14 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } + /* Regular file check */ + if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { + ext4_debug("ext4 move extent: The argument files should be " + "regular file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + /* Protect orig and donor inodes against a truncate */ ret1 = mext_inode_double_lock(orig_inode, donor_inode); if (ret1 < 0) -- cgit v1.2.2 From c437b2733520599a2c6e0dbcdeae611319f84707 Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Thu, 4 Mar 2010 00:39:24 -0500 Subject: ext4: Code cleanup for EXT4_IOC_MOVE_EXT ioctl a) Fix sparse warning in ext4_ioctl() b) Remove unneeded variable in mext_leaf_block() c) Fix spelling typo in mext_check_arguments() Signed-off-by: Akira Fujita Signed-off-by: "Theodore Ts'o" --- fs/ext4/ioctl.c | 3 ++- fs/ext4/move_extent.c | 4 +--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 2220feb2dcc1..016d0249294f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -258,7 +258,8 @@ setversion_out: if (me.moved_len > 0) file_remove_suid(donor_filp); - if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) + if (copy_to_user((struct move_extent __user *)arg, + &me, sizeof(me))) err = -EFAULT; mext_out: fput(donor_filp); diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 7e99f4e72bf5..aa5fe28d180f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -477,7 +477,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, struct ext4_extent *oext, *o_start, *o_end, *prev_ext; struct ext4_extent new_ext, start_ext, end_ext; ext4_lblk_t new_ext_end; - ext4_fsblk_t new_phys_end; int oext_alen, new_ext_alen, end_ext_alen; int depth = ext_depth(orig_inode); int ret; @@ -491,7 +490,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, new_ext.ee_len = dext->ee_len; new_ext_alen = ext4_ext_get_actual_len(&new_ext); new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; - new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1; /* * Case: original extent is first @@ -932,7 +930,7 @@ out2: } /** - * mext_check_argumants - Check whether move extent can be done + * mext_check_arguments - Check whether move extent can be done * * @orig_inode: original inode * @donor_inode: donor inode -- cgit v1.2.2 From 5dd4056db84387975140ff2568eaa0406f07985e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:00 -0500 Subject: dquot: cleanup space allocation / freeing routines Get rid of the alloc_space, free_space, reserve_space, claim_space and release_rsv dquot operations - they are always called from the filesystem and if a filesystem really needs their own (which none currently does) it can just call into it's own routine directly. Move shared logic into the common __dquot_alloc_space, dquot_claim_space_nodirty and __dquot_free_space low-level methods, and rationalize the wrappers around it to move as much as possible code into the common block for CONFIG_QUOTA vs not. Also rename all these helpers to be named dquot_* instead of vfs_dq_*. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/inode.c | 20 +++++++++++--------- fs/ext4/mballoc.c | 6 +++--- fs/ext4/super.c | 5 ----- fs/ext4/xattr.c | 8 ++++---- 4 files changed, 18 insertions(+), 21 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..9f607ea411c8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1093,9 +1093,9 @@ void ext4_da_update_reserve_space(struct inode *inode, /* Update quota subsystem */ if (quota_claim) { - vfs_dq_claim_block(inode, used); + dquot_claim_block(inode, used); if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + dquot_release_reservation_block(inode, mdb_free); } else { /* * We did fallocate with an offset that is already delayed @@ -1106,8 +1106,8 @@ void ext4_da_update_reserve_space(struct inode *inode, * that */ if (allocated_meta_blocks) - vfs_dq_claim_block(inode, allocated_meta_blocks); - vfs_dq_release_reservation_block(inode, mdb_free + used); + dquot_claim_block(inode, allocated_meta_blocks); + dquot_release_reservation_block(inode, mdb_free + used); } /* @@ -1836,6 +1836,7 @@ static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned long md_needed, md_reserved; + int ret; /* * recalculate the amount of metadata blocks to reserve @@ -1853,11 +1854,12 @@ repeat: * later. Real quota accounting is done at pages writeout * time. */ - if (vfs_dq_reserve_block(inode, md_needed + 1)) - return -EDQUOT; + ret = dquot_reserve_block(inode, md_needed + 1); + if (ret) + return ret; if (ext4_claim_free_blocks(sbi, md_needed + 1)) { - vfs_dq_release_reservation_block(inode, md_needed + 1); + dquot_release_reservation_block(inode, md_needed + 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); goto repeat; @@ -1914,7 +1916,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_release_reservation_block(inode, to_free); + dquot_release_reservation_block(inode, to_free); } static void ext4_da_page_release_reservation(struct page *page, @@ -5641,7 +5643,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * i_size has been changed by generic_commit_write() and we thus need * to include the updated inode in the current transaction. * - * Also, vfs_dq_alloc_block() will always dirty the inode when blocks + * Also, dquot_alloc_block() will always dirty the inode when blocks * are allocated to the file. * * If the inode is marked synchronous, we don't honour that here - doing diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d34afad3e137..0b905781e8e6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4254,7 +4254,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, return 0; } reserv_blks = ar->len; - while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) { + while (ar->len && dquot_alloc_block(ar->inode, ar->len)) { ar->flags |= EXT4_MB_HINT_NOPREALLOC; ar->len--; } @@ -4331,7 +4331,7 @@ out2: kmem_cache_free(ext4_ac_cachep, ac); out1: if (inquota && ar->len < inquota) - vfs_dq_free_block(ar->inode, inquota - ar->len); + dquot_free_block(ar->inode, inquota - ar->len); out3: if (!ar->len) { if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) @@ -4646,7 +4646,7 @@ do_more: sb->s_dirt = 1; error_return: if (freed) - vfs_dq_free_block(inode, freed); + dquot_free_block(inode, freed); brelse(bitmap_bh); ext4_std_error(sb, err); if (ac) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 735c20d5fd56..fa8f4deda652 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1014,15 +1014,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext4_quota_operations = { .initialize = dquot_initialize, .drop = dquot_drop, - .alloc_space = dquot_alloc_space, - .reserve_space = dquot_reserve_space, - .claim_space = dquot_claim_space, - .release_rsv = dquot_release_reserved_space, #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif .alloc_inode = dquot_alloc_inode, - .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ext4_write_dquot, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f3a2f7ed45aa..ab3a95ee5e7e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -494,7 +494,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); if (ce) @@ -787,8 +787,8 @@ inserted: else { /* The old block is released after updating the inode. */ - error = -EDQUOT; - if (vfs_dq_alloc_block(inode, 1)) + error = dquot_alloc_block(inode, 1); + if (error) goto cleanup; error = ext4_journal_get_write_access(handle, new_bh); @@ -876,7 +876,7 @@ cleanup: return error; cleanup_dquot: - vfs_dq_free_block(inode, 1); + dquot_free_block(inode, 1); goto cleanup; bad_block: -- cgit v1.2.2 From 63936ddaa16b9486e2d426ed7b09f559a5c60f87 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:01 -0500 Subject: dquot: cleanup inode allocation / freeing routines Get rid of the alloc_inode and free_inode dquot operations - they are always called from the filesystem and if a filesystem really needs their own (which none currently does) it can just call into it's own routine directly. Also get rid of the vfs_dq_alloc/vfs_dq_free wrappers and always call the lowlevel dquot_alloc_inode / dqout_free_inode routines directly, which now lose the number argument which is always 1. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/ialloc.c | 10 +++++----- fs/ext4/super.c | 2 -- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f3624ead4f6c..b0d744cf8b95 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -219,7 +219,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) */ vfs_dq_init(inode); ext4_xattr_delete_inode(handle, inode); - vfs_dq_free_inode(inode); + dquot_free_inode(inode); vfs_dq_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -1034,10 +1034,10 @@ got: ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; - if (vfs_dq_alloc_inode(inode)) { - err = -EDQUOT; + vfs_dq_init(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext4_init_acl(handle, inode, dir); if (err) @@ -1074,7 +1074,7 @@ really_out: return ret; fail_free_drop: - vfs_dq_free_inode(inode); + dquot_free_inode(inode); fail_drop: vfs_dq_drop(inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fa8f4deda652..d231da8798e3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1017,8 +1017,6 @@ static const struct dquot_operations ext4_quota_operations = { #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif - .alloc_inode = dquot_alloc_inode, - .free_inode = dquot_free_inode, .transfer = dquot_transfer, .write_dquot = ext4_write_dquot, .acquire_dquot = ext4_acquire_dquot, -- cgit v1.2.2 From b43fa8284d7790d9cca32c9c55e24f29be2fa33b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:03 -0500 Subject: dquot: cleanup dquot transfer routine Get rid of the transfer dquot operation - it is now always called from the filesystem and if a filesystem really needs it's own (which none currently does) it can just call into it's own routine directly. Rename the now static low-level dquot_transfer helper to __dquot_transfer and vfs_dq_transfer to dquot_transfer to have a consistent namespace, and make the new dquot_transfer return a normal negative errno value which all callers expect. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/inode.c | 2 +- fs/ext4/super.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9f607ea411c8..6a002a6d0624 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5263,7 +5263,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) error = PTR_ERR(handle); goto err_out; } - error = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + error = dquot_transfer(inode, attr); if (error) { ext4_journal_stop(handle); return error; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d231da8798e3..b4253fb7bab6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1017,7 +1017,6 @@ static const struct dquot_operations ext4_quota_operations = { #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif - .transfer = dquot_transfer, .write_dquot = ext4_write_dquot, .acquire_dquot = ext4_acquire_dquot, .release_dquot = ext4_release_dquot, -- cgit v1.2.2 From 257ba15cedf1288f0c96118d7e63947231d27278 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:04 -0500 Subject: dquot: move dquot drop responsibility into the filesystem Currently clear_inode calls vfs_dq_drop directly. This means we tie the quota code into the VFS. Get rid of that and make the filesystem responsible for the drop inside the ->clear_inode superblock operation. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b4253fb7bab6..56554c8850ec 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -761,6 +761,7 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { + vfs_dq_drop(inode); ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, -- cgit v1.2.2 From 9f7547580263d4a55efe06ce5cfd567f568be6e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:05 -0500 Subject: dquot: cleanup dquot drop routine Get rid of the drop dquot operation - it is now always called from the filesystem and if a filesystem really needs it's own (which none currently does) it can just call into it's own routine directly. Rename the now static low-level dquot_drop helper to __dquot_drop and vfs_dq_drop to dquot_drop to have a consistent namespace. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/ialloc.c | 4 ++-- fs/ext4/super.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b0d744cf8b95..ca8986e4b528 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -220,7 +220,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) vfs_dq_init(inode); ext4_xattr_delete_inode(handle, inode); dquot_free_inode(inode); - vfs_dq_drop(inode); + dquot_drop(inode); is_directory = S_ISDIR(inode->i_mode); @@ -1077,7 +1077,7 @@ fail_free_drop: dquot_free_inode(inode); fail_drop: - vfs_dq_drop(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; unlock_new_inode(inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 56554c8850ec..035516c80df2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -761,7 +761,7 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { - vfs_dq_drop(inode); + dquot_drop(inode); ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, @@ -1014,7 +1014,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, static const struct dquot_operations ext4_quota_operations = { .initialize = dquot_initialize, - .drop = dquot_drop, #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif -- cgit v1.2.2 From 907f4554e2521cb28b0009d17167760650a9561c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:06 -0500 Subject: dquot: move dquot initialization responsibility into the filesystem Currently various places in the VFS call vfs_dq_init directly. This means we tie the quota code into the VFS. Get rid of that and make the filesystem responsible for the initialization. For most metadata operations this is a straight forward move into the methods, but for truncate and open it's a bit more complicated. For truncate we currently only call vfs_dq_init for the sys_truncate case because open already takes care of it for ftruncate and open(O_TRUNC) - the new code causes an additional vfs_dq_init for those which is harmless. For open the initialization is moved from do_filp_open into the open method, which means it happens slightly earlier now, and only for regular files. The latter is fine because we don't need to initialize it for operations on special files, and we already do it as part of the namespace operations for directories. Add a dquot_file_open helper that filesystems that support generic quotas can use to fill in ->open. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/file.c | 2 +- fs/ext4/inode.c | 5 +++++ fs/ext4/namei.c | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9630583cef28..85fa464a24ad 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -127,7 +127,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) sb->s_dirt = 1; } } - return generic_file_open(inode, filp); + return dquot_file_open(inode, filp); } const struct file_operations ext4_file_operations = { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6a002a6d0624..eaa22ae9f1f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -170,6 +170,9 @@ void ext4_delete_inode(struct inode *inode) handle_t *handle; int err; + if (!is_bad_inode(inode)) + vfs_dq_init(inode); + if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages(&inode->i_data, 0); @@ -5251,6 +5254,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; + if (ia_valid & ATTR_SIZE) + vfs_dq_init(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 17a17e10dd60..20f55c2e7571 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1766,6 +1766,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int err, retries = 0; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1800,6 +1802,8 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -1837,6 +1841,8 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + @@ -2136,7 +2142,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ + vfs_dq_init(dir); vfs_dq_init(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2195,7 +2203,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ + vfs_dq_init(dir); vfs_dq_init(dentry->d_inode); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2250,6 +2260,8 @@ static int ext4_symlink(struct inode *dir, if (l > dir->i_sb->s_blocksize) return -ENAMETOOLONG; + vfs_dq_init(dir); + retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + @@ -2308,6 +2320,8 @@ static int ext4_link(struct dentry *old_dentry, if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; + vfs_dq_init(dir); + /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * otherwise has the potential to corrupt the orphan inode list. @@ -2358,6 +2372,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct ext4_dir_entry_2 *old_de, *new_de; int retval, force_da_alloc = 0; + vfs_dq_init(old_dir); + vfs_dq_init(new_dir); + old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go -- cgit v1.2.2 From 871a293155a24554e153538d36e3a80fa169aefb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Mar 2010 09:05:07 -0500 Subject: dquot: cleanup dquot initialize routine Get rid of the initialize dquot operation - it is now always called from the filesystem and if a filesystem really needs it's own (which none currently does) it can just call into it's own routine directly. Rename the now static low-level dquot_initialize helper to __dquot_initialize and vfs_dq_init to dquot_initialize to have a consistent namespace. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext4/file.c | 1 + fs/ext4/ialloc.c | 4 ++-- fs/ext4/inode.c | 4 ++-- fs/ext4/namei.c | 24 ++++++++++++------------ fs/ext4/super.c | 5 ++--- 5 files changed, 19 insertions(+), 19 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 85fa464a24ad..a08a12998c49 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index ca8986e4b528..9bb2bb9f67ad 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -217,7 +217,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - vfs_dq_init(inode); + dquot_initialize(inode); ext4_xattr_delete_inode(handle, inode); dquot_free_inode(inode); dquot_drop(inode); @@ -1034,7 +1034,7 @@ got: ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; - vfs_dq_init(inode); + dquot_initialize(inode); err = dquot_alloc_inode(inode); if (err) goto fail_drop; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index eaa22ae9f1f6..bec222ca9ba4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -171,7 +171,7 @@ void ext4_delete_inode(struct inode *inode) int err; if (!is_bad_inode(inode)) - vfs_dq_init(inode); + dquot_initialize(inode); if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); @@ -5255,7 +5255,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) return error; if (ia_valid & ATTR_SIZE) - vfs_dq_init(inode); + dquot_initialize(inode); if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { handle_t *handle; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 20f55c2e7571..7f3d2d75a0dc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1766,7 +1766,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, struct inode *inode; int err, retries = 0; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -1802,7 +1802,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -1841,7 +1841,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -2142,8 +2142,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go in * separate transaction */ - vfs_dq_init(dir); - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) @@ -2203,8 +2203,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) /* Initialize quotas before so that eventual writes go * in separate transaction */ - vfs_dq_init(dir); - vfs_dq_init(dentry->d_inode); + dquot_initialize(dir); + dquot_initialize(dentry->d_inode); handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) @@ -2260,7 +2260,7 @@ static int ext4_symlink(struct inode *dir, if (l > dir->i_sb->s_blocksize) return -ENAMETOOLONG; - vfs_dq_init(dir); + dquot_initialize(dir); retry: handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + @@ -2320,7 +2320,7 @@ static int ext4_link(struct dentry *old_dentry, if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; - vfs_dq_init(dir); + dquot_initialize(dir); /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing @@ -2372,15 +2372,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct ext4_dir_entry_2 *old_de, *new_de; int retval, force_da_alloc = 0; - vfs_dq_init(old_dir); - vfs_dq_init(new_dir); + dquot_initialize(old_dir); + dquot_initialize(new_dir); old_bh = new_bh = dir_bh = NULL; /* Initialize quotas before so that eventual writes go * in separate transaction */ if (new_dentry->d_inode) - vfs_dq_init(new_dentry->d_inode); + dquot_initialize(new_dentry->d_inode); handle = ext4_journal_start(old_dir, 2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 035516c80df2..edcf3b0239d1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1013,7 +1013,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); static const struct dquot_operations ext4_quota_operations = { - .initialize = dquot_initialize, #ifdef CONFIG_QUOTA .get_reserved_space = ext4_get_reserved_space, #endif @@ -1931,7 +1930,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - vfs_dq_init(inode); + dquot_initialize(inode); if (inode->i_nlink) { ext4_msg(sb, KERN_DEBUG, "%s: truncating inode %lu to %lld bytes", @@ -3700,7 +3699,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) * Process 1 Process 2 * ext4_create() quota_sync() * jbd2_journal_start() write_dquot() - * vfs_dq_init() down(dqio_mutex) + * dquot_initialize() down(dqio_mutex) * down(dqio_mutex) jbd2_journal_start() * */ -- cgit v1.2.2 From 64e290ec69be39f1887fa0b403c1e417b6b038e7 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Thu, 4 Mar 2010 22:25:21 -0500 Subject: ext4: fix up rb_root initializations to use RB_ROOT ext4 uses rb_node = NULL; to zero rb_root at few places. Using RB_ROOT as the initializer is more portable in case the underlying implementation of rbtrees changes in the future. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" Cc: Eric Paris --- fs/ext4/block_validity.c | 4 ++-- fs/ext4/dir.c | 2 +- fs/ext4/mballoc.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index a60ab9aad57d..983f0e127493 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -205,14 +205,14 @@ void ext4_release_system_zone(struct super_block *sb) entry = rb_entry(n, struct ext4_system_zone, node); kmem_cache_free(ext4_system_zone_cachep, entry); if (!parent) - EXT4_SB(sb)->system_blks.rb_node = NULL; + EXT4_SB(sb)->system_blks = RB_ROOT; else if (parent->rb_left == n) parent->rb_left = NULL; else if (parent->rb_right == n) parent->rb_right = NULL; n = parent; } - EXT4_SB(sb)->system_blks.rb_node = NULL; + EXT4_SB(sb)->system_blks = RB_ROOT; } /* diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 29857ddd9e26..86cb6d86a048 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -305,7 +305,7 @@ static void free_rb_tree_fname(struct rb_root *root) kfree(old); } if (!parent) - root->rb_node = NULL; + *root = RB_ROOT; else if (parent->rb_left == n) parent->rb_left = NULL; else if (parent->rb_right == n) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 37d2438e0cb4..abb11e328b65 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2253,7 +2253,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); init_rwsem(&meta_group_info[i]->alloc_sem); - meta_group_info[i]->bb_free_root.rb_node = NULL; + meta_group_info[i]->bb_free_root = RB_ROOT; #ifdef DOUBLE_CHECK { -- cgit v1.2.2 From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/ext4/ext4.h | 2 +- fs/ext4/inode.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4cedc91ec59d..50af1a2c65e7 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1416,7 +1416,7 @@ int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode(struct inode *, int); +extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e11952404e02..d01a6cdbf854 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5177,7 +5177,7 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. */ -int ext4_write_inode(struct inode *inode, int wait) +int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) { int err; @@ -5191,7 +5191,7 @@ int ext4_write_inode(struct inode *inode, int wait) return -EIO; } - if (!wait) + if (wbc->sync_mode != WB_SYNC_ALL) return 0; err = ext4_force_commit(inode->i_sb); @@ -5201,7 +5201,7 @@ int ext4_write_inode(struct inode *inode, int wait) err = ext4_get_inode_loc(inode, &iloc); if (err) return err; - if (wait) + if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { ext4_error(inode->i_sb, __func__, -- cgit v1.2.2 From 52cf25d0ab7f78eeecc59ac652ed5090f69b619e Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Tue, 19 Jan 2010 02:58:23 +0100 Subject: Driver core: Constify struct sysfs_ops in struct kobj_type Constify struct sysfs_ops. This is part of the ops structure constification effort started by Arjan van de Ven et al. Benefits of this constification: * prevents modification of data that is shared (referenced) by many other structure instances at runtime * detects/prevents accidental (but not intentional) modification attempts on archs that enforce read-only kernel data at runtime * potentially better optimized code as the compiler can assume that the const data cannot be changed * the compiler/linker move const data into .rodata and therefore exclude them from false sharing Signed-off-by: Emese Revfy Acked-by: David Teigland Acked-by: Matt Domsch Acked-by: Maciej Sosnowski Acked-by: Hans J. Koch Acked-by: Pekka Enberg Acked-by: Jens Axboe Acked-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2b83b96cb2eb..ce84a6ed4a48 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2358,7 +2358,7 @@ static void ext4_sb_release(struct kobject *kobj) } -static struct sysfs_ops ext4_attr_ops = { +static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, }; -- cgit v1.2.2 From d330a5befb88875a9b3d2db62f9b74dadf660b13 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 14 Mar 2010 18:17:54 -0400 Subject: ext4: Fix estimate of # of blocks needed to write indirect-mapped files http://bugzilla.kernel.org/show_bug.cgi?id=15420 Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 986120f30066..11119e07233b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1035,7 +1035,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, sector_t lblock) { struct ext4_inode_info *ei = EXT4_I(inode); - int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; + sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1); int blk_bits; if (lblock < EXT4_NDIR_BLOCKS) @@ -1050,7 +1050,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, } ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; ei->i_da_metadata_calc_len = 1; - blk_bits = roundup_pow_of_two(lblock + 1); + blk_bits = order_base_2(lblock); return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; } -- cgit v1.2.2 From 37f328eb60a94779dd020084209fc4db2d6444a0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 24 Mar 2010 20:06:41 -0400 Subject: ext4: Fix spelling of CONTIG_FS_EXT3 to CONFIG_FS_EXT3 Oops. (Blush.) Thanks to Sedat Dilek for pointing this out. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ce84a6ed4a48..f4b038f51f31 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4068,7 +4068,7 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags, return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } -#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", @@ -4095,7 +4095,7 @@ static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } #endif -#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", -- cgit v1.2.2 From ba69f9ab7df844125898104e854e063b47c26637 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 24 Mar 2010 20:18:37 -0400 Subject: ext4: Don't use delayed allocation by default when used instead of ext3 When ext4 driver is used to mount a filesystem instead of the ext3 file system driver (through CONFIG_EXT4_USE_FOR_EXT23), do not enable delayed allocation by default since some ext3 users and application writers have developed unfortunate expectations about the safety of writing files on systems subject to sudden and violent death without using fsync(). Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f4b038f51f31..29c6875d9b4d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -68,7 +68,21 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); +static int ext4_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt); +#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext3_fs_type = { + .owner = THIS_MODULE, + .name = "ext3", + .get_sb = ext4_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) +#else +#define IS_EXT3_SB(sb) (0) +#endif ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) @@ -2539,7 +2553,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * enable delayed allocation by default * Use -o nodelalloc to turn it off */ - set_opt(sbi->s_mount_opt, DELALLOC); + if (!IS_EXT3_SB(sb)) + set_opt(sbi->s_mount_opt, DELALLOC); if (!parse_options((char *) data, sb, &journal_devnum, &journal_ioprio, NULL, 0)) @@ -4096,14 +4111,6 @@ static inline void unregister_as_ext2(void) { } #endif #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) -static struct file_system_type ext3_fs_type = { - .owner = THIS_MODULE, - .name = "ext3", - .get_sb = ext4_get_sb, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - static inline void register_as_ext3(void) { int err = register_filesystem(&ext3_fs_type); -- cgit v1.2.2 From c4caae25187ff3f5e837c6f04eb1acc2723c72d3 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 23 Mar 2010 21:32:00 -0400 Subject: ext4: Fixed inode allocator to correctly track a flex_bg's used_dirs When used_dirs was introduced for the flex_groups struct, it looks like the accounting was not put into place properly, in some places manipulating free_inodes rather than used_dirs. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 361c0b9962a8..57f6eef6ccd6 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -263,7 +263,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) ext4_group_t f; f = ext4_flex_group(sbi, block_group); - atomic_dec(&sbi->s_flex_groups[f].free_inodes); + atomic_dec(&sbi->s_flex_groups[f].used_dirs); } } @@ -773,7 +773,7 @@ static int ext4_claim_inode(struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + atomic_inc(&sbi->s_flex_groups[f].used_dirs); } } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); -- cgit v1.2.2 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- fs/ext4/block_validity.c | 1 + fs/ext4/inode.c | 1 + fs/ext4/mballoc.c | 1 + fs/ext4/migrate.c | 1 + fs/ext4/move_extent.c | 1 + fs/ext4/xattr_security.c | 1 + 6 files changed, 6 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 983f0e127493..538c48655084 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "ext4.h" struct ext4_system_zone { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 11119e07233b..5381802d6052 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 54df209d2eed..bde9d0b170c2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -23,6 +23,7 @@ #include "mballoc.h" #include +#include #include /* diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 8b87bd0eac95..34dcfc52ef44 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -13,6 +13,7 @@ */ #include +#include #include "ext4_jbd2.h" #include "ext4_extents.h" diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index aa5fe28d180f..d1fc662cc311 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -15,6 +15,7 @@ #include #include +#include #include "ext4_jbd2.h" #include "ext4_extents.h" #include "ext4.h" diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 983c253999a7..8b145e98df07 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" -- cgit v1.2.2 From 8b472d739b2ddd8ab7fb278874f696cd95b25a5e Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Sat, 3 Apr 2010 16:45:06 -0400 Subject: ext4: Fix possible lost inode write in no journal mode In the no-journal case, ext4_write_inode() will fetch the bh and call sync_dirty_buffer() on it. However, if the bh has already been written and the bh reclaimed for some other purpose, AND if the inode is the only one in the inode table block in use, then ext4_get_inode_loc() will not read the inode table block from disk, but as an optimization, fill the block with zero's assuming that its caller will copy in the on-disk version of the inode. This is not done by ext4_write_inode(), so the contents of the inode can simply get lost. The fix is to use __ext4_get_inode_loc() with in_mem set to 0, instead of ext4_get_inode_loc(). Long term the API needs to be fixed so it's obvious why latter is not safe. Addresses-Google-Bug: #2526446 Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 11119e07233b..87e3c70d0692 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5374,7 +5374,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) } else { struct ext4_iloc iloc; - err = ext4_get_inode_loc(inode, &iloc); + err = __ext4_get_inode_loc(inode, &iloc, 0); if (err) return err; if (wbc->sync_mode == WB_SYNC_ALL) -- cgit v1.2.2 From fd2dd9fbaf9e498ec63eef298921e36556f7214c Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Sat, 3 Apr 2010 17:44:16 -0400 Subject: ext4: Fix buffer head leaks after calls to ext4_get_inode_loc() Calls to ext4_get_inode_loc() returns with a reference to a buffer head in iloc->bh. The callers of this function in ext4_write_inode() when in no journal mode and in ext4_xattr_fiemap() don't release the buffer head after using it. Addresses-Google-Bug: #2548165 Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 1 + fs/ext4/inode.c | 1 + 2 files changed, 2 insertions(+) (limited to 'fs/ext4') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 94c8ee81f5e1..236b834b4ca8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3879,6 +3879,7 @@ static int ext4_xattr_fiemap(struct inode *inode, physical += offset; length = EXT4_SB(inode->i_sb)->s_inode_size - offset; flags |= FIEMAP_EXTENT_DATA_INLINE; + brelse(iloc.bh); } else { /* external block */ physical = EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 87e3c70d0692..ba1eee847e32 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5385,6 +5385,7 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) (unsigned long long)iloc.bh->b_blocknr); err = -EIO; } + brelse(iloc.bh); } return err; } -- cgit v1.2.2 From b90f687018e6d6c77d981b09203780f7001407e5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 20 Apr 2010 16:51:59 -0400 Subject: ext4: Issue the discard operation *before* releasing the blocks to be reused Otherwise, we can end up having data corruption because the blocks could get reused and then discarded! https://bugzilla.kernel.org/show_bug.cgi?id=15579 Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/ext4') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 54df209d2eed..e5ab41b559c0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2534,6 +2534,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) mb_debug(1, "gonna free %u blocks in group %u (0x%p):", entry->count, entry->group, entry); + if (test_opt(sb, DISCARD)) { + ext4_fsblk_t discard_block; + + discard_block = entry->start_blk + + ext4_group_first_block_no(sb, entry->group); + trace_ext4_discard_blocks(sb, + (unsigned long long)discard_block, + entry->count); + sb_issue_discard(sb, discard_block, entry->count); + } + err = ext4_mb_load_buddy(sb, entry->group, &e4b); /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); @@ -2555,16 +2566,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) page_cache_release(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->group); - if (test_opt(sb, DISCARD)) { - ext4_fsblk_t discard_block; - - discard_block = entry->start_blk + - ext4_group_first_block_no(sb, entry->group); - trace_ext4_discard_blocks(sb, - (unsigned long long)discard_block, - entry->count); - sb_issue_discard(sb, discard_block, entry->count); - } kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); } -- cgit v1.2.2