From 0411ba7902e09111d6f2041b4697a597d2cf7736 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 10 Jun 2010 13:10:53 +0200 Subject: ext3: Fix set but unused variables [tytso@mit.edu: Fix compilation with CONFIG_JBD_DEBUG enabled] Acked-by: tytso@mit.edu cc: linux-ext4@vger.kernel.org Signed-off-by: Andi Kleen Signed-off-by: Jan Kara --- fs/ext3/namei.c | 3 +-- fs/ext3/resize.c | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index ee184084ca42..2b35ddb70d65 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1447,7 +1447,6 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; - unsigned long offset; struct buffer_head * bh; struct ext3_dir_entry_2 *de; struct super_block * sb; @@ -1469,7 +1468,7 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, ext3_mark_inode_dirty(handle, dir); } blocks = dir->i_size >> sb->s_blocksize_bits; - for (block = 0, offset = 0; block < blocks; block++) { + for (block = 0; block < blocks; block++) { bh = ext3_bread(handle, dir, block, 0, &retval); if(!bh) return retval; diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 54351ac7cef9..0ccd7b12b73c 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -964,7 +964,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, ext3_fsblk_t n_blocks_count) { ext3_fsblk_t o_blocks_count; - unsigned long o_groups_count; ext3_grpblk_t last; ext3_grpblk_t add; struct buffer_head * bh; @@ -976,7 +975,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, * yet: we're going to revalidate es->s_blocks_count after * taking the s_resize_lock below. */ o_blocks_count = le32_to_cpu(es->s_blocks_count); - o_groups_count = EXT3_SB(sb)->s_groups_count; if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", -- cgit v1.2.2 From 4c4d3901225518ed1a4c938ba15ba09842a00770 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Jun 2010 10:20:39 +0200 Subject: ext3: remove vestiges of nobh support The nobh option was only supported for writeback mode, but given that all write paths (except mmapped writed) actually create buffer heads, it effectively was a no-op already. Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara --- fs/ext3/inode.c | 16 +--------------- fs/ext3/super.c | 17 ++++------------- 2 files changed, 5 insertions(+), 28 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 735f0190ec2a..a786db403efc 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1625,10 +1625,7 @@ static int ext3_writeback_writepage(struct page *page, goto out_fail; } - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_writepage(page, ext3_get_block, wbc); - else - ret = block_write_full_page(page, ext3_get_block, wbc); + ret = block_write_full_page(page, ext3_get_block, wbc); err = ext3_journal_stop(handle); if (!ret) @@ -1922,17 +1919,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, length = blocksize - (offset & (blocksize - 1)); iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); - /* - * For "nobh" option, we can only work if we don't need to - * read-in the page - otherwise we create buffers to do the IO. - */ - if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && - ext3_should_writeback_data(inode) && PageUptodate(page)) { - zero_user(page, offset, length); - set_page_dirty(page); - goto unlock; - } - if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6c953bb255e7..9650a956fd0e 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -661,9 +661,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) */ seq_puts(seq, ",barrier="); seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); - if (test_opt(sb, NOBH)) - seq_puts(seq, ",nobh"); - seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); if (test_opt(sb, DATA_ERR_ABORT)) seq_puts(seq, ",data_err=abort"); @@ -1255,10 +1252,12 @@ set_qf_format: *n_blocks_count = option; break; case Opt_nobh: - set_opt(sbi->s_mount_opt, NOBH); + ext3_msg(sb, KERN_WARNING, + "warning: ignoring deprecated nobh option"); break; case Opt_bh: - clear_opt(sbi->s_mount_opt, NOBH); + ext3_msg(sb, KERN_WARNING, + "warning: ignoring deprecated bh option"); break; default: ext3_msg(sb, KERN_ERR, @@ -2001,14 +2000,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) break; } - if (test_opt(sb, NOBH)) { - if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { - ext3_msg(sb, KERN_WARNING, - "warning: ignoring nobh option - " - "it is supported only with writeback mode"); - clear_opt(sbi->s_mount_opt, NOBH); - } - } /* * The journal_load will have done any necessary log recovery, * so we can safely mount the rest of the filesystem now. -- cgit v1.2.2 From f25f624263445785b94f39739a6339ba9ed3275d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Jul 2010 21:04:31 +0200 Subject: ext3: Avoid filesystem corruption after a crash under heavy delete load It can happen that ext3_free_branches calls ext3_forget() for an indirect block in an earlier transaction than a transaction in which we clear pointer to this indirect block. Thus if we crash before a transaction clearing the block pointer is committed, we will see indirect block pointing to already freed blocks and complain during orphan list cleanup. The fix is simple: Make sure ext3_forget() is called in the transaction doing block pointer clearing. This is a backport of an ext4 fix by Amir G. Signed-off-by: Jan Kara --- fs/ext3/inode.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'fs/ext3') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a786db403efc..436e5bbccbc2 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2269,27 +2269,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, (__le32*)bh->b_data + addr_per_block, depth); - /* - * We've probably journalled the indirect block several - * times during the truncate. But it's no longer - * needed and we now drop it from the transaction via - * journal_revoke(). - * - * That's easy if it's exclusively part of this - * transaction. But if it's part of the committing - * transaction then journal_forget() will simply - * brelse() it. That means that if the underlying - * block is reallocated in ext3_get_block(), - * unmap_underlying_metadata() will find this block - * and will try to get rid of it. damn, damn. - * - * If this block has already been committed to the - * journal, a revoke record will be written. And - * revoke records must be emitted *before* clearing - * this block's bit in the bitmaps. - */ - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - /* * Everything below this this pointer has been * released. Now let this top-of-subtree go. @@ -2313,6 +2292,31 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, truncate_restart_transaction(handle, inode); } + /* + * We've probably journalled the indirect block several + * times during the truncate. But it's no longer + * needed and we now drop it from the transaction via + * journal_revoke(). + * + * That's easy if it's exclusively part of this + * transaction. But if it's part of the committing + * transaction then journal_forget() will simply + * brelse() it. That means that if the underlying + * block is reallocated in ext3_get_block(), + * unmap_underlying_metadata() will find this block + * and will try to get rid of it. damn, damn. Thus + * we don't allow a block to be reallocated until + * a transaction freeing it has fully committed. + * + * We also have to make sure journal replay after a + * crash does not overwrite non-journaled data blocks + * with old metadata when the block got reallocated for + * data. Thus we have to store a revoke record for a + * block in the same transaction in which we free the + * block. + */ + ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + ext3_free_blocks(handle, inode, nr, 1); if (parent_bh) { -- cgit v1.2.2 From aa32a796389bedbcf1c7714385b18714a0743810 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 23 Jul 2010 12:49:41 +0200 Subject: ext3: default to ordered mode data=writeback mode is dangerous as it leads to higher data loss and stale data exposure when systems crash. It should not be the default, especially when all major distros ensure their ext3 filesystems default to ordered mode. Change the default mode to the safer data=ordered mode, because we should be caring far more about avoiding stale data exposure than performance. CC: linux-ext4@vger.kernel.org Signed-off-by: Dave Chinner Acked-by: Eric Sandeen Signed-off-by: Jan Kara --- fs/ext3/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext3') diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index 522b15498f45..e8c6ba0e4a3e 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig @@ -31,6 +31,7 @@ config EXT3_FS config EXT3_DEFAULTS_TO_ORDERED bool "Default to 'data=ordered' in ext3" depends on EXT3_FS + default y help The journal mode options for ext3 have different tradeoffs between when data is guaranteed to be on disk and -- cgit v1.2.2 From 5f11e6a44059f728dddd8d0dbe5b4368ea93575b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 5 Aug 2010 12:38:26 +0200 Subject: ext3: Fix dirtying of journalled buffers in data=journal mode In data=journal mode, we still use block_write_begin() to prepare page for writing. This function can occasionally mark buffer dirty which violates journalling assumptions - when a buffer is part of a transaction, it should be dirty and a buffer can be already part of a forget list of some transaction when block_write_begin() gets called. This violation of journalling assumptions then results in "JBD: Spotted dirty metadata buffer..." warnings. In fact, temporary dirtying the buffer while the page is still locked does not really cause problems to the journalling because we won't write the buffer until the page gets unlocked. So we just have to make sure to clear dirty bits before unlocking the page. Reviewed-by: "Theodore Ts'o" Signed-off-by: Jan Kara --- fs/ext3/inode.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'fs/ext3') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 436e5bbccbc2..001eb0e2d48e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1149,9 +1149,25 @@ static int walk_page_buffers( handle_t *handle, static int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh) { + int dirty = buffer_dirty(bh); + int ret; + if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; - return ext3_journal_get_write_access(handle, bh); + /* + * __block_prepare_write() could have dirtied some buffers. Clean + * the dirty bit as jbd2_journal_get_write_access() could complain + * otherwise about fs integrity issues. Setting of the dirty bit + * by __block_prepare_write() isn't a real problem here as we clear + * the bit before releasing a page lock and thus writeback cannot + * ever write the buffer. + */ + if (dirty) + clear_buffer_dirty(bh); + ret = ext3_journal_get_write_access(handle, bh); + if (!ret && dirty) + ret = ext3_journal_dirty_metadata(handle, bh); + return ret; } /* -- cgit v1.2.2