aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Sandeen <sandeen@redhat.com>2012-10-28 22:24:57 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-10-28 22:24:57 -0400
commitffb5387e85d528fb6d0d924abfa3fbf0fc484071 (patch)
treeb4e444479eada05eb93c3f1b2099a018c5aef116
parent8f0d8163b50e01f398b14bcd4dc039ac5ab18d64 (diff)
ext4: fix unjournaled inode bitmap modification
commit 119c0d4460b001e44b41dcf73dc6ee794b98bd31 changed ext4_new_inode() such that the inode bitmap was being modified outside a transaction, which could lead to corruption, and was discovered when journal_checksum found a bad checksum in the journal during log replay. Nix ran into this when using the journal_async_commit mount option, which enables journal checksumming. The ensuing journal replay failures due to the bad checksums led to filesystem corruption reported as the now infamous "Apparent serious progressive ext4 data corruption bug" [ Changed by tytso to only call ext4_journal_get_write_access() only when we're fairly certain that we're going to allocate the inode. ] I've tested this by mounting with journal_checksum and running fsstress then dropping power; I've also tested by hacking DM to create snapshots w/o first quiescing, which allows me to test journal replay repeatedly w/o actually power-cycling the box. Without the patch I hit a journal checksum error every time. With this fix it survives many iterations. Reported-by: Nix <nix@esperi.org.uk> Signed-off-by: Eric Sandeen <sandeen@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
-rw-r--r--fs/ext4/ialloc.c19
1 files changed, 9 insertions, 10 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4facdd29a350..3a100e7a62a8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -725,6 +725,10 @@ repeat_in_this_group:
725 "inode=%lu", ino + 1); 725 "inode=%lu", ino + 1);
726 continue; 726 continue;
727 } 727 }
728 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
729 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
730 if (err)
731 goto fail;
728 ext4_lock_group(sb, group); 732 ext4_lock_group(sb, group);
729 ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); 733 ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
730 ext4_unlock_group(sb, group); 734 ext4_unlock_group(sb, group);
@@ -738,6 +742,11 @@ repeat_in_this_group:
738 goto out; 742 goto out;
739 743
740got: 744got:
745 BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
746 err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
747 if (err)
748 goto fail;
749
741 /* We may have to initialize the block bitmap if it isn't already */ 750 /* We may have to initialize the block bitmap if it isn't already */
742 if (ext4_has_group_desc_csum(sb) && 751 if (ext4_has_group_desc_csum(sb) &&
743 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 752 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -771,11 +780,6 @@ got:
771 goto fail; 780 goto fail;
772 } 781 }
773 782
774 BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
775 err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
776 if (err)
777 goto fail;
778
779 BUFFER_TRACE(group_desc_bh, "get_write_access"); 783 BUFFER_TRACE(group_desc_bh, "get_write_access");
780 err = ext4_journal_get_write_access(handle, group_desc_bh); 784 err = ext4_journal_get_write_access(handle, group_desc_bh);
781 if (err) 785 if (err)
@@ -823,11 +827,6 @@ got:
823 } 827 }
824 ext4_unlock_group(sb, group); 828 ext4_unlock_group(sb, group);
825 829
826 BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
827 err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
828 if (err)
829 goto fail;
830
831 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); 830 BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
832 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); 831 err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
833 if (err) 832 if (err)