aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2010-12-14 15:27:50 -0500
committerTheodore Ts'o <tytso@mit.edu>2010-12-14 15:27:50 -0500
commit1449032be17abb69116dbc393f67ceb8bd034f92 (patch)
treef62757457241c2fdc14105afc12cb2718f7a2e68 /fs
parente8a7e48bb248a1196484d3f8afa53bded2b24e71 (diff)
ext4: Turn off multiple page-io submission by default
Jon Nelson has found a test case which causes postgresql to fail with the error: psql:t.sql:4: ERROR: invalid page header in block 38269 of relation base/16384/16581 Under memory pressure, it looks like part of a file can end up getting replaced by zero's. Until we can figure out the cause, we'll roll back the change and use block_write_full_page() instead of ext4_bio_write_page(). The new, more efficient writing function can be used via the mount option mblk_io_submit, so we can test and fix the new page I/O code. To reproduce the problem, install postgres 8.4 or 9.0, and pin enough memory such that the system just at the end of triggering writeback before running the following sql script: begin; create temporary table foo as select x as a, ARRAY[x] as b FROM generate_series(1, 10000000 ) AS x; create index foo_a_idx on foo (a); create index foo_b_idx on foo USING GIN (b); rollback; If the temporary table is created on a hard drive partition which is encrypted using dm_crypt, then under memory pressure, approximately 30-40% of the time, pgsql will issue the above failure. This patch should fix this problem, and the problem will come back if the file system is mounted with the mblk_io_submit mount option. Reported-by: Jon Nelson <jnelson@jamponi.net> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/inode.c5
-rw-r--r--fs/ext4/super.c14
3 files changed, 17 insertions, 3 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6a5edea2d70b..94ce3d7a1c4b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -910,6 +910,7 @@ struct ext4_inode_info {
910#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 910#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
911#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 911#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
912#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 912#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
913#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
913#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 914#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
914#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 915#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
915#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 916#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bdbe69902207..e659597b690b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2125,9 +2125,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2125 */ 2125 */
2126 if (unlikely(journal_data && PageChecked(page))) 2126 if (unlikely(journal_data && PageChecked(page)))
2127 err = __ext4_journalled_writepage(page, len); 2127 err = __ext4_journalled_writepage(page, len);
2128 else 2128 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
2129 err = ext4_bio_write_page(&io_submit, page, 2129 err = ext4_bio_write_page(&io_submit, page,
2130 len, mpd->wbc); 2130 len, mpd->wbc);
2131 else
2132 err = block_write_full_page(page,
2133 noalloc_get_block_write, mpd->wbc);
2131 2134
2132 if (!err) 2135 if (!err)
2133 mpd->pages_written++; 2136 mpd->pages_written++;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e32195d6aac3..fb15c9c0be74 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1026,6 +1026,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1026 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1026 !(def_mount_opts & EXT4_DEFM_NODELALLOC))
1027 seq_puts(seq, ",nodelalloc"); 1027 seq_puts(seq, ",nodelalloc");
1028 1028
1029 if (test_opt(sb, MBLK_IO_SUBMIT))
1030 seq_puts(seq, ",mblk_io_submit");
1029 if (sbi->s_stripe) 1031 if (sbi->s_stripe)
1030 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1032 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
1031 /* 1033 /*
@@ -1239,8 +1241,8 @@ enum {
1239 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1241 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1240 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1242 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
1241 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1243 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
1242 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1244 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
1243 Opt_block_validity, Opt_noblock_validity, 1245 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1244 Opt_inode_readahead_blks, Opt_journal_ioprio, 1246 Opt_inode_readahead_blks, Opt_journal_ioprio,
1245 Opt_dioread_nolock, Opt_dioread_lock, 1247 Opt_dioread_nolock, Opt_dioread_lock,
1246 Opt_discard, Opt_nodiscard, 1248 Opt_discard, Opt_nodiscard,
@@ -1304,6 +1306,8 @@ static const match_table_t tokens = {
1304 {Opt_resize, "resize"}, 1306 {Opt_resize, "resize"},
1305 {Opt_delalloc, "delalloc"}, 1307 {Opt_delalloc, "delalloc"},
1306 {Opt_nodelalloc, "nodelalloc"}, 1308 {Opt_nodelalloc, "nodelalloc"},
1309 {Opt_mblk_io_submit, "mblk_io_submit"},
1310 {Opt_nomblk_io_submit, "nomblk_io_submit"},
1307 {Opt_block_validity, "block_validity"}, 1311 {Opt_block_validity, "block_validity"},
1308 {Opt_noblock_validity, "noblock_validity"}, 1312 {Opt_noblock_validity, "noblock_validity"},
1309 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1313 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1725,6 +1729,12 @@ set_qf_format:
1725 case Opt_nodelalloc: 1729 case Opt_nodelalloc:
1726 clear_opt(sbi->s_mount_opt, DELALLOC); 1730 clear_opt(sbi->s_mount_opt, DELALLOC);
1727 break; 1731 break;
1732 case Opt_mblk_io_submit:
1733 set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
1734 break;
1735 case Opt_nomblk_io_submit:
1736 clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
1737 break;
1728 case Opt_stripe: 1738 case Opt_stripe:
1729 if (match_int(&args[0], &option)) 1739 if (match_int(&args[0], &option))
1730 return 0; 1740 return 0;