aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-10-12 19:10:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-10-12 19:10:29 -0400
commit 3280fb313911a80b13de52d9fe37f39668f2138e (patch)
tree 6903a275ab00172a88cf51ab1d570e4a3e1a286b
parent f1b2a5ace996de339292d4035f9f5b294aecd11e (diff)
parent f319fb8bf6899e08bdb8d1e09a4e7a129dfa2312 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix kconfig typo and extra whitespace
  ext4: fix build failure without procfs
  ext4: add an option to control error handling on file data
  jbd2: don't dirty original metadata buffer on abort
  ext4: add checks for errors from jbd2
  jbd2: fix error handling for checkpoint io
  jbd2: abort when failed to log metadata buffers
-rw-r--r-- Documentation/filesystems/ext4.txt 5
-rw-r--r-- fs/Kconfig 4
-rw-r--r-- fs/ext4/ext4.h 2
-rw-r--r-- fs/ext4/ioctl.c 12
-rw-r--r-- fs/ext4/super.c 41
-rw-r--r-- fs/jbd2/checkpoint.c 49
-rw-r--r-- fs/jbd2/commit.c 10
-rw-r--r-- fs/jbd2/journal.c 28
-rw-r--r-- fs/jbd2/recovery.c 7
-rw-r--r-- include/linux/jbd2.h 5
10 files changed, 131 insertions, 32 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 74484e696405..eb154ef36c2a 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -223,6 +223,11 @@ errors=remount-ro(*) Remount the filesystem read-only on an error.
223errors=continue Keep going on a filesystem error. 223errors=continue Keep going on a filesystem error.
224errors=panic Panic and halt the machine if an error occurs. 224errors=panic Panic and halt the machine if an error occurs.
225 225
226data_err=ignore(*) Just print an error message if an error occurs
227 in a file data buffer in ordered mode.
228data_err=abort Abort the journal if an error occurs in a file
229 data buffer in ordered mode.
230
226grpid Give objects the same group ID as their creator. 231grpid Give objects the same group ID as their creator.
227bsdgroups 232bsdgroups
228 233
diff --git a/fs/Kconfig b/fs/Kconfig
index 40183d94b683..f54a157a0296 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -170,8 +170,8 @@ config EXT4DEV_COMPAT
170 help 170 help
171 Starting with 2.6.28, the name of the ext4 filesystem was 171 Starting with 2.6.28, the name of the ext4 filesystem was
172 renamed from ext4dev to ext4. Unfortunately there are some 172 renamed from ext4dev to ext4. Unfortunately there are some
173 lagecy userspace programs (such as klibc's fstype) have 173 legacy userspace programs (such as klibc's fstype) have
174 "ext4dev" hardcoded. 174 "ext4dev" hardcoded.
175 175
176 To enable backwards compatibility so that systems that are 176 To enable backwards compatibility so that systems that are
177 still expecting to mount ext4 filesystems using ext4dev, 177 still expecting to mount ext4 filesystems using ext4dev,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f46a513a5157..6690a41cdd9f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -540,6 +540,8 @@ do { \
540#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 540#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
541#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 541#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
542#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 542#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
543#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
544
543/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ 545/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
544#ifndef _LINUX_EXT2_FS_H 546#ifndef _LINUX_EXT2_FS_H
545#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 547#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index ea27eaa0cfe5..dc99b4776d58 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -192,7 +192,7 @@ setversion_out:
192 case EXT4_IOC_GROUP_EXTEND: { 192 case EXT4_IOC_GROUP_EXTEND: {
193 ext4_fsblk_t n_blocks_count; 193 ext4_fsblk_t n_blocks_count;
194 struct super_block *sb = inode->i_sb; 194 struct super_block *sb = inode->i_sb;
195 int err; 195 int err, err2;
196 196
197 if (!capable(CAP_SYS_RESOURCE)) 197 if (!capable(CAP_SYS_RESOURCE))
198 return -EPERM; 198 return -EPERM;
@@ -206,8 +206,10 @@ setversion_out:
206 206
207 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 207 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
208 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 208 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
209 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 209 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
210 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 210 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
211 if (err == 0)
212 err = err2;
211 mnt_drop_write(filp->f_path.mnt); 213 mnt_drop_write(filp->f_path.mnt);
212 214
213 return err; 215 return err;
@@ -215,7 +217,7 @@ setversion_out:
215 case EXT4_IOC_GROUP_ADD: { 217 case EXT4_IOC_GROUP_ADD: {
216 struct ext4_new_group_data input; 218 struct ext4_new_group_data input;
217 struct super_block *sb = inode->i_sb; 219 struct super_block *sb = inode->i_sb;
218 int err; 220 int err, err2;
219 221
220 if (!capable(CAP_SYS_RESOURCE)) 222 if (!capable(CAP_SYS_RESOURCE))
221 return -EPERM; 223 return -EPERM;
@@ -230,8 +232,10 @@ setversion_out:
230 232
231 err = ext4_group_add(sb, &input); 233 err = ext4_group_add(sb, &input);
232 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 234 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
233 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 235 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
234 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 236 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
237 if (err == 0)
238 err = err2;
235 mnt_drop_write(filp->f_path.mnt); 239 mnt_drop_write(filp->f_path.mnt);
236 240
237 return err; 241 return err;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0e661c569660..fb940c22ab0d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -507,7 +507,8 @@ static void ext4_put_super(struct super_block *sb)
507 ext4_mb_release(sb); 507 ext4_mb_release(sb);
508 ext4_ext_release(sb); 508 ext4_ext_release(sb);
509 ext4_xattr_put_super(sb); 509 ext4_xattr_put_super(sb);
510 jbd2_journal_destroy(sbi->s_journal); 510 if (jbd2_journal_destroy(sbi->s_journal) < 0)
511 ext4_abort(sb, __func__, "Couldn't clean up the journal");
511 sbi->s_journal = NULL; 512 sbi->s_journal = NULL;
512 if (!(sb->s_flags & MS_RDONLY)) { 513 if (!(sb->s_flags & MS_RDONLY)) {
513 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 514 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -777,6 +778,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
777 seq_printf(seq, ",inode_readahead_blks=%u", 778 seq_printf(seq, ",inode_readahead_blks=%u",
778 sbi->s_inode_readahead_blks); 779 sbi->s_inode_readahead_blks);
779 780
781 if (test_opt(sb, DATA_ERR_ABORT))
782 seq_puts(seq, ",data_err=abort");
783
780 ext4_show_quota_options(seq, sb); 784 ext4_show_quota_options(seq, sb);
781 return 0; 785 return 0;
782} 786}
@@ -906,6 +910,7 @@ enum {
906 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, 910 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
907 Opt_journal_checksum, Opt_journal_async_commit, 911 Opt_journal_checksum, Opt_journal_async_commit,
908 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 912 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
913 Opt_data_err_abort, Opt_data_err_ignore,
909 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 914 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
910 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 915 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
911 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, 916 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -952,6 +957,8 @@ static match_table_t tokens = {
952 {Opt_data_journal, "data=journal"}, 957 {Opt_data_journal, "data=journal"},
953 {Opt_data_ordered, "data=ordered"}, 958 {Opt_data_ordered, "data=ordered"},
954 {Opt_data_writeback, "data=writeback"}, 959 {Opt_data_writeback, "data=writeback"},
960 {Opt_data_err_abort, "data_err=abort"},
961 {Opt_data_err_ignore, "data_err=ignore"},
955 {Opt_offusrjquota, "usrjquota="}, 962 {Opt_offusrjquota, "usrjquota="},
956 {Opt_usrjquota, "usrjquota=%s"}, 963 {Opt_usrjquota, "usrjquota=%s"},
957 {Opt_offgrpjquota, "grpjquota="}, 964 {Opt_offgrpjquota, "grpjquota="},
@@ -1186,6 +1193,12 @@ static int parse_options(char *options, struct super_block *sb,
1186 sbi->s_mount_opt |= data_opt; 1193 sbi->s_mount_opt |= data_opt;
1187 } 1194 }
1188 break; 1195 break;
1196 case Opt_data_err_abort:
1197 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1198 break;
1199 case Opt_data_err_ignore:
1200 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1201 break;
1189#ifdef CONFIG_QUOTA 1202#ifdef CONFIG_QUOTA
1190 case Opt_usrjquota: 1203 case Opt_usrjquota:
1191 qtype = USRQUOTA; 1204 qtype = USRQUOTA;
@@ -2218,6 +2231,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2218 goto failed_mount; 2231 goto failed_mount;
2219 } 2232 }
2220 2233
2234#ifdef CONFIG_PROC_FS
2221 if (ext4_proc_root) 2235 if (ext4_proc_root)
2222 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); 2236 sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2223 2237
@@ -2225,6 +2239,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2225 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, 2239 proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2226 &ext4_ui_proc_fops, 2240 &ext4_ui_proc_fops,
2227 &sbi->s_inode_readahead_blks); 2241 &sbi->s_inode_readahead_blks);
2242#endif
2228 2243
2229 bgl_lock_init(&sbi->s_blockgroup_lock); 2244 bgl_lock_init(&sbi->s_blockgroup_lock);
2230 2245
@@ -2534,6 +2549,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2534 journal->j_flags |= JBD2_BARRIER; 2549 journal->j_flags |= JBD2_BARRIER;
2535 else 2550 else
2536 journal->j_flags &= ~JBD2_BARRIER; 2551 journal->j_flags &= ~JBD2_BARRIER;
2552 if (test_opt(sb, DATA_ERR_ABORT))
2553 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2554 else
2555 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2537 spin_unlock(&journal->j_state_lock); 2556 spin_unlock(&journal->j_state_lock);
2538} 2557}
2539 2558
@@ -2853,7 +2872,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
2853 journal_t *journal = EXT4_SB(sb)->s_journal; 2872 journal_t *journal = EXT4_SB(sb)->s_journal;
2854 2873
2855 jbd2_journal_lock_updates(journal); 2874 jbd2_journal_lock_updates(journal);
2856 jbd2_journal_flush(journal); 2875 if (jbd2_journal_flush(journal) < 0)
2876 goto out;
2877
2857 lock_super(sb); 2878 lock_super(sb);
2858 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && 2879 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
2859 sb->s_flags & MS_RDONLY) { 2880 sb->s_flags & MS_RDONLY) {
@@ -2862,6 +2883,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
2862 ext4_commit_super(sb, es, 1); 2883 ext4_commit_super(sb, es, 1);
2863 } 2884 }
2864 unlock_super(sb); 2885 unlock_super(sb);
2886
2887out:
2865 jbd2_journal_unlock_updates(journal); 2888 jbd2_journal_unlock_updates(journal);
2866} 2889}
2867 2890
@@ -2962,7 +2985,13 @@ static void ext4_write_super_lockfs(struct super_block *sb)
2962 2985
2963 /* Now we set up the journal barrier. */ 2986 /* Now we set up the journal barrier. */
2964 jbd2_journal_lock_updates(journal); 2987 jbd2_journal_lock_updates(journal);
2965 jbd2_journal_flush(journal); 2988
2989 /*
2990 * We don't want to clear needs_recovery flag when we failed
2991 * to flush the journal.
2992 */
2993 if (jbd2_journal_flush(journal) < 0)
2994 return;
2966 2995
2967 /* Journal blocked and flushed, clear needs_recovery flag. */ 2996 /* Journal blocked and flushed, clear needs_recovery flag. */
2968 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 2997 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -3402,8 +3431,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3402 * otherwise be livelocked... 3431 * otherwise be livelocked...
3403 */ 3432 */
3404 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 3433 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3405 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3434 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3406 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3435 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3436 if (err) {
3437 path_put(&nd.path);
3438 return err;
3439 }
3407 } 3440 }
3408 3441
3409 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 3442 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 42895d369458..9203c3332f17 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -94,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
94 int ret = 0; 94 int ret = 0;
95 struct buffer_head *bh = jh2bh(jh); 95 struct buffer_head *bh = jh2bh(jh);
96 96
97 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 97 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
98 JBUFFER_TRACE(jh, "remove from checkpoint list"); 99 JBUFFER_TRACE(jh, "remove from checkpoint list");
99 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 100 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
100 jbd_unlock_bh_state(bh); 101 jbd_unlock_bh_state(bh);
@@ -176,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
176 * buffers. Note that we take the buffers in the opposite ordering 177 * buffers. Note that we take the buffers in the opposite ordering
177 * from the one in which they were submitted for IO. 178 * from the one in which they were submitted for IO.
178 * 179 *
180 * Return 0 on success, and return <0 if some buffers have failed
181 * to be written out.
182 *
179 * Called with j_list_lock held. 183 * Called with j_list_lock held.
180 */ 184 */
181static void __wait_cp_io(journal_t *journal, transaction_t *transaction) 185static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
182{ 186{
183 struct journal_head *jh; 187 struct journal_head *jh;
184 struct buffer_head *bh; 188 struct buffer_head *bh;
185 tid_t this_tid; 189 tid_t this_tid;
186 int released = 0; 190 int released = 0;
191 int ret = 0;
187 192
188 this_tid = transaction->t_tid; 193 this_tid = transaction->t_tid;
189restart: 194restart:
190 /* Did somebody clean up the transaction in the meanwhile? */ 195 /* Did somebody clean up the transaction in the meanwhile? */
191 if (journal->j_checkpoint_transactions != transaction || 196 if (journal->j_checkpoint_transactions != transaction ||
192 transaction->t_tid != this_tid) 197 transaction->t_tid != this_tid)
193 return; 198 return ret;
194 while (!released && transaction->t_checkpoint_io_list) { 199 while (!released && transaction->t_checkpoint_io_list) {
195 jh = transaction->t_checkpoint_io_list; 200 jh = transaction->t_checkpoint_io_list;
196 bh = jh2bh(jh); 201 bh = jh2bh(jh);
@@ -210,6 +215,9 @@ restart:
210 spin_lock(&journal->j_list_lock); 215 spin_lock(&journal->j_list_lock);
211 goto restart; 216 goto restart;
212 } 217 }
218 if (unlikely(buffer_write_io_error(bh)))
219 ret = -EIO;
220
213 /* 221 /*
214 * Now in whatever state the buffer currently is, we know that 222 * Now in whatever state the buffer currently is, we know that
215 * it has been written out and so we can drop it from the list 223 * it has been written out and so we can drop it from the list
@@ -219,6 +227,8 @@ restart:
219 jbd2_journal_remove_journal_head(bh); 227 jbd2_journal_remove_journal_head(bh);
220 __brelse(bh); 228 __brelse(bh);
221 } 229 }
230
231 return ret;
222} 232}
223 233
224#define NR_BATCH 64 234#define NR_BATCH 64
@@ -242,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
242 * Try to flush one buffer from the checkpoint list to disk. 252 * Try to flush one buffer from the checkpoint list to disk.
243 * 253 *
244 * Return 1 if something happened which requires us to abort the current 254 * Return 1 if something happened which requires us to abort the current
245 * scan of the checkpoint list. 255 * scan of the checkpoint list. Return <0 if the buffer has failed to
256 * be written out.
246 * 257 *
247 * Called with j_list_lock held and drops it if 1 is returned 258 * Called with j_list_lock held and drops it if 1 is returned
248 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 259 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -274,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
274 jbd2_log_wait_commit(journal, tid); 285 jbd2_log_wait_commit(journal, tid);
275 ret = 1; 286 ret = 1;
276 } else if (!buffer_dirty(bh)) { 287 } else if (!buffer_dirty(bh)) {
288 ret = 1;
289 if (unlikely(buffer_write_io_error(bh)))
290 ret = -EIO;
277 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 291 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
278 BUFFER_TRACE(bh, "remove from checkpoint"); 292 BUFFER_TRACE(bh, "remove from checkpoint");
279 __jbd2_journal_remove_checkpoint(jh); 293 __jbd2_journal_remove_checkpoint(jh);
@@ -281,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
281 jbd_unlock_bh_state(bh); 295 jbd_unlock_bh_state(bh);
282 jbd2_journal_remove_journal_head(bh); 296 jbd2_journal_remove_journal_head(bh);
283 __brelse(bh); 297 __brelse(bh);
284 ret = 1;
285 } else { 298 } else {
286 /* 299 /*
287 * Important: we are about to write the buffer, and 300 * Important: we are about to write the buffer, and
@@ -314,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
314 * to disk. We submit larger chunks of data at once. 327 * to disk. We submit larger chunks of data at once.
315 * 328 *
316 * The journal should be locked before calling this function. 329 * The journal should be locked before calling this function.
330 * Called with j_checkpoint_mutex held.
317 */ 331 */
318int jbd2_log_do_checkpoint(journal_t *journal) 332int jbd2_log_do_checkpoint(journal_t *journal)
319{ 333{
@@ -339,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
339 * OK, we need to start writing disk blocks. Take one transaction 353 * OK, we need to start writing disk blocks. Take one transaction
340 * and write it. 354 * and write it.
341 */ 355 */
356 result = 0;
342 spin_lock(&journal->j_list_lock); 357 spin_lock(&journal->j_list_lock);
343 if (!journal->j_checkpoint_transactions) 358 if (!journal->j_checkpoint_transactions)
344 goto out; 359 goto out;
@@ -357,7 +372,7 @@ restart:
357 int batch_count = 0; 372 int batch_count = 0;
358 struct buffer_head *bhs[NR_BATCH]; 373 struct buffer_head *bhs[NR_BATCH];
359 struct journal_head *jh; 374 struct journal_head *jh;
360 int retry = 0; 375 int retry = 0, err;
361 376
362 while (!retry && transaction->t_checkpoint_list) { 377 while (!retry && transaction->t_checkpoint_list) {
363 struct buffer_head *bh; 378 struct buffer_head *bh;
@@ -371,6 +386,8 @@ restart:
371 } 386 }
372 retry = __process_buffer(journal, jh, bhs, &batch_count, 387 retry = __process_buffer(journal, jh, bhs, &batch_count,
373 transaction); 388 transaction);
389 if (retry < 0 && !result)
390 result = retry;
374 if (!retry && (need_resched() || 391 if (!retry && (need_resched() ||
375 spin_needbreak(&journal->j_list_lock))) { 392 spin_needbreak(&journal->j_list_lock))) {
376 spin_unlock(&journal->j_list_lock); 393 spin_unlock(&journal->j_list_lock);
@@ -395,14 +412,18 @@ restart:
395 * Now we have cleaned up the first transaction's checkpoint 412 * Now we have cleaned up the first transaction's checkpoint
396 * list. Let's clean up the second one 413 * list. Let's clean up the second one
397 */ 414 */
398 __wait_cp_io(journal, transaction); 415 err = __wait_cp_io(journal, transaction);
416 if (!result)
417 result = err;
399 } 418 }
400out: 419out:
401 spin_unlock(&journal->j_list_lock); 420 spin_unlock(&journal->j_list_lock);
402 result = jbd2_cleanup_journal_tail(journal);
403 if (result < 0) 421 if (result < 0)
404 return result; 422 jbd2_journal_abort(journal, result);
405 return 0; 423 else
424 result = jbd2_cleanup_journal_tail(journal);
425
426 return (result < 0) ? result : 0;
406} 427}
407 428
408/* 429/*
@@ -418,8 +439,9 @@ out:
418 * This is the only part of the journaling code which really needs to be 439 * This is the only part of the journaling code which really needs to be
419 * aware of transaction aborts. Checkpointing involves writing to the 440 * aware of transaction aborts. Checkpointing involves writing to the
420 * main filesystem area rather than to the journal, so it can proceed 441 * main filesystem area rather than to the journal, so it can proceed
421 * even in abort state, but we must not update the journal superblock if 442 * even in abort state, but we must not update the super block if
422 * we have an abort error outstanding. 443 * checkpointing may have failed. Otherwise, we would lose some metadata
444 * buffers which should be written-back to the filesystem.
423 */ 445 */
424 446
425int jbd2_cleanup_journal_tail(journal_t *journal) 447int jbd2_cleanup_journal_tail(journal_t *journal)
@@ -428,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
428 tid_t first_tid; 450 tid_t first_tid;
429 unsigned long blocknr, freed; 451 unsigned long blocknr, freed;
430 452
453 if (is_journal_aborted(journal))
454 return 1;
455
431 /* OK, work out the oldest transaction remaining in the log, and 456 /* OK, work out the oldest transaction remaining in the log, and
432 * the log block it starts at. 457 * the log block it starts at.
433 * 458 *
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0d3814a35ed1..0abe02c4242a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -504,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
504 jh = commit_transaction->t_buffers; 504 jh = commit_transaction->t_buffers;
505 505
506 /* If we're in abort mode, we just un-journal the buffer and 506 /* If we're in abort mode, we just un-journal the buffer and
507 release it for background writing. */ 507 release it. */
508 508
509 if (is_journal_aborted(journal)) { 509 if (is_journal_aborted(journal)) {
510 clear_buffer_jbddirty(jh2bh(jh));
510 JBUFFER_TRACE(jh, "journal is aborting: refile"); 511 JBUFFER_TRACE(jh, "journal is aborting: refile");
511 jbd2_journal_refile_buffer(journal, jh); 512 jbd2_journal_refile_buffer(journal, jh);
512 /* If that was the last one, we need to clean up 513 /* If that was the last one, we need to clean up
@@ -683,6 +684,8 @@ start_journal_io:
683 printk(KERN_WARNING 684 printk(KERN_WARNING
684 "JBD2: Detected IO errors while flushing file data " 685 "JBD2: Detected IO errors while flushing file data "
685 "on %s\n", journal->j_devname); 686 "on %s\n", journal->j_devname);
687 if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
688 jbd2_journal_abort(journal, err);
686 err = 0; 689 err = 0;
687 } 690 }
688 691
@@ -783,6 +786,9 @@ wait_for_iobuf:
783 /* AKPM: bforget here */ 786 /* AKPM: bforget here */
784 } 787 }
785 788
789 if (err)
790 jbd2_journal_abort(journal, err);
791
786 jbd_debug(3, "JBD: commit phase 5\n"); 792 jbd_debug(3, "JBD: commit phase 5\n");
787 793
788 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 794 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -881,6 +887,8 @@ restart_loop:
881 if (buffer_jbddirty(bh)) { 887 if (buffer_jbddirty(bh)) {
882 JBUFFER_TRACE(jh, "add to new checkpointing trans"); 888 JBUFFER_TRACE(jh, "add to new checkpointing trans");
883 __jbd2_journal_insert_checkpoint(jh, commit_transaction); 889 __jbd2_journal_insert_checkpoint(jh, commit_transaction);
890 if (is_journal_aborted(journal))
891 clear_buffer_jbddirty(bh);
884 JBUFFER_TRACE(jh, "refile for checkpoint writeback"); 892 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
885 __jbd2_journal_refile_buffer(jh); 893 __jbd2_journal_refile_buffer(jh);
886 jbd_unlock_bh_state(bh); 894 jbd_unlock_bh_state(bh);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 01c3901c3a07..783de118de92 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1451,9 +1451,12 @@ recovery_error:
1451 * 1451 *
1452 * Release a journal_t structure once it is no longer in use by the 1452 * Release a journal_t structure once it is no longer in use by the
1453 * journaled object. 1453 * journaled object.
1454 * Return <0 if we couldn't clean up the journal.
1454 */ 1455 */
1455void jbd2_journal_destroy(journal_t *journal) 1456int jbd2_journal_destroy(journal_t *journal)
1456{ 1457{
1458 int err = 0;
1459
1457 /* Wait for the commit thread to wake up and die. */ 1460 /* Wait for the commit thread to wake up and die. */
1458 journal_kill_thread(journal); 1461 journal_kill_thread(journal);
1459 1462
@@ -1476,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal)
1476 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1479 J_ASSERT(journal->j_checkpoint_transactions == NULL);
1477 spin_unlock(&journal->j_list_lock); 1480 spin_unlock(&journal->j_list_lock);
1478 1481
1479 /* We can now mark the journal as empty. */
1480 journal->j_tail = 0;
1481 journal->j_tail_sequence = ++journal->j_transaction_sequence;
1482 if (journal->j_sb_buffer) { 1482 if (journal->j_sb_buffer) {
1483 jbd2_journal_update_superblock(journal, 1); 1483 if (!is_journal_aborted(journal)) {
1484 /* We can now mark the journal as empty. */
1485 journal->j_tail = 0;
1486 journal->j_tail_sequence =
1487 ++journal->j_transaction_sequence;
1488 jbd2_journal_update_superblock(journal, 1);
1489 } else {
1490 err = -EIO;
1491 }
1484 brelse(journal->j_sb_buffer); 1492 brelse(journal->j_sb_buffer);
1485 } 1493 }
1486 1494
@@ -1492,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal)
1492 jbd2_journal_destroy_revoke(journal); 1500 jbd2_journal_destroy_revoke(journal);
1493 kfree(journal->j_wbuf); 1501 kfree(journal->j_wbuf);
1494 kfree(journal); 1502 kfree(journal);
1503
1504 return err;
1495} 1505}
1496 1506
1497 1507
@@ -1717,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal)
1717 spin_lock(&journal->j_list_lock); 1727 spin_lock(&journal->j_list_lock);
1718 while (!err && journal->j_checkpoint_transactions != NULL) { 1728 while (!err && journal->j_checkpoint_transactions != NULL) {
1719 spin_unlock(&journal->j_list_lock); 1729 spin_unlock(&journal->j_list_lock);
1730 mutex_lock(&journal->j_checkpoint_mutex);
1720 err = jbd2_log_do_checkpoint(journal); 1731 err = jbd2_log_do_checkpoint(journal);
1732 mutex_unlock(&journal->j_checkpoint_mutex);
1721 spin_lock(&journal->j_list_lock); 1733 spin_lock(&journal->j_list_lock);
1722 } 1734 }
1723 spin_unlock(&journal->j_list_lock); 1735 spin_unlock(&journal->j_list_lock);
1736
1737 if (is_journal_aborted(journal))
1738 return -EIO;
1739
1724 jbd2_cleanup_journal_tail(journal); 1740 jbd2_cleanup_journal_tail(journal);
1725 1741
1726 /* Finally, mark the journal as really needing no recovery. 1742 /* Finally, mark the journal as really needing no recovery.
@@ -1742,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal)
1742 J_ASSERT(journal->j_head == journal->j_tail); 1758 J_ASSERT(journal->j_head == journal->j_tail);
1743 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1759 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1744 spin_unlock(&journal->j_state_lock); 1760 spin_unlock(&journal->j_state_lock);
1745 return err; 1761 return 0;
1746} 1762}
1747 1763
1748/** 1764/**
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 058f50f65b76..73063285b13f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -225,7 +225,7 @@ do { \
225 */ 225 */
226int jbd2_journal_recover(journal_t *journal) 226int jbd2_journal_recover(journal_t *journal)
227{ 227{
228 int err; 228 int err, err2;
229 journal_superblock_t * sb; 229 journal_superblock_t * sb;
230 230
231 struct recovery_info info; 231 struct recovery_info info;
@@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal)
263 journal->j_transaction_sequence = ++info.end_transaction; 263 journal->j_transaction_sequence = ++info.end_transaction;
264 264
265 jbd2_journal_clear_revoke(journal); 265 jbd2_journal_clear_revoke(journal);
266 sync_blockdev(journal->j_fs_dev); 266 err2 = sync_blockdev(journal->j_fs_dev);
267 if (!err)
268 err = err2;
269
267 return err; 270 return err;
268} 271}
269 272
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 66c3499478b5..d2e91ea998fd 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -967,6 +967,9 @@ struct journal_s
967#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ 967#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */
968#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ 968#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */
969#define JBD2_BARRIER 0x020 /* Use IDE barriers */ 969#define JBD2_BARRIER 0x020 /* Use IDE barriers */
970#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
971 * data write error in ordered
972 * mode */
970 973
971/* 974/*
972 * Function declarations for the journaling transaction and buffer 975 * Function declarations for the journaling transaction and buffer
@@ -1060,7 +1063,7 @@ extern void jbd2_journal_clear_features
1060 (journal_t *, unsigned long, unsigned long, unsigned long); 1063 (journal_t *, unsigned long, unsigned long, unsigned long);
1061extern int jbd2_journal_create (journal_t *); 1064extern int jbd2_journal_create (journal_t *);
1062extern int jbd2_journal_load (journal_t *journal); 1065extern int jbd2_journal_load (journal_t *journal);
1063extern void jbd2_journal_destroy (journal_t *); 1066extern int jbd2_journal_destroy (journal_t *);
1064extern int jbd2_journal_recover (journal_t *journal); 1067extern int jbd2_journal_recover (journal_t *journal);
1065extern int jbd2_journal_wipe (journal_t *, int); 1068extern int jbd2_journal_wipe (journal_t *, int);
1066extern int jbd2_journal_skip_recovery (journal_t *); 1069extern int jbd2_journal_skip_recovery (journal_t *);